From c36bc725cd801efd53f2db2aee9d91bca616369f Mon Sep 17 00:00:00 2001 From: Rory& Date: Tue, 1 Jul 2025 14:30:12 +0200 Subject: Various changes --- .idea/.gitignore | 8 ++ .idea/editor.xml | 247 ++++++++++++++++++++++++++++++++++ .idea/encodings.xml | 4 + .idea/misc.xml | 17 +++ .idea/vcs.xml | 6 + flake.nix | 19 ++- main.c | 183 ++++++++++++++++++++----- main.sh | 2 +- template/ap_cgit.txt | 1 + template/ap_emma.txt | 3 +- template/ap_matrix.txt | 6 +- template/ap_owncast.txt | 4 +- template/p_emma_unused_domains.txt | 9 ++ template/p_paths.txt | 5 +- template/p_separate.txt | 5 +- template/p_user_agent_bot.txt | 11 +- template/p_user_agent_likely_fake.txt | 3 +- 17 files changed, 481 insertions(+), 52 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/editor.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/vcs.xml create mode 100644 template/ap_cgit.txt create mode 100644 template/p_emma_unused_domains.txt diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000..ec90224 --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,247 @@ + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..df87cf9 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..b7e78e5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,17 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/flake.nix b/flake.nix index c384ad5..351c138 100644 --- a/flake.nix +++ b/flake.nix @@ -1,11 +1,18 @@ { inputs.nixpkgs.url = "github:NixOS/nixpkgs"; - outputs = { self, nixpkgs }: { - devShells.default = nixpkgs.lib.mkShell { - buildInputs = [ - nixpkgs.libnftnl - ]; - }; + outputs = { self, nixpkgs }: + let + system = "x86_64-linux"; + pkgs = import nixpkgs { + inherit system; + }; + in { + devShells.x86_64-linux.default = pkgs.mkShell { + buildInputs = [ + pkgs.meson + pkgs.gcc + ]; + }; }; } diff --git a/main.c b/main.c index 487f700..d94c50d 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -13,6 +14,32 @@ // #define ACCESS_LOG_FILE "access.log.1" #define ACCESS_LOG_FILE "/var/log/nginx/access.log" +#define print_bool_opt(x) \ + if (x) { \ + fprintf(stderr, #x ": true\n"); \ + } else { \ + fprintf(stderr, #x ": false\n"); \ + } +#define print_str_opt(x) \ + if (x != nullptr) { \ + fprintf(stderr, #x ": %s\n", x); \ + } else { \ + fprintf(stderr, #x ": null\n"); \ + } + +#define ansi(x) "\033[" x +#define ansi_mod(x) "\033[" x "m" + +#define ANSI_CLEAR_LINE ansi("2K") +#define ANSI_BLINK ansi_mod("5") +#define ANSI_RESET ansi_mod("0") + +#define ANSI_COLOR(r,g,b) ansi_mod("38;2;" #r ";" #g ";" #b) +#define ANSI_HSL(h,s,l) ansi_mod("38;2;" \ + (int)((l < 50 ? l + s : l - s) * (1 + h / 360.0) / 100.0 * 255) ";" \ + (int)((l < 50 ? l + s : l - s) * (1 - h / 360.0) / 100.0 * 255) ";" \ + (int)(l * 255 / 100)) + typedef struct { char** patterns; @@ -20,6 +47,7 @@ typedef struct char** allowed_ips; // size_t allowedIpCount; char** banned_ips; + int* banned_ip_hits; size_t banned_ip_count; size_t banned_ip_size; } MatchRules; @@ -32,6 +60,10 @@ bool print_bans = true; bool print_only = false; bool print_progress = true; +bool check_allowed_ip = true; +bool check_allowed_pattern = true; +bool check_already_banned_ip = true; + bool dump_patterns = false; char* allowed_patterns_path = ALLOWED_PATTERNS_FILE; @@ -50,6 +82,24 @@ void* xmalloc(size_t size) return ptr; } +char* str_to_hex(const char* str) +{ + const size_t len = strlen(str); + char* hex = xmalloc(len * 2 + 3); + for (size_t i = 0; i < len; i++) + { + sprintf(hex + i * 3, "%02x ", (unsigned char)str[i]); + } + sprintf(hex + len * 3, "00 "); + hex[len * 3 + 2] = '\0'; + return hex; +} + +bool streq(const char* a, const char* b) +{ + return a == b || (a != nullptr && b != nullptr && strcmp(a, b) == 0); +} + void signal_handler(int signal_number) { int wait_status; @@ -193,7 +243,7 @@ char** read_lines(const char* filename) char** grow_string_array(char** array, size_t newSize) { - printf("Growing charptr array to %zu\n", newSize); + fprintf(stderr, ANSI_CLEAR_LINE ANSI_BLINK "Growing charptr array to %zu" ANSI_RESET "\n", newSize); char** newArray = realloc(array, sizeof(char*) * newSize); if (newArray == NULL) { @@ -203,9 +253,20 @@ char** grow_string_array(char** array, size_t newSize) return newArray; } -void ban_ip(char* ip) +int* grow_int_array(int* array, size_t newSize) { + fprintf(stderr, ANSI_CLEAR_LINE ANSI_BLINK "Growing int array to %zu" ANSI_RESET "\n", newSize); + int* newArray = realloc(array, sizeof(int) * newSize); + if (newArray == NULL) + { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + return newArray; +} +void ban_ip(char* ip) +{ } void process_line(char* line, void* cbData) @@ -229,37 +290,58 @@ void process_line(char* line, void* cbData) // printf("meow %s\n", ip); - for (int j = 0; rules->allowed_ips[j] != NULL; j++) - { - if (strstr(ip, rules->allowed_ips[j]) == ip) + if (check_allowed_ip) + for (int j = 0; rules->allowed_ips[j] != NULL; j++) { - if (print_allowed_ip) - printf("Allowed IP: \"%s\" (~%s): %s\n", ip, rules->allowed_ips[j], line); - free(ip); - return; + if (streq(ip, rules->allowed_ips[j])) + { + if (print_allowed_ip) + printf("Allowed IP: \"%s\" (~%s): %s\n", ip, rules->allowed_ips[j], line); + free(ip); + return; + } } - } - for (size_t j = 0; j < rules->banned_ip_count; j++) - { - if (strstr(ip, rules->banned_ips[j]) == ip) + if (check_already_banned_ip) + for (size_t j = 0; j < rules->banned_ip_count; j++) { - if (print_already_banned_ip) - printf("Banned IP: \"%s\": %s\n", rules->banned_ips[j], line); - free(ip); - return; + if (strlen(rules->banned_ips[j]) < 5) + { + for (size_t k = 0; k < rules->banned_ip_count; k++) + { + printf("rules->banned_ips[%lu] = \"%s\" (#%s)\n", k, rules->banned_ips[k], str_to_hex(rules->banned_ips[k])); + } + printf("ASSERT: Broken rule #%lu: \"%s\" (#%s)\n", j, rules->banned_ips[j], str_to_hex(rules->banned_ips[j])); + abort(); + } + + if (streq(ip, rules->banned_ips[j])) + { + if (print_already_banned_ip) + printf("Banned IP: \"%s\" (rule %lu: %s): %s\n", ip, j, rules->banned_ips[j], line); + + if (strlen(ip) != strlen(rules->banned_ips[j]) || strcmp(ip, rules->banned_ips[j]) != 0) + { + fprintf(stderr, "ASSERT: IP address length did not match! #%lu: \"%s\" (#%s) != \"%s\" (#%s)\n", j, rules->banned_ips[j], str_to_hex(rules->banned_ips[j]), ip, + str_to_hex(ip)); + abort(); + } + + free(ip); + return; + } } - } - for (int i = 0; rules->allowed_patterns[i] != NULL; i++) - { - if (strstr(line, rules->allowed_patterns[i]) != NULL) + if (check_allowed_pattern) + for (int i = 0; rules->allowed_patterns[i] != NULL; i++) { - if (print_allowed_pattern) - printf("%15s matched allowed pattern \"%s\": %s\n", ip, rules->allowed_patterns[i], line); - return; + if (strstr(line, rules->allowed_patterns[i]) != NULL) + { + if (print_allowed_pattern) + printf("%15s matched allowed pattern \"%s\": %s\n", ip, rules->allowed_patterns[i], line); + return; + } } - } for (int i = 0; rules->patterns[i] != NULL; i++) { @@ -269,11 +351,14 @@ void process_line(char* line, void* cbData) { rules->banned_ip_size *= 2; rules->banned_ips = grow_string_array(rules->banned_ips, rules->banned_ip_size); + rules->banned_ip_hits = grow_int_array(rules->banned_ip_hits, rules->banned_ip_size); } - rules->banned_ips[rules->banned_ip_count] = ip; rules->banned_ips[rules->banned_ip_count][strcspn(rules->banned_ips[rules->banned_ip_count], "\n")] = 0; rules->banned_ips[rules->banned_ip_count + 1] = nullptr; + rules->banned_ips[rules->banned_ip_count] = ip; + rules->banned_ip_hits[rules->banned_ip_count]++; + fprintf(stderr, "DBG: %s -> %d hits\n", rules->banned_ips[rules->banned_ip_count], rules->banned_ip_hits[rules->banned_ip_count]); rules->banned_ip_count++; if (print_bans) printf("[Bans=%6lu] %15s matched pattern %4d (\"%s\"): %s\n", rules->banned_ip_count, ip, i, rules->patterns[i], line); @@ -342,6 +427,12 @@ int main(int argc, char* argv[]) print_only = strstr(argv[i], "=false") == NULL; else if (strstr(argv[i], "--print-progress") != NULL) print_progress = strstr(argv[i], "=false") == NULL; + else if (strstr(argv[i], "--check-allowed-ip") != NULL) + check_allowed_ip = strstr(argv[i], "=false") == NULL; + else if (strstr(argv[i], "--check-allowed-pattern") != NULL) + check_allowed_pattern = strstr(argv[i], "=false") == NULL; + else if (strstr(argv[i], "--check-already-banned-ip") != NULL) + check_already_banned_ip = strstr(argv[i], "=false") == NULL; else if (strstr(argv[i], "--dump-patterns") != NULL) dump_patterns = true; else if (strcmp(argv[i], "--debug") == 0) @@ -359,7 +450,7 @@ int main(int argc, char* argv[]) printf(" --allowed-ips Path to the allowed IPs file (default: %s)\n", ALLOWED_IPS_FILE); printf(" --allowed-patterns Path to the allowed patterns file (default: %s)\n", ALLOWED_PATTERNS_FILE); printf(" --patterns Path to the patterns file (default: %s)\n", PATTERNS_FILE); - printf(" --access-log Path to the access log file (default: %s)\n", ACCESS_LOG_FILE); + printf(" --access-log Path to the access log file (default: %s)\n", ACCESS_LOG_FILE); printf(" --print-allowed-ip[=false] Print allowed IPs (default: %s)\n", print_allowed_ip ? "true" : "false"); printf(" --print-allowed-pattern[=false] Print allowed patterns (default: %s)\n", print_allowed_pattern ? "true" : "false"); printf(" --print-already-banned-ip[=false] Print already banned IPs (default: %s)\n", print_already_banned_ip ? "true" : "false"); @@ -368,6 +459,9 @@ int main(int argc, char* argv[]) printf(" --print-only[=false] Print only the lines that match the rules (default: %s)\n", print_only ? "true" : "false"); printf(" --print-progress[=false] Print progress while reading file (default: %s)\n", print_progress ? "true" : "false"); printf(" --debug Enable debug mode (prints all information)\n"); + printf(" --check-allowed-ip[=false] Check allowed IPs (default: %s)\n", check_allowed_ip ? "true" : "false"); + printf(" --check-allowed-pattern[=false] Check allowed patterns (default: %s)\n", check_allowed_pattern ? "true" : "false"); + printf(" --check-already-banned-ip[=false] Check already banned IPs (default: %s)\n", check_already_banned_ip ? "true" : "false"); printf(" --dump-patterns Dump pattern table and exit\n"); return 0; } @@ -383,16 +477,24 @@ int main(int argc, char* argv[]) access_log_path = "/dev/stdin"; } - fprintf(stderr, "allowed_patterns_path: %s\n", allowed_patterns_path); - fprintf(stderr, "allowed_ips_path: %s\n", allowed_ips_path); - fprintf(stderr, "patterns_path: %s\n", patterns_path); - fprintf(stderr, "access_log_path: %s\n", access_log_path); + print_bool_opt(print_allowed_ip); + print_bool_opt(print_allowed_pattern); + print_bool_opt(print_already_banned_ip); + print_bool_opt(print_indeterminate); + print_bool_opt(print_bans); + print_bool_opt(print_only); + print_bool_opt(print_progress); - fprintf(stderr, "print_allowed_ip: %hhd\n", print_allowed_ip); - fprintf(stderr, "print_allowed_pattern: %hhd\n", print_allowed_pattern); - fprintf(stderr, "print_already_banned_ip: %hhd\n", print_already_banned_ip); - fprintf(stderr, "print_indeterminate: %hhd\n", print_indeterminate); - fprintf(stderr, "print_bans: %hhd\n", print_bans); + print_bool_opt(check_allowed_ip); + print_bool_opt(check_allowed_pattern); + print_bool_opt(check_already_banned_ip); + + print_bool_opt(dump_patterns); + + print_str_opt(allowed_patterns_path); + print_str_opt(allowed_ips_path); + print_str_opt(patterns_path); + print_str_opt(access_log_path); MatchRules rules; rules.patterns = read_lines(patterns_path); @@ -401,6 +503,7 @@ int main(int argc, char* argv[]) rules.banned_ip_size = 8; rules.banned_ip_count = 0; rules.banned_ips = xmalloc(sizeof(char**) * rules.banned_ip_size); + rules.banned_ip_hits = xmalloc(sizeof(int*) * rules.banned_ip_size); //rules.bannedIps[0] = ; if (dump_patterns) @@ -425,10 +528,16 @@ int main(int argc, char* argv[]) fprintf(stderr, "Banned %lu IPs:\n", rules.banned_ip_count); - for (int i = 0; rules.banned_ips[i] != NULL; i++) + for (int i = 0; i < rules.banned_ip_count; i++) { + printf("[BAN %4d] %s: %d hits\n", i, rules.banned_ips[i], rules.banned_ip_hits[i * sizeof(int)]); free(rules.banned_ips[i]); } + // for (int i = 0; rules.banned_ips[i] != NULL; i++) + // { + // printf("[BANNED IP %4d] %s\n", i, rules.banned_ips[i]); + // free(rules.banned_ips[i]); + // } free(rules.banned_ips); for (int i = 0; rules.patterns[i] != NULL; i++) diff --git a/main.sh b/main.sh index 61853c6..efdcf2a 100755 --- a/main.sh +++ b/main.sh @@ -1,6 +1,6 @@ #! /usr/bin/env sh ./build_patterns.sh -gcc -O0 -ggdb -std=c23 -pie -fPIE main.c || exit 1 +gcc -O0 -ggdb -std=c23 -pie -fPIE -fsanitize=address main.c || exit 1 #sudo valgrind --leak-check=full -s ./a.out rsync -raPz *.c *.txt a.out rory.gay: diff --git a/template/ap_cgit.txt b/template/ap_cgit.txt new file mode 100644 index 0000000..fb4a38f --- /dev/null +++ b/template/ap_cgit.txt @@ -0,0 +1 @@ +GET /cgit. \ No newline at end of file diff --git a/template/ap_emma.txt b/template/ap_emma.txt index 2412087..40d9fc7 100644 --- a/template/ap_emma.txt +++ b/template/ap_emma.txt @@ -2,4 +2,5 @@ GET /server.git/ GET /matrix/thirdparty/nheko.git/ # git: "-" "git/ -"Mozilla/5.0 (compatible; Let's Encrypt validation server; +https://www.letsencrypt.org)" \ No newline at end of file +"Mozilla/5.0 (compatible; Let's Encrypt validation server; +https://www.letsencrypt.org)" +"HEAD / HTTP/1.1" 200 0 "-" "axios/1.9.0" \ No newline at end of file diff --git a/template/ap_matrix.txt b/template/ap_matrix.txt index c1349d9..44a8d51 100644 --- a/template/ap_matrix.txt +++ b/template/ap_matrix.txt @@ -2,4 +2,8 @@ /_matrix/client/ /_matrix/federation/ /_matrix/key -"Synapse (bot; +https://github.com/matrix-org/synapse)" \ No newline at end of file + /_matrix/media +"Synapse (bot; +https://github.com/matrix-org/synapse)" +# RMU: + "https://mru.rory.gay/service-worker.js" + "GET /service-worker.js HTTP/ \ No newline at end of file diff --git a/template/ap_owncast.txt b/template/ap_owncast.txt index 8107640..7046b2a 100644 --- a/template/ap_owncast.txt +++ b/template/ap_owncast.txt @@ -5,4 +5,6 @@ "-" "libmpv" # initiated by... is this too loose? "https://stream.rory.gay/sw.js" - "https://stream.rory.gay/" \ No newline at end of file + "https://stream.rory.gay/" + "GET /api/status HTTP/2.0" 200 + "GET /api/config HTTP/2.0" 200 \ No newline at end of file diff --git a/template/p_emma_unused_domains.txt b/template/p_emma_unused_domains.txt new file mode 100644 index 0000000..f05db76 --- /dev/null +++ b/template/p_emma_unused_domains.txt @@ -0,0 +1,9 @@ +{host="www.awoo.co.in" +{host="awoo.co.in" +{host="lfs.rory.gay" +{host="www.lfs.rory.gay" +{host="www.catgirlsaresexy.com" +{host="catgirlsaresexy.com" +# Why are you doing direct connections? +{host="51.210.113.110" +{host="127.0.0.1" \ No newline at end of file diff --git a/template/p_paths.txt b/template/p_paths.txt index 59cf8d1..31597dd 100644 --- a/template/p_paths.txt +++ b/template/p_paths.txt @@ -437,4 +437,7 @@ /yarn.lock /zabbix/favicon.ico - //recordings/theme/main.css \ No newline at end of file + //recordings/theme/main.css + /Backup.zip + /archive.zip + /GponForm \ No newline at end of file diff --git a/template/p_separate.txt b/template/p_separate.txt index a787b04..0d8c883 100644 --- a/template/p_separate.txt +++ b/template/p_separate.txt @@ -1,4 +1,5 @@ "CONNECT "SSTP_DUPLEX_POST -# Why are you doing direct connections? -{host="51.210.113.110" \ No newline at end of file + / HTTP/2.0" 200 0 "-" "curl/ + / HTTP/1.1" 200 0 "-" "curl/ + / HTTP/1.1" 200 0 "-" "python-httpx/ \ No newline at end of file diff --git a/template/p_user_agent_bot.txt b/template/p_user_agent_bot.txt index c177145..ec366ad 100644 --- a/template/p_user_agent_bot.txt +++ b/template/p_user_agent_bot.txt @@ -15,6 +15,11 @@ meta-externalfetcher/ +https://developer.amazon.com +https://search.brave.com +https://opensiteexplorer.org ++https://awario.com/ ++https://help.seranking.com/en/blex-crawler ++https://app.twingly.com/public-docs/crawler ++http://duckduckgo.com/duckassistbot.html +mailto:fruitsaladisland@gmail.com # Other (mozilla/5.0) Mozilla/5.0 zgrab/ Mozilla/5.0; Keydrop.io/ @@ -38,4 +43,8 @@ ModatScanner/ AppleBot AhrefsBot # Unknown and nondescript - "link_checker/0.1.0" +"link_checker/0.1.0" +"SuperFastScraper/ +ThinkBot/ +Thinkbot/ +oii-research/ \ No newline at end of file diff --git a/template/p_user_agent_likely_fake.txt b/template/p_user_agent_likely_fake.txt index 9e762fe..9dc5661 100644 --- a/template/p_user_agent_likely_fake.txt +++ b/template/p_user_agent_likely_fake.txt @@ -1,3 +1,4 @@ .0.0.0 (Windows; U; Windows NT 10.0) AppleWebKit/ -(Windows; U; Windows NT 11.0) AppleWebKit/ \ No newline at end of file +(Windows; U; Windows NT 11.0) AppleWebKit/ +"Mozilla/5.0 (Windows NT 10.0; Win64; x64)" \ No newline at end of file -- cgit 1.5.1