From fdff462fa41da088bf9eff94d2d3022a0dbdee6a Mon Sep 17 00:00:00 2001
From: DL6ER
Date: Sun, 7 Jul 2019 22:46:28 +0200
Subject: [PATCH] Add support for regex filters for whitelisting.

Signed-off-by: DL6ER
---
 src/FTL.h                 |   2 +-
 src/database/gravity-db.c |  24 ++++++--
 src/dnsmasq_interface.c   |   8 ++-
 src/regex.c               | 117 +++++++++++++++++++++-----------------
 src/regex_r.h             |   3 +-
 test/gravity.db.sql       |  29 ++++++++++
 6 files changed, 122 insertions(+), 61 deletions(-)

diff --git a/src/FTL.h b/src/FTL.h
index 1215e3db..379f0d81 100644
--- a/src/FTL.h
+++ b/src/FTL.h
@@ -107,7 +107,7 @@ enum { TYPE_A = 1, TYPE_AAAA, TYPE_ANY, TYPE_SRV, TYPE_SOA, TYPE_PTR, TYPE_TXT,
 enum { REPLY_UNKNOWN, REPLY_NODATA, REPLY_NXDOMAIN, REPLY_CNAME, REPLY_IP, REPLY_DOMAIN, REPLY_RRNAME, REPLY_SERVFAIL, REPLY_REFUSED, REPLY_NOTIMP, REPLY_OTHER };
 enum { PRIVACY_SHOW_ALL = 0, PRIVACY_HIDE_DOMAINS, PRIVACY_HIDE_DOMAINS_CLIENTS, PRIVACY_MAXIMUM, PRIVACY_NOSTATS };
 enum { MODE_IP, MODE_NX, MODE_NULL, MODE_IP_NODATA_AAAA, MODE_NODATA };
-enum { GRAVITY_LIST, BLACK_LIST, WHITE_LIST, REGEX_LIST, UNKNOWN_LIST };
+enum { GRAVITY_LIST, BLACK_LIST, WHITE_LIST, REGEX_BLACK_LIST, REGEX_WHITE_LIST, UNKNOWN_LIST };
 
 // Use out own memory handling functions that will detect possible errors
 // and report accordingly in the log. This will make debugging FTL crashs
diff --git a/src/database/gravity-db.c b/src/database/gravity-db.c
index 9765c593..9b2104e1 100644
--- a/src/database/gravity-db.c
+++ b/src/database/gravity-db.c
@@ -15,6 +15,8 @@
 // global variable counters
 #include "memory.h"
 #include "sqlite3.h"
+// match_regex()
+#include "regex_r.h"
 
 // Private variables
 static sqlite3 *gravity_db = NULL;
@@ -113,8 +115,11 @@ bool gravityDB_getTable(const unsigned char list)
 		case BLACK_LIST:
 			querystr = "SELECT domain FROM vw_blacklist;";
 			break;
-		case REGEX_LIST:
-			querystr = "SELECT domain FROM vw_regex;";
+		case REGEX_BLACK_LIST:
+			querystr = "SELECT domain FROM vw_black_regex;";
+			break;
+		case REGEX_WHITE_LIST:
+			querystr = "SELECT domain FROM vw_white_regex;";
 			break;
 		default:
 			logg("gravityDB_getTable(%i): Requested list is not known!", list);
@@ -201,8 +206,11 @@ int gravityDB_count(const unsigned char list)
 		case WHITE_LIST:
 			querystr = "SELECT COUNT(*) FROM vw_whitelist;";
 			break;
-		case REGEX_LIST:
-			querystr = "SELECT COUNT(*) FROM vw_regex;";
+		case REGEX_BLACK_LIST:
+			querystr = "SELECT COUNT(*) FROM vw_black_regex;";
+			break;
+		case REGEX_WHITE_LIST:
+			querystr = "SELECT COUNT(*) FROM vw_white_regex;";
 			break;
 		default:
 			logg("gravityDB_count(%i): Requested list is not known!", list);
@@ -289,8 +297,12 @@ bool in_whitelist(const char *domain)
 	// all host parameters to NULL.
 	sqlite3_clear_bindings(whitelist_stmt);
 
-	// Return result.
+	// Return early in case we already have found an exact match.
 	// SELECT EXISTS(...) either returns 0 (false) or 1 (true).
-	return result == 1;
+	if(result == 1)
+		return true;
+
+	// If not: Walk regex-based whitelist filters in addition
+	return match_regex(domain, REGEX_WHITELIST);
 }
 
diff --git a/src/dnsmasq_interface.c b/src/dnsmasq_interface.c
index 202c7dfe..fe225e83 100644
--- a/src/dnsmasq_interface.c
+++ b/src/dnsmasq_interface.c
@@ -225,7 +225,8 @@ void _FTL_new_query(const unsigned int flags, const char *name, const struct all
 		// of a specific domain. The logic herein is:
 		// If matched, then compare against whitelist
 		// If in whitelist, negate matched so this function returns: not-to-be-blocked
-		if(match_regex(domainString) && !in_whitelist(domainString))
+		if(match_regex(domainString, REGEX_BLACKLIST) &&
+		   !in_whitelist(domainString))
 		{
 			// We have to block this domain
 			block_single_domain_regex(domainString);
@@ -1439,6 +1440,11 @@ static int FTL_table_import(const char *tablename, const unsigned char list, con
 		if(len == 0)
 			continue;
 
+		// Do not add gravity or blacklist domains that match
+		// a regex-based whitelist filter
+		if(match_regex(domain, REGEX_WHITELIST))
+			continue;
+
 		// As of here we assume the entry to be valid
 		// Rehash every 1000 valid names
 		if(rhash && ((name_count - cache_size) > 1000))
diff --git a/src/regex.c b/src/regex.c
index e6a5c85b..8b36e412 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -19,55 +19,57 @@
 #include "datastructure.h"
 #include <regex.h>
 
-static int num_regex;
-static regex_t *regex = NULL;
-static bool *regexconfigured = NULL;
-static char **regexbuffer = NULL;
+static int num_regex[2] = { 0 };
+static regex_t *regex[2] = { NULL };
+static bool *regexconfigured[2] = { NULL };
+static char **regexbuffer[2] = { NULL };
 
-static void log_regex_error(const char *where, const int errcode, const int index)
+static const char regextype[2][10] = { "blacklist", "whitelist" };
+
+static void log_regex_error(const char *where, const int errcode, const int index, const unsigned char regexid)
 {
 	// Regex failed for some reason (probably user syntax error)
 	// Get error string and log it
-	const size_t length = regerror(errcode, &regex[index], NULL, 0);
+	const size_t length = regerror(errcode, &regex[regexid][index], NULL, 0);
 	char *buffer = calloc(length,sizeof(char));
-	(void) regerror (errcode, &regex[index], buffer, length);
-	logg("ERROR %s regex on line %i: %s (%i)", where, index+1, buffer, errcode);
+	(void) regerror (errcode, &regex[regexid][index], buffer, length);
+	logg("ERROR %s regex %s no. %i: %s (%i)", where, regextype[regexid], index+1, buffer, errcode);
 	free(buffer);
 }
 
-static bool init_regex(const char *regexin, const int index)
+static bool init_regex(const char *regexin, const int index, const unsigned char regexid)
 {
 	// compile regular expressions into data structures that
 	// can be used with regexec to match against a string
-	const int errcode = regcomp(&regex[index], regexin, REG_EXTENDED);
+	const int errcode = regcomp(&regex[regexid][index], regexin, REG_EXTENDED);
 	if(errcode != 0)
 	{
-		log_regex_error("compiling", errcode, index);
+		log_regex_error("compiling", errcode, index, regexid);
 		return false;
 	}
 
 	// Store compiled regex string in buffer if in regex debug mode
 	if(config.debug & DEBUG_REGEX)
 	{
-		regexbuffer[index] = strdup(regexin);
+		regexbuffer[regexid][index] = strdup(regexin);
 	}
 	return true;
 }
 
-bool match_regex(const char *input)
+bool match_regex(const char *input, const unsigned char regexid)
 {
 	bool matched = false;
 
 	// Start matching timer
 	timer_start(REGEX_TIMER);
-	for(int index = 0; index < num_regex; index++)
+	for(int index = 0; index < num_regex[regexid]; index++)
 	{
 		// Only check regex which have been successfully compiled
-		if(!regexconfigured[index])
+		if(!regexconfigured[regexid][index])
 			continue;
 
 		// Try to match the compiled regular expression against input
-		int errcode = regexec(&regex[index], input, 0, NULL, 0);
+		int errcode = regexec(&regex[regexid][index], input, 0, NULL, 0);
 		if (errcode == 0)
 		{
 			// Match, return true
@@ -75,13 +77,13 @@ bool match_regex(const char *input)
 
 			// Print match message when in regex debug mode
 			if(config.debug & DEBUG_REGEX)
-				logg("Regex in line %i \"%s\" matches \"%s\"", index+1, regexbuffer[index], input);
+				logg("Regex %s in line %i \"%s\" matches \"%s\"", regextype[regexid], index+1, regexbuffer[regexid][index], input);
 			break;
 		}
 		else if (errcode != REG_NOMATCH)
 		{
 			// Error, return false afterwards
-			log_regex_error("matching", errcode, index);
+			log_regex_error("matching", errcode, index, regexid);
 			break;
 		}
 	}
@@ -90,7 +92,7 @@ bool match_regex(const char *input)
 
 	// Only log evaluation times if they are longer than normal
 	if(elapsed > 10.0)
-		logg("WARN: Regex evaluation took %.3f msec", elapsed);
+		logg("WARN: Regex %s evaluation took %.3f msec", regextype[regexid], elapsed);
 
 	// No match, no error, return false
 	return matched;
@@ -112,60 +114,64 @@ void free_regex(void)
 		return;
 
 	// Disable blocking regex checking and free regex datastructure
-	for(int index = 0; index < num_regex; index++)
+	for(int regexid = 0; regexid < 2; regexid++)
 	{
-		if(regexconfigured[index])
+		for(int index = 0; index < num_regex[regexid]; index++)
 		{
-			regfree(&regex[index]);
-
-			// Also free buffered regex strings if in regex debug mode
-			if(config.debug & DEBUG_REGEX)
+			if(regexconfigured[regexid][index])
 			{
-				free(regexbuffer[index]);
-				regexbuffer[index] = NULL;
+				regfree(&regex[regexid][index]);
+
+				// Also free buffered regex strings if in regex debug mode
+				if(config.debug & DEBUG_REGEX)
+				{
+					free(regexbuffer[regexid][index]);
+					regexbuffer[regexid][index] = NULL;
+				}
 			}
 		}
+
+		// Free array with regex datastructure
+		free(regex[regexid]);
+		regex[regexid] = NULL;
+		free(regexconfigured[regexid]);
+		regexconfigured[regexid] = NULL;
+
+		// Reset counter for number of regex
+		num_regex[regexid] = 0;
 	}
-
-	// Free array with regex datastructure
-	free(regex);
-	regex = NULL;
-	free(regexconfigured);
-	regexconfigured = NULL;
-
-	// Reset counter for number of regex
-	num_regex = 0;
 }
 
-void read_regex_from_database(void)
+static void read_regex_tables(unsigned char regexid)
 {
 	// Get number of lines in the regex table
-	num_regex = gravityDB_count(REGEX_LIST);
+	unsigned char databaseID = regexid == REGEX_BLACKLIST ? REGEX_BLACK_LIST : REGEX_WHITE_LIST;
+	num_regex[regexid] = gravityDB_count(databaseID);
 
-	if(num_regex == 0)
+	if(num_regex[regexid] == 0)
 	{
-		logg("INFO: No regex entries found");
+		logg("INFO: No regex %s entries found", regextype[regexid]);
 		return;
 	}
-	else if(num_regex == DB_FAILED)
+	else if(num_regex[regexid] == DB_FAILED)
 	{
-		logg("WARN: Database query failed, assuming there are no regex entries");
-		num_regex = 0;
+		logg("WARN: Database query failed, assuming there are no regex %s entries", regextype[regexid]);
+		num_regex[regexid] = 0;
 		return;
 	}
 
 	// Allocate memory for regex
-	regex = calloc(num_regex, sizeof(regex_t));
-	regexconfigured = calloc(num_regex, sizeof(bool));
+	regex[regexid] = calloc(num_regex[regexid], sizeof(regex_t));
+	regexconfigured[regexid] = calloc(num_regex[regexid], sizeof(bool));
 
 	// Buffer strings if in regex debug mode
 	if(config.debug & DEBUG_REGEX)
-		regexbuffer = calloc(num_regex, sizeof(char*));
+		regexbuffer[regexid] = calloc(num_regex[regexid], sizeof(char*));
 
-	// Connect to whitelist table
-	if(!gravityDB_getTable(REGEX_LIST))
+	// Connect to the requested regex (black- or whitelist) table
+	if(!gravityDB_getTable(databaseID))
 	{
-		logg("read_regex_from_database(): Error getting table from database");
+		logg("read_regex_tables(): Error getting regex %s table from database", regextype[regexid]);
 		return;
 	}
 
@@ -176,7 +182,7 @@ void read_regex_from_database(void)
 	{
 		// Avoid buffer overflow if database table changed
 		// since we counted its entries
-		if(i >= num_regex)
+		if(i >= num_regex[regexid])
 			break;
 
 		// Skip this entry if empty: an empty regex filter would match
@@ -188,7 +194,7 @@ void read_regex_from_database(void)
 			continue;
 
 		// Copy this regex domain into memory
-		regexconfigured[i] = init_regex(domain, i);
+		regexconfigured[regexid][i] = init_regex(domain, i, regexid);
 
 		// Increase counter
 		i++;
@@ -198,7 +204,14 @@ void read_regex_from_database(void)
 	gravityDB_finalizeTable();
 }
 
+void read_regex_from_database(void)
+{
+	read_regex_tables(REGEX_BLACKLIST);
+	read_regex_tables(REGEX_WHITELIST);
+}
+
 void log_regex(const double time)
 {
-	logg("Compiled %i Regex filters in %.1f msec", num_regex, time);
+	const int sum_regex = num_regex[0] + num_regex[1];
+	logg("Compiled %i Regex filters in %.1f msec", sum_regex, time);
 }
diff --git a/src/regex_r.h b/src/regex_r.h
index f3e68a8d..4e751b60 100644
--- a/src/regex_r.h
+++ b/src/regex_r.h
@@ -10,11 +10,12 @@
 #ifndef REGEX_H
 #define REGEX_H
 
-bool match_regex(const char *input);
+bool match_regex(const char *input, const unsigned char regexid);
 void free_regex(void);
 void read_regex_from_database(void);
 void log_regex(const double time);
 
 enum { REGEX_UNKNOWN, REGEX_BLOCKED, REGEX_NOTBLOCKED };
+enum { REGEX_BLACKLIST, REGEX_WHITELIST };
 
 #endif //REGEX_H
diff --git a/test/gravity.db.sql b/test/gravity.db.sql
index 00cb6a9a..f6925c67 100644
--- a/test/gravity.db.sql
+++ b/test/gravity.db.sql
@@ -60,6 +60,23 @@ CREATE TABLE regex_by_group
 	PRIMARY KEY (regex_id, group_id)
 );
 
+CREATE TABLE whitelist_regex
+(
+	id INTEGER PRIMARY KEY AUTOINCREMENT,
+	domain TEXT UNIQUE NOT NULL,
+	enabled BOOLEAN NOT NULL DEFAULT 1,
+	date_added INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)),
+	date_modified INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)),
+	comment TEXT
+);
+
+CREATE TABLE whitelist_regex_by_group
+(
+	whitelist_regex_id INTEGER NOT NULL REFERENCES whitelist_regex (id),
+	group_id INTEGER NOT NULL REFERENCES "group" (id),
+	PRIMARY KEY (whitelist_regex_id, group_id)
+);
+
 CREATE TABLE adlist
 (
 	id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -129,6 +146,18 @@ CREATE TRIGGER tr_regex_update AFTER UPDATE ON regex
       UPDATE regex SET date_modified = (cast(strftime('%s', 'now') as int)) WHERE domain = NEW.domain;
     END;
 
+CREATE VIEW vw_white_regex AS SELECT DISTINCT domain
+    FROM whitelist_regex
+    LEFT JOIN whitelist_regex_by_group ON whitelist_regex_by_group.whitelist_regex_id = whitelist_regex.id
+    LEFT JOIN "group" ON "group".id = whitelist_regex_by_group.group_id
+    WHERE whitelist_regex.enabled = 1 AND (whitelist_regex_by_group.group_id IS NULL OR "group".enabled = 1)
+    ORDER BY whitelist_regex.id;
+
+CREATE TRIGGER tr_whitelist_regex_update AFTER UPDATE ON whitelist_regex
+    BEGIN
+      UPDATE whitelist_regex SET date_modified = (cast(strftime('%s', 'now') as int)) WHERE domain = NEW.domain;
+    END;
+
 CREATE VIEW vw_adlist AS SELECT DISTINCT address
     FROM adlist
     LEFT JOIN adlist_by_group ON adlist_by_group.adlist_id = adlist.id