Add support for regex filters for whitelisting.

Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
DL6ER
2019-07-07 22:46:28 +02:00
parent ac1018aa2a
commit fdff462fa4
6 changed files with 122 additions and 61 deletions

View File

@@ -107,7 +107,7 @@ enum { TYPE_A = 1, TYPE_AAAA, TYPE_ANY, TYPE_SRV, TYPE_SOA, TYPE_PTR, TYPE_TXT,
enum { REPLY_UNKNOWN, REPLY_NODATA, REPLY_NXDOMAIN, REPLY_CNAME, REPLY_IP, REPLY_DOMAIN, REPLY_RRNAME, REPLY_SERVFAIL, REPLY_REFUSED, REPLY_NOTIMP, REPLY_OTHER };
enum { PRIVACY_SHOW_ALL = 0, PRIVACY_HIDE_DOMAINS, PRIVACY_HIDE_DOMAINS_CLIENTS, PRIVACY_MAXIMUM, PRIVACY_NOSTATS };
enum { MODE_IP, MODE_NX, MODE_NULL, MODE_IP_NODATA_AAAA, MODE_NODATA };
enum { GRAVITY_LIST, BLACK_LIST, WHITE_LIST, REGEX_LIST, UNKNOWN_LIST };
enum { GRAVITY_LIST, BLACK_LIST, WHITE_LIST, REGEX_BLACK_LIST, REGEX_WHITE_LIST, UNKNOWN_LIST };
// Use our own memory handling functions that will detect possible errors
// and report accordingly in the log. This will make debugging FTL crashes

View File

@@ -15,6 +15,8 @@
// global variable counters
#include "memory.h"
#include "sqlite3.h"
// match_regex()
#include "regex_r.h"
// Private variables
static sqlite3 *gravity_db = NULL;
@@ -113,8 +115,11 @@ bool gravityDB_getTable(const unsigned char list)
case BLACK_LIST:
querystr = "SELECT domain FROM vw_blacklist;";
break;
case REGEX_LIST:
querystr = "SELECT domain FROM vw_regex;";
case REGEX_BLACK_LIST:
querystr = "SELECT domain FROM vw_black_regex;";
break;
case REGEX_WHITE_LIST:
	// Whitelist regex filters have to be read from the whitelist view,
	// i.e. the same view gravityDB_count(REGEX_WHITE_LIST) counts.
	// Reading vw_black_regex here would load the blacklist filters
	// a second time and treat them as whitelist filters.
	querystr = "SELECT domain FROM vw_white_regex;";
	break;
default:
logg("gravityDB_getTable(%i): Requested list is not known!", list);
@@ -201,8 +206,11 @@ int gravityDB_count(const unsigned char list)
case WHITE_LIST:
querystr = "SELECT COUNT(*) FROM vw_whitelist;";
break;
case REGEX_LIST:
querystr = "SELECT COUNT(*) FROM vw_regex;";
case REGEX_BLACK_LIST:
querystr = "SELECT COUNT(*) FROM vw_black_regex;";
break;
case REGEX_WHITE_LIST:
querystr = "SELECT COUNT(*) FROM vw_white_regex;";
break;
default:
logg("gravityDB_count(%i): Requested list is not known!", list);
@@ -289,8 +297,12 @@ bool in_whitelist(const char *domain)
// all host parameters to NULL.
sqlite3_clear_bindings(whitelist_stmt);
// Return result.
// Return early in case we already have found an exact match.
// SELECT EXISTS(...) either returns 0 (false) or 1 (true).
return result == 1;
if(result == 1)
return true;
// If not: Walk regex-based whitelist filters in addition
return match_regex(domain, REGEX_WHITELIST);
}

View File

@@ -225,7 +225,8 @@ void _FTL_new_query(const unsigned int flags, const char *name, const struct all
// of a specific domain. The logic herein is:
// If matched, then compare against whitelist
// If in whitelist, negate matched so this function returns: not-to-be-blocked
if(match_regex(domainString) && !in_whitelist(domainString))
if(match_regex(domainString, REGEX_BLACKLIST) &&
!in_whitelist(domainString))
{
// We have to block this domain
block_single_domain_regex(domainString);
@@ -1439,6 +1440,11 @@ static int FTL_table_import(const char *tablename, const unsigned char list, con
if(len == 0)
continue;
// Do not add gravity or blacklist domains that match
// a regex-based whitelist filter
if(match_regex(domain, REGEX_WHITELIST))
continue;
// As of here we assume the entry to be valid
// Rehash every 1000 valid names
if(rhash && ((name_count - cache_size) > 1000))

View File

@@ -19,55 +19,57 @@
#include "datastructure.h"
#include <regex.h>
static int num_regex;
static regex_t *regex = NULL;
static bool *regexconfigured = NULL;
static char **regexbuffer = NULL;
static int num_regex[2] = { 0 };
static regex_t *regex[2] = { NULL };
static bool *regexconfigured[2] = { NULL };
static char **regexbuffer[2] = { NULL };
static void log_regex_error(const char *where, const int errcode, const int index)
// Human-readable list names used in log messages. The array is indexed
// by the regex ID, so the order has to follow the enum
// { REGEX_BLACKLIST, REGEX_WHITELIST }: index 0 is the blacklist and
// index 1 is the whitelist.
static const char regextype[2][10] = { "blacklist", "whitelist" };
static void log_regex_error(const char *where, const int errcode, const int index, const unsigned char regexid)
{
// Regex failed for some reason (probably user syntax error)
// Get error string and log it
const size_t length = regerror(errcode, &regex[index], NULL, 0);
const size_t length = regerror(errcode, &regex[regexid][index], NULL, 0);
char *buffer = calloc(length,sizeof(char));
(void) regerror (errcode, &regex[index], buffer, length);
logg("ERROR %s regex on line %i: %s (%i)", where, index+1, buffer, errcode);
(void) regerror (errcode, &regex[regexid][index], buffer, length);
logg("ERROR %s regex %s no. %i: %s (%i)", where, regextype[regexid], index+1, buffer, errcode);
free(buffer);
}
static bool init_regex(const char *regexin, const int index)
static bool init_regex(const char *regexin, const int index, const unsigned char regexid)
{
// compile regular expressions into data structures that
// can be used with regexec to match against a string
const int errcode = regcomp(&regex[index], regexin, REG_EXTENDED);
const int errcode = regcomp(&regex[regexid][index], regexin, REG_EXTENDED);
if(errcode != 0)
{
log_regex_error("compiling", errcode, index);
log_regex_error("compiling", errcode, index, regexid);
return false;
}
// Store compiled regex string in buffer if in regex debug mode
if(config.debug & DEBUG_REGEX)
{
regexbuffer[index] = strdup(regexin);
regexbuffer[regexid][index] = strdup(regexin);
}
return true;
}
bool match_regex(const char *input)
bool match_regex(const char *input, const unsigned char regexid)
{
bool matched = false;
// Start matching timer
timer_start(REGEX_TIMER);
for(int index = 0; index < num_regex; index++)
for(int index = 0; index < num_regex[regexid]; index++)
{
// Only check regex which have been successfully compiled
if(!regexconfigured[index])
if(!regexconfigured[regexid][index])
continue;
// Try to match the compiled regular expression against input
int errcode = regexec(&regex[index], input, 0, NULL, 0);
int errcode = regexec(&regex[regexid][index], input, 0, NULL, 0);
if (errcode == 0)
{
// Match, return true
@@ -75,13 +77,13 @@ bool match_regex(const char *input)
// Print match message when in regex debug mode
if(config.debug & DEBUG_REGEX)
logg("Regex in line %i \"%s\" matches \"%s\"", index+1, regexbuffer[index], input);
logg("Regex %s in line %i \"%s\" matches \"%s\"", regextype[regexid], index+1, regexbuffer[regexid][index], input);
break;
}
else if (errcode != REG_NOMATCH)
{
// Error, return false afterwards
log_regex_error("matching", errcode, index);
log_regex_error("matching", errcode, index, regexid);
break;
}
}
@@ -90,7 +92,7 @@ bool match_regex(const char *input)
// Only log evaluation times if they are longer than normal
if(elapsed > 10.0)
logg("WARN: Regex evaluation took %.3f msec", elapsed);
logg("WARN: Regex %s evaluation took %.3f msec", regextype[regexid], elapsed);
// No match, no error, return false
return matched;
@@ -112,60 +114,64 @@ void free_regex(void)
return;
// Disable blocking regex checking and free regex datastructure
for(int index = 0; index < num_regex; index++)
for(int regexid = 0; regexid < 2; regexid++)
{
if(regexconfigured[index])
for(int index = 0; index < num_regex[regexid]; index++)
{
regfree(&regex[index]);
// Also free buffered regex strings if in regex debug mode
if(config.debug & DEBUG_REGEX)
if(regexconfigured[regexid][index])
{
free(regexbuffer[index]);
regexbuffer[index] = NULL;
regfree(&regex[regexid][index]);
// Also free buffered regex strings if in regex debug mode
if(config.debug & DEBUG_REGEX)
{
free(regexbuffer[regexid][index]);
regexbuffer[regexid][index] = NULL;
}
}
}
// Free array with regex datastructure
free(regex[regexid]);
regex[regexid] = NULL;
free(regexconfigured[regexid]);
regexconfigured[regexid] = NULL;
// Reset counter for number of regex
num_regex[regexid] = 0;
}
// Free array with regex datastructure
free(regex);
regex = NULL;
free(regexconfigured);
regexconfigured = NULL;
// Reset counter for number of regex
num_regex = 0;
}
void read_regex_from_database(void)
static void read_regex_tables(unsigned char regexid)
{
// Get number of lines in the regex table
num_regex = gravityDB_count(REGEX_LIST);
unsigned char databaseID = regexid == REGEX_BLACKLIST ? REGEX_BLACK_LIST : REGEX_WHITE_LIST;
num_regex[regexid] = gravityDB_count(databaseID);
if(num_regex == 0)
if(num_regex[regexid] == 0)
{
logg("INFO: No regex entries found");
logg("INFO: No regex %s entries found", regextype[regexid]);
return;
}
else if(num_regex == DB_FAILED)
else if(num_regex[regexid] == DB_FAILED)
{
logg("WARN: Database query failed, assuming there are no regex entries");
num_regex = 0;
logg("WARN: Database query failed, assuming there are no regex %s entries", regextype[regexid]);
num_regex[regexid] = 0;
return;
}
// Allocate memory for regex
regex = calloc(num_regex, sizeof(regex_t));
regexconfigured = calloc(num_regex, sizeof(bool));
regex[regexid] = calloc(num_regex[regexid], sizeof(regex_t));
regexconfigured[regexid] = calloc(num_regex[regexid], sizeof(bool));
// Buffer strings if in regex debug mode
if(config.debug & DEBUG_REGEX)
regexbuffer = calloc(num_regex, sizeof(char*));
regexbuffer[regexid] = calloc(num_regex[regexid], sizeof(char*));
// Connect to whitelist table
if(!gravityDB_getTable(REGEX_LIST))
// Connect to the regex table selected by databaseID (black- or whitelist)
if(!gravityDB_getTable(databaseID))
{
logg("read_regex_from_database(): Error getting table from database");
logg("read_regex_from_database(): Error getting regex %s table from database", regextype[regexid]);
return;
}
@@ -176,7 +182,7 @@ void read_regex_from_database(void)
{
// Avoid buffer overflow if database table changed
// since we counted its entries
if(i >= num_regex)
if(i >= num_regex[regexid])
break;
// Skip this entry if empty: an empty regex filter would match
@@ -188,7 +194,7 @@ void read_regex_from_database(void)
continue;
// Copy this regex domain into memory
regexconfigured[i] = init_regex(domain, i);
regexconfigured[regexid][i] = init_regex(domain, i, regexid);
// Increase counter
i++;
@@ -198,7 +204,14 @@ void read_regex_from_database(void)
gravityDB_finalizeTable();
}
// Load both sets of regex filters via read_regex_tables().
// The blacklist filters are read first, the whitelist filters second.
void read_regex_from_database(void)
{
	static const unsigned char regex_ids[] = { REGEX_BLACKLIST, REGEX_WHITELIST };
	const unsigned int count = sizeof(regex_ids) / sizeof(regex_ids[0]);

	for(unsigned int n = 0; n < count; n++)
		read_regex_tables(regex_ids[n]);
}
void log_regex(const double time)
{
logg("Compiled %i Regex filters in %.1f msec", num_regex, time);
const int sum_regex = num_regex[0] + num_regex[1];
logg("Compiled %i Regex filters in %.1f msec", sum_regex, time);
}

View File

@@ -10,11 +10,12 @@
#ifndef REGEX_H
#define REGEX_H
bool match_regex(const char *input);
bool match_regex(const char *input, const unsigned char regexid);
void free_regex(void);
void read_regex_from_database(void);
void log_regex(const double time);
enum { REGEX_UNKNOWN, REGEX_BLOCKED, REGEX_NOTBLOCKED };
enum { REGEX_BLACKLIST, REGEX_WHITELIST };
#endif //REGEX_H

View File

@@ -60,6 +60,23 @@ CREATE TABLE regex_by_group
PRIMARY KEY (regex_id, group_id)
);
-- Table holding the regex filters used for whitelisting, one entry per row.
CREATE TABLE whitelist_regex
(
-- Row identifier; referenced by whitelist_regex_by_group.whitelist_regex_id
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- The filter itself; must be present and must not repeat within the table
domain TEXT UNIQUE NOT NULL,
-- Entries are enabled (1) unless stated otherwise on insert
enabled BOOLEAN NOT NULL DEFAULT 1,
-- Both timestamps default to the current time in seconds since the Unix epoch
date_added INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)),
date_modified INTEGER NOT NULL DEFAULT (cast(strftime('%s', 'now') as int)),
-- Optional free-text note
comment TEXT
);
-- Link table assigning whitelist regex entries to groups.
-- Each (entry, group) pair can be stored only once.
CREATE TABLE whitelist_regex_by_group
(
-- Entry being assigned (row of whitelist_regex)
whitelist_regex_id INTEGER NOT NULL REFERENCES whitelist_regex (id),
-- Group the entry is assigned to (row of "group")
group_id INTEGER NOT NULL REFERENCES "group" (id),
PRIMARY KEY (whitelist_regex_id, group_id)
);
CREATE TABLE adlist
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -129,6 +146,18 @@ CREATE TRIGGER tr_regex_update AFTER UPDATE ON regex
UPDATE regex SET date_modified = (cast(strftime('%s', 'now') as int)) WHERE domain = NEW.domain;
END;
-- Active whitelist regex filters. An entry is listed when it is enabled
-- itself and is either assigned to no group at all or to at least one
-- enabled group. DISTINCT collapses entries reachable through several groups.
-- The view reads from whitelist_regex / whitelist_regex_by_group, the tables
-- created by this schema (there is no regex_whitelist table).
-- NOTE(review): the C side queries "vw_white_regex" and "vw_black_regex";
-- confirm which view name is the intended one.
CREATE VIEW vw_regex_whitelist AS SELECT DISTINCT domain
    FROM whitelist_regex
    LEFT JOIN whitelist_regex_by_group ON whitelist_regex_by_group.whitelist_regex_id = whitelist_regex.id
    LEFT JOIN "group" ON "group".id = whitelist_regex_by_group.group_id
    WHERE whitelist_regex.enabled = 1 AND (whitelist_regex_by_group.group_id IS NULL OR "group".enabled = 1)
    ORDER BY whitelist_regex.id;
-- Keep date_modified current: after any update of a whitelist regex entry,
-- stamp the row(s) carrying the updated domain with the current Unix time.
-- The trigger is attached to whitelist_regex, the table created by this
-- schema (a trigger on a table that does not exist cannot be created).
CREATE TRIGGER tr_regex_whitelist_update AFTER UPDATE ON whitelist_regex
    BEGIN
      UPDATE whitelist_regex SET date_modified = (cast(strftime('%s', 'now') as int)) WHERE domain = NEW.domain;
    END;
CREATE VIEW vw_adlist AS SELECT DISTINCT address
FROM adlist
LEFT JOIN adlist_by_group ON adlist_by_group.adlist_id = adlist.id