mirror of
https://github.com/pi-hole/dnsmasq.git
synced 2025-12-20 02:38:32 +00:00
Improve performance when reading large hostfiles.
This commit is contained in:
86
src/cache.c
86
src/cache.c
@@ -635,11 +635,12 @@ struct crec *cache_find_by_addr(struct crec *crecp, struct all_addr *addr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void add_hosts_entry(struct crec *cache, struct all_addr *addr, int addrlen,
|
static void add_hosts_entry(struct crec *cache, struct all_addr *addr, int addrlen,
|
||||||
unsigned short flags, int index, int addr_dup)
|
unsigned short flags, int index, struct crec **rhash)
|
||||||
{
|
{
|
||||||
struct crec *lookup = cache_find_by_name(NULL, cache->name.sname, 0, flags & (F_IPV4 | F_IPV6));
|
struct crec *lookup = cache_find_by_name(NULL, cache->name.sname, 0, flags & (F_IPV4 | F_IPV6));
|
||||||
int i, nameexists = 0;
|
int i, nameexists = 0;
|
||||||
struct cname *a;
|
struct cname *a;
|
||||||
|
unsigned int j;
|
||||||
|
|
||||||
/* Remove duplicates in hosts files. */
|
/* Remove duplicates in hosts files. */
|
||||||
if (lookup && (lookup->flags & F_HOSTS))
|
if (lookup && (lookup->flags & F_HOSTS))
|
||||||
@@ -653,33 +654,41 @@ static void add_hosts_entry(struct crec *cache, struct all_addr *addr, int addrl
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Ensure there is only one address -> name mapping (first one trumps)
|
/* Ensure there is only one address -> name mapping (first one trumps)
|
||||||
We do this by steam here, first we see if the address is the same as
|
We do this by steam here, The entries are kept in hash chains, linked
|
||||||
the last one we saw, which eliminates most in the case of an ad-block
|
by ->next (which is unused at this point) held in hash buckets in
|
||||||
file with thousands of entries for the same address.
|
the array rhash. Note that rhash and the values in ->next are only valid
|
||||||
Then we search and bail at the first matching address that came from
|
whilst reading hosts files: the buckets are then freed, and the
|
||||||
a HOSTS file. Since the first host entry gets reverse, we know
|
->next pointer used for other things.
|
||||||
then that it must exist without searching exhaustively for it. */
|
|
||||||
|
|
||||||
if (addr_dup)
|
We search and bail at the first matching address that came from
|
||||||
flags &= ~F_REVERSE;
|
a HOSTS file. Since the first host entry gets reverse, we know
|
||||||
else
|
then that it must exist without searching exhaustively for it.
|
||||||
for (i=0; i<hash_size; i++)
|
|
||||||
|
This complexity avoids O(n^2) divergent CPU use whilst reading
|
||||||
|
large (10000 entry) hosts files. */
|
||||||
|
|
||||||
|
/* hash address */
|
||||||
|
for (j = 0, i = 0; i < addrlen; i++)
|
||||||
|
j += ((unsigned char *)addr)[i] + (j << 6) + (j << 16) - j;
|
||||||
|
|
||||||
|
for (lookup = rhash[j % RHASHSIZE]; lookup; lookup = lookup->next)
|
||||||
|
if ((lookup->flags & F_HOSTS) &&
|
||||||
|
(lookup->flags & flags & (F_IPV4 | F_IPV6)) &&
|
||||||
|
memcmp(&lookup->addr.addr, addr, addrlen) == 0)
|
||||||
{
|
{
|
||||||
for (lookup = hash_table[i]; lookup; lookup = lookup->hash_next)
|
flags &= ~F_REVERSE;
|
||||||
if ((lookup->flags & F_HOSTS) &&
|
break;
|
||||||
(lookup->flags & flags & (F_IPV4 | F_IPV6)) &&
|
|
||||||
memcmp(&lookup->addr.addr, addr, addrlen) == 0)
|
|
||||||
{
|
|
||||||
flags &= ~F_REVERSE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (lookup)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cache->flags = flags;
|
cache->flags = flags;
|
||||||
cache->uid = index;
|
cache->uid = index;
|
||||||
|
|
||||||
|
/* maintain address has chain */
|
||||||
|
cache->next = rhash[j % RHASHSIZE];
|
||||||
|
rhash[j % RHASHSIZE] = cache;
|
||||||
|
|
||||||
memcpy(&cache->addr.addr, addr, addrlen);
|
memcpy(&cache->addr.addr, addr, addrlen);
|
||||||
|
|
||||||
cache_hash(cache);
|
cache_hash(cache);
|
||||||
|
|
||||||
/* don't need to do alias stuff for second and subsequent addresses. */
|
/* don't need to do alias stuff for second and subsequent addresses. */
|
||||||
@@ -743,14 +752,14 @@ static int gettok(FILE *f, char *token)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int read_hostsfile(char *filename, int index, int cache_size)
|
static int read_hostsfile(char *filename, int index, int cache_size, struct crec **rhash)
|
||||||
{
|
{
|
||||||
FILE *f = fopen(filename, "r");
|
FILE *f = fopen(filename, "r");
|
||||||
char *token = daemon->namebuff, *domain_suffix = NULL;
|
char *token = daemon->namebuff, *domain_suffix = NULL;
|
||||||
int addr_count = 0, name_count = cache_size, lineno = 0;
|
int addr_count = 0, name_count = cache_size, lineno = 0;
|
||||||
unsigned short flags = 0, saved_flags = 0;
|
unsigned short flags = 0;
|
||||||
struct all_addr addr, saved_addr;
|
struct all_addr addr;
|
||||||
int atnl, addrlen = 0, addr_dup;
|
int atnl, addrlen = 0;
|
||||||
|
|
||||||
if (!f)
|
if (!f)
|
||||||
{
|
{
|
||||||
@@ -762,7 +771,6 @@ static int read_hostsfile(char *filename, int index, int cache_size)
|
|||||||
|
|
||||||
while ((atnl = gettok(f, token)) != EOF)
|
while ((atnl = gettok(f, token)) != EOF)
|
||||||
{
|
{
|
||||||
addr_dup = 0;
|
|
||||||
lineno++;
|
lineno++;
|
||||||
|
|
||||||
#ifdef HAVE_IPV6
|
#ifdef HAVE_IPV6
|
||||||
@@ -794,14 +802,6 @@ static int read_hostsfile(char *filename, int index, int cache_size)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (saved_flags == flags && memcmp(&addr, &saved_addr, addrlen) == 0)
|
|
||||||
addr_dup = 1;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
saved_flags = flags;
|
|
||||||
saved_addr = addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
addr_count++;
|
addr_count++;
|
||||||
|
|
||||||
/* rehash every 1000 names. */
|
/* rehash every 1000 names. */
|
||||||
@@ -832,14 +832,13 @@ static int read_hostsfile(char *filename, int index, int cache_size)
|
|||||||
strcpy(cache->name.sname, canon);
|
strcpy(cache->name.sname, canon);
|
||||||
strcat(cache->name.sname, ".");
|
strcat(cache->name.sname, ".");
|
||||||
strcat(cache->name.sname, domain_suffix);
|
strcat(cache->name.sname, domain_suffix);
|
||||||
add_hosts_entry(cache, &addr, addrlen, flags, index, addr_dup);
|
add_hosts_entry(cache, &addr, addrlen, flags, index, rhash);
|
||||||
addr_dup = 1;
|
|
||||||
name_count++;
|
name_count++;
|
||||||
}
|
}
|
||||||
if ((cache = whine_malloc(sizeof(struct crec) + strlen(canon)+1-SMALLDNAME)))
|
if ((cache = whine_malloc(sizeof(struct crec) + strlen(canon)+1-SMALLDNAME)))
|
||||||
{
|
{
|
||||||
strcpy(cache->name.sname, canon);
|
strcpy(cache->name.sname, canon);
|
||||||
add_hosts_entry(cache, &addr, addrlen, flags, index, addr_dup);
|
add_hosts_entry(cache, &addr, addrlen, flags, index, rhash);
|
||||||
name_count++;
|
name_count++;
|
||||||
}
|
}
|
||||||
free(canon);
|
free(canon);
|
||||||
@@ -863,6 +862,7 @@ void cache_reload(void)
|
|||||||
struct crec *cache, **up, *tmp;
|
struct crec *cache, **up, *tmp;
|
||||||
int i, total_size = daemon->cachesize;
|
int i, total_size = daemon->cachesize;
|
||||||
struct hostsfile *ah;
|
struct hostsfile *ah;
|
||||||
|
struct crec **reverse_hash;
|
||||||
|
|
||||||
cache_inserted = cache_live_freed = 0;
|
cache_inserted = cache_live_freed = 0;
|
||||||
|
|
||||||
@@ -896,13 +896,21 @@ void cache_reload(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!(reverse_hash = whine_malloc(sizeof(struct crec *) * RHASHSIZE)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < RHASHSIZE; i++)
|
||||||
|
reverse_hash[i] = NULL;
|
||||||
|
|
||||||
if (!option_bool(OPT_NO_HOSTS))
|
if (!option_bool(OPT_NO_HOSTS))
|
||||||
total_size = read_hostsfile(HOSTSFILE, 0, total_size);
|
total_size = read_hostsfile(HOSTSFILE, 0, total_size, reverse_hash);
|
||||||
|
|
||||||
daemon->addn_hosts = expand_filelist(daemon->addn_hosts);
|
daemon->addn_hosts = expand_filelist(daemon->addn_hosts);
|
||||||
for (ah = daemon->addn_hosts; ah; ah = ah->next)
|
for (ah = daemon->addn_hosts; ah; ah = ah->next)
|
||||||
if (!(ah->flags & AH_INACTIVE))
|
if (!(ah->flags & AH_INACTIVE))
|
||||||
total_size = read_hostsfile(ah->fname, ah->index, total_size);
|
total_size = read_hostsfile(ah->fname, ah->index, total_size, reverse_hash);
|
||||||
|
|
||||||
|
free(reverse_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *get_domain(struct in_addr addr)
|
char *get_domain(struct in_addr addr)
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define FTABSIZ 150 /* max number of outstanding requests (default) */
|
#define FTABSIZ 150 /* max number of outstanding requests (default) */
|
||||||
|
#define RHASHSIZE 1024 /* hash buckets for address lookup during hostfile read */
|
||||||
#define MAX_PROCS 20 /* max no children for TCP requests */
|
#define MAX_PROCS 20 /* max no children for TCP requests */
|
||||||
#define CHILD_LIFETIME 150 /* secs 'till terminated (RFC1035 suggests > 120s) */
|
#define CHILD_LIFETIME 150 /* secs 'till terminated (RFC1035 suggests > 120s) */
|
||||||
#define EDNS_PKTSZ 4096 /* default max EDNS.0 UDP packet from RFC5625 */
|
#define EDNS_PKTSZ 4096 /* default max EDNS.0 UDP packet from RFC5625 */
|
||||||
|
|||||||
Reference in New Issue
Block a user