Revise resource handling for number of concurrent DNS queries.

The limit on concurrent queries used to be global, but that causes a problem
when different servers are used for different upstream domains. Queries which
are routed by domain to an upstream server which is not responding will
build up and trigger the limit, which breaks DNS service for all other
domains, even those which could be handled by other servers. The change is to
make the limit per server-group, where a server group is the set of servers
configured for a particular domain. In the common case, where only
default servers are declared, there is no effective change.
This commit is contained in:
Simon Kelley
2021-06-13 21:29:22 +01:00
parent 4a6550d69a
commit 3236f358f8
7 changed files with 87 additions and 120 deletions

View File

@@ -23,7 +23,7 @@
#define SAFE_PKTSZ 1280 /* "go anywhere" UDP packet size */
#define KEYBLOCK_LEN 40 /* choose to minimise fragmentation when storing DNSSEC keys */
#define DNSSEC_WORK 50 /* Max number of queries to validate one question */
#define TIMEOUT 10 /* drop UDP queries after TIMEOUT seconds */
#define TIMEOUT 10 /* drop UDP queries after TIMEOUT seconds */
#define FORWARD_TEST 50 /* try all servers every 50 queries */
#define FORWARD_TIME 20 /* or 20 seconds */
#define UDP_TEST_TIME 60 /* How often to reset our idea of max packet size. */

View File

@@ -24,7 +24,7 @@ struct daemon *daemon;
static volatile pid_t pid = 0;
static volatile int pipewrite;
static int set_dns_listeners(time_t now);
static void set_dns_listeners(void);
static void check_dns_listeners(time_t now);
static void sig_handler(int sig);
static void async_event(int pipe, time_t now);
@@ -1042,16 +1042,10 @@ int main (int argc, char **argv)
while (1)
{
int t, timeout = -1;
int timeout = -1;
poll_reset();
/* if we are out of resources, find how long we have to wait
for some to come free, we'll loop around then and restart
listening for queries */
if ((t = set_dns_listeners(now)) != 0)
timeout = t * 1000;
/* Whilst polling for the dbus, or doing a tftp transfer, wake every quarter second */
if (daemon->tftp_trans ||
(option_bool(OPT_DBUS) && !daemon->dbus))
@@ -1061,6 +1055,8 @@ int main (int argc, char **argv)
else if (is_dad_listeners())
timeout = 1000;
set_dns_listeners();
#ifdef HAVE_DBUS
set_dbus_listeners();
#endif
@@ -1685,12 +1681,12 @@ void clear_cache_and_reload(time_t now)
#endif
}
static int set_dns_listeners(time_t now)
static void set_dns_listeners(void)
{
struct serverfd *serverfdp;
struct listener *listener;
struct randfd_list *rfl;
int wait = 0, i;
int i;
#ifdef HAVE_TFTP
int tftp = 0;
@@ -1703,10 +1699,6 @@ static int set_dns_listeners(time_t now)
}
#endif
/* will we be able to get memory? */
if (daemon->port != 0)
get_new_frec(now, &wait, NULL);
for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
poll_listen(serverfdp->fd, POLLIN);
@@ -1725,10 +1717,9 @@ static int set_dns_listeners(time_t now)
for (listener = daemon->listeners; listener; listener = listener->next)
{
/* only listen for queries if we have resources */
if (listener->fd != -1 && wait == 0)
if (listener->fd != -1)
poll_listen(listener->fd, POLLIN);
/* Only listen for TCP connections when a process slot
is available. Death of a child goes through the select loop, so
we don't need to explicitly arrange to wake up here,
@@ -1741,15 +1732,12 @@ static int set_dns_listeners(time_t now)
if (tftp <= daemon->tftp_max && listener->tftpfd != -1)
poll_listen(listener->tftpfd, POLLIN);
#endif
}
if (!option_bool(OPT_DEBUG))
for (i = 0; i < MAX_PROCS; i++)
if (daemon->tcp_pipes[i] != -1)
poll_listen(daemon->tcp_pipes[i], POLLIN);
return wait;
}
static void check_dns_listeners(time_t now)
@@ -2100,7 +2088,7 @@ int delay_dhcp(time_t start, int sec, int fd, uint32_t addr, unsigned short id)
poll_reset();
if (fd != -1)
poll_listen(fd, POLLIN);
set_dns_listeners(now);
set_dns_listeners();
set_log_writer();
#ifdef HAVE_DHCP6

View File

@@ -1390,7 +1390,6 @@ void receive_query(struct listener *listen, time_t now);
unsigned char *tcp_request(int confd, time_t now,
union mysockaddr *local_addr, struct in_addr netmask, int auth_dns);
void server_gone(struct server *server);
struct frec *get_new_frec(time_t now, int *wait, struct frec *force);
int send_from(int fd, int nowild, char *packet, size_t len,
union mysockaddr *to, union all_addr *source,
unsigned int iface);
@@ -1728,6 +1727,7 @@ int filter_servers(int seed, int flags, int *lowout, int *highout);
int is_local_answer(time_t now, int first, char *name);
size_t make_local_answer(int flags, int gotname, size_t size, struct dns_header *header,
char *name, int first, int last);
int server_samegroup(struct server *a, struct server *b);
#ifdef HAVE_DNSSEC
int dnssec_server(struct server *server, char *keyname, int *firstp, int *lastp);
#endif

View File

@@ -195,6 +195,11 @@ int lookup_domain(char *qdomain, int flags, int *lowout, int *highout)
return 1;
}
/* Test whether two servers belong to the same server group.
   Returns 1 when order_servers() compares a and b as equal (result 0),
   and 0 otherwise. */
int server_samegroup(struct server *a, struct server *b)
{
  int cmp = order_servers(a, b);

  return !cmp;
}
int filter_servers(int seed, int flags, int *lowout, int *highout)
{

View File

@@ -16,12 +16,13 @@
#include "dnsmasq.h"
static struct frec *get_new_frec(time_t now, struct server *serv, struct frec *force);
static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp);
static struct frec *lookup_frec_by_query(void *hash, unsigned int flags);
static unsigned short get_id(void);
static void free_frec(struct frec *f);
static void query_full(time_t now);
static void query_full(time_t now, char *domain);
/* Send a UDP packet with its source address set as "source"
unless nowild is true, when we just send it with the kernel default */
@@ -219,7 +220,7 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
/* If we've been spammed with many duplicates, return REFUSED. */
if (!daemon->free_frec_src)
{
query_full(now);
query_full(now, NULL);
goto reply;
}
@@ -242,7 +243,7 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
}
}
/* retry existing query */
/* new query */
if (!forward)
{
/* new query */
@@ -269,7 +270,7 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
master = daemon->serverarray[first];
if (!(forward = get_new_frec(now, NULL, NULL)))
if (!(forward = get_new_frec(now, master, NULL)))
goto reply;
/* table full - flags == 0, return REFUSED */
@@ -780,8 +781,9 @@ static int dnssec_validate(struct frec **forwardp, struct dns_header *header,
/* Make sure we don't expire and free the orig frec during the
allocation of a new one. */
if (--orig->work_counter == 0 ||
!(new = get_new_frec(now, NULL, orig)) ||
(serverind = dnssec_server(server, daemon->keyname, NULL, NULL)) == -1)
(serverind = dnssec_server(server, daemon->keyname, NULL, NULL)) == -1 ||
!(server = daemon->serverarray[serverind]) ||
!(new = get_new_frec(now, server, orig)))
{
status = STAT_ABANDONED;
if (new)
@@ -793,8 +795,6 @@ static int dnssec_validate(struct frec **forwardp, struct dns_header *header,
struct frec *next = new->next;
size_t nn;
server = daemon->serverarray[serverind];
*new = *forward; /* copy everything, then overwrite */
new->next = next;
new->blocking_query = NULL;
@@ -1922,29 +1922,6 @@ unsigned char *tcp_request(int confd, time_t now,
return packet;
}
/* Create a new forwarding record and push it onto the head of
   daemon->frec_list.

   now: timestamp stored in the record's time field.

   The record starts out unused: sentto and rfds are NULL, flags is zero
   and, when built with HAVE_DNSSEC, the DNSSEC linkage fields are NULL.

   Returns the new record, or NULL if whine_malloc() returns NULL, in
   which case the list is left unchanged. */
static struct frec *allocate_frec(time_t now)
{
  struct frec *rec = (struct frec *)whine_malloc(sizeof(struct frec));

  if (!rec)
    return NULL;

  rec->time = now;
  rec->flags = 0;
  rec->sentto = NULL;
  rec->rfds = NULL;
#ifdef HAVE_DNSSEC
  rec->stash = NULL;
  rec->blocking_query = NULL;
  rec->dependent = NULL;
#endif

  /* link in at the head of the global list */
  rec->next = daemon->frec_list;
  daemon->frec_list = rec;

  return rec;
}
/* return a UDP socket bound to a random port, have to cope with straying into
occupied port nos and reserved ones. */
static int random_sock(struct server *s)
@@ -2167,96 +2144,81 @@ static void free_frec(struct frec *f)
/* if wait==NULL return a free or older than TIMEOUT record.
else return *wait zero if one available, or *wait is delay to
when the oldest in-use record will expire. Impose an absolute
/* Impose an absolute
limit of 4*TIMEOUT before we wipe things (for random sockets).
If force is non-NULL, always return a result, even if we have
to allocate above the limit, and never free the record pointed
to by the force argument. */
struct frec *get_new_frec(time_t now, int *wait, struct frec *force)
static struct frec *get_new_frec(time_t now, struct server *master, struct frec *force)
{
struct frec *f, *oldest, *target;
int count;
if (wait)
*wait = 0;
for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next, count++)
if (!f->sentto)
target = f;
else
{
#ifdef HAVE_DNSSEC
/* Don't free DNSSEC sub-queries here, as we may end up with
dangling references to them. They'll go when their "real" query
is freed. */
if (!f->dependent && f != force)
#endif
{
if (difftime(now, f->time) >= 4*TIMEOUT)
{
free_frec(f);
target = f;
}
if (!oldest || difftime(f->time, oldest->time) <= 0)
oldest = f;
}
}
if (target)
/* look for free records, garbage collect old records and count number in use by our server-group. */
for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next)
{
target->time = now;
return target;
}
/* can't find empty one, use oldest if there is one
and it's older than timeout */
if (!force && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
{
/* keep stuff for twice timeout if we can by allocating a new
record instead */
if (difftime(now, oldest->time) < 2*TIMEOUT &&
count <= daemon->ftabsize &&
(f = allocate_frec(now)))
return f;
if (!wait)
if (!f->sentto)
target = f;
else
{
free_frec(oldest);
oldest->time = now;
#ifdef HAVE_DNSSEC
/* Don't free DNSSEC sub-queries here, as we may end up with
dangling references to them. They'll go when their "real" query
is freed. */
if (!f->dependent && f != force)
#endif
{
if (difftime(now, f->time) >= 4*TIMEOUT)
{
free_frec(f);
target = f;
}
if (!oldest || difftime(f->time, oldest->time) <= 0)
oldest = f;
}
}
return oldest;
if (f->sentto && ((int)difftime(now, f->time)) < TIMEOUT && server_samegroup(f->sentto, master))
count++;
}
/* none available, calculate time 'till oldest record expires */
if (!force && count > daemon->ftabsize)
if (!force && count >= daemon->ftabsize)
{
if (oldest && wait)
*wait = oldest->time + (time_t)TIMEOUT - now;
query_full(now);
query_full(now, master->domain);
return NULL;
}
if (!(f = allocate_frec(now)) && wait)
/* wait one second on malloc failure */
*wait = 1;
if (!target && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
{
/* can't find empty one, use oldest if there is one and it's older than timeout */
free_frec(oldest);
target = oldest;
}
if (!target && (target = (struct frec *)whine_malloc(sizeof(struct frec))))
{
target->next = daemon->frec_list;
daemon->frec_list = target;
}
return f; /* OK if malloc fails and this is NULL */
if (target)
target->time = now;
return target;
}
static void query_full(time_t now)
static void query_full(time_t now, char *domain)
{
static time_t last_log = 0;
if ((int)difftime(now, last_log) > 5)
{
last_log = now;
my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
if (!domain || strlen(domain) == 0)
my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
else
my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries to %s reached (max: %d)"), domain, daemon->ftabsize);
}
}