Fix bug in TCP process handling.

Fix bug which caused dnsmasq to lose track of processes forked
to handle TCP DNS connections under heavy load. The code
checked that at least one free process table slot was
available before listening on TCP sockets, but didn't take
into account that more than one TCP connection could
arrive, so that check was not sufficient to ensure that
there would be slots for all new processes. It compounded
this error by silently failing to store the process when
it did run out of slots. Even when this bug is triggered,
all the right things happen, and answers are still returned.
Only under very exceptional circumstances, does the bug
manifest itself: see
https://lists.thekelleys.org.uk/pipermail/dnsmasq-discuss/2021q2/014976.html

Thanks to Tijs Van Buggenhout for finding the conditions under
which the bug manifests itself, and then working out
exactly what was going on.
This commit is contained in:
Simon Kelley
2021-04-09 16:08:05 +01:00
parent d55e2d086d
commit ad90eb075d
2 changed files with 43 additions and 19 deletions

View File

@@ -3,6 +3,24 @@ version 2.86
Thanks to Aichun Li for spotting this ommision, and the initial Thanks to Aichun Li for spotting this ommision, and the initial
patch. patch.
Fix bug which caused dnsmasq to lose track of processes forked
to handle TCP DNS connections under heavy load. The code
checked that at least one free process table slot was
available before listening on TCP sockets, but didn't take
into account that more than one TCP connection could
arrive, so that check was not sufficient to ensure that
there would be slots for all new processes. It compounded
this error by silently failing to store the process when
it did run out of slots. Even when this bug is triggered,
all the right things happen, and answers are still returned.
Only under very exceptional circumstances, does the bug
manifest itself: see
https://lists.thekelleys.org.uk/pipermail/dnsmasq-discuss/2021q2/014976.html
Thanks to Tijs Van Buggenhout for finding the conditions under
which the bug manifests itself, and then working out
exactly what was going on.
version 2.85 version 2.85
Fix problem with DNS retries in 2.83/2.84. Fix problem with DNS retries in 2.83/2.84.

View File

@@ -1716,22 +1716,24 @@ static int set_dns_listeners(time_t now)
for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next) for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next)
poll_listen(rfl->rfd->fd, POLLIN); poll_listen(rfl->rfd->fd, POLLIN);
/* check to see if we have free tcp process slots. */
for (i = MAX_PROCS - 1; i >= 0; i--)
if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
break;
for (listener = daemon->listeners; listener; listener = listener->next) for (listener = daemon->listeners; listener; listener = listener->next)
{ {
/* only listen for queries if we have resources */ /* only listen for queries if we have resources */
if (listener->fd != -1 && wait == 0) if (listener->fd != -1 && wait == 0)
poll_listen(listener->fd, POLLIN); poll_listen(listener->fd, POLLIN);
/* death of a child goes through the select loop, so /* Only listen for TCP connections when a process slot
we don't need to explicitly arrange to wake up here */ is available. Death of a child goes through the select loop, so
if (listener->tcpfd != -1) we don't need to explicitly arrange to wake up here,
for (i = 0; i < MAX_PROCS; i++) we'll be called again when a slot becomes available. */
if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) if (listener->tcpfd != -1 && i >= 0)
{ poll_listen(listener->tcpfd, POLLIN);
poll_listen(listener->tcpfd, POLLIN);
break;
}
#ifdef HAVE_TFTP #ifdef HAVE_TFTP
/* tftp == 0 in single-port mode. */ /* tftp == 0 in single-port mode. */
if (tftp <= daemon->tftp_max && listener->tftpfd != -1) if (tftp <= daemon->tftp_max && listener->tftpfd != -1)
@@ -1797,7 +1799,16 @@ static void check_dns_listeners(time_t now)
tftp_request(listener, now); tftp_request(listener, now);
#endif #endif
if (listener->tcpfd != -1 && poll_check(listener->tcpfd, POLLIN)) /* check to see if we have a free tcp process slot.
Note that we can't assume that because we had
at least one a poll() time, that we still do.
There may be more waiting connections after
poll() returns then free process slots. */
for (i = MAX_PROCS - 1; i >= 0; i--)
if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
break;
if (listener->tcpfd != -1 && i >= 0 && poll_check(listener->tcpfd, POLLIN))
{ {
int confd, client_ok = 1; int confd, client_ok = 1;
struct irec *iface = NULL; struct irec *iface = NULL;
@@ -1887,7 +1898,6 @@ static void check_dns_listeners(time_t now)
close(pipefd[0]); close(pipefd[0]);
else else
{ {
int i;
#ifdef HAVE_LINUX_NETWORK #ifdef HAVE_LINUX_NETWORK
/* The child process inherits the netlink socket, /* The child process inherits the netlink socket,
which it never uses, but when the parent (us) which it never uses, but when the parent (us)
@@ -1907,13 +1917,9 @@ static void check_dns_listeners(time_t now)
read_write(pipefd[0], &a, 1, 1); read_write(pipefd[0], &a, 1, 1);
#endif #endif
for (i = 0; i < MAX_PROCS; i++) /* i holds index of free slot */
if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) daemon->tcp_pids[i] = p;
{ daemon->tcp_pipes[i] = pipefd[0];
daemon->tcp_pids[i] = p;
daemon->tcp_pipes[i] = pipefd[0];
break;
}
} }
close(confd); close(confd);