Fix problems with ipset or nftset and TCP DNS transport.

If DNS is happening over TCP, the query is handled by a forked
process. If ipset or nftset is configured, this might include
inserting addresses in the *sets. Before this update, that
was done by the forked process using handles inherited from the
parent "master" process.

This is inherently racy. If the master process or another
child process tries to do updates at the same time, the
updates can clash and fail.

To see this, you need a busy server doing lots of DNS
queries over TCP, and ipset or nftset configured.

Going forward, we use the already established pipe to send the
updates from the child back to the master process, which
serialises them.
This commit is contained in:
Simon Kelley
2025-05-07 23:38:15 +01:00
parent e86d53c438
commit 98189ff988
7 changed files with 97 additions and 25 deletions

View File

@@ -864,6 +864,16 @@ void cache_update_hwm(void)
}
#endif
#if defined(HAVE_IPSET) || defined(HAVE_NFTSET)
/* Ask the parent process to add an address to a group of ipsets/nftsets.

   Writes one message to daemon->pipe_to_parent, in this order:
     op     PIPE_OP_IPSET or PIPE_OP_NFTSET (one unsigned char)
     sets   the POINTER VALUE of the struct ipsets, not its contents
     flags  passed through unchanged to the parent
     addr   the full union all_addr

   The PIPE_OP_IPSET/PIPE_OP_NFTSET case in cache_recv_insert() reads the
   last three fields back in the same order, so the two must be kept in step.

   NOTE(review): only the pointer is sent, and the receiver dereferences it.
   That relies on the caller being a forked child of the receiving process,
   so that the same address refers to the same configuration data there. */
void cache_send_ipset(unsigned char op, struct ipsets *sets, int flags, union all_addr *addr)
{
  int fd = daemon->pipe_to_parent;

  /* read_write() yields zero on failure. Stop at the first failed write
     rather than pushing the remaining fields into a pipe that has already
     failed. There is nothing further to do on failure, so the overall
     result is deliberately discarded. */
  (void)(read_write(fd, &op, sizeof(op), RW_WRITE) &&
	 read_write(fd, (unsigned char *)&sets, sizeof(sets), RW_WRITE) &&
	 read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_WRITE) &&
	 read_write(fd, (unsigned char *)addr, sizeof(*addr), RW_WRITE));
}
#endif
/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. */
int cache_recv_insert(time_t now, int fd)
{
@@ -1012,11 +1022,45 @@ int cache_recv_insert(time_t now, int fd)
#endif
crecp = really_insert(daemon->namebuff, &addr, class, now, ttl, flags);
}
/* loop reading RRs, since we don't want to go back to the poll() loop
and start processing other queries which might pollute the insertion
chain. The child will never block between the first OP_RR and the OP_END */
continue;
#if defined(HAVE_IPSET) || defined(HAVE_NFTSET)
case PIPE_OP_IPSET:
case PIPE_OP_NFTSET:
{
struct ipsets *sets;
char **sets_cur;
if (!read_write(fd, (unsigned char *)&sets, sizeof(sets), RW_READ) ||
!read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_READ) ||
!read_write(fd, (unsigned char *)&addr, sizeof(addr), RW_READ))
return 0;
for (sets_cur = sets->sets; *sets_cur; sets_cur++)
{
int rc = -1;
#ifdef HAVE_IPSET
if (op == PIPE_OP_IPSET)
rc = add_to_ipset(*sets_cur, &addr, flags, 0);
#endif
#ifdef HAVE_NFTSET
if (op == PIPE_OP_NFTSET)
rc = add_to_nftset(*sets_cur, &addr, flags, 0);
#endif
if (rc == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, sets->domain, &addr, *sets_cur, op == PIPE_OP_IPSET);
}
return 1;
}
#endif
}
}
}

View File

@@ -153,6 +153,7 @@ NO_AUTH
NO_DUMPFILE
NO_LOOP
NO_INOTIFY
NO_IPSET
these are available to explicitly disable compile time options which would
otherwise be enabled automatically or which are enabled by default
in the distributed source tree. Building dnsmasq
@@ -287,7 +288,6 @@ HAVE_SOCKADDR_SA_LEN
#define HAVE_BSD_NETWORK
#define HAVE_GETOPT_LONG
#define HAVE_SOCKADDR_SA_LEN
#define NO_IPSET
/* Define before sys/socket.h is included so we get socklen_t */
#define _BSD_SOCKLEN_T_
/* Select the RFC_3542 version of the IPv6 socket API.
@@ -297,7 +297,6 @@ HAVE_SOCKADDR_SA_LEN
#ifndef SOL_TCP
# define SOL_TCP IPPROTO_TCP
#endif
#define NO_IPSET
#elif defined(__NetBSD__)
#define HAVE_BSD_NETWORK
@@ -347,6 +346,11 @@ HAVE_SOCKADDR_SA_LEN
#undef HAVE_AUTH
#endif
#if !defined(HAVE_LINUX_NETWORK)
#undef HAVE_IPSET
#undef HAVE_NFTSET
#endif
#if defined(NO_IPSET)
#undef HAVE_IPSET
#endif
@@ -460,4 +464,4 @@ static char *compile_opts =
#endif
"dumpfile";
#endif /* defined(HAVE_DHCP) */
#endif /* defined(DNSMASQ_COMPILE_OPTS) */

View File

@@ -539,8 +539,10 @@ struct crec {
#define PIPE_OP_RR 1 /* Resource record */
#define PIPE_OP_END 2 /* Cache entry complete: commit */
#define PIPE_OP_RESULT 3 /* validation result. */
#define PIPE_OP_RESULT 3 /* Validation result */
#define PIPE_OP_STATS 4 /* Update parent's stats */
#define PIPE_OP_IPSET 5 /* Update IPset */
#define PIPE_OP_NFTSET 6 /* Update NFTset */
/* struct sockaddr is not large enough to hold any address,
and specifically not big enough to hold an IPv6 address.
@@ -1361,6 +1363,10 @@ int cache_recv_insert(time_t now, int fd);
#ifdef HAVE_DNSSEC
void cache_update_hwm(void);
#endif
#if defined(HAVE_IPSET) || defined(HAVE_NFTSET)
void cache_send_ipset(unsigned char op, struct ipsets *sets,
int flags, union all_addr *addr);
#endif
struct crec *cache_insert(char *name, union all_addr *addr, unsigned short class,
time_t now, unsigned long ttl, unsigned int flags);
void cache_reload(void);

View File

@@ -704,16 +704,19 @@ static size_t process_reply(struct dns_header *header, time_t now, struct server
(void)ad_reqd;
(void)do_bit;
#ifdef HAVE_IPSET
if (daemon->ipsets && extract_name(header, n, NULL, daemon->namebuff, EXTR_NAME_EXTRACT, 0))
ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
#if defined(HAVE_IPSET) || defined(HAVE_NFTSET)
if ((daemon->ipsets || daemon->nftsets) && extract_name(header, n, NULL, daemon->namebuff, EXTR_NAME_EXTRACT, 0))
{
# ifdef HAVE_IPSET
ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
# endif
# ifdef HAVE_NFTSET
nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
# endif
}
#endif
#ifdef HAVE_NFTSET
if (daemon->nftsets && extract_name(header, n, NULL, daemon->namebuff, EXTR_NAME_EXTRACT, 0))
nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
#endif
if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
{
/* Get extended RCODE. */

View File

@@ -16,7 +16,7 @@
#include "dnsmasq.h"
#if defined(HAVE_IPSET) && defined(HAVE_LINUX_NETWORK)
#if defined(HAVE_IPSET)
#include <string.h>
#include <errno.h>

View File

@@ -17,7 +17,7 @@
#include "dnsmasq.h"
#if defined (HAVE_NFTSET) && defined (HAVE_LINUX_NETWORK)
#if defined (HAVE_NFTSET)
#include <nftables/libnftables.h>

View File

@@ -1057,19 +1057,34 @@ int extract_addresses(struct dns_header *header, size_t qlen, char *name, time_t
private_net6(&addr.addr6, !option_bool(OPT_LOCAL_REBIND)))
return 1;
}
if (flags & (F_IPV4 | F_IPV6))
{
/* If we're a child process, send this to the parent,
since the ipset and nftset access is not re-entrant. */
#ifdef HAVE_IPSET
if (ipsets && (flags & (F_IPV4 | F_IPV6)))
for (ipsets_cur = ipsets->sets; *ipsets_cur; ipsets_cur++)
if (add_to_ipset(*ipsets_cur, &addr, flags, 0) == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, ipsets->domain, &addr, *ipsets_cur, 1);
if (ipsets)
{
if (daemon->pipe_to_parent != -1)
cache_send_ipset(PIPE_OP_IPSET, ipsets, flags, &addr);
else
for (ipsets_cur = ipsets->sets; *ipsets_cur; ipsets_cur++)
if (add_to_ipset(*ipsets_cur, &addr, flags, 0) == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, ipsets->domain, &addr, *ipsets_cur, 1);
}
#endif
#ifdef HAVE_NFTSET
if (nftsets && (flags & (F_IPV4 | F_IPV6)))
for (nftsets_cur = nftsets->sets; *nftsets_cur; nftsets_cur++)
if (add_to_nftset(*nftsets_cur, &addr, flags, 0) == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, nftsets->domain, &addr, *nftsets_cur, 0);
if (nftsets)
{
if (daemon->pipe_to_parent != -1)
cache_send_ipset(PIPE_OP_NFTSET, nftsets, flags, &addr);
else
for (nftsets_cur = nftsets->sets; *nftsets_cur; nftsets_cur++)
if (add_to_nftset(*nftsets_cur, &addr, flags, 0) == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, nftsets->domain, &addr, *nftsets_cur, 0);
}
#endif
}
}
if (insert)