Tidy-up of TCP-child pipe handling code.

Functionality is unchanged, but the code is easier to read and understand.

Also fix a memory leak of blocks when a cache insert fails.
This commit is contained in:
Simon Kelley
2025-05-11 15:30:30 +01:00
parent 8ddabd11bc
commit b0aa604fcc
3 changed files with 198 additions and 190 deletions

View File

@@ -780,6 +780,13 @@ void cache_end_insert(void)
if (insert_error)
return;
/* signal start of cache insert transaction to master process */
if (daemon->pipe_to_parent != -1)
{
unsigned char op = PIPE_OP_INSERT;
read_write(daemon->pipe_to_parent, &op, sizeof(op), RW_WRITE);
}
while (new_chain)
{
struct crec *tmp = new_chain->next;
@@ -799,12 +806,10 @@ void cache_end_insert(void)
char *name = cache_get_name(new_chain);
ssize_t m = strlen(name);
unsigned int flags = new_chain->flags;
unsigned char op = PIPE_OP_RR;
#ifdef HAVE_DNSSEC
u16 class = new_chain->uid;
#endif
read_write(daemon->pipe_to_parent, &op, sizeof(op), RW_WRITE);
read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), RW_WRITE);
read_write(daemon->pipe_to_parent, (unsigned char *)name, m, RW_WRITE);
read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->ttd, sizeof(new_chain->ttd), RW_WRITE);
@@ -818,7 +823,7 @@ void cache_end_insert(void)
blockdata_write(new_chain->addr.rrblock.rrdata, new_chain->addr.rrblock.datalen, daemon->pipe_to_parent);
}
#ifdef HAVE_DNSSEC
if (flags & F_DNSKEY)
else if (flags & F_DNSKEY)
{
read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), RW_WRITE);
blockdata_write(new_chain->addr.key.keydata, new_chain->addr.key.keylen, daemon->pipe_to_parent);
@@ -840,8 +845,8 @@ void cache_end_insert(void)
/* signal end of cache insert in master process */
if (daemon->pipe_to_parent != -1)
{
unsigned char op = PIPE_OP_END;
read_write(daemon->pipe_to_parent, &op, sizeof(op), RW_WRITE);
ssize_t m = -1;
read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), RW_WRITE);
}
}
@@ -874,195 +879,204 @@ void cache_send_ipset(unsigned char op, struct ipsets *sets, int flags, union al
}
#endif
/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. */
/* Retrieve and handle a result from child TCP-handler.
Return 0 when pipe is closed by far end. */
int cache_recv_insert(time_t now, int fd)
{
ssize_t m;
union all_addr addr;
unsigned long ttl;
time_t ttd;
unsigned int flags;
struct crec *crecp = NULL;
unsigned char op;
cache_start_insert();
if (!read_write(fd, &op, sizeof(op), RW_READ))
return 0;
while (1)
switch (op)
{
if (!read_write(fd, &op, sizeof(op), RW_READ))
return 0;
case PIPE_OP_INSERT:
{
/* A marshalled set of cache entries arrives on fd; read, unmarshal and insert into the cache of the master process. */
ssize_t m;
union all_addr addr;
unsigned long ttl;
time_t ttd;
unsigned int flags;
struct crec *crecp = NULL;
switch (op)
{
case PIPE_OP_END:
cache_end_insert();
return 1;
cache_start_insert();
#ifdef HAVE_DNSSEC
case PIPE_OP_STATS:
/* loop reading RRs, since we don't want to go back to the poll() loop
and start processing other queries which might pollute the insertion
chain. The child will never block between the first OP_RR and the
minus-one length marking the end. */
while (1)
{
/* Sneak in possibly updated crypto HWM. */
unsigned int val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_CRYPTO_HWM])
daemon->metrics[METRIC_CRYPTO_HWM] = val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_SIG_FAIL_HWM])
daemon->metrics[METRIC_SIG_FAIL_HWM] = val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_WORK_HWM])
daemon->metrics[METRIC_WORK_HWM] = val;
return 1;
}
case PIPE_OP_RESULT:
{
/* UDP validation moved to TCP to avoid truncation.
Restart UDP validation process with the returned result. */
int status, uid, keycount, validatecount;
int *keycountp, *validatecountp;
size_t ret_len;
struct frec *forward;
if (!read_write(fd, (unsigned char *)&status, sizeof(status), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&ret_len, sizeof(ret_len), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)daemon->packet, ret_len, RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&forward, sizeof(forward), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&uid, sizeof(uid), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&keycount, sizeof(keycount), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&keycountp, sizeof(keycountp), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&validatecount, sizeof(validatecount), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&validatecountp, sizeof(validatecountp), RW_READ))
if (!read_write(fd, (unsigned char *)&m, sizeof(m), RW_READ))
return 0;
/* There's a tiny chance that the frec may have been freed
and reused before the TCP process returns. Detect that with
the uid field which is unique modulo 2^32 for each use. */
if (uid == forward->uid)
if (m == -1)
{
/* repatriate the work counters from the child process. */
*keycountp = keycount;
*validatecountp = validatecount;
if (!forward->dependent)
return_reply(now, forward, (struct dns_header *)daemon->packet, ret_len, status);
else
pop_and_retry_query(forward, status, now);
cache_end_insert();
return 1;
}
return 1;
}
#endif
if (!read_write(fd, (unsigned char *)daemon->namebuff, m, RW_READ) ||
!read_write(fd, (unsigned char *)&ttd, sizeof(ttd), RW_READ) ||
!read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_READ) ||
!read_write(fd, (unsigned char *)&addr, sizeof(addr), RW_READ))
return 0;
case PIPE_OP_RR:
if (!read_write(fd, (unsigned char *)&m, sizeof(m), RW_READ) ||
!read_write(fd, (unsigned char *)daemon->namebuff, m, RW_READ) ||
!read_write(fd, (unsigned char *)&ttd, sizeof(ttd), RW_READ) ||
!read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_READ) ||
!read_write(fd, (unsigned char *)&addr, sizeof(addr), RW_READ))
return 0;
daemon->namebuff[m] = 0;
daemon->namebuff[m] = 0;
ttl = difftime(ttd, now);
ttl = difftime(ttd, now);
if (flags & F_CNAME)
{
struct crec *newc = really_insert(daemon->namebuff, NULL, C_IN, now, ttl, flags);
/* This relies on the fact that the target of a CNAME immediately precedes
it because of the order of extraction in extract_addresses, and
the order reversal on the new_chain. */
if (newc)
{
newc->addr.cname.is_name_ptr = 0;
if (flags & F_CNAME)
{
struct crec *newc = really_insert(daemon->namebuff, NULL, C_IN, now, ttl, flags);
/* This relies on the fact that the target of a CNAME immediately precedes
it because of the order of extraction in extract_addresses, and
the order reversal on the new_chain. */
if (newc)
{
newc->addr.cname.is_name_ptr = 0;
if (!crecp)
newc->addr.cname.target.cache = NULL;
else
{
next_uid(crecp);
newc->addr.cname.target.cache = crecp;
newc->addr.cname.uid = crecp->uid;
}
}
}
else
{
unsigned short class = C_IN;
struct blockdata *block = NULL;
if (!crecp)
newc->addr.cname.target.cache = NULL;
else
{
next_uid(crecp);
newc->addr.cname.target.cache = crecp;
newc->addr.cname.uid = crecp->uid;
}
}
}
else
{
unsigned short class = C_IN;
if ((flags & F_RR) && !(flags & F_NEG) && (flags & F_KEYTAG)
&& !(addr.rrblock.rrdata = blockdata_read(fd, addr.rrblock.datalen)))
return 0;
if ((flags & F_RR) && !(flags & F_NEG) && (flags & F_KEYTAG)
&& !(block = addr.rrblock.rrdata = blockdata_read(fd, addr.rrblock.datalen)))
continue;
#ifdef HAVE_DNSSEC
if (flags & F_DNSKEY)
{
if (!read_write(fd, (unsigned char *)&class, sizeof(class), RW_READ) ||
!(addr.key.keydata = blockdata_read(fd, addr.key.keylen)))
return 0;
}
else if (flags & F_DS)
{
if (!read_write(fd, (unsigned char *)&class, sizeof(class), RW_READ) ||
(!(flags & F_NEG) && !(addr.key.keydata = blockdata_read(fd, addr.key.keylen))))
return 0;
}
else if (flags & F_DNSKEY)
{
if (!read_write(fd, (unsigned char *)&class, sizeof(class), RW_READ))
return 0;
if (!(block = addr.key.keydata = blockdata_read(fd, addr.key.keylen)))
continue;
}
else if (flags & F_DS)
{
if (!read_write(fd, (unsigned char *)&class, sizeof(class), RW_READ))
return 0;
if (!(flags & F_NEG) && !(block = addr.ds.keydata = blockdata_read(fd, addr.ds.keylen)))
continue;
}
#endif
crecp = really_insert(daemon->namebuff, &addr, class, now, ttl, flags);
}
if (!(crecp = really_insert(daemon->namebuff, &addr, class, now, ttl, flags)))
blockdata_free(block);
}
}
}
/* loop reading RRs, since we don't want to go back to the poll() loop
and start processing other queries which might pollute the insertion
chain. The child will never block between the first OP_RR and the OP_END */
continue;
#ifdef HAVE_DNSSEC
case PIPE_OP_STATS:
{
/* Sneak in possibly updated crypto HWM. */
unsigned int val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_CRYPTO_HWM])
daemon->metrics[METRIC_CRYPTO_HWM] = val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_SIG_FAIL_HWM])
daemon->metrics[METRIC_SIG_FAIL_HWM] = val;
if (!read_write(fd, (unsigned char *)&val, sizeof(val), RW_READ))
return 0;
if (val > daemon->metrics[METRIC_WORK_HWM])
daemon->metrics[METRIC_WORK_HWM] = val;
return 1;
}
case PIPE_OP_RESULT:
{
/* UDP validation moved to TCP to avoid truncation.
Restart UDP validation process with the returned result. */
int status, uid, keycount, validatecount;
int *keycountp, *validatecountp;
size_t ret_len;
struct frec *forward;
if (!read_write(fd, (unsigned char *)&status, sizeof(status), RW_READ) ||
!read_write(fd, (unsigned char *)&ret_len, sizeof(ret_len), RW_READ) ||
!read_write(fd, (unsigned char *)daemon->packet, ret_len, RW_READ) ||
!read_write(fd, (unsigned char *)&forward, sizeof(forward), RW_READ) ||
!read_write(fd, (unsigned char *)&uid, sizeof(uid), RW_READ) ||
!read_write(fd, (unsigned char *)&keycount, sizeof(keycount), RW_READ) ||
!read_write(fd, (unsigned char *)&keycountp, sizeof(keycountp), RW_READ) ||
!read_write(fd, (unsigned char *)&validatecount, sizeof(validatecount), RW_READ) ||
!read_write(fd, (unsigned char *)&validatecountp, sizeof(validatecountp), RW_READ))
return 0;
/* There's a tiny chance that the frec may have been freed
and reused before the TCP process returns. Detect that with
the uid field which is unique modulo 2^32 for each use. */
if (uid == forward->uid)
{
/* repatriate the work counters from the child process. */
*keycountp = keycount;
*validatecountp = validatecount;
if (!forward->dependent)
return_reply(now, forward, (struct dns_header *)daemon->packet, ret_len, status);
else
pop_and_retry_query(forward, status, now);
}
return 1;
}
#endif
#if defined(HAVE_IPSET) || defined(HAVE_NFTSET)
case PIPE_OP_IPSET:
case PIPE_OP_NFTSET:
{
struct ipsets *sets;
char **sets_cur;
case PIPE_OP_IPSET:
case PIPE_OP_NFTSET:
{
struct ipsets *sets;
char **sets_cur;
unsigned int flags;
union all_addr addr;
if (!read_write(fd, (unsigned char *)&sets, sizeof(sets), RW_READ) ||
!read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_READ) ||
!read_write(fd, (unsigned char *)&addr, sizeof(addr), RW_READ))
return 0;
if (!read_write(fd, (unsigned char *)&sets, sizeof(sets), RW_READ) ||
!read_write(fd, (unsigned char *)&flags, sizeof(flags), RW_READ) ||
!read_write(fd, (unsigned char *)&addr, sizeof(addr), RW_READ))
return 0;
for (sets_cur = sets->sets; *sets_cur; sets_cur++)
{
int rc = -1;
for (sets_cur = sets->sets; *sets_cur; sets_cur++)
{
int rc = -1;
#ifdef HAVE_IPSET
if (op == PIPE_OP_IPSET)
rc = add_to_ipset(*sets_cur, &addr, flags, 0);
if (op == PIPE_OP_IPSET)
rc = add_to_ipset(*sets_cur, &addr, flags, 0);
#endif
#ifdef HAVE_NFTSET
if (op == PIPE_OP_NFTSET)
rc = add_to_nftset(*sets_cur, &addr, flags, 0);
if (op == PIPE_OP_NFTSET)
rc = add_to_nftset(*sets_cur, &addr, flags, 0);
#endif
if (rc == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, sets->domain, &addr, *sets_cur, op == PIPE_OP_IPSET);
}
if (rc == 0)
log_query((flags & (F_IPV4 | F_IPV6)) | F_IPSET, sets->domain, &addr, *sets_cur, op == PIPE_OP_IPSET);
}
return 1;
}
return 1;
}
#endif
}
}
return 0;
}
int cache_find_non_terminal(char *name, time_t now)

View File

@@ -38,7 +38,6 @@ static void async_event(int pipe, time_t now);
static void fatal_event(struct event_desc *ev, char *msg);
static int read_event(int fd, struct event_desc *evp, char **msg);
static void poll_resolv(int force, int do_reload, time_t now);
static void tcp_init(void);
int main (int argc, char **argv)
{
@@ -422,7 +421,11 @@ int main (int argc, char **argv)
/* safe_malloc returns zero'd memory */
daemon->randomsocks = safe_malloc(daemon->numrrand * sizeof(struct randfd));
tcp_init();
daemon->tcp_pids = safe_malloc(daemon->max_procs*sizeof(pid_t));
daemon->tcp_pipes = safe_malloc(daemon->max_procs*sizeof(int));
for (i = 0; i < daemon->max_procs; i++)
daemon->tcp_pipes[i] = -1;
}
#ifdef HAVE_INOTIFY
@@ -1071,10 +1074,6 @@ int main (int argc, char **argv)
daemon->pipe_to_parent = -1;
if (daemon->port != 0)
for (i = 0; i < daemon->max_procs; i++)
daemon->tcp_pipes[i] = -1;
#ifdef HAVE_INOTIFY
/* Using inotify, have to select a resolv file at startup */
poll_resolv(1, 0, now);
@@ -2419,8 +2418,4 @@ int delay_dhcp(time_t start, int sec, int fd, uint32_t addr, unsigned short id)
}
#endif /* HAVE_DHCP */
void tcp_init(void)
{
daemon->tcp_pids = safe_malloc(daemon->max_procs*sizeof(pid_t));
daemon->tcp_pipes = safe_malloc(daemon->max_procs*sizeof(int));
}

View File

@@ -537,12 +537,11 @@ struct crec {
#define SRC_HOSTS 2
#define SRC_AH 3
#define PIPE_OP_RR 1 /* Resource record */
#define PIPE_OP_END 2 /* Cache entry complete: commit */
#define PIPE_OP_RESULT 3 /* Validation result */
#define PIPE_OP_STATS 4 /* Update parent's stats */
#define PIPE_OP_IPSET 5 /* Update IPset */
#define PIPE_OP_NFTSET 6 /* Update NFTset */
#define PIPE_OP_INSERT 1 /* Cache entry */
#define PIPE_OP_RESULT 2 /* Validation result */
#define PIPE_OP_STATS 3 /* Update parent's stats */
#define PIPE_OP_IPSET 4 /* Update IPset */
#define PIPE_OP_NFTSET 5 /* Update NFTset */
/* struct sockaddr is not large enough to hold any address,
and specifically not big enough to hold an IPv6 address.