Handle truncated response UDP-to-TCP to downstream queries when validating.

A relatively common situation is that the reply to a downstream query
will fit in a UDP packet when no DNSSEC RRs are present, but overflows
when the RRSIGS, NSEC ect are added. This extends the automatic
move from UDP to TCP to downstream queries which get truncated replies,
in the hope that once stripped of the DNSSEC RRs, the reply can be returned
via UDP, nwithout making the downstream retry with TCP.

If the downstream sets the DO bit, (ie it wants the DNSSEC RRs, then
this path is not taken, since the downstream will have to get a truncated
repsonse and retry to get a correct answer.
This commit is contained in:
Simon Kelley
2024-10-12 22:25:06 +01:00
parent 1c26ec2876
commit d15d371051
6 changed files with 193 additions and 88 deletions

View File

@@ -898,10 +898,16 @@ int cache_recv_insert(time_t now, int fd)
{
int status, uid, keycount, validatecount;
int *keycountp, *validatecountp;
size_t ret_len;
struct frec *forward;
if (!read_write(fd, (unsigned char *)&status, sizeof(status), 1))
return 0;
if (!read_write(fd, (unsigned char *)&ret_len, sizeof(ret_len), 1))
return 0;
if (!read_write(fd, (unsigned char *)daemon->packet, ret_len, 1))
return 0;
if (!read_write(fd, (unsigned char *)&forward, sizeof(forward), 1))
return 0;
if (!read_write(fd, (unsigned char *)&uid, sizeof(uid), 1))
@@ -923,10 +929,13 @@ int cache_recv_insert(time_t now, int fd)
/* repatriate the work counters from the child process. */
*keycountp = keycount;
*validatecountp = validatecount;
pop_and_retry_query(forward, status, now);
if (!forward->dependent)
return_reply(now, forward, (struct dns_header *)daemon->packet, ret_len, status);
else
pop_and_retry_query(forward, status, now);
}
return 1;
}
#endif
@@ -2281,12 +2290,21 @@ void log_query(unsigned int flags, char *name, union all_addr *addr, char *arg,
if (option_bool(OPT_EXTRALOG))
{
int display_id = daemon->log_display_id;
char *proto = "";
if (option_bool(OPT_LOG_PROTO))
proto = (display_id < 0) ? "TCP " : "UDP ";
if (display_id < 0)
display_id = -display_id;
if (flags & F_NOEXTRA)
my_syslog(LOG_INFO, "%u %s %s%s%s %s%s", daemon->log_display_id, source, name, gap, verb, dest, extra);
my_syslog(LOG_INFO, "%s%u %s %s%s%s %s%s", proto, display_id, source, name, gap, verb, dest, extra);
else
{
int port = prettyprint_addr(daemon->log_source_addr, daemon->addrbuff2);
my_syslog(LOG_INFO, "%u %s/%u %s %s%s%s %s%s", daemon->log_display_id, daemon->addrbuff2, port, source, name, gap, verb, dest, extra);
my_syslog(LOG_INFO, "%s%u %s/%u %s %s%s%s %s%s", proto, display_id, daemon->addrbuff2, port, source, name, gap, verb, dest, extra);
}
}
else

View File

@@ -2074,7 +2074,7 @@ static void check_dns_listeners(time_t now)
cache_recv_insert() calls pop_and_retry_query() after the result
arrives via the pipe to the parent. */
int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header,
size_t plen, int class, struct server *server, int *keycount, int *validatecount)
ssize_t *plen, int class, struct server *server, int *keycount, int *validatecount)
{
struct server *s;
@@ -2148,8 +2148,8 @@ int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header
}
}
status = tcp_key_recurse(now, status, header, plen, class, daemon->namebuff, daemon->keyname,
server, 0, 0, keycount, validatecount);
status = tcp_from_udp(now, status, header, plen, class, daemon->namebuff, daemon->keyname,
server, keycount, validatecount);
/* close upstream connections. */
for (s = daemon->servers; s; s = s->next)
@@ -2167,6 +2167,8 @@ int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header
/* tell our parent we're done, and what the result was then exit. */
read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
read_write(daemon->pipe_to_parent, (unsigned char *)&status, sizeof(status), 0);
read_write(daemon->pipe_to_parent, (unsigned char *)plen, sizeof(*plen), 0);
read_write(daemon->pipe_to_parent, (unsigned char *)header, *plen, 0);
read_write(daemon->pipe_to_parent, (unsigned char *)&forward, sizeof(forward), 0);
read_write(daemon->pipe_to_parent, (unsigned char *)&forward->uid, sizeof(forward->uid), 0);
read_write(daemon->pipe_to_parent, (unsigned char *)keycount, sizeof(*keycount), 0);

View File

@@ -282,7 +282,8 @@ struct event_desc {
#define OPT_NO_IDENT 70
#define OPT_CACHE_RR 71
#define OPT_LOCALHOST_SERVICE 72
#define OPT_LAST 73
#define OPT_LOG_PROTO 73
#define OPT_LAST 74
#define OPTION_BITS (sizeof(unsigned int)*8)
#define OPTION_SIZE ( (OPT_LAST/OPTION_BITS)+((OPT_LAST%OPTION_BITS)!=0) )
@@ -747,9 +748,7 @@ struct dyndir {
#define STAT_SECURE_WILDCARD 0x70000
#define STAT_OK 0x80000
#define STAT_ABANDONED 0x90000
#define STAT_NEED_DS_QUERY 0xa0000
#define STAT_NEED_KEY_QUERY 0xb0000
#define STAT_ASYNC 0xc0000
#define STAT_ASYNC 0xa0000
#define DNSSEC_FAIL_NYV 0x0001 /* key not yet valid */
#define DNSSEC_FAIL_EXP 0x0002 /* key expired */
@@ -1527,11 +1526,12 @@ int option_read_dynfile(char *file, int flags);
/* forward.c */
void reply_query(int fd, time_t now);
void receive_query(struct listener *listen, time_t now);
void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status);
#ifdef HAVE_DNSSEC
void pop_and_retry_query(struct frec *forward, int status, time_t now);
int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
int class, char *name, char *keyname, struct server *server,
int have_mark, unsigned int mark, int *keycount, int *validatecount);
int tcp_from_udp(time_t now, int status, struct dns_header *header, ssize_t *n,
int class, char *name, char *keyname, struct server *server,
int *keycount, int *validatecount);
#endif
unsigned char *tcp_request(int confd, time_t now,
union mysockaddr *local_addr, struct in_addr netmask, int auth_dns);
@@ -1654,7 +1654,7 @@ void send_event(int fd, int event, int data, char *msg);
void clear_cache_and_reload(time_t now);
#ifdef HAVE_DNSSEC
int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header,
size_t plen, int class, struct server *server, int *keycount, int *validatecount);
ssize_t *plen, int class, struct server *server, int *keycount, int *validatecount);
#endif
/* netlink.c */

View File

@@ -25,8 +25,10 @@ static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struc
static unsigned short get_id(void);
static void free_frec(struct frec *f);
static void query_full(time_t now, char *domain);
static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
int class, char *name, char *keyname, struct server *server,
int have_mark, unsigned int mark, int *keycount, int *validatecount);
static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status);
/* Send a UDP packet with its source address set as "source"
unless nowild is true, when we just send it with the kernel default */
@@ -310,8 +312,8 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
goto reply;
/* table full - flags == 0, return REFUSED */
/* Keep copy of query if we're doing fast retry. */
if (daemon->fast_retry_time != 0)
/* Keep copy of query if we're doing fast retry or DNSSEC. */
if (daemon->fast_retry_time != 0 || option_bool(OPT_DNSSEC_VALID))
{
forward->stash = blockdata_alloc((char *)header, plen);
forward->stash_len = plen;
@@ -910,36 +912,59 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header,
/* If all replies to a query are REFUSED, give up. */
if (RCODE(header) == REFUSED)
status = STAT_ABANDONED;
else if ((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) && (header->hb3 & HB3_TC))
else if (header->hb3 & HB3_TC)
{
/* Truncated answer can't be validated.
If this is an answer to a DNSSEC-generated query, we
switch to TCP mode. For downstream queries get the client
switch to TCP mode. For downstream queries, if the client didn't ask for
DNSSEC RRs, do the query over TCP, and hope that it fits once the DNSSEC RRs
have been stripped, otherwise get the client
to retry over TCP, so return an answer with the TC bit set. */
unsigned char *p = (unsigned char *)(header+1);
/* Get the query we sent by UDP */
blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
if (extract_name(header, forward->stash_len, &p, daemon->namebuff, 1, 4))
log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", (forward->flags & FREC_DNSKEY_QUERY) ? T_DNSKEY : T_DS);
/* Don't count failed UDP attempt AND TCP */
orig->work_counter++;
/* NOTE: Can't move connection marks from UDP to TCP */
status = swap_to_tcp(forward, now, (forward->flags & FREC_DNSKEY_QUERY) ? STAT_NEED_KEY_QUERY : STAT_NEED_DS_QUERY,
header, forward->stash_len, forward->class, forward->sentto, &orig->work_counter, &orig->validate_counter);
/* We forked a new process. pop_and_retry_query() will be called when is completes. */
if (STAT_ISEQUAL(status, STAT_ASYNC))
if ((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) || !(forward->flags & FREC_DO_QUESTION))
{
forward->flags |= FREC_GONE_TO_TCP;
return;
status = (forward->flags & FREC_DNSKEY_QUERY) ? STAT_NEED_KEY:
((forward->flags & FREC_DS_QUERY) ? STAT_NEED_DS : STAT_OK);
/* Get the query we sent by UDP */
blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
if (extract_request(header, forward->stash_len, daemon->namebuff, NULL))
log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", 0);
/* Don't count failed UDP attempt AND TCP */
if (status != STAT_OK)
orig->work_counter++;
else
{
/* repeat changes made as packet forwarded over UDP */
int cacheable;
header->id = htons(forward->new_id);
plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable);
plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
/* For debugging, set Checking Disabled, otherwise, have the upstream check too,
this allows it to select auth servers when one is returning bad data. */
if (option_bool(OPT_DNSSEC_DEBUG))
header->hb4 |= HB4_CD;
}
/* NOTE: Can't move connection marks from UDP to TCP */
plen = forward->stash_len;
status = swap_to_tcp(forward, now, status, header, &plen, forward->class, forward->sentto, &orig->work_counter, &orig->validate_counter);
/* We forked a new process. pop_and_retry_query() will be called when is completes. */
if (STAT_ISEQUAL(status, STAT_ASYNC))
{
forward->flags |= FREC_GONE_TO_TCP;
return;
}
}
else
status = STAT_TRUNCATED;
}
else if (header->hb3 & HB3_TC)
status = STAT_TRUNCATED;
else
{
/* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
@@ -1090,8 +1115,7 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header,
if (log_resource)
{
/* Log the actual validation that made us barf. */
unsigned char *p = (unsigned char *)(header+1);
if (extract_name(header, plen, &p, daemon->namebuff, 1, 4))
if (extract_request(header, plen, daemon->namebuff, NULL))
my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."),
daemon->namebuff[0] ? daemon->namebuff : ".");
}
@@ -1226,7 +1250,7 @@ void reply_query(int fd, time_t now)
#endif
{
/* in fast retry mode, we have a copy of the query. */
if (daemon->fast_retry_time != 0 && forward->stash)
if ((daemon->fast_retry_time != 0 || option_bool(OPT_DNSSEC_VALID)) && forward->stash)
{
blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
nn = forward->stash_len;
@@ -1322,7 +1346,7 @@ void reply_query(int fd, time_t now)
return_reply(now, forward, header, n, STAT_OK);
}
static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
{
int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
size_t nn;
@@ -1962,7 +1986,7 @@ static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet,
{
int data_sent = 0, timedout = 0;
struct server *serv;
if (firstsendto == -1)
firstsendto = start;
else
@@ -2062,6 +2086,77 @@ static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet,
}
#ifdef HAVE_DNSSEC
/* An answer to an downstream query or DNSSEC subquery has
returned truncated. (Which type held in status).
Resend the query (in header) via TCP */
int tcp_from_udp(time_t now, int status, struct dns_header *header, ssize_t *plenp,
int class, char *name, char *keyname, struct server *server,
int *keycount, int *validatecount)
{
unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
struct dns_header *new_header = (struct dns_header *)&packet[2];
int start, first, last, new_status;
ssize_t n = *plenp;
int have_req = extract_request(header, n, keyname, NULL);
int log_save = daemon->log_display_id;
*plenp = 0;
if (!packet)
return STAT_ABANDONED;
memcpy(new_header, header, n);
/* Set TCP flag in logs. */
daemon->log_display_id = -daemon->log_display_id;
/* send orginal query to same server that generated truncated reply on UDP. */
first = start = server->arrayposn;
last = first + 1;
if (!STAT_ISEQUAL(status, STAT_OK) && (!have_req || (start = dnssec_server(server, keyname, &first, &last)) == -1))
new_status = STAT_ABANDONED;
else if ((n = tcp_talk(first, last, start, packet, n, 0, 0, &server)) == 0)
new_status = STAT_ABANDONED;
else
{
if (have_req)
{
if (STAT_ISEQUAL(status, STAT_OK))
log_query_mysockaddr(F_SERVER | F_FORWARD, keyname, &server->addr, NULL, 0);
else
log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
}
new_status = tcp_key_recurse(now, status, new_header, n, class, name, keyname, server, 0, 0, keycount, validatecount);
if (STAT_ISEQUAL(status, STAT_OK))
{
/* downstream query: strip DNSSSEC RRs and see if it will
fit in a UDP reply. */
rrfilter(new_header, (size_t *)&n, RRFILTER_DNSSEC);
if (n >= daemon->edns_pktsz)
{
new_header->ancount = htons(0);
new_header->nscount = htons(0);
new_header->arcount = htons(0);
n = resize_packet(header, n, NULL, 0);
new_status = STAT_TRUNCATED;
}
/* return the stripped or truncated reply. */
memcpy(header, new_header, n);
*plenp = n;
}
}
daemon->log_display_id = log_save;
free(packet);
return new_status;
}
/* Recurse down the key hierarchy */
int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
int class, char *name, char *keyname, struct server *server,
@@ -2070,7 +2165,7 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
int first, last, start, new_status;
unsigned char *packet = NULL;
struct dns_header *new_header = NULL;
while (1)
{
size_t m;
@@ -2081,10 +2176,6 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class, validatecount);
else if (STAT_ISEQUAL(status, STAT_NEED_DS))
new_status = dnssec_validate_ds(now, header, n, name, keyname, class, validatecount);
else if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY))
new_status = STAT_NEED_KEY;
else if (STAT_ISEQUAL(status, STAT_NEED_DS_QUERY))
new_status = STAT_NEED_DS;
else
new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
!option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
@@ -2102,8 +2193,7 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
if (STAT_ISEQUAL(new_status, STAT_ABANDONED))
{
/* Log the actual validation that made us barf. */
unsigned char *p = (unsigned char *)(header+1);
if (extract_name(header, n, &p, daemon->namebuff, 1, 4))
if (extract_request(header, n, daemon->namebuff, NULL))
my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."),
daemon->namebuff[0] ? daemon->namebuff : ".");
break;
@@ -2123,35 +2213,23 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
break;
}
if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY))
{
/* recycling UDP query, copy into new buffer and get the name we're looking for. */
unsigned char *p = (unsigned char *)(header+1);
if (extract_name(header, n, &p, keyname, 1, 4))
{
memcpy(new_header, header, n);
m = n;
}
else
{
new_status = STAT_ABANDONED;
break;
}
}
else
m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class,
STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);
m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class,
STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);
if ((start = dnssec_server(server, keyname, &first, &last)) == -1 ||
(m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
if ((start = dnssec_server(server, keyname, &first, &last)) == -1)
{
new_status = STAT_ABANDONED;
break;
}
if ((m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
{
new_status = STAT_ABANDONED;
break;
}
log_save = daemon->log_display_id;
daemon->log_display_id = ++daemon->log_id;
daemon->log_display_id = -(++daemon->log_id);
log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
@@ -2161,16 +2239,14 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
daemon->log_display_id = log_save;
/* If we got STAT_OK from a DS or KEY validation on recursing, loop round and try the failed validation again.
Exception is if we're the first round is for DS or KEY and we're in the first invokation of this function.
In that new_status will be STAT_OK if the validation worked, but we're all done anyway. */
if (!STAT_ISEQUAL(new_status, STAT_OK) || STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY))
break;
/* If we got STAT_OK from a DS or KEY validation on recursing, loop round and try the failed validation again. */
if (!STAT_ISEQUAL(new_status, STAT_OK))
break;
}
if (packet)
free(packet);
return new_status;
}
#endif
@@ -2289,8 +2365,9 @@ unsigned char *tcp_request(int confd, time_t now,
query_count++;
/* log_query gets called indirectly all over the place, so
pass these in global variables - sorry. */
daemon->log_display_id = ++daemon->log_id;
pass these in global variables - sorry.
log_display_id is negative for TCP connections. */
daemon->log_display_id = -(++daemon->log_id);
daemon->log_source_addr = &peer_addr;
/* save state of "cd" flag in query */

View File

@@ -3411,8 +3411,16 @@ static int one_opt(int option, char *arg, char *errstr, char *gen_err, int comma
case 'q': /* --log-queries */
set_option_bool(OPT_LOG);
if (arg && strcmp(arg, "extra") == 0)
set_option_bool(OPT_EXTRALOG);
if (arg)
{
if (strcmp(arg, "extra") == 0)
set_option_bool(OPT_EXTRALOG);
else if (strcmp(arg, "proto") == 0)
{
set_option_bool(OPT_EXTRALOG);
set_option_bool(OPT_LOG_PROTO);
}
}
break;
case LOPT_MAX_LOGS: /* --log-async */