From d15d3710516af7c86ea05625e9be00885da8aac3 Mon Sep 17 00:00:00 2001 From: Simon Kelley Date: Sat, 12 Oct 2024 22:25:06 +0100 Subject: [PATCH] Handle truncated response UDP-to-TCP to downstream queries when validating. A relatively common situation is that the reply to a downstream query will fit in a UDP packet when no DNSSEC RRs are present, but overflows when the RRSIGS, NSEC etc. are added. This extends the automatic move from UDP to TCP to downstream queries which get truncated replies, in the hope that once stripped of the DNSSEC RRs, the reply can be returned via UDP, without making the downstream retry with TCP. If the downstream sets the DO bit (i.e. it wants the DNSSEC RRs), then this path is not taken, since the downstream will have to get a truncated response and retry to get a correct answer. --- man/dnsmasq.8 | 2 +- src/cache.c | 28 +++++-- src/dnsmasq.c | 8 +- src/dnsmasq.h | 16 ++-- src/forward.c | 215 ++++++++++++++++++++++++++++++++++---------------- src/option.c | 12 ++- 6 files changed, 193 insertions(+), 88 deletions(-) diff --git a/man/dnsmasq.8 b/man/dnsmasq.8 index 32bdeff..bc3257b 100644 --- a/man/dnsmasq.8 +++ b/man/dnsmasq.8 @@ -133,7 +133,7 @@ only, to stop dnsmasq daemonising in production, use Log the results of DNS queries handled by dnsmasq. Enable a full cache dump on receipt of SIGUSR1. If the argument "extra" is supplied, ie .B --log-queries=extra then the log has extra information at the start of each line. -This consists of a serial number which ties together the log lines associated with an individual query, and the IP address of the requestor. +This consists of a serial number which ties together the log lines associated with an individual query, and the IP address of the requestor. If the argument "proto" is supplied, this shows everything that "extra" does and also the network protocol used to communicate the queries. 
.TP .B \-8, --log-facility= Set the facility to which dnsmasq will send syslog entries, this diff --git a/src/cache.c b/src/cache.c index 43dd094..fed7409 100644 --- a/src/cache.c +++ b/src/cache.c @@ -898,10 +898,16 @@ int cache_recv_insert(time_t now, int fd) { int status, uid, keycount, validatecount; int *keycountp, *validatecountp; + size_t ret_len; + struct frec *forward; if (!read_write(fd, (unsigned char *)&status, sizeof(status), 1)) return 0; + if (!read_write(fd, (unsigned char *)&ret_len, sizeof(ret_len), 1)) + return 0; + if (!read_write(fd, (unsigned char *)daemon->packet, ret_len, 1)) + return 0; if (!read_write(fd, (unsigned char *)&forward, sizeof(forward), 1)) return 0; if (!read_write(fd, (unsigned char *)&uid, sizeof(uid), 1)) @@ -923,10 +929,13 @@ int cache_recv_insert(time_t now, int fd) /* repatriate the work counters from the child process. */ *keycountp = keycount; *validatecountp = validatecount; - - pop_and_retry_query(forward, status, now); + + if (!forward->dependent) + return_reply(now, forward, (struct dns_header *)daemon->packet, ret_len, status); + else + pop_and_retry_query(forward, status, now); } - + return 1; } #endif @@ -2281,12 +2290,21 @@ void log_query(unsigned int flags, char *name, union all_addr *addr, char *arg, if (option_bool(OPT_EXTRALOG)) { + int display_id = daemon->log_display_id; + char *proto = ""; + + if (option_bool(OPT_LOG_PROTO)) + proto = (display_id < 0) ? 
"TCP " : "UDP "; + + if (display_id < 0) + display_id = -display_id; + if (flags & F_NOEXTRA) - my_syslog(LOG_INFO, "%u %s %s%s%s %s%s", daemon->log_display_id, source, name, gap, verb, dest, extra); + my_syslog(LOG_INFO, "%s%u %s %s%s%s %s%s", proto, display_id, source, name, gap, verb, dest, extra); else { int port = prettyprint_addr(daemon->log_source_addr, daemon->addrbuff2); - my_syslog(LOG_INFO, "%u %s/%u %s %s%s%s %s%s", daemon->log_display_id, daemon->addrbuff2, port, source, name, gap, verb, dest, extra); + my_syslog(LOG_INFO, "%s%u %s/%u %s %s%s%s %s%s", proto, display_id, daemon->addrbuff2, port, source, name, gap, verb, dest, extra); } } else diff --git a/src/dnsmasq.c b/src/dnsmasq.c index e3d04ed..0f2955b 100644 --- a/src/dnsmasq.c +++ b/src/dnsmasq.c @@ -2074,7 +2074,7 @@ static void check_dns_listeners(time_t now) cache_recv_insert() calls pop_and_retry_query() after the result arrives via the pipe to the parent. */ int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header, - size_t plen, int class, struct server *server, int *keycount, int *validatecount) + ssize_t *plen, int class, struct server *server, int *keycount, int *validatecount) { struct server *s; @@ -2148,8 +2148,8 @@ int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header } } - status = tcp_key_recurse(now, status, header, plen, class, daemon->namebuff, daemon->keyname, - server, 0, 0, keycount, validatecount); + status = tcp_from_udp(now, status, header, plen, class, daemon->namebuff, daemon->keyname, + server, keycount, validatecount); /* close upstream connections. */ for (s = daemon->servers; s; s = s->next) @@ -2167,6 +2167,8 @@ int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header /* tell our parent we're done, and what the result was then exit. 
*/ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0); read_write(daemon->pipe_to_parent, (unsigned char *)&status, sizeof(status), 0); + read_write(daemon->pipe_to_parent, (unsigned char *)plen, sizeof(*plen), 0); + read_write(daemon->pipe_to_parent, (unsigned char *)header, *plen, 0); read_write(daemon->pipe_to_parent, (unsigned char *)&forward, sizeof(forward), 0); read_write(daemon->pipe_to_parent, (unsigned char *)&forward->uid, sizeof(forward->uid), 0); read_write(daemon->pipe_to_parent, (unsigned char *)keycount, sizeof(*keycount), 0); diff --git a/src/dnsmasq.h b/src/dnsmasq.h index a8bb206..552c389 100644 --- a/src/dnsmasq.h +++ b/src/dnsmasq.h @@ -282,7 +282,8 @@ struct event_desc { #define OPT_NO_IDENT 70 #define OPT_CACHE_RR 71 #define OPT_LOCALHOST_SERVICE 72 -#define OPT_LAST 73 +#define OPT_LOG_PROTO 73 +#define OPT_LAST 74 #define OPTION_BITS (sizeof(unsigned int)*8) #define OPTION_SIZE ( (OPT_LAST/OPTION_BITS)+((OPT_LAST%OPTION_BITS)!=0) ) @@ -747,9 +748,7 @@ struct dyndir { #define STAT_SECURE_WILDCARD 0x70000 #define STAT_OK 0x80000 #define STAT_ABANDONED 0x90000 -#define STAT_NEED_DS_QUERY 0xa0000 -#define STAT_NEED_KEY_QUERY 0xb0000 -#define STAT_ASYNC 0xc0000 +#define STAT_ASYNC 0xa0000 #define DNSSEC_FAIL_NYV 0x0001 /* key not yet valid */ #define DNSSEC_FAIL_EXP 0x0002 /* key expired */ @@ -1527,11 +1526,12 @@ int option_read_dynfile(char *file, int flags); /* forward.c */ void reply_query(int fd, time_t now); void receive_query(struct listener *listen, time_t now); +void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status); #ifdef HAVE_DNSSEC void pop_and_retry_query(struct frec *forward, int status, time_t now); -int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, - int class, char *name, char *keyname, struct server *server, - int have_mark, unsigned int mark, int *keycount, int *validatecount); +int tcp_from_udp(time_t now, int status, struct 
dns_header *header, ssize_t *n, + int class, char *name, char *keyname, struct server *server, + int *keycount, int *validatecount); #endif unsigned char *tcp_request(int confd, time_t now, union mysockaddr *local_addr, struct in_addr netmask, int auth_dns); @@ -1654,7 +1654,7 @@ void send_event(int fd, int event, int data, char *msg); void clear_cache_and_reload(time_t now); #ifdef HAVE_DNSSEC int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header, - size_t plen, int class, struct server *server, int *keycount, int *validatecount); + ssize_t *plen, int class, struct server *server, int *keycount, int *validatecount); #endif /* netlink.c */ diff --git a/src/forward.c b/src/forward.c index 2e7b902..50a177f 100644 --- a/src/forward.c +++ b/src/forward.c @@ -25,8 +25,10 @@ static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struc static unsigned short get_id(void); static void free_frec(struct frec *f); static void query_full(time_t now, char *domain); +static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, + int class, char *name, char *keyname, struct server *server, + int have_mark, unsigned int mark, int *keycount, int *validatecount); -static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status); /* Send a UDP packet with its source address set as "source" unless nowild is true, when we just send it with the kernel default */ @@ -310,8 +312,8 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr, goto reply; /* table full - flags == 0, return REFUSED */ - /* Keep copy of query if we're doing fast retry. */ - if (daemon->fast_retry_time != 0) + /* Keep copy of query if we're doing fast retry or DNSSEC. 
*/ + if (daemon->fast_retry_time != 0 || option_bool(OPT_DNSSEC_VALID)) { forward->stash = blockdata_alloc((char *)header, plen); forward->stash_len = plen; @@ -910,36 +912,59 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header, /* If all replies to a query are REFUSED, give up. */ if (RCODE(header) == REFUSED) status = STAT_ABANDONED; - else if ((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) && (header->hb3 & HB3_TC)) + else if (header->hb3 & HB3_TC) { /* Truncated answer can't be validated. If this is an answer to a DNSSEC-generated query, we - switch to TCP mode. For downstream queries get the client + switch to TCP mode. For downstream queries, if the client didn't ask for + DNSSEC RRs, do the query over TCP, and hope that it fits once the DNSSEC RRs + have been stripped, otherwise get the client to retry over TCP, so return an answer with the TC bit set. */ - unsigned char *p = (unsigned char *)(header+1); - - /* Get the query we sent by UDP */ - blockdata_retrieve(forward->stash, forward->stash_len, (void *)header); - - if (extract_name(header, forward->stash_len, &p, daemon->namebuff, 1, 4)) - log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", (forward->flags & FREC_DNSKEY_QUERY) ? T_DNSKEY : T_DS); - - /* Don't count failed UDP attempt AND TCP */ - orig->work_counter++; - - /* NOTE: Can't move connection marks from UDP to TCP */ - status = swap_to_tcp(forward, now, (forward->flags & FREC_DNSKEY_QUERY) ? STAT_NEED_KEY_QUERY : STAT_NEED_DS_QUERY, - header, forward->stash_len, forward->class, forward->sentto, &orig->work_counter, &orig->validate_counter); - - /* We forked a new process. pop_and_retry_query() will be called when is completes. */ - if (STAT_ISEQUAL(status, STAT_ASYNC)) + if ((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) || !(forward->flags & FREC_DO_QUESTION)) { - forward->flags |= FREC_GONE_TO_TCP; - return; + status = (forward->flags & FREC_DNSKEY_QUERY) ? 
STAT_NEED_KEY: + ((forward->flags & FREC_DS_QUERY) ? STAT_NEED_DS : STAT_OK); + + /* Get the query we sent by UDP */ + blockdata_retrieve(forward->stash, forward->stash_len, (void *)header); + + if (extract_request(header, forward->stash_len, daemon->namebuff, NULL)) + log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", 0); + + /* Don't count failed UDP attempt AND TCP */ + if (status != STAT_OK) + orig->work_counter++; + else + { + /* repeat changes made as packet forwarded over UDP */ + int cacheable; + + header->id = htons(forward->new_id); + + plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable); + plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ); + + /* For debugging, set Checking Disabled, otherwise, have the upstream check too, + this allows it to select auth servers when one is returning bad data. */ + if (option_bool(OPT_DNSSEC_DEBUG)) + header->hb4 |= HB4_CD; + + } + + /* NOTE: Can't move connection marks from UDP to TCP */ + plen = forward->stash_len; + status = swap_to_tcp(forward, now, status, header, &plen, forward->class, forward->sentto, &orig->work_counter, &orig->validate_counter); + + /* We forked a new process. pop_and_retry_query() will be called when is completes. */ + if (STAT_ISEQUAL(status, STAT_ASYNC)) + { + forward->flags |= FREC_GONE_TO_TCP; + return; + } } + else + status = STAT_TRUNCATED; } - else if (header->hb3 & HB3_TC) - status = STAT_TRUNCATED; else { /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise @@ -1090,8 +1115,7 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header, if (log_resource) { /* Log the actual validation that made us barf. 
*/ - unsigned char *p = (unsigned char *)(header+1); - if (extract_name(header, plen, &p, daemon->namebuff, 1, 4)) + if (extract_request(header, plen, daemon->namebuff, NULL)) my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."), daemon->namebuff[0] ? daemon->namebuff : "."); } @@ -1226,7 +1250,7 @@ void reply_query(int fd, time_t now) #endif { /* in fast retry mode, we have a copy of the query. */ - if (daemon->fast_retry_time != 0 && forward->stash) + if ((daemon->fast_retry_time != 0 || option_bool(OPT_DNSSEC_VALID)) && forward->stash) { blockdata_retrieve(forward->stash, forward->stash_len, (void *)header); nn = forward->stash_len; @@ -1322,7 +1346,7 @@ void reply_query(int fd, time_t now) return_reply(now, forward, header, n, STAT_OK); } -static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status) +void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status) { int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0; size_t nn; @@ -1962,7 +1986,7 @@ static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet, { int data_sent = 0, timedout = 0; struct server *serv; - + if (firstsendto == -1) firstsendto = start; else @@ -2062,6 +2086,77 @@ static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet, } #ifdef HAVE_DNSSEC +/* An answer to an downstream query or DNSSEC subquery has + returned truncated. (Which type held in status). 
+ Resend the query (in header) via TCP */ +int tcp_from_udp(time_t now, int status, struct dns_header *header, ssize_t *plenp, + int class, char *name, char *keyname, struct server *server, + int *keycount, int *validatecount) +{ + unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16)); + struct dns_header *new_header = (struct dns_header *)&packet[2]; + int start, first, last, new_status; + ssize_t n = *plenp; + int have_req = extract_request(header, n, keyname, NULL); + int log_save = daemon->log_display_id; + + *plenp = 0; + + if (!packet) + return STAT_ABANDONED; + + memcpy(new_header, header, n); + + /* Set TCP flag in logs. */ + daemon->log_display_id = -daemon->log_display_id; + + /* send orginal query to same server that generated truncated reply on UDP. */ + first = start = server->arrayposn; + last = first + 1; + + if (!STAT_ISEQUAL(status, STAT_OK) && (!have_req || (start = dnssec_server(server, keyname, &first, &last)) == -1)) + new_status = STAT_ABANDONED; + else if ((n = tcp_talk(first, last, start, packet, n, 0, 0, &server)) == 0) + new_status = STAT_ABANDONED; + else + { + if (have_req) + { + if (STAT_ISEQUAL(status, STAT_OK)) + log_query_mysockaddr(F_SERVER | F_FORWARD, keyname, &server->addr, NULL, 0); + else + log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr, + STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0); + } + + new_status = tcp_key_recurse(now, status, new_header, n, class, name, keyname, server, 0, 0, keycount, validatecount); + + if (STAT_ISEQUAL(status, STAT_OK)) + { + /* downstream query: strip DNSSSEC RRs and see if it will + fit in a UDP reply. 
*/ + rrfilter(new_header, (size_t *)&n, RRFILTER_DNSSEC); + + if (n >= daemon->edns_pktsz) + { + new_header->ancount = htons(0); + new_header->nscount = htons(0); + new_header->arcount = htons(0); + n = resize_packet(header, n, NULL, 0); + new_status = STAT_TRUNCATED; + } + + /* return the stripped or truncated reply. */ + memcpy(header, new_header, n); + *plenp = n; + } + } + + daemon->log_display_id = log_save; + free(packet); + return new_status; +} + /* Recurse down the key hierarchy */ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, int class, char *name, char *keyname, struct server *server, @@ -2070,7 +2165,7 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, int first, last, start, new_status; unsigned char *packet = NULL; struct dns_header *new_header = NULL; - + while (1) { size_t m; @@ -2081,10 +2176,6 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class, validatecount); else if (STAT_ISEQUAL(status, STAT_NEED_DS)) new_status = dnssec_validate_ds(now, header, n, name, keyname, class, validatecount); - else if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY)) - new_status = STAT_NEED_KEY; - else if (STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) - new_status = STAT_NEED_DS; else new_status = dnssec_validate_reply(now, header, n, name, keyname, &class, !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC), @@ -2102,8 +2193,7 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, if (STAT_ISEQUAL(new_status, STAT_ABANDONED)) { /* Log the actual validation that made us barf. */ - unsigned char *p = (unsigned char *)(header+1); - if (extract_name(header, n, &p, daemon->namebuff, 1, 4)) + if (extract_request(header, n, daemon->namebuff, NULL)) my_syslog(LOG_WARNING, _("validation of %s failed: resource limit exceeded."), daemon->namebuff[0] ? 
daemon->namebuff : "."); break; @@ -2123,35 +2213,23 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, break; } - if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) - { - /* recycling UDP query, copy into new buffer and get the name we're looking for. */ - unsigned char *p = (unsigned char *)(header+1); - - if (extract_name(header, n, &p, keyname, 1, 4)) - { - memcpy(new_header, header, n); - m = n; - } - else - { - new_status = STAT_ABANDONED; - break; - } - } - else - m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, - STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz); + m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, + STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz); - if ((start = dnssec_server(server, keyname, &first, &last)) == -1 || - (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0) + if ((start = dnssec_server(server, keyname, &first, &last)) == -1) + { + new_status = STAT_ABANDONED; + break; + } + + if ((m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0) { new_status = STAT_ABANDONED; break; } log_save = daemon->log_display_id; - daemon->log_display_id = ++daemon->log_id; + daemon->log_display_id = -(++daemon->log_id); log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr, STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0); @@ -2161,16 +2239,14 @@ int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, daemon->log_display_id = log_save; - /* If we got STAT_OK from a DS or KEY validation on recursing, loop round and try the failed validation again. - Exception is if we're the first round is for DS or KEY and we're in the first invokation of this function. 
- In that new_status will be STAT_OK if the validation worked, but we're all done anyway. */ - if (!STAT_ISEQUAL(new_status, STAT_OK) || STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) - break; + /* If we got STAT_OK from a DS or KEY validation on recursing, loop round and try the failed validation again. */ + if (!STAT_ISEQUAL(new_status, STAT_OK)) + break; } - + if (packet) free(packet); - + return new_status; } #endif @@ -2289,8 +2365,9 @@ unsigned char *tcp_request(int confd, time_t now, query_count++; /* log_query gets called indirectly all over the place, so - pass these in global variables - sorry. */ - daemon->log_display_id = ++daemon->log_id; + pass these in global variables - sorry. + log_display_id is negative for TCP connections. */ + daemon->log_display_id = -(++daemon->log_id); daemon->log_source_addr = &peer_addr; /* save state of "cd" flag in query */ diff --git a/src/option.c b/src/option.c index ed0d9e1..be94b8e 100644 --- a/src/option.c +++ b/src/option.c @@ -3411,8 +3411,16 @@ static int one_opt(int option, char *arg, char *errstr, char *gen_err, int comma case 'q': /* --log-queries */ set_option_bool(OPT_LOG); - if (arg && strcmp(arg, "extra") == 0) - set_option_bool(OPT_EXTRALOG); + if (arg) + { + if (strcmp(arg, "extra") == 0) + set_option_bool(OPT_EXTRALOG); + else if (strcmp(arg, "proto") == 0) + { + set_option_bool(OPT_EXTRALOG); + set_option_bool(OPT_LOG_PROTO); + } + } break; case LOPT_MAX_LOGS: /* --log-async */