diff --git a/src/cache.c b/src/cache.c index 0eacec9..3119b86 100644 --- a/src/cache.c +++ b/src/cache.c @@ -891,6 +891,31 @@ int cache_recv_insert(time_t now, int fd) return 1; } +#ifdef HAVE_DNSSEC + /* UDP validation moved to TCP to avoid truncation. + Restart UDP validation process with the returned result. */ + if (m == -2) + { + int status, uid; + struct frec *forward; + + if (!read_write(fd, (unsigned char *)&status, sizeof(status), 1)) + return 0; + if (!read_write(fd, (unsigned char *)&forward, sizeof(forward), 1)) + return 0; + if (!read_write(fd, (unsigned char *)&uid, sizeof(uid), 1)) + return 0; + + /* There's a tiny chance that the frec may have been freed + and reused before the TCP process returns. Detect that with + the uid field which is unique modulo 2^32 for each use. */ + if (uid == forward->uid) + pop_and_retry_query(forward, status, now); + + return 1; + } +#endif + if (!read_write(fd, (unsigned char *)daemon->namebuff, m, 1) || !read_write(fd, (unsigned char *)&ttd, sizeof(ttd), 1) || !read_write(fd, (unsigned char *)&flags, sizeof(flags), 1) || diff --git a/src/dnsmasq.c b/src/dnsmasq.c index a9f26ae..e4a45f8 100644 --- a/src/dnsmasq.c +++ b/src/dnsmasq.c @@ -1992,6 +1992,11 @@ static void check_dns_listeners(time_t now) /* The child can use up to TCP_MAX_QUERIES ids, so skip that many. */ daemon->log_id += TCP_MAX_QUERIES; +#ifdef HAVE_DNSSEC + /* It can do more if making DNSSEC queries too. */ + if (option_bool(OPT_DNSSEC_VALID)) + daemon->log_id += daemon->limit[LIMIT_WORK]; +#endif } else { @@ -2028,10 +2033,6 @@ static void check_dns_listeners(time_t now) daemon->pipe_to_parent = pipefd[1]; } - /* start with no upstream connections. */ - for (s = daemon->servers; s; s = s->next) - s->tcpfd = -1; - /* The connected socket inherits non-blocking attribute from the listening socket. Reset that here. 
*/ @@ -2048,6 +2049,7 @@ static void check_dns_listeners(time_t now) { shutdown(s->tcpfd, SHUT_RDWR); close(s->tcpfd); + s->tcpfd = -1; } if (!option_bool(OPT_DEBUG)) @@ -2061,6 +2063,124 @@ static void check_dns_listeners(time_t now) } } +#ifdef HAVE_DNSSEC +/* If a DNSSEC query over UDP returns a truncated answer, + we swap to the TCP path. This routine is responsible for forking + the required process, the child then calls tcp_key_recurse() and + returns the result of the validation through the pipe to the parent + (which has also primed the cache with the relevant DS and DNSKEY records). + If we're in debug mode, don't fork and return the result directly, otherwise + return STAT_ASYNC. The UDP validation process will restart when + cache_recv_insert() calls pop_and_retry_query() after the result + arrives via the pipe to the parent. */ +int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header, + size_t plen, int class, struct server *server, int *keycount, int *validatecount) +{ + struct server *s; + + if (!option_bool(OPT_DEBUG)) + { + pid_t p; + int i, pipefd[2]; +#ifdef HAVE_LINUX_NETWORK + unsigned char a = 0; +#endif + + /* check to see if we have a free tcp process slot. */ + for (i = daemon->max_procs - 1; i >= 0; i--) + if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) + break; + + /* No slots */ + if (i < 0) + return STAT_ABANDONED; + + if (pipe(pipefd) == 0 && (p = fork()) != 0) + { + close(pipefd[1]); /* parent needs read pipe end. */ + if (p == -1) + { + /* fork() failed */ + close(pipefd[0]); + return STAT_ABANDONED; + } + +#ifdef HAVE_LINUX_NETWORK + /* The child process inherits the netlink socket, + which it never uses, but when the parent (us) + uses it in the future, the answer may go to the + child, resulting in the parent blocking + forever awaiting the result. 
To avoid this + the child closes the netlink socket, but there's + a nasty race, since the parent may use netlink + before the child has done the close. + + To avoid this, the parent blocks here until a + single byte comes back up the pipe, which + is sent by the child after it has closed the + netlink socket. */ + read_write(pipefd[0], &a, 1, 1); +#endif + + /* i holds index of free slot */ + daemon->tcp_pids[i] = p; + daemon->tcp_pipes[i] = pipefd[0]; + daemon->metrics[METRIC_TCP_CONNECTIONS]++; + if (daemon->metrics[METRIC_TCP_CONNECTIONS] > daemon->max_procs_used) + daemon->max_procs_used = daemon->metrics[METRIC_TCP_CONNECTIONS]; + + /* child can use a maximum of this many log serials. */ + daemon->log_id += daemon->limit[LIMIT_WORK]; + + /* tell the caller we've forked. */ + return STAT_ASYNC; + } + else + { + /* child starts here. */ +#ifdef HAVE_LINUX_NETWORK + /* See comment above re: netlink socket. */ + close(daemon->netlinkfd); + read_write(pipefd[1], &a, 1, 0); +#endif + close(pipefd[0]); /* close read end in child. */ + daemon->pipe_to_parent = pipefd[1]; + } + } + + status = tcp_key_recurse(now, status, header, plen, class, daemon->namebuff, daemon->keyname, + server, 0, 0, keycount, validatecount); + + /* close upstream connections. */ + for (s = daemon->servers; s; s = s->next) + if (s->tcpfd != -1) + { + shutdown(s->tcpfd, SHUT_RDWR); + close(s->tcpfd); + s->tcpfd = -1; + } + + if (!option_bool(OPT_DEBUG)) + { + ssize_t m = -2; + + /* tell our parent we're done, and what the result was then exit. */ + read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0); + read_write(daemon->pipe_to_parent, (unsigned char *)&status, sizeof(status), 0); + read_write(daemon->pipe_to_parent, (unsigned char *)&forward, sizeof(forward), 0); + read_write(daemon->pipe_to_parent, (unsigned char *)&forward->uid, sizeof(forward->uid), 0); + close(daemon->pipe_to_parent); + + flush_log(); + _exit(0); + } + + /* path for debug mode. 
*/ + return status; +} +#endif + + #ifdef HAVE_DHCP int make_icmp_sock(void) { diff --git a/src/dnsmasq.h b/src/dnsmasq.h index e455c3f..a8bb206 100644 --- a/src/dnsmasq.h +++ b/src/dnsmasq.h @@ -747,6 +747,9 @@ struct dyndir { #define STAT_SECURE_WILDCARD 0x70000 #define STAT_OK 0x80000 #define STAT_ABANDONED 0x90000 +#define STAT_NEED_DS_QUERY 0xa0000 +#define STAT_NEED_KEY_QUERY 0xb0000 +#define STAT_ASYNC 0xc0000 #define DNSSEC_FAIL_NYV 0x0001 /* key not yet valid */ #define DNSSEC_FAIL_EXP 0x0002 /* key expired */ @@ -774,6 +777,7 @@ struct dyndir { #define FREC_TEST_PKTSZ 256 #define FREC_HAS_EXTRADATA 512 #define FREC_HAS_PHEADER 1024 +#define FREC_GONE_TO_TCP 2048 #define HASH_SIZE 32 /* SHA-256 digest size */ @@ -797,7 +801,7 @@ struct frec { struct blockdata *stash; /* Saved reply, whilst we validate */ size_t stash_len; #ifdef HAVE_DNSSEC - int class, work_counter, validate_counter; + int uid, class, work_counter, validate_counter; struct frec *dependent; /* Query awaiting internally-generated DNSKEY or DS query */ struct frec *next_dependent; /* list of above. */ struct frec *blocking_query; /* Query which is blocking us. 
*/ @@ -1523,6 +1527,12 @@ int option_read_dynfile(char *file, int flags); /* forward.c */ void reply_query(int fd, time_t now); void receive_query(struct listener *listen, time_t now); +#ifdef HAVE_DNSSEC +void pop_and_retry_query(struct frec *forward, int status, time_t now); +int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, + int class, char *name, char *keyname, struct server *server, + int have_mark, unsigned int mark, int *keycount, int *validatecount); +#endif unsigned char *tcp_request(int confd, time_t now, union mysockaddr *local_addr, struct in_addr netmask, int auth_dns); void server_gone(struct server *server); @@ -1642,6 +1652,10 @@ void queue_event(int event); void send_alarm(time_t event, time_t now); void send_event(int fd, int event, int data, char *msg); void clear_cache_and_reload(time_t now); +#ifdef HAVE_DNSSEC +int swap_to_tcp(struct frec *forward, time_t now, int status, struct dns_header *header, + size_t plen, int class, struct server *server, int *keycount, int *validatecount); +#endif /* netlink.c */ #ifdef HAVE_LINUX_NETWORK diff --git a/src/domain-match.c b/src/domain-match.c index cf2da77..2527b52 100644 --- a/src/domain-match.c +++ b/src/domain-match.c @@ -744,6 +744,7 @@ int add_update_server(int flags, serv->flags = flags; serv->domain = alloc_domain; serv->domain_len = strlen(alloc_domain); + serv->tcpfd = -1; return 1; } diff --git a/src/forward.c b/src/forward.c index d235ce3..737da1b 100644 --- a/src/forward.c +++ b/src/forward.c @@ -22,7 +22,6 @@ static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigne #ifdef HAVE_DNSSEC static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header); #endif - static unsigned short get_id(void); static void free_frec(struct frec *f); static void query_full(time_t now, char *domain); @@ -897,47 +896,64 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header, daemon->log_display_id 
= forward->frec_src.log_id; /* We've had a reply already, which we're validating. Ignore this duplicate */ - if (forward->blocking_query) + if (forward->blocking_query || (forward->flags & FREC_GONE_TO_TCP)) return; - - /* If all replies to a query are REFUSED, give up. */ - if (RCODE(header) == REFUSED) - status = STAT_ABANDONED; - else if (header->hb3 & HB3_TC) - { - /* Truncated answer can't be validated. - If this is an answer to a DNSSEC-generated query, we still - need to get the client to retry over TCP, so return - an answer with the TC bit set, even if the actual answer fits. - */ - status = STAT_TRUNCATED; - if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) - { - unsigned char *p = (unsigned char *)(header+1); - if (extract_name(header, plen, &p, daemon->namebuff, 0, 4) == 1) - log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", (forward->flags & FREC_DNSKEY_QUERY) ? T_DNSKEY : T_DS); - } - } /* Find the original query that started it all.... */ for (orig = forward; orig->dependent; orig = orig->dependent); - + /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise would invite infinite loops, since the answers to DNSKEY and DS queries will not be cached, so they'll be repeated. 
*/ if (!STAT_ISEQUAL(status, STAT_BOGUS) && !STAT_ISEQUAL(status, STAT_TRUNCATED) && !STAT_ISEQUAL(status, STAT_ABANDONED)) { - if (forward->flags & FREC_DNSKEY_QUERY) - status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter); - else if (forward->flags & FREC_DS_QUERY) - status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter); - else - status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class, - !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC), - NULL, NULL, NULL, &orig->validate_counter); + /* If all replies to a query are REFUSED, give up. */ + if (RCODE(header) == REFUSED) + status = STAT_ABANDONED; + else if ((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) && (header->hb3 & HB3_TC)) + { + /* Truncated answer can't be validated. + If this is an answer to a DNSSEC-generated query, we + switch to TCP mode. For downstream queries get the client + to retry over TCP, so return an answer with the TC bit set. */ + unsigned char *p = (unsigned char *)(header+1); + + /* Get the query we sent by UDP */ + blockdata_retrieve(forward->stash, forward->stash_len, (void *)header); + + if (extract_name(header, plen, &p, daemon->namebuff, 0, 4) == 1) + log_query(F_UPSTREAM | F_NOEXTRA, daemon->namebuff, NULL, "truncated", (forward->flags & FREC_DNSKEY_QUERY) ? T_DNSKEY : T_DS); + + /* NOTE: Can't move connection marks from UDP to TCP */ + status = swap_to_tcp(forward, now, (forward->flags & FREC_DNSKEY_QUERY) ? STAT_NEED_KEY_QUERY : STAT_NEED_DS_QUERY, + header, forward->stash_len, forward->class, forward->sentto, &orig->work_counter, &orig->validate_counter); - if (STAT_ISEQUAL(status, STAT_ABANDONED)) - log_resource = 1; + /* We forked a new process. pop_and_retry_query() will be called when it completes. 
*/ + if (status == STAT_ASYNC) + { + forward->flags |= FREC_GONE_TO_TCP; + return; + } + } + else if (header->hb3 & HB3_TC) + status = STAT_TRUNCATED; + else + { + /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise + would invite infinite loops, since the answers to DNSKEY and DS queries + will not be cached, so they'll be repeated. */ + if (forward->flags & FREC_DNSKEY_QUERY) + status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter); + else if (forward->flags & FREC_DS_QUERY) + status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class, &orig->validate_counter); + else + status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class, + !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC), + NULL, NULL, NULL, &orig->validate_counter); + + if (STAT_ISEQUAL(status, STAT_ABANDONED)) + log_resource = 1; + } } /* Can't validate, as we're missing key data. Put this @@ -1083,25 +1099,29 @@ static void dnssec_validate(struct frec *forward, struct dns_header *header, header, (size_t)plen, &forward->sentto->addr, NULL, -daemon->port); #endif - /* Validated original answer, all done. */ if (!forward->dependent) + /* Validated original answer, all done. */ return_reply(now, forward, header, plen, status); else - { - /* validated subsidiary query/queries, (and cached result) - pop that and return to the previous query/queries we were working on. */ - struct frec *prev, *nxt = forward->dependent; + pop_and_retry_query(forward, status, now); +} - free_frec(forward); - - while ((prev = nxt)) - { - /* ->next_dependent will have changed after return from recursive call below. 
*/ - nxt = prev->next_dependent; - prev->blocking_query = NULL; /* already gone */ - blockdata_retrieve(prev->stash, prev->stash_len, (void *)header); - dnssec_validate(prev, header, prev->stash_len, status, now); - } +void pop_and_retry_query(struct frec *forward, int status, time_t now) +{ + /* validated subsidiary query/queries, (and cached result) + pop that and return to the previous query/queries we were working on. */ + struct frec *prev, *nxt = forward->dependent; + struct dns_header *header = (struct dns_header *)daemon->packet; + + free_frec(forward); + + while ((prev = nxt)) + { + /* ->next_dependent will have changed after return from recursive call below. */ + nxt = prev->next_dependent; + prev->blocking_query = NULL; /* already gone */ + blockdata_retrieve(prev->stash, prev->stash_len, (void *)header); + dnssec_validate(prev, header, prev->stash_len, status, now); } } #endif @@ -2040,9 +2060,9 @@ static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet, #ifdef HAVE_DNSSEC /* Recurse down the key hierarchy */ -static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, - int class, char *name, char *keyname, struct server *server, - int have_mark, unsigned int mark, int *keycount, int *validatecount) +int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, + int class, char *name, char *keyname, struct server *server, + int have_mark, unsigned int mark, int *keycount, int *validatecount) { int first, last, start, new_status; unsigned char *packet = NULL; @@ -2058,7 +2078,11 @@ static int tcp_key_recurse(time_t now, int status, struct dns_header *header, si new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class, validatecount); else if (STAT_ISEQUAL(status, STAT_NEED_DS)) new_status = dnssec_validate_ds(now, header, n, name, keyname, class, validatecount); - else + else if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY)) + new_status = STAT_NEED_KEY; + else if 
(STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) + new_status = STAT_NEED_DS; + else new_status = dnssec_validate_reply(now, header, n, name, keyname, &class, !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC), NULL, NULL, NULL, validatecount); @@ -2096,10 +2120,27 @@ static int tcp_key_recurse(time_t now, int status, struct dns_header *header, si break; } - m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, - STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz); + if (STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) + { + /* recycling UDP query, copy into new buffer and get the name we're looking for. */ + unsigned char *p = (unsigned char *)(header+1); + + if (extract_name(header, n, &p, keyname, 0, 4) == 1) + { + memcpy(new_header, header, n); + m = n; + } + else + { + new_status = STAT_ABANDONED; + break; + } + } + else + m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, + STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz); - if ((start = dnssec_server(server, daemon->keyname, &first, &last)) == -1 || + if ((start = dnssec_server(server, keyname, &first, &last)) == -1 || (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0) { new_status = STAT_ABANDONED; @@ -2117,8 +2158,11 @@ static int tcp_key_recurse(time_t now, int status, struct dns_header *header, si daemon->log_display_id = log_save; - if (!STAT_ISEQUAL(new_status, STAT_OK)) - break; + /* If we got STAT_OK from a DS or KEY validation on recursing, loop round and try the failed validation again. + The exception is when the first round is for DS or KEY and we're in the first invocation of this function. + In that case new_status will be STAT_OK if the validation worked, but we're all done anyway. 
*/ + if (!STAT_ISEQUAL(new_status, STAT_OK) || STAT_ISEQUAL(status, STAT_NEED_KEY_QUERY) || STAT_ISEQUAL(status, STAT_NEED_DS_QUERY)) + break; } if (packet) @@ -2223,7 +2267,7 @@ unsigned char *tcp_request(int confd, time_t now, if (!do_stale) { - if (query_count == TCP_MAX_QUERIES) + if (query_count >= TCP_MAX_QUERIES) break; if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) || @@ -2453,6 +2497,9 @@ unsigned char *tcp_request(int confd, time_t now, if ((daemon->limit[LIMIT_WORK] - keycount) > (int)daemon->metrics[METRIC_WORK_HWM]) daemon->metrics[METRIC_WORK_HWM] = daemon->limit[LIMIT_WORK] - keycount; + + /* include DNSSEC queries in the limit for a connection. */ + query_count += daemon->limit[LIMIT_WORK] - keycount; } #endif @@ -2883,6 +2930,9 @@ static struct frec *get_new_frec(time_t now, struct server *master, int force) { struct frec *f, *oldest, *target; int count; +#ifdef HAVE_DNSSEC + static int next_uid = 0; +#endif /* look for free records, garbage collect old records and count number in use by our server-group. */ for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next) @@ -2937,6 +2987,9 @@ static struct frec *get_new_frec(time_t now, struct server *master, int force) { target->time = now; target->forward_delay = daemon->fast_retry_time; +#ifdef HAVE_DNSSEC + target->uid = next_uid++; +#endif } return target;