Remove two-decade old hack.

answer_request() builds answers in the same packet buffer
as the request.  This means that any EDNS0 header from the
original request is overwritten. If the answer is in cache, that's
fine: dnsmasq adds its own EDNS0 header, but if the cache lookup fails
partially and the request needs to be sent upstream, it's a problem.

This was fixed a long, long time ago by running the cache
lookup twice if the request included an EDNS0 header. The first time,
nothing would be written to the answer packet, nad if the cache
lookup failed, the untouched question packet was still available
to forward upstream. If cache lookup succeeded, the whole thing
was done again, this time writing the data into the reply packet.
In a world where EDNS0 was rare and so was memory, this was a
reasonable solution. Today EDNS0 is ubiquitous so basically
every query is being looked up twice in the cache. There's also
the problem that any code change which makes successive cache lookups
for a query possibly return different answers adds a subtle hidden
bug, because this hack depends on absence of that behaviour.

This commit removes the lookup-twice hack entirely. answer_request()
can now return zero and overwrite the question packet. The code which
was previously added to support stale caching by saving a copy of the
query in the block-storage system is extended to always be active.
This handles the case where answer_request() returns no answer OR
a stale answer and a copy of the original query is needed to forward
upstream.
This commit is contained in:
Simon Kelley
2023-09-11 22:11:50 +01:00
parent 3b5ddf37d9
commit 768b45a023
2 changed files with 172 additions and 246 deletions

View File

@@ -1814,11 +1814,7 @@ void receive_query(struct listener *listen, time_t now)
int stale, filtered;
int ad_reqd = do_bit;
int fd = listen->fd;
struct blockdata *saved_question = NULL;
/* In case answer is stale */
if (daemon->cache_max_expiry != 0)
saved_question = blockdata_alloc((char *) header, (size_t)n);
struct blockdata *saved_question = blockdata_alloc((char *) header, (size_t)n);
/* RFC 6840 5.7 */
if (header->hb4 & HB4_AD)
@@ -1861,11 +1857,10 @@ void receive_query(struct listener *listen, time_t now)
daemon->metrics[METRIC_DNS_STALE_ANSWERED]++;
}
if (stale && saved_question)
if (stale)
{
/* We answered with stale cache data, so forward the query anyway to
refresh that. Restore saved query. */
blockdata_retrieve(saved_question, (size_t)n, header);
refresh that. */
m = 0;
/* We've already answered the client, so don't send it the answer
@@ -1873,15 +1868,20 @@ void receive_query(struct listener *listen, time_t now)
fd = -1;
}
blockdata_free(saved_question);
if (m == 0)
if (saved_question)
{
if (forward_query(fd, &source_addr, &dst_addr, if_index,
header, (size_t)n, ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
else
daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
if (m == 0)
{
blockdata_retrieve(saved_question, (size_t)n, header);
if (forward_query(fd, &source_addr, &dst_addr, if_index,
header, (size_t)n, ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
else
daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
}
blockdata_free(saved_question);
}
}
}
@@ -2178,25 +2178,15 @@ unsigned char *tcp_request(int confd, time_t now,
{
int ede = EDE_UNSET;
if (do_stale)
{
/* We answered the last query with stale data. Now try and get fresh data.
Restore saved query */
if (!saved_question)
break;
blockdata_retrieve(saved_question, (size_t)saved_size, header);
size = saved_size;
}
else
if (!do_stale)
{
if (query_count == TCP_MAX_QUERIES)
return packet;
break;
if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
!(size = c1 << 8 | c2) ||
!read_write(confd, payload, size, 1))
return packet;
break;
}
if (size < (int)sizeof(struct dns_header))
@@ -2304,14 +2294,11 @@ unsigned char *tcp_request(int confd, time_t now,
m = 0;
else
{
if (daemon->cache_max_expiry != 0)
{
if (saved_question)
blockdata_free(saved_question);
saved_question = blockdata_alloc((char *) header, (size_t)size);
saved_size = size;
}
if (saved_question)
blockdata_free(saved_question);
saved_question = blockdata_alloc((char *) header, (size_t)size);
saved_size = size;
/* m > 0 if answered from cache */
m = answer_request(header, ((char *) header) + 65536, (size_t)size,
@@ -2320,11 +2307,14 @@ unsigned char *tcp_request(int confd, time_t now,
/* Do this by steam now we're not in the select() loop */
check_log_writer(1);
if (m == 0)
if (m == 0 && saved_question)
{
struct server *master;
int start;
blockdata_retrieve(saved_question, (size_t)saved_size, header);
size = saved_size;
if (lookup_domain(daemon->namebuff, gotname, &first, &last))
flags = is_local_answer(now, first, daemon->namebuff);
else
@@ -2486,7 +2476,7 @@ unsigned char *tcp_request(int confd, time_t now,
break;
/* If we answered with stale data, this process will now try and get fresh data into
the cache then and cannot therefore accept new queries. Close the incoming
the cache and cannot therefore accept new queries. Close the incoming
connection to signal that to the client. Then set do_stale and loop round
once more to try and get fresh data, after which we exit. */
if (stale)