mirror of
https://github.com/pi-hole/dnsmasq.git
synced 2026-02-14 23:19:01 +00:00
Optimise TCP send.
In the DNS TCP code, there are a couple of places where we have a buffer containing a message which we need to send via TCP. The DNS protocol is that this is sent as <16-bit length in network order><message> Making two write calls, one for the length and one for the message causes the TCP stack to send two packets, one for each. A single packet containing both is preferable from a performance POV. Implement a scatter-gather version of our read_write() wrapper and use it where necessary to send TCP DNS messages.
This commit is contained in:
@@ -1527,6 +1527,7 @@ int memcmp_masked(unsigned char *a, unsigned char *b, int len,
|
||||
int expand_buf(struct iovec *iov, size_t size);
|
||||
char *print_mac(char *buff, unsigned char *mac, int len);
|
||||
int read_write(int fd, unsigned char *packet, int size, int rw);
|
||||
int read_writev(int fd, struct iovec *iov, int iovcnt, int rw);
|
||||
void close_fds(long max_fd, int spare1, int spare2, int spare3);
|
||||
int wildcard_match(const char* wildcard, const char* match);
|
||||
int wildcard_matchn(const char* wildcard, const char* match, int num);
|
||||
|
||||
@@ -2029,9 +2029,9 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
||||
int class, rclass, type, rtype;
|
||||
unsigned char *p;
|
||||
struct timeval tv;
|
||||
struct iovec sendio[2];
|
||||
#ifdef MSG_FASTOPEN
|
||||
struct msghdr msg;
|
||||
struct iovec sendio[2];
|
||||
#endif
|
||||
|
||||
(void)mark;
|
||||
@@ -2044,6 +2044,12 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
||||
GETSHORT(type, p);
|
||||
GETSHORT(class, p);
|
||||
|
||||
length = htons(qsize);
|
||||
sendio[0].iov_base = &length;
|
||||
sendio[0].iov_len = sizeof(length);
|
||||
sendio[1].iov_base = header;
|
||||
sendio[1].iov_len = qsize;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int data_sent = 0, fatal = 0;
|
||||
@@ -2065,8 +2071,6 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
||||
*servp = serv = daemon->serverarray[start];
|
||||
|
||||
retry:
|
||||
length = htons(qsize);
|
||||
|
||||
if (serv->tcpfd == -1)
|
||||
{
|
||||
if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
|
||||
@@ -2097,12 +2101,7 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
||||
tv.tv_sec += TCP_TIMEOUT;
|
||||
setsockopt(serv->tcpfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
|
||||
#endif
|
||||
|
||||
#ifdef MSG_FASTOPEN
|
||||
sendio[0].iov_base = (unsigned char *)&length;
|
||||
sendio[0].iov_len = sizeof(length);
|
||||
sendio[1].iov_base = (unsigned char *)header;
|
||||
sendio[1].iov_len = qsize;
|
||||
msg.msg_name = &serv->addr.sa;
|
||||
msg.msg_namelen = sa_len(&serv->addr);
|
||||
msg.msg_iov = sendio;
|
||||
@@ -2138,11 +2137,9 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
||||
serv->flags &= ~SERV_GOT_TCP;
|
||||
}
|
||||
|
||||
/* We us the _ONCE variant of read_write() here because we've set a timeout on the tcp socket
|
||||
/* We use the _ONCE variant of read_write() here because we've set a timeout on the tcp socket
|
||||
and wish to abort if the whole data is not read/written within the timeout. */
|
||||
if ((!data_sent &&
|
||||
(!read_write(serv->tcpfd, (unsigned char *)&length, sizeof(length), RW_WRITE_ONCE) ||
|
||||
!read_write(serv->tcpfd, (unsigned char *)header, qsize, RW_WRITE_ONCE))) ||
|
||||
if ((!data_sent && !read_writev(serv->tcpfd, sendio, 2, RW_WRITE_ONCE)) ||
|
||||
!read_write(serv->tcpfd, (unsigned char *)&length, sizeof(length), RW_READ_ONCE) ||
|
||||
!expand_buf(recvbuff, (rsize = ntohs(length))) ||
|
||||
!read_write(serv->tcpfd, recvbuff->iov_base, rsize, RW_READ_ONCE))
|
||||
@@ -2376,7 +2373,8 @@ void tcp_request(int confd, time_t now, struct iovec *bigbuff,
|
||||
unsigned int mark = 0;
|
||||
int have_mark = 0;
|
||||
int first, last, filtered, do_stale = 0;
|
||||
|
||||
struct iovec out_iov[2];
|
||||
|
||||
bigbuff->iov_base = NULL;
|
||||
bigbuff->iov_len = 0;
|
||||
|
||||
@@ -2746,9 +2744,13 @@ void tcp_request(int confd, time_t now, struct iovec *bigbuff,
|
||||
report_addresses(header, m, mark);
|
||||
#endif
|
||||
|
||||
/* use scatter-gather IO so that length doesn't end up in separate packet. */
|
||||
out_len = htons(m);
|
||||
if (!read_write(confd, (unsigned char *)&out_len, sizeof(out_len), RW_WRITE) |
|
||||
!read_write(confd, bigbuff->iov_base, m, RW_WRITE))
|
||||
out_iov[0].iov_len = sizeof(out_len);
|
||||
out_iov[0].iov_base = &out_len;
|
||||
out_iov[1].iov_len = m;
|
||||
out_iov[1].iov_base = bigbuff->iov_base;
|
||||
if (!read_writev(confd, out_iov, 2, RW_WRITE))
|
||||
break;
|
||||
|
||||
/* If we answered with stale data, this process will now try and get fresh data into
|
||||
|
||||
63
src/util.c
63
src/util.c
@@ -757,43 +757,60 @@ int retry_send(ssize_t rc)
|
||||
"once" fails on EAGAIN, as this a timeout.
|
||||
This indicates a timeout of a TCP socket.
|
||||
*/
|
||||
int read_write(int fd, unsigned char *packet, int size, int rw)
|
||||
int read_writev(int fd, struct iovec *iov, int iovcnt, int rw)
|
||||
{
|
||||
ssize_t n, done;
|
||||
|
||||
for (done = 0; done < size; done += n)
|
||||
{
|
||||
if (rw & 1)
|
||||
n = read(fd, &packet[done], (size_t)(size - done));
|
||||
else
|
||||
n = write(fd, &packet[done], (size_t)(size - done));
|
||||
|
||||
if (n == 0)
|
||||
return 0;
|
||||
int cur = 0;
|
||||
ssize_t n, done = 0;
|
||||
|
||||
while (cur < iovcnt)
|
||||
{
|
||||
iov[cur].iov_len -= done;
|
||||
iov[cur].iov_base = ((char *)iov[cur].iov_base) + done;
|
||||
|
||||
if (rw & 1)
|
||||
n = readv(fd, &iov[cur], iovcnt - cur);
|
||||
else
|
||||
n = writev(fd, &iov[cur], iovcnt - cur);
|
||||
|
||||
iov[cur].iov_len += done;
|
||||
iov[cur].iov_base = ((char *)iov[cur].iov_base) - done;
|
||||
|
||||
if (n == -1)
|
||||
{
|
||||
n = 0; /* don't mess with counter when we loop. */
|
||||
|
||||
if (errno == EINTR || errno == ENOMEM || errno == ENOBUFS)
|
||||
continue;
|
||||
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
||||
{
|
||||
/* "once" variant */
|
||||
if (rw & 2)
|
||||
return 0;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(rw & 2) && (errno == EAGAIN || errno == EWOULDBLOCK))
|
||||
continue;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (n == 0 && (rw & 1))
|
||||
return 0;
|
||||
|
||||
done += n;
|
||||
while ((size_t)done >= iov[cur].iov_len)
|
||||
done -= iov[cur++].iov_len;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int read_write(int fd, unsigned char *packet, int size, int rw)
|
||||
{
|
||||
struct iovec iov;
|
||||
|
||||
/* size == 0 is not an error, just a NOOP. */
|
||||
if (size == 0)
|
||||
return 1;
|
||||
|
||||
iov.iov_len = (size_t)size;
|
||||
iov.iov_base = packet;
|
||||
|
||||
return read_writev(fd, &iov, 1, rw);
|
||||
}
|
||||
|
||||
/* close all fds except STDIN, STDOUT and STDERR, spare1, spare2 and spare3 */
|
||||
void close_fds(long max_fd, int spare1, int spare2, int spare3)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user