mirror of
https://github.com/pi-hole/dnsmasq.git
synced 2026-02-15 07:25:42 +00:00
Optimise TCP send.
In the DNS TCP code, there are a couple of places where we have a buffer containing a message which we need to send via TCP. The DNS protocol requires that this be sent as <16-bit length in network order><message>. Making two write calls, one for the length and one for the message, causes the TCP stack to send two packets, one for each. A single packet containing both is preferable from a performance point of view. Implement a scatter-gather version of our read_write() wrapper and use it where necessary to send TCP DNS messages.
This commit is contained in:
@@ -1527,6 +1527,7 @@ int memcmp_masked(unsigned char *a, unsigned char *b, int len,
|
|||||||
int expand_buf(struct iovec *iov, size_t size);
|
int expand_buf(struct iovec *iov, size_t size);
|
||||||
char *print_mac(char *buff, unsigned char *mac, int len);
|
char *print_mac(char *buff, unsigned char *mac, int len);
|
||||||
int read_write(int fd, unsigned char *packet, int size, int rw);
|
int read_write(int fd, unsigned char *packet, int size, int rw);
|
||||||
|
int read_writev(int fd, struct iovec *iov, int iovcnt, int rw);
|
||||||
void close_fds(long max_fd, int spare1, int spare2, int spare3);
|
void close_fds(long max_fd, int spare1, int spare2, int spare3);
|
||||||
int wildcard_match(const char* wildcard, const char* match);
|
int wildcard_match(const char* wildcard, const char* match);
|
||||||
int wildcard_matchn(const char* wildcard, const char* match, int num);
|
int wildcard_matchn(const char* wildcard, const char* match, int num);
|
||||||
|
|||||||
@@ -2029,9 +2029,9 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
|||||||
int class, rclass, type, rtype;
|
int class, rclass, type, rtype;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
|
struct iovec sendio[2];
|
||||||
#ifdef MSG_FASTOPEN
|
#ifdef MSG_FASTOPEN
|
||||||
struct msghdr msg;
|
struct msghdr msg;
|
||||||
struct iovec sendio[2];
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void)mark;
|
(void)mark;
|
||||||
@@ -2044,6 +2044,12 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
|||||||
GETSHORT(type, p);
|
GETSHORT(type, p);
|
||||||
GETSHORT(class, p);
|
GETSHORT(class, p);
|
||||||
|
|
||||||
|
length = htons(qsize);
|
||||||
|
sendio[0].iov_base = &length;
|
||||||
|
sendio[0].iov_len = sizeof(length);
|
||||||
|
sendio[1].iov_base = header;
|
||||||
|
sendio[1].iov_len = qsize;
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
int data_sent = 0, fatal = 0;
|
int data_sent = 0, fatal = 0;
|
||||||
@@ -2065,8 +2071,6 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
|||||||
*servp = serv = daemon->serverarray[start];
|
*servp = serv = daemon->serverarray[start];
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
length = htons(qsize);
|
|
||||||
|
|
||||||
if (serv->tcpfd == -1)
|
if (serv->tcpfd == -1)
|
||||||
{
|
{
|
||||||
if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
|
if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
|
||||||
@@ -2097,12 +2101,7 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
|||||||
tv.tv_sec += TCP_TIMEOUT;
|
tv.tv_sec += TCP_TIMEOUT;
|
||||||
setsockopt(serv->tcpfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
|
setsockopt(serv->tcpfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MSG_FASTOPEN
|
#ifdef MSG_FASTOPEN
|
||||||
sendio[0].iov_base = (unsigned char *)&length;
|
|
||||||
sendio[0].iov_len = sizeof(length);
|
|
||||||
sendio[1].iov_base = (unsigned char *)header;
|
|
||||||
sendio[1].iov_len = qsize;
|
|
||||||
msg.msg_name = &serv->addr.sa;
|
msg.msg_name = &serv->addr.sa;
|
||||||
msg.msg_namelen = sa_len(&serv->addr);
|
msg.msg_namelen = sa_len(&serv->addr);
|
||||||
msg.msg_iov = sendio;
|
msg.msg_iov = sendio;
|
||||||
@@ -2138,11 +2137,9 @@ static ssize_t tcp_talk(int first, int last, int start, struct dns_header *heade
|
|||||||
serv->flags &= ~SERV_GOT_TCP;
|
serv->flags &= ~SERV_GOT_TCP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We us the _ONCE variant of read_write() here because we've set a timeout on the tcp socket
|
/* We use the _ONCE variant of read_write() here because we've set a timeout on the tcp socket
|
||||||
and wish to abort if the whole data is not read/written within the timeout. */
|
and wish to abort if the whole data is not read/written within the timeout. */
|
||||||
if ((!data_sent &&
|
if ((!data_sent && !read_writev(serv->tcpfd, sendio, 2, RW_WRITE_ONCE)) ||
|
||||||
(!read_write(serv->tcpfd, (unsigned char *)&length, sizeof(length), RW_WRITE_ONCE) ||
|
|
||||||
!read_write(serv->tcpfd, (unsigned char *)header, qsize, RW_WRITE_ONCE))) ||
|
|
||||||
!read_write(serv->tcpfd, (unsigned char *)&length, sizeof(length), RW_READ_ONCE) ||
|
!read_write(serv->tcpfd, (unsigned char *)&length, sizeof(length), RW_READ_ONCE) ||
|
||||||
!expand_buf(recvbuff, (rsize = ntohs(length))) ||
|
!expand_buf(recvbuff, (rsize = ntohs(length))) ||
|
||||||
!read_write(serv->tcpfd, recvbuff->iov_base, rsize, RW_READ_ONCE))
|
!read_write(serv->tcpfd, recvbuff->iov_base, rsize, RW_READ_ONCE))
|
||||||
@@ -2376,7 +2373,8 @@ void tcp_request(int confd, time_t now, struct iovec *bigbuff,
|
|||||||
unsigned int mark = 0;
|
unsigned int mark = 0;
|
||||||
int have_mark = 0;
|
int have_mark = 0;
|
||||||
int first, last, filtered, do_stale = 0;
|
int first, last, filtered, do_stale = 0;
|
||||||
|
struct iovec out_iov[2];
|
||||||
|
|
||||||
bigbuff->iov_base = NULL;
|
bigbuff->iov_base = NULL;
|
||||||
bigbuff->iov_len = 0;
|
bigbuff->iov_len = 0;
|
||||||
|
|
||||||
@@ -2746,9 +2744,13 @@ void tcp_request(int confd, time_t now, struct iovec *bigbuff,
|
|||||||
report_addresses(header, m, mark);
|
report_addresses(header, m, mark);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* use scatter-gather IO so that length doesn't end up in separate packet. */
|
||||||
out_len = htons(m);
|
out_len = htons(m);
|
||||||
if (!read_write(confd, (unsigned char *)&out_len, sizeof(out_len), RW_WRITE) |
|
out_iov[0].iov_len = sizeof(out_len);
|
||||||
!read_write(confd, bigbuff->iov_base, m, RW_WRITE))
|
out_iov[0].iov_base = &out_len;
|
||||||
|
out_iov[1].iov_len = m;
|
||||||
|
out_iov[1].iov_base = bigbuff->iov_base;
|
||||||
|
if (!read_writev(confd, out_iov, 2, RW_WRITE))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* If we answered with stale data, this process will now try and get fresh data into
|
/* If we answered with stale data, this process will now try and get fresh data into
|
||||||
|
|||||||
63
src/util.c
63
src/util.c
@@ -757,43 +757,60 @@ int retry_send(ssize_t rc)
|
|||||||
"once" fails on EAGAIN, as this is a timeout.
|
"once" fails on EAGAIN, as this is a timeout.
|
||||||
This indicates a timeout of a TCP socket.
|
This indicates a timeout of a TCP socket.
|
||||||
*/
|
*/
|
||||||
int read_write(int fd, unsigned char *packet, int size, int rw)
|
int read_writev(int fd, struct iovec *iov, int iovcnt, int rw)
|
||||||
{
|
{
|
||||||
ssize_t n, done;
|
int cur = 0;
|
||||||
|
ssize_t n, done = 0;
|
||||||
for (done = 0; done < size; done += n)
|
|
||||||
{
|
|
||||||
if (rw & 1)
|
|
||||||
n = read(fd, &packet[done], (size_t)(size - done));
|
|
||||||
else
|
|
||||||
n = write(fd, &packet[done], (size_t)(size - done));
|
|
||||||
|
|
||||||
if (n == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
|
while (cur < iovcnt)
|
||||||
|
{
|
||||||
|
iov[cur].iov_len -= done;
|
||||||
|
iov[cur].iov_base = ((char *)iov[cur].iov_base) + done;
|
||||||
|
|
||||||
|
if (rw & 1)
|
||||||
|
n = readv(fd, &iov[cur], iovcnt - cur);
|
||||||
|
else
|
||||||
|
n = writev(fd, &iov[cur], iovcnt - cur);
|
||||||
|
|
||||||
|
iov[cur].iov_len += done;
|
||||||
|
iov[cur].iov_base = ((char *)iov[cur].iov_base) - done;
|
||||||
|
|
||||||
if (n == -1)
|
if (n == -1)
|
||||||
{
|
{
|
||||||
n = 0; /* don't mess with counter when we loop. */
|
|
||||||
|
|
||||||
if (errno == EINTR || errno == ENOMEM || errno == ENOBUFS)
|
if (errno == EINTR || errno == ENOMEM || errno == ENOBUFS)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
if (!(rw & 2) && (errno == EAGAIN || errno == EWOULDBLOCK))
|
||||||
{
|
continue;
|
||||||
/* "once" variant */
|
|
||||||
if (rw & 2)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n == 0 && (rw & 1))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
done += n;
|
||||||
|
while ((size_t)done >= iov[cur].iov_len)
|
||||||
|
done -= iov[cur++].iov_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int read_write(int fd, unsigned char *packet, int size, int rw)
|
||||||
|
{
|
||||||
|
struct iovec iov;
|
||||||
|
|
||||||
|
/* size == 0 is not an error, just a NOOP. */
|
||||||
|
if (size == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
iov.iov_len = (size_t)size;
|
||||||
|
iov.iov_base = packet;
|
||||||
|
|
||||||
|
return read_writev(fd, &iov, 1, rw);
|
||||||
|
}
|
||||||
|
|
||||||
/* close all fds except STDIN, STDOUT and STDERR, spare1, spare2 and spare3 */
|
/* close all fds except STDIN, STDOUT and STDERR, spare1, spare2 and spare3 */
|
||||||
void close_fds(long max_fd, int spare1, int spare2, int spare3)
|
void close_fds(long max_fd, int spare1, int spare2, int spare3)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user