diff options
author | Florian Obser <florian@cvs.openbsd.org> | 2023-09-15 05:32:01 +0000 |
---|---|---|
committer | Florian Obser <florian@cvs.openbsd.org> | 2023-09-15 05:32:01 +0000 |
commit | 0185b1b33d3ccf4154acc064d7f5a54de3ccdd97 (patch) | |
tree | b1af0b10bbc929355e740142bfdf09effbad9cf1 /sbin/unwind | |
parent | cf25b3e829edf9e9f5ad9aea8be17e5ef3223123 (diff) |
Improve handling of ENOBUFS by not running in a tight loop forever.
This cherry-picks upstream git commit
0ee44ef384593ed0382d1ce6048d5a9c9440b45c.
Issue reported by landry@, which could be traced back to a buggy WiFi
driver that would constantly return ENOBUFS. This in turn lead
unwind(8) enter a tight loop through poll / sendto without making any
progress and burning 100% cpu.
OK sthen
I missed to commit this file when commiting to unbound.
Pointed out by tb, thanks!
Diffstat (limited to 'sbin/unwind')
-rw-r--r-- | sbin/unwind/libunbound/util/netevent.c | 102 |
1 files changed, 100 insertions, 2 deletions
diff --git a/sbin/unwind/libunbound/util/netevent.c b/sbin/unwind/libunbound/util/netevent.c index 204e4883cf2..b9395a8998b 100644 --- a/sbin/unwind/libunbound/util/netevent.c +++ b/sbin/unwind/libunbound/util/netevent.c @@ -116,6 +116,8 @@ /** timeout in millisec to wait for write to unblock, packets dropped after.*/ #define SEND_BLOCKED_WAIT_TIMEOUT 200 +/** max number of times to wait for write to unblock, packets dropped after.*/ +#define SEND_BLOCKED_MAX_RETRY 5 /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ #ifndef SO_TIMESTAMP @@ -402,9 +404,10 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif + int retries = 0; /* if we set the fd blocking, other threads suddenly * have a blocking fd that they operate on */ - while(sent == -1 && ( + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -419,6 +422,13 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -457,8 +467,48 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; if (!is_connected) { sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), sldns_buffer_remaining(packet), 0, @@ -665,7 +715,8 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif - while(sent == -1 && ( + int retries = 0; + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -680,6 +731,13 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -718,8 +776,48 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; sent = sendmsg(c->fd, &msg, 0); } } |