diff options
Diffstat (limited to 'usr.sbin/nsd/server.c')
-rw-r--r-- | usr.sbin/nsd/server.c | 1409 |
1 files changed, 900 insertions, 509 deletions
diff --git a/usr.sbin/nsd/server.c b/usr.sbin/nsd/server.c index 7aecf03e650..33b3fb88666 100644 --- a/usr.sbin/nsd/server.c +++ b/usr.sbin/nsd/server.c @@ -1,7 +1,7 @@ /* * server.c -- nsd(8) network input/output * - * Copyright (c) 2001-2011, NLnet Labs. All rights reserved. + * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. * * See LICENSE for the license. * @@ -33,6 +33,11 @@ #ifndef SHUT_WR #define SHUT_WR 1 #endif +#ifndef USE_MINI_EVENT +#include <event.h> +#else +#include "mini_event.h" +#endif #include <openssl/rand.h> @@ -41,12 +46,17 @@ #include "netio.h" #include "xfrd.h" #include "xfrd-tcp.h" +#include "xfrd-disk.h" #include "difffile.h" #include "nsec3.h" #include "ipc.h" +#include "udb.h" +#include "remote.h" #include "lookup3.h" #include "rrl.h" +#define RELOAD_SYNC_TIMEOUT 25 /* seconds */ + /* * Data for the UDP handlers. */ @@ -57,19 +67,33 @@ struct udp_handler_data query_type *query; }; -/* - * Data for the TCP accept handlers. Most data is simply passed along - * to the TCP connection handler. - */ struct tcp_accept_handler_data { struct nsd *nsd; struct nsd_socket *socket; - size_t tcp_accept_handler_count; - netio_handler_type *tcp_accept_handlers; + int event_added; + struct event event; }; -int slowaccept; -struct timespec slowaccept_timeout; +/* + * These globals are used to enable the TCP accept handlers + * when the number of TCP connection drops below the maximum + * number of TCP connections. + */ +static size_t tcp_accept_handler_count; +static struct tcp_accept_handler_data* tcp_accept_handlers; + +static struct event slowaccept_event; +static int slowaccept; + +#ifndef NONBLOCKING_IS_BROKEN +# define NUM_RECV_PER_SELECT 100 +#endif + +#if (!defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG)) +struct mmsghdr msgs[NUM_RECV_PER_SELECT]; +struct iovec iovecs[NUM_RECV_PER_SELECT]; +struct query *queries[NUM_RECV_PER_SELECT]; +#endif /* * Data for the TCP connection handlers. @@ -104,14 +128,6 @@ struct tcp_handler_data query_type* query; /* - * These fields are used to enable the TCP accept handlers - * when the number of TCP connection drops below the maximum - * number of TCP connections. - */ - size_t tcp_accept_handler_count; - netio_handler_type *tcp_accept_handlers; - - /* * The query_state is used to remember if we are performing an * AXFR, if we're done processing, or if we should discard the * query and connection. @@ -119,6 +135,11 @@ struct tcp_handler_data query_state_type query_state; /* + * The event for the file descriptor and tcp timeout + */ + struct event event; + + /* * The bytes_transmitted field is used to remember the number * of bytes transmitted when receiving or sending a DNS * packet. The count includes the two additional bytes used @@ -135,9 +156,7 @@ struct tcp_handler_data /* * Handle incoming queries on the UDP server sockets. */ -static void handle_udp(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types); +static void handle_udp(int fd, short event, void* arg); /* * Handle incoming connections on the TCP sockets. These handlers @@ -148,27 +167,21 @@ static void handle_udp(netio_type *netio, * NETIO_EVENT_NONE type. This is done using the function * configure_tcp_accept_handlers. */ -static void handle_tcp_accept(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types); +static void handle_tcp_accept(int fd, short event, void* arg); /* * Handle incoming queries on a TCP connection. The TCP connections * are configured to be non-blocking and the handler may be called * multiple times before a complete query is received. */ -static void handle_tcp_reading(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types); +static void handle_tcp_reading(int fd, short event, void* arg); /* * Handle outgoing responses on a TCP connection. The TCP connections * are configured to be non-blocking and the handler may be called * multiple times before a complete response is sent. */ -static void handle_tcp_writing(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types); +static void handle_tcp_writing(int fd, short event, void* arg); /* * Send all children the quit nonblocking, then close pipe. @@ -183,18 +196,9 @@ static void set_children_stats(struct nsd* nsd); #endif /* BIND8_STATS */ /* - * Change the event types the HANDLERS are interested in to - * EVENT_TYPES. + * Change the event types the HANDLERS are interested in to EVENT_TYPES. */ -static void configure_handler_event_types(size_t count, - netio_handler_type *handlers, - netio_event_types_type event_types); - -/* - * start xfrdaemon (again). - */ -static pid_t -server_start_xfrd(struct nsd *nsd, netio_handler_type* handler); +static void configure_handler_event_types(short event_types); static uint16_t *compressed_dname_offsets = 0; static uint32_t compression_table_capacity = 0; @@ -252,6 +256,9 @@ restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, default: /* SERVER MAIN */ close(nsd->children[i].parent_fd); nsd->children[i].parent_fd = -1; + if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); + } if(!nsd->children[i].handler) { ipc_data = (struct main_ipc_handler_data*) region_alloc( @@ -265,8 +272,6 @@ restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, ipc_data->got_bytes = 0; ipc_data->total_bytes = 0; ipc_data->acl_num = 0; - ipc_data->busy_writing_zone_state = 0; - ipc_data->write_conn = xfrd_tcp_create(region); nsd->children[i].handler = (struct netio_handler*) region_alloc( region, sizeof(struct netio_handler)); nsd->children[i].handler->fd = nsd->children[i].child_fd; @@ -280,25 +285,32 @@ restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, ipc_data = (struct main_ipc_handler_data*) nsd->children[i].handler->user_data; ipc_data->forward_mode = 0; - ipc_data->busy_writing_zone_state = 0; /* restart - update fd */ nsd->children[i].handler->fd = nsd->children[i].child_fd; break; case 0: /* CHILD */ + /* the child need not be able to access the + * nsd.db file */ + namedb_close_udb(nsd->db); nsd->pid = 0; nsd->child_count = 0; nsd->server_kind = nsd->children[i].kind; nsd->this_child = &nsd->children[i]; /* remove signal flags inherited from parent the parent will handle them. */ + nsd->signal_hint_reload_hup = 0; nsd->signal_hint_reload = 0; nsd->signal_hint_child = 0; nsd->signal_hint_quit = 0; nsd->signal_hint_shutdown = 0; nsd->signal_hint_stats = 0; nsd->signal_hint_statsusr = 0; + close(*xfrd_sock_p); close(nsd->this_child->child_fd); nsd->this_child->child_fd = -1; + if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); + } server_child(nsd); /* NOTREACH */ exit(0); @@ -384,6 +396,60 @@ server_init(struct nsd *nsd) return -1; } +#if defined(SO_RCVBUF) || defined(SO_SNDBUF) + if(1) { + int rcv = 1*1024*1024; + int snd = 1*1024*1024; + +#ifdef SO_RCVBUF +# ifdef SO_RCVBUFFORCE + if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, + (socklen_t)sizeof(rcv)) < 0) { + if(errno != EPERM && errno != ENOBUFS) { + log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, " + "...) failed: %s", strerror(errno)); + return -1; + } +# else + if(1) { +# endif /* SO_RCVBUFFORCE */ + if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, + (socklen_t)sizeof(rcv)) < 0) { + if(errno != ENOBUFS) { + log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, " + "...) failed: %s", strerror(errno)); + return -1; + } + } + } +#endif /* SO_RCVBUF */ + +#ifdef SO_SNDBUF +# ifdef SO_SNDBUFFORCE + if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, + (socklen_t)sizeof(snd)) < 0) { + if(errno != EPERM && errno != ENOBUFS) { + log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, " + "...) failed: %s", strerror(errno)); + return -1; + } +# else + if(1) { +# endif /* SO_SNDBUFFORCE */ + if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, + (socklen_t)sizeof(snd)) < 0) { + if(errno != ENOBUFS) { + log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, " + "...) failed: %s", strerror(errno)); + return -1; + } + } + } +#endif /* SO_SNDBUF */ + + } +#endif /* defined(SO_RCVBUF) || defined(SO_SNDBUF) */ + #if defined(INET6) if (nsd->udp[i].addr->ai_family == AF_INET6) { # if defined(IPV6_V6ONLY) @@ -596,21 +662,19 @@ server_prepare(struct nsd *nsd) #endif /* RATELIMIT */ /* Open the database... */ - if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, nsd->child_count)) == NULL) { + if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { log_msg(LOG_ERR, "unable to open the database %s: %s", nsd->dbfile, strerror(errno)); + unlink(nsd->task[0]->fname); + unlink(nsd->task[1]->fname); + xfrd_del_tempdir(nsd); return -1; } - - /* Read diff file */ - if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { - log_msg(LOG_ERR, "The diff file contains errors. Will continue " - "without it"); - } - -#ifdef NSEC3 - prehash(nsd->db, 0); -#endif + /* check if zone files have been modified */ + /* NULL for taskudb because we send soainfo in a moment, batched up, + * for all zones */ + if(nsd->options->zonefiles_check) + namedb_check_zonefiles(nsd->db, nsd->options, NULL, NULL); compression_table_capacity = 0; initialize_dname_compression_tables(nsd); @@ -685,62 +749,199 @@ server_shutdown(struct nsd *nsd) } } - log_finalize(); tsig_finalize(); +#ifdef HAVE_SSL + daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ +#endif +#if 0 /* OS collects memory pages */ nsd_options_destroy(nsd->options); region_destroy(nsd->region); - +#endif + log_finalize(); exit(0); } -static pid_t -server_start_xfrd(struct nsd *nsd, netio_handler_type* handler) +void +server_prepare_xfrd(struct nsd* nsd) +{ + char tmpfile[256]; + /* create task mmaps */ + nsd->mytask = 0; + snprintf(tmpfile, sizeof(tmpfile), "%snsd.%u.task.0", + nsd->options->xfrdir, (unsigned)getpid()); + nsd->task[0] = task_file_create(tmpfile); + if(!nsd->task[0]) + exit(1); + snprintf(tmpfile, sizeof(tmpfile), "%snsd.%u.task.1", + nsd->options->xfrdir, (unsigned)getpid()); + nsd->task[1] = task_file_create(tmpfile); + if(!nsd->task[1]) { + unlink(nsd->task[0]->fname); + exit(1); + } + assert(udb_base_get_userdata(nsd->task[0])->data == 0); + assert(udb_base_get_userdata(nsd->task[1])->data == 0); + /* create xfrd listener structure */ + nsd->xfrd_listener = region_alloc(nsd->region, + sizeof(netio_handler_type)); + nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) + region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); + nsd->xfrd_listener->fd = -1; + ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = + nsd; + ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = + xfrd_tcp_create(nsd->region, QIOBUFSZ); +} + + +void +server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) { pid_t pid; int sockets[2] = {0,0}; - zone_type* zone; struct ipc_handler_conn_data *data; - /* no need to send updates for zones, because xfrd will read from fork-memory */ - for(zone = nsd->db->zones; zone; zone=zone->next) { - zone->updated = 0; - } - if(handler->fd != -1) - close(handler->fd); + if(nsd->xfrd_listener->fd != -1) + close(nsd->xfrd_listener->fd); + if(del_db) { + /* recreate taskdb that xfrd was using, it may be corrupt */ + /* we (or reload) use nsd->mytask, and xfrd uses the other */ + char* tmpfile = nsd->task[1-nsd->mytask]->fname; + nsd->task[1-nsd->mytask]->fname = NULL; + /* free alloc already, so udb does not shrink itself */ + udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); + nsd->task[1-nsd->mytask]->alloc = NULL; + udb_base_free(nsd->task[1-nsd->mytask]); + /* create new file, overwrite the old one */ + nsd->task[1-nsd->mytask] = task_file_create(tmpfile); + free(tmpfile); + } if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); - return -1; + return; } pid = fork(); switch (pid) { case -1: log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); break; - case 0: - /* CHILD: close first socket, use second one */ + default: + /* PARENT: close first socket, use second one */ close(sockets[0]); - xfrd_init(sockets[1], nsd); + if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); + } + if(del_db) xfrd_free_namedb(nsd); + /* use other task than I am using, since if xfrd died and is + * restarted, the reload is using nsd->mytask */ + nsd->mytask = 1 - nsd->mytask; + xfrd_init(sockets[1], nsd, del_db, reload_active); /* ENOTREACH */ break; - default: - /* PARENT: close second socket, use first one */ + case 0: + /* CHILD: close second socket, use first one */ close(sockets[1]); - handler->fd = sockets[0]; + if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); + } + nsd->xfrd_listener->fd = sockets[0]; break; } - /* PARENT only */ - handler->timeout = NULL; - handler->event_types = NETIO_EVENT_READ; - handler->event_handler = parent_handle_xfrd_command; + /* server-parent only */ + nsd->xfrd_listener->timeout = NULL; + nsd->xfrd_listener->event_types = NETIO_EVENT_READ; + nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; /* clear ongoing ipc reads */ - data = (struct ipc_handler_conn_data *) handler->user_data; + data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; data->conn->is_reading = 0; - return pid; +} + +/** add all soainfo to taskdb */ +static void +add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) +{ + struct radnode* n; + udb_ptr task_last; /* last task, mytask is empty so NULL */ + /* add all SOA INFO to mytask */ + udb_ptr_init(&task_last, taskudb); + for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { + task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem); + } + udb_ptr_unlink(&task_last, taskudb); +} + +void +server_send_soa_xfrd(struct nsd* nsd, int shortsoa) +{ + /* normally this exchanges the SOA from nsd->xfrd and the expire back. + * parent fills one taskdb with soas, xfrd fills other with expires. + * then they exchange and process. + * shortsoa: xfrd crashes and needs to be restarted and one taskdb + * may be in use by reload. Fill SOA in taskdb and give to xfrd. + * expire notifications can be sent back via a normal reload later + * (xfrd will wait for current running reload to finish if any). + */ + sig_atomic_t cmd = 0; +#ifdef BIND8_STATS + pid_t mypid; +#endif + int xfrd_sock = nsd->xfrd_listener->fd; + struct udb_base* taskudb = nsd->task[nsd->mytask]; + udb_ptr t; + if(shortsoa) { + /* put SOA in xfrd task because mytask may be in use */ + taskudb = nsd->task[1-nsd->mytask]; + } + + add_all_soa_to_task(nsd, taskudb); + if(!shortsoa) { + /* wait for xfrd to signal task is ready, RELOAD signal */ + if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || + cmd != NSD_RELOAD) { + log_msg(LOG_ERR, "did not get start signal from xfrd"); + exit(1); + } + } + /* give xfrd our task, signal it with RELOAD_DONE */ + task_process_sync(taskudb); + cmd = NSD_RELOAD_DONE; + if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } +#ifdef BIND8_STATS + mypid = getpid(); + if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { + log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", + strerror(errno)); + } +#endif + + if(!shortsoa) { + /* process the xfrd task works (expiry data) */ + nsd->mytask = 1 - nsd->mytask; + taskudb = nsd->task[nsd->mytask]; + task_remap(taskudb); + udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); + while(!udb_ptr_is_null(&t)) { + task_process_expire(nsd->db, TASKLIST(&t)); + udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); + } + udb_ptr_unlink(&t, taskudb); + task_clear(taskudb); + + /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ + cmd = NSD_RELOAD_DONE; + if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } + } } /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ -static ssize_t +ssize_t block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) { uint8_t* buf = (uint8_t*) p; @@ -795,58 +996,121 @@ block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) return total; } +static void +reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) +{ + sig_atomic_t cmd = NSD_QUIT_SYNC; + udb_ptr t, next; + udb_base* u = nsd->task[nsd->mytask]; + udb_ptr_init(&next, u); + udb_ptr_new(&t, u, udb_base_get_userdata(u)); + udb_base_set_userdata(u, 0); + while(!udb_ptr_is_null(&t)) { + /* store next in list so this one can be deleted or reused */ + udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); + udb_rptr_zero(&TASKLIST(&t)->next, u); + + /* process task t */ + /* append results for task t and update last_task */ + task_process_in_reload(nsd, u, last_task, &t); + + /* go to next */ + udb_ptr_set_ptr(&t, u, &next); + + /* if the parent has quit, we must quit too, poll the fd for cmds */ + if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); + if(cmd == NSD_QUIT) { + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); + /* sync to disk (if needed) */ + udb_base_sync(nsd->db->udb, 0); + /* unlink files of remainder of tasks */ + while(!udb_ptr_is_null(&t)) { + if(TASKLIST(&t)->task_type == task_apply_xfr) { + xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); + } + udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); + } + udb_ptr_unlink(&t, u); + udb_ptr_unlink(&next, u); + exit(0); + } + } + + } + udb_ptr_unlink(&t, u); + udb_ptr_unlink(&next, u); +} + +#ifdef BIND8_STATS +static void +parent_send_stats(struct nsd* nsd, int cmdfd) +{ + size_t i; + if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { + log_msg(LOG_ERR, "could not write stats to reload"); + return; + } + for(i=0; i<nsd->child_count; i++) + if(!write_socket(cmdfd, &nsd->children[i].query_count, + sizeof(stc_t))) { + log_msg(LOG_ERR, "could not write stats to reload"); + return; + } +} + +static void +reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) +{ + struct nsdst s; + stc_t* p; + size_t i; + if(block_read(nsd, cmdfd, &s, sizeof(s), + RELOAD_SYNC_TIMEOUT) != sizeof(s)) { + log_msg(LOG_ERR, "could not read stats from oldpar"); + return; + } + s.db_disk = nsd->db->udb->base_size; + s.db_mem = region_get_mem(nsd->db->region); + p = (stc_t*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, + nsd->child_count); + if(!p) return; + for(i=0; i<nsd->child_count; i++) { + if(block_read(nsd, cmdfd, p++, sizeof(stc_t), 1)!=sizeof(stc_t)) + return; + } +} +#endif /* BIND8_STATS */ + /* * Reload the database, stop parent, re-fork children and continue. * as server_main. */ static void server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, - int cmdsocket, int* xfrd_sock_p) + int cmdsocket) { - pid_t old_pid; +#ifdef BIND8_STATS + pid_t mypid; +#endif sig_atomic_t cmd = NSD_QUIT_SYNC; - zone_type* zone; - int xfrd_sock = *xfrd_sock_p; int ret; + udb_ptr last_task; + + /* see what tasks we got from xfrd */ + task_remap(nsd->task[nsd->mytask]); + udb_ptr_init(&last_task, nsd->task[nsd->mytask]); + reload_process_tasks(nsd, &last_task, cmdsocket); - if(db_crc_different(nsd->db) == 0) { - DEBUG(DEBUG_XFRD,1, (LOG_INFO, - "CRC the same. skipping %s.", nsd->db->filename)); - } else { - DEBUG(DEBUG_XFRD,1, (LOG_INFO, - "CRC different. reread of %s.", nsd->db->filename)); - namedb_close(nsd->db); - if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, - nsd->child_count)) == NULL) { - log_msg(LOG_ERR, "unable to reload the database: %s", strerror(errno)); - exit(1); - } -#if defined(NSEC3) && !defined(FULL_PREHASH) - prehash(nsd->db, 0); -#endif - } - if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { - log_msg(LOG_ERR, "unable to load the diff file: %s", nsd->options->difffile); - exit(1); - } - log_msg(LOG_INFO, "memory recyclebin holds %lu bytes", (unsigned long) - region_get_recycle_size(nsd->db->region)); #ifndef NDEBUG if(nsd_debug_level >= 1) region_log_stats(nsd->db->region); #endif /* NDEBUG */ -#ifdef NSEC3 -#ifdef FULL_PREHASH - prehash(nsd->db, 1); -#endif /* FULL_PREHASH */ -#endif /* NSEC3 */ + /* sync to disk (if needed) */ + udb_base_sync(nsd->db->udb, 0); initialize_dname_compression_tables(nsd); - /* Get our new process id */ - old_pid = nsd->pid; - nsd->pid = getpid(); - #ifdef BIND8_STATS /* Restart dumping stats if required. */ time(&nsd->st.boot); @@ -854,38 +1118,29 @@ server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, #endif /* Start new child processes */ - if (server_start_children(nsd, server_region, netio, xfrd_sock_p) != 0) { - send_children_quit(nsd); /* no wait required */ + if (server_start_children(nsd, server_region, netio, &nsd-> + xfrd_listener->fd) != 0) { + send_children_quit(nsd); exit(1); } /* if the parent has quit, we must quit too, poll the fd for cmds */ if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { - DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", cmd)); + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); if(cmd == NSD_QUIT) { DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); - send_children_quit(nsd); /* no wait required */ + send_children_quit(nsd); exit(0); } } - /* Overwrite pid before closing old parent, to avoid race condition: - * - parent process already closed - * - pidfile still contains old_pid - * - control script contacts parent process, using contents of pidfile - */ - if (writepid(nsd) == -1) { - log_msg(LOG_ERR, "cannot overwrite the pidfile %s: %s", nsd->pidfile, strerror(errno)); - } - -#define RELOAD_SYNC_TIMEOUT 25 /* seconds */ /* Send quit command to parent: blocking, wait for receipt. */ do { DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) { - log_msg(LOG_ERR, "problems sending command from reload %d to oldnsd %d: %s", - (int)nsd->pid, (int)old_pid, strerror(errno)); + log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", + strerror(errno)); } /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); @@ -903,77 +1158,29 @@ server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, if(cmd == NSD_QUIT) { /* small race condition possible here, parent got quit cmd. */ send_children_quit(nsd); - unlinkpid(nsd->pidfile); exit(1); } assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); - - /* inform xfrd of new SOAs */ - cmd = NSD_SOA_BEGIN; - if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { - log_msg(LOG_ERR, "problems sending soa begin from reload %d to xfrd: %s", - (int)nsd->pid, strerror(errno)); - } - for(zone= nsd->db->zones; zone; zone = zone->next) { - uint16_t sz; - const dname_type *dname_ns=0, *dname_em=0; - if(zone->updated == 0) - continue; - DEBUG(DEBUG_IPC,1, (LOG_INFO, "nsd: sending soa info for zone %s", - dname_to_string(domain_dname(zone->apex),0))); - cmd = NSD_SOA_INFO; - sz = dname_total_size(domain_dname(zone->apex)); - if(zone->soa_rrset) { - dname_ns = domain_dname( - rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[0])); - dname_em = domain_dname( - rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[1])); - sz += sizeof(uint32_t)*6 + sizeof(uint8_t)*2 - + dname_ns->name_size + dname_em->name_size; - } - sz = htons(sz); - /* use blocking writes */ - if(!write_socket(xfrd_sock, &cmd, sizeof(cmd)) || - !write_socket(xfrd_sock, &sz, sizeof(sz)) || - !write_socket(xfrd_sock, domain_dname(zone->apex), - dname_total_size(domain_dname(zone->apex)))) - { - log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", - (int)nsd->pid, strerror(errno)); - } - if(zone->soa_rrset) { - uint32_t ttl = htonl(zone->soa_rrset->rrs[0].ttl); - assert(dname_ns && dname_em); - assert(zone->soa_rrset->rr_count > 0); - assert(rrset_rrtype(zone->soa_rrset) == TYPE_SOA); - assert(zone->soa_rrset->rrs[0].rdata_count == 7); - if(!write_socket(xfrd_sock, &ttl, sizeof(uint32_t)) - || !write_socket(xfrd_sock, &dname_ns->name_size, sizeof(uint8_t)) - || !write_socket(xfrd_sock, dname_name(dname_ns), dname_ns->name_size) - || !write_socket(xfrd_sock, &dname_em->name_size, sizeof(uint8_t)) - || !write_socket(xfrd_sock, dname_name(dname_em), dname_em->name_size) - || !write_socket(xfrd_sock, rdata_atom_data( - zone->soa_rrset->rrs[0].rdatas[2]), sizeof(uint32_t)) - || !write_socket(xfrd_sock, rdata_atom_data( - zone->soa_rrset->rrs[0].rdatas[3]), sizeof(uint32_t)) - || !write_socket(xfrd_sock, rdata_atom_data( - zone->soa_rrset->rrs[0].rdatas[4]), sizeof(uint32_t)) - || !write_socket(xfrd_sock, rdata_atom_data( - zone->soa_rrset->rrs[0].rdatas[5]), sizeof(uint32_t)) - || !write_socket(xfrd_sock, rdata_atom_data( - zone->soa_rrset->rrs[0].rdatas[6]), sizeof(uint32_t))) - { - log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", - (int)nsd->pid, strerror(errno)); - } - } - zone->updated = 0; +#ifdef BIND8_STATS + reload_do_stats(cmdsocket, nsd, &last_task); +#endif + udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); + task_process_sync(nsd->task[nsd->mytask]); + + /* send soainfo to the xfrd process, signal it that reload is done, + * it picks up the taskudb */ + cmd = NSD_RELOAD_DONE; + if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", + strerror(errno)); } - cmd = NSD_SOA_END; - if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { - log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", - (int)nsd->pid, strerror(errno)); +#ifdef BIND8_STATS + mypid = getpid(); + if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { + log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", + strerror(errno)); } +#endif /* try to reopen file */ if (nsd->file_rotation_ok) @@ -1004,6 +1211,10 @@ server_signal_mode(struct nsd *nsd) nsd->signal_hint_reload = 0; return NSD_RELOAD; } + else if(nsd->signal_hint_reload_hup) { + nsd->signal_hint_reload_hup = 0; + return NSD_RELOAD_REQ; + } else if(nsd->signal_hint_stats) { nsd->signal_hint_stats = 0; #ifdef BIND8_STATS @@ -1028,37 +1239,23 @@ server_main(struct nsd *nsd) region_type *server_region = region_create(xalloc, free); netio_type *netio = netio_create(server_region); netio_handler_type reload_listener; - netio_handler_type xfrd_listener; int reload_sockets[2] = {-1, -1}; struct timespec timeout_spec; - int fd; int status; pid_t child_pid; pid_t reload_pid = -1; - pid_t xfrd_pid = -1; sig_atomic_t mode; -#ifdef RATELIMIT - rrl_init((nsd->this_child - nsd->children)/sizeof(nsd->children[0])); -#endif - /* Ensure we are the main process */ assert(nsd->server_kind == NSD_SERVER_MAIN); - xfrd_listener.user_data = (struct ipc_handler_conn_data*)region_alloc( - server_region, sizeof(struct ipc_handler_conn_data)); - xfrd_listener.fd = -1; - ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->nsd = nsd; - ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->conn = - xfrd_tcp_create(server_region); - - /* Start the XFRD process */ - xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); - netio_add_handler(netio, &xfrd_listener); + /* Add listener for the XFRD process */ + netio_add_handler(netio, nsd->xfrd_listener); /* Start the child processes that handle incoming queries */ - if (server_start_children(nsd, server_region, netio, &xfrd_listener.fd) != 0) { - send_children_quit(nsd); /* no wait required */ + if (server_start_children(nsd, server_region, netio, + &nsd->xfrd_listener->fd) != 0) { + send_children_quit(nsd); exit(1); } reload_listener.fd = -1; @@ -1087,27 +1284,34 @@ server_main(struct nsd *nsd) "server %d died unexpectedly with status %d, restarting", (int) child_pid, status); restart_child_servers(nsd, server_region, netio, - &xfrd_listener.fd); + &nsd->xfrd_listener->fd); } else if (child_pid == reload_pid) { - sig_atomic_t cmd = NSD_SOA_END; + sig_atomic_t cmd = NSD_RELOAD_DONE; +#ifdef BIND8_STATS + pid_t mypid; +#endif log_msg(LOG_WARNING, "Reload process %d failed with status %d, continuing with old database", (int) child_pid, status); reload_pid = -1; - if(reload_listener.fd > 0) close(reload_listener.fd); + if(reload_listener.fd != -1) close(reload_listener.fd); reload_listener.fd = -1; reload_listener.event_types = NETIO_EVENT_NONE; + task_process_sync(nsd->task[nsd->mytask]); /* inform xfrd reload attempt ended */ - if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { + if(!write_socket(nsd->xfrd_listener->fd, + &cmd, sizeof(cmd)) == -1) { log_msg(LOG_ERR, "problems " "sending SOAEND to xfrd: %s", strerror(errno)); } - } else if (child_pid == xfrd_pid) { - log_msg(LOG_WARNING, - "xfrd process %d failed with status %d, restarting ", - (int) child_pid, status); - xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); +#ifdef BIND8_STATS + mypid = getpid(); + if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { + log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", + strerror(errno)); + } +#endif } else { log_msg(LOG_WARNING, "Unknown child %d terminated with status %d", @@ -1135,18 +1339,30 @@ server_main(struct nsd *nsd) } break; + case NSD_RELOAD_REQ: { + sig_atomic_t cmd = NSD_RELOAD_REQ; + log_msg(LOG_WARNING, "SIGHUP received, reloading..."); + DEBUG(DEBUG_IPC,1, (LOG_INFO, + "main: ipc send reload_req to xfrd")); + if(!write_socket(nsd->xfrd_listener->fd, + &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "server_main: could not send " + "reload_req to xfrd: %s", strerror(errno)); + } + nsd->mode = NSD_RUN; + } break; case NSD_RELOAD: /* Continue to run nsd after reload */ nsd->mode = NSD_RUN; - + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); if (reload_pid != -1) { log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", (int) reload_pid); break; } - log_msg(LOG_WARNING, "signal received, reloading..."); - + /* switch the mytask to keep track of who owns task*/ + nsd->mytask = 1 - nsd->mytask; if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); reload_pid = -1; @@ -1163,12 +1379,13 @@ server_main(struct nsd *nsd) /* CHILD */ close(reload_sockets[0]); server_reload(nsd, server_region, netio, - reload_sockets[1], &xfrd_listener.fd); + reload_sockets[1]); DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); close(reload_sockets[1]); DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); /* drop stale xfrd ipc data */ - ((struct ipc_handler_conn_data*)xfrd_listener.user_data) + ((struct ipc_handler_conn_data*)nsd-> + xfrd_listener->user_data) ->conn->is_reading = 0; reload_pid = -1; reload_listener.fd = -1; @@ -1196,7 +1413,8 @@ server_main(struct nsd *nsd) /* stop xfrd ipc writes in progress */ DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc send indication reload")); - if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { + if(!write_socket(nsd->xfrd_listener->fd, + &cmd, sizeof(cmd))) { log_msg(LOG_ERR, "server_main: could not send reload " "indication to xfrd: %s", strerror(errno)); } @@ -1216,19 +1434,23 @@ server_main(struct nsd *nsd) log_msg(LOG_ERR, "server_main: " "could not ack quit: %s", strerror(errno)); } +#ifdef BIND8_STATS + parent_send_stats(nsd, reload_listener.fd); +#endif /* BIND8_STATS */ close(reload_listener.fd); } + DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); /* only quit children after xfrd has acked */ - send_children_quit(nsd); /* no wait required */ + send_children_quit(nsd); - namedb_fd_close(nsd->db); +#if 0 /* OS collects memory pages */ region_destroy(server_region); +#endif server_shutdown(nsd); /* ENOTREACH */ break; case NSD_SHUTDOWN: - send_children_quit_and_wait(nsd); log_msg(LOG_WARNING, "signal received, shutting down..."); break; case NSD_REAP_CHILDREN: @@ -1242,20 +1464,24 @@ server_main(struct nsd *nsd) nsd->mode = NSD_RUN; break; default: - log_msg(LOG_WARNING, "NSD main server mode invalid: %d", nsd->mode); + log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); nsd->mode = NSD_RUN; break; } } - /* Truncate the pid file. */ - if ((fd = open(nsd->pidfile, O_WRONLY | O_TRUNC, 0644)) == -1) { - log_msg(LOG_ERR, "can not truncate the pid file %s: %s", nsd->pidfile, strerror(errno)); - } else { - close(fd); - } + /* close opened ports to avoid race with restart of nsd */ + server_close_all_sockets(nsd->udp, nsd->ifs); + server_close_all_sockets(nsd->tcp, nsd->ifs); +#ifdef HAVE_SSL + daemon_remote_close(nsd->rc); +#endif + send_children_quit_and_wait(nsd); + /* Unlink it if possible... */ unlinkpid(nsd->pidfile); + unlink(nsd->task[0]->fname); + unlink(nsd->task[1]->fname); if(reload_listener.fd != -1) { sig_atomic_t cmd = NSD_QUIT; @@ -1268,22 +1494,24 @@ server_main(struct nsd *nsd) fsync(reload_listener.fd); close(reload_listener.fd); } - if(xfrd_listener.fd != -1) { + if(nsd->xfrd_listener->fd != -1) { /* complete quit, stop xfrd */ sig_atomic_t cmd = NSD_QUIT; DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc send quit to xfrd")); - if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { + if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", strerror(errno)); } - fsync(xfrd_listener.fd); - close(xfrd_listener.fd); - (void)kill(xfrd_pid, SIGTERM); + fsync(nsd->xfrd_listener->fd); + close(nsd->xfrd_listener->fd); + (void)kill(nsd->pid, SIGTERM); } + xfrd_del_tempdir(nsd); - namedb_fd_close(nsd->db); +#if 0 /* OS collects memory pages */ region_destroy(server_region); +#endif server_shutdown(nsd); } @@ -1293,6 +1521,44 @@ server_process_query(struct nsd *nsd, struct query *query) return query_process(query, nsd); } +static query_state_type +server_process_query_udp(struct nsd *nsd, struct query *query) +{ +#ifdef RATELIMIT + if(query_process(query, nsd) != QUERY_DISCARDED) { + if(rrl_process_query(query)) + return rrl_slip(query); + else return QUERY_PROCESSED; + } + return QUERY_DISCARDED; +#else + return query_process(query, nsd); +#endif +} + +struct event_base* +nsd_child_event_base(void) +{ + struct event_base* base; +#ifdef USE_MINI_EVENT + static time_t secs; + static struct timeval now; + base = event_init(&secs, &now); +#else +# if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) + /* libev */ + base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); +# else + /* libevent */ +# ifdef HAVE_EVENT_BASE_NEW + base = event_base_new(); +# else + base = event_init(); +# endif +# endif +#endif + return base; +} /* * Serve DNS requests. @@ -1302,11 +1568,19 @@ server_child(struct nsd *nsd) { size_t i; region_type *server_region = region_create(xalloc, free); - netio_type *netio = netio_create(server_region); - netio_handler_type *tcp_accept_handlers; + struct event_base* event_base = nsd_child_event_base(); query_type *udp_query; sig_atomic_t mode; + if(!event_base) { + log_msg(LOG_ERR, "nsd server could not create event base"); + exit(1); + } + +#ifdef RATELIMIT + rrl_init((nsd->this_child - nsd->children)/sizeof(nsd->children[0])); +#endif + assert(nsd->server_kind != NSD_SERVER_MAIN); DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); @@ -1318,29 +1592,45 @@ server_child(struct nsd *nsd) } if (nsd->this_child && nsd->this_child->parent_fd != -1) { - netio_handler_type *handler; - - handler = (netio_handler_type *) region_alloc( - server_region, sizeof(netio_handler_type)); - handler->fd = nsd->this_child->parent_fd; - handler->timeout = NULL; - handler->user_data = (struct ipc_handler_conn_data*)region_alloc( + struct event *handler; + struct ipc_handler_conn_data* user_data = + (struct ipc_handler_conn_data*)region_alloc( server_region, sizeof(struct ipc_handler_conn_data)); - ((struct ipc_handler_conn_data*)handler->user_data)->nsd = nsd; - ((struct ipc_handler_conn_data*)handler->user_data)->conn = - xfrd_tcp_create(server_region); - handler->event_types = NETIO_EVENT_READ; - handler->event_handler = child_handle_parent_command; - netio_add_handler(netio, handler); + user_data->nsd = nsd; + user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); + + handler = (struct event*) region_alloc( + server_region, sizeof(*handler)); + event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| + EV_READ, child_handle_parent_command, user_data); + if(event_base_set(event_base, handler) != 0) + log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); + if(event_add(handler, NULL) != 0) + log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); } if (nsd->server_kind & NSD_SERVER_UDP) { +#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) udp_query = query_create(server_region, compressed_dname_offsets, compression_table_size); - +#else + udp_query = NULL; + memset(msgs, 0, sizeof(msgs)); + for (i = 0; i < NUM_RECV_PER_SELECT; i++) { + queries[i] = query_create(server_region, + compressed_dname_offsets, compression_table_size); + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); + iovecs[i].iov_base = buffer_begin(queries[i]->packet); + iovecs[i].iov_len = buffer_remaining(queries[i]->packet);; + msgs[i].msg_hdr.msg_iov = &iovecs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + msgs[i].msg_hdr.msg_name = &queries[i]->addr; + msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; + } +#endif for (i = 0; i < nsd->ifs; ++i) { struct udp_handler_data *data; - netio_handler_type *handler; + struct event *handler; data = (struct udp_handler_data *) region_alloc( server_region, @@ -1349,14 +1639,14 @@ server_child(struct nsd *nsd) data->nsd = nsd; data->socket = &nsd->udp[i]; - handler = (netio_handler_type *) region_alloc( - server_region, sizeof(netio_handler_type)); - handler->fd = nsd->udp[i].s; - handler->timeout = NULL; - handler->user_data = data; - handler->event_types = NETIO_EVENT_READ; - handler->event_handler = handle_udp; - netio_add_handler(netio, handler); + handler = (struct event*) region_alloc( + server_region, sizeof(*handler)); + event_set(handler, nsd->udp[i].s, EV_PERSIST|EV_READ, + handle_udp, data); + if(event_base_set(event_base, handler) != 0) + log_msg(LOG_ERR, "nsd udp: event_base_set failed"); + if(event_add(handler, NULL) != 0) + log_msg(LOG_ERR, "nsd udp: event_add failed"); } } @@ -1365,30 +1655,25 @@ server_child(struct nsd *nsd) * and disable them based on the current number of active TCP * connections. */ - tcp_accept_handlers = (netio_handler_type *) region_alloc( - server_region, nsd->ifs * sizeof(netio_handler_type)); + tcp_accept_handler_count = nsd->ifs; + tcp_accept_handlers = (struct tcp_accept_handler_data*) region_alloc( + server_region, nsd->ifs * sizeof(*tcp_accept_handlers)); if (nsd->server_kind & NSD_SERVER_TCP) { for (i = 0; i < nsd->ifs; ++i) { - struct tcp_accept_handler_data *data; - netio_handler_type *handler; - - data = (struct tcp_accept_handler_data *) region_alloc( - server_region, - sizeof(struct tcp_accept_handler_data)); + struct event *handler = &tcp_accept_handlers[i].event; + struct tcp_accept_handler_data* data = + &tcp_accept_handlers[i]; data->nsd = nsd; data->socket = &nsd->tcp[i]; - data->tcp_accept_handler_count = nsd->ifs; - data->tcp_accept_handlers = tcp_accept_handlers; - - handler = &tcp_accept_handlers[i]; - handler->fd = nsd->tcp[i].s; - handler->timeout = NULL; - handler->user_data = data; - handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_ACCEPT; - handler->event_handler = handle_tcp_accept; - netio_add_handler(netio, handler); + event_set(handler, nsd->tcp[i].s, EV_PERSIST|EV_READ, + handle_tcp_accept, data); + if(event_base_set(event_base, handler) != 0) + log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); + if(event_add(handler, NULL) != 0) + log_msg(LOG_ERR, "nsd tcp: event_add failed"); + data->event_added = 1; } - } + } else tcp_accept_handler_count = 0; /* The main loop... */ while ((mode = nsd->mode) != NSD_QUIT) { @@ -1425,9 +1710,9 @@ server_child(struct nsd *nsd) } else if(mode == NSD_RUN) { /* Wait for a query... */ - if (netio_dispatch(netio, NULL, NULL) == -1) { + if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { if (errno != EINTR) { - log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); + log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); break; } } @@ -1435,7 +1720,7 @@ server_child(struct nsd *nsd) /* ignore here, quit */ } else { log_msg(LOG_ERR, "mode bad value %d, back to service.", - mode); + (int)mode); nsd->mode = NSD_RUN; } } @@ -1444,96 +1729,208 @@ server_child(struct nsd *nsd) bind8_stats(nsd); #endif /* BIND8_STATS */ - namedb_fd_close(nsd->db); +#if 0 /* OS collects memory pages */ + event_base_free(event_base); region_destroy(server_region); - server_shutdown(nsd); -} - -static query_state_type -server_process_query_udp(struct nsd *nsd, struct query *query) -{ -#ifdef RATELIMIT - if(query_process(query, nsd) != QUERY_DISCARDED) { - if(rrl_process_query(query)) - return rrl_slip(query); - else return QUERY_PROCESSED; - } - return QUERY_DISCARDED; -#else - return query_process(query, nsd); #endif + server_shutdown(nsd); } +#if defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) static void -handle_udp(netio_type *ATTR_UNUSED(netio), - netio_handler_type *handler, - netio_event_types_type event_types) +handle_udp(int fd, short event, void* arg) { - struct udp_handler_data *data - = (struct udp_handler_data *) handler->user_data; - int received, sent; - struct query *q = data->query; + struct udp_handler_data *data = (struct udp_handler_data *) arg; + int received, sent, recvcount, i; + struct query *q; - if (!(event_types & NETIO_EVENT_READ)) { + if (!(event & EV_READ)) { return; } + recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); + /* this printf strangely gave a performance increase on Linux */ + /* printf("recvcount %d \n", recvcount); */ + if (recvcount == -1) { + if (errno != EAGAIN && errno != EINTR) { + log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); + STATUP(data->nsd, rxerr); + } + /* Simply no data available */ + return; + } + for (i = 0; i < recvcount; i++) { + loopstart: + received = msgs[i].msg_len; + q = queries[i]; + if (received == -1) { + log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( + msgs[i].msg_hdr.msg_flags)); + STATUP(data->nsd, rxerr); + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); + iovecs[i].iov_len = buffer_remaining(q->packet); + goto swap_drop; + } - /* Account... */ + /* Account... */ + if (data->socket->addr->ai_family == AF_INET) { + STATUP(data->nsd, qudp); + } else if (data->socket->addr->ai_family == AF_INET6) { + STATUP(data->nsd, qudp6); + } + + buffer_skip(q->packet, received); + buffer_flip(q->packet); + + /* Process and answer the query... */ + if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { + if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { + STATUP(data->nsd, nona); + } + + /* Add EDNS0 and TSIG info if necessary. */ + query_add_optional(q, data->nsd); + + buffer_flip(q->packet); + iovecs[i].iov_len = buffer_remaining(q->packet); #ifdef BIND8_STATS - if (data->socket->addr->ai_family == AF_INET) { - STATUP(data->nsd, qudp); - } else if (data->socket->addr->ai_family == AF_INET6) { - STATUP(data->nsd, qudp6); + /* Account the rcode & TC... */ + STATUP2(data->nsd, rcode, RCODE(q->packet)); + if (TC(q->packet)) + STATUP(data->nsd, truncated); +#endif /* BIND8_STATS */ + } else { + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); + iovecs[i].iov_len = buffer_remaining(q->packet); + swap_drop: + STATUP(data->nsd, dropped); + if(i != recvcount-1) { + /* swap with last and decrease recvcount */ + struct mmsghdr mtmp = msgs[i]; + struct iovec iotmp = iovecs[i]; + recvcount--; + msgs[i] = msgs[recvcount]; + iovecs[i] = iovecs[recvcount]; + queries[i] = queries[recvcount]; + msgs[recvcount] = mtmp; + iovecs[recvcount] = iotmp; + queries[recvcount] = q; + msgs[i].msg_hdr.msg_iov = &iovecs[i]; + msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; + goto loopstart; + } else { recvcount --; } + } } -#endif - /* Initialize the query... */ - query_reset(q, UDP_MAX_MESSAGE_LEN, 0); + /* send until all are sent */ + i = 0; + while(i<recvcount) { + sent = sendmmsg(fd, &msgs[i], recvcount-i, 0); + if(sent == -1) { + log_msg(LOG_ERR, "sendmmsg failed: %s", strerror(errno)); +#ifdef BIND8_STATS + data->nsd->st.txerr += recvcount-i; +#endif /* BIND8_STATS */ + break; + } + i += sent; + } + for(i=0; i<recvcount; i++) { + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); + iovecs[i].iov_len = buffer_remaining(queries[i]->packet); + } +} - received = recvfrom(handler->fd, - buffer_begin(q->packet), - buffer_remaining(q->packet), - 0, - (struct sockaddr *)&q->addr, - &q->addrlen); - if (received == -1) { +#else /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ + +static void +handle_udp(int fd, short event, void* arg) +{ + struct udp_handler_data *data = (struct udp_handler_data *) arg; + int received, sent; +#ifndef NONBLOCKING_IS_BROKEN +#ifdef HAVE_RECVMMSG + int recvcount; +#endif /* HAVE_RECVMMSG */ + int i; +#endif /* NONBLOCKING_IS_BROKEN */ + struct query *q; +#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) + q = data->query; +#endif + + if (!(event & EV_READ)) { + return; + } +#ifndef NONBLOCKING_IS_BROKEN +#ifdef HAVE_RECVMMSG + recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); + /* this printf strangely gave a performance increase on Linux */ + /* printf("recvcount %d \n", recvcount); */ + if (recvcount == -1) { if (errno != EAGAIN && errno != EINTR) { - log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); + log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); STATUP(data->nsd, rxerr); - /* No zone statup */ } - } else { + /* Simply no data available */ + return; + } + for (i = 0; i < recvcount; i++) { + received = msgs[i].msg_len; + msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; + if (received == -1) { + log_msg(LOG_ERR, "recvmmsg failed"); + STATUP(data->nsd, rxerr); + /* the error can be found in msgs[i].msg_hdr.msg_flags */ + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); + continue; + } + q = queries[i]; +#else + for(i=0; i<NUM_RECV_PER_SELECT; i++) { +#endif /* HAVE_RECVMMSG */ +#endif /* NONBLOCKING_IS_BROKEN */ + +#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) + /* Initialize the query... */ + query_reset(q, UDP_MAX_MESSAGE_LEN, 0); + + received = recvfrom(fd, + buffer_begin(q->packet), + buffer_remaining(q->packet), + 0, + (struct sockaddr *)&q->addr, + &q->addrlen); + if (received == -1) { + if (errno != EAGAIN && errno != EINTR) { + log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); + STATUP(data->nsd, rxerr); + } + return; + } +#endif /* NONBLOCKING_IS_BROKEN || !HAVE_RECVMMSG */ + + /* Account... */ + if (data->socket->addr->ai_family == AF_INET) { + STATUP(data->nsd, qudp); + } else if (data->socket->addr->ai_family == AF_INET6) { + STATUP(data->nsd, qudp6); + } + buffer_skip(q->packet, received); buffer_flip(q->packet); /* Process and answer the query... */ if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { -#ifdef BIND8_STATS if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { STATUP(data->nsd, nona); -# ifdef USE_ZONE_STATS - if (q->zone) - ZTATUP(q->zone, nona); -# endif } -# ifdef USE_ZONE_STATS - if (q->zone) { - if (data->socket->addr->ai_family == AF_INET) { - ZTATUP(q->zone, qudp); - } else if (data->socket->addr->ai_family == AF_INET6) { - ZTATUP(q->zone, qudp6); - } - } -# endif -#endif - /* Add EDNS0 and TSIG info if necessary. */ query_add_optional(q, data->nsd); buffer_flip(q->packet); - sent = sendto(handler->fd, + sent = sendto(fd, buffer_begin(q->packet), buffer_remaining(q->packet), 0, @@ -1542,62 +1939,43 @@ handle_udp(netio_type *ATTR_UNUSED(netio), if (sent == -1) { log_msg(LOG_ERR, "sendto failed: %s", strerror(errno)); STATUP(data->nsd, txerr); - -#ifdef USE_ZONE_STATS - if (q->zone) - ZTATUP(q->zone, txerr); -#endif } else if ((size_t) sent != buffer_remaining(q->packet)) { log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet)); -#ifdef BIND8_STATS } else { +#ifdef BIND8_STATS /* Account the rcode & TC... */ STATUP2(data->nsd, rcode, RCODE(q->packet)); -# ifdef USE_ZONE_STATS - if (q->zone) - ZTATUP2(q->zone, rcode, RCODE(q->packet)); -# endif - if (TC(q->packet)) { + if (TC(q->packet)) STATUP(data->nsd, truncated); -# ifdef USE_ZONE_STATS - if (q->zone) - ZTATUP(q->zone, truncated); -# endif - } #endif /* BIND8_STATS */ } -#ifdef BIND8_STATS } else { STATUP(data->nsd, dropped); -# ifdef USE_ZONE_STATS - if (q->zone) { - ZTATUP(q->zone, dropped); - } -# endif -#endif } +#ifndef NONBLOCKING_IS_BROKEN +#ifdef HAVE_RECVMMSG + query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); +#endif } +#endif } +#endif /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ static void -cleanup_tcp_handler(netio_type *netio, netio_handler_type *handler) +cleanup_tcp_handler(struct tcp_handler_data* data) { - struct tcp_handler_data *data - = (struct tcp_handler_data *) handler->user_data; - netio_remove_handler(netio, handler); - close(handler->fd); - slowaccept = 0; + event_del(&data->event); + close(data->event.ev_fd); /* * Enable the TCP accept handlers when the current number of * TCP connections is about to drop below the maximum number * of TCP connections. */ - if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { - configure_handler_event_types(data->tcp_accept_handler_count, - data->tcp_accept_handlers, - NETIO_EVENT_READ); + if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { + configure_handler_event_types(EV_READ|EV_PERSIST); + slowaccept = 0; } --data->nsd->current_tcp_count; assert(data->nsd->current_tcp_count >= 0); @@ -1606,28 +1984,27 @@ cleanup_tcp_handler(netio_type *netio, netio_handler_type *handler) } static void -handle_tcp_reading(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types) +handle_tcp_reading(int fd, short event, void* arg) { - struct tcp_handler_data *data - = (struct tcp_handler_data *) handler->user_data; + struct tcp_handler_data *data = (struct tcp_handler_data *) arg; ssize_t received; + struct event_base* ev_base; + struct timeval timeout; - if (event_types & NETIO_EVENT_TIMEOUT) { + if ((event & EV_TIMEOUT)) { /* Connection timed out. */ - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } if (data->nsd->tcp_query_count > 0 && data->query_count >= data->nsd->tcp_query_count) { /* No more queries allowed on this tcp connection. */ - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } - assert(event_types & NETIO_EVENT_READ); + assert((event & EV_READ)); if (data->bytes_transmitted == 0) { query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); @@ -1637,7 +2014,7 @@ handle_tcp_reading(netio_type *netio, * Check if we received the leading packet length bytes yet. */ if (data->bytes_transmitted < sizeof(uint16_t)) { - received = read(handler->fd, + received = read(fd, (char *) &data->query->tcplen + data->bytes_transmitted, sizeof(uint16_t) - data->bytes_transmitted); @@ -1649,16 +2026,18 @@ handle_tcp_reading(netio_type *netio, */ return; } else { + char buf[48]; + addr2str(&data->query->addr, buf, sizeof(buf)); #ifdef ECONNRESET if (verbosity >= 2 || errno != ECONNRESET) #endif /* ECONNRESET */ - log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); - cleanup_tcp_handler(netio, handler); + log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); + cleanup_tcp_handler(data); return; } } else if (received == 0) { /* EOF */ - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } @@ -1685,13 +2064,13 @@ handle_tcp_reading(netio_type *netio, */ if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } if (data->query->tcplen > data->query->maxlen) { VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } @@ -1701,7 +2080,7 @@ handle_tcp_reading(netio_type *netio, assert(buffer_remaining(data->query->packet) > 0); /* Read the (remaining) query data. */ - received = read(handler->fd, + received = read(fd, buffer_current(data->query->packet), buffer_remaining(data->query->packet)); if (received == -1) { @@ -1712,16 +2091,18 @@ handle_tcp_reading(netio_type *netio, */ return; } else { + char buf[48]; + addr2str(&data->query->addr, buf, sizeof(buf)); #ifdef ECONNRESET if (verbosity >= 2 || errno != ECONNRESET) #endif /* ECONNRESET */ - log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); - cleanup_tcp_handler(netio, handler); + log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); + cleanup_tcp_handler(data); return; } } else if (received == 0) { /* EOF */ - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } @@ -1738,17 +2119,15 @@ handle_tcp_reading(netio_type *netio, assert(buffer_position(data->query->packet) == data->query->tcplen); /* Account... */ -#ifdef BIND8_STATS -# ifndef INET6 - STATUP(data->nsd, ctcp); -# else +#ifndef INET6 + STATUP(data->nsd, ctcp); +#else if (data->query->addr.ss_family == AF_INET) { STATUP(data->nsd, ctcp); } else if (data->query->addr.ss_family == AF_INET6) { STATUP(data->nsd, ctcp6); } -# endif -#endif /* BIND8_STATS */ +#endif /* We have a complete query, process it. */ @@ -1760,42 +2139,16 @@ handle_tcp_reading(netio_type *netio, if (data->query_state == QUERY_DISCARDED) { /* Drop the packet and the entire connection... */ STATUP(data->nsd, dropped); -#if defined(BIND8_STATS) && defined(USE_ZONE_STATS) - if (data->query->zone) { - ZTATUP(data->query->zone, dropped); - } -#endif - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } -#ifdef BIND8_STATS if (RCODE(data->query->packet) == RCODE_OK && !AA(data->query->packet)) { STATUP(data->nsd, nona); -# ifdef USE_ZONE_STATS - if (data->query->zone) - ZTATUP(data->query->zone, nona); -# endif } -# ifdef USE_ZONE_STATS - if (data->query->zone) { -# ifndef INET6 - ZTATUP(data->query->zone, ctcp); -# else - if (data->query->addr.ss_family == AF_INET) { - ZTATUP(data->query->zone, ctcp); - } else if (data->query->addr.ss_family == AF_INET6) { - ZTATUP(data->query->zone, ctcp6); - } -# endif - } -# endif /* USE_ZONE_STATS */ - -#endif /* BIND8_STATS */ - query_add_optional(data->query, data->nsd); /* Switch to the tcp write handler. */ @@ -1803,31 +2156,37 @@ handle_tcp_reading(netio_type *netio, data->query->tcplen = buffer_remaining(data->query->packet); data->bytes_transmitted = 0; - handler->timeout->tv_sec = data->nsd->tcp_timeout; - handler->timeout->tv_nsec = 0L; - timespec_add(handler->timeout, netio_current_time(netio)); - - handler->event_types = NETIO_EVENT_WRITE | NETIO_EVENT_TIMEOUT; - handler->event_handler = handle_tcp_writing; + timeout.tv_sec = data->nsd->tcp_timeout; + timeout.tv_usec = 0L; + + ev_base = data->event.ev_base; + event_del(&data->event); + event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, + handle_tcp_writing, data); + if(event_base_set(ev_base, &data->event) != 0) + log_msg(LOG_ERR, "event base set tcpr failed"); + if(event_add(&data->event, &timeout) != 0) + log_msg(LOG_ERR, "event add tcpr failed"); + /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ + handle_tcp_writing(fd, EV_WRITE, data); } static void -handle_tcp_writing(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types) +handle_tcp_writing(int fd, short event, void* arg) { - struct tcp_handler_data *data - = (struct tcp_handler_data *) handler->user_data; + struct tcp_handler_data *data = (struct tcp_handler_data *) arg; ssize_t sent; struct query *q = data->query; + struct timeval timeout; + struct event_base* ev_base; - if (event_types & NETIO_EVENT_TIMEOUT) { + if ((event & EV_TIMEOUT)) { /* Connection timed out. */ - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } - assert(event_types & NETIO_EVENT_WRITE); + assert((event & EV_WRITE)); if (data->bytes_transmitted < sizeof(q->tcplen)) { /* Writing the response packet length. */ @@ -1838,9 +2197,9 @@ handle_tcp_writing(netio_type *netio, iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; iov[1].iov_base = buffer_begin(q->packet); iov[1].iov_len = buffer_limit(q->packet); - sent = writev(handler->fd, iov, 2); + sent = writev(fd, iov, 2); #else /* HAVE_WRITEV */ - sent = write(handler->fd, + sent = write(fd, (const char *) &n_tcplen + data->bytes_transmitted, sizeof(n_tcplen) - data->bytes_transmitted); #endif /* HAVE_WRITEV */ @@ -1856,10 +2215,10 @@ handle_tcp_writing(netio_type *netio, if(verbosity >= 2 || errno != ECONNRESET) #endif /* ECONNRESET */ #ifdef EPIPE - if(verbosity >= 2 || errno != EPIPE) + if(verbosity >= 2 || errno != EPIPE) #endif /* EPIPE 'broken pipe' */ - log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); - cleanup_tcp_handler(netio, handler); + log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); + cleanup_tcp_handler(data); return; } } @@ -1878,9 +2237,9 @@ handle_tcp_writing(netio_type *netio, /* handle potential 'packet done' code */ goto packet_could_be_done; #endif - } - - sent = write(handler->fd, + } + + sent = write(fd, buffer_current(q->packet), buffer_remaining(q->packet)); if (sent == -1) { @@ -1895,10 +2254,10 @@ handle_tcp_writing(netio_type *netio, if(verbosity >= 2 || errno != ECONNRESET) #endif /* ECONNRESET */ #ifdef EPIPE - if(verbosity >= 2 || errno != EPIPE) + if(verbosity >= 2 || errno != EPIPE) #endif /* EPIPE 'broken pipe' */ log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); - cleanup_tcp_handler(netio, handler); + cleanup_tcp_handler(data); return; } } @@ -1930,9 +2289,16 @@ handle_tcp_writing(netio_type *netio, q->tcplen = buffer_remaining(q->packet); data->bytes_transmitted = 0; /* Reset timeout. */ - handler->timeout->tv_sec = data->nsd->tcp_timeout; - handler->timeout->tv_nsec = 0; - timespec_add(handler->timeout, netio_current_time(netio)); + timeout.tv_sec = data->nsd->tcp_timeout; + timeout.tv_usec = 0L; + ev_base = data->event.ev_base; + event_del(&data->event); + event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, + handle_tcp_writing, data); + if(event_base_set(ev_base, &data->event) != 0) + log_msg(LOG_ERR, "event base set tcpw failed"); + if(event_add(&data->event, &timeout) != 0) + log_msg(LOG_ERR, "event add tcpw failed"); /* * Write data if/when the socket is writable @@ -1949,44 +2315,56 @@ handle_tcp_writing(netio_type *netio, if (data->nsd->tcp_query_count > 0 && data->query_count >= data->nsd->tcp_query_count) { - (void) shutdown(handler->fd, SHUT_WR); + (void) shutdown(fd, SHUT_WR); } data->bytes_transmitted = 0; - handler->timeout->tv_sec = data->nsd->tcp_timeout; - handler->timeout->tv_nsec = 0; - timespec_add(handler->timeout, netio_current_time(netio)); - - handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; - handler->event_handler = handle_tcp_reading; + timeout.tv_sec = data->nsd->tcp_timeout; + timeout.tv_usec = 0L; + ev_base = data->event.ev_base; + event_del(&data->event); + event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, + handle_tcp_reading, data); + if(event_base_set(ev_base, &data->event) != 0) + log_msg(LOG_ERR, "event base set tcpw failed"); + if(event_add(&data->event, &timeout) != 0) + log_msg(LOG_ERR, "event add tcpw failed"); } +static void +handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), + void* ATTR_UNUSED(arg)) +{ + if(slowaccept) { + configure_handler_event_types(EV_PERSIST | EV_READ); + slowaccept = 0; + } +} + /* * Handle an incoming TCP connection. The connection is accepted and - * a new TCP reader event handler is added to NETIO. The TCP handler + * a new TCP reader event handler is added. The TCP handler * is responsible for cleanup when the connection is closed. */ static void -handle_tcp_accept(netio_type *netio, - netio_handler_type *handler, - netio_event_types_type event_types) +handle_tcp_accept(int fd, short event, void* arg) { struct tcp_accept_handler_data *data - = (struct tcp_accept_handler_data *) handler->user_data; + = (struct tcp_accept_handler_data *) arg; int s; struct tcp_handler_data *tcp_data; region_type *tcp_region; - netio_handler_type *tcp_handler; #ifdef INET6 struct sockaddr_storage addr; #else struct sockaddr_in addr; #endif socklen_t addrlen; + struct timeval timeout; - if (!(event_types & NETIO_EVENT_READ)) { + if (!(event & EV_READ)) { return; } @@ -1996,7 +2374,7 @@ handle_tcp_accept(netio_type *netio, /* Accept it... */ addrlen = sizeof(addr); - s = accept(handler->fd, (struct sockaddr *) &addr, &addrlen); + s = accept(fd, (struct sockaddr *) &addr, &addrlen); if (s == -1) { /** * EMFILE and ENFILE is a signal that the limit of open @@ -2006,9 +2384,16 @@ handle_tcp_accept(netio_type *netio, */ if (errno == EMFILE || errno == ENFILE) { if (!slowaccept) { - slowaccept_timeout.tv_sec = NETIO_SLOW_ACCEPT_TIMEOUT; - slowaccept_timeout.tv_nsec = 0L; - timespec_add(&slowaccept_timeout, netio_current_time(netio)); + /* disable accept events */ + struct timeval tv; + configure_handler_event_types(0); + tv.tv_sec = SLOW_ACCEPT_TIMEOUT; + tv.tv_usec = 0L; + event_set(&slowaccept_event, -1, EV_TIMEOUT, + handle_slowaccept_timeout, NULL); + (void)event_base_set(data->event.ev_base, + &slowaccept_event); + (void)event_add(&slowaccept_event, &tv); slowaccept = 1; /* We don't want to spam the logs here */ } @@ -2045,28 +2430,20 @@ handle_tcp_accept(netio_type *netio, tcp_data->nsd = data->nsd; tcp_data->query_count = 0; - tcp_data->tcp_accept_handler_count = data->tcp_accept_handler_count; - tcp_data->tcp_accept_handlers = data->tcp_accept_handlers; - tcp_data->query_state = QUERY_PROCESSED; tcp_data->bytes_transmitted = 0; memcpy(&tcp_data->query->addr, &addr, addrlen); tcp_data->query->addrlen = addrlen; - tcp_handler = (netio_handler_type *) region_alloc( - tcp_region, sizeof(netio_handler_type)); - tcp_handler->fd = s; - tcp_handler->timeout = (struct timespec *) region_alloc( - tcp_region, sizeof(struct timespec)); - tcp_handler->timeout->tv_sec = data->nsd->tcp_timeout; - tcp_handler->timeout->tv_nsec = 0L; - timespec_add(tcp_handler->timeout, netio_current_time(netio)); + timeout.tv_sec = data->nsd->tcp_timeout; + timeout.tv_usec = 0; - tcp_handler->user_data = tcp_data; - tcp_handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; - tcp_handler->event_handler = handle_tcp_reading; - - netio_add_handler(netio, tcp_handler); + event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, + handle_tcp_reading, tcp_data); + if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) + log_msg(LOG_ERR, "cannot set tcp event base"); + if(event_add(&tcp_data->event, &timeout) != 0) + log_msg(LOG_ERR, "cannot set tcp event base"); /* * Keep track of the total number of TCP handlers installed so @@ -2075,9 +2452,7 @@ handle_tcp_accept(netio_type *netio, */ ++data->nsd->current_tcp_count; if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { - configure_handler_event_types(data->tcp_accept_handler_count, - data->tcp_accept_handlers, - NETIO_EVENT_NONE); + configure_handler_event_types(0); } } @@ -2098,7 +2473,6 @@ send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) (int) nsd->children[i].pid, strerror(errno)); } else if (timeout > 0) { - /* wait for reply */ (void)block_read(NULL, nsd->children[i].child_fd, &command, sizeof(command), timeout); @@ -2113,6 +2487,7 @@ send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) static void send_children_quit(struct nsd* nsd) { + DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); send_children_command(nsd, NSD_QUIT, 0); } @@ -2138,15 +2513,31 @@ set_children_stats(struct nsd* nsd) #endif /* BIND8_STATS */ static void -configure_handler_event_types(size_t count, - netio_handler_type *handlers, - netio_event_types_type event_types) +configure_handler_event_types(short event_types) { size_t i; - assert(handlers); - - for (i = 0; i < count; ++i) { - handlers[i].event_types = event_types; + for (i = 0; i < tcp_accept_handler_count; ++i) { + struct event* handler = &tcp_accept_handlers[i].event; + if(event_types) { + /* reassign */ + int fd = handler->ev_fd; + struct event_base* base = handler->ev_base; + if(tcp_accept_handlers[i].event_added) + event_del(handler); + event_set(handler, fd, event_types, + handle_tcp_accept, &tcp_accept_handlers[i]); + if(event_base_set(base, handler) != 0) + log_msg(LOG_ERR, "conhand: cannot event_base"); + if(event_add(handler, NULL) != 0) + log_msg(LOG_ERR, "conhand: cannot event_add"); + tcp_accept_handlers[i].event_added = 1; + } else { + /* remove */ + if(tcp_accept_handlers[i].event_added) { + event_del(handler); + tcp_accept_handlers[i].event_added = 0; + } + } } } |