Diffstat (limited to 'usr.sbin/nsd/server.c')
-rw-r--r--  usr.sbin/nsd/server.c | 1900
1 file changed, 1900 insertions(+), 0 deletions(-)
diff --git a/usr.sbin/nsd/server.c b/usr.sbin/nsd/server.c new file mode 100644 index 00000000000..c0a84dafe0b --- /dev/null +++ b/usr.sbin/nsd/server.c @@ -0,0 +1,1900 @@ +/* + * server.c -- nsd(8) network input/output + * + * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. + * + * See LICENSE for the license. + * + */ + +#include <config.h> + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <fcntl.h> +#include <netdb.h> + +#include "axfr.h" +#include "namedb.h" +#include "netio.h" +#include "xfrd.h" +#include "xfrd-tcp.h" +#include "difffile.h" +#include "nsec3.h" +#include "ipc.h" + +/* + * Data for the UDP handlers. + */ +struct udp_handler_data +{ + struct nsd *nsd; + struct nsd_socket *socket; + query_type *query; +}; + +/* + * Data for the TCP accept handlers. Most data is simply passed along + * to the TCP connection handler. + */ +struct tcp_accept_handler_data { + struct nsd *nsd; + struct nsd_socket *socket; + size_t tcp_accept_handler_count; + netio_handler_type *tcp_accept_handlers; +}; + +/* + * Data for the TCP connection handlers. + * + * The TCP handlers use non-blocking I/O. This is necessary to avoid + * blocking the entire server on a slow TCP connection, but does make + * reading from and writing to the socket more complicated. + * + * Basically, whenever a read/write would block (indicated by the + * EAGAIN errno variable) we remember the position we were reading + * from/writing to and return from the TCP reading/writing event + * handler. When the socket becomes readable/writable again we + * continue from the same position. + */ +struct tcp_handler_data +{ + /* + * The region used to allocate all TCP connection related + * data, including this structure. This region is destroyed + * when the connection is closed. + */ + region_type* region; + + /* + * The global nsd structure. + */ + struct nsd* nsd; + + /* + * The current query data for this TCP connection. + */ + query_type* query; + + /* + * These fields are used to enable the TCP accept handlers + * when the number of TCP connection drops below the maximum + * number of TCP connections. + */ + size_t tcp_accept_handler_count; + netio_handler_type* tcp_accept_handlers; + + /* + * The query_state is used to remember if we are performing an + * AXFR, if we're done processing, or if we should discard the + * query and connection. + */ + query_state_type query_state; + + /* + * The bytes_transmitted field is used to remember the number + * of bytes transmitted when receiving or sending a DNS + * packet. The count includes the two additional bytes used + * to specify the packet length on a TCP connection. + */ + size_t bytes_transmitted; + + /* + * The number of queries handled by this specific TCP connection. + */ + int query_count; +}; + +/* + * Handle incoming queries on the UDP server sockets. + */ +static void handle_udp(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types); + +/* + * Handle incoming connections on the TCP sockets. These handlers + * usually wait for the NETIO_EVENT_READ event (indicating an incoming + * connection) but are disabled when the number of current TCP + * connections is equal to the maximum number of TCP connections. 
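+ *
+ * (Aside: the connection handlers below resume partial reads and
+ * writes exactly as described for tcp_handler_data above. A minimal
+ * sketch of that resume-on-EAGAIN pattern, with hypothetical names
+ * and the two-byte length prefix left out:
+ *
+ *	static int
+ *	write_resumable(int fd, const uint8_t *buf, size_t len, size_t *done)
+ *	{
+ *		while (*done < len) {
+ *			ssize_t n = write(fd, buf + *done, len - *done);
+ *			if (n == -1) {
+ *				if (errno == EAGAIN || errno == EINTR)
+ *					return 1;	// wait for writable, call again
+ *				return -1;	// hard error, drop the connection
+ *			}
+ *			*done += (size_t)n;
+ *		}
+ *		return 0;	// message fully sent
+ *	}
+ *
+ * The handler keeps *done (bytes_transmitted here) across
+ * invocations, so each writable event continues where the previous
+ * one stopped.)
+ *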
+ * Disabling is done by changing the handler to wait for the + * NETIO_EVENT_NONE type. This is done using the function + * configure_tcp_accept_handlers. + */ +static void handle_tcp_accept(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types); + +/* + * Handle incoming queries on a TCP connection. The TCP connections + * are configured to be non-blocking and the handler may be called + * multiple times before a complete query is received. + */ +static void handle_tcp_reading(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types); + +/* + * Handle outgoing responses on a TCP connection. The TCP connections + * are configured to be non-blocking and the handler may be called + * multiple times before a complete response is sent. + */ +static void handle_tcp_writing(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types); + +/* + * Send all children the quit nonblocking, then close pipe. + */ +static void send_children_quit(struct nsd* nsd); + +/* set childrens flags to send NSD_STATS to them */ +#ifdef BIND8_STATS +static void set_children_stats(struct nsd* nsd); +#endif /* BIND8_STATS */ + +/* + * Change the event types the HANDLERS are interested in to + * EVENT_TYPES. + */ +static void configure_handler_event_types(size_t count, + netio_handler_type *handlers, + netio_event_types_type event_types); + +/* + * start xfrdaemon (again). + */ +static pid_t +server_start_xfrd(struct nsd *nsd, netio_handler_type* handler); + +static uint16_t *compressed_dname_offsets = 0; +static uint32_t compression_table_capacity = 0; +static uint32_t compression_table_size = 0; + +/* + * Remove the specified pid from the list of child pids. Returns -1 if + * the pid is not in the list, child_num otherwise. The field is set to 0. + */ +static int +delete_child_pid(struct nsd *nsd, pid_t pid) +{ + size_t i; + for (i = 0; i < nsd->child_count; ++i) { + if (nsd->children[i].pid == pid) { + nsd->children[i].pid = 0; + if(!nsd->children[i].need_to_exit) { + if(nsd->children[i].child_fd > 0) + close(nsd->children[i].child_fd); + nsd->children[i].child_fd = -1; + if(nsd->children[i].handler) + nsd->children[i].handler->fd = -1; + } + return i; + } + } + return -1; +} + +/* + * Restart child servers if necessary. + */ +static int +restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, + int* xfrd_sock_p) +{ + struct main_ipc_handler_data *ipc_data; + size_t i; + int sv[2]; + + /* Fork the child processes... 
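+ *
+ * The per-child command channel set up below follows the usual
+ * socketpair-then-fork shape; as a minimal sketch (hypothetical
+ * helpers, error handling trimmed):
+ *
+ *	int sv[2];
+ *	pid_t pid;
+ *
+ *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
+ *	switch ((pid = fork())) {
+ *	case 0:			// CHILD: keep sv[1], talk to parent
+ *		close(sv[0]);
+ *		child_main(sv[1]);	// hypothetical
+ *		exit(0);
+ *	default:		// PARENT: keep sv[0], talk to child
+ *		close(sv[1]);
+ *		remember_child(pid, sv[0]);	// hypothetical
+ *	}
+ *
+ * Each side closes the end it does not use, so a later EOF on the
+ * channel reliably signals that the peer has gone away.
+ *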
*/ + for (i = 0; i < nsd->child_count; ++i) { + if (nsd->children[i].pid <= 0) { + if (nsd->children[i].child_fd > 0) + close(nsd->children[i].child_fd); + if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { + log_msg(LOG_ERR, "socketpair: %s", + strerror(errno)); + return -1; + } + nsd->children[i].child_fd = sv[0]; + nsd->children[i].parent_fd = sv[1]; + nsd->children[i].pid = fork(); + switch (nsd->children[i].pid) { + default: /* SERVER MAIN */ + close(nsd->children[i].parent_fd); + nsd->children[i].parent_fd = -1; + if(!nsd->children[i].handler) + { + ipc_data = (struct main_ipc_handler_data*) region_alloc( + region, sizeof(struct main_ipc_handler_data)); + ipc_data->nsd = nsd; + ipc_data->child = &nsd->children[i]; + ipc_data->child_num = i; + ipc_data->xfrd_sock = xfrd_sock_p; + ipc_data->packet = buffer_create(region, QIOBUFSZ); + ipc_data->forward_mode = 0; + ipc_data->got_bytes = 0; + ipc_data->total_bytes = 0; + ipc_data->acl_num = 0; + ipc_data->busy_writing_zone_state = 0; + ipc_data->write_conn = xfrd_tcp_create(region); + nsd->children[i].handler = (struct netio_handler*) region_alloc( + region, sizeof(struct netio_handler)); + nsd->children[i].handler->fd = nsd->children[i].child_fd; + nsd->children[i].handler->timeout = NULL; + nsd->children[i].handler->user_data = ipc_data; + nsd->children[i].handler->event_types = NETIO_EVENT_READ; + nsd->children[i].handler->event_handler = parent_handle_child_command; + netio_add_handler(netio, nsd->children[i].handler); + } + /* clear any ongoing ipc */ + ipc_data = (struct main_ipc_handler_data*) + nsd->children[i].handler->user_data; + ipc_data->forward_mode = 0; + ipc_data->busy_writing_zone_state = 0; + /* restart - update fd */ + nsd->children[i].handler->fd = nsd->children[i].child_fd; + break; + case 0: /* CHILD */ + nsd->pid = 0; + nsd->child_count = 0; + nsd->server_kind = nsd->children[i].kind; + nsd->this_child = &nsd->children[i]; + /* remove signal flags inherited from parent + the parent will handle them. */ + nsd->signal_hint_reload = 0; + nsd->signal_hint_child = 0; + nsd->signal_hint_quit = 0; + nsd->signal_hint_shutdown = 0; + nsd->signal_hint_stats = 0; + nsd->signal_hint_statsusr = 0; + close(nsd->this_child->child_fd); + nsd->this_child->child_fd = -1; + server_child(nsd); + /* NOTREACH */ + exit(0); + case -1: + log_msg(LOG_ERR, "fork failed: %s", + strerror(errno)); + return -1; + } + } + } + return 0; +} + +#ifdef BIND8_STATS +static void set_bind8_alarm(struct nsd* nsd) +{ + /* resync so that the next alarm is on the next whole minute */ + if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ + alarm(nsd->st.period - (time(NULL) % nsd->st.period)); +} +#endif + +static void +cleanup_dname_compression_tables(void *ptr) +{ + free(ptr); + compressed_dname_offsets = NULL; + compression_table_capacity = 0; +} + +static void +initialize_dname_compression_tables(struct nsd *nsd) +{ + size_t needed = domain_table_count(nsd->db->domains) + 1; + needed += EXTRA_DOMAIN_NUMBERS; + if(compression_table_capacity < needed) { + compressed_dname_offsets = (uint16_t *) xalloc( + needed * sizeof(uint16_t)); + region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, + compressed_dname_offsets); + compression_table_capacity = needed; + compression_table_size=domain_table_count(nsd->db->domains)+1; + } + memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); + compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ +} + +/* + * Initialize the server, create and bind the sockets. 
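+ *
+ * In outline, each configured interface gets the same treatment
+ * (sketch with error checks trimmed; the IPV6_USE_MIN_MTU option
+ * applies only to IPv6 UDP sockets, as the comments below explain):
+ *
+ *	s = socket(ai->ai_family, ai->ai_socktype, 0);
+ *	setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, &on, sizeof(on));
+ *	fcntl(s, F_SETFL, O_NONBLOCK);	// never block after select()
+ *	bind(s, ai->ai_addr, ai->ai_addrlen);
+ *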
+ * + */ +int +server_init(struct nsd *nsd) +{ + size_t i; +#if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU))) + int on = 1; +#endif + + /* UDP */ + + /* Make a socket... */ + for (i = 0; i < nsd->ifs; i++) { + if (!nsd->udp[i].addr) { + nsd->udp[i].s = -1; + continue; + } + if ((nsd->udp[i].s = socket(nsd->udp[i].addr->ai_family, nsd->udp[i].addr->ai_socktype, 0)) == -1) { +#if defined(INET6) + if (nsd->udp[i].addr->ai_family == AF_INET6 && + errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { + log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported"); + continue; + } +#endif /* INET6 */ + log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); + return -1; + } + +#if defined(INET6) + if (nsd->udp[i].addr->ai_family == AF_INET6) { +# if defined(IPV6_V6ONLY) + if (setsockopt(nsd->udp[i].s, + IPPROTO_IPV6, IPV6_V6ONLY, + &on, sizeof(on)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", + strerror(errno)); + return -1; + } +# endif +# if defined(IPV6_USE_MIN_MTU) + /* + * There is no fragmentation of IPv6 datagrams + * during forwarding in the network. Therefore + * we do not send UDP datagrams larger than + * the minimum IPv6 MTU of 1280 octets. The + * EDNS0 message length can be larger if the + * network stack supports IPV6_USE_MIN_MTU. + */ + if (setsockopt(nsd->udp[i].s, + IPPROTO_IPV6, IPV6_USE_MIN_MTU, + &on, sizeof(on)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", + strerror(errno)); + return -1; + } +# elif defined(IPV6_MTU) + /* + * On Linux, PMTUD is disabled by default for datagrams + * so set the MTU equal to the MIN MTU to get the same. + */ + on = IPV6_MIN_MTU; + if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU, + &on, sizeof(on)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", + strerror(errno)); + return -1; + } + on = 1; +# endif + } +#endif +#if defined(AF_INET) + if (nsd->udp[i].addr->ai_family == AF_INET) { +# if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) + int action = IP_PMTUDISC_DONT; + if (setsockopt(nsd->udp[i].s, IPPROTO_IP, + IP_MTU_DISCOVER, &action, sizeof(action)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", + strerror(errno)); + return -1; + } +# elif defined(IP_DONTFRAG) + int off = 0; + if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG, + &off, sizeof(off)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", + strerror(errno)); + return -1; + } +# endif + } +#endif + /* set it nonblocking */ + /* otherwise, on OSes with thundering herd problems, the + UDP recv could block NSD after select returns readable. */ + if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno)); + } + + /* Bind it... */ + if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) { + log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno)); + return -1; + } + } + + /* TCP */ + + /* Make a socket... 
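+ *
+ * The loop below amounts to this per-interface sequence (sketch,
+ * error paths trimmed; TCP_BACKLOG is the listen queue length):
+ *
+ *	s = socket(ai->ai_family, ai->ai_socktype, 0);
+ *	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+ *	fcntl(s, F_SETFL, O_NONBLOCK);	// accept() must not block either
+ *	bind(s, ai->ai_addr, ai->ai_addrlen);
+ *	listen(s, TCP_BACKLOG);
+ *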
*/ + for (i = 0; i < nsd->ifs; i++) { + if (!nsd->tcp[i].addr) { + nsd->tcp[i].s = -1; + continue; + } + if ((nsd->tcp[i].s = socket(nsd->tcp[i].addr->ai_family, nsd->tcp[i].addr->ai_socktype, 0)) == -1) { +#if defined(INET6) + if (nsd->tcp[i].addr->ai_family == AF_INET6 && + errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { + log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported"); + continue; + } +#endif /* INET6 */ + log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); + return -1; + } + +#ifdef SO_REUSEADDR + if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) { + log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno)); + } +#endif /* SO_REUSEADDR */ + +#if defined(INET6) && defined(IPV6_V6ONLY) + if (nsd->tcp[i].addr->ai_family == AF_INET6 && + setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) + { + log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno)); + return -1; + } +#endif + /* set it nonblocking */ + /* (StevensUNP p463), if tcp listening socket is blocking, then + it may block in accept, even if select() says readable. */ + if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno)); + } + + /* Bind it... */ + if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) { + log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno)); + return -1; + } + + /* Listen to it... */ + if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) { + log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); + return -1; + } + } + + return 0; +} + +/* + * Prepare the server for take off. + * + */ +int +server_prepare(struct nsd *nsd) +{ + /* Open the database... */ + if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, nsd->child_count)) == NULL) { + log_msg(LOG_ERR, "unable to open the database %s: %s", + nsd->dbfile, strerror(errno)); + return -1; + } + + /* Read diff file */ + if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { + log_msg(LOG_ERR, "The diff file contains errors. Will continue " + "without it"); + } + +#ifdef NSEC3 + prehash(nsd->db, 0); +#endif + + compression_table_capacity = 0; + initialize_dname_compression_tables(nsd); + +#ifdef BIND8_STATS + /* Initialize times... */ + time(&nsd->st.boot); + set_bind8_alarm(nsd); +#endif /* BIND8_STATS */ + + return 0; +} + +/* + * Fork the required number of servers. + */ +static int +server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, + int* xfrd_sock_p) +{ + size_t i; + + /* Start all child servers initially. */ + for (i = 0; i < nsd->child_count; ++i) { + nsd->children[i].pid = 0; + } + + return restart_child_servers(nsd, region, netio, xfrd_sock_p); +} + +static void +close_all_sockets(struct nsd_socket sockets[], size_t n) +{ + size_t i; + + /* Close all the sockets... */ + for (i = 0; i < n; ++i) { + if (sockets[i].s != -1) { + close(sockets[i].s); + free(sockets[i].addr); + sockets[i].s = -1; + } + } +} + +/* + * Close the sockets, shutdown the server and exit. + * Does not return. 
+ * + */ +static void +server_shutdown(struct nsd *nsd) +{ + size_t i; + + close_all_sockets(nsd->udp, nsd->ifs); + close_all_sockets(nsd->tcp, nsd->ifs); + /* CHILD: close command channel to parent */ + if(nsd->this_child && nsd->this_child->parent_fd > 0) + { + close(nsd->this_child->parent_fd); + nsd->this_child->parent_fd = -1; + } + /* SERVER: close command channels to children */ + if(!nsd->this_child) + { + for(i=0; i < nsd->child_count; ++i) + if(nsd->children[i].child_fd > 0) + { + close(nsd->children[i].child_fd); + nsd->children[i].child_fd = -1; + } + } + + log_finalize(); + tsig_finalize(); + + nsd_options_destroy(nsd->options); + region_destroy(nsd->region); + + exit(0); +} + +static pid_t +server_start_xfrd(struct nsd *nsd, netio_handler_type* handler) +{ + pid_t pid; + int sockets[2] = {0,0}; + zone_type* zone; + struct ipc_handler_conn_data *data; + /* no need to send updates for zones, because xfrd will read from fork-memory */ + for(zone = nsd->db->zones; zone; zone=zone->next) { + zone->updated = 0; + } + + if(handler->fd != -1) + close(handler->fd); + if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { + log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); + return -1; + } + pid = fork(); + switch (pid) { + case -1: + log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); + break; + case 0: + /* CHILD: close first socket, use second one */ + close(sockets[0]); + xfrd_init(sockets[1], nsd); + /* ENOTREACH */ + break; + default: + /* PARENT: close second socket, use first one */ + close(sockets[1]); + handler->fd = sockets[0]; + break; + } + /* PARENT only */ + handler->timeout = NULL; + handler->event_types = NETIO_EVENT_READ; + handler->event_handler = parent_handle_xfrd_command; + /* clear ongoing ipc reads */ + data = (struct ipc_handler_conn_data *) handler->user_data; + data->conn->is_reading = 0; + return pid; +} + +/* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ +static ssize_t +block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) +{ + uint8_t* buf = (uint8_t*) p; + ssize_t total = 0; + fd_set rfds; + struct timeval tv; + FD_ZERO(&rfds); + + while( total < sz) { + ssize_t ret; + FD_SET(s, &rfds); + tv.tv_sec = timeout; + tv.tv_usec = 0; + ret = select(s+1, &rfds, NULL, NULL, timeout==-1?NULL:&tv); + if(ret == -1) { + if(errno == EAGAIN) + /* blocking read */ + continue; + if(errno == EINTR) { + if(nsd->signal_hint_quit || nsd->signal_hint_shutdown) + return -1; + /* other signals can be handled later */ + continue; + } + /* some error */ + return -1; + } + if(ret == 0) { + /* operation timed out */ + return -2; + } + ret = read(s, buf+total, sz-total); + if(ret == -1) { + if(errno == EAGAIN) + /* blocking read */ + continue; + if(errno == EINTR) { + if(nsd->signal_hint_quit || nsd->signal_hint_shutdown) + return -1; + /* other signals can be handled later */ + continue; + } + /* some error */ + return -1; + } + if(ret == 0) { + /* closed connection! */ + return 0; + } + total += ret; + } + return total; +} + +/* + * Reload the database, stop parent, re-fork children and continue. + * as server_main. + */ +static void +server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, + int cmdsocket, int* xfrd_sock_p) +{ + pid_t old_pid; + sig_atomic_t cmd = NSD_QUIT_SYNC; + zone_type* zone; + int xfrd_sock = *xfrd_sock_p; + int ret; + + if(db_crc_different(nsd->db) == 0) { + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "CRC the same. 
skipping %s.", nsd->db->filename)); + } else { + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "CRC different. reread of %s.", nsd->db->filename)); + namedb_close(nsd->db); + if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, + nsd->child_count)) == NULL) { + log_msg(LOG_ERR, "unable to reload the database: %s", strerror(errno)); + exit(1); + } + } + if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) { + log_msg(LOG_ERR, "unable to load the diff file: %s", nsd->options->difffile); + exit(1); + } + log_msg(LOG_INFO, "memory recyclebin holds %lu bytes", (unsigned long) + region_get_recycle_size(nsd->db->region)); +#ifndef NDEBUG + if(nsd_debug_level >= 1) + region_log_stats(nsd->db->region); +#endif /* NDEBUG */ +#ifdef NSEC3 + prehash(nsd->db, 1); +#endif /* NSEC3 */ + + initialize_dname_compression_tables(nsd); + + /* Get our new process id */ + old_pid = nsd->pid; + nsd->pid = getpid(); + +#ifdef BIND8_STATS + /* Restart dumping stats if required. */ + time(&nsd->st.boot); + set_bind8_alarm(nsd); +#endif + + /* Start new child processes */ + if (server_start_children(nsd, server_region, netio, xfrd_sock_p) != 0) { + send_children_quit(nsd); + exit(1); + } + + /* Overwrite pid before closing old parent, to avoid race condition: + * - parent process already closed + * - pidfile still contains old_pid + * - control script contacts parent process, using contents of pidfile + */ + if (writepid(nsd) == -1) { + log_msg(LOG_ERR, "cannot overwrite the pidfile %s: %s", nsd->pidfile, strerror(errno)); + } + +#define RELOAD_SYNC_TIMEOUT 25 /* seconds */ + /* Send quit command to parent: blocking, wait for receipt. */ + do { + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); + if (write_socket(cmdsocket, &cmd, sizeof(cmd)) == -1) + { + log_msg(LOG_ERR, "problems sending command from reload %d to oldnsd %d: %s", + (int)nsd->pid, (int)old_pid, strerror(errno)); + } + /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); + ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), + RELOAD_SYNC_TIMEOUT); + if(ret == -2) { + DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. 
retry")); + } + } while (ret == -2); + if(ret == -1) { + log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", + strerror(errno)); + } + DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, cmd)); + assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); + + /* inform xfrd of new SOAs */ + cmd = NSD_SOA_BEGIN; + if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems sending soa begin from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } + for(zone= nsd->db->zones; zone; zone = zone->next) { + uint16_t sz; + const dname_type *dname_ns=0, *dname_em=0; + if(zone->updated == 0) + continue; + DEBUG(DEBUG_IPC,1, (LOG_INFO, "nsd: sending soa info for zone %s", + dname_to_string(domain_dname(zone->apex),0))); + cmd = NSD_SOA_INFO; + sz = dname_total_size(domain_dname(zone->apex)); + if(zone->soa_rrset) { + dname_ns = domain_dname( + rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[0])); + dname_em = domain_dname( + rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[1])); + sz += sizeof(uint32_t)*6 + sizeof(uint8_t)*2 + + dname_ns->name_size + dname_em->name_size; + } + sz = htons(sz); + /* use blocking writes */ + if(!write_socket(xfrd_sock, &cmd, sizeof(cmd)) || + !write_socket(xfrd_sock, &sz, sizeof(sz)) || + !write_socket(xfrd_sock, domain_dname(zone->apex), + dname_total_size(domain_dname(zone->apex)))) + { + log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } + if(zone->soa_rrset) { + uint32_t ttl = htonl(zone->soa_rrset->rrs[0].ttl); + assert(dname_ns && dname_em); + assert(zone->soa_rrset->rr_count > 0); + assert(rrset_rrtype(zone->soa_rrset) == TYPE_SOA); + assert(zone->soa_rrset->rrs[0].rdata_count == 7); + if(!write_socket(xfrd_sock, &ttl, sizeof(uint32_t)) + || !write_socket(xfrd_sock, &dname_ns->name_size, sizeof(uint8_t)) + || !write_socket(xfrd_sock, dname_name(dname_ns), dname_ns->name_size) + || !write_socket(xfrd_sock, &dname_em->name_size, sizeof(uint8_t)) + || !write_socket(xfrd_sock, dname_name(dname_em), dname_em->name_size) + || !write_socket(xfrd_sock, rdata_atom_data( + zone->soa_rrset->rrs[0].rdatas[2]), sizeof(uint32_t)) + || !write_socket(xfrd_sock, rdata_atom_data( + zone->soa_rrset->rrs[0].rdatas[3]), sizeof(uint32_t)) + || !write_socket(xfrd_sock, rdata_atom_data( + zone->soa_rrset->rrs[0].rdatas[4]), sizeof(uint32_t)) + || !write_socket(xfrd_sock, rdata_atom_data( + zone->soa_rrset->rrs[0].rdatas[5]), sizeof(uint32_t)) + || !write_socket(xfrd_sock, rdata_atom_data( + zone->soa_rrset->rrs[0].rdatas[6]), sizeof(uint32_t))) + { + log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } + } + zone->updated = 0; + } + cmd = NSD_SOA_END; + if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", + (int)nsd->pid, strerror(errno)); + } + + /* try to reopen file */ + if (nsd->file_rotation_ok) + log_reopen(nsd->log_filename, 1); + /* exit reload, continue as new server_main */ +} + +/* + * Get the mode depending on the signal hints that have been received. + * Multiple signal hints can be received and will be handled in turn. 
+ */ +static sig_atomic_t +server_signal_mode(struct nsd *nsd) +{ + if(nsd->signal_hint_quit) { + nsd->signal_hint_quit = 0; + return NSD_QUIT; + } + else if(nsd->signal_hint_shutdown) { + nsd->signal_hint_shutdown = 0; + return NSD_SHUTDOWN; + } + else if(nsd->signal_hint_child) { + nsd->signal_hint_child = 0; + return NSD_REAP_CHILDREN; + } + else if(nsd->signal_hint_reload) { + nsd->signal_hint_reload = 0; + return NSD_RELOAD; + } + else if(nsd->signal_hint_stats) { + nsd->signal_hint_stats = 0; +#ifdef BIND8_STATS + set_bind8_alarm(nsd); +#endif + return NSD_STATS; + } + else if(nsd->signal_hint_statsusr) { + nsd->signal_hint_statsusr = 0; + return NSD_STATS; + } + return NSD_RUN; +} + +/* + * The main server simply waits for signals and child processes to + * terminate. Child processes are restarted as necessary. + */ +void +server_main(struct nsd *nsd) +{ + region_type *server_region = region_create(xalloc, free); + netio_type *netio = netio_create(server_region); + netio_handler_type reload_listener; + netio_handler_type xfrd_listener; + int reload_sockets[2] = {-1, -1}; + struct timespec timeout_spec; + int fd; + int status; + pid_t child_pid; + pid_t reload_pid = -1; + pid_t xfrd_pid = -1; + sig_atomic_t mode; + + /* Ensure we are the main process */ + assert(nsd->server_kind == NSD_SERVER_MAIN); + + xfrd_listener.user_data = (struct ipc_handler_conn_data*)region_alloc( + server_region, sizeof(struct ipc_handler_conn_data)); + xfrd_listener.fd = -1; + ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->nsd = nsd; + ((struct ipc_handler_conn_data*)xfrd_listener.user_data)->conn = + xfrd_tcp_create(server_region); + + /* Start the XFRD process */ + xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); + netio_add_handler(netio, &xfrd_listener); + + /* Start the child processes that handle incoming queries */ + if (server_start_children(nsd, server_region, netio, &xfrd_listener.fd) != 0) { + send_children_quit(nsd); + exit(1); + } + reload_listener.fd = -1; + + /* This_child MUST be 0, because this is the parent process */ + assert(nsd->this_child == 0); + + /* Run the server until we get a shutdown signal */ + while ((mode = nsd->mode) != NSD_SHUTDOWN) { + /* Did we receive a signal that changes our mode? 
*/ + if(mode == NSD_RUN) { + nsd->mode = mode = server_signal_mode(nsd); + } + + switch (mode) { + case NSD_RUN: + /* see if any child processes terminated */ + while((child_pid = waitpid(0, &status, WNOHANG)) != -1 && child_pid != 0) { + int is_child = delete_child_pid(nsd, child_pid); + if (is_child != -1 && nsd->children[is_child].need_to_exit) { + if(nsd->children[is_child].child_fd == -1) + nsd->children[is_child].has_exited = 1; + parent_check_all_children_exited(nsd); + } else if(is_child != -1) { + log_msg(LOG_WARNING, + "server %d died unexpectedly with status %d, restarting", + (int) child_pid, status); + restart_child_servers(nsd, server_region, netio, + &xfrd_listener.fd); + } else if (child_pid == reload_pid) { + sig_atomic_t cmd = NSD_SOA_END; + log_msg(LOG_WARNING, + "Reload process %d failed with status %d, continuing with old database", + (int) child_pid, status); + reload_pid = -1; + if(reload_listener.fd > 0) close(reload_listener.fd); + reload_listener.fd = -1; + reload_listener.event_types = NETIO_EVENT_NONE; + /* inform xfrd reload attempt ended */ + if(!write_socket(xfrd_listener.fd, + &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "problems " + "sending SOAEND to xfrd: %s", + strerror(errno)); + } + } else if (child_pid == xfrd_pid) { + log_msg(LOG_WARNING, + "xfrd process %d failed with status %d, restarting ", + (int) child_pid, status); + xfrd_pid = server_start_xfrd(nsd, &xfrd_listener); + } else { + log_msg(LOG_WARNING, + "Unknown child %d terminated with status %d", + (int) child_pid, status); + } + } + if (child_pid == -1) { + if (errno == EINTR) { + continue; + } + log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); + } + if (nsd->mode != NSD_RUN) + break; + + /* timeout to collect processes. In case no sigchild happens. */ + timeout_spec.tv_sec = 60; + timeout_spec.tv_nsec = 0; + + /* listen on ports, timeout for collecting terminated children */ + if(netio_dispatch(netio, &timeout_spec, 0) == -1) { + if (errno != EINTR) { + log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); + } + } + + break; + case NSD_RELOAD: + /* Continue to run nsd after reload */ + nsd->mode = NSD_RUN; + + if (reload_pid != -1) { + log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", + (int) reload_pid); + break; + } + + log_msg(LOG_WARNING, "signal received, reloading..."); + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { + log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); + reload_pid = -1; + break; + } + + /* Do actual reload */ + reload_pid = fork(); + switch (reload_pid) { + case -1: + log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); + break; + case 0: + /* CHILD */ + close(reload_sockets[0]); + server_reload(nsd, server_region, netio, + reload_sockets[1], &xfrd_listener.fd); + DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); + close(reload_sockets[1]); + DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); + /* drop stale xfrd ipc data */ + ((struct ipc_handler_conn_data*)xfrd_listener.user_data) + ->conn->is_reading = 0; + reload_pid = -1; + reload_listener.fd = -1; + reload_listener.event_types = NETIO_EVENT_NONE; + DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); + break; + default: + /* PARENT, keep running until NSD_QUIT_SYNC + * received from CHILD.
+ */ + close(reload_sockets[1]); + reload_listener.fd = reload_sockets[0]; + reload_listener.timeout = NULL; + reload_listener.user_data = nsd; + reload_listener.event_types = NETIO_EVENT_READ; + reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ + netio_add_handler(netio, &reload_listener); + break; + } + break; + case NSD_QUIT_SYNC: + /* synchronisation of xfrd, parent and reload */ + if(!nsd->quit_sync_done && reload_listener.fd > 0) { + sig_atomic_t cmd = NSD_RELOAD; + /* stop xfrd ipc writes in progress */ + DEBUG(DEBUG_IPC,1, (LOG_INFO, + "main: ipc send indication reload")); + if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "server_main: could not send reload " + "indication to xfrd: %s", strerror(errno)); + } + /* wait for ACK from xfrd */ + DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); + nsd->quit_sync_done = 1; + } + nsd->mode = NSD_RUN; + break; + case NSD_QUIT: + /* silent shutdown during reload */ + if(reload_listener.fd > 0) { + /* acknowledge the quit, to sync reload that we will really quit now */ + sig_atomic_t cmd = NSD_RELOAD; + DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); + if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "server_main: " + "could not ack quit: %s", strerror(errno)); + } + close(reload_listener.fd); + } + /* only quit children after xfrd has acked */ + send_children_quit(nsd); + + region_destroy(server_region); + namedb_close(nsd->db); + server_shutdown(nsd); + + /* ENOTREACH */ + break; + case NSD_SHUTDOWN: + send_children_quit(nsd); + log_msg(LOG_WARNING, "signal received, shutting down..."); + break; + case NSD_REAP_CHILDREN: + /* continue; wait for child in run loop */ + nsd->mode = NSD_RUN; + break; + case NSD_STATS: +#ifdef BIND8_STATS + set_children_stats(nsd); +#endif + nsd->mode = NSD_RUN; + break; + default: + log_msg(LOG_WARNING, "NSD main server mode invalid: %d", nsd->mode); + nsd->mode = NSD_RUN; + break; + } + } + + /* Truncate the pid file. */ + if ((fd = open(nsd->pidfile, O_WRONLY | O_TRUNC, 0644)) == -1) { + log_msg(LOG_ERR, "can not truncate the pid file %s: %s", nsd->pidfile, strerror(errno)); + } + close(fd); + + /* Unlink it if possible... */ + unlinkpid(nsd->pidfile); + + if(reload_listener.fd > 0) + close(reload_listener.fd); + if(xfrd_listener.fd > 0) { + /* complete quit, stop xfrd */ + sig_atomic_t cmd = NSD_QUIT; + DEBUG(DEBUG_IPC,1, (LOG_INFO, + "main: ipc send quit to xfrd")); + if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) { + log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", + strerror(errno)); + } + fsync(xfrd_listener.fd); + close(xfrd_listener.fd); + } + + namedb_close(nsd->db); + region_destroy(server_region); + server_shutdown(nsd); +} + +static query_state_type +server_process_query(struct nsd *nsd, struct query *query) +{ + return query_process(query, nsd); +} + + +/* + * Serve DNS requests. 
+ */ +void +server_child(struct nsd *nsd) +{ + size_t i; + region_type *server_region = region_create(xalloc, free); + netio_type *netio = netio_create(server_region); + netio_handler_type *tcp_accept_handlers; + query_type *udp_query; + sig_atomic_t mode; + + assert(nsd->server_kind != NSD_SERVER_MAIN); + DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); + + if (!(nsd->server_kind & NSD_SERVER_TCP)) { + close_all_sockets(nsd->tcp, nsd->ifs); + } + if (!(nsd->server_kind & NSD_SERVER_UDP)) { + close_all_sockets(nsd->udp, nsd->ifs); + } + + if (nsd->this_child && nsd->this_child->parent_fd != -1) { + netio_handler_type *handler; + + handler = (netio_handler_type *) region_alloc( + server_region, sizeof(netio_handler_type)); + handler->fd = nsd->this_child->parent_fd; + handler->timeout = NULL; + handler->user_data = (struct ipc_handler_conn_data*)region_alloc( + server_region, sizeof(struct ipc_handler_conn_data)); + ((struct ipc_handler_conn_data*)handler->user_data)->nsd = nsd; + ((struct ipc_handler_conn_data*)handler->user_data)->conn = + xfrd_tcp_create(server_region); + handler->event_types = NETIO_EVENT_READ; + handler->event_handler = child_handle_parent_command; + netio_add_handler(netio, handler); + } + + if (nsd->server_kind & NSD_SERVER_UDP) { + udp_query = query_create(server_region, + compressed_dname_offsets, compression_table_size); + + for (i = 0; i < nsd->ifs; ++i) { + struct udp_handler_data *data; + netio_handler_type *handler; + + data = (struct udp_handler_data *) region_alloc( + server_region, + sizeof(struct udp_handler_data)); + data->query = udp_query; + data->nsd = nsd; + data->socket = &nsd->udp[i]; + + handler = (netio_handler_type *) region_alloc( + server_region, sizeof(netio_handler_type)); + handler->fd = nsd->udp[i].s; + handler->timeout = NULL; + handler->user_data = data; + handler->event_types = NETIO_EVENT_READ; + handler->event_handler = handle_udp; + netio_add_handler(netio, handler); + } + } + + /* + * Keep track of all the TCP accept handlers so we can enable + * and disable them based on the current number of active TCP + * connections. + */ + tcp_accept_handlers = (netio_handler_type *) region_alloc( + server_region, nsd->ifs * sizeof(netio_handler_type)); + if (nsd->server_kind & NSD_SERVER_TCP) { + for (i = 0; i < nsd->ifs; ++i) { + struct tcp_accept_handler_data *data; + netio_handler_type *handler; + + data = (struct tcp_accept_handler_data *) region_alloc( + server_region, + sizeof(struct tcp_accept_handler_data)); + data->nsd = nsd; + data->socket = &nsd->tcp[i]; + data->tcp_accept_handler_count = nsd->ifs; + data->tcp_accept_handlers = tcp_accept_handlers; + + handler = &tcp_accept_handlers[i]; + handler->fd = nsd->tcp[i].s; + handler->timeout = NULL; + handler->user_data = data; + handler->event_types = NETIO_EVENT_READ; + handler->event_handler = handle_tcp_accept; + netio_add_handler(netio, handler); + } + } + + /* The main loop... */ + while ((mode = nsd->mode) != NSD_QUIT) { + if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); + + /* Do we need to do the statistics... */ + if (mode == NSD_STATS) { +#ifdef BIND8_STATS + /* Dump the statistics */ + bind8_stats(nsd); +#else /* !BIND8_STATS */ + log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); +#endif /* BIND8_STATS */ + + nsd->mode = NSD_RUN; + } + else if (mode == NSD_REAP_CHILDREN) { + /* got signal, notify parent. parent reaps terminated children. 
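+ *
+ * (Reaping itself is one line; the WNOHANG flag collects every child
+ * that has already exited without ever blocking:
+ *
+ *	while (waitpid(0, NULL, WNOHANG) > 0)
+ *		;	// pid 0: any child in our process group
+ *
+ * A 0 return means live children remain; -1 with errno ECHILD means
+ * there are none at all.)
+ *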
*/ + if (nsd->this_child->parent_fd > 0) { + sig_atomic_t parent_notify = NSD_REAP_CHILDREN; + if (write(nsd->this_child->parent_fd, + &parent_notify, + sizeof(parent_notify)) == -1) + { + log_msg(LOG_ERR, "problems sending command from %d to parent: %s", + (int) nsd->this_child->pid, strerror(errno)); + } + } else /* no parent, so reap 'em */ + while (waitpid(0, NULL, WNOHANG) > 0) ; + nsd->mode = NSD_RUN; + } + else if(mode == NSD_RUN) { + /* Wait for a query... */ + if (netio_dispatch(netio, NULL, NULL) == -1) { + if (errno != EINTR) { + log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); + break; + } + } + } else if(mode == NSD_QUIT) { + /* ignore here, quit */ + } else { + log_msg(LOG_ERR, "mode bad value %d, back to service.", + mode); + nsd->mode = NSD_RUN; + } + } + +#ifdef BIND8_STATS + bind8_stats(nsd); +#endif /* BIND8_STATS */ + + namedb_close(nsd->db); + region_destroy(server_region); + server_shutdown(nsd); +} + + +static void +handle_udp(netio_type *ATTR_UNUSED(netio), + netio_handler_type *handler, + netio_event_types_type event_types) +{ + struct udp_handler_data *data + = (struct udp_handler_data *) handler->user_data; + int received, sent; + struct query *q = data->query; + + if (!(event_types & NETIO_EVENT_READ)) { + return; + } + + /* Account... */ + if (data->socket->addr->ai_family == AF_INET) { + STATUP(data->nsd, qudp); + } else if (data->socket->addr->ai_family == AF_INET6) { + STATUP(data->nsd, qudp6); + } + + /* Initialize the query... */ + query_reset(q, UDP_MAX_MESSAGE_LEN, 0); + + received = recvfrom(handler->fd, + buffer_begin(q->packet), + buffer_remaining(q->packet), + 0, + (struct sockaddr *)&q->addr, + &q->addrlen); + if (received == -1) { + if (errno != EAGAIN && errno != EINTR) { + log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); + STATUP(data->nsd, rxerr); + } + } else { + buffer_skip(q->packet, received); + buffer_flip(q->packet); + + /* Process and answer the query... */ + if (server_process_query(data->nsd, q) != QUERY_DISCARDED) { + if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { + STATUP(data->nsd, nona); + } + + /* Add EDNS0 and TSIG info if necessary. */ + query_add_optional(q, data->nsd); + + buffer_flip(q->packet); + + sent = sendto(handler->fd, + buffer_begin(q->packet), + buffer_remaining(q->packet), + 0, + (struct sockaddr *) &q->addr, + q->addrlen); + if (sent == -1) { + log_msg(LOG_ERR, "sendto failed: %s", strerror(errno)); + STATUP(data->nsd, txerr); + } else if ((size_t) sent != buffer_remaining(q->packet)) { + log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet)); + } else { +#ifdef BIND8_STATS + /* Account the rcode & TC... */ + STATUP2(data->nsd, rcode, RCODE(q->packet)); + if (TC(q->packet)) + STATUP(data->nsd, truncated); +#endif /* BIND8_STATS */ + } + } else { + STATUP(data->nsd, dropped); + } + } +} + + +static void +cleanup_tcp_handler(netio_type *netio, netio_handler_type *handler) +{ + struct tcp_handler_data *data + = (struct tcp_handler_data *) handler->user_data; + netio_remove_handler(netio, handler); + close(handler->fd); + + /* + * Enable the TCP accept handlers when the current number of + * TCP connections is about to drop below the maximum number + * of TCP connections. 
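+ *
+ * Together with handle_tcp_accept this forms a simple toggle around
+ * the configured maximum. Sketch of the two call sites, where
+ * set_accept_events is a hypothetical wrapper around
+ * configure_handler_event_types:
+ *
+ *	// accept side: just reached the limit, stop accepting
+ *	if (++nsd->current_tcp_count == nsd->maximum_tcp_count)
+ *		set_accept_events(NETIO_EVENT_NONE);
+ *
+ *	// close side (the test below): about to leave the limit
+ *	if (nsd->current_tcp_count == nsd->maximum_tcp_count)
+ *		set_accept_events(NETIO_EVENT_READ);
+ *	--nsd->current_tcp_count;
+ *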
+ */ + if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { + configure_handler_event_types(data->tcp_accept_handler_count, + data->tcp_accept_handlers, + NETIO_EVENT_READ); + } + --data->nsd->current_tcp_count; + assert(data->nsd->current_tcp_count >= 0); + + region_destroy(data->region); +} + +static void +handle_tcp_reading(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types) +{ + struct tcp_handler_data *data + = (struct tcp_handler_data *) handler->user_data; + ssize_t received; + + if (event_types & NETIO_EVENT_TIMEOUT) { + /* Connection timed out. */ + cleanup_tcp_handler(netio, handler); + return; + } + + if (data->nsd->tcp_query_count > 0 && + data->query_count >= data->nsd->tcp_query_count) { + /* No more queries allowed on this tcp connection. */ + cleanup_tcp_handler(netio, handler); + return; + } + + assert(event_types & NETIO_EVENT_READ); + + if (data->bytes_transmitted == 0) { + query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); + } + + /* + * Check if we received the leading packet length bytes yet. + */ + if (data->bytes_transmitted < sizeof(uint16_t)) { + received = read(handler->fd, + (char *) &data->query->tcplen + + data->bytes_transmitted, + sizeof(uint16_t) - data->bytes_transmitted); + if (received == -1) { + if (errno == EAGAIN || errno == EINTR) { + /* + * Read would block, wait until more + * data is available. + */ + return; + } else { +#ifdef ECONNRESET + if (verbosity >= 2 || errno != ECONNRESET) +#endif /* ECONNRESET */ + log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); + cleanup_tcp_handler(netio, handler); + return; + } + } else if (received == 0) { + /* EOF */ + cleanup_tcp_handler(netio, handler); + return; + } + + data->bytes_transmitted += received; + if (data->bytes_transmitted < sizeof(uint16_t)) { + /* + * Not done with the tcplen yet, wait for more + * data to become available. + */ + return; + } + + assert(data->bytes_transmitted == sizeof(uint16_t)); + + data->query->tcplen = ntohs(data->query->tcplen); + + /* + * Minimum query size is: + * + * Size of the header (12) + * + Root domain name (1) + * + Query class (2) + * + Query type (2) + */ + if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { + VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); + cleanup_tcp_handler(netio, handler); + return; + } + + if (data->query->tcplen > data->query->maxlen) { + VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); + cleanup_tcp_handler(netio, handler); + return; + } + + buffer_set_limit(data->query->packet, data->query->tcplen); + } + + assert(buffer_remaining(data->query->packet) > 0); + + /* Read the (remaining) query data. */ + received = read(handler->fd, + buffer_current(data->query->packet), + buffer_remaining(data->query->packet)); + if (received == -1) { + if (errno == EAGAIN || errno == EINTR) { + /* + * Read would block, wait until more data is + * available. + */ + return; + } else { +#ifdef ECONNRESET + if (verbosity >= 2 || errno != ECONNRESET) +#endif /* ECONNRESET */ + log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno)); + cleanup_tcp_handler(netio, handler); + return; + } + } else if (received == 0) { + /* EOF */ + cleanup_tcp_handler(netio, handler); + return; + } + + data->bytes_transmitted += received; + buffer_skip(data->query->packet, received); + if (buffer_remaining(data->query->packet) > 0) { + /* + * Message not yet complete, wait for more data to + * become available. 
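+ *
+ * (For reference, the framing being reassembled piecemeal here is,
+ * in blocking form, simply length prefix then payload; read_full is
+ * an assumed read-exactly-n-bytes helper:
+ *
+ *	uint16_t len;
+ *
+ *	read_full(fd, &len, sizeof(len));
+ *	len = ntohs(len);	// 2-byte length, network byte order
+ *	if (len < QHEADERSZ + 1 + 2 + 2 || len > bufsz)
+ *		return -1;	// below the minimum query size, or too big
+ *	read_full(fd, buf, len);
+ *
+ * The handler above does exactly this, but restarts after every
+ * EAGAIN, using bytes_transmitted to remember how far it got.)
+ *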
+ */ + return; + } + + assert(buffer_position(data->query->packet) == data->query->tcplen); + + /* Account... */ +#ifndef INET6 + STATUP(data->nsd, ctcp); +#else + if (data->query->addr.ss_family == AF_INET) { + STATUP(data->nsd, ctcp); + } else if (data->query->addr.ss_family == AF_INET6) { + STATUP(data->nsd, ctcp6); + } +#endif + + /* We have a complete query, process it. */ + + /* tcp-query-count: handle query counter ++ */ + data->query_count++; + + buffer_flip(data->query->packet); + data->query_state = server_process_query(data->nsd, data->query); + if (data->query_state == QUERY_DISCARDED) { + /* Drop the packet and the entire connection... */ + STATUP(data->nsd, dropped); + cleanup_tcp_handler(netio, handler); + return; + } + + if (RCODE(data->query->packet) == RCODE_OK + && !AA(data->query->packet)) + { + STATUP(data->nsd, nona); + } + + query_add_optional(data->query, data->nsd); + + /* Switch to the tcp write handler. */ + buffer_flip(data->query->packet); + data->query->tcplen = buffer_remaining(data->query->packet); + data->bytes_transmitted = 0; + + handler->timeout->tv_sec = data->nsd->tcp_timeout; + handler->timeout->tv_nsec = 0L; + timespec_add(handler->timeout, netio_current_time(netio)); + + handler->event_types = NETIO_EVENT_WRITE | NETIO_EVENT_TIMEOUT; + handler->event_handler = handle_tcp_writing; +} + +static void +handle_tcp_writing(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types) +{ + struct tcp_handler_data *data + = (struct tcp_handler_data *) handler->user_data; + ssize_t sent; + struct query *q = data->query; + + if (event_types & NETIO_EVENT_TIMEOUT) { + /* Connection timed out. */ + cleanup_tcp_handler(netio, handler); + return; + } + + assert(event_types & NETIO_EVENT_WRITE); + + if (data->bytes_transmitted < sizeof(q->tcplen)) { + /* Writing the response packet length. */ + uint16_t n_tcplen = htons(q->tcplen); + sent = write(handler->fd, + (const char *) &n_tcplen + data->bytes_transmitted, + sizeof(n_tcplen) - data->bytes_transmitted); + if (sent == -1) { + if (errno == EAGAIN || errno == EINTR) { + /* + * Write would block, wait until + * socket becomes writable again. + */ + return; + } else { +#ifdef ECONNRESET + if(verbosity >= 2 || errno != ECONNRESET) +#endif /* ECONNRESET */ + log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); + cleanup_tcp_handler(netio, handler); + return; + } + } + + data->bytes_transmitted += sent; + if (data->bytes_transmitted < sizeof(q->tcplen)) { + /* + * Writing not complete, wait until socket + * becomes writable again. + */ + return; + } + + assert(data->bytes_transmitted == sizeof(q->tcplen)); + } + + assert(data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)); + + sent = write(handler->fd, + buffer_current(q->packet), + buffer_remaining(q->packet)); + if (sent == -1) { + if (errno == EAGAIN || errno == EINTR) { + /* + * Write would block, wait until + * socket becomes writable again. + */ + return; + } else { +#ifdef ECONNRESET + if(verbosity >= 2 || errno != ECONNRESET) +#endif /* ECONNRESET */ + log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); + cleanup_tcp_handler(netio, handler); + return; + } + } + + buffer_skip(q->packet, sent); + data->bytes_transmitted += sent; + if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { + /* + * Still more data to write when socket becomes + * writable again. 
+ */ + return; + } + + assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); + + if (data->query_state == QUERY_IN_AXFR) { + /* Continue processing AXFR and writing back results. */ + buffer_clear(q->packet); + data->query_state = query_axfr(data->nsd, q); + if (data->query_state != QUERY_PROCESSED) { + query_add_optional(data->query, data->nsd); + + /* Reset data. */ + buffer_flip(q->packet); + q->tcplen = buffer_remaining(q->packet); + data->bytes_transmitted = 0; + /* Reset timeout. */ + handler->timeout->tv_sec = data->nsd->tcp_timeout; + handler->timeout->tv_nsec = 0; + timespec_add(handler->timeout, netio_current_time(netio)); + + /* + * Write data if/when the socket is writable + * again. + */ + return; + } + } + + /* + * Done sending, wait for the next request to arrive on the + * TCP socket by installing the TCP read handler. + */ + if (data->nsd->tcp_query_count > 0 && + data->query_count >= data->nsd->tcp_query_count) { + + (void) shutdown(handler->fd, SHUT_WR); + } + + data->bytes_transmitted = 0; + + handler->timeout->tv_sec = data->nsd->tcp_timeout; + handler->timeout->tv_nsec = 0; + timespec_add(handler->timeout, netio_current_time(netio)); + + handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; + handler->event_handler = handle_tcp_reading; +} + + +/* + * Handle an incoming TCP connection. The connection is accepted and + * a new TCP reader event handler is added to NETIO. The TCP handler + * is responsible for cleanup when the connection is closed. + */ +static void +handle_tcp_accept(netio_type *netio, + netio_handler_type *handler, + netio_event_types_type event_types) +{ + struct tcp_accept_handler_data *data + = (struct tcp_accept_handler_data *) handler->user_data; + int s; + struct tcp_handler_data *tcp_data; + region_type *tcp_region; + netio_handler_type *tcp_handler; +#ifdef INET6 + struct sockaddr_storage addr; +#else + struct sockaddr_in addr; +#endif + socklen_t addrlen; + + if (!(event_types & NETIO_EVENT_READ)) { + return; + } + + if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { + return; + } + + /* Accept it... */ + addrlen = sizeof(addr); + s = accept(handler->fd, (struct sockaddr *) &addr, &addrlen); + if (s == -1) { + /* EINTR is a signal interrupt. The others are various OS ways + of saying that the client has closed the connection. */ + if ( errno != EINTR + && errno != EWOULDBLOCK +#ifdef ECONNABORTED + && errno != ECONNABORTED +#endif /* ECONNABORTED */ +#ifdef EPROTO + && errno != EPROTO +#endif /* EPROTO */ + ) { + log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); + } + return; + } + + if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); + close(s); + return; + } + + /* + * This region is deallocated when the TCP connection is + * closed by the TCP handler. 
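+ *
+ * Every allocation for the connection, including the handler and its
+ * timeout, hangs off this one region, so cleanup_tcp_handler can tear
+ * the whole connection down with a single region_destroy. In outline:
+ *
+ *	region_type *r = region_create(xalloc, free);
+ *	struct tcp_handler_data *d = region_alloc(r, sizeof(*d));
+ *	d->region = r;	// the state keeps a handle to its own region
+ *	// queries, handler, timeout are all region_alloc'd from r;
+ *	// on close, region_destroy(d->region) frees everything at once
+ *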
*/ + tcp_region = region_create(xalloc, free); + tcp_data = (struct tcp_handler_data *) region_alloc( + tcp_region, sizeof(struct tcp_handler_data)); + tcp_data->region = tcp_region; + tcp_data->query = query_create(tcp_region, compressed_dname_offsets, + compression_table_size); + tcp_data->nsd = data->nsd; + tcp_data->query_count = 0; + + tcp_data->tcp_accept_handler_count = data->tcp_accept_handler_count; + tcp_data->tcp_accept_handlers = data->tcp_accept_handlers; + + tcp_data->query_state = QUERY_PROCESSED; + tcp_data->bytes_transmitted = 0; + memcpy(&tcp_data->query->addr, &addr, addrlen); + tcp_data->query->addrlen = addrlen; + + tcp_handler = (netio_handler_type *) region_alloc( + tcp_region, sizeof(netio_handler_type)); + tcp_handler->fd = s; + tcp_handler->timeout = (struct timespec *) region_alloc( + tcp_region, sizeof(struct timespec)); + tcp_handler->timeout->tv_sec = data->nsd->tcp_timeout; + tcp_handler->timeout->tv_nsec = 0L; + timespec_add(tcp_handler->timeout, netio_current_time(netio)); + + tcp_handler->user_data = tcp_data; + tcp_handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT; + tcp_handler->event_handler = handle_tcp_reading; + + netio_add_handler(netio, tcp_handler); + + /* + * Keep track of the total number of TCP handlers installed so + * we can stop accepting connections when the maximum number + * of simultaneous TCP connections is reached. + */ + ++data->nsd->current_tcp_count; + if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { + configure_handler_event_types(data->tcp_accept_handler_count, + data->tcp_accept_handlers, + NETIO_EVENT_NONE); + } +} + +static void +send_children_quit(struct nsd* nsd) +{ + sig_atomic_t command = NSD_QUIT; + size_t i; + assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); + for (i = 0; i < nsd->child_count; ++i) { + if (nsd->children[i].pid > 0 && nsd->children[i].child_fd > 0) { + if (write(nsd->children[i].child_fd, + &command, + sizeof(command)) == -1) + { + if(errno != EAGAIN && errno != EINTR) + log_msg(LOG_ERR, "problems sending command %d to server %d: %s", + (int) command, + (int) nsd->children[i].pid, + strerror(errno)); + } + fsync(nsd->children[i].child_fd); + close(nsd->children[i].child_fd); + nsd->children[i].child_fd = -1; + } + } +} + +#ifdef BIND8_STATS +static void +set_children_stats(struct nsd* nsd) +{ + size_t i; + assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); + DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); + for (i = 0; i < nsd->child_count; ++i) { + nsd->children[i].need_to_send_STATS = 1; + nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; + } +} +#endif /* BIND8_STATS */ + +static void +configure_handler_event_types(size_t count, + netio_handler_type *handlers, + netio_event_types_type event_types) +{ + size_t i; + + assert(handlers); + + for (i = 0; i < count; ++i) { + handlers[i].event_types = event_types; + } +}