-rw-r--r--  usr.sbin/nsd/LICENSE | 2
-rw-r--r--  usr.sbin/nsd/acx_nlnetlabs.m4 | 8
-rw-r--r--  usr.sbin/nsd/answer.c | 2
-rw-r--r--  usr.sbin/nsd/answer.h | 2
-rw-r--r--  usr.sbin/nsd/axfr.h | 2
-rw-r--r--  usr.sbin/nsd/buffer.c | 2
-rw-r--r--  usr.sbin/nsd/buffer.h | 2
-rw-r--r--  usr.sbin/nsd/compat/memcmp.c | 4
-rw-r--r--  usr.sbin/nsd/compat/memmove.c | 4
-rw-r--r--  usr.sbin/nsd/compat/strptime.c | 2
-rw-r--r--  usr.sbin/nsd/configlexer.lex | 39
-rw-r--r--  usr.sbin/nsd/configyyrename.h | 32
-rw-r--r--  usr.sbin/nsd/dbaccess.c | 864
-rw-r--r--  usr.sbin/nsd/dbcreate.c | 521
-rw-r--r--  usr.sbin/nsd/difffile.c | 2138
-rw-r--r--  usr.sbin/nsd/difffile.h | 143
-rw-r--r--  usr.sbin/nsd/dname.h | 35
-rw-r--r--  usr.sbin/nsd/edns.c | 2
-rw-r--r--  usr.sbin/nsd/edns.h | 2
-rw-r--r--  usr.sbin/nsd/ipc.c | 557
-rw-r--r--  usr.sbin/nsd/ipc.h | 22
-rw-r--r--  usr.sbin/nsd/iterated_hash.c | 2
-rw-r--r--  usr.sbin/nsd/iterated_hash.h | 5
-rw-r--r--  usr.sbin/nsd/mini_event.c | 446
-rw-r--r--  usr.sbin/nsd/mini_event.h | 183
-rw-r--r--  usr.sbin/nsd/namedb.c | 671
-rw-r--r--  usr.sbin/nsd/namedb.h | 347
-rw-r--r--  usr.sbin/nsd/netio.c | 29
-rw-r--r--  usr.sbin/nsd/netio.h | 5
-rwxr-xr-x  usr.sbin/nsd/nsd-control-setup.sh.in | 160
-rw-r--r--  usr.sbin/nsd/nsd-control.8.in | 245
-rw-r--r--  usr.sbin/nsd/nsd-control.c | 415
-rw-r--r--  usr.sbin/nsd/nsd-mem.c | 360
-rw-r--r--  usr.sbin/nsd/nsd.conf.sample.in | 267
-rw-r--r--  usr.sbin/nsd/nsd.h | 101
-rw-r--r--  usr.sbin/nsd/nsec3.h | 86
-rw-r--r--  usr.sbin/nsd/options.c | 1311
-rw-r--r--  usr.sbin/nsd/options.h | 154
-rw-r--r--  usr.sbin/nsd/packet.c | 6
-rw-r--r--  usr.sbin/nsd/packet.h | 2
-rw-r--r--  usr.sbin/nsd/query.h | 11
-rw-r--r--  usr.sbin/nsd/radtree.c | 1411
-rw-r--r--  usr.sbin/nsd/radtree.h | 244
-rw-r--r--  usr.sbin/nsd/rbtree.c | 38
-rw-r--r--  usr.sbin/nsd/rbtree.h | 4
-rw-r--r--  usr.sbin/nsd/rdata.h | 2
-rw-r--r--  usr.sbin/nsd/region-allocator.h | 6
-rw-r--r--  usr.sbin/nsd/remote.c | 1943
-rw-r--r--  usr.sbin/nsd/remote.h | 102
-rw-r--r--  usr.sbin/nsd/rrl.h | 2
-rw-r--r--  usr.sbin/nsd/tsig-openssl.c | 2
-rw-r--r--  usr.sbin/nsd/tsig-openssl.h | 2
-rw-r--r--  usr.sbin/nsd/tsig.c | 68
-rw-r--r--  usr.sbin/nsd/tsig.h | 12
-rw-r--r--  usr.sbin/nsd/udb.c | 2018
-rw-r--r--  usr.sbin/nsd/udb.h | 784
-rw-r--r--  usr.sbin/nsd/udbradtree.c | 1463
-rw-r--r--  usr.sbin/nsd/udbradtree.h | 245
-rw-r--r--  usr.sbin/nsd/udbzone.c | 786
-rw-r--r--  usr.sbin/nsd/udbzone.h | 147
-rw-r--r--  usr.sbin/nsd/xfrd-disk.h | 13
-rw-r--r--  usr.sbin/nsd/xfrd-notify.c | 176
-rw-r--r--  usr.sbin/nsd/xfrd-notify.h | 30
-rw-r--r--  usr.sbin/nsd/xfrd-tcp.h | 95
-rw-r--r--  usr.sbin/nsd/xfrd.h | 106
-rw-r--r--  usr.sbin/nsd/zlexer.lex | 35
66 files changed, 15885 insertions, 3040 deletions
diff --git a/usr.sbin/nsd/LICENSE b/usr.sbin/nsd/LICENSE
index 955c3665a36..55faacfc49b 100644
--- a/usr.sbin/nsd/LICENSE
+++ b/usr.sbin/nsd/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
This software is open source.
diff --git a/usr.sbin/nsd/acx_nlnetlabs.m4 b/usr.sbin/nsd/acx_nlnetlabs.m4
index 719112645aa..e1cf83a70bd 100644
--- a/usr.sbin/nsd/acx_nlnetlabs.m4
+++ b/usr.sbin/nsd/acx_nlnetlabs.m4
@@ -2,7 +2,9 @@
# Copyright 2009, Wouter Wijngaards, NLnet Labs.
# BSD licensed.
#
-# Version 24
+# Version 26
+# 2013-09-19 FLTO help text improved.
+# 2013-07-18 Enable ACX_CHECK_COMPILER_FLAG to test for -Wstrict-prototypes
# 2013-06-25 FLTO has --disable-flto option.
# 2013-05-03 Update W32_SLEEP for newer mingw that links but not defines it.
# 2013-03-22 Fix ACX_RSRC_VERSION for long version numbers.
@@ -119,7 +121,7 @@ AC_MSG_CHECKING(whether $CC supports -$1)
cache=`echo $1 | sed 'y%.=/+-%___p_%'`
AC_CACHE_VAL(cv_prog_cc_flag_$cache,
[
-echo 'void f(){}' >conftest.c
+echo 'void f(void){}' >conftest.c
if test -z "`$CC $CPPFLAGS $CFLAGS -$1 -c conftest.c 2>&1`"; then
eval "cv_prog_cc_flag_$cache=yes"
else
@@ -409,7 +411,7 @@ dnl Check if CC supports -flto.
dnl in a way that supports clang and suncc (that flag does something else,
dnl but fails to link). It sets it in CFLAGS if it works.
AC_DEFUN([ACX_CHECK_FLTO], [
- AC_ARG_ENABLE([flto], AS_HELP_STRING([--disable-flto], [Disable link-time optimization]))
+ AC_ARG_ENABLE([flto], AS_HELP_STRING([--disable-flto], [Disable link-time optimization (gcc specific option)]))
AS_IF([test "x$enable_flto" != "xno"], [
AC_MSG_CHECKING([if $CC supports -flto])
BAKCFLAGS="$CFLAGS"
diff --git a/usr.sbin/nsd/answer.c b/usr.sbin/nsd/answer.c
index 8fa4ab16821..0377f0b5859 100644
--- a/usr.sbin/nsd/answer.c
+++ b/usr.sbin/nsd/answer.c
@@ -1,7 +1,7 @@
/*
* answer.c -- manipulating query answers and encoding them.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/answer.h b/usr.sbin/nsd/answer.h
index 85d349f438d..acb3665af11 100644
--- a/usr.sbin/nsd/answer.h
+++ b/usr.sbin/nsd/answer.h
@@ -1,7 +1,7 @@
/*
* answer.h -- manipulating query answers and encoding them.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/axfr.h b/usr.sbin/nsd/axfr.h
index b5d7afc29fd..33a68629523 100644
--- a/usr.sbin/nsd/axfr.h
+++ b/usr.sbin/nsd/axfr.h
@@ -1,7 +1,7 @@
/*
* axfr.h -- generating AXFR responses.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/buffer.c b/usr.sbin/nsd/buffer.c
index 49151018fa9..d71fa15e3f3 100644
--- a/usr.sbin/nsd/buffer.c
+++ b/usr.sbin/nsd/buffer.c
@@ -1,7 +1,7 @@
/*
* buffer.c -- generic memory buffer .
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/buffer.h b/usr.sbin/nsd/buffer.h
index 28610fe9310..bee7d8b29eb 100644
--- a/usr.sbin/nsd/buffer.h
+++ b/usr.sbin/nsd/buffer.h
@@ -1,7 +1,7 @@
/*
* buffer.h -- generic memory buffer.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/compat/memcmp.c b/usr.sbin/nsd/compat/memcmp.c
index 6d3d27ac9e7..371b3d11baf 100644
--- a/usr.sbin/nsd/compat/memcmp.c
+++ b/usr.sbin/nsd/compat/memcmp.c
@@ -1,7 +1,7 @@
/*
- * memcmp.c: memcmp compat implementation.
+ * memcmp.c: memcmp compat implementation.
*
- * Copyright (c) 2010-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2010, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*/
diff --git a/usr.sbin/nsd/compat/memmove.c b/usr.sbin/nsd/compat/memmove.c
index fd65a93f84e..f83996684fe 100644
--- a/usr.sbin/nsd/compat/memmove.c
+++ b/usr.sbin/nsd/compat/memmove.c
@@ -1,7 +1,7 @@
/*
- * memmove.c: memmove compat implementation.
+ * memmove.c: memmove compat implementation.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*/
diff --git a/usr.sbin/nsd/compat/strptime.c b/usr.sbin/nsd/compat/strptime.c
index 6986d35ce73..4ec96c12cef 100644
--- a/usr.sbin/nsd/compat/strptime.c
+++ b/usr.sbin/nsd/compat/strptime.c
@@ -10,7 +10,7 @@
* - Does not properly processes year day
*
* LICENSE
- * Copyright (c) 2008-2011, NLnet Labs, Matthijs Mekking.
+ * Copyright (c) 2008, NLnet Labs, Matthijs Mekking.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr.sbin/nsd/configlexer.lex b/usr.sbin/nsd/configlexer.lex
index 55bf4cfe62a..ee4ad1522d9 100644
--- a/usr.sbin/nsd/configlexer.lex
+++ b/usr.sbin/nsd/configlexer.lex
@@ -2,7 +2,7 @@
/*
* configlexer.lex - lexical analyzer for NSD config file
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
*
* See LICENSE for the license.
*
@@ -20,8 +20,6 @@
#include "configparser.h"
void c_error(const char *message);
-#define YY_NO_UNPUT
-
#if 0
#define LEXOUT(s) printf s /* used ONLY when debugging */
#else
@@ -47,6 +45,15 @@ static void config_start_include(const char* filename)
c_error_msg("includes nested too deeply, skipped (>%d)", MAXINCLUDES);
return;
}
+ if (cfg_parser->chroot) {
+ int l = strlen(cfg_parser->chroot); /* chroot has trailing slash */
+ if (strncmp(cfg_parser->chroot, filename, l) != 0) {
+ c_error_msg("include file '%s' is not relative to chroot '%s'",
+ filename, cfg_parser->chroot);
+ return;
+ }
+ filename += l - 1; /* strip chroot without trailing slash */
+ }
input = fopen(filename, "r");
if(!input) {
c_error_msg("cannot open include file '%s': %s",
@@ -82,6 +89,16 @@ static void config_end_include(void)
#endif
%}
+%option noinput
+%option nounput
+%{
+#ifndef YY_NO_UNPUT
+#define YY_NO_UNPUT 1
+#endif
+#ifndef YY_NO_INPUT
+#define YY_NO_INPUT 1
+#endif
+%}
SPACE [ \t]
LETTER [a-zA-Z]
@@ -104,6 +121,8 @@ debug-mode{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DEBUG_MODE;}
hide-version{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_HIDE_VERSION;}
ip4-only{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_IP4_ONLY;}
ip6-only{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_IP6_ONLY;}
+do-ip4{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DO_IP4;}
+do-ip6{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DO_IP6;}
database{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DATABASE;}
identity{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_IDENTITY;}
nsid{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_NSID;}
@@ -117,12 +136,13 @@ ipv6-edns-size{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_IPV6_EDNS_SIZE;}
pidfile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_PIDFILE;}
port{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_PORT;}
statistics{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_STATISTICS;}
-zone-stats-file{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONESTATSFILE;}
chroot{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CHROOT;}
username{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_USERNAME;}
zonesdir{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONESDIR;}
+zonelistfile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONELISTFILE;}
difffile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DIFFFILE;}
xfrdfile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRDFILE;}
+xfrdir{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRDIR;}
xfrd-reload-timeout{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRD_RELOAD_TIMEOUT;}
verbosity{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_VERBOSITY;}
zone{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONE;}
@@ -137,6 +157,16 @@ allow-axfr-fallback{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ALLOW_AXFR_F
key{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_KEY;}
algorithm{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ALGORITHM;}
secret{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_SECRET;}
+pattern{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_PATTERN;}
+include-pattern{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_INCLUDEPATTERN;}
+remote-control{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_REMOTE_CONTROL;}
+control-enable{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CONTROL_ENABLE;}
+control-interface{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CONTROL_INTERFACE;}
+control-port{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CONTROL_PORT;}
+server-key-file{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_SERVER_KEY_FILE;}
+server-cert-file{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_SERVER_CERT_FILE;}
+control-key-file{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CONTROL_KEY_FILE;}
+control-cert-file{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_CONTROL_CERT_FILE;}
AXFR { LEXOUT(("v(%s) ", yytext)); return VAR_AXFR;}
UDP { LEXOUT(("v(%s) ", yytext)); return VAR_UDP;}
rrl-size{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_RRL_SIZE;}
@@ -146,6 +176,7 @@ rrl-ipv4-prefix-length{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_RRL_IPV4_
rrl-ipv6-prefix-length{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_RRL_IPV6_PREFIX_LENGTH;}
rrl-whitelist-ratelimit{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_RRL_WHITELIST_RATELIMIT;}
rrl-whitelist{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_RRL_WHITELIST;}
+zonefiles-check{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONEFILES_CHECK;}
{NEWLINE} { LEXOUT(("NL\n")); cfg_parser->line++;}
/* Quoted strings. Strip leading and ending quotes */
diff --git a/usr.sbin/nsd/configyyrename.h b/usr.sbin/nsd/configyyrename.h
index 856dfe96d3e..6beb810aa8e 100644
--- a/usr.sbin/nsd/configyyrename.h
+++ b/usr.sbin/nsd/configyyrename.h
@@ -1,7 +1,7 @@
/*
* configyyrename.h -- renames for config file yy values to avoid conflicts.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -32,6 +32,7 @@
#define yyps c_ps
#define yypv c_pv
#define yys c_s
+#define yyss c_ss
#define yy_yys c_yys
#define yystate c_state
#define yytmp c_tmp
@@ -85,5 +86,34 @@
#define yyget_leng c_get_leng
#define yylineno c_lineno
#define yyget_text c_get_text
+#define yyvsp c_vsp
+#define yyvs c_vs
+#define yytext c_text
+#define yyleng c_leng
+#define yy_meta c__meta
+#define yy_start c__start
+#define yy_nxt c__nxt
+#define yy_n_chars c__n_chars
+#define yy_more_flag c__more_flag
+#define yy_more_len c__more_len
+#define yy_try_NUL_trans c__try_NUL_trans
+#define yy_last_accepting_cpos c__last_accepting_cpos
+#define yy_last_accepting_state c__last_accepting_state
+#define yy_init c__init
+#define yy_base c__base
+#define yy_accept c__accept
+#define yy_c_buf_p c__c_buf_p
+#define yy_chk c__chk
+#define yy_current_buffer c__current_buffer
+#define yy_def c__def
+#define yy_did_buffer_switch_on_eof c__did_buffer_switch_on_eof
+#define yy_ec c__ec
+#define yy_fatal_error c__fatal_error
+#define yy_flex_alloc c__flex_alloc
+#define yy_flex_free c__flex_free
+#define yy_flex_realloc c__flex_realloc
+#define yy_get_next_buffer c__get_next_buffer
+#define yy_get_previous_state c__get_previous_state
+#define yy_hold_char c__hold_char
#endif /* CONFIGYYRENAME_H */
diff --git a/usr.sbin/nsd/dbaccess.c b/usr.sbin/nsd/dbaccess.c
index abecce7a1f5..866c762ea70 100644
--- a/usr.sbin/nsd/dbaccess.c
+++ b/usr.sbin/nsd/dbaccess.c
@@ -1,7 +1,7 @@
/*
* dbaccess.c -- access methods for nsd(8) database
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -17,467 +17,569 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
-#include <stdio.h> /* DEBUG */
#include "dns.h"
#include "namedb.h"
#include "util.h"
#include "options.h"
+#include "rdata.h"
+#include "udb.h"
+#include "udbradtree.h"
+#include "udbzone.h"
+#include "zonec.h"
+#include "nsec3.h"
+#include "difffile.h"
+
+static time_t udb_time = 0;
+static unsigned udb_rrsets = 0;
+static unsigned udb_rrset_count = 0;
-int
-namedb_lookup(struct namedb *db,
- const dname_type *dname,
- domain_type **closest_match,
- domain_type **closest_encloser)
+void
+namedb_close(struct namedb* db)
{
- return domain_table_search(
- db->domains, dname, closest_match, closest_encloser);
+ if(db) {
+ if(db->udb) {
+ udb_base_close(db->udb);
+ udb_base_free(db->udb);
+ }
+ zonec_desetup_parser();
+ region_destroy(db->region);
+ }
}
-static int
-read_magic(namedb_type *db)
+void
+namedb_close_udb(struct namedb* db)
{
- char buf[NAMEDB_MAGIC_SIZE];
-
- if (fread(buf, sizeof(char), sizeof(buf), db->fd) != sizeof(buf))
- return 0;
-
- return memcmp(buf, NAMEDB_MAGIC, NAMEDB_MAGIC_SIZE) == 0;
+ if(db) {
+ /* we cannot actually munmap the data, because other
+ * processes still need to access the udb, so cleanup the
+ * udb */
+ udb_base_free_keep_mmap(db->udb);
+ db->udb = NULL;
+ }
}
-static const dname_type *
-read_dname(FILE *fd, region_type *region)
+void
+apex_rrset_checks(namedb_type* db, rrset_type* rrset, domain_type* domain)
{
- uint8_t size;
- uint8_t temp[MAXDOMAINLEN];
+ uint32_t soa_minimum;
+ unsigned i;
+ zone_type* zone = rrset->zone;
+ assert(domain == zone->apex);
+ (void)domain;
+ if (rrset_rrtype(rrset) == TYPE_SOA) {
+ zone->soa_rrset = rrset;
- if (fread(&size, sizeof(uint8_t), 1, fd) != 1)
- return NULL;
- if (fread(temp, sizeof(uint8_t), size, fd) != size)
- return NULL;
+ /* BUG #103 add another soa with a tweaked ttl */
+ if(zone->soa_nx_rrset == 0) {
+ zone->soa_nx_rrset = region_alloc(db->region,
+ sizeof(rrset_type));
+ zone->soa_nx_rrset->rr_count = 1;
+ zone->soa_nx_rrset->next = 0;
+ zone->soa_nx_rrset->zone = zone;
+ zone->soa_nx_rrset->rrs = region_alloc(db->region,
+ sizeof(rr_type));
+ }
+ memcpy(zone->soa_nx_rrset->rrs, rrset->rrs, sizeof(rr_type));
- return dname_make(region, temp, 1);
+ /* check the ttl and MINIMUM value and set accordinly */
+ memcpy(&soa_minimum, rdata_atom_data(rrset->rrs->rdatas[6]),
+ rdata_atom_size(rrset->rrs->rdatas[6]));
+ if (rrset->rrs->ttl > ntohl(soa_minimum)) {
+ zone->soa_nx_rrset->rrs[0].ttl = ntohl(soa_minimum);
+ }
+ } else if (rrset_rrtype(rrset) == TYPE_NS) {
+ zone->ns_rrset = rrset;
+ } else if (rrset_rrtype(rrset) == TYPE_RRSIG) {
+ for (i = 0; i < rrset->rr_count; ++i) {
+ if(rr_rrsig_type_covered(&rrset->rrs[i])==TYPE_DNSKEY){
+ zone->is_secure = 1;
+ break;
+ }
+ }
+ }
}
-static int
-read_size(namedb_type *db, uint32_t *result)
+/** read rr */
+static void
+read_rr(namedb_type* db, rr_type* rr, udb_ptr* urr, domain_type* domain)
{
- if (fread(result, sizeof(*result), 1, db->fd) == 1) {
- *result = ntohl(*result);
- return 1;
- } else {
- return 0;
+ buffer_type buffer;
+ ssize_t c;
+ assert(udb_ptr_get_type(urr) == udb_chunk_type_rr);
+ rr->owner = domain;
+ rr->type = RR(urr)->type;
+ rr->klass = RR(urr)->klass;
+ rr->ttl = RR(urr)->ttl;
+
+ buffer_create_from(&buffer, RR(urr)->wire, RR(urr)->len);
+ c = rdata_wireformat_to_rdata_atoms(db->region, db->domains,
+ rr->type, RR(urr)->len, &buffer, &rr->rdatas);
+ if(c == -1) {
+ /* safe on error */
+ rr->rdata_count = 0;
+ rr->rdatas = NULL;
+ return;
}
+ rr->rdata_count = c;
}
-static domain_type *
-read_domain(namedb_type *db, uint32_t domain_count, domain_type **domains)
+/** calculate rr count */
+static uint16_t
+calculate_rr_count(udb_base* udb, udb_ptr* rrset)
{
- uint32_t domain_number;
-
- if (!read_size(db, &domain_number))
- return NULL;
-
- if (domain_number == 0 || domain_number > domain_count)
- return NULL;
-
- return domains[domain_number - 1];
+ udb_ptr rr;
+ uint16_t num = 0;
+ udb_ptr_new(&rr, udb, &RRSET(rrset)->rrs);
+ while(rr.data) {
+ num++;
+ udb_ptr_set_rptr(&rr, udb, &RR(&rr)->next);
+ }
+ udb_ptr_unlink(&rr, udb);
+ return num;
}
-static zone_type *
-read_zone(namedb_type *db, uint32_t zone_count, zone_type **zones)
+/** read rrset */
+static void
+read_rrset(udb_base* udb, namedb_type* db, zone_type* zone,
+ domain_type* domain, udb_ptr* urrset)
{
- uint32_t zone_number;
-
- if (!read_size(db, &zone_number))
- return NULL;
-
- if (zone_number == 0 || zone_number > zone_count)
- return NULL;
-
- return zones[zone_number - 1];
+ rrset_type* rrset;
+ udb_ptr urr;
+ unsigned i;
+ assert(udb_ptr_get_type(urrset) == udb_chunk_type_rrset);
+ /* if no RRs, do not create anything (robust) */
+ if(RRSET(urrset)->rrs.data == 0)
+ return;
+ rrset = (rrset_type *) region_alloc(db->region, sizeof(rrset_type));
+ rrset->zone = zone;
+ rrset->rr_count = calculate_rr_count(udb, urrset);
+ rrset->rrs = (rr_type *) region_alloc(
+ db->region, rrset->rr_count * sizeof(rr_type));
+ /* add the RRs */
+ udb_ptr_new(&urr, udb, &RRSET(urrset)->rrs);
+ for(i=0; i<rrset->rr_count; i++) {
+ read_rr(db, &rrset->rrs[i], &urr, domain);
+ udb_ptr_set_rptr(&urr, udb, &RR(&urr)->next);
+ }
+ udb_ptr_unlink(&urr, udb);
+ domain_add_rrset(domain, rrset);
+ if(domain == zone->apex)
+ apex_rrset_checks(db, rrset, domain);
}
-static int
-read_rdata_atom(namedb_type *db, uint16_t type, int index, uint32_t domain_count, domain_type **domains, rdata_atom_type *result)
+/** read one elem from db, of type domain_d */
+static void read_node_elem(udb_base* udb, namedb_type* db,
+ region_type* dname_region, zone_type* zone, struct domain_d* d)
{
- uint8_t data[65536];
-
- if (rdata_atom_is_domain(type, index)) {
- result->domain = read_domain(db, domain_count, domains);
- if (!result->domain)
- return 0;
- } else {
- uint16_t size;
-
- if (fread(&size, sizeof(size), 1, db->fd) != 1)
- return 0;
- size = ntohs(size);
- if (fread(data, sizeof(uint8_t), size, db->fd) != size)
- return 0;
-
- result->data = (uint16_t *) region_alloc(
- db->region, sizeof(uint16_t) + size);
- memcpy(result->data, &size, sizeof(uint16_t));
- memcpy((uint8_t *) result->data + sizeof(uint16_t), data, size);
+ const dname_type* dname;
+ domain_type* domain;
+ udb_ptr urrset;
+
+ dname = dname_make(dname_region, d->name, 0);
+ if(!dname) return;
+ domain = domain_table_insert(db->domains, dname);
+ assert(domain); /* domain_table_insert should always return non-NULL */
+
+ /* add rrsets */
+ udb_ptr_init(&urrset, udb);
+ udb_ptr_set_rptr(&urrset, udb, &d->rrsets);
+ while(urrset.data) {
+ read_rrset(udb, db, zone, domain, &urrset);
+ udb_ptr_set_rptr(&urrset, udb, &RRSET(&urrset)->next);
+
+ if(++udb_rrsets % ZONEC_PCT_COUNT == 0 && time(NULL) > udb_time + ZONEC_PCT_TIME) {
+ udb_time = time(NULL);
+ VERBOSITY(1, (LOG_INFO, "read %s %d %%",
+ zone->opts->name, udb_rrsets*100/udb_rrset_count));
+ }
}
-
- return 1;
+ region_free_all(dname_region);
+ udb_ptr_unlink(&urrset, udb);
}
-static rrset_type *
-read_rrset(namedb_type *db,
- uint32_t domain_count, domain_type **domains,
- uint32_t zone_count, zone_type **zones)
+/** recurse read radix from disk. This radix tree is by domain name, so max of
+ * 256 depth, and thus the stack usage is small. */
+static void read_zone_recurse(udb_base* udb, namedb_type* db,
+ region_type* dname_region, zone_type* zone, struct udb_radnode_d* node)
{
- rrset_type *rrset;
- int i, j;
- domain_type *owner;
- uint16_t type;
- uint16_t klass;
- uint32_t soa_minimum;
-
- owner = read_domain(db, domain_count, domains);
- if (!owner)
- return NULL;
-
- rrset = (rrset_type *) region_alloc(db->region, sizeof(rrset_type));
-
- rrset->zone = read_zone(db, zone_count, zones);
- if (!rrset->zone)
- return NULL;
-
- if (fread(&type, sizeof(type), 1, db->fd) != 1)
- return NULL;
- type = ntohs(type);
-
- if (fread(&klass, sizeof(klass), 1, db->fd) != 1)
- return NULL;
- klass = ntohs(klass);
-
- if (fread(&rrset->rr_count, sizeof(rrset->rr_count), 1, db->fd) != 1)
- return NULL;
- rrset->rr_count = ntohs(rrset->rr_count);
- rrset->rrs = (rr_type *) region_alloc(
- db->region, rrset->rr_count * sizeof(rr_type));
-
- assert(rrset->rr_count > 0);
-
- for (i = 0; i < rrset->rr_count; ++i) {
- rr_type *rr = &rrset->rrs[i];
-
- rr->owner = owner;
- rr->type = type;
- rr->klass = klass;
-
- if (fread(&rr->rdata_count, sizeof(rr->rdata_count), 1, db->fd) != 1)
- return NULL;
- rr->rdata_count = ntohs(rr->rdata_count);
- rr->rdatas = (rdata_atom_type *) region_alloc(
- db->region, rr->rdata_count * sizeof(rdata_atom_type));
-
- if (fread(&rr->ttl, sizeof(rr->ttl), 1, db->fd) != 1)
- return NULL;
- rr->ttl = ntohl(rr->ttl);
-
- for (j = 0; j < rr->rdata_count; ++j) {
- if (!read_rdata_atom(db, rr->type, j, domain_count, domains, &rr->rdatas[j]))
- return NULL;
+ if(node->elem.data) {
+ /* pre-order process of node->elem, for radix tree this is
+ * also in-order processing (identical to order tree_next()) */
+ read_node_elem(udb, db, dname_region, zone, (struct domain_d*)
+ (udb->base + node->elem.data));
+ }
+ if(node->lookup.data) {
+ uint16_t i;
+ struct udb_radarray_d* a = (struct udb_radarray_d*)
+ (udb->base + node->lookup.data);
+ /* we do not care for what the exact radix key is, we want
+ * to add all of them and the read routine does not need
+ * the radix-key, it has it stored */
+ for(i=0; i<a->len; i++) {
+ if(a->array[i].node.data) {
+ read_zone_recurse(udb, db, dname_region, zone,
+ (struct udb_radnode_d*)(udb->base +
+ a->array[i].node.data));
+ }
}
}
+}
- domain_add_rrset(owner, rrset);
-
- if (rrset_rrtype(rrset) == TYPE_SOA) {
- assert(owner == rrset->zone->apex);
- rrset->zone->soa_rrset = rrset;
-
- /* BUG #103 add another soa with a tweaked ttl */
- rrset->zone->soa_nx_rrset = region_alloc(db->region, sizeof(rrset_type));
- rrset->zone->soa_nx_rrset->rrs =
- region_alloc(db->region, rrset->rr_count * sizeof(rr_type));
-
- memcpy(rrset->zone->soa_nx_rrset->rrs, rrset->rrs, sizeof(rr_type));
- rrset->zone->soa_nx_rrset->rr_count = 1;
- rrset->zone->soa_nx_rrset->next = 0;
+/** read zone data */
+static void
+read_zone_data(udb_base* udb, namedb_type* db, region_type* dname_region,
+ udb_ptr* z, zone_type* zone)
+{
+ udb_ptr dtree;
+ /* recursively read domains, we only read so ptrs stay valid */
+ udb_ptr_new(&dtree, udb, &ZONE(z)->domains);
+ if(RADTREE(&dtree)->root.data)
+ read_zone_recurse(udb, db, dname_region, zone,
+ (struct udb_radnode_d*)
+ (udb->base + RADTREE(&dtree)->root.data));
+ udb_ptr_unlink(&dtree, udb);
+}
- /* also add a link to the zone */
- rrset->zone->soa_nx_rrset->zone = rrset->zone;
+/** create a zone */
+zone_type*
+namedb_zone_create(namedb_type* db, const dname_type* dname,
+ zone_options_t* zo)
+{
+ zone_type* zone = (zone_type *) region_alloc(db->region,
+ sizeof(zone_type));
+ zone->node = radname_insert(db->zonetree, dname_name(dname),
+ dname->name_size, zone);
+ assert(zone->node);
+ zone->apex = domain_table_insert(db->domains, dname);
+ zone->apex->usage++; /* the zone.apex reference */
+ zone->apex->is_apex = 1;
+ zone->soa_rrset = NULL;
+ zone->soa_nx_rrset = NULL;
+ zone->ns_rrset = NULL;
+#ifdef NSEC3
+ zone->nsec3_param = NULL;
+ zone->nsec3_last = NULL;
+ zone->nsec3tree = NULL;
+ zone->hashtree = NULL;
+ zone->wchashtree = NULL;
+ zone->dshashtree = NULL;
+#endif
+ zone->opts = zo;
+ zone->is_secure = 0;
+ zone->is_changed = 0;
+ zone->is_ok = 1;
+ return zone;
+}
- /* check the ttl and MINIMUM value and set accordinly */
- memcpy(&soa_minimum, rdata_atom_data(rrset->rrs->rdatas[6]),
- rdata_atom_size(rrset->rrs->rdatas[6]));
- if (rrset->rrs->ttl > ntohl(soa_minimum)) {
- rrset->zone->soa_nx_rrset->rrs[0].ttl = ntohl(soa_minimum);
+void
+namedb_zone_delete(namedb_type* db, zone_type* zone)
+{
+ /* RRs and UDB and NSEC3 and so on must be already deleted */
+ radix_delete(db->zonetree, zone->node);
+
+ /* see if apex can be deleted */
+ if(zone->apex) {
+ zone->apex->usage --;
+ if(zone->apex->usage == 0) {
+ /* delete the apex, possibly */
+ domain_table_deldomain(db, zone->apex);
}
- owner->has_SOA = 1;
+ }
- } else if (owner == rrset->zone->apex
- && rrset_rrtype(rrset) == TYPE_NS)
- {
- rrset->zone->ns_rrset = rrset;
+ /* soa_rrset is freed when the SOA was deleted */
+ if(zone->soa_nx_rrset) {
+ region_recycle(db->region, zone->soa_nx_rrset->rrs,
+ sizeof(rr_type));
+ region_recycle(db->region, zone->soa_nx_rrset,
+ sizeof(rrset_type));
}
#ifdef NSEC3
-#ifndef FULL_PREHASH
- else if (type == TYPE_NSEC3) {
- if (0 != namedb_add_nsec3_domain(db, owner, rrset->zone)) {
- return NULL;
- }
- }
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- if (rrset_rrtype(rrset) == TYPE_RRSIG && owner == rrset->zone->apex) {
- for (i = 0; i < rrset->rr_count; ++i) {
- if (rr_rrsig_type_covered(&rrset->rrs[i]) == TYPE_DNSKEY) {
- rrset->zone->is_secure = 1;
- break;
- }
- }
+ hash_tree_delete(db->region, zone->nsec3tree);
+ hash_tree_delete(db->region, zone->hashtree);
+ hash_tree_delete(db->region, zone->wchashtree);
+ hash_tree_delete(db->region, zone->dshashtree);
+#endif
+ region_recycle(db->region, zone, sizeof(zone_type));
+}
+
+/** read a zone */
+static void
+read_zone(udb_base* udb, namedb_type* db, nsd_options_t* opt,
+ region_type* dname_region, udb_ptr* z)
+{
+ /* construct dname */
+ const dname_type* dname = dname_make(dname_region, ZONE(z)->name, 0);
+ zone_options_t* zo = dname?zone_options_find(opt, dname):NULL;
+ zone_type* zone;
+ if(!dname) return;
+ if(!zo) {
+ /* deleted from the options, remove it from the nsd.db too */
+ VERBOSITY(2, (LOG_WARNING, "zone %s is deleted",
+ dname_to_string(dname, NULL)));
+ udb_zone_delete(udb, z);
+ region_free_all(dname_region);
+ return;
}
- return rrset;
+ assert(udb_ptr_get_type(z) == udb_chunk_type_zone);
+ udb_rrsets = 0;
+ udb_rrset_count = ZONE(z)->rrset_count;
+ zone = namedb_zone_create(db, dname, zo);
+ region_free_all(dname_region);
+ read_zone_data(udb, db, dname_region, z, zone);
+ zone->is_changed = (ZONE(z)->is_changed != 0);
+#ifdef NSEC3
+ prehash_zone_complete(db, zone);
+#endif
}
-struct namedb *
-namedb_open (const char *filename, nsd_options_t* opt, size_t num_children)
+/** read zones from nsd.db */
+static void
+read_zones(udb_base* udb, namedb_type* db, nsd_options_t* opt,
+ region_type* dname_region)
{
- namedb_type *db;
+ udb_ptr ztree, n, z;
+ udb_ptr_init(&z, udb);
+ udb_ptr_new(&ztree, udb, udb_base_get_userdata(udb));
+ udb_radix_first(udb,&ztree,&n);
+ udb_time = time(NULL);
+ while(n.data) {
+ udb_ptr_set_rptr(&z, udb, &RADNODE(&n)->elem);
+ udb_radix_next(udb, &n); /* store in case n is deleted */
+ read_zone(udb, db, opt, dname_region, &z);
+ udb_ptr_zero(&z, udb);
+ }
+ udb_ptr_unlink(&ztree, udb);
+ udb_ptr_unlink(&n, udb);
+ udb_ptr_unlink(&z, udb);
+}
+/** try to read the udb file or fail */
+static int
+try_read_udb(namedb_type* db, int fd, const char* filename,
+ nsd_options_t* opt)
+{
/*
* Temporary region used while loading domain names from the
* database. The region is freed after each time a dname is
* read from the database.
*/
- region_type *dname_region;
+ region_type* dname_region;
+
+ assert(fd != -1);
+ if(!(db->udb=udb_base_create_fd(filename, fd, &namedb_walkfunc,
+ NULL))) {
+ /* fd is closed by failed udb create call */
+ VERBOSITY(1, (LOG_WARNING, "can not use %s, "
+ "will create anew", filename));
+ return 0;
+ }
+ /* sanity check if can be opened */
+ if(udb_base_get_userflags(db->udb) != 0) {
+ log_msg(LOG_WARNING, "%s was not closed properly, it might "
+ "be corrupted, will create anew", filename);
+ udb_base_free(db->udb);
+ db->udb = NULL;
+ return 0;
+ }
+ /* read if it can be opened */
+ dname_region = region_create(xalloc, free);
+ /* this operation does not fail, we end up with
+ * something, even if that is an empty namedb */
+ read_zones(db->udb, db, opt, dname_region);
+ region_destroy(dname_region);
+ return 1;
+}
+
+struct namedb *
+namedb_open (const char* filename, nsd_options_t* opt)
+{
+ namedb_type* db;
/*
- * Temporary region used to store array of domains and zones
- * while loading the database. The region is freed before
- * returning.
+ * Region used to store the loaded database. The region is
+ * freed in namedb_close.
*/
- region_type *temp_region;
-
- uint32_t dname_count;
- domain_type **domains; /* Indexed by domain number. */
-
- uint32_t zone_count;
- zone_type **zones; /* Indexed by zone number. */
-
- uint32_t i;
- uint32_t rrset_count = 0;
- uint32_t rr_count = 0;
-
- rrset_type *rrset;
-
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(namedb_type) = %lu\n", (unsigned long) sizeof(namedb_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(zone_type) = %lu\n", (unsigned long) sizeof(zone_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(domain_type) = %lu\n", (unsigned long) sizeof(domain_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(rrset_type) = %lu\n", (unsigned long) sizeof(rrset_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(rr_type) = %lu\n", (unsigned long) sizeof(rr_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(rdata_atom_type) = %lu\n", (unsigned long) sizeof(rdata_atom_type)));
- DEBUG(DEBUG_DBACCESS, 2,
- (LOG_INFO, "sizeof(rbnode_t) = %lu\n", (unsigned long) sizeof(rbnode_t)));
-
- if ((db = namedb_create()) == NULL) {
- log_msg(LOG_ERR,
- "insufficient memory to create database");
- return NULL;
+ region_type* db_region;
+ int fd;
+
+ /* attempt to open, if does not exist, create a new one */
+ fd = open(filename, O_RDWR);
+ if(fd == -1) {
+ if(errno != ENOENT) {
+ log_msg(LOG_ERR, "%s: %s", filename, strerror(errno));
+ return NULL;
+ }
}
- db->filename = region_strdup(db->region, filename);
+
+#ifdef USE_MMAP_ALLOC
+ db_region = region_create_custom(mmap_alloc, mmap_free, MMAP_ALLOC_CHUNK_SIZE,
+ MMAP_ALLOC_LARGE_OBJECT_SIZE, MMAP_ALLOC_INITIAL_CLEANUP_SIZE, 1);
+#else /* !USE_MMAP_ALLOC */
+ db_region = region_create_custom(xalloc, free, DEFAULT_CHUNK_SIZE,
+ DEFAULT_LARGE_OBJECT_SIZE, DEFAULT_INITIAL_CLEANUP_SIZE, 1);
+#endif /* !USE_MMAP_ALLOC */
+ db = (namedb_type *) region_alloc(db_region, sizeof(struct namedb));
+ db->region = db_region;
+ db->domains = domain_table_create(db->region);
+ db->zonetree = radix_tree_create(db->region);
+ db->diff_skip = 0;
+ db->diff_pos = 0;
if (gettimeofday(&(db->diff_timestamp), NULL) != 0) {
log_msg(LOG_ERR, "unable to load %s: cannot initialize"
- "timestamp", db->filename);
- namedb_destroy(db);
- return NULL;
- }
-
- /* Open it... */
- db->fd = fopen(db->filename, "r");
- if (db->fd == NULL) {
- log_msg(LOG_ERR, "unable to load %s: %s",
- db->filename, strerror(errno));
- namedb_destroy(db);
- return NULL;
- }
-
- if (!read_magic(db)) {
- log_msg(LOG_ERR, "corrupted database (read magic): %s", db->filename);
- log_msg(LOG_ERR, "cannot load database, incompatible version "
- "number. Please rebuild database and "
- "start again.");
- namedb_close(db);
+ "timestamp", filename);
+ region_destroy(db_region);
+ close(fd);
return NULL;
- }
+ }
- if (!read_size(db, &zone_count)) {
- log_msg(LOG_ERR, "corrupted database (read size): %s", db->filename);
- namedb_close(db);
- return NULL;
+ /* attempt to read the file (if it exists) */
+ if(fd != -1) {
+ if(!try_read_udb(db, fd, filename, opt))
+ fd = -1;
}
-
- DEBUG(DEBUG_DBACCESS, 1,
- (LOG_INFO, "Retrieving %lu zones\n", (unsigned long) zone_count));
-
- temp_region = region_create(xalloc, free);
- dname_region = region_create(xalloc, free);
-
- db->zone_count = zone_count;
- zones = (zone_type **) region_alloc(temp_region,
- zone_count * sizeof(zone_type *));
- for (i = 0; i < zone_count; ++i) {
- const dname_type *dname = read_dname(db->fd, dname_region);
- if (!dname) {
- log_msg(LOG_ERR, "corrupted database (read dname): %s", db->filename);
- region_destroy(dname_region);
- region_destroy(temp_region);
- namedb_close(db);
- return NULL;
- }
- zones[i] = (zone_type *) region_alloc(db->region,
- sizeof(zone_type));
- zones[i]->next = db->zones;
- db->zones = zones[i];
- zones[i]->apex = domain_table_insert(db->domains, dname);
- zones[i]->soa_rrset = NULL;
- zones[i]->soa_nx_rrset = NULL;
- zones[i]->ns_rrset = NULL;
-#ifdef NSEC3
- zones[i]->nsec3_soa_rr = NULL;
- zones[i]->nsec3_last = NULL;
-#endif
- zones[i]->opts = zone_options_find(opt, domain_dname(zones[i]->apex));
- zones[i]->number = i + 1;
- zones[i]->is_secure = 0;
- zones[i]->updated = 1;
- zones[i]->is_ok = 0;
- zones[i]->dirty = region_alloc(db->region, sizeof(uint8_t)*num_children);
- memset(zones[i]->dirty, 0, sizeof(uint8_t)*num_children);
- if(!zones[i]->opts) {
- log_msg(LOG_ERR, "cannot load database. Zone %s in db "
- "%s, but not in config file (might "
- "happen if you edited the config "
- "file). Please rebuild database and "
- "start again.",
- dname_to_string(dname, NULL), db->filename);
- region_destroy(dname_region);
- region_destroy(temp_region);
- namedb_close(db);
+ /* attempt to create the file (if necessary or failed read) */
+ if(fd == -1) {
+ if(!(db->udb=udb_base_create_new(filename, &namedb_walkfunc,
+ NULL))) {
+ region_destroy(db_region);
return NULL;
}
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- zones[i]->nsec3_domains = NULL;
- if (0 != zone_nsec3_domains_create(db, zones[i])) {
- log_msg(LOG_ERR,
- "insufficient memory for NSEC3 tree, "
- "unable to read database");
- region_destroy(dname_region);
- region_destroy(temp_region);
- namedb_close(db);
+ if(!udb_dns_init_file(db->udb)) {
+ region_destroy(db->region);
return NULL;
}
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- region_free_all(dname_region);
}
+ zonec_setup_parser(db);
+ return db;
+}
- if (!read_size(db, &dname_count)) {
- log_msg(LOG_ERR, "corrupted database (read size): %s", db->filename);
- region_destroy(dname_region);
- region_destroy(temp_region);
- namedb_close(db);
- return NULL;
+/** the the file mtime stat (or nonexist or error) */
+static int
+file_get_mtime(const char* file, time_t* mtime, int* nonexist)
+{
+ struct stat s;
+ if(stat(file, &s) != 0) {
+ *mtime = 0;
+ *nonexist = (errno == ENOENT);
+ return 0;
}
+ *nonexist = 0;
+ *mtime = s.st_mtime;
+ return 1;
+}
- DEBUG(DEBUG_DBACCESS, 1,
- (LOG_INFO, "Retrieving %lu domain names\n", (unsigned long) dname_count));
-
- domains = (domain_type **) region_alloc(
- temp_region, dname_count * sizeof(domain_type *));
- for (i = 0; i < dname_count; ++i) {
- const dname_type *dname = read_dname(db->fd, dname_region);
- if (!dname) {
- log_msg(LOG_ERR, "corrupted database (read dname): %s", db->filename);
- region_destroy(dname_region);
- region_destroy(temp_region);
- namedb_close(db);
- return NULL;
+void
+namedb_read_zonefile(struct namedb* db, struct zone* zone, udb_base* taskudb,
+ udb_ptr* last_task)
+{
+ time_t mtime = 0;
+ int nonexist = 0;
+ unsigned int errors;
+ const char* fname;
+ if(!db || !db->udb || !zone || !zone->opts || !zone->opts->pattern->zonefile)
+ return;
+ fname = config_make_zonefile(zone->opts);
+ if(!file_get_mtime(fname, &mtime, &nonexist)) {
+ if(nonexist) {
+ VERBOSITY(2, (LOG_INFO, "zonefile %s does not exist",
+ fname));
+ } else
+ log_msg(LOG_ERR, "zonefile %s: %s",
+ fname, strerror(errno));
+ if(taskudb) task_new_soainfo(taskudb, last_task, zone);
+ return;
+ } else {
+ /* check the mtime */
+ if(udb_zone_get_mtime(db->udb, dname_name(domain_dname(
+ zone->apex)), domain_dname(zone->apex)->name_size)
+ >= (uint64_t)mtime) {
+ VERBOSITY(3, (LOG_INFO, "zonefile %s is not modified",
+ fname));
+ return;
}
- domains[i] = domain_table_insert(db->domains, dname);
- region_free_all(dname_region);
}
- region_destroy(dname_region);
-
-#ifndef NDEBUG
- fprintf(stderr, "database region after loading domain names: ");
- region_dump_stats(db->region, stderr);
- fprintf(stderr, "\n");
+ assert(parser);
+ /* wipe zone from memory */
+#ifdef NSEC3
+ nsec3_hash_tree_clear(zone);
#endif
-
- while ((rrset = read_rrset(db, dname_count, domains, zone_count, zones))) {
- ++rrset_count;
- rr_count += rrset->rr_count;
- }
-
- DEBUG(DEBUG_DBACCESS, 1,
- (LOG_INFO, "Retrieved %lu RRs in %lu RRsets\n",
- (unsigned long) rr_count, (unsigned long) rrset_count));
-
- region_destroy(temp_region);
-
- if ((db->crc_pos = ftello(db->fd)) == -1) {
- log_msg(LOG_ERR, "ftello %s failed: %s",
- db->filename, strerror(errno));
- namedb_close(db);
- return NULL;
- }
- if (!read_size(db, &db->crc)) {
- log_msg(LOG_ERR, "corrupted database (read size): %s", db->filename);
- namedb_close(db);
- return NULL;
- }
- if (!read_magic(db)) {
- log_msg(LOG_ERR, "corrupted database (read magic): %s", db->filename);
- log_msg(LOG_ERR, "cannot load database, incompatible version "
- "number. Please rebuild database and "
- "start again.");
- namedb_close(db);
- return NULL;
+ delete_zone_rrs(db, zone);
+#ifdef NSEC3
+ nsec3_clear_precompile(db, zone);
+ zone->nsec3_param = NULL;
+#endif /* NSEC3 */
+ errors = zonec_read(zone->opts->name, fname, zone);
+ if(errors > 0) {
+ region_type* dname_region;
+ udb_ptr z;
+ log_msg(LOG_ERR, "zone %s file %s read with %u errors",
+ zone->opts->name, fname, errors);
+ /* wipe (partial) zone from memory */
+ zone->is_ok = 1;
+#ifdef NSEC3
+ nsec3_hash_tree_clear(zone);
+#endif
+ delete_zone_rrs(db, zone);
+#ifdef NSEC3
+ nsec3_clear_precompile(db, zone);
+ zone->nsec3_param = NULL;
+#endif /* NSEC3 */
+ /* see if we can revert to the udb stored version */
+ if(!udb_zone_search(db->udb, &z, dname_name(domain_dname(
+ zone->apex)), domain_dname(zone->apex)->name_size)) {
+ /* tell that zone contents has been lost */
+ if(taskudb) task_new_soainfo(taskudb, last_task, zone);
+ return;
+ }
+ /* read from udb */
+ dname_region = region_create(xalloc, free);
+ udb_rrsets = 0;
+ udb_rrset_count = ZONE(&z)->rrset_count;
+ udb_time = time(NULL);
+ read_zone_data(db->udb, db, dname_region, &z, zone);
+ region_destroy(dname_region);
+ udb_ptr_unlink(&z, db->udb);
+ } else {
+ VERBOSITY(1, (LOG_INFO, "zone %s read with no errors",
+ zone->opts->name));
+ zone->is_ok = 1;
+ zone->is_changed = 0;
+ /* store zone into udb */
+ if(!write_zone_to_udb(db->udb, zone, mtime)) {
+ log_msg(LOG_ERR, "failed to store zone in db");
+ } else {
+ VERBOSITY(2, (LOG_INFO, "zone %s written to db",
+ zone->opts->name));
+ }
}
-
- fclose(db->fd);
- db->fd = NULL;
-
-#ifndef NDEBUG
- fprintf(stderr, "database region after loading database: ");
- region_dump_stats(db->region, stderr);
- fprintf(stderr, "\n");
+ if(taskudb) task_new_soainfo(taskudb, last_task, zone);
+#ifdef NSEC3
+ prehash_zone_complete(db, zone);
#endif
-
- return db;
}
-void
-namedb_close (struct namedb *db)
+void namedb_check_zonefile(struct namedb* db, udb_base* taskudb,
+ udb_ptr* last_task, zone_options_t* zopt)
{
- namedb_fd_close(db);
- if (db) {
- namedb_destroy(db);
+ zone_type* zone;
+ const dname_type* dname = (const dname_type*)zopt->node.key;
+ /* find zone to go with it, or create it */
+ zone = namedb_find_zone(db, dname);
+ if(!zone) {
+ zone = namedb_zone_create(db, dname, zopt);
}
+ namedb_read_zonefile(db, zone, taskudb, last_task);
}
-void
-namedb_fd_close (struct namedb *db)
+void namedb_check_zonefiles(struct namedb* db, nsd_options_t* opt,
+ udb_base* taskudb, udb_ptr* last_task)
{
- if (db && db->fd) {
- fclose(db->fd);
+ zone_options_t* zo;
+ /* check all zones in opt, create if not exist in main db */
+ RBTREE_FOR(zo, zone_options_t*, opt->zone_options) {
+ namedb_check_zonefile(db, taskudb, last_task, zo);
}
}
-
diff --git a/usr.sbin/nsd/dbcreate.c b/usr.sbin/nsd/dbcreate.c
index f193792debb..f0fbb112784 100644
--- a/usr.sbin/nsd/dbcreate.c
+++ b/usr.sbin/nsd/dbcreate.c
@@ -1,7 +1,7 @@
/*
* dbcreate.c -- routines to create an nsd(8) name database
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -9,6 +9,7 @@
#include "config.h"
+#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
@@ -17,266 +18,362 @@
#include <unistd.h>
#include "namedb.h"
+#include "udb.h"
+#include "udbradtree.h"
+#include "udbzone.h"
+#include "options.h"
-static int write_db (namedb_type *db);
-static int write_number(struct namedb *db, uint32_t number);
+/* pathname directory separator character */
+#define PATHSEP '/'
-struct namedb *
-namedb_new (const char *filename)
+/** add an rdata (uncompressed) to the destination */
+static size_t
+add_rdata(rr_type* rr, unsigned i, uint8_t* buf, size_t buflen)
{
- namedb_type *db;
- /* Make a new structure... */
- if ((db = namedb_create()) == NULL) {
- log_msg(LOG_ERR,
- "insufficient memory to create database");
- return NULL;
- }
- db->filename = region_strdup(db->region, filename);
- db->crc = 0xffffffff;
- db->diff_skip = 0;
- db->fd = NULL;
-
- if (gettimeofday(&(db->diff_timestamp), NULL) != 0) {
- log_msg(LOG_ERR, "unable to load %s: cannot initialize "
- "timestamp", db->filename);
- namedb_destroy(db);
- return NULL;
- }
-
- /*
- * Unlink the old database, if it exists. This is useful to
- * ensure that NSD doesn't see the changes until a reload is done.
- */
- if (unlink(db->filename) == -1 && errno != ENOENT) {
- namedb_destroy(db);
- return NULL;
- }
-
- /* Create the database */
- if ((db->fd = fopen(db->filename, "w")) == NULL) {
- namedb_destroy(db);
- return NULL;
- }
-
- if (!write_data_crc(db->fd, NAMEDB_MAGIC, NAMEDB_MAGIC_SIZE, &db->crc)) {
- fclose(db->fd);
- namedb_discard(db);
- return NULL;
+ switch(rdata_atom_wireformat_type(rr->type, i)) {
+ case RDATA_WF_COMPRESSED_DNAME:
+ case RDATA_WF_UNCOMPRESSED_DNAME:
+ {
+ const dname_type* dname = domain_dname(
+ rdata_atom_domain(rr->rdatas[i]));
+ if(dname->name_size > buflen)
+ return 0;
+ memmove(buf, dname_name(dname), dname->name_size);
+ return dname->name_size;
+ }
+ default:
+ break;
}
-
- return db;
+ memmove(buf, rdata_atom_data(rr->rdatas[i]),
+ rdata_atom_size(rr->rdatas[i]));
+ return rdata_atom_size(rr->rdatas[i]);
}
-
-int
-namedb_save (struct namedb *db)
+/* marshal rdata into buffer, must be MAX_RDLENGTH in size */
+size_t
+rr_marshal_rdata(rr_type* rr, uint8_t* rdata, size_t sz)
{
- if (write_db(db) != 0) {
- return -1;
- }
-
- /* Finish up and write the crc */
- if (!write_number(db, ~db->crc)) {
- fclose(db->fd);
- return -1;
+ size_t len = 0;
+ unsigned i;
+ assert(rr);
+ for(i=0; i<rr->rdata_count; i++) {
+ len += add_rdata(rr, i, rdata+len, sz-len);
}
-
- /* Write the magic... */
- if (!write_data_crc(db->fd, NAMEDB_MAGIC, NAMEDB_MAGIC_SIZE, &db->crc)) {
- fclose(db->fd);
- return -1;
- }
-
- /* Close the database */
- fclose(db->fd);
- namedb_destroy(db);
- return 0;
+ return len;
}
-
+/** delete an RR */
void
-namedb_discard (struct namedb *db)
+udb_del_rr(udb_base* udb, udb_ptr* z, rr_type* rr)
{
- unlink(db->filename);
- namedb_destroy(db);
+ /* marshal the rdata (uncompressed) into a buffer */
+ uint8_t rdata[MAX_RDLENGTH];
+ size_t rdatalen = rr_marshal_rdata(rr, rdata, sizeof(rdata));
+ assert(udb);
+ udb_zone_del_rr(udb, z, dname_name(domain_dname(rr->owner)),
+ domain_dname(rr->owner)->name_size, rr->type, rr->klass,
+ rdata, rdatalen);
}
-static int
-write_dname(struct namedb *db, domain_type *domain)
+/** write rr */
+int
+udb_write_rr(udb_base* udb, udb_ptr* z, rr_type* rr)
{
- const dname_type *dname = domain_dname(domain);
-
- if (!write_data_crc(db->fd, &dname->name_size, sizeof(dname->name_size), &db->crc))
- return -1;
-
- if (!write_data_crc(db->fd, dname_name(dname), dname->name_size, &db->crc))
- return -1;
-
- return 0;
+ /* marshal the rdata (uncompressed) into a buffer */
+ uint8_t rdata[MAX_RDLENGTH];
+ size_t rdatalen = 0;
+ unsigned i;
+ assert(rr);
+ for(i=0; i<rr->rdata_count; i++) {
+ rdatalen += add_rdata(rr, i, rdata+rdatalen,
+ sizeof(rdata)-rdatalen);
+ }
+ assert(udb);
+ return udb_zone_add_rr(udb, z, dname_name(domain_dname(rr->owner)),
+ domain_dname(rr->owner)->name_size, rr->type, rr->klass,
+ rr->ttl, rdata, rdatalen);
}
+/** write rrset */
static int
-write_number(struct namedb *db, uint32_t number)
+write_rrset(udb_base* udb, udb_ptr* z, rrset_type* rrset)
{
- number = htonl(number);
- return write_data_crc(db->fd, &number, sizeof(number), &db->crc);
+ unsigned i;
+ for(i=0; i<rrset->rr_count; i++) {
+ if(!udb_write_rr(udb, z, &rrset->rrs[i]))
+ return 0;
+ }
+ return 1;
}
+/** write a zone */
static int
-write_rrset(struct namedb *db, domain_type *domain, rrset_type *rrset)
+write_zone(udb_base* udb, udb_ptr* z, zone_type* zone)
{
- uint16_t rr_count;
- int i, j;
- uint16_t type;
- uint16_t klass;
-
- assert(db);
- assert(domain);
- assert(rrset);
-
- rr_count = htons(rrset->rr_count);
-
- if (!write_number(db, domain->number))
- return 1;
-
- if (!write_number(db, rrset->zone->number))
- return 1;
-
- type = htons(rrset_rrtype(rrset));
- if (!write_data_crc(db->fd, &type, sizeof(type), &db->crc))
- return 1;
-
- klass = htons(rrset_rrclass(rrset));
- if (!write_data_crc(db->fd, &klass, sizeof(klass), &db->crc))
- return 1;
-
- if (!write_data_crc(db->fd, &rr_count, sizeof(rr_count), &db->crc))
- return 1;
-
- for (i = 0; i < rrset->rr_count; ++i) {
- rr_type *rr = &rrset->rrs[i];
- uint32_t ttl;
- uint16_t rdata_count;
-
- rdata_count = htons(rr->rdata_count);
- if (!write_data_crc(db->fd, &rdata_count, sizeof(rdata_count), &db->crc))
- return 1;
-
- ttl = htonl(rr->ttl);
- if (!write_data_crc(db->fd, &ttl, sizeof(ttl), &db->crc))
- return 1;
-
- for (j = 0; j < rr->rdata_count; ++j) {
- rdata_atom_type atom = rr->rdatas[j];
- if (rdata_atom_is_domain(rr->type, j)) {
- if (!write_number(db, rdata_atom_domain(atom)->number))
- return 1;
-
- } else {
- uint16_t size = htons(rdata_atom_size(atom));
- if (!write_data_crc(db->fd, &size, sizeof(size), &db->crc))
- return 1;
-
- if (!write_data_crc(db->fd,
- rdata_atom_data(atom),
- rdata_atom_size(atom), &db->crc))
- return 1;
-
+ /* write all domains in the zone */
+ domain_type* walk;
+ rrset_type* rrset;
+ int n = 0, c = 0;
+ time_t t = time(NULL);
+
+ /* count domains: for pct logging */
+ for(walk=zone->apex; walk && domain_is_subdomain(walk, zone->apex);
+ walk=domain_next(walk)) {
+ n++;
+ }
+ /* write them */
+ for(walk=zone->apex; walk && domain_is_subdomain(walk, zone->apex);
+ walk=domain_next(walk)) {
+ /* write all rrsets (in the zone) for this domain */
+ for(rrset=walk->rrsets; rrset; rrset=rrset->next) {
+ if(rrset->zone == zone) {
+ if(!write_rrset(udb, z, rrset))
+ return 0;
}
}
+ /* only check every ... domains, and print pct */
+ if(++c % ZONEC_PCT_COUNT == 0 && time(NULL) > t + ZONEC_PCT_TIME) {
+ t = time(NULL);
+ VERBOSITY(1, (LOG_INFO, "write %s %d %%",
+ zone->opts->name, c*100/n));
+ }
}
+ return 1;
+}
- return 0;
+/** create and write a zone */
+int
+write_zone_to_udb(udb_base* udb, zone_type* zone, time_t mtime)
+{
+ udb_ptr z;
+ /* make udb dirty */
+ udb_base_set_userflags(udb, 1);
+ /* find or create zone */
+ if(udb_zone_search(udb, &z, dname_name(domain_dname(zone->apex)),
+ domain_dname(zone->apex)->name_size)) {
+ /* wipe existing contents */
+ udb_zone_clear(udb, &z);
+ } else {
+ if(!udb_zone_create(udb, &z, dname_name(domain_dname(
+ zone->apex)), domain_dname(zone->apex)->name_size)) {
+ udb_base_set_userflags(udb, 0);
+ return 0;
+ }
+ }
+ /* set mtime */
+ ZONE(&z)->mtime = (uint64_t)mtime;
+ ZONE(&z)->is_changed = 0;
+ udb_zone_set_log_str(udb, &z, NULL);
+ /* write zone */
+ if(!write_zone(udb, &z, zone)) {
+ udb_base_set_userflags(udb, 0);
+ return 0;
+ }
+ udb_ptr_unlink(&z, udb);
+ udb_base_set_userflags(udb, 0);
+ return 1;
}
static int
-number_dnames_iterator(domain_type *node, void *user_data)
+print_rrs(FILE* out, struct zone* zone)
{
- uint32_t *current_number = (uint32_t *) user_data;
-
- node->number = *current_number;
- ++*current_number;
-
- return 0;
+ rrset_type *rrset;
+ domain_type *domain = zone->apex;
+ region_type* region = region_create(xalloc, free);
+ struct state_pretty_rr* state = create_pretty_rr(region);
+ /* first print the SOA record for the zone */
+ if(zone->soa_rrset) {
+ size_t i;
+ for(i=0; i < zone->soa_rrset->rr_count; i++) {
+ if(!print_rr(out, state, &zone->soa_rrset->rrs[i])){
+ log_msg(LOG_ERR, "There was an error "
+ "printing SOARR to zone %s",
+ zone->opts->name);
+ region_destroy(region);
+ return 0;
+ }
+ }
+ }
+ /* go through entire tree below the zone apex (incl subzones) */
+ while(domain && domain_is_subdomain(domain, zone->apex))
+ {
+ for(rrset = domain->rrsets; rrset; rrset=rrset->next)
+ {
+ size_t i;
+ if(rrset->zone != zone || rrset == zone->soa_rrset)
+ continue;
+ for(i=0; i < rrset->rr_count; i++) {
+ if(!print_rr(out, state, &rrset->rrs[i])){
+ log_msg(LOG_ERR, "There was an error "
+ "printing RR to zone %s",
+ zone->opts->name);
+ region_destroy(region);
+ return 0;
+ }
+ }
+ }
+ domain = domain_next(domain);
+ }
+ region_destroy(region);
+ return 1;
}
static int
-write_dname_iterator(domain_type *node, void *user_data)
+print_header(zone_type* zone, FILE* out, time_t* now, const char* logs)
{
- namedb_type *db = (namedb_type *) user_data;
-
- return write_dname(db, node);
+ char buf[4096];
+ /* ctime prints newline at end of this line */
+ snprintf(buf, sizeof(buf), "; zone %s written by NSD %s on %s",
+ zone->opts->name, PACKAGE_VERSION, ctime(now));
+ if(!write_data(out, buf, strlen(buf)))
+ return 0;
+ if(!logs || logs[0] == 0) return 1;
+ snprintf(buf, sizeof(buf), "; %s\n", logs);
+ return write_data(out, buf, strlen(buf));
}
static int
-write_domain_iterator(domain_type *node, void *user_data)
+write_to_zonefile(zone_type* zone, const char* filename, const char* logs)
{
- namedb_type *db = (namedb_type *) user_data;
- rrset_type *rrset;
- int error = 0;
-
- for (rrset = node->rrsets; rrset; rrset = rrset->next) {
- error += write_rrset(db, node, rrset);
+ time_t now = time(0);
+ FILE *out;
+ VERBOSITY(1, (LOG_INFO, "writing zone %s to file %s",
+ zone->opts->name, filename));
+
+ out = fopen(filename, "w");
+ if(!out) {
+ log_msg(LOG_ERR, "cannot write zone %s file %s: %s",
+ zone->opts->name, filename, strerror(errno));
+ return 0;
+ }
+ if(!print_header(zone, out, &now, logs)) {
+ fclose(out);
+ log_msg(LOG_ERR, "There was an error printing "
+ "the header to zone %s", zone->opts->name);
+ return 0;
+ }
+ if(!print_rrs(out, zone)) {
+ fclose(out);
+ return 0;
}
+ fclose(out);
+ return 1;
+}
- return error;
+/** create directories above this file, .../dir/dir/dir/file */
+int
+create_dirs(const char* path)
+{
+ char dir[4096];
+ char* p;
+ strlcpy(dir, path, sizeof(dir));
+ /* if we start with / then do not try to create '' */
+ if(dir[0] == PATHSEP)
+ p = strchr(dir+1, PATHSEP);
+ else p = strchr(dir, PATHSEP);
+ /* create each directory component from the left */
+ while(p) {
+ assert(*p == PATHSEP);
+ *p = 0; /* end the directory name here */
+ if(mkdir(dir
+#ifndef MKDIR_HAS_ONE_ARG
+ , 0750
+#endif
+ ) == -1) {
+ if(errno != EEXIST) {
+ log_msg(LOG_ERR, "create dir %s: %s",
+ dir, strerror(errno));
+ return 0;
+ }
+ /* it already exists, OK, continue */
+ }
+ *p = PATHSEP;
+ p = strchr(p+1, PATHSEP);
+ }
+ return 1;
}
-/*
- * Writes databse data into open database *db
- *
- * Returns zero if success.
- */
+/** create pathname components and check if file exists */
static int
-write_db(namedb_type *db)
+create_path_components(const char* path, int* notexist)
{
- zone_type *zone;
- uint32_t terminator = 0;
- uint32_t dname_count = 1;
- uint32_t zone_count = 1;
- int errors = 0;
-
- for (zone = db->zones; zone; zone = zone->next) {
- zone->number = zone_count;
- ++zone_count;
-
- if (!zone->soa_rrset) {
- fprintf(stderr, "SOA record not present in %s\n",
- dname_to_string(domain_dname(zone->apex),
- NULL));
- ++errors;
+ /* stat the file, to see if it exists, and if its directories exist */
+ struct stat s;
+ if(stat(path, &s) != 0) {
+ if(errno == ENOENT) {
+ *notexist = 1;
+ /* see if we need to create pathname components */
+ return create_dirs(path);
}
+ log_msg(LOG_ERR, "cannot stat %s: %s", path, strerror(errno));
+ return 0;
}
+ *notexist = 0;
+ return 1;
+}
- if (errors > 0)
- return -1;
-
- --zone_count;
- if (!write_number(db, zone_count))
- return -1;
- for (zone = db->zones; zone; zone = zone->next) {
- if (write_dname(db, zone->apex))
- return -1;
+void
+namedb_write_zonefile(namedb_type* db, zone_options_t* zopt)
+{
+ const char* zfile;
+ int notexist = 0;
+ zone_type* zone;
+ /* if no zone exists, it has no contents or it has no zonefile
+ * configured, then no need to write data to disk */
+ if(!zopt->pattern->zonefile)
+ return;
+ zone = namedb_find_zone(db, (const dname_type*)zopt->node.key);
+ if(!zone || !zone->apex)
+ return;
+ /* write if file does not exist, or if changed */
+ /* so, determine filename, create directory components, check exist*/
+ zfile = config_make_zonefile(zopt);
+ if(!create_path_components(zfile, &notexist)) {
+ log_msg(LOG_ERR, "could not write zone %s to file %s because "
+ "the path could not be created", zopt->name, zfile);
+ return;
}
- if (domain_table_iterate(db->domains, number_dnames_iterator, &dname_count))
- return -1;
-
- --dname_count;
- if (!write_number(db, dname_count))
- return -1;
-
- DEBUG(DEBUG_ZONEC, 1,
- (LOG_INFO, "Storing %lu domain names\n", (unsigned long) dname_count));
-
- if (domain_table_iterate(db->domains, write_dname_iterator, db))
- return -1;
-
- if (domain_table_iterate(db->domains, write_domain_iterator, db))
- return -1;
-
- if (!write_data_crc(db->fd, &terminator, sizeof(terminator), &db->crc))
- return -1;
+ /* if not changed, do not write. */
+ if(notexist || zone->is_changed) {
+ char logs[4096];
+ char bakfile[4096];
+ udb_ptr zudb;
+ if(!udb_zone_search(db->udb, &zudb,
+ dname_name(domain_dname(zone->apex)),
+ domain_dname(zone->apex)->name_size))
+ return; /* zone does not exist in db */
+ /* write to zfile~ first, then rename if that works */
+ snprintf(bakfile, sizeof(bakfile), "%s~", zfile);
+ if(ZONE(&zudb)->log_str.data) {
+ udb_ptr s;
+ udb_ptr_new(&s, db->udb, &ZONE(&zudb)->log_str);
+ strlcpy(logs, (char*)udb_ptr_data(&s), sizeof(logs));
+ udb_ptr_unlink(&s, db->udb);
+ } else logs[0] = 0;
+ if(!write_to_zonefile(zone, bakfile, logs)) {
+ udb_ptr_unlink(&zudb, db->udb);
+ return; /* error already printed */
+ }
+ if(rename(bakfile, zfile) == -1) {
+ log_msg(LOG_ERR, "rename(%s to %s) failed: %s",
+ bakfile, zfile, strerror(errno));
+ udb_ptr_unlink(&zudb, db->udb);
+ return;
+ }
+ zone->is_changed = 0;
+ ZONE(&zudb)->mtime = (uint64_t)time(0);
+ ZONE(&zudb)->is_changed = 0;
+ udb_zone_set_log_str(db->udb, &zudb, NULL);
+ udb_ptr_unlink(&zudb, db->udb);
+ }
+}
- return 0;
+void
+namedb_write_zonefiles(namedb_type* db, nsd_options_t* options)
+{
+ zone_options_t* zo;
+ RBTREE_FOR(zo, zone_options_t*, options->zone_options) {
+ namedb_write_zonefile(db, zo);
+ }
}
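
namedb_write_zonefile above never writes into the live zonefile directly: it writes the whole zone to "zfile~" and only rename()s it over the real name once the write succeeded. A minimal sketch of that write-to-temporary-then-rename pattern, assuming nothing beyond stdio (the helper and its arguments are illustrative):

#include <stdio.h>

static int
replace_file_safely(const char* fname, const char* contents)
{
	char tmp[4096];
	FILE* out;
	snprintf(tmp, sizeof(tmp), "%s~", fname);
	out = fopen(tmp, "w");
	if(!out)
		return 0;
	if(fputs(contents, out) == EOF) {
		fclose(out);
		remove(tmp);
		return 0;
	}
	if(fclose(out) == EOF) {
		remove(tmp);
		return 0;
	}
	/* on POSIX filesystems rename() replaces the target atomically,
	 * so readers see either the old file or the complete new one */
	return rename(tmp, fname) == 0;
}
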
diff --git a/usr.sbin/nsd/difffile.c b/usr.sbin/nsd/difffile.c
index 2b6d721d878..0719cf6f9dc 100644
--- a/usr.sbin/nsd/difffile.c
+++ b/usr.sbin/nsd/difffile.c
@@ -1,7 +1,7 @@
/*
* difffile.c - DIFF file handling source code. Read and write diff files.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -14,22 +14,26 @@
#include <stdlib.h>
#include <errno.h>
#include "difffile.h"
+#include "xfrd-disk.h"
#include "util.h"
#include "packet.h"
#include "rdata.h"
+#include "udb.h"
+#include "udbzone.h"
#include "nsec3.h"
+#include "nsd.h"
+#include "rrl.h"
static int
-write_32(FILE *out, uint32_t val)
+write_64(FILE *out, uint64_t val)
{
- val = htonl(val);
return write_data(out, &val, sizeof(val));
}
static int
-write_16(FILE *out, uint16_t val)
+write_32(FILE *out, uint32_t val)
{
- val = htons(val);
+ val = htonl(val);
return write_data(out, &val, sizeof(val));
}
@@ -49,142 +53,117 @@ write_str(FILE *out, const char* str)
}
void
-diff_write_packet(const char* zone, uint32_t new_serial, uint16_t id,
- uint32_t seq_nr, uint8_t* data, size_t len, nsd_options_t* opt)
+diff_write_packet(const char* zone, const char* pat, uint32_t old_serial,
+ uint32_t new_serial, uint32_t seq_nr, uint8_t* data, size_t len,
+ struct nsd* nsd, uint64_t filenumber)
{
- const char* filename = opt->difffile;
- struct timeval tv;
- FILE *df;
- uint32_t file_len = sizeof(uint32_t) + strlen(zone) +
- sizeof(new_serial) + sizeof(id) + sizeof(seq_nr) + len;
-
- if (gettimeofday(&tv, NULL) != 0) {
- log_msg(LOG_ERR, "could not set timestamp for %s: %s",
- filename, strerror(errno));
+ FILE* df = xfrd_open_xfrfile(nsd, filenumber, seq_nr?"a":"w");
+ if(!df) {
+ log_msg(LOG_ERR, "could not open transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
return;
}
- df = fopen(filename, "a");
- if(!df) {
- log_msg(LOG_ERR, "could not open file %s for append: %s",
- filename, strerror(errno));
- return;
+ /* if first part, first write the header */
+ if(seq_nr == 0) {
+ struct timeval tv;
+ if (gettimeofday(&tv, NULL) != 0) {
+ log_msg(LOG_ERR, "could not get timestamp for %s: %s",
+ zone, strerror(errno));
+ }
+ if(!write_32(df, DIFF_PART_XFRF) ||
+ !write_8(df, 0) /* notcommitted(yet) */ ||
+ !write_32(df, 0) /* numberofparts when done */ ||
+ !write_64(df, (uint64_t) tv.tv_sec) ||
+ !write_32(df, (uint32_t) tv.tv_usec) ||
+ !write_32(df, old_serial) ||
+ !write_32(df, new_serial) ||
+ !write_64(df, (uint64_t) tv.tv_sec) ||
+ !write_32(df, (uint32_t) tv.tv_usec) ||
+ !write_str(df, zone) ||
+ !write_str(df, pat)) {
+ log_msg(LOG_ERR, "could not write transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
+ fclose(df);
+ return;
+ }
}
- if(!write_32(df, DIFF_PART_IXFR) ||
- !write_32(df, (uint32_t) tv.tv_sec) ||
- !write_32(df, (uint32_t) tv.tv_usec) ||
- !write_32(df, file_len) ||
- !write_str(df, zone) ||
- !write_32(df, new_serial) ||
- !write_16(df, id) ||
- !write_32(df, seq_nr) ||
+ if(!write_32(df, DIFF_PART_XXFR) ||
+ !write_32(df, len) ||
!write_data(df, data, len) ||
- !write_32(df, file_len))
+ !write_32(df, len))
{
- log_msg(LOG_ERR, "could not write to file %s: %s",
- filename, strerror(errno));
+ log_msg(LOG_ERR, "could not write transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
}
fclose(df);
}
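
For the first part (seq_nr 0) the code above lays down a header: a 32-bit part type in network order, a one-byte committed flag (0 until diff_write_commit runs), a 32-bit number of parts, timestamps, the old and new serial, and the zone and pattern names; every data part that follows is framed by its length before and after. A hedged sketch of peeking at the first three header fields (the helper name and the expected_magic argument are illustrative; byte order follows write_32/write_8 above):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static int
peek_xfr_header(FILE* in, uint32_t expected_magic, uint8_t* committed,
	uint32_t* num_parts)
{
	uint32_t magic;
	if(fread(&magic, sizeof(magic), 1, in) != 1 ||
		ntohl(magic) != expected_magic)
		return 0;	/* not a transfer file */
	if(fread(committed, 1, 1, in) != 1)
		return 0;	/* truncated header */
	if(fread(num_parts, sizeof(*num_parts), 1, in) != 1)
		return 0;
	*num_parts = ntohl(*num_parts);
	return 1;
}
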
void
-diff_write_commit(const char* zone, uint32_t old_serial,
- uint32_t new_serial, uint16_t id, uint32_t num_parts,
- uint8_t commit, const char* log_str, nsd_options_t* opt)
+diff_write_commit(const char* zone, uint32_t old_serial, uint32_t new_serial,
+ uint32_t num_parts, uint8_t commit, const char* log_str,
+ struct nsd* nsd, uint64_t filenumber)
{
- const char* filename = opt->difffile;
struct timeval tv;
- FILE *df;
- uint32_t len;
+ FILE* df;
if (gettimeofday(&tv, NULL) != 0) {
log_msg(LOG_ERR, "could not set timestamp for %s: %s",
- filename, strerror(errno));
- return;
+ zone, strerror(errno));
}
- df = fopen(filename, "a");
+ /* overwrite the first part of the file with 'committed = 1',
+ * as well as the end_time and number of parts.
+ * also write old_serial and new_serial, so that a bad file mixup
+ * will result in unusable serial numbers. */
+
+ df = xfrd_open_xfrfile(nsd, filenumber, "r+");
if(!df) {
- log_msg(LOG_ERR, "could not open file %s for append: %s",
- filename, strerror(errno));
+ log_msg(LOG_ERR, "could not open transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
return;
}
-
- len = strlen(zone) + sizeof(len) + sizeof(old_serial) +
- sizeof(new_serial) + sizeof(id) + sizeof(num_parts) +
- sizeof(commit) + strlen(log_str) + sizeof(len);
-
- if(!write_32(df, DIFF_PART_SURE) ||
- !write_32(df, (uint32_t) tv.tv_sec) ||
+ if(!write_32(df, DIFF_PART_XFRF) ||
+ !write_8(df, commit) /* committed */ ||
+ !write_32(df, num_parts) ||
+ !write_64(df, (uint64_t) tv.tv_sec) ||
!write_32(df, (uint32_t) tv.tv_usec) ||
- !write_32(df, len) ||
- !write_str(df, zone) ||
!write_32(df, old_serial) ||
- !write_32(df, new_serial) ||
- !write_16(df, id) ||
- !write_32(df, num_parts) ||
- !write_8(df, commit) ||
- !write_str(df, log_str) ||
- !write_32(df, len))
+ !write_32(df, new_serial))
{
- log_msg(LOG_ERR, "could not write to file %s: %s",
- filename, strerror(errno));
+ log_msg(LOG_ERR, "could not write transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
+ fclose(df);
+ return;
+ }
+
+ /* append the log_str to the end of the file */
+ if(fseek(df, 0, SEEK_END) == -1) {
+ log_msg(LOG_ERR, "could not fseek transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
+ fclose(df);
+ return;
+ }
+ if(!write_str(df, log_str)) {
+ log_msg(LOG_ERR, "could not write transfer %s file %lld: %s",
+ zone, (long long)filenumber, strerror(errno));
+ fclose(df);
+ return;
+
}
fflush(df);
fclose(df);
}
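
diff_write_commit reopens the transfer file in "r+" mode, rewrites the fixed-size header prefix so the committed flag and part count become valid, and only then appends the variable-length log string at the end where it cannot disturb the header. A small sketch of that update-in-place-then-append pattern, using plain stdio (names and the flag offset are illustrative):

#include <stdio.h>

static int
commit_transfer_file(const char* fname, unsigned char committed,
	const char* log_line)
{
	FILE* f = fopen(fname, "r+");	/* update the existing file */
	if(!f)
		return 0;
	/* in this sketch the flag sits right after a 4-byte magic */
	if(fseek(f, 4, SEEK_SET) == -1 ||
		fwrite(&committed, 1, 1, f) != 1) {
		fclose(f);
		return 0;
	}
	/* the log text is only known at commit time, so append it last */
	if(fseek(f, 0, SEEK_END) == -1 || fputs(log_line, f) == EOF) {
		fclose(f);
		return 0;
	}
	return fclose(f) == 0;
}
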
-/*
- * Checksum to signal no data change occurred (for example, by a
- * zonec run).
- */
int
-db_crc_different(namedb_type* db)
+diff_read_64(FILE *in, uint64_t* result)
{
- FILE *fd = fopen(db->filename, "r");
- uint32_t crc_file;
- char buf[NAMEDB_MAGIC_SIZE];
- if(fd == NULL) {
- log_msg(LOG_ERR, "unable to load %s: %s",
- db->filename, strerror(errno));
- return -1;
- }
-
- /* seek to position of CRC, check it and magic no */
- if(fseeko(fd, db->crc_pos, SEEK_SET)==-1) {
- log_msg(LOG_ERR, "unable to fseeko %s: %s. db changed?",
- db->filename, strerror(errno));
- fclose(fd);
- return -1;
- }
-
- if(fread(&crc_file, sizeof(crc_file), 1, fd) != 1) {
- if(!feof(fd))
- log_msg(LOG_ERR, "could not read %s CRC: %s. "
- "db changed?", db->filename, strerror(errno));
- fclose(fd);
- return -1;
- }
- crc_file = ntohl(crc_file);
-
- if(fread(buf, sizeof(char), sizeof(buf), fd) != sizeof(buf)) {
- if(!feof(fd))
- log_msg(LOG_ERR, "could not read %s magic: %s. "
- "db changed?", db->filename, strerror(errno));
- fclose(fd);
- return -1;
- }
- if(memcmp(buf, NAMEDB_MAGIC, NAMEDB_MAGIC_SIZE) != 0) {
- fclose(fd);
- return -1;
- }
-
- fclose(fd);
-
- if(db->crc == crc_file)
+ if (fread(result, sizeof(*result), 1, in) == 1) {
+ return 1;
+ } else {
return 0;
- return 1;
+ }
}
int
@@ -199,17 +178,6 @@ diff_read_32(FILE *in, uint32_t* result)
}
int
-diff_read_16(FILE *in, uint16_t* result)
-{
- if (fread(result, sizeof(*result), 1, in) == 1) {
- *result = ntohs(*result);
- return 1;
- } else {
- return 0;
- }
-}
-
-int
diff_read_8(FILE *in, uint8_t* result)
{
if (fread(result, sizeof(*result), 1, in) == 1) {
@@ -259,7 +227,7 @@ has_data_below(domain_type* top)
assert(d != NULL);
/* in the canonical ordering subdomains are after this name */
d = domain_next(d);
- while(d != NULL && dname_is_subdomain(domain_dname(d), domain_dname(top))) {
+ while(d != NULL && domain_is_subdomain(d, top)) {
if(d->is_existing)
return 1;
d = domain_next(d);
@@ -267,35 +235,8 @@ has_data_below(domain_type* top)
return 0;
}
-
-/* this routine makes empty terminals non-existent.
- * @domain the lowest empty terminal
- * @ce the closest encloser
- */
-static domain_type*
-rrset_delete_empty_terminals(domain_type* domain, domain_type* ce)
-{
- assert(domain);
- if (domain->rrsets == 0) {
- /* if there is no data below it, it becomes non existing.
- also empty nonterminals above it become nonexisting */
- /* check for data below this node. */
- if(!has_data_below(domain)) {
- /* nonexist this domain and all parent empty nonterminals */
- domain_type* p = domain;
- while(p != NULL && p->rrsets == 0) {
- if(p == ce || has_data_below(p))
- return p;
- p->is_existing = 0;
- p = p->parent;
- }
- }
- }
- return NULL;
-}
-
-
-static domain_type*
+/** remove rrset. Adjusts zone params. Does not remove domain */
+static void
rrset_delete(namedb_type* db, domain_type* domain, rrset_type* rrset)
{
int i;
@@ -306,40 +247,29 @@ rrset_delete(namedb_type* db, domain_type* domain, rrset_type* rrset)
}
if(!*pp) {
/* rrset does not exist for domain */
- return NULL;
+ return;
}
*pp = rrset->next;
DEBUG(DEBUG_XFRD,2, (LOG_INFO, "delete rrset of %s type %s",
- dname_to_string(domain_dname(domain),0),
+ domain_to_string(domain),
rrtype_to_string(rrset_rrtype(rrset))));
/* is this a SOA rrset ? */
if(rrset->zone->soa_rrset == rrset) {
rrset->zone->soa_rrset = 0;
- rrset->zone->updated = 1;
- domain->has_SOA = 0;
}
if(rrset->zone->ns_rrset == rrset) {
rrset->zone->ns_rrset = 0;
}
if(domain == rrset->zone->apex && rrset_rrtype(rrset) == TYPE_RRSIG) {
for (i = 0; i < rrset->rr_count; ++i) {
- if (rr_rrsig_type_covered(&rrset->rrs[i]) == TYPE_DNSKEY) {
+ if(rr_rrsig_type_covered(&rrset->rrs[i])==TYPE_DNSKEY) {
rrset->zone->is_secure = 0;
break;
}
}
}
-
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- if (rrset->rrs[0].type == TYPE_NSEC3) {
- namedb_del_nsec3_domain(db, domain, rrset->zone);
- }
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
/* recycle the memory space of the rrset */
for (i = 0; i < rrset->rr_count; ++i)
add_rdata_to_recyclebin(db, &rrset->rrs[i]);
@@ -349,60 +279,334 @@ rrset_delete(namedb_type* db, domain_type* domain, rrset_type* rrset)
region_recycle(db->region, rrset, sizeof(rrset_type));
/* is the node now an empty node (completely deleted) */
- if (domain->rrsets == 0) {
- return domain;
+ if(domain->rrsets == 0) {
+ /* if there is no data below it, it becomes non existing.
+ also empty nonterminals above it become nonexisting */
+ /* check for data below this node. */
+ if(!has_data_below(domain)) {
+ /* nonexist this domain and all parent empty nonterminals */
+ domain_type* p = domain;
+ while(p != NULL && p->rrsets == 0) {
+ if(has_data_below(p))
+ break;
+ p->is_existing = 0;
+ p = p->parent;
+ }
+ }
}
- return NULL;
}
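
The block above implements the empty-nonterminal rule: once a name loses its last RRset and has no data beneath it, the name stops existing, and so do any parents that were only kept alive by it. A toy illustration of that upward walk (the struct and its fields are hypothetical; only the loop mirrors the code above):

struct toy_node {
	struct toy_node* parent;
	int rrset_count;	/* RRsets stored at this name */
	int below_count;	/* names with data underneath this one */
	int is_existing;
};

/* after deleting the last RRset of 'n', hide it and any parents that
 * are now empty nonterminals with nothing below them */
static void
toy_hide_empty_terminals(struct toy_node* n)
{
	while(n && n->rrset_count == 0 && n->below_count == 0) {
		n->is_existing = 0;
		n = n->parent;
	}
}
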
static int
-rdatas_equal(rdata_atom_type *a, rdata_atom_type *b, int num, uint16_t type)
+rdatas_equal(rdata_atom_type *a, rdata_atom_type *b, int num, uint16_t type,
+ int* rdnum, char** reason)
{
int k;
for(k = 0; k < num; k++)
{
if(rdata_atom_is_domain(type, k)) {
if(dname_compare(domain_dname(a[k].domain),
- domain_dname(b[k].domain))!=0)
+ domain_dname(b[k].domain))!=0) {
+ *rdnum = k;
+ *reason = "dname data";
+ return 0;
+ }
+ } else if(rdata_atom_is_literal_domain(type, k)) {
+ /* literal dname, but compare case insensitive */
+ if(a[k].data[0] != b[k].data[0]) {
+ *rdnum = k;
+ *reason = "literal dname len";
+ return 0; /* uncompressed len must be equal*/
+ }
+ if(!dname_equal_nocase((uint8_t*)(a[k].data+1),
+ (uint8_t*)(b[k].data+1), a[k].data[0])) {
+ *rdnum = k;
+ *reason = "literal dname data";
return 0;
+ }
} else {
/* check length */
- if(a[k].data[0] != b[k].data[0])
+ if(a[k].data[0] != b[k].data[0]) {
+ *rdnum = k;
+ *reason = "rdata len";
return 0;
+ }
/* check data */
- if(memcmp(a[k].data+1, b[k].data+1, a[k].data[0])!=0)
+ if(memcmp(a[k].data+1, b[k].data+1, a[k].data[0])!=0) {
+ *rdnum = k;
+ *reason = "rdata data";
return 0;
+ }
}
}
return 1;
}
-static int
-find_rr_num(rrset_type* rrset,
- uint16_t type, uint16_t klass,
+static void
+debug_find_rr_num(rrset_type* rrset, uint16_t type, uint16_t klass,
rdata_atom_type *rdatas, ssize_t rdata_num)
{
- int i;
+ int i, rd;
+ char* reason = "";
+
+ for(i=0; i < rrset->rr_count; ++i) {
+ if (rrset->rrs[i].type != type) {
+ log_msg(LOG_WARNING, "diff: RR <%s, %s> does not match "
+ "RR num %d type %s",
+ dname_to_string(rrset->rrs[i].owner->dname,0),
+ rrtype_to_string(type), i,
+ rrtype_to_string(rrset->rrs[i].type));
+ }
+ if (rrset->rrs[i].klass != klass) {
+ log_msg(LOG_WARNING, "diff: RR <%s, %s> class %d "
+ "does not match RR num %d class %d",
+ dname_to_string(rrset->rrs[i].owner->dname,0),
+ rrtype_to_string(type),
+ klass, i,
+ rrset->rrs[i].klass);
+ }
+ if (rrset->rrs[i].rdata_count != rdata_num) {
+ log_msg(LOG_WARNING, "diff: RR <%s, %s> rdlen %u "
+ "does not match RR num %d rdlen %d",
+ dname_to_string(rrset->rrs[i].owner->dname,0),
+ rrtype_to_string(type),
+ (unsigned) rdata_num, i,
+ (unsigned) rrset->rrs[i].rdata_count);
+ }
+ if (!rdatas_equal(rdatas, rrset->rrs[i].rdatas, rdata_num, type,
+ &rd, &reason)) {
+ log_msg(LOG_WARNING, "diff: RR <%s, %s> rdata element "
+ "%d differs from RR num %d rdata (%s)",
+ dname_to_string(rrset->rrs[i].owner->dname,0),
+ rrtype_to_string(type),
+ rd, i, reason);
+ }
+ }
+}
+
+static int
+find_rr_num(rrset_type* rrset, uint16_t type, uint16_t klass,
+ rdata_atom_type *rdatas, ssize_t rdata_num, int add)
+{
+ int i, rd;
+ char* reason;
for(i=0; i < rrset->rr_count; ++i) {
if(rrset->rrs[i].type == type &&
rrset->rrs[i].klass == klass &&
rrset->rrs[i].rdata_count == rdata_num &&
- rdatas_equal(rdatas, rrset->rrs[i].rdatas, rdata_num, type))
+ rdatas_equal(rdatas, rrset->rrs[i].rdatas, rdata_num, type,
+ &rd, &reason))
{
return i;
}
}
-
+ /* this is odd. Log why rr cannot be found. */
+ if (!add) {
+ debug_find_rr_num(rrset, type, klass, rdatas, rdata_num);
+ }
return -1;
}
-static int
+#ifdef NSEC3
+/* see if nsec3 deletion triggers need action */
+static void
+nsec3_delete_rr_trigger(namedb_type* db, rr_type* rr, zone_type* zone,
+ udb_ptr* udbz)
+{
+ /* the RR has not actually been deleted yet, so we can inspect it */
+ if(!zone->nsec3_param)
+ return;
+ /* see if the domain was an NSEC3-domain in the chain, but no longer */
+ if(rr->type == TYPE_NSEC3 && rr->owner->nsec3 &&
+ rr->owner->nsec3->nsec3_node.key &&
+ nsec3_rr_uses_params(rr, zone) &&
+ nsec3_in_chain_count(rr->owner, zone) <= 1) {
+ domain_type* prev = nsec3_chain_find_prev(zone, rr->owner);
+ /* remove from prehash because no longer an NSEC3 domain */
+ if(domain_is_prehash(db->domains, rr->owner))
+ prehash_del(db->domains, rr->owner);
+ /* fixup the last in the zone */
+ if(rr->owner == zone->nsec3_last)
+ zone->nsec3_last = prev;
+ /* unlink from the nsec3tree */
+ zone_del_domain_in_hash_tree(zone->nsec3tree,
+ &rr->owner->nsec3->nsec3_node);
+ /* add previous NSEC3 to the prehash list */
+ if(prev && prev != rr->owner)
+ prehash_add(db->domains, prev);
+ else nsec3_clear_precompile(db, zone);
+ /* this domain becomes ordinary data domain: done later */
+ }
+ /* see if the rr was NSEC3PARAM that we were using */
+ else if(rr->type == TYPE_NSEC3PARAM && rr == zone->nsec3_param) {
+ /* clear trees, wipe hashes, wipe precompile */
+ nsec3_clear_precompile(db, zone);
+ /* pick up new nsec3param from udb */
+ nsec3_find_zone_param(db, zone, udbz);
+ /* if no more NSEC3, done */
+ if(!zone->nsec3_param)
+ return;
+ nsec3_precompile_newparam(db, zone);
+ }
+}
+
+/* see if nsec3 prehash can be removed with new rrset content */
+static void
+nsec3_rrsets_changed_remove_prehash(domain_type* domain, zone_type* zone)
+{
+ /* deletion of rrset already done, we can check if conditions apply */
+ /* see if the domain is no longer precompiled */
+ /* it has a hash_node, but no longer fulfills conditions */
+ if(nsec3_domain_part_of_zone(domain, zone) && domain->nsec3 &&
+ domain->nsec3->hash_node.key &&
+ !nsec3_condition_hash(domain, zone)) {
+ /* remove precompile */
+ domain->nsec3->nsec3_cover = NULL;
+ domain->nsec3->nsec3_wcard_child_cover = NULL;
+ domain->nsec3->nsec3_is_exact = 0;
+ /* remove it from the hash tree */
+ zone_del_domain_in_hash_tree(zone->hashtree,
+ &domain->nsec3->hash_node);
+ zone_del_domain_in_hash_tree(zone->wchashtree,
+ &domain->nsec3->wchash_node);
+ }
+ if(domain != zone->apex && domain->nsec3 &&
+ domain->nsec3->dshash_node.key &&
+ !nsec3_condition_dshash(domain, zone)) {
+ /* remove precompile */
+ domain->nsec3->nsec3_ds_parent_cover = NULL;
+ domain->nsec3->nsec3_ds_parent_is_exact = 0;
+ /* remove it from the hash tree */
+ zone_del_domain_in_hash_tree(zone->dshashtree,
+ &domain->nsec3->dshash_node);
+ }
+}
+
+/* see if domain needs to get precompiled info */
+static void
+nsec3_rrsets_changed_add_prehash(namedb_type* db, domain_type* domain,
+ zone_type* zone)
+{
+ if(!zone->nsec3_param)
+ return;
+ if((!domain->nsec3 || !domain->nsec3->hash_node.key)
+ && nsec3_condition_hash(domain, zone)) {
+ region_type* tmpregion = region_create(xalloc, free);
+ nsec3_precompile_domain(db, domain, zone, tmpregion);
+ region_destroy(tmpregion);
+ }
+ if((!domain->nsec3 || !domain->nsec3->dshash_node.key)
+ && nsec3_condition_dshash(domain, zone)) {
+ nsec3_precompile_domain_ds(db, domain, zone);
+ }
+}
+
+/* see if nsec3 rrset-deletion triggers need action */
+static void
+nsec3_delete_rrset_trigger(namedb_type* db, domain_type* domain,
+ zone_type* zone, uint16_t type)
+{
+ if(!zone->nsec3_param)
+ return;
+ nsec3_rrsets_changed_remove_prehash(domain, zone);
+ /* for type nsec3, or a delegation, the domain may have become a
+ * 'normal' domain with its remaining data now */
+ if(type == TYPE_NSEC3 || type == TYPE_NS || type == TYPE_DS)
+ nsec3_rrsets_changed_add_prehash(db, domain, zone);
+ /* for type DNAME or a delegation, obscured data may be revealed */
+ if(type == TYPE_NS || type == TYPE_DS || type == TYPE_DNAME) {
+ /* walk over subdomains and check them each */
+ domain_type *d;
+ for(d=domain_next(domain); d && domain_is_subdomain(d, domain);
+ d=domain_next(d)) {
+ nsec3_rrsets_changed_add_prehash(db, d, zone);
+ }
+ }
+}
+
+/* see if nsec3 addition triggers need action */
+static void
+nsec3_add_rr_trigger(namedb_type* db, rr_type* rr, zone_type* zone,
+ udb_ptr* udbz)
+{
+ /* the RR has been added in full, also to UDB (and thus NSEC3PARAM
+ * in the udb has been adjusted) */
+ if(zone->nsec3_param && rr->type == TYPE_NSEC3 &&
+ (!rr->owner->nsec3 || !rr->owner->nsec3->nsec3_node.key)
+ && nsec3_rr_uses_params(rr, zone)) {
+ /* added NSEC3 into the chain */
+ nsec3_precompile_nsec3rr(db, rr->owner, zone);
+ /* the domain has become an NSEC3-domain, if it was precompiled
+ * previously, remove that, neatly done in routine above */
+ nsec3_rrsets_changed_remove_prehash(rr->owner, zone);
+ /* set this NSEC3 to prehash */
+ prehash_add(db->domains, rr->owner);
+ } else if(!zone->nsec3_param && rr->type == TYPE_NSEC3PARAM) {
+ /* see if this means NSEC3 chain can be used */
+ nsec3_find_zone_param(db, zone, udbz);
+ if(!zone->nsec3_param)
+ return;
+ nsec3_zone_trees_create(db->region, zone);
+ nsec3_precompile_newparam(db, zone);
+ }
+}
+
+/* see if nsec3 rrset-addition triggers need action */
+static void
+nsec3_add_rrset_trigger(namedb_type* db, domain_type* domain, zone_type* zone,
+ uint16_t type)
+{
+ /* the rrset has been added so we can inspect it */
+ if(!zone->nsec3_param)
+ return;
+ /* because the rrset is added we can check conditions easily.
+ * check if domain needs to become precompiled now */
+ nsec3_rrsets_changed_add_prehash(db, domain, zone);
+ /* if a delegation, it changes from normal name to unhashed referral */
+ if(type == TYPE_NS || type == TYPE_DS) {
+ nsec3_rrsets_changed_remove_prehash(domain, zone);
+ }
+ /* if delegation or DNAME added, then some RRs may get obscured */
+ if(type == TYPE_NS || type == TYPE_DS || type == TYPE_DNAME) {
+ /* walk over subdomains and check them each */
+ domain_type *d;
+ for(d=domain_next(domain); d && domain_is_subdomain(d, domain);
+ d=domain_next(d)) {
+ nsec3_rrsets_changed_remove_prehash(d, zone);
+ }
+ }
+}
+#endif /* NSEC3 */
+
+/* fixup usage lower for domain names in the rdata */
+static void
+rr_lower_usage(namedb_type* db, rr_type* rr)
+{
+ unsigned i;
+ for(i=0; i<rr->rdata_count; i++) {
+ if(rdata_atom_is_domain(rr->type, i)) {
+ assert(rdata_atom_domain(rr->rdatas[i])->usage > 0);
+ rdata_atom_domain(rr->rdatas[i])->usage --;
+ if(rdata_atom_domain(rr->rdatas[i])->usage == 0)
+ domain_table_deldomain(db,
+ rdata_atom_domain(rr->rdatas[i]));
+ }
+ }
+}
+
+static void
+rrset_lower_usage(namedb_type* db, rrset_type* rrset)
+{
+ unsigned i;
+ for(i=0; i<rrset->rr_count; i++)
+ rr_lower_usage(db, &rrset->rrs[i]);
+}
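+
rr_lower_usage treats the usage field as a reference count: every RR whose rdata points at a domain node holds one reference, and the node only becomes a candidate for domain_table_deldomain once the count drops to zero. A miniature version of the same idea (types and names are hypothetical):

struct toy_domain {
	unsigned usage;	/* references held by the rdata of other RRs */
};

static void
toy_release_domain(struct toy_domain* d, void (*del)(struct toy_domain*))
{
	/* the caller must currently hold one of the references */
	if(d->usage > 0 && --d->usage == 0)
		del(d);	/* last reference gone: safe to delete */
}
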
+
+int
delete_RR(namedb_type* db, const dname_type* dname,
uint16_t type, uint16_t klass,
- domain_type* prevdomain,
buffer_type* packet, size_t rdatalen, zone_type *zone,
- region_type* temp_region, int is_axfr)
+ region_type* temp_region, udb_ptr* udbz)
{
domain_type *domain;
rrset_type *rrset;
@@ -436,36 +640,30 @@ delete_RR(namedb_type* db, const dname_type* dname,
dname_to_string(dname,0));
return 0;
}
- rrnum = find_rr_num(rrset, type, klass, rdatas, rdata_num);
+ rrnum = find_rr_num(rrset, type, klass, rdatas, rdata_num, 0);
if(rrnum == -1) {
log_msg(LOG_WARNING, "diff: RR <%s, %s> does not exist",
dname_to_string(dname,0), rrtype_to_string(type));
return 1; /* not fatal error */
}
+ /* delete the normalized RR from the udb */
+ udb_del_rr(db->udb, udbz, &rrset->rrs[rrnum]);
#ifdef NSEC3
-#ifndef FULL_PREHASH
- if (is_axfr == 0) {
- struct domain *parent = domain;
- do {
- if (0 != namedb_add_nsec3_mod_domain(db,
- parent)) {
- return 0;
- }
- parent = parent->parent;
- } while (parent != zone->apex->parent);
- }
-#else
- (void)is_axfr;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
+ /* process triggers for RR deletions */
+ nsec3_delete_rr_trigger(db, &rrset->rrs[rrnum], zone, udbz);
+#endif
+ /* lower usage (possibly deleting other domains, and thus
+ * invalidating the current RR's domain pointers) */
+ rr_lower_usage(db, &rrset->rrs[rrnum]);
if(rrset->rr_count == 1) {
/* delete entire rrset */
- domain = rrset_delete(db, domain, rrset);
- if (domain && domain != prevdomain && !domain->nextdiff) {
- /* this domain is not yet in the diff chain */
- prevdomain->nextdiff = domain;
- }
+ rrset_delete(db, domain, rrset);
+#ifdef NSEC3
+ /* cleanup nsec3 */
+ nsec3_delete_rrset_trigger(db, domain, zone, type);
+#endif
+ /* see if the domain can be deleted (and inspect parents) */
+ domain_table_deldomain(db, domain);
} else {
/* swap out the bad RR and decrease the count */
rr_type* rrs_orig = rrset->rrs;
@@ -482,17 +680,40 @@ delete_RR(namedb_type* db, const dname_type* dname,
}
region_recycle(db->region, rrs_orig,
sizeof(rr_type) * rrset->rr_count);
+#ifdef NSEC3
+ if(type == TYPE_NSEC3PARAM && zone->nsec3_param) {
+ /* fixup nsec3_param pointer to same RR */
+ assert(zone->nsec3_param >= rrs_orig &&
+ zone->nsec3_param <=
+ rrs_orig+rrset->rr_count);
+ /* last moved to rrnum, others at same index*/
+ if(zone->nsec3_param == &rrs_orig[
+ rrset->rr_count-1])
+ zone->nsec3_param = &rrset->rrs[rrnum];
+ else
+ zone->nsec3_param =
+ (void*)zone->nsec3_param
+ -(void*)rrs_orig +
+ (void*)rrset->rrs;
+ }
+#endif /* NSEC3 */
rrset->rr_count --;
+#ifdef NSEC3
+ /* for type nsec3, the domain may have become a
+ * 'normal' domain with its remaining data now */
+ if(type == TYPE_NSEC3)
+ nsec3_rrsets_changed_add_prehash(db, domain,
+ zone);
+#endif /* NSEC3 */
}
}
return 1;
}
-static int
+int
add_RR(namedb_type* db, const dname_type* dname,
uint16_t type, uint16_t klass, uint32_t ttl,
- buffer_type* packet, size_t rdatalen, zone_type *zone,
- int is_axfr)
+ buffer_type* packet, size_t rdatalen, zone_type *zone, udb_ptr* udbz)
{
domain_type* domain;
rrset_type* rrset;
@@ -500,6 +721,7 @@ add_RR(namedb_type* db, const dname_type* dname,
rr_type *rrs_old;
ssize_t rdata_num;
int rrnum;
+ int rrset_added = 0;
domain = domain_table_find(db->domains, dname);
if(!domain) {
/* create the domain */
@@ -517,6 +739,7 @@ add_RR(namedb_type* db, const dname_type* dname,
rrset->rrs = 0;
rrset->rr_count = 0;
domain_add_rrset(domain, rrset);
+ rrset_added = 1;
}
/* dnames in rdata are normalized, conform RFC 4035,
@@ -529,21 +752,13 @@ add_RR(namedb_type* db, const dname_type* dname,
dname_to_string(dname,0));
return 0;
}
- rrnum = find_rr_num(rrset, type, klass, rdatas, rdata_num);
+ rrnum = find_rr_num(rrset, type, klass, rdatas, rdata_num, 1);
if(rrnum != -1) {
DEBUG(DEBUG_XFRD, 2, (LOG_ERR, "diff: RR <%s, %s> already exists",
dname_to_string(dname,0), rrtype_to_string(type)));
/* ignore already existing RR: lenient accepting of messages */
return 1;
}
- if(domain == zone->apex) {
- /* make sure we don't get multiple soa rrs */
- if (type == TYPE_SOA && rrset->rr_count > 0) {
- log_msg(LOG_ERR, "diff: multiple soa records for %s",
- dname_to_string(dname,0));
- return 0;
- }
- }
/* re-alloc the rrs and add the new */
rrs_old = rrset->rrs;
@@ -567,190 +782,104 @@ add_RR(namedb_type* db, const dname_type* dname,
/* see if it is a SOA */
if(domain == zone->apex) {
- if(type == TYPE_SOA) {
- uint32_t soa_minimum;
- zone->soa_rrset = rrset;
- zone->updated = 1;
- /* BUG #103 tweaked SOA ttl value */
- if(zone->soa_nx_rrset == 0) {
- zone->soa_nx_rrset = region_alloc(db->region,
- sizeof(rrset_type));
- if(!zone->soa_nx_rrset) {
- log_msg(LOG_ERR, "out of memory, %s:%d",
- __FILE__, __LINE__);
- exit(1);
- }
- zone->soa_nx_rrset->rr_count = 1;
- zone->soa_nx_rrset->next = 0;
- zone->soa_nx_rrset->zone = zone;
- zone->soa_nx_rrset->rrs = region_alloc(db->region,
- sizeof(rr_type));
- if(!zone->soa_nx_rrset->rrs) {
- log_msg(LOG_ERR, "out of memory, %s:%d",
- __FILE__, __LINE__);
- exit(1);
- }
- }
- memcpy(zone->soa_nx_rrset->rrs, rrset->rrs, sizeof(rr_type));
- memcpy(&soa_minimum, rdata_atom_data(rrset->rrs->rdatas[6]),
- rdata_atom_size(rrset->rrs->rdatas[6]));
- if (rrset->rrs->ttl > ntohl(soa_minimum)) {
- rrset->zone->soa_nx_rrset->rrs[0].ttl = ntohl(soa_minimum);
- }
- domain->has_SOA = 1;
- }
- if(type == TYPE_NS) {
- zone->ns_rrset = rrset;
- }
- if(type == TYPE_RRSIG) {
- int i;
- for (i = 0; i < rrset->rr_count; ++i) {
- if (rr_rrsig_type_covered(&rrset->rrs[i]) == TYPE_DNSKEY) {
- zone->is_secure = 1;
- break;
- }
- }
+ apex_rrset_checks(db, rrset, domain);
+#ifdef NSEC3
+ if(type == TYPE_NSEC3PARAM && zone->nsec3_param) {
+ /* the pointer just changed, fix it up to point
+ * to the same record */
+ assert(zone->nsec3_param >= rrs_old &&
+ zone->nsec3_param < rrs_old+rrset->rr_count);
+ /* in this order to make sure no overflow/underflow*/
+ zone->nsec3_param = (void*)zone->nsec3_param -
+ (void*)rrs_old + (void*)rrset->rrs;
}
+#endif /* NSEC3 */
}
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- if ((type == TYPE_NSEC3) &&
- (rrset->rr_count == 1)) {
- /* NSEC3 RRset just added */
- if (0 != namedb_add_nsec3_domain(db, domain, zone))
- return 0;
+ /* write the just-normalized RR to the udb */
+ if(!udb_write_rr(db->udb, udbz, &rrset->rrs[rrset->rr_count - 1])) {
+ log_msg(LOG_ERR, "could not add RR to nsd.db, disk-space?");
+ return 0;
}
- if (is_axfr == 0) {
- struct domain *parent = domain;
- do {
- if (0 != namedb_add_nsec3_mod_domain(db, parent))
- return 0;
- parent = parent->parent;
- } while (parent != zone->apex->parent);
+#ifdef NSEC3
+ if(rrset_added) {
+ domain_type* p = domain->parent;
+ nsec3_add_rrset_trigger(db, domain, zone, type);
+ /* go up and process (possibly created) empty nonterminals,
+ * until we hit the apex or root */
+ while(p && p->rrsets == NULL && !p->is_apex) {
+ nsec3_rrsets_changed_add_prehash(db, p, zone);
+ p = p->parent;
+ }
}
-#else
- (void)is_axfr;
-#endif /* !FULL_PREHASH */
+ nsec3_add_rr_trigger(db, &rrset->rrs[rrset->rr_count - 1], zone, udbz);
#endif /* NSEC3 */
-
return 1;
}
static zone_type*
-find_zone(namedb_type* db, const dname_type* zone_name, nsd_options_t* opt,
- size_t child_count)
+find_or_create_zone(namedb_type* db, const dname_type* zone_name,
+ nsd_options_t* opt, const char* zstr, const char* patname)
{
- domain_type *domain;
zone_type* zone;
- zone_options_t* opts;
- domain = domain_table_find(db->domains, zone_name);
- if(!domain) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfr: creating domain %s",
- dname_to_string(zone_name,0)));
- /* create the zone and domain of apex (zone has config options) */
- domain = domain_table_insert(db->domains, zone_name);
- } else {
- /* O(1) if SOA exists */
- zone = domain_find_zone(domain);
- /* if domain was empty (no rrsets, empty zone) search in zonelist */
- /* check apex to make sure we don't find a parent zone */
- if(!zone || zone->apex != domain)
- zone = namedb_find_zone(db, domain);
- if(zone) {
- assert(zone->apex == domain);
- return zone;
+ zone_options_t* zopt;
+ zone = namedb_find_zone(db, zone_name);
+ if(zone) {
+ return zone;
+ }
+ zopt = zone_options_find(opt, zone_name);
+ if(!zopt) {
+ /* if _implicit_ then insert as _part_of_config */
+ if(strncmp(patname, PATTERN_IMPLICIT_MARKER,
+ strlen(PATTERN_IMPLICIT_MARKER)) == 0) {
+ zopt = zone_options_create(opt->region);
+ if(!zopt) return 0;
+ zopt->part_of_config = 1;
+ zopt->name = region_strdup(opt->region, zstr);
+ zopt->pattern = pattern_options_find(opt, patname);
+ if(!zopt->name || !zopt->pattern) return 0;
+ if(!nsd_options_insert_zone(opt, zopt)) {
+ log_msg(LOG_ERR, "bad domain name or duplicate zone '%s' "
+ "pattern %s", zstr, patname);
+ }
+ } else {
+ /* create zone : presumably already added to zonelist
+ * by xfrd, who wrote the AXFR or IXFR to disk, so we only
+ * need to add it to our config.
+ * This process does not need linesize and offset zonelist */
+ zopt = zone_list_zone_insert(opt, zstr, patname, 0, 0);
+ if(!zopt)
+ return 0;
}
}
- /* lookup in config */
- opts = zone_options_find(opt, domain_dname(domain));
- if(!opts) {
- log_msg(LOG_ERR, "xfr: zone %s not in config.",
- dname_to_string(zone_name,0));
- return 0;
- }
- /* create the zone */
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfr: creating zone_type %s",
- dname_to_string(zone_name,0)));
- zone = (zone_type *) region_alloc(db->region, sizeof(zone_type));
- if(!zone) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- zone->next = db->zones;
- zone->opts = opts;
- db->zones = zone;
- db->zone_count++;
- zone->apex = domain;
- zone->soa_rrset = 0;
- zone->soa_nx_rrset = 0;
- zone->ns_rrset = 0;
-#ifdef NSEC3
- zone->nsec3_soa_rr = NULL;
- zone->nsec3_last = NULL;
-#endif
- zone->dirty = region_alloc(db->region, sizeof(uint8_t)*child_count);
- if(!zone->dirty) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- memset(zone->dirty, 0, sizeof(uint8_t)*child_count);
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- zone->nsec3_domains = NULL;
-
- if (0 != zone_nsec3_domains_create(db, zone)) {
- log_msg(LOG_ERR,
- "xfr: zone NSEC3 domains "
- "memory allocation failure");
- return 0;
- }
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- zone->number = db->zone_count;
- zone->is_secure = 0;
- zone->updated = 1;
- zone->is_ok = 0;
+ zone = namedb_zone_create(db, zone_name, zopt);
return zone;
}
-static void
+void
delete_zone_rrs(namedb_type* db, zone_type* zone)
{
rrset_type *rrset;
- domain_type *domain = zone->apex;
- domain_type *next = NULL;
- zone->updated = 1;
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- zone_nsec3_domains_destroy(db, zone);
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
+ domain_type *domain = zone->apex, *next;
/* go through entire tree below the zone apex (incl subzones) */
- while(domain && dname_is_subdomain(
- domain_dname(domain), domain_dname(zone->apex)))
+ while(domain && domain_is_subdomain(domain, zone->apex))
{
DEBUG(DEBUG_XFRD,2, (LOG_INFO, "delete zone visit %s",
- dname_to_string(domain_dname(domain),0)));
+ domain_to_string(domain)));
/* delete all rrsets of the zone */
while((rrset = domain_find_any_rrset(domain, zone))) {
- (void)rrset_delete(db, domain, rrset);
+ /* lower usage can delete other domains */
+ rrset_lower_usage(db, rrset);
+ /* rrset del does not delete our domain(yet) */
+ rrset_delete(db, domain, rrset);
}
+		/* the upcoming delete could remove parents, but nothing next
+		 * to or after this domain, so store the next pointer first */
next = domain_next(domain);
- domain->nextdiff = next;
+ /* see if the domain can be deleted (and inspect parents) */
+ domain_table_deldomain(db, domain);
domain = next;
}
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- if (0 != zone_nsec3_domains_create(db, zone)) {
- log_msg(LOG_ERR,
- "Zone %s: unable to create zone NSEC3 prehash table",
- dname_to_string(domain_dname(zone->apex),
- NULL));
- }
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "axfrdel: recyclebin holds %lu bytes",
(unsigned long) region_get_recycle_size(db->region)));
@@ -760,34 +889,19 @@ delete_zone_rrs(namedb_type* db, zone_type* zone)
#endif
assert(zone->soa_rrset == 0);
- /* keep zone->soa_nx_rrset alloced */
+ /* keep zone->soa_nx_rrset alloced: it is reused */
assert(zone->ns_rrset == 0);
assert(zone->is_secure == 0);
- assert(zone->updated == 1);
-}
-
-/* fix empty terminals */
-static void
-fix_empty_terminals(zone_type* zone_db)
-{
- domain_type* domain = zone_db->apex, *ce = NULL, *next = NULL;
- while (domain) {
- ce = rrset_delete_empty_terminals(domain, ce);
- next = domain->nextdiff;
- domain->nextdiff = NULL;
- domain = next;
- }
}
/* return value 0: syntaxerror,badIXFR, 1:OK, 2:done_and_skip_it */
static int
-apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
- const char* zone, uint32_t serialno, nsd_options_t* opt,
- uint16_t id, uint32_t seq_nr, uint32_t seq_total,
+apply_ixfr(namedb_type* db, FILE *in, const char* zone, uint32_t serialno,
+ nsd_options_t* opt, uint32_t seq_nr, uint32_t seq_total,
int* is_axfr, int* delete_mode, int* rr_count,
- size_t child_count)
+ udb_ptr* udbz, struct zone** zone_res, const char* patname, int* bytes)
{
- uint32_t filelen, msglen, pkttype, timestamp[2];
+ uint32_t msglen, checklen, pkttype;
int qcount, ancount, counter;
buffer_type* packet;
region_type* region;
@@ -795,36 +909,24 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
uint16_t rrlen;
const dname_type *dname_zone, *dname;
zone_type* zone_db;
- domain_type* last_in_list;
- char file_zone_name[3072];
- uint32_t file_serial, file_seq_nr;
- uint16_t file_id;
- off_t mempos;
-
- memmove(&mempos, startpos, sizeof(off_t));
- if(fseeko(in, mempos, SEEK_SET) == -1) {
- log_msg(LOG_INFO, "could not fseeko: %s.", strerror(errno));
- return 0;
- }
- /* read ixfr packet RRs and apply to in memory db */
- if(!diff_read_32(in, &pkttype) || pkttype != DIFF_PART_IXFR) {
+	/* note that errors should not really happen here, because xfrd has
+	 * checked all dnames and RRs before committing the transfer; that
+	 * is why errors are fatal (exit the process): they indicate
+	 * something internal or a bad disk. */
+
+ /* read ixfr packet RRs and apply to in memory db */
+ if(!diff_read_32(in, &pkttype) || pkttype != DIFF_PART_XXFR) {
log_msg(LOG_ERR, "could not read type or wrong type");
return 0;
}
- if(!diff_read_32(in, &timestamp[0]) ||
- !diff_read_32(in, &timestamp[1])) {
- log_msg(LOG_ERR, "could not read timestamp");
- return 0;
- }
- if(!diff_read_32(in, &filelen)) {
+ if(!diff_read_32(in, &msglen)) {
log_msg(LOG_ERR, "could not read len");
return 0;
}
- /* read header */
- if(filelen < QHEADERSZ + sizeof(uint32_t)*3 + sizeof(uint16_t)) {
+ if(msglen < QHEADERSZ) {
log_msg(LOG_ERR, "msg too short");
return 0;
}
@@ -834,35 +936,7 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
log_msg(LOG_ERR, "out of memory");
return 0;
}
-
- if(!diff_read_str(in, file_zone_name, sizeof(file_zone_name)) ||
- !diff_read_32(in, &file_serial) ||
- !diff_read_16(in, &file_id) ||
- !diff_read_32(in, &file_seq_nr))
- {
- log_msg(LOG_ERR, "could not part data");
- region_destroy(region);
- return 0;
- }
-
- if(strcmp(file_zone_name, zone) != 0 || serialno != file_serial ||
- id != file_id || seq_nr != file_seq_nr) {
- log_msg(LOG_ERR, "internal error: reading part with changed id");
- region_destroy(region);
- return 0;
- }
- msglen = filelen - sizeof(uint32_t)*3 - sizeof(uint16_t)
- - strlen(file_zone_name);
packet = buffer_create(region, QIOBUFSZ);
- dname_zone = dname_parse(region, zone);
- zone_db = find_zone(db, dname_zone, opt, child_count);
- if(!zone_db) {
- log_msg(LOG_ERR, "no zone exists");
- region_destroy(region);
- /* break out and stop the IXFR, ignore it */
- return 2;
- }
-
if(msglen > QIOBUFSZ) {
log_msg(LOG_ERR, "msg too long");
region_destroy(region);
@@ -876,6 +950,23 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
}
buffer_set_limit(packet, msglen);
+ /* see if check on data fails: checks that we are not reading
+ * random garbage */
+ if(!diff_read_32(in, &checklen) || checklen != msglen) {
+ log_msg(LOG_ERR, "transfer part has incorrect checkvalue");
+ return 0;
+ }
+ *bytes += msglen;
+
+ dname_zone = dname_parse(region, zone);
+ zone_db = find_or_create_zone(db, dname_zone, opt, zone, patname);
+ if(!zone_db) {
+ log_msg(LOG_ERR, "could not create zone %s %s", zone, patname);
+ region_destroy(region);
+ return 0;
+ }
+ *zone_res = zone_db;
+
/* only answer section is really used, question, additional and
authority section RRs are skipped */
qcount = QDCOUNT(packet);
@@ -931,8 +1022,8 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
}
if(buffer_read_u32(packet) != serialno) {
buffer_skip(packet, -4);
- log_msg(LOG_ERR, "SOA serial %d different from commit %d",
- buffer_read_u32(packet), serialno);
+ log_msg(LOG_ERR, "SOA serial %u different from commit %u",
+ (unsigned)buffer_read_u32(packet), (unsigned)serialno);
region_destroy(region);
return 0;
}
@@ -941,13 +1032,11 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
*rr_count = 1;
*is_axfr = 0;
*delete_mode = 0;
-
DEBUG(DEBUG_XFRD,2, (LOG_INFO, "diff: %s start count %d, ax %d, delmode %d",
dname_to_string(dname_zone, 0), *rr_count, *is_axfr, *delete_mode));
}
else counter = 0;
- last_in_list = zone_db->apex;
for(; counter < ancount; ++counter,++(*rr_count))
{
uint16_t type, klass;
@@ -978,7 +1067,15 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
if(*rr_count == 1 && type != TYPE_SOA) {
/* second RR: if not SOA: this is an AXFR; delete all zone contents */
+#ifdef NSEC3
+ nsec3_hash_tree_clear(zone_db);
+#endif
delete_zone_rrs(db, zone_db);
+ udb_zone_clear(db->udb, udbz);
+#ifdef NSEC3
+ nsec3_clear_precompile(db, zone_db);
+ zone_db->nsec3_param = NULL;
+#endif /* NSEC3 */
/* add everything else (incl end SOA) */
*delete_mode = 0;
*is_axfr = 1;
@@ -1000,7 +1097,15 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
thisserial = buffer_read_u32(packet);
if(thisserial == serialno) {
/* AXFR */
+#ifdef NSEC3
+ nsec3_hash_tree_clear(zone_db);
+#endif
delete_zone_rrs(db, zone_db);
+ udb_zone_clear(db->udb, udbz);
+#ifdef NSEC3
+ nsec3_clear_precompile(db, zone_db);
+ zone_db->nsec3_param = NULL;
+#endif /* NSEC3 */
*delete_mode = 0;
*is_axfr = 1;
}
@@ -1040,26 +1145,22 @@ apply_ixfr(namedb_type* db, FILE *in, const off_t* startpos,
&& seq_nr == seq_total-1) {
continue; /* do not delete final SOA RR for IXFR */
}
- if(!delete_RR(db, dname, type, klass, last_in_list, packet,
- rrlen, zone_db, region, *is_axfr)) {
+ if(!delete_RR(db, dname, type, klass, packet,
+ rrlen, zone_db, region, udbz)) {
region_destroy(region);
return 0;
}
- if (!*is_axfr && last_in_list->nextdiff) {
- last_in_list = last_in_list->nextdiff;
- }
}
else
{
/* add this rr */
if(!add_RR(db, dname, type, klass, ttl, packet,
- rrlen, zone_db, *is_axfr)) {
+ rrlen, zone_db, udbz)) {
region_destroy(region);
return 0;
}
}
}
- fix_empty_terminals(zone_db);
region_destroy(region);
return 1;
}
@@ -1089,601 +1190,764 @@ check_for_bad_serial(namedb_type* db, const char* zone_str, uint32_t old_serial)
return 0;
}
-/* for multiple tcp packets use a data structure that has
- * a rbtree (zone_names) with for each zone:
- * has a rbtree by sequence number
- * with inside a serial_number and ID (for checking only)
- * and contains a off_t to the IXFR packet in the file.
- * so when you get a commit for a zone, get zone obj, find sequence,
- * then check if you have all sequence numbers available. Apply all packets.
- */
-struct diff_read_data {
- /* rbtree of struct diff_zone*/
- rbtree_t* zones;
- /* region for allocation */
- region_type* region;
-};
-struct diff_zone {
- /* key is dname of zone */
- rbnode_t node;
- /* rbtree of struct diff_xfrpart */
- rbtree_t* parts;
-};
-struct diff_xfrpart {
- /* key is sequence number */
- rbnode_t node;
- uint32_t seq_nr;
- uint32_t new_serial;
- uint16_t id;
- off_t file_pos;
-};
-
-static struct diff_read_data*
-diff_read_data_create()
-{
- region_type* region = region_create(xalloc, free);
- struct diff_read_data* data = (struct diff_read_data*)
- region_alloc(region, sizeof(struct diff_read_data));
- if(!data) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- data->region = region;
- data->zones = rbtree_create(region,
- (int (*)(const void *, const void *)) dname_compare);
- return data;
-}
-
-static struct diff_zone*
-diff_read_find_zone(struct diff_read_data* data, const char* name)
-{
- const dname_type* dname = dname_parse(data->region, name);
- struct diff_zone* zp = (struct diff_zone*)
- rbtree_search(data->zones, dname);
- return zp;
-}
-
-static int intcompf(const void* a, const void* b)
-{
- if(*(uint32_t*)a < *(uint32_t*)b)
- return -1;
- if(*(uint32_t*)a > *(uint32_t*)b)
- return +1;
- return 0;
-}
-
-static struct diff_zone*
-diff_read_insert_zone(struct diff_read_data* data, const char* name)
-{
- const dname_type* dname = dname_parse(data->region, name);
- struct diff_zone* zp = region_alloc(data->region,
- sizeof(struct diff_zone));
- if(!zp) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- zp->node = *RBTREE_NULL;
- zp->node.key = dname;
- zp->parts = rbtree_create(data->region, intcompf);
- rbtree_insert(data->zones, (rbnode_t*)zp);
- return zp;
-}
-
-static struct diff_xfrpart*
-diff_read_find_part(struct diff_zone* zp, uint32_t seq_nr)
-{
- struct diff_xfrpart* xp = (struct diff_xfrpart*)
- rbtree_search(zp->parts, &seq_nr);
- return xp;
-}
-
-static struct diff_xfrpart*
-diff_read_insert_part(struct diff_read_data* data,
- struct diff_zone* zp, uint32_t seq_nr)
-{
- struct diff_xfrpart* xp = region_alloc(data->region,
- sizeof(struct diff_xfrpart));
- if(!xp) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- xp->node = *RBTREE_NULL;
- xp->node.key = &xp->seq_nr;
- xp->seq_nr = seq_nr;
- rbtree_insert(zp->parts, (rbnode_t*)xp);
- return xp;
-}
-
-/* mark commit as rollback and close inputfile, fatal exits */
-static void
-mark_and_exit(nsd_options_t* opt, FILE* f, off_t commitpos, const char* desc)
-{
- const char* filename = opt->difffile;
- fclose(f);
- if(!(f = fopen(filename, "r+"))) {
- log_msg(LOG_ERR, "mark xfr, failed to re-open difffile %s: %s",
- filename, strerror(errno));
- } else if(fseeko(f, commitpos, SEEK_SET) == -1) {
- log_msg(LOG_INFO, "could not fseeko: %s.", strerror(errno));
- fclose(f);
- } else {
- uint8_t c = 0;
- (void)write_data(f, &c, sizeof(c));
- fclose(f);
- log_msg(LOG_ERR, "marked xfr as failed: %s", desc);
- log_msg(LOG_ERR, "marked xfr so that next reload can succeed");
- }
- exit(1);
-}
-
static int
-read_sure_part(namedb_type* db, FILE *in, nsd_options_t* opt,
- struct diff_read_data* data, struct diff_log** log,
- size_t child_count)
+apply_ixfr_for_zone(nsd_type* nsd, zone_type* zonedb, FILE* in,
+ nsd_options_t* opt, udb_base* taskudb, udb_ptr* last_task,
+ uint32_t xfrfilenr)
{
char zone_buf[3072];
char log_buf[5120];
- uint32_t old_serial, new_serial, num_parts;
- uint16_t id;
+ char patname_buf[2048];
+
+ uint32_t old_serial, new_serial, num_parts, type;
+ uint64_t time_end_0, time_start_0;
+ uint32_t time_end_1, time_start_1;
uint8_t committed;
- struct diff_zone *zp;
uint32_t i;
- int have_all_parts = 1;
- struct diff_log* thislog = 0;
- off_t commitpos;
+ int num_bytes = 0;
/* read zone name and serial */
- if(!diff_read_str(in, zone_buf, sizeof(zone_buf)) ||
- !diff_read_32(in, &old_serial) ||
- !diff_read_32(in, &new_serial) ||
- !diff_read_16(in, &id) ||
- !diff_read_32(in, &num_parts)) {
- log_msg(LOG_ERR, "diff file bad commit part");
+ if(!diff_read_32(in, &type)) {
+ log_msg(LOG_ERR, "diff file too short");
return 0;
}
- commitpos = ftello(in); /* position of commit byte */
- if(commitpos == -1) {
- log_msg(LOG_INFO, "could not ftello: %s.", strerror(errno));
+ if(type != DIFF_PART_XFRF) {
+ log_msg(LOG_ERR, "xfr file has wrong format");
return 0;
+
}
+	/* committed and num_parts come first because they need to be
+	 * updated once the rest is written. The log buf is not certain
+	 * until it is done, so it sits at the end of the file. The patname
+	 * is included in case a new zone is created, so we know which
+	 * options-pattern to use */
if(!diff_read_8(in, &committed) ||
- !diff_read_str(in, log_buf, sizeof(log_buf)) )
- {
+ !diff_read_32(in, &num_parts) ||
+ !diff_read_64(in, &time_end_0) ||
+ !diff_read_32(in, &time_end_1) ||
+ !diff_read_32(in, &old_serial) ||
+ !diff_read_32(in, &new_serial) ||
+ !diff_read_64(in, &time_start_0) ||
+ !diff_read_32(in, &time_start_1) ||
+ !diff_read_str(in, zone_buf, sizeof(zone_buf)) ||
+ !diff_read_str(in, patname_buf, sizeof(patname_buf))) {
log_msg(LOG_ERR, "diff file bad commit part");
return 0;
}
- if(log) {
- thislog = (struct diff_log*)region_alloc(db->region, sizeof(struct diff_log));
- if(!thislog) {
- log_msg(LOG_ERR, "out of memory, %s:%d", __FILE__, __LINE__);
- exit(1);
- }
- thislog->zone_name = region_strdup(db->region, zone_buf);
- thislog->comment = region_strdup(db->region, log_buf);
- thislog->error = 0;
- thislog->next = *log;
- *log = thislog;
- }
-
/* has been read in completely */
- zp = diff_read_find_zone(data, zone_buf);
- if(!zp) {
- log_msg(LOG_ERR, "diff file commit without IXFR");
- if(thislog)
- thislog->error = "error no IXFR parts";
- return 1;
+ if(strcmp(zone_buf, dname_to_string(zonedb->apex->dname,0)) != 0) {
+ log_msg(LOG_ERR, "file %s does not match task %s",
+ zone_buf, dname_to_string(zonedb->apex->dname,0));
+ return 0;
}
- if(committed && check_for_bad_serial(db, zone_buf, old_serial)) {
- DEBUG(DEBUG_XFRD,1, (LOG_ERR,
- "skipping diff file commit with bad serial"));
- zp->parts->root = RBTREE_NULL;
- zp->parts->count = 0;
- if(thislog)
- thislog->error = "error bad serial";
- return 1;
+ if(!committed) {
+ log_msg(LOG_ERR, "diff file %s was not committed", zone_buf);
+ return 0;
}
- for(i=0; i<num_parts; i++) {
- struct diff_xfrpart *xp = diff_read_find_part(zp, i);
- if(!xp || xp->id != id || xp->new_serial != new_serial) {
- have_all_parts = 0;
- }
+ if(num_parts == 0) {
+ log_msg(LOG_ERR, "diff file %s was not completed", zone_buf);
+ return 0;
}
- if(!have_all_parts) {
+ if(check_for_bad_serial(nsd->db, zone_buf, old_serial)) {
DEBUG(DEBUG_XFRD,1, (LOG_ERR,
- "skipping diff file commit without all parts"));
- if(thislog)
- thislog->error = "error missing parts";
+ "skipping diff file commit with bad serial"));
+ return 1;
}
- if(committed && have_all_parts)
+ if(committed)
{
int is_axfr=0, delete_mode=0, rr_count=0;
- off_t resume_pos;
+ const dname_type* apex = zonedb->apex->dname;
+ udb_ptr z;
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- struct region *region;
- dname_type const *zone_dname;
- struct zone *zone;
-
- region = region_create(xalloc, free);
- if (region == NULL) {
- log_msg(LOG_ERR, "out of memory");
- return 0;
- }
- zone_dname = dname_parse(region, zone_buf);
- if (zone_dname == NULL) {
- log_msg(LOG_ERR, "out of memory");
- region_destroy(region);
- return 0;
- }
- zone = find_zone(db, zone_dname, opt, child_count);
- region_destroy(region);
- if (zone == NULL) {
- log_msg(LOG_ERR, "no zone exists");
- /* just stop trying applying ixfr */
- return 1;
- }
- if (0 != namedb_nsec3_mod_domains_create(db)) {
- log_msg(LOG_ERR,
- "unable to allocate space "
- "for modified NSEC3 domains");
- return 0;
+ DEBUG(DEBUG_XFRD,1, (LOG_INFO, "processing xfr: %s", zone_buf));
+ if(udb_base_get_userflags(nsd->db->udb) != 0) {
+ log_msg(LOG_ERR, "database corrupted, cannot update");
+ xfrd_unlink_xfrfile(nsd, xfrfilenr);
+ exit(1);
}
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "processing xfr: %s", log_buf));
-
- resume_pos = ftello(in);
- if(resume_pos == -1) {
- log_msg(LOG_INFO, "could not ftello: %s.", strerror(errno));
- return 0;
+ /* all parts were checked by xfrd before commit */
+ if(!udb_zone_search(nsd->db->udb, &z, dname_name(apex),
+ apex->name_size)) {
+ /* create it */
+ if(!udb_zone_create(nsd->db->udb, &z, dname_name(apex),
+ apex->name_size)) {
+ /* out of disk space perhaps */
+ log_msg(LOG_ERR, "could not udb_create_zone "
+ "%s, disk space full?", log_buf);
+ return 0;
+ }
}
+ /* set the udb dirty until we are finished applying changes */
+ udb_base_set_userflags(nsd->db->udb, 1);
+ /* read and apply all of the parts */
for(i=0; i<num_parts; i++) {
- struct diff_xfrpart *xp = diff_read_find_part(zp, i);
int ret;
DEBUG(DEBUG_XFRD,2, (LOG_INFO, "processing xfr: apply part %d", (int)i));
- ret = apply_ixfr(db, in, &xp->file_pos, zone_buf, new_serial, opt,
- id, xp->seq_nr, num_parts, &is_axfr, &delete_mode,
- &rr_count, child_count);
+ ret = apply_ixfr(nsd->db, in, zone_buf, new_serial, opt,
+ i, num_parts, &is_axfr, &delete_mode,
+ &rr_count, &z, &zonedb, patname_buf, &num_bytes);
if(ret == 0) {
- log_msg(LOG_ERR, "bad ixfr packet part %d in %s", (int)i,
- opt->difffile);
- mark_and_exit(opt, in, commitpos, log_buf);
+ log_msg(LOG_ERR, "bad ixfr packet part %d in diff file for %s", (int)i, zone_buf);
+ xfrd_unlink_xfrfile(nsd, xfrfilenr);
+ /* the udb is still dirty, it is bad */
+ exit(1);
} else if(ret == 2) {
break;
}
}
+ udb_base_set_userflags(nsd->db->udb, 0);
+ /* read the final log_str: but do not fail on it */
+ if(!diff_read_str(in, log_buf, sizeof(log_buf))) {
+ log_msg(LOG_ERR, "could not read log for transfer %s",
+ zone_buf);
+ snprintf(log_buf, sizeof(log_buf), "error reading log");
+ }
#ifdef NSEC3
-#ifndef FULL_PREHASH
- if (is_axfr != 0)
- prehash_zone(db, zone);
- else
- prehash_zone_incremental(db, zone);
-#endif /* !FULL_PREHASH */
+ if(zonedb) prehash_zone(nsd->db, zonedb);
#endif /* NSEC3 */
-
- if(fseeko(in, resume_pos, SEEK_SET) == -1) {
- log_msg(LOG_INFO, "could not fseeko: %s.", strerror(errno));
- return 0;
+ zonedb->is_changed = 1;
+ ZONE(&z)->is_changed = 1;
+ ZONE(&z)->mtime = time_end_0;
+ udb_zone_set_log_str(nsd->db->udb, &z, log_buf);
+ udb_ptr_unlink(&z, nsd->db->udb);
+ if(taskudb) task_new_soainfo(taskudb, last_task, zonedb);
+
+ if(1 <= verbosity) {
+ double elapsed = (double)(time_end_0 - time_start_0)+
+ (double)((double)time_end_1
+ -(double)time_start_1) / 1000000.0;
+ VERBOSITY(2, (LOG_INFO, "zone %s %s of %d bytes in %g seconds",
+ zone_buf, log_buf, num_bytes, elapsed));
}
}
else {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "skipping xfr: %s", log_buf));
}
-
- /* clean out the parts for the zone after the commit/rollback */
- zp->parts->root = RBTREE_NULL;
- zp->parts->count = 0;
return 1;
}
+struct udb_base* task_file_create(const char* file)
+{
+ return udb_base_create_new(file, &namedb_walkfunc, NULL);
+}
+
static int
-store_ixfr_data(FILE *in, uint32_t len, struct diff_read_data* data, off_t* startpos)
+task_create_new_elem(struct udb_base* udb, udb_ptr* last, udb_ptr* e,
+ size_t sz, const dname_type* zname)
{
- char zone_name[3072];
- struct diff_zone* zp;
- struct diff_xfrpart* xp;
- uint32_t new_serial, seq;
- uint16_t id;
- if(!diff_read_str(in, zone_name, sizeof(zone_name)) ||
- !diff_read_32(in, &new_serial) ||
- !diff_read_16(in, &id) ||
- !diff_read_32(in, &seq)) {
- log_msg(LOG_INFO, "could not read ixfr store info: file format error");
+ if(!udb_ptr_alloc_space(e, udb, udb_chunk_type_task, sz)) {
return 0;
}
- len -= sizeof(uint32_t)*3 + sizeof(uint16_t) + strlen(zone_name);
- if(fseeko(in, len, SEEK_CUR) == -1)
- log_msg(LOG_INFO, "fseek failed: %s", strerror(errno));
- /* store the info */
- zp = diff_read_find_zone(data, zone_name);
- if(!zp)
- zp = diff_read_insert_zone(data, zone_name);
- xp = diff_read_find_part(zp, seq);
- if(xp) {
- log_msg(LOG_INFO, "discarding partial xfr part: %s %d", zone_name, seq);
- /* overwrite with newer value (which probably relates to next commit) */
+ if(udb_ptr_is_null(last)) {
+ udb_base_set_userdata(udb, e->data);
+ } else {
+ udb_rptr_set_ptr(&TASKLIST(last)->next, udb, e);
}
- else {
- xp = diff_read_insert_part(data, zp, seq);
+ udb_ptr_set_ptr(last, udb, e);
+
+ /* fill in tasklist item */
+ udb_rel_ptr_init(&TASKLIST(e)->next);
+ TASKLIST(e)->size = sz;
+ TASKLIST(e)->oldserial = 0;
+ TASKLIST(e)->newserial = 0;
+ TASKLIST(e)->yesno = 0;
+
+ if(zname) {
+ memmove(TASKLIST(e)->zname, zname, dname_total_size(zname));
}
- xp->new_serial = new_serial;
- xp->id = id;
- memmove(&xp->file_pos, startpos, sizeof(off_t));
return 1;
}
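
task_create_new_elem keeps the task list as a singly linked list inside the task udb: the base userdata slot points at the head, the caller carries the tail in 'last', and each new element is chained after the tail so appends stay O(1); task_clear later walks from the head and frees every element. An in-memory analogue with plain pointers instead of udb_ptr (illustrative only):

#include <stdlib.h>

struct toy_task {
	struct toy_task* next;
	int task_type;
};

static struct toy_task*
toy_task_append(struct toy_task** head, struct toy_task** last, int type)
{
	struct toy_task* e = calloc(1, sizeof(*e));
	if(!e)
		return NULL;
	e->task_type = type;
	if(*last == NULL)
		*head = e;		/* first task becomes the list head */
	else
		(*last)->next = e;	/* otherwise chain after the tail */
	*last = e;
	return e;
}
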
-static int
-read_process_part(namedb_type* db, FILE *in, uint32_t type,
- nsd_options_t* opt, struct diff_read_data* data,
- struct diff_log** log, size_t child_count, off_t* startpos)
+void task_new_soainfo(struct udb_base* udb, udb_ptr* last, struct zone* z)
{
- uint32_t len, len2;
+ /* calculate size */
+ udb_ptr e;
+ size_t sz;
+ const dname_type* apex, *ns, *em;
+ if(!z || !z->apex || !domain_dname(z->apex))
+ return; /* safety check */
+
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "nsd: add soa info for zone %s",
+ domain_to_string(z->apex)));
+ apex = domain_dname(z->apex);
+ sz = sizeof(struct task_list_d) + dname_total_size(apex);
+ if(z->soa_rrset) {
+ ns = domain_dname(rdata_atom_domain(
+ z->soa_rrset->rrs[0].rdatas[0]));
+ em = domain_dname(rdata_atom_domain(
+ z->soa_rrset->rrs[0].rdatas[1]));
+ sz += sizeof(uint32_t)*6 + sizeof(uint8_t)*2
+ + ns->name_size + em->name_size;
+ } else {
+ ns = 0;
+ em = 0;
+ }
- /* read length */
- if(!diff_read_32(in, &len))
- return 1;
- /* read content */
- if(type == DIFF_PART_IXFR) {
- DEBUG(DEBUG_XFRD,2, (LOG_INFO, "part IXFR len %d", len));
- if(!store_ixfr_data(in, len, data, startpos))
- return 0;
+ /* create new task_list item */
+ if(!task_create_new_elem(udb, last, &e, sz, apex)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add SOAINFO");
+ return;
}
- else if(type == DIFF_PART_SURE) {
- DEBUG(DEBUG_XFRD,2, (LOG_INFO, "part SURE len %d", len));
- if(!read_sure_part(db, in, opt, data, log, child_count))
- return 0;
- } else {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "unknown part %x len %d", type, len));
- return 0;
+ TASKLIST(&e)->task_type = task_soa_info;
+
+ if(z->soa_rrset) {
+ uint32_t ttl = htonl(z->soa_rrset->rrs[0].ttl);
+ uint8_t* p = (uint8_t*)TASKLIST(&e)->zname;
+ p += dname_total_size(apex);
+ memmove(p, &ttl, sizeof(uint32_t));
+ p += sizeof(uint32_t);
+ memmove(p, &ns->name_size, sizeof(uint8_t));
+ p += sizeof(uint8_t);
+ memmove(p, dname_name(ns), ns->name_size);
+ p += ns->name_size;
+ memmove(p, &em->name_size, sizeof(uint8_t));
+ p += sizeof(uint8_t);
+ memmove(p, dname_name(em), em->name_size);
+ p += em->name_size;
+ memmove(p, rdata_atom_data(z->soa_rrset->rrs[0].rdatas[2]),
+ sizeof(uint32_t));
+ p += sizeof(uint32_t);
+ memmove(p, rdata_atom_data(z->soa_rrset->rrs[0].rdatas[3]),
+ sizeof(uint32_t));
+ p += sizeof(uint32_t);
+ memmove(p, rdata_atom_data(z->soa_rrset->rrs[0].rdatas[4]),
+ sizeof(uint32_t));
+ p += sizeof(uint32_t);
+ memmove(p, rdata_atom_data(z->soa_rrset->rrs[0].rdatas[5]),
+ sizeof(uint32_t));
+ p += sizeof(uint32_t);
+ memmove(p, rdata_atom_data(z->soa_rrset->rrs[0].rdatas[6]),
+ sizeof(uint32_t));
}
- /* read length */
- if(!diff_read_32(in, &len2))
- return 1; /* short read is OK */
- /* verify length */
- if(len != len2)
- return 0; /* bad data is wrong */
- return 1;
+ udb_ptr_unlink(&e, udb);
}
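
After the zone dname, task_new_soainfo packs the SOA summary as: the TTL in network order, the two length-prefixed names (primary nameserver and responsible email), and then the remaining five 32-bit SOA fields (serial, refresh, retry, expire, minimum) copied straight out of the rdata. A hedged sketch of that packing against a plain buffer instead of the task element (field names are descriptive only):

#include <stdint.h>
#include <string.h>

static uint8_t*
pack_soa_summary(uint8_t* p, uint32_t ttl_netorder,
	const uint8_t* ns, uint8_t ns_len,
	const uint8_t* em, uint8_t em_len,
	const uint32_t vals[5])	/* serial, refresh, retry, expire, minimum */
{
	memmove(p, &ttl_netorder, sizeof(uint32_t));
	p += sizeof(uint32_t);
	*p++ = ns_len;			/* length-prefixed nameserver */
	memmove(p, ns, ns_len);
	p += ns_len;
	*p++ = em_len;			/* length-prefixed email name */
	memmove(p, em, em_len);
	p += em_len;
	memmove(p, vals, 5 * sizeof(uint32_t));
	return p + 5 * sizeof(uint32_t);
}
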
-/*
- * Finds smallest offset in data structs
- * returns 0 if no offsets in the data structs.
- */
-static int
-find_smallest_offset(struct diff_read_data* data, off_t* offset)
+void task_process_sync(struct udb_base* taskudb)
{
- int found_any = 0;
- struct diff_zone* dz;
- struct diff_xfrpart* dx;
- off_t mem_offset, mem_fpos;
+ /* need to sync before other process uses the mmap? */
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "task procsync %s size %d",
+ taskudb->fname, (int)taskudb->base_size));
+ (void)taskudb;
+}
- if(!data || !data->zones)
- return 0;
- RBTREE_FOR(dz, struct diff_zone*, data->zones)
- {
- if(!dz->parts)
- continue;
- RBTREE_FOR(dx, struct diff_xfrpart*, dz->parts)
- {
- memmove(&mem_fpos, &dx->file_pos, sizeof(off_t));
+void task_remap(struct udb_base* taskudb)
+{
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "task remap %s size %d",
+ taskudb->fname, (int)taskudb->glob_data->fsize));
+ udb_base_remap_process(taskudb);
+}
- if(found_any) {
- memmove(&mem_offset, offset, sizeof(off_t));
+void task_clear(struct udb_base* taskudb)
+{
+ udb_ptr t, n;
+ udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
+ udb_base_set_userdata(taskudb, 0);
+ udb_ptr_init(&n, taskudb);
+ while(!udb_ptr_is_null(&t)) {
+ udb_ptr_set_rptr(&n, taskudb, &TASKLIST(&t)->next);
+ udb_rptr_zero(&TASKLIST(&t)->next, taskudb);
+ udb_ptr_free_space(&t, taskudb, TASKLIST(&t)->size);
+ udb_ptr_set_ptr(&t, taskudb, &n);
+ }
+ udb_ptr_unlink(&t, taskudb);
+ udb_ptr_unlink(&n, taskudb);
+}
- if(mem_fpos < mem_offset)
- memmove(offset, &mem_fpos, sizeof(off_t));
- } else {
- found_any = 1;
- memmove(offset, &mem_fpos, sizeof(off_t));
- }
- }
+void task_new_expire(struct udb_base* udb, udb_ptr* last,
+ const struct dname* z, int expired)
+{
+ udb_ptr e;
+ if(!z) return;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add expire info for zone %s",
+ dname_to_string(z,NULL)));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)+
+ dname_total_size(z), z)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add expire");
+ return;
}
+ TASKLIST(&e)->task_type = task_expire;
+ TASKLIST(&e)->yesno = expired;
+ udb_ptr_unlink(&e, udb);
+}
- return found_any;
+void task_new_check_zonefiles(udb_base* udb, udb_ptr* last,
+ const dname_type* zone)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task checkzonefiles"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d) +
+ (zone?dname_total_size(zone):0), zone)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add check_zones");
+ return;
+ }
+ TASKLIST(&e)->task_type = task_check_zonefiles;
+ TASKLIST(&e)->yesno = (zone!=NULL);
+ udb_ptr_unlink(&e, udb);
}
-int
-diff_read_file(namedb_type* db, nsd_options_t* opt, struct diff_log** log,
+void task_new_write_zonefiles(udb_base* udb, udb_ptr* last,
+ const dname_type* zone)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task writezonefiles"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d) +
+ (zone?dname_total_size(zone):0), zone)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add writezones");
+ return;
+ }
+ TASKLIST(&e)->task_type = task_write_zonefiles;
+ TASKLIST(&e)->yesno = (zone!=NULL);
+ udb_ptr_unlink(&e, udb);
+}
+
+void task_new_set_verbosity(udb_base* udb, udb_ptr* last, int v)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task set_verbosity"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d),
+ NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add set_v");
+ return;
+ }
+ TASKLIST(&e)->task_type = task_set_verbosity;
+ TASKLIST(&e)->yesno = v;
+ udb_ptr_unlink(&e, udb);
+}
+
+#ifdef BIND8_STATS
+void* task_new_stat_info(udb_base* udb, udb_ptr* last, struct nsdst* stat,
size_t child_count)
{
- const char* filename = opt->difffile;
- FILE *df;
- uint32_t type, timestamp[2], curr_timestamp[2];
- struct diff_read_data* data = diff_read_data_create();
- off_t startpos;
+ void* p;
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task stat_info"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)+
+ sizeof(*stat) + sizeof(stc_t)*child_count, NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add stati");
+ return NULL;
+ }
+ TASKLIST(&e)->task_type = task_stat_info;
+ p = TASKLIST(&e)->zname;
+ memcpy(p, stat, sizeof(*stat));
+ udb_ptr_unlink(&e, udb);
+ return p + sizeof(*stat);
+}
+#endif /* BIND8_STATS */
- df = fopen(filename, "r");
- if(!df) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "could not open file %s for reading: %s",
- filename, strerror(errno)));
- region_destroy(data->region);
- return 1;
+void
+task_new_add_zone(udb_base* udb, udb_ptr* last, const char* zone,
+ const char* pattern)
+{
+ size_t zlen = strlen(zone);
+ size_t plen = strlen(pattern);
+ void *p;
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task addzone %s %s", zone, pattern));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)+
+ zlen + 1 + plen + 1, NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add addz");
+ return;
}
+ TASKLIST(&e)->task_type = task_add_zone;
+ p = TASKLIST(&e)->zname;
+ memcpy(p, zone, zlen+1);
+ memmove(p+zlen+1, pattern, plen+1);
+ udb_ptr_unlink(&e, udb);
+}
- /* check timestamp */
- curr_timestamp[0] = (uint32_t) db->diff_timestamp.tv_sec;
- curr_timestamp[1] = (uint32_t) db->diff_timestamp.tv_usec;
+void
+task_new_del_zone(udb_base* udb, udb_ptr* last, const dname_type* dname)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task delzone %s", dname_to_string(dname, 0)));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ +dname_total_size(dname), dname)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add delz");
+ return;
+ }
+ TASKLIST(&e)->task_type = task_del_zone;
+ udb_ptr_unlink(&e, udb);
+}
- if(!diff_read_32(df, &type)) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "difffile %s is empty",
- filename));
- db->diff_skip = 0;
- db->diff_pos = 0;
+void task_new_add_key(udb_base* udb, udb_ptr* last, key_options_t* key)
+{
+ char* p;
+ udb_ptr e;
+ assert(key->name && key->algorithm && key->secret);
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task addkey"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ +strlen(key->name)+1+strlen(key->algorithm)+1+
+ strlen(key->secret)+1, NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add addk");
+ return;
}
- else if (!diff_read_32(df, &timestamp[0]) ||
- !diff_read_32(df, &timestamp[1])) {
- log_msg(LOG_ERR, "difffile %s bad first part: no timestamp",
- filename);
- region_destroy(data->region);
- fclose(df);
- return 0;
+ TASKLIST(&e)->task_type = task_add_key;
+ p = (char*)TASKLIST(&e)->zname;
+ memmove(p, key->name, strlen(key->name)+1);
+ p+=strlen(key->name)+1;
+ memmove(p, key->algorithm, strlen(key->algorithm)+1);
+ p+=strlen(key->algorithm)+1;
+ memmove(p, key->secret, strlen(key->secret)+1);
+ udb_ptr_unlink(&e, udb);
+}
+
+void task_new_del_key(udb_base* udb, udb_ptr* last, const char* name)
+{
+ char* p;
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task delkey"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ +strlen(name)+1, NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add delk");
+ return;
}
- else if (curr_timestamp[0] != timestamp[0] ||
- curr_timestamp[1] != timestamp[1]) {
- /* new timestamp, no skipping */
- db->diff_timestamp.tv_sec = (time_t) timestamp[0];
- db->diff_timestamp.tv_usec = (suseconds_t) timestamp[1];
-
- if (db->diff_skip) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "new timestamp on "
- "difffile %s, restoring diff_skip and diff_pos "
- "[old timestamp: %u.%u; new timestamp: %u.%u]",
- filename, curr_timestamp[0], curr_timestamp[1],
- timestamp[0], timestamp[1]));
- db->diff_skip = 0;
- db->diff_pos = 0;
- }
+ TASKLIST(&e)->task_type = task_del_key;
+ p = (char*)TASKLIST(&e)->zname;
+ memmove(p, name, strlen(name)+1);
+ udb_ptr_unlink(&e, udb);
+}
+
+void task_new_add_pattern(udb_base* udb, udb_ptr* last, pattern_options_t* p)
+{
+ region_type* temp;
+ buffer_type* buffer;
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task addpattern %s", p->pname));
+ temp = region_create(xalloc, free);
+ buffer = buffer_create(temp, 4096);
+ pattern_options_marshal(buffer, p);
+ buffer_flip(buffer);
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ + buffer_limit(buffer), NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add addp");
+ region_destroy(temp);
+ return;
}
+ TASKLIST(&e)->task_type = task_add_pattern;
+ TASKLIST(&e)->yesno = buffer_limit(buffer);
+ memmove(TASKLIST(&e)->zname, buffer_begin(buffer),
+ buffer_limit(buffer));
+ udb_ptr_unlink(&e, udb);
+ region_destroy(temp);
+}
- /* Always seek, to diff_pos or to beginning of the file. */
- if (fseeko(df, 0, SEEK_SET)==-1) {
- log_msg(LOG_INFO, "could not fseeko file %s: %s.", filename,
- strerror(errno));
- region_destroy(data->region);
- fclose(df);
- return 0;
+void task_new_del_pattern(udb_base* udb, udb_ptr* last, const char* name)
+{
+ char* p;
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task delpattern %s", name));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ +strlen(name)+1, NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add delp");
+ return;
}
- if(db->diff_skip) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "skip diff file"));
- if(fseeko(df, db->diff_pos, SEEK_SET)==-1) {
- log_msg(LOG_INFO, "could not fseeko file %s: %s. "
- "Reread from start.", filename,
- strerror(errno));
- }
+ TASKLIST(&e)->task_type = task_del_pattern;
+ p = (char*)TASKLIST(&e)->zname;
+ memmove(p, name, strlen(name)+1);
+ udb_ptr_unlink(&e, udb);
+}
+
+void task_new_opt_change(udb_base* udb, udb_ptr* last, nsd_options_t* opt)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task opt_change"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d),
+ NULL)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add o_c");
+ return;
}
+ TASKLIST(&e)->task_type = task_opt_change;
+#ifdef RATELIMIT
+ TASKLIST(&e)->oldserial = opt->rrl_ratelimit;
+ TASKLIST(&e)->newserial = opt->rrl_whitelist_ratelimit;
+ TASKLIST(&e)->yesno = (uint64_t) opt->rrl_slip;
+#else
+ (void)opt;
+#endif
+ udb_ptr_unlink(&e, udb);
+}
- startpos = ftello(df);
- if(startpos == -1) {
- log_msg(LOG_INFO, "could not ftello: %s.", strerror(errno));
- region_destroy(data->region);
- fclose(df);
+int
+task_new_apply_xfr(udb_base* udb, udb_ptr* last, const dname_type* dname,
+ uint32_t old_serial, uint32_t new_serial, uint64_t filenumber)
+{
+ udb_ptr e;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "add task apply_xfr"));
+ if(!task_create_new_elem(udb, last, &e, sizeof(struct task_list_d)
+ +dname_total_size(dname), dname)) {
+ log_msg(LOG_ERR, "tasklist: out of space, cannot add applyxfr");
return 0;
}
+ TASKLIST(&e)->oldserial = old_serial;
+ TASKLIST(&e)->newserial = new_serial;
+ TASKLIST(&e)->yesno = filenumber;
+ TASKLIST(&e)->task_type = task_apply_xfr;
+ udb_ptr_unlink(&e, udb);
+ return 1;
+}
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "start of diff file read at pos %u",
- (uint32_t) db->diff_pos));
- while(diff_read_32(df, &type))
- {
- DEBUG(DEBUG_XFRD,2, (LOG_INFO, "iter loop"));
-
- /* read timestamp */
- if(!diff_read_32(df, &timestamp[0]) ||
- !diff_read_32(df, &timestamp[1])) {
- log_msg(LOG_INFO, "could not read timestamp: %s.",
- strerror(errno));
- region_destroy(data->region);
- fclose(df);
- return 0;
- }
+void
+task_process_expire(namedb_type* db, struct task_list_d* task)
+{
+ uint8_t ok;
+ zone_type* z = namedb_find_zone(db, task->zname);
+ assert(task->task_type == task_expire);
+ if(!z) {
+ DEBUG(DEBUG_IPC, 1, (LOG_WARNING, "zone %s %s but not in zonetree",
+ dname_to_string(task->zname, NULL),
+ task->yesno?"expired":"unexpired"));
+ return;
+ }
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: expire task zone %s %s",
+ dname_to_string(task->zname,0),
+ task->yesno?"expired":"unexpired"));
+ /* find zone, set expire flag */
+ ok = !task->yesno;
+ /* only update zone->is_ok if needed to minimize copy-on-write
+ * of memory pages shared after fork() */
+ if(ok && !z->is_ok)
+ z->is_ok = 1;
+ else if(!ok && z->is_ok)
+ z->is_ok = 0;
+}
- if(!read_process_part(db, df, type, opt, data, log,
- child_count, &startpos))
- {
- log_msg(LOG_INFO, "error processing diff file");
- region_destroy(data->region);
- fclose(df);
- return 0;
- }
- startpos = ftello(df);
- if(startpos == -1) {
- log_msg(LOG_INFO, "could not ftello: %s.", strerror(errno));
- region_destroy(data->region);
- fclose(df);
- return 0;
- }
+static void
+task_process_set_verbosity(struct task_list_d* task)
+{
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "verbosity task %d", (int)task->yesno));
+ verbosity = task->yesno;
+}
+
+static void
+task_process_checkzones(struct nsd* nsd, udb_base* udb, udb_ptr* last_task,
+ struct task_list_d* task)
+{
+ /* on SIGHUP check if zone-text-files changed and if so,
+ * reread. When from xfrd-reload, no need to fstat the files */
+ if(task->yesno) {
+ zone_options_t* zo = zone_options_find(nsd->options,
+ task->zname);
+ if(zo)
+ namedb_check_zonefile(nsd->db, udb, last_task, zo);
+ } else {
+ /* check all zones */
+ namedb_check_zonefiles(nsd->db, nsd->options, udb, last_task);
}
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "end of diff file read"));
+}
- if(find_smallest_offset(data, &db->diff_pos)) {
- /* can skip to the first unused element */
- DEBUG(DEBUG_XFRD,2, (LOG_INFO, "next time skip diff file"));
- db->diff_skip = 1;
+static void
+task_process_writezones(struct nsd* nsd, struct task_list_d* task)
+{
+ if(task->yesno) {
+ zone_options_t* zo = zone_options_find(nsd->options,
+ task->zname);
+ if(zo)
+ namedb_write_zonefile(nsd->db, zo);
} else {
- /* all processed, can skip to here next time */
- DEBUG(DEBUG_XFRD,2, (LOG_INFO, "next time skip diff file"));
- db->diff_skip = 1;
- db->diff_pos = ftello(df);
- if(db->diff_pos == -1) {
- log_msg(LOG_INFO, "could not ftello: %s.",
- strerror(errno));
- db->diff_skip = 0;
- }
+ namedb_write_zonefiles(nsd->db, nsd->options);
}
+}
- region_destroy(data->region);
- fclose(df);
- return 1;
+static void
+task_process_add_zone(struct nsd* nsd, udb_base* udb, udb_ptr* last_task,
+ struct task_list_d* task)
+{
+ zone_type* z;
+ const dname_type* zdname;
+ const char* zname = (const char*)task->zname;
+ const char* pname = zname + strlen(zname)+1;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "addzone task %s %s", zname, pname));
+ zdname = dname_parse(nsd->db->region, zname);
+ if(!zdname) {
+ log_msg(LOG_ERR, "can not parse zone name %s", zname);
+ return;
+ }
+ /* create zone */
+ z = find_or_create_zone(nsd->db, zdname, nsd->options, zname, pname);
+ if(!z) {
+ region_recycle(nsd->db->region, (void*)zdname,
+ dname_total_size(zdname));
+ log_msg(LOG_ERR, "can not add zone %s %s", zname, pname);
+ return;
+ }
+ /* if zone is empty, attempt to read the zonefile from disk (if any) */
+ if(!z->soa_rrset && z->opts->pattern->zonefile) {
+ namedb_read_zonefile(nsd->db, z, udb, last_task);
+ }
}
-static int diff_broken(FILE *df, off_t* break_pos)
+static void
+task_process_del_zone(struct nsd* nsd, struct task_list_d* task)
{
- uint32_t type, len, len2;
- *break_pos = ftello(df);
+ udb_ptr udbz;
+ zone_type* zone;
+ zone_options_t* zopt;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "delzone task %s", dname_to_string(
+ task->zname, NULL)));
+ zone = namedb_find_zone(nsd->db, task->zname);
+ if(!zone)
+ return;
- /* try to read and validate parts of the file */
- while(diff_read_32(df, &type)) /* cannot read type is no error, normal EOF */
- {
- /* check type */
- if(type != DIFF_PART_IXFR && type != DIFF_PART_SURE)
- return 1;
- /* check length */
- if(!diff_read_32(df, &len))
- return 1; /* EOF inside the part is error */
- if(fseeko(df, len, SEEK_CUR) == -1)
- {
- log_msg(LOG_INFO, "fseeko failed: %s", strerror(errno));
- return 1;
- }
- /* fseek clears EOF flag, but try reading length value,
- if EOF, the part is truncated */
- if(!diff_read_32(df, &len2))
- return 1;
- if(len != len2)
- return 1; /* bad part, lengths must agree */
- /* this part is ok */
- *break_pos = ftello(df);
+#ifdef NSEC3
+ nsec3_hash_tree_clear(zone);
+#endif
+ delete_zone_rrs(nsd->db, zone);
+ if(udb_zone_search(nsd->db->udb, &udbz, dname_name(task->zname),
+ task->zname->name_size)) {
+ udb_zone_delete(nsd->db->udb, &udbz);
+ udb_ptr_unlink(&udbz, nsd->db->udb);
}
- return 0;
+#ifdef NSEC3
+ nsec3_clear_precompile(nsd->db, zone);
+ zone->nsec3_param = NULL;
+#endif /* NSEC3 */
+
+ /* remove from zonetree, apex, soa */
+ zopt = zone->opts;
+ namedb_zone_delete(nsd->db, zone);
+ /* remove from options (zone_list already edited by xfrd) */
+ zone_options_delete(nsd->options, zopt);
+}
+
+static void
+task_process_add_key(struct nsd* nsd, struct task_list_d* task)
+{
+ key_options_t key;
+ key.name = (char*)task->zname;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "addkey task %s", key.name));
+ key.algorithm = key.name + strlen(key.name)+1;
+ key.secret = key.algorithm + strlen(key.algorithm)+1;
+ key_options_add_modify(nsd->options, &key);
+ memset(key.secret, 0xdd, strlen(key.secret)); /* wipe secret */
}
-void diff_snip_garbage(namedb_type* db, nsd_options_t* opt)
+static void
+task_process_del_key(struct nsd* nsd, struct task_list_d* task)
{
- off_t break_pos;
- const char* filename = opt->difffile;
- FILE *df;
+ char* name = (char*)task->zname;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "delkey task %s", name));
+ /* this is reload and nothing is using the TSIG key right now */
+ key_options_remove(nsd->options, name);
+}
- /* open file here and keep open, so it cannot change under our nose */
- df = fopen(filename, "r+");
- if(!df) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "could not open file %s for garbage collecting: %s",
- filename, strerror(errno)));
+static void
+task_process_add_pattern(struct nsd* nsd, struct task_list_d* task)
+{
+ region_type* temp = region_create(xalloc, free);
+ buffer_type buffer;
+ pattern_options_t *pat;
+ buffer_create_from(&buffer, task->zname, task->yesno);
+ pat = pattern_options_unmarshal(temp, &buffer);
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "addpattern task %s", pat->pname));
+ pattern_options_add_modify(nsd->options, pat);
+ region_destroy(temp);
+}
+
+static void
+task_process_del_pattern(struct nsd* nsd, struct task_list_d* task)
+{
+ char* name = (char*)task->zname;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "delpattern task %s", name));
+ pattern_options_remove(nsd->options, name);
+}
+
+static void
+task_process_opt_change(struct nsd* nsd, struct task_list_d* task)
+{
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "optchange task"));
+#ifdef RATELIMIT
+ nsd->options->rrl_ratelimit = task->oldserial;
+ nsd->options->rrl_whitelist_ratelimit = task->newserial;
+ nsd->options->rrl_slip = task->yesno;
+ rrl_set_limit(nsd->options->rrl_ratelimit, nsd->options->rrl_whitelist_ratelimit,
+ nsd->options->rrl_slip);
+#else
+ (void)nsd; (void)task;
+#endif
+}
+
+static void
+task_process_apply_xfr(struct nsd* nsd, udb_base* udb, udb_ptr *last_task,
+ udb_ptr* task)
+{
+	/* we have to use a udb_ptr task here, because the apply_xfr procedure
+ * appends soa_info which may remap and change the pointer. */
+ zone_type* zone;
+ FILE* df;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "applyxfr task %s", dname_to_string(
+ TASKLIST(task)->zname, NULL)));
+ zone = namedb_find_zone(nsd->db, TASKLIST(task)->zname);
+ if(!zone) {
+ /* assume the zone has been deleted and a zone transfer was
+ * still waiting to be processed */
return;
}
- /* and skip into file, since nsd does not read anything before the pos */
- if(db->diff_skip) {
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "garbage collect skip diff file"));
- if(fseeko(df, db->diff_pos, SEEK_SET)==-1) {
- log_msg(LOG_INFO, "could not fseeko file %s: %s.",
- filename, strerror(errno));
- fclose(df);
- return;
- }
+ /* apply the XFR */
+ /* oldserial, newserial, yesno is filenumber */
+ df = xfrd_open_xfrfile(nsd, TASKLIST(task)->yesno, "r");
+ if(!df) {
+ /* could not open file to update */
+ /* there is no reply to xfrd failed-update,
+ * because xfrd has a scan for apply-failures. */
+ return;
}
-
- /* detect break point */
- if(diff_broken(df, &break_pos))
- {
- /* snip off at break_pos */
- DEBUG(DEBUG_XFRD,1, (LOG_INFO, "snipping off trailing partial part of %s",
- filename));
- if(ftruncate(fileno(df), break_pos) == -1)
- log_msg(LOG_ERR, "ftruncate %s failed: %s",
- filename, strerror(errno));
+ /* read and apply zone transfer */
+ if(!apply_ixfr_for_zone(nsd, zone, df, nsd->options, udb,
+ last_task, TASKLIST(task)->yesno)) {
+ /* there is no reply to xfrd failed-update,
+ * because xfrd has a scan for apply-failures. */
}
fclose(df);
+ xfrd_unlink_xfrfile(nsd, TASKLIST(task)->yesno);
+}
+
+
+void task_process_in_reload(struct nsd* nsd, udb_base* udb, udb_ptr *last_task,
+ udb_ptr* task)
+{
+ switch(TASKLIST(task)->task_type) {
+ case task_expire:
+ task_process_expire(nsd->db, TASKLIST(task));
+ break;
+ case task_check_zonefiles:
+ task_process_checkzones(nsd, udb, last_task, TASKLIST(task));
+ break;
+ case task_write_zonefiles:
+ task_process_writezones(nsd, TASKLIST(task));
+ break;
+ case task_set_verbosity:
+ task_process_set_verbosity(TASKLIST(task));
+ break;
+ case task_add_zone:
+ task_process_add_zone(nsd, udb, last_task, TASKLIST(task));
+ break;
+ case task_del_zone:
+ task_process_del_zone(nsd, TASKLIST(task));
+ break;
+ case task_add_key:
+ task_process_add_key(nsd, TASKLIST(task));
+ break;
+ case task_del_key:
+ task_process_del_key(nsd, TASKLIST(task));
+ break;
+ case task_add_pattern:
+ task_process_add_pattern(nsd, TASKLIST(task));
+ break;
+ case task_del_pattern:
+ task_process_del_pattern(nsd, TASKLIST(task));
+ break;
+ case task_opt_change:
+ task_process_opt_change(nsd, TASKLIST(task));
+ break;
+ case task_apply_xfr:
+ task_process_apply_xfr(nsd, udb, last_task, task);
+ break;
+ default:
+ log_msg(LOG_WARNING, "unhandled task in reload type %d",
+ (int)TASKLIST(task)->task_type);
+ break;
+ }
+ udb_ptr_free_space(task, udb, TASKLIST(task)->size);
}
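task_process_in_reload() above handles one element and frees its space; the reload process drains the whole list much like task_clear() does, saving each next pointer before the element is released. A sketch of such a consumer loop, not part of this hunk (the real loop lives in server.c and is not shown here):

static void
tasklist_drain_sketch(struct nsd* nsd, udb_base* taskudb, udb_ptr* last_task)
{
	udb_ptr t, next;
	udb_ptr_init(&next, taskudb);
	udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
	udb_base_set_userdata(taskudb, 0);        /* detach the list head */
	while(!udb_ptr_is_null(&t)) {
		/* remember the next element before this one is freed */
		udb_ptr_set_rptr(&next, taskudb, &TASKLIST(&t)->next);
		udb_rptr_zero(&TASKLIST(&t)->next, taskudb);
		/* dispatch; this also frees the element's space */
		task_process_in_reload(nsd, taskudb, last_task, &t);
		udb_ptr_set_ptr(&t, taskudb, &next);
	}
	udb_ptr_unlink(&t, taskudb);
	udb_ptr_unlink(&next, taskudb);
}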
diff --git a/usr.sbin/nsd/difffile.h b/usr.sbin/nsd/difffile.h
index d54c629b5a7..d5f2cb8833f 100644
--- a/usr.sbin/nsd/difffile.h
+++ b/usr.sbin/nsd/difffile.h
@@ -1,7 +1,7 @@
/*
* difffile.h - nsd.diff file handling header file. Read/write diff files.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -9,62 +9,123 @@
#ifndef DIFFFILE_H
#define DIFFFILE_H
-#include "config.h"
#include "rbtree.h"
#include "namedb.h"
#include "options.h"
+#include "udb.h"
+struct nsd;
+struct nsdst;
-#define DIFF_PART_IXFR ('I'<<24 | 'X'<<16 | 'F'<<8 | 'R')
-#define DIFF_PART_SURE ('S'<<24 | 'U'<<16 | 'R'<<8 | 'E')
-
-/*
- * Used to pass commit logs
- */
-struct diff_log {
- char* zone_name;
- char* error;
- char* comment;
- struct diff_log* next;
-};
+#define DIFF_PART_XXFR ('X'<<24 | 'X'<<16 | 'F'<<8 | 'R')
+#define DIFF_PART_XFRF ('X'<<24 | 'F'<<16 | 'R'<<8 | 'F')
/* write an xfr packet data to the diff file, type=IXFR.
- The diff file is created if necessary. */
-void diff_write_packet(const char* zone, uint32_t new_serial, uint16_t id,
- uint32_t seq_nr, uint8_t* data, size_t len, nsd_options_t* opt);
+   The diff file is created if necessary, with an initial header (not committed). */
+void diff_write_packet(const char* zone, const char* pat, uint32_t old_serial,
+ uint32_t new_serial, uint32_t seq_nr, uint8_t* data, size_t len,
+ struct nsd* nsd, uint64_t filenumber);
/*
- * Write a commit packet to the diff file, type=SURE.
- * The zone data (preceding ixfr packets) are committed.
- * See NSD-DIFFFILE for meaning of the arguments.
+ * Overwrite the header of the diff file with the committed value and other data.
+ * Append the log string.
*/
void diff_write_commit(const char* zone, uint32_t old_serial,
- uint32_t new_serial, uint16_t id, uint32_t num_parts,
- uint8_t commit, const char* log_msg,
- nsd_options_t* opt);
-
-/* check if the crc in the nsd.db is the same in memory as on disk.
- returns 1 if different. 0 if the same. returns -1 on error. */
-int db_crc_different(namedb_type* db);
-
-/* read the diff file and apply to the database in memory.
- It will attempt to skip bad data.
- If you pass a non-null value log, log comments are alloced in namedb.region
- then, *log must be 0 on start of call (entries are prepended).
- returns 0 on an unrecoverable error. */
-int diff_read_file(namedb_type* db, nsd_options_t* opt, struct diff_log** log,
- size_t child_count);
-
-/* check the diff file for garbage at the end (bad type, partial write)
- * and snip it off.
- */
-void diff_snip_garbage(namedb_type* db, nsd_options_t* opt);
+ uint32_t new_serial, uint32_t num_parts, uint8_t commit,
+ const char* log_msg, struct nsd* nsd, uint64_t filenumber);
/*
* These functions read parts of the diff file.
*/
int diff_read_32(FILE *in, uint32_t* result);
-int diff_read_16(FILE *in, uint16_t* result);
int diff_read_8(FILE *in, uint8_t* result);
int diff_read_str(FILE* in, char* buf, size_t len);
+/* delete the RRs for a zone from memory */
+void delete_zone_rrs(namedb_type* db, zone_type* zone);
+/* delete an RR */
+int delete_RR(namedb_type* db, const dname_type* dname,
+ uint16_t type, uint16_t klass,
+ buffer_type* packet, size_t rdatalen, zone_type *zone,
+ region_type* temp_region, struct udb_ptr* udbz);
+/* add an RR */
+int add_RR(namedb_type* db, const dname_type* dname,
+ uint16_t type, uint16_t klass, uint32_t ttl,
+ buffer_type* packet, size_t rdatalen, zone_type *zone,
+ struct udb_ptr* udbz);
+
+/* task udb structure */
+struct task_list_d {
+ /** next task in list */
+ udb_rel_ptr next;
+ /** task type */
+ enum {
+ /** expire or un-expire a zone */
+ task_expire,
+ /** apply an ixfr or axfr to a zone */
+ task_apply_xfr,
+ /** soa info for zone */
+ task_soa_info,
+ /** check mtime of zonefiles and read them, done on SIGHUP */
+ task_check_zonefiles,
+ /** write zonefiles (if changed) */
+ task_write_zonefiles,
+ /** set verbosity */
+ task_set_verbosity,
+ /** statistic info */
+ task_stat_info,
+ /** add a zone */
+ task_add_zone,
+ /** delete zone */
+ task_del_zone,
+ /** add TSIG key */
+ task_add_key,
+ /** delete TSIG key */
+ task_del_key,
+ /** add pattern */
+ task_add_pattern,
+ /** delete pattern */
+ task_del_pattern,
+ /** options change */
+ task_opt_change
+ } task_type;
+ uint32_t size; /* size of this struct */
+
+ /** soainfo: zonename dname, soaRR wireform */
+	/** expire: zonename, yesno is the boolean expired flag */
+	/** apply_xfr: zonename, serials, yesno is the xfr file number */
+ uint32_t oldserial, newserial;
+	/** general variable; for some tasks it indicates whether zname is present. */
+ uint64_t yesno;
+ struct dname zname[0];
+};
+#define TASKLIST(ptr) ((struct task_list_d*)UDB_PTR(ptr))
+/** create udb for tasks */
+struct udb_base* task_file_create(const char* file);
+void task_remap(udb_base* udb);
+void task_process_sync(udb_base* udb);
+void task_clear(udb_base* udb);
+void task_new_soainfo(udb_base* udb, udb_ptr* last, struct zone* z);
+void task_new_expire(udb_base* udb, udb_ptr* last,
+ const struct dname* z, int expired);
+void* task_new_stat_info(udb_base* udb, udb_ptr* last, struct nsdst* stat,
+ size_t child_count);
+void task_new_check_zonefiles(udb_base* udb, udb_ptr* last,
+ const dname_type* zone);
+void task_new_write_zonefiles(udb_base* udb, udb_ptr* last,
+ const dname_type* zone);
+void task_new_set_verbosity(udb_base* udb, udb_ptr* last, int v);
+void task_new_add_zone(udb_base* udb, udb_ptr* last, const char* zone,
+ const char* pattern);
+void task_new_del_zone(udb_base* udb, udb_ptr* last, const dname_type* dname);
+void task_new_add_key(udb_base* udb, udb_ptr* last, key_options_t* key);
+void task_new_del_key(udb_base* udb, udb_ptr* last, const char* name);
+void task_new_add_pattern(udb_base* udb, udb_ptr* last, pattern_options_t* p);
+void task_new_del_pattern(udb_base* udb, udb_ptr* last, const char* name);
+void task_new_opt_change(udb_base* udb, udb_ptr* last, nsd_options_t* opt);
+int task_new_apply_xfr(udb_base* udb, udb_ptr* last, const dname_type* zone,
+ uint32_t old_serial, uint32_t new_serial, uint64_t filenumber);
+void task_process_in_reload(struct nsd* nsd, udb_base* udb, udb_ptr *last_task,
+ udb_ptr* task);
+void task_process_expire(namedb_type* db, struct task_list_d* task);
+
#endif /* DIFFFILE_H */
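Each element declared above is a fixed task_list_d header followed by a variable part in zname (a dname, one or more NUL-terminated strings, or marshalled pattern options), so producers size the allocation as sizeof(struct task_list_d) plus the payload. A producer-side usage sketch, not part of this patch; the particular tasks queued are arbitrary examples:

static void
queue_tasks_sketch(struct nsd* nsd, udb_ptr* last_task, const dname_type* apex)
{
	udb_base* u = nsd->task[nsd->mytask];          /* current task udb */
	task_new_expire(u, last_task, apex, 1);        /* mark the zone expired */
	task_new_check_zonefiles(u, last_task, NULL);  /* recheck all zonefiles */
	task_new_set_verbosity(u, last_task, 2);
	task_process_sync(u);                          /* before signalling reload */
}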
diff --git a/usr.sbin/nsd/dname.h b/usr.sbin/nsd/dname.h
index fccc3ee2967..a9aa15ad177 100644
--- a/usr.sbin/nsd/dname.h
+++ b/usr.sbin/nsd/dname.h
@@ -1,7 +1,7 @@
/*
* dname.h -- Domain name handling.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -179,9 +179,9 @@ dname_label(const dname_type *dname, uint8_t label)
* Return < 0 if LEFT < RIGHT, 0 if LEFT == RIGHT, and > 0 if LEFT >
* RIGHT. The comparison is case sensitive.
*
- * Pre: vleft != NULL && vright != NULL
+ * Pre: left != NULL && right != NULL
*/
-int dname_compare(const void *vleft, const void *right);
+int dname_compare(const dname_type *left, const dname_type *right);
/*
@@ -346,21 +346,6 @@ label_next(const uint8_t *label)
const char *dname_to_string(const dname_type *dname,
const dname_type *origin);
-/*
- * Convert DNAME to its string representation. This is a reentrant
- * version of dname_to_string. The buf argument is a pointer to a
- * user defined result buffer capable of holding the string representation
- * of a DNAME. Due to escape sequences and such, this buffer is recommeneded
- * to be at least 5 * MAXDOMAINLEN in size.
- *
- * If ORIGIN is provided and DNAME is a subdomain of ORIGIN the dname
- * will be represented relative to ORIGIN.
- *
- * Pre: dname != NULL
- */
-const char *dname_to_string_r(const dname_type *dname,
- const dname_type *origin,
- char *buf);
/*
* Create a dname containing the single label specified by STR
@@ -389,13 +374,11 @@ const dname_type *dname_replace(region_type* region,
const dname_type* src,
const dname_type* dest);
-#ifndef FULL_PREHASH
-/**
- * Create a dname representing the wildcard form of the passed dname.
- */
-int dname_make_wildcard(struct region *region,
- struct dname const *dname,
- struct dname const **wildcard);
-#endif
+/** Convert uncompressed wireformat dname to a string */
+char* wiredname2str(const uint8_t* dname);
+/** convert uncompressed label to string */
+char* wirelabel2str(const uint8_t* label);
+/** check if two uncompressed dnames of the same total length are equal */
+int dname_equal_nocase(uint8_t* a, uint8_t* b, uint16_t len);
#endif /* _DNAME_H_ */
diff --git a/usr.sbin/nsd/edns.c b/usr.sbin/nsd/edns.c
index b69873f5fa1..57c2e6c6634 100644
--- a/usr.sbin/nsd/edns.c
+++ b/usr.sbin/nsd/edns.c
@@ -1,7 +1,7 @@
/*
* edns.c -- EDNS definitions (RFC 2671).
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/edns.h b/usr.sbin/nsd/edns.h
index 8de1b685f01..b8643e954e4 100644
--- a/usr.sbin/nsd/edns.h
+++ b/usr.sbin/nsd/edns.h
@@ -1,7 +1,7 @@
/*
* edns.h -- EDNS definitions (RFC 2671).
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/ipc.c b/usr.sbin/nsd/ipc.c
index 28e1cc5e7ec..141b0f3a83d 100644
--- a/usr.sbin/nsd/ipc.c
+++ b/usr.sbin/nsd/ipc.c
@@ -1,7 +1,7 @@
/*
* ipc.c - Interprocess communication routines. Handlers read and write.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -11,6 +11,7 @@
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
+#include <fcntl.h>
#include "ipc.h"
#include "buffer.h"
#include "xfrd-tcp.h"
@@ -18,59 +19,16 @@
#include "namedb.h"
#include "xfrd.h"
#include "xfrd-notify.h"
+#include "difffile.h"
-/* set is_ok for the zone according to the zone message */
-static zone_type* handle_xfrd_zone_state(struct nsd* nsd, buffer_type* packet);
-/* write ipc ZONE_STATE message into the buffer */
-static void write_zone_state_packet(buffer_type* packet, zone_type* zone);
/* attempt to send NSD_STATS command to child fd */
static void send_stat_to_child(struct main_ipc_handler_data* data, int fd);
-/* write IPC expire notification msg to a buffer */
-static void xfrd_write_expire_notification(buffer_type* buffer, xfrd_zone_t* zone);
/* send reload request over the IPC channel */
static void xfrd_send_reload_req(xfrd_state_t* xfrd);
/* send quit request over the IPC channel */
static void xfrd_send_quit_req(xfrd_state_t* xfrd);
-/* get SOA INFO out of IPC packet buffer */
-static void xfrd_handle_ipc_SOAINFO(xfrd_state_t* xfrd, buffer_type* packet);
/* perform read part of handle ipc for xfrd */
-static void xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd);
-
-static zone_type*
-handle_xfrd_zone_state(struct nsd* nsd, buffer_type* packet)
-{
- uint8_t ok;
- const dname_type *dname;
- domain_type *domain;
- zone_type *zone;
-
- ok = buffer_read_u8(packet);
- dname = (dname_type*)buffer_current(packet);
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "handler zone state %s is %s",
- dname_to_string(dname, NULL), ok?"ok":"expired"));
- /* find in zone_types, if does not exist, we cannot serve anyway */
- /* find zone in config, since that one always exists */
- domain = domain_table_find(nsd->db->domains, dname);
- if(!domain) {
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "zone state msg, empty zone (domain %s)",
- dname_to_string(dname, NULL)));
- return NULL;
- }
- zone = domain_find_zone(domain);
- if(!zone || dname_compare(domain_dname(zone->apex), dname) != 0) {
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "zone state msg, empty zone (zone %s)",
- dname_to_string(dname, NULL)));
- return NULL;
- }
- assert(zone);
- /* only update zone->is_ok if needed to minimize copy-on-write
- of memory pages shared after fork() */
- if(ok && !zone->is_ok)
- zone->is_ok = 1;
- if(!ok && zone->is_ok)
- zone->is_ok = 0;
- return zone;
-}
+static void xfrd_handle_ipc_read(struct event* handler, xfrd_state_t* xfrd);
static void
ipc_child_quit(struct nsd* nsd)
@@ -80,42 +38,27 @@ ipc_child_quit(struct nsd* nsd)
#ifdef BIND8_STATS
bind8_stats(nsd);
#endif /* BIND8_STATS */
+
+#if 0 /* OS collects memory pages */
+ event_base_free(event_base);
+ region_destroy(server_region);
+#endif
server_shutdown(nsd);
exit(0);
}
void
-child_handle_parent_command(netio_type *ATTR_UNUSED(netio),
- netio_handler_type *handler,
- netio_event_types_type event_types)
+child_handle_parent_command(int fd, short event, void* arg)
{
sig_atomic_t mode;
int len;
struct ipc_handler_conn_data *data =
- (struct ipc_handler_conn_data *) handler->user_data;
- if (!(event_types & NETIO_EVENT_READ)) {
- return;
- }
-
- if(data->conn->is_reading) {
- int ret = conn_read(data->conn);
- if(ret == -1) {
- log_msg(LOG_ERR, "handle_parent_command: error in conn_read: %s",
- strerror(errno));
- data->conn->is_reading = 0;
- return;
- }
- if(ret == 0) {
- return; /* continue later */
- }
- /* completed */
- data->conn->is_reading = 0;
- buffer_flip(data->conn->packet);
- (void)handle_xfrd_zone_state(data->nsd, data->conn->packet);
+ (struct ipc_handler_conn_data *) arg;
+ if (!(event & EV_READ)) {
return;
}
- if ((len = read(handler->fd, &mode, sizeof(mode))) == -1) {
+ if ((len = read(fd, &mode, sizeof(mode))) == -1) {
log_msg(LOG_ERR, "handle_parent_command: read: %s",
strerror(errno));
return;
@@ -123,7 +66,7 @@ child_handle_parent_command(netio_type *ATTR_UNUSED(netio),
if (len == 0)
{
/* parent closed the connection. Quit */
- data->nsd->mode = NSD_QUIT;
+ ipc_child_quit(data->nsd);
return;
}
@@ -139,15 +82,22 @@ child_handle_parent_command(netio_type *ATTR_UNUSED(netio),
server_close_all_sockets(data->nsd->udp, data->nsd->ifs);
server_close_all_sockets(data->nsd->tcp, data->nsd->ifs);
/* mode == NSD_QUIT_CHILD */
- (void)write(handler->fd, &mode, sizeof(mode));
+ (void)write(fd, &mode, sizeof(mode));
ipc_child_quit(data->nsd);
break;
- case NSD_ZONE_STATE:
- data->conn->is_reading = 1;
- data->conn->total_bytes = 0;
- data->conn->msglen = 0;
- data->conn->fd = handler->fd;
- buffer_clear(data->conn->packet);
+ case NSD_QUIT_WITH_STATS:
+#ifdef BIND8_STATS
+ DEBUG(DEBUG_IPC, 2, (LOG_INFO, "quit QUIT_WITH_STATS"));
+ /* reply with ack and stats and then quit */
+ if(!write_socket(fd, &mode, sizeof(mode))) {
+ log_msg(LOG_ERR, "cannot write quitwst to parent");
+ }
+ if(!write_socket(fd, &data->nsd->st, sizeof(data->nsd->st))) {
+ log_msg(LOG_ERR, "cannot write stats to parent");
+ }
+ fsync(fd);
+#endif /* BIND8_STATS */
+ ipc_child_quit(data->nsd);
break;
default:
log_msg(LOG_ERR, "handle_parent_command: bad mode %d",
@@ -169,38 +119,6 @@ parent_handle_xfrd_command(netio_type *ATTR_UNUSED(netio),
return;
}
- if(data->conn->is_reading) {
- /* handle ZONE_STATE forward to children */
- int ret = conn_read(data->conn);
- size_t i;
- zone_type* zone;
- if(ret == -1) {
- log_msg(LOG_ERR, "main xfrd listener: error in conn_read: %s",
- strerror(errno));
- data->conn->is_reading = 0;
- return;
- }
- if(ret == 0) {
- return; /* continue later */
- }
- /* completed */
- data->conn->is_reading = 0;
- buffer_flip(data->conn->packet);
- zone = handle_xfrd_zone_state(data->nsd, data->conn->packet);
- if(!zone)
- return;
- /* forward to all children */
- for (i = 0; i < data->nsd->child_count; ++i) {
- if(!zone->dirty[i]) {
- zone->dirty[i] = 1;
- stack_push(data->nsd->children[i].dirty_zones, zone);
- data->nsd->children[i].handler->event_types |=
- NETIO_EVENT_WRITE;
- }
- }
- return;
- }
-
if ((len = read(handler->fd, &mode, sizeof(mode))) == -1) {
log_msg(LOG_ERR, "handle_xfrd_command: read: %s",
strerror(errno));
@@ -212,6 +130,7 @@ parent_handle_xfrd_command(netio_type *ATTR_UNUSED(netio),
DEBUG(DEBUG_IPC,1, (LOG_INFO, "handle_xfrd_command: xfrd closed channel."));
close(handler->fd);
handler->fd = -1;
+ data->nsd->mode = NSD_SHUTDOWN;
return;
}
@@ -221,18 +140,15 @@ parent_handle_xfrd_command(netio_type *ATTR_UNUSED(netio),
data->nsd->signal_hint_reload = 1;
break;
case NSD_QUIT:
+ case NSD_SHUTDOWN:
data->nsd->mode = mode;
break;
+ case NSD_STATS:
+ data->nsd->signal_hint_stats = 1;
+ break;
case NSD_REAP_CHILDREN:
data->nsd->signal_hint_child = 1;
break;
- case NSD_ZONE_STATE:
- data->conn->is_reading = 1;
- data->conn->total_bytes = 0;
- data->conn->msglen = 0;
- data->conn->fd = handler->fd;
- buffer_clear(data->conn->packet);
- break;
default:
log_msg(LOG_ERR, "handle_xfrd_command: bad mode %d",
(int) mode);
@@ -241,27 +157,6 @@ parent_handle_xfrd_command(netio_type *ATTR_UNUSED(netio),
}
static void
-write_zone_state_packet(buffer_type* packet, zone_type* zone)
-{
- sig_atomic_t cmd = NSD_ZONE_STATE;
- uint8_t ok = zone->is_ok;
- uint16_t sz;
- if(!zone->apex) {
- return;
- }
- sz = dname_total_size(domain_dname(zone->apex)) + 1;
- sz = htons(sz);
-
- buffer_clear(packet);
- buffer_write(packet, &cmd, sizeof(cmd));
- buffer_write(packet, &sz, sizeof(sz));
- buffer_write(packet, &ok, sizeof(ok));
- buffer_write(packet, domain_dname(zone->apex),
- dname_total_size(domain_dname(zone->apex)));
- buffer_flip(packet);
-}
-
-static void
send_stat_to_child(struct main_ipc_handler_data* data, int fd)
{
sig_atomic_t cmd = NSD_STATS;
@@ -275,6 +170,7 @@ send_stat_to_child(struct main_ipc_handler_data* data, int fd)
data->child->need_to_send_STATS = 0;
}
+#ifndef NDEBUG
int packet_read_query_section(buffer_type *packet, uint8_t* dest, uint16_t* qtype, uint16_t* qclass);
static void
debug_print_fwd_name(int ATTR_UNUSED(len), buffer_type* packet, int acl_num)
@@ -297,11 +193,16 @@ debug_print_fwd_name(int ATTR_UNUSED(len), buffer_type* packet, int acl_num)
buffer_set_position(packet, bufpos);
region_destroy(tempregion);
}
+#endif
static void
send_quit_to_child(struct main_ipc_handler_data* data, int fd)
{
+#ifdef BIND8_STATS
+ sig_atomic_t cmd = NSD_QUIT_WITH_STATS;
+#else
sig_atomic_t cmd = NSD_QUIT;
+#endif
if(write(fd, &cmd, sizeof(cmd)) == -1) {
if(errno == EAGAIN || errno == EINTR)
return; /* try again later */
@@ -314,6 +215,75 @@ send_quit_to_child(struct main_ipc_handler_data* data, int fd)
(int)data->child->pid));
}
+/** the child is done, mark it as exited */
+static void
+child_is_done(struct nsd* nsd, int fd)
+{
+ size_t i;
+ if(fd != -1) close(fd);
+ for(i=0; i<nsd->child_count; ++i)
+ if(nsd->children[i].child_fd == fd) {
+ nsd->children[i].child_fd = -1;
+ nsd->children[i].has_exited = 1;
+ nsd->children[i].handler->fd = -1;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "server %d is done",
+ (int)nsd->children[i].pid));
+ }
+ parent_check_all_children_exited(nsd);
+}
+
+#ifdef BIND8_STATS
+/** add stats to total */
+void
+stats_add(struct nsdst* total, struct nsdst* s)
+{
+ unsigned i;
+ for(i=0; i<sizeof(total->qtype)/sizeof(stc_t); i++)
+ total->qtype[i] += s->qtype[i];
+ for(i=0; i<sizeof(total->qclass)/sizeof(stc_t); i++)
+ total->qclass[i] += s->qclass[i];
+ total->qudp += s->qudp;
+ total->qudp6 += s->qudp6;
+ total->ctcp += s->ctcp;
+ total->ctcp6 += s->ctcp6;
+ for(i=0; i<sizeof(total->rcode)/sizeof(stc_t); i++)
+ total->rcode[i] += s->rcode[i];
+ for(i=0; i<sizeof(total->opcode)/sizeof(stc_t); i++)
+ total->opcode[i] += s->opcode[i];
+ total->dropped += s->dropped;
+ total->truncated += s->truncated;
+ total->wrongzone += s->wrongzone;
+ total->txerr += s->txerr;
+ total->rxerr += s->rxerr;
+ total->edns += s->edns;
+ total->ednserr += s->ednserr;
+ total->raxfr += s->raxfr;
+ total->nona += s->nona;
+
+ total->db_disk = s->db_disk;
+ total->db_mem = s->db_mem;
+}
+
+#define FINAL_STATS_TIMEOUT 10 /* seconds */
+static void
+read_child_stats(struct nsd* nsd, struct nsd_child* child, int fd)
+{
+ struct nsdst s;
+ errno=0;
+ if(block_read(nsd, fd, &s, sizeof(s), FINAL_STATS_TIMEOUT)!=sizeof(s)) {
+ log_msg(LOG_ERR, "problems reading finalstats from server "
+ "%d: %s", (int)child->pid, strerror(errno));
+ } else {
+ stats_add(&nsd->st, &s);
+ child->query_count = s.qudp + s.qudp6 + s.ctcp + s.ctcp6;
+ /* we know that the child is going to close the connection
+ * now (this is an ACK of the QUIT_W_STATS so we know the
+ * child is done, no longer sending e.g. NOTIFY contents) */
+ child_is_done(nsd, fd);
+ }
+}
+#endif /* BIND8_STATS */
+
void
parent_handle_child_command(netio_type *ATTR_UNUSED(netio),
netio_handler_type *handler,
@@ -326,43 +296,14 @@ parent_handle_child_command(netio_type *ATTR_UNUSED(netio),
/* do a nonblocking write to the child if it is ready. */
if (event_types & NETIO_EVENT_WRITE) {
- if(!data->busy_writing_zone_state &&
- !data->child->need_to_send_STATS &&
- !data->child->need_to_send_QUIT &&
- !data->child->need_to_exit &&
- data->child->dirty_zones->num > 0) {
- /* create packet from next dirty zone */
- zone_type* zone = (zone_type*)stack_pop(data->child->dirty_zones);
- assert(zone);
- zone->dirty[data->child_num] = 0;
- data->busy_writing_zone_state = 1;
- write_zone_state_packet(data->write_conn->packet, zone);
- data->write_conn->msglen = buffer_limit(data->write_conn->packet);
- data->write_conn->total_bytes = sizeof(uint16_t); /* len bytes already in packet */
- data->write_conn->fd = handler->fd;
- }
- if(data->busy_writing_zone_state) {
- /* write more of packet */
- int ret = conn_write(data->write_conn);
- if(ret == -1) {
- log_msg(LOG_ERR, "handle_child_cmd %d: could not write: %s",
- (int)data->child->pid, strerror(errno));
- data->busy_writing_zone_state = 0;
- } else if(ret == 1) {
- data->busy_writing_zone_state = 0; /* completed */
- }
- } else if(data->child->need_to_send_STATS &&
- !data->child->need_to_exit) {
+ if(data->child->need_to_send_STATS &&
+ !data->child->need_to_exit) {
send_stat_to_child(data, handler->fd);
} else if(data->child->need_to_send_QUIT) {
send_quit_to_child(data, handler->fd);
if(!data->child->need_to_send_QUIT)
handler->event_types = NETIO_EVENT_READ;
- }
- if(!data->busy_writing_zone_state &&
- !data->child->need_to_send_STATS &&
- !data->child->need_to_send_QUIT &&
- data->child->dirty_zones->num == 0) {
+ } else {
handler->event_types = NETIO_EVENT_READ;
}
}
@@ -468,18 +409,7 @@ parent_handle_child_command(netio_type *ATTR_UNUSED(netio),
}
if (len == 0)
{
- size_t i;
- if(handler->fd != -1) close(handler->fd);
- for(i=0; i<data->nsd->child_count; ++i)
- if(data->nsd->children[i].child_fd == handler->fd) {
- data->nsd->children[i].child_fd = -1;
- data->nsd->children[i].has_exited = 1;
- DEBUG(DEBUG_IPC,1, (LOG_INFO,
- "server %d closed cmd channel",
- (int) data->nsd->children[i].pid));
- }
- handler->fd = -1;
- parent_check_all_children_exited(data->nsd);
+ child_is_done(data->nsd, handler->fd);
return;
}
@@ -487,6 +417,11 @@ parent_handle_child_command(netio_type *ATTR_UNUSED(netio),
case NSD_QUIT:
data->nsd->mode = mode;
break;
+#ifdef BIND8_STATS
+ case NSD_QUIT_WITH_STATS:
+ read_child_stats(data->nsd, data->child, handler->fd);
+ break;
+#endif /* BIND8_STATS */
case NSD_STATS:
data->nsd->signal_hint_stats = 1;
break;
@@ -573,33 +508,16 @@ parent_handle_reload_command(netio_type *ATTR_UNUSED(netio),
}
static void
-xfrd_write_expire_notification(buffer_type* buffer, xfrd_zone_t* zone)
-{
- sig_atomic_t cmd = NSD_ZONE_STATE;
- uint8_t ok = 1;
- uint16_t sz = dname_total_size(zone->apex) + 1;
- sz = htons(sz);
- if(zone->state == xfrd_zone_expired)
- ok = 0;
-
- DEBUG(DEBUG_IPC,1, (LOG_INFO,
- "xfrd encoding ipc zone state msg for zone %s state %d.",
- zone->apex_str, (int)zone->state));
-
- buffer_clear(buffer);
- buffer_write(buffer, &cmd, sizeof(cmd));
- buffer_write(buffer, &sz, sizeof(sz));
- buffer_write(buffer, &ok, sizeof(ok));
- buffer_write(buffer, zone->apex, dname_total_size(zone->apex));
- buffer_flip(buffer);
-}
-
-static void
xfrd_send_reload_req(xfrd_state_t* xfrd)
{
sig_atomic_t req = NSD_RELOAD;
+ uint64_t p = xfrd->last_task->data;
+ udb_ptr_unlink(xfrd->last_task, xfrd->nsd->task[xfrd->nsd->mytask]);
+ task_process_sync(xfrd->nsd->task[xfrd->nsd->mytask]);
/* ask server_main for a reload */
- if(write(xfrd->ipc_handler.fd, &req, sizeof(req)) == -1) {
+ if(write(xfrd->ipc_handler.ev_fd, &req, sizeof(req)) == -1) {
+ udb_ptr_init(xfrd->last_task, xfrd->nsd->task[xfrd->nsd->mytask]);
+ udb_ptr_set(xfrd->last_task, xfrd->nsd->task[xfrd->nsd->mytask], p);
if(errno == EAGAIN || errno == EINTR)
return; /* try again later */
log_msg(LOG_ERR, "xfrd: problems sending reload command: %s",
@@ -607,21 +525,55 @@ xfrd_send_reload_req(xfrd_state_t* xfrd)
return;
}
DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: asked nsd to reload new updates"));
+ /* swapped task to other side, start to use other task udb. */
+ xfrd->nsd->mytask = 1 - xfrd->nsd->mytask;
+ task_remap(xfrd->nsd->task[xfrd->nsd->mytask]);
+ udb_ptr_init(xfrd->last_task, xfrd->nsd->task[xfrd->nsd->mytask]);
+ assert(udb_base_get_userdata(xfrd->nsd->task[xfrd->nsd->mytask])->data == 0);
+
xfrd_prepare_zones_for_reload();
xfrd->reload_cmd_last_sent = xfrd_time();
xfrd->need_to_send_reload = 0;
xfrd->can_send_reload = 0;
}
+void
+ipc_xfrd_set_listening(struct xfrd_state* xfrd, short mode)
+{
+ int fd = xfrd->ipc_handler.ev_fd;
+ struct event_base* base = xfrd->event_base;
+ event_del(&xfrd->ipc_handler);
+ event_set(&xfrd->ipc_handler, fd, mode, xfrd_handle_ipc, xfrd);
+ if(event_base_set(base, &xfrd->ipc_handler) != 0)
+ log_msg(LOG_ERR, "ipc: cannot set event_base");
+ /* no timeout for IPC events */
+ if(event_add(&xfrd->ipc_handler, NULL) != 0)
+ log_msg(LOG_ERR, "ipc: cannot add event");
+ xfrd->ipc_handler_flags = mode;
+}
+
+static void
+xfrd_send_shutdown_req(xfrd_state_t* xfrd)
+{
+ sig_atomic_t cmd = NSD_SHUTDOWN;
+ xfrd->ipc_send_blocked = 1;
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ);
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc send shutdown"));
+ if(!write_socket(xfrd->ipc_handler.ev_fd, &cmd, sizeof(cmd))) {
+ log_msg(LOG_ERR, "xfrd: error writing shutdown to main: %s",
+ strerror(errno));
+ }
+ xfrd->need_to_send_shutdown = 0;
+}
+
static void
xfrd_send_quit_req(xfrd_state_t* xfrd)
{
sig_atomic_t cmd = NSD_QUIT;
xfrd->ipc_send_blocked = 1;
- xfrd->ipc_handler.event_types &= (~NETIO_EVENT_WRITE);
- xfrd->sending_zone_state = 0;
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ);
DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc send ackreload(quit)"));
- if(!write_socket(xfrd->ipc_handler.fd, &cmd, sizeof(cmd))) {
+ if(!write_socket(xfrd->ipc_handler.ev_fd, &cmd, sizeof(cmd))) {
log_msg(LOG_ERR, "xfrd: error writing ack to main: %s",
strerror(errno));
}
@@ -629,119 +581,55 @@ xfrd_send_quit_req(xfrd_state_t* xfrd)
}
static void
-xfrd_handle_ipc_SOAINFO(xfrd_state_t* xfrd, buffer_type* packet)
+xfrd_send_stats(xfrd_state_t* xfrd)
{
- xfrd_soa_t soa;
- xfrd_soa_t* soa_ptr = &soa;
- xfrd_zone_t* zone;
- /* dname is sent in memory format */
- const dname_type* dname = (const dname_type*)buffer_begin(packet);
-
- /* find zone and decode SOA */
- zone = (xfrd_zone_t*)rbtree_search(xfrd->zones, dname);
- buffer_skip(packet, dname_total_size(dname));
-
- if(!buffer_available(packet, sizeof(uint32_t)*6 + sizeof(uint8_t)*2)) {
- /* NSD has zone without any info */
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "SOAINFO for %s lost zone",
- dname_to_string(dname,0)));
- soa_ptr = NULL;
- } else {
- /* read soa info */
- memset(&soa, 0, sizeof(soa));
- /* left out type, klass, count for speed */
- soa.type = htons(TYPE_SOA);
- soa.klass = htons(CLASS_IN);
- soa.ttl = htonl(buffer_read_u32(packet));
- soa.rdata_count = htons(7);
- soa.prim_ns[0] = buffer_read_u8(packet);
- if(!buffer_available(packet, soa.prim_ns[0]))
- return;
- buffer_read(packet, soa.prim_ns+1, soa.prim_ns[0]);
- soa.email[0] = buffer_read_u8(packet);
- if(!buffer_available(packet, soa.email[0]))
- return;
- buffer_read(packet, soa.email+1, soa.email[0]);
-
- soa.serial = htonl(buffer_read_u32(packet));
- soa.refresh = htonl(buffer_read_u32(packet));
- soa.retry = htonl(buffer_read_u32(packet));
- soa.expire = htonl(buffer_read_u32(packet));
- soa.minimum = htonl(buffer_read_u32(packet));
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "SOAINFO for %s %u",
- dname_to_string(dname,0), ntohl(soa.serial)));
- }
-
- if(!zone) {
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: zone %s master zone updated",
- dname_to_string(dname,0)));
- notify_handle_master_zone_soainfo(xfrd->notify_zones,
- dname, soa_ptr);
- return;
+ sig_atomic_t cmd = NSD_STATS;
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc send stats"));
+ if(!write_socket(xfrd->ipc_handler.ev_fd, &cmd, sizeof(cmd))) {
+ log_msg(LOG_ERR, "xfrd: error writing stats to main: %s",
+ strerror(errno));
}
- xfrd_handle_incoming_soa(zone, soa_ptr, xfrd_time());
+ xfrd->need_to_send_stats = 0;
}
void
-xfrd_handle_ipc(netio_type* ATTR_UNUSED(netio),
- netio_handler_type *handler,
- netio_event_types_type event_types)
+xfrd_handle_ipc(int ATTR_UNUSED(fd), short event, void* arg)
{
- xfrd_state_t* xfrd = (xfrd_state_t*)handler->user_data;
- if ((event_types & NETIO_EVENT_READ))
+ xfrd_state_t* xfrd = (xfrd_state_t*)arg;
+ if ((event & EV_READ))
{
/* first attempt to read as a signal from main
* could block further send operations */
- xfrd_handle_ipc_read(handler, xfrd);
+ xfrd_handle_ipc_read(&xfrd->ipc_handler, xfrd);
}
- if ((event_types & NETIO_EVENT_WRITE))
+ if ((event & EV_WRITE))
{
- if(xfrd->ipc_send_blocked) { /* wait for SOA_END */
- handler->event_types = NETIO_EVENT_READ;
+ if(xfrd->ipc_send_blocked) { /* wait for RELOAD_DONE */
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ);
return;
}
- /* if necessary prepare a packet */
- if(!(xfrd->can_send_reload && xfrd->need_to_send_reload) &&
- !xfrd->need_to_send_quit &&
- !xfrd->sending_zone_state &&
- xfrd->dirty_zones->num > 0) {
- xfrd_zone_t* zone = (xfrd_zone_t*)stack_pop(xfrd->dirty_zones);
- assert(zone);
- zone->dirty = 0;
- xfrd->sending_zone_state = 1;
- xfrd_write_expire_notification(xfrd->ipc_conn_write->packet, zone);
- xfrd->ipc_conn_write->msglen = buffer_limit(xfrd->ipc_conn_write->packet);
- /* skip length bytes; they are encoded in the packet, after cmd */
- xfrd->ipc_conn_write->total_bytes = sizeof(uint16_t);
- }
- /* write a bit */
- if(xfrd->sending_zone_state) {
- /* call conn_write */
- int ret = conn_write(xfrd->ipc_conn_write);
- if(ret == -1) {
- log_msg(LOG_ERR, "xfrd: error in write ipc: %s", strerror(errno));
- xfrd->sending_zone_state = 0;
- }
- else if(ret == 1) { /* done */
- xfrd->sending_zone_state = 0;
- }
+ if(xfrd->need_to_send_shutdown) {
+ xfrd_send_shutdown_req(xfrd);
} else if(xfrd->need_to_send_quit) {
xfrd_send_quit_req(xfrd);
} else if(xfrd->can_send_reload && xfrd->need_to_send_reload) {
xfrd_send_reload_req(xfrd);
+ } else if(xfrd->need_to_send_stats) {
+ xfrd_send_stats(xfrd);
}
if(!(xfrd->can_send_reload && xfrd->need_to_send_reload) &&
+ !xfrd->need_to_send_shutdown &&
!xfrd->need_to_send_quit &&
- !xfrd->sending_zone_state &&
- xfrd->dirty_zones->num == 0) {
- handler->event_types = NETIO_EVENT_READ; /* disable writing for now */
+ !xfrd->need_to_send_stats) {
+ /* disable writing for now */
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ);
}
}
}
static void
-xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd)
+xfrd_handle_ipc_read(struct event* handler, xfrd_state_t* xfrd)
{
sig_atomic_t cmd;
int len;
@@ -770,6 +658,7 @@ xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd)
}
if(xfrd->ipc_conn->is_reading) {
/* reading an IPC message */
+ buffer_type* tmp;
int ret = conn_read(xfrd->ipc_conn);
if(ret == -1) {
log_msg(LOG_ERR, "xfrd: error in read ipc: %s", strerror(errno));
@@ -779,26 +668,22 @@ xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd)
if(ret == 0)
return;
buffer_flip(xfrd->ipc_conn->packet);
- if(xfrd->ipc_is_soa) {
- xfrd->ipc_conn->is_reading = 0;
- xfrd_handle_ipc_SOAINFO(xfrd, xfrd->ipc_conn->packet);
- } else {
- /* use ipc_conn to read remaining data as well */
- buffer_type* tmp = xfrd->ipc_pass;
- xfrd->ipc_conn->is_reading=2;
- xfrd->ipc_pass = xfrd->ipc_conn->packet;
- xfrd->ipc_conn->packet = tmp;
- xfrd->ipc_conn->total_bytes = sizeof(xfrd->ipc_conn->msglen);
- xfrd->ipc_conn->msglen = 2*sizeof(uint32_t);
- buffer_clear(xfrd->ipc_conn->packet);
- buffer_set_limit(xfrd->ipc_conn->packet, xfrd->ipc_conn->msglen);
- }
+ /* use ipc_conn to read remaining data as well */
+ tmp = xfrd->ipc_pass;
+ xfrd->ipc_conn->is_reading=2;
+ xfrd->ipc_pass = xfrd->ipc_conn->packet;
+ xfrd->ipc_conn->packet = tmp;
+ xfrd->ipc_conn->total_bytes = sizeof(xfrd->ipc_conn->msglen);
+ xfrd->ipc_conn->msglen = 2*sizeof(uint32_t);
+ buffer_clear(xfrd->ipc_conn->packet);
+ buffer_set_limit(xfrd->ipc_conn->packet, xfrd->ipc_conn->msglen);
return;
}
- if((len = read(handler->fd, &cmd, sizeof(cmd))) == -1) {
- log_msg(LOG_ERR, "xfrd_handle_ipc: read: %s",
- strerror(errno));
+ if((len = read(handler->ev_fd, &cmd, sizeof(cmd))) == -1) {
+ if(errno != EINTR && errno != EAGAIN)
+ log_msg(LOG_ERR, "xfrd_handle_ipc: read: %s",
+ strerror(errno));
return;
}
if(len == 0)
@@ -812,48 +697,49 @@ xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd)
switch(cmd) {
case NSD_QUIT:
case NSD_SHUTDOWN:
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: main send shutdown cmd."));
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: main sent shutdown cmd."));
xfrd->shutdown = 1;
break;
- case NSD_SOA_BEGIN:
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv SOA_BEGIN"));
- /* reload starts sending SOA INFOs; don't block */
- xfrd->parent_soa_info_pass = 1;
- /* reset the nonblocking ipc write;
- the new parent does not want half a packet */
- xfrd->sending_zone_state = 0;
- break;
- case NSD_SOA_INFO:
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv SOA_INFO"));
- assert(xfrd->parent_soa_info_pass);
- xfrd->ipc_is_soa = 1;
- xfrd->ipc_conn->is_reading = 1;
- break;
- case NSD_SOA_END:
+ case NSD_RELOAD_DONE:
/* reload has finished */
- DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv SOA_END"));
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv RELOAD_DONE"));
+#ifdef BIND8_STATS
+ if(block_read(NULL, handler->ev_fd, &xfrd->reload_pid,
+ sizeof(pid_t), -1) != sizeof(pid_t)) {
+ log_msg(LOG_ERR, "xfrd cannot get reload_pid");
+ }
+#endif /* BIND8_STATS */
+ /* read the not-mytask for the results and soainfo */
+ xfrd_process_task_result(xfrd,
+ xfrd->nsd->task[1-xfrd->nsd->mytask]);
+		/* reset the IPC (and the nonblocking ipc write;
+ the new parent does not want half a packet) */
xfrd->can_send_reload = 1;
- xfrd->parent_soa_info_pass = 0;
xfrd->ipc_send_blocked = 0;
- handler->event_types |= NETIO_EVENT_WRITE;
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ|EV_WRITE);
xfrd_reopen_logfile();
xfrd_check_failed_updates();
- xfrd_send_expy_all_zones();
break;
case NSD_PASS_TO_XFRD:
DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv PASS_TO_XFRD"));
- xfrd->ipc_is_soa = 0;
xfrd->ipc_conn->is_reading = 1;
break;
+ case NSD_RELOAD_REQ:
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv RELOAD_REQ"));
+ /* make reload happen, right away, and schedule file check */
+ task_new_check_zonefiles(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, NULL);
+ xfrd_set_reload_now(xfrd);
+ break;
case NSD_RELOAD:
/* main tells us that reload is done, stop ipc send to main */
DEBUG(DEBUG_IPC,1, (LOG_INFO, "xfrd: ipc recv RELOAD"));
- handler->event_types |= NETIO_EVENT_WRITE;
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ|EV_WRITE);
xfrd->need_to_send_quit = 1;
break;
default:
log_msg(LOG_ERR, "xfrd_handle_ipc: bad mode %d (%d)", (int)cmd,
- ntohl(cmd));
+ (int)ntohl(cmd));
break;
}
@@ -864,4 +750,3 @@ xfrd_handle_ipc_read(netio_handler_type *handler, xfrd_state_t* xfrd)
buffer_clear(xfrd->ipc_conn->packet);
}
}
-
diff --git a/usr.sbin/nsd/ipc.h b/usr.sbin/nsd/ipc.h
index 0bd02e32b72..cb27db46063 100644
--- a/usr.sbin/nsd/ipc.h
+++ b/usr.sbin/nsd/ipc.h
@@ -1,7 +1,7 @@
/*
* ipc.h - Interprocess communication routines. Handlers read and write.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,12 +10,14 @@
#ifndef NSD_IPC_H
#define NSD_IPC_H
-#include "config.h"
#include "netio.h"
struct buffer;
struct nsd;
struct nsd_child;
struct xfrd_tcp;
+struct xfrd_state;
+struct nsdst;
+struct event;
/*
* Data for the server_main IPC handler
@@ -35,10 +37,6 @@ struct main_ipc_handler_data
uint16_t total_bytes;
uint32_t acl_num;
int32_t acl_xfr;
-
- /* writing data, connection and state */
- uint8_t busy_writing_zone_state;
- struct xfrd_tcp *write_conn;
};
/*
@@ -78,17 +76,21 @@ void parent_handle_child_command(netio_type *netio,
* Routine used by server_child.
* Handle a command received from the parent process.
*/
-void child_handle_parent_command(netio_type *netio,
- netio_handler_type *handler, netio_event_types_type event_types);
+void child_handle_parent_command(int fd, short event, void* arg);
/*
* Routine used by xfrd
* Handle interprocess communication with parent process, read and write.
*/
-void xfrd_handle_ipc(netio_type *netio,
- netio_handler_type *handler, netio_event_types_type event_types);
+void xfrd_handle_ipc(int fd, short event, void* arg);
/* check if all children have exited in an orderly fashion and set mode */
void parent_check_all_children_exited(struct nsd* nsd);
+/** add stats to total */
+void stats_add(struct nsdst* total, struct nsdst* s);
+
+/** set event to listen to given mode, no timeout, must be added already */
+void ipc_xfrd_set_listening(struct xfrd_state* xfrd, short mode);
+
#endif /* NSD_IPC_H */
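ipc_xfrd_set_listening() is how xfrd now toggles read/write interest on its IPC channel, replacing the old netio event_types flipping seen in ipc.c above. Its body is not part of this diff; the following is only a plausible sketch on top of the mini_event API, and the xfrd field names (ipc_handler, event_base) are assumptions for illustration, not taken from this patch:

	/* sketch only: mini_event has no way to change an added event in place,
	 * so changing the interest set means deleting and re-adding the event */
	void
	ipc_xfrd_set_listening(struct xfrd_state* xfrd, short mode)
	{
		struct event* ev = &xfrd->ipc_handler;	/* assumed field name */
		int fd = ev->ev_fd;
		event_del(ev);
		event_set(ev, fd, mode, xfrd_handle_ipc, xfrd);
		if(event_base_set(xfrd->event_base, ev) != 0	/* assumed field name */
		   || event_add(ev, NULL) != 0)
			log_msg(LOG_ERR, "xfrd ipc: cannot set event");
	}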
diff --git a/usr.sbin/nsd/iterated_hash.c b/usr.sbin/nsd/iterated_hash.c
index 4211f503c25..e8606a3b06c 100644
--- a/usr.sbin/nsd/iterated_hash.c
+++ b/usr.sbin/nsd/iterated_hash.c
@@ -1,7 +1,7 @@
/*
* iterated_hash.c -- nsec3 hash calculation.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/iterated_hash.h b/usr.sbin/nsd/iterated_hash.h
index 2a6bef399d0..9997e62598b 100644
--- a/usr.sbin/nsd/iterated_hash.h
+++ b/usr.sbin/nsd/iterated_hash.h
@@ -1,7 +1,7 @@
/*
* iterated_hash.h -- nsec3 hash calculation.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,10 +10,11 @@
#ifndef ITERATED_HASH_H
#define ITERATED_HASH_H
-#include "config.h"
#ifdef NSEC3
#include <openssl/sha.h>
+#define NSEC3_SHA1_HASH 1 /* same type code as DS hash */
+
int iterated_hash(unsigned char out[SHA_DIGEST_LENGTH],
const unsigned char *salt,int saltlength,
const unsigned char *in,int inlength,int iterations);
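iterated_hash() computes the RFC 5155 NSEC3 hash: SHA-1 over the wire-format owner name with the salt appended, re-hashed the configured number of extra iterations. A hedged usage sketch — salt, saltlength and iterations would come from the zone's NSEC3PARAM RR, and dname is an NSD dname_type pointer as used elsewhere in this diff:

	unsigned char hash[SHA_DIGEST_LENGTH];	/* SHA-1 digest; NSEC3_HASH_LEN in nsec3.h is expected to match */
	iterated_hash(hash, salt, saltlength,
		dname_name(dname), dname->name_size, iterations);
	/* hash[] now holds the digest that namedb.h keeps in the
	 * nsec3_hash / nsec3_wc_hash / nsec3_ds_parent_hash fields */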
diff --git a/usr.sbin/nsd/mini_event.c b/usr.sbin/nsd/mini_event.c
new file mode 100644
index 00000000000..4048bcfae89
--- /dev/null
+++ b/usr.sbin/nsd/mini_event.c
@@ -0,0 +1,446 @@
+/*
+ * mini_event.c - implementation of part of libevent api, portably.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/**
+ * \file
+ * fake libevent implementation. Less broad in functionality, and only
+ * supports select(2).
+ */
+
+#include "config.h"
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+#include <string.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#if defined(USE_MINI_EVENT) && !defined(USE_WINSOCK)
+#ifdef HAVE_WINSOCK2_H
+#define FD_SET_T (u_int)
+#else
+#define FD_SET_T
+#endif
+
+#include <signal.h>
+#include "mini_event.h"
+#include "util.h"
+
+/** compare events in tree, based on timevalue, ptr for uniqueness */
+int
+mini_ev_cmp(const void* a, const void* b)
+{
+ const struct event* e = (const struct event*)a;
+ const struct event* f = (const struct event*)b;
+ if(e->ev_timeout.tv_sec < f->ev_timeout.tv_sec)
+ return -1;
+ if(e->ev_timeout.tv_sec > f->ev_timeout.tv_sec)
+ return 1;
+ if(e->ev_timeout.tv_usec < f->ev_timeout.tv_usec)
+ return -1;
+ if(e->ev_timeout.tv_usec > f->ev_timeout.tv_usec)
+ return 1;
+ if(e < f)
+ return -1;
+ if(e > f)
+ return 1;
+ return 0;
+}
+
+/** set time */
+static int
+settime(struct event_base* base)
+{
+ if(gettimeofday(base->time_tv, NULL) < 0) {
+ return -1;
+ }
+#ifndef S_SPLINT_S
+ *base->time_secs = (time_t)base->time_tv->tv_sec;
+#endif
+ return 0;
+}
+
+/** create event base */
+void *
+event_init(time_t* time_secs, struct timeval* time_tv)
+{
+ struct event_base* base = (struct event_base*)malloc(
+ sizeof(struct event_base));
+ if(!base)
+ return NULL;
+ memset(base, 0, sizeof(*base));
+ base->region = region_create(xalloc, free);
+ if(!base->region) {
+ free(base);
+ return NULL;
+ }
+ base->time_secs = time_secs;
+ base->time_tv = time_tv;
+ if(settime(base) < 0) {
+ event_base_free(base);
+ return NULL;
+ }
+ base->times = rbtree_create(base->region, mini_ev_cmp);
+ if(!base->times) {
+ event_base_free(base);
+ return NULL;
+ }
+ base->capfd = MAX_FDS;
+#ifdef FD_SETSIZE
+ if((int)FD_SETSIZE < base->capfd)
+ base->capfd = (int)FD_SETSIZE;
+#endif
+ base->fds = (struct event**)calloc((size_t)base->capfd,
+ sizeof(struct event*));
+ if(!base->fds) {
+ event_base_free(base);
+ return NULL;
+ }
+ base->signals = (struct event**)calloc(MAX_SIG, sizeof(struct event*));
+ if(!base->signals) {
+ event_base_free(base);
+ return NULL;
+ }
+#ifndef S_SPLINT_S
+ FD_ZERO(&base->reads);
+ FD_ZERO(&base->writes);
+#endif
+ return base;
+}
+
+/** get version */
+const char *
+event_get_version(void)
+{
+ return "mini-event-"PACKAGE_VERSION;
+}
+
+/** get polling method, select */
+const char *
+event_get_method(void)
+{
+ return "select";
+}
+
+/** call timeout handlers; set wait to the time until the next one (or -1 if none) and return whether any fired */
+static int
+handle_timeouts(struct event_base* base, struct timeval* now,
+ struct timeval* wait)
+{
+ struct event* p;
+ int tofired = 0;
+#ifndef S_SPLINT_S
+ wait->tv_sec = (time_t)-1;
+#endif
+
+ while((rbnode_t*)(p = (struct event*)rbtree_first(base->times))
+ !=RBTREE_NULL) {
+#ifndef S_SPLINT_S
+ if(p->ev_timeout.tv_sec > now->tv_sec ||
+ (p->ev_timeout.tv_sec==now->tv_sec &&
+ p->ev_timeout.tv_usec > now->tv_usec)) {
+ /* there is a next larger timeout. wait for it */
+ wait->tv_sec = p->ev_timeout.tv_sec - now->tv_sec;
+ if(now->tv_usec > p->ev_timeout.tv_usec) {
+ wait->tv_sec--;
+ wait->tv_usec = 1000000 - (now->tv_usec -
+ p->ev_timeout.tv_usec);
+ } else {
+ wait->tv_usec = p->ev_timeout.tv_usec
+ - now->tv_usec;
+ }
+ return tofired;
+ }
+#endif
+ /* event times out, remove it */
+ tofired = 1;
+ (void)rbtree_delete(base->times, p);
+ p->ev_flags &= ~EV_TIMEOUT;
+ (*p->ev_callback)(p->ev_fd, EV_TIMEOUT, p->ev_arg);
+ }
+ return tofired;
+}
+
+/** call select and callbacks for that */
+static int
+handle_select(struct event_base* base, struct timeval* wait)
+{
+ fd_set r, w;
+ int ret, i;
+
+#ifndef S_SPLINT_S
+ if(wait->tv_sec==(time_t)-1)
+ wait = NULL;
+#endif
+ memmove(&r, &base->reads, sizeof(fd_set));
+ memmove(&w, &base->writes, sizeof(fd_set));
+ memmove(&base->ready, &base->content, sizeof(fd_set));
+
+ if((ret = select(base->maxfd+1, &r, &w, NULL, wait)) == -1) {
+ ret = errno;
+ if(settime(base) < 0)
+ return -1;
+ errno = ret;
+ if(ret == EAGAIN || ret == EINTR)
+ return 0;
+ return -1;
+ }
+ if(settime(base) < 0)
+ return -1;
+
+ for(i=0; i<base->maxfd+1; i++) {
+ short bits = 0;
+ if(!base->fds[i] || !(FD_ISSET(i, &base->ready))) {
+ continue;
+ }
+ if(FD_ISSET(i, &r)) {
+ bits |= EV_READ;
+ ret--;
+ }
+ if(FD_ISSET(i, &w)) {
+ bits |= EV_WRITE;
+ ret--;
+ }
+ bits &= base->fds[i]->ev_flags;
+ if(bits) {
+ (*base->fds[i]->ev_callback)(base->fds[i]->ev_fd,
+ bits, base->fds[i]->ev_arg);
+ if(ret==0)
+ break;
+ }
+ }
+ return 0;
+}
+
+/** run select once */
+int
+event_base_loop(struct event_base* base, int flags)
+{
+ struct timeval wait;
+ if(!(flags & EVLOOP_ONCE))
+ return event_base_dispatch(base);
+ /* see if timeouts need handling */
+ if(handle_timeouts(base, base->time_tv, &wait))
+ return 0; /* there were timeouts, end of loop */
+ if(base->need_to_exit)
+ return 0;
+ /* do select */
+ if(handle_select(base, &wait) < 0) {
+ if(base->need_to_exit)
+ return 0;
+ return -1;
+ }
+ return 0;
+}
+
+/** run select in a loop */
+int
+event_base_dispatch(struct event_base* base)
+{
+ struct timeval wait;
+ if(settime(base) < 0)
+ return -1;
+ while(!base->need_to_exit)
+ {
+ /* see if timeouts need handling */
+ (void)handle_timeouts(base, base->time_tv, &wait);
+ if(base->need_to_exit)
+ return 0;
+ /* do select */
+ if(handle_select(base, &wait) < 0) {
+ if(base->need_to_exit)
+ return 0;
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/** exit that loop */
+int
+event_base_loopexit(struct event_base* base,
+ struct timeval* ATTR_UNUSED(tv))
+{
+ base->need_to_exit = 1;
+ return 0;
+}
+
+/* free event base, free events yourself */
+void
+event_base_free(struct event_base* base)
+{
+ if(!base)
+ return;
+ if(base->times)
+ free(base->times);
+ if(base->fds)
+ free(base->fds);
+ if(base->signals)
+ free(base->signals);
+ region_destroy(base->region);
+ free(base);
+}
+
+/** set content of event */
+void
+event_set(struct event* ev, int fd, short bits,
+ void (*cb)(int, short, void *), void* arg)
+{
+ ev->node.key = ev;
+ ev->ev_fd = fd;
+ ev->ev_flags = bits;
+ ev->ev_callback = cb;
+ ev->ev_arg = arg;
+ ev->added = 0;
+}
+
+/* add event to a base */
+int
+event_base_set(struct event_base* base, struct event* ev)
+{
+ ev->ev_base = base;
+ ev->added = 0;
+ return 0;
+}
+
+/* add event to make it active, you may not change it with event_set anymore */
+int
+event_add(struct event* ev, struct timeval* tv)
+{
+ if(ev->added)
+ event_del(ev);
+ if(ev->ev_fd != -1 && ev->ev_fd >= ev->ev_base->capfd)
+ return -1;
+ if( (ev->ev_flags&(EV_READ|EV_WRITE)) && ev->ev_fd != -1) {
+ ev->ev_base->fds[ev->ev_fd] = ev;
+ if(ev->ev_flags&EV_READ) {
+ FD_SET(FD_SET_T ev->ev_fd, &ev->ev_base->reads);
+ }
+ if(ev->ev_flags&EV_WRITE) {
+ FD_SET(FD_SET_T ev->ev_fd, &ev->ev_base->writes);
+ }
+ FD_SET(FD_SET_T ev->ev_fd, &ev->ev_base->content);
+ FD_CLR(FD_SET_T ev->ev_fd, &ev->ev_base->ready);
+ if(ev->ev_fd > ev->ev_base->maxfd)
+ ev->ev_base->maxfd = ev->ev_fd;
+ }
+ if(tv && (ev->ev_flags&EV_TIMEOUT)) {
+#ifndef S_SPLINT_S
+ struct timeval* now = ev->ev_base->time_tv;
+ ev->ev_timeout.tv_sec = tv->tv_sec + now->tv_sec;
+ ev->ev_timeout.tv_usec = tv->tv_usec + now->tv_usec;
+ while(ev->ev_timeout.tv_usec > 1000000) {
+ ev->ev_timeout.tv_usec -= 1000000;
+ ev->ev_timeout.tv_sec++;
+ }
+#endif
+ (void)rbtree_insert(ev->ev_base->times, &ev->node);
+ }
+ ev->added = 1;
+ return 0;
+}
+
+/* remove event, you may change it again */
+int
+event_del(struct event* ev)
+{
+ if(ev->ev_fd != -1 && ev->ev_fd >= ev->ev_base->capfd)
+ return -1;
+ if((ev->ev_flags&EV_TIMEOUT))
+ (void)rbtree_delete(ev->ev_base->times, &ev->node);
+ if((ev->ev_flags&(EV_READ|EV_WRITE)) && ev->ev_fd != -1) {
+ ev->ev_base->fds[ev->ev_fd] = NULL;
+ FD_CLR(FD_SET_T ev->ev_fd, &ev->ev_base->reads);
+ FD_CLR(FD_SET_T ev->ev_fd, &ev->ev_base->writes);
+ FD_CLR(FD_SET_T ev->ev_fd, &ev->ev_base->ready);
+ FD_CLR(FD_SET_T ev->ev_fd, &ev->ev_base->content);
+ }
+ ev->added = 0;
+ return 0;
+}
+
+/** which base gets to handle signals */
+static struct event_base* signal_base = NULL;
+
+/** signal handler */
+static RETSIGTYPE
+sigh(int sig)
+{
+ struct event* ev;
+ if(!signal_base || sig < 0 || sig >= MAX_SIG)
+ return;
+ ev = signal_base->signals[sig];
+ if(!ev)
+ return;
+ (*ev->ev_callback)(sig, EV_SIGNAL, ev->ev_arg);
+}
+
+/** install signal handler */
+int
+signal_add(struct event* ev, struct timeval* ATTR_UNUSED(tv))
+{
+ struct sigaction action;
+ if(ev->ev_fd == -1 || ev->ev_fd >= MAX_SIG)
+ return -1;
+ signal_base = ev->ev_base;
+ ev->ev_base->signals[ev->ev_fd] = ev;
+ ev->added = 1;
+ action.sa_handler = sigh;
+ sigfillset(&action.sa_mask);
+ action.sa_flags = 0;
+ return sigaction(ev->ev_fd, &action, NULL);
+}
+
+/** remove signal handler */
+int
+signal_del(struct event* ev)
+{
+ if(ev->ev_fd == -1 || ev->ev_fd >= MAX_SIG)
+ return -1;
+ ev->ev_base->signals[ev->ev_fd] = NULL;
+ ev->added = 0;
+ return 0;
+}
+
+#else /* USE_MINI_EVENT */
+#ifndef USE_WINSOCK
+int
+mini_ev_cmp(const void* ATTR_UNUSED(a), const void* ATTR_UNUSED(b))
+{
+ return 0;
+}
+#endif /* not USE_WINSOCK */
+#endif /* USE_MINI_EVENT */
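One subtlety of this implementation: handle_timeouts() removes the event from the rbtree and clears EV_TIMEOUT before invoking the callback, so timers are one-shot and a periodic timer must re-arm itself with the full event_set/event_base_set/event_add sequence. A sketch of that pattern (tick, base and arg are illustrative names, not from this diff):

	static struct event_base* base;		/* obtained from event_init() */
	static struct event tick_ev;

	static void
	tick(int ATTR_UNUSED(fd), short ATTR_UNUSED(bits), void* arg)
	{
		struct timeval tv;
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		/* ... periodic work using arg ... */
		/* re-arm: EV_TIMEOUT was stripped when the timer fired */
		event_del(&tick_ev);
		event_set(&tick_ev, -1, EV_TIMEOUT, tick, arg);
		(void)event_base_set(base, &tick_ev);
		(void)event_add(&tick_ev, &tv);
	}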
diff --git a/usr.sbin/nsd/mini_event.h b/usr.sbin/nsd/mini_event.h
new file mode 100644
index 00000000000..b40983b6479
--- /dev/null
+++ b/usr.sbin/nsd/mini_event.h
@@ -0,0 +1,183 @@
+/*
+ * mini-event.h - micro implementation of libevent api, using select() only.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * This file implements part of the event(3) libevent api.
+ * The back end is only select. Max number of fds is limited.
+ * Max number of signals is limited, one handler per signal only.
+ * And one handler per fd.
+ *
+ * Although limited to select() and a max (1024) open fds, it
+ * is efficient:
+ * o dispatch call caches fd_sets to use.
+ * o handler calling takes time ~ to the number of fds.
+ * o timeouts are stored in a redblack tree, sorted, so take log(n).
+ * Timeouts are only accurate to the second (no subsecond accuracy).
+ * To avoid cpu hogging, fractional timeouts are rounded up to a whole second.
+ */
+
+#ifndef MINI_EVENT_H
+#define MINI_EVENT_H
+struct region;
+
+#if defined(USE_MINI_EVENT) && !defined(USE_WINSOCK)
+
+#ifndef HAVE_EVENT_BASE_FREE
+#define HAVE_EVENT_BASE_FREE
+#endif
+
+/** event timeout */
+#define EV_TIMEOUT 0x01
+/** event fd readable */
+#define EV_READ 0x02
+/** event fd writable */
+#define EV_WRITE 0x04
+/** event signal */
+#define EV_SIGNAL 0x08
+/** event must persist */
+#define EV_PERSIST 0x10
+
+/* needs our redblack tree */
+#include "rbtree.h"
+
+/** max number of file descriptors to support */
+#define MAX_FDS 1024
+/** max number of signals to support */
+#define MAX_SIG 32
+
+/** event base */
+struct event_base
+{
+ /** sorted by timeout (absolute), ptr */
+ rbtree_t* times;
+ /** array of 0 - maxfd of ptr to event for it */
+ struct event** fds;
+ /** max fd in use */
+ int maxfd;
+ /** capacity - size of the fds array */
+ int capfd;
+ /* fdset for read write, for fds ready, and added */
+ fd_set
+ /** fds for reading */
+ reads,
+ /** fds for writing */
+ writes,
+ /** fds determined ready for use */
+ ready,
+ /** ready plus newly added events. */
+ content;
+ /** array of 0 - maxsig of ptr to event for it */
+ struct event** signals;
+ /** if we need to exit */
+ int need_to_exit;
+ /** where to store time in seconds */
+ time_t* time_secs;
+ /** where to store time in microseconds */
+ struct timeval* time_tv;
+ /** region for allocation */
+ struct region* region;
+};
+
+/**
+ * Event structure. Has some of the event elements.
+ */
+struct event {
+ /** node in timeout rbtree */
+ rbnode_t node;
+ /** is event already added */
+ int added;
+
+ /** event base it belongs to */
+ struct event_base *ev_base;
+ /** fd to poll or -1 for timeouts. signal number for sigs. */
+ int ev_fd;
+ /** what events this event is interested in, see EV_.. above. */
+ short ev_flags;
+ /** timeout value */
+ struct timeval ev_timeout;
+
+ /** callback to call: fd, eventbits, userarg */
+ void (*ev_callback)(int, short, void *arg);
+ /** callback user arg */
+ void *ev_arg;
+};
+
+/* function prototypes (some are as they appear in event.h) */
+/** create event base */
+void *event_init(time_t* time_secs, struct timeval* time_tv);
+/** get version */
+const char *event_get_version(void);
+/** get polling method, select */
+const char *event_get_method(void);
+/** run select in a loop */
+int event_base_dispatch(struct event_base *);
+/** exit that loop */
+int event_base_loopexit(struct event_base *, struct timeval *);
+/** run select once */
+#define EVLOOP_ONCE 1
+int event_base_loop(struct event_base* base, int flags);
+/** free event base. Free events yourself */
+void event_base_free(struct event_base *);
+/** set content of event */
+void event_set(struct event *, int, short, void (*)(int, short, void *), void *);
+/** add event to a base. You *must* call this for every event. */
+int event_base_set(struct event_base *, struct event *);
+/** add event to make it active. You may not change it with event_set anymore */
+int event_add(struct event *, struct timeval *);
+/** remove event. You may change it again */
+int event_del(struct event *);
+
+/** add a timer */
+#define evtimer_add(ev, tv) event_add(ev, tv)
+/** remove a timer */
+#define evtimer_del(ev) event_del(ev)
+
+/* signals use a different implementation. Cannot mix fd/timeouts and signals
+ * inside the same struct event; create separate event structs for that. */
+/** install signal handler */
+int signal_add(struct event *, struct timeval *);
+/** set signal event contents */
+#define signal_set(ev, x, cb, arg) \
+ event_set(ev, x, EV_SIGNAL|EV_PERSIST, cb, arg)
+/** remove signal handler */
+int signal_del(struct event *);
+
+#endif /* USE_MINI_EVENT and not USE_WINSOCK */
+
+/** compare events in tree, based on timevalue, ptr for uniqueness */
+int mini_ev_cmp(const void* a, const void* b);
+
+#endif /* MINI_EVENT_H */
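As the header notes, fd/timeout events and signal events cannot share one struct event. A minimal consumer of this API might look like the sketch below; sock_fd and the handler names are illustrative and error checking is omitted:

	static void
	on_read(int fd, short ATTR_UNUSED(bits), void* ATTR_UNUSED(arg))
	{
		char buf[512];
		(void)read(fd, buf, sizeof(buf));	/* consume the data */
	}

	static void
	on_sigterm(int ATTR_UNUSED(sig), short ATTR_UNUSED(bits), void* arg)
	{
		event_base_loopexit((struct event_base*)arg, NULL);
	}

	/* typically in main(): */
	time_t secs;
	struct timeval now;
	struct event_base* base = event_init(&secs, &now);
	struct event rd_ev, term_ev;

	event_set(&rd_ev, sock_fd, EV_READ|EV_PERSIST, on_read, NULL);
	event_base_set(base, &rd_ev);
	event_add(&rd_ev, NULL);

	signal_set(&term_ev, SIGTERM, on_sigterm, base);
	event_base_set(base, &term_ev);
	signal_add(&term_ev, NULL);

	event_base_dispatch(base);	/* runs until loopexit sets need_to_exit */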
diff --git a/usr.sbin/nsd/namedb.c b/usr.sbin/nsd/namedb.c
index 5ed3b31baf6..5ffb6ff10dd 100644
--- a/usr.sbin/nsd/namedb.c
+++ b/usr.sbin/nsd/namedb.c
@@ -1,7 +1,7 @@
/*
* namedb.c -- common namedb operations.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -16,14 +16,14 @@
#include <limits.h>
#include <stdio.h>
#include <string.h>
-#include <errno.h>
#include "namedb.h"
+#include "nsec3.h"
static domain_type *
-allocate_domain_info(domain_table_type *table,
- const dname_type *dname,
- domain_type *parent)
+allocate_domain_info(domain_table_type* table,
+ const dname_type* dname,
+ domain_type* parent)
{
domain_type *result;
@@ -33,70 +33,312 @@ allocate_domain_info(domain_table_type *table,
result = (domain_type *) region_alloc(table->region,
sizeof(domain_type));
- result->node.key = dname_partial_copy(
+ result->dname = dname_partial_copy(
table->region, dname, domain_dname(parent)->label_count + 1);
result->parent = parent;
- result->nextdiff = NULL;
result->wildcard_child_closest_match = result;
result->rrsets = NULL;
- result->number = 0;
+ result->usage = 0;
#ifdef NSEC3
- result->nsec3_cover = NULL;
-#ifdef FULL_PREHASH
- result->nsec3_wcard_child_cover = NULL;
- result->nsec3_ds_parent_cover = NULL;
- result->nsec3_lookup = NULL;
- result->nsec3_is_exact = 0;
- result->nsec3_ds_parent_is_exact = 0;
-#endif /* FULL_PREHASH */
-#endif /* NSEC3 */
+ result->nsec3 = NULL;
+#endif
result->is_existing = 0;
result->is_apex = 0;
- result->has_SOA = 0;
+ assert(table->numlist_last); /* it exists because root exists */
+ /* push this domain at the end of the numlist */
+ result->number = table->numlist_last->number+1;
+ result->numlist_next = NULL;
+ result->numlist_prev = table->numlist_last;
+ table->numlist_last->numlist_next = result;
+ table->numlist_last = result;
return result;
}
+#ifdef NSEC3
+void
+allocate_domain_nsec3(domain_table_type* table, domain_type* result)
+{
+ if(result->nsec3)
+ return;
+ result->nsec3 = (struct nsec3_domain_data*) region_alloc(table->region,
+ sizeof(struct nsec3_domain_data));
+ result->nsec3->nsec3_cover = NULL;
+ result->nsec3->nsec3_wcard_child_cover = NULL;
+ result->nsec3->nsec3_ds_parent_cover = NULL;
+ result->nsec3->nsec3_is_exact = 0;
+ result->nsec3->nsec3_ds_parent_is_exact = 0;
+ result->nsec3->have_nsec3_hash = 0;
+ result->nsec3->have_nsec3_wc_hash = 0;
+ result->nsec3->have_nsec3_ds_parent_hash = 0;
+ result->nsec3->prehash_prev = NULL;
+ result->nsec3->prehash_next = NULL;
+ result->nsec3->nsec3_node.key = NULL;
+ result->nsec3->hash_node.key = NULL;
+ result->nsec3->wchash_node.key = NULL;
+ result->nsec3->dshash_node.key = NULL;
+}
+#endif /* NSEC3 */
+
+/** make the domain last in the numlist, changes numbers of domains */
+static void
+numlist_make_last(domain_table_type* table, domain_type* domain)
+{
+ size_t sw;
+ domain_type* last = table->numlist_last;
+ if(domain == last)
+ return;
+ /* swap numbers with the last element */
+ sw = domain->number;
+ domain->number = last->number;
+ last->number = sw;
+ /* swap list position with the last element */
+ assert(domain->numlist_next);
+ assert(last->numlist_prev);
+ if(domain->numlist_next != last) {
+ /* case 1: there are nodes between domain .. last */
+ domain_type* span_start = domain->numlist_next;
+ domain_type* span_end = last->numlist_prev;
+ /* these assignments walk the new list from start to end */
+ if(domain->numlist_prev)
+ domain->numlist_prev->numlist_next = last;
+ last->numlist_prev = domain->numlist_prev;
+ last->numlist_next = span_start;
+ span_start->numlist_prev = last;
+ span_end->numlist_next = domain;
+ domain->numlist_prev = span_end;
+ domain->numlist_next = NULL;
+ } else {
+ /* case 2: domain and last are neighbors */
+ /* these assignments walk the new list from start to end */
+ if(domain->numlist_prev)
+ domain->numlist_prev->numlist_next = last;
+ last->numlist_prev = domain->numlist_prev;
+ last->numlist_next = domain;
+ domain->numlist_prev = last;
+ domain->numlist_next = NULL;
+ }
+ table->numlist_last = domain;
+}
+
+/** pop the biggest domain off the numlist */
+static domain_type*
+numlist_pop_last(domain_table_type* table)
+{
+ domain_type* d = table->numlist_last;
+ table->numlist_last = table->numlist_last->numlist_prev;
+ if(table->numlist_last)
+ table->numlist_last->numlist_next = NULL;
+ return d;
+}
+
+/** see if a domain is eligible to be deleted, and thus is not used */
+static int
+domain_can_be_deleted(domain_type* domain)
+{
+ domain_type* n;
+ /* it has data or it has usage, do not delete it */
+ if(domain->rrsets) return 0;
+ if(domain->usage) return 0;
+ n = domain_next(domain);
+ /* it has children domains, do not delete it */
+ if(n && domain_is_subdomain(n, domain))
+ return 0;
+ return 1;
+}
+
+#ifdef NSEC3
+/** see if domain is on the prehash list */
+int domain_is_prehash(domain_table_type* table, domain_type* domain)
+{
+ if(domain->nsec3
+ && (domain->nsec3->prehash_prev || domain->nsec3->prehash_next))
+ return 1;
+ return (table->prehash_list == domain);
+}
+
+/** remove domain node from NSEC3 tree in hash space */
+void
+zone_del_domain_in_hash_tree(rbtree_t* tree, rbnode_t* node)
+{
+ if(!node->key)
+ return;
+ rbtree_delete(tree, node->key);
+ /* note that domain is no longer in the tree */
+ node->key = NULL;
+}
+
+/** clear the prehash list */
+void prehash_clear(domain_table_type* table)
+{
+ domain_type* d = table->prehash_list, *n;
+ while(d) {
+ n = d->nsec3->prehash_next;
+ d->nsec3->prehash_prev = NULL;
+ d->nsec3->prehash_next = NULL;
+ d = n;
+ }
+ table->prehash_list = NULL;
+}
+
+/** add domain to prehash list */
+void
+prehash_add(domain_table_type* table, domain_type* domain)
+{
+ if(domain_is_prehash(table, domain))
+ return;
+ allocate_domain_nsec3(table, domain);
+ domain->nsec3->prehash_next = table->prehash_list;
+ if(table->prehash_list)
+ table->prehash_list->nsec3->prehash_prev = domain;
+ table->prehash_list = domain;
+}
+
+/** remove domain from prehash list */
+void
+prehash_del(domain_table_type* table, domain_type* domain)
+{
+ if(domain->nsec3->prehash_next)
+ domain->nsec3->prehash_next->nsec3->prehash_prev =
+ domain->nsec3->prehash_prev;
+ if(domain->nsec3->prehash_prev)
+ domain->nsec3->prehash_prev->nsec3->prehash_next =
+ domain->nsec3->prehash_next;
+ else table->prehash_list = domain->nsec3->prehash_next;
+ domain->nsec3->prehash_next = NULL;
+ domain->nsec3->prehash_prev = NULL;
+}
+#endif /* NSEC3 */
+
+/** perform domain name deletion */
+static void
+do_deldomain(namedb_type* db, domain_type* domain)
+{
+ assert(domain && domain->parent); /* exists and not root */
+ /* first adjust the number list so that domain is the last one */
+ numlist_make_last(db->domains, domain);
+ /* pop off the domain from the number list */
+ (void)numlist_pop_last(db->domains);
+
+#ifdef NSEC3
+ /* if on prehash list, remove from prehash */
+ if(domain_is_prehash(db->domains, domain))
+ prehash_del(db->domains, domain);
+
+ /* see if nsec3-nodes are used */
+ if(domain->nsec3) {
+ if(domain->nsec3->nsec3_node.key)
+ zone_del_domain_in_hash_tree(nsec3_tree_zone(db, domain)
+ ->nsec3tree, &domain->nsec3->nsec3_node);
+ if(domain->nsec3->hash_node.key)
+ zone_del_domain_in_hash_tree(nsec3_tree_zone(db, domain)
+ ->hashtree, &domain->nsec3->hash_node);
+ if(domain->nsec3->wchash_node.key)
+ zone_del_domain_in_hash_tree(nsec3_tree_zone(db, domain)
+ ->wchashtree, &domain->nsec3->wchash_node);
+ if(domain->nsec3->dshash_node.key)
+ zone_del_domain_in_hash_tree(nsec3_tree_dszone(db, domain)
+ ->dshashtree, &domain->nsec3->dshash_node);
+ region_recycle(db->domains->region, domain->nsec3,
+ sizeof(struct nsec3_domain_data));
+ }
+#endif /* NSEC3 */
+
+	/* see if this domain is someone's wildcard-child-closest-match,
+	 * which can only be the parent, and then it should use the
+	 * next-smaller domain as closest-match. */
+ if(domain->parent->wildcard_child_closest_match == domain)
+ domain->parent->wildcard_child_closest_match =
+ domain_previous(domain);
+
+ /* actual removal */
+ radix_delete(db->domains->nametree, domain->rnode);
+ region_recycle(db->domains->region, (dname_type*)domain->dname,
+ dname_total_size(domain->dname));
+ region_recycle(db->domains->region, domain, sizeof(domain_type));
+}
+
+void
+domain_table_deldomain(namedb_type* db, domain_type* domain)
+{
+ while(domain_can_be_deleted(domain)) {
+ /* delete it */
+ do_deldomain(db, domain);
+ /* test parent */
+ domain = domain->parent;
+ }
+}
+
+/** clear hash tree */
+void
+hash_tree_clear(rbtree_t* tree)
+{
+ rbnode_t* n;
+ if(!tree) return;
+
+ /* note that elements are no longer in the tree */
+ for(n=rbtree_first(tree); n!=RBTREE_NULL; n=rbtree_next(n)) {
+ n->key = NULL;
+ }
+ tree->count = 0;
+ tree->root = RBTREE_NULL;
+}
+
+void hash_tree_delete(region_type* region, rbtree_t* tree)
+{
+ region_recycle(region, tree, sizeof(rbtree_t));
+}
+
+/** add domain nsec3 node to hashedspace tree */
+void zone_add_domain_in_hash_tree(region_type* region, rbtree_t** tree,
+ int (*cmpf)(const void*, const void*),
+ domain_type* domain, rbnode_t* node)
+{
+ if(!*tree)
+ *tree = rbtree_create(region, cmpf);
+ memset(node, 0, sizeof(rbnode_t));
+ node->key = domain;
+ rbtree_insert(*tree, node);
+}
+
domain_table_type *
-domain_table_create(region_type *region)
+domain_table_create(region_type* region)
{
- const dname_type *origin;
- domain_table_type *result;
- domain_type *root;
+ const dname_type* origin;
+ domain_table_type* result;
+ domain_type* root;
assert(region);
origin = dname_make(region, (uint8_t *) "", 0);
root = (domain_type *) region_alloc(region, sizeof(domain_type));
- root->node.key = origin;
+ root->dname = origin;
root->parent = NULL;
- root->nextdiff = NULL;
root->wildcard_child_closest_match = root;
root->rrsets = NULL;
root->number = 1; /* 0 is used for after header */
+ root->usage = 1; /* do not delete root, ever */
root->is_existing = 0;
root->is_apex = 0;
- root->has_SOA = 0;
+ root->numlist_prev = NULL;
+ root->numlist_next = NULL;
#ifdef NSEC3
- root->nsec3_cover = NULL;
-#ifdef FULL_PREHASH
- root->nsec3_is_exact = 0;
- root->nsec3_ds_parent_is_exact = 0;
- root->nsec3_wcard_child_cover = NULL;
- root->nsec3_ds_parent_cover = NULL;
- root->nsec3_lookup = NULL;
-#endif /* FULL_PREHASH */
-#endif /* NSEC3 */
+ root->nsec3 = NULL;
+#endif
result = (domain_table_type *) region_alloc(region,
sizeof(domain_table_type));
result->region = region;
- result->names_to_domains = rbtree_create(
- region, (int (*)(const void *, const void *)) dname_compare);
- rbtree_insert(result->names_to_domains, (rbnode_t *) root);
+ result->nametree = radix_tree_create(region);
+ root->rnode = radname_insert(result->nametree, dname_name(root->dname),
+ root->dname->name_size, root);
result->root = root;
+ result->numlist_last = root;
+#ifdef NSEC3
+ result->prehash_list = NULL;
+#endif
return result;
}
@@ -115,7 +357,9 @@ domain_table_search(domain_table_type *table,
assert(closest_match);
assert(closest_encloser);
- exact = rbtree_find_less_equal(table->names_to_domains, dname, (rbnode_t **) closest_match);
+ exact = radname_find_less_equal(table->nametree, dname_name(dname),
+ dname->name_size, (struct radnode**)closest_match);
+ *closest_match = (domain_type*)((*(struct radnode**)closest_match)->elem);
assert(*closest_match);
*closest_encloser = *closest_match;
@@ -135,11 +379,11 @@ domain_table_search(domain_table_type *table,
}
domain_type *
-domain_table_find(domain_table_type *table,
- const dname_type *dname)
+domain_table_find(domain_table_type* table,
+ const dname_type* dname)
{
- domain_type *closest_match;
- domain_type *closest_encloser;
+ domain_type* closest_match;
+ domain_type* closest_encloser;
int exact;
exact = domain_table_search(
@@ -149,12 +393,12 @@ domain_table_find(domain_table_type *table,
domain_type *
-domain_table_insert(domain_table_type *table,
- const dname_type *dname)
+domain_table_insert(domain_table_type* table,
+ const dname_type* dname)
{
- domain_type *closest_match;
- domain_type *closest_encloser;
- domain_type *result;
+ domain_type* closest_match;
+ domain_type* closest_encloser;
+ domain_type* result;
int exact;
assert(table);
@@ -172,8 +416,9 @@ domain_table_insert(domain_table_type *table,
result = allocate_domain_info(table,
dname,
closest_encloser);
- rbtree_insert(table->names_to_domains, (rbnode_t *) result);
- result->number = table->names_to_domains->count;
+ result->rnode = radname_insert(table->nametree,
+ dname_name(result->dname),
+ result->dname->name_size, result);
/*
* If the newly added domain name is larger
@@ -199,26 +444,21 @@ domain_table_insert(domain_table_type *table,
}
int
-domain_table_iterate(domain_table_type *table,
+domain_table_iterate(domain_table_type* table,
domain_table_iterator_type iterator,
- void *user_data)
+ void* user_data)
{
- const void *dname;
- void *node;
int error = 0;
-
- assert(table);
-
- RBTREE_WALK(table->names_to_domains, dname, node) {
- error += iterator((domain_type *) node, user_data);
+ struct radnode* n;
+ for(n = radix_first(table->nametree); n; n = radix_next(n)) {
+ error += iterator((domain_type*)n->elem, user_data);
}
-
return error;
}
void
-domain_add_rrset(domain_type *domain, rrset_type *rrset)
+domain_add_rrset(domain_type* domain, rrset_type* rrset)
{
#if 0 /* fast */
rrset->next = domain->rrsets;
@@ -240,9 +480,9 @@ domain_add_rrset(domain_type *domain, rrset_type *rrset)
rrset_type *
-domain_find_rrset(domain_type *domain, zone_type *zone, uint16_t type)
+domain_find_rrset(domain_type* domain, zone_type* zone, uint16_t type)
{
- rrset_type *result = domain->rrsets;
+ rrset_type* result = domain->rrsets;
while (result) {
if (result->zone == zone && rrset_rrtype(result) == type) {
@@ -254,9 +494,9 @@ domain_find_rrset(domain_type *domain, zone_type *zone, uint16_t type)
}
rrset_type *
-domain_find_any_rrset(domain_type *domain, zone_type *zone)
+domain_find_any_rrset(domain_type* domain, zone_type* zone)
{
- rrset_type *result = domain->rrsets;
+ rrset_type* result = domain->rrsets;
while (result) {
if (result->zone == zone) {
@@ -268,9 +508,9 @@ domain_find_any_rrset(domain_type *domain, zone_type *zone)
}
zone_type *
-domain_find_zone(domain_type *domain)
+domain_find_zone(domain_type* domain)
{
- rrset_type *rrset;
+ rrset_type* rrset;
while (domain) {
for (rrset = domain->rrsets; rrset; rrset = rrset->next) {
if (rrset_rrtype(rrset) == TYPE_SOA) {
@@ -282,22 +522,10 @@ domain_find_zone(domain_type *domain)
return NULL;
}
-#ifndef FULL_PREHASH
-domain_type *
-domain_find_zone_apex(domain_type *domain) {
- while (domain != NULL) {
- if (domain->has_SOA != 0)
- return domain;
- domain = domain->parent;
- }
- return NULL;
-}
-#endif /* !FULL_PREHASH */
-
zone_type *
-domain_find_parent_zone(zone_type *zone)
+domain_find_parent_zone(zone_type* zone)
{
- rrset_type *rrset;
+ rrset_type* rrset;
assert(zone);
@@ -310,7 +538,7 @@ domain_find_parent_zone(zone_type *zone)
}
domain_type *
-domain_find_ns_rrsets(domain_type *domain, zone_type *zone, rrset_type **ns)
+domain_find_ns_rrsets(domain_type* domain, zone_type* zone, rrset_type **ns)
{
while (domain && domain != zone->apex) {
*ns = domain_find_rrset(domain, zone, TYPE_NS);
@@ -324,18 +552,18 @@ domain_find_ns_rrsets(domain_type *domain, zone_type *zone, rrset_type **ns)
}
int
-domain_is_glue(domain_type *domain, zone_type *zone)
+domain_is_glue(domain_type* domain, zone_type* zone)
{
- rrset_type *unused;
- domain_type *ns_domain = domain_find_ns_rrsets(domain, zone, &unused);
+ rrset_type* unused;
+ domain_type* ns_domain = domain_find_ns_rrsets(domain, zone, &unused);
return (ns_domain != NULL &&
domain_find_rrset(ns_domain, zone, TYPE_SOA) == NULL);
}
domain_type *
-domain_wildcard_child(domain_type *domain)
+domain_wildcard_child(domain_type* domain)
{
- domain_type *wildcard_child;
+ domain_type* wildcard_child;
assert(domain);
assert(domain->wildcard_child_closest_match);
@@ -351,14 +579,14 @@ domain_wildcard_child(domain_type *domain)
}
int
-zone_is_secure(zone_type *zone)
+zone_is_secure(zone_type* zone)
{
assert(zone);
return zone->is_secure;
}
uint16_t
-rr_rrsig_type_covered(rr_type *rr)
+rr_rrsig_type_covered(rr_type* rr)
{
assert(rr->type == TYPE_RRSIG);
assert(rr->rdata_count > 0);
@@ -368,20 +596,16 @@ rr_rrsig_type_covered(rr_type *rr)
}
zone_type *
-namedb_find_zone(namedb_type *db, domain_type *domain)
+namedb_find_zone(namedb_type* db, const dname_type* dname)
{
- zone_type *zone;
-
- for (zone = db->zones; zone; zone = zone->next) {
- if (zone->apex == domain)
- break;
- }
-
- return zone;
+ struct radnode* n = radname_search(db->zonetree, dname_name(dname),
+ dname->name_size);
+ if(n) return (zone_type*)n->elem;
+ return NULL;
}
rrset_type *
-domain_find_non_cname_rrset(domain_type *domain, zone_type *zone)
+domain_find_non_cname_rrset(domain_type* domain, zone_type* zone)
{
/* find any rrset type that is not allowed next to a CNAME */
/* nothing is allowed next to a CNAME, except RRSIG, NSEC, NSEC3 */
@@ -402,251 +626,12 @@ domain_find_non_cname_rrset(domain_type *domain, zone_type *zone)
return NULL;
}
-/**
- * Create namedb.
- *
- */
-struct namedb *
-namedb_create(void)
-{
- struct namedb *db = NULL;
- region_type *region = NULL;
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- region_type *nsec3_region = NULL;
- region_type *nsec3_mod_region = NULL;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
-#ifdef USE_MMAP_ALLOC
- region = region_create_custom(mmap_alloc, mmap_free,
- MMAP_ALLOC_CHUNK_SIZE, MMAP_ALLOC_LARGE_OBJECT_SIZE,
- MMAP_ALLOC_INITIAL_CLEANUP_SIZE, 1);
-#else /* !USE_MMAP_ALLOC */
- region = region_create_custom(xalloc, free,
- DEFAULT_CHUNK_SIZE, DEFAULT_LARGE_OBJECT_SIZE,
- DEFAULT_INITIAL_CLEANUP_SIZE, 1);
-#endif /* !USE_MMAP_ALLOC */
- if (region == NULL)
- return NULL;
-
-#ifdef NSEC3
-#ifndef FULL_PREHASH
-#ifdef USE_MMAP_ALLOC
- nsec3_region = region_create_custom(mmap_alloc, mmap_free,
- MMAP_ALLOC_CHUNK_SIZE, MMAP_ALLOC_LARGE_OBJECT_SIZE,
- MMAP_ALLOC_INITIAL_CLEANUP_SIZE, 1);
-#else /* !USE_MMAP_ALLOC */
- nsec3_region = region_create_custom(xalloc, free,
- DEFAULT_CHUNK_SIZE, DEFAULT_LARGE_OBJECT_SIZE,
- DEFAULT_INITIAL_CLEANUP_SIZE, 1);
-#endif /* !USE_MMAP_ALLOC */
- if (nsec3_region == NULL) {
- region_destroy(region);
- return NULL;
- }
-#ifdef USE_MMAP_ALLOC
- nsec3_mod_region = region_create_custom(mmap_alloc, mmap_free,
- MMAP_ALLOC_CHUNK_SIZE, MMAP_ALLOC_LARGE_OBJECT_SIZE,
- MMAP_ALLOC_INITIAL_CLEANUP_SIZE, 1);
-#else /* !USE_MMAP_ALLOC */
- nsec3_mod_region = region_create_custom(xalloc, free,
- DEFAULT_CHUNK_SIZE, DEFAULT_LARGE_OBJECT_SIZE,
- DEFAULT_INITIAL_CLEANUP_SIZE, 1);
-#endif /* !USE_MMAP_ALLOC */
- if (nsec3_mod_region == NULL) {
- region_destroy(region);
- region_destroy(nsec3_region);
- return NULL;
- }
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
- /* Make a new structure... */
- db = (namedb_type *) region_alloc(region, sizeof(namedb_type));
- db->region = region;
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- db->nsec3_region = nsec3_region;
- db->nsec3_mod_region = nsec3_mod_region;
- db->nsec3_mod_domains = NULL;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- db->domains = domain_table_create(region);
- db->zones = NULL;
- db->zone_count = 0;
- db->filename = NULL;
- db->fd = NULL;
- db->crc = ~0;
- db->crc_pos = 0;
- db->diff_skip = 0;
- db->diff_pos = 0;
- return db;
-}
-
-/**
- * Destroy namedb.
- *
- */
-void
-namedb_destroy(struct namedb *db)
-{
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- region_destroy(db->nsec3_mod_region);
- db->nsec3_mod_region = NULL;
- db->nsec3_mod_domains = NULL;
- region_destroy(db->nsec3_region);
- db->nsec3_region = NULL;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- region_destroy(db->region);
-}
-
-
-#ifdef NSEC3
-#ifndef FULL_PREHASH
-int
-zone_nsec3_domains_create(struct namedb *db, struct zone *zone)
-{
- if ((db == NULL) || (zone == NULL))
- return EINVAL;
- if (zone->nsec3_domains != NULL)
- return 0;
- zone->nsec3_domains = rbtree_create(db->nsec3_region,
- dname_compare);
- if (zone->nsec3_domains == NULL)
- return ENOMEM;
- return 0;
-}
-
-int
-zone_nsec3_domains_destroy(struct namedb *db, struct zone *zone)
-{
- rbnode_t *node;
- if ((db == NULL) || (zone == NULL))
- return EINVAL;
- if (zone->nsec3_domains == NULL)
- return 0;
-
- node = rbtree_postorder_first(zone->nsec3_domains->root);
- while (node != RBTREE_NULL) {
- struct nsec3_domain *nsec3_domain =
- (struct nsec3_domain *) node;
- node = rbtree_postorder_next(node);
-
- if (nsec3_domain->covers != NULL) {
- nsec3_domain->covers->nsec3_cover = NULL;
- }
- region_recycle(db->nsec3_region, nsec3_domain,
- sizeof(*nsec3_domain));
- }
- region_recycle(db->nsec3_region, zone->nsec3_domains,
- sizeof(*(zone->nsec3_domains)));
- zone->nsec3_domains = NULL;
- return 0;
-}
-
-
int
-namedb_add_nsec3_domain(struct namedb *db, struct domain *domain,
- struct zone *zone)
+namedb_lookup(struct namedb* db,
+ const dname_type* dname,
+ domain_type **closest_match,
+ domain_type **closest_encloser)
{
- struct nsec3_domain *nsec3_domain;
- if (zone->nsec3_domains == NULL)
- return 0;
- nsec3_domain = (struct nsec3_domain *) region_alloc(db->nsec3_region,
- sizeof(*nsec3_domain));
- if (nsec3_domain == NULL)
- return ENOMEM;
- nsec3_domain->node.key = domain_dname(domain);
- nsec3_domain->nsec3_domain = domain;
- nsec3_domain->covers = NULL;
- if (rbtree_insert(zone->nsec3_domains, (rbnode_t *) nsec3_domain) == NULL) {
- region_recycle(db->nsec3_region, nsec3_domain, sizeof(*nsec3_domain));
- }
- return 0;
+ return domain_table_search(
+ db->domains, dname, closest_match, closest_encloser);
}
-
-
-int
-namedb_del_nsec3_domain(struct namedb *db, struct domain *domain,
- struct zone *zone)
-{
- rbnode_t *node;
- struct nsec3_domain *nsec3_domain;
- int error = 0;
-
- if (zone->nsec3_domains == NULL)
- return 0;
-
- node = rbtree_delete(zone->nsec3_domains, domain_dname(domain));
- if (node == NULL)
- return 0;
-
- nsec3_domain = (struct nsec3_domain *) node;
- if (nsec3_domain->covers != NULL) {
- /*
- * It is possible that this NSEC3 domain was modified
- * due to the addition/deletion of another NSEC3 domain.
- * Make sure it gets added to the NSEC3 list later by
- * making sure it's covered domain is added to the
- * NSEC3 mod list. S64#3441
- */
- error = namedb_add_nsec3_mod_domain(db, nsec3_domain->covers);
- nsec3_domain->covers->nsec3_cover = NULL;
- nsec3_domain->covers = NULL;
- }
- region_recycle(db->nsec3_region, nsec3_domain, sizeof(*nsec3_domain));
- return error;
-}
-
-
-int
-namedb_nsec3_mod_domains_create(struct namedb *db)
-{
- if (db == NULL)
- return EINVAL;
- namedb_nsec3_mod_domains_destroy(db);
-
- db->nsec3_mod_domains = rbtree_create(db->nsec3_mod_region, dname_compare);
- if (db->nsec3_mod_domains == NULL)
- return ENOMEM;
- return 0;
-}
-
-
-int
-namedb_nsec3_mod_domains_destroy(struct namedb *db)
-{
- if (db == NULL)
- return EINVAL;
- if (db->nsec3_mod_domains == NULL)
- return 0;
- region_free_all(db->nsec3_mod_region);
- db->nsec3_mod_domains = NULL;
- return 0;
-}
-
-int
-namedb_add_nsec3_mod_domain(struct namedb *db, struct domain *domain)
-{
- struct nsec3_mod_domain *nsec3_mod_domain;
- nsec3_mod_domain = (struct nsec3_mod_domain *)
- region_alloc(db->nsec3_mod_region, sizeof(*nsec3_mod_domain));
- if (nsec3_mod_domain == NULL) {
- log_msg(LOG_ERR,
- "memory allocation failure on modified domain");
- return ENOMEM;
- }
- nsec3_mod_domain->node.key = domain_dname(domain);
- nsec3_mod_domain->domain = domain;
-
- if (rbtree_insert(db->nsec3_mod_domains, (rbnode_t *) nsec3_mod_domain) == NULL) {
- region_recycle(db->nsec3_mod_region, nsec3_mod_domain,
- sizeof(*nsec3_mod_domain));
- }
- return 0;
-}
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
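domain_table_deldomain() only frees a node once its usage count is zero and it has no rrsets and no child domains, then walks up through parents that have become empty; the numlist swap above keeps the domain numbers dense while it does so. Callers therefore have to keep the usage count honest. A sketch of that discipline — illustrative only, not the actual difffile.c code:

	/* an RR whose rdata references tdname keeps that node alive */
	domain_type* target = domain_table_insert(db->domains, tdname);
	target->usage++;

	/* ... when the referring RR is removed again ... */
	assert(target->usage > 0);
	target->usage--;
	if(target->usage == 0 && target->rrsets == NULL)
		domain_table_deldomain(db, target);	/* also prunes now-empty parents */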
diff --git a/usr.sbin/nsd/namedb.h b/usr.sbin/nsd/namedb.h
index e50986a0fa4..dc0cb3fbff7 100644
--- a/usr.sbin/nsd/namedb.h
+++ b/usr.sbin/nsd/namedb.h
@@ -1,7 +1,7 @@
/*
* namedb.h -- nsd(8) internal namespace database definitions
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -14,13 +14,12 @@
#include "dname.h"
#include "dns.h"
+#include "radtree.h"
#include "rbtree.h"
-#include "util.h"
struct zone_options;
struct nsd_options;
-
-#define NAMEDB_MAGIC "NSDdbV08"
-#define NAMEDB_MAGIC_SIZE 8
+struct udb_base;
+struct udb_ptr;
typedef union rdata_atom rdata_atom_type;
typedef struct rrset rrset_type;
@@ -35,102 +34,104 @@ typedef struct zone zone_type;
struct domain_table
{
- region_type *region;
- rbtree_t *names_to_domains;
- domain_type *root;
+ region_type* region;
+ struct radtree *nametree;
+ domain_type* root;
+ /* ptr to biggest domain.number and last in list.
+ * the root is the lowest and first in the list. */
+ domain_type *numlist_last;
+#ifdef NSEC3
+ /* the prehash list, start of the list */
+ domain_type* prehash_list;
+#endif /* NSEC3 */
};
-struct domain
-{
- rbnode_t node;
- domain_type *parent;
- domain_type *nextdiff;
- domain_type *wildcard_child_closest_match;
- rrset_type *rrsets;
#ifdef NSEC3
- domain_type *nsec3_cover; /* != NULL is exact cover */
-#ifdef FULL_PREHASH
- /* (if nsec3 chain complete) nsec_cover is always the covering nsec3
- record */
+struct nsec3_domain_data {
+ /* (if nsec3 chain complete) always the covering nsec3 record */
+ domain_type* nsec3_cover;
/* the nsec3 that covers the wildcard child of this domain. */
- domain_type *nsec3_wcard_child_cover;
+ domain_type* nsec3_wcard_child_cover;
/* for the DS case we must answer on the parent side of zone cut */
- domain_type *nsec3_ds_parent_cover;
- /* the NSEC3 domain that has a hash-base32 <= than this dname. */
- /* or NULL (no smaller one within this zone)
- * this variable is used to look up the NSEC3 record that matches
- * or covers a given b64-encoded-hash-string domain name.
- * The result of the lookup is stored in the *_cover variables.
- * The variable makes it possible to perform a rbtree lookup for
- * a name, then take this 'jump' to the previous element that contains
- * an NSEC3 record, with hopefully the correct parameters. */
- domain_type *nsec3_lookup;
-#endif /* FULL_PREHASH */
+ domain_type* nsec3_ds_parent_cover;
+ /* NSEC3 domains to prehash, prev and next on the list or cleared */
+ domain_type* prehash_prev, *prehash_next;
+ /* entry in the nsec3tree (for NSEC3s in the chain in use) */
+ rbnode_t nsec3_node;
+ /* entry in the hashtree (for precompiled domains) */
+ rbnode_t hash_node;
+ /* entry in the wchashtree (the wildcard precompile) */
+ rbnode_t wchash_node;
+ /* entry in the dshashtree (the parent ds precompile) */
+ rbnode_t dshash_node;
+
+ /* nsec3 hash */
+ uint8_t nsec3_hash[NSEC3_HASH_LEN];
+ /* nsec3 hash of wildcard before this name */
+ uint8_t nsec3_wc_hash[NSEC3_HASH_LEN];
+ /* parent-side DS hash */
+ uint8_t nsec3_ds_parent_hash[NSEC3_HASH_LEN];
+	/* if the nsec3 hash is available */
+ unsigned have_nsec3_hash : 1;
+ unsigned have_nsec3_wc_hash : 1;
+ unsigned have_nsec3_ds_parent_hash : 1;
+ /* if the domain has an NSEC3 for it, use cover ptr to get it. */
+ unsigned nsec3_is_exact : 1;
+ /* same but on parent side */
+ unsigned nsec3_ds_parent_is_exact : 1;
+};
#endif /* NSEC3 */
- uint32_t number; /* Unique domain name number. */
+
+struct domain
+{
+ struct radnode* rnode;
+ const dname_type* dname;
+ domain_type* parent;
+ domain_type* wildcard_child_closest_match;
+ rrset_type* rrsets;
+#ifdef NSEC3
+ struct nsec3_domain_data* nsec3;
+#endif
+ /* double-linked list sorted by domain.number */
+ domain_type* numlist_prev, *numlist_next;
+ size_t number; /* Unique domain name number. */
+ size_t usage; /* number of ptrs to this from RRs(in rdata) and
+ from zone-apex pointers, also the root has one
+ more to make sure it cannot be deleted. */
/*
* This domain name exists (see wildcard clarification draft).
*/
unsigned is_existing : 1;
unsigned is_apex : 1;
- unsigned has_SOA : 1;
-#ifdef NSEC3
-#ifdef FULL_PREHASH
- /* if the domain has an NSEC3 for it, use cover ptr to get it. */
- unsigned nsec3_is_exact : 1;
- /* same but on parent side */
- unsigned nsec3_ds_parent_is_exact : 1;
-#endif /* FULL_PREHASH */
-#endif /* NSEC3 */
};
struct zone
{
- zone_type *next;
- domain_type *apex;
- rrset_type *soa_rrset;
- rrset_type *soa_nx_rrset; /* see bug #103 */
- rrset_type *ns_rrset;
+ struct radnode *node; /* this entry in zonetree */
+ domain_type* apex;
+ rrset_type* soa_rrset;
+ rrset_type* soa_nx_rrset; /* see bug #103 */
+ rrset_type* ns_rrset;
#ifdef NSEC3
- rr_type *nsec3_soa_rr; /* rrset with SOA bit set */
- domain_type *nsec3_last; /* last domain with nsec3, wraps */
-#ifndef FULL_PREHASH
- rbtree_t *nsec3_domains;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
-#if defined(BIND8_STATS) && defined(USE_ZONE_STATS)
- struct nsdst st;
-#endif /* defined(BIND8_STATS) && defined(USE_ZONE_STATS) */
-
- struct zone_options *opts;
- uint32_t number;
- uint8_t* dirty; /* array of dirty-flags, per child */
+ rr_type* nsec3_param; /* NSEC3PARAM RR of chain in use or NULL */
+ domain_type* nsec3_last; /* last domain with nsec3, wraps */
+ /* in these trees, the root contains an elem ptr to the radtree* */
+ rbtree_t* nsec3tree; /* tree with relevant NSEC3 domains */
+ rbtree_t* hashtree; /* tree, hashed NSEC3precompiled domains */
+ rbtree_t* wchashtree; /* tree, wildcard hashed domains */
+ rbtree_t* dshashtree; /* tree, ds-parent-hash domains */
+#endif
+ struct zone_options* opts;
unsigned is_secure : 1; /* zone uses DNSSEC */
- unsigned updated : 1; /* zone SOA was updated */
unsigned is_ok : 1; /* zone has not expired. */
+ unsigned is_changed : 1; /* zone was changed by AXFR */
};
-#ifdef NSEC3
-#ifndef FULL_PREHASH
-struct nsec3_domain {
- rbnode_t node;
- struct domain *nsec3_domain;
- struct domain *covers;
-};
-
-struct nsec3_mod_domain {
- rbnode_t node;
- struct domain *domain;
-};
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
/* a RR in DNS */
struct rr {
- domain_type *owner;
- rdata_atom_type *rdatas;
+ domain_type* owner;
+ rdata_atom_type* rdatas;
uint32_t ttl;
uint16_t type;
uint16_t klass;
@@ -143,9 +144,9 @@ struct rr {
*/
struct rrset
{
- rrset_type *next;
- zone_type *zone;
- rr_type *rrs;
+ rrset_type* next;
+ zone_type* zone;
+ rr_type* rrs;
uint16_t rr_count;
};
@@ -157,10 +158,10 @@ struct rrset
union rdata_atom
{
/* RDATA_WF_COMPRESSED_DNAME, RDATA_WF_UNCOMPRESSED_DNAME */
- domain_type *domain;
+ domain_type* domain;
/* Default. */
- uint16_t *data;
+ uint16_t* data;
};
/*
@@ -171,8 +172,8 @@ domain_table_type *domain_table_create(region_type *region);
/*
* Search the domain table for a match and the closest encloser.
*/
-int domain_table_search(domain_table_type *table,
- const dname_type *dname,
+int domain_table_search(domain_table_type* table,
+ const dname_type* dname,
domain_type **closest_match,
domain_type **closest_encloser);
@@ -181,17 +182,17 @@ int domain_table_search(domain_table_type *table,
* root domain).
*/
static inline uint32_t
-domain_table_count(domain_table_type *table)
+domain_table_count(domain_table_type* table)
{
- return table->names_to_domains->count;
+ return table->nametree->count;
}
/*
* Find the specified dname in the domain_table. NULL is returned if
* there is no exact match.
*/
-domain_type *domain_table_find(domain_table_type *table,
- const dname_type *dname);
+domain_type* domain_table_find(domain_table_type* table,
+ const dname_type* dname);
/*
* Insert a domain name in the domain table. If the domain name is
@@ -203,6 +204,17 @@ domain_type *domain_table_find(domain_table_type *table,
domain_type *domain_table_insert(domain_table_type *table,
const dname_type *dname);
+/* put domain into nsec3 hash space tree */
+void zone_add_domain_in_hash_tree(region_type* region, rbtree_t** tree,
+ int (*cmpf)(const void*, const void*), domain_type* domain,
+ rbnode_t* node);
+void zone_del_domain_in_hash_tree(rbtree_t* tree, rbnode_t* node);
+void hash_tree_clear(rbtree_t* tree);
+void hash_tree_delete(region_type* region, rbtree_t* tree);
+void prehash_clear(domain_table_type* table);
+void prehash_add(domain_table_type* table, domain_type* domain);
+void prehash_del(domain_table_type* table, domain_type* domain);
+int domain_is_prehash(domain_table_type* table, domain_type* domain);
/*
* Iterate over all the domain names in the domain tree.
@@ -210,87 +222,80 @@ domain_type *domain_table_insert(domain_table_type *table,
typedef int (*domain_table_iterator_type)(domain_type *node,
void *user_data);
-int domain_table_iterate(domain_table_type *table,
+int domain_table_iterate(domain_table_type* table,
domain_table_iterator_type iterator,
- void *user_data);
+ void* user_data);
/*
* Add an RRset to the specified domain. Updates the is_existing flag
* as required.
*/
-void domain_add_rrset(domain_type *domain, rrset_type *rrset);
+void domain_add_rrset(domain_type* domain, rrset_type* rrset);
-rrset_type *domain_find_rrset(domain_type *domain, zone_type *zone, uint16_t type);
-rrset_type *domain_find_any_rrset(domain_type *domain, zone_type *zone);
+rrset_type* domain_find_rrset(domain_type* domain, zone_type* zone, uint16_t type);
+rrset_type* domain_find_any_rrset(domain_type* domain, zone_type* zone);
-zone_type *domain_find_zone(domain_type *domain);
-zone_type *domain_find_parent_zone(zone_type *zone);
+zone_type* domain_find_zone(domain_type* domain);
+zone_type* domain_find_parent_zone(zone_type* zone);
-#ifndef FULL_PREHASH
-domain_type *domain_find_zone_apex(domain_type *domain);
-#endif /* !FULL_PREHASH */
-domain_type *domain_find_ns_rrsets(domain_type *domain, zone_type *zone, rrset_type **ns);
+domain_type* domain_find_ns_rrsets(domain_type* domain, zone_type* zone, rrset_type **ns);
-int domain_is_glue(domain_type *domain, zone_type *zone);
+int domain_is_glue(domain_type* domain, zone_type* zone);
-rrset_type *domain_find_non_cname_rrset(domain_type *domain, zone_type *zone);
+rrset_type* domain_find_non_cname_rrset(domain_type* domain, zone_type* zone);
-domain_type *domain_wildcard_child(domain_type *domain);
+domain_type* domain_wildcard_child(domain_type* domain);
-int zone_is_secure(zone_type *zone);
+int zone_is_secure(zone_type* zone);
static inline const dname_type *
-domain_dname(domain_type *domain)
+domain_dname(domain_type* domain)
{
- return (const dname_type *) domain->node.key;
+ return domain->dname;
}
static inline domain_type *
-domain_previous(domain_type *domain)
+domain_previous(domain_type* domain)
{
- rbnode_t *prev = rbtree_previous((rbnode_t *) domain);
- return prev == RBTREE_NULL ? NULL : (domain_type *) prev;
+ struct radnode* prev = radix_prev(domain->rnode);
+ return prev == NULL ? NULL : (domain_type*)prev->elem;
}
static inline domain_type *
-domain_next(domain_type *domain)
+domain_next(domain_type* domain)
{
- rbnode_t *next = rbtree_next((rbnode_t *) domain);
- return next == RBTREE_NULL ? NULL : (domain_type *) next;
+ struct radnode* next = radix_next(domain->rnode);
+ return next == NULL ? NULL : (domain_type*)next->elem;
}
+/* easy comparison for subdomain, true if d1 is subdomain of d2. */
+static inline int domain_is_subdomain(domain_type* d1, domain_type* d2)
+{ return dname_is_subdomain(domain_dname(d1), domain_dname(d2)); }
+/* easy printout, to static buffer of dname_to_string, fqdn. */
+static inline const char* domain_to_string(domain_type* domain)
+{ return dname_to_string(domain_dname(domain), NULL); }
+
/*
* The type covered by the signature in the specified RRSIG RR.
*/
-uint16_t rr_rrsig_type_covered(rr_type *rr);
+uint16_t rr_rrsig_type_covered(rr_type* rr);
typedef struct namedb namedb_type;
struct namedb
{
- region_type *region;
-#ifdef NSEC3
-#ifndef FULL_PREHASH
- region_type *nsec3_region;
- region_type *nsec3_mod_region;
- rbtree_t *nsec3_mod_domains;
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
- domain_table_type *domains;
- zone_type *zones;
- size_t zone_count;
- char *filename;
- FILE *fd;
+ region_type* region;
+ domain_table_type* domains;
+ struct radtree* zonetree;
+ struct udb_base* udb;
/* the timestamp on the ixfr.db file */
struct timeval diff_timestamp;
- /* the CRC on the nsd.db file and position of CRC in the db file */
- uint32_t crc;
- off_t crc_pos;
/* if diff_skip=1, diff_pos contains the nsd.diff place to continue */
uint8_t diff_skip;
off_t diff_pos;
};
static inline int rdata_atom_is_domain(uint16_t type, size_t index);
+static inline int rdata_atom_is_literal_domain(uint16_t type, size_t index);
static inline domain_type *
rdata_atom_domain(rdata_atom_type atom)
@@ -311,27 +316,48 @@ rdata_atom_data(rdata_atom_type atom)
}
+/* Find the zone for the specified dname in DB. */
+zone_type *namedb_find_zone(namedb_type *db, const dname_type *dname);
/*
- * Find the zone for the specified DOMAIN in DB.
+ * Delete a domain name from the domain table. Removes dname_info node.
+ * Only deletes if usage is 0, has no rrsets and no children. Checks parents
+ * for deletion as well. Adjusts numberlist(domain.number), and
+ * wcard_child closest match.
*/
-zone_type *namedb_find_zone(namedb_type *db, domain_type *domain);
-
-/* dbcreate.c */
-struct namedb *namedb_new(const char *filename);
-int namedb_save(struct namedb *db);
-void namedb_discard(struct namedb *db);
+void domain_table_deldomain(namedb_type* db, domain_type* domain);
+/** dbcreate.c */
+int udb_write_rr(struct udb_base* udb, struct udb_ptr* z, rr_type* rr);
+void udb_del_rr(struct udb_base* udb, struct udb_ptr* z, rr_type* rr);
+int write_zone_to_udb(struct udb_base* udb, zone_type* zone, time_t mtime);
+/** marshal rdata into buffer, must be MAX_RDLENGTH in size */
+size_t rr_marshal_rdata(rr_type* rr, uint8_t* rdata, size_t sz);
/* dbaccess.c */
-int namedb_lookup (struct namedb *db,
- const dname_type *dname,
+int namedb_lookup (struct namedb* db,
+ const dname_type* dname,
domain_type **closest_match,
domain_type **closest_encloser);
/* pass number of children (to alloc in dirty array */
-struct namedb *namedb_open(const char *filename, struct nsd_options* opt,
- size_t num_children);
-void namedb_fd_close(struct namedb *db);
-void namedb_close(struct namedb *db);
+struct namedb *namedb_open(const char *filename, struct nsd_options* opt);
+void namedb_close_udb(struct namedb* db);
+void namedb_close(struct namedb* db);
+void namedb_check_zonefiles(struct namedb* db, struct nsd_options* opt,
+ struct udb_base* taskudb, struct udb_ptr* last_task);
+void namedb_check_zonefile(struct namedb* db, struct udb_base* taskudb,
+ struct udb_ptr* last_task, struct zone_options* zo);
+/** read one zonefile into memory and revert on parse error, write to udb */
+void namedb_read_zonefile(struct namedb* db, struct zone* zone,
+ struct udb_base* taskudb, struct udb_ptr* last_task);
+void apex_rrset_checks(struct namedb* db, rrset_type* rrset,
+ domain_type* domain);
+zone_type* namedb_zone_create(namedb_type* db, const dname_type* dname,
+ struct zone_options* zopt);
+void namedb_zone_delete(namedb_type* db, zone_type* zone);
+void namedb_write_zonefile(namedb_type* db, struct zone_options* zopt);
+void namedb_write_zonefiles(namedb_type* db, struct nsd_options* options);
+int create_dirs(const char* path);
+void allocate_domain_nsec3(domain_table_type *table, domain_type *result);
static inline int
rdata_atom_is_domain(uint16_t type, size_t index)
@@ -343,6 +369,15 @@ rdata_atom_is_domain(uint16_t type, size_t index)
|| descriptor->wireformat[index] == RDATA_WF_UNCOMPRESSED_DNAME));
}
+static inline int
+rdata_atom_is_literal_domain(uint16_t type, size_t index)
+{
+ const rrtype_descriptor_type *descriptor
+ = rrtype_descriptor_by_type(type);
+ return (index < descriptor->maximum
+ && (descriptor->wireformat[index] == RDATA_WF_LITERAL_DNAME));
+}
+
static inline rdata_wireformat_type
rdata_atom_wireformat_type(uint16_t type, size_t index)
{
@@ -353,7 +388,7 @@ rdata_atom_wireformat_type(uint16_t type, size_t index)
}
static inline uint16_t
-rrset_rrtype(rrset_type *rrset)
+rrset_rrtype(rrset_type* rrset)
{
assert(rrset);
assert(rrset->rr_count > 0);
@@ -361,35 +396,11 @@ rrset_rrtype(rrset_type *rrset)
}
static inline uint16_t
-rrset_rrclass(rrset_type *rrset)
+rrset_rrclass(rrset_type* rrset)
{
assert(rrset);
assert(rrset->rr_count > 0);
return rrset->rrs[0].klass;
}
-/**
- * Allocate and initialize a struct namedb.
- * Returns a pointer to a valid struct namedb or NULL on failure.
- */
-struct namedb * namedb_create(void);
-
-/**
- * Destroy a struct namedb created using the namedb_create function.
- * Frees all regions associated with the namedb structure.
- */
-void namedb_destroy(struct namedb *db);
-
-#ifdef NSEC3
-#ifndef FULL_PREHASH
-int zone_nsec3_domains_create(struct namedb *db, struct zone *zone);
-int zone_nsec3_domains_destroy(struct namedb *db, struct zone *zone);
-int namedb_add_nsec3_domain(struct namedb *db, struct domain *domain, struct zone *zone);
-int namedb_del_nsec3_domain(struct namedb *db, struct domain *domain, struct zone *zone);
-int namedb_nsec3_mod_domains_create(struct namedb *db);
-int namedb_nsec3_mod_domains_destroy(struct namedb *db);
-int namedb_add_nsec3_mod_domain(struct namedb *db, struct domain *domain);
-#endif /* !FULL_PREHASH */
-#endif /* NSEC3 */
-
#endif
diff --git a/usr.sbin/nsd/netio.c b/usr.sbin/nsd/netio.c
index 2c64b6d1f67..ad8ee16ee60 100644
--- a/usr.sbin/nsd/netio.c
+++ b/usr.sbin/nsd/netio.c
@@ -1,7 +1,7 @@
/*
* netio.c -- network I/O support.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -151,32 +151,7 @@ netio_dispatch(netio_type *netio, const struct timespec *timeout, const sigset_t
max_fd = handler->fd;
}
if (handler->event_types & NETIO_EVENT_READ) {
- extern int slowaccept;
- extern struct timespec slowaccept_timeout;
-
- if ((handler->event_types & NETIO_EVENT_ACCEPT) && slowaccept) {
- if (timespec_compare(&slowaccept_timeout, netio_current_time(netio)) < 0) {
- slowaccept = 0;
- }
- if (slowaccept) {
- /** Timeout after slowaccept timeout. */
- struct timespec relative;
- relative.tv_sec = slowaccept_timeout.tv_sec;
- relative.tv_nsec = slowaccept_timeout.tv_nsec;
- timespec_subtract(&relative, netio_current_time(netio));
- if (!have_timeout ||
- timespec_compare(&relative, &minimum_timeout) < 0) {
- have_timeout = 1;
- minimum_timeout.tv_sec = relative.tv_sec;
- minimum_timeout.tv_nsec = relative.tv_nsec;
- }
- } else {
- FD_SET(handler->fd, &readfds);
- }
- } else {
- /* Not accept event or not slow accept */
- FD_SET(handler->fd, &readfds);
- }
+ FD_SET(handler->fd, &readfds);
}
if (handler->event_types & NETIO_EVENT_WRITE) {
FD_SET(handler->fd, &writefds);
diff --git a/usr.sbin/nsd/netio.h b/usr.sbin/nsd/netio.h
index c6686afc26f..c8299b97adb 100644
--- a/usr.sbin/nsd/netio.h
+++ b/usr.sbin/nsd/netio.h
@@ -1,7 +1,7 @@
/*
* netio.h -- network I/O support.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -50,8 +50,6 @@
#include "region-allocator.h"
-#define NETIO_SLOW_ACCEPT_TIMEOUT 2 /* in seconds */
-
/*
* The type of events a handler is interested in. These can be OR'ed
* together to specify multiple event types.
@@ -62,7 +60,6 @@ enum netio_event_types {
NETIO_EVENT_WRITE = 2,
NETIO_EVENT_EXCEPT = 4,
NETIO_EVENT_TIMEOUT = 8,
- NETIO_EVENT_ACCEPT = 16
};
typedef enum netio_event_types netio_event_types_type;
diff --git a/usr.sbin/nsd/nsd-control-setup.sh.in b/usr.sbin/nsd/nsd-control-setup.sh.in
new file mode 100755
index 00000000000..394afb40c41
--- /dev/null
+++ b/usr.sbin/nsd/nsd-control-setup.sh.in
@@ -0,0 +1,160 @@
+#!/bin/sh
+#
+# nsd-control-setup.sh - set up SSL certificates for nsd-control
+#
+# Copyright (c) 2011, NLnet Labs. All rights reserved.
+#
+# This software is open source.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# Neither the name of the NLNET LABS nor the names of its contributors may
+# be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# settings:
+
+# directory for files
+DESTDIR=@configdir@
+
+# issuer and subject name for certificates
+SERVERNAME=nsd
+CLIENTNAME=nsd-control
+
+# validity period for certificates
+DAYS=7200
+
+# size of keys in bits
+BITS=1536
+
+# hash algorithm
+HASH=sha256
+
+# base name for nsd server keys
+SVR_BASE=nsd_server
+
+# base name for nsd-control keys
+CTL_BASE=nsd_control
+
+# we want -rw-r----- access (say you run this as root: grp=yes (server), all=no).
+umask 0026
+
+# end of options
+
+# functions:
+error ( ) {
+ echo "$0 fatal error: $1"
+ exit 1
+}
+
+# check arguments:
+while test $# -ne 0; do
+ case $1 in
+ -d)
+ if test $# -eq 1; then error "need argument for -d"; fi
+ DESTDIR="$2"
+ shift
+ ;;
+ *)
+ echo "nsd-control-setup.sh - setup SSL keys for nsd-control"
+ echo " -d dir use directory to store keys and certificates."
+ echo " default: $DESTDIR"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+# go!:
+echo "setup in directory $DESTDIR"
+cd "$DESTDIR" || error "could not cd to $DESTDIR"
+
+# create certificate keys; do not recreate if they already exist.
+if test -f $SVR_BASE.key; then
+ echo "$SVR_BASE.key exists"
+else
+ echo "generating $SVR_BASE.key"
+ openssl genrsa -out $SVR_BASE.key $BITS || error "could not genrsa"
+fi
+if test -f $CTL_BASE.key; then
+ echo "$CTL_BASE.key exists"
+else
+ echo "generating $CTL_BASE.key"
+ openssl genrsa -out $CTL_BASE.key $BITS || error "could not genrsa"
+fi
+
+# create self-signed cert for server
+cat >request.cfg <<EOF
+[req]
+default_bits=$BITS
+default_md=$HASH
+prompt=no
+distinguished_name=req_distinguished_name
+
+[req_distinguished_name]
+commonName=$SERVERNAME
+EOF
+test -f request.cfg || error "could not create request.cfg"
+
+echo "create $SVR_BASE.pem (self signed certificate)"
+openssl req -key $SVR_BASE.key -config request.cfg -new -x509 -days $DAYS -out $SVR_BASE.pem || error "could not create $SVR_BASE.pem"
+# create trusted usage pem
+openssl x509 -in $SVR_BASE.pem -addtrust serverAuth -out $SVR_BASE"_trust.pem"
+
+# create client request and sign it, piped
+cat >request.cfg <<EOF
+[req]
+default_bits=$BITS
+default_md=$HASH
+prompt=no
+distinguished_name=req_distinguished_name
+
+[req_distinguished_name]
+commonName=$CLIENTNAME
+EOF
+test -f request.cfg || error "could not create request.cfg"
+
+echo "create $CTL_BASE.pem (signed client certificate)"
+openssl req -key $CTL_BASE.key -config request.cfg -new | openssl x509 -req -days $DAYS -CA $SVR_BASE"_trust.pem" -CAkey $SVR_BASE.key -CAcreateserial -$HASH -out $CTL_BASE.pem
+test -f $CTL_BASE.pem || error "could not create $CTL_BASE.pem"
+# create trusted usage pem
+# openssl x509 -in $CTL_BASE.pem -addtrust clientAuth -out $CTL_BASE"_trust.pem"
+
+# see details with openssl x509 -noout -text < $SVR_BASE.pem
+# echo "create $CTL_BASE""_browser.pfx (web client certificate)"
+# echo "create webbrowser PKCS#12 .PFX certificate file. In Firefox import in:"
+# echo "preferences - advanced - encryption - view certificates - your certs"
+# echo "empty password is used, simply click OK on the password dialog box."
+# openssl pkcs12 -export -in $CTL_BASE"_trust.pem" -inkey $CTL_BASE.key -name "nsd remote control client cert" -out $CTL_BASE"_browser.pfx" -password "pass:" || error "could not create browser certificate"
+
+# remove unused permissions
+chmod o-rw $SVR_BASE.pem $SVR_BASE.key $CTL_BASE.pem $CTL_BASE.key
+
+# remove crap
+rm -f request.cfg
+rm -f $CTL_BASE"_trust.pem" $SVR_BASE"_trust.pem" $SVR_BASE"_trust.srl"
+
+echo "Setup success. Certificates created. Enable in nsd.conf file to use"
+
+exit 0
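
A minimal usage sketch for the setup script above; the installed command name
(nsd-control-setup) and the /etc/nsd directory are assumptions, the script
itself defaults to @configdir@ and only needs a writable directory:

  # run once as root; private keys already present in the directory are kept
  nsd-control-setup                 # generate nsd_server.* and nsd_control.* files
  nsd-control-setup -d /etc/nsd     # or pick the destination directory explicitly
  # afterwards set control-enable: yes in the remote-control: section of nsd.conf
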
diff --git a/usr.sbin/nsd/nsd-control.8.in b/usr.sbin/nsd/nsd-control.8.in
new file mode 100644
index 00000000000..bf610f1097b
--- /dev/null
+++ b/usr.sbin/nsd/nsd-control.8.in
@@ -0,0 +1,245 @@
+.TH "nsd\-control" "8" "Oct 29, 2013" "NLnet Labs" "nsd 4.0.0"
+.\" Copyright (c) 2011, NLnet Labs. All rights reserved.
+.\" See LICENSE for the license.
+.SH "NAME"
+.LP
+.B nsd\-control,
+.B nsd\-control\-setup
+\- NSD remote server control utility.
+.SH "SYNOPSIS"
+.B nsd\-control
+.RB [ \-c
+.IR cfgfile ]
+.RB [ \-s
+.IR server ]
+.IR command
+.SH "DESCRIPTION"
+.B nsd\-control
+performs remote administration on the \fInsd\fR(8) DNS server. It reads
+the configuration file, contacts the nsd server over SSL, sends the
+command and displays the result.
+.P
+The available options are:
+.TP
+.B \-h
+Show the version and commandline option help.
+.TP
+.B \-c \fIcfgfile
+The config file to read with settings. If not given the default
+config file @nsdconfigfile@ is used.
+.TP
+.B \-s \fIserver[@port]
+IPv4 or IPv6 address of the server to contact. If not given, the
+address is read from the config file.
+.SH "COMMANDS"
+There are several commands that the server understands.
+.TP
+.B start
+Start the server. Simply execs \fInsd\fR(8). The nsd executable
+is searched for in the \fBPATH\fR set in the environment. It is started
+with the config file specified using \fI\-c\fR or the default config file.
+.TP
+.B stop
+Stop the server. The server daemon exits.
+.TP
+.B reload [<zone>]
+Reload zonefiles and reopen logfile. Without argument reads changed
+zonefiles. With argument reads the zonefile for the given zone and
+loads it.
+.TP
+.B reconfig
+Reload nsd.conf and apply changes to TSIG keys and configuration patterns,
+and apply the changes to add and remove zones that are mentioned in the config.
+Other changes are not applied, such as listening ip address and port and chroot.
+The pattern update means that the configuration options for
+zones (request\-xfr, zonefile, notify, ...) are updated. New
+patterns also become available for use with the addzone command.
+.TP
+.B repattern
+Same as the reconfig option.
+.TP
+.B log_reopen
+Reopen the logfile, for log rotate that wants to move the logfile away
+and create a new logfile. The log can also be reopened with kill \-HUP
+(which also reloads all zonefiles).
+.TP
+.B status
+Display server status. Exit code 3 if not running (the connection to the
+port is refused), 1 on error, 0 if running.
+.TP
+.B stats
+Output a sequence of name=value lines with statistics information; requires
+NSD to be compiled with this option enabled.
+.TP
+.B stats_noreset
+Same as stats, but does not zero the counters.
+.TP
+.B addzone <zone name> <pattern name>
+Add a new zone to the running server. The zone is added to the zonelist
+file on disk, so it stays after a restart. The pattern name determines
+the options for the new zone. For slave zones a zone transfer is
+immediately attempted. For zones with a zonefile, NSD attempts to
+read the zone file in.
+.TP
+.B delzone <zone name>
+Remove the zone from the running server. The zone is removed from the
+zonelist file on disk, from the nsd.db file and from the memory. If it
+had a zonefile, that file remains on disk (but may be outdated). Zones
+configured inside nsd.conf itself cannot be removed this way, because the
+daemon does not write to the nsd.conf file; to delete such zones with the
+delzone command, add them to the zonelist file instead.
+.TP
+.B write [<zone>]
+Write zonefiles to disk, or the given zonefile to disk. Zones that have
+changed (via AXFR or IXFR) are written, or if the zonefile has not been
+created yet then it is created. Directory components of the zonefile
+path are created if necessary.
+.TP
+.B notify [<zone>]
+Send NOTIFY messages to slave servers. Sends to the IP addresses
+configured in the 'notify:' lists for the master zones hosted on this
+server. Usually NSD sends NOTIFY messages right away when a master zone
+serial is updated. If a zone is given, notifies are sent for that zone.
+These slave servers are supposed to initiate a zone transfer request
+later (to this server or another master); this can be allowed via
+the 'provide\-xfr:' acl list configuration.
+.TP
+.B transfer [<zone>]
+Attempt to update slave zones that are hosted on this server by contacting
+the masters. The masters are configured via 'request\-xfr:' lists.
+If a zone is given, that zone is updated. Usually NSD receives a NOTIFY
+from the masters (configured via 'allow\-notify:' acl list) that a new zone
+serial has to be transferred.
+.TP
+.B force_transfer [<zone>]
+Force update slave zones that are hosted on this server. Even if the
+master hosts the same serial number of the zone, a full AXFR is performed
+to fetch it. If you want to use IXFR and check that the serial number
+increases, use the 'transfer' command.
+.TP
+.B zonestatus [<zone>]
+Print state of the zone, the serial numbers and since when they have
+been acquired. Also prints the notify action (to which server), and
+zone transfer (and from which master) if there is activity right now.
+.TP
+.B serverpid
+Prints the PID of the server process. This is used for statistics (and
+only works when NSD is compiled with statistics enabled). This pid is
+not for sending unix signals; use the pid from nsd.pid for that, as that
+pid is also stable.
+.TP
+.B verbosity <number>
+Change logging verbosity.
+.SH "EXIT CODE"
+The nsd\-control program exits with status code 1 on error, 0 on success.
+.SH "SET UP"
+The setup requires a self\-signed certificate and private keys for both
+the server and client. The script \fInsd\-control\-setup\fR generates
+these in the default run directory, or with \-d in another directory.
+By changing the access control permissions on the key files you can decide
+who can use nsd\-control; by default the owner and group can, but not all users.
+The script preserves private keys present in the directory.
+After running the script as root, turn on \fBcontrol\-enable\fR in
+\fInsd.conf\fR.
+.SH "STATISTIC COUNTERS"
+The \fIstats\fR command shows a number of statistic counters.
+.TP
+.I num.queries
+number of queries received (the tcp and udp queries added up).
+.TP
+.I serverX.queries
+number of queries handled by the server process. The number of
+server processes is set with the config statement \fBserver\-count\fR.
+.TP
+.I time.boot
+uptime in seconds since the server was started. With fractional seconds.
+.TP
+.I time.elapsed
+time since the last stats report, in seconds. With fractional seconds.
+Can be zero if polled quickly, when the previous stats command reset the
+counters, so that the next report shows all-zero counters and zero elapsed time.
+.TP
+.I size.db.disk
+size of nsd.db on disk, in bytes.
+.TP
+.I size.db.mem
+size of the DNS database in memory, in bytes.
+.TP
+.I size.xfrd.mem
+size of memory for zone transfers and notifies in xfrd process, excludes
+TSIG data, in bytes.
+.TP
+.I size.config.disk
+size of zonelist file on disk, excludes the nsd.conf size, in bytes.
+.TP
+.I size.config.mem
+size of config data in memory, kept twice in server and xfrd process,
+in bytes.
+.TP
+.I num.type.X
+number of queries with this query type.
+.TP
+.I num.opcode.X
+number of queries with this opcode.
+.TP
+.I num.class.X
+number of queries with this query class.
+.TP
+.I num.rcode.X
+number of answers that carried this return code.
+.TP
+.I num.edns
+number of queries with EDNS OPT.
+.TP
+.I num.ednserr
+number of queries which failed EDNS parse.
+.TP
+.I num.udp
+number of queries over UDP ip4.
+.TP
+.I num.udp6
+number of queries over UDP ip6.
+.TP
+.I num.tcp
+number of connections over TCP ip4.
+.TP
+.I num.tcp6
+number of connections over TCP ip6.
+.TP
+.I num.answer_wo_aa
+number of answers with NOERROR rcode and without AA flag; this includes referrals.
+.TP
+.I num.rxerr
+number of queries for which the receive failed.
+.TP
+.I num.txerr
+number of answers for which the transmit failed.
+.TP
+.I num.raxfr
+number of AXFR requests from clients (that got served with reply).
+.TP
+.I num.truncated
+number of answers with TC flag set.
+.TP
+.I num.dropped
+number of queries that were dropped because they failed sanity check.
+.TP
+.I zone.master
+number of master zones served. These are zones with no 'request\-xfr:'
+entries.
+.TP
+.I zone.slave
+number of slave zones served. These are zones with 'request\-xfr'
+entries.
+.SH "FILES"
+.TP
+.I @nsdconfigfile@
+nsd configuration file.
+.TP
+.I @configdir@
+directory with private keys (nsd_server.key and nsd_control.key) and
+self\-signed certificates (nsd_server.pem and nsd_control.pem).
+.SH "SEE ALSO"
+\fInsd.conf\fR(5),
+\fInsd\fR(8),
+\fInsd\-checkconf\fR(8)
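
As a short illustration of the commands documented above, a hypothetical
nsd-control session could look like this (zone and pattern names are made up;
8952 is the control\-port used in the sample nsd.conf later in this diff):

  nsd-control status                            # exit 0 running, 3 stopped, 1 error
  nsd-control reload example.com                # re-read one changed zonefile
  nsd-control addzone www.example.org myzones   # pattern "myzones" must already exist
  nsd-control write                             # write changed zones back to zonefiles
  nsd-control stats_noreset                     # peek at counters without resetting them
  nsd-control -s ::1@8952 zonestatus            # talk to a specific control address/port
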
diff --git a/usr.sbin/nsd/nsd-control.c b/usr.sbin/nsd/nsd-control.c
new file mode 100644
index 00000000000..e9551851f17
--- /dev/null
+++ b/usr.sbin/nsd/nsd-control.c
@@ -0,0 +1,415 @@
+/*
+ * nsd-control.c - remote control utility for nsd.
+ *
+ * Copyright (c) 2011, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * The remote control utility contacts the nsd server over ssl and
+ * sends the command, receives the answer, and displays the result
+ * from the commandline.
+ */
+
+#include "config.h"
+#ifdef HAVE_SSL
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+#ifdef HAVE_OPENSSL_SSL_H
+#include <openssl/ssl.h>
+#endif
+#ifdef HAVE_OPENSSL_ERR_H
+#include <openssl/err.h>
+#endif
+#ifdef HAVE_OPENSSL_RAND_H
+#include <openssl/rand.h>
+#endif
+#include "util.h"
+#include "tsig.h"
+#include "options.h"
+
+/** Give nsd-control usage, and exit (1). */
+static void
+usage()
+{
+ printf("Usage: nsd-control [options] command\n");
+ printf(" Remote control utility for nsd server.\n");
+ printf("Version %s. Report bugs to <%s>.\n",
+ PACKAGE_VERSION, PACKAGE_BUGREPORT);
+ printf("Options:\n");
+ printf(" -c file config file, default is %s\n", CONFIGFILE);
+ printf(" -s ip[@port] server address, if omitted config is used.\n");
+ printf(" -h show this usage help.\n");
+ printf("Commands:\n");
+ printf(" start start server; runs nsd(8)\n");
+ printf(" stop stops the server\n");
+ printf(" reload [<zone>] reload modified zonefiles from disk\n");
+ printf(" reconfig reload the config file\n");
+ printf(" repattern the same as reconfig\n");
+ printf(" log_reopen reopen logfile (for log rotate)\n");
+ printf(" status display status of server\n");
+ printf(" stats print statistics\n");
+ printf(" stats_noreset peek at statistics\n");
+ printf(" addzone <name> <pattern> add a new zone\n");
+ printf(" delzone <name> remove a zone\n");
+ printf(" write [<zone>] write changed zonefiles to disk\n");
+ printf(" notify [<zone>] send NOTIFY messages to slave servers\n");
+ printf(" transfer [<zone>] try to update slave zones to newer serial\n");
+ printf(" force_transfer [<zone>] update slave zones with AXFR, no serial check\n");
+ printf(" zonestatus [<zone>] print state, serial, activity\n");
+ printf(" serverpid get pid of server process\n");
+ printf(" verbosity <number> change logging detail\n");
+ exit(1);
+}
+
+/** exit with ssl error */
+static void ssl_err(const char* s)
+{
+ fprintf(stderr, "error: %s\n", s);
+ ERR_print_errors_fp(stderr);
+ exit(1);
+}
+
+/** setup SSL context */
+static SSL_CTX*
+setup_ctx(nsd_options_t* cfg)
+{
+ char* s_cert, *c_key, *c_cert;
+ SSL_CTX* ctx;
+
+ s_cert = cfg->server_cert_file;
+ c_key = cfg->control_key_file;
+ c_cert = cfg->control_cert_file;
+
+ /* filenames may be relative to zonesdir */
+ if (cfg->zonesdir && cfg->zonesdir[0] &&
+ (s_cert[0] != '/' || c_key[0] != '/' || c_cert[0] != '/')) {
+ if(chdir(cfg->zonesdir))
+ ssl_err("could not chdir to zonesdir");
+ }
+
+ ctx = SSL_CTX_new(SSLv23_client_method());
+ if(!ctx)
+ ssl_err("could not allocate SSL_CTX pointer");
+ if(!(SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2))
+ ssl_err("could not set SSL_OP_NO_SSLv2");
+ if(!SSL_CTX_use_certificate_file(ctx,c_cert,SSL_FILETYPE_PEM) ||
+ !SSL_CTX_use_PrivateKey_file(ctx,c_key,SSL_FILETYPE_PEM)
+ || !SSL_CTX_check_private_key(ctx))
+ ssl_err("Error setting up SSL_CTX client key and cert");
+ if (SSL_CTX_load_verify_locations(ctx, s_cert, NULL) != 1)
+ ssl_err("Error setting up SSL_CTX verify, server cert");
+ SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
+
+ return ctx;
+}
+
+/** contact the server with TCP connect */
+static int
+contact_server(const char* svr, nsd_options_t* cfg, int statuscmd)
+{
+#ifdef INET6
+ struct sockaddr_storage addr;
+#else
+ struct sockaddr_in addr;
+#endif
+ socklen_t addrlen;
+ int fd;
+ int port = cfg->control_port;
+ /* use svr or a config entry */
+ if(!svr) {
+ if(cfg->control_interface)
+ svr = cfg->control_interface->address;
+ else svr = "127.0.0.1";
+ /* config 0 addr (everything), means ask localhost */
+ if(strcmp(svr, "0.0.0.0") == 0)
+ svr = "127.0.0.1";
+ else if(strcmp(svr, "::0") == 0 ||
+ strcmp(svr, "0::0") == 0 ||
+ strcmp(svr, "0::") == 0 ||
+ strcmp(svr, "::") == 0)
+ svr = "::1";
+ }
+ if(strchr(svr, '@')) {
+ char* ps = strchr(svr, '@');
+ *ps++ = 0;
+ port = atoi(ps);
+ if(!port) {
+ fprintf(stderr, "could not parse port %s\n", ps);
+ exit(1);
+ }
+ }
+ if(strchr(svr, ':')) {
+ struct sockaddr_in6 sa;
+ addrlen = (socklen_t)sizeof(struct sockaddr_in6);
+ memset(&sa, 0, addrlen);
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = (in_port_t)htons((uint16_t)port);
+ if(inet_pton((int)sa.sin6_family, svr, &sa.sin6_addr) <= 0) {
+ fprintf(stderr, "could not parse IP: %s\n", svr);
+ exit(1);
+ }
+ memcpy(&addr, &sa, addrlen);
+ } else { /* ip4 */
+ struct sockaddr_in sa;
+ addrlen = (socklen_t)sizeof(struct sockaddr_in);
+ memset(&sa, 0, addrlen);
+ sa.sin_family = AF_INET;
+ sa.sin_port = (in_port_t)htons((uint16_t)port);
+ if(inet_pton((int)sa.sin_family, svr, &sa.sin_addr) <= 0) {
+ fprintf(stderr, "could not parse IP: %s\n", svr);
+ exit(1);
+ }
+ memcpy(&addr, &sa, addrlen);
+ }
+
+ fd = socket(strchr(svr, ':')?AF_INET6:AF_INET, SOCK_STREAM, 0);
+ if(fd == -1) {
+ fprintf(stderr, "socket: %s\n", strerror(errno));
+ exit(1);
+ }
+ if(connect(fd, (struct sockaddr*)&addr, addrlen) < 0) {
+ fprintf(stderr, "error: connect (%s@%d): %s\n", svr, port,
+ strerror(errno));
+ if(errno == ECONNREFUSED && statuscmd) {
+ printf("nsd is stopped\n");
+ exit(3);
+ }
+ exit(1);
+ }
+ return fd;
+}
+
+/** setup SSL on the connection */
+static SSL*
+setup_ssl(SSL_CTX* ctx, int fd)
+{
+ SSL* ssl;
+ X509* x;
+ int r;
+
+ ssl = SSL_new(ctx);
+ if(!ssl)
+ ssl_err("could not SSL_new");
+ SSL_set_connect_state(ssl);
+ (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
+ if(!SSL_set_fd(ssl, fd))
+ ssl_err("could not SSL_set_fd");
+ while(1) {
+ ERR_clear_error();
+ if( (r=SSL_do_handshake(ssl)) == 1)
+ break;
+ r = SSL_get_error(ssl, r);
+ if(r != SSL_ERROR_WANT_READ && r != SSL_ERROR_WANT_WRITE)
+ ssl_err("SSL handshake failed");
+ /* wants to be called again */
+ }
+
+ /* check authenticity of server */
+ if(SSL_get_verify_result(ssl) != X509_V_OK)
+ ssl_err("SSL verification failed");
+ x = SSL_get_peer_certificate(ssl);
+ if(!x)
+ ssl_err("Server presented no peer certificate");
+ X509_free(x);
+ return ssl;
+}
+
+/** send stdin to server */
+static void
+send_file(SSL* ssl, FILE* in, char* buf, size_t sz)
+{
+ while(fgets(buf, (int)sz, in)) {
+ if(SSL_write(ssl, buf, (int)strlen(buf)) <= 0)
+ ssl_err("could not SSL_write contents");
+ }
+}
+
+/** send command and display result */
+static int
+go_cmd(SSL* ssl, int argc, char* argv[])
+{
+ char pre[10];
+ const char* space=" ";
+ const char* newline="\n";
+ int was_error = 0, first_line = 1;
+ int r, i;
+ char buf[1024];
+ snprintf(pre, sizeof(pre), "NSDCT%d ", NSD_CONTROL_VERSION);
+ if(SSL_write(ssl, pre, (int)strlen(pre)) <= 0)
+ ssl_err("could not SSL_write");
+ for(i=0; i<argc; i++) {
+ if(SSL_write(ssl, space, (int)strlen(space)) <= 0)
+ ssl_err("could not SSL_write");
+ if(SSL_write(ssl, argv[i], (int)strlen(argv[i])) <= 0)
+ ssl_err("could not SSL_write");
+ }
+ if(SSL_write(ssl, newline, (int)strlen(newline)) <= 0)
+ ssl_err("could not SSL_write");
+
+ /* TODO remove or use file upload */
+ if(argc == 1 && strcmp(argv[0], "load_cache") == 0) {
+ send_file(ssl, stdin, buf, sizeof(buf));
+ }
+
+ while(1) {
+ ERR_clear_error();
+ if((r = SSL_read(ssl, buf, (int)sizeof(buf)-1)) <= 0) {
+ if(SSL_get_error(ssl, r) == SSL_ERROR_ZERO_RETURN) {
+ /* EOF */
+ break;
+ }
+ ssl_err("could not SSL_read");
+ }
+ buf[r] = 0;
+ printf("%s", buf);
+ if(first_line && strncmp(buf, "error", 5) == 0)
+ was_error = 1;
+ first_line = 0;
+ }
+ return was_error;
+}
+
+/** go ahead and read config, contact server and perform command and display */
+static int
+go(const char* cfgfile, char* svr, int argc, char* argv[])
+{
+ nsd_options_t* opt;
+ int fd, ret;
+ SSL_CTX* ctx;
+ SSL* ssl;
+
+ /* read config */
+ if(!(opt = nsd_options_create(region_create(xalloc, free)))) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ tsig_init(opt->region);
+ if(!parse_options_file(opt, cfgfile, NULL, NULL)) {
+ fprintf(stderr, "could not read config file\n");
+ exit(1);
+ }
+ if(!opt->control_enable)
+ fprintf(stderr, "warning: control-enable is 'no' in the config file.\n");
+ ctx = setup_ctx(opt);
+
+ /* contact server */
+ fd = contact_server(svr, opt, argc>0&&strcmp(argv[0],"status")==0);
+ ssl = setup_ssl(ctx, fd);
+
+ /* send command */
+ ret = go_cmd(ssl, argc, argv);
+
+ SSL_free(ssl);
+ close(fd);
+ SSL_CTX_free(ctx);
+ region_destroy(opt->region);
+ return ret;
+}
+
+/** getopt global, in case header files fail to declare it. */
+extern int optind;
+/** getopt global, in case header files fail to declare it. */
+extern char* optarg;
+
+/** Main routine for nsd-control */
+int main(int argc, char* argv[])
+{
+ int c;
+ const char* cfgfile = CONFIGFILE;
+ char* svr = NULL;
+#ifdef USE_WINSOCK
+ int r;
+ WSADATA wsa_data;
+#endif
+ log_init("nsd-control");
+
+ ERR_load_crypto_strings();
+ ERR_load_SSL_strings();
+ OpenSSL_add_all_algorithms();
+ (void)SSL_library_init();
+
+ if(!RAND_status()) {
+ /* try to seed it */
+ unsigned char buf[256];
+ unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
+ size_t i;
+ v = seed;
+ for(i=0; i<256/sizeof(v); i++) {
+ memmove(buf+i*sizeof(v), &v, sizeof(v));
+ v = v*seed + (unsigned int)i;
+ }
+ RAND_seed(buf, 256);
+ fprintf(stderr, "warning: no entropy, seeding openssl PRNG with time\n");
+ }
+
+ /* parse the options */
+ while( (c=getopt(argc, argv, "c:s:h")) != -1) {
+ switch(c) {
+ case 'c':
+ cfgfile = optarg;
+ break;
+ case 's':
+ svr = optarg;
+ break;
+ case '?':
+ case 'h':
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+ if(argc == 0)
+ usage();
+ if(argc >= 1 && strcmp(argv[0], "start")==0) {
+ if(execl(NSD_START_PATH, "nsd", "-c", cfgfile,
+ (char*)NULL) < 0) {
+ fprintf(stderr, "could not exec %s: %s\n",
+ NSD_START_PATH, strerror(errno));
+ exit(1);
+ }
+ }
+
+ return go(cfgfile, svr, argc, argv);
+}
+
+#else /* HAVE_SSL */
+int main(void)
+{
+ printf("error: NSD was compiled without SSL.\n");
+ return 1;
+}
+#endif /* HAVE_SSL */
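
The client above frames a command as the literal text "NSDCT<version>", the
space-separated arguments and a newline (see go_cmd()), over a TLS connection
authenticated with the nsd-control-setup certificates (see setup_ctx()). As a
rough sketch only — assuming NSD_CONTROL_VERSION is 1 and the keys live in
/etc/nsd — the same exchange could be driven by hand; nsd-control(8) remains
the supported client:

  printf 'NSDCT1 status\n' | openssl s_client -quiet -connect 127.0.0.1:8952 \
      -cert /etc/nsd/nsd_control.pem -key /etc/nsd/nsd_control.key \
      -CAfile /etc/nsd/nsd_server.pem
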
diff --git a/usr.sbin/nsd/nsd-mem.c b/usr.sbin/nsd/nsd-mem.c
new file mode 100644
index 00000000000..0981eafef73
--- /dev/null
+++ b/usr.sbin/nsd/nsd-mem.c
@@ -0,0 +1,360 @@
+/*
+ * nsd-mem.c -- nsd-mem(8)
+ *
+ * Copyright (c) 2013, NLnet Labs. All rights reserved.
+ *
+ * See LICENSE for the license.
+ *
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "nsd.h"
+#include "tsig.h"
+#include "options.h"
+#include "namedb.h"
+#include "udb.h"
+#include "udbzone.h"
+#include "util.h"
+
+static void error(const char *format, ...) ATTR_FORMAT(printf, 1, 2);
+
+/*
+ * Print the help text.
+ *
+ */
+static void
+usage (void)
+{
+ fprintf(stderr, "Usage: nsd-mem [-c configfile]\n");
+ fprintf(stderr, "Version %s. Report bugs to <%s>.\n",
+ PACKAGE_VERSION, PACKAGE_BUGREPORT);
+}
+
+/*
+ * Something went wrong, give error messages and exit.
+ *
+ */
+static void
+error(const char *format, ...)
+{
+ va_list args;
+ va_start(args, format);
+ log_vmsg(LOG_ERR, format, args);
+ va_end(args);
+ exit(1);
+}
+
+/* zone memory structure */
+struct zone_mem {
+ /* size of data (allocated in db.region) */
+ size_t data;
+ /* unused space (in db.region) due to alignment */
+ size_t data_unused;
+ /* udb data allocated */
+ size_t udb_data;
+ /* udb overhead (chunk2**x - data) */
+ size_t udb_overhead;
+
+ /* count of number of domains */
+ size_t domaincount;
+};
+
+/* total memory structure */
+struct tot_mem {
+ /* size of data (allocated in db.region) */
+ size_t data;
+ /* unused space (in db.region) due to alignment */
+ size_t data_unused;
+ /* udb data allocated */
+ size_t udb_data;
+ /* udb overhead (chunk2**x - data) */
+ size_t udb_overhead;
+
+ /* count of number of domains */
+ size_t domaincount;
+
+ /* options data */
+ size_t opt_data;
+ /* unused in options region */
+ size_t opt_unused;
+ /* dname compression table */
+ size_t compresstable;
+#ifdef RATELIMIT
+ /* size of rrl tables */
+ size_t rrl;
+#endif
+
+ /* total ram usage */
+ size_t ram;
+ /* total nsd.db disk usage */
+ size_t disk;
+};
+
+static void
+account_zone(struct namedb* db, struct zone_mem* zmem)
+{
+ zmem->data = region_get_mem(db->region);
+ zmem->data_unused = region_get_mem_unused(db->region);
+ zmem->udb_data = (size_t)db->udb->alloc->disk->stat_data;
+ zmem->udb_overhead = (size_t)(db->udb->alloc->disk->stat_alloc -
+ db->udb->alloc->disk->stat_data);
+ zmem->domaincount = db->domains->nametree->count;
+}
+
+static void
+pretty_mem(size_t x, const char* s)
+{
+ char buf[32];
+ memset(buf, 0, sizeof(buf));
+ if(snprintf(buf, sizeof(buf), "%12lld", (long long)x) > 12) {
+ printf("%12lld %s\n", (long long)x, s);
+ return;
+ }
+ printf("%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c %s\n",
+ buf[0], buf[1], buf[2], (buf[2]==' '?' ':'.'),
+ buf[3], buf[4], buf[5], (buf[5]==' '?' ':'.'),
+ buf[6], buf[7], buf[8], (buf[8]==' '?' ':'.'),
+ buf[9], buf[10], buf[11], s);
+}
+
+static void
+print_zone_mem(struct zone_mem* z)
+{
+ pretty_mem(z->data, "zone data");
+ pretty_mem(z->data_unused, "zone unused space (due to alignment)");
+ pretty_mem(z->udb_data, "data in nsd.db");
+ pretty_mem(z->udb_overhead, "overhead in nsd.db");
+}
+
+static void
+account_total(nsd_options_t* opt, struct tot_mem* t)
+{
+ t->opt_data = region_get_mem(opt->region);
+ t->opt_unused = region_get_mem_unused(opt->region);
+ t->compresstable = sizeof(uint16_t) *
+ (t->domaincount + 1 + EXTRA_DOMAIN_NUMBERS);
+ t->compresstable *= opt->server_count;
+
+#ifdef RATELIMIT
+#define SIZE_RRL_BUCKET (8 + 4 + 4 + 4 + 4 + 2)
+ t->rrl = opt->rrl_size * SIZE_RRL_BUCKET;
+ t->rrl *= opt->server_count;
+#endif
+
+ t->ram = t->data + t->data_unused + t->opt_data + t->opt_unused +
+ t->compresstable;
+#ifdef RATELIMIT
+ t->ram += t->rrl;
+#endif
+ t->disk = t->udb_data + t->udb_overhead;
+}
+
+static void
+print_tot_mem(struct tot_mem* t)
+{
+ printf("\ntotal\n");
+ pretty_mem(t->data, "data");
+ pretty_mem(t->data_unused, "unused space (due to alignment)");
+ pretty_mem(t->opt_data, "options");
+ pretty_mem(t->opt_unused, "options unused space (due to alignment)");
+ pretty_mem(t->compresstable, "name table (depends on servercount)");
+#ifdef RATELIMIT
+ pretty_mem(t->rrl, "RRL table (depends on servercount)");
+#endif
+ pretty_mem(t->udb_data, "data in nsd.db");
+ pretty_mem(t->udb_overhead, "overhead in nsd.db");
+ printf("\nsummary\n");
+
+ pretty_mem(t->ram, "ram usage (excl space for buffers)");
+ pretty_mem(t->disk, "disk usage (excl 12% space claimed for growth)");
+}
+
+static void
+add_mem(struct tot_mem* t, struct zone_mem* z)
+{
+ t->data += z->data;
+ t->data_unused += z->data_unused;
+ t->udb_data += z->udb_data;
+ t->udb_overhead += z->udb_overhead;
+ t->domaincount += z->domaincount;
+}
+
+static void
+check_zone_mem(const char* tf, const char* df, zone_options_t* zo,
+ nsd_options_t* opt, struct tot_mem* totmem)
+{
+ struct namedb* db;
+ const dname_type* dname = (const dname_type*)zo->node.key;
+ zone_type* zone;
+ struct udb_base* taskudb;
+ udb_ptr last_task;
+ struct zone_mem zmem;
+
+ printf("zone %s\n", zo->name);
+
+ /* init*/
+ memset(&zmem, 0, sizeof(zmem));
+ db = namedb_open(df, opt);
+ if(!db) error("cannot open %s: %s", df, strerror(errno));
+ zone = namedb_zone_create(db, dname, zo);
+ taskudb = udb_base_create_new(tf, &namedb_walkfunc, NULL);
+ udb_ptr_init(&last_task, taskudb);
+
+ /* read the zone */
+ namedb_read_zonefile(db, zone, taskudb, &last_task);
+
+ /* account the memory for this zone */
+ account_zone(db, &zmem);
+
+ /* pretty print the memory for this zone */
+ print_zone_mem(&zmem);
+
+ /* delete the zone from memory */
+ namedb_close(db);
+ udb_base_free(taskudb);
+ unlink(df);
+ unlink(tf);
+
+ /* add up totals */
+ add_mem(totmem, &zmem);
+}
+
+static void
+check_mem(nsd_options_t* opt)
+{
+ struct tot_mem totmem;
+ zone_options_t* zo;
+ char tf[512];
+ char df[512];
+ memset(&totmem, 0, sizeof(totmem));
+ snprintf(tf, sizeof(tf), "./nsd-mem-task-%u.db", (unsigned)getpid());
+ snprintf(df, sizeof(df), "./nsd-mem-db-%u.db", (unsigned)getpid());
+
+ /* read all zones and account memory */
+ RBTREE_FOR(zo, zone_options_t*, opt->zone_options) {
+ check_zone_mem(tf, df, zo, opt, &totmem);
+ }
+
+ /* calculate more total statistics */
+ account_total(opt, &totmem);
+ /* print statistics */
+ print_tot_mem(&totmem);
+
+ /* final advice */
+ printf("\nFinal advice estimate:\n");
+ printf("(The partial mmap causes reload&AXFR to take longer(disk access))\n");
+ pretty_mem(totmem.ram + totmem.disk, "data and big mmap");
+ pretty_mem(totmem.ram + totmem.disk/6, "data and partial mmap");
+}
+
+/* dummy functions to link */
+struct nsd;
+int writepid(struct nsd * ATTR_UNUSED(nsd))
+{
+ return 0;
+}
+void unlinkpid(const char * ATTR_UNUSED(file))
+{
+}
+void bind8_stats(struct nsd * ATTR_UNUSED(nsd))
+{
+}
+
+void sig_handler(int ATTR_UNUSED(sig))
+{
+}
+
+extern char *optarg;
+extern int optind;
+
+int
+main(int argc, char *argv[])
+{
+ /* Scratch variables... */
+ int c;
+ struct nsd nsd;
+ const char *configfile = CONFIGFILE;
+ memset(&nsd, 0, sizeof(nsd));
+
+ log_init("nsd-mem");
+
+ /* Parse the command line... */
+ while ((c = getopt(argc, argv, "c:h"
+ )) != -1) {
+ switch (c) {
+ case 'c':
+ configfile = optarg;
+ break;
+ case 'h':
+ usage();
+ exit(0);
+ case '?':
+ default:
+ usage();
+ exit(1);
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ /* Commandline parse error */
+ if (argc != 0) {
+ usage();
+ exit(1);
+ }
+
+ /* Read options */
+ nsd.options = nsd_options_create(region_create_custom(xalloc, free,
+ DEFAULT_CHUNK_SIZE, DEFAULT_LARGE_OBJECT_SIZE,
+ DEFAULT_INITIAL_CLEANUP_SIZE, 1));
+ tsig_init(nsd.options->region);
+ if(!parse_options_file(nsd.options, configfile, NULL, NULL)) {
+ error("could not read config: %s\n", configfile);
+ }
+ if(!parse_zone_list_file(nsd.options)) {
+ error("could not read zonelist file %s\n",
+ nsd.options->zonelistfile);
+ }
+ if (verbosity == 0)
+ verbosity = nsd.options->verbosity;
+
+#ifdef HAVE_CHROOT
+ if(nsd.chrootdir == 0) nsd.chrootdir = nsd.options->chroot;
+#ifdef CHROOTDIR
+ /* if still no chrootdir, fallback to default */
+ if(nsd.chrootdir == 0) nsd.chrootdir = CHROOTDIR;
+#endif /* CHROOTDIR */
+#endif /* HAVE_CHROOT */
+ if(nsd.options->zonesdir && nsd.options->zonesdir[0]) {
+ if(chdir(nsd.options->zonesdir)) {
+ error("cannot chdir to '%s': %s",
+ nsd.options->zonesdir, strerror(errno));
+ }
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "changed directory to %s",
+ nsd.options->zonesdir));
+ }
+
+ /* Chroot */
+#ifdef HAVE_CHROOT
+ if (nsd.chrootdir && strlen(nsd.chrootdir)) {
+ if(chdir(nsd.chrootdir)) {
+ error("unable to chdir to chroot: %s", strerror(errno));
+ }
+ DEBUG(DEBUG_IPC,1, (LOG_INFO, "changed root directory to %s",
+ nsd.chrootdir));
+ }
+#endif /* HAVE_CHROOT */
+
+ check_mem(nsd.options);
+
+ exit(0);
+}
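
A hypothetical invocation of the tool above; it builds each configured zone in
temporary database and task files, prints the per-zone numbers from
print_zone_mem() and the totals plus "Final advice estimate" from
print_tot_mem(), then removes the temporary files:

  nsd-mem -c /etc/nsd/nsd.conf
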
diff --git a/usr.sbin/nsd/nsd.conf.sample.in b/usr.sbin/nsd/nsd.conf.sample.in
index fe1a4874c5c..002d40e7065 100644
--- a/usr.sbin/nsd/nsd.conf.sample.in
+++ b/usr.sbin/nsd/nsd.conf.sample.in
@@ -8,10 +8,15 @@
# This is a comment.
# Sample configuration file
+# include: "file" # include that file's text over here.
# options for the nsd server
server:
- # uncomment to specify specific interfaces to bind (default wildcard interface).
+ # Number of NSD servers to fork. Put the number of CPUs to use here.
+ # server-count: 1
+
+ # uncomment to specify specific interfaces to bind (default are the
+	# uncomment to specify specific interfaces to bind (by default the
+	# wildcard interfaces 0.0.0.0 and ::0 are used).
# ip-address: 1.2.3.4@5678
# ip-address: 12fe::8ef0
@@ -19,36 +24,65 @@ server:
# Allow binding to non local addresses. Default no.
# ip-transparent: no
- # don't answer VERSION.BIND and VERSION.SERVER CHAOS class queries
- # hide-version: no
-
# enable debug mode, does not fork daemon process into the background.
# debug-mode: no
- # listen only on IPv4 connections
- # ip4-only: no
+ # listen on IPv4 connections
+ # do-ip4: yes
+
+ # listen on IPv6 connections
+ # do-ip6: yes
+
+ # port to answer queries on. default is 53.
+ # port: 53
+
+ # Verbosity level.
+ # verbosity: 0
- # listen only on IPv6 connections
- # ip6-only: no
+ # After binding socket, drop user privileges.
+ # can be a username, id or id.gid.
+ # username: @user@
+
+ # Run NSD in a chroot-jail.
+ # make sure to have pidfile and database reachable from there.
+ # by default, no chroot-jail is used.
+ # chroot: "@configdir@"
+
+ # The directory for zonefile: files. The daemon chdirs here.
+ # zonesdir: "@zonesdir@"
+ # the list of dynamically added zones.
+ # zonelistfile: "@zonelistfile@"
+
# the database to use
# database: "@dbfile@"
+ # log messages to file. Default to stderr and syslog (with
+	# log messages to file. Defaults to stderr and syslog (with
+	# facility LOG_DAEMON). stderr disappears when daemon goes to bg.
+
+ # File to store pid for nsd in.
+ # pidfile: "@pidfile@"
+
+ # The file where secondary zone refresh and expire timeouts are kept.
+ # If you delete this file, all secondary zones are forced to be
+ # 'refreshing' (as if nsd got a notify).
+ # xfrdfile: "@xfrdfile@"
+
+ # The directory where zone transfers are stored, in a subdir of it.
+ # xfrdir: "@xfrdir@"
+
+ # don't answer VERSION.BIND and VERSION.SERVER CHAOS class queries
+ # hide-version: no
+
# identify the server (CH TXT ID.SERVER entry).
# identity: "unidentified server"
# NSID identity (hex string). default disabled.
# nsid: "aabbccdd"
- # log messages to file. Default to stderr and syslog (with facility LOG_DAEMON).
- # logfile: "@logfile@"
-
- # Number of NSD servers to fork.
- # server-count: 1
-
# Maximum number of concurrent TCP connections per server.
- # This option should have a value below 1000.
- # tcp-count: 10
+ # tcp-count: 100
# Maximum number of queries served on a single TCP connection.
# By default 0, which means no maximum.
@@ -63,44 +97,14 @@ server:
# Preferred EDNS buffer size for IPv6.
# ipv6-edns-size: 4096
- # File to store pid for nsd in.
- # pidfile: "@pidfile@"
-
- # port to answer queries on. default is 53.
- # port: 53
-
- # statistics are produced every number of seconds.
+ # statistics are produced every number of seconds. Prints to log.
# statistics: 3600
- # if per zone statistics is enabled, file to store statistics.
- # zone-stats-file: "@zonestatsfile@"
-
- # Run NSD in a chroot-jail.
- # make sure to have pidfile and database reachable from there.
- # by default, no chroot-jail is used.
- # chroot: "@configdir@"
-
- # After binding socket, drop user privileges.
- # can be a username, id or id.gid.
- # username: @user@
-
- # The directory for zonefile: files.
- # zonesdir: "@zonesdir@"
-
- # The file where incoming zone transfers are stored.
- # run nsd-patch to update zone files, then you can safely delete it.
- # difffile: "@difffile@"
-
- # The file where secondary zone refresh and expire timeouts are kept.
- # If you delete this file, all secondary zones are forced to be
- # 'refreshing' (as if nsd got a notify).
- # xfrdfile: "@xfrdfile@"
-
# Number of seconds between reloads triggered by xfrd.
- # xfrd-reload-timeout: 10
+ # xfrd-reload-timeout: 1
- # Verbosity level.
- # verbosity: 0
+ # check mtime of all zone files on start and sighup
+ # zonefiles-check: yes
# RRLconfig
# Response Rate Limiting, size of the hashtable. Default 1000000.
@@ -132,79 +136,113 @@ server:
# rrl-whitelist-ratelimit: 2000
# RRLend
-# key for zone 1
-key:
- name: mskey
- algorithm: hmac-md5
- secret: "K2tf3TRjvQkVCmJF3/Z9vA=="
+# Remote control config section.
+remote-control:
+ # Enable remote control with nsd-control(8) here.
+ # set up the keys and certificates with nsd-control-setup.
+ # control-enable: no
-# Sample zone 1
-zone:
- name: "example.com"
- zonefile: "example.com.zone"
+ # what interfaces are listened to for control, default is on localhost.
+ # control-interface: 127.0.0.1
+ # control-interface: ::1
- # This is a slave zone. Masters are listed below.
- # If no access control elements are provided, this zone
- # will not be served to/from other servers.
+ # port number for remote control operations (uses TLS over TCP).
+ # control-port: 8952
- # master 1
- allow-notify: 168.192.44.42 mskey
- request-xfr: 168.192.44.42 mskey
+ # nsd server key file for remote control.
+ # server-key-file: "@configdir@/nsd_server.key"
- # master 2
- allow-notify: 10.0.0.11 NOKEY
- request-xfr: 10.0.0.11 NOKEY
+ # nsd server certificate file for remote control.
+ # server-cert-file: "@configdir@/nsd_server.pem"
- # By default, a slave will request a zone transfer with IXFR/TCP.
- # If you want to make use of IXFR/UDP use
- allow-notify: 10.0.0.12 NOKEY
- request-xfr: UDP 10.0.0.12 NOKEY
+ # nsd-control key file.
+ # control-key-file: "@configdir@/nsd_control.key"
- # for a master that only speaks AXFR (like NSD) use
- allow-notify: 10.0.0.13 NOKEY
- request-xfr: AXFR 10.0.0.13 NOKEY
+ # nsd-control certificate file.
+ # control-cert-file: "@configdir@/nsd_control.pem"
- # Attention: You cannot use UDP and AXFR together. AXFR is always over
- # TCP. If you use UDP, we higly recommend you to deploy TSIG.
- # Allow AXFR fallback if the master does not support IXFR. Default
- # is yes.
- allow-axfr-fallback: "yes"
+# Secret keys for TSIGs that secure zone transfers.
+# You could include: "secret.keys" and put the 'key:' statements in there,
+# and give that file special access control permissions.
+#
+# key:
+ # The key name is sent to the other party, it must be the same
+ #name: "keyname"
+ # algorithm hmac-md5, or hmac-sha1, or hmac-sha256 (if compiled in)
+ #algorithm: hmac-sha256
+ # secret material, must be the same as the other party uses.
+ # base64 encoded random number.
+ # e.g. from dd if=/dev/random of=/dev/stdout count=1 bs=32 | base64
+ #secret: "K2tf3TRjvQkVCmJF3/Z9vA=="
+
+
+# Patterns have zone configuration and they are shared by one or more zones.
+#
+# pattern:
+ # name by which the pattern is referred to
+ #name: "myzones"
+ # the zonefile for the zones that use this pattern.
+ # if relative then from the zonesdir (inside the chroot).
+ # the name is processed: %s - zone name (as appears in zone:name).
+ # %1 - first character of zone name, %2 second, %3 third.
+ # %z - topleveldomain label of zone, %y, %x next labels in name.
+ # if label or character does not exist you get a dot '.'.
+ # for example "%s.zone" or "zones/%1/%2/%3/%s" or "secondary/%z/%s"
+ #zonefile: "%s.zone"
+
+ # If no master and slave access control elements are provided,
+ # this zone will not be served to/from other servers.
+
+ # A master zone needs notify: and provide-xfr: lists. A slave
+ # may also allow zone transfer (for debug or other secondaries).
+ # notify these slaves when the master zone changes, address TSIG|NOKEY
+ # IP can be ipv4 and ipv6, with @port for a nondefault port number.
+ #notify: 192.0.2.1 NOKEY
+ # allow these IPs and TSIG to transfer zones, addr TSIG|NOKEY|BLOCKED
+ # address range 192.0.2.0/24, 1.2.3.4&255.255.0.0, 3.0.2.20-3.0.2.40
+ #provide-xfr: 192.0.2.0/24 my_tsig_key_name
+ # set the number of retries for notify.
+ #notify-retry: 5
# uncomment to provide AXFR to all the world
# provide-xfr: 0.0.0.0/0 NOKEY
# provide-xfr: ::0/0 NOKEY
+ # A slave zone needs allow-notify: and request-xfr: lists.
+ #allow-notify: 2001:db8::0/64 my_tsig_key_name
+ # By default, a slave will request a zone transfer with IXFR/TCP.
+ # If you want to make use of IXFR/UDP use: UDP addr tsigkey
+ # for a master that only speaks AXFR (like NSD) use AXFR addr tsigkey
+ #request-xfr: 192.0.2.2 the_tsig_key_name
+ # Attention: You cannot use UDP and AXFR together. AXFR is always over
+	# TCP. If you use UDP, we highly recommend that you deploy TSIG.
+ # Allow AXFR fallback if the master does not support IXFR. Default
+ # is yes.
+ #allow-axfr-fallback: yes
# set local interface for sending zone transfer requests.
- outgoing-interface: 10.0.0.10
-
-# Sample zone 2
-zone:
- name: "example.net"
- zonefile: "example.net.signed.zone"
-
- # This is a master zone. Slaves are listed below.
- # If no access control elements are provided, this zone
- # will not be served to/from other servers.
+ # default is let the OS choose.
+ #outgoing-interface: 10.0.0.10
- # secondary 1. Uses port 5300.
- notify: 10.0.0.14@5300 sec1_key
- provide-xfr: 10.0.0.14@5300 sec1_key
+ # if you give another pattern name here, at this point the settings
+ # from that pattern are inserted into this one (as if it were a
+ # macro). The statement can be given in between other statements,
+ # because the order of access control elements can make a difference
+ # (which master to request from first, which slave to notify first).
+ #include-pattern: "common-masters"
- # secondary 2.
- notify: 10.11.12.14 sec2_key
- provide-xfr: 10.11.12.14 sec2_key
-
- # also provide xfr to operator's network.
- provide-xfr: 169.192.85.0/24 NOKEY
- # uncomment to disable xfr for the address.
- # provide-xfr: 169.192.85.66 BLOCKED
-
- # set the number of retries for notify.
- notify-retry: 5
- # set local interface for sending notifies
- outgoing-interface: 10.0.0.15
+# Fixed zone entries. Here you can config zones that cannot be deleted.
+# Zones that are dynamically added and deleted are put in the zonelist file.
+#
+# zone:
+ # name: "example.com"
+ # you can give a pattern here, all the settings from that pattern
+ # are then inserted at this point
+ # include-pattern: "master"
+ # You can also specify (additional) options directly for this zone.
+ # zonefile: "example.com.zone"
+ # request-xfr: 192.0.2.1 example.com.key
# RRLconfig
# Response Rate Limiting, whitelist types
@@ -220,20 +258,3 @@ zone:
# rrl-whitelist: all
# RRLend
-
-# keys for zone 2
-key:
- name: "sec1_key"
- algorithm: hmac-md5
- secret: "6KM6qiKfwfEpamEq72HQdA=="
-
-key:
- name: sec2_key
- algorithm: hmac-sha1
- secret: "m83H2x8R0zbDf3yRKhrqgw=="
-
-key:
- name: sec3_key
- algorithm: hmac-sha256
- secret: "m83H2x8R0zbDf3yRKhrqgw=="
-
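
Tying the sample together, a sketch of the pattern plus dynamically added zone
workflow it describes (names and addresses are illustrative):

  # in nsd.conf, define a pattern once, e.g.:
  #   pattern:
  #       name: "myzones"
  #       zonefile: "%s.zone"
  #       provide-xfr: 192.0.2.0/24 NOKEY
  nsd-control reconfig                        # make the new pattern available
  nsd-control addzone example.org myzones     # zone is recorded in the zonelist file
  nsd-control delzone example.org             # removal; its zonefile stays on disk
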
diff --git a/usr.sbin/nsd/nsd.h b/usr.sbin/nsd/nsd.h
index 2dd4676937e..955fc4fbae2 100644
--- a/usr.sbin/nsd/nsd.h
+++ b/usr.sbin/nsd/nsd.h
@@ -1,7 +1,7 @@
/*
* nsd.h -- nsd(8) definitions and prototypes
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -14,9 +14,10 @@
#include "dns.h"
#include "edns.h"
-#include "util.h"
struct netio_handler;
struct nsd_options;
+struct udb_base;
+struct daemon_remote;
/* The NSD runtime states and NSD ipc command values */
#define NSD_RUN 0
@@ -26,42 +27,38 @@ struct nsd_options;
#define NSD_REAP_CHILDREN 4
#define NSD_QUIT 5
/*
- * NSD_SOA_INFO is followed by u16(len in network byte order), dname,
- * and then nothing (no info) or soa info.
- */
-#define NSD_SOA_INFO 6
-/*
* PASS_TO_XFRD is followed by the u16(len in network order) and
* then network packet contents. packet is a notify(acl checked), or
* xfr reply from a master(acl checked).
* followed by u32(acl number that matched from notify/xfr acl).
*/
-#define NSD_PASS_TO_XFRD 7
+#define NSD_PASS_TO_XFRD 6
/*
- * NSD_ZONE_STATE is followed by u16(len in network byte order),
- * octet 0: zone is expired, 1: zone ok. and dname of zone.
+ * RELOAD_REQ is sent when parent receives a SIGHUP and tells
+ * xfrd that it wants to initiate a reload (and thus task swap).
*/
-#define NSD_ZONE_STATE 8
+#define NSD_RELOAD_REQ 7
/*
- * SOA BEGIN is sent at the start of a reload SOA_INFO pass
- * xfrd will not send to the parent (deadlock prevention).
- */
-#define NSD_SOA_BEGIN 9
-/*
- * SOA END is sent at the end of a reload SOA_INFO pass.
+ * RELOAD_DONE is sent at the end of a reload pass.
* xfrd then knows that reload phase is over.
*/
-#define NSD_SOA_END 10
+#define NSD_RELOAD_DONE 8
/*
* QUIT_SYNC is sent to signify a synchronisation of ipc
* channel content during reload
*/
-#define NSD_QUIT_SYNC 11
+#define NSD_QUIT_SYNC 9
+/*
+ * QUIT_WITH_STATS is sent during a reload when BIND8_STATS is defined,
+ * from parent to children. The stats are transferred too from child to
+ * parent with this commandvalue, when the child is exiting.
+ */
+#define NSD_QUIT_WITH_STATS 10
/*
* QUIT_CHILD is sent at exit, to make sure the child has exited so that
* port53 is free when all of nsd's processes have exited at shutdown time
*/
-#define NSD_QUIT_CHILD 12
+#define NSD_QUIT_CHILD 11
#define NSD_SERVER_MAIN 0x0U
#define NSD_SERVER_UDP 0x1U
@@ -76,29 +73,20 @@ struct nsd_options;
#ifdef BIND8_STATS
-#define LASTELEM(arr) (sizeof(arr) / sizeof(arr[0]) - 1)
-
-#define STATUP(nsd, stc) nsd->st.stc++
-#define STATUP2(nsd, stc, i) nsd->st.stc[(i) <= (LASTELEM(nsd->st.stc) - 1) ? i : LASTELEM(nsd->st.stc)]++
-
-# ifdef USE_ZONE_STATS
+/* Counter for statistics */
+typedef unsigned long stc_t;
-# define ZTATUP(zone, stc) zone->st.stc++
-# define ZTATUP2(zone, stc, i) zone->st.stc[(i) <= (LASTELEM(zone->st.stc) - 1) ? i : LASTELEM(zone->st.stc)]++
-
-# else
-
-# define ZTATUP(zone, stc) /* Nothing */
-# define ZTATUP2(zone, stc, i) /* Nothing */
+#define LASTELEM(arr) (sizeof(arr) / sizeof(arr[0]) - 1)
-# endif /* USE_ZONE_STATS */
+#define STATUP(nsd, stc) nsd->st.stc++
+/* #define STATUP2(nsd, stc, i) ((i) <= (LASTELEM(nsd->st.stc) - 1)) ? nsd->st.stc[(i)]++ : \
+ nsd->st.stc[LASTELEM(nsd->st.stc)]++ */
-#else /* BIND8_STATS */
+#define STATUP2(nsd, stc, i) nsd->st.stc[(i) <= (LASTELEM(nsd->st.stc) - 1) ? i : LASTELEM(nsd->st.stc)]++
+#else /* BIND8_STATS */
#define STATUP(nsd, stc) /* Nothing */
#define STATUP2(nsd, stc, i) /* Nothing */
-#define ZTATUP(zone, stc) /* Nothing */
-#define ZTATUP2(zone, stc, i) /* Nothing */
#endif /* BIND8_STATS */
@@ -133,12 +121,15 @@ struct nsd_child
*/
uint8_t need_to_send_STATS, need_to_send_QUIT;
uint8_t need_to_exit, has_exited;
- stack_type* dirty_zones; /* stack of type zone_type* */
/*
* The handler for handling the commands from the child.
*/
struct netio_handler* handler;
+
+#ifdef BIND8_STATS
+ stc_t query_count;
+#endif
};
/* NSD configuration and run-time variables */
@@ -153,6 +144,7 @@ struct nsd
/* Run-time variables */
pid_t pid;
volatile sig_atomic_t mode;
+ volatile sig_atomic_t signal_hint_reload_hup;
volatile sig_atomic_t signal_hint_reload;
volatile sig_atomic_t signal_hint_child;
volatile sig_atomic_t signal_hint_quit;
@@ -170,12 +162,15 @@ struct nsd
/* NULL if this is the parent process. */
struct nsd_child *this_child;
+ /* mmaps with data exchange from xfrd and reload */
+ struct udb_base* task[2];
+ int mytask; /* the base used by this process */
+ struct netio_handler* xfrd_listener;
+ struct daemon_remote* rc;
+
/* Configuration */
const char *dbfile;
const char *pidfile;
-#ifdef USE_ZONE_STATS
- const char *zonestatsfile;
-#endif
const char *log_filename;
const char *username;
uid_t uid;
@@ -210,7 +205,20 @@ struct nsd
size_t ipv6_edns_size;
#ifdef BIND8_STATS
- struct nsdst st;
+
+ struct nsdst {
+ time_t boot;
+ int period; /* Produce statistics dump every st_period seconds */
+ stc_t qtype[257]; /* Counters per qtype */
+ stc_t qclass[4]; /* Class IN or Class CH or other */
+ stc_t qudp, qudp6; /* Number of queries udp and udp6 */
+ stc_t ctcp, ctcp6; /* Number of tcp and tcp6 connections */
+ stc_t rcode[17], opcode[6]; /* Rcodes & opcodes */
+ /* Dropped, truncated, queries for nonconfigured zone, tx errors */
+ stc_t dropped, truncated, wrongzone, txerr, rxerr;
+ stc_t edns, ednserr, raxfr, nona;
+ uint64_t db_disk, db_mem;
+ } st;
#endif /* BIND8_STATS */
struct nsd_options* options;
@@ -230,7 +238,16 @@ void server_main(struct nsd *nsd);
void server_child(struct nsd *nsd);
void server_shutdown(struct nsd *nsd);
void server_close_all_sockets(struct nsd_socket sockets[], size_t n);
+struct event_base* nsd_child_event_base(void);
/* extra domain numbers for temporary domains */
#define EXTRA_DOMAIN_NUMBERS 1024
+#define SLOW_ACCEPT_TIMEOUT 2 /* in seconds */
+/* allocate and init xfrd variables */
+void server_prepare_xfrd(struct nsd *nsd);
+/* start xfrdaemon (again) */
+void server_start_xfrd(struct nsd *nsd, int del_db, int reload_active);
+/* send SOA serial numbers to xfrd */
+void server_send_soa_xfrd(struct nsd *nsd, int shortsoa);
+ssize_t block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout);
#endif /* _NSD_H_ */
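The PASS_TO_XFRD comment above describes the payload that follows the command value: a u16 length in network order, the acl-checked notify or xfr packet, then a u32 ACL number. Below is a minimal sketch of building that payload, assuming the caller supplies pkt, pktlen, aclnum and a large-enough output buffer; the byte order of the ACL number is not stated in the comment, so host order is assumed here:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static size_t
frame_pass_to_xfrd_payload(uint8_t* out, const uint8_t* pkt, uint16_t pktlen,
	uint32_t aclnum)
{
	size_t off = 0;
	uint16_t len_n = htons(pktlen); /* u16(len) in network byte order */
	memcpy(out + off, &len_n, sizeof(len_n)); off += sizeof(len_n);
	memcpy(out + off, pkt, pktlen); off += pktlen; /* packet contents */
	memcpy(out + off, &aclnum, sizeof(aclnum)); off += sizeof(aclnum);
	return off; /* out must hold at least 2 + pktlen + 4 bytes */
}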
diff --git a/usr.sbin/nsd/nsec3.h b/usr.sbin/nsd/nsec3.h
index d55b4825394..96c4367ff33 100644
--- a/usr.sbin/nsd/nsec3.h
+++ b/usr.sbin/nsd/nsec3.h
@@ -1,7 +1,7 @@
/*
* nsec3.h -- nsec3 handling.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -9,9 +9,8 @@
#ifndef NSEC3_H
#define NSEC3_H
-#include "config.h"
#ifdef NSEC3
-
+struct udb_ptr;
struct domain;
struct dname;
struct region;
@@ -19,40 +18,23 @@ struct zone;
struct namedb;
struct query;
struct answer;
-#ifndef FULL_PREHASH
struct rr;
-struct nsec3_domain;
-#endif
/*
- * Create the hashed name of the nsec3 record
- * for the given dname.
+ * calculate prehash information for zone.
*/
-const struct dname *nsec3_hash_dname(struct region *region,
- struct zone *zone, const struct dname *dname);
-
+void prehash_zone(struct namedb* db, struct zone* zone);
/*
- * calculate prehash information for all zones,
- * selects only updated=1 zones if bool set.
+ * calculate prehash for zone, assumes no partial precompile or prehashlist
*/
-void prehash(struct namedb* db, int updated_only);
-#ifndef FULL_PREHASH
-void prehash_zone(struct namedb *db, struct zone *zone);
-void prehash_zone_incremental(struct namedb *db, struct zone *zone);
-#endif
+void prehash_zone_complete(struct namedb* db, struct zone* zone);
/*
- * finds nsec3 that covers the given domain dname.
+ * finds nsec3 that covers the given domain hash.
* returns true if the find is exact.
- * hashname is the already hashed dname for the NSEC3.
*/
-#ifdef FULL_PREHASH
-int nsec3_find_cover(struct namedb* db, struct zone* zone,
- const struct dname* hashname, struct domain** result);
-#else
-int nsec3_find_cover(struct namedb* ATTR_UNUSED(db), struct zone* zone,
- const struct dname* hashname, struct nsec3_domain** result);
-#endif
+int nsec3_find_cover(struct zone* zone, uint8_t* hash, size_t hashlen,
+ struct domain** result);
/*
* _answer_ Routines used to add the correct nsec3 record to a query answer.
@@ -62,9 +44,8 @@ int nsec3_find_cover(struct namedb* ATTR_UNUSED(db), struct zone* zone,
* add proof for wildcards that the name below the wildcard.parent
* does not exist
*/
-void nsec3_answer_wildcard(struct query *query, struct answer *answer,
- struct domain *wildcard, struct namedb* db,
- const struct dname *qname);
+void nsec3_answer_wildcard(struct query* query, struct answer* answer,
+ struct domain* wildcard, const struct dname* qname);
/*
* add NSEC3 to provide domain name but not rrset exists,
@@ -84,7 +65,7 @@ void nsec3_answer_delegation(struct query *query, struct answer *answer);
*/
void nsec3_answer_authoritative(struct domain** match, struct query *query,
struct answer *answer, struct domain* closest_encloser,
- struct namedb* db, const struct dname* qname);
+ const struct dname* qname);
/*
* True if domain is a NSEC3 (+RRSIG) data only variety.
@@ -92,5 +73,48 @@ void nsec3_answer_authoritative(struct domain** match, struct query *query,
*/
int domain_has_only_NSEC3(struct domain* domain, struct zone* zone);
+/* get hashed bytes */
+void nsec3_hash_and_store(struct zone* zone, const struct dname* dname,
+ uint8_t* store);
+/* see if NSEC3 record uses the params in use for the zone */
+int nsec3_rr_uses_params(struct rr* rr, struct zone* zone);
+/* number of NSEC3s that are in the zone chain */
+int nsec3_in_chain_count(struct domain* domain, struct zone* zone);
+/* find previous NSEC3, or, lastinzone, or, NULL */
+struct domain* nsec3_chain_find_prev(struct zone* zone, struct domain* domain);
+/* clear nsec3 precompile for the zone */
+void nsec3_clear_precompile(struct namedb* db, struct zone* zone);
+/* if domain is part of nsec3hashed domains of a zone */
+int nsec3_domain_part_of_zone(struct domain* d, struct zone* z);
+/* condition when a domain is precompiled */
+int nsec3_condition_hash(struct domain* d, struct zone* z);
+/* condition when a domain is ds precompiled */
+int nsec3_condition_dshash(struct domain* d, struct zone* z);
+/* set nsec3param for this zone or NULL if no NSEC3 available */
+void nsec3_find_zone_param(struct namedb* db, struct zone* zone,
+ struct udb_ptr* z);
+/* hash domain and wcchild, and lookup nsec3 in tree, and precompile */
+void nsec3_precompile_domain(struct namedb* db, struct domain* domain,
+ struct zone* zone, struct region* tmpregion);
+/* hash ds_parent_cover, and lookup nsec3 and precompile */
+void nsec3_precompile_domain_ds(struct namedb* db, struct domain* domain,
+ struct zone* zone);
+/* put nsec3 into nsec3tree and adjust zonelast */
+void nsec3_precompile_nsec3rr(struct namedb* db, struct domain* domain,
+ struct zone* zone);
+/* precompile entire zone, assumes all is null at start */
+void nsec3_precompile_newparam(struct namedb* db, struct zone* zone);
+/* create b32.zone for a hash, allocated in the region */
+const struct dname* nsec3_b32_create(struct region* region, struct zone* zone,
+ unsigned char* hash);
+/* create trees for nsec3 updates and lookups in zone */
+void nsec3_zone_trees_create(struct region* region, struct zone* zone);
+/* clear trees for nsec3 in zone */
+void nsec3_hash_tree_clear(struct zone* zone);
+/* lookup zone that contains domain's nsec3 trees */
+struct zone* nsec3_tree_zone(struct namedb* db, struct domain* domain);
+/* lookup zone that contains domain's ds tree */
+struct zone* nsec3_tree_dszone(struct namedb* db, struct domain* domain);
+
#endif /* NSEC3 */
#endif /* NSEC3_H*/
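For orientation, the reworked prototypes above split hashing from lookup: nsec3_hash_and_store() fills a hash buffer for a name, and nsec3_find_cover() locates the matching or covering NSEC3 in the zone. A call-order fragment (not compilable on its own; it assumes the zone's NSEC3 parameters are already set up, and the 20-byte buffer assumes the SHA-1 hash NSEC3 uses):

uint8_t hash[20];
struct domain* cover = NULL;
int exact;

nsec3_hash_and_store(zone, qname, hash);
exact = nsec3_find_cover(zone, hash, sizeof(hash), &cover);
if(exact) {
	/* cover matches the hash exactly */
} else {
	/* cover is the NSEC3 that covers (precedes) the hash */
}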
diff --git a/usr.sbin/nsd/options.c b/usr.sbin/nsd/options.c
index 39cfa610864..221a0f7eb35 100644
--- a/usr.sbin/nsd/options.c
+++ b/usr.sbin/nsd/options.c
@@ -1,7 +1,7 @@
/*
* options.c -- options functions.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -13,10 +13,11 @@
#include "options.h"
#include "query.h"
#include "tsig.h"
+#include "difffile.h"
#include "rrl.h"
#include "configyyrename.h"
-nsd_options_t* nsd_options = 0;
+#include "configparser.h"
config_parser_state_t* cfg_parser = 0;
extern FILE* c_in, *c_out;
int c_parse(void);
@@ -24,28 +25,36 @@ int c_lex(void);
int c_wrap(void);
void c_error(const char *message);
-nsd_options_t* nsd_options_create(region_type* region)
+static int
+rbtree_strcmp(const void* p1, const void* p2)
+{
+ return strcmp((const char*)p1, (const char*)p2);
+}
+
+nsd_options_t*
+nsd_options_create(region_type* region)
{
nsd_options_t* opt;
opt = (nsd_options_t*)region_alloc(region, sizeof(nsd_options_t));
opt->region = region;
opt->zone_options = rbtree_create(region,
(int (*)(const void *, const void *)) dname_compare);
- opt->keys = NULL;
- opt->numkeys = 0;
+ opt->configfile = NULL;
+ opt->patterns = rbtree_create(region, rbtree_strcmp);
+ opt->keys = rbtree_create(region, rbtree_strcmp);
opt->ip_addresses = NULL;
opt->ip_transparent = 0;
opt->debug_mode = 0;
opt->verbosity = 0;
opt->hide_version = 0;
- opt->ip4_only = 0;
- opt->ip6_only = 0;
+ opt->do_ip4 = 1;
+ opt->do_ip6 = 1;
opt->database = DBFILE;
opt->identity = 0;
opt->nsid = 0;
opt->logfile = 0;
opt->server_count = 1;
- opt->tcp_count = 10;
+ opt->tcp_count = 100;
opt->tcp_query_count = 0;
opt->tcp_timeout = TCP_TIMEOUT;
opt->ipv4_edns_size = EDNS_MAX_MESSAGE_LEN;
@@ -54,17 +63,12 @@ nsd_options_t* nsd_options_create(region_type* region)
opt->port = UDP_PORT;
/* deprecated? opt->port = TCP_PORT; */
opt->statistics = 0;
-#ifdef USE_ZONE_STATS
- opt->zonestatsfile = ZONESTATSFILE;
-#else
- opt->zonestatsfile = 0;
-#endif
opt->chroot = 0;
opt->username = USER;
opt->zonesdir = ZONESDIR;
- opt->difffile = DIFFFILE;
opt->xfrdfile = XFRDFILE;
- opt->xfrd_reload_timeout = 10;
+ opt->xfrdir = XFRDIR;
+ opt->zonelistfile = ZONELISTFILE;
#ifdef RATELIMIT
opt->rrl_size = RRL_BUCKETS;
opt->rrl_ratelimit = RRL_LIMIT/2;
@@ -73,11 +77,20 @@ nsd_options_t* nsd_options_create(region_type* region)
opt->rrl_ipv6_prefix_length = RRL_IPV6_PREFIX_LENGTH;
opt->rrl_whitelist_ratelimit = RRL_WLIST_LIMIT/2;
#endif
- nsd_options = opt;
+ opt->zonefiles_check = 1;
+ opt->xfrd_reload_timeout = 1;
+ opt->control_enable = 0;
+ opt->control_interface = NULL;
+ opt->control_port = NSD_CONTROL_PORT;
+ opt->server_key_file = CONFIGDIR"/nsd_server.key";
+ opt->server_cert_file = CONFIGDIR"/nsd_server.pem";
+ opt->control_key_file = CONFIGDIR"/nsd_control.key";
+ opt->control_cert_file = CONFIGDIR"/nsd_control.pem";
return opt;
}
-int nsd_options_insert_zone(nsd_options_t* opt, zone_options_t* zone)
+int
+nsd_options_insert_zone(nsd_options_t* opt, zone_options_t* zone)
{
/* create dname for lookup */
const dname_type* dname = dname_parse(opt->region, zone->name);
@@ -89,23 +102,40 @@ int nsd_options_insert_zone(nsd_options_t* opt, zone_options_t* zone)
return 1;
}
-int parse_options_file(nsd_options_t* opt, const char* file)
+int
+nsd_options_insert_pattern(nsd_options_t* opt, pattern_options_t* pat)
+{
+ if(!pat->pname)
+ return 0;
+ pat->node.key = pat->pname;
+ if(!rbtree_insert(opt->patterns, (rbnode_t*)pat))
+ return 0;
+ return 1;
+}
+
+int
+parse_options_file(nsd_options_t* opt, const char* file,
+ void (*err)(void*,const char*), void* err_arg)
{
FILE *in = 0;
- zone_options_t* zone;
+ pattern_options_t* pat;
acl_options_t* acl;
- if(!cfg_parser)
+ if(!cfg_parser) {
cfg_parser = (config_parser_state_t*)region_alloc(
opt->region, sizeof(config_parser_state_t));
+ cfg_parser->chroot = 0;
+ }
+ cfg_parser->err = err;
+ cfg_parser->err_arg = err_arg;
cfg_parser->filename = file;
cfg_parser->line = 1;
cfg_parser->errors = 0;
+ cfg_parser->server_settings_seen = 0;
cfg_parser->opt = opt;
+ cfg_parser->current_pattern = 0;
cfg_parser->current_zone = 0;
- cfg_parser->current_key = opt->keys;
- while(cfg_parser->current_key && cfg_parser->current_key->next)
- cfg_parser->current_key = cfg_parser->current_key->next;
+ cfg_parser->current_key = 0;
cfg_parser->current_ip_address_option = opt->ip_addresses;
while(cfg_parser->current_ip_address_option && cfg_parser->current_ip_address_option->next)
cfg_parser->current_ip_address_option = cfg_parser->current_ip_address_option->next;
@@ -113,16 +143,34 @@ int parse_options_file(nsd_options_t* opt, const char* file)
cfg_parser->current_request_xfr = 0;
cfg_parser->current_notify = 0;
cfg_parser->current_provide_xfr = 0;
-
+
in = fopen(cfg_parser->filename, "r");
if(!in) {
- fprintf(stderr, "Could not open %s: %s\n", file, strerror(errno));
+ if(err) {
+ char m[MAXSYSLOGMSGLEN];
+ snprintf(m, sizeof(m), "Could not open %s: %s\n",
+ file, strerror(errno));
+ err(err_arg, m);
+ } else {
+ fprintf(stderr, "Could not open %s: %s\n",
+ file, strerror(errno));
+ }
return 0;
}
c_in = in;
c_parse();
fclose(in);
+ opt->configfile = region_strdup(opt->region, file);
+ if(cfg_parser->current_pattern) {
+ if(!cfg_parser->current_pattern->pname)
+ c_error("last pattern has no name");
+ else {
+ if(!nsd_options_insert_pattern(cfg_parser->opt,
+ cfg_parser->current_pattern))
+ c_error("duplicate pattern");
+ }
+ }
if(cfg_parser->current_zone) {
if(!cfg_parser->current_zone->name)
c_error("last zone has no name");
@@ -131,83 +179,499 @@ int parse_options_file(nsd_options_t* opt, const char* file)
cfg_parser->current_zone))
c_error("duplicate zone");
}
- if(!cfg_parser->current_zone->zonefile)
- c_error("last zone has no zonefile");
+ if(!cfg_parser->current_zone->pattern)
+ c_error("last zone has no pattern");
}
- if(opt->keys)
+ if(cfg_parser->current_key)
{
- if(!opt->keys->name)
+ if(!cfg_parser->current_key->name)
c_error("last key has no name");
- if(!opt->keys->algorithm)
+ if(!cfg_parser->current_key->algorithm)
c_error("last key has no algorithm");
- if(!opt->keys->secret)
+ if(!cfg_parser->current_key->secret)
c_error("last key has no secret blob");
+ key_options_insert(opt, cfg_parser->current_key);
}
- RBTREE_FOR(zone, zone_options_t*, opt->zone_options)
+ RBTREE_FOR(pat, pattern_options_t*, opt->patterns)
{
- if(!zone->name)
- continue;
- if(!zone->zonefile)
- continue;
/* lookup keys for acls */
- for(acl=zone->allow_notify; acl; acl=acl->next)
+ for(acl=pat->allow_notify; acl; acl=acl->next)
{
if(acl->nokey || acl->blocked)
continue;
acl->key_options = key_options_find(opt, acl->key_name);
if(!acl->key_options)
- c_error_msg("key %s in zone %s could not be found",
- acl->key_name, zone->name);
+ c_error_msg("key %s in pattern %s could not be found",
+ acl->key_name, pat->pname);
}
- for(acl=zone->notify; acl; acl=acl->next)
+ for(acl=pat->notify; acl; acl=acl->next)
{
if(acl->nokey || acl->blocked)
continue;
acl->key_options = key_options_find(opt, acl->key_name);
if(!acl->key_options)
- c_error_msg("key %s in zone %s could not be found",
- acl->key_name, zone->name);
+ c_error_msg("key %s in pattern %s could not be found",
+ acl->key_name, pat->pname);
}
- for(acl=zone->request_xfr; acl; acl=acl->next)
+ for(acl=pat->request_xfr; acl; acl=acl->next)
{
if(acl->nokey || acl->blocked)
continue;
acl->key_options = key_options_find(opt, acl->key_name);
if(!acl->key_options)
- c_error_msg("key %s in zone %s could not be found",
- acl->key_name, zone->name);
+ c_error_msg("key %s in pattern %s could not be found",
+ acl->key_name, pat->pname);
}
- for(acl=zone->provide_xfr; acl; acl=acl->next)
+ for(acl=pat->provide_xfr; acl; acl=acl->next)
{
if(acl->nokey || acl->blocked)
continue;
acl->key_options = key_options_find(opt, acl->key_name);
if(!acl->key_options)
- c_error_msg("key %s in zone %s could not be found",
- acl->key_name, zone->name);
+ c_error_msg("key %s in pattern %s could not be found",
+ acl->key_name, pat->pname);
}
}
if(cfg_parser->errors > 0)
{
- fprintf(stderr, "read %s failed: %d errors in configuration file\n",
- cfg_parser->filename,
- cfg_parser->errors);
+ if(err) {
+ char m[MAXSYSLOGMSGLEN];
+ snprintf(m, sizeof(m), "read %s failed: %d errors in "
+ "configuration file\n", cfg_parser->filename,
+ cfg_parser->errors);
+ err(err_arg, m);
+ } else {
+ fprintf(stderr, "read %s failed: %d errors in "
+ "configuration file\n", cfg_parser->filename,
+ cfg_parser->errors);
+ }
return 0;
}
return 1;
}
-void c_error_va_list(const char *fmt, va_list args)
+#define ZONELIST_HEADER "# NSD zone list\n# name pattern\n"
+static int
+comp_zonebucket(const void* a, const void* b)
+{
+ return *(const int*)b - *(const int*)a;
+}
+
+/* insert free entry into zonelist free buckets */
+static void
+zone_list_free_insert(nsd_options_t* opt, int linesize, off_t off)
+{
+ struct zonelist_free* e;
+ struct zonelist_bucket* b = (struct zonelist_bucket*)rbtree_search(
+ opt->zonefree, &linesize);
+ if(!b) {
+ b = region_alloc_zero(opt->region, sizeof(*b));
+ b->linesize = linesize;
+ b->node = *RBTREE_NULL;
+ b->node.key = &b->linesize;
+ rbtree_insert(opt->zonefree, &b->node);
+ }
+ e = (struct zonelist_free*)region_alloc_zero(opt->region, sizeof(*e));
+ e->next = b->list;
+ b->list = e;
+ e->off = off;
+ opt->zonefree_number++;
+}
+
+zone_options_t*
+zone_list_zone_insert(nsd_options_t* opt, const char* nm, const char* patnm,
+ int linesize, off_t off)
+{
+ pattern_options_t* pat = pattern_options_find(opt, patnm);
+ zone_options_t* zone;
+ if(!pat) {
+ log_msg(LOG_ERR, "pattern does not exist for zone %s "
+ "pattern %s", nm, patnm);
+ return NULL;
+ }
+ zone = zone_options_create(opt->region);
+ zone->part_of_config = 0;
+ zone->name = region_strdup(opt->region, nm);
+ zone->linesize = linesize;
+ zone->off = off;
+ zone->pattern = pat;
+ if(!nsd_options_insert_zone(opt, zone)) {
+ log_msg(LOG_ERR, "bad domain name or duplicate zone '%s' "
+ "pattern %s", nm, patnm);
+ region_recycle(opt->region, (void*)zone->name, strlen(nm)+1);
+ region_recycle(opt->region, zone, sizeof(*zone));
+ return NULL;
+ }
+ return zone;
+}
+
+int
+parse_zone_list_file(nsd_options_t* opt)
+{
+ /* zonelist looks like this:
+ # name pattern
+ add example.com master
+ del example.net slave
+ add foo.bar.nl slave
+ add rutabaga.uk config
+ */
+ char buf[1024];
+
+ /* create empty data structures */
+ opt->zonefree = rbtree_create(opt->region, comp_zonebucket);
+ opt->zonelist = NULL;
+ opt->zonefree_number = 0;
+ opt->zonelist_off = 0;
+
+ /* try to open the zonelist file; an empty or nonexistent file is OK */
+ opt->zonelist = fopen(opt->zonelistfile, "r+");
+ if(!opt->zonelist) {
+ if(errno == ENOENT)
+ return 1; /* file does not exist, it is created later */
+ log_msg(LOG_ERR, "could not open zone list %s: %s", opt->zonelistfile,
+ strerror(errno));
+ return 0;
+ }
+ /* read header */
+ buf[strlen(ZONELIST_HEADER)] = 0;
+ if(fread(buf, 1, strlen(ZONELIST_HEADER), opt->zonelist) !=
+ strlen(ZONELIST_HEADER) || strncmp(buf, ZONELIST_HEADER,
+ strlen(ZONELIST_HEADER)) != 0) {
+ log_msg(LOG_ERR, "zone list %s contains bad header\n", opt->zonelistfile);
+ fclose(opt->zonelist);
+ opt->zonelist = NULL;
+ return 0;
+ }
+
+ /* read entries in file */
+ while(fgets(buf, sizeof(buf), opt->zonelist)) {
+ /* skip comments and empty lines */
+ if(buf[0] == 0 || buf[0] == '\n' || buf[0] == '#')
+ continue;
+ if(strncmp(buf, "add ", 4) == 0) {
+ int linesize = strlen(buf);
+ /* parse the 'add' line */
+ /* pick last space on the line, so that the domain
+ * name can have a space in it (but not the pattern)*/
+ char* space = strrchr(buf+4, ' ');
+ char* nm, *patnm;
+ if(!space) {
+ /* parse error */
+ log_msg(LOG_ERR, "parse error in %s: '%s'",
+ opt->zonelistfile, buf);
+ continue;
+ }
+ nm = buf+4;
+ *space = 0;
+ patnm = space+1;
+ if(linesize && buf[linesize-1] == '\n')
+ buf[linesize-1] = 0;
+
+ /* store offset and line size for zone entry */
+ /* and create zone entry in zonetree */
+ (void)zone_list_zone_insert(opt, nm, patnm, linesize,
+ ftello(opt->zonelist)-linesize);
+ } else if(strncmp(buf, "del ", 4) == 0) {
+ /* store offset and line size for deleted entry */
+ int linesize = strlen(buf);
+ zone_list_free_insert(opt, linesize,
+ ftello(opt->zonelist)-linesize);
+ } else {
+ log_msg(LOG_WARNING, "bad data in %s, '%s'", opt->zonelistfile,
+ buf);
+ }
+ }
+ /* store EOF offset */
+ opt->zonelist_off = ftello(opt->zonelist);
+ return 1;
+}
+
+void
+zone_options_delete(nsd_options_t* opt, zone_options_t* zone)
+{
+ rbtree_delete(opt->zone_options, zone->node.key);
+ region_recycle(opt->region, (void*)zone->node.key, dname_total_size(
+ (dname_type*)zone->node.key));
+ region_recycle(opt->region, zone, sizeof(*zone));
+}
+
+/* add a new zone to the zonelist */
+zone_options_t*
+zone_list_add(nsd_options_t* opt, const char* zname, const char* pname)
+{
+ int r;
+ struct zonelist_free* e;
+ struct zonelist_bucket* b;
+ int linesize = 6 + strlen(zname) + strlen(pname);
+ /* create zone entry */
+ zone_options_t* zone = zone_list_zone_insert(opt, zname, pname,
+ linesize, 0);
+ if(!zone)
+ return NULL;
+
+ /* use free entry or append to file or create new file */
+ if(!opt->zonelist || opt->zonelist_off == 0) {
+ /* create new file */
+ if(opt->zonelist) fclose(opt->zonelist);
+ opt->zonelist = fopen(opt->zonelistfile, "w+");
+ if(!opt->zonelist) {
+ log_msg(LOG_ERR, "could not create zone list %s: %s",
+ opt->zonelistfile, strerror(errno));
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ r = fprintf(opt->zonelist, ZONELIST_HEADER);
+ if(r != strlen(ZONELIST_HEADER)) {
+ if(r == -1)
+ log_msg(LOG_ERR, "could not write to %s: %s",
+ opt->zonelistfile, strerror(errno));
+ else log_msg(LOG_ERR, "partial write to %s: disk full",
+ opt->zonelistfile);
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ zone->off = ftello(opt->zonelist);
+ if(zone->off == -1)
+ log_msg(LOG_ERR, "ftello(%s): %s", opt->zonelistfile, strerror(errno));
+ r = fprintf(opt->zonelist, "add %s %s\n", zname, pname);
+ if(r != zone->linesize) {
+ if(r == -1)
+ log_msg(LOG_ERR, "could not write to %s: %s",
+ opt->zonelistfile, strerror(errno));
+ else log_msg(LOG_ERR, "partial write to %s: disk full",
+ opt->zonelistfile);
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ opt->zonelist_off = ftello(opt->zonelist);
+ if(opt->zonelist_off == -1)
+ log_msg(LOG_ERR, "ftello(%s): %s", opt->zonelistfile, strerror(errno));
+ if(fflush(opt->zonelist) != 0) {
+ log_msg(LOG_ERR, "fflush %s: %s", opt->zonelistfile, strerror(errno));
+ }
+ return zone;
+ }
+ b = (struct zonelist_bucket*)rbtree_search(opt->zonefree,
+ &zone->linesize);
+ if(!b || b->list == NULL) {
+ /* no empty place, append to file */
+ zone->off = opt->zonelist_off;
+ if(fseeko(opt->zonelist, zone->off, SEEK_SET) == -1) {
+ log_msg(LOG_ERR, "fseeko(%s): %s", opt->zonelistfile, strerror(errno));
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ r = fprintf(opt->zonelist, "add %s %s\n", zname, pname);
+ if(r != zone->linesize) {
+ if(r == -1)
+ log_msg(LOG_ERR, "could not write to %s: %s",
+ opt->zonelistfile, strerror(errno));
+ else log_msg(LOG_ERR, "partial write to %s: disk full",
+ opt->zonelistfile);
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ opt->zonelist_off += linesize;
+ if(fflush(opt->zonelist) != 0) {
+ log_msg(LOG_ERR, "fflush %s: %s", opt->zonelistfile, strerror(errno));
+ }
+ return zone;
+ }
+ /* reuse empty spot */
+ e = b->list;
+ zone->off = e->off;
+ if(fseeko(opt->zonelist, zone->off, SEEK_SET) == -1) {
+ log_msg(LOG_ERR, "fseeko(%s): %s", opt->zonelistfile, strerror(errno));
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ r = fprintf(opt->zonelist, "add %s %s\n", zname, pname);
+ if(r != zone->linesize) {
+ if(r == -1)
+ log_msg(LOG_ERR, "could not write to %s: %s",
+ opt->zonelistfile, strerror(errno));
+ else log_msg(LOG_ERR, "partial write to %s: disk full",
+ opt->zonelistfile);
+ log_msg(LOG_ERR, "zone %s could not be added", zname);
+ zone_options_delete(opt, zone);
+ return NULL;
+ }
+ if(fflush(opt->zonelist) != 0) {
+ log_msg(LOG_ERR, "fflush %s: %s", opt->zonelistfile, strerror(errno));
+ }
+
+ /* snip off and recycle element */
+ b->list = e->next;
+ region_recycle(opt->region, e, sizeof(*e));
+ if(b->list == NULL) {
+ rbtree_delete(opt->zonefree, &b->linesize);
+ region_recycle(opt->region, b, sizeof(*b));
+ }
+ opt->zonefree_number--;
+ return zone;
+}
+
+/* remove a zone on the zonelist */
+void
+zone_list_del(nsd_options_t* opt, zone_options_t* zone)
+{
+ /* put its space onto the free entry */
+ if(fseeko(opt->zonelist, zone->off, SEEK_SET) == -1) {
+ log_msg(LOG_ERR, "fseeko(%s): %s", opt->zonelistfile, strerror(errno));
+ return;
+ }
+ fprintf(opt->zonelist, "del");
+ zone_list_free_insert(opt, zone->linesize, zone->off);
+
+ /* remove zone_options_t */
+ zone_options_delete(opt, zone);
+
+ /* see if we need to compact: it is going to halve the zonelist */
+ if(opt->zonefree_number > opt->zone_options->count) {
+ zone_list_compact(opt);
+ } else {
+ if(fflush(opt->zonelist) != 0) {
+ log_msg(LOG_ERR, "fflush %s: %s", opt->zonelistfile, strerror(errno));
+ }
+ }
+}
+/* postorder delete of zonelist free space tree */
+static void
+delbucket(region_type* region, struct zonelist_bucket* b)
+{
+ struct zonelist_free* e, *f;
+ if(!b || (rbnode_t*)b==RBTREE_NULL)
+ return;
+ delbucket(region, (struct zonelist_bucket*)b->node.left);
+ delbucket(region, (struct zonelist_bucket*)b->node.right);
+ e = b->list;
+ while(e) {
+ f = e->next;
+ region_recycle(region, e, sizeof(*e));
+ e = f;
+ }
+ region_recycle(region, b, sizeof(*b));
+}
+
+/* compact zonelist file */
+void
+zone_list_compact(nsd_options_t* opt)
+{
+ char outname[1024];
+ FILE* out;
+ zone_options_t* zone;
+ off_t off;
+ int r;
+ snprintf(outname, sizeof(outname), "%s~", opt->zonelistfile);
+ /* useful, when : count-of-free > count-of-used */
+ /* write zonelist to zonelist~ */
+ out = fopen(outname, "w+");
+ if(!out) {
+ log_msg(LOG_ERR, "could not open %s: %s", outname, strerror(errno));
+ return;
+ }
+ r = fprintf(out, ZONELIST_HEADER);
+ if(r == -1) {
+ log_msg(LOG_ERR, "write %s failed: %s", outname,
+ strerror(errno));
+ fclose(out);
+ return;
+ } else if(r != strlen(ZONELIST_HEADER)) {
+ log_msg(LOG_ERR, "write %s was partial: disk full",
+ outname);
+ fclose(out);
+ return;
+ }
+ off = ftello(out);
+ if(off == -1) {
+ log_msg(LOG_ERR, "ftello(%s): %s", outname, strerror(errno));
+ fclose(out);
+ return;
+ }
+ RBTREE_FOR(zone, zone_options_t*, opt->zone_options) {
+ if(zone->part_of_config)
+ continue;
+ r = fprintf(out, "add %s %s\n", zone->name,
+ zone->pattern->pname);
+ if(r < 0) {
+ log_msg(LOG_ERR, "write %s failed: %s", outname,
+ strerror(errno));
+ fclose(out);
+ return;
+ } else if(r != zone->linesize) {
+ log_msg(LOG_ERR, "write %s was partial: disk full",
+ outname);
+ fclose(out);
+ return;
+ }
+ }
+ if(fflush(out) != 0) {
+ log_msg(LOG_ERR, "fflush %s: %s", outname, strerror(errno));
+ }
+
+ /* rename zonelist~ onto zonelist */
+ if(rename(outname, opt->zonelistfile) == -1) {
+ log_msg(LOG_ERR, "rename(%s to %s) failed: %s",
+ outname, opt->zonelistfile, strerror(errno));
+ fclose(out);
+ return;
+ }
+ fclose(opt->zonelist);
+ /* set offsets */
+ RBTREE_FOR(zone, zone_options_t*, opt->zone_options) {
+ if(zone->part_of_config)
+ continue;
+ zone->off = off;
+ off += zone->linesize;
+ }
+ /* empty the free tree */
+ delbucket(opt->region, (struct zonelist_bucket*)opt->zonefree->root);
+ opt->zonefree->root = RBTREE_NULL;
+ opt->zonefree->count = 0;
+ opt->zonefree_number = 0;
+ /* finish */
+ opt->zonelist = out;
+ opt->zonelist_off = off;
+}
+
+/* close zonelist file */
+void
+zone_list_close(nsd_options_t* opt)
+{
+ fclose(opt->zonelist);
+ opt->zonelist = NULL;
+}
+
+
+void
+c_error_va_list(const char* fmt, va_list args)
{
cfg_parser->errors++;
+ if(cfg_parser->err) {
+ char m[MAXSYSLOGMSGLEN];
+ snprintf(m, sizeof(m), "%s:%d: error: ", cfg_parser->filename,
+ cfg_parser->line);
+ (*cfg_parser->err)(cfg_parser->err_arg, m);
+ vsnprintf(m, sizeof(m), fmt, args);
+ (*cfg_parser->err)(cfg_parser->err_arg, m);
+ (*cfg_parser->err)(cfg_parser->err_arg, "\n");
+ return;
+ }
fprintf(stderr, "%s:%d: error: ", cfg_parser->filename,
cfg_parser->line);
vfprintf(stderr, fmt, args);
fprintf(stderr, "\n");
}
-void c_error_msg(const char* fmt, ...)
+void
+c_error_msg(const char* fmt, ...)
{
va_list args;
va_start(args, fmt);
@@ -215,62 +679,546 @@ void c_error_msg(const char* fmt, ...)
va_end(args);
}
-void c_error(const char *str)
+void
+c_error(const char* str)
{
- cfg_parser->errors++;
- fprintf(stderr, "%s:%d: error: %s\n", cfg_parser->filename,
- cfg_parser->line, str);
+ c_error_msg("%s", str);
}
-int c_wrap()
+int
+c_wrap()
{
return 1;
}
-zone_options_t* zone_options_create(region_type* region)
+zone_options_t*
+zone_options_create(region_type* region)
{
zone_options_t* zone;
zone = (zone_options_t*)region_alloc(region, sizeof(zone_options_t));
zone->node = *RBTREE_NULL;
zone->name = 0;
- zone->zonefile = 0;
- zone->allow_notify = 0;
- zone->request_xfr = 0;
- zone->notify = 0;
- zone->notify_retry = 5;
- zone->provide_xfr = 0;
- zone->outgoing_interface = 0;
- zone->allow_axfr_fallback = 1;
+ zone->pattern = 0;
+ zone->part_of_config = 0;
+ return zone;
+}
+
+/* true if booleans have the same truth value */
+#define booleq(x,y) ( ((x) && (y)) || (!(x) && !(y)) )
+
+int
+acl_equal(acl_options_t* p, acl_options_t* q)
+{
+ if(!booleq(p->use_axfr_only, q->use_axfr_only)) return 0;
+ if(!booleq(p->allow_udp, q->allow_udp)) return 0;
+ if(strcmp(p->ip_address_spec, q->ip_address_spec)!=0) return 0;
+ /* the ip6, port, addr, mask, type: are derived from the ip_address_spec */
+ if(!booleq(p->nokey, q->nokey)) return 0;
+ if(!booleq(p->blocked, q->blocked)) return 0;
+ if(p->key_name && q->key_name) {
+ if(strcmp(p->key_name, q->key_name)!=0) return 0;
+ } else if(p->key_name && !q->key_name) return 0;
+ else if(!p->key_name && q->key_name) return 0;
+ /* key_options is derived from key_name */
+ return 1;
+}
+
+int
+acl_list_equal(acl_options_t* p, acl_options_t* q)
+{
+ /* must be same and in same order */
+ while(p && q) {
+ if(!acl_equal(p, q))
+ return 0;
+ p = p->next;
+ q = q->next;
+ }
+ if(!p && !q) return 1;
+ /* different lengths */
+ return 0;
+}
+
+pattern_options_t*
+pattern_options_create(region_type* region)
+{
+ pattern_options_t* p;
+ p = (pattern_options_t*)region_alloc(region, sizeof(pattern_options_t));
+ p->node = *RBTREE_NULL;
+ p->pname = 0;
+ p->zonefile = 0;
+ p->allow_notify = 0;
+ p->request_xfr = 0;
+ p->notify = 0;
+ p->provide_xfr = 0;
+ p->outgoing_interface = 0;
+ p->notify_retry = 5;
+ p->notify_retry_is_default = 1;
+ p->allow_axfr_fallback = 1;
+ p->allow_axfr_fallback_is_default = 1;
+ p->implicit = 0;
+ p->xfrd_flags = 0;
#ifdef RATELIMIT
- zone->rrl_whitelist = 0;
+ p->rrl_whitelist = 0;
#endif
- return zone;
+ return p;
+}
+
+static void
+acl_delete(region_type* region, acl_options_t* acl)
+{
+ if(acl->ip_address_spec)
+ region_recycle(region, (void*)acl->ip_address_spec,
+ strlen(acl->ip_address_spec)+1);
+ if(acl->key_name)
+ region_recycle(region, (void*)acl->key_name,
+ strlen(acl->key_name)+1);
+ /* key_options is a convenience pointer, not owned by the acl */
+ region_recycle(region, acl, sizeof(*acl));
+}
+
+static void
+acl_list_delete(region_type* region, acl_options_t* list)
+{
+ acl_options_t* n;
+ while(list) {
+ n = list->next;
+ acl_delete(region, list);
+ list = n;
+ }
}
-key_options_t* key_options_create(region_type* region)
+void
+pattern_options_remove(nsd_options_t* opt, const char* name)
+{
+ pattern_options_t* p = (pattern_options_t*)rbtree_delete(
+ opt->patterns, name);
+ /* delete p and its contents */
+ if (!p)
+ return;
+ if(p->pname)
+ region_recycle(opt->region, (void*)p->pname,
+ strlen(p->pname)+1);
+ if(p->zonefile)
+ region_recycle(opt->region, (void*)p->zonefile,
+ strlen(p->zonefile)+1);
+ acl_list_delete(opt->region, p->allow_notify);
+ acl_list_delete(opt->region, p->request_xfr);
+ acl_list_delete(opt->region, p->notify);
+ acl_list_delete(opt->region, p->provide_xfr);
+ acl_list_delete(opt->region, p->outgoing_interface);
+
+ region_recycle(opt->region, p, sizeof(pattern_options_t));
+}
+
+static acl_options_t*
+copy_acl(region_type* region, acl_options_t* a)
+{
+ acl_options_t* b;
+ if(!a) return NULL;
+ b = (acl_options_t*)region_alloc(region, sizeof(*b));
+ /* copy the whole lot */
+ *b = *a;
+ /* fix the pointers */
+ if(a->ip_address_spec)
+ b->ip_address_spec = region_strdup(region, a->ip_address_spec);
+ if(a->key_name)
+ b->key_name = region_strdup(region, a->key_name);
+ b->next = NULL;
+ b->key_options = NULL;
+ return b;
+}
+
+static acl_options_t*
+copy_acl_list(nsd_options_t* opt, acl_options_t* a)
+{
+ acl_options_t* b, *blast = NULL, *blist = NULL;
+ while(a) {
+ b = copy_acl(opt->region, a);
+ /* fixup key_options */
+ if(b->key_name)
+ b->key_options = key_options_find(opt, b->key_name);
+ else b->key_options = NULL;
+
+ /* link as last into list */
+ b->next = NULL;
+ if(!blist) blist = b;
+ else blast->next = b;
+ blast = b;
+
+ a = a->next;
+ }
+ return blist;
+}
+
+static void
+copy_changed_acl(nsd_options_t* opt, acl_options_t** orig,
+ acl_options_t* anew)
+{
+ if(!acl_list_equal(*orig, anew)) {
+ acl_list_delete(opt->region, *orig);
+ *orig = copy_acl_list(opt, anew);
+ }
+}
+
+static void
+copy_pat_fixed(region_type* region, pattern_options_t* orig,
+ pattern_options_t* p)
+{
+ orig->allow_axfr_fallback = p->allow_axfr_fallback;
+ orig->allow_axfr_fallback_is_default =
+ p->allow_axfr_fallback_is_default;
+ orig->notify_retry = p->notify_retry;
+ orig->notify_retry_is_default = p->notify_retry_is_default;
+ orig->implicit = p->implicit;
+ if(p->zonefile)
+ orig->zonefile = region_strdup(region, p->zonefile);
+ else orig->zonefile = NULL;
+#ifdef RATELIMIT
+ orig->rrl_whitelist = p->rrl_whitelist;
+#endif
+}
+
+void
+pattern_options_add_modify(nsd_options_t* opt, pattern_options_t* p)
+{
+ pattern_options_t* orig = pattern_options_find(opt, p->pname);
+ if(!orig) {
+ /* needs to be copied to opt region */
+ orig = pattern_options_create(opt->region);
+ orig->pname = region_strdup(opt->region, p->pname);
+ copy_pat_fixed(opt->region, orig, p);
+ orig->allow_notify = copy_acl_list(opt, p->allow_notify);
+ orig->request_xfr = copy_acl_list(opt, p->request_xfr);
+ orig->notify = copy_acl_list(opt, p->notify);
+ orig->provide_xfr = copy_acl_list(opt, p->provide_xfr);
+ orig->outgoing_interface = copy_acl_list(opt,
+ p->outgoing_interface);
+ nsd_options_insert_pattern(opt, orig);
+ } else {
+ /* modify in place so pointers stay valid (and copy
+ into region). Do not touch unchanged acls. */
+ if(orig->zonefile)
+ region_recycle(opt->region, (char*)orig->zonefile,
+ strlen(orig->zonefile)+1);
+ copy_pat_fixed(opt->region, orig, p);
+ copy_changed_acl(opt, &orig->allow_notify, p->allow_notify);
+ copy_changed_acl(opt, &orig->request_xfr, p->request_xfr);
+ copy_changed_acl(opt, &orig->notify, p->notify);
+ copy_changed_acl(opt, &orig->provide_xfr, p->provide_xfr);
+ copy_changed_acl(opt, &orig->outgoing_interface,
+ p->outgoing_interface);
+ }
+}
+
+pattern_options_t*
+pattern_options_find(nsd_options_t* opt, const char* name)
+{
+ return (pattern_options_t*)rbtree_search(opt->patterns, name);
+}
+
+int
+pattern_options_equal(pattern_options_t* p, pattern_options_t* q)
+{
+ if(strcmp(p->pname, q->pname) != 0) return 0;
+ if(!p->zonefile && q->zonefile) return 0;
+ else if(p->zonefile && !q->zonefile) return 0;
+ else if(p->zonefile && q->zonefile) {
+ if(strcmp(p->zonefile, q->zonefile) != 0) return 0;
+ }
+ if(!booleq(p->allow_axfr_fallback, q->allow_axfr_fallback)) return 0;
+ if(!booleq(p->allow_axfr_fallback_is_default,
+ q->allow_axfr_fallback_is_default)) return 0;
+ if(p->notify_retry != q->notify_retry) return 0;
+ if(!booleq(p->notify_retry_is_default,
+ q->notify_retry_is_default)) return 0;
+ if(!booleq(p->implicit, q->implicit)) return 0;
+ if(!acl_list_equal(p->allow_notify, q->allow_notify)) return 0;
+ if(!acl_list_equal(p->request_xfr, q->request_xfr)) return 0;
+ if(!acl_list_equal(p->notify, q->notify)) return 0;
+ if(!acl_list_equal(p->provide_xfr, q->provide_xfr)) return 0;
+ if(!acl_list_equal(p->outgoing_interface, q->outgoing_interface))
+ return 0;
+#ifdef RATELIMIT
+ if(p->rrl_whitelist != q->rrl_whitelist) return 0;
+#endif
+ return 1;
+}
+
+static void
+marshal_u8(struct buffer* b, uint8_t v)
+{
+ buffer_reserve(b, 1);
+ buffer_write_u8(b, v);
+}
+
+static uint8_t
+unmarshal_u8(struct buffer* b)
+{
+ return buffer_read_u8(b);
+}
+
+#ifdef RATELIMIT
+static void
+marshal_u16(struct buffer* b, uint16_t v)
+{
+ buffer_reserve(b, 2);
+ buffer_write_u16(b, v);
+}
+#endif
+
+#ifdef RATELIMIT
+static uint16_t
+unmarshal_u16(struct buffer* b)
+{
+ return buffer_read_u16(b);
+}
+#endif
+
+static void
+marshal_str(struct buffer* b, const char* s)
+{
+ if(!s) marshal_u8(b, 0);
+ else {
+ size_t len = strlen(s);
+ marshal_u8(b, 1);
+ buffer_reserve(b, len+1);
+ buffer_write(b, s, len+1);
+ }
+}
+
+static char*
+unmarshal_str(region_type* r, struct buffer* b)
+{
+ uint8_t nonnull = unmarshal_u8(b);
+ if(nonnull) {
+ char* result = region_strdup(r, (char*)buffer_current(b));
+ size_t len = strlen((char*)buffer_current(b));
+ buffer_skip(b, len+1);
+ return result;
+ } else return NULL;
+}
+
+static void
+marshal_acl(struct buffer* b, acl_options_t* acl)
+{
+ buffer_reserve(b, sizeof(*acl));
+ buffer_write(b, acl, sizeof(*acl));
+ marshal_str(b, acl->ip_address_spec);
+ marshal_str(b, acl->key_name);
+}
+
+static acl_options_t*
+unmarshal_acl(region_type* r, struct buffer* b)
+{
+ acl_options_t* acl = (acl_options_t*)region_alloc(r, sizeof(*acl));
+ buffer_read(b, acl, sizeof(*acl));
+ acl->next = NULL;
+ acl->key_options = NULL;
+ acl->ip_address_spec = unmarshal_str(r, b);
+ acl->key_name = unmarshal_str(r, b);
+ return acl;
+}
+
+static void
+marshal_acl_list(struct buffer* b, acl_options_t* list)
+{
+ while(list) {
+ marshal_u8(b, 1); /* is there a next one marker */
+ marshal_acl(b, list);
+ list = list->next;
+ }
+ marshal_u8(b, 0); /* end of list marker */
+}
+
+static acl_options_t*
+unmarshal_acl_list(region_type* r, struct buffer* b)
+{
+ acl_options_t* a, *last=NULL, *list=NULL;
+ while(unmarshal_u8(b)) {
+ a = unmarshal_acl(r, b);
+ /* link in */
+ a->next = NULL;
+ if(!list) list = a;
+ else last->next = a;
+ last = a;
+ }
+ return list;
+}
+
+void
+pattern_options_marshal(struct buffer* b, pattern_options_t* p)
+{
+ marshal_str(b, p->pname);
+ marshal_str(b, p->zonefile);
+#ifdef RATELIMIT
+ marshal_u16(b, p->rrl_whitelist);
+#endif
+ marshal_u8(b, p->allow_axfr_fallback);
+ marshal_u8(b, p->allow_axfr_fallback_is_default);
+ marshal_u8(b, p->notify_retry);
+ marshal_u8(b, p->notify_retry_is_default);
+ marshal_u8(b, p->implicit);
+ marshal_acl_list(b, p->allow_notify);
+ marshal_acl_list(b, p->request_xfr);
+ marshal_acl_list(b, p->notify);
+ marshal_acl_list(b, p->provide_xfr);
+ marshal_acl_list(b, p->outgoing_interface);
+}
+
+pattern_options_t*
+pattern_options_unmarshal(region_type* r, struct buffer* b)
+{
+ pattern_options_t* p = pattern_options_create(r);
+ p->pname = unmarshal_str(r, b);
+ p->zonefile = unmarshal_str(r, b);
+#ifdef RATELIMIT
+ p->rrl_whitelist = unmarshal_u16(b);
+#endif
+ p->allow_axfr_fallback = unmarshal_u8(b);
+ p->allow_axfr_fallback_is_default = unmarshal_u8(b);
+ p->notify_retry = unmarshal_u8(b);
+ p->notify_retry_is_default = unmarshal_u8(b);
+ p->implicit = unmarshal_u8(b);
+ p->allow_notify = unmarshal_acl_list(r, b);
+ p->request_xfr = unmarshal_acl_list(r, b);
+ p->notify = unmarshal_acl_list(r, b);
+ p->provide_xfr = unmarshal_acl_list(r, b);
+ p->outgoing_interface = unmarshal_acl_list(r, b);
+ return p;
+}
+
+key_options_t*
+key_options_create(region_type* region)
{
key_options_t* key;
- key = (key_options_t*)region_alloc(region, sizeof(key_options_t));
- key->name = 0;
- key->next = 0;
- key->algorithm = 0;
- key->secret = 0;
- key->tsig_key = 0;
+ key = (key_options_t*)region_alloc_zero(region, sizeof(key_options_t));
return key;
}
-key_options_t* key_options_find(nsd_options_t* opt, const char* name)
+void
+key_options_insert(nsd_options_t* opt, key_options_t* key)
+{
+ if(!key->name) return;
+ key->node.key = key->name;
+ (void)rbtree_insert(opt->keys, &key->node);
+}
+
+key_options_t*
+key_options_find(nsd_options_t* opt, const char* name)
{
- key_options_t* key = opt->keys;
- while(key) {
- if(strcmp(key->name, name)==0)
- return key;
- key = key->next;
+ return (key_options_t*)rbtree_search(opt->keys, name);
+}
+
+/** remove tsig_key contents */
+void
+key_options_desetup(region_type* region, key_options_t* key)
+{
+ /* keep tsig_key pointer so that existing references keep valid */
+ if(!key->tsig_key)
+ return;
+ /* name stays the same */
+ if(key->tsig_key->data) {
+ /* wipe secret! */
+ memset(key->tsig_key->data, 0xdd, key->tsig_key->size);
+ region_recycle(region, key->tsig_key->data,
+ key->tsig_key->size);
+ key->tsig_key->data = NULL;
+ key->tsig_key->size = 0;
}
- return 0;
}
-int acl_check_incoming(acl_options_t* acl, struct query* q,
+/** add tsig_key contents */
+void
+key_options_setup(region_type* region, key_options_t* key)
+{
+ uint8_t data[16384]; /* 16KB */
+ int size;
+ if(!key->tsig_key) {
+ /* create it */
+ key->tsig_key = (tsig_key_type *) region_alloc(region,
+ sizeof(tsig_key_type));
+ /* create name */
+ key->tsig_key->name = dname_parse(region, key->name);
+ if(!key->tsig_key->name) {
+ log_msg(LOG_ERR, "Failed to parse tsig key name %s",
+ key->name);
+ /* key and base64 were checked during syntax parse */
+ exit(1);
+ }
+ key->tsig_key->size = 0;
+ key->tsig_key->data = NULL;
+ }
+ size = b64_pton(key->secret, data, sizeof(data));
+ if(size == -1) {
+ log_msg(LOG_ERR, "Failed to parse tsig key data %s",
+ key->name);
+ /* key and base64 were checked during syntax parse */
+ exit(1);
+ }
+ key->tsig_key->size = size;
+ key->tsig_key->data = (uint8_t *)region_alloc_init(region, data, size);
+}
+
+void
+key_options_remove(nsd_options_t* opt, const char* name)
+{
+ key_options_t* k = key_options_find(opt, name);
+ if(!k) return;
+ (void)rbtree_delete(opt->keys, name);
+ if(k->name)
+ region_recycle(opt->region, k->name, strlen(k->name)+1);
+ if(k->algorithm)
+ region_recycle(opt->region, k->algorithm, strlen(k->algorithm)+1);
+ if(k->secret) {
+ memset(k->secret, 0xdd, strlen(k->secret)); /* wipe secret! */
+ region_recycle(opt->region, k->secret, strlen(k->secret)+1);
+ }
+ if(k->tsig_key) {
+ tsig_del_key(k->tsig_key);
+ if(k->tsig_key->name)
+ region_recycle(opt->region, (void*)k->tsig_key->name,
+ dname_total_size(k->tsig_key->name));
+ key_options_desetup(opt->region, k);
+ region_recycle(opt->region, k->tsig_key, sizeof(tsig_key_type));
+ }
+ region_recycle(opt->region, k, sizeof(key_options_t));
+}
+
+int
+key_options_equal(key_options_t* p, key_options_t* q)
+{
+ return strcmp(p->name, q->name)==0 && strcmp(p->algorithm,
+ q->algorithm)==0 && strcmp(p->secret, q->secret)==0;
+}
+
+void
+key_options_add_modify(nsd_options_t* opt, key_options_t* key)
+{
+ key_options_t* orig = key_options_find(opt, key->name);
+ if(!orig) {
+ /* needs to be copied to opt region */
+ orig = key_options_create(opt->region);
+ orig->name = region_strdup(opt->region, key->name);
+ orig->algorithm = region_strdup(opt->region, key->algorithm);
+ orig->secret = region_strdup(opt->region, key->secret);
+ key_options_setup(opt->region, orig);
+ tsig_add_key(orig->tsig_key);
+ key_options_insert(opt, orig);
+ } else {
+ /* modify entries in existing key, and copy to opt region */
+ key_options_desetup(opt->region, orig);
+ region_recycle(opt->region, orig->algorithm,
+ strlen(orig->algorithm)+1);
+ orig->algorithm = region_strdup(opt->region, key->algorithm);
+ region_recycle(opt->region, orig->secret,
+ strlen(orig->secret)+1);
+ orig->secret = region_strdup(opt->region, key->secret);
+ key_options_setup(opt->region, orig);
+ }
+}
+
+int
+acl_check_incoming(acl_options_t* acl, struct query* q,
acl_options_t** reason)
{
/* check each acl element.
@@ -311,7 +1259,8 @@ int acl_check_incoming(acl_options_t* acl, struct query* q,
}
#ifdef INET6
-int acl_addr_matches_ipv6host(acl_options_t* acl, struct sockaddr_storage* addr_storage, unsigned int port)
+int
+acl_addr_matches_ipv6host(acl_options_t* acl, struct sockaddr_storage* addr_storage, unsigned int port)
{
struct sockaddr_in6* addr = (struct sockaddr_in6*)addr_storage;
if(acl->port != 0 && acl->port != port)
@@ -339,7 +1288,8 @@ int acl_addr_matches_ipv6host(acl_options_t* acl, struct sockaddr_storage* addr_
}
#endif
-int acl_addr_matches_ipv4host(acl_options_t* acl, struct sockaddr_in* addr, unsigned int port)
+int
+acl_addr_matches_ipv4host(acl_options_t* acl, struct sockaddr_in* addr, unsigned int port)
{
if(acl->port != 0 && acl->port != port)
return 0;
@@ -365,7 +1315,8 @@ int acl_addr_matches_ipv4host(acl_options_t* acl, struct sockaddr_in* addr, unsi
return 1;
}
-int acl_addr_matches_host(acl_options_t* acl, acl_options_t* host)
+int
+acl_addr_matches_host(acl_options_t* acl, acl_options_t* host)
{
if(acl->is_ipv6)
{
@@ -387,7 +1338,8 @@ int acl_addr_matches_host(acl_options_t* acl, acl_options_t* host)
return 0;
}
-int acl_addr_matches(acl_options_t* acl, struct query* q)
+int
+acl_addr_matches(acl_options_t* acl, struct query* q)
{
if(acl->is_ipv6)
{
@@ -411,7 +1363,8 @@ int acl_addr_matches(acl_options_t* acl, struct query* q)
return 0;
}
-int acl_addr_match_mask(uint32_t* a, uint32_t* b, uint32_t* mask, size_t sz)
+int
+acl_addr_match_mask(uint32_t* a, uint32_t* b, uint32_t* mask, size_t sz)
{
size_t i;
#ifndef NDEBUG
@@ -427,7 +1380,8 @@ int acl_addr_match_mask(uint32_t* a, uint32_t* b, uint32_t* mask, size_t sz)
return 1;
}
-int acl_addr_match_range(uint32_t* minval, uint32_t* x, uint32_t* maxval, size_t sz)
+int
+acl_addr_match_range(uint32_t* minval, uint32_t* x, uint32_t* maxval, size_t sz)
{
size_t i;
uint8_t checkmin = 1, checkmax = 1;
@@ -456,7 +1410,8 @@ int acl_addr_match_range(uint32_t* minval, uint32_t* x, uint32_t* maxval, size_t
return 1;
}
-int acl_key_matches(acl_options_t* acl, struct query* q)
+int
+acl_key_matches(acl_options_t* acl, struct query* q)
{
if(acl->blocked)
return 1;
@@ -527,42 +1482,93 @@ acl_same_host(acl_options_t* a, acl_options_t* b)
}
#if defined(HAVE_SSL)
-void key_options_tsig_add(nsd_options_t* opt)
+void
+key_options_tsig_add(nsd_options_t* opt)
{
key_options_t* optkey;
- uint8_t data[4000];
- tsig_key_type* tsigkey;
- const dname_type* dname;
- int size;
-
- for(optkey = opt->keys; optkey; optkey = optkey->next)
- {
- dname = dname_parse(opt->region, optkey->name);
- if(!dname) {
- log_msg(LOG_ERR, "Failed to parse tsig key name %s", optkey->name);
- continue;
- }
- size = b64_pton(optkey->secret, data, sizeof(data));
- if(size == -1) {
- log_msg(LOG_ERR, "Failed to parse tsig key data %s", optkey->name);
- continue;
- }
- tsigkey = (tsig_key_type *) region_alloc(opt->region, sizeof(tsig_key_type));
- tsigkey->name = dname;
- tsigkey->size = size;
- tsigkey->data = (uint8_t *) region_alloc_init(opt->region, data, tsigkey->size);
- tsig_add_key(tsigkey);
- optkey->tsig_key = tsigkey;
+ RBTREE_FOR(optkey, key_options_t*, opt->keys) {
+ key_options_setup(opt->region, optkey);
+ tsig_add_key(optkey->tsig_key);
}
}
#endif
-int zone_is_slave(zone_options_t* opt)
+int
+zone_is_slave(zone_options_t* opt)
+{
+ return opt && opt->pattern && opt->pattern->request_xfr != 0;
+}
+
+/* get a character in string (or replacement char if not long enough) */
+static const char*
+get_char(const char* str, size_t i)
+{
+ static char res[2];
+ if(i >= strlen(str))
+ return ".";
+ res[0] = str[i];
+ res[1] = 0;
+ return res;
+}
+/* get end label of the zone name (or .) */
+static const char*
+get_end_label(zone_options_t* zone, int i)
+{
+ const dname_type* d = (const dname_type*)zone->node.key;
+ if(i >= d->label_count) {
+ return ".";
+ }
+ return wirelabel2str(dname_label(d, i));
+}
+/* replace occurrences of one with two */
+void
+replace_str(char* str, size_t len, const char* one, const char* two)
+{
+ char* pos;
+ char* at = str;
+ while( (pos=strstr(at, one)) ) {
+ if(strlen(str)+strlen(two)-strlen(one) >= len)
+ return; /* no more space to replace */
+ /* stuff before pos is fine */
+ /* move the stuff after pos to make space for two, add
+ * one to length of remainder to also copy the 0 byte end */
+ memmove(pos+strlen(two), pos+strlen(one),
+ strlen(pos+strlen(one))+1);
+ /* copy in two */
+ memmove(pos, two, strlen(two));
+ /* at is end of the newly inserted two (avoids recursion if
+ * two contains one) */
+ at = pos+strlen(two);
+ }
+}
+
+const char*
+config_make_zonefile(zone_options_t* zone)
{
- return opt->request_xfr != 0;
+ static char f[1024];
+ /* if not a template, return as-is */
+ if(!strchr(zone->pattern->zonefile, '%'))
+ return zone->pattern->zonefile;
+ strlcpy(f, zone->pattern->zonefile, sizeof(f));
+ if(strstr(f, "%1"))
+ replace_str(f, sizeof(f), "%1", get_char(zone->name, 0));
+ if(strstr(f, "%2"))
+ replace_str(f, sizeof(f), "%2", get_char(zone->name, 1));
+ if(strstr(f, "%3"))
+ replace_str(f, sizeof(f), "%3", get_char(zone->name, 2));
+ if(strstr(f, "%z"))
+ replace_str(f, sizeof(f), "%z", get_end_label(zone, 1));
+ if(strstr(f, "%y"))
+ replace_str(f, sizeof(f), "%y", get_end_label(zone, 2));
+ if(strstr(f, "%x"))
+ replace_str(f, sizeof(f), "%x", get_end_label(zone, 3));
+ if(strstr(f, "%s"))
+ replace_str(f, sizeof(f), "%s", zone->name);
+ return f;
}
-zone_options_t* zone_options_find(nsd_options_t* opt, const struct dname* apex)
+zone_options_t*
+zone_options_find(nsd_options_t* opt, const struct dname* apex)
{
return (zone_options_t*) rbtree_search(opt->zone_options, apex);
}
@@ -583,7 +1589,8 @@ acl_find_num(acl_options_t* acl, int num)
}
/* true if ipv6 address, false if ipv4 */
-int parse_acl_is_ipv6(const char* p)
+int
+parse_acl_is_ipv6(const char* p)
{
/* see if addr is ipv6 or ipv4 -- by : and . */
while(*p) {
@@ -595,7 +1602,8 @@ int parse_acl_is_ipv6(const char* p)
}
/* returns range type. mask is the 2nd part of the range */
-int parse_acl_range_type(char* ip, char** mask)
+int
+parse_acl_range_type(char* ip, char** mask)
{
char *p;
if((p=strchr(ip, '&'))!=0) {
@@ -618,7 +1626,8 @@ int parse_acl_range_type(char* ip, char** mask)
}
/* parses subnet mask, fills 0 mask as well */
-void parse_acl_range_subnet(char* p, void* addr, int maxbits)
+void
+parse_acl_range_subnet(char* p, void* addr, int maxbits)
{
int subnet_bits = atoi(p);
uint8_t* addr_bytes = (uint8_t*)addr;
@@ -641,7 +1650,8 @@ void parse_acl_range_subnet(char* p, void* addr, int maxbits)
}
}
-acl_options_t* parse_acl_info(region_type* region, char* ip, const char* key)
+acl_options_t*
+parse_acl_info(region_type* region, char* ip, const char* key)
{
char* p;
acl_options_t* acl = (acl_options_t*)region_alloc(region, sizeof(acl_options_t));
@@ -704,7 +1714,62 @@ acl_options_t* parse_acl_info(region_type* region, char* ip, const char* key)
return acl;
}
-void nsd_options_destroy(nsd_options_t* opt)
+/* copy acl list at end of parser start, update current */
+static
+void append_acl(acl_options_t** start, acl_options_t** cur,
+ acl_options_t* list)
+{
+ while(list) {
+ acl_options_t* acl = copy_acl(cfg_parser->opt->region, list);
+ acl->next = NULL;
+ if(*cur)
+ (*cur)->next = acl;
+ else *start = acl;
+ *cur = acl;
+ list = list->next;
+ }
+}
+
+void
+config_apply_pattern(const char* name)
+{
+ /* find the pattern */
+ pattern_options_t* pat = pattern_options_find(cfg_parser->opt, name);
+ pattern_options_t* a = cfg_parser->current_pattern;
+ if(!pat) {
+ c_error_msg("could not find pattern %s", name);
+ return;
+ }
+
+ /* apply settings */
+ if(pat->zonefile)
+ a->zonefile = region_strdup(cfg_parser->opt->region,
+ pat->zonefile);
+ if(!pat->allow_axfr_fallback_is_default) {
+ a->allow_axfr_fallback = pat->allow_axfr_fallback;
+ a->allow_axfr_fallback_is_default = 0;
+ }
+ if(!pat->notify_retry_is_default) {
+ a->notify_retry = pat->notify_retry;
+ a->notify_retry_is_default = 0;
+ }
+#ifdef RATELIMIT
+ a->rrl_whitelist |= pat->rrl_whitelist;
+#endif
+ /* append acl items */
+ append_acl(&a->allow_notify, &cfg_parser->current_allow_notify,
+ pat->allow_notify);
+ append_acl(&a->request_xfr, &cfg_parser->current_request_xfr,
+ pat->request_xfr);
+ append_acl(&a->notify, &cfg_parser->current_notify, pat->notify);
+ append_acl(&a->provide_xfr, &cfg_parser->current_provide_xfr,
+ pat->provide_xfr);
+ append_acl(&a->outgoing_interface, &cfg_parser->
+ current_outgoing_interface, pat->outgoing_interface);
+}
+
+void
+nsd_options_destroy(nsd_options_t* opt)
{
region_destroy(opt->region);
}
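To illustrate the '%' template expansion that config_make_zonefile() performs via the replace_str() helper above, here is a hand-expanded fragment; the zone name "www.example.com" and the pattern zonefile "%z/%y/%s.zone" are assumed for the example:

char f[1024];
strlcpy(f, "%z/%y/%s.zone", sizeof(f));
replace_str(f, sizeof(f), "%z", "com");             /* top-level label */
replace_str(f, sizeof(f), "%y", "example");         /* next label down */
replace_str(f, sizeof(f), "%s", "www.example.com"); /* full zone name */
/* f is now "com/example/www.example.com.zone" */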
diff --git a/usr.sbin/nsd/options.h b/usr.sbin/nsd/options.h
index cab7d5749cf..4cad972a683 100644
--- a/usr.sbin/nsd/options.h
+++ b/usr.sbin/nsd/options.h
@@ -1,7 +1,7 @@
/*
* options.h -- nsd.conf options definitions and prototypes
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -17,8 +17,10 @@
struct query;
struct dname;
struct tsig_key;
+struct buffer;
typedef struct nsd_options nsd_options_t;
+typedef struct pattern_options pattern_options_t;
typedef struct zone_options zone_options_t;
typedef struct ipaddress_option ip_address_option_t;
typedef struct acl_options acl_options_t;
@@ -28,12 +30,24 @@ typedef struct config_parser_state config_parser_state_t;
* Options global for nsd.
*/
struct nsd_options {
+ /* config file name */
+ char* configfile;
/* options for zones, by apex, contains zone_options_t */
rbtree_t* zone_options;
+ /* patterns, by name, contains pattern_options_t */
+ rbtree_t* patterns;
- /* list of keys defined */
- key_options_t* keys;
- size_t numkeys;
+ /* free space in zonelist file, contains zonelist_bucket */
+ rbtree_t* zonefree;
+ /* number of free space lines in zonelist file */
+ size_t zonefree_number;
+ /* zonelist file if open */
+ FILE* zonelist;
+ /* last offset in file (or 0 if none) */
+ off_t zonelist_off;
+
+ /* rbtree of keys defined, by name */
+ rbtree_t* keys;
/* list of ip adresses to bind to (or NULL for all) */
ip_address_option_t* ip_addresses;
@@ -42,8 +56,8 @@ struct nsd_options {
int debug_mode;
int verbosity;
int hide_version;
- int ip4_only;
- int ip6_only;
+ int do_ip4;
+ int do_ip6;
const char* database;
const char* identity;
const char* logfile;
@@ -56,14 +70,30 @@ struct nsd_options {
const char* pidfile;
const char* port;
int statistics;
- const char* zonestatsfile;
const char* chroot;
const char* username;
const char* zonesdir;
- const char* difffile;
const char* xfrdfile;
+ const char* xfrdir;
+ const char* zonelistfile;
const char* nsid;
int xfrd_reload_timeout;
+ int zonefiles_check;
+
+ /** remote control section. enable toggle. */
+ int control_enable;
+ /** the interfaces the remote control should listen on */
+ ip_address_option_t* control_interface;
+ /** port number for the control port */
+ int control_port;
+ /** private key file for server */
+ char* server_key_file;
+ /** certificate file for server */
+ char* server_cert_file;
+ /** private key file for nsd-control */
+ char* control_key_file;
+ /** certificate file for nsd-control */
+ char* control_cert_file;
#ifdef RATELIMIT
/** number of buckets in rrl hashtable */
@@ -88,14 +118,11 @@ struct ipaddress_option {
};
/*
- * Options for a zone
+ * Pattern of zone options, used to contain options for zone(s).
*/
-struct zone_options {
- /* key is dname of apex */
+struct pattern_options {
rbnode_t node;
-
- /* is apex of the zone */
- const char* name;
+ const char* pname; /* name of the pattern, key of rbtree */
const char* zonefile;
acl_options_t* allow_notify;
acl_options_t* request_xfr;
@@ -106,7 +133,32 @@ struct zone_options {
uint16_t rrl_whitelist; /* bitmap with rrl types */
#endif
uint8_t allow_axfr_fallback;
+ uint8_t allow_axfr_fallback_is_default;
uint8_t notify_retry;
+ uint8_t notify_retry_is_default;
+ uint8_t implicit; /* pattern is implicit, part_of_config zone used */
+ uint8_t xfrd_flags;
+};
+
+#define PATTERN_IMPLICIT_MARKER "_implicit_"
+
+/*
+ * Options for a zone
+ */
+struct zone_options {
+ /* key is dname of apex */
+ rbnode_t node;
+
+ /* is apex of the zone */
+ const char* name;
+ /* if not part of config, the offset and linesize of zonelist entry */
+ off_t off;
+ int linesize;
+ /* pattern for the zone options, if zone is part_of_config, this is
+	 * an anonymous pattern created in-place */
+ pattern_options_t* pattern;
+ /* zone is fixed into the main config, not in zonelist, cannot delete */
+ uint8_t part_of_config;
};
union acl_addr_storage {
@@ -125,10 +177,10 @@ struct acl_options {
acl_options_t* next;
/* options */
- uint8_t use_axfr_only;
- uint8_t allow_udp;
time_t ixfr_disabled;
int bad_xfr_count;
+ uint8_t use_axfr_only;
+ uint8_t allow_udp;
/* ip address range */
const char* ip_address_spec;
@@ -154,21 +206,36 @@ struct acl_options {
* Key definition
*/
struct key_options {
- key_options_t* next;
- const char* name;
- const char* algorithm;
- const char* secret;
+ rbnode_t node; /* key of tree is name */
+ char* name;
+ char* algorithm;
+ char* secret;
struct tsig_key* tsig_key;
};
+/** zone list free space */
+struct zonelist_free {
+ struct zonelist_free* next;
+ off_t off;
+};
+/** zonelist free bucket for a particular line length */
+struct zonelist_bucket {
+ rbnode_t node; /* key is ptr to linesize */
+ int linesize;
+ struct zonelist_free* list;
+};
+
/*
* Used during options parsing
*/
struct config_parser_state {
const char* filename;
+ const char* chroot;
int line;
int errors;
+ int server_settings_seen;
nsd_options_t* opt;
+ pattern_options_t* current_pattern;
zone_options_t* current_zone;
key_options_t* current_key;
ip_address_option_t* current_ip_address_option;
@@ -177,6 +244,8 @@ struct config_parser_state {
acl_options_t* current_notify;
acl_options_t* current_provide_xfr;
acl_options_t* current_outgoing_interface;
+ void (*err)(void*,const char*);
+ void* err_arg;
};
extern config_parser_state_t* cfg_parser;
@@ -188,14 +257,41 @@ static inline size_t nsd_options_num_zones(nsd_options_t* opt)
{ return opt->zone_options->count; }
/* insert a zone into the main options tree, returns 0 on error */
int nsd_options_insert_zone(nsd_options_t* opt, zone_options_t* zone);
+/* insert a pattern into the main options tree, returns 0 on error */
+int nsd_options_insert_pattern(nsd_options_t* opt, pattern_options_t* pat);
-/* parses options file. Returns false on failure */
-int parse_options_file(nsd_options_t* opt, const char* file);
+/* parses options file. Returns false on failure. The callback, if non-NULL,
+ * is called with error strings; by default the errors are printed. */
+int parse_options_file(nsd_options_t* opt, const char* file,
+ void (*err)(void*,const char*), void* err_arg);
zone_options_t* zone_options_create(region_type* region);
+void zone_options_delete(nsd_options_t* opt, zone_options_t* zone);
/* find a zone by apex domain name, or NULL if not found. */
zone_options_t* zone_options_find(nsd_options_t* opt, const struct dname* apex);
+pattern_options_t* pattern_options_create(region_type* region);
+pattern_options_t* pattern_options_find(nsd_options_t* opt, const char* name);
+int pattern_options_equal(pattern_options_t* p, pattern_options_t* q);
+void pattern_options_remove(nsd_options_t* opt, const char* name);
+void pattern_options_add_modify(nsd_options_t* opt, pattern_options_t* p);
+void pattern_options_marshal(struct buffer* buffer, pattern_options_t* p);
+pattern_options_t* pattern_options_unmarshal(region_type* r, struct buffer* b);
key_options_t* key_options_create(region_type* region);
+void key_options_insert(nsd_options_t* opt, key_options_t* key);
key_options_t* key_options_find(nsd_options_t* opt, const char* name);
+void key_options_remove(nsd_options_t* opt, const char* name);
+int key_options_equal(key_options_t* p, key_options_t* q);
+void key_options_add_modify(nsd_options_t* opt, key_options_t* key);
+/* read in zone list file. Returns false on failure */
+int parse_zone_list_file(nsd_options_t* opt);
+/* create zone entry and add to the zonelist file */
+zone_options_t* zone_list_add(nsd_options_t* opt, const char* zname,
+ const char* pname);
+/* create zonelist entry, do not insert in file (called by _add) */
+zone_options_t* zone_list_zone_insert(nsd_options_t* opt, const char* nm,
+ const char* patnm, int linesize, off_t off);
+void zone_list_del(nsd_options_t* opt, zone_options_t* zone);
+void zone_list_compact(nsd_options_t* opt);
+void zone_list_close(nsd_options_t* opt);
#if defined(HAVE_SSL)
/* tsig must be inited, adds all keys in options to tsig. */
@@ -218,8 +314,18 @@ int acl_same_host(acl_options_t* a, acl_options_t* b);
/* find acl by number in the list */
acl_options_t* acl_find_num(acl_options_t* acl, int num);
+/* see if two acl lists are the same (same elements in same order, or empty) */
+int acl_list_equal(acl_options_t* p, acl_options_t* q);
+/* see if two acl are the same */
+int acl_equal(acl_options_t* p, acl_options_t* q);
+
/* see if a zone is a slave or a master zone */
int zone_is_slave(zone_options_t* opt);
+/* create zonefile name, returns static pointer (perhaps to options data) */
+const char* config_make_zonefile(zone_options_t* zone);
+
+#define ZONEC_PCT_TIME 5 /* seconds, then it starts to print pcts */
+#define ZONEC_PCT_COUNT 100000 /* elements before pct check is done */
/* parsing helpers */
void c_error(const char* msg);
@@ -233,5 +339,9 @@ int parse_acl_range_type(char* ip, char** mask);
void parse_acl_range_subnet(char* p, void* addr, int maxbits);
/* clean up options */
void nsd_options_destroy(nsd_options_t* opt);
+/* replace occurrences of one with two in buf, pass length of buffer */
+void replace_str(char* buf, size_t len, const char* one, const char* two);
+/* apply pattern to the existing pattern in the parser */
+void config_apply_pattern(const char* name);
#endif /* OPTIONS_H */
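
Illustrative sketch (not part of the patch): the reworked options interface above replaces the linked key list with rbtrees, factors zone options into reusable patterns, and moves zone registration into a separate zone list file. The fragment below shows how a caller might drive the new entry points; the helper names load_config and print_cfg_err, and the use of nsd_options_create(), are assumptions made for this example only.

#include <stdio.h>
#include "options.h"
#include "region-allocator.h"

/* hypothetical error callback: print parse errors to stderr */
static void print_cfg_err(void* arg, const char* msg)
{
	(void)arg;
	fprintf(stderr, "nsd.conf: %s\n", msg);
}

/* hypothetical loader: parse nsd.conf, then the separate zone list file */
static nsd_options_t* load_config(region_type* region, const char* file)
{
	nsd_options_t* opt = nsd_options_create(region);
	if(!opt || !parse_options_file(opt, file, print_cfg_err, NULL))
		return NULL;	/* errors were reported through the callback */
	if(!parse_zone_list_file(opt))
		return NULL;	/* zone list file missing or malformed */
	return opt;
}
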
diff --git a/usr.sbin/nsd/packet.c b/usr.sbin/nsd/packet.c
index a4ab76e9511..4cba1600c8f 100644
--- a/usr.sbin/nsd/packet.c
+++ b/usr.sbin/nsd/packet.c
@@ -1,7 +1,7 @@
/*
* packet.c -- low-level DNS packet encoding and decoding functions.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -22,7 +22,7 @@ encode_dname(query_type *q, domain_type *domain)
query_put_dname_offset(q, domain, buffer_position(q->packet));
DEBUG(DEBUG_NAME_COMPRESSION, 2,
(LOG_INFO, "dname: %s, number: %lu, offset: %u\n",
- dname_to_string(domain_dname(domain), NULL),
+ domain_to_string(domain),
(unsigned long) domain->number,
query_get_dname_offset(q, domain)));
buffer_write(q->packet, dname_name(domain_dname(domain)),
@@ -32,7 +32,7 @@ encode_dname(query_type *q, domain_type *domain)
if (domain->parent) {
DEBUG(DEBUG_NAME_COMPRESSION, 2,
(LOG_INFO, "dname: %s, number: %lu, pointer: %u\n",
- dname_to_string(domain_dname(domain), NULL),
+ domain_to_string(domain),
(unsigned long) domain->number,
query_get_dname_offset(q, domain)));
assert(query_get_dname_offset(q, domain) <= MAX_COMPRESSION_OFFSET);
diff --git a/usr.sbin/nsd/packet.h b/usr.sbin/nsd/packet.h
index c9a34061b74..2efa288b91e 100644
--- a/usr.sbin/nsd/packet.h
+++ b/usr.sbin/nsd/packet.h
@@ -1,7 +1,7 @@
/*
* packet.h -- low-level DNS packet encoding and decoding functions.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/query.h b/usr.sbin/nsd/query.h
index 24fafd447ca..4ff21f770c5 100644
--- a/usr.sbin/nsd/query.h
+++ b/usr.sbin/nsd/query.h
@@ -1,7 +1,7 @@
/*
* query.h -- manipulation with the queries
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -76,9 +76,6 @@ struct query {
/* The zone used to answer the query. */
zone_type *zone;
- /* The domain used to answer the query. */
- domain_type *domain;
-
/* The delegation domain, if any. */
domain_type *delegation_domain;
@@ -106,10 +103,10 @@ struct query {
* query name when generated from a wildcard record.
*/
uint16_t *compressed_dname_offsets;
- uint32_t compressed_dname_offsets_size;
+ size_t compressed_dname_offsets_size;
/* number of temporary domains used for the query */
- uint32_t number_temporary_domains;
+ size_t number_temporary_domains;
/*
* Used for AXFR processing.
@@ -175,7 +172,7 @@ void query_add_compression_domain(struct query *query,
*/
query_type *query_create(region_type *region,
uint16_t *compressed_dname_offsets,
- uint32_t compressed_dname_size);
+ size_t compressed_dname_size);
/*
* Reset a query structure so it is ready for receiving and processing
diff --git a/usr.sbin/nsd/radtree.c b/usr.sbin/nsd/radtree.c
new file mode 100644
index 00000000000..11bfb4fcd30
--- /dev/null
+++ b/usr.sbin/nsd/radtree.c
@@ -0,0 +1,1411 @@
+/*
+ * radtree -- generic radix tree for binary strings.
+ *
+ * Copyright (c) 2010, NLnet Labs. See LICENSE for license.
+ */
+#include "config.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include "radtree.h"
+#include "util.h"
+#include "region-allocator.h"
+
+#include <stdio.h>
+#include <ctype.h>
+
+struct radtree* radix_tree_create(struct region* region)
+{
+ struct radtree* rt = (struct radtree*)region_alloc(region, sizeof(*rt));
+ if(!rt) return NULL;
+ rt->region = region;
+ radix_tree_init(rt);
+ return rt;
+}
+
+void radix_tree_init(struct radtree* rt)
+{
+ rt->root = NULL;
+ rt->count = 0;
+}
+
+/** delete radnodes in postorder recursion */
+static void radnode_del_postorder(struct region* region, struct radnode* n)
+{
+ unsigned i;
+ if(!n) return;
+ for(i=0; i<n->len; i++) {
+ radnode_del_postorder(region, n->array[i].node);
+ region_recycle(region, n->array[i].str, n->array[i].len);
+ }
+ region_recycle(region, n->array, n->capacity*sizeof(struct radsel));
+ region_recycle(region, n, sizeof(*n));
+}
+
+void radix_tree_clear(struct radtree* rt)
+{
+ radnode_del_postorder(rt->region, rt->root);
+ rt->root = NULL;
+ rt->count = 0;
+}
+
+void radix_tree_delete(struct radtree* rt)
+{
+ if(!rt) return;
+ radix_tree_clear(rt);
+ region_recycle(rt->region, rt, sizeof(*rt));
+}
+
+/** return last elem-containing node in this subtree (excl self) */
+static struct radnode*
+radnode_last_in_subtree(struct radnode* n)
+{
+ int idx;
+ /* try last entry in array first */
+ for(idx=((int)n->len)-1; idx >= 0; idx--) {
+ if(n->array[idx].node) {
+ /* does it have entries in its subtrees? */
+ if(n->array[idx].node->len > 0) {
+ struct radnode* s = radnode_last_in_subtree(
+ n->array[idx].node);
+ if(s) return s;
+ }
+ /* no, does it have an entry itself? */
+ if(n->array[idx].node->elem)
+ return n->array[idx].node;
+ }
+ }
+ return NULL;
+}
+
+/** last in subtree, incl self */
+static struct radnode*
+radnode_last_in_subtree_incl_self(struct radnode* n)
+{
+ struct radnode* s = radnode_last_in_subtree(n);
+ if(s) return s;
+ if(n->elem) return n;
+ return NULL;
+}
+
+/** return first elem-containing node in this subtree (excl self) */
+static struct radnode*
+radnode_first_in_subtree(struct radnode* n)
+{
+ unsigned idx;
+ struct radnode* s;
+ /* try every subnode */
+ for(idx=0; idx<n->len; idx++) {
+ if(n->array[idx].node) {
+ /* does it have elem itself? */
+ if(n->array[idx].node->elem)
+ return n->array[idx].node;
+ /* try its subtrees */
+ if((s=radnode_first_in_subtree(n->array[idx].node))!=0)
+ return s;
+ }
+ }
+ return NULL;
+}
+
+/** Find an entry in arrays from idx-1 to 0 */
+static struct radnode*
+radnode_find_prev_from_idx(struct radnode* n, unsigned from)
+{
+ unsigned idx = from;
+ while(idx > 0) {
+ idx --;
+ if(n->array[idx].node) {
+ struct radnode* s = radnode_last_in_subtree_incl_self(
+ n->array[idx].node);
+ if(s) return s;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Find a prefix of the key, in whole-nodes.
+ * Finds the longest prefix that corresponds to a whole radnode entry.
+ * There may be a slightly longer prefix in one of the array elements.
+ * @param result: the longest prefix, the entry itself if *respos==len,
+ * otherwise an array entry, residx.
+ * @param respos: pos in string where next unmatched byte is, if == len an
+ * exact match has been found. If == 0 then a "" match was found.
+ * @return false if no prefix found, not even the root "" prefix.
+ */
+static int radix_find_prefix_node(struct radtree* rt, uint8_t* k,
+ radstrlen_t len, struct radnode** result, radstrlen_t* respos)
+{
+ struct radnode* n = rt->root;
+ radstrlen_t pos = 0;
+ uint8_t byte;
+ *respos = 0;
+ *result = n;
+ if(!n) return 0;
+ while(n) {
+ if(pos == len) {
+ return 1;
+ }
+ byte = k[pos];
+ if(byte < n->offset) {
+ return 1;
+ }
+ byte -= n->offset;
+ if(byte >= n->len) {
+ return 1;
+ }
+ pos++;
+ if(n->array[byte].len != 0) {
+ /* must match additional string */
+ if(pos+n->array[byte].len > len) {
+ return 1;
+ }
+ if(memcmp(&k[pos], n->array[byte].str,
+ n->array[byte].len) != 0) {
+ return 1;
+ }
+ pos += n->array[byte].len;
+ }
+ n = n->array[byte].node;
+ if(!n) return 1;
+ *respos = pos;
+ *result = n;
+ }
+ return 1;
+}
+
+/** grow array to at least the given size, offset unchanged */
+static int
+radnode_array_grow(struct region* region, struct radnode* n, unsigned want)
+{
+ unsigned ns = ((unsigned)n->capacity)*2;
+ struct radsel* a;
+ assert(want <= 256); /* cannot be more, range of uint8 */
+ if(want > ns)
+ ns = want;
+ if(ns > 256) ns = 256;
+ /* we do not use realloc, because we want to keep the old array
+ * in case alloc fails, so that the tree is still usable */
+ a = (struct radsel*)region_alloc(region, ns*sizeof(struct radsel));
+ if(!a) return 0;
+ assert(n->len <= n->capacity);
+ assert(n->capacity < ns);
+ memcpy(&a[0], &n->array[0], n->len*sizeof(struct radsel));
+ region_recycle(region, n->array, n->capacity*sizeof(struct radsel));
+ n->array = a;
+ n->capacity = ns;
+ return 1;
+}
+
+/** make space in radnode array for another byte */
+static int
+radnode_array_space(struct region* region, struct radnode* n, uint8_t byte)
+{
+ /* is there an array? */
+ if(!n->array || n->capacity == 0) {
+ n->array = (struct radsel*)region_alloc(region,
+ sizeof(struct radsel));
+ if(!n->array) return 0;
+ memset(&n->array[0], 0, sizeof(struct radsel));
+ n->len = 1;
+ n->capacity = 1;
+ n->offset = byte;
+ /* is the array unused? */
+ } else if(n->len == 0 && n->capacity != 0) {
+ n->len = 1;
+ n->offset = byte;
+ memset(&n->array[0], 0, sizeof(struct radsel));
+ /* is it below the offset? */
+ } else if(byte < n->offset) {
+ /* is capacity enough? */
+ unsigned idx;
+ unsigned need = n->offset-byte;
+ if(n->len+need > n->capacity) {
+ /* grow array */
+ if(!radnode_array_grow(region, n, n->len+need))
+ return 0;
+ }
+ /* reshuffle items to end */
+ memmove(&n->array[need], &n->array[0],
+ n->len*sizeof(struct radsel));
+ /* fixup pidx */
+ for(idx = 0; idx < n->len; idx++) {
+ if(n->array[idx+need].node)
+ n->array[idx+need].node->pidx = idx+need;
+ }
+ /* zero the first */
+ memset(&n->array[0], 0, need*sizeof(struct radsel));
+ n->len += need;
+ n->offset = byte;
+ /* is it above the max? */
+ } else if(byte-n->offset >= n->len) {
+ /* is capacity enough? */
+ unsigned need = (byte-n->offset) - n->len + 1;
+ /* grow array */
+ if(n->len + need > n->capacity) {
+ if(!radnode_array_grow(region, n, n->len+need))
+ return 0;
+ }
+ /* zero added entries */
+ memset(&n->array[n->len], 0, need*sizeof(struct radsel));
+ /* grow length */
+ n->len += need;
+ }
+ return 1;
+}
+
+/** create a prefix in the array strs */
+static int
+radsel_str_create(struct region* region, struct radsel* r, uint8_t* k,
+ radstrlen_t pos, radstrlen_t len)
+{
+ r->str = (uint8_t*)region_alloc(region, sizeof(uint8_t)*(len-pos));
+ if(!r->str)
+ return 0; /* out of memory */
+ memmove(r->str, k+pos, len-pos);
+ r->len = len-pos;
+ return 1;
+}
+
+/** see if one byte string p is a prefix of another x (equality is true) */
+static int
+bstr_is_prefix(uint8_t* p, radstrlen_t plen, uint8_t* x, radstrlen_t xlen)
+{
+ /* if plen is zero, it is an (empty) prefix */
+ if(plen == 0)
+ return 1;
+ /* if so, p must be shorter */
+ if(plen > xlen)
+ return 0;
+ return (memcmp(p, x, plen) == 0);
+}
+
+/** number of bytes in common for the two strings */
+static radstrlen_t
+bstr_common(uint8_t* x, radstrlen_t xlen, uint8_t* y, radstrlen_t ylen)
+{
+ unsigned i, max = ((xlen<ylen)?xlen:ylen);
+ for(i=0; i<max; i++) {
+ if(x[i] != y[i])
+ return i;
+ }
+ return max;
+}
+
+
+int
+bstr_is_prefix_ext(uint8_t* p, radstrlen_t plen, uint8_t* x, radstrlen_t xlen)
+{
+ return bstr_is_prefix(p, plen, x, xlen);
+}
+
+radstrlen_t
+bstr_common_ext(uint8_t* x, radstrlen_t xlen, uint8_t* y, radstrlen_t ylen)
+{
+ return bstr_common(x, xlen, y, ylen);
+}
+
+/** allocate remainder from prefixes for a split:
+ * plen: len prefix, l: longer bstring, llen: length of l. */
+static int
+radsel_prefix_remainder(struct region* region, radstrlen_t plen,
+ uint8_t* l, radstrlen_t llen,
+ uint8_t** s, radstrlen_t* slen)
+{
+ *slen = llen - plen;
+ *s = (uint8_t*)region_alloc(region, (*slen)*sizeof(uint8_t));
+ if(!*s)
+ return 0;
+ memmove(*s, l+plen, llen-plen);
+ return 1;
+}
+
+/** radsel create a split when two nodes have shared prefix.
+ * @param r: radsel that gets changed, it contains a node.
+ * @param k: key byte string
+ * @param pos: position where the string enters the radsel (e.g. r.str)
+ * @param len: length of k.
+ * @param add: additional node for the string k.
+ * removed by caller on failure.
+ * @return false on alloc failure, no changes made.
+ */
+static int
+radsel_split(struct region* region, struct radsel* r, uint8_t* k,
+ radstrlen_t pos, radstrlen_t len, struct radnode* add)
+{
+ uint8_t* addstr = k+pos;
+ radstrlen_t addlen = len-pos;
+ if(bstr_is_prefix(addstr, addlen, r->str, r->len)) {
+ uint8_t* split_str=NULL, *dupstr=NULL;
+ radstrlen_t split_len=0;
+ /* 'add' is a prefix of r.node */
+ /* also for empty addstr */
+ /* set it up so that the 'add' node has r.node as child */
+ /* so, r.node gets moved below the 'add' node, but we do
+ * this so that the r.node stays the same pointer for its
+ * key name */
+ assert(addlen != r->len);
+ assert(addlen < r->len);
+ if(r->len-addlen > 1) {
+ /* shift one because a char is in the lookup array */
+ if(!radsel_prefix_remainder(region, addlen+1, r->str,
+ r->len, &split_str, &split_len))
+ return 0;
+ }
+ if(addlen != 0) {
+ dupstr = (uint8_t*)region_alloc(region,
+ addlen*sizeof(uint8_t));
+ if(!dupstr) {
+ region_recycle(region, split_str, split_len);
+ return 0;
+ }
+ memcpy(dupstr, addstr, addlen);
+ }
+ if(!radnode_array_space(region, add, r->str[addlen])) {
+ region_recycle(region, split_str, split_len);
+ region_recycle(region, dupstr, addlen);
+ return 0;
+ }
+ /* alloc succeeded, now link it in */
+ add->parent = r->node->parent;
+ add->pidx = r->node->pidx;
+ add->array[0].node = r->node;
+ add->array[0].str = split_str;
+ add->array[0].len = split_len;
+ r->node->parent = add;
+ r->node->pidx = 0;
+
+ r->node = add;
+ region_recycle(region, r->str, r->len);
+ r->str = dupstr;
+ r->len = addlen;
+ } else if(bstr_is_prefix(r->str, r->len, addstr, addlen)) {
+ uint8_t* split_str = NULL;
+ radstrlen_t split_len = 0;
+ /* r.node is a prefix of 'add' */
+ /* set it up so that the 'r.node' has 'add' as child */
+ /* and basically, r.node is already completely fine,
+ * we only need to create a node as its child */
+ assert(addlen != r->len);
+ assert(r->len < addlen);
+ if(addlen-r->len > 1) {
+ /* shift one because a character goes into array */
+ if(!radsel_prefix_remainder(region, r->len+1, addstr,
+ addlen, &split_str, &split_len))
+ return 0;
+ }
+ if(!radnode_array_space(region, r->node, addstr[r->len])) {
+ region_recycle(region, split_str, split_len);
+ return 0;
+ }
+ /* alloc succeeded, now link it in */
+ add->parent = r->node;
+ add->pidx = addstr[r->len] - r->node->offset;
+ r->node->array[add->pidx].node = add;
+ r->node->array[add->pidx].str = split_str;
+ r->node->array[add->pidx].len = split_len;
+ } else {
+ /* okay we need to create a new node that chooses between
+ * the nodes 'add' and r.node
+ * We do this so that r.node stays the same pointer for its
+ * key name. */
+ struct radnode* com;
+ uint8_t* common_str=NULL, *s1_str=NULL, *s2_str=NULL;
+ radstrlen_t common_len, s1_len=0, s2_len=0;
+ common_len = bstr_common(r->str, r->len, addstr, addlen);
+ assert(common_len < r->len);
+ assert(common_len < addlen);
+
+ /* create the new node for choice */
+ com = (struct radnode*)region_alloc_zero(region, sizeof(*com));
+ if(!com) return 0; /* out of memory */
+
+ /* create the two substrings for subchoices */
+ if(r->len-common_len > 1) {
+ /* shift by one char because it goes in lookup array */
+ if(!radsel_prefix_remainder(region, common_len+1,
+ r->str, r->len, &s1_str, &s1_len)) {
+ region_recycle(region, com, sizeof(*com));
+ return 0;
+ }
+ }
+ if(addlen-common_len > 1) {
+ if(!radsel_prefix_remainder(region, common_len+1,
+ addstr, addlen, &s2_str, &s2_len)) {
+ region_recycle(region, com, sizeof(*com));
+ region_recycle(region, s1_str, s1_len);
+ return 0;
+ }
+ }
+
+ /* create the shared prefix to go in r */
+ if(common_len > 0) {
+ common_str = (uint8_t*)region_alloc(region,
+ common_len*sizeof(uint8_t*));
+ if(!common_str) {
+ region_recycle(region, com, sizeof(*com));
+ region_recycle(region, s1_str, s1_len);
+ region_recycle(region, s2_str, s2_len);
+ return 0;
+ }
+ memcpy(common_str, addstr, common_len);
+ }
+
+ /* make space in the common node array */
+ if(!radnode_array_space(region, com, r->str[common_len]) ||
+ !radnode_array_space(region, com, addstr[common_len])) {
+ region_recycle(region, com->array, com->capacity*sizeof(struct radsel));
+ region_recycle(region, com, sizeof(*com));
+ region_recycle(region, common_str, common_len);
+ region_recycle(region, s1_str, s1_len);
+ region_recycle(region, s2_str, s2_len);
+ return 0;
+ }
+
+ /* allocs succeeded, proceed to link it all up */
+ com->parent = r->node->parent;
+ com->pidx = r->node->pidx;
+ r->node->parent = com;
+ r->node->pidx = r->str[common_len]-com->offset;
+ add->parent = com;
+ add->pidx = addstr[common_len]-com->offset;
+ com->array[r->node->pidx].node = r->node;
+ com->array[r->node->pidx].str = s1_str;
+ com->array[r->node->pidx].len = s1_len;
+ com->array[add->pidx].node = add;
+ com->array[add->pidx].str = s2_str;
+ com->array[add->pidx].len = s2_len;
+ region_recycle(region, r->str, r->len);
+ r->str = common_str;
+ r->len = common_len;
+ r->node = com;
+ }
+ return 1;
+}
+
+struct radnode* radix_insert(struct radtree* rt, uint8_t* k, radstrlen_t len,
+ void* elem)
+{
+ struct radnode* n;
+ radstrlen_t pos = 0;
+ /* create new element to add */
+ struct radnode* add = (struct radnode*)region_alloc_zero(rt->region,
+ sizeof(*add));
+ if(!add) return NULL; /* out of memory */
+ add->elem = elem;
+
+ /* find out where to add it */
+ if(!radix_find_prefix_node(rt, k, len, &n, &pos)) {
+ /* new root */
+ assert(rt->root == NULL);
+ if(len == 0) {
+ rt->root = add;
+ } else {
+ /* add a root to point to new node */
+ n = (struct radnode*)region_alloc_zero(rt->region,
+ sizeof(*n));
+ if(!n) return NULL;
+ if(!radnode_array_space(rt->region, n, k[0])) {
+ region_recycle(rt->region, n->array,
+ n->capacity*sizeof(struct radsel));
+ region_recycle(rt->region, n, sizeof(*n));
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ add->parent = n;
+ add->pidx = 0;
+ n->array[0].node = add;
+ if(len > 1) {
+ if(!radsel_prefix_remainder(rt->region, 1, k, len,
+ &n->array[0].str, &n->array[0].len)) {
+ region_recycle(rt->region, n->array,
+ n->capacity*sizeof(struct radsel));
+ region_recycle(rt->region, n, sizeof(*n));
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ }
+ rt->root = n;
+ }
+ } else if(pos == len) {
+ /* found an exact match */
+ if(n->elem) {
+ /* already exists, failure */
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ n->elem = elem;
+ region_recycle(rt->region, add, sizeof(*add));
+ add = n;
+ } else {
+ /* n is a node which can accommodate the new element */
+ uint8_t byte;
+ assert(pos < len);
+ byte = k[pos];
+
+ /* see if it falls outside of array */
+ if(byte < n->offset || byte-n->offset >= n->len) {
+ /* make space in the array for it; adjusts offset */
+ if(!radnode_array_space(rt->region, n, byte)) {
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ assert(byte>=n->offset && byte-n->offset<n->len);
+ byte -= n->offset;
+ /* see if more prefix needs to be split off */
+ if(pos+1 < len) {
+ if(!radsel_str_create(rt->region, &n->array[byte],
+ k, pos+1, len)) {
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ }
+ /* insert the new node in the new bucket */
+ add->parent = n;
+ add->pidx = byte;
+ n->array[byte].node = add;
+ /* so a bucket exists and byte falls in it */
+ } else if(n->array[byte-n->offset].node == NULL) {
+ /* use existing bucket */
+ byte -= n->offset;
+ if(pos+1 < len) {
+ /* split off more prefix */
+ if(!radsel_str_create(rt->region, &n->array[byte],
+ k, pos+1, len)) {
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ }
+ /* insert the new node in the new bucket */
+ add->parent = n;
+ add->pidx = byte;
+ n->array[byte].node = add;
+ } else {
+ /* use bucket but it has a shared prefix,
+ * split that out and create a new intermediate
+ * node to split out between the two.
+ * One of the two might exactmatch the new
+ * intermediate node */
+ if(!radsel_split(rt->region, &n->array[byte-n->offset],
+ k, pos+1, len, add)) {
+ region_recycle(rt->region, add, sizeof(*add));
+ return NULL;
+ }
+ }
+ }
+
+ rt->count ++;
+ return add;
+}
+
+/** Delete a radnode */
+static void radnode_delete(struct region* region, struct radnode* n)
+{
+ unsigned i;
+ if(!n) return;
+ for(i=0; i<n->len; i++) {
+ /* safe to free NULL str */
+ region_recycle(region, n->array[i].str, n->array[i].len);
+ }
+ region_recycle(region, n->array, n->capacity*sizeof(struct radsel));
+ region_recycle(region, n, sizeof(*n));
+}
+
+/** Cleanup node with one child, it is removed and joined into parent[x] str */
+static int
+radnode_cleanup_onechild(struct region* region, struct radnode* n,
+ struct radnode* par)
+{
+ uint8_t* join;
+ radstrlen_t joinlen;
+ uint8_t pidx = n->pidx;
+ struct radnode* child = n->array[0].node;
+ /* node had one child, merge them into the parent. */
+ /* keep the child node, so its pointers stay valid. */
+
+ /* at parent, append child->str to array str */
+ assert(pidx < par->len);
+ joinlen = par->array[pidx].len + n->array[0].len + 1;
+ join = (uint8_t*)region_alloc(region, joinlen*sizeof(uint8_t));
+ if(!join) {
+ /* cleanup failed due to out of memory */
+ /* the tree is inefficient, with node n still existing */
+ return 0;
+ }
+ /* we know that .str and join are malloced, thus aligned */
+ memcpy(join, par->array[pidx].str, par->array[pidx].len);
+ /* the array lookup is gone, put its character in the lookup string*/
+ join[par->array[pidx].len] = child->pidx + n->offset;
+ /* but join+len may not be aligned */
+ memmove(join+par->array[pidx].len+1, n->array[0].str, n->array[0].len);
+ region_recycle(region, par->array[pidx].str, par->array[pidx].len);
+ par->array[pidx].str = join;
+ par->array[pidx].len = joinlen;
+ /* and set the node to our child. */
+ par->array[pidx].node = child;
+ child->parent = par;
+ child->pidx = pidx;
+ /* we are unlinked, delete our node */
+ radnode_delete(region, n);
+ return 1;
+}
+
+/** remove array of nodes */
+static void
+radnode_array_clean_all(struct region* region, struct radnode* n)
+{
+ n->offset = 0;
+ n->len = 0;
+ /* shrink capacity */
+ region_recycle(region, n->array, n->capacity*sizeof(struct radsel));
+ n->array = NULL;
+ n->capacity = 0;
+}
+
+/** see if capacity can be reduced for the given node array */
+static void
+radnode_array_reduce_if_needed(struct region* region, struct radnode* n)
+{
+ if(n->len <= n->capacity/2 && n->len != n->capacity) {
+ struct radsel* a = (struct radsel*)region_alloc(region,
+ sizeof(*a)*n->len);
+ if(!a) return;
+ memcpy(a, n->array, sizeof(*a)*n->len);
+ region_recycle(region, n->array, n->capacity*sizeof(*a));
+ n->array = a;
+ n->capacity = n->len;
+ }
+}
+
+/** remove NULL nodes from front of array */
+static void
+radnode_array_clean_front(struct region* region, struct radnode* n)
+{
+ /* move them up and adjust offset */
+ unsigned idx, shuf = 0;
+ /* remove until a nonNULL entry */
+ while(shuf < n->len && n->array[shuf].node == NULL)
+ shuf++;
+ if(shuf == 0)
+ return;
+ if(shuf == n->len) {
+ /* the array is empty, the tree is inefficient */
+ radnode_array_clean_all(region, n);
+ return;
+ }
+ assert(shuf < n->len);
+ assert((int)shuf <= 255-(int)n->offset);
+ memmove(&n->array[0], &n->array[shuf],
+ (n->len - shuf)*sizeof(struct radsel));
+ n->offset += shuf;
+ n->len -= shuf;
+ for(idx=0; idx<n->len; idx++)
+ if(n->array[idx].node)
+ n->array[idx].node->pidx = idx;
+ /* see if capacity can be reduced */
+ radnode_array_reduce_if_needed(region, n);
+}
+
+/** remove NULL nodes from end of array */
+static void
+radnode_array_clean_end(struct region* region, struct radnode* n)
+{
+ /* shorten it */
+ unsigned shuf = 0;
+ /* remove until a nonNULL entry */
+ while(shuf < n->len && n->array[n->len-1-shuf].node == NULL)
+ shuf++;
+ if(shuf == 0)
+ return;
+ if(shuf == n->len) {
+ /* the array is empty, the tree is inefficient */
+ radnode_array_clean_all(region, n);
+ return;
+ }
+ assert(shuf < n->len);
+ n->len -= shuf;
+ /* array elements can stay where they are */
+ /* see if capacity can be reduced */
+ radnode_array_reduce_if_needed(region, n);
+}
+
+/** clean up radnode leaf, where we know it has a parent */
+static void
+radnode_cleanup_leaf(struct region* region, struct radnode* n,
+ struct radnode* par)
+{
+ uint8_t pidx;
+ /* node was a leaf */
+ /* delete leaf node, but store parent+idx */
+ pidx = n->pidx;
+ radnode_delete(region, n);
+
+ /* set parent+idx entry to NULL str and node.*/
+ assert(pidx < par->len);
+ region_recycle(region, par->array[pidx].str, par->array[pidx].len);
+ par->array[pidx].str = NULL;
+ par->array[pidx].len = 0;
+ par->array[pidx].node = NULL;
+
+ /* see if par offset or len must be adjusted */
+ if(par->len == 1) {
+ /* removed final element from array */
+ radnode_array_clean_all(region, par);
+ } else if(pidx == 0) {
+ /* removed first element from array */
+ radnode_array_clean_front(region, par);
+ } else if(pidx == par->len-1) {
+ /* removed last element from array */
+ radnode_array_clean_end(region, par);
+ }
+}
+
+/**
+ * Cleanup a radix node that was made smaller, see if it can
+ * be merged with others.
+ * @param rt: tree to remove root if needed.
+ * @param n: node to cleanup
+ * @return false on alloc failure.
+ */
+static int
+radnode_cleanup(struct radtree* rt, struct radnode* n)
+{
+ while(n) {
+ if(n->elem) {
+ /* cannot delete node with a data element */
+ return 1;
+ } else if(n->len == 1 && n->parent) {
+ return radnode_cleanup_onechild(rt->region, n, n->parent);
+ } else if(n->len == 0) {
+ struct radnode* par = n->parent;
+ if(!par) {
+ /* root deleted */
+ radnode_delete(rt->region, n);
+ rt->root = NULL;
+ return 1;
+ }
+ /* remove and delete the leaf node */
+ radnode_cleanup_leaf(rt->region, n, par);
+ /* see if parent can now be cleaned up */
+ n = par;
+ } else {
+ /* node cannot be cleaned up */
+ return 1;
+ }
+ }
+ /* ENOTREACH */
+ return 1;
+}
+
+void radix_delete(struct radtree* rt, struct radnode* n)
+{
+ if(!n) return;
+ n->elem = NULL;
+ rt->count --;
+ if(!radnode_cleanup(rt, n)) {
+ /* out of memory in cleanup. the elem ptr is NULL, but
+ * the radix tree could be inefficient. */
+ }
+}
+
+struct radnode* radix_search(struct radtree* rt, uint8_t* k, radstrlen_t len)
+{
+ struct radnode* n = rt->root;
+ radstrlen_t pos = 0;
+ uint8_t byte;
+ while(n) {
+ if(pos == len)
+ return n->elem?n:NULL;
+ byte = k[pos];
+ if(byte < n->offset)
+ return NULL;
+ byte -= n->offset;
+ if(byte >= n->len)
+ return NULL;
+ pos++;
+ if(n->array[byte].len != 0) {
+ /* must match additional string */
+ if(pos+n->array[byte].len > len)
+ return NULL; /* no match */
+ if(memcmp(&k[pos], n->array[byte].str,
+ n->array[byte].len) != 0)
+ return NULL; /* no match */
+ pos += n->array[byte].len;
+ }
+ n = n->array[byte].node;
+ }
+ return NULL;
+}
+
+/** return self or a previous element */
+static int ret_self_or_prev(struct radnode* n, struct radnode** result)
+{
+ if(n->elem)
+ *result = n;
+ else *result = radix_prev(n);
+ return 0;
+}
+
+int radix_find_less_equal(struct radtree* rt, uint8_t* k, radstrlen_t len,
+ struct radnode** result)
+{
+ struct radnode* n = rt->root;
+ radstrlen_t pos = 0;
+ uint8_t byte;
+ int r;
+ if(!n) {
+ /* empty tree */
+ *result = NULL;
+ return 0;
+ }
+ while(pos < len) {
+ byte = k[pos];
+ if(byte < n->offset) {
+ /* so the previous is the element itself */
+ /* or something before this element */
+ return ret_self_or_prev(n, result);
+ }
+ byte -= n->offset;
+ if(byte >= n->len) {
+ /* so, the previous is the last of array, or itself */
+ /* or something before this element */
+ if((*result=radnode_last_in_subtree_incl_self(n))==0)
+ *result = radix_prev(n);
+ return 0;
+ }
+ pos++;
+ if(!n->array[byte].node) {
+ /* no match */
+ /* Find an entry in arrays from byte-1 to 0 */
+ *result = radnode_find_prev_from_idx(n, byte);
+ if(*result)
+ return 0;
+ /* this entry or something before it */
+ return ret_self_or_prev(n, result);
+ }
+ if(n->array[byte].len != 0) {
+ /* must match additional string */
+ if(pos+n->array[byte].len > len) {
+ /* the additional string is longer than the key */
+ if( (memcmp(&k[pos], n->array[byte].str,
+ len-pos)) <= 0) {
+ /* and the key is before this node */
+ *result = radix_prev(n->array[byte].node);
+ } else {
+ /* the key is after the additional
+ * string, thus everything in that
+ * subtree is smaller. */
+ *result=radnode_last_in_subtree_incl_self(n->array[byte].node);
+ /* if somehow that is NULL,
+ * then we have an inefficient tree:
+ * byte+1 is larger than us, so find
+ * something in byte-1 and before */
+ if(!*result)
+ *result = radix_prev(n->array[byte].node);
+ }
+ return 0; /* no match */
+ }
+ if( (r=memcmp(&k[pos], n->array[byte].str,
+ n->array[byte].len)) < 0) {
+ *result = radix_prev(n->array[byte].node);
+ return 0; /* no match */
+ } else if(r > 0) {
+ /* the key is larger than the additional
+ * string, thus everything in that subtree
+ * is smaller */
+ *result=radnode_last_in_subtree_incl_self(n->array[byte].node);
+ /* if we have an inefficient tree */
+ if(!*result) *result = radix_prev(n->array[byte].node);
+ return 0; /* no match */
+ }
+ pos += n->array[byte].len;
+ }
+ n = n->array[byte].node;
+ }
+ if(n->elem) {
+ /* exact match */
+ *result = n;
+ return 1;
+ }
+ /* there is a node which is an exact match, but it has no element */
+ *result = radix_prev(n);
+ return 0;
+}
+
+
+struct radnode* radix_first(struct radtree* rt)
+{
+ struct radnode* n;
+ if(!rt || !rt->root) return NULL;
+ n = rt->root;
+ if(n->elem) return n;
+ return radix_next(n);
+}
+
+struct radnode* radix_last(struct radtree* rt)
+{
+ if(!rt || !rt->root) return NULL;
+ return radnode_last_in_subtree_incl_self(rt->root);
+}
+
+struct radnode* radix_next(struct radnode* n)
+{
+ if(n->len) {
+ /* go down */
+ struct radnode* s = radnode_first_in_subtree(n);
+ if(s) return s;
+ }
+ /* go up - the parent->elem is not useful, because it is before us */
+ while(n->parent) {
+ unsigned idx = n->pidx;
+ n = n->parent;
+ idx++;
+ for(; idx < n->len; idx++) {
+ /* go down the next branch */
+ if(n->array[idx].node) {
+ struct radnode* s;
+ /* node itself */
+ if(n->array[idx].node->elem)
+ return n->array[idx].node;
+ /* or subtree */
+ s = radnode_first_in_subtree(
+ n->array[idx].node);
+ if(s) return s;
+ }
+ }
+ }
+ return NULL;
+}
+
+struct radnode* radix_prev(struct radnode* n)
+{
+ /* must go up, since all array nodes are after this node */
+ while(n->parent) {
+ uint8_t idx = n->pidx;
+ struct radnode* s;
+ n = n->parent;
+ assert(n->len > 0); /* since we are a child */
+ /* see if there are elements in previous branches there */
+ s = radnode_find_prev_from_idx(n, idx);
+ if(s) return s;
+ /* the current node is before the array */
+ if(n->elem)
+ return n;
+ }
+ return NULL;
+}
+
+/** convert one character from domain-name to radname */
+static uint8_t char_d2r(uint8_t c)
+{
+ if(c < 'A') return c+1; /* make space for 00 */
+ else if(c <= 'Z') return c-'A'+'a'; /* lowercase */
+ else return c;
+}
+
+/** convert one character from radname to domain-name (still lowercased) */
+static uint8_t char_r2d(uint8_t c)
+{
+ assert(c != 0); /* end of label */
+ if(c <= 'A') return c-1;
+ else return c;
+}
+
+/** copy and convert a range of characters */
+static void cpy_d2r(uint8_t* to, const uint8_t* from, int len)
+{
+ int i;
+ for(i=0; i<len; i++)
+ to[i] = char_d2r(from[i]);
+}
+
+/** copy and convert a range of characters */
+static void cpy_r2d(uint8_t* to, uint8_t* from, uint8_t len)
+{
+ uint8_t i;
+ for(i=0; i<len; i++)
+ to[i] = char_r2d(from[i]);
+}
+
+/* radname code: domain to radix-bstring */
+void radname_d2r(uint8_t* k, radstrlen_t* len, const uint8_t* dname,
+ size_t dlen)
+{
+ /* the domain name is converted as follows,
+ * to preserve the normal (NSEC) ordering of domain names.
+ * lowercased, and 'end-of-label' is a '00' byte,
+ * bytes 00 up to (but not including) 'A' are shifted up by one to make space for the 00 byte.
+ * final root label is not appended (string ends).
+ * because the only allowed empty label is the final root label,
+ * we can also remove the last 00 label-end.
+ * The total result length is one-or-two less than the dname.
+ *
+ * examples (numbers are bytes, letters are ascii):
+ * - root: dname: 0, radname: ''
+ * - nl.: dname: 3nl0, radname: 'nl'
+ * - labs.nl: dname 4labs3nl0, radname: 'nl0labs'
+ * - x.labs.nl: dname 1x4labs3nl0, radname: 'nl0labs0x'
+ */
+
+ /* conversion by putting the label starts on a stack */
+ const uint8_t* labstart[130];
+ unsigned int lab = 0, kpos, dpos = 0;
+ /* sufficient space */
+ assert(k && dname);
+ assert(dlen <= 256); /* and therefore not more than 128 labels */
+ assert(*len >= dlen);
+ assert(dlen > 0); /* even root label has dlen=1 */
+
+ /* root */
+ if(dlen == 1) {
+ assert(dname[0] == 0);
+ *len = 0;
+ return;
+ }
+
+ /* walk through domain name and remember label positions */
+ do {
+ /* compression pointers not allowed */
+ if((dname[dpos] & 0xc0)) {
+ *len = 0;
+ return; /* format error */
+ }
+ labstart[lab++] = &dname[dpos];
+ if(dpos + dname[dpos] + 1 >= dlen) {
+ *len = 0;
+ return; /* format error */
+ }
+ /* skip the label contents */
+ dpos += dname[dpos];
+ dpos ++;
+ } while(dname[dpos] != 0);
+ /* the loop exit condition keeps the root label out of the labstart stack */
+ /* because the root was handled before, we know there is some text */
+ assert(lab > 0);
+ lab-=1;
+ kpos = *labstart[lab];
+ cpy_d2r(k, labstart[lab]+1, kpos);
+ /* if there are more labels, copy them over */
+ while(lab) {
+ /* put 'end-of-label' 00 to end previous label */
+ k[kpos++]=0;
+ /* append the label */
+ lab--;
+ cpy_d2r(k+kpos, labstart[lab]+1, *labstart[lab]);
+ kpos += *labstart[lab];
+ }
+ /* done */
+ assert(kpos == dlen-2); /* no rootlabel, one less label-marker */
+ *len = kpos;
+}
+
+/* radname code: radix-bstring to domain */
+void radname_r2d(uint8_t* k, radstrlen_t len, uint8_t* dname, size_t* dlen)
+{
+ /* find labels and push on stack */
+ uint8_t* labstart[130];
+ uint8_t lablen[130];
+ unsigned int lab = 0, dpos, kpos = 0;
+ /* sufficient space */
+ assert(k && dname);
+ assert((size_t)*dlen >= (size_t)len+2);
+ assert(len <= 256);
+ /* root label */
+ if(len == 0) {
+ assert(*dlen > 0);
+ dname[0]=0;
+ *dlen=1;
+ return;
+ }
+ /* find labels */
+ while(kpos < len) {
+ lablen[lab]=0;
+ labstart[lab]=&k[kpos];
+ /* skip to next label */
+ while(kpos < len && k[kpos] != 0) {
+ lablen[lab]++;
+ kpos++;
+ }
+ lab++;
+ /* skip 00 byte for label-end */
+ if(kpos < len) {
+ assert(k[kpos] == 0);
+ kpos++;
+ }
+ }
+ /* copy the labels over to the domain name */
+ dpos = 0;
+ while(lab) {
+ lab--;
+ /* label length */
+ dname[dpos++] = lablen[lab];
+ /* label content */
+ cpy_r2d(dname+dpos, labstart[lab], lablen[lab]);
+ dpos += lablen[lab];
+ }
+ /* append root label */
+ dname[dpos++] = 0;
+ /* assert the domain name is wellformed */
+ assert((int)dpos == (int)len+2);
+ assert(dname[dpos-1] == 0); /* ends with root label */
+ *dlen = dpos;
+}
+
+/** insert by domain name */
+struct radnode*
+radname_insert(struct radtree* rt, const uint8_t* d, size_t max, void* elem)
+{
+ /* convert and insert */
+ uint8_t radname[300];
+ radstrlen_t len = (radstrlen_t)sizeof(radname);
+ if(max > sizeof(radname))
+ return NULL; /* too long */
+ radname_d2r(radname, &len, d, max);
+ return radix_insert(rt, radname, len, elem);
+}
+
+/** delete by domain name */
+void
+radname_delete(struct radtree* rt, const uint8_t* d, size_t max)
+{
+ /* search and remove */
+ struct radnode* n = radname_search(rt, d, max);
+ if(n) radix_delete(rt, n);
+}
+
+/* search for exact match of domain name, converted to radname in tree */
+struct radnode* radname_search(struct radtree* rt, const uint8_t* d,
+ size_t max)
+{
+ /* stack of labels in the domain name */
+ const uint8_t* labstart[130];
+ unsigned int lab, dpos, lpos;
+ struct radnode* n = rt->root;
+ uint8_t byte;
+ radstrlen_t i;
+ uint8_t b;
+
+ /* search for root? it is '' */
+ if(max < 1)
+ return NULL;
+ if(d[0] == 0) {
+ if(!n) return NULL;
+ return n->elem?n:NULL;
+ }
+
+ /* find labels stack in domain name */
+ lab = 0;
+ dpos = 0;
+ /* must have one label, since root is specialcased */
+ do {
+ if((d[dpos] & 0xc0))
+ return NULL; /* compression ptrs not allowed error */
+ labstart[lab++] = &d[dpos];
+ if(dpos + d[dpos] + 1 >= max)
+ return NULL; /* format error: outside of bounds */
+ /* skip the label contents */
+ dpos += d[dpos];
+ dpos ++;
+ } while(d[dpos] != 0);
+ /* the loop exit condition keeps the root label out of the labstart stack */
+ /* now: dpos+1 is length of domain name. lab is number of labels-1 */
+
+ /* start processing at the last label */
+ lab-=1;
+ lpos = 0;
+ while(n) {
+ /* fetch next byte this label */
+ if(lpos < *labstart[lab])
+ /* lpos+1 to skip labelstart, lpos++ to move forward */
+ byte = char_d2r(labstart[lab][++lpos]);
+ else {
+ if(lab == 0) /* last label - we're done */
+ return n->elem?n:NULL;
+ /* next label, search for byte 00 */
+ lpos = 0;
+ lab--;
+ byte = 0;
+ }
+ /* find that byte in the array */
+ if(byte < n->offset)
+ return NULL;
+ byte -= n->offset;
+ if(byte >= n->len)
+ return NULL;
+ if(n->array[byte].len != 0) {
+ /* must match additional string */
+ /* see how many bytes we need and start matching them*/
+ for(i=0; i<n->array[byte].len; i++) {
+ /* next byte to match */
+ if(lpos < *labstart[lab])
+ b = char_d2r(labstart[lab][++lpos]);
+ else {
+ /* if last label, no match since
+ * we are in the additional string */
+ if(lab == 0)
+ return NULL;
+ /* next label, search for byte 00 */
+ lpos = 0;
+ lab--;
+ b = 0;
+ }
+ if(n->array[byte].str[i] != b)
+ return NULL; /* not matched */
+ }
+ }
+ n = n->array[byte].node;
+ }
+ return NULL;
+}
+
+/* find domain name or smaller or equal domain name in radix tree */
+int radname_find_less_equal(struct radtree* rt, const uint8_t* d, size_t max,
+ struct radnode** result)
+{
+ /* stack of labels in the domain name */
+ const uint8_t* labstart[130];
+ unsigned int lab, dpos, lpos;
+ struct radnode* n = rt->root;
+ uint8_t byte;
+ radstrlen_t i;
+ uint8_t b;
+
+ /* empty tree */
+ if(!n) {
+ *result = NULL;
+ return 0;
+ }
+
+ /* search for root? it is '' */
+ if(max < 1) {
+ *result = NULL;
+ return 0; /* parse error, out of bounds */
+ }
+ if(d[0] == 0) {
+ if(n->elem) {
+ *result = n;
+ return 1;
+ }
+ /* no smaller element than the root */
+ *result = NULL;
+ return 0;
+ }
+
+ /* find labels stack in domain name */
+ lab = 0;
+ dpos = 0;
+ /* must have one label, since root is specialcased */
+ do {
+ if((d[dpos] & 0xc0)) {
+ *result = NULL;
+ return 0; /* compression ptrs not allowed error */
+ }
+ labstart[lab++] = &d[dpos];
+ if(dpos + d[dpos] + 1 >= max) {
+ *result = NULL; /* format error: outside of bounds */
+ return 0;
+ }
+ /* skip the label contents */
+ dpos += d[dpos];
+ dpos ++;
+ } while(d[dpos] != 0);
+ /* the loop exit condition keeps the root label out of the labstart stack */
+ /* now: dpos+1 is length of domain name. lab is number of labels-1 */
+
+ /* start processing at the last label */
+ lab-=1;
+ lpos = 0;
+ while(1) {
+ /* fetch next byte this label */
+ if(lpos < *labstart[lab])
+ /* lpos+1 to skip labelstart, lpos++ to move forward */
+ byte = char_d2r(labstart[lab][++lpos]);
+ else {
+ if(lab == 0) {
+ /* last label - we're done */
+ /* exact match */
+ if(n->elem) {
+ *result = n;
+ return 1;
+ }
+ /* there is a node which is an exact match,
+ * but there is no element in it */
+ *result = radix_prev(n);
+ return 0;
+ }
+ /* next label, search for byte 0 the label separator */
+ lpos = 0;
+ lab--;
+ byte = 0;
+ }
+ /* find that byte in the array */
+ if(byte < n->offset)
+ /* so the previous is the element itself */
+ /* or something before this element */
+ return ret_self_or_prev(n, result);
+ byte -= n->offset;
+ if(byte >= n->len) {
+ /* so, the previous is the last of array, or itself */
+ /* or something before this element */
+ *result = radnode_last_in_subtree_incl_self(n);
+ if(!*result)
+ *result = radix_prev(n);
+ return 0;
+ }
+ if(!n->array[byte].node) {
+ /* no match */
+ /* Find an entry in arrays from byte-1 to 0 */
+ *result = radnode_find_prev_from_idx(n, byte);
+ if(*result)
+ return 0;
+ /* this entry or something before it */
+ return ret_self_or_prev(n, result);
+ }
+ if(n->array[byte].len != 0) {
+ /* must match additional string */
+ /* see how many bytes we need and start matching them*/
+ for(i=0; i<n->array[byte].len; i++) {
+ /* next byte to match */
+ if(lpos < *labstart[lab])
+ b = char_d2r(labstart[lab][++lpos]);
+ else {
+ /* if last label, no match since
+ * we are in the additional string */
+ if(lab == 0) {
+ /* dname ended, thus before
+ * this array element */
+ *result =radix_prev(
+ n->array[byte].node);
+ return 0;
+ }
+ /* next label, search for byte 00 */
+ lpos = 0;
+ lab--;
+ b = 0;
+ }
+ if(b < n->array[byte].str[i]) {
+ *result =radix_prev(
+ n->array[byte].node);
+ return 0;
+ } else if(b > n->array[byte].str[i]) {
+ /* the key is after the additional,
+ * so everything in its subtree is
+ * smaller */
+ *result = radnode_last_in_subtree_incl_self(n->array[byte].node);
+ /* if that is NULL, we have an
+ * inefficient tree, find in byte-1*/
+ if(!*result)
+ *result = radix_prev(n->array[byte].node);
+ return 0;
+ }
+ }
+ }
+ n = n->array[byte].node;
+ }
+ /* ENOTREACH */
+ return 0;
+}
+
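
Illustrative sketch (not part of the patch): basic use of the radix tree implemented above with a raw binary key. The region helpers region_create, xalloc and region_destroy, the demo key and the payload value are assumptions for the example, and error handling is reduced to early returns.

#include <stdio.h>
#include "region-allocator.h"
#include "radtree.h"
#include "util.h"

static void radtree_demo(void)
{
	region_type* region = region_create(xalloc, free);
	struct radtree* rt = radix_tree_create(region);
	/* radname-style key for labs.nl: 'nl' 00 'labs' */
	uint8_t key[] = { 'n', 'l', 0, 'l', 'a', 'b', 's' };
	int payload = 42;
	struct radnode* n;

	if(!rt || !radix_insert(rt, key, sizeof(key), &payload))
		return;	/* out of memory or duplicate key */

	n = radix_search(rt, key, sizeof(key));	/* exact lookup */
	if(n)
		printf("found %d\n", *(int*)n->elem);

	radix_delete(rt, n);	/* detach the element, prune now-empty nodes */
	radix_tree_delete(rt);	/* recycle the remaining tree structure */
	region_destroy(region);
}
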
diff --git a/usr.sbin/nsd/radtree.h b/usr.sbin/nsd/radtree.h
new file mode 100644
index 00000000000..6f54de01641
--- /dev/null
+++ b/usr.sbin/nsd/radtree.h
@@ -0,0 +1,244 @@
+/*
+ * radtree -- generic radix tree for binary strings.
+ *
+ * Copyright (c) 2010, NLnet Labs. See LICENSE for license.
+ */
+#ifndef RADTREE_H
+#define RADTREE_H
+
+struct radnode;
+struct region;
+
+/** length of the binary string */
+typedef uint16_t radstrlen_t;
+
+/**
+ * The radix tree
+ *
+ * The elements are stored based on binary strings(0-255) of a given length.
+ * They are sorted, a prefix is sorted before its suffixes.
+ * If you want to know the key string, you should store it yourself, the
+ * tree stores it in the parts necessary for lookup.
+ * For binary strings for domain names see the radname routines.
+ */
+struct radtree {
+ /** root node in tree */
+ struct radnode* root;
+ /** count of number of elements */
+ size_t count;
+ /** region for allocation */
+ struct region* region;
+};
+
+/**
+ * A radix tree lookup node.
+ * The array is malloced separately from the radnode.
+ */
+struct radnode {
+ /** data element associated with the binary string up to this node */
+ void* elem;
+ /** parent node (NULL for the root) */
+ struct radnode* parent;
+ /** index in the parent lookup array */
+ uint8_t pidx;
+ /** offset of the lookup array, add to [i] for lookups */
+ uint8_t offset;
+ /** length of the lookup array */
+ uint16_t len;
+ /** capacity of the lookup array (can be larger than length) */
+ uint16_t capacity;
+ /** the lookup array by [byte-offset] */
+ struct radsel* array;
+};
+
+/**
+ * radix select edge in array
+ */
+struct radsel {
+ /** additional string after the selection-byte for this edge. */
+ uint8_t* str;
+ /** length of the additional string for this edge */
+ radstrlen_t len;
+ /** node that deals with byte+str */
+ struct radnode* node;
+};
+
+/**
+ * Create new radix tree
+ * @param region: where to allocate the tree.
+ * @return new tree or NULL on alloc failure.
+ */
+struct radtree* radix_tree_create(struct region* region);
+
+/**
+ * Init new radix tree.
+ * @param rt: radix tree to be initialized.
+ */
+void radix_tree_init(struct radtree* rt);
+
+/**
+ * Remove all elements and nodes from the radix tree; the tree itself stays usable.
+ * @param rt: radix tree to be cleared.
+ */
+void radix_tree_clear(struct radtree* rt);
+
+/**
+ * Delete radix tree.
+ * @param rt: radix tree to be deleted.
+ */
+void radix_tree_delete(struct radtree* rt);
+
+
+/**
+ * Insert element into radix tree.
+ * @param rt: the radix tree.
+ * @param key: key string.
+ * @param len: length of key.
+ * @param elem: pointer to element data.
+ * @return NULL on failure - out of memory.
+ * NULL on failure - duplicate entry.
+ * On success the new radix node for this element.
+ */
+struct radnode* radix_insert(struct radtree* rt, uint8_t* k, radstrlen_t len,
+ void* elem);
+
+/**
+ * Delete element from radix tree.
+ * @param rt: the radix tree.
+ * @param n: radix node for that element.
+ * if NULL, nothing is deleted.
+ */
+void radix_delete(struct radtree* rt, struct radnode* n);
+
+/**
+ * Find radix element in tree.
+ * @param rt: the radix tree.
+ * @param key: key string.
+ * @param len: length of key.
+ * @return the radix node or NULL if not found.
+ */
+struct radnode* radix_search(struct radtree* rt, uint8_t* k, radstrlen_t len);
+
+/**
+ * Find radix element in tree, and if not found, find the closest smaller or
+ * equal element in the tree.
+ * @param rt: the radix tree.
+ * @param key: key string.
+ * @param len: length of key.
+ * @param result: returns the radix node or closest match (NULL if key is
+ * smaller than the smallest key in the tree).
+ * @return true if exact match, false if no match.
+ */
+int radix_find_less_equal(struct radtree* rt, uint8_t* k, radstrlen_t len,
+ struct radnode** result);
+
+/**
+ * Return the first (smallest) element in the tree.
+ * @param rt: the radix tree.
+ * @return: first node or NULL if none.
+ */
+struct radnode* radix_first(struct radtree* rt);
+
+/**
+ * Return the last (largest) element in the tree.
+ * @param rt: the radix tree.
+ * @return: last node or NULL if none.
+ */
+struct radnode* radix_last(struct radtree* rt);
+
+/**
+ * Return the next element.
+ * @param n: the element to go from.
+ * @return: next node or NULL if none.
+ */
+struct radnode* radix_next(struct radnode* n);
+
+/**
+ * Return the previous element.
+ * @param n: the element to go from.
+ * @return: prev node or NULL if none.
+ */
+struct radnode* radix_prev(struct radnode* n);
+
+/*
+ * Perform a walk through all elements of the tree.
+ * node: variable of type struct radnode*.
+ * tree: pointer to the tree.
+ * for(node=radix_first(tree); node; node=radix_next(node))
+*/
+
+/**
+ * Create a binary string to represent a domain name
+ * @param k: string buffer to store into
+ * @param len: in/out: on input the size of the k buffer, on output the result length.
+ * @param dname: the domain name to convert, in wireformat.
+ * @param dlen: length of space for dname.
+ */
+void radname_d2r(uint8_t* k, radstrlen_t* len, const uint8_t* dname,
+ size_t dlen);
+
+/**
+ * Convert a binary string back to a domain name.
+ * @param k: the binary string.
+ * @param len: length of k.
+ * @param dname: buffer to store domain name into.
+ * @param dlen: in/out: buffer size on input, resulting length (including root label) on output.
+ */
+void radname_r2d(uint8_t* k, radstrlen_t len, uint8_t* dname, size_t* dlen);
+
+/**
+ * Search the radix tree using a domain name.
+ * The name is internally converted to a radname.
+ * @param rt: tree
+ * @param d: domain name, no compression pointers allowed.
+ * @param max: max length to go from d.
+ * @return NULL on parse error or if not found.
+ */
+struct radnode* radname_search(struct radtree* rt, const uint8_t* d,
+ size_t max);
+
+/**
+ * Find radix element in tree by domain name, and if not found,
+ * find the closest smaller or equal element in the tree.
+ * The name is internally converted to a radname (same sorting order).
+ * @param rt: the radix tree.
+ * @param d: domain name, no compression pointers allowed.
+ * @param max: max length to go from d.
+ * @param result: returns the radix node or closest match (NULL if key is
+ * smaller than the smallest key in the tree).
+ * could result in NULL on a parse error as well (with return false).
+ * @return true if exact match, false if no match.
+ */
+int radname_find_less_equal(struct radtree* rt, const uint8_t* d, size_t max,
+ struct radnode** result);
+
+/**
+ * Insert radix element by domain name.
+ * @param rt: the radix tree
+ * @param d: domain name, no compression pointers.
+ * @param max: max length from d.
+ * @param elem: the element pointer to insert.
+ * @return NULL on failure - out of memory.
+ * NULL on failure - duplicate entry.
+ * NULL on failure - parse error.
+ * On success the radix node for this element.
+ */
+struct radnode* radname_insert(struct radtree* rt, const uint8_t* d,
+ size_t max, void* elem);
+
+/**
+ * Delete element by domain name from radix tree.
+ * @param rt: the radix tree.
+ * @param d: the domain name. If it is not in the tree nothing happens.
+ * @param max: max length.
+ */
+void radname_delete(struct radtree* rt, const uint8_t* d, size_t max);
+
+/** number of bytes in common in strings */
+radstrlen_t bstr_common_ext(uint8_t* x, radstrlen_t xlen, uint8_t* y,
+ radstrlen_t ylen);
+/** true if one is prefix of the other */
+int bstr_is_prefix_ext(uint8_t* p, radstrlen_t plen, uint8_t* x,
+ radstrlen_t xlen);
+
+#endif /* RADTREE_H */
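
Illustrative sketch (not part of the patch): the domain-name front-end declared above together with the documented walk idiom. The wireformat literal encodes labs.nl. and the names radname_demo and elem are invented for this example; the tree is assumed to have been created with radix_tree_create().

#include <stddef.h>
#include <stdint.h>
#include "radtree.h"

static void radname_demo(struct radtree* rt, void* elem)
{
	/* wireformat for labs.nl.: 4 'l' 'a' 'b' 's' 3 'n' 'l' 0 */
	static const uint8_t dname[] = { 4,'l','a','b','s', 3,'n','l', 0 };
	struct radnode* n;

	if(!radname_insert(rt, dname, sizeof(dname), elem))
		return;	/* out of memory, duplicate, or parse error */

	/* exact lookup; the name is converted to a radname internally */
	n = radname_search(rt, dname, sizeof(dname));
	(void)n;

	/* full in-order walk, as described in the comment above */
	for(n = radix_first(rt); n; n = radix_next(n)) {
		/* n->elem is the stored element pointer */
	}
}
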
diff --git a/usr.sbin/nsd/rbtree.c b/usr.sbin/nsd/rbtree.c
index c7d384fe66d..80f7bbb2b6e 100644
--- a/usr.sbin/nsd/rbtree.c
+++ b/usr.sbin/nsd/rbtree.c
@@ -1,7 +1,7 @@
/*
* rbtree.c -- generic red black tree
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -551,39 +551,3 @@ rbtree_previous(rbnode_t *node)
}
return node;
}
-
-
-/**
- * Given an rbtree "root" node, find the first node under that tree in
- * postorder.
- */
-rbnode_t *
-rbtree_postorder_first(rbnode_t *root)
-{
- rbnode_t *node = root;
- do {
- while (node->left != RBTREE_NULL) {
- node = node->left;
- }
- while ((node->left == RBTREE_NULL) &&
- (node->right != RBTREE_NULL)) {
- node = node->right;
- }
- } while (node->left != node->right);
- return node;
-}
-
-
-/**
- * Given any node in an rbtree, find the next node in postorder.
- */
-rbnode_t *
-rbtree_postorder_next(rbnode_t *node)
-{
- if ((node->parent->right == RBTREE_NULL) ||
- (node->parent->right == node))
- node = node->parent;
- else
- node = rbtree_postorder_first(node->parent->right);
- return node;
-}
diff --git a/usr.sbin/nsd/rbtree.h b/usr.sbin/nsd/rbtree.h
index 028d715397c..a381cf0788f 100644
--- a/usr.sbin/nsd/rbtree.h
+++ b/usr.sbin/nsd/rbtree.h
@@ -1,7 +1,7 @@
/*
* rbtree.h -- generic red-black tree
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -60,8 +60,6 @@ rbnode_t *rbtree_first(rbtree_t *rbtree);
rbnode_t *rbtree_last(rbtree_t *rbtree);
rbnode_t *rbtree_next(rbnode_t *rbtree);
rbnode_t *rbtree_previous(rbnode_t *rbtree);
-rbnode_t *rbtree_postorder_first(rbnode_t *root);
-rbnode_t *rbtree_postorder_next(rbnode_t *node);
#define RBTREE_WALK(rbtree, k, d) \
for((rbtree)->_node = rbtree_first(rbtree);\
diff --git a/usr.sbin/nsd/rdata.h b/usr.sbin/nsd/rdata.h
index 0cddb16e0f4..0da8eab6ec0 100644
--- a/usr.sbin/nsd/rdata.h
+++ b/usr.sbin/nsd/rdata.h
@@ -1,7 +1,7 @@
/*
* rdata.h -- RDATA conversion functions.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/region-allocator.h b/usr.sbin/nsd/region-allocator.h
index a047a1dfc5a..7a7bfe96f2a 100644
--- a/usr.sbin/nsd/region-allocator.h
+++ b/usr.sbin/nsd/region-allocator.h
@@ -1,7 +1,7 @@
/*
* region-allocator.h -- region based memory allocator.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -129,6 +129,10 @@ void region_dump_stats(region_type *region, FILE *out);
/* get size of recyclebin */
size_t region_get_recycle_size(region_type* region);
+/* get size of region memory in use */
+size_t region_get_mem(region_type* region);
+/* get size of region memory unused */
+size_t region_get_mem_unused(region_type* region);
/* Debug print REGION statistics to LOG. */
void region_log_stats(region_type *region);
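The two new accessors expose how much memory a region holds. A hedged sketch of how a caller might report them (assumes region-allocator.h and util.h are included; the function name and log wording are made up for the example):

/* illustrative only: report region memory use, much as remote.c later does
 * for the xfrd and options regions in its stats output */
static void
example_report_region(region_type* region, const char* name)
{
	size_t used = region_get_mem(region);          /* bytes in use */
	size_t unused = region_get_mem_unused(region); /* allocated but currently free */
	log_msg(LOG_INFO, "%s region: %lu bytes used, %lu bytes unused",
		name, (unsigned long)used, (unsigned long)unused);
}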
diff --git a/usr.sbin/nsd/remote.c b/usr.sbin/nsd/remote.c
new file mode 100644
index 00000000000..d4858d2202a
--- /dev/null
+++ b/usr.sbin/nsd/remote.c
@@ -0,0 +1,1943 @@
+/*
+ * remote.c - remote control for the NSD daemon.
+ *
+ * Copyright (c) 2008, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the remote control functionality for the daemon.
+ * The remote control can be performed using either the commandline
+ * nsd-control tool, or an SSLv3/TLS capable web browser.
+ * The channel is secured using SSLv3 or TLSv1, and certificates.
+ * Both the server and the client (control tool) have their own keys.
+ */
+#include "config.h"
+#ifdef HAVE_SSL
+
+#ifdef HAVE_OPENSSL_SSL_H
+#include "openssl/ssl.h"
+#endif
+#ifdef HAVE_OPENSSL_ERR_H
+#include <openssl/err.h>
+#endif
+#include <ctype.h>
+#include <unistd.h>
+#include <assert.h>
+#include <fcntl.h>
+#ifndef USE_MINI_EVENT
+#include <event.h>
+#else
+#include "mini_event.h"
+#endif
+#include "remote.h"
+#include "util.h"
+#include "xfrd.h"
+#include "xfrd-notify.h"
+#include "xfrd-tcp.h"
+#include "nsd.h"
+#include "options.h"
+#include "difffile.h"
+#include "xfrd.h"
+#include "ipc.h"
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
+/** number of seconds timeout on incoming remote control handshake */
+#define REMOTE_CONTROL_TCP_TIMEOUT 120
+
+/** repattern to master or slave */
+#define REPAT_SLAVE 1
+#define REPAT_MASTER 2
+
+/** if nonzero, inhibit zero values in the stats output:
+ * zeroes are omitted for types that have no acronym and for unused rcodes */
+const int inhibit_zero = 1;
+
+/**
+ * a busy control command connection, SSL state
+ * Defined here to keep the definition private, and keep SSL out of the .h
+ */
+struct rc_state {
+ /** the next item in list */
+ struct rc_state* next, *prev;
+ /* if the event was added to the event_base */
+ int event_added;
+ /** the commpoint */
+ struct event c;
+ /** timeout for this state */
+ struct timeval tval;
+ /** in the handshake part */
+ enum { rc_none, rc_hs_read, rc_hs_write } shake_state;
+ /** the ssl state */
+ SSL* ssl;
+ /** the rc this is part of */
+ struct daemon_remote* rc;
+ /** stats list next item */
+ struct rc_state* stats_next;
+ /** stats list indicator (0 is not part of stats list, 1 is stats,
+ * 2 is stats_noreset). */
+ int in_stats_list;
+};
+
+/**
+ * list of events for accepting connections
+ */
+struct acceptlist {
+ struct acceptlist* next;
+ int event_added;
+ struct event c;
+};
+
+/**
+ * The remote control state.
+ */
+struct daemon_remote {
+ /** the master process for this remote control */
+ struct xfrd_state* xfrd;
+ /** commpoints for accepting remote control connections */
+ struct acceptlist* accept_list;
+ /** number of active commpoints that are handling remote control */
+ int active;
+ /** max active commpoints */
+ int max_active;
+ /** current commpoints busy; double linked, malloced */
+ struct rc_state* busy_list;
+ /** commpoints waiting for stats to complete (also in busy_list) */
+ struct rc_state* stats_list;
+ /** last time stats was reported */
+ struct timeval stats_time, boot_time;
+ /** the SSL context for creating new SSL streams */
+ SSL_CTX* ctx;
+};
+
+/**
+ * Print fixed line of text over ssl connection in blocking mode
+ * @param ssl: print to
+ * @param text: the text.
+ * @return false on connection failure.
+ */
+static int ssl_print_text(SSL* ssl, const char* text);
+
+/**
+ * printf style printing to the ssl connection
+ * @param ssl: the SSL connection to print to. Blocking.
+ * @param format: printf style format string.
+ * @return true on success, or false on a network failure.
+ */
+static int ssl_printf(SSL* ssl, const char* format, ...)
+ ATTR_FORMAT(printf, 2, 3);
+
+/**
+ * Read until \n is encountered
+ * If SSL signals EOF, the string up to then is returned (without \n).
+ * @param ssl: the SSL connection to read from. blocking.
+ * @param buf: buffer to read to.
+ * @param max: size of buffer.
+ * @return false on connection failure.
+ */
+static int ssl_read_line(SSL* ssl, char* buf, size_t max);
+
+/** perform the accept of a new remote control connection */
+static void
+remote_accept_callback(int fd, short event, void* arg);
+
+/** perform remote control */
+static void
+remote_control_callback(int fd, short event, void* arg);
+
+
+/** ---- end of private defines ---- **/
+
+
+/** log ssl crypto err */
+static void
+log_crypto_err(const char* str)
+{
+ /* error:[error code]:[library name]:[function name]:[reason string] */
+ char buf[128];
+ unsigned long e;
+ ERR_error_string_n(ERR_get_error(), buf, sizeof(buf));
+ log_msg(LOG_ERR, "%s crypto %s", str, buf);
+ while( (e=ERR_get_error()) ) {
+ ERR_error_string_n(e, buf, sizeof(buf));
+ log_msg(LOG_ERR, "and additionally crypto %s", buf);
+ }
+}
+
+#ifdef BIND8_STATS
+/** subtract timevals so that the values do not overflow or become negative */
+static void
+timeval_subtract(struct timeval* d, const struct timeval* end,
+ const struct timeval* start)
+{
+#ifndef S_SPLINT_S
+ time_t end_usec = end->tv_usec;
+ d->tv_sec = end->tv_sec - start->tv_sec;
+ if(end_usec < start->tv_usec) {
+ end_usec += 1000000;
+ d->tv_sec--;
+ }
+ d->tv_usec = end_usec - start->tv_usec;
+#endif
+}
+#endif /* BIND8_STATS */
+
+struct daemon_remote*
+daemon_remote_create(nsd_options_t* cfg)
+{
+ char* s_cert;
+ char* s_key;
+ struct daemon_remote* rc = (struct daemon_remote*)xalloc_zero(
+ sizeof(*rc));
+ rc->max_active = 10;
+ assert(cfg->control_enable);
+
+ /* init SSL library */
+ ERR_load_crypto_strings();
+ ERR_load_SSL_strings();
+ OpenSSL_add_all_algorithms();
+ (void)SSL_library_init();
+
+ rc->ctx = SSL_CTX_new(SSLv23_server_method());
+ if(!rc->ctx) {
+ log_crypto_err("could not SSL_CTX_new");
+ free(rc);
+ return NULL;
+ }
+ /* no SSLv2 because it has defects */
+ if(!(SSL_CTX_set_options(rc->ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2)){
+ log_crypto_err("could not set SSL_OP_NO_SSLv2");
+ daemon_remote_delete(rc);
+ return NULL;
+ }
+ s_cert = cfg->server_cert_file;
+ s_key = cfg->server_key_file;
+ VERBOSITY(2, (LOG_INFO, "setup SSL certificates"));
+ if (!SSL_CTX_use_certificate_file(rc->ctx,s_cert,SSL_FILETYPE_PEM)) {
+ log_msg(LOG_ERR, "Error for server-cert-file: %s", s_cert);
+ log_crypto_err("Error in SSL_CTX use_certificate_file");
+ goto setup_error;
+ }
+ if(!SSL_CTX_use_PrivateKey_file(rc->ctx,s_key,SSL_FILETYPE_PEM)) {
+ log_msg(LOG_ERR, "Error for server-key-file: %s", s_key);
+ log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
+ goto setup_error;
+ }
+ if(!SSL_CTX_check_private_key(rc->ctx)) {
+ log_msg(LOG_ERR, "Error for server-key-file: %s", s_key);
+ log_crypto_err("Error in SSL_CTX check_private_key");
+ goto setup_error;
+ }
+ if(!SSL_CTX_load_verify_locations(rc->ctx, s_cert, NULL)) {
+ log_crypto_err("Error setting up SSL_CTX verify locations");
+ setup_error:
+ daemon_remote_delete(rc);
+ return NULL;
+ }
+ SSL_CTX_set_client_CA_list(rc->ctx, SSL_load_client_CA_file(s_cert));
+ SSL_CTX_set_verify(rc->ctx, SSL_VERIFY_PEER, NULL);
+
+ /* and try to open the ports */
+ if(!daemon_remote_open_ports(rc, cfg)) {
+ log_msg(LOG_ERR, "could not open remote control port");
+ goto setup_error;
+ }
+
+ if(gettimeofday(&rc->boot_time, NULL) == -1)
+ log_msg(LOG_ERR, "gettimeofday: %s", strerror(errno));
+ rc->stats_time = rc->boot_time;
+
+ return rc;
+}
+
+void daemon_remote_close(struct daemon_remote* rc)
+{
+ struct rc_state* p, *np;
+ struct acceptlist* h, *nh;
+ if(!rc) return;
+
+ /* close listen sockets */
+ h = rc->accept_list;
+ while(h) {
+ nh = h->next;
+ if(h->event_added)
+ event_del(&h->c);
+ close(h->c.ev_fd);
+ free(h);
+ h = nh;
+ }
+ rc->accept_list = NULL;
+
+ /* close busy connection sockets */
+ p = rc->busy_list;
+ while(p) {
+ np = p->next;
+ if(p->event_added)
+ event_del(&p->c);
+ if(p->ssl)
+ SSL_free(p->ssl);
+ close(p->c.ev_fd);
+ free(p);
+ p = np;
+ }
+ rc->busy_list = NULL;
+ rc->active = 0;
+}
+
+void daemon_remote_delete(struct daemon_remote* rc)
+{
+ if(!rc) return;
+ daemon_remote_close(rc);
+ if(rc->ctx) {
+ SSL_CTX_free(rc->ctx);
+ }
+ free(rc);
+}
+
+static int
+create_tcp_accept_sock(struct addrinfo* addr, int* noproto)
+{
+#if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)))
+ int on = 1;
+#endif
+ int s;
+ *noproto = 0;
+ if ((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
+#if defined(INET6)
+ if (addr->ai_family == AF_INET6 &&
+ errno == EAFNOSUPPORT) {
+ *noproto = 1;
+ log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported");
+ return -1;
+ }
+#endif /* INET6 */
+ log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
+ return -1;
+ }
+#ifdef SO_REUSEADDR
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
+ log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno));
+ }
+#endif /* SO_REUSEADDR */
+#if defined(INET6) && defined(IPV6_V6ONLY)
+ if (addr->ai_family == AF_INET6 &&
+ setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0)
+ {
+ log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno));
+ return -1;
+ }
+#endif
+ /* set it nonblocking */
+ /* (StevensUNP p463), if tcp listening socket is blocking, then
+ it may block in accept, even if select() says readable. */
+ if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
+ log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno));
+ }
+ /* Bind it... */
+ if (bind(s, (struct sockaddr *)addr->ai_addr, addr->ai_addrlen) != 0) {
+ log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno));
+ return -1;
+ }
+ /* Listen to it... */
+ if (listen(s, TCP_BACKLOG_REMOTE) == -1) {
+ log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
+ return -1;
+ }
+ return s;
+}
+
+/**
+ * Add and open a new control port
+ * @param rc: rc with result list.
+ * @param ip: ip str
+ * @param nr: port nr
+ * @param noproto_is_err: if lack of protocol support is an error.
+ * @return false on failure.
+ */
+static int
+add_open(struct daemon_remote* rc, const char* ip, int nr, int noproto_is_err)
+{
+ struct addrinfo hints;
+ struct addrinfo* res;
+ struct acceptlist* hl;
+ int noproto;
+ int fd, r;
+ char port[15];
+ snprintf(port, sizeof(port), "%d", nr);
+ port[sizeof(port)-1]=0;
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST;
+ if((r = getaddrinfo(ip, port, &hints, &res)) != 0 || !res) {
+ log_msg(LOG_ERR, "control interface %s:%s getaddrinfo: %s %s",
+ ip?ip:"default", port, gai_strerror(r),
+#ifdef EAI_SYSTEM
+ r==EAI_SYSTEM?(char*)strerror(errno):""
+#else
+ ""
+#endif
+ );
+ return 0;
+ }
+
+ /* open fd */
+ fd = create_tcp_accept_sock(res, &noproto);
+ freeaddrinfo(res);
+ if(fd == -1 && noproto) {
+ if(!noproto_is_err)
+ return 1; /* return success, but do nothing */
+ log_msg(LOG_ERR, "cannot open control interface %s %d : "
+ "protocol not supported", ip, nr);
+ return 0;
+ }
+ if(fd == -1) {
+ log_msg(LOG_ERR, "cannot open control interface %s %d", ip, nr);
+ return 0;
+ }
+
+ /* alloc */
+ hl = (struct acceptlist*)xalloc_zero(sizeof(*hl));
+ hl->next = rc->accept_list;
+ rc->accept_list = hl;
+
+ hl->c.ev_fd = fd;
+ hl->event_added = 0;
+ return 1;
+}
+
+int
+daemon_remote_open_ports(struct daemon_remote* rc, nsd_options_t* cfg)
+{
+ assert(cfg->control_enable && cfg->control_port);
+ if(cfg->control_interface) {
+ ip_address_option_t* p;
+ for(p = cfg->control_interface; p; p = p->next) {
+ if(!add_open(rc, p->address, cfg->control_port, 1)) {
+ return 0;
+ }
+ }
+ } else {
+ /* defaults */
+ if(cfg->do_ip6 && !add_open(rc, "::1", cfg->control_port, 0)) {
+ return 0;
+ }
+ if(cfg->do_ip4 &&
+ !add_open(rc, "127.0.0.1", cfg->control_port, 1)) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+void
+daemon_remote_attach(struct daemon_remote* rc, struct xfrd_state* xfrd)
+{
+ int fd;
+ struct acceptlist* p;
+ if(!rc) return;
+ rc->xfrd = xfrd;
+ for(p = rc->accept_list; p; p = p->next) {
+ /* add event */
+ fd = p->c.ev_fd;
+ event_set(&p->c, fd, EV_PERSIST|EV_READ, remote_accept_callback,
+ rc);
+ if(event_base_set(xfrd->event_base, &p->c) != 0)
+ log_msg(LOG_ERR, "remote: cannot set event_base");
+ if(event_add(&p->c, NULL) != 0)
+ log_msg(LOG_ERR, "remote: cannot add event");
+ p->event_added = 1;
+ }
+}
+
+static void
+remote_accept_callback(int fd, short event, void* arg)
+{
+ struct daemon_remote *rc = (struct daemon_remote*)arg;
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int newfd;
+ struct rc_state* n;
+
+ if (!(event & EV_READ)) {
+ return;
+ }
+
+ /* perform the accept */
+ addrlen = sizeof(addr);
+ newfd = accept(fd, (struct sockaddr*)&addr, &addrlen);
+ if(newfd == -1) {
+ if ( errno != EINTR
+ && errno != EWOULDBLOCK
+#ifdef ECONNABORTED
+ && errno != ECONNABORTED
+#endif /* ECONNABORTED */
+#ifdef EPROTO
+ && errno != EPROTO
+#endif /* EPROTO */
+ ) {
+ log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
+ }
+ return;
+ }
+
+ /* create new commpoint unless we are servicing already */
+ if(rc->active >= rc->max_active) {
+ log_msg(LOG_WARNING, "drop incoming remote control: "
+ "too many connections");
+ close_exit:
+ close(newfd);
+ return;
+ }
+ if (fcntl(newfd, F_SETFL, O_NONBLOCK) == -1) {
+ log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
+ goto close_exit;
+ }
+
+ /* setup state to service the remote control command */
+ n = (struct rc_state*)calloc(1, sizeof(*n));
+ if(!n) {
+ log_msg(LOG_ERR, "out of memory");
+ goto close_exit;
+ }
+
+ n->tval.tv_sec = REMOTE_CONTROL_TCP_TIMEOUT;
+ n->tval.tv_usec = 0L;
+
+ event_set(&n->c, newfd, EV_PERSIST|EV_TIMEOUT|EV_READ,
+ remote_control_callback, n);
+ if(event_base_set(xfrd->event_base, &n->c) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot set event_base");
+ if(event_add(&n->c, &n->tval) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot add event");
+ n->event_added = 1;
+
+ if(2 <= verbosity) {
+ char s[128];
+ addr2str(&addr, s, sizeof(s));
+ VERBOSITY(2, (LOG_INFO, "new control connection from %s", s));
+ }
+
+ n->shake_state = rc_hs_read;
+ n->ssl = SSL_new(rc->ctx);
+ if(!n->ssl) {
+ log_crypto_err("could not SSL_new");
+ event_del(&n->c);
+ free(n);
+ goto close_exit;
+ }
+ SSL_set_accept_state(n->ssl);
+ (void)SSL_set_mode(n->ssl, SSL_MODE_AUTO_RETRY);
+ if(!SSL_set_fd(n->ssl, newfd)) {
+ log_crypto_err("could not SSL_set_fd");
+ event_del(&n->c);
+ SSL_free(n->ssl);
+ free(n);
+ goto close_exit;
+ }
+
+ n->rc = rc;
+ n->stats_next = NULL;
+ n->in_stats_list = 0;
+ n->prev = NULL;
+ n->next = rc->busy_list;
+ if(n->next) n->next->prev = n;
+ rc->busy_list = n;
+ rc->active ++;
+
+ /* perform the first nonblocking read already, for windows,
+ * so it can return wouldblock. could be faster too. */
+ remote_control_callback(newfd, EV_READ, n);
+}
+
+/** delete from list */
+static void
+state_list_remove_elem(struct rc_state** list, struct rc_state* todel)
+{
+ if(todel->prev) todel->prev->next = todel->next;
+ else *list = todel->next;
+ if(todel->next) todel->next->prev = todel->prev;
+}
+
+/** delete from stats list */
+static void
+stats_list_remove_elem(struct rc_state** list, struct rc_state* todel)
+{
+ while(*list) {
+ if( (*list) == todel) {
+ *list = (*list)->stats_next;
+ return;
+ }
+ list = &(*list)->stats_next;
+ }
+}
+
+/** decrease active count and remove commpoint from busy list */
+static void
+clean_point(struct daemon_remote* rc, struct rc_state* s)
+{
+ if(s->in_stats_list)
+ stats_list_remove_elem(&rc->stats_list, s);
+ state_list_remove_elem(&rc->busy_list, s);
+ rc->active --;
+ if(s->event_added)
+ event_del(&s->c);
+ if(s->ssl) {
+ SSL_shutdown(s->ssl);
+ SSL_free(s->ssl);
+ }
+ close(s->c.ev_fd);
+ free(s);
+}
+
+static int
+ssl_print_text(SSL* ssl, const char* text)
+{
+ int r;
+ if(!ssl)
+ return 0;
+ ERR_clear_error();
+ if((r=SSL_write(ssl, text, (int)strlen(text))) <= 0) {
+ if(SSL_get_error(ssl, r) == SSL_ERROR_ZERO_RETURN) {
+ VERBOSITY(2, (LOG_WARNING, "in SSL_write, peer "
+ "closed connection"));
+ return 0;
+ }
+ log_crypto_err("could not SSL_write");
+ return 0;
+ }
+ return 1;
+}
+
+/** print text over the ssl connection */
+static int
+ssl_print_vmsg(SSL* ssl, const char* format, va_list args)
+{
+ char msg[1024];
+ vsnprintf(msg, sizeof(msg), format, args);
+ return ssl_print_text(ssl, msg);
+}
+
+/** printf style printing to the ssl connection */
+static int
+ssl_printf(SSL* ssl, const char* format, ...)
+{
+ va_list args;
+ int ret;
+ va_start(args, format);
+ ret = ssl_print_vmsg(ssl, format, args);
+ va_end(args);
+ return ret;
+}
+
+static int
+ssl_read_line(SSL* ssl, char* buf, size_t max)
+{
+ int r;
+ size_t len = 0;
+ if(!ssl)
+ return 0;
+ while(len < max) {
+ ERR_clear_error();
+ if((r=SSL_read(ssl, buf+len, 1)) <= 0) {
+ if(SSL_get_error(ssl, r) == SSL_ERROR_ZERO_RETURN) {
+ buf[len] = 0;
+ return 1;
+ }
+ log_crypto_err("could not SSL_read");
+ return 0;
+ }
+ if(buf[len] == '\n') {
+ /* return string without \n */
+ buf[len] = 0;
+ return 1;
+ }
+ len++;
+ }
+ buf[max-1] = 0;
+ log_msg(LOG_ERR, "control line too long (%d): %s", (int)max, buf);
+ return 0;
+}
+
+/** skip whitespace, return new pointer into string */
+static char*
+skipwhite(char* str)
+{
+ /* EOS \0 is not a space */
+ while( isspace(*str) )
+ str++;
+ return str;
+}
+
+/** send the OK to the control client */
+static void
+send_ok(SSL* ssl)
+{
+ (void)ssl_printf(ssl, "ok\n");
+}
+
+/** get zone argument (if any) or NULL, false on error */
+static int
+get_zone_arg(SSL* ssl, xfrd_state_t* xfrd, char* arg,
+ zone_options_t** zo)
+{
+ const dname_type* dname;
+ if(!arg[0]) {
+ /* no argument present, return NULL */
+ *zo = NULL;
+ return 1;
+ }
+ dname = dname_parse(xfrd->region, arg);
+ if(!dname) {
+ ssl_printf(ssl, "error cannot parse zone name '%s'\n", arg);
+ *zo = NULL;
+ return 0;
+ }
+ *zo = zone_options_find(xfrd->nsd->options, dname);
+ region_recycle(xfrd->region, (void*)dname, dname_total_size(dname));
+ if(!*zo) {
+ ssl_printf(ssl, "error zone %s not configured\n", arg);
+ return 0;
+ }
+ return 1;
+}
+
+/** do the stop command */
+static void
+do_stop(SSL* ssl, xfrd_state_t* xfrd)
+{
+ xfrd->need_to_send_shutdown = 1;
+
+ if(!(xfrd->ipc_handler_flags&EV_WRITE)) {
+ ipc_xfrd_set_listening(xfrd, EV_PERSIST|EV_READ|EV_WRITE);
+ }
+
+ send_ok(ssl);
+}
+
+/** do the log_reopen command, it only needs reload_now */
+static void
+do_log_reopen(SSL* ssl, xfrd_state_t* xfrd)
+{
+ xfrd_set_reload_now(xfrd);
+ send_ok(ssl);
+}
+
+/** do the reload command */
+static void
+do_reload(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ task_new_check_zonefiles(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, zo?(const dname_type*)zo->node.key:NULL);
+ xfrd_set_reload_now(xfrd);
+ send_ok(ssl);
+}
+
+/** do the write command */
+static void
+do_write(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ task_new_write_zonefiles(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, zo?(const dname_type*)zo->node.key:NULL);
+ xfrd_set_reload_now(xfrd);
+ send_ok(ssl);
+}
+
+/** do the notify command */
+static void
+do_notify(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ if(zo) {
+ struct notify_zone_t* n = (struct notify_zone_t*)rbtree_search(
+ xfrd->notify_zones, (const dname_type*)zo->node.key);
+ if(n) {
+ xfrd_notify_start(n);
+ send_ok(ssl);
+ } else {
+ ssl_printf(ssl, "error zone does not have notify\n");
+ }
+ } else {
+ struct notify_zone_t* n;
+ RBTREE_FOR(n, struct notify_zone_t*, xfrd->notify_zones) {
+ xfrd_notify_start(n);
+ }
+ send_ok(ssl);
+ }
+}
+
+/** do the transfer command */
+static void
+do_transfer(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ xfrd_zone_t* zone;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ if(zo) {
+ zone = (xfrd_zone_t*)rbtree_search(xfrd->zones, (const
+ dname_type*)zo->node.key);
+ if(zone) {
+ xfrd_handle_notify_and_start_xfr(zone, NULL);
+ send_ok(ssl);
+ } else {
+ ssl_printf(ssl, "error zone not slave\n");
+ }
+ } else {
+ RBTREE_FOR(zone, xfrd_zone_t*, xfrd->zones) {
+ xfrd_handle_notify_and_start_xfr(zone, NULL);
+ }
+ ssl_printf(ssl, "ok, %u zones\n", (unsigned)xfrd->zones->count);
+ }
+}
+
+/** force transfer a zone */
+static void
+force_transfer_zone(xfrd_zone_t* zone)
+{
+ /* if in TCP transaction, stop it immediately. */
+ if(zone->tcp_conn != -1)
+ xfrd_tcp_release(xfrd->tcp_set, zone);
+ else if(zone->zone_handler.ev_fd != -1)
+ xfrd_udp_release(zone);
+ /* pretend we no longer have it and force the
+ * zone to be downloaded (even with the same serial, with AXFR) */
+ zone->soa_disk_acquired = 0;
+ xfrd_handle_notify_and_start_xfr(zone, NULL);
+}
+
+/** do the force transfer command */
+static void
+do_force_transfer(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ xfrd_zone_t* zone;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ if(zo) {
+ zone = (xfrd_zone_t*)rbtree_search(xfrd->zones, (const
+ dname_type*)zo->node.key);
+ if(zone) {
+ force_transfer_zone(zone);
+ send_ok(ssl);
+ } else {
+ ssl_printf(ssl, "error zone not slave\n");
+ }
+ } else {
+ RBTREE_FOR(zone, xfrd_zone_t*, xfrd->zones) {
+ force_transfer_zone(zone);
+ }
+ ssl_printf(ssl, "ok, %u zones\n", (unsigned)xfrd->zones->count);
+ }
+}
+
+static int
+print_soa_status(SSL* ssl, const char* str, xfrd_soa_t* soa, time_t acq)
+{
+ if(acq) {
+ if(!ssl_printf(ssl, " %s: \"%u since %s\"\n", str,
+ (unsigned)ntohl(soa->serial), xfrd_pretty_time(acq)))
+ return 0;
+ } else {
+ if(!ssl_printf(ssl, " %s: none\n", str))
+ return 0;
+ }
+ return 1;
+}
+
+/** print zonestatus for one domain */
+static int
+print_zonestatus(SSL* ssl, xfrd_state_t* xfrd, zone_options_t* zo)
+{
+ xfrd_zone_t* xz = (xfrd_zone_t*)rbtree_search(xfrd->zones,
+ (const dname_type*)zo->node.key);
+ struct notify_zone_t* nz = (struct notify_zone_t*)rbtree_search(
+ xfrd->notify_zones, (const dname_type*)zo->node.key);
+ if(!ssl_printf(ssl, "zone: %s\n", zo->name))
+ return 0;
+ if(!zo->part_of_config) {
+ if(!ssl_printf(ssl, " pattern: %s\n", zo->pattern->pname))
+ return 0;
+ }
+ if(nz) {
+ if(nz->is_waiting) {
+ if(!ssl_printf(ssl, " notify: \"waiting-for-fd\"\n"))
+ return 0;
+ } else if(nz->notify_send_enable) {
+ if(!ssl_printf(ssl, " notify: \"sent try %d "
+ "to %s with serial %u\"\n", nz->notify_retry,
+ nz->notify_current->ip_address_spec,
+ (unsigned)ntohl(nz->current_soa->serial)))
+ return 0;
+ }
+ }
+ if(!xz) {
+ if(!ssl_printf(ssl, " state: master\n"))
+ return 0;
+ return 1;
+ }
+ if(!ssl_printf(ssl, " state: %s\n",
+ (xz->state == xfrd_zone_ok)?"ok":(
+ (xz->state == xfrd_zone_expired)?"expired":"refreshing")))
+ return 0;
+ if(!print_soa_status(ssl, "served-serial", &xz->soa_nsd,
+ xz->soa_nsd_acquired))
+ return 0;
+ if(!print_soa_status(ssl, "commit-serial", &xz->soa_disk,
+ xz->soa_disk_acquired))
+ return 0;
+ if(xz->round_num != -1) {
+ if(!print_soa_status(ssl, "notified-serial", &xz->soa_notified,
+ xz->soa_notified_acquired))
+ return 0;
+ }
+
+ /* UDP */
+ if(xz->udp_waiting) {
+ if(!ssl_printf(ssl, " transfer: \"waiting-for-UDP-fd\"\n"))
+ return 0;
+ } else if(xz->zone_handler.ev_fd != -1 && xz->tcp_conn == -1) {
+ if(!ssl_printf(ssl, " transfer: \"sent UDP to %s\"\n",
+ xz->master->ip_address_spec))
+ return 0;
+ }
+
+ /* TCP */
+ if(xz->tcp_waiting) {
+ if(!ssl_printf(ssl, " transfer: \"waiting-for-TCP-fd\"\n"))
+ return 0;
+ } else if(xz->tcp_conn != -1) {
+ if(!ssl_printf(ssl, " transfer: \"TCP connected to %s\"\n",
+ xz->master->ip_address_spec))
+ return 0;
+ }
+
+ return 1;
+}
+
+/** do the zonestatus command */
+static void
+do_zonestatus(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zo;
+ if(!get_zone_arg(ssl, xfrd, arg, &zo))
+ return;
+ if(zo) (void)print_zonestatus(ssl, xfrd, zo);
+ else {
+ RBTREE_FOR(zo, zone_options_t*,
+ xfrd->nsd->options->zone_options) {
+ if(!print_zonestatus(ssl, xfrd, zo))
+ return;
+ }
+ }
+}
+
+/** do the verbosity command */
+static void
+do_verbosity(SSL* ssl, char* str)
+{
+ int val = atoi(str);
+ if(strcmp(str, "") == 0) {
+ ssl_printf(ssl, "verbosity %d\n", verbosity);
+ return;
+ }
+ if(val == 0 && strcmp(str, "0") != 0) {
+ ssl_printf(ssl, "error in verbosity number syntax: %s\n", str);
+ return;
+ }
+ verbosity = val;
+ task_new_set_verbosity(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, val);
+ xfrd_set_reload_now(xfrd);
+ send_ok(ssl);
+}
+
+/** find second argument, modifies string */
+static int
+find_arg2(SSL* ssl, char* arg, char** arg2)
+{
+ char* as = strrchr(arg, ' ');
+ if(as) {
+ as[0]=0;
+ *arg2 = as+1;
+ while(isspace(*as) && as > arg)
+ as--;
+ as[0]=0;
+ return 1;
+ }
+ ssl_printf(ssl, "error could not find next argument "
+ "after %s\n", arg);
+ return 0;
+}
+
+/** do the status command */
+static void
+do_status(SSL* ssl, xfrd_state_t* xfrd)
+{
+ if(!ssl_printf(ssl, "version: %s\n", PACKAGE_VERSION))
+ return;
+ if(!ssl_printf(ssl, "verbosity: %d\n", verbosity))
+ return;
+#ifdef RATELIMIT
+ if(!ssl_printf(ssl, "ratelimit: %d\n",
+ (int)xfrd->nsd->options->rrl_ratelimit))
+ return;
+#else
+ (void)xfrd;
+#endif
+}
+
+/** do the stats command */
+static void
+do_stats(struct daemon_remote* rc, int peek, struct rc_state* rs)
+{
+#ifdef BIND8_STATS
+ /* queue up to get stats after a reload is done (to gather statistics
+ * from the servers) */
+ assert(!rs->in_stats_list);
+ if(peek) rs->in_stats_list = 2;
+ else rs->in_stats_list = 1;
+ rs->stats_next = rc->stats_list;
+ rc->stats_list = rs;
+ /* block the tcp waiting for the reload */
+ event_del(&rs->c);
+ rs->event_added = 0;
+ /* force a reload */
+ xfrd_set_reload_now(xfrd);
+#else
+ (void)rc; (void)peek;
+ (void)ssl_printf(rs->ssl, "error no stats enabled at compile time\n");
+#endif /* BIND8_STATS */
+}
+
+/** do the addzone command */
+static void
+do_addzone(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ zone_options_t* zopt;
+ char* arg2 = NULL;
+ if(!find_arg2(ssl, arg, &arg2))
+ return;
+
+ /* if we add it to the xfrd now, then xfrd could download AXFR and
+ * store it and the NSD-reload would see it in the difffile before
+ * it sees the add-config task.
+ */
+ /* thus: AXFRs and IXFRs must store the pattern name in the
+ * difffile, so that it can be added when the AXFR or IXFR is seen.
+ */
+
+ /* check that the pattern exists */
+ if(!rbtree_search(xfrd->nsd->options->patterns, arg2)) {
+ (void)ssl_printf(ssl, "error pattern does not exist\n");
+ return;
+ }
+
+ /* add to zonelist and adds to config in memory */
+ zopt = zone_list_add(xfrd->nsd->options, arg, arg2);
+ if(!zopt) {
+ /* also dname parse error here */
+ (void)ssl_printf(ssl, "error could not add zonelist entry\n");
+ return;
+ }
+ /* make addzone task and schedule reload */
+ task_new_add_zone(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, arg, arg2);
+ xfrd_set_reload_now(xfrd);
+ /* add to xfrd - notify (for master and slaves) */
+ init_notify_send(xfrd->notify_zones, xfrd->region, zopt);
+ /* add to xfrd - slave */
+ if(zone_is_slave(zopt)) {
+ xfrd_init_slave_zone(xfrd, zopt);
+ }
+
+ send_ok(ssl);
+}
+
+/** do the delzone command */
+static void
+do_delzone(SSL* ssl, xfrd_state_t* xfrd, char* arg)
+{
+ const dname_type* dname;
+ zone_options_t* zopt;
+
+ dname = dname_parse(xfrd->region, arg);
+ if(!dname) {
+ (void)ssl_printf(ssl, "error cannot parse zone name\n");
+ return;
+ }
+
+ /* see if we have the zone in question */
+ zopt = zone_options_find(xfrd->nsd->options, dname);
+ if(!zopt) {
+ region_recycle(xfrd->region, (void*)dname,
+ dname_total_size(dname));
+ /* nothing to do */
+ if(!ssl_printf(ssl, "warning zone %s not present\n", arg))
+ return;
+ send_ok(ssl);
+ return;
+ }
+
+ /* see if it can be deleted */
+ if(zopt->part_of_config) {
+ region_recycle(xfrd->region, (void*)dname,
+ dname_total_size(dname));
+ (void)ssl_printf(ssl, "error zone defined in nsd.conf, "
+ "cannot delete it in this manner: remove it from "
+ "nsd.conf yourself and repattern\n");
+ return;
+ }
+
+ /* create deletion task */
+ task_new_del_zone(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, dname);
+ xfrd_set_reload_now(xfrd);
+ /* delete it in xfrd */
+ if(zone_is_slave(zopt)) {
+ xfrd_del_slave_zone(xfrd, dname);
+ }
+ xfrd_del_notify(xfrd, dname);
+ /* delete from config */
+ zone_list_del(xfrd->nsd->options, zopt);
+
+ region_recycle(xfrd->region, (void*)dname, dname_total_size(dname));
+ send_ok(ssl);
+}
+
+/** remove TSIG key from config and add task so that reload does too */
+static void remove_key(xfrd_state_t* xfrd, const char* kname)
+{
+ /* add task before deletion because the name string could be deleted */
+ task_new_del_key(xfrd->nsd->task[xfrd->nsd->mytask], xfrd->last_task,
+ kname);
+ key_options_remove(xfrd->nsd->options, kname);
+ xfrd_set_reload_now(xfrd); /* this is executed when the current control
+ command ends, so that all the config changes are bunched up */
+}
+
+/** add TSIG key to config and add task so that reload does too */
+static void add_key(xfrd_state_t* xfrd, key_options_t* k)
+{
+ key_options_add_modify(xfrd->nsd->options, k);
+ task_new_add_key(xfrd->nsd->task[xfrd->nsd->mytask], xfrd->last_task,
+ k);
+ xfrd_set_reload_now(xfrd);
+}
+
+/** check if keys have changed */
+static void repat_keys(xfrd_state_t* xfrd, nsd_options_t* newopt)
+{
+ nsd_options_t* oldopt = xfrd->nsd->options;
+ key_options_t* k;
+ /* find deleted keys */
+ k = (key_options_t*)rbtree_first(oldopt->keys);
+ while((rbnode_t*)k != RBTREE_NULL) {
+ key_options_t* next = (key_options_t*)rbtree_next(
+ (rbnode_t*)k);
+ if(!key_options_find(newopt, k->name))
+ remove_key(xfrd, k->name);
+ k = next;
+ }
+ /* find added or changed keys */
+ RBTREE_FOR(k, key_options_t*, newopt->keys) {
+ key_options_t* origk = key_options_find(oldopt, k->name);
+ if(!origk)
+ add_key(xfrd, k);
+ else if(!key_options_equal(k, origk))
+ add_key(xfrd, k);
+ }
+}
+
+/** find zone given the implicit pattern */
+static const dname_type*
+parse_implicit_name(xfrd_state_t* xfrd,const char* pname)
+{
+ if(strncmp(pname, PATTERN_IMPLICIT_MARKER,
+ strlen(PATTERN_IMPLICIT_MARKER)) != 0)
+ return NULL;
+ return dname_parse(xfrd->region, pname +
+ strlen(PATTERN_IMPLICIT_MARKER));
+}
+
+/** remove cfgzone and add task so that reload does too */
+static void
+remove_cfgzone(xfrd_state_t* xfrd, const char* pname)
+{
+ /* dname and find the zone for the implicit pattern */
+ zone_options_t* zopt = NULL;
+ const dname_type* dname = parse_implicit_name(xfrd, pname);
+ if(!dname) {
+ /* should have a parseable name, but it did not */
+ return;
+ }
+
+ /* find the zone entry for the implicit pattern */
+ zopt = zone_options_find(xfrd->nsd->options, dname);
+ if(!zopt) {
+ /* this should not happen; implicit pattern has zone entry */
+ region_recycle(xfrd->region, (void*)dname,
+ dname_total_size(dname));
+ return;
+ }
+
+ /* create deletion task */
+ task_new_del_zone(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, dname);
+ xfrd_set_reload_now(xfrd);
+ /* delete it in xfrd */
+ if(zone_is_slave(zopt)) {
+ xfrd_del_slave_zone(xfrd, dname);
+ }
+ xfrd_del_notify(xfrd, dname);
+
+ /* delete from zoneoptions */
+ zone_options_delete(xfrd->nsd->options, zopt);
+
+ /* recycle parsed dname */
+ region_recycle(xfrd->region, (void*)dname, dname_total_size(dname));
+}
+
+/** add cfgzone and add task so that reload does too */
+static void
+add_cfgzone(xfrd_state_t* xfrd, const char* pname)
+{
+ /* add to our zonelist */
+ zone_options_t* zopt = zone_options_create(xfrd->nsd->options->region);
+ if(!zopt)
+ return;
+ zopt->part_of_config = 1;
+ zopt->name = region_strdup(xfrd->nsd->options->region,
+ pname + strlen(PATTERN_IMPLICIT_MARKER));
+ zopt->pattern = pattern_options_find(xfrd->nsd->options, pname);
+ if(!zopt->name || !zopt->pattern)
+ return;
+ if(!nsd_options_insert_zone(xfrd->nsd->options, zopt)) {
+ log_msg(LOG_ERR, "bad domain name or duplicate zone '%s' "
+ "pattern %s", zopt->name, pname);
+ }
+
+ /* make addzone task and schedule reload */
+ task_new_add_zone(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, zopt->name, pname);
+ xfrd_set_reload_now(xfrd);
+ /* add to xfrd - notify (for master and slaves) */
+ init_notify_send(xfrd->notify_zones, xfrd->region, zopt);
+ /* add to xfrd - slave */
+ if(zone_is_slave(zopt)) {
+ xfrd_init_slave_zone(xfrd, zopt);
+ }
+}
+
+/** remove pattern and add task so that reload does too */
+static void
+remove_pat(xfrd_state_t* xfrd, const char* name)
+{
+ /* add task before deletion, because name-string could be deleted */
+ task_new_del_pattern(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, name);
+ pattern_options_remove(xfrd->nsd->options, name);
+ xfrd_set_reload_now(xfrd);
+}
+
+/** add pattern and add task so that reload does too */
+static void
+add_pat(xfrd_state_t* xfrd, pattern_options_t* p)
+{
+ pattern_options_add_modify(xfrd->nsd->options, p);
+ task_new_add_pattern(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, p);
+ xfrd_set_reload_now(xfrd);
+}
+
+/** interrupt zones that are using changed or removed patterns */
+static void
+repat_interrupt_zones(xfrd_state_t* xfrd, nsd_options_t* newopt)
+{
+ /* if masterlist changed:
+ * interrupt slave zone (UDP or TCP) transfers.
+ * slave zones reset master to start of list.
+ */
+ xfrd_zone_t* xz;
+ struct notify_zone_t* nz;
+ RBTREE_FOR(xz, xfrd_zone_t*, xfrd->zones) {
+ pattern_options_t* oldp = xz->zone_options->pattern;
+ pattern_options_t* newp = pattern_options_find(newopt,
+ oldp->pname);
+ if(!newp || !acl_list_equal(oldp->request_xfr,
+ newp->request_xfr)) {
+ /* interrupt transfer */
+ if(xz->tcp_conn != -1) {
+ xfrd_tcp_release(xfrd->tcp_set, xz);
+ xfrd_set_refresh_now(xz);
+ } else if(xz->zone_handler.ev_fd != -1) {
+ xfrd_udp_release(xz);
+ xfrd_set_refresh_now(xz);
+ }
+ xz->master = 0;
+ xz->master_num = 0;
+ xz->next_master = -1;
+ xz->round_num = 0; /* fresh set of retries */
+ }
+ }
+ /* if notify list changed:
+ * interrupt notify that is busy.
+ * reset notify to start of list. (clear all other reset_notify)
+ */
+ RBTREE_FOR(nz, struct notify_zone_t*, xfrd->notify_zones) {
+ pattern_options_t* oldp = nz->options->pattern;
+ pattern_options_t* newp = pattern_options_find(newopt,
+ oldp->pname);
+ if(!newp || !acl_list_equal(oldp->notify, newp->notify)) {
+ /* interrupt notify */
+ if(nz->notify_send_enable) {
+ notify_disable(nz);
+ /* set to restart the notify after the
+ * pattern has been changed. */
+ nz->notify_restart = 2;
+ } else {
+ nz->notify_restart = 1;
+ }
+ } else {
+ nz->notify_restart = 0;
+ }
+ }
+}
+
+/** for notify, after the pattern changes, restart the affected notifies */
+static void
+repat_interrupt_notify_start(xfrd_state_t* xfrd)
+{
+ struct notify_zone_t* nz;
+ RBTREE_FOR(nz, struct notify_zone_t*, xfrd->notify_zones) {
+ if(nz->notify_restart) {
+ if(nz->notify_current)
+ nz->notify_current = nz->options->pattern->notify;
+ if(nz->notify_restart == 2) {
+ if(nz->notify_restart)
+ xfrd_notify_start(nz);
+ }
+ }
+ }
+}
+
+/** check if patterns have changed */
+static void
+repat_patterns(xfrd_state_t* xfrd, nsd_options_t* newopt)
+{
+ /* zones that use changed patterns must have:
+ * - their AXFR/IXFR interrupted: try again, acl may have changed.
+ * if the old master/key still exists, OK, fix master-numptrs and
+ * keep going. Otherwise, stop xfer and reset TSIG.
+ * - send NOTIFY reset to start of NOTIFY list (and TSIG reset).
+ */
+ nsd_options_t* oldopt = xfrd->nsd->options;
+ pattern_options_t* p;
+ int search_zones = 0;
+
+ repat_interrupt_zones(xfrd, newopt);
+ /* find deleted patterns */
+ p = (pattern_options_t*)rbtree_first(oldopt->patterns);
+ while((rbnode_t*)p != RBTREE_NULL) {
+ pattern_options_t* next = (pattern_options_t*)rbtree_next(
+ (rbnode_t*)p);
+ if(!pattern_options_find(newopt, p->pname)) {
+ if(p->implicit) {
+ /* first remove its zone */
+ VERBOSITY(1, (LOG_INFO, "zone removed from config: %s", p->pname + strlen(PATTERN_IMPLICIT_MARKER)));
+ remove_cfgzone(xfrd, p->pname);
+ }
+ remove_pat(xfrd, p->pname);
+ }
+ p = next;
+ }
+ /* find added or changed patterns */
+ RBTREE_FOR(p, pattern_options_t*, newopt->patterns) {
+ pattern_options_t* origp = pattern_options_find(oldopt,
+ p->pname);
+ if(!origp) {
+ /* no zones can use it, no zone_interrupt needed */
+ add_pat(xfrd, p);
+ if(p->implicit) {
+ VERBOSITY(1, (LOG_INFO, "zone added to config: %s", p->pname + strlen(PATTERN_IMPLICIT_MARKER)));
+ add_cfgzone(xfrd, p->pname);
+ }
+ } else if(!pattern_options_equal(p, origp)) {
+ uint8_t newstate = 0;
+ if (p->request_xfr && !origp->request_xfr) {
+ newstate = REPAT_SLAVE;
+ } else if (!p->request_xfr && origp->request_xfr) {
+ newstate = REPAT_MASTER;
+ }
+ add_pat(xfrd, p);
+ if (p->implicit && newstate) {
+ const dname_type* dname =
+ parse_implicit_name(xfrd, p->pname);
+ if (dname) {
+ if (newstate == REPAT_SLAVE) {
+ zone_options_t* zopt =
+ zone_options_find(
+ oldopt, dname);
+ if (zopt) {
+ xfrd_init_slave_zone(
+ xfrd, zopt);
+ }
+ } else if (newstate == REPAT_MASTER) {
+ xfrd_del_slave_zone(xfrd,
+ dname);
+ }
+ region_recycle(xfrd->region,
+ (void*)dname,
+ dname_total_size(dname));
+ }
+ } else if(!p->implicit && newstate) {
+ /* search all zones with this pattern */
+ search_zones = 1;
+ origp->xfrd_flags = newstate;
+ }
+ }
+ }
+ if (search_zones) {
+ zone_options_t* zone_opt;
+ /* search in oldopt because 1) it contains zonelist zones,
+ * and 2) you need oldopt(existing) to call xfrd_init */
+ RBTREE_FOR(zone_opt, zone_options_t*, oldopt->zone_options) {
+ pattern_options_t* oldp = zone_opt->pattern;
+ if (!oldp->implicit) {
+ if (oldp->xfrd_flags == REPAT_SLAVE) {
+ /* xfrd needs stable reference so get
+ * it from the oldopt(modified) tree */
+ xfrd_init_slave_zone(xfrd, zone_opt);
+ } else if (oldp->xfrd_flags == REPAT_MASTER) {
+ xfrd_del_slave_zone(xfrd,
+ (const dname_type*)
+ zone_opt->node.key);
+ }
+ oldp->xfrd_flags = 0;
+ }
+ }
+ }
+ repat_interrupt_notify_start(xfrd);
+}
+
+/** true if options that can be set via repat are different. */
+static int
+repat_options_changed(xfrd_state_t* xfrd, nsd_options_t* newopt)
+{
+#ifdef RATELIMIT
+ if(xfrd->nsd->options->rrl_ratelimit != newopt->rrl_ratelimit)
+ return 1;
+ if(xfrd->nsd->options->rrl_whitelist_ratelimit != newopt->rrl_whitelist_ratelimit)
+ return 1;
+ if(xfrd->nsd->options->rrl_slip != newopt->rrl_slip)
+ return 1;
+#else
+ (void)xfrd; (void)newopt;
+#endif
+ return 0;
+}
+
+/** check if global options have changed */
+static void
+repat_options(xfrd_state_t* xfrd, nsd_options_t* newopt)
+{
+ if(repat_options_changed(xfrd, newopt)) {
+ /* update our options */
+#ifdef RATELIMIT
+ xfrd->nsd->options->rrl_ratelimit = newopt->rrl_ratelimit;
+ xfrd->nsd->options->rrl_whitelist_ratelimit = newopt->rrl_whitelist_ratelimit;
+ xfrd->nsd->options->rrl_slip = newopt->rrl_slip;
+#endif
+ task_new_opt_change(xfrd->nsd->task[xfrd->nsd->mytask],
+ xfrd->last_task, newopt);
+ xfrd_set_reload_now(xfrd);
+ }
+}
+
+/** print errors over ssl, gets pointer-to-pointer to ssl, so it can set
+ * the pointer to NULL on failure and stop printing */
+static void
+print_ssl_cfg_err(void* arg, const char* str)
+{
+ SSL** ssl = (SSL**)arg;
+ if(!*ssl) return;
+ if(!ssl_printf(*ssl, "%s", str))
+ *ssl = NULL; /* failed, stop printing */
+}
+
+/** do the repattern command: reread config file and apply keys, patterns */
+static void
+do_repattern(SSL* ssl, xfrd_state_t* xfrd)
+{
+ region_type* region = region_create(xalloc, free);
+ nsd_options_t* opt;
+ const char* cfgfile = xfrd->nsd->options->configfile;
+
+ /* check chroot and configfile, if possible to reread */
+ if(xfrd->nsd->chrootdir) {
+ size_t l = strlen(xfrd->nsd->chrootdir);
+ while(l>0 && xfrd->nsd->chrootdir[l-1] == '/')
+ --l;
+ if(strncmp(xfrd->nsd->chrootdir, cfgfile, l) != 0) {
+ ssl_printf(ssl, "error %s is not relative to %s: "
+ "chroot prevents reread of config\n",
+ cfgfile, xfrd->nsd->chrootdir);
+ region_destroy(region);
+ return;
+ }
+ cfgfile += l;
+ }
+
+ ssl_printf(ssl, "reconfig start, read %s\n", cfgfile);
+ opt = nsd_options_create(region);
+ if(!parse_options_file(opt, cfgfile, &print_ssl_cfg_err, &ssl)) {
+ /* error already printed */
+ region_destroy(region);
+ return;
+ }
+ /* check for differences in TSIG keys and patterns, and apply,
+ * first the keys, so that pattern->keyptr can be set right. */
+ repat_keys(xfrd, opt);
+ repat_patterns(xfrd, opt);
+ repat_options(xfrd, opt);
+ send_ok(ssl);
+ region_destroy(region);
+}
+
+/** do the serverpid command: printout pid of server process */
+static void
+do_serverpid(SSL* ssl, xfrd_state_t* xfrd)
+{
+ (void)ssl_printf(ssl, "%u\n", (unsigned)xfrd->reload_pid);
+}
+
+/** check for name with end-of-string, space or tab after it */
+static int
+cmdcmp(char* p, const char* cmd, size_t len)
+{
+ return strncmp(p,cmd,len)==0 && (p[len]==0||p[len]==' '||p[len]=='\t');
+}
+
+/** execute a remote control command */
+static void
+execute_cmd(struct daemon_remote* rc, SSL* ssl, char* cmd, struct rc_state* rs)
+{
+ char* p = skipwhite(cmd);
+ /* compare command */
+ if(cmdcmp(p, "stop", 4)) {
+ do_stop(ssl, rc->xfrd);
+ } else if(cmdcmp(p, "reload", 6)) {
+ do_reload(ssl, rc->xfrd, skipwhite(p+6));
+ } else if(cmdcmp(p, "write", 5)) {
+ do_write(ssl, rc->xfrd, skipwhite(p+5));
+ } else if(cmdcmp(p, "status", 6)) {
+ do_status(ssl, rc->xfrd);
+ } else if(cmdcmp(p, "stats_noreset", 13)) {
+ do_stats(rc, 1, rs);
+ } else if(cmdcmp(p, "stats", 5)) {
+ do_stats(rc, 0, rs);
+ } else if(cmdcmp(p, "log_reopen", 10)) {
+ do_log_reopen(ssl, rc->xfrd);
+ } else if(cmdcmp(p, "addzone", 7)) {
+ do_addzone(ssl, rc->xfrd, skipwhite(p+7));
+ } else if(cmdcmp(p, "delzone", 7)) {
+ do_delzone(ssl, rc->xfrd, skipwhite(p+7));
+ } else if(cmdcmp(p, "notify", 6)) {
+ do_notify(ssl, rc->xfrd, skipwhite(p+6));
+ } else if(cmdcmp(p, "transfer", 8)) {
+ do_transfer(ssl, rc->xfrd, skipwhite(p+8));
+ } else if(cmdcmp(p, "force_transfer", 14)) {
+ do_force_transfer(ssl, rc->xfrd, skipwhite(p+14));
+ } else if(cmdcmp(p, "zonestatus", 10)) {
+ do_zonestatus(ssl, rc->xfrd, skipwhite(p+10));
+ } else if(cmdcmp(p, "verbosity", 9)) {
+ do_verbosity(ssl, skipwhite(p+9));
+ } else if(cmdcmp(p, "repattern", 9)) {
+ do_repattern(ssl, rc->xfrd);
+ } else if(cmdcmp(p, "reconfig", 8)) {
+ do_repattern(ssl, rc->xfrd);
+ } else if(cmdcmp(p, "serverpid", 9)) {
+ do_serverpid(ssl, rc->xfrd);
+ } else {
+ (void)ssl_printf(ssl, "error unknown command '%s'\n", p);
+ }
+}
+
+/** handle remote control request */
+static void
+handle_req(struct daemon_remote* rc, struct rc_state* s, SSL* ssl)
+{
+ int r;
+ char pre[10];
+ char magic[8];
+ char buf[1024];
+ if (fcntl(s->c.ev_fd, F_SETFL, 0) == -1) { /* set blocking */
+ log_msg(LOG_ERR, "cannot fcntl rc: %s", strerror(errno));
+ }
+
+ /* try to read magic NSDCT[version]_space_ string */
+ ERR_clear_error();
+ if((r=SSL_read(ssl, magic, (int)sizeof(magic)-1)) <= 0) {
+ if(SSL_get_error(ssl, r) == SSL_ERROR_ZERO_RETURN)
+ return;
+ log_crypto_err("could not SSL_read");
+ return;
+ }
+ magic[7] = 0;
+ if( r != 7 || strncmp(magic, "NSDCT", 5) != 0) {
+ VERBOSITY(2, (LOG_INFO, "control connection has bad header"));
+ /* probably wrong tool connected, ignore it completely */
+ return;
+ }
+
+ /* read the command line */
+ if(!ssl_read_line(ssl, buf, sizeof(buf))) {
+ return;
+ }
+ snprintf(pre, sizeof(pre), "NSDCT%d ", NSD_CONTROL_VERSION);
+ if(strcmp(magic, pre) != 0) {
+ VERBOSITY(2, (LOG_INFO, "control connection had bad "
+ "version %s, cmd: %s", magic, buf));
+ ssl_printf(ssl, "error version mismatch\n");
+ return;
+ }
+ VERBOSITY(2, (LOG_INFO, "control cmd: %s", buf));
+
+ /* figure out what to do */
+ execute_cmd(rc, ssl, buf, s);
+}
+
+static void
+remote_control_callback(int fd, short event, void* arg)
+{
+ struct rc_state* s = (struct rc_state*)arg;
+ struct daemon_remote* rc = s->rc;
+ int r;
+ if( (event&EV_TIMEOUT) ) {
+ log_msg(LOG_ERR, "remote control timed out");
+ clean_point(rc, s);
+ return;
+ }
+ /* (continue to) setup the SSL connection */
+ ERR_clear_error();
+ r = SSL_do_handshake(s->ssl);
+ if(r != 1) {
+ int r2 = SSL_get_error(s->ssl, r);
+ if(r2 == SSL_ERROR_WANT_READ) {
+ if(s->shake_state == rc_hs_read) {
+ /* try again later */
+ return;
+ }
+ s->shake_state = rc_hs_read;
+ event_del(&s->c);
+ event_set(&s->c, fd, EV_PERSIST|EV_TIMEOUT|EV_READ,
+ remote_control_callback, s);
+ if(event_base_set(xfrd->event_base, &s->c) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot set event_base");
+ if(event_add(&s->c, &s->tval) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot add event");
+ return;
+ } else if(r2 == SSL_ERROR_WANT_WRITE) {
+ if(s->shake_state == rc_hs_write) {
+ /* try again later */
+ return;
+ }
+ s->shake_state = rc_hs_write;
+ event_del(&s->c);
+ event_set(&s->c, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE,
+ remote_control_callback, s);
+ if(event_base_set(xfrd->event_base, &s->c) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot set event_base");
+ if(event_add(&s->c, &s->tval) != 0)
+ log_msg(LOG_ERR, "remote_accept: cannot add event");
+ return;
+ } else {
+ if(r == 0)
+ log_msg(LOG_ERR, "remote control connection closed prematurely");
+ log_crypto_err("remote control failed ssl");
+ clean_point(rc, s);
+ return;
+ }
+ }
+ s->shake_state = rc_none;
+
+ /* once handshake has completed, check authentication */
+ if(SSL_get_verify_result(s->ssl) == X509_V_OK) {
+ X509* x = SSL_get_peer_certificate(s->ssl);
+ if(!x) {
+ VERBOSITY(2, (LOG_INFO, "remote control connection "
+ "provided no client certificate"));
+ clean_point(rc, s);
+ return;
+ }
+ VERBOSITY(3, (LOG_INFO, "remote control connection authenticated"));
+ X509_free(x);
+ } else {
+ VERBOSITY(2, (LOG_INFO, "remote control connection failed to "
+ "authenticate with client certificate"));
+ clean_point(rc, s);
+ return;
+ }
+
+ /* if OK start to actually handle the request */
+ handle_req(rc, s, s->ssl);
+
+ if(!s->in_stats_list) {
+ VERBOSITY(3, (LOG_INFO, "remote control operation completed"));
+ clean_point(rc, s);
+ }
+}
+
+#ifdef BIND8_STATS
+static const char*
+opcode2str(int o)
+{
+ switch(o) {
+ case OPCODE_QUERY: return "QUERY";
+ case OPCODE_IQUERY: return "IQUERY";
+ case OPCODE_STATUS: return "STATUS";
+ case OPCODE_NOTIFY: return "NOTIFY";
+ case OPCODE_UPDATE: return "UPDATE";
+ default: return "OTHER";
+ }
+}
+
+/** print long number */
+static int
+print_longnum(SSL* ssl, char* desc, uint64_t x)
+{
+ if(x > (uint64_t)1024*1024*1024) {
+ /* more than a Gb */
+ size_t front = (size_t)(x / (uint64_t)1000000);
+ size_t back = (size_t)(x % (uint64_t)1000000);
+ return ssl_printf(ssl, "%s%u%6.6u\n", desc,
+ (unsigned)front, (unsigned)back);
+ } else {
+ return ssl_printf(ssl, "%s%u\n", desc, (unsigned)x);
+ }
+}
+
+static void
+print_stats(SSL* ssl, xfrd_state_t* xfrd, struct timeval* now)
+{
+ const char* rcstr[] = {"NOERROR", "FORMERR", "SERVFAIL", "NXDOMAIN",
+ "NOTIMP", "REFUSED", "YXDOMAIN", "YXRRSET", "NXRRSET", "NOTAUTH",
+ "NOTZONE", "RCODE11", "RCODE12", "RCODE13", "RCODE14", "RCODE15",
+ "BADVERS"
+ };
+ size_t i;
+ stc_t total = 0;
+ struct timeval elapsed, uptime;
+
+ /* per CPU and total */
+ for(i=0; i<xfrd->nsd->child_count; i++) {
+ if(!ssl_printf(ssl, "server%d.queries=%u\n", (int)i,
+ (unsigned)xfrd->nsd->children[i].query_count))
+ return;
+ total += xfrd->nsd->children[i].query_count;
+ }
+ if(!ssl_printf(ssl, "num.queries=%u\n", (unsigned)total))
+ return;
+
+ /* time elapsed and uptime (in seconds) */
+ timeval_subtract(&uptime, now, &xfrd->nsd->rc->boot_time);
+ timeval_subtract(&elapsed, now, &xfrd->nsd->rc->stats_time);
+ if(!ssl_printf(ssl, "time.boot=%u.%6.6u\n",
+ (unsigned)uptime.tv_sec, (unsigned)uptime.tv_usec))
+ return;
+ if(!ssl_printf(ssl, "time.elapsed=%u.%6.6u\n",
+ (unsigned)elapsed.tv_sec, (unsigned)elapsed.tv_usec))
+ return;
+
+ /* mem info, database on disksize */
+ if(!print_longnum(ssl, "size.db.disk=", xfrd->nsd->st.db_disk))
+ return;
+ if(!print_longnum(ssl, "size.db.mem=", xfrd->nsd->st.db_mem))
+ return;
+ if(!print_longnum(ssl, "size.xfrd.mem=", region_get_mem(xfrd->region)))
+ return;
+ if(!print_longnum(ssl, "size.config.disk=",
+ xfrd->nsd->options->zonelist_off))
+ return;
+ if(!print_longnum(ssl, "size.config.mem=", region_get_mem(
+ xfrd->nsd->options->region)))
+ return;
+
+ for(i=0; i<= 255; i++) {
+ if(inhibit_zero && xfrd->nsd->st.qtype[i] == 0 &&
+ strncmp(rrtype_to_string(i), "TYPE", 4) == 0)
+ continue;
+ if(!ssl_printf(ssl, "num.type.%s=%u\n",
+ rrtype_to_string(i), (unsigned)xfrd->nsd->st.qtype[i]))
+ return;
+ }
+
+ /* opcode */
+ for(i=0; i<6; i++) {
+ if(inhibit_zero && xfrd->nsd->st.opcode[i] == 0 &&
+ i != OPCODE_QUERY)
+ continue;
+ if(!ssl_printf(ssl, "num.opcode.%s=%u\n", opcode2str(i),
+ (unsigned)xfrd->nsd->st.opcode[i]))
+ return;
+ }
+
+ /* qclass */
+ for(i=0; i<4; i++) {
+ if(inhibit_zero && xfrd->nsd->st.qclass[i] == 0 &&
+ i != CLASS_IN)
+ continue;
+ if(!ssl_printf(ssl, "num.class.%s=%u\n", rrclass_to_string(i),
+ (unsigned)xfrd->nsd->st.qclass[i]))
+ return;
+ }
+
+ /* rcode */
+ for(i=0; i<17; i++) {
+ if(inhibit_zero && xfrd->nsd->st.rcode[i] == 0 &&
+ i > RCODE_YXDOMAIN) /* NSD does not use larger */
+ continue;
+ if(!ssl_printf(ssl, "num.rcode.%s=%u\n", rcstr[i],
+ (unsigned)xfrd->nsd->st.rcode[i]))
+ return;
+ }
+
+ /* edns */
+ if(!ssl_printf(ssl, "num.edns=%u\n", (unsigned)xfrd->nsd->st.edns))
+ return;
+
+ /* ednserr */
+ if(!ssl_printf(ssl, "num.ednserr=%u\n",
+ (unsigned)xfrd->nsd->st.ednserr))
+ return;
+
+ /* qudp */
+ if(!ssl_printf(ssl, "num.udp=%u\n", (unsigned)xfrd->nsd->st.qudp))
+ return;
+ /* qudp6 */
+ if(!ssl_printf(ssl, "num.udp6=%u\n", (unsigned)xfrd->nsd->st.qudp6))
+ return;
+ /* ctcp */
+ if(!ssl_printf(ssl, "num.tcp=%u\n", (unsigned)xfrd->nsd->st.ctcp))
+ return;
+ /* ctcp6 */
+ if(!ssl_printf(ssl, "num.tcp6=%u\n", (unsigned)xfrd->nsd->st.ctcp6))
+ return;
+
+ /* nona */
+ if(!ssl_printf(ssl, "num.answer_wo_aa=%u\n",
+ (unsigned)xfrd->nsd->st.nona))
+ return;
+
+ /* rxerr */
+ if(!ssl_printf(ssl, "num.rxerr=%u\n", (unsigned)xfrd->nsd->st.rxerr))
+ return;
+
+ /* txerr */
+ if(!ssl_printf(ssl, "num.txerr=%u\n", (unsigned)xfrd->nsd->st.txerr))
+ return;
+
+ /* number of requested-axfr, number of times axfr served to clients */
+ if(!ssl_printf(ssl, "num.raxfr=%u\n", (unsigned)xfrd->nsd->st.raxfr))
+ return;
+
+ /* truncated */
+ if(!ssl_printf(ssl, "num.truncated=%u\n",
+ (unsigned)xfrd->nsd->st.truncated))
+ return;
+
+ /* dropped */
+ if(!ssl_printf(ssl, "num.dropped=%u\n",
+ (unsigned)xfrd->nsd->st.dropped))
+ return;
+
+ /* zone statistics */
+ if(!ssl_printf(ssl, "zone.master=%u\n",
+ (unsigned)(xfrd->notify_zones->count - xfrd->zones->count)))
+ return;
+ if(!ssl_printf(ssl, "zone.slave=%u\n", (unsigned)xfrd->zones->count))
+ return;
+}
+
+static void
+clear_stats(xfrd_state_t* xfrd)
+{
+ size_t i;
+ uint64_t dbd = xfrd->nsd->st.db_disk;
+ uint64_t dbm = xfrd->nsd->st.db_mem;
+ for(i=0; i<xfrd->nsd->child_count; i++) {
+ xfrd->nsd->children[i].query_count = 0;
+ }
+ memset(&xfrd->nsd->st, 0, sizeof(struct nsdst));
+ xfrd->nsd->st.db_disk = dbd;
+ xfrd->nsd->st.db_mem = dbm;
+}
+
+void
+daemon_remote_process_stats(struct daemon_remote* rc)
+{
+ struct rc_state* s;
+ struct timeval now;
+ if(!rc) return;
+ if(gettimeofday(&now, NULL) == -1)
+ log_msg(LOG_ERR, "gettimeofday: %s", strerror(errno));
+ /* pop one and give it stats */
+ while((s = rc->stats_list)) {
+ assert(s->in_stats_list);
+ print_stats(s->ssl, rc->xfrd, &now);
+ if(s->in_stats_list == 1) {
+ clear_stats(rc->xfrd);
+ rc->stats_time = now;
+ }
+ VERBOSITY(3, (LOG_INFO, "remote control stats printed"));
+ rc->stats_list = s->next;
+ s->in_stats_list = 0;
+ clean_point(rc, s);
+ }
+}
+#endif /* BIND8_STATS */
+
+#endif /* HAVE_SSL */
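handle_req() above expects a fixed "NSDCT<version> " header followed by one command line ending in a newline. A minimal sketch of how a client could build such a request (the TLS transport and certificate checks that nsd-control performs are left out, and the helper name is invented for the example):

#include <stdio.h>

/* illustrative only: format a control request the way handle_req() parses it,
 * e.g. "NSDCT1 reload example.com\n" when control_version is 1 */
static int
example_format_request(char* buf, size_t len, int control_version,
	const char* cmd, const char* arg)
{
	if(arg && arg[0])
		return snprintf(buf, len, "NSDCT%d %s %s\n",
			control_version, cmd, arg);
	return snprintf(buf, len, "NSDCT%d %s\n", control_version, cmd);
}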
diff --git a/usr.sbin/nsd/remote.h b/usr.sbin/nsd/remote.h
new file mode 100644
index 00000000000..4317e1fec65
--- /dev/null
+++ b/usr.sbin/nsd/remote.h
@@ -0,0 +1,102 @@
+/*
+ * remote.h - remote control for the NSD daemon.
+ *
+ * Copyright (c) 2008, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *
+ * This file contains the remote control functionality for the daemon.
+ * The remote control can be performed using either the commandline
+ * nsd-control tool, or an SSLv3/TLS capable web browser.
+ * The channel is secured using SSLv3 or TLSv1, and certificates.
+ * Both the server and the client (control tool) have their own keys.
+ */
+
+#ifndef DAEMON_REMOTE_H
+#define DAEMON_REMOTE_H
+struct xfrd_state;
+struct nsd_options;
+
+/* private, defined in remote.c to keep ssl.h out of this header */
+struct daemon_remote;
+struct rc_state;
+
+/* the remote control needs less backlog than the tcp53 service */
+#define TCP_BACKLOG_REMOTE 16 /* listen() tcp backlog */
+
+/**
+ * Create new remote control state for the daemon.
+ * Also sets up the control port.
+ * @param cfg: config file with key file settings.
+ * @return new state, or NULL on failure.
+ */
+struct daemon_remote* daemon_remote_create(struct nsd_options* cfg);
+
+/**
+ * Delete the remote control state.
+ * @param rc: state to delete.
+ */
+void daemon_remote_delete(struct daemon_remote* rc);
+
+/**
+ * Close remote control ports. Clears up busy connections.
+ * Does not delete the rc itself, or the ssl context (with its keys).
+ * @param rc: state to close.
+ */
+void daemon_remote_close(struct daemon_remote* rc);
+
+/**
+ * Open and create listening ports for remote control.
+ * @param rc: rc state that contains list of accept port sockets.
+ * @param cfg: config options.
+ * @return false on failure.
+ */
+int daemon_remote_open_ports(struct daemon_remote* rc,
+ struct nsd_options* cfg);
+
+/**
+ * Setup comm points for accepting remote control connections.
+ * @param rc: state
+ * @param xfrd: the process that hosts the control connection.
+ * The rc is attached to its event base.
+ */
+void daemon_remote_attach(struct daemon_remote* rc, struct xfrd_state* xfrd);
+
+/**
+ * Process statistics results and send them.
+ * @param rc: state.
+ */
+void daemon_remote_process_stats(struct daemon_remote* rc);
+
+#endif /* DAEMON_REMOTE_H */
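A minimal usage sketch of this API, inferred only from the declarations above; the
variables options and xfrd, and the error handling, are placeholders rather than the
actual NSD wiring:

	/* at startup, with the parsed options available */
	struct daemon_remote* rc = daemon_remote_create(options);
	if(rc && !daemon_remote_open_ports(rc, options))
		log_msg(LOG_ERR, "could not open remote control ports");
	/* in the xfrd process, hook the accept sockets into its event base */
	daemon_remote_attach(rc, xfrd);
	/* ... on reload or shutdown ... */
	daemon_remote_close(rc);
	daemon_remote_delete(rc);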
diff --git a/usr.sbin/nsd/rrl.h b/usr.sbin/nsd/rrl.h
index 48dbb53b8cb..1ffd841664b 100644
--- a/usr.sbin/nsd/rrl.h
+++ b/usr.sbin/nsd/rrl.h
@@ -72,5 +72,7 @@ enum rrl_type rrlstr2type(const char* s);
/** for unit test, update rrl bucket; return rate */
uint32_t rrl_update(query_type* query, uint32_t hash, uint64_t source,
uint16_t flags, int32_t now, uint32_t lm);
+/** set the rate limit counters, pass variables in qps */
+void rrl_set_limit(size_t lm, size_t wlm, size_t sm);
#endif /* RRL_H */
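The new setter is presumably called once at startup with the configured limits; the
mapping of lm/wlm/sm to ratelimit, whitelist-ratelimit and slip is an assumption based
on the parameter names, and the cfg fields below are placeholders:

	/* assumed: lm = ratelimit, wlm = whitelist ratelimit, sm = slip */
	rrl_set_limit(cfg->rrl_ratelimit, cfg->rrl_whitelist_ratelimit, cfg->rrl_slip);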
diff --git a/usr.sbin/nsd/tsig-openssl.c b/usr.sbin/nsd/tsig-openssl.c
index 797f7fbf2ab..6795e750f1f 100644
--- a/usr.sbin/nsd/tsig-openssl.c
+++ b/usr.sbin/nsd/tsig-openssl.c
@@ -1,7 +1,7 @@
/*
* tsig-openssl.h -- Interface to OpenSSL for TSIG support.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/tsig-openssl.h b/usr.sbin/nsd/tsig-openssl.h
index 263c715b113..859c280c4c0 100644
--- a/usr.sbin/nsd/tsig-openssl.h
+++ b/usr.sbin/nsd/tsig-openssl.h
@@ -1,7 +1,7 @@
/*
* tsig-openssl.h -- Interface to OpenSSL for TSIG support.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
diff --git a/usr.sbin/nsd/tsig.c b/usr.sbin/nsd/tsig.c
index cf2872b563e..1844e98d9e1 100644
--- a/usr.sbin/nsd/tsig.c
+++ b/usr.sbin/nsd/tsig.c
@@ -1,7 +1,7 @@
/*
- * tsig.h -- TSIG definitions (RFC 2845).
+ * tsig.c -- TSIG implementation (RFC 2845).
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -16,16 +16,18 @@
#include "tsig-openssl.h"
#include "dns.h"
#include "packet.h"
+#include "query.h"
+#include "rbtree.h"
static region_type *tsig_region;
struct tsig_key_table
{
- struct tsig_key_table *next;
+ rbnode_t node; /* by dname */
tsig_key_type *key;
};
typedef struct tsig_key_table tsig_key_table_type;
-static tsig_key_table_type *tsig_key_table;
+static rbtree_t *tsig_key_table;
struct tsig_algorithm_table
{
@@ -83,11 +85,17 @@ tsig_digest_variables(tsig_record_type *tsig, int tsig_timers_only)
}
}
+static int
+tree_dname_compare(const void* a, const void* b)
+{
+ return dname_compare((const dname_type*)a, (const dname_type*)b);
+}
+
int
tsig_init(region_type *region)
{
tsig_region = region;
- tsig_key_table = NULL;
+ tsig_key_table = rbtree_create(region, &tree_dname_compare);
tsig_algorithm_table = NULL;
#if defined(HAVE_SSL)
@@ -99,11 +107,31 @@ tsig_init(region_type *region)
void
tsig_add_key(tsig_key_type *key)
{
- tsig_key_table_type *entry = (tsig_key_table_type *) region_alloc(
+ tsig_key_table_type *entry = (tsig_key_table_type *) region_alloc_zero(
tsig_region, sizeof(tsig_key_table_type));
entry->key = key;
- entry->next = tsig_key_table;
- tsig_key_table = entry;
+ entry->node.key = entry->key->name;
+ (void)rbtree_insert(tsig_key_table, &entry->node);
+}
+
+void
+tsig_del_key(tsig_key_type *key)
+{
+ tsig_key_table_type *entry;
+ if(!key) return;
+ entry = (tsig_key_table_type*)rbtree_delete(tsig_key_table, key->name);
+ if(!entry) return;
+ region_recycle(tsig_region, entry, sizeof(tsig_key_table_type));
+}
+
+tsig_key_type*
+tsig_find_key(const dname_type* name)
+{
+ tsig_key_table_type* entry;
+ entry = (tsig_key_table_type*)rbtree_search(tsig_key_table, name);
+ if(entry)
+ return entry->key;
+ return NULL;
}
void
@@ -222,11 +250,21 @@ tsig_create_record_custom(tsig_record_type *tsig, region_type *region,
large_object_size, initial_cleanup_size, 0);
tsig->context_region = region_create_custom(xalloc, free, chunk_size,
large_object_size, initial_cleanup_size, 0);
- region_add_cleanup(region, tsig_cleanup, tsig);
+ if(region)
+ region_add_cleanup(region, tsig_cleanup, tsig);
tsig_init_record(tsig, NULL, NULL);
}
void
+tsig_delete_record(tsig_record_type* tsig, region_type* region)
+{
+ if(region)
+ region_remove_cleanup(region, tsig_cleanup, tsig);
+ region_destroy(tsig->rr_region);
+ region_destroy(tsig->context_region);
+}
+
+void
tsig_init_record(tsig_record_type *tsig,
tsig_algorithm_type *algorithm,
tsig_key_type *key)
@@ -246,7 +284,6 @@ tsig_init_record(tsig_record_type *tsig,
int
tsig_from_query(tsig_record_type *tsig)
{
- tsig_key_table_type *key_entry;
tsig_key_type *key = NULL;
tsig_algorithm_table_type *algorithm_entry;
tsig_algorithm_type *algorithm = NULL;
@@ -257,16 +294,7 @@ tsig_from_query(tsig_record_type *tsig)
assert(!tsig->algorithm);
assert(!tsig->key);
- /* XXX: TODO: slow linear check for keyname */
- for (key_entry = tsig_key_table;
- key_entry;
- key_entry = key_entry->next)
- {
- if (dname_compare(tsig->key_name, key_entry->key->name) == 0) {
- key = key_entry->key;
- break;
- }
- }
+ key = (tsig_key_type*)tsig_find_key(tsig->key_name);
for (algorithm_entry = tsig_algorithm_table;
algorithm_entry;
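With the key table now an rbtree keyed on the key's owner name, adding, finding and
deleting keys use the calls shown above; a short sketch, where the secret buffer and its
length are placeholders:

	tsig_key_type* k = (tsig_key_type*)region_alloc(region, sizeof(*k));
	k->name = dname_parse(region, "transfer.key.");
	k->data = secret;        /* placeholder: decoded shared secret */
	k->size = secret_len;
	tsig_add_key(k);                    /* insert into the rbtree by dname */
	/* ... */
	if(tsig_find_key(k->name))          /* O(log n) lookup, replaces the linear scan */
		tsig_del_key(k);            /* unlink and recycle the table entry */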
diff --git a/usr.sbin/nsd/tsig.h b/usr.sbin/nsd/tsig.h
index f09a07e5aba..71cad7740c7 100644
--- a/usr.sbin/nsd/tsig.h
+++ b/usr.sbin/nsd/tsig.h
@@ -1,7 +1,7 @@
/*
* tsig.h -- TSIG definitions (RFC 2845).
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -103,7 +103,7 @@ struct tsig_key
{
const dname_type *name;
size_t size;
- const uint8_t *data;
+ uint8_t *data;
};
struct tsig_record
@@ -144,6 +144,7 @@ int tsig_init(region_type *region);
* Add the specified key to the TSIG key table.
*/
void tsig_add_key(tsig_key_type *key);
+void tsig_del_key(tsig_key_type *key);
/*
* Add the specified algorithm to the TSIG algorithm table.
@@ -172,6 +173,7 @@ void tsig_create_record(tsig_record_type* tsig,
/*
* Like tsig_create_record, with custom region settings.
* The size params are used to customise the rr_region and context_region.
+ * If region is NULL, no cleanup is attached to it.
*/
void tsig_create_record_custom(tsig_record_type* tsig,
region_type* region,
@@ -180,6 +182,12 @@ void tsig_create_record_custom(tsig_record_type* tsig,
size_t initial_cleanup_size);
/*
+ * Destroy tsig record internals (the main ptr is user allocated).
+ * If region is non-NULL, removes the cleanup.
+ */
+void tsig_delete_record(tsig_record_type* tsig, region_type* region);
+
+/*
+ * Call this before starting to analyze or sign a sequence of
* packets.
*
diff --git a/usr.sbin/nsd/udb.c b/usr.sbin/nsd/udb.c
new file mode 100644
index 00000000000..6c0ffe7d0c0
--- /dev/null
+++ b/usr.sbin/nsd/udb.c
@@ -0,0 +1,2018 @@
+/* udb.c - u(micro) data base.
+ * By W.C.A. Wijngaards
+ * Copyright 2010, NLnet Labs.
+ * BSD, see LICENSE.
+ */
+#include "config.h"
+#include "udb.h"
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+#include "lookup3.h"
+#include "util.h"
+
+/* mmap and friends */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+/* portable fallbacks for systems that lack these: MAP_FAILED is (void*)-1 and MS_SYNC becomes a no-op flag */
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void*)-1)
+#endif
+#ifndef MS_SYNC
+#define MS_SYNC 0
+#endif
+
+/** move and fixup xl segment */
+static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
+ udb_void n, uint64_t sz, uint64_t startseg);
+/** attempt to compact the data and move free space to the end */
+static int udb_alloc_compact(void* base, udb_alloc* alloc);
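+
+/* Allocation strategy used below: requests are rounded up to power-of-2
+ * chunks between UDB_ALLOC_CHUNK_MINEXP and UDB_ALLOC_CHUNKS_MAX; anything
+ * larger becomes an "XL" chunk in whole multiples of UDB_ALLOC_CHUNK_SIZE
+ * (1 Mb).  Free chunks sit on per-exponent freelists, buddies are coagulated
+ * on free, and compaction moves allocated chunks towards the front so the
+ * mmapped file can be shrunk at its end. */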
+
+/** convert pointer to the data part to a pointer to the base of the chunk */
+static udb_void
+chunk_from_dataptr(udb_void data)
+{
+	/* we use that sizeof(udb_chunk_d) != sizeof(udb_xl_chunk_d) and
+	 * that xl_chunk_d is aligned on UDB_ALLOC_CHUNK_SIZE (1 Mb) boundaries. */
+ udb_void xl = data - sizeof(udb_xl_chunk_d);
+ if( (xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
+ return xl;
+ return data - sizeof(udb_chunk_d);
+}
+
+udb_void chunk_from_dataptr_ext(udb_void data) {
+ return chunk_from_dataptr(data);
+}
+
+#ifndef NDEBUG
+/** read last octet from a chunk */
+static uint8_t
+chunk_get_last(void* base, udb_void chunk, int exp)
+{
+ return *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1));
+}
+#endif
+
+/** write last octet of a chunk */
+static void
+chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
+{
+ *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1)) = value;
+}
+
+/** create udb_base from a file descriptor (must be at start of file) */
+udb_base*
+udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
+ void* arg)
+{
+ uint64_t m;
+ udb_glob_d g;
+ ssize_t r;
+ udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
+ if(!udb) {
+ log_msg(LOG_ERR, "out of memory");
+ close(fd);
+ return NULL;
+ }
+ udb->fname = strdup(fname);
+ if(!udb->fname) {
+ log_msg(LOG_ERR, "out of memory");
+ free(udb);
+ close(fd);
+ return NULL;
+ }
+ udb->walkfunc = walkfunc;
+ udb->walkarg = arg;
+ udb->fd = fd;
+ udb->ram_size = 1024;
+ udb->ram_mask = (int)udb->ram_size - 1;
+ udb->ram_hash = (udb_ptr**)xalloc_zero(sizeof(udb_ptr*)*udb->ram_size);
+ if(!udb->ram_hash) {
+ free(udb->fname);
+ free(udb);
+ log_msg(LOG_ERR, "out of memory");
+ close(fd);
+ return NULL;
+ }
+
+ /* read magic */
+ if((r=read(fd, &m, sizeof(m))) == -1) {
+ log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
+ goto fail;
+ } else if(r != (ssize_t)sizeof(m)) {
+ log_msg(LOG_ERR, "%s: file too short", fname);
+ goto fail;
+ }
+ /* TODO : what if bigendian and littleendian file, see magic */
+ if(m != UDB_MAGIC) {
+ log_msg(LOG_ERR, "%s: wrong type of file", fname);
+ goto fail;
+ }
+ /* read header */
+ if((r=read(fd, &g, sizeof(g))) == -1) {
+ log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
+ goto fail;
+ } else if(r != (ssize_t)sizeof(g)) {
+ log_msg(LOG_ERR, "%s: file too short", fname);
+ goto fail;
+ }
+ if(g.version != 0) {
+ log_msg(LOG_ERR, "%s: unknown file version %d", fname,
+ (int)g.version);
+ goto fail;
+ }
+ if(g.hsize < UDB_HEADER_SIZE) {
+ log_msg(LOG_ERR, "%s: header size too small %d", fname,
+ (int)g.hsize);
+ goto fail;
+ }
+ if(g.hsize > UDB_HEADER_SIZE) {
+ log_msg(LOG_WARNING, "%s: header size too large %d", fname,
+ (int)g.hsize);
+ log_msg(LOG_WARNING, "attempting to continue...");
+ }
+ if(g.clean_close != 0) {
+ log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
+ (int)g.clean_close);
+ log_msg(LOG_WARNING, "attempting to continue...");
+ }
+ /* TODO check if too large (>4g on 32bit); mmap-usage would fail */
+
+ /* mmap it */
+ if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
+ log_msg(LOG_ERR, "%s: file too short", fname);
+ goto fail;
+ }
+ udb->base_size = (size_t)g.fsize;
+ /* note the size_t casts must be there for portability, on some
+ * systems the layout of memory is otherwise broken. */
+ udb->base = mmap(NULL, (size_t)udb->base_size,
+ (int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
+ (int)udb->fd, (off_t)0);
+ if(udb->base == MAP_FAILED) {
+ udb->base = NULL;
+ log_msg(LOG_ERR, "mmap(size %u) error: %s",
+ (unsigned)udb->base_size, strerror(errno));
+ fail:
+ close(fd);
+ free(udb->fname);
+ free(udb->ram_hash);
+ free(udb);
+ return NULL;
+ }
+
+ /* init completion */
+ udb->glob_data = (udb_glob_d*)(udb->base+sizeof(uint64_t));
+ r = 0;
+ if(udb->glob_data->dirty_alloc != udb_dirty_clean)
+ r = 1;
+ udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
+ (void*)udb->glob_data+sizeof(*udb->glob_data)));
+ if(!udb->alloc) {
+ log_msg(LOG_ERR, "out of memory");
+ udb_base_free(udb);
+ return NULL;
+ }
+ if(r) {
+ /* and compact now, or resume compacting */
+ udb_alloc_compact(udb, udb->alloc);
+ udb_base_sync(udb, 1);
+ }
+
+ return udb;
+}
+
+udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
+ void* arg)
+{
+ int fd = open(fname, O_RDWR);
+ if(fd == -1) {
+ log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
+ return NULL;
+ }
+ return udb_base_create_fd(fname, fd, walkfunc, arg);
+}
+
+/** init new udb_global structure */
+static void udb_glob_init_new(udb_glob_d* g)
+{
+ memset(g, 0, sizeof(*g));
+ g->hsize = UDB_HEADER_SIZE;
+ g->fsize = UDB_HEADER_SIZE;
+}
+
+/** write data to file and check result */
+static int
+write_fdata(const char* fname, int fd, void* data, size_t len)
+{
+ ssize_t w;
+ if((w=write(fd, data, len)) == -1) {
+ log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
+ close(fd);
+ return 0;
+ } else if(w != (ssize_t)len) {
+ log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
+ close(fd);
+ return 0;
+ }
+ return 1;
+}
+
+udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
+ void* arg)
+{
+ uint64_t m;
+ udb_glob_d g;
+ udb_alloc_d a;
+ uint64_t endsize = UDB_HEADER_SIZE;
+ uint64_t endexp = 0;
+ int fd = open(fname, O_CREAT|O_RDWR, 0600);
+ if(fd == -1) {
+ log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
+ return NULL;
+ }
+ m = UDB_MAGIC;
+ udb_glob_init_new(&g);
+ udb_alloc_init_new(&a);
+
+ /* write new data to file (closes fd on error) */
+ if(!write_fdata(fname, fd, &m, sizeof(m)))
+ return NULL;
+ if(!write_fdata(fname, fd, &g, sizeof(g)))
+ return NULL;
+ if(!write_fdata(fname, fd, &a, sizeof(a)))
+ return NULL;
+ if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
+ return NULL;
+ if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
+ return NULL;
+ /* rewind to start */
+ if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
+ log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
+ close(fd);
+ return NULL;
+ }
+ return udb_base_create_fd(fname, fd, walkfunc, arg);
+}
+
+/** shrink the udb base if it has unused space at the end */
+static void
+udb_base_shrink(udb_base* udb, uint64_t nsize)
+{
+ udb->glob_data->dirty_alloc = udb_dirty_fsize;
+ udb->glob_data->fsize = nsize;
+ /* sync, does not *seem* to be required on Linux, but it is
+ certainly required on OpenBSD. Otherwise changed data is lost. */
+ msync(udb->base, udb->base_size, MS_ASYNC);
+ if(ftruncate(udb->fd, (off_t)nsize) != 0) {
+ log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
+ (unsigned)nsize, strerror(errno));
+ }
+ udb->glob_data->dirty_alloc = udb_dirty_clean;
+}
+
+void udb_base_close(udb_base* udb)
+{
+ if(!udb)
+ return;
+ if(udb->fd != -1 && udb->base && udb->alloc) {
+ uint64_t nsize = udb->alloc->disk->nextgrow;
+ if(nsize < udb->base_size)
+ udb_base_shrink(udb, nsize);
+ }
+ if(udb->fd != -1) {
+ close(udb->fd);
+ udb->fd = -1;
+ }
+ if(udb->base) {
+ if(munmap(udb->base, udb->base_size) == -1) {
+ log_msg(LOG_ERR, "munmap: %s", strerror(errno));
+ }
+ udb->base = NULL;
+ }
+}
+
+void udb_base_free(udb_base* udb)
+{
+ if(!udb)
+ return;
+ udb_base_close(udb);
+ udb_alloc_delete(udb->alloc);
+ free(udb->ram_hash);
+ free(udb->fname);
+ free(udb);
+}
+
+void udb_base_free_keep_mmap(udb_base* udb)
+{
+ if(!udb) return;
+ if(udb->fd != -1) {
+ close(udb->fd);
+ udb->fd = -1;
+ }
+ udb->base = NULL;
+ udb_alloc_delete(udb->alloc);
+ free(udb->ram_hash);
+ free(udb->fname);
+ free(udb);
+}
+
+void udb_base_sync(udb_base* udb, int wait)
+{
+ if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
+ log_msg(LOG_ERR, "msync(%s) error %s",
+ udb->fname, strerror(errno));
+ }
+}
+
+/** hash a chunk pointer */
+static uint32_t
+chunk_hash_ptr(udb_void p)
+{
+ /* put p into an array of uint32 */
+ uint32_t h[sizeof(p)/sizeof(uint32_t)];
+ memcpy(&h, &p, sizeof(h));
+ return hashword(h, sizeof(p)/sizeof(uint32_t), 0x8763);
+}
+
+/** check that the given pointer is on the bucket for the given offset */
+int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
+{
+ uint32_t i = chunk_hash_ptr(to) & udb->ram_mask;
+ udb_ptr* p;
+ assert((size_t)i < udb->ram_size);
+ for(p = udb->ram_hash[i]; p; p=p->next) {
+ if(p == ptr)
+ return 1;
+ }
+ return 0;
+}
+
+/** grow the ram array */
+static void
+grow_ram_hash(udb_base* udb, udb_ptr** newhash)
+{
+ size_t i;
+ size_t osize= udb->ram_size;
+ udb_ptr* p, *np;
+ udb_ptr** oldhash = udb->ram_hash;
+ udb->ram_size *= 2;
+ udb->ram_mask <<= 1;
+ udb->ram_mask |= 1;
+ udb->ram_hash = newhash;
+ /* have to link in every element in the old list into the new list*/
+ for(i=0; i<osize; i++) {
+ p = oldhash[i];
+ while(p) {
+ np = p->next;
+ /* link into newhash */
+ p->prev=NULL;
+ p->next=newhash[chunk_hash_ptr(p->data)&udb->ram_mask];
+ if(p->next) p->next->prev = p;
+ /* go to next element of oldhash */
+ p = np;
+ }
+ }
+ free(oldhash);
+}
+
+void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
+{
+ uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
+ assert((size_t)i < udb->ram_size);
+#ifdef UDB_CHECK
+ assert(udb_valid_dataptr(udb, ptr->data)); /* must be to whole chunk*/
+#endif
+ udb->ram_num++;
+ if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0xefffffff) {
+ /* grow the array, if allocation succeeds */
+ udb_ptr** newram = (udb_ptr**)xalloc_zero(sizeof(udb_ptr*)*
+ udb->ram_size*2);
+ if(newram) {
+ grow_ram_hash(udb, newram);
+ }
+ }
+ ptr->prev = NULL;
+ ptr->next = udb->ram_hash[i];
+ udb->ram_hash[i] = ptr;
+ if(ptr->next)
+ ptr->next->prev = ptr;
+}
+
+void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
+{
+ assert(ptr->data);
+#ifdef UDB_CHECK
+ assert(udb_valid_dataptr(udb, ptr->data)); /* ptr must be inited */
+ assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
+#endif
+ udb->ram_num--;
+ if(ptr->next)
+ ptr->next->prev = ptr->prev;
+ if(ptr->prev)
+ ptr->prev->next = ptr->next;
+ else {
+ uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
+ assert((size_t)i < udb->ram_size);
+ udb->ram_hash[i] = ptr->next;
+ }
+}
+
+/** change a set of ram ptrs to a new value */
+static void
+udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
+{
+ uint32_t io = chunk_hash_ptr(old) & udb->ram_mask;
+ udb_ptr* p, *np;
+ /* edit them and move them into the new position */
+ p = udb->ram_hash[io];
+ while(p) {
+ np = p->next;
+ if(p->data == old) {
+ udb_base_unlink_ptr(udb, p);
+ p->data = newd;
+ udb_base_link_ptr(udb, p);
+ }
+ p = np;
+ }
+}
+
+udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
+{
+ return &udb->glob_data->user_global;
+}
+
+void udb_base_set_userdata(udb_base* udb, udb_void user)
+{
+#ifdef UDB_CHECK
+ if(user) { assert(udb_valid_dataptr(udb, user)); }
+#endif
+ udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
+}
+
+void udb_base_set_userflags(udb_base* udb, uint8_t v)
+{
+ udb->glob_data->userflags = v;
+}
+
+uint8_t udb_base_get_userflags(udb_base* udb)
+{
+ return udb->glob_data->userflags;
+}
+
+/** re-mmap the udb to specified size */
+static void*
+udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
+{
+ void* nb;
+ /* for use with valgrind, do not use mremap, but the other version */
+#ifdef MREMAP_MAYMOVE
+ nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
+ if(nb == MAP_FAILED) {
+ log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
+ udb->fname, (unsigned)nsize, strerror(errno));
+ return 0;
+ }
+#else /* !HAVE MREMAP */
+ /* use munmap-mmap to simulate mremap */
+ if(munmap(udb->base, udb->base_size) != 0) {
+ log_msg(LOG_ERR, "munmap(%s) error %s",
+ udb->fname, strerror(errno));
+ }
+ /* provide hint for new location */
+ /* note the size_t casts must be there for portability, on some
+ * systems the layout of memory is otherwise broken. */
+ nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
+ (int)MAP_SHARED, (int)udb->fd, (off_t)0);
+ /* retry the mmap without basept in case of ENOMEM (FreeBSD8),
+ * the kernel can then try to mmap it at a different location
+ * where more memory is available */
+ if(nb == MAP_FAILED && errno == ENOMEM) {
+ nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
+ (int)MAP_SHARED, (int)udb->fd, (off_t)0);
+ }
+ if(nb == MAP_FAILED) {
+ log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
+ udb->fname, (unsigned)nsize, strerror(errno));
+ udb->base = NULL;
+ return 0;
+ }
+#endif /* HAVE MREMAP */
+ if(nb != udb->base) {
+ /* fix up realpointers in udb and alloc */
+ /* but mremap may have been nice and not move the base */
+ udb->base = nb;
+ udb->glob_data = (udb_glob_d*)(nb+sizeof(uint64_t));
+ /* use passed alloc pointer because the udb->alloc may not
+ * be initialized yet */
+ alloc->disk = (udb_alloc_d*)((void*)udb->glob_data
+ +sizeof(*udb->glob_data));
+ }
+ udb->base_size = nsize;
+ return nb;
+}
+
+void
+udb_base_remap_process(udb_base* udb)
+{
+ /* assume that fsize is still accessible */
+ udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
+}
+
+/** grow file to specified size and re-mmap, return new base */
+static void*
+udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
+{
+ /* grow file by writing a single zero at that spot, the
+ * rest is filled in with zeroes. */
+ uint8_t z = 0;
+ ssize_t w;
+
+ assert(nsize > 0);
+ udb->glob_data->dirty_alloc = udb_dirty_fsize;
+#ifdef HAVE_PWRITE
+ if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
+#else
+ if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
+		log_msg(LOG_ERR, "lseek %s: %s", udb->fname, strerror(errno));
+ return 0;
+ }
+ if((w=write(udb->fd, &z, sizeof(z))) == -1) {
+#endif
+ log_msg(LOG_ERR, "grow(%s, size %u) error %s",
+ udb->fname, (unsigned)nsize, strerror(errno));
+ return 0;
+ } else if(w != (ssize_t)sizeof(z)) {
+ log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
+ udb->fname, (unsigned)nsize);
+ return 0;
+ }
+ udb->glob_data->fsize = nsize;
+ udb->glob_data->dirty_alloc = udb_dirty_clean;
+ return udb_base_remap(udb, udb->alloc, nsize);
+}
+
+int udb_exp_size(uint64_t a)
+{
+ /* find enclosing value such that 2**x >= a */
+ int x = 0;
+ uint64_t i = a;
+ assert(a != 0);
+
+ i --;
+ /* could optimise this with uint8* access, depends on endianness */
+ /* first whole bytes */
+ while( (i&(~(uint64_t)0xff)) ) {
+ i >>= 8;
+ x += 8;
+ }
+ /* now details */
+ while(i) {
+ i >>= 1;
+ x ++;
+ }
+ assert( ((uint64_t)1<<x) >= a);
+ assert( x==0 || ((uint64_t)1<<(x-1)) < a);
+ return x;
+}
+
+int udb_exp_offset(uint64_t o)
+{
+ /* this means measuring the number of 0 bits on the right */
+ /* so, if exp zero bits then (o&(2**x-1))==0 */
+ int x = 0;
+ uint64_t i = o;
+ assert(o != 0);
+ /* first whole bytes */
+ while( (i&(uint64_t)0xff) == 0) {
+ i >>= 8;
+ x += 8;
+ }
+ /* now details */
+ while( (i&(uint64_t)0x1) == 0) {
+ i >>= 1;
+ x ++;
+ }
+ assert( o % ((uint64_t)1<<x) == 0);
+ assert( o % ((uint64_t)1<<(x+1)) != 0);
+ return x;
+}
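+
+/* Examples: udb_exp_size(3) == 2 and udb_exp_size(4) == 2 (since 2**2 = 4),
+ * while udb_exp_offset(24) == 3 because 24 is divisible by 2**3 but not 2**4. */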
+
+void udb_alloc_init_new(udb_alloc_d* a)
+{
+ assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
+ memset(a, 0, sizeof(*a));
+ /* set new allocations after header, as if allocated in a sequence
+ * of minsize allocations */
+ a->nextgrow = UDB_HEADER_SIZE;
+}
+
+/** fsck the file size, false if failed and file is useless */
+static int
+fsck_fsize(udb_base* udb, udb_alloc* alloc)
+{
+ off_t realsize;
+ log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
+ realsize = lseek(udb->fd, (off_t)0, SEEK_END);
+ if(realsize == (off_t)-1) {
+ log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
+ return 0;
+ }
+ udb->glob_data->fsize = (uint64_t)realsize;
+ if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
+ return 0;
+ udb->glob_data->dirty_alloc = udb_dirty_clean;
+ log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
+ udb_base_sync(udb, 1);
+ return 1;
+}
+
+/** regenerate freelist add a new free chunk, return next todo */
+static udb_void
+regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
+{
+ udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
+ uint64_t esz = (uint64_t)1<<exp;
+ if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
+ return 0;
+ }
+ cp->type = udb_chunk_type_free;
+ cp->flags = 0;
+ chunk_set_last(base, c, exp, (uint8_t)exp);
+ cp->prev = 0;
+ cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
+ if(cp->next)
+ UDB_FREE_CHUNK(cp->next)->prev = c;
+ regen->stat_free += esz;
+ return c + esz;
+}
+
+/** regenerate xl chunk, return next todo */
+static udb_void
+regen_xl(void* base, udb_void c, udb_alloc_d* regen)
+{
+ udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
+ uint64_t xlsz = cp->size;
+ if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
+ return 0;
+ }
+ if( (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
+ return 0;
+ }
+ /* fixup end-size and end-expmarker */
+ regen->stat_alloc += xlsz;
+ return c + xlsz;
+}
+
+/** regenerate data chunk, return next todo */
+static udb_void
+regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
+{
+ uint64_t esz = (uint64_t)1<<exp;
+ if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
+ return 0;
+ }
+ chunk_set_last(base, c, exp, (uint8_t)exp);
+ regen->stat_alloc += esz;
+ return c + esz;
+}
+
+/** regenerate a relptr structure inside a data segment */
+static void
+regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
+{
+ udb_void* a = (udb_void*)arg;
+ /* ignore 0 pointers */
+ if(!rp->data)
+ return;
+
+ /* edit relptrs that point to oldmoved to point to newmoved. */
+ if(rp->data == a[0])
+ rp->data = a[1];
+
+ /* regenerate relptr lists, add this item to the relptr list for
+ * the data that it points to */
+ udb_rel_ptr_link(base, rp, rp->data);
+}
+
+/** regenerate the relptrs store in this data segment */
+static void
+regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
+ void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
+{
+ udb_void arg[2];
+ arg[0] = rb_old; arg[1] = rb_new;
+ /* walk through the structs here and put them on their respective
+ * relptr lists */
+ (*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
+ &regen_relptr_func, arg);
+
+}
+
+/** regenerate relptrlists in the file */
+static void
+regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
+ udb_void rb_old, udb_void rb_new)
+{
+ udb_void at = alloc->udb->glob_data->hsize;
+ /* clear all ptrlist start pointers in the file. */
+ while(at < alloc->disk->nextgrow) {
+ int exp = (int)UDB_CHUNK(at)->exp;
+ udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
+ if(exp == UDB_EXP_XL) {
+ UDB_XL_CHUNK(at)->ptrlist = 0;
+ at += UDB_XL_CHUNK(at)->size;
+ } else if(tp == udb_chunk_type_free) {
+ at += (uint64_t)1<<exp;
+ } else { /* data chunk */
+ UDB_CHUNK(at)->ptrlist = 0;
+ at += (uint64_t)1<<exp;
+ }
+ }
+ /* walk through all relptr structs and put on the right list. */
+ at = alloc->udb->glob_data->hsize;
+ while(at < alloc->disk->nextgrow) {
+ udb_chunk_d* atp = UDB_CHUNK(at);
+ int exp = (int)atp->exp;
+ udb_chunk_type tp = (udb_chunk_type)atp->type;
+ uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
+ (uint64_t)1<<exp);
+ if(exp == UDB_EXP_XL) {
+ assert(at != rb_old); /* should have been freed */
+ regen_its_ptrs(base, udb, atp,
+ ((void*)atp)+sizeof(udb_xl_chunk_d),
+ sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
+ rb_old, rb_new);
+ at += sz;
+ } else if(tp == udb_chunk_type_free) {
+ at += sz;
+ } else { /* data chunk */
+ assert(at != rb_old); /* should have been freed */
+ regen_its_ptrs(base, udb, atp,
+ ((void*)atp)+sizeof(udb_chunk_d),
+ sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
+ at += sz;
+ }
+ }
+}
+
+
+/** mark free elements from former XL chunk space; later fixups pick that up */
+static void
+rb_mark_free_segs(void* base, udb_void s, uint64_t m)
+{
+ udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
+ /* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
+ assert(s >= UDB_ALLOC_CHUNK_SIZE);
+ while(q >= s) {
+ UDB_CHUNK(q)->exp = UDB_ALLOC_CHUNKS_MAX;
+ UDB_CHUNK(q)->type = udb_chunk_type_free;
+ q -= UDB_ALLOC_CHUNK_SIZE;
+ }
+}
+
+
+/** fsck rollback or rollforward XL move results */
+static int
+fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
+ uint64_t rb_size, uint64_t rb_seg)
+{
+
+ if(rb_old <= rb_new)
+ return 0; /* XL move one way */
+ if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
+ return 0; /* not aligned */
+ if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
+ return 0; /* not aligned */
+ if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
+ return 0; /* not aligned */
+ if(rb_new + rb_size <= rb_old) {
+ /* not overlapping: resume copy */
+ memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
+ /* and free up old piece(s) */
+ rb_mark_free_segs(base, rb_old, rb_size);
+ } else {
+ /* overlapping, see what segment we stopped at
+ * and continue there. */
+ move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
+ /* free up old piece(s); from the end of the moved segment,
+ * until the end of the old segment */
+ rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
+ (rb_new+rb_size));
+ }
+ /* do not call fix_ptrs, regenptrs does the job */
+ return 1;
+}
+
+/** fsck rollback or rollforward move results */
+static int
+fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
+ udb_void* make_free)
+{
+ if( (rb_size&(rb_size-1)) != 0)
+ return 0; /* not powerof2 */
+ if( (rb_old&(rb_size-1)) != 0)
+ return 0; /* not aligned */
+ if( (rb_new&(rb_size-1)) != 0)
+ return 0; /* not aligned */
+ /* resume copy */
+ memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
+ /* do not call fix_ptrs, regenptrs does the job */
+ /* make sure udb_old is freed */
+ *make_free = rb_old;
+ return 1;
+}
+
+/** fsck the file and salvage, false if failed and file is useless */
+static int
+fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
+{
+ void* base = udb->base;
+ udb_alloc_d regen;
+ udb_void at = udb->glob_data->hsize;
+ udb_void rb_old = udb->glob_data->rb_old;
+ udb_void rb_new = udb->glob_data->rb_new;
+ udb_void rb_seg = udb->glob_data->rb_seg;
+ udb_void make_free = 0;
+ uint64_t rb_size = udb->glob_data->rb_size;
+ log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
+ /* walk through the file, use the exp values to see what can be
+ * salvaged */
+ if(moved && rb_old && rb_new && rb_size) {
+ if(rb_old+rb_size <= alloc->disk->nextgrow
+ && rb_new+rb_size <= alloc->disk->nextgrow) {
+ /* we can use the move information to fix up the
+ * duplicate element (or partially moved element) */
+ if(rb_size > 1024*1024) {
+ /* XL chunk */
+ if(!fsck_rb_xl(base, udb, rb_old, rb_new,
+ rb_size, rb_seg))
+ return 0;
+ } else {
+ if(!fsck_rb(base, rb_old, rb_new, rb_size,
+ &make_free))
+ return 0;
+ }
+ }
+ }
+
+ /* rebuild freelists */
+ /* recalculate stats in alloc (except 'stat_data') */
+ /* possibly new end 'nextgrow' value */
+ memset(&regen, 0, sizeof(regen));
+ regen.nextgrow = alloc->disk->nextgrow;
+ while(at < regen.nextgrow) {
+ /* figure out this chunk */
+ int exp = (int)UDB_CHUNK(at)->exp;
+ udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
+ /* consistency check possible here with end-exp */
+ if(tp == udb_chunk_type_free || at == make_free) {
+ at = regen_free(base, at, exp, &regen);
+ if(!at) return 0;
+ } else if(exp == UDB_EXP_XL) {
+ /* allocated data of XL size */
+ at = regen_xl(base, at, &regen);
+ if(!at) return 0;
+ } else if(exp >= UDB_ALLOC_CHUNK_MINEXP
+ && exp <= UDB_ALLOC_CHUNKS_MAX) {
+ /* allocated data */
+ at = regen_data(base, at, exp, &regen);
+ if(!at) return 0;
+ } else {
+ /* garbage; this must be EOF then */
+ regen.nextgrow = at;
+ break;
+ }
+ }
+ *alloc->disk = regen;
+
+ /* rebuild relptr lists */
+ regen_ptrlist(base, udb, alloc, rb_old, rb_new);
+
+ log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
+ udb->fname);
+ udb->glob_data->rb_old = 0;
+ udb->glob_data->rb_new = 0;
+ udb->glob_data->rb_size = 0;
+ udb->glob_data->dirty_alloc = udb_dirty_clean;
+ udb_base_sync(udb, 1);
+ return 1;
+}
+
+
+udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
+{
+ udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
+ if(!alloc)
+ return NULL;
+ alloc->udb = udb;
+ alloc->disk = disk;
+ /* see if committed but uncompleted actions need to be done */
+ /* preserves the alloc state */
+ if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
+ if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
+ if(fsck_fsize(udb, alloc))
+ return alloc;
+ } else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
+ if(fsck_file(udb, alloc, 0))
+ return alloc;
+ } else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
+ if(fsck_file(udb, alloc, 1))
+ return alloc;
+ }
+ log_msg(LOG_ERR, "error: file allocation dirty (%d)",
+ (int)udb->glob_data->dirty_alloc);
+ free(alloc);
+ return NULL;
+ }
+ return alloc;
+}
+
+void udb_alloc_delete(udb_alloc* alloc)
+{
+ if(!alloc) return;
+ free(alloc);
+}
+
+/** unlink this element from its freelist */
+static void
+udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
+{
+ udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
+ assert(chunk);
+ /* chunk is a free chunk */
+ assert(fp->exp == (uint8_t)exp);
+ assert(fp->type == udb_chunk_type_free);
+ assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
+ /* and thus freelist not empty */
+ assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
+ /* unlink */
+ if(fp->prev)
+ UDB_FREE_CHUNK(fp->prev)->next = fp->next;
+ else alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
+ if(fp->next)
+ UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
+}
+
+/** pop first element off freelist, list may not be empty */
+static udb_void
+udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
+{
+ udb_void f = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
+ udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
+ assert(f);
+ assert(fp->exp == (uint8_t)exp);
+ assert(fp->type == udb_chunk_type_free);
+ assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
+ alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
+ if(fp->next) {
+ UDB_FREE_CHUNK(fp->next)->prev = 0;
+ }
+ return f;
+}
+
+/** push new element onto freelist */
+static void
+udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
+{
+ udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
+ assert(f);
+ fp->exp = (uint8_t)exp;
+ fp->type = udb_chunk_type_free;
+ fp->flags = 0;
+ fp->prev = 0;
+ fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
+ if(fp->next)
+ UDB_FREE_CHUNK(fp->next)->prev = f;
+ chunk_set_last(base, f, exp, (uint8_t)exp);
+ alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
+}
+
+/** push new element onto freelist - do not initialize the elt */
+static void
+udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
+{
+ udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
+ assert(f);
+ assert(fp->exp == (uint8_t)exp);
+ assert(fp->type == udb_chunk_type_free);
+ assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
+ fp->prev = 0;
+ fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
+ if(fp->next)
+ UDB_FREE_CHUNK(fp->next)->prev = f;
+ alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
+}
+
+/** add free chunks at end until specified alignment occurs */
+static void
+grow_align(void* base, udb_alloc* alloc, uint64_t esz)
+{
+ while( (alloc->disk->nextgrow & (esz-1)) != 0) {
+ /* the nextgrow is not a whole multiple of esz. */
+ /* grow a free chunk of max allowed size */
+ int fexp = udb_exp_offset(alloc->disk->nextgrow);
+ uint64_t fsz = (uint64_t)1<<fexp;
+ udb_void f = alloc->disk->nextgrow;
+ udb_void fn = alloc->disk->nextgrow+fsz;
+ assert(fn <= alloc->udb->base_size);
+ alloc->disk->stat_free += fsz;
+ udb_alloc_push_fl(base, alloc, f, fexp);
+ /* now increase nextgrow to commit that free chunk */
+ alloc->disk->nextgrow = fn;
+ }
+}
+
+/** append chunks at end of memory space to get size exp, return dataptr */
+static udb_void
+grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
+{
+ uint64_t esz = (uint64_t)1<<exp;
+ udb_void ret;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ grow_align(base, alloc, esz);
+ /* free chunks are grown, grow the one we want to use */
+ ret = alloc->disk->nextgrow;
+ /* take a new alloced chunk into use */
+ UDB_CHUNK(ret)->exp = (uint8_t)exp;
+ UDB_CHUNK(ret)->flags = 0;
+ UDB_CHUNK(ret)->ptrlist = 0;
+ UDB_CHUNK(ret)->type = udb_chunk_type_data;
+ /* store last octet */
+ chunk_set_last(base, ret, exp, (uint8_t)exp);
+ /* update stats */
+ alloc->disk->stat_alloc += esz;
+ alloc->disk->stat_data += sz;
+ /* now increase nextgrow to commit this newly allocated chunk */
+ alloc->disk->nextgrow += esz;
+ assert(alloc->disk->nextgrow <= alloc->udb->base_size);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ return ret + sizeof(udb_chunk_d); /* ptr to data */
+}
+
+/** calculate how much space is necessary to grow for this exp */
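+/* (e.g. with nextgrow=0x1800 and exp=12 this yields 0x1000 + 2*0x1000 = 0x3000) */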
+static uint64_t
+grow_end_calc(udb_alloc* alloc, int exp)
+{
+ uint64_t sz = (uint64_t)1<<exp;
+ uint64_t ng = alloc->disk->nextgrow;
+ uint64_t res;
+ /* if nextgrow is 2**expness, no extra growth needed, only size */
+ if( (ng & (sz-1)) == 0) {
+ /* sz-1 is like 0xfff, and checks if ng is whole 2**exp */
+ return ng+sz; /* must grow exactly 2**exp */
+ }
+ /* grow until 2**expness and then we need 2**exp as well */
+ /* so, round ng down to whole sz (basically ng-ng%sz, or ng/sz*sz)
+ * and then add the sz twice (go up to whole sz, and to allocate) */
+ res = (ng & ~(sz-1)) + 2*sz;
+ return res;
+}
+
+/** see if we need to grow more than specified to enable sustained growth */
+static uint64_t
+grow_extra_check(udb_alloc* alloc, uint64_t ge)
+{
+ const uint64_t mb = 1024*1024;
+ uint64_t bsz = alloc->udb->base_size;
+ if(bsz <= mb) {
+ /* below 1 Mb, double sizes for exponential growth */
+ /* takes about 15 times to grow to 1Mb */
+ if(ge < bsz*2)
+ return bsz*2;
+ } else {
+ uint64_t gnow = ge - bsz;
+ /* above 1Mb, grow at least 1 Mb, or 12.5% of current size,
+ * in whole megabytes rounded up. */
+ uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
+ if(gnow < want)
+ return bsz + want;
+ }
+ return ge;
+}
+
+/** see if free space is enough to warrant a shrink (while file is open) */
+static int
+enough_free(udb_alloc* alloc)
+{
+ if(alloc->udb->base_size <= 2*1024*1024) {
+		/* below 1 Mb the file is grown by doubling (so up to 2 Mb);
+		 * do not shrink unless we can drop to 1/3 of the size */
+ if(((size_t)alloc->disk->nextgrow)*3 <= alloc->udb->base_size)
+ return 1;
+ } else {
+ /* grown 12.5%, shrink 25% if possible, at least one mb */
+ /* between 1mb and 4mb size, it shrinks by 1mb if possible */
+ uint64_t space = alloc->udb->base_size - alloc->disk->nextgrow;
+ if(space >= 1024*1024 && (space*4 >= alloc->udb->base_size
+ || alloc->udb->base_size < 4*1024*1024))
+ return 1;
+ }
+ return 0;
+}
+
+/** grow space for a chunk of 2**exp and return dataptr */
+static udb_void
+udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
+{
+ /* commit the grow action
+ * - the file grow only changes filesize, but not the nextgrow.
+ * - taking space after nextgrow into use (as free space),
+ * is like free-ing a chunk (one at a time).
+ * - and the last chunk taken into use is like alloc.
+ */
+ /* predict how much free space is needed for this */
+ uint64_t grow_end = grow_end_calc(alloc, exp);
+ assert(alloc->udb->base_size >= alloc->disk->nextgrow);
+ if(grow_end <= alloc->udb->base_size) {
+ /* we can do this with the available space */
+ return grow_chunks(base, alloc, sz, exp);
+ }
+ /* we have to grow the file, re-mmap */
+ /* see if we need to grow a little more, to avoid endless grow
+ * efforts on adding data */
+ grow_end = grow_extra_check(alloc, grow_end);
+ if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
+ return 0; /* mmap or write failed (disk or mem full) */
+ }
+ /* we have enough space now */
+ assert(grow_end <= alloc->udb->base_size);
+ assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
+ return grow_chunks(base, alloc, sz, exp);
+}
+
+/** take XL allocation into use at end of file, return dataptr */
+static udb_void
+grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
+{
+ udb_void ret;
+ udb_xl_chunk_d* p;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+
+ /* align growth to whole mbs */
+ grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);
+
+ /* grow XL segment */
+ ret = alloc->disk->nextgrow;
+ p = UDB_XL_CHUNK(ret);
+ p->exp = UDB_EXP_XL;
+ p->size = xlsz;
+ p->flags = 0;
+ p->ptrlist = 0;
+ p->type = udb_chunk_type_data;
+
+ /* also put size and marker at end for compaction */
+ *((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
+ *((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;
+
+ /* stats */
+ alloc->disk->stat_data += sz;
+ alloc->disk->stat_alloc += xlsz;
+ /* now increase the nextgrow to commit this xl chunk */
+ alloc->disk->nextgrow += xlsz;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ return ret + sizeof(udb_xl_chunk_d); /* data ptr */
+}
+
+/** make space for XL allocation */
+static udb_void
+udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
+{
+ /* allocate whole mbs of space, at end of space */
+ uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
+ uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
+ uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
+ assert(need >= asz);
+ if(grow_end <= alloc->udb->base_size) {
+ /* can do this in available space */
+ return grow_xl(base, alloc, need, sz);
+ }
+ /* have to grow file and re-mmap */
+ grow_end = grow_extra_check(alloc, grow_end);
+ if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
+ return 0; /* mmap or write failed (disk or mem full) */
+ }
+ /* we have enough space now */
+ assert(grow_end <= alloc->udb->base_size);
+ assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
+ return grow_xl(base, alloc, need, sz);
+}
+
+/** divide big(2**e2) into pieces so 2**exp fits */
+static udb_void
+udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
+ int exp)
+{
+ int e = e2;
+ uint64_t sz = (uint64_t)1<<e2;
+ assert(big && e2 > exp);
+ /* so the returned piece to use is the first piece,
+ * offload the later half until it fits */
+ do {
+ sz >>= 1; /* divide size of big by two */
+ e--; /* that means its exp is one smaller */
+ udb_alloc_push_fl(base, alloc, big+sz, e);
+ } while(e != exp);
+ /* exit loop when last pushed is same size as what we want */
+ return big;
+}
+
+/** returns the exponent size of the chunk needed for data sz */
+static int
+udb_alloc_exp_needed(size_t sz)
+{
+ uint64_t asz = sz + sizeof(udb_chunk_d) + 1;
+ if(asz > UDB_ALLOC_CHUNK_SIZE) {
+ return UDB_EXP_XL;
+ } else if(asz <= UDB_ALLOC_CHUNK_MINSIZE) {
+ return UDB_ALLOC_CHUNK_MINEXP;
+ }
+ return udb_exp_size(asz);
+}
+
+udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
+{
+ void* base = alloc->udb->base;
+ /* calculate actual allocation size */
+ int e2, exp = udb_alloc_exp_needed(sz);
+ if(exp == UDB_EXP_XL)
+ return udb_alloc_xl_space(base, alloc, sz);
+ /* see if there is a free chunk of that size exactly */
+ if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
+ /* snip from freelist, udb_chunk_d */
+ udb_void ret;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ ret = udb_alloc_pop_fl(base, alloc, exp);
+ /* use it - size octets already OK */
+ UDB_CHUNK(ret)->flags = 0;
+ UDB_CHUNK(ret)->ptrlist = 0;
+ UDB_CHUNK(ret)->type = udb_chunk_type_data;
+ /* update stats */
+ alloc->disk->stat_data += sz;
+ alloc->disk->stat_alloc += (1<<exp);
+ assert(alloc->disk->stat_free >= (1u<<exp));
+ alloc->disk->stat_free -= (1<<exp);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ return ret + sizeof(udb_chunk_d); /* ptr to data */
+ }
+ /* see if we can subdivide a larger chunk */
+ for(e2 = exp+1; e2 < UDB_ALLOC_CHUNKS_MAX; e2++)
+ if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
+ udb_void big, ret; /* udb_chunk_d */
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ big = udb_alloc_pop_fl(base, alloc, e2);
+ /* push other parts onto freelists (needs inited) */
+ ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
+ /* use final part (needs inited) */
+ UDB_CHUNK(ret)->exp = (uint8_t)exp;
+ /* if stop here; the new exp makes smaller free chunk*/
+ UDB_CHUNK(ret)->flags = 0;
+ UDB_CHUNK(ret)->ptrlist = 0;
+ /* set type to commit data chunk */
+ UDB_CHUNK(ret)->type = udb_chunk_type_data;
+ /* store last octet */
+ chunk_set_last(base, ret, exp, (uint8_t)exp);
+ /* update stats */
+ alloc->disk->stat_data += sz;
+ alloc->disk->stat_alloc += (1<<exp);
+ assert(alloc->disk->stat_free >= (1u<<exp));
+ alloc->disk->stat_free -= (1<<exp);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ return ret + sizeof(udb_chunk_d); /* ptr to data */
+ }
+ /* we need to grow an extra chunk */
+ return udb_alloc_grow_space(base, alloc, sz, exp);
+}
+
+/** see if there is free space to allocate a chunk into */
+static int
+have_free_for(udb_alloc* alloc, int exp)
+{
+ int e2;
+ if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP])
+ return exp;
+ for(e2 = exp+1; e2 < UDB_ALLOC_CHUNKS_MAX; e2++)
+ if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
+ return e2;
+ }
+ return 0;
+}
+
+/** fix relptr prev and next for moved relptr structures */
+static void
+chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
+{
+ udb_void* data = (udb_void*)arg;
+ udb_void r;
+ if(!rp->data)
+ return;
+ r = UDB_SYSTOREL(base, rp);
+ if(rp->next)
+ UDB_REL_PTR(rp->next)->prev = r;
+ if(rp->prev)
+ UDB_REL_PTR(rp->prev)->next = r;
+ else {
+ /* if this is a pointer to its own chunk, fix it up;
+ * the data ptr gets set by relptr_edit later. */
+ if(rp->data == data[0])
+ UDB_CHUNK(data[1])->ptrlist = r;
+ else UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
+ }
+}
+
+/** fix pointers from and to a moved chunk */
+static void
+chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
+ uint64_t dsz, udb_void olddata)
+{
+ udb_void d[2];
+ d[0] = olddata;
+ d[1] = data;
+ (*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
+ dsz, &chunk_fix_ptr_each, d);
+ udb_rel_ptr_edit(base, cp->ptrlist, data);
+ udb_base_ram_ptr_edit(udb, olddata, data);
+}
+
+/** move an allocated chunk to use a free chunk */
+static void
+move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
+ int e2)
+{
+ udb_void res = udb_alloc_pop_fl(base, alloc, e2);
+ udb_chunk_d* rp;
+ udb_chunk_d* fp;
+ if(exp != e2) {
+ /* it is bigger, subdivide it */
+ res = udb_alloc_subdivide(base, alloc, res, e2, exp);
+ }
+ assert(res != f);
+ /* setup rollback information */
+ alloc->udb->glob_data->rb_old = f;
+ alloc->udb->glob_data->rb_new = res;
+ alloc->udb->glob_data->rb_size = esz;
+ /* take the res, exp into use */
+ rp = UDB_CHUNK(res);
+ fp = UDB_CHUNK(f);
+ /* copy over the data */
+ memcpy(rp, fp, esz);
+ /* adjust rel ptrs */
+ chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
+ esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));
+
+ /* do not freeup the fp; caller does that */
+}
+
+/** unlink several free elements to overwrite with xl chunk */
+static void
+free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
+{
+ udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
+ /* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
+ assert(s >= UDB_ALLOC_CHUNK_SIZE);
+ while(q >= s) {
+ assert(UDB_CHUNK(q)->exp == UDB_ALLOC_CHUNKS_MAX);
+ assert(UDB_CHUNK(q)->type == udb_chunk_type_free);
+ udb_alloc_unlink_fl(base, alloc, q, UDB_ALLOC_CHUNKS_MAX);
+ q -= UDB_ALLOC_CHUNK_SIZE;
+ }
+}
+
+/** move an XL chunk, and keep track of segments for rollback */
+static void
+move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
+ uint64_t sz, uint64_t startseg)
+{
+ udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
+ udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
+ uint64_t amount = xl - n;
+ assert(n < xl); /* move to compact */
+
+ /* setup move rollback */
+ udb->glob_data->rb_old = xl;
+ udb->glob_data->rb_new = n;
+ udb->glob_data->rb_size = sz;
+
+ /* is it overlapping? */
+ if(sz <= amount) {
+ memcpy(np, xlp, sz);
+ } else {
+ /* move and commit per 1M segment to avoid data loss */
+ uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
+ for(seg = startseg; seg<maxseg; seg++) {
+ udb->glob_data->rb_seg = seg;
+ memcpy(np+seg*UDB_ALLOC_CHUNK_SIZE,
+ xlp+seg*UDB_ALLOC_CHUNK_SIZE,
+ UDB_ALLOC_CHUNK_SIZE);
+ }
+
+ }
+}
+
+/** move list of XL chunks to the front by the shift amount */
+static void
+move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
+ uint64_t amount)
+{
+ udb_void xl = xl_start;
+ assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
+ assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
+ assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
+ while(xl < xl_start+xl_sz) {
+ udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
+ udb_void n = xl-amount;
+ uint64_t sz = xlp->size;
+ assert(xlp->exp == UDB_EXP_XL);
+ move_xl_segment(base, alloc->udb, xl, n, sz, 0);
+ chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
+ n+sizeof(udb_xl_chunk_d),
+ sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
+ xl+sizeof(udb_xl_chunk_d));
+ }
+ alloc->disk->stat_free -= amount;
+ alloc->disk->nextgrow -= amount;
+ alloc->udb->glob_data->rb_old = 0;
+ alloc->udb->glob_data->rb_new = 0;
+ alloc->udb->glob_data->rb_size = 0;
+}
+
+/** see if free chunk can coagulate with another chunk, return other chunk */
+static udb_void
+coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
+ uint64_t esz)
+{
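+	/* the buddy of a 2**exp-aligned chunk is found by flipping the size bit */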
+ udb_void other = f^esz;
+ if(exp == UDB_ALLOC_CHUNKS_MAX)
+ return 0; /* no further merges */
+ if(other >= alloc->udb->base_size)
+ return 0; /* not allocated */
+ if(other >= alloc->disk->nextgrow)
+ return 0; /* not in use */
+ if(other < alloc->udb->glob_data->hsize)
+ return 0; /* cannot merge with header */
+ /* the header is also protected by the special exp marker */
+ /* see if the other chunk is a free chunk */
+
+ /* check closest marker to avoid large memory churn */
+ /* and also it makes XL allocations and header special markers work */
+ if(f > other) {
+ assert(f > 1); /* this is certain because of header */
+ if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
+ /* can do it if the other part is a free chunk */
+ assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
+ if(UDB_CHUNK(other)->type == udb_chunk_type_free)
+ return other;
+ }
+ } else {
+ if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
+ /* can do it if the other part is a free chunk */
+ assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
+ if(UDB_CHUNK(other)->type == udb_chunk_type_free)
+ return other;
+ }
+ }
+ return 0;
+}
+
+/** coagulate and then add new free segment, return final free segment */
+static udb_void
+coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
+ uint64_t esz)
+{
+ /* new free chunk here, attempt coagulate */
+ udb_void other;
+ while( (other=coagulate_possible(base, alloc, last, exp, esz)) ) {
+ /* unlink that other chunk */
+ udb_alloc_unlink_fl(base, alloc, other, exp);
+ /* merge up */
+ if(other < last)
+ last = other;
+ exp++;
+ esz <<= 1;
+ }
+ /* free the final segment */
+ udb_alloc_push_fl(base, alloc, last, exp);
+ return last;
+}
+
+/** attempt to compact the data and move free space to the end */
+static int
+udb_alloc_compact(void* base, udb_alloc* alloc)
+{
+ udb_void last;
+ int exp, e2;
+ uint64_t esz;
+ uint64_t at = alloc->disk->nextgrow;
+ udb_void xl_start = 0;
+ uint64_t xl_sz = 0;
+ while(at > alloc->udb->glob_data->hsize) {
+ /* grab last entry */
+ exp = (int)*((uint8_t*)UDB_REL(base, at-1));
+ if(exp == UDB_EXP_XL) {
+ /* for XL chunks:
+ * - inspect the size of the XLchunklist at end
+			 * - attempt to compact in front of the XLchunklist
+ */
+ uint64_t xlsz = *((uint64_t*)UDB_REL(base,
+ at-sizeof(uint64_t)*2));
+ udb_void xl = at-xlsz;
+#ifndef NDEBUG
+ udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
+ assert(xlp->exp == UDB_EXP_XL);
+ assert(xlp->type != udb_chunk_type_free);
+#endif
+			/* got the segment; add it to the xl chunk list */
+ if(xl_start != 0 && xl+xlsz != xl_start) {
+ /* nonadjoining XL part, but they are aligned,
+ * so the space in between is whole Mbs,
+ * shift the later part(s) and continue */
+ uint64_t m = xl_start - (xl+xlsz);
+ assert(xl_start > xl+xlsz);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
+ free_xl_space(base, alloc, xl+xlsz, m);
+ move_xl_list(base, alloc, xl_start, xl_sz, m);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ }
+ xl_start = xl;
+ xl_sz += xlsz;
+ at = xl;
+ continue;
+ /* end of XL if */
+ } else if(exp < UDB_ALLOC_CHUNK_MINEXP
+ || exp > UDB_ALLOC_CHUNKS_MAX)
+ break; /* special chunk or garbage */
+ esz = (uint64_t)1<<exp;
+ last = at - esz;
+ assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
+ if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
+ /* if xlstart continue looking to move stuff, but do
+ * not unlink this free segment */
+ if(!xl_start) {
+ /* it is a free chunk, remove it */
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ udb_alloc_unlink_fl(base, alloc, last, exp);
+ alloc->disk->stat_free -= esz;
+ alloc->disk->nextgrow = last;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ /* and continue at this point */
+ }
+ at = last;
+ } else if( (e2=have_free_for(alloc, exp)) ) {
+ /* last entry can be allocated in free chunks
+ * move it to its new position, adjust rel_ptrs */
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
+ move_chunk(base, alloc, last, exp, esz, e2);
+ if(xl_start) {
+ last = coagulate_and_push(base, alloc,
+ last, exp, esz);
+ } else {
+ /* shorten usage */
+ alloc->disk->stat_free -= esz;
+ alloc->disk->nextgrow = last;
+ }
+ alloc->udb->glob_data->rb_old = 0;
+ alloc->udb->glob_data->rb_new = 0;
+ alloc->udb->glob_data->rb_size = 0;
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ /* and continue in front of it */
+ at = last;
+ } else {
+ /* cannot compact this block, stop compacting */
+ break;
+ }
+ /* if that worked, repeat it */
+ }
+ /* if we passed xl chunks, see if XL-chunklist can move */
+ if(xl_start) {
+ /* calculate free space in front of the XLchunklist. */
+ /* has to be whole mbs of free space */
+ /* if so, we can move the XL chunks. Move them all back
+ * by the new free space. */
+ /* this compacts very well, but the XL chunks can be moved
+ * multiple times; worst case for every mb freed a huge sized
+ * xlchunklist gets moved. */
+ /* free space must be, since aligned and coagulated, in
+ * chunks of a whole MB */
+ udb_void at = xl_start;
+ uint64_t m = 0;
+ while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
+ udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
+ if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
+ break;
+ assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
+ m += UDB_ALLOC_CHUNK_SIZE;
+ at = chunk;
+ }
+ if(m != 0) {
+ assert(at+m == xl_start);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
+ free_xl_space(base, alloc, at, m);
+ move_xl_list(base, alloc, xl_start, xl_sz, m);
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ }
+ }
+
+ /* if enough free, shrink the file; re-mmap */
+ if(enough_free(alloc)) {
+ uint64_t nsize = alloc->disk->nextgrow;
+ udb_base_shrink(alloc->udb, nsize);
+ if(!udb_base_remap(alloc->udb, alloc, nsize))
+ return 0;
+ }
+ return 1;
+}
+
+#ifdef UDB_CHECK
+/** check that rptrs are really zero before free */
+void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
+{
+ (void)base;
+ (void)arg;
+ assert(p->data == 0);
+}
+#endif /* UDB_CHECK */
+
+/** free XL chunk as multiples of CHUNK_SIZE free segments */
+static void
+udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
+ size_t sz)
+{
+ uint64_t xlsz = fp->size;
+ uint64_t c;
+ /* lightweight check for buffer overflow in xl data */
+ assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
+ assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
+ assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* whole mbs */
+ assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
+#ifdef UDB_CHECK
+ /* check that relptrs in this chunk have been zeroed */
+ (*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
+ UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
+ &udb_check_rptr_zero, NULL);
+#endif
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ /* update stats */
+ alloc->disk->stat_data -= sz;
+ alloc->disk->stat_alloc -= xlsz;
+ alloc->disk->stat_free += xlsz;
+ /* walk in reverse, so the front blocks go first on the list */
+ c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
+ /* because of header and alignment we know f >= UDB_ALLOC_CHUNK_SIZE*/
+ assert(f >= UDB_ALLOC_CHUNK_SIZE);
+ while(c >= f) {
+ /* free a block of CHUNK_SIZE (1 Mb) */
+ udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
+ c -= UDB_ALLOC_CHUNK_SIZE;
+ }
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+}
+
+int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
+{
+ void* base;
+ /* lookup chunk ptr */
+ udb_void f;
+ udb_chunk_d* fp;
+ uint64_t esz;
+ int exp;
+ udb_void other;
+ int coagulated = 0;
+ if(!r)
+ return 1; /* free(NULL) does nothing */
+
+ /* lookup size of chunk */
+ base = alloc->udb->base;
+ /* fails for XL blocks */
+ f = chunk_from_dataptr(r);
+ fp = UDB_CHUNK(f);
+ assert(fp->type != udb_chunk_type_free);
+
+ /* see if it has a ptrlist, if so: trouble, the list is not properly
+ * cleaned up. (although you can imagine a wholesale delete where
+ * it does not matter) */
+ assert(fp->ptrlist == 0);
+
+ /* set ptrlist to 0 to stop relptr from using it, robustness. */
+ fp->ptrlist = 0;
+
+ if(fp->exp == UDB_EXP_XL) {
+ udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
+ /* compact */
+ return udb_alloc_compact(base, alloc);
+ }
+ /* it is a regular chunk of 2**exp size */
+ exp = (int)fp->exp;
+ esz = (uint64_t)1<<exp;
+ /* light check for e.g. buffer overflow of the data */
+ assert(sz < esz);
+ assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
+#ifdef UDB_CHECK
+ /* check that relptrs in this chunk have been zeroed */
+ (*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
+ UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
+#endif
+
+ /* update the stats */
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
+ alloc->disk->stat_data -= sz;
+ alloc->disk->stat_free += esz;
+ alloc->disk->stat_alloc -= esz;
+
+ /* if it can be merged with other free chunks, do so */
+ while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
+ coagulated = 1;
+ /* unlink that other chunk and expand it (it has same size) */
+ udb_alloc_unlink_fl(base, alloc, other, exp);
+ /* merge up */
+ if(other < f)
+ f = other;
+ exp++;
+ esz <<= 1;
+ }
+ if(coagulated) {
+ /* put big free chunk into freelist, and init it */
+ udb_alloc_push_fl(base, alloc, f, exp);
+ } else {
+ /* we do not need to touch the last-exp-byte, which may save
+ * a reference to that page of memory */
+ fp->type = udb_chunk_type_free;
+ fp->flags = 0;
+ udb_alloc_push_fl_noinit(base, alloc, f, exp);
+ }
+ alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
+ /* compact */
+ return udb_alloc_compact(base, alloc);
+}
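+/* Editorial sketch, not part of the NSD sources: the merge loop above relies
+ * on coagulate_possible() (defined earlier in this file).  In a classic buddy
+ * allocator the merge partner of a 2**exp chunk at offset f is the offset
+ * with bit exp flipped; a minimal sketch of that address math, assuming udb
+ * follows the usual buddy scheme: */
+#if 0 /* illustrative only */
+static udb_void
+buddy_of(udb_void f, int exp)
+{
+	/* chunks are 2**exp aligned, so the buddy differs only in bit exp */
+	return f ^ ((udb_void)1 << exp);
+}
+#endif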
+
+udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
+{
+ /* could be faster maybe, if grown? */
+ udb_void r = udb_alloc_space(alloc, sz);
+ if(!r) return r;
+ memcpy(UDB_REL(alloc->udb->base, r), d, sz);
+ return r;
+}
+
+udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
+{
+ void* base = alloc->udb->base;
+ udb_void c, n, newd;
+ udb_chunk_d* cp, *np;
+ uint64_t avail;
+ uint8_t cp_type;
+ /* emulate some posix realloc stuff */
+ if(r == 0)
+ return udb_alloc_space(alloc, sz);
+ if(sz == 0) {
+ if(!udb_alloc_free(alloc, r, osz))
+ log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
+ return 0;
+ }
+ c = chunk_from_dataptr(r);
+ cp = UDB_CHUNK(c);
+ cp_type = cp->type;
+ if(cp->exp == UDB_EXP_XL) {
+ avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
+ - sizeof(uint64_t)*2;
+ } else {
+ avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
+ }
+ if(sz <= avail)
+ return r;
+ /* reallocate it, and copy */
+ newd = udb_alloc_space(alloc, sz);
+ if(!newd) return 0;
+ /* re-base after alloc, since re-mmap may have happened */
+ base = alloc->udb->base;
+ cp = NULL; /* may be invalid now, robustness */
+ n = chunk_from_dataptr(newd);
+ np = UDB_CHUNK(n);
+ np->type = cp_type;
+ memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
+ /* fixup ptrs */
+ chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);
+
+ if(!udb_alloc_free(alloc, r, osz))
+ log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
+ return newd;
+}
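+/* Editorial sketch, not part of the NSD sources: udb_alloc_realloc() above
+ * follows the posix realloc conventions -- r==0 allocates, sz==0 frees, and a
+ * chunk that still has room is returned unchanged.  A hypothetical caller
+ * ('alloc', 'old', 'oldsz', 'newsz' are assumed to exist): */
+#if 0 /* illustrative only */
+	udb_void d = udb_alloc_realloc(alloc, old, oldsz, newsz);
+	if(!d) {
+		/* allocation failed; 'old' is still valid and unchanged */
+	} else if(d != old) {
+		/* the chunk moved; relptrs into it were adjusted by
+		 * chunk_fix_ptrs above, but plain udb_void copies of 'old'
+		 * kept elsewhere must be updated by the caller */
+	}
+#endif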
+
+int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
+{
+ const uint64_t mb = 1024*1024;
+ int exp = udb_alloc_exp_needed(sz);
+ uint64_t esz;
+ uint64_t want;
+ if(exp == UDB_EXP_XL)
+ esz = (sz&(mb-1))+mb;
+ else esz = (uint64_t)1<<exp;
+ /* we need grow_end_calc to take into account alignment */
+ want = grow_end_calc(alloc, exp) + esz*(num-1);
+ assert(want >= alloc->udb->base_size);
+ if(!udb_base_grow_and_remap(alloc->udb, want)) {
+ log_msg(LOG_ERR, "failed to grow the specified amount");
+ return 0;
+ }
+ return 1;
+}
+
+void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
+{
+ void* base = alloc->udb->base;
+ udb_void f = chunk_from_dataptr(r);
+ udb_chunk_d* fp = UDB_CHUNK(f);
+ /* not the 'free' type, that must be set by allocation routines */
+ assert(fp->type != udb_chunk_type_free);
+ assert(tp != udb_chunk_type_free);
+ fp->type = tp;
+}
+
+int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
+{
+ /* pointers are not valid before the header-size or after the
+ * used-region of the mmap */
+ return ( (to+destsize) <= udb->base_size &&
+ to >= (udb->glob_data->hsize-2*sizeof(udb_rel_ptr)) &&
+ (to+destsize) <= udb->alloc->disk->nextgrow);
+}
+
+int udb_valid_dataptr(udb_base* udb, udb_void to)
+{
+ void* base = udb->base;
+ udb_void ch;
+ int exp;
+ uint64_t esz;
+ /* our data chunks are aligned and at least 8 bytes */
+ if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
+ return 0;
+ /* get the chunk pointer */
+ ch = chunk_from_dataptr(to);
+ if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
+ return 0;
+ /* check its size */
+ exp = UDB_CHUNK(ch)->exp;
+ if(exp == UDB_EXP_XL) {
+ /* check XL chunk */
+ uint64_t xlsz;
+ if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
+ return 0;
+ xlsz = UDB_XL_CHUNK(ch)->size;
+ if(!udb_valid_offset(udb, ch+xlsz-1, 1))
+ return 0;
+ if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
+ return 0;
+ if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
+ != xlsz)
+ return 0;
+ return 1;
+ }
+ /* check if regular chunk has matching end byte */
+ if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
+ return 0; /* cannot be a valid chunk */
+ esz = 1<<exp;
+ if(!udb_valid_offset(udb, ch+esz-1, 1))
+ return 0;
+ if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
+ return 0;
+ return 1;
+}
+
+int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
+{
+ void* base = udb->base;
+ udb_void p;
+ if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
+ return 0;
+ if(!udb_valid_dataptr(udb, to))
+ return 0;
+ p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist;
+ while(p) {
+ if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
+ return 0;
+ if(p == rptr)
+ return 1;
+ p = UDB_REL_PTR(p)->next;
+ }
+ return 0;
+}
+
+void udb_rel_ptr_init(udb_rel_ptr* ptr)
+{
+ memset(ptr, 0, sizeof(*ptr));
+}
+
+void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
+{
+ if(!ptr->data)
+ return;
+ if(ptr->prev) {
+ UDB_REL_PTR(ptr->prev)->next = ptr->next;
+ } else {
+ UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist = ptr->next;
+ }
+ if(ptr->next) {
+ UDB_REL_PTR(ptr->next)->prev = ptr->prev;
+ }
+}
+
+void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
+{
+ udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
+ ptr->prev = 0;
+ ptr->next = chunk->ptrlist;
+ if(ptr->next)
+ UDB_REL_PTR(ptr->next)->prev = UDB_SYSTOREL(base, ptr);
+ chunk->ptrlist = UDB_SYSTOREL(base, ptr);
+ ptr->data = to;
+}
+
+void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
+{
+ assert(to == 0 || to > 64);
+ udb_rel_ptr_unlink(base, ptr);
+ if(to)
+ udb_rel_ptr_link(base, ptr, to);
+ else ptr->data = to;
+}
+
+void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
+{
+ udb_void p = list;
+ while(p) {
+ UDB_REL_PTR(p)->data = to;
+ p = UDB_REL_PTR(p)->next;
+ }
+}
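+/* Editorial sketch, not part of the NSD sources: the routines above maintain,
+ * per data chunk, a doubly linked list of all relptrs that reference it (the
+ * chunk's ptrlist).  When a chunk has been moved, every referrer can be
+ * repointed with one call, roughly ('moved' is the copied chunk offset and
+ * 'newdata' the new offset of its data part; both are assumed here): */
+#if 0 /* illustrative only */
+	udb_rel_ptr_edit(base, UDB_CHUNK(moved)->ptrlist, newdata);
+#endif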
+
+#ifdef UDB_CHECK
+/** check that all pointers are validly chained */
+static void
+udb_check_ptrs_valid(udb_base* udb)
+{
+ size_t i;
+ udb_ptr* p, *prev;
+ for(i=0; i<udb->ram_size; i++) {
+ prev = NULL;
+ for(p=udb->ram_hash[i]; p; p=p->next) {
+ assert(p->prev == prev);
+ assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
+ == i);
+ assert(p->base == &udb->base);
+ prev = p;
+ }
+ }
+}
+#endif /* UDB_CHECK */
+
+void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
+{
+#ifdef UDB_CHECK
+ udb_check_ptrs_valid(udb); /* previous ptrs have been unlinked */
+#endif
+ memset(ptr, 0, sizeof(*ptr));
+ ptr->base = &udb->base;
+}
+
+void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
+{
+ assert(newval == 0 || newval > 64);
+ if(ptr->data)
+ udb_base_unlink_ptr(udb, ptr);
+ ptr->data = newval;
+ if(newval)
+ udb_base_link_ptr(udb, ptr);
+}
+
+int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
+ size_t sz)
+{
+ udb_void r;
+ r = udb_alloc_space(udb->alloc, sz);
+ if(!r) return 0;
+ udb_alloc_set_type(udb->alloc, r, type);
+ udb_ptr_init(ptr, udb);
+ udb_ptr_set(ptr, udb, r);
+ return 1;
+}
+
+void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
+{
+ if(ptr->data) {
+ udb_void d = ptr->data;
+ udb_ptr_set(ptr, udb, 0);
+ udb_alloc_free(udb->alloc, d, sz);
+ }
+}
+
+udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
+{
+ udb_void f;
+ if(!ptr || ptr->data == 0) return udb_chunk_type_internal; /* something bad*/
+ f = chunk_from_dataptr(ptr->data);
+ return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
+}
diff --git a/usr.sbin/nsd/udb.h b/usr.sbin/nsd/udb.h
new file mode 100644
index 00000000000..de7985275c2
--- /dev/null
+++ b/usr.sbin/nsd/udb.h
@@ -0,0 +1,784 @@
+/* udb.h - u(micro) data base, stores data and index information in mmap file.
+ * By W.C.A. Wijngaards
+ * Copyright 2010, NLnet Labs.
+ * BSD, see LICENSE.
+ */
+#ifndef UDB_H
+#define UDB_H
+#include <assert.h>
+
+/**
+ * The micro data base UDB.
+ *
+ * File data.udb is mmapped and used to lookup and edit.
+ * it contains a header with space-allocation-info, and a reference to the
+ * base information, an object that is the entry point for the file.
+ * Then it contains a lot of data and index objects.
+ *
+ * The space allocator is a 'buddy system' in 1-megabyte areas; larger
+ * allocations get their own area. So worst case is 2x data filesize
+ * (+header). Growth is semi-linear.
+ * Chunks have size and type (for recovery). Call to reserve space.
+ * Call to 'realloc-in-place', if space permits.
+ *
+ * Usually you want a record-type and its indexes (sorted) to be stored in
+ * the file. This is a table (named by string). The record is opaque
+ * data.
+ *
+ * To be able to use pointers in the mmapped file, there is conversion of
+ * relative-pointers(to file base) to system-pointers.
+ *
+ * If an item is moved its internal pointers need to be recalculated.
+ * Thus a recordtype (that has internal pointers) must provide a routine.
+ * Structures that are 'on-disk', are denoted with _d. Except rel_ptr which
+ * is also on-disk.
+ *
+ * About 64-bit trouble. The pointer-size with which the application is
+ * compiled determines the file layout, because this makes it perform well
+ * in a mmap. It could in theory be converted if you really wanted to.
+ * Nonpointer data is best stored as a fixed bitsize (uint8, 16, 32, 64).
+ */
+typedef struct udb_base udb_base;
+typedef struct udb_alloc udb_alloc;
+
+/** perform extra checks (when --enable-checking is used) */
+#ifndef NDEBUG
+#define UDB_CHECK 1
+#endif
+
+/** pointers are stored like this */
+typedef uint64_t udb_void;
+
+/** convert relptr to usable pointer */
+#define UDB_REL(base, relptr) ((base) + (relptr))
+/** from system pointer to relative pointer */
+#define UDB_SYSTOREL(base, ptr) ((udb_void)((void*)(ptr) - (base)))
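+/* Editorial sketch, not part of this header: the two macros above are the
+ * bridge between file offsets and system pointers.  Assuming an open udb
+ * ('udb') and a relative offset 'r' obtained from the allocator: */
+#if 0 /* illustrative only */
+	void* base = udb->base;			/* start of the mmap */
+	uint8_t* sysp = (uint8_t*)UDB_REL(base, r);	/* offset -> pointer */
+	udb_void back = UDB_SYSTOREL(base, sysp);	/* pointer -> offset */
+	assert(back == r);
+#endif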
+
+/** MAX 2**x exponent of alloced chunks, for 1Mbytes. The smallest
+ * chunk is 16bytes (8preamble+8data), so 0-3 is unused. */
+#define UDB_ALLOC_CHUNKS_MAX 20
+/** size of areas that are subdivided */
+#define UDB_ALLOC_CHUNK_SIZE ((uint64_t)1<<UDB_ALLOC_CHUNKS_MAX)
+/** the minimum alloc in exp, 2**x. 32bytes because of chunk_free_d size (8aligned) */
+#define UDB_ALLOC_CHUNK_MINEXP 5
+/** size of minimum alloc */
+#define UDB_ALLOC_CHUNK_MINSIZE ((uint64_t)1<<UDB_ALLOC_CHUNK_MINEXP)
+/** exp size used to mark the header (cannot be reallocated) */
+#define UDB_EXP_HEADER 0
+/** exp size used to mark XL(extralarge) allocations (in whole mbs) */
+#define UDB_EXP_XL 1
+
+typedef struct udb_ptr udb_ptr;
+/**
+ * This structure is there for when you want to have a pointer into
+ * the mmap-ed file. It is kept track of. Set it to NULL to unlink it.
+ * For pointers to the mmap-ed file from within the mmap-ed file, use the
+ * rel_ptr construct below.
+ */
+struct udb_ptr {
+ /** the data segment it points to (relative file offset) */
+ uint64_t data;
+ /** pointer to the base pointer (for convenience) */
+ void** base;
+ /** prev in udb_ptr list for this data segment */
+ udb_ptr* prev;
+ /** next in udb_ptr list for this data segment */
+ udb_ptr* next;
+};
+
+typedef struct udb_rel_ptr udb_rel_ptr;
+/**
+ * A relative pointer that keeps track of the list of pointers,
+ * so that it can be reallocated.
+ */
+struct udb_rel_ptr {
+ /** the relative pointer to the data itself (subtract chunk_d size
+ * to get the chunk_d type, this is for usage speed in dereferencing
+ * to the userdata). */
+ udb_void data;
+ /** udb_rel_ptr* prev in relptr list */
+ udb_void prev;
+ /** udb_rel_ptr* next in relptr list */
+ udb_void next;
+};
+
+/**
+ * This is the routine that is called for every relptr
+ * @param base: the baseptr for REL.
+ * @param p: the relptr, a real pointer to it.
+ * @param arg: user argument.
+ */
+typedef void udb_walk_relptr_cb(void*, udb_rel_ptr*, void*);
+
+/**
+ * This routine calls the callback for every relptr in a datablock
+ * params in order:
+ * base: the baseptr for REL macro.
+ * warg: the walkfunc user argument.
+ * t: the type of the chunk.
+ * d: pointer to the data part of the chunk (real pointer).
+ * s: max size of the data part.
+ * cb: the callback to call for every element.
+ * arg: user argument to pass to the callback.
+ */
+typedef void udb_walk_relptr_func(void*, void*, uint8_t, void*, uint64_t,
+ udb_walk_relptr_cb*, void*);
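+/* Editorial sketch, not part of this header: a record type that stores
+ * udb_rel_ptr fields must provide a walk routine of the type above so that
+ * the allocator can find and fix its relptrs when chunks move.  A minimal
+ * hypothetical example for a made-up record with one relptr: */
+#if 0 /* illustrative only */
+struct my_record_d {
+	udb_rel_ptr next;	/* relptr stored inside the mmapped file */
+	uint64_t value;
+};
+static void my_walkfunc(void* base, void* warg, uint8_t t, void* d,
+	uint64_t s, udb_walk_relptr_cb* cb, void* arg)
+{
+	(void)warg; (void)t; (void)s;
+	/* assume the chunk holds one my_record_d; report its relptr */
+	(*cb)(base, &((struct my_record_d*)d)->next, arg);
+}
+#endif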
+
+/** What sort of salvage should be performed by alloc */
+enum udb_dirty_alloc {
+ udb_dirty_clean = 0, /* all clean */
+ udb_dirty_fl, /* allocs, freelists are messed up */
+ udb_dirty_fsize, /* file size and fsize are messed up */
+ udb_dirty_compact /* allocs, freelists and relptrs are messed up */
+};
+
+typedef struct udb_glob_d udb_glob_d;
+/**
+ * The UDB global data for a file. This structure is mmapped.
+ * Make sure it has no structure-padding problems.
+ */
+struct udb_glob_d {
+ /** size of header in the file (offset to the first alloced chunk) */
+ uint64_t hsize;
+ /** version number of this file */
+ uint8_t version;
+ /** was the file not cleanly closed, 0 is ok */
+ uint8_t clean_close;
+ /** an allocation operation was in progress, file needs to be salvaged
+ * type enum udb_dirty_alloc */
+ uint8_t dirty_alloc;
+ /** user flags */
+ uint8_t userflags;
+ /** padding to 8-bytes alignment */
+ uint8_t pad1[4];
+ /** size to mmap */
+ uint64_t fsize;
+ /** chunk move rollback info: oldchunk (0 is nothing).
+ * volatile because these values prevent dataloss, they need to be
+ * written immediately. */
+ volatile udb_void rb_old;
+ /** chunk move rollback info: newchunk (0 is nothing) */
+ volatile udb_void rb_new;
+ /** size of move rollback chunks */
+ volatile uint64_t rb_size;
+ /** segment of move rollback, for an XL chunk that overlaps. */
+ volatile uint64_t rb_seg;
+ /** linked list for content-listing, 0 if empty */
+ udb_rel_ptr content_list;
+ /** user global data pointer */
+ udb_rel_ptr user_global;
+};
+
+/**
+ * The UDB database file. Contains all the data
+ */
+struct udb_base {
+ /** name of the file, alloced */
+ char* fname;
+
+ /** mmap base pointer (or NULL) */
+ void* base;
+ /** size of mmap */
+ size_t base_size;
+ /** fd of mmap (if -1, closed). */
+ int fd;
+
+ /** space allocator that is used for this base */
+ udb_alloc* alloc;
+ /** real pointer to the global data in the file */
+ udb_glob_d* glob_data;
+
+ /** store all linked udb_ptrs in this table, by hash(offset).
+ * then a linked list of ptrs (all that match the hash).
+ * this avoids buckets, and thus memory allocation. */
+ udb_ptr** ram_hash;
+ /** size of the current udb_ptr hashtable array */
+ size_t ram_size;
+	/** mask for the current udb_ptr hashtable lookups */
+ int ram_mask;
+ /** number of ptrs in ram, used to decide when to grow */
+ size_t ram_num;
+ /** for relocation, this walks through all relptrs in chunk */
+ udb_walk_relptr_func* walkfunc;
+ /** user data for walkfunc */
+ void* walkarg;
+};
+
+typedef enum udb_chunk_type udb_chunk_type;
+/** chunk type enum, setting these types help recovery and debug */
+enum udb_chunk_type {
+ udb_chunk_type_free = 0,
+ udb_chunk_type_data, /* alloced data */
+ udb_chunk_type_index,
+ udb_chunk_type_radtree,
+ udb_chunk_type_radnode,
+ udb_chunk_type_radarray,
+ udb_chunk_type_zone,
+ udb_chunk_type_domain,
+ udb_chunk_type_rrset,
+ udb_chunk_type_rr,
+ udb_chunk_type_task,
+ udb_chunk_type_internal
+};
+
+typedef struct udb_chunk_d udb_chunk_d;
+/**
+ * UDB chunk info (prepended for every allocated chunk).
+ * The chunks are in doublelinkedlists per size.
+ * At the end of the chunk another exp uint8 is stored (to walk backwards).
+ * 17 bytes overhead, datasize for 32byte chunk is 15.
+ */
+struct udb_chunk_d {
+ /** the size of this chunk (i.e. 2**x) */
+ uint8_t exp;
+ /** type for this chunk (enum chunktype; free, data or index) */
+ uint8_t type;
+ /** flags for this chunk */
+ uint8_t flags;
+ /** padding onto 8-alignment */
+ uint8_t pad[5];
+ /** udb_rel_ptr* first in list of rel-ptrs that point back here
+ * In the free chunk this is the previous pointer. */
+ udb_void ptrlist;
+ /* user data space starts here, 64-bit aligned */
+ uint8_t data[0];
+ /* last octet: exp of chunk */
+};
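+/* Editorial sketch, not part of this header: the usable payload of a regular
+ * chunk is its 2**exp size minus this 16-byte header and the trailing exp
+ * octet, which gives the 15 bytes for a 32-byte chunk mentioned above: */
+#if 0 /* illustrative only */
+static inline uint64_t chunk_payload(int exp) {
+	return ((uint64_t)1<<exp) - sizeof(struct udb_chunk_d) - 1;
+	/* chunk_payload(5) == 32 - 16 - 1 == 15 */
+}
+#endif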
+
+typedef struct udb_free_chunk_d udb_free_chunk_d;
+/**
+ * A free chunk. Same start as the udb_chunk_d. minsize is 32 bytes.
+ */
+struct udb_free_chunk_d {
+ /** the size of this chunk (i.e. 2**x) */
+ uint8_t exp;
+ /** type for this chunk (enum chunktype; free, data or index) */
+ uint8_t type;
+ /** flags for this chunk */
+ uint8_t flags;
+ /** padding onto 8-alignment */
+ uint8_t pad[5];
+ /** udb_chunk_d* prev of free list for this size */
+ udb_void prev;
+ /** udb_chunk_d* next of free list for this size */
+ udb_void next;
+ /* empty stuff */
+ /* last octet: exp of chunk */
+};
+
+typedef struct udb_xl_chunk_d udb_xl_chunk_d;
+/**
+ * an Extra Large (XL) chunk. Same start as the udb_chunk_d. Allocated in whole
+ * MAX_CHUNK_SIZE parts, whole megabytes. overhead is 5x8=40 bytes.
+ */
+struct udb_xl_chunk_d {
+ /** the size of this chunk (i.e. 2**x): special XL value */
+ uint8_t exp;
+ /** type for this chunk (enum chunktype; free, data or index) */
+ uint8_t type;
+ /** flags for this chunk */
+ uint8_t flags;
+ /** padding onto 8-alignment */
+ uint8_t pad[5];
+ /** udb_rel_ptr* first in list of rel-ptrs that point back here
+ * In the free chunk this is the previous pointer. */
+ udb_void ptrlist;
+ /** size of this chunk in bytes */
+ uint64_t size;
+ /** data of the XL chunk */
+ uint8_t data[0];
+ /* uint64_t endsize: before last octet the size again. */
+ /* uint8_t pad[7]: padding to make last octet last. */
+ /* last octet: exp of chunk: special XL value */
+};
+
+typedef struct udb_alloc_d udb_alloc_d;
+/**
+ * UDB alloc info on disk.
+ */
+struct udb_alloc_d {
+ /** stats: number of data bytes allocated, sum of sizes passed to alloc */
+ uint64_t stat_data;
+ /** stats: number of bytes in free chunks, sum of their 2**x size */
+ uint64_t stat_free;
+ /** stats: number of bytes in alloced chunks, sum of their 2**x size */
+ uint64_t stat_alloc;
+ /** offset to create next chunk at. can be before file-end, or be
+ * fsize, volatile because it is used as a 'commit', and thus we want
+ * this to be written to memory (and thus disk) immediately. */
+ volatile uint64_t nextgrow;
+	/** fixed size array that points to the 2**x size chunks in the file,
+ * This is the start of the doublelinked list, ptr to udb_free_chunk_d.
+ * array starts at UDB_ALLOC_CHUNK_MINEXP entry as [0]. */
+ udb_void free[UDB_ALLOC_CHUNKS_MAX-UDB_ALLOC_CHUNK_MINEXP+1];
+};
+
+/**
+ * The UDB space allocator. Assigns space in the file.
+ */
+struct udb_alloc {
+ /** the base this is part of */
+ udb_base* udb;
+ /** real pointer to space allocation info on disk; fixedsize struct */
+ udb_alloc_d* disk;
+};
+
+/**
+ * file header length, the file starts with
+ * 64bit: magic number to identify file (and prevent stupid mistakes)
+ * globdata: global data. Fixed size segment. (starts with size uint64)
+ * allocdata: alloc global data. Fixed size segment.
+ * size and 0 byte: end marker for reverse search.
+ */
+#define UDB_HEADER_SIZE (sizeof(uint64_t)+sizeof(udb_glob_d)+ \
+ sizeof(udb_alloc_d)+sizeof(uint64_t)*2)
+/** magic string that starts a UDB file, uint64_t, note first byte=0, to mark
+ * header start as a chunk. */
+#define UDB_MAGIC (((uint64_t)'u'<<48)|((uint64_t)'d'<<40)|((uint64_t)'b' \
+ <<32)|((uint64_t)'v'<<24)|((uint64_t)'0'<<16)|((uint64_t)'a'<<8))
+
+/* UDB BASE */
+/**
+ * Create udb base structure and attempt to read the file.
+ * @param fname: file name.
+ * @param walkfunc: function to walk through relptrs in chunk.
+ * @param arg: user argument to pass to walkfunc
+ * @return base structure or NULL on failure.
+ */
+udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
+ void* arg);
+
+/**
+ * Create udb base structure and create a new file.
+ * @param fname: file name.
+ * @param walkfunc: function to walk through relptrs in chunk.
+ * @param arg: user argument to pass to walkfunc
+ * @return base structure or NULL on failure.
+ */
+udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
+ void* arg);
+
+/**
+ * Create udb from (O_RDWR) fd.
+ * @param fname: file name.
+ * @param fd: file descriptor.
+ * @param walkfunc: function to walk through relptrs in chunk.
+ * @param arg: user argument to pass to walkfunc
+ * @return base structure or NULL on failure.
+ */
+udb_base* udb_base_create_fd(const char* fname, int fd,
+ udb_walk_relptr_func walkfunc, void* arg);
+
+/**
+ * Properly close the UDB base file. Separate from delete so the
+ * most important bits (write to disk, sockets) can be done first.
+ * @param udb: the udb.
+ */
+void udb_base_close(udb_base* udb);
+
+/**
+ * Free the data structure (and close if not already) the udb.
+ * @param udb: the udb.
+ */
+void udb_base_free(udb_base* udb);
+
+/**
+ * Free the udb, but keep mmap mapped for others.
+ * @param udb: the udb.
+ */
+void udb_base_free_keep_mmap(udb_base* udb);
+
+/**
+ * Sync the mmap.
+ * @param udb: the udb.
+ * @param wait: if true, the call blocks until synced.
+ */
+void udb_base_sync(udb_base* udb, int wait);
+
+/**
+ * The mmap size is updated to reflect changes by another process.
+ * @param udb: the udb.
+ */
+void udb_base_remap_process(udb_base* udb);
+
+/**
+ * get the user data (relative) pointer.
+ * @param udb: the udb.
+ * @return the userdata relative pointer, 0 means nothing.
+ */
+udb_rel_ptr* udb_base_get_userdata(udb_base* udb);
+
+/**
+ * Set the user data (relative) pointer.
+ * @param udb: the udb.
+ * @param user: user data. offset-pointer (or 0).
+ */
+void udb_base_set_userdata(udb_base* udb, udb_void user);
+
+/**
+ * Set the user flags (to any value, uint8).
+ * @param udb: the udb.
+ * @param v: new value.
+ */
+void udb_base_set_userflags(udb_base* udb, uint8_t v);
+
+/**
+ * Get the user flags.
+ * @param udb: the udb.
+ * @param v: new value.
+ */
+uint8_t udb_base_get_userflags(udb_base* udb);
+
+/**
+ * Not for users of udb_base, but for udb_ptr.
+ * Link in a new ptr that references a data segment.
+ * @param udb: the udb.
+ * @param ptr: to link in.
+ */
+void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr);
+
+/**
+ * Not for users of udb_base, but for udb_ptr.
+ * Unlink a ptr that references a data segment.
+ * @param udb: the udb.
+ * @param ptr: to unlink.
+ */
+void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr);
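+/* Editorial sketch, not part of this header: a typical lifecycle of a udb
+ * file with the base functions above.  'my_walkfunc' is the hypothetical walk
+ * routine sketched earlier and "zone.udb" is an assumed file name: */
+#if 0 /* illustrative only */
+	udb_base* udb = udb_base_create_read("zone.udb", my_walkfunc, NULL);
+	if(!udb)
+		udb = udb_base_create_new("zone.udb", my_walkfunc, NULL);
+	if(!udb)
+		return;			/* open and create both failed */
+	/* ... read and modify data via udb_ptr and the allocator ... */
+	udb_base_sync(udb, 1);		/* block until written to disk */
+	udb_base_close(udb);
+	udb_base_free(udb);
+#endif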
+
+/* UDB ALLOC */
+/**
+ * Utility for alloc, find 2**x size that is bigger than the given size.
+ * Does not work for amount==0.
+ * @param amount: amount of memory.
+ * @return x; the exponent where 2**x >= amount.
+ */
+int udb_exp_size(uint64_t amount);
+
+/**
+ * Utility for alloc, what is the size that the current offset supports
+ * as a maximum 2**x chunk.
+ * Does not work for offset = 0 (result is infinite).
+ * @param offset: the offset into the memory region.
+ * @return maximum exponent where 2**x fits the offset, thus
+ * offset % (2**x) == 0 and x cannot be larger.
+ */
+int udb_exp_offset(uint64_t offset);
+
+/**
+ * Convert pointer to the data part to a pointer to the base of the chunk.
+ * @param data: data part.
+ * @return pointer to the base of the chunk.
+ */
+udb_void chunk_from_dataptr_ext(udb_void data);
+
+/**
+ * Create an empty UDB allocation structure to write to disk to initialize the file.
+ * @param a: allocation structure to initialize. system pointer.
+ */
+void udb_alloc_init_new(udb_alloc_d* a);
+
+/**
+ * Create new udb allocator, with specific data on disk
+ * @param udb: the udb.
+ * @param disk: disk data.
+ * @return udb allocator or NULL on (malloc) failure.
+ */
+udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk);
+
+/**
+ * Free the udb allocator from memory.
+ * @param alloc: the udb space allocator.
+ */
+void udb_alloc_delete(udb_alloc* alloc);
+
+/**
+ * Allocate space on the disk.
+ * This may involve closing and reopening the mmap.
+ * @param alloc: the udb space allocator.
+ * @param sz: size you want to use.
+ * @return relative pointer (or 0 on alloc failure).
+ */
+udb_void udb_alloc_space(udb_alloc* alloc, size_t sz);
+
+/**
+ * Allocate space on disk, give already the data you want there.
+ * This may involve closing and reopening the mmap.
+ * @param alloc: the udb space allocator.
+ * @param d: data you want there (system pointer).
+ * @param sz: size you want to use.
+ * @return relative pointer (or 0 on alloc failure).
+ */
+udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz);
+
+/**
+ * free allocated space. It may shrink the file.
+ * This may involve closing and reopening the mmap.
+ * @param alloc: the udb space allocator.
+ * @param r: relative pointer to data you want to free.
+ * @param sz: the size of the data you stop using.
+ * @return false if the free failed, it failed the close and mmap.
+ */
+int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz);
+
+/**
+ * realloc an existing allocated space. It may grow the file.
+ * This may involve closing and reopening the mmap.
+ * It could also use the existing space where it is now.
+ * @param alloc: the udb space allocator.
+ * @param r: relative pointer to data you want to realloc.
+ * if 0 then this is alloc_space(), and osz is ignored.
+ * @param osz: the old size of the data.
+ * @param sz: the size of the data you want to get.
+ * if this is 0 then a free() is done, but please do it directly,
+ * as you then get a returnvalue (file errors).
+ * @return relative pointer (0 on alloc failure, same if not moved).
+ */
+udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz,
+ size_t sz);
+
+/**
+ * Prepare for a lot of new entries. Grow space for that.
+ * This can involve closing and reopening the mmap.
+ * This space (if large) is going to be released on next free() or close().
+ * @param alloc: the udb space allocator.
+ * @param sz: size of the entries.
+ * @param num: number of entries.
+ * @return false on failure to grow or re-mmap.
+ */
+int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num);
+
+/**
+ * Set the alloc type for a newly alloced piece of data
+ * @param alloc: the udb space allocator.
+ * @param r: relativeptr to the data.
+ * @param tp: the type of that block.
+ */
+void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp);
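+/* Editorial sketch, not part of this header: allocating raw space with the
+ * calls above ('udb' and 'my_record_d' are assumed); in practice the
+ * udb_ptr_alloc_space() convenience further below is usually easier because
+ * it also tracks the pointer across re-mmaps: */
+#if 0 /* illustrative only */
+	udb_void r = udb_alloc_space(udb->alloc, sizeof(struct my_record_d));
+	if(r) {
+		udb_alloc_set_type(udb->alloc, r, udb_chunk_type_data);
+		/* the mmap may have moved; fetch udb->base after the alloc */
+		memset(UDB_REL(udb->base, r), 0, sizeof(struct my_record_d));
+		/* ... use the space ... */
+		udb_alloc_free(udb->alloc, r, sizeof(struct my_record_d));
+	}
+#endif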
+
+/**
+ * See if a pointer could be valid (it points within valid space),
+ * for the given destination size. For debug checks.
+ * @param udb: the udb
+ * @param to: the ptr (offset).
+ * @param destsize: the size of the destination of the pointer.
+ * @return true if it points to a valid region.
+ */
+int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize);
+
+/**
+ * See if a pointer is valid (it points to a chunk). For debug checks.
+ * @param udb: the udb.
+ * @param to: the ptr (offset).
+ * @return true if it points to the start of a chunks data region.
+ */
+int udb_valid_dataptr(udb_base* udb, udb_void to);
+
+/**
+ * See if a pointer is on the relptrlist for dataptr. For debug checks.
+ * @param udb: the udb.
+ * @param rptr: the rel_ptr (offset).
+ * @param to: dataptr of the chunk on which ptrlist the rptr is searched.
+ * @return true if rptr is valid and on the ptrlist.
+ */
+int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to);
+
+/*** UDB_REL_PTR ***/
+/**
+ * Init a new UDB rel ptr at NULL.
+ * @param ptr: sysptr, becomes inited.
+ */
+void udb_rel_ptr_init(udb_rel_ptr* ptr);
+
+/**
+ * Unlink a UDB rel ptr.
+ * @param base: the udb base
+ * @param ptr: sysptr, unlinked
+ */
+void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr);
+
+/**
+ * Link a UDB rel ptr to a new chunk
+ * @param base: the udb base
+ * @param ptr: sysptr, linked to new value.
+ * @param to: the data to point to (relative ptr).
+ */
+void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to);
+
+/**
+ * Change rel ptr to a new value (point to another record)
+ * @param base: the udb base
+ * @param ptr: sysptr, points to new value.
+ * @param to: the data to point to (relative ptr).
+ */
+void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to);
+
+/**
+ * A chunk has moved and now edit all the relptrs in list to fix them up
+ * @param base: the udb base
+ * @param list: start of the ptr list
+ * @param to: where the chunk has moved to relptr to its userdata.
+ */
+void udb_rel_ptr_edit(void* base, udb_void list, udb_void to);
+
+/**
+ * Get system pointer. Assumes there is a variable named 'base'
+ * that points to the udb base.
+ * @param ptr: the relative pointer (a sysptr to it).
+ * @return void* to the data.
+ */
+#define UDB_SYSPTR(ptr) UDB_REL(base, (ptr)->data)
+
+/** get sys ptr for char* string */
+#define UDB_CHAR(ptr) ((char*)UDB_REL(base, ptr))
+/** get sys ptr for udb_rel_ptr */
+#define UDB_REL_PTR(ptr) ((udb_rel_ptr*)UDB_REL(base, ptr))
+/** get sys ptr for udb_glob_d */
+#define UDB_GLOB(ptr) ((udb_glob_d*)UDB_REL(base, ptr))
+/** get sys ptr for udb_chunk_d */
+#define UDB_CHUNK(ptr) ((udb_chunk_d*)UDB_REL(base, ptr))
+/** get sys ptr for udb_free_chunk_d */
+#define UDB_FREE_CHUNK(ptr) ((udb_free_chunk_d*)UDB_REL(base, ptr))
+/** get sys ptr for udb_xl_chunk_d */
+#define UDB_XL_CHUNK(ptr) ((udb_xl_chunk_d*)UDB_REL(base, ptr))
+
+/* udb_ptr */
+/**
+ * Initialize a udb ptr. Set to NULL (and thus not linked, so it can be deleted).
+ * You MUST set it to 0 before you stop using the ptr.
+ * @param ptr: the ptr to initialise (caller has allocated it).
+ * @param udb: the udb base to link it to.
+ */
+void udb_ptr_init(udb_ptr* ptr, udb_base* udb);
+
+/**
+ * Set udb ptr to a new value. If set to NULL you can delete it.
+ * @param ptr: the ptr.
+ * @param udb: the udb base to link up with that data segment's administration.
+ * @param newval: new value to point to (udb_void relative file offset to data).
+ */
+void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval);
+
+/** dereference udb_ptr */
+#define UDB_PTR(ptr) (UDB_REL(*((ptr)->base), (ptr)->data))
+
+/**
+ * Ease of use udb ptr, allocate space and return ptr to it
+ * You MUST udb_ptr_set it to 0 before you stop using the ptr.
+ * @param udb: udb base to use.
+ * @param ptr: ptr is overwritten, can be uninitialised.
+ * @param type: type of the allocation.
+ * You need a special type if the block contains udb_rel_ptr's.
+ * You can use udb_type_data for plain data.
+ * @param sz: amount to allocate.
+ * @return 0 on alloc failure.
+ */
+int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
+ size_t sz);
+
+/**
+ * Ease of use udb ptr, free space and set ptr to NULL (so it can be deleted).
+ * The space is freed on disk.
+ * @param ptr: the ptr.
+ * @param udb: udb base.
+ * @param sz: the size of the data you stop using.
+ */
+void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz);
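+/* Editorial sketch, not part of this header: the usual udb_ptr pattern with
+ * the two convenience calls above ('udb' and 'my_record_d' are assumed): */
+#if 0 /* illustrative only */
+	udb_ptr p;
+	if(udb_ptr_alloc_space(&p, udb, udb_chunk_type_data,
+		sizeof(struct my_record_d))) {
+		struct my_record_d* r = (struct my_record_d*)UDB_PTR(&p);
+		r->value = 1;
+		/* a linked ptr MUST be set to 0 before it goes out of scope */
+		udb_ptr_set(&p, udb, 0);
+	}
+#endif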
+
+/**
+ * Get a pointer to the data of the ptr, or use a macro to cast UDB_PTR to
+ * the type of your structure (.._d)
+ */
+static inline uint8_t* udb_ptr_data(udb_ptr* ptr) {
+ return (uint8_t*)UDB_PTR(ptr);
+}
+
+/**
+ * See if udb ptr is null
+ */
+static inline int udb_ptr_is_null(udb_ptr* ptr) {
+ return (ptr->data == 0);
+}
+
+/**
+ * Get the type of a udb_ptr chunk.
+ * @param ptr: udb pointer
+ * @return type of chunk */
+udb_chunk_type udb_ptr_get_type(udb_ptr* ptr);
+
+/** Ease of use, create new pointer to destination relptr
+ * You MUST udb_ptr_set it to 0 before you stop using the ptr. */
+static inline void udb_ptr_new(udb_ptr* ptr, udb_base* udb, udb_rel_ptr* d) {
+ udb_ptr_init(ptr, udb);
+ udb_ptr_set(ptr, udb, d->data);
+}
+
+/** Ease of use. Stop using this ptr */
+static inline void udb_ptr_unlink(udb_ptr* ptr, udb_base* udb) {
+ if(ptr->data)
+ udb_base_unlink_ptr(udb, ptr);
+}
+
+/* Ease of use. Assign rptr from rptr */
+static inline void udb_rptr_set_rptr(udb_rel_ptr* dest, udb_base* udb,
+ udb_rel_ptr* p) {
+#ifdef UDB_CHECK
+ if(dest->data) { assert(udb_valid_rptr(udb,
+ UDB_SYSTOREL(udb->base, dest), dest->data)); }
+ if(p->data) { assert(udb_valid_rptr(udb,
+ UDB_SYSTOREL(udb->base, p), p->data)); }
+#endif
+ udb_rel_ptr_set(udb->base, dest, p->data);
+}
+
+/* Ease of use. Assign rptr from ptr */
+static inline void udb_rptr_set_ptr(udb_rel_ptr* dest, udb_base* udb,
+ udb_ptr* p) {
+#ifdef UDB_CHECK
+ if(dest->data) { assert(udb_valid_rptr(udb,
+ UDB_SYSTOREL(udb->base, dest), dest->data)); }
+ if(p->data) { assert(udb_valid_dataptr(udb, p->data)); }
+#endif
+ udb_rel_ptr_set(udb->base, dest, p->data);
+}
+
+/* Ease of use. Assign ptr from rptr */
+static inline void udb_ptr_set_rptr(udb_ptr* dest, udb_base* udb,
+ udb_rel_ptr* p) {
+#ifdef UDB_CHECK
+ if(p->data) { assert(udb_valid_rptr(udb,
+ UDB_SYSTOREL(udb->base, p), p->data)); }
+#endif
+ udb_ptr_set(dest, udb, p->data);
+}
+
+/* Ease of use. Assign ptr from ptr */
+static inline void udb_ptr_set_ptr(udb_ptr* dest, udb_base* udb, udb_ptr* p) {
+ udb_ptr_set(dest, udb, p->data);
+}
+
+/* Ease of use, zero rptr. You use this to zero an existing pointer.
+ * A new rptr should be rel_ptr_init-ed before it is taken into use. */
+static inline void udb_rptr_zero(udb_rel_ptr* dest, udb_base* udb) {
+#ifdef UDB_CHECK
+ if(dest->data) { assert(udb_valid_rptr(udb,
+ UDB_SYSTOREL(udb->base, dest), dest->data)); }
+#endif
+ udb_rel_ptr_set(udb->base, dest, 0);
+}
+
+/* Ease of use, zero ptr */
+static inline void udb_ptr_zero(udb_ptr* dest, udb_base* udb) {
+ udb_ptr_set(dest, udb, 0);
+}
+
+/** ease of use, delete memory pointed at by relptr */
+static inline void udb_rel_ptr_free_space(udb_rel_ptr* ptr, udb_base* udb,
+ size_t sz) {
+ udb_void d = ptr->data;
+#ifdef UDB_CHECK
+ if(d) { assert(udb_valid_rptr(udb, UDB_SYSTOREL(udb->base, ptr), d)); }
+#endif
+ udb_rel_ptr_set(udb->base, ptr, 0);
+ udb_alloc_free(udb->alloc, d, sz);
+}
+
+#endif /* UDB_H */
diff --git a/usr.sbin/nsd/udbradtree.c b/usr.sbin/nsd/udbradtree.c
new file mode 100644
index 00000000000..d9be6b9c255
--- /dev/null
+++ b/usr.sbin/nsd/udbradtree.c
@@ -0,0 +1,1463 @@
+/*
+ * udbradtree -- radix tree for binary strings in a udb file.
+ *
+ * Copyright (c) 2011, NLnet Labs. See LICENSE for license.
+ */
+#include "config.h"
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include "udbradtree.h"
+#include "radtree.h"
+#define RADARRAY(ptr) ((struct udb_radarray_d*)UDB_PTR(ptr))
+
+/** see if radarray can be reduced (by a factor of two) */
+static int udb_radarray_reduce_if_needed(udb_base* udb, udb_ptr* n);
+
+int udb_radix_tree_create(udb_base* udb, udb_ptr* ptr)
+{
+ if(!udb_ptr_alloc_space(ptr, udb, udb_chunk_type_radtree,
+ sizeof(struct udb_radtree_d)))
+ return 0;
+ udb_rel_ptr_init(&RADTREE(ptr)->root);
+ RADTREE(ptr)->count = 0;
+ return 1;
+}
+
+/** size of radarray */
+static size_t size_of_radarray(struct udb_radarray_d* a)
+{
+ return sizeof(struct udb_radarray_d)+((size_t)a->capacity)*(
+ sizeof(struct udb_radsel_d)+(size_t)a->str_cap);
+}
+
+/** size in bytes of data in the array lookup structure */
+static size_t size_of_lookup(udb_ptr* node)
+{
+ assert(udb_ptr_get_type(node) == udb_chunk_type_radnode);
+ return size_of_radarray((struct udb_radarray_d*)UDB_REL(*node->base,
+ RADNODE(node)->lookup.data));
+}
+
+/** external variant, size in bytes of data in the array lookup structure */
+size_t size_of_lookup_ext(udb_ptr* lookup)
+{
+ return size_of_lookup(lookup);
+}
+
+/** size needed for a lookup array like this */
+static size_t size_of_lookup_needed(uint16_t capacity, udb_radstrlen_t str_cap)
+{
+ return sizeof(struct udb_radarray_d)+ ((size_t)capacity)*(
+ sizeof(struct udb_radsel_d)+(size_t)str_cap);
+}
+
+/** get the lookup array for a node */
+static struct udb_radarray_d* lookup(udb_ptr* n)
+{
+ assert(udb_ptr_get_type(n) == udb_chunk_type_radnode);
+ return (struct udb_radarray_d*)UDB_REL(*n->base,
+ RADNODE(n)->lookup.data);
+}
+
+/** get a length in the lookup array */
+static udb_radstrlen_t lookup_len(udb_ptr* n, unsigned i)
+{
+ return lookup(n)->array[i].len;
+}
+
+/** get a string in the lookup array */
+static uint8_t* lookup_string(udb_ptr* n, unsigned i)
+{
+ return ((uint8_t*)&(lookup(n)->array[lookup(n)->capacity]))+
+ i*lookup(n)->str_cap;
+}
+
+/** get a node in the lookup array */
+static struct udb_radnode_d* lookup_node(udb_ptr* n, unsigned i)
+{
+ return (struct udb_radnode_d*)UDB_REL(*n->base,
+ lookup(n)->array[i].node.data);
+}
+
+/** zero the relptrs in radarray */
+static void udb_radarray_zero_ptrs(udb_base* udb, udb_ptr* n)
+{
+ unsigned i;
+ for(i=0; i<lookup(n)->len; i++) {
+ udb_rptr_zero(&lookup(n)->array[i].node, udb);
+ }
+}
+
+/** delete a radnode */
+static void udb_radnode_delete(udb_base* udb, udb_ptr* n)
+{
+ if(udb_ptr_is_null(n))
+ return;
+ if(RADNODE(n)->lookup.data) {
+ udb_radarray_zero_ptrs(udb, n);
+ udb_rel_ptr_free_space(&RADNODE(n)->lookup, udb,
+ size_of_lookup(n));
+ }
+ udb_rptr_zero(&RADNODE(n)->lookup, udb);
+ udb_rptr_zero(&RADNODE(n)->parent, udb);
+ udb_rptr_zero(&RADNODE(n)->elem, udb);
+ udb_ptr_free_space(n, udb, sizeof(struct udb_radnode_d));
+}
+
+/** delete radnodes in postorder recursion, n is ptr to node */
+static void udb_radnode_del_postorder(udb_base* udb, udb_ptr* n)
+{
+ unsigned i;
+ udb_ptr sub;
+ if(udb_ptr_is_null(n))
+ return;
+ /* clear subnodes */
+ udb_ptr_init(&sub, udb);
+ for(i=0; i<lookup(n)->len; i++) {
+ udb_ptr_set_rptr(&sub, udb, &lookup(n)->array[i].node);
+ udb_rptr_zero(&lookup(n)->array[i].node, udb);
+ udb_radnode_del_postorder(udb, &sub);
+ }
+ udb_ptr_unlink(&sub, udb);
+ /* clear lookup */
+ udb_rel_ptr_free_space(&RADNODE(n)->lookup, udb, size_of_lookup(n));
+ udb_rptr_zero(&RADNODE(n)->parent, udb);
+ udb_rptr_zero(&RADNODE(n)->elem, udb);
+ udb_ptr_free_space(n, udb, sizeof(struct udb_radnode_d));
+}
+
+void udb_radix_tree_clear(udb_base* udb, udb_ptr* rt)
+{
+ udb_ptr root;
+ udb_ptr_new(&root, udb, &RADTREE(rt)->root);
+ udb_rptr_zero(&RADTREE(rt)->root, udb);
+ /* free the root node (and its descendants, if any) */
+ udb_radnode_del_postorder(udb, &root);
+ udb_ptr_unlink(&root, udb);
+
+ RADTREE(rt)->count = 0;
+}
+
+void udb_radix_tree_delete(udb_base* udb, udb_ptr* rt)
+{
+ if(rt->data == 0) return;
+ assert(udb_ptr_get_type(rt) == udb_chunk_type_radtree);
+ udb_radix_tree_clear(udb, rt);
+ udb_ptr_free_space(rt, udb, sizeof(struct udb_radtree_d));
+}
+
+/**
+ * Find a prefix of the key, in whole-nodes.
+ * Finds the longest prefix that corresponds to a whole radnode entry.
+ * There may be a slightly longer prefix in one of the array elements.
+ * @param result: the longest prefix, the entry itself if *respos==len,
+ * otherwise an array entry, residx. Output.
+ * @param respos: pos in string where next unmatched byte is, if == len an
+ * exact match has been found. If == 0 then a "" match was found.
+ * @return false if no prefix found, not even the root "" prefix.
+ */
+static int udb_radix_find_prefix_node(udb_base* udb, udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len, udb_ptr* result, udb_radstrlen_t* respos)
+{
+ udb_radstrlen_t pos = 0;
+ uint8_t byte;
+ udb_ptr n;
+ udb_ptr_new(&n, udb, &RADTREE(rt)->root);
+
+ *respos = 0;
+ udb_ptr_set_ptr(result, udb, &n);
+ if(udb_ptr_is_null(&n)) {
+ udb_ptr_unlink(&n, udb);
+ return 0;
+ }
+ while(!udb_ptr_is_null(&n)) {
+ if(pos == len) {
+ break;
+ }
+ byte = k[pos];
+ if(byte < RADNODE(&n)->offset) {
+ break;
+ }
+ byte -= RADNODE(&n)->offset;
+ if(byte >= lookup(&n)->len) {
+ break;
+ }
+ pos++;
+ if(lookup(&n)->array[byte].len != 0) {
+ /* must match additional string */
+ if(pos+lookup(&n)->array[byte].len > len) {
+ break;
+ }
+ if(memcmp(&k[pos], lookup_string(&n, byte),
+ lookup(&n)->array[byte].len) != 0) {
+ break;
+ }
+ pos += lookup(&n)->array[byte].len;
+ }
+ udb_ptr_set_rptr(&n, udb, &lookup(&n)->array[byte].node);
+ if(udb_ptr_is_null(&n)) {
+ break;
+ }
+ *respos = pos;
+ udb_ptr_set_ptr(result, udb, &n);
+ }
+ udb_ptr_unlink(&n, udb);
+ return 1;
+}
+
+/** grow the radnode stringcapacity, copy existing elements */
+static int udb_radnode_str_grow(udb_base* udb, udb_ptr* n, udb_radstrlen_t want)
+{
+ unsigned ns = ((unsigned)lookup(n)->str_cap)*2;
+ unsigned i;
+ udb_ptr a;
+ if(want > ns)
+ ns = want;
+ if(ns > 65535) ns = 65535; /* MAX of udb_radstrlen_t range */
+ /* if this fails, the tree is still usable */
+ if(!udb_ptr_alloc_space(&a, udb, udb_chunk_type_radarray,
+ size_of_lookup_needed(lookup(n)->capacity, ns)))
+ return 0;
+ /* make sure to zero the newly allocated relptrs to init them */
+ memcpy(RADARRAY(&a), lookup(n), sizeof(struct udb_radarray_d));
+ RADARRAY(&a)->str_cap = ns;
+ for(i = 0; i < lookup(n)->len; i++) {
+ udb_rel_ptr_init(&RADARRAY(&a)->array[i].node);
+ udb_rptr_set_rptr(&RADARRAY(&a)->array[i].node, udb,
+ &lookup(n)->array[i].node);
+ RADARRAY(&a)->array[i].len = lookup_len(n, i);
+ memmove(((uint8_t*)(&RADARRAY(&a)->array[
+ lookup(n)->capacity]))+i*ns,
+ lookup_string(n, i), lookup(n)->str_cap);
+ }
+ udb_radarray_zero_ptrs(udb, n);
+ udb_rel_ptr_free_space(&RADNODE(n)->lookup, udb, size_of_lookup(n));
+ udb_rptr_set_ptr(&RADNODE(n)->lookup, udb, &a);
+ udb_ptr_unlink(&a, udb);
+ return 1;
+}
+
+/** grow the radnode array, copy existing elements to start of new array */
+static int udb_radnode_array_grow(udb_base* udb, udb_ptr* n, size_t want)
+{
+ unsigned i;
+ unsigned ns = ((unsigned)lookup(n)->capacity)*2;
+ udb_ptr a;
+ assert(want <= 256); /* cannot be more, range of uint8 */
+ if(want > ns)
+ ns = want;
+ if(ns > 256) ns = 256;
+ /* if this fails, the tree is still usable */
+ if(!udb_ptr_alloc_space(&a, udb, udb_chunk_type_radarray,
+ size_of_lookup_needed(ns, lookup(n)->str_cap)))
+ return 0;
+ /* zero the newly allocated rel ptrs to init them */
+ memset(UDB_PTR(&a), 0, size_of_lookup_needed(ns, lookup(n)->str_cap));
+ assert(lookup(n)->len <= lookup(n)->capacity);
+ assert(lookup(n)->capacity < ns);
+ memcpy(RADARRAY(&a), lookup(n), sizeof(struct udb_radarray_d));
+ RADARRAY(&a)->capacity = ns;
+ for(i=0; i<lookup(n)->len; i++) {
+ udb_rptr_set_rptr(&RADARRAY(&a)->array[i].node, udb,
+ &lookup(n)->array[i].node);
+ RADARRAY(&a)->array[i].len = lookup_len(n, i);
+ }
+ memmove(&RADARRAY(&a)->array[ns], lookup_string(n, 0),
+ lookup(n)->len * lookup(n)->str_cap);
+ udb_radarray_zero_ptrs(udb, n);
+ udb_rel_ptr_free_space(&RADNODE(n)->lookup, udb, size_of_lookup(n));
+ udb_rptr_set_ptr(&RADNODE(n)->lookup, udb, &a);
+ udb_ptr_unlink(&a, udb);
+ return 1;
+}
+
+/** make empty array in radnode */
+static int udb_radnode_array_create(udb_base* udb, udb_ptr* n)
+{
+ /* is there an array? */
+ if(RADNODE(n)->lookup.data == 0) {
+ /* create array */
+ udb_ptr a;
+ uint16_t cap = 0;
+ udb_radstrlen_t len = 0;
+ if(!udb_ptr_alloc_space(&a, udb, udb_chunk_type_radarray,
+ size_of_lookup_needed(cap, len)))
+ return 0;
+ memset(UDB_PTR(&a), 0, size_of_lookup_needed(cap, len));
+ udb_rptr_set_ptr(&RADNODE(n)->lookup, udb, &a);
+ RADARRAY(&a)->len = cap;
+ RADARRAY(&a)->capacity = cap;
+ RADARRAY(&a)->str_cap = len;
+ RADNODE(n)->offset = 0;
+ udb_ptr_unlink(&a, udb);
+ }
+ return 1;
+}
+
+/** make space in radnode for another byte, or longer strings */
+static int udb_radnode_array_space(udb_base* udb, udb_ptr* n, uint8_t byte,
+ udb_radstrlen_t len)
+{
+ /* is there an array? */
+ if(RADNODE(n)->lookup.data == 0) {
+ /* create array */
+ udb_ptr a;
+ uint16_t cap = 1;
+ if(!udb_ptr_alloc_space(&a, udb, udb_chunk_type_radarray,
+ size_of_lookup_needed(cap, len)))
+ return 0;
+ /* this memset inits the relptr that is allocated */
+ memset(UDB_PTR(&a), 0, size_of_lookup_needed(cap, len));
+ udb_rptr_set_ptr(&RADNODE(n)->lookup, udb, &a);
+ RADARRAY(&a)->len = cap;
+ RADARRAY(&a)->capacity = cap;
+ RADARRAY(&a)->str_cap = len;
+ RADNODE(n)->offset = byte;
+ udb_ptr_unlink(&a, udb);
+ return 1;
+ }
+ if(lookup(n)->capacity == 0) {
+ if(!udb_radnode_array_grow(udb, n, 1))
+ return 0;
+ }
+
+ /* make space for this stringsize */
+ if(lookup(n)->str_cap < len) {
+ /* must resize for stringsize */
+ if(!udb_radnode_str_grow(udb, n, len))
+ return 0;
+ }
+
+ /* other cases */
+ /* is the array unused? */
+ if(lookup(n)->len == 0 && lookup(n)->capacity != 0) {
+ lookup(n)->len = 1;
+ RADNODE(n)->offset = byte;
+ memset(&lookup(n)->array[0], 0, sizeof(struct udb_radsel_d));
+ /* is it below the offset? */
+ } else if(byte < RADNODE(n)->offset) {
+ /* is capacity enough? */
+ int i;
+ unsigned need = RADNODE(n)->offset-byte;
+ if(lookup(n)->len+need > lookup(n)->capacity) {
+ /* grow array */
+ if(!udb_radnode_array_grow(udb, n, lookup(n)->len+need))
+ return 0;
+ }
+ /* take a piece of capacity into use, init the relptrs */
+ for(i = lookup(n)->len; i< (int)(lookup(n)->len + need); i++) {
+ udb_rel_ptr_init(&lookup(n)->array[i].node);
+ }
+ /* reshuffle items to end */
+ for(i = lookup(n)->len-1; i >= 0; i--) {
+ udb_rptr_set_rptr(&lookup(n)->array[need+i].node,
+ udb, &lookup(n)->array[i].node);
+ lookup(n)->array[need+i].len = lookup_len(n, i);
+ /* fixup pidx */
+ if(lookup(n)->array[i+need].node.data)
+ lookup_node(n, i+need)->pidx = i+need;
+ }
+ memmove(lookup_string(n, need), lookup_string(n, 0),
+ lookup(n)->len*lookup(n)->str_cap);
+ /* zero the first */
+ for(i = 0; i < (int)need; i++) {
+ udb_rptr_zero(&lookup(n)->array[i].node, udb);
+ lookup(n)->array[i].len = 0;
+ }
+ lookup(n)->len += need;
+ RADNODE(n)->offset = byte;
+ /* is it above the max? */
+ } else if(byte - RADNODE(n)->offset >= lookup(n)->len) {
+ /* is capacity enough? */
+ int i;
+ unsigned need = (byte-RADNODE(n)->offset) - lookup(n)->len + 1;
+ /* grow array */
+ if(lookup(n)->len + need > lookup(n)->capacity) {
+ if(!udb_radnode_array_grow(udb, n, lookup(n)->len+need))
+ return 0;
+ }
+ /* take new entries into use, init relptrs */
+ for(i = lookup(n)->len; i< (int)(lookup(n)->len + need); i++) {
+ udb_rel_ptr_init(&lookup(n)->array[i].node);
+ lookup(n)->array[i].len = 0;
+ }
+ /* grow length */
+ lookup(n)->len += need;
+ }
+ return 1;
+}
+
+/** make space for string size */
+static int udb_radnode_str_space(udb_base* udb, udb_ptr* n, udb_radstrlen_t len)
+{
+ if(RADNODE(n)->lookup.data == 0) {
+ return udb_radnode_array_space(udb, n, 0, len);
+ }
+ if(lookup(n)->str_cap < len) {
+ /* must resize for stringsize */
+ if(!udb_radnode_str_grow(udb, n, len))
+ return 0;
+ }
+ return 1;
+}
+
+/** copy remainder from prefixes for a split:
+ * plen: len prefix, l: longer bstring, llen: length of l. */
+static void udb_radsel_prefix_remainder(udb_radstrlen_t plen,
+ uint8_t* l, udb_radstrlen_t llen,
+ uint8_t* s, udb_radstrlen_t* slen)
+{
+ *slen = llen - plen;
+ /* assert(*slen <= lookup(n)->str_cap); */
+ memmove(s, l+plen, llen-plen);
+}
+
+/** create a prefix in the array strs */
+static void udb_radsel_str_create(uint8_t* s, udb_radstrlen_t* slen,
+ uint8_t* k, udb_radstrlen_t pos, udb_radstrlen_t len)
+{
+ *slen = len-pos;
+ /* assert(*slen <= lookup(n)->str_cap); */
+ memmove(s, k+pos, len-pos);
+}
+
+static udb_radstrlen_t
+udb_bstr_common(uint8_t* x, udb_radstrlen_t xlen,
+ uint8_t* y, udb_radstrlen_t ylen)
+{
+ assert(sizeof(radstrlen_t) == sizeof(udb_radstrlen_t));
+ return bstr_common_ext(x, xlen, y, ylen);
+}
+
+static int
+udb_bstr_is_prefix(uint8_t* p, udb_radstrlen_t plen,
+ uint8_t* x, udb_radstrlen_t xlen)
+{
+ assert(sizeof(radstrlen_t) == sizeof(udb_radstrlen_t));
+ return bstr_is_prefix_ext(p, plen, x, xlen);
+}
+
+/** grow array space for byte N after a string, (but if string shorter) */
+static int
+udb_radnode_array_space_strremain(udb_base* udb, udb_ptr* n,
+ uint8_t* str, udb_radstrlen_t len, udb_radstrlen_t pos)
+{
+ assert(pos < len);
+ /* shift by one char because it goes in lookup array */
+ return udb_radnode_array_space(udb, n, str[pos], len-(pos+1));
+}
+
+
+/** radsel create a split when two nodes have shared prefix.
+ * @param udb: udb
+ * @param n: node with the radsel that gets changed, it contains a node.
+ * @param idx: the index of the radsel that gets changed.
+ * @param k: key byte string
+ * @param pos: position where the string enters the radsel (e.g. r.str)
+ * @param len: length of k.
+ * @param add: additional node for the string k.
+ * removed by caller on failure.
+ * @return false on alloc failure, no changes made.
+ */
+static int udb_radsel_split(udb_base* udb, udb_ptr* n, uint8_t idx, uint8_t* k,
+ udb_radstrlen_t pos, udb_radstrlen_t len, udb_ptr* add)
+{
+ uint8_t* addstr = k+pos;
+ udb_radstrlen_t addlen = len-pos;
+ if(udb_bstr_is_prefix(addstr, addlen, lookup_string(n, idx),
+ lookup_len(n, idx))) {
+ udb_radstrlen_t split_len = 0;
+ /* 'add' is a prefix of r.node */
+ /* also for empty addstr */
+ /* set it up so that the 'add' node has r.node as child */
+ /* so, r.node gets moved below the 'add' node, but we do
+ * this so that the r.node stays the same pointer for its
+ * key name */
+ assert(addlen != lookup_len(n, idx));
+ assert(addlen < lookup_len(n, idx));
+ /* make space for new string sizes */
+ if(!udb_radnode_str_space(udb, n, addlen))
+ return 0;
+ if(lookup_len(n, idx) - addlen > 1)
+ /* shift one because a char is in the lookup array */
+ split_len = lookup_len(n, idx) - (addlen+1);
+ if(!udb_radnode_array_space(udb, add,
+ lookup_string(n, idx)[addlen], split_len))
+ return 0;
+ /* alloc succeeded, now link it in */
+ udb_rptr_set_rptr(&RADNODE(add)->parent, udb,
+ &lookup_node(n, idx)->parent);
+ RADNODE(add)->pidx = lookup_node(n, idx)->pidx;
+ udb_rptr_set_rptr(&lookup(add)->array[0].node, udb,
+ &lookup(n)->array[idx].node);
+ if(lookup_len(n, idx) - addlen > 1) {
+ udb_radsel_prefix_remainder(addlen+1,
+ lookup_string(n, idx), lookup_len(n, idx),
+ lookup_string(add, 0),
+ &lookup(add)->array[0].len);
+ } else {
+ lookup(add)->array[0].len = 0;
+ }
+ udb_rptr_set_ptr(&lookup_node(n, idx)->parent, udb, add);
+ lookup_node(n, idx)->pidx = 0;
+
+ udb_rptr_set_ptr(&lookup(n)->array[idx].node, udb, add);
+ memmove(lookup_string(n, idx), addstr, addlen);
+ lookup(n)->array[idx].len = addlen;
+ /* n's string may have become shorter */
+ if(!udb_radarray_reduce_if_needed(udb, n)) {
+ /* ignore this, our tree has become inefficient */
+ }
+ } else if(udb_bstr_is_prefix(lookup_string(n, idx), lookup_len(n, idx),
+ addstr, addlen)) {
+ udb_radstrlen_t split_len = 0;
+ udb_ptr rnode;
+ /* r.node is a prefix of 'add' */
+ /* set it up so that the 'r.node' has 'add' as child */
+ /* and basically, r.node is already completely fine,
+ * we only need to create a node as its child */
+ assert(addlen != lookup_len(n, idx));
+ assert(lookup_len(n, idx) < addlen);
+ udb_ptr_new(&rnode, udb, &lookup(n)->array[idx].node);
+ /* make space for string length */
+ if(addlen-lookup_len(n, idx) > 1) {
+ /* shift one because a character goes into array */
+ split_len = addlen - (lookup_len(n, idx)+1);
+ }
+ if(!udb_radnode_array_space(udb, &rnode,
+ addstr[lookup_len(n, idx)], split_len)) {
+ udb_ptr_unlink(&rnode, udb);
+ return 0;
+ }
+ /* alloc succeeded, now link it in */
+ udb_rptr_set_ptr(&RADNODE(add)->parent, udb, &rnode);
+ RADNODE(add)->pidx = addstr[lookup_len(n, idx)] -
+ RADNODE(&rnode)->offset;
+ udb_rptr_set_ptr(&lookup(&rnode)->array[ RADNODE(add)->pidx ]
+ .node, udb, add);
+ if(addlen-lookup_len(n, idx) > 1) {
+ udb_radsel_prefix_remainder(lookup_len(n, idx)+1,
+ addstr, addlen,
+ lookup_string(&rnode, RADNODE(add)->pidx),
+ &lookup(&rnode)->array[ RADNODE(add)->pidx]
+ .len);
+ } else {
+ lookup(&rnode)->array[ RADNODE(add)->pidx].len = 0;
+ }
+ /* rnode's string has become shorter */
+ if(!udb_radarray_reduce_if_needed(udb, &rnode)) {
+ /* ignore this, our tree has become inefficient */
+ }
+ udb_ptr_unlink(&rnode, udb);
+ } else {
+ /* okay we need to create a new node that chooses between
+ * the nodes 'add' and r.node
+ * We do this so that r.node stays the same pointer for its
+ * key name. */
+ udb_ptr com, rnode;
+ udb_radstrlen_t common_len = udb_bstr_common(
+ lookup_string(n, idx), lookup_len(n, idx),
+ addstr, addlen);
+ assert(common_len < lookup_len(n, idx));
+ assert(common_len < addlen);
+ udb_ptr_new(&rnode, udb, &lookup(n)->array[idx].node);
+
+ /* create the new node for choice */
+ if(!udb_ptr_alloc_space(&com, udb, udb_chunk_type_radnode,
+ sizeof(struct udb_radnode_d))) {
+ udb_ptr_unlink(&rnode, udb);
+ return 0; /* out of space */
+ }
+ memset(UDB_PTR(&com), 0, sizeof(struct udb_radnode_d));
+ /* make stringspace for the two substring choices */
+ /* this allocates the com->lookup array */
+ if(!udb_radnode_array_space_strremain(udb, &com,
+ lookup_string(n, idx), lookup_len(n, idx), common_len)
+ || !udb_radnode_array_space_strremain(udb, &com,
+ addstr, addlen, common_len)) {
+ udb_ptr_unlink(&rnode, udb);
+ udb_radnode_delete(udb, &com);
+ return 0;
+ }
+ /* create stringspace for the shared prefix */
+ if(common_len > 0) {
+ if(!udb_radnode_str_space(udb, n, common_len-1)) {
+ udb_ptr_unlink(&rnode, udb);
+ udb_radnode_delete(udb, &com);
+ return 0;
+ }
+ }
+ /* allocs succeeded, proceed to link it all up */
+ udb_rptr_set_rptr(&RADNODE(&com)->parent, udb,
+ &RADNODE(&rnode)->parent);
+ RADNODE(&com)->pidx = RADNODE(&rnode)->pidx;
+ udb_rptr_set_ptr(&RADNODE(&rnode)->parent, udb, &com);
+ RADNODE(&rnode)->pidx = lookup_string(n, idx)[common_len] -
+ RADNODE(&com)->offset;
+ udb_rptr_set_ptr(&RADNODE(add)->parent, udb, &com);
+ RADNODE(add)->pidx = addstr[common_len] -
+ RADNODE(&com)->offset;
+ udb_rptr_set_ptr(&lookup(&com)->array[RADNODE(&rnode)->pidx]
+ .node, udb, &rnode);
+ if(lookup_len(n, idx)-common_len > 1) {
+ udb_radsel_prefix_remainder(common_len+1,
+ lookup_string(n, idx), lookup_len(n, idx),
+ lookup_string(&com, RADNODE(&rnode)->pidx),
+ &lookup(&com)->array[RADNODE(&rnode)->pidx].len);
+ } else {
+ lookup(&com)->array[RADNODE(&rnode)->pidx].len= 0;
+ }
+ udb_rptr_set_ptr(&lookup(&com)->array[RADNODE(add)->pidx]
+ .node, udb, add);
+ if(addlen-common_len > 1) {
+ udb_radsel_prefix_remainder(common_len+1,
+ addstr, addlen,
+ lookup_string(&com, RADNODE(add)->pidx),
+ &lookup(&com)->array[RADNODE(add)->pidx].len);
+ } else {
+ lookup(&com)->array[RADNODE(add)->pidx].len = 0;
+ }
+ memmove(lookup_string(n, idx), addstr, common_len);
+ lookup(n)->array[idx].len = common_len;
+ udb_rptr_set_ptr(&lookup(n)->array[idx].node, udb, &com);
+ udb_ptr_unlink(&rnode, udb);
+ udb_ptr_unlink(&com, udb);
+ /* n's string has become shorter */
+ if(!udb_radarray_reduce_if_needed(udb, n)) {
+ /* ignore this, our tree has become inefficient */
+ }
+ }
+ return 1;
+}
+
+udb_void udb_radix_insert(udb_base* udb, udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len, udb_ptr* elem, udb_ptr* result)
+{
+ udb_void ret;
+ udb_ptr add, n; /* type udb_radnode_d */
+ udb_radstrlen_t pos = 0;
+ /* create new element to add */
+ if(!udb_ptr_alloc_space(&add, udb, udb_chunk_type_radnode,
+ sizeof(struct udb_radnode_d))) {
+ return 0; /* alloc failure */
+ }
+ memset(UDB_PTR(&add), 0, sizeof(struct udb_radnode_d));
+ udb_rptr_set_ptr(&RADNODE(&add)->elem, udb, elem);
+ if(!udb_radnode_array_create(udb, &add)) {
+ udb_ptr_free_space(&add, udb, sizeof(struct udb_radnode_d));
+ return 0; /* alloc failure */
+ }
+ udb_ptr_init(&n, udb);
+
+ /* find out where to add it */
+ if(!udb_radix_find_prefix_node(udb, rt, k, len, &n, &pos)) {
+ /* new root */
+ assert(RADTREE(rt)->root.data == 0);
+ if(len == 0) {
+ udb_rptr_set_ptr(&RADTREE(rt)->root, udb, &add);
+ } else {
+ /* add a root to point to new node */
+ udb_ptr_zero(&n, udb);
+ if(!udb_ptr_alloc_space(&n, udb,
+ udb_chunk_type_radnode,
+ sizeof(struct udb_radnode_d))) {
+ udb_radnode_delete(udb, &add);
+ udb_ptr_unlink(&n, udb);
+ return 0; /* alloc failure */
+ }
+ memset(RADNODE(&n), 0, sizeof(struct udb_radnode_d));
+ /* this creates the array lookup structure for n */
+ if(!udb_radnode_array_space(udb, &n, k[0], len-1)) {
+ udb_radnode_delete(udb, &add);
+ udb_ptr_free_space(&n, udb,
+ sizeof(struct udb_radnode_d));
+ return 0; /* alloc failure */
+ }
+ udb_rptr_set_ptr(&RADNODE(&add)->parent, udb, &n);
+ RADNODE(&add)->pidx = 0;
+ udb_rptr_set_ptr(&lookup(&n)->array[0].node, udb, &add);
+ if(len > 1) {
+ udb_radsel_prefix_remainder(1, k, len,
+ lookup_string(&n, 0),
+ &lookup(&n)->array[0].len);
+ }
+ udb_rptr_set_ptr(&RADTREE(rt)->root, udb, &n);
+ }
+ } else if(pos == len) {
+ /* found an exact match */
+ if(RADNODE(&n)->elem.data) {
+ /* already exists, failure */
+ udb_radnode_delete(udb, &add);
+ udb_ptr_unlink(&n, udb);
+ return 0;
+ }
+ udb_rptr_set_ptr(&RADNODE(&n)->elem, udb, elem);
+ udb_radnode_delete(udb, &add);
+ udb_ptr_set_ptr(&add, udb, &n);
+ } else {
+		/* n is a node which can accommodate */
+ uint8_t byte;
+ assert(pos < len);
+ byte = k[pos];
+
+ /* see if it falls outside of array */
+ if(byte < RADNODE(&n)->offset || byte-RADNODE(&n)->offset >=
+ lookup(&n)->len) {
+ /* make space in the array for it; adjusts offset */
+ if(!udb_radnode_array_space(udb, &n, byte,
+ len-(pos+1))) {
+ udb_radnode_delete(udb, &add);
+ udb_ptr_unlink(&n, udb);
+ return 0;
+ }
+ assert(byte>=RADNODE(&n)->offset && byte-RADNODE(&n)->
+ offset<lookup(&n)->len);
+ byte -= RADNODE(&n)->offset;
+ /* see if more prefix needs to be split off */
+ if(pos+1 < len) {
+ udb_radsel_str_create(lookup_string(&n, byte),
+ &lookup(&n)->array[byte].len,
+ k, pos+1, len);
+ }
+ /* insert the new node in the new bucket */
+ udb_rptr_set_ptr(&RADNODE(&add)->parent, udb, &n);
+ RADNODE(&add)->pidx = byte;
+ udb_rptr_set_ptr(&lookup(&n)->array[byte].node, udb,
+ &add);
+ /* so a bucket exists and byte falls in it */
+ } else if(lookup(&n)->array[byte - RADNODE(&n)->offset]
+ .node.data == 0) {
+ /* use existing bucket */
+ byte -= RADNODE(&n)->offset;
+ if(pos+1 < len) {
+ /* make space and split off more prefix */
+ if(!udb_radnode_str_space(udb, &n,
+ len-(pos+1))) {
+ udb_radnode_delete(udb, &add);
+ udb_ptr_unlink(&n, udb);
+ return 0;
+ }
+ udb_radsel_str_create(lookup_string(&n, byte),
+ &lookup(&n)->array[byte].len,
+ k, pos+1, len);
+ }
+ /* insert the new node in the new bucket */
+ udb_rptr_set_ptr(&RADNODE(&add)->parent, udb, &n);
+ RADNODE(&add)->pidx = byte;
+ udb_rptr_set_ptr(&lookup(&n)->array[byte].node, udb,
+ &add);
+ } else {
+ /* use bucket but it has a shared prefix,
+ * split that out and create a new intermediate
+ * node to split out between the two.
+ * One of the two might exactmatch the new
+ * intermediate node */
+ if(!udb_radsel_split(udb, &n, byte-RADNODE(&n)->offset,
+ k, pos+1, len, &add)) {
+ udb_radnode_delete(udb, &add);
+ udb_ptr_unlink(&n, udb);
+ return 0;
+ }
+ }
+ }
+ RADTREE(rt)->count ++;
+ ret = add.data;
+ udb_ptr_init(result, udb);
+ udb_ptr_set_ptr(result, udb, &add);
+ udb_ptr_unlink(&add, udb);
+ udb_ptr_unlink(&n, udb);
+ return ret;
+}
+
+/** Cleanup node with one child, it is removed and joined into parent[x] str */
+static int
+udb_radnode_cleanup_onechild(udb_base* udb, udb_ptr* n)
+{
+ udb_ptr par, child;
+ uint8_t pidx = RADNODE(n)->pidx;
+ radstrlen_t joinlen;
+ udb_ptr_new(&par, udb, &RADNODE(n)->parent);
+ udb_ptr_new(&child, udb, &lookup(n)->array[0].node);
+
+ /* node had one child, merge them into the parent. */
+ /* keep the child node, so its pointers stay valid. */
+
+ /* at parent, append child->str to array str */
+ assert(pidx < lookup(&par)->len);
+ joinlen = lookup_len(&par, pidx) + lookup_len(n, 0) + 1;
+ /* make stringspace for the joined string */
+ if(!udb_radnode_str_space(udb, &par, joinlen)) {
+ /* cleanup failed due to out of memory */
+ /* the tree is inefficient, with node n still existing */
+ udb_ptr_unlink(&par, udb);
+ udb_ptr_unlink(&child, udb);
+ udb_ptr_zero(n, udb);
+ return 0;
+ }
+ /* the string(par, pidx) is already there */
+ /* the array lookup is gone, put its character in the lookup string*/
+ lookup_string(&par, pidx)[lookup_len(&par, pidx)] =
+ RADNODE(&child)->pidx + RADNODE(n)->offset;
+ memmove(lookup_string(&par, pidx)+lookup_len(&par, pidx)+1,
+ lookup_string(n, 0), lookup_len(n, 0));
+ lookup(&par)->array[pidx].len = joinlen;
+ /* and set the node to our child. */
+ udb_rptr_set_ptr(&lookup(&par)->array[pidx].node, udb, &child);
+ udb_rptr_set_ptr(&RADNODE(&child)->parent, udb, &par);
+ RADNODE(&child)->pidx = pidx;
+ /* we are unlinked, delete our node */
+ udb_radnode_delete(udb, n);
+ udb_ptr_unlink(&par, udb);
+ udb_ptr_unlink(&child, udb);
+ udb_ptr_zero(n, udb);
+ return 1;
+}
+
+/** reduce the size of radarray, does a malloc */
+static int
+udb_radarray_reduce(udb_base* udb, udb_ptr* n, uint16_t cap,
+ udb_radstrlen_t strcap)
+{
+ udb_ptr a;
+ unsigned i;
+ assert(lookup(n)->len <= cap);
+ assert(cap <= lookup(n)->capacity);
+ assert(strcap <= lookup(n)->str_cap);
+ if(!udb_ptr_alloc_space(&a, udb, udb_chunk_type_radarray,
+ size_of_lookup_needed(cap, strcap)))
+ return 0;
+ memset(RADARRAY(&a), 0, size_of_lookup_needed(cap, strcap));
+ memcpy(RADARRAY(&a), lookup(n), sizeof(struct udb_radarray_d));
+ RADARRAY(&a)->capacity = cap;
+ RADARRAY(&a)->str_cap = strcap;
+ for(i=0; i<lookup(n)->len; i++) {
+ udb_rel_ptr_init(&RADARRAY(&a)->array[i].node);
+ udb_rptr_set_rptr(&RADARRAY(&a)->array[i].node, udb,
+ &lookup(n)->array[i].node);
+ RADARRAY(&a)->array[i].len = lookup_len(n, i);
+ memmove(((uint8_t*)(&RADARRAY(&a)->array[cap]))+i*strcap,
+ lookup_string(n, i), lookup_len(n, i));
+ }
+ udb_radarray_zero_ptrs(udb, n);
+ udb_rel_ptr_free_space(&RADNODE(n)->lookup, udb, size_of_lookup(n));
+ udb_rptr_set_ptr(&RADNODE(n)->lookup, udb, &a);
+ udb_ptr_unlink(&a, udb);
+ return 1;
+}
+
+/** find the max stringlength in the array */
+static udb_radstrlen_t udb_radarray_max_len(udb_ptr* n)
+{
+ unsigned i;
+ udb_radstrlen_t maxlen = 0;
+ for(i=0; i<lookup(n)->len; i++) {
+ if(lookup(n)->array[i].node.data &&
+ lookup(n)->array[i].len > maxlen)
+ maxlen = lookup(n)->array[i].len;
+ }
+ return maxlen;
+}
+
+/** see if radarray can be reduced (by a factor of two) */
+static int
+udb_radarray_reduce_if_needed(udb_base* udb, udb_ptr* n)
+{
+ udb_radstrlen_t maxlen = udb_radarray_max_len(n);
+ if((lookup(n)->len <= lookup(n)->capacity/2 || lookup(n)->len == 0
+ || maxlen <= lookup(n)->str_cap/2 || maxlen == 0) &&
+ (lookup(n)->len != lookup(n)->capacity ||
+ lookup(n)->str_cap != maxlen))
+ return udb_radarray_reduce(udb, n, lookup(n)->len, maxlen);
+ return 1;
+}
+
+static int
+udb_radnode_array_clean_all(udb_base* udb, udb_ptr* n)
+{
+ RADNODE(n)->offset = 0;
+ lookup(n)->len = 0;
+ /* reallocate lookup to a smaller capacity structure */
+ return udb_radarray_reduce(udb, n, 0, 0);
+}
+
+/** remove NULL nodes from front of array */
+static int
+udb_radnode_array_clean_front(udb_base* udb, udb_ptr* n)
+{
+ /* move them up and adjust offset */
+ unsigned idx, shuf = 0;
+ /* remove until a nonNULL entry */
+ while(shuf < lookup(n)->len && lookup(n)->array[shuf].node.data == 0)
+ shuf++;
+ if(shuf == 0)
+ return 1;
+ if(shuf == lookup(n)->len) {
+ /* the array is empty, the tree is inefficient */
+ return udb_radnode_array_clean_all(udb, n);
+ }
+ assert(shuf < lookup(n)->len);
+ assert((int)shuf <= 255-(int)RADNODE(n)->offset);
+ /* move them */
+ for(idx=0; idx<lookup(n)->len-shuf; idx++) {
+ udb_rptr_set_rptr(&lookup(n)->array[idx].node, udb,
+ &lookup(n)->array[shuf+idx].node);
+ lookup(n)->array[idx].len = lookup_len(n, shuf+idx);
+ memmove(lookup_string(n, idx), lookup_string(n, shuf+idx),
+ lookup(n)->array[idx].len);
+ }
+ /* zero the to-be-unused entries */
+ for(idx=lookup(n)->len-shuf; idx<lookup(n)->len; idx++) {
+ udb_rptr_zero(&lookup(n)->array[idx].node, udb);
+ memset(lookup_string(n, idx), 0, lookup(n)->array[idx].len);
+ lookup(n)->array[idx].len = 0;
+ }
+ RADNODE(n)->offset += shuf;
+ lookup(n)->len -= shuf;
+ for(idx=0; idx<lookup(n)->len; idx++)
+ if(lookup(n)->array[idx].node.data)
+ lookup_node(n, idx)->pidx = idx;
+
+ /* see if capacity has to shrink */
+ return udb_radarray_reduce_if_needed(udb, n);
+}
+
+/** remove NULL nodes from end of array */
+static int
+udb_radnode_array_clean_end(udb_base* udb, udb_ptr* n)
+{
+ /* shorten it */
+ unsigned shuf = 0;
+	/* remove until a nonNULL entry */
+ while(shuf < lookup(n)->len && lookup(n)->array[lookup(n)->len-1-shuf]
+ .node.data == 0)
+ shuf++;
+ if(shuf == 0)
+ return 1;
+ if(shuf == lookup(n)->len) {
+ /* the array is empty, the tree is inefficient */
+ return udb_radnode_array_clean_all(udb, n);
+ }
+ assert(shuf < lookup(n)->len);
+ lookup(n)->len -= shuf;
+ /* array elements can stay where they are */
+ /* see if capacity has to shrink */
+ return udb_radarray_reduce_if_needed(udb, n);
+}
+
+/** clean up radnode leaf, where we know it has a parent */
+static int
+udb_radnode_cleanup_leaf(udb_base* udb, udb_ptr* n, udb_ptr* par)
+{
+ uint8_t pidx;
+ /* node was a leaf */
+
+ /* delete leaf node, but store parent+idx */
+ pidx = RADNODE(n)->pidx;
+ assert(pidx < lookup(par)->len);
+
+ /** set parent ptr to this node to NULL before deleting the node,
+ * because otherwise ptrlinks fail */
+ udb_rptr_zero(&lookup(par)->array[pidx].node, udb);
+
+ udb_radnode_delete(udb, n);
+
+ /* set parent+idx entry to NULL str and node.*/
+ lookup(par)->array[pidx].len = 0;
+
+ /* see if par offset or len must be adjusted */
+ if(lookup(par)->len == 1) {
+ /* removed final element from array */
+ if(!udb_radnode_array_clean_all(udb, par))
+ return 0;
+ } else if(pidx == 0) {
+ /* removed first element from array */
+ if(!udb_radnode_array_clean_front(udb, par))
+ return 0;
+ } else if(pidx == lookup(par)->len-1) {
+ /* removed last element from array */
+ if(!udb_radnode_array_clean_end(udb, par))
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Cleanup a radix node that was made smaller, see if it can
+ * be merged with others.
+ * @param udb: the udb
+ * @param rt: tree to remove root if needed.
+ * @param n: node to cleanup
+ * @return false on alloc failure.
+ */
+static int
+udb_radnode_cleanup(udb_base* udb, udb_ptr* rt, udb_ptr* n)
+{
+ while(!udb_ptr_is_null(n)) {
+ if(RADNODE(n)->elem.data) {
+			/* see if it needs to be reduced in string size */
+ if(!udb_radarray_reduce_if_needed(udb, n)) {
+ udb_ptr_zero(n, udb);
+ return 0;
+ }
+ /* cannot delete node with a data element */
+ udb_ptr_zero(n, udb);
+ return 1;
+ } else if(lookup(n)->len == 1 && RADNODE(n)->parent.data) {
+ return udb_radnode_cleanup_onechild(udb, n);
+ } else if(lookup(n)->len == 0) {
+ udb_ptr par;
+ if(!RADNODE(n)->parent.data) {
+ /* root deleted */
+ udb_rptr_zero(&RADTREE(rt)->root, udb);
+ udb_radnode_delete(udb, n);
+ return 1;
+ }
+ udb_ptr_new(&par, udb, &RADNODE(n)->parent);
+ /* remove and delete the leaf node */
+ if(!udb_radnode_cleanup_leaf(udb, n, &par)) {
+ udb_ptr_unlink(&par, udb);
+ udb_ptr_zero(n, udb);
+ return 0;
+ }
+ /* see if parent can now be cleaned up */
+ udb_ptr_set_ptr(n, udb, &par);
+ udb_ptr_unlink(&par, udb);
+ } else {
+			/* see if it needs to be reduced in string size */
+ if(!udb_radarray_reduce_if_needed(udb, n)) {
+ udb_ptr_zero(n, udb);
+ return 0;
+ }
+ /* node cannot be cleaned up */
+ udb_ptr_zero(n, udb);
+ return 1;
+ }
+ }
+ /* ENOTREACH */
+ return 1;
+}
+
+void udb_radix_delete(udb_base* udb, udb_ptr* rt, udb_ptr* n)
+{
+ if(udb_ptr_is_null(n))
+ return;
+ udb_rptr_zero(&RADNODE(n)->elem, udb);
+ RADTREE(rt)->count --;
+ if(!udb_radnode_cleanup(udb, rt, n)) {
+ /* out of memory in cleanup. the elem ptr is NULL, but
+ * the radix tree could be inefficient. */
+ }
+}
+
+udb_void udb_radix_search(udb_ptr* rt, uint8_t* k, udb_radstrlen_t len)
+{
+ /* since we only perform reads, and no udb_mallocs or udb_frees
+ * we know the pointers stay the same */
+ struct udb_radnode_d* n;
+ udb_radstrlen_t pos = 0;
+ uint8_t byte;
+ void* base = *rt->base;
+
+ n = (struct udb_radnode_d*)UDB_REL(base, RADTREE(rt)->root.data);
+#define NARRAY(n) ((struct udb_radarray_d*)UDB_REL(base, n->lookup.data))
+#define NSTR(n, byte) (((uint8_t*)(&NARRAY(n)->array[NARRAY(n)->capacity]))+byte*NARRAY(n)->str_cap)
+ while(n != *rt->base) {
+ if(pos == len)
+ return UDB_SYSTOREL(*rt->base, n);
+ byte = k[pos];
+ if(byte < n->offset)
+ return 0;
+ byte -= n->offset;
+ if(byte >= NARRAY(n)->len)
+ return 0;
+ pos++;
+ if(NARRAY(n)->array[byte].len != 0) {
+ /* must match additional string */
+ if(pos+NARRAY(n)->array[byte].len > len)
+ return 0; /* no match */
+ if(memcmp(&k[pos], NSTR(n, byte),
+ NARRAY(n)->array[byte].len) != 0)
+ return 0; /* no match */
+ pos += NARRAY(n)->array[byte].len;
+ }
+ n = (struct udb_radnode_d*)UDB_REL(base,
+ NARRAY(n)->array[byte].node.data);
+ }
+ return 0;
+}
+
+/** go to last elem-containing node in this subtree (excl self) */
+static void
+udb_radnode_last_in_subtree(udb_base* udb, udb_ptr* n)
+{
+ int idx;
+ /* try last entry in array first */
+ for(idx=((int)lookup(n)->len)-1; idx >= 0; idx--) {
+ if(lookup(n)->array[idx].node.data) {
+ udb_ptr s;
+ udb_ptr_init(&s, udb);
+ udb_ptr_set_rptr(&s, udb, &lookup(n)->array[idx].node);
+ /* does it have entries in its subtrees? */
+ if(lookup(&s)->len > 0) {
+ udb_radnode_last_in_subtree(udb, &s);
+ if(!udb_ptr_is_null(&s)) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ }
+ udb_ptr_set_rptr(&s, udb, &lookup(n)->array[idx].node);
+ /* no, does it have an entry itself? */
+ if(RADNODE(&s)->elem.data) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ udb_ptr_unlink(&s, udb);
+ }
+ }
+ udb_ptr_zero(n, udb);
+}
+
+/** last in subtree, incl self */
+static void
+udb_radnode_last_in_subtree_incl_self(udb_base* udb, udb_ptr* n)
+{
+ udb_ptr self;
+ udb_ptr_init(&self, udb);
+ udb_ptr_set_ptr(&self, udb, n);
+ udb_radnode_last_in_subtree(udb, n);
+ if(!udb_ptr_is_null(n)) {
+ udb_ptr_unlink(&self, udb);
+ return;
+ }
+ if(RADNODE(&self)->elem.data) {
+ udb_ptr_set_ptr(n, udb, &self);
+ udb_ptr_unlink(&self, udb);
+ return;
+ }
+ udb_ptr_zero(n, udb);
+ udb_ptr_unlink(&self, udb);
+}
+
+/** return first elem-containing node in this subtree (excl self) */
+static void
+udb_radnode_first_in_subtree(udb_base* udb, udb_ptr* n)
+{
+ unsigned idx;
+ /* try every subnode */
+ for(idx=0; idx<lookup(n)->len; idx++) {
+ if(lookup(n)->array[idx].node.data) {
+ udb_ptr s;
+ udb_ptr_init(&s, udb);
+ udb_ptr_set_rptr(&s, udb, &lookup(n)->array[idx].node);
+ /* does it have elem itself? */
+ if(RADNODE(&s)->elem.data) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ /* try its subtrees */
+ udb_radnode_first_in_subtree(udb, &s);
+ if(!udb_ptr_is_null(&s)) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+
+ }
+ }
+ udb_ptr_zero(n, udb);
+}
+
+/** Find an entry in arrays from idx-1 to 0 */
+static void
+udb_radnode_find_prev_from_idx(udb_base* udb, udb_ptr* n, unsigned from)
+{
+ unsigned idx = from;
+ while(idx > 0) {
+ idx --;
+ if(lookup(n)->array[idx].node.data) {
+ udb_ptr_set_rptr(n, udb, &lookup(n)->array[idx].node);
+ udb_radnode_last_in_subtree_incl_self(udb, n);
+ if(!udb_ptr_is_null(n))
+ return;
+ }
+ }
+ udb_ptr_zero(n, udb);
+}
+
+/** return self or a previous element */
+static int udb_ret_self_or_prev(udb_base* udb, udb_ptr* n, udb_ptr* result)
+{
+ if(RADNODE(n)->elem.data) {
+ udb_ptr_set_ptr(result, udb, n);
+ } else {
+ udb_ptr_set_ptr(result, udb, n);
+ udb_radix_prev(udb, result);
+ }
+ udb_ptr_unlink(n, udb);
+ return 0;
+}
+
+
+int udb_radix_find_less_equal(udb_base* udb, udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len, udb_ptr* result)
+{
+ udb_ptr n;
+ udb_radstrlen_t pos = 0;
+ uint8_t byte;
+ int r;
+ /* set result to NULL */
+ udb_ptr_init(result, udb);
+ if(RADTREE(rt)->count == 0) {
+ /* empty tree */
+ return 0;
+ }
+ udb_ptr_new(&n, udb, &RADTREE(rt)->root);
+ while(pos < len) {
+ byte = k[pos];
+ if(byte < RADNODE(&n)->offset) {
+ /* so the previous is the element itself */
+ /* or something before this element */
+ return udb_ret_self_or_prev(udb, &n, result);
+ }
+ byte -= RADNODE(&n)->offset;
+ if(byte >= lookup(&n)->len) {
+ /* so, the previous is the last of array, or itself */
+ /* or something before this element */
+ udb_ptr_set_ptr(result, udb, &n);
+ udb_radnode_last_in_subtree_incl_self(udb, result);
+ if(udb_ptr_is_null(result)) {
+ udb_ptr_set_ptr(result, udb, &n);
+ udb_radix_prev(udb, result);
+ }
+ goto done_fail;
+ }
+ pos++;
+ if(!lookup(&n)->array[byte].node.data) {
+ /* no match */
+ /* Find an entry in arrays from byte-1 to 0 */
+ udb_ptr_set_ptr(result, udb, &n);
+ udb_radnode_find_prev_from_idx(udb, result, byte);
+ if(!udb_ptr_is_null(result))
+ goto done_fail;
+ /* this entry or something before it */
+ udb_ptr_zero(result, udb);
+ return udb_ret_self_or_prev(udb, &n, result);
+ }
+ if(lookup_len(&n, byte) != 0) {
+ /* must match additional string */
+ if(pos+lookup_len(&n, byte) > len) {
+ /* the additional string is longer than key*/
+ if( (r=memcmp(&k[pos], lookup_string(&n, byte),
+ len-pos)) <= 0) {
+ /* and the key is before this node */
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radix_prev(udb, result);
+ } else {
+ /* the key is after the additional
+ * string, thus everything in that
+ * subtree is smaller. */
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radnode_last_in_subtree_incl_self(udb, result);
+ /* if somehow that is NULL,
+ * then we have an inefficient tree:
+ * byte+1 is larger than us, so find
+ * something in byte-1 and before */
+ if(udb_ptr_is_null(result)) {
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radix_prev(udb, result);
+ }
+ }
+ goto done_fail; /* no match */
+ }
+ if( (r=memcmp(&k[pos], lookup_string(&n, byte),
+ lookup_len(&n, byte))) < 0) {
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radix_prev(udb, result);
+ goto done_fail; /* no match */
+ } else if(r > 0) {
+ /* the key is larger than the additional
+ * string, thus everything in that subtree
+ * is smaller */
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radnode_last_in_subtree_incl_self(udb, result);
+ /* if we have an inefficient tree */
+ if(udb_ptr_is_null(result)) {
+ udb_ptr_set_rptr(result, udb,
+ &lookup(&n)->array[byte].node);
+ udb_radix_prev(udb, result);
+ }
+ goto done_fail; /* no match */
+ }
+ pos += lookup_len(&n, byte);
+ }
+ udb_ptr_set_rptr(&n, udb, &lookup(&n)->array[byte].node);
+ }
+ if(RADNODE(&n)->elem.data) {
+ /* exact match */
+ udb_ptr_set_ptr(result, udb, &n);
+ udb_ptr_unlink(&n, udb);
+ return 1;
+ }
+ /* there is a node which is an exact match, but it has no element */
+ udb_ptr_set_ptr(result, udb, &n);
+ udb_radix_prev(udb, result);
+done_fail:
+ udb_ptr_unlink(&n, udb);
+ return 0;
+}
+
+void udb_radix_first(udb_base* udb, udb_ptr* rt, udb_ptr* p)
+{
+ udb_ptr_init(p, udb);
+ if(!rt || udb_ptr_is_null(rt) || RADTREE(rt)->count == 0)
+ return;
+ udb_ptr_set_rptr(p, udb, &RADTREE(rt)->root);
+ if(RADNODE(p)->elem.data)
+ return;
+ udb_radix_next(udb, p);
+}
+
+void udb_radix_last(udb_base* udb, udb_ptr* rt, udb_ptr* p)
+{
+ udb_ptr_init(p, udb);
+ if(!rt || udb_ptr_is_null(rt) || RADTREE(rt)->count == 0)
+ return;
+ udb_ptr_set_rptr(p, udb, &RADTREE(rt)->root);
+ udb_radnode_last_in_subtree_incl_self(udb, p);
+}
+
+void udb_radix_next(udb_base* udb, udb_ptr* n)
+{
+ udb_ptr s;
+ udb_ptr_init(&s, udb);
+ if(lookup(n)->len) {
+ /* go down */
+ udb_ptr_set_ptr(&s, udb, n);
+ udb_radnode_first_in_subtree(udb, &s);
+ if(!udb_ptr_is_null(&s)) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ }
+ /* go up - the parent->elem is not useful, because it is before us */
+ while(RADNODE(n)->parent.data) {
+ unsigned idx = RADNODE(n)->pidx;
+ udb_ptr_set_rptr(n, udb, &RADNODE(n)->parent);
+ idx++;
+ for(; idx < lookup(n)->len; idx++) {
+ /* go down the next branch */
+ if(lookup(n)->array[idx].node.data) {
+ udb_ptr_set_rptr(&s, udb,
+ &lookup(n)->array[idx].node);
+ /* node itself */
+ if(RADNODE(&s)->elem.data) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ /* or subtree */
+ udb_radnode_first_in_subtree(udb, &s);
+ if(!udb_ptr_is_null(&s)) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ }
+ }
+ }
+ udb_ptr_unlink(&s, udb);
+ udb_ptr_zero(n, udb);
+}
+
+void udb_radix_prev(udb_base* udb, udb_ptr* n)
+{
+ /* must go up, since all array nodes are after this node */
+ while(RADNODE(n)->parent.data) {
+ uint8_t idx = RADNODE(n)->pidx;
+ udb_ptr s;
+ udb_ptr_set_rptr(n, udb, &RADNODE(n)->parent);
+ assert(lookup(n)->len > 0); /* since we are a child */
+ /* see if there are elements in previous branches there */
+ udb_ptr_init(&s, udb);
+ udb_ptr_set_ptr(&s, udb, n);
+ udb_radnode_find_prev_from_idx(udb, &s, idx);
+ if(!udb_ptr_is_null(&s)) {
+ udb_ptr_set_ptr(n, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ return;
+ }
+ udb_ptr_unlink(&s, udb);
+ /* the current node is before the array */
+ if(RADNODE(n)->elem.data)
+ return;
+ }
+ udb_ptr_zero(n, udb);
+}
+
+udb_void udb_radname_insert(udb_base* udb, udb_ptr* rt, const uint8_t* dname,
+ size_t dlen, udb_ptr* elem, udb_ptr* result)
+{
+ uint8_t k[300];
+ radstrlen_t klen = (radstrlen_t)sizeof(k);
+ radname_d2r(k, &klen, dname, dlen);
+ return udb_radix_insert(udb, rt, k, klen, elem, result);
+}
+
+int udb_radname_search(udb_base* udb, udb_ptr* rt, const uint8_t* dname,
+ size_t dlen, udb_ptr* result)
+{
+ udb_void r;
+ uint8_t k[300];
+ radstrlen_t klen = (radstrlen_t)sizeof(k);
+ radname_d2r(k, &klen, dname, dlen);
+ r = udb_radix_search(rt, k, klen);
+ udb_ptr_init(result, udb);
+ udb_ptr_set(result, udb, r);
+ return (r != 0);
+}
+
+void udb_radix_tree_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg)
+{
+ struct udb_radtree_d* p = (struct udb_radtree_d*)d;
+ assert(s >= sizeof(struct udb_radtree_d));
+ (void)s;
+ (*cb)(base, &p->root, arg);
+}
+
+void udb_radix_node_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg)
+{
+ struct udb_radnode_d* p = (struct udb_radnode_d*)d;
+ assert(s >= sizeof(struct udb_radnode_d));
+ (void)s;
+ (*cb)(base, &p->elem, arg);
+ (*cb)(base, &p->parent, arg);
+ (*cb)(base, &p->lookup, arg);
+}
+
+void udb_radix_array_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg)
+{
+ struct udb_radarray_d* p = (struct udb_radarray_d*)d;
+ unsigned i;
+ assert(s >= sizeof(struct udb_radarray_d)+
+ p->capacity*(sizeof(struct udb_radsel_d)+p->str_cap));
+ (void)s;
+ for(i=0; i<p->len; i++) {
+ (*cb)(base, &p->array[i].node, arg);
+ }
+}
diff --git a/usr.sbin/nsd/udbradtree.h b/usr.sbin/nsd/udbradtree.h
new file mode 100644
index 00000000000..6f4bc735819
--- /dev/null
+++ b/usr.sbin/nsd/udbradtree.h
@@ -0,0 +1,245 @@
+/*
+ * udbradtree -- radix tree for binary strings, stored in a udb file.
+ *
+ * Copyright (c) 2011, NLnet Labs. See LICENSE for license.
+ */
+#ifndef UDB_RADTREE_H
+#define UDB_RADTREE_H
+#include "udb.h"
+struct udb_radnode;
+
+/** length of the binary string */
+typedef uint16_t udb_radstrlen_t;
+
+/**
+ * The radix tree
+ *
+ * The elements are stored based on binary strings(0-255) of a given length.
+ * They are sorted, a prefix is sorted before its suffixes.
+ * If you want to know the key string, you should store it yourself, the
+ * tree stores it in the parts necessary for lookup.
+ * For binary strings for domain names see the radname routines.
+ *
+ * This is the tree on disk representation. It has _d suffix in the name
+ * to help delineate disk structures from normal structures.
+ */
+struct udb_radtree_d {
+ /** root node in tree, to udb_radnode_d */
+ struct udb_rel_ptr root;
+ /** count of number of elements */
+ uint64_t count;
+};
+
+/**
+ * A radix tree lookup node. It is stored on disk, and the lookup array
+ * is allocated.
+ */
+struct udb_radnode_d {
+ /** data element associated with the binary string up to this node */
+ struct udb_rel_ptr elem;
+ /** parent node (NULL for the root), to udb_radnode_d */
+ struct udb_rel_ptr parent;
+ /** the array structure, for lookup by [byte-offset]. udb_radarray_d */
+ struct udb_rel_ptr lookup;
+ /** index in the parent lookup array */
+ uint8_t pidx;
+ /** offset of the lookup array, add to [i] for lookups */
+ uint8_t offset;
+};
+
+/**
+ * radix select edge in array
+ * The string for this element is the Nth string in the stringarray.
+ */
+struct udb_radsel_d {
+ /** length of the additional string for this edge,
+ * additional string after the selection-byte for this edge.*/
+ udb_radstrlen_t len;
+ /** padding for non64bit compilers to 64bit boundaries, to make
+ * the udb file more portable, without this the file would work
+ * on the system it is created on (which is what we promise), but
+ * with this, you have a chance of it working on other platforms */
+ uint16_t padding16;
+ uint32_t padding32;
+ /** node that deals with byte+str, to udb_radnode_d */
+ struct udb_rel_ptr node;
+};
+
+/**
+ * Array of radsel elements.
+ * This is the header, the array is allocated contiguously behind it.
+ * The strings (often very short) are allocated behind the array.
+ * All strings are given the same amount of space (str_cap),
+ * so there is capacity*str_cap bytes at the end.
+ */
+struct udb_radarray_d {
+ /** length of the lookup array */
+ uint16_t len;
+ /** capacity of the lookup array (can be larger than length) */
+ uint16_t capacity;
+	/** space capacity for every string */
+ udb_radstrlen_t str_cap;
+ /** padding to 64bit alignment, just in case compiler goes mad */
+ uint16_t padding;
+ /** the elements (allocated contiguously after this structure) */
+ struct udb_radsel_d array[0];
+};
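As a sketch of the layout just described, the additional string for edge i starts directly after the element array, i slots of str_cap bytes in (this mirrors the NSTR macro used by udb_radix_search in udbradtree.c; the helper name is illustrative):

/* illustrative only: locate the string for edge i in a radarray */
static uint8_t*
radarray_str(struct udb_radarray_d* a, unsigned i)
{
	return ((uint8_t*)&a->array[a->capacity]) + i*a->str_cap;
}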
+
+/**
+ * Create new radix tree on udb storage
+ * @param udb: the udb to allocate space on.
+ * @param ptr: ptr to the udbradtree is returned here. pass uninitialised.
+ * type is udb_radtree_d.
+ * @return 0 on alloc failure.
+ */
+int udb_radix_tree_create(udb_base* udb, udb_ptr* ptr);
+
+/**
+ * Delete intermediate nodes from radix tree
+ * @param udb: the udb.
+ * @param rt: radix tree to be cleared. type udb_radtree_d.
+ */
+void udb_radix_tree_clear(udb_base* udb, udb_ptr* rt);
+
+/**
+ * Delete radix tree.
+ * You must have deleted the elements, this deletes the nodes.
+ * @param udb: the udb.
+ * @param rt: radix tree to be deleted. type udb_radtree_d.
+ */
+void udb_radix_tree_delete(udb_base* udb, udb_ptr* rt);
+
+/**
+ * Insert element into radix tree.
+ * @param udb: the udb.
+ * @param rt: the radix tree, type udb_radtree_d.
+ * @param key: key string.
+ * @param len: length of key.
+ * @param elem: pointer to element data, on the udb store.
+ * @param result: the inserted node is set to this value. Pass uninited.
+ Not set if the routine fails.
+ * @return NULL on failure - out of memory.
+ * NULL on failure - duplicate entry.
+ * On success the new radix node for this element (udb_radnode_d).
+ */
+udb_void udb_radix_insert(udb_base* udb, udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len, udb_ptr* elem, udb_ptr* result);
+
+/**
+ * Delete element from radix tree.
+ * @param udb: the udb.
+ * @param rt: the radix tree. type udb_radtree_d
+ * @param n: radix node for that element. type udb_radnode_d
+ * if NULL, nothing is deleted.
+ */
+void udb_radix_delete(udb_base* udb, udb_ptr* rt, udb_ptr* n);
+
+/**
+ * Find radix element in tree.
+ * @param rt: the radix tree, type udb_radtree_d.
+ * @param key: key string.
+ * @param len: length of key.
+ * @return the radix node or NULL if not found. type udb_radnode_d
+ */
+udb_void udb_radix_search(udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len);
+
+/**
+ * Find radix element in tree, and if not found, find the closest smaller or
+ * equal element in the tree.
+ * @param udb: the udb.
+ * @param rt: the radix tree, type udb_radtree_d.
+ * @param key: key string.
+ * @param len: length of key.
+ * @param result: returns the radix node or closest match (NULL if key is
+ * smaller than the smallest key in the tree). type udb_radnode_d.
+ * you can pass an uninitialized ptr, an unlinked or a zeroed one.
+ * @return true if exact match, false if no match.
+ */
+int udb_radix_find_less_equal(udb_base* udb, udb_ptr* rt, uint8_t* k,
+ udb_radstrlen_t len, udb_ptr* result);
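A hedged usage sketch of the lookup above (key and keylen are illustrative; as with the other routines here, the result ptr must be unlinked by the caller):

udb_ptr res;
if(udb_radix_find_less_equal(udb, rt, key, keylen, &res)) {
	/* exact match: RADNODE(&res)->elem is the stored element */
} else if(!udb_ptr_is_null(&res)) {
	/* res is the closest element smaller than key */
} /* else the key sorts before everything in the tree */
udb_ptr_unlink(&res, udb);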
+
+/**
+ * Return the first (smallest) element in the tree.
+ * @param udb: the udb.
+ * @param rt: the radix tree, type udb_radtree_d.
+ * @param p: set to the first node in the tree, or NULL if none.
+ * type udb_radnode_d.
+ * pass uninited, zero or unlinked udb_ptr.
+ */
+void udb_radix_first(udb_base* udb, udb_ptr* rt, udb_ptr* p);
+
+/**
+ * Return the last (largest) element in the tree.
+ * @param udb: the udb.
+ * @param rt: the radix tree, type udb_radtree_d.
+ * @param p: last node or NULL if none, type udb_radnode_d.
+ * pass uninited, zero or unlinked udb_ptr.
+ */
+void udb_radix_last(udb_base* udb, udb_ptr* rt, udb_ptr* p);
+
+/**
+ * Return the next element.
+ * @param udb: the udb.
+ * @param n: adjusted to the next element, or NULL if none. type udb_radnode_d.
+ */
+void udb_radix_next(udb_base* udb, udb_ptr* n);
+
+/**
+ * Return the previous element.
+ * @param udb: the udb.
+ * @param n: adjusted to the prev node or NULL if none. type udb_radnode_d.
+ */
+void udb_radix_prev(udb_base* udb, udb_ptr* n);
+
+/*
+ * Perform a walk through all elements of the tree.
+ * node: udb_ptr variable, pointing at udb_radnode_d elements.
+ * tree: udb_ptr to the tree (udb_radtree_d).
+ * for(udb_radix_first(udb, tree, node); node->data; udb_radix_next(udb, node))
+ */
+
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_radtree */
+void udb_radix_tree_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_radnode */
+void udb_radix_node_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_radarray */
+void udb_radix_array_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+
+/** get the memory used by the lookup structure for a radnode */
+size_t size_of_lookup_ext(udb_ptr* node);
+
+/** insert radtree element, key is a domain name
+ * @param udb: udb.
+ * @param rt: the tree.
+ * @param dname: domain name in uncompressed wireformat.
+ * @param dlen: length of dname.
+ * @param elem: element to store
+ * @param result: the inserted node is set to this value. Pass uninited.
+ Not set if the routine fails.
+ * @return 0 on failure
+ */
+udb_void udb_radname_insert(udb_base* udb, udb_ptr* rt, const uint8_t* dname,
+ size_t dlen, udb_ptr* elem, udb_ptr* result);
+
+/** search for a radname element, key is a domain name.
+ * @param udb: udb
+ * @param rt: the tree
+ * @param dname: domain name in uncompressed wireformat.
+ * @param dlen: length of dname.
+ * @param result: result ptr to store the node into.
+ * may be uninitialized.
+ * @return 0 if not found.
+ */
+int udb_radname_search(udb_base* udb, udb_ptr* rt, const uint8_t* dname,
+ size_t dlen, udb_ptr* result);
+
+#define RADNODE(ptr) ((struct udb_radnode_d*)UDB_PTR(ptr))
+#define RADTREE(ptr) ((struct udb_radtree_d*)UDB_PTR(ptr))
+
+#endif /* UDB_RADTREE_H */
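To illustrate how the routines above fit together, a minimal sketch (error paths abbreviated; udb, dname, dlen and elem are assumed to be set up by the caller, and the function name is illustrative):

/* sketch: create a tree keyed by wireformat domain names and walk it */
static void
example_radtree_usage(udb_base* udb, const uint8_t* dname, size_t dlen,
	udb_ptr* elem)
{
	udb_ptr tree, node, it;
	if(!udb_radix_tree_create(udb, &tree))
		return; /* out of space */
	if(!udb_radname_insert(udb, &tree, dname, dlen, elem, &node)) {
		udb_ptr_unlink(&tree, udb); /* duplicate or out of space */
		return;
	}
	udb_ptr_unlink(&node, udb);
	/* walk the elements in canonical (prefix-before-suffix) order */
	for(udb_radix_first(udb, &tree, &it); it.data; udb_radix_next(udb, &it)) {
		/* RADNODE(&it)->elem points at the stored element */
	}
	udb_ptr_unlink(&it, udb);
	udb_ptr_unlink(&tree, udb);
}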
diff --git a/usr.sbin/nsd/udbzone.c b/usr.sbin/nsd/udbzone.c
new file mode 100644
index 00000000000..bd5929b3929
--- /dev/null
+++ b/usr.sbin/nsd/udbzone.c
@@ -0,0 +1,786 @@
+/*
+ * udbzone -- store zone and rrset information in udb file.
+ *
+ * Copyright (c) 2011, NLnet Labs. See LICENSE for license.
+ */
+#include "config.h"
+#include "udbzone.h"
+#include "util.h"
+#include "iterated_hash.h"
+#include "dns.h"
+#include "dname.h"
+#include "difffile.h"
+#include <string.h>
+
+/** delete the zone's own data; it is not unlinked from the zone tree */
+static void
+udb_zone_delete_plain(udb_base* udb, udb_ptr* zone)
+{
+ udb_ptr dtree;
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+ udb_zone_clear(udb, zone);
+ udb_rptr_zero(&ZONE(zone)->node, udb);
+ udb_rptr_zero(&ZONE(zone)->nsec3param, udb);
+ udb_rptr_zero(&ZONE(zone)->log_str, udb);
+ udb_ptr_new(&dtree, udb, &ZONE(zone)->domains);
+ udb_rptr_zero(&ZONE(zone)->domains, udb);
+ udb_radix_tree_delete(udb, &dtree);
+ udb_ptr_free_space(zone, udb,
+ sizeof(struct zone_d)+ZONE(zone)->namelen);
+}
+
+int
+udb_dns_init_file(udb_base* udb)
+{
+ udb_ptr ztree;
+ if(!udb_radix_tree_create(udb, &ztree)) {
+ return 0;
+ }
+ udb_base_set_userdata(udb, ztree.data);
+ udb_ptr_unlink(&ztree, udb);
+ return 1;
+}
+
+void
+udb_dns_deinit_file(udb_base* udb)
+{
+ udb_ptr ztree;
+ udb_ptr z;
+ udb_ptr_new(&ztree, udb, udb_base_get_userdata(udb));
+ if(udb_ptr_is_null(&ztree)) {
+ return;
+ }
+ assert(udb_ptr_get_type(&ztree) == udb_chunk_type_radtree);
+ /* delete all zones */
+ for(udb_radix_first(udb, &ztree, &z); z.data; udb_radix_next(udb, &z)){
+ udb_ptr zone;
+ udb_ptr_new(&zone, udb, &RADNODE(&z)->elem);
+ udb_rptr_zero(&RADNODE(&z)->elem, udb);
+ udb_zone_delete_plain(udb, &zone);
+ }
+ udb_ptr_unlink(&z, udb);
+
+ udb_base_set_userdata(udb, 0);
+ udb_radix_tree_delete(udb, &ztree);
+}
+
+int
+udb_zone_create(udb_base* udb, udb_ptr* result, const uint8_t* dname,
+ size_t dlen)
+{
+ udb_ptr ztree, z, node, dtree;
+ udb_ptr_new(&ztree, udb, udb_base_get_userdata(udb));
+ assert(udb_ptr_get_type(&ztree) == udb_chunk_type_radtree);
+ udb_ptr_init(result, udb);
+ if(udb_zone_search(udb, &z, dname, dlen)) {
+ udb_ptr_unlink(&ztree, udb);
+ udb_ptr_unlink(&z, udb);
+ /* duplicate */
+ return 0;
+ }
+ if(!udb_ptr_alloc_space(&z, udb, udb_chunk_type_zone,
+ sizeof(struct zone_d)+dlen)) {
+ udb_ptr_unlink(&ztree, udb);
+ /* failed alloc */
+ return 0;
+ }
+ /* init the zone object */
+ udb_rel_ptr_init(&ZONE(&z)->node);
+ udb_rel_ptr_init(&ZONE(&z)->domains);
+ udb_rel_ptr_init(&ZONE(&z)->nsec3param);
+ udb_rel_ptr_init(&ZONE(&z)->log_str);
+ ZONE(&z)->rrset_count = 0;
+ ZONE(&z)->rr_count = 0;
+ ZONE(&z)->expired = 0;
+ ZONE(&z)->mtime = 0;
+ ZONE(&z)->namelen = dlen;
+ memmove(ZONE(&z)->name, dname, dlen);
+ if(!udb_radix_tree_create(udb, &dtree)) {
+ udb_ptr_free_space(&z, udb, sizeof(struct zone_d)+dlen);
+ udb_ptr_unlink(&ztree, udb);
+ /* failed alloc */
+ return 0;
+ }
+ udb_rptr_set_ptr(&ZONE(&z)->domains, udb, &dtree);
+
+ /* insert it */
+ if(!udb_radname_insert(udb, &ztree, dname, dlen, &z, &node)) {
+ udb_ptr_free_space(&z, udb, sizeof(struct zone_d)+dlen);
+ udb_ptr_unlink(&ztree, udb);
+ udb_radix_tree_delete(udb, &dtree);
+ udb_ptr_unlink(&dtree, udb);
+ /* failed alloc */
+ return 0;
+ }
+ udb_rptr_set_ptr(&ZONE(&z)->node, udb, &node);
+ udb_ptr_set_ptr(result, udb, &z);
+ udb_ptr_unlink(&z, udb);
+ udb_ptr_unlink(&dtree, udb);
+ udb_ptr_unlink(&ztree, udb);
+ udb_ptr_unlink(&node, udb);
+ return 1;
+}
+
+/** delete an RR */
+static void
+rr_delete(udb_base* udb, udb_ptr* rr)
+{
+ assert(udb_ptr_get_type(rr) == udb_chunk_type_rr);
+ udb_rptr_zero(&RR(rr)->next, udb);
+ udb_ptr_free_space(rr, udb, sizeof(struct rr_d)+RR(rr)->len);
+}
+
+/** delete an rrset */
+static void
+rrset_delete(udb_base* udb, udb_ptr* rrset)
+{
+ udb_ptr rr, n;
+ assert(udb_ptr_get_type(rrset) == udb_chunk_type_rrset);
+
+ /* free RRs */
+ udb_ptr_new(&rr, udb, &RRSET(rrset)->rrs);
+ udb_ptr_init(&n, udb);
+ udb_rptr_zero(&RRSET(rrset)->rrs, udb);
+ while(!udb_ptr_is_null(&rr)) {
+ udb_ptr_set_rptr(&n, udb, &RR(&rr)->next);
+ rr_delete(udb, &rr);
+ udb_ptr_set_ptr(&rr, udb, &n);
+ udb_ptr_zero(&n, udb);
+ }
+ udb_ptr_unlink(&n, udb);
+ udb_ptr_unlink(&rr, udb);
+
+ udb_rptr_zero(&RRSET(rrset)->next, udb);
+ udb_ptr_free_space(rrset, udb, sizeof(struct rrset_d));
+}
+
+/** clear a domain of its rrsets, rrs */
+static void
+domain_clear(udb_base* udb, udb_ptr* d)
+{
+ udb_ptr rrset, n;
+ assert(udb_ptr_get_type(d) == udb_chunk_type_domain);
+ udb_ptr_new(&rrset, udb, &DOMAIN(d)->rrsets);
+ udb_ptr_init(&n, udb);
+ udb_rptr_zero(&DOMAIN(d)->rrsets, udb);
+ while(!udb_ptr_is_null(&rrset)) {
+ udb_ptr_set_rptr(&n, udb, &RRSET(&rrset)->next);
+ rrset_delete(udb, &rrset);
+ udb_ptr_set_ptr(&rrset, udb, &n);
+ udb_ptr_zero(&n, udb);
+ }
+ udb_ptr_unlink(&n, udb);
+ udb_ptr_unlink(&rrset, udb);
+}
+
+/** delete a domain and all its rrsets, rrs */
+static void
+domain_delete(udb_base* udb, udb_ptr* d)
+{
+ domain_clear(udb, d);
+ udb_rptr_zero(&DOMAIN(d)->node, udb);
+ udb_ptr_free_space(d, udb,
+ sizeof(struct domain_d)+DOMAIN(d)->namelen);
+}
+
+/** delete domain but also unlink from tree at zone */
+static void
+domain_delete_unlink(udb_base* udb, udb_ptr* z, udb_ptr* d)
+{
+ udb_ptr dtree, n;
+ udb_ptr_new(&dtree, udb, &ZONE(z)->domains);
+ udb_ptr_new(&n, udb, &DOMAIN(d)->node);
+ udb_rptr_zero(&DOMAIN(d)->node, udb);
+ udb_radix_delete(udb, &dtree, &n);
+ udb_ptr_unlink(&dtree, udb);
+ udb_ptr_unlink(&n, udb);
+ domain_delete(udb, d);
+}
+
+void
+udb_zone_clear(udb_base* udb, udb_ptr* zone)
+{
+ udb_ptr dtree, d;
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+ udb_ptr_new(&dtree, udb, &ZONE(zone)->domains);
+ udb_rptr_zero(&ZONE(zone)->nsec3param, udb);
+ udb_zone_set_log_str(udb, zone, NULL);
+
+ /* walk and delete all domains, rrsets, rrs, but keep tree */
+ for(udb_radix_first(udb, &dtree, &d); d.data; udb_radix_next(udb, &d)){
+ udb_ptr domain;
+ udb_ptr_new(&domain, udb, &RADNODE(&d)->elem);
+ udb_rptr_zero(&RADNODE(&d)->elem, udb);
+ domain_delete(udb, &domain);
+ }
+ udb_ptr_unlink(&d, udb);
+ udb_radix_tree_clear(udb, &dtree);
+ ZONE(zone)->rrset_count = 0;
+ ZONE(zone)->rr_count = 0;
+ ZONE(zone)->expired = 0;
+ ZONE(zone)->mtime = 0;
+ udb_ptr_unlink(&dtree, udb);
+}
+
+void
+udb_zone_delete(udb_base* udb, udb_ptr* zone)
+{
+ udb_ptr ztree, n;
+ udb_ptr_new(&ztree, udb, udb_base_get_userdata(udb));
+ udb_ptr_new(&n, udb, &ZONE(zone)->node);
+ udb_rptr_zero(&ZONE(zone)->node, udb);
+ udb_radix_delete(udb, &ztree, &n);
+ udb_ptr_unlink(&ztree, udb);
+ udb_ptr_unlink(&n, udb);
+ udb_zone_delete_plain(udb, zone);
+}
+
+int
+udb_zone_search(udb_base* udb, udb_ptr* result, const uint8_t* dname,
+ size_t dname_len)
+{
+ udb_ptr ztree;
+ udb_ptr_new(&ztree, udb, udb_base_get_userdata(udb));
+ assert(udb_ptr_get_type(&ztree) == udb_chunk_type_radtree);
+ if(udb_radname_search(udb, &ztree, dname, dname_len, result)) {
+ if(result->data)
+ udb_ptr_set_rptr(result, udb, &RADNODE(result)->elem);
+ udb_ptr_unlink(&ztree, udb);
+ return (result->data != 0);
+ }
+ udb_ptr_unlink(&ztree, udb);
+ return 0;
+}
+
+uint64_t udb_zone_get_mtime(udb_base* udb, const uint8_t* dname, size_t dlen)
+{
+ udb_ptr z;
+ if(udb_zone_search(udb, &z, dname, dlen)) {
+ uint64_t t = ZONE(&z)->mtime;
+ udb_ptr_unlink(&z, udb);
+ return t;
+ }
+ return 0;
+}
+
+void udb_zone_set_log_str(udb_base* udb, udb_ptr* zone, const char* str)
+{
+ /* delete original log str (if any) */
+ if(ZONE(zone)->log_str.data) {
+ udb_ptr s;
+ size_t sz;
+ udb_ptr_new(&s, udb, &ZONE(zone)->log_str);
+ udb_rptr_zero(&ZONE(zone)->log_str, udb);
+ sz = strlen((char*)udb_ptr_data(&s))+1;
+ udb_ptr_free_space(&s, udb, sz);
+ }
+
+ /* set new log str */
+ if(str) {
+ udb_ptr s;
+ size_t sz = strlen(str)+1;
+ if(!udb_ptr_alloc_space(&s, udb, udb_chunk_type_data, sz)) {
+ return; /* failed to allocate log string */
+ }
+ memmove(udb_ptr_data(&s), str, sz);
+ udb_rptr_set_ptr(&ZONE(zone)->log_str, udb, &s);
+ udb_ptr_unlink(&s, udb);
+ }
+}
+
+#ifdef NSEC3
+/** select the nsec3param for nsec3 usage */
+static void
+select_nsec3_param(udb_base* udb, udb_ptr* zone, udb_ptr* rrset)
+{
+ udb_ptr rr;
+ udb_ptr_new(&rr, udb, &RRSET(rrset)->rrs);
+ while(rr.data) {
+ if(RR(&rr)->len >= 5 && RR(&rr)->wire[0] == NSEC3_SHA1_HASH &&
+ RR(&rr)->wire[1] == 0) {
+ udb_rptr_set_ptr(&ZONE(zone)->nsec3param, udb, &rr);
+ udb_ptr_unlink(&rr, udb);
+ return;
+ }
+ udb_ptr_set_rptr(&rr, udb, &RR(&rr)->next);
+ }
+ udb_ptr_unlink(&rr, udb);
+}
+
+const char*
+udb_nsec3param_string(udb_ptr* rr)
+{
+	/* max salt length in hex plus first couple of numbers (3+1+5+1+3+1) */
+ static char params[MAX_RDLENGTH*2+16];
+ char* p;
+ assert(RR(rr)->len >= 5);
+ p = params + snprintf(params, sizeof(params), "%u %u %u ",
+ (unsigned)RR(rr)->wire[0], (unsigned)RR(rr)->wire[1],
+ (unsigned)read_uint16(&RR(rr)->wire[2]));
+ if(RR(rr)->wire[4] == 0) {
+ *p++ = '-';
+ } else {
+ assert(RR(rr)->len >= 5+RR(rr)->wire[4]);
+ p += hex_ntop(&RR(rr)->wire[5], RR(rr)->wire[4], p,
+ sizeof(params)-strlen(params)-1);
+ }
+ *p = 0;
+ return params;
+}
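For example, assuming the NSEC3PARAM wire layout read above (algorithm, flags, 16-bit iteration count, salt length, then the salt itself), the wire bytes 01 00 00 0a 02 aa bb come out as the string "1 0 10 aabb", and a zero salt length yields "1 0 10 -".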
+
+/** look in zone for new selected nsec3param record from rrset */
+static void
+zone_hash_nsec3param(udb_base* udb, udb_ptr* zone, udb_ptr* rrset)
+{
+ select_nsec3_param(udb, zone, rrset);
+ if(ZONE(zone)->nsec3param.data == 0)
+ return;
+ /* prettyprint the nsec3 parameters we are using */
+ if(2 <= verbosity) {
+ udb_ptr par;
+ udb_ptr_new(&par, udb, &ZONE(zone)->nsec3param);
+ VERBOSITY(1, (LOG_INFO, "rehash of zone %s with parameters %s",
+ wiredname2str(ZONE(zone)->name),
+ udb_nsec3param_string(&par)));
+ udb_ptr_unlink(&par, udb);
+ }
+}
+#endif /* NSEC3 */
+
+/** create a new domain name */
+static int
+domain_create(udb_base* udb, udb_ptr* zone, const uint8_t* nm, size_t nmlen,
+ udb_ptr* result)
+{
+ udb_ptr dtree, node;
+ /* create domain chunk */
+ if(!udb_ptr_alloc_space(result, udb, udb_chunk_type_domain,
+ sizeof(struct domain_d)+nmlen))
+ return 0;
+ udb_rel_ptr_init(&DOMAIN(result)->node);
+ udb_rel_ptr_init(&DOMAIN(result)->rrsets);
+ DOMAIN(result)->namelen = nmlen;
+ memmove(DOMAIN(result)->name, nm, nmlen);
+
+ /* insert into domain tree */
+ udb_ptr_new(&dtree, udb, &ZONE(zone)->domains);
+ if(!udb_radname_insert(udb, &dtree, nm, nmlen, result, &node)) {
+ udb_ptr_free_space(result, udb, sizeof(struct domain_d)+nmlen);
+ udb_ptr_unlink(&dtree, udb);
+ return 0;
+ }
+ udb_rptr_set_ptr(&DOMAIN(result)->node, udb, &node);
+ udb_ptr_unlink(&dtree, udb);
+ udb_ptr_unlink(&node, udb);
+ return 1;
+}
+
+int
+udb_domain_find(udb_base* udb, udb_ptr* zone, const uint8_t* nm, size_t nmlen,
+ udb_ptr* result)
+{
+ int r;
+ udb_ptr dtree;
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+ udb_ptr_new(&dtree, udb, &ZONE(zone)->domains);
+ r = udb_radname_search(udb, &dtree, nm, nmlen, result);
+ if(result->data)
+ udb_ptr_set_rptr(result, udb, &RADNODE(result)->elem);
+ udb_ptr_unlink(&dtree, udb);
+ return r && result->data;
+}
+
+/** find or create a domain name in the zone domain tree */
+static int
+domain_find_or_create(udb_base* udb, udb_ptr* zone, const uint8_t* nm,
+ size_t nmlen, udb_ptr* result)
+{
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+ if(udb_domain_find(udb, zone, nm, nmlen, result))
+ return 1;
+ return domain_create(udb, zone, nm, nmlen, result);
+}
+
+/** remove rrset from the domain name rrset-list */
+static void
+domain_remove_rrset(udb_base* udb, udb_ptr* domain, uint16_t t)
+{
+ udb_ptr p, prev;
+ assert(udb_ptr_get_type(domain) == udb_chunk_type_domain);
+ udb_ptr_new(&p, udb, &DOMAIN(domain)->rrsets);
+ udb_ptr_init(&prev, udb);
+ while(p.data) {
+ if(RRSET(&p)->type == t) {
+ /* remove it */
+ if(prev.data == 0) {
+ /* first rrset */
+ udb_rptr_set_rptr(&DOMAIN(domain)->rrsets,
+ udb, &RRSET(&p)->next);
+ } else {
+ udb_rptr_set_rptr(&RRSET(&prev)->next,
+ udb, &RRSET(&p)->next);
+ }
+ udb_ptr_unlink(&prev, udb);
+ rrset_delete(udb, &p);
+ return;
+ }
+ udb_ptr_set_ptr(&prev, udb, &p);
+ udb_ptr_set_rptr(&p, udb, &RRSET(&p)->next);
+ }
+ /* rrset does not exist */
+ udb_ptr_unlink(&prev, udb);
+ udb_ptr_unlink(&p, udb);
+}
+
+/** create rrset in the domain rrset list */
+static int
+rrset_create(udb_base* udb, udb_ptr* domain, uint16_t t, udb_ptr* res)
+{
+ /* create it */
+ if(!udb_ptr_alloc_space(res, udb, udb_chunk_type_rrset,
+ sizeof(struct rrset_d)))
+ return 0;
+ udb_rel_ptr_init(&RRSET(res)->next);
+ udb_rel_ptr_init(&RRSET(res)->rrs);
+ RRSET(res)->type = t;
+
+#if 0
+ /* link it in, at the front */
+ udb_rptr_set_rptr(&RRSET(res)->next, udb, &DOMAIN(domain)->rrsets);
+ udb_rptr_set_ptr(&DOMAIN(domain)->rrsets, udb, res);
+#else
+ /* preserve RRset order, link at end */
+ if(DOMAIN(domain)->rrsets.data == 0) {
+ udb_rptr_set_ptr(&DOMAIN(domain)->rrsets, udb, res);
+ } else {
+ udb_ptr p;
+ udb_ptr_new(&p, udb, &DOMAIN(domain)->rrsets);
+ while(RRSET(&p)->next.data)
+ udb_ptr_set_rptr(&p, udb, &RRSET(&p)->next);
+ udb_rptr_set_ptr(&RRSET(&p)->next, udb, res);
+ udb_ptr_unlink(&p, udb);
+ }
+#endif
+ return 1;
+}
+
+int
+udb_rrset_find(udb_base* udb, udb_ptr* domain, uint16_t t, udb_ptr* res)
+{
+ assert(udb_ptr_get_type(domain) == udb_chunk_type_domain);
+ udb_ptr_init(res, udb);
+ udb_ptr_set_rptr(res, udb, &DOMAIN(domain)->rrsets);
+ while(res->data) {
+ if(RRSET(res)->type == t)
+ return 1;
+ udb_ptr_set_rptr(res, udb, &RRSET(res)->next);
+ }
+ /* rrset does not exist and res->data is conveniently zero */
+ return 0;
+}
+
+/** find or create rrset in the domain rrset list */
+static int
+rrset_find_or_create(udb_base* udb, udb_ptr* domain, uint16_t t, udb_ptr* res)
+{
+ if(udb_rrset_find(udb, domain, t, res))
+ return 1;
+ return rrset_create(udb, domain, t, res);
+}
+
+/** see if RR matches type, class and rdata */
+static int
+rr_match(udb_ptr* rr, uint16_t t, uint16_t k, uint8_t* rdata, size_t rdatalen)
+{
+ return RR(rr)->type == t && RR(rr)->klass == k &&
+ RR(rr)->len == rdatalen &&
+ memcmp(RR(rr)->wire, rdata, rdatalen) == 0;
+}
+
+/** see if RR exists in the RR list that matches the rdata, and return it */
+static int
+rr_search(udb_base* udb, udb_ptr* rrset, uint16_t t, uint16_t k,
+ uint8_t* rdata, size_t rdatalen, udb_ptr* result)
+{
+ assert(udb_ptr_get_type(rrset) == udb_chunk_type_rrset);
+ udb_ptr_init(result, udb);
+ udb_ptr_set_rptr(result, udb, &RRSET(rrset)->rrs);
+ while(result->data) {
+ if(rr_match(result, t, k, rdata, rdatalen))
+ return 1; /* found */
+ udb_ptr_set_rptr(result, udb, &RR(result)->next);
+ }
+ /* not found and result->data is conveniently zero */
+ return 0;
+}
+
+/** create RR chunk */
+static int
+rr_create(udb_base* udb, uint16_t t, uint16_t k, uint32_t ttl,
+ uint8_t* rdata, size_t rdatalen, udb_ptr* rr)
+{
+ if(!udb_ptr_alloc_space(rr, udb, udb_chunk_type_rr,
+ sizeof(struct rr_d)+rdatalen))
+ return 0;
+ udb_rel_ptr_init(&RR(rr)->next);
+ RR(rr)->type = t;
+ RR(rr)->klass = k;
+ RR(rr)->ttl = ttl;
+ RR(rr)->len = rdatalen;
+ memmove(RR(rr)->wire, rdata, rdatalen);
+ return 1;
+}
+
+/** add an RR to an RRset. */
+static int
+rrset_add_rr(udb_base* udb, udb_ptr* rrset, uint16_t t, uint16_t k,
+ uint32_t ttl, uint8_t* rdata, size_t rdatalen)
+{
+ udb_ptr rr;
+ assert(udb_ptr_get_type(rrset) == udb_chunk_type_rrset);
+ /* create it */
+ if(!rr_create(udb, t, k, ttl, rdata, rdatalen, &rr))
+ return 0;
+
+ /* add at end, to preserve order of RRs */
+ if(RRSET(rrset)->rrs.data == 0) {
+ udb_rptr_set_ptr(&RRSET(rrset)->rrs, udb, &rr);
+ } else {
+ udb_ptr lastrr;
+ udb_ptr_new(&lastrr, udb, &RRSET(rrset)->rrs);
+ while(RR(&lastrr)->next.data)
+ udb_ptr_set_rptr(&lastrr, udb, &RR(&lastrr)->next);
+ udb_rptr_set_ptr(&RR(&lastrr)->next, udb, &rr);
+ udb_ptr_unlink(&lastrr, udb);
+ }
+ udb_ptr_unlink(&rr, udb);
+ return 1;
+}
+
+/** remove an RR from an RRset. return 0 if RR did not exist. */
+static int
+rrset_del_rr(udb_base* udb, udb_ptr* rrset, uint16_t t, uint16_t k,
+ uint8_t* rdata, size_t rdatalen)
+{
+ udb_ptr p, prev;
+ assert(udb_ptr_get_type(rrset) == udb_chunk_type_rrset);
+ udb_ptr_new(&p, udb, &RRSET(rrset)->rrs);
+ udb_ptr_init(&prev, udb);
+ while(p.data) {
+ if(rr_match(&p, t, k, rdata, rdatalen)) {
+ /* remove it */
+ if(prev.data == 0) {
+ /* first in list */
+ udb_rptr_set_rptr(&RRSET(rrset)->rrs, udb,
+ &RR(&p)->next);
+ } else {
+ udb_rptr_set_rptr(&RR(&prev)->next, udb,
+ &RR(&p)->next);
+ }
+ udb_ptr_unlink(&prev, udb);
+ rr_delete(udb, &p);
+ return 1;
+ }
+ udb_ptr_set_ptr(&prev, udb, &p);
+ udb_ptr_set_rptr(&p, udb, &RR(&p)->next);
+ }
+ /* not found */
+ udb_ptr_unlink(&prev, udb);
+ udb_ptr_unlink(&p, udb);
+ return 0;
+}
+
+int
+udb_zone_add_rr(udb_base* udb, udb_ptr* zone, const uint8_t* nm, size_t nmlen,
+ uint16_t t, uint16_t k, uint32_t ttl, uint8_t* rdata, size_t rdatalen)
+{
+ udb_ptr domain, rrset, rr;
+ int created_rrset = 0;
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+
+ /* find or create domain */
+ if(!domain_find_or_create(udb, zone, nm, nmlen, &domain)) {
+ return 0;
+ }
+ /* find or create rrset(type) */
+ if(!rrset_find_or_create(udb, &domain, t, &rrset)) {
+ goto exit_clean_domain;
+ }
+ if(RRSET(&rrset)->rrs.data == 0)
+ created_rrset = 1;
+ /* test for duplicate RRs */
+ if(rr_search(udb, &rrset, t, k, rdata, rdatalen, &rr)) {
+ udb_ptr_unlink(&rr, udb);
+ goto exit_clean_domain_rrset;
+ }
+ /* add RR to rrset */
+ if(!rrset_add_rr(udb, &rrset, t, k, ttl, rdata, rdatalen)) {
+ exit_clean_domain_rrset:
+ /* if rrset was created, remove it */
+ if(RRSET(&rrset)->rrs.data == 0) {
+ udb_ptr_zero(&rrset, udb);
+ domain_remove_rrset(udb, &domain, t);
+ }
+ udb_ptr_unlink(&rrset, udb);
+ exit_clean_domain:
+ /* if domain created, delete it */
+ if(DOMAIN(&domain)->rrsets.data == 0)
+ domain_delete_unlink(udb, zone, &domain);
+ udb_ptr_unlink(&domain, udb);
+ return 0;
+ }
+ /* success, account changes */
+ if(created_rrset)
+ ZONE(zone)->rrset_count ++;
+ ZONE(zone)->rr_count ++;
+#ifdef NSEC3
+ if(t == TYPE_NSEC3PARAM && ZONE(zone)->nsec3param.data == 0)
+ zone_hash_nsec3param(udb, zone, &rrset);
+#endif /* NSEC3 */
+ udb_ptr_unlink(&domain, udb);
+ udb_ptr_unlink(&rrset, udb);
+ return 1;
+}
+
+void
+udb_zone_del_rr(udb_base* udb, udb_ptr* zone, const uint8_t* nm, size_t nmlen,
+ uint16_t t, uint16_t k, uint8_t* rdata, size_t rdatalen)
+{
+ udb_ptr domain, rrset;
+ assert(udb_ptr_get_type(zone) == udb_chunk_type_zone);
+ /* find the domain */
+ if(!udb_domain_find(udb, zone, nm, nmlen, &domain))
+ return;
+ /* find the rrset */
+ if(!udb_rrset_find(udb, &domain, t, &rrset)) {
+ udb_ptr_unlink(&domain, udb);
+ return;
+ }
+ /* remove the RR */
+#ifdef NSEC3
+ if(t == TYPE_NSEC3PARAM) {
+ udb_ptr rr;
+ if(rr_search(udb, &rrset, t, k, rdata, rdatalen, &rr)) {
+ if(rr.data == ZONE(zone)->nsec3param.data) {
+ udb_rptr_zero(&ZONE(zone)->nsec3param, udb);
+ }
+ udb_ptr_unlink(&rr, udb);
+ }
+ }
+#endif /* NSEC3 */
+ if(!rrset_del_rr(udb, &rrset, t, k, rdata, rdatalen)) {
+ /* rr did not exist */
+ udb_ptr_unlink(&domain, udb);
+ udb_ptr_unlink(&rrset, udb);
+ return;
+ }
+ ZONE(zone)->rr_count --;
+#ifdef NSEC3
+ if(t == TYPE_NSEC3PARAM && ZONE(zone)->nsec3param.data == 0 &&
+ RRSET(&rrset)->rrs.data != 0) {
+ zone_hash_nsec3param(udb, zone, &rrset);
+ }
+#endif /* NSEC3 */
+	/* see if we can remove the rrset too */
+ if(RRSET(&rrset)->rrs.data == 0) {
+ udb_ptr_zero(&rrset, udb);
+ domain_remove_rrset(udb, &domain, t);
+ ZONE(zone)->rrset_count --;
+ }
+ /* see if we can remove the domain name too */
+ if(DOMAIN(&domain)->rrsets.data == 0) {
+ domain_delete_unlink(udb, zone, &domain);
+ }
+ udb_ptr_unlink(&rrset, udb);
+ udb_ptr_unlink(&domain, udb);
+}
+
+void
+udb_zone_walk_chunk(void* base, void* d, uint64_t s, udb_walk_relptr_cb* cb,
+ void* arg)
+{
+ struct zone_d* p = (struct zone_d*)d;
+ assert(s >= sizeof(struct zone_d)+p->namelen);
+ (void)s;
+ (*cb)(base, &p->node, arg);
+ (*cb)(base, &p->domains, arg);
+ (*cb)(base, &p->nsec3param, arg);
+ (*cb)(base, &p->log_str, arg);
+}
+
+void
+udb_domain_walk_chunk(void* base, void* d, uint64_t s, udb_walk_relptr_cb* cb,
+ void* arg)
+{
+ struct domain_d* p = (struct domain_d*)d;
+ assert(s >= sizeof(struct domain_d)+p->namelen);
+ (void)s;
+ (*cb)(base, &p->node, arg);
+ (*cb)(base, &p->rrsets, arg);
+}
+
+void
+udb_rrset_walk_chunk(void* base, void* d, uint64_t s, udb_walk_relptr_cb* cb,
+ void* arg)
+{
+ struct rrset_d* p = (struct rrset_d*)d;
+ assert(s >= sizeof(struct rrset_d));
+ (void)s;
+ (*cb)(base, &p->next, arg);
+ (*cb)(base, &p->rrs, arg);
+}
+
+void
+udb_rr_walk_chunk(void* base, void* d, uint64_t s, udb_walk_relptr_cb* cb,
+ void* arg)
+{
+ struct rr_d* p = (struct rr_d*)d;
+ assert(s >= sizeof(struct rr_d)+p->len);
+ (void)s;
+ (*cb)(base, &p->next, arg);
+}
+
+void
+udb_task_walk_chunk(void* base, void* d, uint64_t s, udb_walk_relptr_cb* cb,
+ void* arg)
+{
+ struct task_list_d* p = (struct task_list_d*)d;
+ assert(s >= p->size);
+ (void)s;
+ (*cb)(base, &p->next, arg);
+}
+
+void namedb_walkfunc(void* base, void* warg, uint8_t t, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg)
+{
+ (void)warg;
+ switch(t) {
+ case udb_chunk_type_radtree:
+ udb_radix_tree_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_radnode:
+ udb_radix_node_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_radarray:
+ udb_radix_array_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_zone:
+ udb_zone_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_domain:
+ udb_domain_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_rrset:
+ udb_rrset_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_rr:
+ udb_rr_walk_chunk(base, d, s, cb, arg);
+ break;
+ case udb_chunk_type_task:
+ udb_task_walk_chunk(base, d, s, cb, arg);
+ break;
+ default:
+ /* no rel ptrs */
+ break;
+ }
+}
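A hedged sketch of how this walk function might be handed to the udb layer when the store is opened; udb_base_create_read and its argument order are assumed from udb.h elsewhere in this patch, and dbfile is a placeholder name:

/* sketch: open the store; udb uses the walk function to rewrite every
 * relative pointer in a chunk when chunks are moved or freed */
udb_base* udb = udb_base_create_read(dbfile, namedb_walkfunc, NULL);
if(udb && udb_base_get_userdata(udb)->data == 0)
	(void)udb_dns_init_file(udb); /* fresh file: create the zone radtree */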
diff --git a/usr.sbin/nsd/udbzone.h b/usr.sbin/nsd/udbzone.h
new file mode 100644
index 00000000000..f1163e4dfe5
--- /dev/null
+++ b/usr.sbin/nsd/udbzone.h
@@ -0,0 +1,147 @@
+/*
+ * udbzone -- store zone and rrset information in udb file.
+ *
+ * Copyright (c) 2011, NLnet Labs. See LICENSE for license.
+ */
+#ifndef UDB_ZONE_H
+#define UDB_ZONE_H
+#include "udb.h"
+#include "dns.h"
+#include "udbradtree.h"
+
+/**
+ * Store the DNS information in udb file on disk.
+ * udb_global
+ * |
+ * v
+ * zonetree -> zone -- zone_name
+ * radtree |
+ * v
+ * domain --> rrset -> rr
+ * radtree list list
+ * |-- name
+ */
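A brief sketch of this layering, using the functions declared below (the zone name, owner name and rdata variables are purely illustrative):

/* sketch: create a zone, then add one A record to it */
udb_ptr zone;
if(udb_zone_create(udb, &zone, zname, znamelen)) {
	/* domain, rrset and rr chunks are created on demand */
	(void)udb_zone_add_rr(udb, &zone, owner, ownerlen,
		TYPE_A, CLASS_IN, 3600, rdata, rdatalen);
	udb_ptr_unlink(&zone, udb);
}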
+
+/** zone information in the nsd.udb. Name allocated after it. */
+struct zone_d {
+ /** radtree node in the zonetree for this zone */
+ udb_rel_ptr node;
+ /** the radtree for the domain names in the zone */
+ udb_rel_ptr domains;
+ /** the NSEC3PARAM rr used for hashing (or 0), rr_d pointer */
+ udb_rel_ptr nsec3param;
+ /** the log_str for the AXFR change, or 0 */
+ udb_rel_ptr log_str;
+ /** modification time, time when the zone data was changed */
+ uint64_t mtime;
+ /** number of RRsets in the zone */
+ uint64_t rrset_count;
+ /** number of RRs in the zone */
+ uint64_t rr_count;
+ /** the length of the zone name */
+ udb_radstrlen_t namelen;
+ /** if the zone is expired */
+ uint8_t expired;
+ /** if the zone has been changed by AXFR */
+ uint8_t is_changed;
+ /** the zone (wire uncompressed) name in DNS format */
+ uint8_t name[0];
+};
+
+/** domain name in the nametree. name allocated after it */
+struct domain_d {
+ /** radtree node in the nametree for this domain */
+ udb_rel_ptr node;
+ /** the list of rrsets for this name, single linked */
+ udb_rel_ptr rrsets;
+ /** length of the domain name */
+ udb_radstrlen_t namelen;
+ /** the domain (wire uncompressed) name in DNS format */
+ uint8_t name[0];
+};
+
+/** rrset information. */
+struct rrset_d {
+ /** next in rrset list */
+ udb_rel_ptr next;
+ /** the singly linked list of rrs for this rrset */
+ udb_rel_ptr rrs;
+ /** type of the RRs in this rrset (host order) */
+ uint16_t type;
+};
+
+/** rr information; wireformat data allocated after it */
+struct rr_d {
+ /** next in rr list */
+ udb_rel_ptr next;
+ /** type (host order) */
+ uint16_t type;
+ /** class (host order) */
+ uint16_t klass;
+ /** ttl (host order) */
+ uint32_t ttl;
+ /** length of wireformat */
+ uint16_t len;
+ /** wireformat of rdata (without rdatalen) */
+ uint8_t wire[0];
+};
+
+/** init an udb for use as DNS store */
+int udb_dns_init_file(udb_base* udb);
+/** de-init an udb for use as DNS store */
+void udb_dns_deinit_file(udb_base* udb);
+
+/** create a zone */
+int udb_zone_create(udb_base* udb, udb_ptr* result, const uint8_t* dname,
+ size_t dlen);
+/** clear all RRsets from a zone */
+void udb_zone_clear(udb_base* udb, udb_ptr* zone);
+/** delete a zone */
+void udb_zone_delete(udb_base* udb, udb_ptr* zone);
+/** find a zone by name (exact match) */
+int udb_zone_search(udb_base* udb, udb_ptr* result, const uint8_t* dname,
+ size_t dlen);
+/** get modification time for zone or 0 */
+uint64_t udb_zone_get_mtime(udb_base* udb, const uint8_t* dname, size_t dlen);
+/** set log str in udb, or remove it */
+void udb_zone_set_log_str(udb_base* udb, udb_ptr* zone, const char* str);
+/** find a domain name in the zone domain tree */
+int udb_domain_find(udb_base* udb, udb_ptr* zone, const uint8_t* nm,
+ size_t nmlen, udb_ptr* result);
+/** find rrset in domain */
+int udb_rrset_find(udb_base* udb, udb_ptr* domain, uint16_t t, udb_ptr* res);
+
+/** add an RR to a zone */
+int udb_zone_add_rr(udb_base* udb, udb_ptr* zone, const uint8_t* nm,
+ size_t nmlen, uint16_t t, uint16_t k, uint32_t ttl, uint8_t* rdata,
+ size_t rdatalen);
+/** del an RR from a zone */
+void udb_zone_del_rr(udb_base* udb, udb_ptr* zone, const uint8_t* nm,
+ size_t nmlen, uint16_t t, uint16_t k, uint8_t* rdata, size_t rdatalen);
+
+/** get pretty string for nsec3parameters (static buffer returned) */
+const char* udb_nsec3param_string(udb_ptr* rr);
+
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_zone */
+void udb_zone_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_domain */
+void udb_domain_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_rrset */
+void udb_rrset_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+/** for use in udb-walkfunc, walks relptrs in udb_chunk_type_rr */
+void udb_rr_walk_chunk(void* base, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+
+/** walk through relptrs in registered types */
+void namedb_walkfunc(void* base, void* warg, uint8_t t, void* d, uint64_t s,
+ udb_walk_relptr_cb* cb, void* arg);
+
+#define ZONE(ptr) ((struct zone_d*)UDB_PTR(ptr))
+#define DOMAIN(ptr) ((struct domain_d*)UDB_PTR(ptr))
+#define RRSET(ptr) ((struct rrset_d*)UDB_PTR(ptr))
+#define RR(ptr) ((struct rr_d*)UDB_PTR(ptr))
+
+#endif /* UDB_ZONE_H */
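
Taken together, the lookup functions and accessor macros above give a read path through the zonetree/domain/rrset hierarchy sketched at the top of the header. The following minimal example walks that chain; it is a hedged sketch, not code from the diff — udb_ptr_unlink is assumed to be the usual udb.h release helper, and the error handling is illustrative.

#include "udb.h"
#include "udbzone.h"

/* Sketch: resolve zone -> domain -> rrset using the udbzone.h API above. */
static int
lookup_rrset_sketch(udb_base* udb, const uint8_t* zname, size_t zlen,
	const uint8_t* owner, size_t olen, uint16_t qtype, udb_ptr* rrset)
{
	udb_ptr zone, domain;
	if(!udb_zone_search(udb, &zone, zname, zlen))
		return 0;			/* zone not in nsd.udb */
	if(!udb_domain_find(udb, &zone, owner, olen, &domain)) {
		udb_ptr_unlink(&zone, udb);	/* assumed udb.h helper */
		return 0;			/* owner name not present */
	}
	if(!udb_rrset_find(udb, &domain, qtype, rrset)) {
		udb_ptr_unlink(&domain, udb);
		udb_ptr_unlink(&zone, udb);
		return 0;			/* no rrset of that type */
	}
	/* RRSET(rrset)->type now equals qtype; RRSET(rrset)->rrs lists the RRs */
	udb_ptr_unlink(&domain, udb);
	udb_ptr_unlink(&zone, udb);
	return 1;
}
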
diff --git a/usr.sbin/nsd/xfrd-disk.h b/usr.sbin/nsd/xfrd-disk.h
index 42db1993180..2c8e23fc752 100644
--- a/usr.sbin/nsd/xfrd-disk.h
+++ b/usr.sbin/nsd/xfrd-disk.h
@@ -1,7 +1,7 @@
/*
* xfrd-disk.h - XFR (transfer) Daemon TCP system header file. Save/Load state to disk.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,8 +10,8 @@
#ifndef XFRD_DISK_H
#define XFRD_DISK_H
-#include "config.h"
struct xfrd_state;
+struct nsd;
/* magic string to identify xfrd state file */
#define XFRD_FILE_MAGIC "NSDXFRD1"
@@ -21,4 +21,13 @@ void xfrd_read_state(struct xfrd_state* xfrd);
/* write xfrd zone state if possible */
void xfrd_write_state(struct xfrd_state* xfrd);
+/* create temp directory */
+void xfrd_make_tempdir(struct nsd* nsd);
+/* rmdir temp directory */
+void xfrd_del_tempdir(struct nsd* nsd);
+/* open temp file, makes directory if needed */
+FILE* xfrd_open_xfrfile(struct nsd* nsd, uint64_t number, char* mode);
+/* unlink temp file */
+void xfrd_unlink_xfrfile(struct nsd* nsd, uint64_t number);
+
#endif /* XFRD_DISK_H */
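
The four declarations added above give xfrd a per-transfer temp-file lifecycle keyed by a 64-bit file number (the xfrfilenumber counter that the xfrd.h changes further down introduce). A hedged usage sketch follows; the payload, mode string and cleanup policy are illustrative assumptions, not behaviour taken from the diff.

#include <stdio.h>
#include <stdint.h>
#include "xfrd-disk.h"

/* Sketch only: append one received transfer packet to the numbered temp
 * file, and drop the file again if the transfer is abandoned. */
static int
spool_xfr_packet_sketch(struct nsd* nsd, uint64_t xfrfilenumber,
	const void* pkt, size_t len)
{
	/* "a" appends; xfrd_open_xfrfile creates the temp dir if needed */
	FILE* f = xfrd_open_xfrfile(nsd, xfrfilenumber, "a");
	if(!f)
		return 0;	/* disk or permission problem */
	if(fwrite(pkt, 1, len, f) != len) {
		fclose(f);
		xfrd_unlink_xfrfile(nsd, xfrfilenumber);
		return 0;
	}
	fclose(f);
	return 1;
}
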
diff --git a/usr.sbin/nsd/xfrd-notify.c b/usr.sbin/nsd/xfrd-notify.c
index 0aa5c2c6cd7..6fb8e00e1e8 100644
--- a/usr.sbin/nsd/xfrd-notify.c
+++ b/usr.sbin/nsd/xfrd-notify.c
@@ -1,7 +1,7 @@
/*
* xfrd-notify.c - notify sending routines
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -11,7 +11,7 @@
#include <assert.h>
#include <string.h>
#include <unistd.h>
-
+#include <errno.h>
#include "xfrd-notify.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
@@ -22,8 +22,6 @@
/* start sending notifies */
static void notify_enable(struct notify_zone_t* zone,
struct xfrd_soa* new_soa);
-/* stop sending notifies */
-static void notify_disable(struct notify_zone_t* zone);
/* setup the notify active state */
static void setup_notify_active(struct notify_zone_t* zone);
@@ -31,21 +29,29 @@ static void setup_notify_active(struct notify_zone_t* zone);
static int xfrd_handle_notify_reply(struct notify_zone_t* zone, buffer_type* packet);
/* handle zone notify send */
-static void xfrd_handle_notify_send(netio_type *netio,
- netio_handler_type *handler, netio_event_types_type event_types);
+static void xfrd_handle_notify_send(int fd, short event, void* arg);
static void xfrd_notify_next(struct notify_zone_t* zone);
static void xfrd_notify_send_udp(struct notify_zone_t* zone, buffer_type* packet);
static void
+notify_send_disable(struct notify_zone_t* zone)
+{
+ zone->notify_send_enable = 0;
+ event_del(&zone->notify_send_handler);
+ if(zone->notify_send_handler.ev_fd != -1) {
+ close(zone->notify_send_handler.ev_fd);
+ }
+}
+
+void
notify_disable(struct notify_zone_t* zone)
{
zone->notify_current = 0;
- zone->notify_send_handler.timeout = NULL;
- if(zone->notify_send_handler.fd != -1) {
- close(zone->notify_send_handler.fd);
- zone->notify_send_handler.fd = -1;
+ /* if added, then remove */
+ if(zone->notify_send_enable) {
+ notify_send_disable(zone);
}
if(xfrd->notify_udp_num == XFRD_MAX_UDP_NOTIFY) {
@@ -56,6 +62,8 @@ notify_disable(struct notify_zone_t* zone)
assert(wz->is_waiting);
wz->is_waiting = 0;
xfrd->notify_waiting_first = wz->waiting_next;
+ if(wz->waiting_next)
+ wz->waiting_next->waiting_prev = NULL;
if(xfrd->notify_waiting_last == wz)
xfrd->notify_waiting_last = NULL;
/* see if this zone needs notify sending */
@@ -72,13 +80,12 @@ notify_disable(struct notify_zone_t* zone)
}
void
-init_notify_send(rbtree_t* tree, netio_type* netio, region_type* region,
- const dname_type* apex, zone_options_t* options, zone_type* dbzone)
+init_notify_send(rbtree_t* tree, region_type* region, zone_options_t* options)
{
struct notify_zone_t* not = (struct notify_zone_t*)
region_alloc(region, sizeof(struct notify_zone_t));
memset(not, 0, sizeof(struct notify_zone_t));
- not->apex = apex;
+ not->apex = options->node.key;
not->apex_str = options->name;
not->node.key = not->apex;
not->options = options;
@@ -87,23 +94,49 @@ init_notify_send(rbtree_t* tree, netio_type* netio, region_type* region,
not->current_soa = (struct xfrd_soa*)region_alloc(region,
sizeof(struct xfrd_soa));
memset(not->current_soa, 0, sizeof(struct xfrd_soa));
- if(dbzone && dbzone->soa_rrset && dbzone->soa_rrset->rrs) {
- xfrd_copy_soa(not->current_soa, dbzone->soa_rrset->rrs);
- }
not->is_waiting = 0;
- not->notify_send_handler.fd = -1;
- not->notify_send_handler.timeout = 0;
- not->notify_send_handler.user_data = not;
- not->notify_send_handler.event_types =
- NETIO_EVENT_READ|NETIO_EVENT_TIMEOUT;
- not->notify_send_handler.event_handler = xfrd_handle_notify_send;
- netio_add_handler(netio, &not->notify_send_handler);
- tsig_create_record_custom(&not->notify_tsig, region, 0, 0, 4);
+
+ not->notify_send_enable = 0;
+ tsig_create_record_custom(&not->notify_tsig, NULL, 0, 0, 4);
not->notify_current = 0;
rbtree_insert(tree, (rbnode_t*)not);
}
+void
+xfrd_del_notify(xfrd_state_t* xfrd, const dname_type* dname)
+{
+ /* find it */
+ struct notify_zone_t* not = (struct notify_zone_t*)rbtree_delete(
+ xfrd->notify_zones, dname);
+ if(!not)
+ return;
+
+ /* waiting list */
+ if(not->is_waiting) {
+ if(not->waiting_prev)
+ not->waiting_prev->waiting_next = not->waiting_next;
+ else xfrd->notify_waiting_first = not->waiting_next;
+ if(not->waiting_next)
+ not->waiting_next->waiting_prev = not->waiting_prev;
+ else xfrd->notify_waiting_last = not->waiting_prev;
+ not->is_waiting = 0;
+ }
+
+ /* event */
+ if(not->notify_send_enable) {
+ notify_disable(not);
+ }
+
+ /* del tsig */
+ tsig_delete_record(&not->notify_tsig, NULL);
+
+ /* free it */
+ region_recycle(xfrd->region, not->current_soa, sizeof(xfrd_soa_t));
+ /* the apex is recycled when the zone_options.node.key is removed */
+ region_recycle(xfrd->region, not, sizeof(*not));
+}
+
static int
xfrd_handle_notify_reply(struct notify_zone_t* zone, buffer_type* packet)
{
@@ -151,13 +184,15 @@ xfrd_notify_next(struct notify_zone_t* zone)
static void
xfrd_notify_send_udp(struct notify_zone_t* zone, buffer_type* packet)
{
- if(zone->notify_send_handler.fd != -1)
- close(zone->notify_send_handler.fd);
- zone->notify_send_handler.fd = -1;
+ int fd;
+ if(zone->notify_send_enable) {
+ notify_send_disable(zone);
+ }
/* Set timeout for next reply */
- zone->notify_timeout.tv_sec = xfrd_time() + XFRD_NOTIFY_RETRY_TIMOUT;
+ zone->notify_timeout.tv_sec = XFRD_NOTIFY_RETRY_TIMOUT;
/* send NOTIFY to secondary. */
- xfrd_setup_packet(packet, TYPE_SOA, CLASS_IN, zone->apex);
+ xfrd_setup_packet(packet, TYPE_SOA, CLASS_IN, zone->apex,
+ qid_generate());
zone->notify_query_id = ID(packet);
OPCODE_SET(packet, OPCODE_NOTIFY);
AA_SET(packet);
@@ -170,41 +205,53 @@ xfrd_notify_send_udp(struct notify_zone_t* zone, buffer_type* packet)
xfrd_tsig_sign_request(packet, &zone->notify_tsig, zone->notify_current);
}
buffer_flip(packet);
- zone->notify_send_handler.fd = xfrd_send_udp(zone->notify_current,
- packet, zone->options->outgoing_interface);
- if(zone->notify_send_handler.fd == -1) {
+ fd = xfrd_send_udp(zone->notify_current, packet,
+ zone->options->pattern->outgoing_interface);
+ if(fd == -1) {
log_msg(LOG_ERR, "xfrd: zone %s: could not send notify #%d to %s",
zone->apex_str, zone->notify_retry,
zone->notify_current->ip_address_spec);
+ event_set(&zone->notify_send_handler, -1, EV_TIMEOUT,
+ xfrd_handle_notify_send, zone);
+ if(event_base_set(xfrd->event_base, &zone->notify_send_handler) != 0)
+ log_msg(LOG_ERR, "notify_send: event_base_set failed");
+ if(evtimer_add(&zone->notify_send_handler, &zone->notify_timeout) != 0)
+ log_msg(LOG_ERR, "notify_send: evtimer_add failed");
+ zone->notify_send_enable = 1;
return;
}
+ event_set(&zone->notify_send_handler, fd, EV_READ | EV_TIMEOUT,
+ xfrd_handle_notify_send, zone);
+ if(event_base_set(xfrd->event_base, &zone->notify_send_handler) != 0)
+ log_msg(LOG_ERR, "notify_send: event_base_set failed");
+ if(event_add(&zone->notify_send_handler, &zone->notify_timeout) != 0)
+ log_msg(LOG_ERR, "notify_send: evtimer_add failed");
+ zone->notify_send_enable = 1;
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s: sent notify #%d to %s",
zone->apex_str, zone->notify_retry,
zone->notify_current->ip_address_spec));
}
static void
-xfrd_handle_notify_send(netio_type* ATTR_UNUSED(netio),
- netio_handler_type *handler, netio_event_types_type event_types)
+xfrd_handle_notify_send(int fd, short event, void* arg)
{
- struct notify_zone_t* zone = (struct notify_zone_t*)handler->user_data;
+ struct notify_zone_t* zone = (struct notify_zone_t*)arg;
buffer_type* packet = xfrd_get_temp_buffer();
assert(zone->notify_current);
if(zone->is_waiting) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO,
"xfrd: notify waiting, skipped, %s", zone->apex_str));
- assert(zone->notify_send_handler.fd == -1);
return;
}
- if(event_types & NETIO_EVENT_READ) {
+ if((event & EV_READ)) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO,
"xfrd: zone %s: read notify ACK", zone->apex_str));
- assert(handler->fd != -1);
- if(xfrd_udp_read_packet(packet, zone->notify_send_handler.fd)) {
+ assert(fd != -1);
+ if(xfrd_udp_read_packet(packet, fd)) {
if(xfrd_handle_notify_reply(zone, packet))
xfrd_notify_next(zone);
}
- } else if(event_types & NETIO_EVENT_TIMEOUT) {
+ } else if((event & EV_TIMEOUT)) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s: notify timeout",
zone->apex_str));
/* timeout, try again */
@@ -212,7 +259,7 @@ xfrd_handle_notify_send(netio_type* ATTR_UNUSED(netio),
/* see if notify is still enabled */
if(zone->notify_current) {
zone->notify_retry++;
- if(zone->notify_retry > zone->options->notify_retry) {
+ if(zone->notify_retry > zone->options->pattern->notify_retry) {
log_msg(LOG_ERR, "xfrd: zone %s: max notify send count reached, %s unreachable",
zone->apex_str, zone->notify_current->ip_address_spec);
xfrd_notify_next(zone);
@@ -228,16 +275,25 @@ static void
setup_notify_active(struct notify_zone_t* zone)
{
zone->notify_retry = 0;
- zone->notify_current = zone->options->notify;
- zone->notify_send_handler.timeout = &zone->notify_timeout;
- zone->notify_timeout.tv_sec = xfrd_time();
- zone->notify_timeout.tv_nsec = 0;
+ zone->notify_current = zone->options->pattern->notify;
+ zone->notify_timeout.tv_sec = 0;
+ zone->notify_timeout.tv_usec = 0;
+
+ if(zone->notify_send_enable)
+ notify_send_disable(zone);
+ event_set(&zone->notify_send_handler, -1, EV_TIMEOUT,
+ xfrd_handle_notify_send, zone);
+ if(event_base_set(xfrd->event_base, &zone->notify_send_handler) != 0)
+ log_msg(LOG_ERR, "notifysend: event_base_set failed");
+ if(evtimer_add(&zone->notify_send_handler, &zone->notify_timeout) != 0)
+ log_msg(LOG_ERR, "notifysend: evtimer_add failed");
+ zone->notify_send_enable = 1;
}
static void
notify_enable(struct notify_zone_t* zone, struct xfrd_soa* new_soa)
{
- if(!zone->options->notify) {
+ if(!zone->options->pattern->notify) {
return; /* no notify acl, nothing to do */
}
@@ -254,27 +310,37 @@ notify_enable(struct notify_zone_t* zone, struct xfrd_soa* new_soa)
return;
}
/* put it in waiting list */
- zone->notify_current = zone->options->notify;
+ zone->notify_current = zone->options->pattern->notify;
zone->is_waiting = 1;
zone->waiting_next = NULL;
+ zone->waiting_prev = xfrd->notify_waiting_last;
if(xfrd->notify_waiting_last) {
xfrd->notify_waiting_last->waiting_next = zone;
} else {
xfrd->notify_waiting_first = zone;
}
xfrd->notify_waiting_last = zone;
- zone->notify_send_handler.timeout = NULL;
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s: notify on waiting list.",
zone->apex_str));
}
void
+xfrd_notify_start(struct notify_zone_t* zone)
+{
+ if(zone->is_waiting || zone->notify_send_enable)
+ return;
+ notify_enable(zone, NULL);
+}
+
+void
xfrd_send_notify(rbtree_t* tree, const dname_type* apex, struct xfrd_soa* new_soa)
{
/* lookup the zone */
struct notify_zone_t* zone = (struct notify_zone_t*)
rbtree_search(tree, apex);
assert(zone);
+ if(zone->notify_send_enable)
+ notify_disable(zone);
notify_enable(zone, new_soa);
}
@@ -286,24 +352,24 @@ notify_handle_master_zone_soainfo(rbtree_t* tree,
/* lookup the zone */
struct notify_zone_t* zone = (struct notify_zone_t*)
rbtree_search(tree, apex);
- assert(zone);
+ if(!zone) return; /* got SOAINFO but zone was deleted meanwhile */
/* check if SOA changed */
if( (new_soa == NULL && zone->current_soa->serial == 0) ||
(new_soa && new_soa->serial == zone->current_soa->serial))
return;
-
+ if(zone->notify_send_enable)
+ notify_disable(zone);
notify_enable(zone, new_soa);
}
-void close_notify_fds(rbtree_t* tree)
+void
+close_notify_fds(rbtree_t* tree)
{
struct notify_zone_t* zone;
RBTREE_FOR(zone, struct notify_zone_t*, tree)
{
- if(zone->notify_send_handler.fd != -1) {
- close(zone->notify_send_handler.fd);
- zone->notify_send_handler.fd = -1;
- }
+ if(zone->notify_send_enable)
+ notify_send_disable(zone);
}
}
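
The rewritten notify code above repeats one libevent idiom several times: tear down the old handler if it was armed, event_set a timer-only or read-plus-timeout event, attach it to xfrd->event_base, and event_add it. The sketch below shows that pattern generically; base, ev, enabled, my_cb and my_arg are placeholders (not names from the diff), and plain stderr logging stands in for log_msg.

#include <stdio.h>
#include <sys/time.h>
#include <event.h>

/* Sketch of the (re)arm pattern used for notify_send_handler above. */
static void
arm_handler_sketch(struct event_base* base, struct event* ev, int* enabled,
	int fd, struct timeval* timeout,
	void (*my_cb)(int, short, void*), void* my_arg)
{
	if(*enabled)		/* only delete an event that was armed before */
		event_del(ev);
	event_set(ev, fd, fd == -1 ? EV_TIMEOUT : (EV_READ | EV_TIMEOUT),
		my_cb, my_arg);
	if(event_base_set(base, ev) != 0)
		fprintf(stderr, "sketch: event_base_set failed\n");
	if(event_add(ev, timeout) != 0)	/* evtimer_add(ev, tv) when fd == -1 */
		fprintf(stderr, "sketch: event_add failed\n");
	*enabled = 1;
}
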
diff --git a/usr.sbin/nsd/xfrd-notify.h b/usr.sbin/nsd/xfrd-notify.h
index 242c7e763ca..4f084d302e7 100644
--- a/usr.sbin/nsd/xfrd-notify.h
+++ b/usr.sbin/nsd/xfrd-notify.h
@@ -1,7 +1,7 @@
/*
* xfrd-notify.h - notify sending routines.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,9 +10,12 @@
#ifndef XFRD_NOTIFY_H
#define XFRD_NOTIFY_H
-#include "config.h"
+#ifndef USE_MINI_EVENT
+#include <event.h>
+#else
+#include "mini_event.h"
+#endif
#include "tsig.h"
-#include "netio.h"
#include "rbtree.h"
struct nsd;
@@ -22,6 +25,7 @@ struct zone_options;
struct zone;
struct xfrd_soa;
struct acl_options;
+struct xfrd_state;
/**
* This struct keeps track of outbound notifies for a zone.
@@ -38,26 +42,32 @@ struct notify_zone_t {
/* notify sending handler */
/* Not saved on disk (i.e. kill of daemon stops notifies) */
- netio_handler_type notify_send_handler;
- struct timespec notify_timeout;
+ int notify_send_enable;
+ struct event notify_send_handler;
+ struct timeval notify_timeout;
struct acl_options* notify_current; /* current slave to notify */
+ uint8_t notify_restart; /* restart notify after repattern */
uint8_t notify_retry; /* how manieth retry in sending to current */
uint16_t notify_query_id;
/* is this notify waiting for a socket? */
uint8_t is_waiting;
- /* next in the waiting list for the udp sockets */
+ /* the double linked waiting list for the udp sockets */
struct notify_zone_t* waiting_next;
+ struct notify_zone_t* waiting_prev;
};
/* initialise outgoing notifies */
-void init_notify_send(rbtree_t* tree, netio_type* netio, region_type* region,
- const dname_type* apex, struct zone_options* options,
- struct zone* dbzone);
+void init_notify_send(rbtree_t* tree, region_type* region,
+ struct zone_options* options);
+/* delete notify zone */
+void xfrd_del_notify(struct xfrd_state* xfrd, const dname_type* dname);
/* send notifications to all in the notify list */
void xfrd_send_notify(rbtree_t* tree, const struct dname* apex,
struct xfrd_soa* new_soa);
+/* start notifications, if not started already (does not clobber SOA) */
+void xfrd_notify_start(struct notify_zone_t* zone);
/* handle soa update notify for a master zone. newsoa can be NULL.
Makes sure that the soa (serial) has changed. Or drops notify. */
@@ -66,5 +76,7 @@ void notify_handle_master_zone_soainfo(rbtree_t* tree,
/* close fds in use for notification sending */
void close_notify_fds(rbtree_t* tree);
+/* stop send of notify */
+void notify_disable(struct notify_zone_t* zone);
#endif /* XFRD_NOTIFY_H */
diff --git a/usr.sbin/nsd/xfrd-tcp.h b/usr.sbin/nsd/xfrd-tcp.h
index e42e1a4456a..ac3f9dc5aab 100644
--- a/usr.sbin/nsd/xfrd-tcp.h
+++ b/usr.sbin/nsd/xfrd-tcp.h
@@ -1,7 +1,7 @@
/*
* xfrd-tcp.h - XFR (transfer) Daemon TCP system header file. Manages tcp conn.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,7 +10,6 @@
#ifndef XFRD_TCP_H
#define XFRD_TCP_H
-#include "config.h"
#include "xfrd.h"
struct buffer;
@@ -21,6 +20,7 @@ struct region;
struct dname;
struct acl_options;
+struct xfrd_tcp_pipeline;
typedef struct xfrd_tcp xfrd_tcp_t;
typedef struct xfrd_tcp_set xfrd_tcp_set_t;
/*
@@ -28,12 +28,14 @@ typedef struct xfrd_tcp_set xfrd_tcp_set_t;
*/
struct xfrd_tcp_set {
/* tcp connections, each has packet and read/wr state */
- struct xfrd_tcp *tcp_state[XFRD_MAX_TCP];
+ struct xfrd_tcp_pipeline *tcp_state[XFRD_MAX_TCP];
/* number of TCP connections in use. */
int tcp_count;
/* TCP timeout. */
int tcp_timeout;
- /* linked list of zones waiting for a TCP connection */
+ /* rbtree with pipelines sorted by master */
+ rbtree_t* pipetree;
+ /* double linked list of zones waiting for a TCP connection */
struct xfrd_zone *tcp_waiting_first, *tcp_waiting_last;
};
@@ -61,27 +63,91 @@ struct xfrd_tcp {
struct buffer* packet;
};
+/* use illegal pointer value to denote skipped ID number.
+ * if this does not work, we can allocate with malloc */
+#define TCP_NULL_SKIP ((struct xfrd_zone*)-1)
+/* the number of ID values (16 bits) for a pipeline */
+#define ID_PIPE_NUM 65536
+
+/**
+ * Structure to keep track of a pipelined set of queries on
+ * an open tcp connection. The queries may be answered with
+ * interleaved answer packets, the ID number disambiguates.
+ * Sorted by the master IP address so you can use lookup with
+ * smaller-or-equal to find the tcp connection most suitable.
+ */
+struct xfrd_tcp_pipeline {
+ /* the rbtree node, sorted by IP and nr of unused queries */
+ rbnode_t node;
+ /* destination IP address */
+#ifdef INET6
+ struct sockaddr_storage ip;
+#else
+ struct sockaddr_in ip;
+#endif /* INET6 */
+ socklen_t ip_len;
+ /* number of unused IDs. used IDs are waiting to send their query,
+	 * or have been sent but not all answer packets have been received.
+ * Sorted by num_unused, so a lookup smaller-equal for 65536 finds the
+ * connection to that master that has the most free IDs. */
+ int num_unused;
+ /* number of skip-set IDs (these are 'in-use') */
+ int num_skip;
+
+ int handler_added;
+ /* the event handler for this pipe (it'll disambiguate by ID) */
+ struct event handler;
+
+ /* the tcp connection to use for reading */
+ xfrd_tcp_t* tcp_r;
+ /* the tcp connection to use for writing, if it is done successfully,
+ * then the first zone from the sendlist can be removed. */
+ xfrd_tcp_t* tcp_w;
+ /* once a byte has been written, handshake complete */
+ int connection_established;
+
+ /* list of queries that want to send, first to get write event,
+ * if NULL, no write event interest */
+ struct xfrd_zone* tcp_send_first, *tcp_send_last;
+ /* the unused and id arrays must be last in the structure */
+ /* per-ID number the queries that have this ID number, every
+	 * query owns one ID number (until it is done). NULL: unused
+ * When a query is done but not all answer-packets have been
+ * consumed for that ID number, the rest is skipped, this
+ * is denoted with the pointer-value TCP_NULL_SKIP, the ids that
+ * are skipped are not on the unused list. They may be
+ * removed once the last answer packet is skipped.
+ * ID_PIPE_NUM-num_unused values in the id array are nonNULL (either
+ * a zone pointer or SKIP) */
+ struct xfrd_zone* id[ID_PIPE_NUM];
+ /* unused ID numbers; the first part of the array contains the IDs */
+ uint16_t unused[ID_PIPE_NUM];
+};
+
/* create set of tcp connections */
xfrd_tcp_set_t* xfrd_tcp_set_create(struct region* region);
/* init tcp state */
-xfrd_tcp_t* xfrd_tcp_create(struct region* region);
+xfrd_tcp_t* xfrd_tcp_create(struct region* region, size_t bufsize);
/* obtain tcp connection for a zone (or wait) */
void xfrd_tcp_obtain(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
/* release tcp connection for a zone (starts waiting) */
void xfrd_tcp_release(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
+/* release tcp pipe entirely (does not stop the zones inside it) */
+void xfrd_tcp_pipe_release(xfrd_tcp_set_t* set, struct xfrd_tcp_pipeline* tp,
+ int conn);
/* use tcp connection to start xfr */
-void xfrd_tcp_xfr(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
+void xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp,
+ struct xfrd_zone* zone);
/* initialize tcp_state for a zone. Opens the connection. true on success.*/
-int xfrd_tcp_open(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
+int xfrd_tcp_open(xfrd_tcp_set_t* set, struct xfrd_tcp_pipeline* tp, struct xfrd_zone* zone);
/* read data from tcp, maybe partial read */
-void xfrd_tcp_read(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
+void xfrd_tcp_read(struct xfrd_tcp_pipeline* tp);
/* write data to tcp, maybe a partial write */
-void xfrd_tcp_write(xfrd_tcp_set_t* set, struct xfrd_zone* zone);
+void xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, struct xfrd_zone* zone);
+/* handle tcp pipe events */
+void xfrd_handle_tcp_pipe(int fd, short event, void* arg);
-/* see if the tcp connection is in the reading stage (else writin) */
-static inline int xfrd_tcp_is_reading(xfrd_tcp_set_t* set, int conn)
-{return set->tcp_state[conn]->is_reading;}
/*
* Read from a stream connection (size16)+packet into buffer.
* returns value is
@@ -103,7 +169,7 @@ int conn_write(xfrd_tcp_t* conn);
/* setup DNS packet for a query of this type */
void xfrd_setup_packet(struct buffer* packet,
- uint16_t type, uint16_t klass, const struct dname* dname);
+ uint16_t type, uint16_t klass, const struct dname* dname, uint16_t qid);
/* write soa in network format to the packet buffer */
void xfrd_write_soa_buffer(struct buffer* packet,
const struct dname* apex, struct xfrd_soa* soa);
@@ -122,4 +188,7 @@ socklen_t xfrd_acl_sockaddr_frm(struct acl_options* acl,
struct sockaddr_in *frm);
#endif /* INET6 */
+/* create pipeline tcp structure */
+struct xfrd_tcp_pipeline* xfrd_tcp_pipeline_create(region_type* region);
+
#endif /* XFRD_TCP_H */
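
struct xfrd_tcp_pipeline above explains the ID bookkeeping in prose: free IDs live in the first num_unused slots of the unused array, in-use IDs point at their zone in the id array, and finished-but-not-drained IDs are parked with the TCP_NULL_SKIP sentinel. The following is a hedged sketch of that discipline only; xfrd-tcp.c has its own helpers, and these function names are invented for illustration.

#include "xfrd-tcp.h"

/* Sketch: hand an unused query ID to a zone.
 * Caller must have checked tp->num_unused > 0 first. */
static uint16_t
pipe_take_id_sketch(struct xfrd_tcp_pipeline* tp, struct xfrd_zone* zone)
{
	uint16_t id = tp->unused[tp->num_unused - 1];
	tp->num_unused--;
	tp->id[id] = zone;
	return id;
}

/* Sketch: give an ID back, or mark it skipped while trailing answer
 * packets for it are still expected on the connection. */
static void
pipe_drop_id_sketch(struct xfrd_tcp_pipeline* tp, uint16_t id, int answers_pending)
{
	if(answers_pending) {
		tp->id[id] = TCP_NULL_SKIP;	/* still 'in use', packets skipped */
		tp->num_skip++;
	} else {
		tp->id[id] = NULL;
		tp->unused[tp->num_unused++] = id;	/* back on the free list */
	}
}
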
diff --git a/usr.sbin/nsd/xfrd.h b/usr.sbin/nsd/xfrd.h
index e4d6a278259..b71f8c54f5e 100644
--- a/usr.sbin/nsd/xfrd.h
+++ b/usr.sbin/nsd/xfrd.h
@@ -1,7 +1,7 @@
/*
* xfrd.h - XFR (transfer) Daemon header file. Coordinates SOA updates.
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -10,8 +10,11 @@
#ifndef XFRD_H
#define XFRD_H
-#include "config.h"
-#include "netio.h"
+#ifndef USE_MINI_EVENT
+#include <event.h>
+#else
+#include "mini_event.h"
+#endif
#include "rbtree.h"
#include "namedb.h"
#include "options.h"
@@ -24,6 +27,7 @@ struct buffer;
struct xfrd_tcp;
struct xfrd_tcp_set;
struct notify_zone_t;
+struct udb_ptr;
typedef struct xfrd_state xfrd_state_t;
typedef struct xfrd_zone xfrd_zone_t;
typedef struct xfrd_soa xfrd_soa_t;
@@ -35,41 +39,47 @@ struct xfrd_state {
/* time when daemon was last started */
time_t xfrd_start_time;
struct region* region;
- netio_type* netio;
+ struct event_base* event_base;
struct nsd* nsd;
struct xfrd_tcp_set* tcp_set;
/* packet buffer for udp packets */
struct buffer* packet;
- /* udp waiting list */
+ /* udp waiting list, double linked list */
struct xfrd_zone *udp_waiting_first, *udp_waiting_last;
/* number of udp sockets (for sending queries) in use */
size_t udp_use_num;
+ /* activated waiting list, double linked list */
+ struct xfrd_zone *activated_first;
/* current time is cached */
uint8_t got_time;
time_t current_time;
+ /* counter for xfr file numbers */
+ uint64_t xfrfilenumber;
+
/* timer for NSD reload */
- struct timespec reload_timeout;
- netio_handler_type reload_handler;
+ struct timeval reload_timeout;
+ struct event reload_handler;
+ int reload_added;
/* last reload must have caught all zone updates before this time */
time_t reload_cmd_last_sent;
uint8_t can_send_reload;
+ pid_t reload_pid;
/* communication channel with server_main */
- netio_handler_type ipc_handler;
- uint8_t ipc_is_soa;
- uint8_t parent_soa_info_pass;
+ struct event ipc_handler;
+ int ipc_handler_flags;
struct xfrd_tcp *ipc_conn;
struct buffer* ipc_pass;
/* sending ipc to server_main */
- struct xfrd_tcp *ipc_conn_write;
+ uint8_t need_to_send_shutdown;
uint8_t need_to_send_reload;
+ uint8_t need_to_send_stats;
uint8_t need_to_send_quit;
- uint8_t sending_zone_state;
uint8_t ipc_send_blocked;
- stack_type* dirty_zones; /* stack of xfrd_zone* */
+ struct udb_ptr* last_task;
/* xfrd shutdown flag */
uint8_t shutdown;
@@ -138,10 +148,6 @@ struct xfrd_zone {
xfrd_zone_expired
} state;
- /* if state is dirty it needs to be sent to server_main.
- * it is also on the dirty_stack. Not saved on disk. */
- uint8_t dirty;
-
/* master to try to transfer from, number for persistence */
acl_options_t* master;
int master_num;
@@ -152,8 +158,10 @@ struct xfrd_zone {
int fresh_xfr_timeout;
/* handler for timeouts */
- struct timespec timeout;
- netio_handler_type zone_handler;
+ struct timeval timeout;
+ struct event zone_handler;
+ int zone_handler_flags;
+ int event_added;
/* tcp connection zone is using, or -1 */
int tcp_conn;
@@ -161,10 +169,22 @@ struct xfrd_zone {
uint8_t tcp_waiting;
/* next zone in waiting list */
xfrd_zone_t* tcp_waiting_next;
+ xfrd_zone_t* tcp_waiting_prev;
+ /* zone is in its tcp send queue */
+ uint8_t in_tcp_send;
+ /* next zone in tcp send queue */
+ xfrd_zone_t* tcp_send_next;
+ xfrd_zone_t* tcp_send_prev;
/* zone is waiting for a udp connection (tcp is preferred) */
uint8_t udp_waiting;
/* next zone in waiting list for UDP */
xfrd_zone_t* udp_waiting_next;
+ xfrd_zone_t* udp_waiting_prev;
+ /* zone has been activated to run now (after the other events
+ * but before blocking in select again) */
+ uint8_t is_activated;
+ xfrd_zone_t* activated_next;
+ xfrd_zone_t* activated_prev;
/* xfr message handling data */
/* query id */
@@ -174,6 +194,8 @@ struct xfrd_zone {
size_t msg_rr_count;
uint8_t msg_is_ixfr; /* 1:IXFR detected. 2:middle IXFR SOA seen. */
tsig_record_type tsig; /* tsig state for IXFR/AXFR */
+ uint64_t xfrfilenumber; /* identifier for file to store xfr into,
+ valid if msg_seq_nr nonzero */
};
enum xfrd_packet_result {
@@ -194,18 +216,24 @@ enum xfrd_packet_result {
Note that also some sockets are used for writing the ixfr.db, xfrd.state
files and for the pipes to the main parent process.
*/
-#define XFRD_MAX_TCP 50 /* max number of TCP AXFR/IXFR concurrent connections.*/
+#define XFRD_MAX_TCP 32 /* max number of TCP AXFR/IXFR concurrent connections.*/
/* Each entry has 64Kb buffer preallocated.*/
-#define XFRD_MAX_UDP 100 /* max number of UDP sockets at a time for IXFR */
-#define XFRD_MAX_UDP_NOTIFY 50 /* max concurrent UDP sockets for NOTIFY */
+#define XFRD_MAX_UDP 64 /* max number of UDP sockets at a time for IXFR */
+#define XFRD_MAX_UDP_NOTIFY 64 /* max concurrent UDP sockets for NOTIFY */
extern xfrd_state_t* xfrd;
/* start xfrd, new start. Pass socket to server_main. */
-void xfrd_init(int socket, struct nsd* nsd);
+void xfrd_init(int socket, struct nsd* nsd, int shortsoa, int reload_active);
+
+/* add new slave zone, dname(from zone_opt) and given options */
+void xfrd_init_slave_zone(xfrd_state_t* xfrd, zone_options_t* zone_opt);
+
+/* delete slave zone */
+void xfrd_del_slave_zone(xfrd_state_t* xfrd, const dname_type* dname);
/* get the current time epoch. Cached for speed. */
-time_t xfrd_time();
+time_t xfrd_time(void);
/*
* Handle final received packet from network.
@@ -220,6 +248,8 @@ void xfrd_set_timer(xfrd_zone_t* zone, time_t t);
void xfrd_set_refresh_now(xfrd_zone_t* zone);
/* unset the timer - no more timeouts, for when zone is queued */
void xfrd_unset_timer(xfrd_zone_t* zone);
+/* remove the 'refresh now', remove it from the activated list */
+void xfrd_deactivate_zone(xfrd_zone_t* z);
/*
* Make a new request to next master server.
@@ -249,7 +279,7 @@ void xfrd_udp_release(xfrd_zone_t* zone);
/*
* Get a static buffer for temporary use (to build a packet).
*/
-struct buffer* xfrd_get_temp_buffer();
+struct buffer* xfrd_get_temp_buffer(void);
/*
* TSIG sign outgoing request. Call if acl has a key.
@@ -268,27 +298,41 @@ void xfrd_handle_incoming_soa(xfrd_zone_t* zone, xfrd_soa_t* soa,
void xfrd_handle_passed_packet(buffer_type* packet,
int acl_num, int acl_xfr);
-/* send expiry notify for all zones to nsd (sets all dirty). */
-void xfrd_send_expy_all_zones();
-
/* try to reopen the logfile. */
-void xfrd_reopen_logfile();
+void xfrd_reopen_logfile(void);
+
+/* free namedb for xfrd usage */
+void xfrd_free_namedb(struct nsd* nsd);
/* copy SOA info from rr to soa struct. */
void xfrd_copy_soa(xfrd_soa_t* soa, rr_type* rr);
/* check for failed updates - it is assumed that now the reload has
finished, and all zone SOAs have been sent. */
-void xfrd_check_failed_updates();
+void xfrd_check_failed_updates(void);
/*
* Prepare zones for a reload, this sets the times on the zones to be
* before the current time, so the reload happens after.
*/
-void xfrd_prepare_zones_for_reload();
+void xfrd_prepare_zones_for_reload(void);
/* Bind a local interface to a socket descriptor, return 1 on success */
int xfrd_bind_local_interface(int sockd, acl_options_t* ifc,
acl_options_t* acl, int tcp);
+/* process results and soa info from reload */
+void xfrd_process_task_result(xfrd_state_t* xfrd, struct udb_base* taskudb);
+
+/* set to reload right away (for user controlled reload events) */
+void xfrd_set_reload_now(xfrd_state_t* xfrd);
+
+/* handle incoming notify (soa or NULL) and start zone xfr if necessary */
+void xfrd_handle_notify_and_start_xfr(xfrd_zone_t* zone, xfrd_soa_t* soa);
+
+/* handle zone timeout, event */
+void xfrd_handle_zone(int fd, short event, void* arg);
+
+const char* xfrd_pretty_time(time_t v);
+
#endif /* XFRD_H */
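
The new is_activated/activated_next/activated_prev fields describe a doubly linked run queue headed by xfrd_state.activated_first ("run now, after the other events but before blocking in select again"). The real list handling lives in xfrd.c; this is only a hedged sketch of the head-insert and unlink discipline those fields imply.

#include "xfrd.h"

/* Sketch: put a zone on the activated run queue (head insert). */
static void
activate_sketch(xfrd_state_t* x, xfrd_zone_t* z)
{
	if(z->is_activated)
		return;
	z->activated_prev = NULL;
	z->activated_next = x->activated_first;
	if(x->activated_first)
		x->activated_first->activated_prev = z;
	x->activated_first = z;
	z->is_activated = 1;
}

/* Sketch: unlink a zone from the activated run queue. */
static void
deactivate_sketch(xfrd_state_t* x, xfrd_zone_t* z)
{
	if(!z->is_activated)
		return;
	if(z->activated_prev)
		z->activated_prev->activated_next = z->activated_next;
	else	x->activated_first = z->activated_next;
	if(z->activated_next)
		z->activated_next->activated_prev = z->activated_prev;
	z->is_activated = 0;
}
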
diff --git a/usr.sbin/nsd/zlexer.lex b/usr.sbin/nsd/zlexer.lex
index 666a4ba3135..bcb5661ab80 100644
--- a/usr.sbin/nsd/zlexer.lex
+++ b/usr.sbin/nsd/zlexer.lex
@@ -2,7 +2,7 @@
/*
* zlexer.lex - lexical analyzer for (DNS) zone files
*
- * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
*
* See LICENSE for the license.
*
@@ -19,8 +19,6 @@
#include "dname.h"
#include "zparser.h"
-#define YY_NO_UNPUT
-
#if 0
#define LEXOUT(s) printf s /* used ONLY when debugging */
#else
@@ -68,6 +66,23 @@ pop_parser_state(void)
yy_switch_to_buffer(include_stack[include_stack_ptr]);
}
+static YY_BUFFER_STATE oldstate;
+/* Start string scan */
+void
+parser_push_stringbuf(char* str)
+{
+ oldstate = YY_CURRENT_BUFFER;
+ yy_switch_to_buffer(yy_scan_string(str));
+}
+
+void
+parser_pop_stringbuf(void)
+{
+ yy_delete_buffer(YY_CURRENT_BUFFER);
+ yy_switch_to_buffer(oldstate);
+ oldstate = NULL;
+}
+
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
#define yy_set_bol(at_bol) \
{ \
@@ -78,6 +93,16 @@ pop_parser_state(void)
#endif
%}
+%option noinput
+%option nounput
+%{
+#ifndef YY_NO_UNPUT
+#define YY_NO_UNPUT 1
+#endif
+#ifndef YY_NO_INPUT
+#define YY_NO_INPUT 1
+#endif
+%}
SPACE [ \t]
LETTER [a-zA-Z]
@@ -244,6 +269,8 @@ ANY [^\"\n\\]|\\.
<bitlabel><<EOF>> {
zc_error("EOF inside bitlabel");
BEGIN(INITIAL);
+ yyrestart(yyin); /* this is so that lex does not give an internal err */
+ yyterminate();
}
<bitlabel>{BIT}* { yymore(); }
<bitlabel>\n { ++parser->line; yymore(); }
@@ -258,6 +285,8 @@ ANY [^\"\n\\]|\\.
<quotedstring><<EOF>> {
zc_error("EOF inside quoted string");
BEGIN(INITIAL);
+ yyrestart(yyin); /* this is so that lex does not give an internal err */
+ yyterminate();
}
<quotedstring>{ANY}* { LEXOUT(("STR ")); yymore(); }
<quotedstring>\n { ++parser->line; yymore(); }