diff options
Diffstat (limited to 'usr.sbin/relayd')
-rw-r--r-- | usr.sbin/relayd/Makefile | 4 | ||||
-rw-r--r-- | usr.sbin/relayd/carp.c | 209 | ||||
-rw-r--r-- | usr.sbin/relayd/hce.c | 57 | ||||
-rw-r--r-- | usr.sbin/relayd/log.c | 15 | ||||
-rw-r--r-- | usr.sbin/relayd/parse.y | 489 | ||||
-rw-r--r-- | usr.sbin/relayd/pfe.c | 159 | ||||
-rw-r--r-- | usr.sbin/relayd/pfe_filter.c | 74 | ||||
-rw-r--r-- | usr.sbin/relayd/relay.c | 1927 | ||||
-rw-r--r-- | usr.sbin/relayd/relayd.8 | 14 | ||||
-rw-r--r-- | usr.sbin/relayd/relayd.c | 190 | ||||
-rw-r--r-- | usr.sbin/relayd/relayd.conf.5 | 310 | ||||
-rw-r--r-- | usr.sbin/relayd/relayd.h | 245 | ||||
-rw-r--r-- | usr.sbin/relayd/ssl.c | 7 |
13 files changed, 3640 insertions, 60 deletions
diff --git a/usr.sbin/relayd/Makefile b/usr.sbin/relayd/Makefile index 8475f8c2f7f..a86139ad266 100644 --- a/usr.sbin/relayd/Makefile +++ b/usr.sbin/relayd/Makefile @@ -1,9 +1,9 @@ -# $OpenBSD: Makefile,v 1.7 2007/02/03 20:24:21 reyk Exp $ +# $OpenBSD: Makefile,v 1.8 2007/02/22 03:32:39 reyk Exp $ PROG= hoststated SRCS= parse.y log.c control.c buffer.c imsg.c hoststated.c \ ssl.c pfe.c pfe_filter.c hce.c \ - check_icmp.c check_tcp.c + check_icmp.c check_tcp.c relay.c carp.c MAN= hoststated.8 hoststated.conf.5 LDADD= -levent -lssl -lcrypto diff --git a/usr.sbin/relayd/carp.c b/usr.sbin/relayd/carp.c new file mode 100644 index 00000000000..64c1091d1ab --- /dev/null +++ b/usr.sbin/relayd/carp.c @@ -0,0 +1,209 @@ +/* $OpenBSD: carp.c,v 1.1 2007/02/22 03:32:39 reyk Exp $ */ + +/* + * Copyright (c) 2006 Henning Brauer <henning@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/param.h> +#include <net/if.h> + +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <event.h> + +#include <openssl/ssl.h> + +#include "hoststated.h" + +struct carpgroup { + TAILQ_ENTRY(carpgroup) entry; + char *group; + int do_demote; + int changed_by; +}; + +TAILQ_HEAD(carpgroups, carpgroup) carpgroups = + TAILQ_HEAD_INITIALIZER(carpgroups); + +struct carpgroup *carp_group_find(char *group); +int carp_demote_ioctl(char *, int); + +struct carpgroup * +carp_group_find(char *group) +{ + struct carpgroup *c; + + TAILQ_FOREACH(c, &carpgroups, entry) + if (!strcmp(c->group, group)) + return (c); + + return (NULL); +} + +int +carp_demote_init(char *group, int force) +{ + struct carpgroup *c; + int level; + + if ((c = carp_group_find(group)) == NULL) { + if ((c = calloc(1, sizeof(struct carpgroup))) == NULL) { + log_warn("carp_demote_init calloc"); + return (-1); + } + if ((c->group = strdup(group)) == NULL) { + log_warn("carp_demote_init calloc"); + free(c); + return (-1); + } + + /* only demote if this group already is demoted */ + if ((level = carp_demote_get(group)) == -1) + return (-1); + if (level > 0 || force) + c->do_demote = 1; + + TAILQ_INSERT_TAIL(&carpgroups, c, entry); + } + + return (0); +} + +void +carp_demote_shutdown(void) +{ + struct carpgroup *c; + + while ((c = TAILQ_FIRST(&carpgroups)) != NULL) { + TAILQ_REMOVE(&carpgroups, c, entry); + for (; c->changed_by > 0; c->changed_by--) + if (c->do_demote) + carp_demote_ioctl(c->group, -1); + + free(c->group); + free(c); + } +} + +int +carp_demote_get(char *group) +{ + int s; + struct ifgroupreq ifgr; + + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + log_warn("carp_demote_get: socket"); + return (-1); + } + + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, group, sizeof(ifgr.ifgr_name)); + + if (ioctl(s, SIOCGIFGATTR, (caddr_t)&ifgr) == -1) { + if (errno == 
ENOENT) + log_warnx("group \"%s\" does not exist", group); + else + log_warn("carp_demote_get: ioctl"); + close(s); + return (-1); + } + + close(s); + return ((int)ifgr.ifgr_attrib.ifg_carp_demoted); +} + +int +carp_demote_set(char *group, int demote) +{ + struct carpgroup *c; + + if ((c = carp_group_find(group)) == NULL) { + log_warnx("carp_group_find for %s returned NULL?!", group); + return (-1); + } + + if (c->changed_by + demote < 0) { + log_warnx("carp_demote_set: changed_by + demote < 0"); + return (-1); + } + + if (c->do_demote && carp_demote_ioctl(group, demote) == -1) + return (-1); + + c->changed_by += demote; + + /* enable demotion when we return to 0, i. e. all sessions up */ + if (demote < 0 && c->changed_by == 0) + c->do_demote = 1; + + return (0); +} + +int +carp_demote_reset(char *group, int value) +{ + int level; + int demote = 0; + + if (value < 0) { + log_warnx("carp_demote_reset: value < 0"); + return (-1); + } + + if ((level = carp_demote_get(group)) == -1) + return (-1); + if (level == value) + return (0); + + demote -= level; + demote += value; + + if (carp_demote_ioctl(group, demote) == -1) + return (-1); + + return (0); +} + +int +carp_demote_ioctl(char *group, int demote) +{ + int s, res; + struct ifgroupreq ifgr; + + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + log_warn("carp_demote_get: socket"); + return (-1); + } + + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, group, sizeof(ifgr.ifgr_name)); + ifgr.ifgr_attrib.ifg_carp_demoted = demote; + + if ((res = ioctl(s, SIOCSIFGATTR, (caddr_t)&ifgr)) == -1) + log_warn("unable to %s the demote state " + "of group '%s'", (demote > 0) ? "increment" : "decrement", + group); + else + log_info("%s the demote state of group '%s'", + (demote > 0) ? 
"incremented" : "decremented", group); + + close (s); + return (res); +} diff --git a/usr.sbin/relayd/hce.c b/usr.sbin/relayd/hce.c index 4ee1a0135b4..7275da68987 100644 --- a/usr.sbin/relayd/hce.c +++ b/usr.sbin/relayd/hce.c @@ -1,4 +1,4 @@ -/* $OpenBSD: hce.c,v 1.15 2007/02/07 15:17:46 reyk Exp $ */ +/* $OpenBSD: hce.c,v 1.16 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ -64,7 +64,8 @@ hce_sig_handler(int sig, short event, void *arg) pid_t hce(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], - int pipe_pfe2hce[2]) + int pipe_parent2relay[2], int pipe_pfe2hce[2], + int pipe_pfe2relay[RELAY_MAXPROC][2]) { pid_t pid; struct passwd *pw; @@ -72,6 +73,7 @@ hce(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], struct event ev_sigint; struct event ev_sigterm; struct table *table; + int i; switch (pid = fork()) { case -1: @@ -116,6 +118,12 @@ hce(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], close(pipe_parent2hce[0]); close(pipe_parent2pfe[0]); close(pipe_parent2pfe[1]); + close(pipe_parent2relay[0]); + close(pipe_parent2relay[1]); + for (i = 0; i < env->prefork_relay; i++) { + close(pipe_pfe2relay[i][0]); + close(pipe_pfe2relay[i][1]); + } if ((ibuf_pfe = calloc(1, sizeof(struct imsgbuf))) == NULL || (ibuf_main = calloc(1, sizeof(struct imsgbuf))) == NULL) @@ -133,9 +141,11 @@ hce(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], ibuf_main->handler, ibuf_main); event_add(&ibuf_main->ev, NULL); - evtimer_set(&env->ev, hce_launch_checks, env); - bzero(&tv, sizeof(tv)); - evtimer_add(&env->ev, &tv); + if (!TAILQ_EMPTY(&env->services)) { + evtimer_set(&env->ev, hce_launch_checks, env); + bzero(&tv, sizeof(tv)); + evtimer_add(&env->ev, &tv); + } if (env->flags & F_SSL) { ssl_init(env); @@ -213,21 +223,33 @@ hce_notify_done(struct host *host, const char *msg) u_long duration; u_int logopt; + + if (host->up == HOST_DOWN && 
host->retry_cnt) { + log_debug("hce_notify_done: host %s retry %d", + host->name, host->retry_cnt); + host->up = HOST_UP; + host->retry_cnt--; + } else + host->retry_cnt = host->retry; + if (host->up != HOST_UNKNOWN) { + host->check_cnt++; + if (host->up == HOST_UP) + host->up_cnt++; + } st.id = host->id; st.up = host->up; + st.check_cnt = host->check_cnt; + st.retry_cnt = host->retry_cnt; host->flags |= (F_CHECK_SENT|F_CHECK_DONE); if (msg) log_debug("hce_notify_done: %s (%s)", host->name, msg); - if (host->up != host->last_up) { + imsg_compose(ibuf_pfe, IMSG_HOST_STATUS, 0, 0, &st, sizeof(st)); + if (host->up != host->last_up) logopt = HOSTSTATED_OPT_LOGUPDATE; - imsg_compose(ibuf_pfe, IMSG_HOST_STATUS, 0, 0, &st, sizeof(st)); - } else + else logopt = HOSTSTATED_OPT_LOGNOTIFY; - if ((table = table_find(env, host->tableid)) == NULL) - fatalx("hce_notify_done: invalid table id"); - if (gettimeofday(&tv_now, NULL)) fatal("hce_notify_done: gettimeofday"); timersub(&tv_now, &host->cte.tv_start, &tv_dur); @@ -236,11 +258,16 @@ hce_notify_done(struct host *host, const char *msg) else duration = 0; + if ((table = table_find(env, host->tableid)) == NULL) + fatalx("hce_notify_done: invalid table id"); + if (env->opts & logopt) { - log_info("host %s, check %s%s (%lums), state %s -> %s", + log_info("host %s, check %s%s (%lums), state %s -> %s, " + "availability %s", host->name, table_check(table->check), (table->flags & F_SSL) ? 
" use ssl" : "", duration, - host_status(host->last_up), host_status(host->up)); + host_status(host->last_up), host_status(host->up), + print_availability(host->check_cnt, host->up_cnt)); } host->last_up = host->up; @@ -293,6 +320,8 @@ hce_dispatch_imsg(int fd, short event, void *ptr) fatalx("hce_dispatch_imsg: desynchronized"); host->flags |= F_DISABLE; host->up = HOST_UNKNOWN; + host->check_cnt = 0; + host->up_cnt = 0; break; case IMSG_HOST_ENABLE: memcpy(&id, imsg.data, sizeof(id)); @@ -339,7 +368,7 @@ hce_dispatch_parent(int fd, short event, void * ptr) case EV_READ: if ((n = imsg_read(ibuf)) == -1) fatal("hce_dispatch_parent: imsg_read error"); - if (n == 0) /* connection closed */ + if (n == 0) fatalx("hce_dispatch_parent: pipe closed"); break; case EV_WRITE: diff --git a/usr.sbin/relayd/log.c b/usr.sbin/relayd/log.c index 4fc3f35fb8c..0da3432ca88 100644 --- a/usr.sbin/relayd/log.c +++ b/usr.sbin/relayd/log.c @@ -1,4 +1,4 @@ -/* $OpenBSD: log.c,v 1.3 2007/02/07 15:17:46 reyk Exp $ */ +/* $OpenBSD: log.c,v 1.4 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -20,6 +20,7 @@ #include <sys/param.h> #include <sys/queue.h> #include <sys/socket.h> +#include <sys/tree.h> #include <netinet/in_systm.h> #include <netinet/in.h> @@ -199,3 +200,15 @@ table_check(enum table_check check) /* NOTREACHED */ return ("invalid"); } + +const char * +print_availability(u_long cnt, u_long up) +{ + static char buf[BUFSIZ]; + + if (cnt == 0) + return (""); + bzero(buf, sizeof(buf)); + snprintf(buf, sizeof(buf), "%.2f%%", (double)up / cnt * 100); + return (buf); +} diff --git a/usr.sbin/relayd/parse.y b/usr.sbin/relayd/parse.y index bd8cfd83173..efbb11882b8 100644 --- a/usr.sbin/relayd/parse.y +++ b/usr.sbin/relayd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.25 2007/02/09 17:55:49 reyk Exp $ */ +/* $OpenBSD: parse.y,v 1.26 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ 
-36,6 +36,7 @@ #include <errno.h> #include <event.h> #include <limits.h> +#include <stdint.h> #include <stdarg.h> #include <stdio.h> #include <netdb.h> @@ -53,9 +54,14 @@ const char *infile; objid_t last_service_id = 0; objid_t last_table_id = 0; objid_t last_host_id = 0; +objid_t last_relay_id = 0; +objid_t last_proto_id = 0; static struct service *service = NULL; static struct table *table = NULL; +static struct relay *rlay = NULL; +static struct protocol *proto = NULL; +static struct protonode node; int yyerror(const char *, ...); int yyparse(void); @@ -101,12 +107,15 @@ typedef struct { %token CHECK HTTP HTTPS TCP ICMP EXTERNAL %token TIMEOUT CODE DIGEST PORT TAG INTERFACE %token VIRTUAL IP INTERVAL DISABLE STICKYADDR -%token SEND EXPECT NOTHING USE SSL -%token LOG UPDATES ALL +%token SEND EXPECT NOTHING USE SSL LOADBALANCE ROUNDROBIN +%token RELAY LISTEN ON FORWARD TO NAT LOOKUP PREFORK NO MARK MARKED +%token PROTO SESSION CACHE APPEND CHANGE REMOVE FROM FILTER HASH +%token LOG UPDATES ALL DEMOTE NODELAY SACK SOCKET BUFFER URL RETRY %token ERROR %token <v.string> STRING %type <v.string> interface -%type <v.number> number port http_type loglevel +%type <v.number> number port http_type loglevel sslcache +%type <v.number> prototype dstmode docheck retry %type <v.host> host %type <v.tv> timeout @@ -118,6 +127,8 @@ grammar : /* empty */ | grammar main '\n' | grammar service '\n' | grammar table '\n' + | grammar relay '\n' + | grammar proto '\n' | grammar error '\n' { errors++; } ; @@ -206,6 +217,30 @@ main : INTERVAL number { conf->interval.tv_sec = $2; } | TIMEOUT timeout { bcopy(&$2, &conf->timeout, sizeof(struct timeval)); } + | PREFORK number { + if ($2 <= 0 || $2 > RELAY_MAXPROC) { + yyerror("invalid number of preforked " + "relays: %d", $2); + YYERROR; + } + conf->prefork_relay = $2; + } + | DEMOTE STRING { + conf->flags |= F_DEMOTE; + if (strlcpy(conf->demote_group, $2, + sizeof(conf->demote_group)) + >= sizeof(conf->demote_group)) { + yyerror("yyparse: 
demote group name too long"); + free($2); + YYERROR; + } + free($2); + if (carp_demote_init(conf->demote_group, 1) == -1) { + yyerror("yyparse: error initializing group '%s'", + conf->demote_group); + YYERROR; + } + } ; loglevel : UPDATES { $$ = HOSTSTATED_OPT_LOGUPDATE; } @@ -233,7 +268,7 @@ service : SERVICE STRING { } free($2); srv->id = last_service_id++; - if (last_service_id == UINT_MAX) { + if (last_service_id == INT_MAX) { yyerror("too many services defined"); YYERROR; } @@ -358,7 +393,7 @@ table : TABLE STRING { tb->id = last_table_id++; bcopy(&conf->timeout, &tb->timeout, sizeof(struct timeval)); - if (last_table_id == UINT_MAX) { + if (last_table_id == INT_MAX) { yyerror("too many tables defined"); YYERROR; } @@ -452,6 +487,22 @@ tableoptsl : host { | REAL port { table->port = $2; } + | DEMOTE STRING { + table->flags |= F_DEMOTE; + if (strlcpy(table->demote_group, $2, + sizeof(table->demote_group)) + >= sizeof(table->demote_group)) { + yyerror("yyparse: demote group name too long"); + free($2); + YYERROR; + } + free($2); + if (carp_demote_init(table->demote_group, 1) == -1) { + yyerror("yyparse: error initializing group '%s'", + table->demote_group); + YYERROR; + } + } | DISABLE { table->flags |= F_DISABLE; } | USE SSL { table->flags |= F_SSL; @@ -459,11 +510,371 @@ tableoptsl : host { } ; +proto : PROTO STRING { + struct protocol *p; + + TAILQ_FOREACH(p, &conf->protos, entry) + if (!strcmp(p->name, $2)) + break; + if (p != NULL) { + yyerror("protocol %s defined twice", $2); + free($2); + YYERROR; + } + if ((p = calloc(1, sizeof (*p))) == NULL) + fatal("out of memory"); + + if (strlcpy(p->name, $2, sizeof(p->name)) >= + sizeof(p->name)) { + yyerror("protocol name truncated"); + YYERROR; + } + free($2); + p->id = last_proto_id++; + p->cache = RELAY_CACHESIZE; + p->type = RELAY_PROTO_TCP; + if (last_proto_id == INT_MAX) { + yyerror("too many protocols defined"); + YYERROR; + } + RB_INIT(&p->tree); + proto = p; + } '{' optnl protopts_l '}' { + 
conf->protocount++; + TAILQ_INSERT_HEAD(&conf->protos, proto, entry); + } + ; + +protopts_l : protopts_l protoptsl nl + | protoptsl optnl + ; + +protoptsl : SSL SESSION CACHE sslcache { proto->cache = $4; } + | PROTO prototype { proto->type = $2; } + | TCP tcpflags + | TCP '{' tcpflags_l '}' + | protonode { + struct protonode *pn, pk; + + pn = RB_FIND(proto_tree, &proto->tree, &node); + if (pn != NULL) { + yyerror("protocol node %s defined twice", + node.key); + YYERROR; + } + if ((pn = calloc(1, sizeof (*pn))) == NULL) + fatal("out of memory"); + + bcopy(&node, pn, sizeof(*pn)); + pn->key = node.key; + pn->value = node.value; + pn->header = node.getvars ? 0 : 1; + pn->id = proto->nodecount++; + if (pn->id == INT_MAX) { + yyerror("too many protocol nodes defined"); + YYERROR; + } + RB_INSERT(proto_tree, &proto->tree, pn); + + if (node.getvars) { + pk.key = "GET"; + pn = RB_FIND(proto_tree, &proto->tree, &pk); + if (pn != NULL) { + pn->getvars++; + } else if (pn == NULL) { + if ((pn = (struct protonode *) + calloc(1, sizeof(*pn))) == NULL) + fatal("out of memory"); + pn->key = strdup("GET"); + if (pn->key == NULL) + fatal("out of memory"); + pn->value = NULL; + pn->action = NODE_ACTION_NONE; + pn->getvars = 1; + pn->id = proto->nodecount++; + if (pn->id == INT_MAX) { + yyerror("too many protocol " + "nodes defined"); + YYERROR; + } + RB_INSERT(proto_tree, &proto->tree, pn); + } + } + + bzero(&node, sizeof(node)); + } + ; + +tcpflags_l : tcpflags comma tcpflags_l + | tcpflags + ; + +tcpflags : SACK { proto->tcpflags |= TCPFLAG_SACK; } + | NO SACK { proto->tcpflags |= TCPFLAG_NSACK; } + | NODELAY { proto->tcpflags |= TCPFLAG_NODELAY; } + | NO NODELAY { proto->tcpflags |= TCPFLAG_NNODELAY; } + | SOCKET BUFFER number { + proto->tcpflags |= TCPFLAG_BUFSIZ; + proto->tcpbufsiz = $3; + } + ; + + +protonode : APPEND STRING TO STRING marked { + node.action = NODE_ACTION_APPEND; + node.key = strdup($4); + node.value = strdup($2); + if (node.key == NULL || node.value == NULL) + 
fatal("out of memory"); + if (strchr(node.value, '$') != NULL) + node.macro = 1; + free($4); + free($2); + } + | CHANGE STRING TO STRING marked { + node.action = NODE_ACTION_CHANGE; + node.key = strdup($2); + node.value = strdup($4); + if (node.key == NULL || node.value == NULL) + fatal("out of memory"); + if (strchr(node.value, '$') != NULL) + node.macro = 1; + free($4); + free($2); + } + | REMOVE STRING marked { + node.action = NODE_ACTION_REMOVE; + node.key = strdup($2); + node.value = NULL; + if (node.key == NULL) + fatal("out of memory"); + free($2); + } + | getvars EXPECT STRING FROM STRING mark { + node.action = NODE_ACTION_EXPECT; + node.key = strdup($5); + node.value = strdup($3);; + if (node.key == NULL || node.value == NULL) + fatal("out of memory"); + free($5); + free($3); + } + | getvars FILTER STRING FROM STRING mark { + node.action = NODE_ACTION_FILTER; + node.key = strdup($5); + node.value = strdup($3);; + if (node.key == NULL || node.value == NULL) + fatal("out of memory"); + free($5); + free($3); + } + | getvars HASH STRING marked { + node.action = NODE_ACTION_HASH; + node.key = strdup($3); + node.value = NULL; + if (node.key == NULL) + fatal("out of memory"); + free($3); + proto->lateconnect++; + } + ; + +mark : /* nothing */ + | MARK { node.mark++; } + ; + +marked : /* nothing */ + | MARKED { node.mark++; } + ; + +getvars : /* nothing */ + | URL { node.getvars++; } + ; + +sslcache : /* empty */ { $$ = RELAY_CACHESIZE; } + | number { $$ = $1; } + | DISABLE { $$ = -2; } + ; + +prototype : TCP { $$ = RELAY_PROTO_TCP; } + | HTTP { $$ = RELAY_PROTO_HTTP; } + ; + +relay : RELAY STRING { + struct relay *r; + + TAILQ_FOREACH(r, &conf->relays, entry) + if (!strcmp(r->name, $2)) + break; + if (r != NULL) { + yyerror("relay %s defined twice", $2); + free($2); + YYERROR; + } + if ((r = calloc(1, sizeof (*r))) == NULL) + fatal("out of memory"); + + if (strlcpy(r->name, $2, sizeof(r->name)) >= + sizeof(r->name)) { + yyerror("relay name truncated"); + YYERROR; 
+ } + free($2); + r->id = last_relay_id++; + r->timeout.tv_sec = RELAY_TIMEOUT; + r->proto = NULL; + r->dsttable = NULL; + if (last_relay_id == INT_MAX) { + yyerror("too many relays defined"); + YYERROR; + } + rlay = r; + } '{' optnl relayopts_l '}' { + if (rlay->ss.ss_family == AF_UNSPEC) { + yyerror("relay %s has no listener", + rlay->name); + YYERROR; + } + if ((rlay->flags & F_NATLOOK) == 0 && + rlay->dstss.ss_family == AF_UNSPEC && + rlay->dsttable == NULL) { + yyerror("relay %s has no target, service, " + "or table", rlay->name); + YYERROR; + } + if (rlay->proto == NULL) + rlay->proto = &conf->proto_default; + conf->relaycount++; + TAILQ_INIT(&rlay->sessions); + TAILQ_INSERT_HEAD(&conf->relays, rlay, entry); + } + ; + +relayopts_l : relayopts_l relayoptsl nl + | relayoptsl optnl + ; + +relayoptsl : LISTEN ON STRING port sslserv { + struct addresslist al; + struct address *h; + + if (rlay->ss.ss_family != AF_UNSPEC) { + yyerror("relay %s listener already specified", + rlay->name); + YYERROR; + } + + TAILQ_INIT(&al); + if (host($3, &al, 1, $4, NULL) <= 0) { + yyerror("invalid listen ip: %s", $3); + free($3); + YYERROR; + } + free($3); + h = TAILQ_FIRST(&al); + bcopy(&h->ss, &rlay->ss, sizeof(rlay->ss)); + rlay->port = h->port; + } + | FORWARD TO STRING port { + struct addresslist al; + struct address *h; + + if (rlay->dstss.ss_family != AF_UNSPEC) { + yyerror("relay %s target or service already specified", + rlay->name); + free($3); + YYERROR; + } + + TAILQ_INIT(&al); + if (host($3, &al, 1, $4, NULL) <= 0) { + yyerror("invalid listen ip: %s", $3); + free($3); + YYERROR; + } + free($3); + h = TAILQ_FIRST(&al); + bcopy(&h->ss, &rlay->dstss, sizeof(rlay->dstss)); + rlay->dstport = h->port; + } + | SERVICE STRING { + struct service *svc; + struct address *h; + + if (rlay->dstss.ss_family != AF_UNSPEC) { + yyerror("relay %s target or service already specified", + rlay->name); + free($2); + YYERROR; + } + + if ((svc = service_findbyname(conf, $2)) == NULL) { + 
yyerror("relay %s for unknown service %s", + rlay->name, $2); + free($2); + YYERROR; + } + free($2); + h = TAILQ_FIRST(&svc->virts); + bcopy(&h->ss, &rlay->dstss, sizeof(rlay->dstss)); + rlay->dstport = h->port; + } + | TABLE STRING dstmode docheck { + struct table *dsttable; + + if ((dsttable = table_findbyname(conf, $2)) == NULL) { + yyerror("relay %d for unknown table %s", + rlay->name, $2); + free($2); + YYERROR; + } + free($2); + rlay->dsttable = dsttable; + rlay->dstmode = $3; + rlay->dstcheck = $4; + } + | PROTO STRING { + struct protocol *p; + + TAILQ_FOREACH(p, &conf->protos, entry) + if (!strcmp(p->name, $2)) + break; + if (p == NULL) { + yyerror("no such protocol: %s", $2); + free($2); + YYERROR; + } + p->flags |= F_USED; + rlay->proto = p; + free($2); + } + | NAT LOOKUP { rlay->flags |= F_NATLOOK; } + | TIMEOUT number { rlay->timeout.tv_sec = $2; } + | DISABLE { rlay->flags |= F_DISABLE; } + ; + +dstmode : /* empty */ { $$ = RELAY_DSTMODE_DEFAULT; } + | LOADBALANCE { $$ = RELAY_DSTMODE_LOADBALANCE; } + | ROUNDROBIN { $$ = RELAY_DSTMODE_ROUNDROBIN; } + | HASH { $$ = RELAY_DSTMODE_HASH; } + ; + +docheck : /* empty */ { $$ = 1; } + | NO CHECK { $$ = 0; } + ; + +sslserv : /* empty */ + | SSL { + rlay->flags |= F_SSL; + conf->flags |= F_SSL; + } + ; + interface : /*empty*/ { $$ = NULL; } | INTERFACE STRING { $$ = $2; } ; -host : HOST STRING { +host : HOST STRING retry { struct address *a; struct addresslist al; @@ -490,7 +901,8 @@ host : HOST STRING { } free($2); $$->id = last_host_id++; - if (last_host_id == UINT_MAX) { + $$->retry = $3; + if (last_host_id == INT_MAX) { yyerror("too many hosts defined"); free($$); YYERROR; @@ -498,6 +910,10 @@ host : HOST STRING { } ; +retry : /* nothing */ { $$ = 0; } + | RETRY number { $$ = $2; } + ; + timeout : number { $$.tv_sec = $1 / 1000; @@ -505,6 +921,10 @@ timeout : number } ; +comma : ',' + | /* empty */ + ; + optnl : '\n' optnl | ; @@ -546,13 +966,22 @@ lookup(char *s) /* this has to be sorted always */ static 
const struct keywords keywords[] = { { "all", ALL }, + { "append", APPEND }, { "backup", BACKUP }, + { "buffer", BUFFER }, + { "cache", CACHE }, + { "change", CHANGE }, { "check", CHECK }, { "code", CODE }, + { "demote", DEMOTE }, { "digest", DIGEST }, { "disable", DISABLE }, { "expect", EXPECT }, { "external", EXTERNAL }, + { "filter", FILTER }, + { "forward", FORWARD }, + { "from", FROM }, + { "hash", HASH }, { "host", HOST }, { "http", HTTP }, { "https", HTTPS }, @@ -560,19 +989,39 @@ lookup(char *s) { "interface", INTERFACE }, { "interval", INTERVAL }, { "ip", IP }, + { "listen", LISTEN }, + { "loadbalance", LOADBALANCE }, { "log", LOG }, + { "lookup", LOOKUP }, + { "mark", MARK }, + { "marked", MARKED }, + { "nat", NAT }, + { "no", NO }, + { "nodelay", NODELAY }, { "nothing", NOTHING }, + { "on", ON }, { "port", PORT }, + { "prefork", PREFORK }, + { "protocol", PROTO }, { "real", REAL }, + { "relay", RELAY }, + { "remove", REMOVE }, + { "retry", RETRY }, + { "roundrobin", ROUNDROBIN }, + { "sack", SACK }, { "send", SEND }, { "service", SERVICE }, + { "session", SESSION }, + { "socket", SOCKET }, { "ssl", SSL }, { "sticky-address", STICKYADDR }, { "table", TABLE }, { "tag", TAG }, { "tcp", TCP }, { "timeout", TIMEOUT }, + { "to", TO }, { "updates", UPDATES }, + { "url", URL }, { "use", USE }, { "virtual", VIRTUAL } }; @@ -795,16 +1244,30 @@ parse_config(struct hoststated *x_conf, const char *filename, int opts) TAILQ_INIT(&conf->services); TAILQ_INIT(&conf->tables); + TAILQ_INIT(&conf->protos); + TAILQ_INIT(&conf->relays); + memset(&conf->empty_table, 0, sizeof(conf->empty_table)); conf->empty_table.id = EMPTY_TABLE; conf->empty_table.flags |= F_DISABLE; (void)strlcpy(conf->empty_table.name, "empty", sizeof(conf->empty_table.name)); + bzero(&conf->proto_default, sizeof(conf->proto_default)); + conf->proto_default.flags = F_USED; + conf->proto_default.cache = RELAY_CACHESIZE; + conf->proto_default.type = RELAY_PROTO_TCP; + (void)strlcpy(conf->proto_default.name, 
"default", + sizeof(conf->proto_default.name)); + RB_INIT(&conf->proto_default.tree); + TAILQ_INSERT_TAIL(&conf->protos, &conf->proto_default, entry); + conf->timeout.tv_sec = CHECK_TIMEOUT / 1000; conf->timeout.tv_usec = (CHECK_TIMEOUT % 1000) * 1000; conf->interval.tv_sec = CHECK_INTERVAL; conf->interval.tv_usec = 0; + conf->prefork_relay = RELAY_NUMPROC; + conf->statinterval.tv_sec = RELAY_STATINTERVAL; conf->opts = opts; if ((fin = fopen(filename, "r")) == NULL) { @@ -831,7 +1294,7 @@ parse_config(struct hoststated *x_conf, const char *filename, int opts) } } - if (TAILQ_EMPTY(&conf->services)) { + if (TAILQ_EMPTY(&conf->services) && TAILQ_EMPTY(&conf->relays)) { log_warnx("no services, nothing to do"); errors++; } @@ -854,6 +1317,14 @@ parse_config(struct hoststated *x_conf, const char *filename, int opts) } } + /* Verify that every non-default protocol is used */ + TAILQ_FOREACH(proto, &conf->protos, entry) { + if (!(proto->flags & F_USED)) { + log_warnx("unused protocol: %s", proto->name); + errors++; + } + } + if (errors) { bzero(&conf, sizeof (*conf)); return (-1); diff --git a/usr.sbin/relayd/pfe.c b/usr.sbin/relayd/pfe.c index 0af74099611..c6f0bbb7133 100644 --- a/usr.sbin/relayd/pfe.c +++ b/usr.sbin/relayd/pfe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfe.c,v 1.14 2007/02/08 13:32:24 reyk Exp $ */ +/* $OpenBSD: pfe.c,v 1.15 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ -39,6 +39,7 @@ void pfe_sig_handler(int sig, short, void *); void pfe_shutdown(void); void pfe_dispatch_imsg(int, short, void *); void pfe_dispatch_parent(int, short, void *); +void pfe_dispatch_relay(int, short, void *); void pfe_sync(void); @@ -46,6 +47,7 @@ static struct hoststated *env = NULL; struct imsgbuf *ibuf_main; struct imsgbuf *ibuf_hce; +struct imsgbuf *ibuf_relay; void pfe_sig_handler(int sig, short event, void *arg) @@ -61,12 +63,15 @@ pfe_sig_handler(int sig, short event, void *arg) pid_t pfe(struct hoststated *x_env, int 
pipe_parent2pfe[2], int pipe_parent2hce[2], - int pipe_pfe2hce[2]) + int pipe_parent2relay[2], int pipe_pfe2hce[2], + int pipe_pfe2relay[RELAY_MAXPROC][2]) { pid_t pid; struct passwd *pw; struct event ev_sigint; struct event ev_sigterm; + int i; + struct imsgbuf *ibuf; switch (pid = fork()) { case -1: @@ -114,8 +119,13 @@ pfe(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], close(pipe_parent2pfe[0]); close(pipe_parent2hce[0]); close(pipe_parent2hce[1]); + close(pipe_parent2relay[0]); + close(pipe_parent2relay[1]); + for (i = 0; i < env->prefork_relay; i++) + close(pipe_pfe2relay[i][0]); if ((ibuf_hce = calloc(1, sizeof(struct imsgbuf))) == NULL || + (ibuf_relay = calloc(i, sizeof(struct imsgbuf))) == NULL || (ibuf_main = calloc(1, sizeof(struct imsgbuf))) == NULL) fatal("pfe"); imsg_init(ibuf_hce, pipe_pfe2hce[1], pfe_dispatch_imsg); @@ -131,11 +141,24 @@ pfe(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], ibuf_main->handler, ibuf_main); event_add(&ibuf_main->ev, NULL); + for (i = 0; i < env->prefork_relay; i++) { + ibuf = &ibuf_relay[i]; + imsg_init(ibuf, pipe_pfe2relay[i][1], pfe_dispatch_relay); + + ibuf_relay->events = EV_READ; + event_set(&ibuf->ev, ibuf->fd, ibuf->events, + ibuf->handler, ibuf); + event_add(&ibuf->ev, NULL); + } + TAILQ_INIT(&ctl_conns); if (control_listen() == -1) fatalx("pfe: control socket listen failed"); + /* Initial sync */ + pfe_sync(); + event_dispatch(); pfe_shutdown(); @@ -192,20 +215,35 @@ pfe_dispatch_imsg(int fd, short event, void *ptr) memcpy(&st, imsg.data, sizeof(st)); if ((host = host_find(env, st.id)) == NULL) fatalx("pfe_dispatch_imsg: invalid host id"); - if (host->up == st.up) { + + host->retry_cnt = st.retry_cnt; + if (st.up != HOST_UNKNOWN) { + host->check_cnt++; + if (st.up == HOST_UP) + host->up_cnt++; + } + if (host->check_cnt != st.check_cnt) { log_debug("pfe_dispatch_imsg: host %d => %d", host->id, host->up); fatalx("pfe_dispatch_imsg: desynchronized"); } + if 
(host->up == st.up) + break; + + /* Forward to relay engine(s) */ + for (n = 0; n < env->prefork_relay; n++) + imsg_compose(&ibuf_relay[n], IMSG_HOST_STATUS, 0, 0, + &st, sizeof(st)); + if ((table = table_find(env, host->tableid)) == NULL) fatalx("pfe_dispatch_imsg: invalid table id"); log_debug("pfe_dispatch_imsg: state %d for host %u %s", st.up, host->id, host->name); - if ((st.up == HOST_UNKNOWN && host->up == HOST_DOWN) || - (st.up == HOST_DOWN && host->up == HOST_UNKNOWN)) { + if ((st.up == HOST_UNKNOWN && !HOST_ISUP(host->up)) || + (!HOST_ISUP(st.up) && host->up == HOST_UNKNOWN)) { host->up = st.up; break; } @@ -215,6 +253,7 @@ pfe_dispatch_imsg(int fd, short event, void *ptr) table->up++; host->flags |= F_ADD; host->flags &= ~(F_DEL); + host->up = HOST_UP; } else { table->up--; table->flags |= F_CHANGED; @@ -248,7 +287,7 @@ pfe_dispatch_parent(int fd, short event, void * ptr) case EV_READ: if ((n = imsg_read(ibuf)) == -1) fatal("imsg_read error"); - if (n == 0) /* connection closed */ + if (n == 0) fatalx("pfe_dispatch_parent: pipe closed"); break; case EV_WRITE: @@ -277,10 +316,76 @@ pfe_dispatch_parent(int fd, short event, void * ptr) } void +pfe_dispatch_relay(int fd, short event, void * ptr) +{ + struct imsgbuf *ibuf; + struct imsg imsg; + ssize_t n; + struct ctl_natlook cnl; + struct ctl_stats crs; + struct relay *rlay; + + ibuf = ptr; + switch (event) { + case EV_READ: + if ((n = imsg_read(ibuf)) == -1) + fatal("imsg_read error"); + if (n == 0) + fatalx("pfe_dispatch_relay: pipe closed"); + break; + case EV_WRITE: + if (msgbuf_write(&ibuf->w) == -1) + fatal("msgbuf_write"); + imsg_event_add(ibuf); + return; + default: + fatalx("unknown event"); + } + + for (;;) { + if ((n = imsg_get(ibuf, &imsg)) == -1) + fatal("pfe_dispatch_relay: imsg_read error"); + if (n == 0) + break; + + switch (imsg.hdr.type) { + case IMSG_NATLOOK: + if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(cnl)) + fatalx("invalid imsg header len"); + bcopy(imsg.data, &cnl, sizeof(cnl)); + 
if (natlook(env, &cnl) != 0) + cnl.in = -1; + imsg_compose(&ibuf_relay[cnl.proc], IMSG_NATLOOK, 0, 0, + &cnl, sizeof(cnl)); + break; + case IMSG_STATISTICS: + if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof(crs)) + fatalx("invalid imsg header len"); + bcopy(imsg.data, &crs, sizeof(crs)); + if (crs.proc > env->prefork_relay) + fatalx("pfe_dispatch_relay: invalid relay proc"); + if ((rlay = relay_find(env, crs.id)) == NULL) + fatalx("pfe_dispatch_relay: invalid relay id"); + bcopy(&crs, &rlay->stats[crs.proc], sizeof(crs)); + rlay->stats[crs.proc].interval = + env->statinterval.tv_sec; + break; + default: + log_debug("pfe_dispatch_relay: unexpected imsg %d", + imsg.hdr.type); + break; + } + imsg_free(&imsg); + } + imsg_event_add(ibuf); +} + +void show(struct ctl_conn *c) { struct service *service; struct host *host; + struct relay *rlay; TAILQ_FOREACH(service, &env->services, entry) { imsg_compose(&c->ibuf, IMSG_CTL_SERVICE, 0, 0, @@ -304,6 +409,14 @@ show(struct ctl_conn *c) imsg_compose(&c->ibuf, IMSG_CTL_HOST, 0, 0, host, sizeof(*host)); } + TAILQ_FOREACH(rlay, &env->relays, entry) { + rlay->stats[env->prefork_relay].id = EMPTY_ID; + imsg_compose(&c->ibuf, IMSG_CTL_RELAY, 0, 0, + rlay, sizeof(*rlay)); + imsg_compose(&c->ibuf, IMSG_CTL_STATISTICS, 0, 0, + &rlay->stats, sizeof(rlay->stats)); + } + imsg_compose(&c->ibuf, IMSG_CTL_END, 0, 0, NULL, 0); } @@ -459,6 +572,8 @@ disable_host(struct ctl_conn *c, struct ctl_id *id) host->flags |= F_DISABLE; host->flags |= F_DEL; host->flags &= ~(F_ADD); + host->check_cnt = 0; + host->up_cnt = 0; imsg_compose(ibuf_hce, IMSG_HOST_DISABLE, 0, 0, &host->id, sizeof(host->id)); @@ -498,10 +613,12 @@ enable_host(struct ctl_conn *c, struct ctl_id *id) void pfe_sync(void) { - struct service *service; - struct table *active; - struct ctl_id id; - struct imsg imsg; + struct service *service; + struct table *active; + struct table *table; + struct ctl_id id; + struct imsg imsg; + struct ctl_demote demote; bzero(&id, sizeof(id)); 
bzero(&imsg, sizeof(imsg)); @@ -556,4 +673,26 @@ pfe_sync(void) control_imsg_forward(&imsg); } } + + TAILQ_FOREACH(table, &env->tables, entry) { + if ((table->flags & F_DEMOTE) == 0) + continue; + demote.level = 0; + if (table->up && table->demoted) { + demote.level = -1; + table->demoted = 0; + } + else if (!table->up && !table->demoted) { + demote.level = 1; + table->demoted = 1; + } + if (demote.level == 0) + continue; + log_debug("pfe_sync: demote %d table '%s' group '%s'", + demote.level, table->name, table->demote_group); + strlcpy(demote.group, table->demote_group, + sizeof(demote.group)); + imsg_compose(ibuf_main, IMSG_DEMOTE, 0, 0, + &demote, sizeof(demote)); + } } diff --git a/usr.sbin/relayd/pfe_filter.c b/usr.sbin/relayd/pfe_filter.c index 25e3ec43cec..27442c34006 100644 --- a/usr.sbin/relayd/pfe_filter.c +++ b/usr.sbin/relayd/pfe_filter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfe_filter.c,v 1.13 2007/02/20 04:06:17 reyk Exp $ */ +/* $OpenBSD: pfe_filter.c,v 1.14 2007/02/22 03:32:40 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ -402,3 +402,75 @@ flush_rulesets(struct hoststated *env) toolong: fatal("flush_rulesets: name too long"); } + +int +natlook(struct hoststated *env, struct ctl_natlook *cnl) +{ + struct pfioc_natlook pnl; + struct sockaddr_in *in, *out; + struct sockaddr_in6 *in6, *out6; + char ibuf[BUFSIZ], obuf[BUFSIZ]; + + bzero(&pnl, sizeof(pnl)); + + if ((pnl.af = cnl->src.ss_family) != cnl->dst.ss_family) + fatalx("natlook: illegal address families"); + switch (pnl.af) { + case AF_INET: + in = (struct sockaddr_in *)&cnl->src; + out = (struct sockaddr_in *)&cnl->dst; + bcopy(&in->sin_addr, &pnl.saddr.addr8, in->sin_len); + pnl.sport = in->sin_port; + bcopy(&out->sin_addr, &pnl.daddr.addr8, out->sin_len); + pnl.dport = out->sin_port; + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&cnl->src; + out6 = (struct sockaddr_in6 *)&cnl->dst; + bcopy(&in6->sin6_addr, &pnl.saddr.addr8, in6->sin6_len); + pnl.sport = 
in6->sin6_port; + bcopy(&out6->sin6_addr, &pnl.daddr.addr8, out6->sin6_len); + pnl.dport = out6->sin6_port; + } + pnl.proto = IPPROTO_TCP; + pnl.direction = PF_IN; + cnl->in = 1; + + if (ioctl(env->pf->dev, DIOCNATLOOK, &pnl) == -1) { + pnl.direction = PF_OUT; + cnl->in = 0; + if (ioctl(env->pf->dev, DIOCNATLOOK, &pnl) == -1) { + log_debug("natlook: error"); + return (-1); + } + } + + inet_ntop(pnl.af, &pnl.rsaddr, ibuf, sizeof(ibuf)); + inet_ntop(pnl.af, &pnl.rdaddr, obuf, sizeof(obuf)); + log_debug("natlook: %s %s:%d -> %s:%d", + pnl.direction == PF_IN ? "in" : "out", + ibuf, ntohs(pnl.rsport), obuf, ntohs(pnl.rdport)); + + switch (pnl.af) { + case AF_INET: + in = (struct sockaddr_in *)&cnl->rsrc; + out = (struct sockaddr_in *)&cnl->rdst; + bcopy(&pnl.rsaddr.addr8, &in->sin_addr, sizeof(in->sin_addr)); + in->sin_port = pnl.rsport; + bcopy(&pnl.rdaddr.addr8, &out->sin_addr, sizeof(out->sin_addr)); + out->sin_port = pnl.rdport; + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&cnl->rsrc; + out6 = (struct sockaddr_in6 *)&cnl->rdst; + bcopy(&pnl.rsaddr.addr8, &in6->sin6_addr, sizeof(in6->sin6_addr)); + bcopy(&pnl.rdaddr.addr8, &out6->sin6_addr, sizeof(out6->sin6_addr)); + break; + } + cnl->rsrc.ss_family = pnl.af; + cnl->rdst.ss_family = pnl.af; + cnl->rsport = pnl.rsport; + cnl->rdport = pnl.rdport; + + return (0); +} diff --git a/usr.sbin/relayd/relay.c b/usr.sbin/relayd/relay.c new file mode 100644 index 00000000000..5d9fbac12e2 --- /dev/null +++ b/usr.sbin/relayd/relay.c @@ -0,0 +1,1927 @@ +/* $OpenBSD: relay.c,v 1.1 2007/02/22 03:32:40 reyk Exp $ */ + +/* + * Copyright (c) 2006 Reyk Floeter <reyk@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/queue.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/tree.h> +#include <sys/hash.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> +#include <net/if.h> +#include <arpa/inet.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <err.h> +#include <pwd.h> +#include <event.h> +#include <fnmatch.h> + +#include <openssl/ssl.h> + +#include "hoststated.h" + +void relay_sig_handler(int sig, short, void *); +void relay_statistics(int, short, void *); +void relay_dispatch_pfe(int, short, void *); +void relay_dispatch_parent(int, short, void *); +void relay_shutdown(void); + +void relay_privinit(void); +void relay_protodebug(struct relay *); +void relay_init(void); +void relay_launch(void); +int relay_socket(struct sockaddr_storage *, in_port_t, + struct protocol *); +int relay_socket_listen(struct sockaddr_storage *, in_port_t, + struct protocol *); +int relay_socket_connect(struct sockaddr_storage *, in_port_t, + struct protocol *); + +void relay_accept(int, short, void *); +void relay_input(struct session *); +void relay_close(struct session *, const char *); +void relay_session(struct session *); +void relay_natlook(int, short, void *); + +int relay_connect(struct session *); +void 
relay_connected(int, short, void *); + +const char *relay_host(struct sockaddr_storage *, char *, size_t); +u_int32_t relay_hash_addr(struct sockaddr_storage *, u_int32_t); +int relay_from_table(struct session *); + +void relay_write(struct bufferevent *, void *); +void relay_read(struct bufferevent *, void *); +void relay_error(struct bufferevent *, short, void *); + +int relay_handle_http(struct ctl_relay_event *, + struct protonode *, struct protonode *, int); +void relay_read_http(struct bufferevent *, void *); +void relay_read_httpcontent(struct bufferevent *, void *); +char *relay_expand_http(struct ctl_relay_event *, char *, + char *, size_t); + +SSL_CTX *relay_ssl_ctx_create(struct relay *); +void relay_ssl_transaction(struct session *); +void relay_ssl_accept(int, short, void *); +void relay_ssl_connected(struct ctl_relay_event *); +void relay_ssl_readcb(int, short, void *); +void relay_ssl_writecb(int, short, void *); + +int relay_bufferevent_add(struct event *, int); +#ifdef notyet +int relay_bufferevent_printf(struct ctl_relay_event *, + const char *, ...); +#endif +int relay_bufferevent_print(struct ctl_relay_event *, char *); +int relay_bufferevent_write_buffer(struct ctl_relay_event *, + struct evbuffer *); +int relay_bufferevent_write(struct ctl_relay_event *, + void *, size_t); +static __inline int + relay_proto_cmp(struct protonode *, struct protonode *); +extern void bufferevent_read_pressure_cb(struct evbuffer *, size_t, + size_t, void *); + +volatile sig_atomic_t relay_sessions; +objid_t relay_conid; + +static struct hoststated *env = NULL; +struct imsgbuf *ibuf_pfe; +struct imsgbuf *ibuf_main; +int proc_id; + +#if DEBUG > 1 +#define DPRINTF log_debug +#else +#define DPRINTF(x...) 
do { } while(0) +#endif + +void +relay_sig_handler(int sig, short event, void *arg) +{ + struct timeval tv; + + tv.tv_sec = 0; + tv.tv_usec = 0; + + switch (sig) { + case SIGALRM: + case SIGTERM: + case SIGQUIT: + case SIGINT: + event_loopexit(&tv); + } +} + +pid_t +relay(struct hoststated *x_env, int pipe_parent2pfe[2], int pipe_parent2hce[2], + int pipe_parent2relay[2], int pipe_pfe2hce[2], + int pipe_pfe2relay[RELAY_MAXPROC][2]) +{ + pid_t pid; + struct passwd *pw; + struct event ev_sigint; + struct event ev_sigterm; + int i; + + switch (pid = fork()) { + case -1: + fatal("relay: cannot fork"); + case 0: + break; + default: + return (pid); + } + + env = x_env; + + /* Need root privileges for relay initialization */ + relay_privinit(); + + if ((pw = getpwnam(HOSTSTATED_USER)) == NULL) + fatal("relay: getpwnam"); + +#ifndef DEBUG + if (chroot(pw->pw_dir) == -1) + fatal("relay: chroot"); + if (chdir("/") == -1) + fatal("relay: chdir(\"/\")"); +#endif + + setproctitle("socket relay engine"); + hoststated_process = PROC_RELAY; + +#ifndef DEBUG + if (setgroups(1, &pw->pw_gid) || + setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || + setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) + fatal("relay: can't drop privileges"); +#endif + + /* Fork child handlers */ + for (i = 1; i < env->prefork_relay; i++) { + if (fork() == 0) { + proc_id = i; + break; + } + } + + event_init(); + + /* Per-child initialization */ + relay_init(); + + signal_set(&ev_sigint, SIGINT, relay_sig_handler, NULL); + signal_set(&ev_sigterm, SIGTERM, relay_sig_handler, NULL); + signal_add(&ev_sigint, NULL); + signal_add(&ev_sigterm, NULL); + signal(SIGHUP, SIG_IGN); + signal(SIGPIPE, SIG_IGN); + + /* setup pipes */ + close(pipe_pfe2hce[0]); + close(pipe_pfe2hce[1]); + close(pipe_parent2hce[0]); + close(pipe_parent2hce[1]); + close(pipe_parent2pfe[0]); + close(pipe_parent2pfe[1]); + close(pipe_parent2relay[0]); + for (i = 0; i < env->prefork_relay; i++) { + if (i == proc_id) + continue; + 
close(pipe_pfe2relay[i][1]); + close(pipe_pfe2relay[i][1]); + } + close(pipe_pfe2relay[proc_id][1]); + + if ((ibuf_pfe = calloc(1, sizeof(struct imsgbuf))) == NULL || + (ibuf_main = calloc(1, sizeof(struct imsgbuf))) == NULL) + fatal("relay"); + imsg_init(ibuf_pfe, pipe_pfe2relay[proc_id][0], relay_dispatch_pfe); + imsg_init(ibuf_main, pipe_parent2relay[1], relay_dispatch_parent); + + ibuf_pfe->events = EV_READ; + event_set(&ibuf_pfe->ev, ibuf_pfe->fd, ibuf_pfe->events, + ibuf_pfe->handler, ibuf_pfe); + event_add(&ibuf_pfe->ev, NULL); + + ibuf_main->events = EV_READ; + event_set(&ibuf_main->ev, ibuf_main->fd, ibuf_main->events, + ibuf_main->handler, ibuf_main); + event_add(&ibuf_main->ev, NULL); + + relay_launch(); + + event_dispatch(); + relay_shutdown(); + + return (0); +} + +void +relay_shutdown(void) +{ + struct session *con; + + struct relay *rlay; + TAILQ_FOREACH(rlay, &env->relays, entry) { + if (rlay->flags & F_DISABLE) + continue; + close(rlay->s); + while ((con = TAILQ_FIRST(&rlay->sessions)) != NULL) + relay_close(con, "shutdown"); + } + usleep(200); /* XXX relay needs to shutdown last */ + log_info("socket relay engine exiting"); + _exit(0); +} + +void +relay_protodebug(struct relay *rlay) +{ + struct protocol *proto = rlay->proto; + struct protonode *pn; + + fprintf(stderr, "protocol %d: name %s\n", proto->id, proto->name); + fprintf(stderr, "\tflags: 0x%04x\n", proto->flags); + if (proto->cache != -1) + fprintf(stderr, "\tssl session cache: %d\n", proto->cache); + fprintf(stderr, "\ttype: "); + switch (proto->type) { + case RELAY_PROTO_TCP: + fprintf(stderr, "tcp\n"); + break; + case RELAY_PROTO_HTTP: + fprintf(stderr, "http\n"); + break; + } + RB_FOREACH(pn, proto_tree, &proto->tree) { + fprintf(stderr, "\t\t"); + switch (pn->action) { + case NODE_ACTION_APPEND: + fprintf(stderr, "append \"%s\" to \"%s\"", + pn->value, pn->key); + break; + case NODE_ACTION_CHANGE: + fprintf(stderr, "change \"%s\" to \"%s\"", + pn->key, pn->value); + break; + case 
NODE_ACTION_REMOVE: + fprintf(stderr, "remove \"%s\"", + pn->key); + break; + case NODE_ACTION_EXPECT: + fprintf(stderr, "%sexpect \"%s\" from \"%s\"", + pn->header ? "" : "url ", + pn->value, pn->key); + break; + case NODE_ACTION_FILTER: + fprintf(stderr, "%sfilter \"%s\" from \"%s\"", + pn->header ? "" : "url ", + pn->value, pn->key); + break; + case NODE_ACTION_HASH: + fprintf(stderr, "%shash \"%s\"", + pn->header ? "" : "url ", + pn->key); + break; + case NODE_ACTION_NONE: + fprintf(stderr, "%snone \"%s\"", + pn->header ? "" : "url ", + pn->key); + break; + } + fprintf(stderr, "\n"); + } +} + +void +relay_privinit(void) +{ + struct relay *rlay; + extern int debug; + + if (env->flags & F_SSL) + ssl_init(env); + + TAILQ_FOREACH(rlay, &env->relays, entry) { + log_debug("relay_init: adding relay %s", rlay->name); + + if (debug) + relay_protodebug(rlay); + + if ((rlay->flags & F_SSL) && + (rlay->ctx = relay_ssl_ctx_create(rlay)) == NULL) + fatal("relay_launch: failed to create SSL context"); + + if ((rlay->s = relay_socket_listen(&rlay->ss, rlay->port, + rlay->proto)) == -1) + fatal("relay_launch: failed to listen"); + } +} + +void +relay_init(void) +{ + struct relay *rlay; + struct host *host; + struct timeval tv; + + TAILQ_FOREACH(rlay, &env->relays, entry) { + if (rlay->dsttable != NULL) { + switch (rlay->dstmode) { + case RELAY_DSTMODE_ROUNDROBIN: + rlay->dstkey = 0; + break; + case RELAY_DSTMODE_LOADBALANCE: + case RELAY_DSTMODE_HASH: + rlay->dstkey = + hash32_str(rlay->name, HASHINIT); + rlay->dstkey = + hash32_str(rlay->dsttable->name, + rlay->dstkey); + break; + } + rlay->dstnhosts = 0; + TAILQ_FOREACH(host, &rlay->dsttable->hosts, entry) { + if (rlay->dstnhosts >= RELAY_MAXHOSTS) + fatal("relay_init: " + "too many hosts in table"); + rlay->dsthost[rlay->dstnhosts++] = host; + } + log_info("adding %d hosts from table %s%s", + rlay->dstnhosts, rlay->dsttable->name, + rlay->dstcheck ? 
"" : " (no check)"); + } + } + + /* Schedule statistics timer */ + evtimer_set(&env->statev, relay_statistics, NULL); + bcopy(&env->statinterval, &tv, sizeof(tv)); + evtimer_add(&env->statev, &tv); +} + +void +relay_statistics(int fd, short events, void *arg) +{ + struct relay *rlay; + struct ctl_stats crs, *cur; + struct timeval tv, tv_now; + int resethour = 0, resetday = 0; + struct session *con, *next_con; + + /* + * This is a hack to calculate some average statistics. + * It doesn't try to be very accurate, but could be improved... + */ + + timerclear(&tv); + if (gettimeofday(&tv_now, NULL)) + fatal("relay_init: gettimeofday"); + + TAILQ_FOREACH(rlay, &env->relays, entry) { + bzero(&crs, sizeof(crs)); + resethour = resetday = 0; + + cur = &rlay->stats[proc_id]; + cur->cnt += cur->last; + cur->tick++; + cur->avg = (cur->last + cur->avg) / 2; + cur->last_hour += cur->last; + if ((cur->tick % (3600 / env->statinterval.tv_sec)) == 0) { + cur->avg_hour = (cur->last_hour + cur->avg_hour) / 2; + resethour++; + } + cur->last_day += cur->last; + if ((cur->tick % (86400 / env->statinterval.tv_sec)) == 0) { + cur->avg_day = (cur->last_day + cur->avg_day) / 2; + resethour++; + } + bcopy(cur, &crs, sizeof(crs)); + + cur->last = 0; + if (resethour) + cur->last_hour = 0; + if (resetday) + cur->last_day = 0; + + crs.id = rlay->id; + crs.proc = proc_id; + imsg_compose(ibuf_pfe, IMSG_STATISTICS, 0, 0, + &crs, sizeof(crs)); + + for (con = TAILQ_FIRST(&rlay->sessions); + con != NULL; con = next_con) { + next_con = TAILQ_NEXT(con, entry); + timersub(&tv_now, &con->tv_last, &tv); + if (timercmp(&tv, &rlay->timeout, >=)) + relay_close(con, "hard timeout"); + } + } + + /* Schedule statistics timer */ + evtimer_set(&env->statev, relay_statistics, NULL); + bcopy(&env->statinterval, &tv, sizeof(tv)); + evtimer_add(&env->statev, &tv); +} + +void +relay_launch(void) +{ + struct relay *rlay; + + TAILQ_FOREACH(rlay, &env->relays, entry) { + log_debug("relay_launch: running relay %s", 
rlay->name); + + rlay->up = HOST_UP; + + event_set(&rlay->ev, rlay->s, EV_READ|EV_PERSIST, + relay_accept, rlay); + event_add(&rlay->ev, NULL); + } +} + +int +relay_socket(struct sockaddr_storage *ss, in_port_t port, + struct protocol *proto) +{ + int s = -1, val; + struct linger lng; + + switch (ss->ss_family) { + case AF_INET: + ((struct sockaddr_in *)ss)->sin_port = port; + ((struct sockaddr_in *)ss)->sin_len = + sizeof(struct sockaddr_in); + break; + case AF_INET6: + ((struct sockaddr_in6 *)ss)->sin6_port = port; + ((struct sockaddr_in6 *)ss)->sin6_len = + sizeof(struct sockaddr_in6); + break; + } + + if ((s = socket(ss->ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) + goto bad; + bzero(&lng, sizeof(lng)); + if (setsockopt(s, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) + goto bad; + val = 1; + if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(int)) == -1) + goto bad; + if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) + goto bad; + + if (proto->tcpflags & (TCPFLAG_NODELAY|TCPFLAG_NNODELAY)) { + if (proto->tcpflags & TCPFLAG_NNODELAY) + val = 0; + else + val = 1; + if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, + &val, sizeof(val)) == -1) + goto bad; + } + if (proto->tcpflags & (TCPFLAG_SACK|TCPFLAG_NSACK)) { + if (proto->tcpflags & TCPFLAG_NSACK) + val = 0; + else + val = 1; + if (setsockopt(s, IPPROTO_TCP, TCP_SACK_ENABLE, + &val, sizeof(val)) == -1) + goto bad; + } + if (proto->tcpflags & TCPFLAG_BUFSIZ) { + val = proto->tcpbufsiz; + if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, + &val, sizeof(val)) == -1) + goto bad; + val = proto->tcpbufsiz; + if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, + &val, sizeof(val)) == -1) + goto bad; + } + + return (s); + + bad: + if (s != -1) + close(s); + return (-1); +} + +int +relay_socket_connect(struct sockaddr_storage *ss, in_port_t port, + struct protocol *proto) +{ + int s; + + if ((s = relay_socket(ss, port, proto)) == -1) + return (-1); + + if (connect(s, (struct sockaddr *)ss, ss->ss_len) == -1) { + if (errno != EINPROGRESS) + 
goto bad; + } + + return (s); + + bad: + close(s); + return (-1); +} + +int +relay_socket_listen(struct sockaddr_storage *ss, in_port_t port, + struct protocol *proto) +{ + int s; + + if ((s = relay_socket(ss, port, proto)) == -1) + return (-1); + + if (bind(s, (struct sockaddr *)ss, ss->ss_len) == -1) + goto bad; + if (listen(s, 5) == -1) + goto bad; + + return (s); + + bad: + close(s); + return (-1); +} + +void +relay_connected(int fd, short sig, void *arg) +{ + struct session *con = (struct session *)arg; + struct relay *rlay = (struct relay *)con->relay; + evbuffercb outrd = relay_read; + evbuffercb outwr = relay_write; + struct bufferevent *bev; + char ibuf[128], obuf[128]; + + if (sig == EV_TIMEOUT) { + relay_close(con, "connect timeout"); + return; + } + + DPRINTF("relay_connected: session %d: %ssuccessful", + con->id, rlay->proto->lateconnect ? "late connect " : ""); + + if (env->opts & HOSTSTATED_OPT_LOGUPDATE) { + relay_host(&con->in.ss, ibuf, sizeof(ibuf)); + relay_host(&con->out.ss, obuf, sizeof(obuf)); + log_info("relay %s, session %d (%d active), %s -> %s:%d", + rlay->name, con->id, relay_sessions, + ibuf, obuf, ntohs(con->out.port)); + } + + /* + * Relay <-> Server + */ + bev = bufferevent_new(fd, outrd, outwr, relay_error, &con->out); + if (bev == NULL) { + relay_close(con, "failed to allocate output buffer event"); + return; + } + evbuffer_free(bev->output); + bev->output = con->out.output; + if (bev->output == NULL) + fatal("relay_connected: invalid output buffer"); + + con->out.bev = bev; + bufferevent_settimeout(bev, + rlay->timeout.tv_sec, rlay->timeout.tv_sec); + bufferevent_enable(bev, EV_READ|EV_WRITE); +} + +void +relay_input(struct session *con) +{ + struct relay *rlay = (struct relay *)con->relay; + struct protocol *proto = rlay->proto; + evbuffercb inrd = relay_read; + evbuffercb inwr = relay_write; + + switch (rlay->proto->type) { + case RELAY_PROTO_HTTP: + /* Check the client's HTTP request */ + inrd = relay_read_http; + if 
((con->in.nodes = calloc(proto->nodecount, + sizeof(u_int8_t))) == NULL) { + relay_close(con, "failed to allocate node buffer"); + return; + } + break; + case RELAY_PROTO_TCP: + /* Use defaults */ + break; + default: + fatalx("relay_input: unknown protocol"); + } + + /* + * Client <-> Relay + */ + con->in.bev = bufferevent_new(con->in.s, inrd, inwr, + relay_error, &con->in); + if (con->in.bev == NULL) { + relay_close(con, "failed to allocate input buffer event"); + return; + } + + /* Initialize the SSL wrapper */ + if ((rlay->flags & F_SSL) && con->in.ssl != NULL) + relay_ssl_connected(&con->in); + + bufferevent_settimeout(con->in.bev, + rlay->timeout.tv_sec, rlay->timeout.tv_sec); + bufferevent_enable(con->in.bev, EV_READ|EV_WRITE); +} + +void +relay_write(struct bufferevent *bev, void *arg) +{ + struct ctl_relay_event *cre = (struct ctl_relay_event *)arg; + struct session *con = (struct session *)cre->con; + if (gettimeofday(&con->tv_last, NULL)) + con->done = 1; + if (con->done) + relay_close(con, "last write, done"); +} + +void +relay_read(struct bufferevent *bev, void *arg) +{ + struct ctl_relay_event *cre = (struct ctl_relay_event *)arg; + struct session *con = (struct session *)cre->con; + struct evbuffer *src = EVBUFFER_INPUT(bev); + + if (gettimeofday(&con->tv_last, NULL)) + goto done; + if (!EVBUFFER_LENGTH(src)) + return; + relay_bufferevent_write_buffer(cre->dst, src); + if (con->done) + goto done; + bufferevent_enable(con->in.bev, EV_READ); + return; + done: + relay_close(con, "last read, done"); +} + +char * +relay_expand_http(struct ctl_relay_event *cre, char *val, char *buf, size_t len) +{ + struct session *con = (struct session *)cre->con; + struct relay *rlay = (struct relay *)con->relay; + char ibuf[128]; + + strlcpy(buf, val, len); + + if (strstr(val, "$REMOTE_") != NULL) { + if (strstr(val, "$REMOTE_ADDR") != NULL) { + relay_host(&cre->ss, ibuf, sizeof(ibuf)); + if (expand_string(buf, len, + "$REMOTE_ADDR", ibuf) != 0) + return (NULL); + } + if 
(strstr(val, "$REMOTE_PORT") != NULL) { + snprintf(ibuf, sizeof(ibuf), "%u", ntohs(cre->port)); + if (expand_string(buf, len, + "$REMOTE_PORT", ibuf) != 0) + return (NULL); + } + } + if (strstr(val, "$SERVER_") != NULL) { + if (strstr(val, "$SERVER_ADDR") != NULL) { + relay_host(&rlay->ss, ibuf, sizeof(ibuf)); + if (expand_string(buf, len, + "$SERVER_ADDR", ibuf) != 0) + return (NULL); + } + if (strstr(val, "$SERVER_PORT") != NULL) { + snprintf(ibuf, sizeof(ibuf), "%u", ntohs(rlay->port)); + if (expand_string(buf, len, + "$SERVER_PORT", ibuf) != 0) + return (NULL); + } + } + if (strstr(val, "$TIMEOUT") != NULL) { + snprintf(ibuf, sizeof(ibuf), "%lu", rlay->timeout.tv_sec); + if (expand_string(buf, len, "$TIMEOUT", ibuf) != 0) + return (NULL); + } + + return (buf); +} + + +int +relay_handle_http(struct ctl_relay_event *cre, struct protonode *pn, + struct protonode *pk, int header) +{ + struct session *con = (struct session *)cre->con; + char buf[READ_BUF_SIZE], *ptr; + + if (pn->header != header) + return (0); + + switch (pn->action) { + case NODE_ACTION_APPEND: + if (!header || (pn->mark && cre->marked == 0)) + return (-1); + ptr = pn->value; + if (pn->macro && (ptr = relay_expand_http(cre, + pn->value, buf, sizeof(buf))) == NULL) + break; + relay_bufferevent_print(cre->dst, pn->key); + relay_bufferevent_print(cre->dst, ": "); + relay_bufferevent_print(cre->dst, pk->value); + relay_bufferevent_print(cre->dst, ", "); + relay_bufferevent_print(cre->dst, ptr); + relay_bufferevent_print(cre->dst, "\r\n"); + cre->nodes[pn->id] = 1; + DPRINTF("relay_handle_http: append '%s: %s, %s'", + pk->key, pk->value, ptr); + break; + case NODE_ACTION_CHANGE: + case NODE_ACTION_REMOVE: + if (!header || (pn->mark && cre->marked == 0)) + return (-1); + DPRINTF("relay_handle_http: change/remove '%s: %s'", + pk->key, pk->value); + break; + case NODE_ACTION_EXPECT: + DPRINTF("relay_handle_http: expect '%s: %s'", + pn->key, pn->value); + if (fnmatch(pn->value, pk->value, FNM_CASEFOLD) == 
0) { + if (pn->mark) + cre->marked++; + cre->nodes[pn->id] = 1; + } + break; + case NODE_ACTION_FILTER: + DPRINTF("relay_handle_http: filter '%s: %s'", + pn->key, pn->value); + if (fnmatch(pn->value, pk->value, FNM_CASEFOLD) == + FNM_NOMATCH) { + if (pn->mark) + cre->marked++; + cre->nodes[pn->id] = 1; + } + break; + case NODE_ACTION_HASH: + if (pn->mark && !cre->marked) + return (-1); + DPRINTF("relay_handle_http: hash '%s: %s'", + pn->key, pk->value); + con->outkey = hash32_str(pk->value, con->outkey); + break; + case NODE_ACTION_NONE: + return (-1); + } + + return (0); +} +

/*
 * Read callback for the payload that follows an HTTP header.
 *
 * Everything currently buffered is forwarded to the opposite side and
 * subtracted from cre->toread.  Once the buffered amount reaches the
 * outstanding count, the read callback is switched back to the HTTP
 * header parser.  The session is closed if the clock cannot be read or
 * if it is marked done after forwarding.
 */
void
relay_read_httpcontent(struct bufferevent *bev, void *arg)
{
	struct ctl_relay_event	*cre = (struct ctl_relay_event *)arg;
	struct session		*con = (struct session *)cre->con;
	struct evbuffer		*src = EVBUFFER_INPUT(bev);
	size_t			 size;

	if (gettimeofday(&con->tv_last, NULL)) {
		relay_close(con, "last http content read, done");
		return;
	}
	size = EVBUFFER_LENGTH(src);
	DPRINTF("relay_read_httpcontent: size %d, to read %d",
	    size, cre->toread);
	if (size == 0)
		return;
	relay_bufferevent_write_buffer(cre->dst, src);
	/* Payload complete: the next bytes are a new request header */
	if (size >= cre->toread)
		bev->readcb = relay_read_http;
	cre->toread -= size;
	DPRINTF("relay_read_httpcontent: done, size %d, to read %d",
	    size, cre->toread);
	if (con->done) {
		relay_close(con, "last http content read, done");
		return;
	}
	bufferevent_enable(bev, EV_READ);
}

+void +relay_read_http(struct bufferevent *bev, void *arg) +{ + struct ctl_relay_event *cre = (struct ctl_relay_event *)arg; + struct session *con = (struct session *)cre->con; + struct relay *rlay = (struct relay *)con->relay; + struct protocol *proto = rlay->proto; + struct evbuffer *src = EVBUFFER_INPUT(bev); + struct protonode *pn, pk, *pnv, pkv; + char *line, buf[READ_BUF_SIZE], *ptr, *url, *method; + int done = 0, header = 0; + const char *errstr; + size_t size; + + if (gettimeofday(&con->tv_last, NULL)) + goto done; + size = EVBUFFER_LENGTH(src); + DPRINTF("relay_read_http: size %d, to read %d", size,
cre->toread); + if (!size) + return; + + while (!done && (line = evbuffer_readline(src)) != NULL) { + /* + * An empty line indicates the end of the request. + * libevent already stripped the \r\n for us. + */ + if (!strlen(line)) { + done = 1; + free(line); + break; + } + pk.key = line; + + /* + * The first line is the GET/POST/PUT/... request, + * subsequent lines are HTTP headers. + */ + if (++cre->line == 1) { + pk.value = strchr(pk.key, ' '); + } else + pk.value = strchr(pk.key, ':'); + if (pk.value == NULL || strlen(pk.value) < 3) { + DPRINTF("relay_read_http: request '%s'", line); + /* Append line to the output buffer */ + relay_bufferevent_print(cre->dst, line); + relay_bufferevent_print(cre->dst, "\r\n"); + free(line); + continue; + } + if (*pk.value == ':') { + *pk.value++ = '\0'; + *pk.value++; + header = 1; + } else { + *pk.value++ = '\0'; + header = 0; + } + + DPRINTF("relay_read_http: header '%s: %s'", pk.key, pk.value); + + /* + * Identify and handle specific HTTP request methods + */ + if (cre->line == 1) { + if (strcmp("GET", pk.key) == 0) + cre->method = HTTP_METHOD_GET; + else if (strcmp("HEAD", pk.key) == 0) + cre->method = HTTP_METHOD_HEAD; + else if (strcmp("POST", pk.key) == 0) + cre->method = HTTP_METHOD_POST; + else if (strcmp("PUT", pk.key) == 0) + cre->method = HTTP_METHOD_PUT; + else if (strcmp("DELETE", pk.key) == 0) + cre->method = HTTP_METHOD_DELETE; + else if (strcmp("OPTIONS", pk.key) == 0) + cre->method = HTTP_METHOD_OPTIONS; + else if (strcmp("TRACE", pk.key) == 0) + cre->method = HTTP_METHOD_TRACE; + else if (strcmp("CONNECT", pk.key) == 0) + cre->method = HTTP_METHOD_CONNECT; + } else if ((cre->method == HTTP_METHOD_POST || + cre->method == HTTP_METHOD_PUT) && + strcasecmp("Content-Length", pk.key) == 0) { + /* + * Need to read data from the client after the + * HTTP header. + */ + cre->toread = strtonum(pk.value, 1, INT_MAX, &errstr); + + /* + * \r\n between header and body. 
+ * XXX What about non-standard clients not using + * the carriage return? And some browsers seem to + * include the line length in the content-length. + */ + cre->toread += 2; + + if (errstr) { + relay_close(con, errstr); + return; + } + } + + /* Match the HTTP header */ + if ((pn = RB_FIND(proto_tree, &proto->tree, &pk)) == NULL) + goto next; + + /* Decode the URL */ + if (pn->getvars) { + url = strdup(pk.value); + if (url == NULL) + goto next; + if ((ptr = strchr(url, '?')) == NULL || + strlen(ptr) < 2) { + free(url); + goto next; + } + *ptr++ = '\0'; + method = strchr(ptr, ' '); + if (method != NULL) + *method++ = '\0'; + while (ptr != NULL && strlen(ptr)) { + pkv.key = ptr; + if ((ptr = strchr(ptr, '&')) != NULL) + *ptr++ = '\0'; + if ((pkv.value = + strchr(pkv.key, '=')) == NULL || + strlen(pkv.value) < 1) { + continue; + } + *pkv.value++ = '\0'; + if ((pnv = RB_FIND(proto_tree, + &proto->tree, &pkv)) == NULL) + continue; + if (relay_handle_http(cre, pnv, &pkv, 0) == -1) + continue; + } + free(url); + } + + if (relay_handle_http(cre, pn, &pk, header) == -1) + goto next; + + free(line); + continue; + +next: + relay_bufferevent_print(cre->dst, pk.key); + if (header) + relay_bufferevent_print(cre->dst, ": "); + else + relay_bufferevent_print(cre->dst, " "); + relay_bufferevent_print(cre->dst, pk.value); + relay_bufferevent_print(cre->dst, "\r\n"); + free(line); + continue; + } + if (done) { + RB_FOREACH(pn, proto_tree, &proto->tree) { + if (cre->nodes[pn->id]) { + cre->nodes[pn->id] = 0; + continue; + } + switch (pn->action) { + case NODE_ACTION_APPEND: + case NODE_ACTION_CHANGE: + ptr = pn->value; + if (pn->mark && cre->marked == 0) + break; + if (pn->macro && (ptr = relay_expand_http(cre, + pn->value, buf, sizeof(buf))) == NULL) + break; + relay_bufferevent_print(cre->dst, pn->key); + relay_bufferevent_print(cre->dst, ": "); + relay_bufferevent_print(cre->dst, ptr); + relay_bufferevent_print(cre->dst, "\r\n"); + DPRINTF("relay_read_http: add '%s: %s'", + 
pn->key, ptr); + break; + case NODE_ACTION_EXPECT: + if (pn->mark) + break; + DPRINTF("relay_read_http: missing '%s: %s'", + pn->key, pn->value); + relay_close(con, "incomplete header, done"); + return; + case NODE_ACTION_FILTER: + if (pn->mark) + break; + DPRINTF("relay_read_http: filtered '%s: %s'", + pn->key, pn->value); + relay_close(con, "rejecting header, done"); + return; + default: + break; + } + } + + switch (cre->method) { + case HTTP_METHOD_CONNECT: + /* Data stream */ + bev->readcb = relay_read; + break; + case HTTP_METHOD_POST: + case HTTP_METHOD_PUT: + /* HTTP request payload */ + if (cre->toread) { + bev->readcb = relay_read_httpcontent; + break; + } + /* FALLTHROUGH */ + default: + /* HTTP handler */ + bev->readcb = relay_read_http; + break; + } + + /* Write empty newline and switch to relay mode */ + relay_bufferevent_print(cre->dst, "\r\n"); + cre->line = 0; + cre->method = 0; + cre->marked = 0; + + if (proto->lateconnect && cre->bev == NULL && + relay_connect(con) == -1) { + relay_close(con, "session failed"); + return; + } + } + if (con->done) + goto done; + if (EVBUFFER_LENGTH(src)) + relay_bufferevent_write_buffer(cre->dst, src); + bufferevent_enable(bev, EV_READ); + return; + done: + relay_close(con, "last http read, done"); +} + +void +relay_error(struct bufferevent *bev, short error, void *arg) +{ + struct ctl_relay_event *cre = (struct ctl_relay_event *)arg; + struct session *con = (struct session *)cre->con; + struct evbuffer *src = EVBUFFER_OUTPUT(bev); + struct evbuffer *dst; + + if (error & EVBUFFER_TIMEOUT) { + relay_close(con, "buffer event timeout"); + return; + } +#if 0 + if (error & EVBUFFER_EOF) { + bufferevent_disable(bev, EV_READ|EV_WRITE); + relay_close(con, "done"); + return; + } +#endif + if (error & (EVBUFFER_READ|EVBUFFER_WRITE|EVBUFFER_EOF)) { + bufferevent_disable(bev, EV_READ|EV_WRITE); + + con->done = 1; + if (cre->dst->bev != NULL) { + dst = EVBUFFER_OUTPUT(cre->dst->bev); + if (EVBUFFER_LENGTH(dst)) { + 
bufferevent_write_buffer(cre->dst->bev, src); + return; + } + } + + relay_close(con, "done"); + return; + } + relay_close(con, "buffer event error"); +} + +const char * +relay_host(struct sockaddr_storage *ss, char *buf, size_t len) +{ + int af = ss->ss_family; + void *ptr; + + bzero(buf, len); + if (af == AF_INET) + ptr = &((struct sockaddr_in *)ss)->sin_addr; + else + ptr = &((struct sockaddr_in6 *)ss)->sin6_addr; + return (inet_ntop(af, ptr, buf, len)); +} + +void +relay_accept(int fd, short sig, void *arg) +{ + struct relay *rlay = (struct relay *)arg; + struct session *con = NULL; + struct ctl_natlook *cnl = NULL; + socklen_t slen; + struct timeval tv; + struct sockaddr_storage ss; + int s = -1; + + slen = sizeof(ss); + if ((s = accept(fd, (struct sockaddr *)&ss, (socklen_t *)&slen)) == -1) + return; + + if (relay_sessions >= RELAY_MAX_SESSIONS || rlay->flags & F_DISABLE) + goto err; + + if ((con = (struct session *) + calloc(1, sizeof(struct session))) == NULL) + goto err; + + con->in.s = s; + con->in.ssl = NULL; + con->out.s = -1; + con->out.ssl = NULL; + con->in.dst = &con->out; + con->out.dst = &con->in; + con->in.con = con; + con->out.con = con; + con->relay = rlay; + con->id = ++relay_conid; + con->outkey = rlay->dstkey; + if (gettimeofday(&con->tv_start, NULL)) + goto err; + bcopy(&con->tv_start, &con->tv_last, sizeof(con->tv_last)); + bcopy(&ss, &con->in.ss, sizeof(con->in.ss)); + + /* Pre-allocate output buffer */ + con->out.output = evbuffer_new(); + if (con->out.output == NULL) { + relay_close(con, "failed to allocate output buffer"); + return; + } + + if (rlay->flags & F_NATLOOK) { + if ((cnl = (struct ctl_natlook *) + calloc(1, sizeof(struct ctl_natlook))) == NULL) + goto err; + } + + relay_sessions++; + TAILQ_INSERT_HEAD(&rlay->sessions, con, entry); + + /* Increment the per-relay session counter */ + rlay->stats[proc_id].last++; + + if (rlay->flags & F_NATLOOK && cnl != NULL) { + con->cnl = cnl;; + bzero(cnl, sizeof(*cnl)); + cnl->in = -1; + 
cnl->id = con->id; + bcopy(&con->in.ss, &cnl->src, sizeof(cnl->src)); + bcopy(&rlay->ss, &cnl->dst, sizeof(cnl->dst)); + imsg_compose(ibuf_pfe, IMSG_NATLOOK, 0, 0, cnl, sizeof(*cnl)); + + /* Schedule timeout */ + evtimer_set(&con->ev, relay_natlook, con); + bcopy(&rlay->timeout, &tv, sizeof(tv)); + evtimer_add(&con->ev, &tv); + return; + } + + relay_session(con); + return; + err: + if (s != -1) { + close(s); + if (con != NULL) + free(con); + } +} + +u_int32_t +relay_hash_addr(struct sockaddr_storage *ss, u_int32_t p) +{ + struct sockaddr_in *sin4; + struct sockaddr_in6 *sin6; + + if (ss->ss_family == AF_INET) { + sin4 = (struct sockaddr_in *)ss; + p = hash32_buf(&sin4->sin_addr, + sizeof(struct in_addr), p); + } else { + sin6 = (struct sockaddr_in6 *)ss; + p = hash32_buf(&sin6->sin6_addr, + sizeof(struct in6_addr), p); + } + + return (p); +} + +int +relay_from_table(struct session *con) +{ + struct relay *rlay = (struct relay *)con->relay; + struct host *host; + struct table *table = rlay->dsttable; + u_int32_t p = con->outkey; + int idx = 0; + + if (rlay->dstcheck && !table->up) { + log_debug("relay_from_table: no active hosts"); + return (-1); + } + + switch (rlay->dstmode) { + case RELAY_DSTMODE_ROUNDROBIN: + if ((int)rlay->dstkey >= rlay->dstnhosts) + rlay->dstkey = 0; + idx = (int)rlay->dstkey++; + break; + case RELAY_DSTMODE_LOADBALANCE: + p = relay_hash_addr(&con->in.ss, p); + /* FALLTHROUGH */ + case RELAY_DSTMODE_HASH: + p = relay_hash_addr(&rlay->ss, p); + p = hash32_buf(&rlay->port, sizeof(rlay->port), p); + if ((idx = p % rlay->dstnhosts) >= RELAY_MAXHOSTS) + return (-1); + } + host = rlay->dsthost[idx]; + DPRINTF("relay_from_table: host %s, p 0x%08x, idx %d", + host->name, p, idx); + while (host != NULL) { + DPRINTF("relay_from_table: host %s", host->name); + if (!rlay->dstcheck || host->up == HOST_UP) + goto found; + host = TAILQ_NEXT(host, entry); + } + TAILQ_FOREACH(host, &rlay->dsttable->hosts, entry) { + DPRINTF("relay_from_table: next host %s", 
host->name); + if (!rlay->dstcheck || host->up == HOST_UP) + goto found; + } + + /* Should not happen */ + fatalx("relay_from_table: no active hosts, desynchronized"); + + found: + con->out.port = table->port; + bcopy(&host->ss, &con->out.ss, sizeof(con->out.ss)); + + return (0); +} + +void +relay_natlook(int fd, short event, void *arg) +{ + struct session *con = (struct session *)arg; + struct ctl_natlook *cnl = con->cnl; + + if (cnl == NULL) + fatalx("invalid NAT lookup"); + + if (con->out.ss.ss_family == AF_UNSPEC && cnl->in == -1) { + relay_close(con, "session NAT lookup failed"); + return; + } + if (cnl->in != -1) { + bcopy(&cnl->rdst, &con->out.ss, sizeof(con->out.ss)); + con->out.port = cnl->rdport; + } + free(con->cnl); + con->cnl = NULL; + + relay_session(con); +} + +void +relay_session(struct session *con) +{ + struct relay *rlay = (struct relay *)con->relay; + + if (bcmp(&rlay->ss, &con->out.ss, sizeof(con->out.ss)) == 0 && + con->out.port == rlay->port) { + log_debug("relay_session: session %d: looping", + con->id); + relay_close(con, "session aborted"); + return; + } + + if ((rlay->flags & F_SSL) && (con->in.ssl == NULL)) { + relay_ssl_transaction(con); + return; + } + + if (!rlay->proto->lateconnect && relay_connect(con) == -1) { + relay_close(con, "session failed"); + return; + } + + relay_input(con); +} + +int +relay_connect(struct session *con) +{ + struct relay *rlay = (struct relay *)con->relay; + + if (gettimeofday(&con->tv_start, NULL)) + return (-1); + + if (rlay->dsttable != NULL) { + if (relay_from_table(con) != 0) + return (-1); + } else { + bcopy(&rlay->dstss, &con->out.ss, sizeof(con->out.ss)); + con->out.port = rlay->dstport; + } + + if ((con->out.s = relay_socket_connect(&con->out.ss, con->out.port, + rlay->proto)) == -1) { + log_debug("relay_connect: session %d: forward failed: %s", + con->id, strerror(errno)); + return (-1); + } + if (errno == EINPROGRESS) + event_again(&con->ev, con->out.s, EV_WRITE|EV_TIMEOUT, + relay_connected, 
&con->tv_start, &env->timeout, con); + else + relay_connected(con->out.s, EV_WRITE, con); + + return (0); +} + +void +relay_close(struct session *con, const char *msg) +{ + struct relay *rlay = (struct relay *)con->relay; + + TAILQ_REMOVE(&rlay->sessions, con, entry); + + event_del(&con->ev); + if (con->in.bev != NULL) + bufferevent_disable(con->in.bev, EV_READ|EV_WRITE); + if (con->out.bev != NULL) + bufferevent_disable(con->out.bev, EV_READ|EV_WRITE); + + if (con->in.bev != NULL) + bufferevent_free(con->in.bev); + else if (con->in.output != NULL) + evbuffer_free(con->in.output); + if (con->in.ssl != NULL) { + /* XXX handle non-blocking shutdown */ + if (SSL_shutdown(con->in.ssl) == 0) + SSL_shutdown(con->in.ssl); + SSL_free(con->in.ssl); + } + if (con->in.s != -1) + close(con->in.s); + if (con->in.buf != NULL) + free(con->in.buf); + if (con->in.nodes != NULL) + free(con->in.nodes); + + if (con->out.bev != NULL) + bufferevent_free(con->out.bev); + else if (con->out.output != NULL) + evbuffer_free(con->out.output); + if (con->out.s != -1) + close(con->out.s); + if (con->out.buf != NULL) + free(con->out.buf); + if (con->out.nodes != NULL) + free(con->out.nodes); + + if (con->cnl != NULL) { +#if 0 + imsg_compose(ibuf_pfe, IMSG_KILLSTATES, 0, 0, + cnl, sizeof(*cnl)); +#endif + free(con->cnl); + } + +#ifdef DEBUG + log_info("relay %s, session %d closed: %s", rlay->name, con->id, msg); +#else + log_debug("relay %s, session %d closed: %s", rlay->name, con->id, msg); +#endif + + free(con); + relay_sessions--; +} + +void +relay_dispatch_pfe(int fd, short event, void *ptr) +{ + struct imsgbuf *ibuf; + struct imsg imsg; + ssize_t n; + struct session *con; + struct ctl_natlook cnl; + struct timeval tv; + struct host *host; + struct table *table; + struct ctl_status st; + + ibuf = ptr; + switch (event) { + case EV_READ: + if ((n = imsg_read(ibuf)) == -1) + fatal("relay_dispatch_pfe: imsg_read_error"); + if (n == 0) + fatalx("relay_dispatch_pfe: pipe closed"); + break; + case 
EV_WRITE: + if (msgbuf_write(&ibuf->w) == -1) + fatal("relay_dispatch_pfe: msgbuf_write"); + imsg_event_add(ibuf); + return; + default: + fatalx("relay_dispatch_pfe: unknown event"); + } + + for (;;) { + if ((n = imsg_get(ibuf, &imsg)) == -1) + fatal("relay_dispatch_pfe: imsg_read error"); + if (n == 0) + break; + + switch (imsg.hdr.type) { + case IMSG_HOST_STATUS: + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(st)) + fatalx("relay_dispatch_pfe: invalid request"); + memcpy(&st, imsg.data, sizeof(st)); + if ((host = host_find(env, st.id)) == NULL) + fatalx("relay_dispatch_pfe: invalid host id"); + + if (host->up == st.up) { + log_debug("relay_dispatch_pfe: host %d => %d", + host->id, host->up); + fatalx("relay_dispatch_pfe: desynchronized"); + } + + if ((table = table_find(env, host->tableid)) == NULL) + fatalx("relay_dispatch_pfe: invalid table id"); + + DPRINTF("relay_dispatch_pfe: [%d] state %d for " + "host %u %s", proc_id, st.up, host->id, host->name); + + if ((st.up == HOST_UNKNOWN && host->up == HOST_DOWN) || + (st.up == HOST_DOWN && host->up == HOST_UNKNOWN)) { + host->up = st.up; + break; + } + if (st.up == HOST_UP) + table->up++; + else + table->up--; + host->up = st.up; + break; + case IMSG_NATLOOK: + bcopy(imsg.data, &cnl, sizeof(cnl)); + if ((con = session_find(env, cnl.id)) == NULL || + con->cnl == NULL) { + log_debug("relay_dispatch_pfe: " + "session expired"); + break; + } + bcopy(&cnl, con->cnl, sizeof(*con->cnl)); + evtimer_del(&con->ev); + evtimer_set(&con->ev, relay_natlook, con); + bzero(&tv, sizeof(tv)); + evtimer_add(&con->ev, &tv); + break; + default: + log_debug("relay_dispatch_msg: unexpected imsg %d", + imsg.hdr.type); + break; + } + imsg_free(&imsg); + } + imsg_event_add(ibuf); +} + +void +relay_dispatch_parent(int fd, short event, void * ptr) +{ + struct imsgbuf *ibuf; + struct imsg imsg; + ssize_t n; + + ibuf = ptr; + switch (event) { + case EV_READ: + if ((n = imsg_read(ibuf)) == -1) + fatal("relay_dispatch_parent: imsg_read error"); 
+ if (n == 0) + fatalx("relay_dispatch_parent: pipe closed"); + break; + case EV_WRITE: + if (msgbuf_write(&ibuf->w) == -1) + fatal("relay_dispatch_parent: msgbuf_write"); + imsg_event_add(ibuf); + return; + default: + fatalx("relay_dispatch_parent: unknown event"); + } + + for (;;) { + if ((n = imsg_get(ibuf, &imsg)) == -1) + fatal("relay_dispatch_parent: imsg_read error"); + if (n == 0) + break; + + switch (imsg.hdr.type) { + default: + log_debug("relay_dispatch_parent: unexpected imsg %d", + imsg.hdr.type); + break; + } + imsg_free(&imsg); + } +} + +SSL_CTX * +relay_ssl_ctx_create(struct relay *rlay) +{ + SSL_CTX *ctx; + char certfile[PATH_MAX], hbuf[128]; + + ctx = SSL_CTX_new(SSLv23_method()); + if (ctx == NULL) + goto err; + + /* Modify session timeout and cache size*/ + SSL_CTX_set_timeout(ctx, rlay->timeout.tv_sec); + if (rlay->proto->cache < -1) { + SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF); + } else if (rlay->proto->cache >= -1) { + SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_SERVER); + if (rlay->proto->cache >= 0) + SSL_CTX_sess_set_cache_size(ctx, rlay->proto->cache); + } + + /* Enable all workarounds */ + SSL_CTX_set_options(ctx, SSL_OP_ALL); + + if (relay_host(&rlay->ss, hbuf, sizeof(hbuf)) == NULL) + goto err; + + /* Load the certificate */ + if (snprintf(certfile, sizeof(certfile), + "/etc/ssl/%s.crt", hbuf) == -1) + goto err; + log_debug("relay_ssl_ctx_create: using certificate %s", certfile); + if (!SSL_CTX_use_certificate_file(ctx, certfile, SSL_FILETYPE_PEM)) + goto err; + + /* Load the private key */ + if (snprintf(certfile, sizeof(certfile), + "/etc/ssl/private/%s.key", hbuf) == -1) { + goto err; + } + log_debug("relay_ssl_ctx_create: using private key %s", certfile); + if (!SSL_CTX_use_PrivateKey_file(ctx, certfile, SSL_FILETYPE_PEM)) + goto err; + if (!SSL_CTX_check_private_key(ctx)) + goto err; + + /* Set session context to the local relay name */ + if (!SSL_CTX_set_session_id_context(ctx, rlay->name, + 
strlen(rlay->name))) + goto err; + + return (ctx); + + err: + if (ctx != NULL) + SSL_CTX_free(ctx); + ssl_error(rlay->name, "relay_ssl_ctx_create"); + return (NULL); +} + +void +relay_ssl_transaction(struct session *con) +{ + struct relay *rlay = (struct relay *)con->relay; + SSL *ssl; + + ssl = SSL_new(rlay->ctx); + if (ssl == NULL) + goto err; + + if (!SSL_set_ssl_method(ssl, SSLv23_server_method())) + goto err; + if (!SSL_set_fd(ssl, con->in.s)) + goto err; + SSL_set_accept_state(ssl); + + con->in.ssl = ssl; + + event_again(&con->ev, con->in.s, EV_TIMEOUT|EV_READ, + relay_ssl_accept, &con->tv_start, &env->timeout, con); + return; + + err: + if (ssl != NULL) + SSL_free(ssl); + ssl_error(rlay->name, "relay_ssl_transaction"); +} + +void +relay_ssl_accept(int fd, short event, void *arg) +{ + struct session *con = (struct session *)arg; + struct relay *rlay = (struct relay *)con->relay; + int ret; + int ssl_err; + int retry_flag; + + if (event == EV_TIMEOUT) { + relay_close(con, "SSL accept timeout"); + return; + } + + retry_flag = ssl_err = 0; + + ret = SSL_accept(con->in.ssl); + if (ret <= 0) { + ssl_err = SSL_get_error(con->in.ssl, ret); + + switch (ssl_err) { + case SSL_ERROR_WANT_READ: + retry_flag = EV_READ; + goto retry; + case SSL_ERROR_WANT_WRITE: + retry_flag = EV_WRITE; + goto retry; + default: + ssl_error(rlay->name, "relay_ssl_accept"); + return; + } + } + + DPRINTF("relay_ssl_accept: session %d: connection established", + con->id); + relay_session(con); + return; + +retry: + DPRINTF("relay_ssl_accept: session %d: scheduling on %s", con->id, + (retry_flag == EV_READ) ? "EV_READ" : "EV_WRITE"); + event_again(&con->ev, fd, EV_TIMEOUT|retry_flag, relay_ssl_accept, + &con->tv_start, &env->timeout, con); +} + +void +relay_ssl_connected(struct ctl_relay_event *cre) +{ + /* + * Hack libevent - we overwrite the internal bufferevent I/O + * functions to handle the SSL abstraction. 
+ */ + event_set(&cre->bev->ev_read, cre->s, EV_READ, + relay_ssl_readcb, cre->bev); + event_set(&cre->bev->ev_write, cre->s, EV_WRITE, + relay_ssl_writecb, cre->bev); +} + +void +relay_ssl_readcb(int fd, short event, void *arg) +{ + struct bufferevent *bufev = arg; + struct ctl_relay_event *cre = (struct ctl_relay_event *)bufev->cbarg; + struct session *con = (struct session *)cre->con; + struct relay *rlay = (struct relay *)con->relay; + int ret = 0, ssl_err = 0; + short what = EVBUFFER_READ; + size_t len; + char rbuf[READ_BUF_SIZE]; + int howmuch = READ_BUF_SIZE; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto err; + } + + if (bufev->wm_read.high != 0) + howmuch = MIN(sizeof(rbuf), bufev->wm_read.high); + + ret = SSL_read(cre->ssl, rbuf, howmuch); + if (ret <= 0) { + ssl_err = SSL_get_error(cre->ssl, ret); + + switch (ssl_err) { + case SSL_ERROR_WANT_READ: + DPRINTF("relay_ssl_readcb: session %d: " + "want read", con->id); + goto retry; + case SSL_ERROR_WANT_WRITE: + DPRINTF("relay_ssl_readcb: session %d: " + "want write", con->id); + goto retry; + default: + if (ret == 0) + what |= EVBUFFER_EOF; + else { + ssl_error(rlay->name, "relay_ssl_readcb"); + what |= EVBUFFER_ERROR; + } + goto err; + } + } + + if (evbuffer_add(bufev->input, rbuf, ret) == -1) { + what |= EVBUFFER_ERROR; + goto err; + } + + relay_bufferevent_add(&bufev->ev_read, bufev->timeout_read); + + len = EVBUFFER_LENGTH(bufev->input); + if (bufev->wm_read.low != 0 && len < bufev->wm_read.low) + return; + if (bufev->wm_read.high != 0 && len > bufev->wm_read.high) { + struct evbuffer *buf = bufev->input; + event_del(&bufev->ev_read); + evbuffer_setcb(buf, bufferevent_read_pressure_cb, bufev); + return; + } + + if (bufev->readcb != NULL) + (*bufev->readcb)(bufev, bufev->cbarg); + return; + + retry: + relay_bufferevent_add(&bufev->ev_read, bufev->timeout_read); + return; + + err: + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +void +relay_ssl_writecb(int fd, short event, void 
*arg) +{ + struct bufferevent *bufev = arg; + struct ctl_relay_event *cre = (struct ctl_relay_event *)bufev->cbarg; + struct session *con = (struct session *)cre->con; + struct relay *rlay = (struct relay *)con->relay; + int ret = 0, ssl_err; + short what = EVBUFFER_WRITE; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto err; + } + + if (EVBUFFER_LENGTH(bufev->output)) { + if (cre->buf == NULL) { + cre->buflen = EVBUFFER_LENGTH(bufev->output); + if ((cre->buf = malloc(cre->buflen)) == NULL) { + what |= EVBUFFER_ERROR; + goto err; + } + bcopy(EVBUFFER_DATA(bufev->output), + cre->buf, cre->buflen); + } + + ret = SSL_write(cre->ssl, cre->buf, cre->buflen); + if (ret <= 0) { + ssl_err = SSL_get_error(cre->ssl, ret); + + switch (ssl_err) { + case SSL_ERROR_WANT_READ: + DPRINTF("relay_ssl_writecb: session %d: " + "want read", con->id); + goto retry; + case SSL_ERROR_WANT_WRITE: + DPRINTF("relay_ssl_writecb: session %d: " + "want write", con->id); + goto retry; + default: + if (ret == 0) + what |= EVBUFFER_EOF; + else { + ssl_error(rlay->name, + "relay_ssl_writecb"); + what |= EVBUFFER_ERROR; + } + goto err; + } + } + evbuffer_drain(bufev->output, ret); + } + if (cre->buf != NULL) { + free(cre->buf); + cre->buf = NULL; + cre->buflen = 0; + } + + if (EVBUFFER_LENGTH(bufev->output) != 0) + relay_bufferevent_add(&bufev->ev_write, bufev->timeout_write); + + if (bufev->writecb != NULL && + EVBUFFER_LENGTH(bufev->output) <= bufev->wm_write.low) + (*bufev->writecb)(bufev, bufev->cbarg); + return; + + retry: + if (cre->buflen != 0) + relay_bufferevent_add(&bufev->ev_write, bufev->timeout_write); + return; + + err: + if (cre->buf != NULL) { + free(cre->buf); + cre->buf = NULL; + cre->buflen = 0; + } + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +int +relay_bufferevent_add(struct event *ev, int timeout) +{ + struct timeval tv, *ptv = NULL; + + if (timeout) { + timerclear(&tv); + tv.tv_sec = timeout; + ptv = &tv; + } + + return (event_add(ev, ptv)); +} + 
+#ifdef notyet +int +relay_bufferevent_printf(struct ctl_relay_event *cre, const char *fmt, ...) +{ + int ret; + va_list ap; + + va_start(ap, fmt); + ret = evbuffer_add_vprintf(cre->output, fmt, ap); + va_end(ap); + + if (cre->bev != NULL && + ret != -1 && EVBUFFER_LENGTH(cre->output) > 0 && + (cre->bev->enabled & EV_WRITE)) + bufferevent_enable(cre->bev, EV_WRITE); + + return (ret); +} +#endif + +int +relay_bufferevent_print(struct ctl_relay_event *cre, char *str) +{ + if (cre->bev == NULL) + return (evbuffer_add(cre->output, str, strlen(str))); + return (bufferevent_write(cre->bev, str, strlen(str))); +} + +int +relay_bufferevent_write_buffer(struct ctl_relay_event *cre, struct + evbuffer *buf) +{ + if (cre->bev == NULL) + return (evbuffer_add_buffer(cre->output, buf)); + return (bufferevent_write_buffer(cre->bev, buf)); +} + +int +relay_bufferevent_write(struct ctl_relay_event *cre, void *data, size_t size) +{ + if (cre->bev == NULL) + return (evbuffer_add(cre->output, data, size)); + return (bufferevent_write(cre->bev, data, size)); +} + +static __inline int +relay_proto_cmp(struct protonode *a, struct protonode *b) +{ + return (strcasecmp(a->key, b->key)); +} + +RB_GENERATE(proto_tree, protonode, nodes, relay_proto_cmp); diff --git a/usr.sbin/relayd/relayd.8 b/usr.sbin/relayd/relayd.8 index d708132debc..6a2f552aa8d 100644 --- a/usr.sbin/relayd/relayd.8 +++ b/usr.sbin/relayd/relayd.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: relayd.8,v 1.6 2007/02/07 13:30:17 reyk Exp $ +.\" $OpenBSD: relayd.8,v 1.7 2007/02/22 03:32:39 reyk Exp $ .\" .\" Copyright (c) 2006 Pierre-Yves Ritschard <pyr@openbsd.org> .\" @@ -106,3 +106,15 @@ Unix-domain socket used for communication with .Sh SEE ALSO .Xr hoststated.conf 5 , .Xr hoststatectl 8 +.Sh HISTORY +The +.Nm +program first appeared in +.Ox 4.1 . +.Sh AUTHORS +The +.Nm +program was written by +.An Pierre-Yves Ritschard Aq pyr@openbsd.org +and +.An Reyk Floeter Aq reyk@openbsd.org . 
diff --git a/usr.sbin/relayd/relayd.c b/usr.sbin/relayd/relayd.c index 976029c0e43..093b35fb849 100644 --- a/usr.sbin/relayd/relayd.c +++ b/usr.sbin/relayd/relayd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: relayd.c,v 1.16 2007/02/08 13:32:24 reyk Exp $ */ +/* $OpenBSD: relayd.c,v 1.17 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ -40,25 +40,33 @@ __dead void usage(void); void main_sig_handler(int, short, void *); -void main_shutdown(void); +void main_shutdown(struct hoststated *); void main_dispatch_pfe(int, short, void *); void main_dispatch_hce(int, short, void *); +void main_dispatch_relay(int, short, void *); int check_child(pid_t, const char *); int pipe_parent2pfe[2]; int pipe_parent2hce[2]; +int pipe_parent2relay[2]; int pipe_pfe2hce[2]; +int pipe_pfe2relay[RELAY_MAXPROC][2]; struct imsgbuf *ibuf_pfe; struct imsgbuf *ibuf_hce; +struct imsgbuf *ibuf_relay; pid_t pfe_pid = 0; pid_t hce_pid = 0; +pid_t relay_pid = 0; void main_sig_handler(int sig, short event, void *arg) { - int die = 0; + struct hoststated *env = arg; + int die = 0; + + log_debug("signal %d", sig); switch (sig) { case SIGTERM: @@ -73,8 +81,12 @@ main_sig_handler(int sig, short event, void *arg) hce_pid = 0; die = 1; } + if (check_child(relay_pid, "socket relay engine")) { + relay_pid = 0; + die = 1; + } if (die) - main_shutdown(); + main_shutdown(env); break; case SIGHUP: /* reconfigure */ @@ -145,6 +157,8 @@ main(int argc, char *argv[]) fprintf(stderr, "configuration OK\n"); exit(0); } + if (debug) + env.opts |= HOSTSTATED_OPT_LOGUPDATE; if (geteuid()) errx(1, "need root privileges"); @@ -163,27 +177,42 @@ main(int argc, char *argv[]) fatal("socketpair"); if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_parent2hce) == -1) fatal("socketpair"); + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_parent2relay) == -1) + fatal("socketpair"); if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_pfe2hce) == -1) fatal("socketpair"); + for (c = 0; c 
< env.prefork_relay; c++) { + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, + pipe_pfe2relay[c]) == -1) + fatal("socketpair"); + session_socket_blockmode(pipe_pfe2relay[c][0], BM_NONBLOCK); + session_socket_blockmode(pipe_pfe2relay[c][1], BM_NONBLOCK); + } session_socket_blockmode(pipe_parent2pfe[0], BM_NONBLOCK); session_socket_blockmode(pipe_parent2pfe[1], BM_NONBLOCK); session_socket_blockmode(pipe_parent2hce[0], BM_NONBLOCK); session_socket_blockmode(pipe_parent2hce[1], BM_NONBLOCK); + session_socket_blockmode(pipe_parent2relay[0], BM_NONBLOCK); + session_socket_blockmode(pipe_parent2relay[1], BM_NONBLOCK); session_socket_blockmode(pipe_pfe2hce[0], BM_NONBLOCK); session_socket_blockmode(pipe_pfe2hce[1], BM_NONBLOCK); - pfe_pid = pfe(&env, pipe_parent2pfe, pipe_parent2hce, pipe_pfe2hce); - hce_pid = hce(&env, pipe_parent2pfe, pipe_parent2hce, pipe_pfe2hce); + pfe_pid = pfe(&env, pipe_parent2pfe, pipe_parent2hce, + pipe_parent2relay, pipe_pfe2hce, pipe_pfe2relay); + hce_pid = hce(&env, pipe_parent2pfe, pipe_parent2hce, + pipe_parent2relay, pipe_pfe2hce, pipe_pfe2relay); + relay_pid = relay(&env, pipe_parent2pfe, pipe_parent2hce, + pipe_parent2relay, pipe_pfe2hce, pipe_pfe2relay); setproctitle("parent"); event_init(); - signal_set(&ev_sigint, SIGINT, main_sig_handler, NULL); - signal_set(&ev_sigterm, SIGTERM, main_sig_handler, NULL); - signal_set(&ev_sigchld, SIGCHLD, main_sig_handler, NULL); - signal_set(&ev_sighup, SIGHUP, main_sig_handler, NULL); + signal_set(&ev_sigint, SIGINT, main_sig_handler, &env); + signal_set(&ev_sigterm, SIGTERM, main_sig_handler, &env); + signal_set(&ev_sigchld, SIGCHLD, main_sig_handler, &env); + signal_set(&ev_sighup, SIGHUP, main_sig_handler, &env); signal_add(&ev_sigint, NULL); signal_add(&ev_sigterm, NULL); signal_add(&ev_sigchld, NULL); @@ -192,15 +221,22 @@ main(int argc, char *argv[]) close(pipe_parent2pfe[1]); close(pipe_parent2hce[1]); + close(pipe_parent2relay[1]); close(pipe_pfe2hce[0]); close(pipe_pfe2hce[1]); + for (c = 
0; c < env.prefork_relay; c++) { + close(pipe_pfe2relay[c][0]); + close(pipe_pfe2relay[c][1]); + } if ((ibuf_pfe = calloc(1, sizeof(struct imsgbuf))) == NULL || - (ibuf_hce = calloc(1, sizeof(struct imsgbuf))) == NULL) + (ibuf_hce = calloc(1, sizeof(struct imsgbuf))) == NULL || + (ibuf_relay = calloc(1, sizeof(struct imsgbuf))) == NULL) fatal(NULL); imsg_init(ibuf_pfe, pipe_parent2pfe[0], main_dispatch_pfe); imsg_init(ibuf_hce, pipe_parent2hce[0], main_dispatch_hce); + imsg_init(ibuf_relay, pipe_parent2relay[0], main_dispatch_relay); ibuf_pfe->events = EV_READ; event_set(&ibuf_pfe->ev, ibuf_pfe->fd, ibuf_pfe->events, @@ -212,13 +248,21 @@ main(int argc, char *argv[]) ibuf_hce->handler, ibuf_hce); event_add(&ibuf_hce->ev, NULL); + ibuf_relay->events = EV_READ; + event_set(&ibuf_relay->ev, ibuf_relay->fd, ibuf_relay->events, + ibuf_relay->handler, ibuf_relay); + event_add(&ibuf_relay->ev, NULL); + + if (env.flags & F_DEMOTE) + carp_demote_reset(env.demote_group, 0); + event_dispatch(); return (0); } void -main_shutdown(void) +main_shutdown(struct hoststated *env) { pid_t pid; @@ -226,6 +270,8 @@ main_shutdown(void) kill(pfe_pid, SIGTERM); if (hce_pid) kill(hce_pid, SIGTERM); + if (relay_pid) + kill(relay_pid, SIGTERM); do { if ((pid = wait(NULL)) == -1 && @@ -234,6 +280,9 @@ main_shutdown(void) } while (pid != -1 || (pid == -1 && errno == EINTR)); control_cleanup(); + carp_demote_shutdown(); + if (env->flags & F_DEMOTE) + carp_demote_reset(env->demote_group, 128); log_info("terminating"); exit(0); } @@ -276,6 +325,7 @@ main_dispatch_pfe(int fd, short event, void *ptr) struct imsgbuf *ibuf; struct imsg imsg; ssize_t n; + struct ctl_demote demote; ibuf = ptr; switch (event) { @@ -301,6 +351,14 @@ main_dispatch_pfe(int fd, short event, void *ptr) break; switch (imsg.hdr.type) { + case IMSG_DEMOTE: + if (imsg.hdr.len - IMSG_HEADER_SIZE != + sizeof(demote)) + fatalx("main_dispatch_pfe: " + "invalid size of demote request"); + memcpy(&demote, imsg.data, sizeof(demote)); + 
carp_demote_set(demote.group, demote.level); + break; default: log_debug("main_dispatch_pfe: unexpected imsg %d", imsg.hdr.type); @@ -351,6 +409,46 @@ main_dispatch_hce(int fd, short event, void * ptr) } } +void +main_dispatch_relay(int fd, short event, void * ptr) +{ + struct imsgbuf *ibuf; + struct imsg imsg; + ssize_t n; + + ibuf = ptr; + switch (event) { + case EV_READ: + if ((n = imsg_read(ibuf)) == -1) + fatal("imsg_read error"); + if (n == 0) + fatalx("main_dispatch_relay: pipe closed"); + break; + case EV_WRITE: + if (msgbuf_write(&ibuf->w) == -1) + fatal("msgbuf_write"); + imsg_event_add(ibuf); + return; + default: + fatalx("unknown event"); + } + + for (;;) { + if ((n = imsg_get(ibuf, &imsg)) == -1) + fatal("main_dispatch_relay: imsg_read error"); + if (n == 0) + break; + + switch (imsg.hdr.type) { + default: + log_debug("main_dispatch_relay: unexpected imsg %d", + imsg.hdr.type); + break; + } + imsg_free(&imsg); + } +} + struct host * host_find(struct hoststated *env, objid_t id) { @@ -386,6 +484,32 @@ service_find(struct hoststated *env, objid_t id) return (NULL); } +struct relay * +relay_find(struct hoststated *env, objid_t id) +{ + struct relay *rlay; + + TAILQ_FOREACH(rlay, &env->relays, entry) + if (rlay->id == id) + return (rlay); + return (NULL); +} + +struct session * +session_find(struct hoststated *env, objid_t id) +{ + struct relay *rlay; + struct session *con; + + TAILQ_FOREACH(rlay, &env->relays, entry) + TAILQ_FOREACH(con, &rlay->sessions, entry) { + log_debug("session_find: %d : %d", id, con->id); + if (con->id == id) + return (con); + } + return (NULL); +} + struct host * host_findbyname(struct hoststated *env, const char *name) { @@ -421,6 +545,17 @@ service_findbyname(struct hoststated *env, const char *name) return (NULL); } +struct relay * +relay_findbyname(struct hoststated *env, const char *name) +{ + struct relay *rlay; + + TAILQ_FOREACH(rlay, &env->relays, entry) + if (strcmp(rlay->name, name) == 0) + return (rlay); + return 
(NULL); +} + void event_again(struct event *ev, int fd, short event, void (*fn)(int, short, void *), @@ -442,3 +577,34 @@ event_again(struct event *ev, int fd, short event, event_set(ev, fd, event, fn, arg); event_add(ev, &tv); } + +int +expand_string(char *label, size_t len, const char *srch, const char *repl) +{ + char *tmp; + char *p, *q; + + if ((tmp = calloc(1, len)) == NULL) { + log_debug("expand_string: calloc"); + return (-1); + } + p = q = label; + while ((q = strstr(p, srch)) != NULL) { + *q = '\0'; + if ((strlcat(tmp, p, len) >= len) || + (strlcat(tmp, repl, len) >= len)) { + log_debug("expand_string: string too long"); + return (-1); + } + q += strlen(srch); + p = q; + } + if (strlcat(tmp, p, len) >= len) { + log_debug("expand_string: string too long"); + return (-1); + } + strlcpy(label, tmp, len); /* always fits */ + free(tmp); + + return (0); +} diff --git a/usr.sbin/relayd/relayd.conf.5 b/usr.sbin/relayd/relayd.conf.5 index cbf5efe1ad3..3dbfdfc8a9c 100644 --- a/usr.sbin/relayd/relayd.conf.5 +++ b/usr.sbin/relayd/relayd.conf.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: relayd.conf.5,v 1.17 2007/02/07 15:17:46 reyk Exp $ +.\" $OpenBSD: relayd.conf.5,v 1.18 2007/02/22 03:32:39 reyk Exp $ .\" .\" Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> .\" @@ -26,7 +26,7 @@ is the configuration file for the Host Status Daemon, .Xr hoststated 8 . .Sh SECTIONS .Nm -is divided into four main sections: +is divided into six main sections: .Bl -tag -width xxxx .It Sy Macros User-defined variables may be defined and used later, simplifying the @@ -43,6 +43,11 @@ they contain. Services will be translated to .Xr pf 4 rdr rules if their table or backup table have content. +.It Sy Relays +Relays allow layer 7 loadbalancing, SSL acceleration, and +general-purpose TCP proxying. +.It Sy Protocols +Protocols are predefined protocol handlers and settings for relays. 
.El .Pp Within the sections, @@ -109,6 +114,15 @@ or .Ar unknown (the host is disabled or has not been checked yet). .Pp +.It Ic prefork Ar number +When using relays, run the specified number of processes to handle +relayed connections. +This will increase performance and prevent delays when connecting +to a relay. +.Xr hoststated 8 +will run 5 relay processes by default and every process will handle +all configured relays. +.Pp .It Xo .Ic timeout Ar number .Xc @@ -252,7 +266,231 @@ to specify which interface the rdr rule will be enabled on: interface ``ifname'' .Ed .El -.Sh EXAMPLE +.Sh RELAYS +Relays will forward TCP traffic between a client and a target server. +In contrast to IP forwarding and redirection in the network stack, a +relay will accept incoming TCP connections from remote clients as a +server, open an outgoing connection to a target host, and forward +any traffic between the target host and the remote client. +A relay is also called an application layer or layer 7 proxy. +.Pp +The main purpose of a relay is to provide advanced loadbalancing +functionality based on specified protocol characteristics, such as +HTTP headers, to provide SSL acceleration functionality and to allow +basic handling of the underlying application protocol. +.Pp +The relay configuration directives are described below. +.Bl -tag -width Ds +.It Xo +.Ic listen on Ar address Ic port Ar port +.Op Ic ssl +.Xc +Specify the address and port for the relay to listen on. +The relay will accept incoming connections to the specified address. +.Pp +If the +.Ic ssl +keyword is present, the relay will accept connections using the +encrypted SSL protocol. +The relay will look up a private key in +.Pa /etc/ssl/private/address.key +and a public certificate in +.Pa /etc/ssl/address.crt +in this case, +where +.Ar address +is the specified IP address of the relay to listen on. +See +.Xr ssl 8 +for details about SSL server certificates. 
+.It Ic forward to Ar address Ic port Ar port +Specify the address and port of the target host to connect to. +.It Ic service Ar name +Use the first virtual IP address and port from the specified service +as the target host to connect to. +This is mutually exclusive with the +.Ic forward to +and +.Ic table +directives. +.It Xo +.Ic table Ar name Ar mode +.Op Ic no check +.Xc +Get the target host from the specified table. +The following modes are available to select a host from the specified +table: +.Pp +.Bl -tag -width loadbalance -offset indent -compact +.It Ic roundrobin +Distributes the outgoing connections using a round-robin scheduler +through all active hosts. +.It Ic loadbalance +Balances the outgoing connections across the active hosts based on the +hashed name of the table, the source and destination addresses, +and the corresponding ports. +.It Ic hash +Like the +.Ic loadbalance +mode, but without including the source and destination addresses and +ports. +Additional input can be fed into the hash by looking at HTTP +headers and GET variables, see the +.Sx Protocols +section below. +.El +.It Ic nat lookup +When redirecting connections with a +.Ar rdr +rule in +.Xr pf.conf 5 +to a relay listening on localhost, this directive makes it possible to +look up the real destination address of the intended target host. +This allows the relay to run as a transparent proxy. +If either the +.Ic forward to , +.Ic service , +or +.Ic table +directive is present, it will be used as a backup if the NAT lookup +failed. +.It Ic timeout Ar seconds +Specify the timeout in seconds for accepted sessions. +The default timeout is 600 seconds (10 minutes). +.It Ic disable +Start the relay but immediately close any accepted connections. +.It Ic protocol Ar name +Use the specified protocol definition for the relay. +The generic TCP protocol options will be used by default, +see the +.Sx Protocols +section below. +.El +.Sh PROTOCOLS +Protocols are templates defining actions and settings for relays. 
+They allow setting generic TCP options, SSL settings, and actions +specific to the selected application layer protocol. +.Pp +The protocol configuration directives are described below. +.Bl -tag -width Ds +.It Ic protocol Ar type +Enable special handling of the specified application layer protocol. +The supported protocols are: +.Pp +.Bl -tag -width http -offset indent -compact +.It Ic http +Handle the Hypertext Transfer Protocol +(HTTP or "HTTPS" if encapsulated in an SSL tunnel). +.It Ic tcp +Generic handler for TCP-based protocols. +.El +.It Ic append Ar value Ic to Ar key +Append the specified value to a protocol entity with the selected name. +When using the +.Ic http +protocol, +.Ar key +will indicate a specified HTTP header. +.It Ic change Ar key Ic to Ar value +Like the +.Ic append +directive above, but change the contents of the specified entity. +.It Ic remove Ar key +Remove the entity with the selected name. +.It Xo +.Op Ic url +.Ic expect Ar value Ic from Ar key +.Xc +Expect an entity with the specified value. +If the entity is not present or the value doesn't match, the connection +will be dropped. +The +.Ic url +keyword will expect the value as a GET variable in the URL instead +of an HTTP header value when using the +.Ic http +protocol. +.It Xo +.Op Ic url +.Ic filter Ar value Ic from Ar key +.Xc +Like the +.Ic expect +directive above, but drop any connections with the specified entity +and value. +.It Xo +.Op Ic url +.Ic hash Ar key +.Xc +Feed the value of the selected entity into the loadbalancing hash to +select the target host. +See the +.Ic table +keyword in the +.Sx Relays +section above. +The +.Ic url +keyword will look up the entity as a GET variable in the URL instead +of an HTTP header value when using the +.Ic http +protocol. +.It Ic tcp Ar option +Enable or disable the specified TCP option, see +.Xr tcp 4 +for details about TCP options. 
+Valid options are: +.Pp +.Bl -tag -width Ds +.It Xo +.Op Ic no +.Ic nodelay +.Xc +Enable the TCP NODELAY option for this connection. +This is recommended to avoid delays in the relayed data stream, +e.g. for SSH connections. +.It Xo +.Op Ic no +.Ic sack +.Xc +Use selective acknowledgements for this connection. +.It Ic socket buffer Ar number +Set the socket-level buffer size for input and output for this +connection. +This will affect the TCP window size. +.El +.It Ic ssl session cache Ar value +Set the maximum size of the SSL session cache. +If the +.Ar value +is zero, the default size defined by the SSL library will be +used; a positive number will set the maximum size in bytes and the +keyword +.Ic disable +will disable the SSL session cache. +.El +.Pp +The +.Ar value +strings of the +.Ic append +and +.Ic change +directives may contain predefined macros that will be expanded at runtime: +.Pp +.Bl -tag -width $SERVER_ADDR -offset indent -compact +.It Ic $REMOTE_ADDR +The IP address of the connected client. +.It Ic $REMOTE_PORT +The TCP source port of the connected client. +.It Ic $SERVER_ADDR +The configured IP address of the relay. +.It Ic $SERVER_PORT +The configured TCP server port of the relay. +.It Ic $TIMEOUT +The configured session timeout of the relay. +.El +.Sh EXAMPLES This configuration file would create a service .Dq www which load balances four hosts @@ -293,6 +531,52 @@ service www { backup table sorryhost } .Ed +.Pp +The following configuration would add a relay to forward +secure HTTPS connections to a pool of HTTP webservers +using the +.Ic loadbalance +mode (SSL acceleration and layer 7 loadbalancing). 
+The HTTP protocol definition will add two HTTP headers containing +address information of the client and the server, set the +.Dq Keep-Alive +header value to the configured session timeout, +and include the +.Dq sessid +variable in the hash to calculate the target host: +.Bd -literal -offset indent +protocol http_ssl { + protocol http + append "$REMOTE_ADDR" to "X-Forwarded-For" + append "$SERVER_ADDR:$SERVER_PORT" to "X-Forwarded-By" + change "Keep-Alive" to "$TIMEOUT" + url hash "sessid" +} + +relay sslaccel { + listen on www.example.com port 443 ssl + protocol http_ssl + table phphosts loadbalance +} +.Ed +.Pp +The second relay example will accept incoming connections to port +2222 and forward them to a remote SSH server. +The TCP +.Ic nodelay +option will allow a +.Dq smooth +SSH session without delays between keystrokes or displayed output on +the terminal: +.Bd -literal -offset indent +protocol http_ssl { + tcp { nodelay, socket buffer 65536 } +} + +relay sshforward { + listen on www.example.com port 2222 + forward to shell.example.com port 22 +} .Sh FILES .Bl -tag -width "/etc/hoststated.conf" -compact .It Pa /etc/hoststated.conf @@ -300,7 +584,25 @@ service www { configuration file .It Pa /etc/services Service name database +.It Pa /etc/ssl/private/address.key +.It Pa /etc/ssl/address.crt +Location of the relay SSL server certificates, where +.Ar address +is the configured IP address of the relay. .El .Sh SEE ALSO .Xr hoststatectl 8 , -.Xr hoststated 8 +.Xr hoststated 8 , +.Xr ssl 8 +.Sh HISTORY +The +.Nm +program first appeared in +.Ox 4.1 . +.Sh AUTHORS +The +.Nm +program was written by +.An Pierre-Yves Ritschard Aq pyr@openbsd.org +and +.An Reyk Floeter Aq reyk@openbsd.org . 
diff --git a/usr.sbin/relayd/relayd.h b/usr.sbin/relayd/relayd.h index 7ed5ef317f3..1008882e8ab 100644 --- a/usr.sbin/relayd/relayd.h +++ b/usr.sbin/relayd/relayd.h @@ -1,7 +1,8 @@ -/* $OpenBSD: relayd.h,v 1.24 2007/02/07 15:17:46 reyk Exp $ */ +/* $OpenBSD: relayd.h,v 1.25 2007/02/22 03:32:39 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> + * Copyright (c) 2006 Reyk Floeter <reyk@openbsd.org> * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any @@ -17,6 +18,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include <sys/tree.h> + #define CONF_FILE "/etc/hoststated.conf" #define HOSTSTATED_SOCKET "/var/run/hoststated.sock" #define PF_SOCKET "/dev/pf" @@ -32,6 +35,14 @@ #define MAX_NAME_SIZE 64 #define SRV_MAX_VIRTS 16 +#define RELAY_MAX_SESSIONS 1024 +#define RELAY_TIMEOUT 600 +#define RELAY_CACHESIZE -1 /* use default size */ +#define RELAY_NUMPROC 5 +#define RELAY_MAXPROC 32 +#define RELAY_MAXHOSTS 32 +#define RELAY_STATINTERVAL 60 + #define SMALL_READ_BUF_SIZE 1024 #define READ_BUF_SIZE 65535 #define ICMP_BUF_SIZE 64 @@ -80,6 +91,7 @@ enum imsg_type { IMSG_CTL_SERVICE, IMSG_CTL_TABLE, IMSG_CTL_HOST, + IMSG_CTL_RELAY, IMSG_CTL_TABLE_CHANGED, IMSG_CTL_PULL_RULESET, IMSG_CTL_PUSH_RULESET, @@ -93,6 +105,7 @@ enum imsg_type { IMSG_CTL_SHUTDOWN, IMSG_CTL_RELOAD, IMSG_CTL_NOTIFY, + IMSG_CTL_STATISTICS, IMSG_SERVICE_ENABLE, /* notifies from pfe to hce */ IMSG_SERVICE_DISABLE, IMSG_TABLE_ENABLE, @@ -100,7 +113,10 @@ enum imsg_type { IMSG_HOST_ENABLE, IMSG_HOST_DISABLE, IMSG_HOST_STATUS, /* notifies from hce to pfe */ - IMSG_SYNC + IMSG_SYNC, + IMSG_NATLOOK, + IMSG_DEMOTE, + IMSG_STATISTICS }; struct imsg_hdr { @@ -120,6 +136,8 @@ typedef u_int32_t objid_t; struct ctl_status { objid_t id; int up; + int retry_cnt; + u_long check_cnt; }; struct ctl_id { @@ -127,6 +145,11 @@ struct ctl_id { char name[MAX_NAME_SIZE]; }; +struct 
ctl_demote { + char group[IFNAMSIZ]; + int level; +}; + struct ctl_icmp_event { struct hoststated *env; int s; @@ -150,6 +173,66 @@ struct ctl_tcp_event { char rbuf[SMALL_READ_BUF_SIZE]; }; +enum httpmethod { + HTTP_METHOD_GET = 0, + HTTP_METHOD_HEAD = 1, + HTTP_METHOD_POST = 2, + HTTP_METHOD_PUT = 3, + HTTP_METHOD_DELETE = 4, + HTTP_METHOD_OPTIONS = 5, + HTTP_METHOD_TRACE = 6, + HTTP_METHOD_CONNECT = 7 +}; + +struct ctl_relay_event { + int s; + in_port_t port; + struct sockaddr_storage ss; + struct bufferevent *bev; + struct evbuffer *output; + struct ctl_relay_event *dst; + void *con; + SSL *ssl; + u_int8_t *nodes; + + int marked; + int line; + size_t toread; + enum httpmethod method; + + u_int8_t *buf; + int buflen; + u_int8_t flags; +}; + +struct ctl_natlook { + objid_t id; + struct sockaddr_storage src; + struct sockaddr_storage dst; + struct sockaddr_storage rsrc; + struct sockaddr_storage rdst; + in_port_t rsport; + in_port_t rdport; + int in; + int proc; +}; + +struct ctl_stats { + objid_t id; + int proc; + u_int interval; + u_long cnt; + u_long tick; + + u_long avg; + u_long last; + + u_long avg_hour; + u_long last_hour; + u_long avg_day; + u_long last_day; +}; + struct address { struct sockaddr_storage ss; in_port_t port; @@ -170,6 +253,8 @@ TAILQ_HEAD(addresslist, address); #define F_ACTIVE_RULESET 0x0200 #define F_CHECK_SENT 0x0400 #define F_SSL 0x0800 +#define F_NATLOOK 0x1000 +#define F_DEMOTE 0x2000 struct host { u_int16_t flags; @@ -179,6 +264,11 @@ struct host { char name[MAXHOSTNAMELEN]; int up; int last_up; + u_long check_cnt; + u_long up_cnt; + int retry_cnt; + int retry; + struct sockaddr_storage ss; struct ctl_tcp_event cte; TAILQ_ENTRY(host) entry; @@ -190,6 +280,7 @@ enum host_status { HOST_UNKNOWN = 0, HOST_UP = 1 }; +#define HOST_ISUP(x) (x == HOST_UP) struct table { objid_t id; @@ -197,8 +288,11 @@ struct table { u_int16_t flags; int check; int up; + int demoted; + char demote_group[IFNAMSIZ]; in_port_t port; int retcode; + int retry; 
struct timeval timeout; char name[TABLE_NAME_SIZE]; char path[MAXPATHLEN]; @@ -233,10 +327,119 @@ struct service { }; TAILQ_HEAD(servicelist, service); +struct session { + objid_t id; + struct ctl_relay_event in; + struct ctl_relay_event out; + u_int32_t outkey; + struct event ev; + struct timeval timeout; + struct timeval tv_start; + struct timeval tv_last; + int done; + void *relay; + struct ctl_natlook *cnl; + TAILQ_ENTRY(session) entry; +}; +TAILQ_HEAD(sessionlist, session); + +enum nodeaction { + NODE_ACTION_NONE = 0, + NODE_ACTION_APPEND = 1, + NODE_ACTION_CHANGE = 2, + NODE_ACTION_REMOVE = 3, + NODE_ACTION_EXPECT = 4, + NODE_ACTION_FILTER = 5, + NODE_ACTION_HASH = 6 +}; + +struct protonode { + objid_t id; + char *key; + enum nodeaction action; + char *value; + int macro; + int getvars; + int header; + int mark; + + RB_ENTRY(protonode) nodes; +}; +RB_HEAD(proto_tree, protonode); + +enum prototype { + RELAY_PROTO_TCP = 0, + RELAY_PROTO_HTTP = 1 +}; + +#define TCPFLAG_NODELAY 0x01 +#define TCPFLAG_NNODELAY 0x02 +#define TCPFLAG_SACK 0x04 +#define TCPFLAG_NSACK 0x08 +#define TCPFLAG_BUFSIZ 0x10 + +struct protocol { + objid_t id; + u_int16_t flags; + u_int16_t tcpflags; + int tcpbufsiz; + char name[MAX_NAME_SIZE]; + int cache; + enum prototype type; + int lateconnect; + + int nodecount; + struct proto_tree tree; + TAILQ_ENTRY(protocol) entry; +}; +TAILQ_HEAD(protolist, protocol); + +struct relay { + objid_t id; + u_int16_t flags; + int up; + struct protocol *proto; + char name[MAXHOSTNAMELEN]; + int s; + in_port_t port; + struct sockaddr_storage ss; + struct bufferevent *bev; + + int dsts; + in_port_t dstport; + struct sockaddr_storage dstss; + struct bufferevent *dstbev; + + struct table *dsttable; + u_int32_t dstkey; + struct host *dsthost[RELAY_MAXHOSTS]; + int dstnhosts; + int dstmode; + int dstcheck; + + struct event ev; + struct timeval timeout; + SSL_CTX *ctx; + + struct ctl_stats stats[RELAY_MAXPROC + 1]; + + struct sessionlist sessions; + 
TAILQ_ENTRY(relay) entry; +}; +TAILQ_HEAD(relaylist, relay); + +enum dstmode { + RELAY_DSTMODE_LOADBALANCE = 0, + RELAY_DSTMODE_ROUNDROBIN = 1, + RELAY_DSTMODE_HASH = 2 +}; +#define RELAY_DSTMODE_DEFAULT RELAY_DSTMODE_LOADBALANCE + enum { PROC_MAIN, PROC_PFE, - PROC_HCE + PROC_HCE, + PROC_RELAY } hoststated_process; struct hoststated { @@ -245,14 +448,24 @@ struct hoststated { struct pfdata *pf; int tablecount; int servicecount; + int protocount; + int relaycount; struct timeval interval; struct timeval timeout; struct table empty_table; + struct protocol proto_default; struct event ev; struct tablelist tables; struct servicelist services; + struct protolist protos; + struct relaylist relays; + u_int16_t prefork_relay; + char demote_group[IFNAMSIZ]; u_int16_t id; + struct event statev; + struct timeval statinterval; + int has_icmp; int has_icmp6; struct ctl_icmp_event icmp_send; @@ -313,6 +526,7 @@ void fatal(const char *); void fatalx(const char *); const char *host_status(enum host_status); const char *table_check(enum table_check); +const char *print_availability(u_long, u_long); /* buffer.c */ struct buf *buf_open(size_t); @@ -339,7 +553,8 @@ void imsg_free(struct imsg *); void imsg_event_add(struct imsgbuf *); /* needs to be provided externally */ /* pfe.c */ -pid_t pfe(struct hoststated *, int [2], int [2], int [2]); +pid_t pfe(struct hoststated *, int [2], int [2], int [2], int [2], + int [RELAY_MAXPROC][2]); void show(struct ctl_conn *); int enable_service(struct ctl_conn *, struct ctl_id *); int enable_table(struct ctl_conn *, struct ctl_id *); @@ -355,11 +570,20 @@ void flush_table(struct hoststated *, struct service *); void sync_table(struct hoststated *, struct service *, struct table *); void sync_ruleset(struct hoststated *, struct service *, int); void flush_rulesets(struct hoststated *); +int natlook(struct hoststated *, struct ctl_natlook *); /* hce.c */ -pid_t hce(struct hoststated *, int [2], int [2], int [2]); +pid_t hce(struct hoststated *, 
int [2], int [2], int [2], int [2], + int [RELAY_MAXPROC][2]); void hce_notify_done(struct host *, const char *); +/* relay.c */ +pid_t relay(struct hoststated *, int [2], int [2], int [2], int [2], + int [RELAY_MAXPROC][2]); +void relay_notify_done(struct host *, const char *); + +RB_PROTOTYPE(proto_tree, protonode, nodes, relay_proto_cmp); + /* check_icmp.c */ void icmp_init(struct hoststated *); void schedule_icmp(struct hoststated *, struct host *); @@ -384,3 +608,14 @@ struct service *service_findbyname(struct hoststated *, const char *); void event_again(struct event *, int, short, void (*)(int, short, void *), struct timeval *, struct timeval *, void *); +struct relay *relay_find(struct hoststated *, objid_t); +struct session *session_find(struct hoststated *, objid_t); +struct relay *relay_findbyname(struct hoststated *, const char *); +int expand_string(char *, size_t, const char *, const char *); + +/* carp.c */ +int carp_demote_init(char *, int); +void carp_demote_shutdown(void); +int carp_demote_get(char *); +int carp_demote_set(char *, int); +int carp_demote_reset(char *, int); diff --git a/usr.sbin/relayd/ssl.c b/usr.sbin/relayd/ssl.c index ddac5c5974c..cf5016d69e7 100644 --- a/usr.sbin/relayd/ssl.c +++ b/usr.sbin/relayd/ssl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ssl.c,v 1.6 2007/02/08 13:32:24 reyk Exp $ */ +/* $OpenBSD: ssl.c,v 1.7 2007/02/22 03:32:40 reyk Exp $ */ /* * Copyright (c) 2006 Pierre-Yves Ritschard <pyr@spootnik.org> @@ -32,6 +32,7 @@ #include <openssl/ssl.h> #include <openssl/err.h> +#include <openssl/engine.h> #include "hoststated.h" @@ -250,6 +251,10 @@ ssl_init(struct hoststated *env) { SSL_library_init(); SSL_load_error_strings(); + + /* Init hardware crypto engines. */ + ENGINE_load_builtin_engines(); + ENGINE_register_all_complete(); } void |