diff options
author | Ryan Thomas McBride <mcbride@cvs.openbsd.org> | 2004-03-22 04:54:19 +0000 |
---|---|---|
committer | Ryan Thomas McBride <mcbride@cvs.openbsd.org> | 2004-03-22 04:54:19 +0000 |
commit | e9021d37990da8cd956294b3e568b70970680503 (patch) | |
tree | e2379b236b1edf944ea9ff0e6a8e2b56ef4454a3 /sys/net | |
parent | 4d6c0c7208725a3573825ee052fe58d20e8b998c (diff) |
Support for best effort bulk transfers of states when pfsync syncif is
configured. This allows pfsync+carp clusters to come up gracefully
without killing active connections. pfsync now prevents carp from
preempting to become master until the state table has sync'd.
ABI change: any application which uses struct pf_state must be recompiled.
Reminded about this by Christian Gut. Thanks to beck@ cedric@ and dhartmei@
for testing and comments.
ok deraadt@
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/if_pfsync.c | 193 | ||||
-rw-r--r-- | sys/net/if_pfsync.h | 27 | ||||
-rw-r--r-- | sys/net/pf.c | 5 | ||||
-rw-r--r-- | sys/net/pf_ioctl.c | 9 | ||||
-rw-r--r-- | sys/net/pfvar.h | 7 |
5 files changed, 219 insertions, 22 deletions
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c index eb037572d8b..e67cfb4f2b7 100644 --- a/sys/net/if_pfsync.c +++ b/sys/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.23 2004/02/20 19:22:03 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.24 2004/03/22 04:54:17 mcbride Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -71,8 +71,9 @@ int pfsyncdebug; #define DPRINTF(x) #endif -struct pfsync_softc pfsyncif; -struct pfsyncstats pfsyncstats; +struct pfsync_softc pfsyncif; +int pfsync_sync_ok; +struct pfsyncstats pfsyncstats; void pfsyncattach(int); void pfsync_setmtu(struct pfsync_softc *, int); @@ -84,17 +85,23 @@ void pfsyncstart(struct ifnet *); struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); -int pfsync_sendout(struct pfsync_softc *sc); +int pfsync_sendout(struct pfsync_softc *); void pfsync_timeout(void *); +void pfsync_send_bus(struct pfsync_softc *, u_int8_t); +void pfsync_bulk_update(void *); +void pfsync_bulkfail(void *); extern int ifqmaxlen; extern struct timeval time; +extern struct timeval mono_time; +extern int hz; void pfsyncattach(int npfsync) { struct ifnet *ifp; + pfsync_sync_ok = 1; bzero(&pfsyncif, sizeof(pfsyncif)); pfsyncif.sc_mbuf = NULL; pfsyncif.sc_mbuf_net = NULL; @@ -102,6 +109,8 @@ pfsyncattach(int npfsync) pfsyncif.sc_statep_net.s = NULL; pfsyncif.sc_maxupdates = 128; pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif.sc_ureq_received = 0; + pfsyncif.sc_ureq_sent = 0; ifp = &pfsyncif.sc_if; strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname); ifp->if_softc = &pfsyncif; @@ -113,6 +122,8 @@ pfsyncattach(int npfsync) ifp->if_hdrlen = PFSYNC_HDRLEN; pfsync_setmtu(&pfsyncif, MCLBYTES); timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif); + timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif); + timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif); if_attach(ifp); 
if_alloc_sadl(ifp); @@ -227,6 +238,7 @@ pfsync_input(struct mbuf *m, ...) struct pfsync_state_del *dp; struct pfsync_state_clr *cp; struct pfsync_state_upd_req *rup; + struct pfsync_state_bus *bus; struct in_addr src; struct mbuf *mp; int iplen, action, error, i, s, count, offp; @@ -485,17 +497,65 @@ pfsync_input(struct mbuf *m, ...) bcopy(rup->id, &key.id, sizeof(key.id)); key.creatorid = rup->creatorid; - st = pf_find_state_byid(&key); - if (st == NULL) { - pfsyncstats.pfsyncs_badstate++; - continue; + if (key.id == 0 && key.creatorid == 0) { + sc->sc_ureq_received = mono_time.tv_sec; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received " + "bulk update request\n"); + pfsync_send_bus(sc, PFSYNC_BUS_START); + pfsync_bulk_update(sc); + } else { + st = pf_find_state_byid(&key); + if (st == NULL) { + pfsyncstats.pfsyncs_badstate++; + continue; + } + pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); } - pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); splx(s); break; + case PFSYNC_ACT_BUS: + /* If we're not waiting for a bulk update, who cares. 
*/ + if (sc->sc_ureq_sent == 0) + break; + + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + sizeof(*bus), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + bus = (struct pfsync_state_bus *)(mp->m_data + offp); + switch (bus->status) { + case PFSYNC_BUS_START: + timeout_add(&sc->sc_bulkfail_tmo, + pf_pool_limits[PF_LIMIT_STATES].limit / + (PFSYNC_BULKPACKETS * sc->sc_maxcount)); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received bulk " + "update start\n"); + break; + case PFSYNC_BUS_END: + if (mono_time.tv_sec - ntohl(bus->endtime) >= + sc->sc_ureq_sent) { + /* that's it, we're happy */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + timeout_del(&sc->sc_bulkfail_tmo); + pfsync_sync_ok = 1; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received valid " + "bulk update end\n"); + } else { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received invalid " + "bulk update end: bad timestamp\n"); + } + break; + } + break; } done: @@ -608,6 +668,15 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) imo->imo_multicast_ifp = sc->sc_sync_ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; + + /* Request a full state table update. 
*/ + sc->sc_ureq_sent = mono_time.tv_sec; + pfsync_sync_ok = 0; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: requesting bulk update\n"); + timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + pfsync_request_update(NULL, NULL); + pfsync_sendout(sc); } splx(s); @@ -641,7 +710,6 @@ pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) struct mbuf * pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) { - extern int hz; struct pfsync_header *h; struct mbuf *m; int len; @@ -669,6 +737,10 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + sizeof(struct pfsync_header); break; + case PFSYNC_ACT_BUS: + len = sizeof(struct pfsync_header) + + sizeof(struct pfsync_state_bus); + break; default: len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + sizeof(struct pfsync_header); @@ -773,6 +845,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) secs = time.tv_sec; + st->pfsync_time = mono_time.tv_sec; + TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); + TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); + if (sp == NULL) { /* not a "duplicate" update */ i = 255; @@ -920,13 +996,16 @@ pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) } } - sc->sc_sendaddr = *src; + if (src != NULL) + sc->sc_sendaddr = *src; sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); h->count++; rup = sc->sc_statep.r++; bzero(rup, sizeof(*rup)); - bcopy(up->id, rup->id, sizeof(rup->id)); - rup->creatorid = up->creatorid; + if (up != NULL) { + bcopy(up->id, rup->id, sizeof(rup->id)); + rup->creatorid = up->creatorid; + } if (h->count == sc->sc_maxcount) ret = pfsync_sendout(sc); @@ -943,9 +1022,8 @@ pfsync_clear_states(u_int32_t creatorid, char *ifname) int s, ret; s = splnet(); - if (sc->sc_mbuf != NULL) { + if (sc->sc_mbuf != NULL) pfsync_sendout(sc); - } if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, (void 
*)&sc->sc_statep.c)) == NULL) { splx(s); @@ -973,6 +1051,91 @@ pfsync_timeout(void *v) splx(s); } +void +pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) +{ + struct pfsync_state_bus *bus; + + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + + if (pfsync_sync_ok && + (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, + (void *)&sc->sc_statep.b)) != NULL) { + sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); + bus = sc->sc_statep.b; + bus->creatorid = pf_status.hostid; + bus->status = status; + bus->endtime = htonl(mono_time.tv_sec - sc->sc_ureq_received); + pfsync_sendout(sc); + } +} + +void +pfsync_bulk_update(void *v) +{ + struct pfsync_softc *sc = v; + int s, i = 0; + struct pf_state *state; + + s = splnet(); + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + + /* + * Grab at most PFSYNC_BULKPACKETS worth of states which have not + * been sent since the latest request was made. + */ + while ((state = TAILQ_FIRST(&state_updates)) != NULL && + ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { + if (state->pfsync_time > sc->sc_ureq_received) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + timeout_del(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + break; + } else { + /* send an update and move to end of list */ + if (!state->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); + state->pfsync_time = mono_time.tv_sec; + TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); + TAILQ_INSERT_TAIL(&state_updates, state, + u.s.entry_updates); + + /* look again for more in a bit */ + timeout_add(&sc->sc_bulk_tmo, 1); + } + } + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + splx(s); +} + +void +pfsync_bulkfail(void *v) +{ + struct pfsync_softc *sc = v; + + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { + /* Try again in a bit */ + timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + pfsync_request_update(NULL, NULL); + pfsync_sendout(sc); + } else { + 
/* Pretend like the transfer was ok */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + pfsync_sync_ok = 1; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: failed to receive " + "bulk update status\n"); + timeout_del(&sc->sc_bulkfail_tmo); + } +} + int pfsync_sendout(sc) struct pfsync_softc *sc; diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 9c954768cf0..b3705c8dd2a 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.12 2004/02/20 19:22:03 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.13 2004/03/22 04:54:17 mcbride Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -120,6 +120,15 @@ struct pfsync_state_clr { u_int32_t pad; } __packed; +struct pfsync_state_bus { + u_int32_t creatorid; + u_int32_t endtime; + u_int8_t status; +#define PFSYNC_BUS_START 1 +#define PFSYNC_BUS_END 2 + u_int8_t pad[7]; +} __packed; + #ifdef _KERNEL union sc_statep { @@ -127,20 +136,28 @@ union sc_statep { struct pfsync_state_upd *u; struct pfsync_state_del *d; struct pfsync_state_clr *c; + struct pfsync_state_bus *b; struct pfsync_state_upd_req *r; }; +extern int pfsync_sync_ok; + struct pfsync_softc { struct ifnet sc_if; struct ifnet *sc_sync_ifp; struct ip_moptions sc_imo; struct timeout sc_tmo; + struct timeout sc_bulk_tmo; + struct timeout sc_bulkfail_tmo; struct in_addr sc_sendaddr; struct mbuf *sc_mbuf; /* current cummulative mbuf */ struct mbuf *sc_mbuf_net; /* current cummulative mbuf */ union sc_statep sc_statep; union sc_statep sc_statep_net; + u_int32_t sc_ureq_received; + u_int32_t sc_ureq_sent; + int sc_bulk_tries; int sc_maxcount; /* number of states in mtu */ int sc_maxupdates; /* number of updates/state */ }; @@ -161,14 +178,18 @@ struct pfsync_header { #define PFSYNC_ACT_INS_F 6 /* insert fragment */ #define PFSYNC_ACT_DEL_F 7 /* delete fragments */ #define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ -#define PFSYNC_ACT_MAX 9 +#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ 
+#define PFSYNC_ACT_MAX 10 u_int8_t count; } __packed; +#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ +#define PFSYNC_MAX_BULKTRIES 12 #define PFSYNC_HDRLEN sizeof(struct pfsync_header) #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ - "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", "UPD REQ" + "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ + "UPD REQ", "BLK UPD STAT" #define PFSYNC_DFLTTL 255 diff --git a/sys/net/pf.c b/sys/net/pf.c index 699e29dfbb7..fb4dffb0d89 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.430 2004/03/11 10:15:26 mcbride Exp $ */ +/* $OpenBSD: pf.c,v 1.431 2004/03/22 04:54:17 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -255,6 +255,7 @@ static __inline int pf_state_compare_id(struct pf_state *, struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; +struct pf_state_queue state_updates; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); RB_GENERATE(pf_state_tree_lan_ext, pf_state, @@ -669,6 +670,7 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); return (-1); } + TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; @@ -816,6 +818,7 @@ pf_purge_expired_states(void) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); pfi_detach_state(cur->u.s.kif); + TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); pool_put(&pf_state_pl, cur); pf_status.fcounters[FCNT_STATE_REMOVALS]++; pf_status.states--; diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index b93b14cd9b0..dd25ce2babe 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.111 2004/03/18 23:24:02 cedric Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.112 2004/03/22 04:54:18 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -143,6 +143,7 @@ pfattach(int num) 
TAILQ_INIT(&pf_pabuf); pf_altqs_active = &pf_altqs[0]; pf_altqs_inactive = &pf_altqs[1]; + TAILQ_INIT(&state_updates); /* default rule should never be garbage collected */ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; @@ -177,7 +178,6 @@ pfattach(int num) /* XXX do our best to avoid a conflict */ pf_status.hostid = arc4random(); - pf_status.stateid = 1; /* might want 0 for something special */ } int @@ -856,6 +856,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) else { pf_status.running = 1; pf_status.since = time.tv_sec; + if (pf_status.stateid == 0) { + pf_status.stateid = time.tv_sec; + pf_status.stateid = pf_status.stateid << 32; + } DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; @@ -1364,6 +1368,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) state->anchor.ptr = NULL; state->rt_kif = NULL; state->creation = time.tv_sec; + state->pfsync_time = 0; state->packets[0] = state->packets[1] = 0; state->bytes[0] = state->bytes[1] = 0; diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 15562684273..d3dda46578e 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.186 2004/02/20 19:22:03 mcbride Exp $ */ +/* $OpenBSD: pfvar.h,v 1.187 2004/03/22 04:54:18 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -592,6 +592,8 @@ struct pf_state_peer { struct pf_state_scrub *scrub; /* state is scrubbed */ }; +TAILQ_HEAD(pf_state_queue, pf_state); + struct pf_state { u_int64_t id; union { @@ -599,6 +601,7 @@ struct pf_state { RB_ENTRY(pf_state) entry_lan_ext; RB_ENTRY(pf_state) entry_ext_gwy; RB_ENTRY(pf_state) entry_id; + TAILQ_ENTRY(pf_state) entry_updates; struct pfi_kif *kif; } s; char ifname[IFNAMSIZ]; @@ -617,6 +620,7 @@ struct pf_state { struct pf_src_node *nat_src_node; u_int32_t creation; u_int32_t expire; + u_int32_t pfsync_time; u_int32_t packets[2]; u_int32_t bytes[2]; u_int32_t creatorid; @@ -1277,6 +1281,7 @@ 
RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); extern struct pf_state_tree_id tree_id; +extern struct pf_state_queue state_updates; extern struct pf_anchorqueue pf_anchors; extern struct pf_ruleset pf_main_ruleset; |