From c526375e20e21a7c17c673658bfb796c8cb466bc Mon Sep 17 00:00:00 2001 From: Alexandr Nedvedicky Date: Tue, 11 Sep 2018 07:53:39 +0000 Subject: - moving state look up outside of PF_LOCK() this change adds a pf_state_lock rw-lock, which protects consistency of state table in PF. The code delivered in this change is guarded by 'WITH_PF_LOCK', which is still undefined. People, who are willing to experiment and want to run it must do two things: - compile kernel with -DWITH_PF_LOCK - bump NET_TASKQ from 1 to ... sky is the limit, (just select some sensible value for number of tasks your system is able to handle) OK bluhm@ --- sys/net/if_pfsync.c | 61 ++++++++++++++++++--- sys/net/if_pfsync.h | 4 +- sys/net/pf.c | 150 +++++++++++++++++++++++++++++++++++++++------------ sys/net/pf_ioctl.c | 53 ++++++++++++++---- sys/net/pfvar.h | 4 +- sys/net/pfvar_priv.h | 37 ++++++++++++- 6 files changed, 255 insertions(+), 54 deletions(-) diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c index 3f71ffcd4f6..8d86f4210fd 100644 --- a/sys/net/if_pfsync.c +++ b/sys/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.258 2018/08/24 12:29:33 sashan Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.259 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -114,6 +114,8 @@ int pfsync_in_eof(caddr_t, int, int, int); int pfsync_in_error(caddr_t, int, int, int); +void pfsync_update_state_locked(struct pf_state *); + struct { int (*in)(caddr_t, int, int, int); size_t len; @@ -602,6 +604,8 @@ pfsync_state_import(struct pfsync_state *sp, int flags) st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; + refcnt_init(&st->refcnt); + /* XXX when we have anchors, use STATE_INC_COUNTERS */ r->states_cur++; r->states_tot++; @@ -609,6 +613,10 @@ pfsync_state_import(struct pfsync_state *sp, int flags) if (!ISSET(flags, PFSYNC_SI_IOCTL)) SET(st->state_flags, PFSTATE_NOSYNC); + /* + * We just set PFSTATE_NOSYNC bit, which prevents + * pfsync_insert_state() to insert state to pfsync. + */ if (pf_state_insert(kif, &skw, &sks, st) != 0) { /* XXX when we have anchors, use STATE_DEC_COUNTERS */ r->states_cur--; @@ -941,7 +949,7 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags) if (sync) { pfsyncstat_inc(pfsyncs_stale); - pfsync_update_state(st); + pfsync_update_state_locked(st); schednetisr(NETISR_PFSYNC); } } @@ -1015,7 +1023,7 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) if (sync) { pfsyncstat_inc(pfsyncs_stale); - pfsync_update_state(st); + pfsync_update_state_locked(st); schednetisr(NETISR_PFSYNC); } } @@ -1470,6 +1478,7 @@ pfsync_drop(struct pfsync_softc *sc) KASSERT(st->sync_state == q); #endif st->sync_state = PFSYNC_S_NONE; + pf_state_unref(st); } } @@ -1618,13 +1627,14 @@ pfsync_sendout(void) count = 0; while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + st->sync_state = PFSYNC_S_NONE; #ifdef PFSYNC_DEBUG KASSERT(st->sync_state == q); #endif pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; - st->sync_state = PFSYNC_S_NONE; + pf_state_unref(st); count++; } @@ -1720,7 +1730,7 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; SET(st->state_flags, PFSTATE_ACK); - pd->pd_st = st; + pd->pd_st = pf_state_ref(st); pd->pd_m = m; sc->sc_deferred++; @@ -1768,6 +1778,8 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop) pd->pd_st->rule.ptr, pd->pd_st); break; #endif /* INET6 */ + default: + unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); } pd->pd_m = pdesc.m; } else { @@ -1782,10 +1794,13 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop) NULL, NULL); break; #endif /* INET6 */ + default: + unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); } } } out: + pf_state_unref(pd->pd_st); pool_put(&sc->sc_pool, pd); } @@ -1817,12 +1832,13 @@ pfsync_deferred(struct pf_state *st, int drop) } void -pfsync_update_state(struct pf_state *st) +pfsync_update_state_locked(struct pf_state *st) { struct pfsync_softc *sc = pfsyncif; int sync = 0; NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) return; @@ -1867,6 +1883,22 @@ pfsync_update_state(struct pf_state *st) schednetisr(NETISR_PFSYNC); } +void +pfsync_update_state(struct pf_state *st, int *have_pf_lock) +{ + struct pfsync_softc *sc = pfsyncif; + + if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return; + + if (*have_pf_lock == 0) { + PF_LOCK(); + *have_pf_lock = 1; + } + + pfsync_update_state_locked(st); +} + void pfsync_cancel_full_update(struct pfsync_softc *sc) { @@ -2017,9 +2049,22 @@ pfsync_delete_state(struct pf_state *st) case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st); - /* FALLTHROUGH to putting it on the del list */ + /* + * FALLTHROUGH to putting it on the del list + * Note on refence count bookeeping: + * pfsync_q_del() drops reference for queue + * ownership. But the st entry survives, because + * our caller still holds a reference. + */ case PFSYNC_S_NONE: + /* + * We either fall through here, or there is no reference to + * st owned by pfsync queues at this point. + * + * Calling pfsync_q_ins() puts st to del queue. The pfsync_q_ins() + * grabs a reference for delete queue. + */ pfsync_q_ins(st, PFSYNC_S_DEL); return; @@ -2077,6 +2122,7 @@ pfsync_q_ins(struct pf_state *st, int q) } sc->sc_len += nlen; + pf_state_ref(st); TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); st->sync_state = q; } @@ -2092,6 +2138,7 @@ pfsync_q_del(struct pf_state *st) sc->sc_len -= pfsync_qs[q].len; TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; + pf_state_unref(st); if (TAILQ_EMPTY(&sc->sc_qs[q])) sc->sc_len -= sizeof(struct pfsync_subheader); diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index ef24c220306..3c3b7dd90f6 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.53 2017/04/14 20:46:31 bluhm Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.54 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -328,7 +328,7 @@ void pfsync_state_export(struct pfsync_state *, struct pf_state *); void pfsync_insert_state(struct pf_state *); -void pfsync_update_state(struct pf_state *); +void pfsync_update_state(struct pf_state *, int *); void pfsync_delete_state(struct pf_state *); void pfsync_clear_states(u_int32_t, const char *); diff --git a/sys/net/pf.c b/sys/net/pf.c index 5f39f2947bd..03f12f4be6b 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1073 2018/07/22 09:09:18 sf Exp $ */ +/* $OpenBSD: pf.c,v 1.1074 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1250,11 +1250,17 @@ pf_purge(void *xnloops) KERNEL_LOCK(); NET_LOCK(); - PF_LOCK(); - /* process a fraction of the state table every second */ + /* + * process a fraction of the state table every second + * Note: + * we no longer need PF_LOCK() here, because + * pf_purge_expired_states() uses pf_state_lock to maintain + * consistency. + */ pf_purge_expired_states(1 + (pf_status.states / pf_default_rule.timeout[PFTM_INTERVAL])); + PF_LOCK(); /* purge other expired types every PFTM_INTERVAL seconds */ if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { pf_purge_expired_src_nodes(); @@ -1430,7 +1436,7 @@ pf_free_state(struct pf_state *cur) TAILQ_REMOVE(&state_list, cur, entry_list); if (cur->tag) pf_tag_unref(cur->tag); - pool_put(&pf_state_pl, cur); + pf_state_unref(cur); pf_status.fcounters[FCNT_STATE_REMOVALS]++; pf_status.states--; } @@ -1440,13 +1446,16 @@ pf_purge_expired_states(u_int32_t maxcheck) { static struct pf_state *cur = NULL; struct pf_state *next; + SLIST_HEAD(pf_state_gcl, pf_state) gcl; - PF_ASSERT_LOCKED(); + PF_ASSERT_UNLOCKED(); + SLIST_INIT(&gcl); + PF_STATE_ENTER_READ(); while (maxcheck--) { /* wrap to start of list when we hit the end */ if (cur == NULL) { - cur = TAILQ_FIRST(&state_list); + cur = pf_state_ref(TAILQ_FIRST(&state_list)); if (cur == NULL) break; /* list empty */ } @@ -1454,16 +1463,31 @@ pf_purge_expired_states(u_int32_t maxcheck) /* get next state, as cur may get deleted */ next = TAILQ_NEXT(cur, entry_list); - if (cur->timeout == PFTM_UNLINKED) { - /* free removed state */ - pf_free_state(cur); - } else if (pf_state_expires(cur) <= time_uptime) { - /* remove and free expired state */ - pf_remove_state(cur); - pf_free_state(cur); + if ((cur->timeout == PFTM_UNLINKED) || + (pf_state_expires(cur) <= time_uptime)) + SLIST_INSERT_HEAD(&gcl, cur, gc_list); + else + pf_state_unref(cur); + + cur = pf_state_ref(next); + } + PF_STATE_EXIT_READ(); + + PF_LOCK(); + PF_STATE_ENTER_WRITE(); + while ((next = SLIST_FIRST(&gcl)) != NULL) { + SLIST_REMOVE_HEAD(&gcl, gc_list); + if (next->timeout == PFTM_UNLINKED) + pf_free_state(next); + else if (pf_state_expires(next) <= time_uptime) { + pf_remove_state(next); + pf_free_state(next); } - cur = next; + + pf_state_unref(next); } + PF_STATE_EXIT_WRITE(); + PF_UNLOCK(); } int @@ -3973,6 +3997,11 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, s->set_prio[1] = act->set_prio[1]; s->delay = act->delay; SLIST_INIT(&s->src_nodes); + /* + * must initialize refcnt, before pf_state_insert() gets called. + * pf_state_inserts() grabs reference for pfsync! + */ + refcnt_init(&s->refcnt); switch (pd->proto) { case IPPROTO_TCP: @@ -4774,7 +4803,7 @@ pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, addlog("\n"); } /* XXX make sure it's the same direction ?? */ - pf_remove_state(*state); + (*state)->timeout = PFTM_PURGE; *state = NULL; pd->m->m_pkthdr.pf.inp = inp; return (PF_DROP); @@ -6770,6 +6799,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) struct pf_pdesc pd; int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir; u_int32_t qid, pqid = 0; + int have_pf_lock = 0; if (!pf_status.running) return (PF_PASS); @@ -6859,9 +6889,6 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) pd.lookup.pid = NO_PID; } - /* lock the lookup/write section of pf_test() */ - PF_LOCK(); - switch (pd.virtual_proto) { case PF_VPROTO_FRAGMENT: { @@ -6869,7 +6896,10 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) * handle fragments that aren't reassembled by * normalization */ + PF_LOCK(); + have_pf_lock = 1; action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); + s = pf_state_ref(s); if (action != PF_PASS) REASON_SET(&reason, PFRES_FRAG); break; @@ -6883,19 +6913,26 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) "dropping IPv6 packet with ICMPv4 payload"); break; } + PF_STATE_ENTER_READ(); action = pf_test_state_icmp(&pd, &s, &reason); + s = pf_state_ref(s); + PF_STATE_EXIT_READ(); if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s); + pfsync_update_state(s, &have_pf_lock); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr; #if NPFLOG > 0 pd.pflog |= s->log; #endif /* NPFLOG > 0 */ - } else if (s == NULL) + } else if (s == NULL) { + PF_LOCK(); + have_pf_lock = 1; action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); + s = pf_state_ref(s); + } break; } @@ -6908,19 +6945,26 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) "dropping IPv4 packet with ICMPv6 payload"); break; } + PF_STATE_ENTER_READ(); action = pf_test_state_icmp(&pd, &s, &reason); + s = pf_state_ref(s); + PF_STATE_EXIT_READ(); if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s); + pfsync_update_state(s, &have_pf_lock); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr; #if NPFLOG > 0 pd.pflog |= s->log; #endif /* NPFLOG > 0 */ - } else if (s == NULL) + } else if (s == NULL) { + PF_LOCK(); + have_pf_lock = 1; action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); + s = pf_state_ref(s); + } break; } #endif /* INET6 */ @@ -6930,6 +6974,8 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN && pf_synflood_check(&pd)) { + PF_LOCK(); + have_pf_lock = 1; pf_syncookie_send(&pd); action = PF_DROP; break; @@ -6940,7 +6986,10 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_DROP) break; } + PF_STATE_ENTER_READ(); action = pf_test_state(&pd, &s, &reason, 0); + s = pf_state_ref(s); + PF_STATE_EXIT_READ(); if (s == NULL && action != PF_PASS && action != PF_AFRT && pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && @@ -6948,25 +6997,27 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) struct mbuf *msyn; msyn = pf_syncookie_recreate_syn(&pd); if (msyn) { - PF_UNLOCK(); action = pf_test(af, fwdir, ifp, &msyn); - PF_LOCK(); m_freem(msyn); if (action == PF_PASS || action == PF_AFRT) { + PF_STATE_ENTER_READ(); pf_test_state(&pd, &s, &reason, 1); - if (s == NULL) { - PF_UNLOCK(); + s = pf_state_ref(s); + PF_STATE_EXIT_READ(); + if (s == NULL) return (PF_DROP); - } s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1; s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1; pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST); + PF_LOCK(); + have_pf_lock = 1; action = pf_synproxy(&pd, &s, &reason); if (action != PF_PASS) { PF_UNLOCK(); + pf_state_unref(s); return (action); } } @@ -6974,19 +7025,22 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) action = PF_DROP; } - if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s); + pfsync_update_state(s, &have_pf_lock); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr; #if NPFLOG > 0 pd.pflog |= s->log; #endif /* NPFLOG > 0 */ - } else if (s == NULL) + } else if (s == NULL) { + PF_LOCK(); + have_pf_lock = 1; action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason); + s = pf_state_ref(s); + } if (pd.virtual_proto == IPPROTO_TCP) { if (s) { @@ -6999,12 +7053,13 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) break; } - PF_UNLOCK(); + if (have_pf_lock != 0) + PF_UNLOCK(); /* * At the moment, we rely on NET_LOCK() to prevent removal of items - * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll - * have to be refcounted when NET_LOCK() is gone. + * we've collected above ('r', 'anchor' and 'ruleset'). They'll have + * to be refcounted when NET_LOCK() is gone. */ done: @@ -7201,6 +7256,8 @@ done: *m0 = pd.m; + pf_state_unref(s); + return (action); } @@ -7401,6 +7458,33 @@ pf_state_key_unlink_reverse(struct pf_state_key *sk) } } +struct pf_state * +pf_state_ref(struct pf_state *s) +{ + if (s != NULL) + PF_REF_TAKE(s->refcnt); + return (s); +} + +void +pf_state_unref(struct pf_state *s) +{ + if ((s != NULL) && PF_REF_RELE(s->refcnt)) { + /* never inserted or removed */ +#if NPFSYNC > 0 + KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || + ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && + (s->sync_state == PFSYNC_S_NONE))); +#endif /* NPFSYNC */ + KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || + (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); + KASSERT((s->key[PF_SK_WIRE] == NULL) && + (s->key[PF_SK_STACK] == NULL)); + + pool_put(&pf_state_pl, s); + } +} + int pf_delay_pkt(struct mbuf *m, u_int ifidx) { diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index a639bd60704..72774436aa0 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.336 2018/07/22 09:09:18 sf Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.337 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -132,7 +132,21 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); #ifdef WITH_PF_LOCK +/* + * pf_lock protects consistency of PF data structures, which don't have + * their dedicated lock yet. The pf_lock currently protects: + * - rules, + * - radix tables, + * - source nodes + * All callers must grab pf_lock exclusively. + * + * pf_state_lock protects consistency of state table. Packets, which do state + * look up grab the lock as readers. If packet must create state, then it must + * grab the lock as writer. Whenever packet creates state it grabs pf_lock + * first then it locks pf_state_lock as the writer. + */ struct rwlock pf_lock = RWLOCK_INITIALIZER("pf_lock"); +struct rwlock pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock"); #endif /* WITH_PF_LOCK */ #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) @@ -1501,6 +1515,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) u_int killed = 0; PF_LOCK(); + PF_STATE_ENTER_WRITE(); for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); @@ -1514,6 +1529,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) killed++; } } + PF_STATE_EXIT_WRITE(); psk->psk_killed = killed; #if NPFSYNC > 0 pfsync_clear_states(pf_status.hostid, psk->psk_ifname); @@ -1537,10 +1553,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (psk->psk_pfcmp.creatorid == 0) psk->psk_pfcmp.creatorid = pf_status.hostid; PF_LOCK(); + PF_STATE_ENTER_WRITE(); if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { pf_remove_state(s); psk->psk_killed = 1; } + PF_STATE_EXIT_WRITE(); PF_UNLOCK(); break; } @@ -1554,6 +1572,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) key.rdomain = psk->psk_rdomain; PF_LOCK(); + PF_STATE_ENTER_WRITE(); for (i = 0; i < nitems(dirs); i++) { if (dirs[i] == PF_IN) { sidx = 0; @@ -1594,11 +1613,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (killed) psk->psk_killed = killed; + PF_STATE_EXIT_WRITE(); PF_UNLOCK(); break; } PF_LOCK(); + PF_STATE_ENTER_WRITE(); for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); @@ -1644,6 +1665,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } } psk->psk_killed = killed; + PF_STATE_EXIT_WRITE(); PF_UNLOCK(); break; } @@ -1658,7 +1680,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } PF_LOCK(); + PF_STATE_ENTER_WRITE(); error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); + PF_STATE_EXIT_WRITE(); PF_UNLOCK(); break; } @@ -1673,16 +1697,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) id_key.id = ps->state.id; id_key.creatorid = ps->state.creatorid; - PF_LOCK(); + PF_STATE_ENTER_READ(); s = pf_find_state_byid(&id_key); + s = pf_state_ref(s); + PF_STATE_EXIT_READ(); if (s == NULL) { error = ENOENT; - PF_UNLOCK(); break; } pf_state_export(&ps->state, s); - PF_UNLOCK(); + pf_state_unref(s); break; } @@ -1702,7 +1727,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) p = ps->ps_states; - PF_LOCK(); + PF_STATE_ENTER_READ(); state = TAILQ_FIRST(&state_list); while (state) { if (state->timeout != PFTM_UNLINKED) { @@ -1712,7 +1737,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = copyout(pstore, p, sizeof(*p)); if (error) { free(pstore, M_TEMP, sizeof(*pstore)); - PF_UNLOCK(); + PF_STATE_EXIT_READ(); goto fail; } p++; @@ -1720,7 +1745,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } state = TAILQ_NEXT(state, entry_list); } - PF_UNLOCK(); + PF_STATE_EXIT_READ(); ps->ps_len = sizeof(struct pfsync_state) * nr; @@ -1801,8 +1826,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; - PF_LOCK(); + PF_STATE_ENTER_READ(); state = pf_find_state_all(&key, direction, &m); + state = pf_state_ref(state); + PF_STATE_EXIT_READ(); if (m > 1) error = E2BIG; /* more than one state */ @@ -1815,7 +1842,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pnl->rrdomain = sk->rdomain; } else error = ENOENT; - PF_UNLOCK(); + pf_state_unref(state); } break; } @@ -2576,8 +2603,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_state *state; PF_LOCK(); + PF_STATE_ENTER_WRITE(); RB_FOREACH(state, pf_state_tree_id, &tree_id) pf_src_tree_remove_state(state); + PF_STATE_EXIT_WRITE(); RB_FOREACH(n, pf_src_tree, &tree_src_tracking) n->expire = 1; pf_purge_expired_src_nodes(); @@ -2603,10 +2632,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ - if (sn->states != 0) + if (sn->states != 0) { + PF_ASSERT_LOCKED(); + PF_STATE_ENTER_WRITE(); RB_FOREACH(s, pf_state_tree_id, &tree_id) pf_state_rm_src_node(s, sn); + PF_STATE_EXIT_WRITE(); + } sn->expire = 1; killed++; } diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 97c13aa306c..fb43fe0b166 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.484 2018/09/10 11:37:26 bluhm Exp $ */ +/* $OpenBSD: pfvar.h,v 1.485 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -761,6 +761,7 @@ struct pf_state { TAILQ_ENTRY(pf_state) sync_list; TAILQ_ENTRY(pf_state) entry_list; + SLIST_ENTRY(pf_state) gc_list; RB_ENTRY(pf_state) entry_id; struct pf_state_peer src; struct pf_state_peer dst; @@ -805,6 +806,7 @@ struct pf_state { u_int16_t max_mss; u_int16_t if_index_in; u_int16_t if_index_out; + pf_refcnt_t refcnt; u_int16_t delay; }; diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index 466548c3301..2b62cfe455b 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar_priv.h,v 1.4 2017/08/06 13:16:11 mpi Exp $ */ +/* $OpenBSD: pfvar_priv.h,v 1.5 2018/09/11 07:53:38 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -101,8 +101,12 @@ struct pf_pdesc { extern struct task pf_purge_task; extern struct timeout pf_purge_to; +struct pf_state *pf_state_ref(struct pf_state *); +void pf_state_unref(struct pf_state *); + #ifdef WITH_PF_LOCK extern struct rwlock pf_lock; +extern struct rwlock pf_state_lock; #define PF_LOCK() do { \ NET_ASSERT_LOCKED(); \ @@ -124,11 +128,42 @@ extern struct rwlock pf_lock; if (rw_status(&pf_lock) == RW_WRITE) \ splassert_fail(0, rw_status(&pf_lock), __func__);\ } while (0) + +#define PF_STATE_ENTER_READ() do { \ + rw_enter_read(&pf_state_lock); \ + } while (0) + +#define PF_STATE_EXIT_READ() do { \ + rw_exit_read(&pf_state_lock); \ + } while (0) + +#define PF_STATE_ENTER_WRITE() do { \ + rw_enter_write(&pf_state_lock); \ + } while (0) + +#define PF_STATE_EXIT_WRITE() do { \ + PF_ASSERT_STATE_LOCKED(); \ + rw_exit_write(&pf_state_lock); \ + } while (0) + +#define PF_ASSERT_STATE_LOCKED() do { \ + if (rw_status(&pf_state_lock) != RW_WRITE)\ + splassert_fail(RW_WRITE, \ + rw_status(&pf_state_lock), __func__);\ + } while (0) + #else /* !WITH_PF_LOCK */ #define PF_LOCK() (void)(0) #define PF_UNLOCK() (void)(0) #define PF_ASSERT_LOCKED() (void)(0) #define PF_ASSERT_UNLOCKED() (void)(0) + +#define PF_STATE_ENTER_READ() (void)(0) +#define PF_STATE_EXIT_READ() (void)(0) +#define PF_STATE_ENTER_WRITE() (void)(0) +#define PF_STATE_EXIT_WRITE() (void)(0) +#define PF_ASSERT_STATE_LOCKED() (void)(0) + #endif /* WITH_PF_LOCK */ extern void pf_purge_timeout(void *); -- cgit v1.2.3