author		Alexandr Nedvedicky <sashan@cvs.openbsd.org>	2018-09-11 07:53:39 +0000
committer	Alexandr Nedvedicky <sashan@cvs.openbsd.org>	2018-09-11 07:53:39 +0000
commit		c526375e20e21a7c17c673658bfb796c8cb466bc (patch)
tree		7238d9bdb47d34a8393455e1b669a64df5d1db4f
parent		c7d0c355c6f944ec9be2643f2235c6f0566d03b4 (diff)
- moving state lookup outside of PF_LOCK()

This change adds a pf_state_lock rw-lock, which protects the consistency
of the state table in PF. The code delivered in this change is guarded by
'WITH_PF_LOCK', which is still undefined. People who are willing to
experiment and want to run it must do two things:

    - compile the kernel with -DWITH_PF_LOCK
    - bump NET_TASKQ from 1 to ... the sky is the limit
      (just select some sensible value for the number of tasks your
      system is able to handle)

OK bluhm@
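An illustrative sketch of those two knobs follows; it is not part of this
commit, and the exact way to get -DWITH_PF_LOCK into your kernel build may
differ in your tree:

/* sys/net/if.c: raise the number of softnet taskqs; 4 is just an
 * example value, pick what your system can sensibly handle. */
#define NET_TASKQ	4

/* and build the kernel with WITH_PF_LOCK defined, e.g. by adding
 * -DWITH_PF_LOCK to the kernel CFLAGS. */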
-rw-r--r--	sys/net/if_pfsync.c	 61
-rw-r--r--	sys/net/if_pfsync.h	  4
-rw-r--r--	sys/net/pf.c		150
-rw-r--r--	sys/net/pf_ioctl.c	 53
-rw-r--r--	sys/net/pfvar.h		  4
-rw-r--r--	sys/net/pfvar_priv.h	 37
6 files changed, 255 insertions, 54 deletions
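Before the per-file hunks, a small userland model of the locking discipline
this diff introduces may help. It is an illustration only: pthread rwlocks
stand in for the kernel's rw_enter/rw_exit, and all names are invented.
Lookups take the state lock as readers; state creation takes pf_lock first,
then the state lock as the writer.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t model_pf_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t model_pf_state_lock = PTHREAD_RWLOCK_INITIALIZER;
static int state_table;		/* stands in for the state trees */

static int
lookup_state(void)
{
	int v;

	/* packets doing a state lookup are readers and run concurrently */
	pthread_rwlock_rdlock(&model_pf_state_lock);
	v = state_table;
	pthread_rwlock_unlock(&model_pf_state_lock);
	return (v);
}

static void
create_state(int v)
{
	/* creation grabs pf_lock first, then the state lock as writer */
	pthread_rwlock_wrlock(&model_pf_lock);
	pthread_rwlock_wrlock(&model_pf_state_lock);
	state_table = v;
	pthread_rwlock_unlock(&model_pf_state_lock);
	pthread_rwlock_unlock(&model_pf_lock);
}

int
main(void)
{
	create_state(42);
	printf("lookup -> %d\n", lookup_state());
	return (0);
}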
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index 3f71ffcd4f6..8d86f4210fd 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_pfsync.c,v 1.258 2018/08/24 12:29:33 sashan Exp $ */
+/* $OpenBSD: if_pfsync.c,v 1.259 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff
@@ -114,6 +114,8 @@ int pfsync_in_eof(caddr_t, int, int, int);
int pfsync_in_error(caddr_t, int, int, int);
+void pfsync_update_state_locked(struct pf_state *);
+
struct {
int (*in)(caddr_t, int, int, int);
size_t len;
@@ -602,6 +604,8 @@ pfsync_state_import(struct pfsync_state *sp, int flags)
st->pfsync_time = time_uptime;
st->sync_state = PFSYNC_S_NONE;
+ refcnt_init(&st->refcnt);
+
/* XXX when we have anchors, use STATE_INC_COUNTERS */
r->states_cur++;
r->states_tot++;
@@ -609,6 +613,10 @@ pfsync_state_import(struct pfsync_state *sp, int flags)
if (!ISSET(flags, PFSYNC_SI_IOCTL))
SET(st->state_flags, PFSTATE_NOSYNC);
+ /*
+ * We just set the PFSTATE_NOSYNC bit, which prevents
+ * pfsync_insert_state() from inserting the state into pfsync.
+ */
if (pf_state_insert(kif, &skw, &sks, st) != 0) {
/* XXX when we have anchors, use STATE_DEC_COUNTERS */
r->states_cur--;
@@ -941,7 +949,7 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags)
if (sync) {
pfsyncstat_inc(pfsyncs_stale);
- pfsync_update_state(st);
+ pfsync_update_state_locked(st);
schednetisr(NETISR_PFSYNC);
}
}
@@ -1015,7 +1023,7 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
if (sync) {
pfsyncstat_inc(pfsyncs_stale);
- pfsync_update_state(st);
+ pfsync_update_state_locked(st);
schednetisr(NETISR_PFSYNC);
}
}
@@ -1470,6 +1478,7 @@ pfsync_drop(struct pfsync_softc *sc)
KASSERT(st->sync_state == q);
#endif
st->sync_state = PFSYNC_S_NONE;
+ pf_state_unref(st);
}
}
@@ -1618,13 +1627,14 @@ pfsync_sendout(void)
count = 0;
while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
#endif
+ st->sync_state = PFSYNC_S_NONE;
pfsync_qs[q].write(st, m->m_data + offset);
offset += pfsync_qs[q].len;
- st->sync_state = PFSYNC_S_NONE;
+ pf_state_unref(st);
count++;
}
@@ -1720,7 +1730,7 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
SET(st->state_flags, PFSTATE_ACK);
- pd->pd_st = st;
+ pd->pd_st = pf_state_ref(st);
pd->pd_m = m;
sc->sc_deferred++;
@@ -1768,6 +1778,8 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop)
pd->pd_st->rule.ptr, pd->pd_st);
break;
#endif /* INET6 */
+ default:
+ unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
}
pd->pd_m = pdesc.m;
} else {
@@ -1782,10 +1794,13 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop)
NULL, NULL);
break;
#endif /* INET6 */
+ default:
+ unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
}
}
}
out:
+ pf_state_unref(pd->pd_st);
pool_put(&sc->sc_pool, pd);
}
@@ -1817,12 +1832,13 @@ pfsync_deferred(struct pf_state *st, int drop)
}
void
-pfsync_update_state(struct pf_state *st)
+pfsync_update_state_locked(struct pf_state *st)
{
struct pfsync_softc *sc = pfsyncif;
int sync = 0;
NET_ASSERT_LOCKED();
+ PF_ASSERT_LOCKED();
if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
return;
@@ -1868,6 +1884,22 @@ pfsync_update_state(struct pf_state *st)
}
void
+pfsync_update_state(struct pf_state *st, int *have_pf_lock)
+{
+ struct pfsync_softc *sc = pfsyncif;
+
+ if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
+ return;
+
+ if (*have_pf_lock == 0) {
+ PF_LOCK();
+ *have_pf_lock = 1;
+ }
+
+ pfsync_update_state_locked(st);
+}
+
+void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
if (timeout_pending(&sc->sc_bulkfail_tmo) ||
@@ -2017,9 +2049,22 @@ pfsync_delete_state(struct pf_state *st)
case PFSYNC_S_UPD:
case PFSYNC_S_IACK:
pfsync_q_del(st);
- /* FALLTHROUGH to putting it on the del list */
+ /*
+ * FALLTHROUGH to putting it on the del list.
+ * Note on reference count bookkeeping:
+ * pfsync_q_del() drops the reference for queue
+ * ownership, but the st entry survives, because
+ * our caller still holds a reference.
+ */
case PFSYNC_S_NONE:
+ /*
+ * We either fall through here, or there is no reference to
+ * st owned by the pfsync queues at this point.
+ *
+ * Calling pfsync_q_ins() puts st on the del queue and
+ * grabs a reference for the delete queue.
+ */
pfsync_q_ins(st, PFSYNC_S_DEL);
return;
@@ -2077,6 +2122,7 @@ pfsync_q_ins(struct pf_state *st, int q)
}
sc->sc_len += nlen;
+ pf_state_ref(st);
TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
st->sync_state = q;
}
@@ -2092,6 +2138,7 @@ pfsync_q_del(struct pf_state *st)
sc->sc_len -= pfsync_qs[q].len;
TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
st->sync_state = PFSYNC_S_NONE;
+ pf_state_unref(st);
if (TAILQ_EMPTY(&sc->sc_qs[q]))
sc->sc_len -= sizeof(struct pfsync_subheader);
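The pfsync_q_ins()/pfsync_q_del() hunks above establish an ownership rule:
a pfsync queue holds one reference on every state it links, and whoever
removes the state from the queue drops that reference. A self-contained
userland model of the rule (all names invented, atomics standing in for
PF_REF_TAKE/PF_REF_RELE):

#include <sys/queue.h>
#include <stdatomic.h>
#include <stdlib.h>

struct state {
	TAILQ_ENTRY(state)	sync_list;
	atomic_int		refcnt;
};

static TAILQ_HEAD(, state) sync_q = TAILQ_HEAD_INITIALIZER(sync_q);

static struct state *
state_ref(struct state *s)
{
	if (s != NULL)
		atomic_fetch_add(&s->refcnt, 1);
	return (s);
}

static void
state_unref(struct state *s)
{
	if (s != NULL && atomic_fetch_sub(&s->refcnt, 1) == 1)
		free(s);
}

static void
q_ins(struct state *s)
{
	state_ref(s);			/* the queue owns a reference now */
	TAILQ_INSERT_TAIL(&sync_q, s, sync_list);
}

static void
q_del(struct state *s)
{
	TAILQ_REMOVE(&sync_q, s, sync_list);
	state_unref(s);			/* queue ownership ends here */
}

int
main(void)
{
	struct state *s = calloc(1, sizeof(*s));

	state_ref(s);	/* caller's reference */
	q_ins(s);
	q_del(s);	/* s survives: the caller still holds a reference */
	state_unref(s);	/* last reference gone; s is freed */
	return (0);
}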
diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h
index ef24c220306..3c3b7dd90f6 100644
--- a/sys/net/if_pfsync.h
+++ b/sys/net/if_pfsync.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_pfsync.h,v 1.53 2017/04/14 20:46:31 bluhm Exp $ */
+/* $OpenBSD: if_pfsync.h,v 1.54 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2001 Michael Shalayeff
@@ -328,7 +328,7 @@ void pfsync_state_export(struct pfsync_state *,
struct pf_state *);
void pfsync_insert_state(struct pf_state *);
-void pfsync_update_state(struct pf_state *);
+void pfsync_update_state(struct pf_state *, int *);
void pfsync_delete_state(struct pf_state *);
void pfsync_clear_states(u_int32_t, const char *);
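The prototype change above threads an int *have_pf_lock through the
callers. A tiny userland model of that contract (invented names, a pthread
mutex standing in for PF_LOCK): the callee takes the lock on first need and
records that fact, so a caller doing several conditional updates locks at
most once and unlocks exactly once at the end.

#include <pthread.h>

static pthread_mutex_t model_pf_lock = PTHREAD_MUTEX_INITIALIZER;
static int updates;

static void
update_state(int *have_pf_lock)
{
	if (*have_pf_lock == 0) {
		pthread_mutex_lock(&model_pf_lock);
		*have_pf_lock = 1;
	}
	updates++;			/* work that requires the lock */
}

int
main(void)
{
	int have_pf_lock = 0;

	update_state(&have_pf_lock);	/* takes the lock here */
	update_state(&have_pf_lock);	/* already held; no double lock */
	if (have_pf_lock)
		pthread_mutex_unlock(&model_pf_lock);
	return (updates == 2 ? 0 : 1);
}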
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 5f39f2947bd..03f12f4be6b 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf.c,v 1.1073 2018/07/22 09:09:18 sf Exp $ */
+/* $OpenBSD: pf.c,v 1.1074 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -1250,11 +1250,17 @@ pf_purge(void *xnloops)
KERNEL_LOCK();
NET_LOCK();
- PF_LOCK();
- /* process a fraction of the state table every second */
+ /*
+ * process a fraction of the state table every second
+ * Note:
+ * we no longer need PF_LOCK() here, because
+ * pf_purge_expired_states() uses pf_state_lock to maintain
+ * consistency.
+ */
pf_purge_expired_states(1 + (pf_status.states
/ pf_default_rule.timeout[PFTM_INTERVAL]));
+ PF_LOCK();
/* purge other expired types every PFTM_INTERVAL seconds */
if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
pf_purge_expired_src_nodes();
@@ -1430,7 +1436,7 @@ pf_free_state(struct pf_state *cur)
TAILQ_REMOVE(&state_list, cur, entry_list);
if (cur->tag)
pf_tag_unref(cur->tag);
- pool_put(&pf_state_pl, cur);
+ pf_state_unref(cur);
pf_status.fcounters[FCNT_STATE_REMOVALS]++;
pf_status.states--;
}
@@ -1440,13 +1446,16 @@ pf_purge_expired_states(u_int32_t maxcheck)
{
static struct pf_state *cur = NULL;
struct pf_state *next;
+ SLIST_HEAD(pf_state_gcl, pf_state) gcl;
- PF_ASSERT_LOCKED();
+ PF_ASSERT_UNLOCKED();
+ SLIST_INIT(&gcl);
+ PF_STATE_ENTER_READ();
while (maxcheck--) {
/* wrap to start of list when we hit the end */
if (cur == NULL) {
- cur = TAILQ_FIRST(&state_list);
+ cur = pf_state_ref(TAILQ_FIRST(&state_list));
if (cur == NULL)
break; /* list empty */
}
@@ -1454,16 +1463,31 @@ pf_purge_expired_states(u_int32_t maxcheck)
/* get next state, as cur may get deleted */
next = TAILQ_NEXT(cur, entry_list);
- if (cur->timeout == PFTM_UNLINKED) {
- /* free removed state */
- pf_free_state(cur);
- } else if (pf_state_expires(cur) <= time_uptime) {
- /* remove and free expired state */
- pf_remove_state(cur);
- pf_free_state(cur);
+ if ((cur->timeout == PFTM_UNLINKED) ||
+ (pf_state_expires(cur) <= time_uptime))
+ SLIST_INSERT_HEAD(&gcl, cur, gc_list);
+ else
+ pf_state_unref(cur);
+
+ cur = pf_state_ref(next);
+ }
+ PF_STATE_EXIT_READ();
+
+ PF_LOCK();
+ PF_STATE_ENTER_WRITE();
+ while ((next = SLIST_FIRST(&gcl)) != NULL) {
+ SLIST_REMOVE_HEAD(&gcl, gc_list);
+ if (next->timeout == PFTM_UNLINKED)
+ pf_free_state(next);
+ else if (pf_state_expires(next) <= time_uptime) {
+ pf_remove_state(next);
+ pf_free_state(next);
}
- cur = next;
+
+ pf_state_unref(next);
}
+ PF_STATE_EXIT_WRITE();
+ PF_UNLOCK();
}
int
@@ -3973,6 +3997,11 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
s->set_prio[1] = act->set_prio[1];
s->delay = act->delay;
SLIST_INIT(&s->src_nodes);
+ /*
+ * must initialize refcnt before pf_state_insert() gets called.
+ * pf_state_insert() grabs a reference for pfsync!
+ */
+ refcnt_init(&s->refcnt);
switch (pd->proto) {
case IPPROTO_TCP:
@@ -4774,7 +4803,7 @@ pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
addlog("\n");
}
/* XXX make sure it's the same direction ?? */
- pf_remove_state(*state);
+ (*state)->timeout = PFTM_PURGE;
*state = NULL;
pd->m->m_pkthdr.pf.inp = inp;
return (PF_DROP);
@@ -6770,6 +6799,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
struct pf_pdesc pd;
int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
u_int32_t qid, pqid = 0;
+ int have_pf_lock = 0;
if (!pf_status.running)
return (PF_PASS);
@@ -6859,9 +6889,6 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
pd.lookup.pid = NO_PID;
}
- /* lock the lookup/write section of pf_test() */
- PF_LOCK();
-
switch (pd.virtual_proto) {
case PF_VPROTO_FRAGMENT: {
@@ -6869,7 +6896,10 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
* handle fragments that aren't reassembled by
* normalization
*/
+ PF_LOCK();
+ have_pf_lock = 1;
action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason);
+ s = pf_state_ref(s);
if (action != PF_PASS)
REASON_SET(&reason, PFRES_FRAG);
break;
@@ -6883,19 +6913,26 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
"dropping IPv6 packet with ICMPv4 payload");
break;
}
+ PF_STATE_ENTER_READ();
action = pf_test_state_icmp(&pd, &s, &reason);
+ s = pf_state_ref(s);
+ PF_STATE_EXIT_READ();
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s);
+ pfsync_update_state(s, &have_pf_lock);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;
#if NPFLOG > 0
pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
- } else if (s == NULL)
+ } else if (s == NULL) {
+ PF_LOCK();
+ have_pf_lock = 1;
action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
&reason);
+ s = pf_state_ref(s);
+ }
break;
}
@@ -6908,19 +6945,26 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
"dropping IPv4 packet with ICMPv6 payload");
break;
}
+ PF_STATE_ENTER_READ();
action = pf_test_state_icmp(&pd, &s, &reason);
+ s = pf_state_ref(s);
+ PF_STATE_EXIT_READ();
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s);
+ pfsync_update_state(s, &have_pf_lock);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;
#if NPFLOG > 0
pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
- } else if (s == NULL)
+ } else if (s == NULL) {
+ PF_LOCK();
+ have_pf_lock = 1;
action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
&reason);
+ s = pf_state_ref(s);
+ }
break;
}
#endif /* INET6 */
@@ -6930,6 +6974,8 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags &
(TH_SYN|TH_ACK)) == TH_SYN &&
pf_synflood_check(&pd)) {
+ PF_LOCK();
+ have_pf_lock = 1;
pf_syncookie_send(&pd);
action = PF_DROP;
break;
@@ -6940,7 +6986,10 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
if (action == PF_DROP)
break;
}
+ PF_STATE_ENTER_READ();
action = pf_test_state(&pd, &s, &reason, 0);
+ s = pf_state_ref(s);
+ PF_STATE_EXIT_READ();
if (s == NULL && action != PF_PASS && action != PF_AFRT &&
pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP &&
(pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
@@ -6948,25 +6997,27 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
struct mbuf *msyn;
msyn = pf_syncookie_recreate_syn(&pd);
if (msyn) {
- PF_UNLOCK();
action = pf_test(af, fwdir, ifp, &msyn);
- PF_LOCK();
m_freem(msyn);
if (action == PF_PASS || action == PF_AFRT) {
+ PF_STATE_ENTER_READ();
pf_test_state(&pd, &s, &reason, 1);
- if (s == NULL) {
- PF_UNLOCK();
+ s = pf_state_ref(s);
+ PF_STATE_EXIT_READ();
+ if (s == NULL)
return (PF_DROP);
- }
s->src.seqhi =
ntohl(pd.hdr.tcp.th_ack) - 1;
s->src.seqlo =
ntohl(pd.hdr.tcp.th_seq) - 1;
pf_set_protostate(s, PF_PEER_SRC,
PF_TCPS_PROXY_DST);
+ PF_LOCK();
+ have_pf_lock = 1;
action = pf_synproxy(&pd, &s, &reason);
if (action != PF_PASS) {
PF_UNLOCK();
+ pf_state_unref(s);
return (action);
}
}
@@ -6974,19 +7025,22 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
action = PF_DROP;
}
-
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s);
+ pfsync_update_state(s, &have_pf_lock);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;
#if NPFLOG > 0
pd.pflog |= s->log;
#endif /* NPFLOG > 0 */
- } else if (s == NULL)
+ } else if (s == NULL) {
+ PF_LOCK();
+ have_pf_lock = 1;
action = pf_test_rule(&pd, &r, &s, &a, &ruleset,
&reason);
+ s = pf_state_ref(s);
+ }
if (pd.virtual_proto == IPPROTO_TCP) {
if (s) {
@@ -6999,12 +7053,13 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
break;
}
- PF_UNLOCK();
+ if (have_pf_lock != 0)
+ PF_UNLOCK();
/*
* At the moment, we rely on NET_LOCK() to prevent removal of items
- * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll
- * have to be refcounted when NET_LOCK() is gone.
+ * we've collected above ('r', 'anchor' and 'ruleset'). They'll have
+ * to be refcounted when NET_LOCK() is gone.
*/
done:
@@ -7201,6 +7256,8 @@ done:
*m0 = pd.m;
+ pf_state_unref(s);
+
return (action);
}
@@ -7401,6 +7458,33 @@ pf_state_key_unlink_reverse(struct pf_state_key *sk)
}
}
+struct pf_state *
+pf_state_ref(struct pf_state *s)
+{
+ if (s != NULL)
+ PF_REF_TAKE(s->refcnt);
+ return (s);
+}
+
+void
+pf_state_unref(struct pf_state *s)
+{
+ if ((s != NULL) && PF_REF_RELE(s->refcnt)) {
+ /* never inserted or removed */
+#if NPFSYNC > 0
+ KASSERT((TAILQ_NEXT(s, sync_list) == NULL) ||
+ ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) &&
+ (s->sync_state == PFSYNC_S_NONE)));
+#endif /* NPFSYNC */
+ KASSERT((TAILQ_NEXT(s, entry_list) == NULL) ||
+ (TAILQ_NEXT(s, entry_list) == _Q_INVALID));
+ KASSERT((s->key[PF_SK_WIRE] == NULL) &&
+ (s->key[PF_SK_STACK] == NULL));
+
+ pool_put(&pf_state_pl, s);
+ }
+}
+
int
pf_delay_pkt(struct mbuf *m, u_int ifidx)
{
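The reworked pf_purge_expired_states() above now runs in two phases: scan
under the read lock, collecting candidates on a private SLIST, then take
the write lock once to unlink and free them. A userland model of the
pattern (names invented; it leaves out the per-state refcounting the real
code uses to keep cur/next alive across iterations):

#include <sys/queue.h>
#include <pthread.h>
#include <stdlib.h>

struct state {
	TAILQ_ENTRY(state)	entry_list;
	SLIST_ENTRY(state)	gc_list;
	int			expired;
};

static TAILQ_HEAD(, state) state_list = TAILQ_HEAD_INITIALIZER(state_list);
static pthread_rwlock_t state_lock = PTHREAD_RWLOCK_INITIALIZER;

static void
purge_expired(void)
{
	SLIST_HEAD(, state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	struct state *st;

	/* phase 1: read lock only, so lookups keep running concurrently */
	pthread_rwlock_rdlock(&state_lock);
	TAILQ_FOREACH(st, &state_list, entry_list)
		if (st->expired)
			SLIST_INSERT_HEAD(&gcl, st, gc_list);
	pthread_rwlock_unlock(&state_lock);

	/* phase 2: one short exclusive section to unlink and free */
	pthread_rwlock_wrlock(&state_lock);
	while ((st = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		TAILQ_REMOVE(&state_list, st, entry_list);
		free(st);
	}
	pthread_rwlock_unlock(&state_lock);
}

int
main(void)
{
	struct state *st = calloc(1, sizeof(*st));

	st->expired = 1;
	TAILQ_INSERT_TAIL(&state_list, st, entry_list);
	purge_expired();
	return (0);
}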
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index a639bd60704..72774436aa0 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf_ioctl.c,v 1.336 2018/07/22 09:09:18 sf Exp $ */
+/* $OpenBSD: pf_ioctl.c,v 1.337 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -132,7 +132,21 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
#ifdef WITH_PF_LOCK
+/*
+ * pf_lock protects consistency of PF data structures, which don't have
+ * their dedicated lock yet. The pf_lock currently protects:
+ * - rules,
+ * - radix tables,
+ * - source nodes.
+ * All callers must grab pf_lock exclusively.
+ *
+ * pf_state_lock protects consistency of the state table. Packets that do
+ * a state lookup grab the lock as readers. If a packet must create a
+ * state, it must grab the lock as a writer. Whenever a packet creates a
+ * state, it grabs pf_lock first and then locks pf_state_lock as the
+ * writer.
+ */
struct rwlock pf_lock = RWLOCK_INITIALIZER("pf_lock");
+struct rwlock pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock");
#endif /* WITH_PF_LOCK */
#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
@@ -1501,6 +1515,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
u_int killed = 0;
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) {
nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
@@ -1514,6 +1529,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
killed++;
}
}
+ PF_STATE_EXIT_WRITE();
psk->psk_killed = killed;
#if NPFSYNC > 0
pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
@@ -1537,10 +1553,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
if (psk->psk_pfcmp.creatorid == 0)
psk->psk_pfcmp.creatorid = pf_status.hostid;
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
if ((s = pf_find_state_byid(&psk->psk_pfcmp))) {
pf_remove_state(s);
psk->psk_killed = 1;
}
+ PF_STATE_EXIT_WRITE();
PF_UNLOCK();
break;
}
@@ -1554,6 +1572,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
key.rdomain = psk->psk_rdomain;
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
for (i = 0; i < nitems(dirs); i++) {
if (dirs[i] == PF_IN) {
sidx = 0;
@@ -1594,11 +1613,13 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
if (killed)
psk->psk_killed = killed;
+ PF_STATE_EXIT_WRITE();
PF_UNLOCK();
break;
}
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
for (s = RB_MIN(pf_state_tree_id, &tree_id); s;
s = nexts) {
nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
@@ -1644,6 +1665,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
}
psk->psk_killed = killed;
+ PF_STATE_EXIT_WRITE();
PF_UNLOCK();
break;
}
@@ -1658,7 +1680,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
break;
}
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
error = pfsync_state_import(sp, PFSYNC_SI_IOCTL);
+ PF_STATE_EXIT_WRITE();
PF_UNLOCK();
break;
}
@@ -1673,16 +1697,17 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
id_key.id = ps->state.id;
id_key.creatorid = ps->state.creatorid;
- PF_LOCK();
+ PF_STATE_ENTER_READ();
s = pf_find_state_byid(&id_key);
+ s = pf_state_ref(s);
+ PF_STATE_EXIT_READ();
if (s == NULL) {
error = ENOENT;
- PF_UNLOCK();
break;
}
pf_state_export(&ps->state, s);
- PF_UNLOCK();
+ pf_state_unref(s);
break;
}
@@ -1702,7 +1727,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
p = ps->ps_states;
- PF_LOCK();
+ PF_STATE_ENTER_READ();
state = TAILQ_FIRST(&state_list);
while (state) {
if (state->timeout != PFTM_UNLINKED) {
@@ -1712,7 +1737,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
error = copyout(pstore, p, sizeof(*p));
if (error) {
free(pstore, M_TEMP, sizeof(*pstore));
- PF_UNLOCK();
+ PF_STATE_EXIT_READ();
goto fail;
}
p++;
@@ -1720,7 +1745,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
}
state = TAILQ_NEXT(state, entry_list);
}
- PF_UNLOCK();
+ PF_STATE_EXIT_READ();
ps->ps_len = sizeof(struct pfsync_state) * nr;
@@ -1801,8 +1826,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
key.port[didx] = pnl->dport;
- PF_LOCK();
+ PF_STATE_ENTER_READ();
state = pf_find_state_all(&key, direction, &m);
+ state = pf_state_ref(state);
+ PF_STATE_EXIT_READ();
if (m > 1)
error = E2BIG; /* more than one state */
@@ -1815,7 +1842,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
pnl->rrdomain = sk->rdomain;
} else
error = ENOENT;
- PF_UNLOCK();
+ pf_state_unref(state);
}
break;
}
@@ -2576,8 +2603,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
struct pf_state *state;
PF_LOCK();
+ PF_STATE_ENTER_WRITE();
RB_FOREACH(state, pf_state_tree_id, &tree_id)
pf_src_tree_remove_state(state);
+ PF_STATE_EXIT_WRITE();
RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
n->expire = 1;
pf_purge_expired_src_nodes();
@@ -2603,10 +2632,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
&psnk->psnk_dst.addr.v.a.mask,
&sn->raddr, sn->af)) {
/* Handle state to src_node linkage */
- if (sn->states != 0)
+ if (sn->states != 0) {
+ PF_ASSERT_LOCKED();
+ PF_STATE_ENTER_WRITE();
RB_FOREACH(s, pf_state_tree_id,
&tree_id)
pf_state_rm_src_node(s, sn);
+ PF_STATE_EXIT_WRITE();
+ }
sn->expire = 1;
killed++;
}
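Several ioctl paths above (DIOCGETSTATE, DIOCNATLOOK) switch to the same
shape: find under the read lock, take a reference, drop the lock, then
export. A hedged userland model of that pattern, with invented names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct state {
	atomic_int	refcnt;
	int		id;
};

static pthread_rwlock_t model_state_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct state *table;		/* stands in for the RB trees */

static struct state *
state_ref(struct state *s)
{
	if (s != NULL)
		atomic_fetch_add(&s->refcnt, 1);
	return (s);
}

static void
state_unref(struct state *s)
{
	if (s != NULL && atomic_fetch_sub(&s->refcnt, 1) == 1)
		free(s);
}

static int
get_state(int id, int *out)
{
	struct state *s;

	pthread_rwlock_rdlock(&model_state_lock);
	s = (table != NULL && table->id == id) ? table : NULL;
	s = state_ref(s);		/* pin before dropping the lock */
	pthread_rwlock_unlock(&model_state_lock);

	if (s == NULL)
		return (-1);		/* ENOENT in the real ioctl */
	*out = s->id;			/* "export" without any lock held */
	state_unref(s);
	return (0);
}

int
main(void)
{
	int v;

	table = calloc(1, sizeof(*table));
	atomic_init(&table->refcnt, 1);	/* the table's own reference */
	table->id = 7;
	if (get_state(7, &v) == 0)
		printf("exported state %d\n", v);
	return (0);
}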
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 97c13aa306c..fb43fe0b166 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar.h,v 1.484 2018/09/10 11:37:26 bluhm Exp $ */
+/* $OpenBSD: pfvar.h,v 1.485 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -761,6 +761,7 @@ struct pf_state {
TAILQ_ENTRY(pf_state) sync_list;
TAILQ_ENTRY(pf_state) entry_list;
+ SLIST_ENTRY(pf_state) gc_list;
RB_ENTRY(pf_state) entry_id;
struct pf_state_peer src;
struct pf_state_peer dst;
@@ -805,6 +806,7 @@ struct pf_state {
u_int16_t max_mss;
u_int16_t if_index_in;
u_int16_t if_index_out;
+ pf_refcnt_t refcnt;
u_int16_t delay;
};
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 466548c3301..2b62cfe455b 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar_priv.h,v 1.4 2017/08/06 13:16:11 mpi Exp $ */
+/* $OpenBSD: pfvar_priv.h,v 1.5 2018/09/11 07:53:38 sashan Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -101,8 +101,12 @@ struct pf_pdesc {
extern struct task pf_purge_task;
extern struct timeout pf_purge_to;
+struct pf_state *pf_state_ref(struct pf_state *);
+void pf_state_unref(struct pf_state *);
+
#ifdef WITH_PF_LOCK
extern struct rwlock pf_lock;
+extern struct rwlock pf_state_lock;
#define PF_LOCK() do { \
NET_ASSERT_LOCKED(); \
@@ -124,11 +128,42 @@ extern struct rwlock pf_lock;
if (rw_status(&pf_lock) == RW_WRITE) \
splassert_fail(0, rw_status(&pf_lock), __func__);\
} while (0)
+
+#define PF_STATE_ENTER_READ() do { \
+ rw_enter_read(&pf_state_lock); \
+ } while (0)
+
+#define PF_STATE_EXIT_READ() do { \
+ rw_exit_read(&pf_state_lock); \
+ } while (0)
+
+#define PF_STATE_ENTER_WRITE() do { \
+ rw_enter_write(&pf_state_lock); \
+ } while (0)
+
+#define PF_STATE_EXIT_WRITE() do { \
+ PF_ASSERT_STATE_LOCKED(); \
+ rw_exit_write(&pf_state_lock); \
+ } while (0)
+
+#define PF_ASSERT_STATE_LOCKED() do { \
+ if (rw_status(&pf_state_lock) != RW_WRITE)\
+ splassert_fail(RW_WRITE, \
+ rw_status(&pf_state_lock), __func__);\
+ } while (0)
+
#else /* !WITH_PF_LOCK */
#define PF_LOCK() (void)(0)
#define PF_UNLOCK() (void)(0)
#define PF_ASSERT_LOCKED() (void)(0)
#define PF_ASSERT_UNLOCKED() (void)(0)
+
+#define PF_STATE_ENTER_READ() (void)(0)
+#define PF_STATE_EXIT_READ() (void)(0)
+#define PF_STATE_ENTER_WRITE() (void)(0)
+#define PF_STATE_EXIT_WRITE() (void)(0)
+#define PF_ASSERT_STATE_LOCKED() (void)(0)
+
#endif /* WITH_PF_LOCK */
extern void pf_purge_timeout(void *);
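Note that PF_STATE_EXIT_WRITE() above asserts write ownership before
unlocking. A userland model of that assert-then-unlock discipline (a
pthread rwlock plus a manual owner flag, since pthreads has no
rw_status(); names invented):

#include <assert.h>
#include <pthread.h>

static pthread_rwlock_t model_state_lock = PTHREAD_RWLOCK_INITIALIZER;
static int writer_held;		/* models rw_status() == RW_WRITE */

static void
state_enter_write(void)
{
	pthread_rwlock_wrlock(&model_state_lock);
	writer_held = 1;
}

static void
state_exit_write(void)
{
	assert(writer_held == 1);	/* PF_ASSERT_STATE_LOCKED() */
	writer_held = 0;
	pthread_rwlock_unlock(&model_state_lock);
}

int
main(void)
{
	state_enter_write();
	state_exit_write();
	return (0);
}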