summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Gwynne <dlg@cvs.openbsd.org>2022-12-16 02:05:46 +0000
committerDavid Gwynne <dlg@cvs.openbsd.org>2022-12-16 02:05:46 +0000
commita5342d1c56ea225ea752c628beb9b6a17e4d30c7 (patch)
treedad319c9f03a3771ae7d6f3e76f2b67044740f5d
parent5726119b2193931f8f2a7e963ca85a012c3a7dec (diff)
always keep pf_state_keys attached to pf_states.
pf_state structures don't contain ip addresses, protocols, ports, etc. that information is stored in a pf_state_key struct, which is used to wire a state into the state table. when things like pfsync or the pf state ioctls want to export information about a state, particularly the addresses on it, they needs the pf_state_key struct to read from. before this diff the code assumed that when a state was removed from the state tables it could throw the pf_state_key structs away as part of that removal. this code changes it so once pf_state_insert succeeds, a pf_state will keep its references to the pf_state_key structs until the pf_state struct itself is being destroyed. this allows anything that holds a reference to a pf_state to also look at the pf_state_key structs because they're now effectively an immutable part of the pf_state struct. this is by far the simplest and most straightforward fix for pfsync crashing on pf_state_key dereferences we've come up with so far. it has been made possible by the addition of reference counts to pf_state and pf_state_key structs, which allows us to properly account for this adjusted lifecycle for pf_state_keys on pf_state structs. sashan@ and i have been kicking this diff around for a couple of weeks now. ok sashan@ jmatthew@
-rw-r--r--sys/net/pf.c181
-rw-r--r--sys/net/pfvar.h3
-rw-r--r--sys/net/pfvar_priv.h6
3 files changed, 112 insertions, 78 deletions
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 593f08ed4e2..7203589d4bd 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf.c,v 1.1156 2022/11/25 20:27:53 bluhm Exp $ */
+/* $OpenBSD: pf.c,v 1.1157 2022/12/16 02:05:44 dlg Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -185,6 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
sa_family_t, struct pf_rule *, u_int);
void pf_detach_state(struct pf_state *);
+struct pf_state_key *pf_state_key_attach(struct pf_state_key *,
+ struct pf_state *, int);
void pf_state_key_detach(struct pf_state *, int);
u_int32_t pf_tcp_iss(struct pf_pdesc *);
void pf_rule_to_actions(struct pf_rule *,
@@ -379,7 +381,7 @@ pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
if (s->src.state == newstate)
return;
- if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
+ if (s->creatorid == pf_status.hostid &&
s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
!(TCPS_HAVEESTABLISHED(s->src.state) ||
s->src.state == TCPS_CLOSED) &&
@@ -720,17 +722,26 @@ pf_state_compare_id(struct pf_state *a, struct pf_state *b)
return (0);
}
-int
+/*
+ * on failure, pf_state_key_attach() releases the pf_state_key
+ * reference and returns NULL.
+ */
+struct pf_state_key *
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
struct pf_state_item *si;
struct pf_state_key *cur;
struct pf_state *olds = NULL;
+ PF_ASSERT_LOCKED();
+
KASSERT(s->key[idx] == NULL);
- if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
+ sk->removed = 0;
+ cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk);
+ if (cur != NULL) {
+ sk->removed = 1;
/* key exists. check for same kif, if none, add to key */
- TAILQ_FOREACH(si, &cur->states, entry)
+ TAILQ_FOREACH(si, &cur->states, entry) {
if (si->s->kif == s->kif &&
((si->s->key[PF_SK_WIRE]->af == sk->af &&
si->s->direction == s->direction) ||
@@ -766,44 +777,53 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
/* remove late or sks can go away */
olds = si->s;
} else {
- pool_put(&pf_state_key_pl, sk);
- return (-1); /* collision! */
+ pf_state_key_unref(sk);
+ return (NULL); /* collision! */
}
}
- pool_put(&pf_state_key_pl, sk);
- s->key[idx] = cur;
- } else
- s->key[idx] = sk;
+ }
+
+ /* reuse the existing state key */
+ pf_state_key_unref(sk);
+ sk = cur;
+ }
if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
- pf_state_key_detach(s, idx);
- return (-1);
+ if (TAILQ_EMPTY(&sk->states)) {
+ KASSERT(cur == NULL);
+ RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
+ sk->removed = 1;
+ pf_state_key_unref(sk);
+ }
+
+ return (NULL);
}
- si->s = s;
+
+ s->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
+ si->s = pf_state_ref(s);
/* list is sorted, if-bound states before floating */
if (s->kif == pfi_all)
- TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
+ TAILQ_INSERT_TAIL(&sk->states, si, entry);
else
- TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
+ TAILQ_INSERT_HEAD(&sk->states, si, entry);
if (olds)
pf_remove_state(olds);
- return (0);
+ /* caller owns the pf_state ref, which owns a pf_state_key ref now */
+ return (sk);
}
void
pf_detach_state(struct pf_state *s)
{
- if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
- s->key[PF_SK_WIRE] = NULL;
+ KASSERT(s->key[PF_SK_WIRE] != NULL);
+ pf_state_key_detach(s, PF_SK_WIRE);
- if (s->key[PF_SK_STACK] != NULL)
+ KASSERT(s->key[PF_SK_STACK] != NULL);
+ if (s->key[PF_SK_STACK] != s->key[PF_SK_WIRE])
pf_state_key_detach(s, PF_SK_STACK);
-
- if (s->key[PF_SK_WIRE] != NULL)
- pf_state_key_detach(s, PF_SK_WIRE);
}
void
@@ -812,20 +832,22 @@ pf_state_key_detach(struct pf_state *s, int idx)
struct pf_state_item *si;
struct pf_state_key *sk;
- if (s->key[idx] == NULL)
- return;
+ PF_ASSERT_LOCKED();
- si = TAILQ_FIRST(&s->key[idx]->states);
- while (si && si->s != s)
- si = TAILQ_NEXT(si, entry);
+ sk = s->key[idx];
+ if (sk == NULL)
+ return;
- if (si) {
- TAILQ_REMOVE(&s->key[idx]->states, si, entry);
- pool_put(&pf_state_item_pl, si);
+ TAILQ_FOREACH(si, &sk->states, entry) {
+ if (si->s == s)
+ break;
}
+ if (si == NULL)
+ return;
- sk = s->key[idx];
- s->key[idx] = NULL;
+ TAILQ_REMOVE(&sk->states, si, entry);
+ pool_put(&pf_state_item_pl, si);
+
if (TAILQ_EMPTY(&sk->states)) {
RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
sk->removed = 1;
@@ -833,6 +855,8 @@ pf_state_key_detach(struct pf_state *s, int idx)
pf_state_key_unlink_inpcb(sk);
pf_state_key_unref(sk);
}
+
+ pf_state_unref(s);
}
struct pf_state_key *
@@ -842,7 +866,10 @@ pf_alloc_state_key(int pool_flags)
if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
return (NULL);
+
+ PF_REF_INIT(sk->refcnt);
TAILQ_INIT(&sk->states);
+ sk->removed = 1;
return (sk);
}
@@ -916,8 +943,6 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
sk1->proto = pd->proto;
sk1->af = pd->af;
sk1->rdomain = pd->rdomain;
- PF_REF_INIT(sk1->refcnt);
- sk1->removed = 0;
if (rtableid >= 0)
wrdom = rtable_l2(rtableid);
@@ -926,7 +951,7 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
pd->nsport != pd->osport || pd->ndport != pd->odport ||
wrdom != pd->rdomain || afto) { /* NAT/NAT64 */
if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
- pool_put(&pf_state_key_pl, sk1);
+ pf_state_key_unref(sk1);
return (ENOMEM);
}
pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
@@ -949,10 +974,8 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
sk2->proto = pd->proto;
sk2->af = pd->naf;
sk2->rdomain = wrdom;
- PF_REF_INIT(sk2->refcnt);
- sk2->removed = 0;
} else
- sk2 = sk1;
+ sk2 = pf_state_key_ref(sk1);
if (pd->dir == PF_IN) {
*skw = sk1;
@@ -971,34 +994,47 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
return (0);
}
+/*
+ * pf_state_insert() does the following:
+ * - links the pf_state up with pf_state_key(s).
+ * - inserts the pf_state_keys into pf_state_tree.
+ * - inserts the pf_state into the into pf_state_tree_id.
+ * - tells pfsync about the state.
+ *
+ * pf_state_insert() owns the references to the pf_state_key structs
+ * it is given. on failure to insert, these references are released.
+ * on success, the caller owns a pf_state reference that allows it
+ * to access the state keys.
+ */
+
int
-pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
- struct pf_state_key **sks, struct pf_state *s)
+pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
+ struct pf_state_key **sksp, struct pf_state *s)
{
+ struct pf_state_key *skw = *skwp;
+ struct pf_state_key *sks = *sksp;
+ int same = (skw == sks);
+
PF_ASSERT_LOCKED();
s->kif = kif;
PF_STATE_ENTER_WRITE();
- if (*skw == *sks) {
- if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
- PF_STATE_EXIT_WRITE();
- return (-1);
- }
- *skw = *sks = s->key[PF_SK_WIRE];
- s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
- } else {
- if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
- pool_put(&pf_state_key_pl, *sks);
- PF_STATE_EXIT_WRITE();
- return (-1);
- }
- *skw = s->key[PF_SK_WIRE];
- if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
- pf_state_key_detach(s, PF_SK_WIRE);
- PF_STATE_EXIT_WRITE();
- return (-1);
- }
- *sks = s->key[PF_SK_STACK];
+
+ skw = pf_state_key_attach(skw, s, PF_SK_WIRE);
+ if (skw == NULL) {
+ pf_state_key_unref(sks);
+ PF_STATE_EXIT_WRITE();
+ return (-1);
+ }
+
+ if (same) {
+ /* pf_state_key_attach might have swapped skw */
+ pf_state_key_unref(sks);
+ s->key[PF_SK_STACK] = sks = pf_state_key_ref(skw);
+ } else if (pf_state_key_attach(sks, s, PF_SK_STACK) == NULL) {
+ pf_state_key_detach(s, PF_SK_WIRE);
+ PF_STATE_EXIT_WRITE();
+ return (-1);
}
if (s->id == 0 && s->creatorid == 0) {
@@ -1024,6 +1060,10 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
pfsync_insert_state(s);
#endif /* NPFSYNC > 0 */
PF_STATE_EXIT_WRITE();
+
+ *skwp = skw;
+ *sksp = sks;
+
return (0);
}
@@ -1374,7 +1414,7 @@ pf_state_import(const struct pfsync_state *sp, int flags)
if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
goto cleanup;
} else
- sks = skw;
+ sks = pf_state_key_ref(skw);
/* allocate memory for scrub info */
if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
@@ -1387,7 +1427,6 @@ pf_state_import(const struct pfsync_state *sp, int flags)
skw->port[0] = sp->key[PF_SK_WIRE].port[0];
skw->port[1] = sp->key[PF_SK_WIRE].port[1];
skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
- PF_REF_INIT(skw->refcnt);
skw->proto = sp->proto;
if (!(skw->af = sp->key[PF_SK_WIRE].af))
skw->af = sp->af;
@@ -1397,7 +1436,6 @@ pf_state_import(const struct pfsync_state *sp, int flags)
sks->port[0] = sp->key[PF_SK_STACK].port[0];
sks->port[1] = sp->key[PF_SK_STACK].port[1];
sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
- PF_REF_INIT(sks->refcnt);
if (!(sks->af = sp->key[PF_SK_STACK].af))
sks->af = sp->af;
if (sks->af != skw->af) {
@@ -1499,12 +1537,10 @@ pf_state_import(const struct pfsync_state *sp, int flags)
return (0);
cleanup:
- if (skw == sks)
- sks = NULL;
if (skw != NULL)
- pool_put(&pf_state_key_pl, skw);
+ pf_state_key_unref(skw);
if (sks != NULL)
- pool_put(&pf_state_key_pl, sks);
+ pf_state_key_unref(sks);
cleanup_state: /* pf_state_insert frees the state keys */
if (st) {
@@ -1587,10 +1623,9 @@ pf_state_expires(const struct pf_state *state, uint8_t stimeout)
*/
/* handle all PFTM_* > PFTM_MAX here */
- if (stimeout == PFTM_PURGE)
+ if (stimeout > PFTM_MAX)
return (0);
- KASSERT(stimeout != PFTM_UNLINKED);
KASSERT(stimeout < PFTM_MAX);
timeout = state->rule.ptr->timeout[stimeout];
@@ -4460,7 +4495,6 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
}
if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
- pf_detach_state(s);
*sks = *skw = NULL;
REASON_SET(&reason, PFRES_STATEINS);
goto csfailed;
@@ -7993,8 +8027,9 @@ pf_state_unref(struct pf_state *s)
#endif /* NPFSYNC */
KASSERT((TAILQ_NEXT(s, entry_list) == NULL) ||
(TAILQ_NEXT(s, entry_list) == _Q_INVALID));
- KASSERT((s->key[PF_SK_WIRE] == NULL) &&
- (s->key[PF_SK_STACK] == NULL));
+
+ pf_state_key_unref(s->key[PF_SK_WIRE]);
+ pf_state_key_unref(s->key[PF_SK_STACK]);
pool_put(&pf_state_pl, s);
}
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 1023966b087..d547bef23b2 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar.h,v 1.521 2022/11/25 20:27:53 bluhm Exp $ */
+/* $OpenBSD: pfvar.h,v 1.522 2022/12/16 02:05:44 dlg Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -1731,7 +1731,6 @@ void pf_pkt_addr_changed(struct mbuf *);
struct inpcb *pf_inp_lookup(struct mbuf *);
void pf_inp_link(struct mbuf *, struct inpcb *);
void pf_inp_unlink(struct inpcb *);
-int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int);
int pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t,
struct pf_addr *, u_int16_t, u_int16_t, int);
int pf_translate_af(struct pf_pdesc *);
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 28d98e4c2e1..81e26c96a38 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar_priv.h,v 1.23 2022/11/25 20:27:53 bluhm Exp $ */
+/* $OpenBSD: pfvar_priv.h,v 1.24 2022/12/16 02:05:45 dlg Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -42,7 +42,7 @@
/*
* Protection/ownership of pf_state members:
- * I immutable after creation
+ * I immutable after pf_state_insert()
* M pf_state mtx
* P PF_STATE_LOCK
* S pfsync mutex
@@ -69,7 +69,7 @@ struct pf_state {
union pf_rule_ptr natrule; /* [I] */
struct pf_addr rt_addr; /* [I] */
struct pf_sn_head src_nodes; /* [I] */
- struct pf_state_key *key[2]; /* stack and wire */
+ struct pf_state_key *key[2]; /* [I] stack and wire */
struct pfi_kif *kif; /* [I] */
struct mutex mtx;
pf_refcnt_t refcnt;