author    Alexandr Nedvedicky <sashan@cvs.openbsd.org>  2021-02-04 00:55:42 +0000
committer Alexandr Nedvedicky <sashan@cvs.openbsd.org>  2021-02-04 00:55:42 +0000
commit    e542bab85a75bf748a58782c7f9304dcf5ebf571 (patch)
tree      a233740cd09bddc759ae1ff018f362dad6be2bc9
parent    8a63a59ae1371b0bb050ec5e4e332c7c922e05de (diff)
make if_pfsync.c a better friend with PF_LOCK
The code delivered in this change is currently disabled. Brave souls may enable it by adding -DWITH_PF_LOCK when building a customized kernel. Big thanks go to Hrvoje@ for providing test equipment and testing. As soon as we enter the next release cycle, WITH_PF_LOCK will be defined as the default option for MP kernels. OK dlg@
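For reference, a minimal sketch of a custom MP kernel config that turns the option on; the file name CUSTOM.MP is hypothetical, and the option line mirrors the commented-out one this commit adds to GENERIC.MP (first hunk below):

        # /sys/arch/amd64/conf/CUSTOM.MP -- hypothetical config enabling PF_LOCK
        include "arch/amd64/conf/GENERIC"

        option WITH_PF_LOCK             # build pf(4)/pfsync(4) with the PF lock
        option MULTIPROCESSOR

Building then follows the usual config(8) and make cycle for a custom kernel; alternatively, uncommenting the #option WITH_PF_LOCK line in GENERIC.MP itself has the same effect.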
-rw-r--r--  sys/arch/amd64/conf/GENERIC.MP |   3
-rw-r--r--  sys/net/if_pfsync.c            | 549
-rw-r--r--  sys/net/if_pfsync.h            |   4
-rw-r--r--  sys/net/pf.c                   |   8
4 files changed, 385 insertions(+), 179 deletions(-)
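Before the diff itself, a condensed userspace sketch of the two patterns the change introduces in if_pfsync.c may help: producers reserve room in the pending packet through an atomic length counter and back off, flush, and retry on MTU overflow (cf. pfsync_q_ins() and pfsync_request_update() below), while the sender steals all queued work into a private snapshot under the mutexes and then builds the packet without holding them (cf. pfsync_grab_snapshot() and pfsync_sendout()). All names here are illustrative, not the kernel interfaces, and a BSD sys/queue.h with TAILQ_CONCAT is assumed:

        /* Illustrative stand-ins for the pfsync softc/snapshot machinery. */
        #include <sys/queue.h>
        #include <pthread.h>
        #include <stdatomic.h>

        enum { MINPKT = 28, MTU = 1500, ITEMLEN = 64 };

        struct item { TAILQ_ENTRY(item) entry; };
        TAILQ_HEAD(itemq, item);

        struct softc {
                pthread_mutex_t mtx;    /* guards q, like sc_mtx[]/sc_upd_req_mtx */
                struct itemq    q;      /* pending work, like sc_qs[] */
                atomic_size_t   len;    /* pending packet length, like sc_len */
        };

        struct snap {                   /* like struct pfsync_snapshot */
                struct itemq    q;      /* private copy; safe to walk unlocked */
                size_t          len;
        };

        /* Sender: steal everything under the lock, build the packet lock-free. */
        void
        grab_snapshot(struct snap *sn, struct softc *sc)
        {
                pthread_mutex_lock(&sc->mtx);
                TAILQ_INIT(&sn->q);
                TAILQ_CONCAT(&sn->q, &sc->q, entry);    /* O(1) queue handoff */
                sn->len = atomic_exchange(&sc->len, MINPKT); /* softc starts fresh */
                pthread_mutex_unlock(&sc->mtx);
        }

        /* Producer: reserve space first; on MTU overflow undo, flush, retry. */
        void
        enqueue(struct softc *sc, struct item *it, void (*flush)(struct softc *))
        {
                for (;;) {
                        pthread_mutex_lock(&sc->mtx);
                        if (atomic_fetch_add(&sc->len, ITEMLEN) + ITEMLEN > MTU) {
                                atomic_fetch_sub(&sc->len, ITEMLEN);    /* back off */
                                pthread_mutex_unlock(&sc->mtx);
                                flush(sc);              /* like pfsync_sendout() */
                                continue;
                        }
                        TAILQ_INSERT_TAIL(&sc->q, it, entry);
                        pthread_mutex_unlock(&sc->mtx);
                        return;
                }
        }

The add/undo pair on len plays the role of the atomic_add_long_nv()/atomic_sub_long() calls on sc_len in the diff, and grab_snapshot() corresponds to pfsync_grab_snapshot(), which additionally takes all per-queue mutexes at once.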
diff --git a/sys/arch/amd64/conf/GENERIC.MP b/sys/arch/amd64/conf/GENERIC.MP
index 7f195a53092..5575ffea81b 100644
--- a/sys/arch/amd64/conf/GENERIC.MP
+++ b/sys/arch/amd64/conf/GENERIC.MP
@@ -1,7 +1,8 @@
-# $OpenBSD: GENERIC.MP,v 1.14 2018/07/13 05:25:24 tb Exp $
+# $OpenBSD: GENERIC.MP,v 1.15 2021/02/04 00:55:41 sashan Exp $
include "arch/amd64/conf/GENERIC"
+#option WITH_PF_LOCK
option MULTIPROCESSOR
#option MP_LOCKDEBUG
#option WITNESS
diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c
index 2b094dd855f..b5fe9fb3607 100644
--- a/sys/net/if_pfsync.c
+++ b/sys/net/if_pfsync.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_pfsync.c,v 1.282 2021/02/01 00:31:05 dlg Exp $ */
+/* $OpenBSD: if_pfsync.c,v 1.283 2021/02/04 00:55:41 sashan Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff
@@ -210,9 +210,11 @@ struct pfsync_softc {
struct ip sc_template;
struct pf_state_queue sc_qs[PFSYNC_S_COUNT];
+ struct mutex sc_mtx[PFSYNC_S_COUNT];
size_t sc_len;
struct pfsync_upd_reqs sc_upd_req_list;
+ struct mutex sc_upd_req_mtx;
int sc_initial_bulk;
int sc_link_demoted;
@@ -220,6 +222,7 @@ struct pfsync_softc {
int sc_defer;
struct pfsync_deferrals sc_deferrals;
u_int sc_deferred;
+ struct mutex sc_deferrals_mtx;
void *sc_plus;
size_t sc_pluslen;
@@ -234,6 +237,7 @@ struct pfsync_softc {
struct timeout sc_bulk_tmo;
TAILQ_HEAD(, tdb) sc_tdb_q;
+ struct mutex sc_tdb_mtx;
struct task sc_ltask;
struct task sc_dtask;
@@ -241,6 +245,16 @@ struct pfsync_softc {
struct timeout sc_tmo;
};
+struct pfsync_snapshot {
+ struct pfsync_softc *sn_sc;
+ struct pf_state_queue sn_qs[PFSYNC_S_COUNT];
+ struct pfsync_upd_reqs sn_upd_req_list;
+ TAILQ_HEAD(, tdb) sn_tdb_q;
+ size_t sn_len;
+ void *sn_plus;
+ size_t sn_pluslen;
+};
+
struct pfsync_softc *pfsyncif = NULL;
struct cpumem *pfsynccounters;
@@ -276,6 +290,10 @@ void pfsync_bulk_start(void);
void pfsync_bulk_status(u_int8_t);
void pfsync_bulk_update(void *);
void pfsync_bulk_fail(void *);
+
+void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
+void pfsync_drop_snapshot(struct pfsync_snapshot *);
+
#ifdef WITH_PF_LOCK
void pfsync_send_dispatch(void *);
void pfsync_send_pkt(struct mbuf *);
@@ -307,6 +325,13 @@ pfsync_clone_create(struct if_clone *ifc, int unit)
struct pfsync_softc *sc;
struct ifnet *ifp;
int q;
+ static const char *mtx_names[] = {
+ "iack_mtx",
+ "upd_c_mtx",
+ "del_mtx",
+ "ins_mtx",
+ "upd_mtx",
+ "" };
if (unit != 0)
return (EINVAL);
@@ -314,18 +339,23 @@ pfsync_clone_create(struct if_clone *ifc, int unit)
pfsync_sync_ok = 1;
sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
- for (q = 0; q < PFSYNC_S_COUNT; q++)
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
TAILQ_INIT(&sc->sc_qs[q]);
+ mtx_init_flags(&sc->sc_mtx[q], IPL_SOFTNET, mtx_names[q], 0);
+ }
pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync",
NULL);
TAILQ_INIT(&sc->sc_upd_req_list);
+ mtx_init(&sc->sc_upd_req_mtx, IPL_SOFTNET);
TAILQ_INIT(&sc->sc_deferrals);
+ mtx_init(&sc->sc_deferrals_mtx, IPL_SOFTNET);
task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
sc->sc_deferred = 0;
TAILQ_INIT(&sc->sc_tdb_q);
+ mtx_init(&sc->sc_tdb_mtx, IPL_SOFTNET);
sc->sc_len = PFSYNC_MINPKT;
sc->sc_maxupdates = 128;
@@ -370,6 +400,7 @@ pfsync_clone_destroy(struct ifnet *ifp)
struct pfsync_softc *sc = ifp->if_softc;
struct ifnet *ifp0;
struct pfsync_deferral *pd;
+ struct pfsync_deferrals deferrals;
NET_LOCK();
@@ -392,10 +423,18 @@ pfsync_clone_destroy(struct ifnet *ifp)
pfsync_drop(sc);
- while (sc->sc_deferred > 0) {
- pd = TAILQ_FIRST(&sc->sc_deferrals);
- timeout_del(&pd->pd_tmo);
- pfsync_undefer(pd, 0);
+ if (sc->sc_deferred > 0) {
+ TAILQ_INIT(&deferrals);
+ mtx_enter(&sc->sc_deferrals_mtx);
+ TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry);
+ sc->sc_deferred = 0;
+ mtx_leave(&sc->sc_deferrals_mtx);
+
+ while (!TAILQ_EMPTY(&deferrals)) {
+ pd = TAILQ_FIRST(&deferrals);
+ TAILQ_REMOVE(&deferrals, pd, pd_entry);
+ pfsync_undefer(pd, 0);
+ }
}
pfsyncif = NULL;
@@ -786,10 +825,8 @@ pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
return IPPROTO_DONE;
}
- PF_LOCK();
e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
flags);
- PF_UNLOCK();
if (e != 0)
goto done;
@@ -810,6 +847,7 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags)
u_int32_t creatorid;
int i;
+ PF_LOCK();
for (i = 0; i < count; i++) {
clr = (struct pfsync_clr *)buf + len * i;
kif = NULL;
@@ -818,6 +856,7 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags)
(kif = pfi_kif_find(clr->ifname)) == NULL)
continue;
+ PF_STATE_ENTER_WRITE();
for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
if (st->creatorid == creatorid &&
@@ -826,7 +865,9 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags)
pf_remove_state(st);
}
}
+ PF_STATE_EXIT_WRITE();
}
+ PF_UNLOCK();
return (0);
}
@@ -838,6 +879,7 @@ pfsync_in_ins(caddr_t buf, int len, int count, int flags)
sa_family_t af1, af2;
int i;
+ PF_LOCK();
for (i = 0; i < count; i++) {
sp = (struct pfsync_state *)(buf + len * i);
af1 = sp->key[0].af;
@@ -863,6 +905,7 @@ pfsync_in_ins(caddr_t buf, int len, int count, int flags)
break;
}
}
+ PF_UNLOCK();
return (0);
}
@@ -881,12 +924,17 @@ pfsync_in_iack(caddr_t buf, int len, int count, int flags)
id_key.id = ia->id;
id_key.creatorid = ia->creatorid;
+ PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
if (st == NULL)
continue;
if (ISSET(st->state_flags, PFSTATE_ACK))
pfsync_deferred(st, 0);
+
+ pf_state_unref(st);
}
return (0);
@@ -930,8 +978,7 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags)
struct pfsync_state *sp;
struct pf_state_cmp id_key;
struct pf_state *st;
- int sync;
-
+ int sync, error;
int i;
for (i = 0; i < count; i++) {
@@ -950,11 +997,17 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags)
id_key.id = sp->id;
id_key.creatorid = sp->creatorid;
+ PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
if (st == NULL) {
/* insert the update */
- if (pfsync_state_import(sp, flags))
+ PF_LOCK();
+ error = pfsync_state_import(sp, flags);
+ if (error)
pfsyncstat_inc(pfsyncs_badstate);
+ PF_UNLOCK();
continue;
}
@@ -992,9 +1045,11 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags)
if (sync) {
pfsyncstat_inc(pfsyncs_stale);
- pfsync_update_state_locked(st);
+ pfsync_update_state(st);
schednetisr(NETISR_PFSYNC);
}
+
+ pf_state_unref(st);
}
return (0);
@@ -1027,7 +1082,10 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
id_key.id = up->id;
id_key.creatorid = up->creatorid;
+ PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
if (st == NULL) {
/* We don't have this state. Ask for it. */
pfsync_request_update(id_key.creatorid, id_key.id);
@@ -1066,9 +1124,11 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
if (sync) {
pfsyncstat_inc(pfsyncs_stale);
- pfsync_update_state_locked(st);
+ pfsync_update_state(st);
schednetisr(NETISR_PFSYNC);
}
+
+ pf_state_unref(st);
}
return (0);
@@ -1092,15 +1152,21 @@ pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
if (id_key.id == 0 && id_key.creatorid == 0)
pfsync_bulk_start();
else {
+ PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
if (st == NULL) {
pfsyncstat_inc(pfsyncs_badstate);
continue;
}
- if (ISSET(st->state_flags, PFSTATE_NOSYNC))
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
+ pf_state_unref(st);
continue;
+ }
pfsync_update_state_req(st);
+ pf_state_unref(st);
}
}
@@ -1115,6 +1181,7 @@ pfsync_in_del(caddr_t buf, int len, int count, int flags)
struct pf_state *st;
int i;
+ PF_STATE_ENTER_WRITE();
for (i = 0; i < count; i++) {
sp = (struct pfsync_state *)(buf + len * i);
@@ -1129,6 +1196,7 @@ pfsync_in_del(caddr_t buf, int len, int count, int flags)
SET(st->state_flags, PFSTATE_NOSYNC);
pf_remove_state(st);
}
+ PF_STATE_EXIT_WRITE();
return (0);
}
@@ -1141,6 +1209,8 @@ pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
struct pf_state *st;
int i;
+ PF_LOCK();
+ PF_STATE_ENTER_WRITE();
for (i = 0; i < count; i++) {
sp = (struct pfsync_del_c *)(buf + len * i);
@@ -1156,6 +1226,8 @@ pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
SET(st->state_flags, PFSTATE_NOSYNC);
pf_remove_state(st);
}
+ PF_STATE_EXIT_WRITE();
+ PF_UNLOCK();
return (0);
}
@@ -1505,19 +1577,59 @@ pfsync_out_del(struct pf_state *st, void *buf)
}
void
-pfsync_drop(struct pfsync_softc *sc)
+pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
+{
+ int q;
+
+ sn->sn_sc = sc;
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++)
+ mtx_enter(&sc->sc_mtx[q]);
+
+ mtx_enter(&sc->sc_upd_req_mtx);
+ mtx_enter(&sc->sc_tdb_mtx);
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
+ TAILQ_INIT(&sn->sn_qs[q]);
+ TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list);
+ }
+
+ TAILQ_INIT(&sn->sn_upd_req_list);
+ TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry);
+
+ TAILQ_INIT(&sn->sn_tdb_q);
+ TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry);
+
+ sn->sn_len = sc->sc_len;
+ sc->sc_len = PFSYNC_MINPKT;
+
+ sn->sn_plus = sc->sc_plus;
+ sc->sc_plus = NULL;
+ sn->sn_pluslen = sc->sc_pluslen;
+ sc->sc_pluslen = 0;
+
+ mtx_leave(&sc->sc_tdb_mtx);
+ mtx_leave(&sc->sc_upd_req_mtx);
+
+ for (q = (PFSYNC_S_COUNT - 1); q >= 0; q--)
+ mtx_leave(&sc->sc_mtx[q]);
+}
+
+void
+pfsync_drop_snapshot(struct pfsync_snapshot *sn)
{
struct pf_state *st;
struct pfsync_upd_req_item *ur;
struct tdb *t;
int q;
+
for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&sn->sn_qs[q]))
continue;
- while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) {
+ TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
#endif
@@ -1526,19 +1638,42 @@ pfsync_drop(struct pfsync_softc *sc)
}
}
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
- pool_put(&sc->sc_pool, ur);
+ while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry);
+ pool_put(&sn->sn_sc->sc_pool, ur);
}
- sc->sc_plus = NULL;
-
- while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+ while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
+ TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry);
CLR(t->tdb_flags, TDBF_PFSYNC);
}
+}
- sc->sc_len = PFSYNC_MINPKT;
+int
+pfsync_is_snapshot_empty(struct pfsync_snapshot *sn)
+{
+ int q;
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++)
+ if (!TAILQ_EMPTY(&sn->sn_qs[q]))
+ return (0);
+
+ if (!TAILQ_EMPTY(&sn->sn_upd_req_list))
+ return (0);
+
+ if (!TAILQ_EMPTY(&sn->sn_tdb_q))
+ return (0);
+
+ return (sn->sn_plus == NULL);
+}
+
+void
+pfsync_drop(struct pfsync_softc *sc)
+{
+ struct pfsync_snapshot sn;
+
+ pfsync_grab_snapshot(&sn, sc);
+ pfsync_drop_snapshot(&sn);
}
#ifdef WITH_PF_LOCK
@@ -1591,6 +1726,7 @@ pfsync_send_pkt(struct mbuf *m)
void
pfsync_sendout(void)
{
+ struct pfsync_snapshot sn;
struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
struct ifnet *ifp = &sc->sc_if;
@@ -1602,12 +1738,9 @@ pfsync_sendout(void)
struct pf_state *st;
struct pfsync_upd_req_item *ur;
struct tdb *t;
-
int offset;
int q, count = 0;
- PF_ASSERT_LOCKED();
-
if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
return;
@@ -1621,26 +1754,35 @@ pfsync_sendout(void)
return;
}
+ pfsync_grab_snapshot(&sn, sc);
+
+ /*
+ * The check below is sufficient to prevent us from sending empty packets,
+ * but it does not stop us from sending short packets.
+ */
+ if (pfsync_is_snapshot_empty(&sn))
+ return;
+
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
sc->sc_if.if_oerrors++;
pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop(sc);
+ pfsync_drop_snapshot(&sn);
return;
}
- if (max_linkhdr + sc->sc_len > MHLEN) {
- MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_len);
+ if (max_linkhdr + sn.sn_len > MHLEN) {
+ MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len);
if (!ISSET(m->m_flags, M_EXT)) {
m_free(m);
sc->sc_if.if_oerrors++;
pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop(sc);
+ pfsync_drop_snapshot(&sn);
return;
}
}
m->m_data += max_linkhdr;
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = sn.sn_len;
/* build the ip header */
ip = mtod(m, struct ip *);
@@ -1656,16 +1798,16 @@ pfsync_sendout(void)
offset += sizeof(*ph);
ph->version = PFSYNC_VERSION;
- ph->len = htons(sc->sc_len - sizeof(*ip));
+ ph->len = htons(sn.sn_len - sizeof(*ip));
bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
- if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+ if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) {
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+ while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry);
bcopy(&ur->ur_msg, m->m_data + offset,
sizeof(ur->ur_msg));
@@ -1683,20 +1825,19 @@ pfsync_sendout(void)
}
/* has someone built a custom region for us to add? */
- if (sc->sc_plus != NULL) {
- bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
- offset += sc->sc_pluslen;
-
- sc->sc_plus = NULL;
+ if (sn.sn_plus != NULL) {
+ bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen);
+ offset += sn.sn_pluslen;
+ sn.sn_plus = NULL; /* XXX memory leak ? */
}
- if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
+ if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+ while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
+ TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry);
pfsync_out_tdb(t, m->m_data + offset);
offset += sizeof(struct pfsync_tdb);
CLR(t->tdb_flags, TDBF_PFSYNC);
@@ -1711,15 +1852,15 @@ pfsync_sendout(void)
/* walk the queues */
for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&sn.sn_qs[q]))
continue;
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
+ TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
#endif
@@ -1741,10 +1882,10 @@ pfsync_sendout(void)
#if NBPFILTER > 0
if (ifp->if_bpf) {
m->m_data += sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+ m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip);
bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
m->m_data -= sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = sn.sn_len;
}
if (sc->sc_sync_ifidx == 0) {
@@ -1754,9 +1895,6 @@ pfsync_sendout(void)
}
#endif
- /* start again */
- sc->sc_len = PFSYNC_MINPKT;
-
sc->sc_if.if_opackets++;
sc->sc_if.if_obytes += m->m_pkthdr.len;
@@ -1815,7 +1953,11 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
return (0);
if (sc->sc_deferred >= 128) {
+ mtx_enter(&sc->sc_deferrals_mtx);
pd = TAILQ_FIRST(&sc->sc_deferrals);
+ TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+ sc->sc_deferred--;
+ mtx_leave(&sc->sc_deferrals_mtx);
if (timeout_del(&pd->pd_tmo))
pfsync_undefer(pd, 0);
}
@@ -1830,8 +1972,10 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
pd->pd_st = pf_state_ref(st);
pd->pd_m = m;
+ mtx_enter(&sc->sc_deferrals_mtx);
sc->sc_deferred++;
TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
+ mtx_leave(&sc->sc_deferrals_mtx);
timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd);
timeout_add_msec(&pd->pd_tmo, 20);
@@ -1842,71 +1986,93 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
}
void
-pfsync_undefer(struct pfsync_deferral *pd, int drop)
+pfsync_undefer_notify(struct pfsync_deferral *pd)
{
- struct pfsync_softc *sc = pfsyncif;
struct pf_pdesc pdesc;
struct pf_state *st = pd->pd_st;
- NET_ASSERT_LOCKED();
-
- if (sc == NULL)
- return;
-
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
-
- CLR(st->state_flags, PFSTATE_ACK);
- if (drop)
- m_freem(pd->pd_m);
- else {
- if (st->rt == PF_ROUTETO) {
- if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
- st->direction, st->kif, pd->pd_m, NULL) !=
- PF_PASS) {
- m_freem(pd->pd_m);
- goto out;
- }
- switch (st->key[PF_SK_WIRE]->af) {
- case AF_INET:
- pf_route(&pdesc, st);
- break;
+ if (st->rule.ptr->rt == PF_ROUTETO) {
+ if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
+ st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) {
+ m_freem(pd->pd_m);
+ return;
+ }
+ switch (st->key[PF_SK_WIRE]->af) {
+ case AF_INET:
+ pf_route(&pdesc, st);
+ break;
#ifdef INET6
- case AF_INET6:
- pf_route6(&pdesc, st);
- break;
+ case AF_INET6:
+ pf_route6(&pdesc, st);
+ break;
#endif /* INET6 */
- default:
- unhandled_af(st->key[PF_SK_WIRE]->af);
- }
- pd->pd_m = pdesc.m;
- } else {
- switch (st->key[PF_SK_WIRE]->af) {
- case AF_INET:
- ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
- 0);
- break;
+ default:
+ unhandled_af(st->key[PF_SK_WIRE]->af);
+ }
+ pd->pd_m = pdesc.m;
+ } else {
+ switch (st->key[PF_SK_WIRE]->af) {
+ case AF_INET:
+ ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
+ 0);
+ break;
#ifdef INET6
- case AF_INET6:
- ip6_output(pd->pd_m, NULL, NULL, 0,
- NULL, NULL);
- break;
+ case AF_INET6:
+ ip6_output(pd->pd_m, NULL, NULL, 0,
+ NULL, NULL);
+ break;
#endif /* INET6 */
- default:
- unhandled_af(st->key[PF_SK_WIRE]->af);
- }
+ default:
+ unhandled_af(st->key[PF_SK_WIRE]->af);
}
+
+ pd->pd_m = NULL;
}
- out:
- pf_state_unref(st);
+}
+
+void
+pfsync_free_deferral(struct pfsync_deferral *pd)
+{
+ struct pfsync_softc *sc = pfsyncif;
+
+ pf_state_unref(pd->pd_st);
+ if (pd->pd_m != NULL)
+ m_freem(pd->pd_m);
pool_put(&sc->sc_pool, pd);
}
void
+pfsync_undefer(struct pfsync_deferral *pd, int drop)
+{
+ struct pfsync_softc *sc = pfsyncif;
+
+ NET_ASSERT_LOCKED();
+
+ if (sc == NULL)
+ return;
+
+ CLR(pd->pd_st->state_flags, PFSTATE_ACK);
+ if (drop) {
+ m_freem(pd->pd_m);
+ pd->pd_m = NULL;
+ } else
+ pfsync_undefer_notify(pd);
+
+ pfsync_free_deferral(pd);
+}
+
+void
pfsync_defer_tmo(void *arg)
{
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_deferral *pd = arg;
+
+ mtx_enter(&sc->sc_deferrals_mtx);
+ TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+ sc->sc_deferred--;
+ mtx_leave(&sc->sc_deferrals_mtx);
NET_LOCK();
- pfsync_undefer(arg, 0);
+ pfsync_undefer(pd, 0);
NET_UNLOCK();
}
@@ -1918,25 +2084,29 @@ pfsync_deferred(struct pf_state *st, int drop)
NET_ASSERT_LOCKED();
+ mtx_enter(&sc->sc_deferrals_mtx);
TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
if (pd->pd_st == st) {
- if (timeout_del(&pd->pd_tmo))
+ if (timeout_del(&pd->pd_tmo)) {
+ TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
+ sc->sc_deferred--;
+ mtx_leave(&sc->sc_deferrals_mtx);
pfsync_undefer(pd, drop);
+ } else
+ mtx_leave(&sc->sc_deferrals_mtx);
return;
}
}
-
- panic("pfsync_deferred: unable to find deferred state");
+ mtx_leave(&sc->sc_deferrals_mtx);
}
void
-pfsync_update_state_locked(struct pf_state *st)
+pfsync_update_state(struct pf_state *st)
{
struct pfsync_softc *sc = pfsyncif;
int sync = 0;
NET_ASSERT_LOCKED();
- PF_ASSERT_LOCKED();
if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
return;
@@ -1982,22 +2152,6 @@ pfsync_update_state_locked(struct pf_state *st)
}
void
-pfsync_update_state(struct pf_state *st, int *have_pf_lock)
-{
- struct pfsync_softc *sc = pfsyncif;
-
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
- return;
-
- if (*have_pf_lock == 0) {
- PF_LOCK();
- *have_pf_lock = 1;
- }
-
- pfsync_update_state_locked(st);
-}
-
-void
pfsync_cancel_full_update(struct pfsync_softc *sc)
{
if (timeout_pending(&sc->sc_bulkfail_tmo) ||
@@ -2049,7 +2203,7 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
struct pfsync_softc *sc = pfsyncif;
struct pfsync_upd_req_item *item;
- size_t nlen = sizeof(struct pfsync_upd_req);
+ size_t nlen, sc_len;
/*
* this code does nothing to prevent multiple update requests for the
@@ -2065,18 +2219,24 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id)
item->ur_msg.id = id;
item->ur_msg.creatorid = creatorid;
- if (TAILQ_EMPTY(&sc->sc_upd_req_list))
- nlen += sizeof(struct pfsync_subheader);
+ do {
+ mtx_enter(&sc->sc_upd_req_mtx);
- if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
- pfsync_sendout();
+ nlen = sizeof(struct pfsync_upd_req);
+ if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+ nlen += sizeof(struct pfsync_subheader);
- nlen = sizeof(struct pfsync_subheader) +
- sizeof(struct pfsync_upd_req);
- }
+ sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
+ if (sc_len > sc->sc_if.if_mtu) {
+ atomic_sub_long(&sc->sc_len, nlen);
+ mtx_leave(&sc->sc_upd_req_mtx);
+ pfsync_sendout();
+ continue;
+ }
- TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
- sc->sc_len += nlen;
+ TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
+ mtx_leave(&sc->sc_upd_req_mtx);
+ } while (0);
schednetisr(NETISR_PFSYNC);
}
@@ -2202,7 +2362,7 @@ void
pfsync_q_ins(struct pf_state *st, int q)
{
struct pfsync_softc *sc = pfsyncif;
- size_t nlen = pfsync_qs[q].len;
+ size_t nlen, sc_len;
KASSERT(st->sync_state == PFSYNC_S_NONE);
@@ -2210,19 +2370,37 @@ pfsync_q_ins(struct pf_state *st, int q)
if (sc->sc_len < PFSYNC_MINPKT)
panic("pfsync pkt len is too low %zd", sc->sc_len);
#endif
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- nlen += sizeof(struct pfsync_subheader);
+ do {
+ mtx_enter(&sc->sc_mtx[q]);
- if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
- pfsync_sendout();
+ /*
+ * If two threads are competing to insert the same state, then
+ * there must be just a single winner.
+ */
+ if (st->sync_state != PFSYNC_S_NONE) {
+ mtx_leave(&sc->sc_mtx[q]);
+ break;
+ }
- nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
- }
+ nlen = pfsync_qs[q].len;
- sc->sc_len += nlen;
- pf_state_ref(st);
- TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
- st->sync_state = q;
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ nlen += sizeof(struct pfsync_subheader);
+
+ sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
+ if (sc_len > sc->sc_if.if_mtu) {
+ atomic_sub_long(&sc->sc_len, nlen);
+ mtx_leave(&sc->sc_mtx[q]);
+ pfsync_sendout();
+ continue;
+ }
+
+ pf_state_ref(st);
+
+ TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+ st->sync_state = q;
+ mtx_leave(&sc->sc_mtx[q]);
+ } while (0);
}
void
@@ -2233,39 +2411,48 @@ pfsync_q_del(struct pf_state *st)
KASSERT(st->sync_state != PFSYNC_S_NONE);
- sc->sc_len -= pfsync_qs[q].len;
+ mtx_enter(&sc->sc_mtx[q]);
+ atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ atomic_sub_long(&sc->sc_len, sizeof (struct pfsync_subheader));
+ mtx_leave(&sc->sc_mtx[q]);
+
st->sync_state = PFSYNC_S_NONE;
pf_state_unref(st);
-
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- sc->sc_len -= sizeof(struct pfsync_subheader);
}
void
pfsync_update_tdb(struct tdb *t, int output)
{
struct pfsync_softc *sc = pfsyncif;
- size_t nlen = sizeof(struct pfsync_tdb);
+ size_t nlen, sc_len;
if (sc == NULL)
return;
if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
- if (TAILQ_EMPTY(&sc->sc_tdb_q))
- nlen += sizeof(struct pfsync_subheader);
-
- if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
- pfsync_sendout();
+ do {
+ mtx_enter(&sc->sc_tdb_mtx);
+ nlen = sizeof(struct pfsync_tdb);
+
+ if (TAILQ_EMPTY(&sc->sc_tdb_q))
+ nlen += sizeof(struct pfsync_subheader);
+
+ sc_len = atomic_add_long_nv(&sc->sc_len, nlen);
+ if (sc_len > sc->sc_if.if_mtu) {
+ atomic_sub_long(&sc->sc_len, nlen);
+ mtx_leave(&sc->sc_tdb_mtx);
+ pfsync_sendout();
+ continue;
+ }
- nlen = sizeof(struct pfsync_subheader) +
- sizeof(struct pfsync_tdb);
- }
+ TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
+ mtx_leave(&sc->sc_tdb_mtx);
- sc->sc_len += nlen;
- TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
- SET(t->tdb_flags, TDBF_PFSYNC);
- t->tdb_updates = 0;
+ SET(t->tdb_flags, TDBF_PFSYNC);
+ t->tdb_updates = 0;
+ } while (0);
} else {
if (++t->tdb_updates >= sc->sc_maxupdates)
schednetisr(NETISR_PFSYNC);
@@ -2281,16 +2468,22 @@ void
pfsync_delete_tdb(struct tdb *t)
{
struct pfsync_softc *sc = pfsyncif;
+ size_t nlen;
if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
return;
- sc->sc_len -= sizeof(struct pfsync_tdb);
+ mtx_enter(&sc->sc_tdb_mtx);
+
TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
CLR(t->tdb_flags, TDBF_PFSYNC);
+ nlen = sizeof(struct pfsync_tdb);
if (TAILQ_EMPTY(&sc->sc_tdb_q))
- sc->sc_len -= sizeof(struct pfsync_subheader);
+ nlen += sizeof(struct pfsync_subheader);
+ atomic_sub_long(&sc->sc_len, nlen);
+
+ mtx_leave(&sc->sc_tdb_mtx);
}
void
@@ -2338,9 +2531,14 @@ pfsync_bulk_start(void)
else {
sc->sc_ureq_received = getuptime();
- if (sc->sc_bulk_next == NULL)
+ if (sc->sc_bulk_next == NULL) {
+ PF_STATE_ENTER_READ();
sc->sc_bulk_next = TAILQ_FIRST(&state_list);
+ pf_state_ref(sc->sc_bulk_next);
+ PF_STATE_EXIT_READ();
+ }
sc->sc_bulk_last = sc->sc_bulk_next;
+ pf_state_ref(sc->sc_bulk_last);
pfsync_bulk_status(PFSYNC_BUS_START);
timeout_add(&sc->sc_bulk_tmo, 0);
@@ -2351,7 +2549,7 @@ void
pfsync_bulk_update(void *arg)
{
struct pfsync_softc *sc;
- struct pf_state *st;
+ struct pf_state *st, *st_next;
int i = 0;
NET_LOCK();
@@ -2359,6 +2557,7 @@ pfsync_bulk_update(void *arg)
if (sc == NULL)
goto out;
st = sc->sc_bulk_next;
+ sc->sc_bulk_next = NULL;
for (;;) {
if (st->sync_state == PFSYNC_S_NONE &&
@@ -2368,13 +2567,23 @@ pfsync_bulk_update(void *arg)
i++;
}
- st = TAILQ_NEXT(st, entry_list);
+ /*
+ * I wonder how we prevent an infinite bulk update. IMO it can
+ * happen when the sc_bulk_last state expires before we iterate
+ * through the whole list.
+ */
+ PF_STATE_ENTER_READ();
+ st_next = TAILQ_NEXT(st, entry_list);
+ pf_state_unref(st);
+ st = st_next;
if (st == NULL)
st = TAILQ_FIRST(&state_list);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
- if (st == sc->sc_bulk_last) {
+ if ((st == NULL) || (st == sc->sc_bulk_last)) {
/* we're done */
- sc->sc_bulk_next = NULL;
+ pf_state_unref(sc->sc_bulk_last);
sc->sc_bulk_last = NULL;
pfsync_bulk_status(PFSYNC_BUS_END);
break;
@@ -2497,9 +2706,7 @@ void
pfsync_timeout(void *arg)
{
NET_LOCK();
- PF_LOCK();
pfsync_sendout();
- PF_UNLOCK();
NET_UNLOCK();
}
@@ -2507,9 +2714,7 @@ pfsync_timeout(void *arg)
void
pfsyncintr(void)
{
- PF_LOCK();
pfsync_sendout();
- PF_UNLOCK();
}
int
diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h
index 3c3b7dd90f6..2c9bedcc920 100644
--- a/sys/net/if_pfsync.h
+++ b/sys/net/if_pfsync.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: if_pfsync.h,v 1.54 2018/09/11 07:53:38 sashan Exp $ */
+/* $OpenBSD: if_pfsync.h,v 1.55 2021/02/04 00:55:41 sashan Exp $ */
/*
* Copyright (c) 2001 Michael Shalayeff
@@ -328,7 +328,7 @@ void pfsync_state_export(struct pfsync_state *,
struct pf_state *);
void pfsync_insert_state(struct pf_state *);
-void pfsync_update_state(struct pf_state *, int *);
+void pfsync_update_state(struct pf_state *);
void pfsync_delete_state(struct pf_state *);
void pfsync_clear_states(u_int32_t, const char *);
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 44e0e17f757..74ec0aa6278 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf.c,v 1.1107 2021/02/03 07:41:12 dlg Exp $ */
+/* $OpenBSD: pf.c,v 1.1108 2021/02/04 00:55:41 sashan Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -6935,7 +6935,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
PF_STATE_EXIT_READ();
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s, &have_pf_lock);
+ pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;
@@ -6967,7 +6967,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
PF_STATE_EXIT_READ();
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s, &have_pf_lock);
+ pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;
@@ -7043,7 +7043,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
- pfsync_update_state(s, &have_pf_lock);
+ pfsync_update_state(s);
#endif /* NPFSYNC > 0 */
r = s->rule.ptr;
a = s->anchor.ptr;