summaryrefslogtreecommitdiff
path: root/sys/net/pf.c
diff options
context:
space:
mode:
authorChristopher Pascoe <pascoe@cvs.openbsd.org>2005-09-28 01:46:34 +0000
committerChristopher Pascoe <pascoe@cvs.openbsd.org>2005-09-28 01:46:34 +0000
commit9e85691e2f25640cac5ed32edb748113d5661501 (patch)
tree02af9059355ca7106f8f309f38b0a3e967e00239 /sys/net/pf.c
parentecd5ab2aa9b09707fe715c0e486dbdb021636ffe (diff)
Improve the safety of pf IOCTLs, taking into account that some paths can sleep.
- Introduces a rw_lock in pfioctl so that we can have concurrent readers but only one process performing updates at a time; - Separates state expiry into "unlink" and "free" parts; anyone can unlink a state/src node from the RB trees at any time, but a state can only be freed whilst the write lock is held; - Converts state_updates into list state_list containing all states, regardless of whether they are "linked" or "unlinked"; - Introduces a new PFTM_UNLINKED state that is used on the "unlinked" states to signal that they can be freed; - Converts pf_purge_expired_state to an "unlink" state routine, which only unlinks the state from the RB trees. Freeing the state/src nodes is left to the purge thread, which runs whilst holding a write lock, such that all "next" references remain valid; - Converts pfsync_bulk_update and DIOCGETSTATES to walk state_list rather than the RB trees; - Converts the purge thread to use the new state_list and perform a partial purge every second, with the target rate a full state table walk every PFTM_INTERVAL seconds. seen by mcbride, henning, dhartmei pre-3.8, but too intrusive for then
Diffstat (limited to 'sys/net/pf.c')
-rw-r--r--sys/net/pf.c108
1 files changed, 86 insertions, 22 deletions
diff --git a/sys/net/pf.c b/sys/net/pf.c
index bdf1480471d..11d098bab0a 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf.c,v 1.502 2005/08/22 11:54:25 dhartmei Exp $ */
+/* $OpenBSD: pf.c,v 1.503 2005/09/28 01:46:32 pascoe Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@@ -49,6 +49,7 @@
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -288,7 +289,7 @@ static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *);
struct pf_src_tree tree_src_tracking;
struct pf_state_tree_id tree_id;
-struct pf_state_queue state_updates;
+struct pf_state_queue state_list;
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state,
@@ -848,8 +849,7 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
return (-1);
}
- TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
-
+ TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
pf_status.fcounters[FCNT_STATE_INSERT]++;
pf_status.states++;
pfi_kif_ref(kif, PFI_KIF_REF_STATE);
@@ -862,15 +862,24 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
void
pf_purge_thread(void *v)
{
- int s;
+ int nloops = 0, s;
for (;;) {
- tsleep(pf_purge_thread, PWAIT, "pftm",
- pf_default_rule.timeout[PFTM_INTERVAL] * hz);
+ tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
+
s = splsoftnet();
- pf_purge_expired_states();
- pf_purge_expired_fragments();
- pf_purge_expired_src_nodes();
+
+ /* process a fraction of the state table every second */
+ pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]));
+
+ /* purge other expired types every PFTM_INTERVAL seconds */
+ if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+ pf_purge_expired_fragments();
+ pf_purge_expired_src_nodes(0);
+ nloops = 0;
+ }
+
splx(s);
}
}
@@ -888,6 +897,7 @@ pf_state_expires(const struct pf_state *state)
return (time_second);
if (state->timeout == PFTM_UNTIL_PACKET)
return (0);
+ KASSERT(state->timeout != PFTM_UNLINKED);
KASSERT(state->timeout < PFTM_MAX);
timeout = state->rule.ptr->timeout[state->timeout];
if (!timeout)
@@ -912,14 +922,21 @@ pf_state_expires(const struct pf_state *state)
}
void
-pf_purge_expired_src_nodes(void)
+pf_purge_expired_src_nodes(int waslocked)
{
struct pf_src_node *cur, *next;
+ int locked = waslocked;
for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
if (cur->states <= 0 && cur->expire <= time_second) {
+ if (! locked) {
+ rw_enter_write(&pf_consistency_lock);
+ next = RB_NEXT(pf_src_tree,
+ &tree_src_tracking, cur);
+ locked = 1;
+ }
if (cur->rule.ptr != NULL) {
cur->rule.ptr->src_nodes--;
if (cur->rule.ptr->states <= 0 &&
@@ -932,6 +949,9 @@ pf_purge_expired_src_nodes(void)
pool_put(&pf_src_tree_pl, cur);
}
}
+
+ if (locked && !waslocked)
+ rw_exit_write(&pf_consistency_lock);
}
void
@@ -964,8 +984,9 @@ pf_src_tree_remove_state(struct pf_state *s)
s->src_node = s->nat_src_node = NULL;
}
+/* callers should be at splsoftnet */
void
-pf_purge_expired_state(struct pf_state *cur)
+pf_unlink_state(struct pf_state *cur)
{
if (cur->src.state == PF_TCPS_PROXY_DST)
pf_send_tcp(cur->rule.ptr, cur->af,
@@ -979,9 +1000,24 @@ pf_purge_expired_state(struct pf_state *cur)
&cur->u.s.kif->pfik_lan_ext, cur);
RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
- pfsync_delete_state(cur);
+ if (cur->creatorid == pf_status.hostid)
+ pfsync_delete_state(cur);
#endif
+ cur->timeout = PFTM_UNLINKED;
pf_src_tree_remove_state(cur);
+}
+
+/* callers should be at splsoftnet and hold the
+ * write_lock on pf_consistency_lock */
+void
+pf_free_state(struct pf_state *cur)
+{
+#if NPFSYNC
+ if (pfsyncif.sc_bulk_send_next == cur ||
+ pfsyncif.sc_bulk_terminator == cur)
+ return;
+#endif
+ KASSERT(cur->timeout == PFTM_UNLINKED);
if (--cur->rule.ptr->states <= 0 &&
cur->rule.ptr->src_nodes <= 0)
pf_rm_rule(NULL, cur->rule.ptr);
@@ -994,7 +1030,7 @@ pf_purge_expired_state(struct pf_state *cur)
pf_rm_rule(NULL, cur->anchor.ptr);
pf_normalize_tcp_cleanup(cur);
pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
- TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
+ TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
if (cur->tag)
pf_tag_unref(cur->tag);
pool_put(&pf_state_pl, cur);
@@ -1003,16 +1039,44 @@ pf_purge_expired_state(struct pf_state *cur)
}
void
-pf_purge_expired_states(void)
+pf_purge_expired_states(u_int32_t maxcheck)
{
- struct pf_state *cur, *next;
-
- for (cur = RB_MIN(pf_state_tree_id, &tree_id);
- cur; cur = next) {
- next = RB_NEXT(pf_state_tree_id, &tree_id, cur);
- if (pf_state_expires(cur) <= time_second)
- pf_purge_expired_state(cur);
+ static struct pf_state *cur = NULL;
+ struct pf_state *next;
+ int locked = 0;
+
+ while (maxcheck--) {
+ /* wrap to start of list when we hit the end */
+ if (cur == NULL) {
+ cur = TAILQ_FIRST(&state_list);
+ if (cur == NULL)
+ break; /* list empty */
+ }
+
+ /* get next state, as cur may get deleted */
+ next = TAILQ_NEXT(cur, u.s.entry_list);
+
+ if (cur->timeout == PFTM_UNLINKED) {
+ /* free unlinked state */
+ if (! locked) {
+ rw_enter_write(&pf_consistency_lock);
+ locked = 1;
+ }
+ pf_free_state(cur);
+ } else if (pf_state_expires(cur) <= time_second) {
+ /* unlink and free expired state */
+ pf_unlink_state(cur);
+ if (! locked) {
+ rw_enter_write(&pf_consistency_lock);
+ locked = 1;
+ }
+ pf_free_state(cur);
+ }
+ cur = next;
}
+
+ if (locked)
+ rw_exit_write(&pf_consistency_lock);
}
int