author     Ted Unangst <tedu@cvs.openbsd.org>  2008-10-23 23:54:03 +0000
committer  Ted Unangst <tedu@cvs.openbsd.org>  2008-10-23 23:54:03 +0000
commit     a589fbadefc286c5b9f79ad996596a1006bcdea3 (patch)
tree       11917cdb46abf003114075e269bb90c39a4f349e /sys/kern
parent     8cf2bd98977117b4f40fc8cc454c9784708fb9a0 (diff)
a better fix for the "uvm_km thread runs out of memory" problem.
add a new arg to the backend so it can tell pool to slow down. when we get this flag, yield *after* putting the page in the pool's free list. whatever we do, don't let the thread sleep. this makes things better by still letting the thread run when a huge pf request comes in, but without artificially increasing pressure on the backend by eating pages without feeding them forward. ok deraadt
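
The handshake described above is small: the backend never blocks the refilling thread itself, it only raises a flag, and the pool postpones its voluntary yield until the page it just obtained is already linked into the free list. Below is a minimal userland sketch of that ordering, not the kernel code; backend_low_on_pages(), yield_cpu() and the fake page counter are hypothetical stand-ins for the uvm_km state, yield() and the pool internals.

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>

#define PR_WAITOK	0x01
#define PAGE_SZ		4096

static int free_pages = 2;	/* pretend backend inventory */

/* Is the page backend under pressure?  Stand-in for the real uvm_km state. */
static int
backend_low_on_pages(void)
{
	return (free_pages < 4);
}

/* Give up the CPU without sleeping.  Stand-in for the kernel's yield(). */
static void
yield_cpu(void)
{
	sched_yield();
}

/* Backend: always try to hand out a page, but flag pressure to the caller. */
static void *
backend_alloc(int flags, int *slowdown)
{
	(void)flags;
	*slowdown = backend_low_on_pages();
	if (free_pages == 0)
		return (NULL);
	free_pages--;
	return (malloc(PAGE_SZ));
}

/* Pool side: feed the page forward first, only then slow down if asked. */
static void *
pool_refill(int flags)
{
	int slowdown = 0;
	void *pg = backend_alloc(flags, &slowdown);

	if (pg == NULL)
		return (NULL);
	/* ...the page would be linked into the pool's free list here... */
	if (slowdown && (flags & PR_WAITOK))
		yield_cpu();	/* yield, never sleep */
	return (pg);
}

int
main(void)
{
	void *pg = pool_refill(PR_WAITOK);

	printf("got page %p, backend pages left: %d\n", pg, free_pages);
	free(pg);
	return (0);
}

The point of the ordering is that a waiting consumer can already be served from the new page before the refilling thread steps aside, so backing off never starves the pool it was refilling.
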
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/subr_pool.c  | 58
-rw-r--r--  sys/kern/vfs_cache.c  | 18
2 files changed, 52 insertions(+), 24 deletions(-)
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index 8ee2f318845..fc01bd3e366 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_pool.c,v 1.62 2008/06/26 05:42:20 ray Exp $ */
+/* $OpenBSD: subr_pool.c,v 1.63 2008/10/23 23:54:02 tedu Exp $ */
/* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
/*-
@@ -85,6 +85,7 @@ struct pool_item {
#endif
/* Other entries use only this list entry */
TAILQ_ENTRY(pool_item) pi_list;
+ int pi_fillstart;
};
#define POOL_NEEDS_CATCHUP(pp) \
@@ -106,7 +107,7 @@ void pr_rmpage(struct pool *, struct pool_item_header *,
int pool_chk_page(struct pool *, const char *, struct pool_item_header *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int);
-void *pool_allocator_alloc(struct pool *, int);
+void *pool_allocator_alloc(struct pool *, int, int *);
void pool_allocator_free(struct pool *, void *);
#ifdef DDB
@@ -392,6 +393,7 @@ pool_do_get(struct pool *pp, int flags)
struct pool_item *pi;
struct pool_item_header *ph;
void *v;
+ int slowdown = 0;
#ifdef DIAGNOSTIC
if ((flags & PR_WAITOK) != 0)
@@ -462,7 +464,7 @@ startover:
/*
* Call the back-end page allocator for more memory.
*/
- v = pool_allocator_alloc(pp, flags);
+ v = pool_allocator_alloc(pp, flags, &slowdown);
if (__predict_true(v != NULL))
ph = pool_alloc_item_header(pp, v, flags);
@@ -490,6 +492,12 @@ startover:
pool_prime_page(pp, v, ph);
pp->pr_npagealloc++;
+		if (slowdown && (flags & PR_WAITOK)) {
+			mtx_leave(&pp->pr_mtx);
+			yield();
+			mtx_enter(&pp->pr_mtx);
+		}
+
/* Start the allocation process over. */
goto startover;
}
@@ -510,6 +518,19 @@ startover:
" item addr %p",
pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
}
+	{
+		int i, *ip = v;
+
+		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
+			if (++ip < &pi->pi_fillstart)
+				continue;
+			if (*ip != PI_MAGIC) {
+				panic("pool_do_get(%s): free list modified: magic=%x; page %p;"
+				    " item addr %p",
+				    pp->pr_wchan, *ip, ph->ph_page, pi);
+			}
+		}
+	}
#endif
/*
@@ -615,9 +636,6 @@ pool_do_put(struct pool *pp, void *v)
* Return to item list.
*/
#ifdef DIAGNOSTIC
- pi->pi_magic = PI_MAGIC;
-#endif
-#ifdef DEBUG
{
int i, *ip = v;
@@ -625,9 +643,10 @@ pool_do_put(struct pool *pp, void *v)
*ip++ = PI_MAGIC;
}
}
+ pi->pi_magic = PI_MAGIC;
#endif
- TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
+ TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
ph->ph_nmissing--;
pp->pr_nitems++;
pp->pr_nout--;
@@ -688,12 +707,13 @@ pool_prime(struct pool *pp, int n)
struct pool_item_header *ph;
caddr_t cp;
int newpages;
+ int slowdown;
mtx_enter(&pp->pr_mtx);
newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
while (newpages-- > 0) {
- cp = pool_allocator_alloc(pp, PR_NOWAIT);
+ cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
if (__predict_true(cp != NULL))
ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
if (__predict_false(cp == NULL || ph == NULL)) {
@@ -772,6 +792,13 @@ pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
/* Insert on page list */
TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
+		{
+			int i, *ip = (int *)pi;
+
+			for (i = 0; i < pp->pr_size / sizeof(int); i++) {
+				*ip++ = PI_MAGIC;
+			}
+		}
pi->pi_magic = PI_MAGIC;
#endif
cp = (caddr_t)(cp + pp->pr_size);
@@ -799,12 +826,13 @@ pool_catchup(struct pool *pp)
struct pool_item_header *ph;
caddr_t cp;
int error = 0;
+ int slowdown;
while (POOL_NEEDS_CATCHUP(pp)) {
/*
* Call the page back-end allocator for more memory.
*/
- cp = pool_allocator_alloc(pp, PR_NOWAIT);
+ cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
if (__predict_true(cp != NULL))
ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
if (__predict_false(cp == NULL || ph == NULL)) {
@@ -1249,9 +1277,7 @@ sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
*
* Each pool has a backend allocator that handles allocation, deallocation
*/
-void *pool_page_alloc_oldnointr(struct pool *, int);
-void pool_page_free_oldnointr(struct pool *, void *);
-void *pool_page_alloc(struct pool *, int);
+void *pool_page_alloc(struct pool *, int, int *);
void pool_page_free(struct pool *, void *);
/*
@@ -1278,14 +1304,14 @@ struct pool_allocator pool_allocator_nointr = {
*/
void *
-pool_allocator_alloc(struct pool *pp, int flags)
+pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
void *v;
if (waitok)
mtx_leave(&pp->pr_mtx);
- v = pp->pr_alloc->pa_alloc(pp, flags);
+ v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
if (waitok)
mtx_enter(&pp->pr_mtx);
@@ -1301,11 +1327,11 @@ pool_allocator_free(struct pool *pp, void *v)
}
void *
-pool_page_alloc(struct pool *pp, int flags)
+pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
- return (uvm_km_getpage(waitok));
+ return (uvm_km_getpage(waitok, slowdown));
}
void
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 9356cb32039..41b8263819d 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_cache.c,v 1.27 2008/10/20 20:33:07 deraadt Exp $ */
+/* $OpenBSD: vfs_cache.c,v 1.28 2008/10/23 23:54:02 tedu Exp $ */
/* $NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $ */
/*
@@ -162,6 +162,9 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
goto remove;
}
+ /* remove from lru now to prevent races */
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+
vp = ncp->nc_vp;
vpid = vp->v_id;
if (vp == dvp) { /* lookup on "." */
@@ -178,6 +181,8 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0) {
vput(vp);
+ /* parent has permanent issues; recycle */
+ TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
return (error);
}
cnp->cn_flags &= ~PDIRUNLOCK;
@@ -204,6 +209,8 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
nchstats.ncs_falsehits++;
} else
nchstats.ncs_badhits++;
+ /* cache entry is stale; recycle */
+ TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
/*
* The parent needs to be locked when we return to VOP_LOOKUP().
* The `.' case here should be extremely rare (if it can happen
@@ -219,13 +226,8 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
}
nchstats.ncs_goodhits++;
-	/*
-	 * Move this slot to end of LRU chain, if not already there.
-	 */
-	if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
-		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
-		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-	}
+	/* cache entry is valid; keep it */
+	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
*vpp = vp;
return (0);
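
The vfs_cache.c hunks apply a related discipline to the name-cache LRU: a hit is removed from the LRU list the moment it is found, so nothing can race on it while locks are dropped, and it is re-queued afterwards at the head (recycle soon) if the hit turned out stale, or at the tail (most recently used) if it was good. The following is a toy illustration of that pattern using the sys/queue.h TAILQ macros; the entry type and function names are illustrative, not the kernel's.

#include <sys/queue.h>
#include <stdio.h>

struct centry {
	int			id;
	TAILQ_ENTRY(centry)	lru;
};

TAILQ_HEAD(lruhead, centry);
static struct lruhead lruq = TAILQ_HEAD_INITIALIZER(lruq);

/* A cache hit: pull the entry off the LRU immediately, re-queue it later. */
static void
lookup_hit(struct centry *ce, int still_valid)
{
	/* remove from lru now to prevent races */
	TAILQ_REMOVE(&lruq, ce, lru);

	/* ...validation work that may drop and retake locks goes here... */

	if (!still_valid) {
		/* stale entry: put it at the head so it gets recycled first */
		TAILQ_INSERT_HEAD(&lruq, ce, lru);
		return;
	}
	/* good hit: keep it, i.e. make it the most recently used entry */
	TAILQ_INSERT_TAIL(&lruq, ce, lru);
}

int
main(void)
{
	struct centry a = { 1 }, b = { 2 };
	struct centry *ce;

	TAILQ_INSERT_TAIL(&lruq, &a, lru);
	TAILQ_INSERT_TAIL(&lruq, &b, lru);
	lookup_hit(&a, 1);			/* a becomes most recently used */
	TAILQ_FOREACH(ce, &lruq, lru)
		printf("entry %d\n", ce->id);	/* prints 2, then 1 */
	return (0);
}

Because the entry is always off the list by the time it is re-queued, the good-hit path can insert at the tail unconditionally, which is why the old "only move if not already last" special case disappears in the diff above.
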