-rw-r--r-- | sys/kern/kern_sysctl.c |   5
-rw-r--r-- | sys/kern/vfs_bio.c     | 114
-rw-r--r-- | sys/sys/mount.h        |   8
-rw-r--r-- | sys/uvm/uvm_pdaemon.c  |  14
4 files changed, 126 insertions, 15 deletions
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 19112eff659..c1a0d54f5a1 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_sysctl.c,v 1.170 2009/06/03 21:30:20 beck Exp $	*/
+/*	$OpenBSD: kern_sysctl.c,v 1.171 2009/06/05 04:29:14 beck Exp $	*/
 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
 
 /*-
@@ -548,7 +548,8 @@ kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 		}
 		if (bufcachepercent != opct) {
 			pgs = bufcachepercent * physmem / 100;
-			bufadjust(pgs);
+			bufadjust(pgs); /* adjust bufpages */
+			bufhighpages = bufpages; /* set high water mark */
 		}
 		return(0);
 	}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c106a1a1b36..8e900669145 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,7 +1,7 @@
-/*	$OpenBSD: vfs_bio.c,v 1.114 2009/06/03 21:30:20 beck Exp $	*/
+/*	$OpenBSD: vfs_bio.c,v 1.115 2009/06/05 04:29:14 beck Exp $	*/
 /*	$NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $	*/
 
-/*-
+/*
  * Copyright (c) 1994 Christopher G. Demetriou
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -109,6 +109,9 @@ long hidirtypages;
 long locleanpages;
 long hicleanpages;
 long maxcleanpages;
+long backoffpages;	/* backoff counter for page allocations */
+long buflowpages;	/* bufpages low water mark */
+long bufhighpages;	/* bufpages high water mark */
 
 /* XXX - should be defined here. */
 extern int bufcachepercent;
@@ -170,6 +173,11 @@ buf_put(struct buf *bp)
 
 	LIST_REMOVE(bp, b_list);
 	bcstats.numbufs--;
+	if (backoffpages) {
+		backoffpages -= atop(bp->b_bufsize);
+		if (backoffpages < 0)
+			backoffpages = 0;
+	}
 
 	if (buf_dealloc_mem(bp) != 0)
 		return;
@@ -185,7 +193,7 @@ bufinit(void)
 	struct bqueues *dp;
 
 	/* XXX - for now */
-	bufpages = bufcachepercent = bufkvm = 0;
+	bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0;
 
 	/*
 	 * If MD code doesn't say otherwise, use 10% of kvm for mappings and
@@ -196,6 +204,16 @@ bufinit(void)
 	if (bufpages == 0)
 		bufpages = physmem * bufcachepercent / 100;
 
+	bufhighpages = bufpages;
+
+	/*
+	 * Set the base backoff level for the buffer cache to bufpages.
+	 * We will not allow uvm to steal back more than this number of
+	 * pages.
+	 */
+	buflowpages = physmem * 10 / 100;
+
+
 	if (bufkvm == 0)
 		bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10;
 
@@ -245,7 +263,10 @@ bufadjust(int newbufpages)
 	 * XXX - note, bufkvm was allocated once, based on 10% of physmem
 	 * see above.
 	 */
+	struct buf *bp;
+	int s;
 
+	s = splbio();
 	bufpages = newbufpages;
 
 	hidirtypages = (bufpages / 4) * 3;
@@ -259,8 +280,76 @@ bufadjust(int newbufpages)
 	locleanpages = bufpages - (bufpages / 10);
 
 	maxcleanpages = locleanpages;
+
+	/*
+	 * If we have more buffers allocated than bufpages,
+	 * free them up to get back down. This may possibly consume
+	 * all our clean pages...
+	 */
+	while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) &&
+	    (bcstats.numbufpages > bufpages)) {
+		bremfree(bp);
+		if (bp->b_vp) {
+			RB_REMOVE(buf_rb_bufs,
+			    &bp->b_vp->v_bufs_tree, bp);
+			brelvp(bp);
+		}
+		buf_put(bp);
+	}
+
+	/*
+	 * Wake up cleaner if we're getting low on pages. We might
+	 * now have too much dirty, or have fallen below our low
+	 * water mark on clean pages, so we need to free more stuff
+	 * up.
+	 */
+	if (bcstats.numdirtypages >= hidirtypages ||
+	    bcstats.numcleanpages <= locleanpages)
+		wakeup(&bd_req);
+
+	/*
+	 * If immediate action has not freed up enough goo for us
+	 * to proceed - we tsleep and wait for the cleaner above
+	 * to do its work and get us reduced down to sanity.
+	 */
+	while (bcstats.numbufpages > bufpages) {
+		tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
+	}
+	splx(s);
 }
 
+/*
+ * Make the buffer cache back off from cachepct.
+ */
+int
+bufbackoff()
+{
+	/*
+	 * Back off the amount of buffer cache pages. Called by the page
+	 * daemon to consume buffer cache pages rather than swapping.
+	 *
+	 * On success, it frees N pages from the buffer cache, and sets
+	 * a flag so that the next N allocations from buf_get will recycle
+	 * a buffer rather than allocate a new one. It then returns 0 to the
+	 * caller.
+	 *
+	 * On failure, when it can free no pages from the buffer cache, it
+	 * does nothing and returns -1 to the caller.
+	 */
+	long d;
+
+	if (bufpages <= buflowpages)
+		return(-1);
+
+	if (bufpages - BACKPAGES >= buflowpages)
+		d = BACKPAGES;
+	else
+		d = bufpages - buflowpages;
+	backoffpages = BACKPAGES;
+	bufadjust(bufpages - d);
+	backoffpages = BACKPAGES;
+	return(0);
+}
 
 struct buf *
 bio_doread(struct vnode *vp, daddr64_t blkno, int size, int async)
@@ -862,11 +951,24 @@ geteblk(int size)
 struct buf *
 buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
 {
+	static int gcount = 0;
 	struct buf *bp;
 	int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK;
 	int npages;
 	int s;
 
+	/*
+	 * If we were previously backed off, slowly climb back up
+	 * to the high water mark again.
+	 */
+	if ((backoffpages == 0) && (bufpages < bufhighpages)) {
+		if (gcount == 0) {
+			bufadjust(bufpages + BACKPAGES);
+			gcount += BACKPAGES;
+		} else
+			gcount--;
+	}
+
 	s = splbio();
 	if (size) {
 		/*
@@ -898,7 +1000,8 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
 		/*
 		 * Free some buffers until we have enough space.
 		 */
-		while (bcstats.numbufpages + npages > bufpages) {
+		while ((bcstats.numbufpages + npages > bufpages)
+		    || backoffpages) {
 			int freemax = 5;
 			int i = freemax;
 			while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) {
@@ -910,7 +1013,8 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
 				}
 				buf_put(bp);
 			}
-			if (freemax == i) {
+			if (freemax == i &&
+			    (bcstats.numbufpages + npages > bufpages)) {
 				needbuffer++;
 				tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
 				splx(s);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 5796fda878f..402a926e398 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mount.h,v 1.93 2009/06/03 21:30:20 beck Exp $	*/
+/*	$OpenBSD: mount.h,v 1.94 2009/06/05 04:29:14 beck Exp $	*/
 /*	$NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $	*/
 
 /*
@@ -504,10 +504,14 @@ struct bcachestats {
 	int64_t cachehits;		/* total reads found in cache */
 };
 #ifdef _KERNEL
+#define BACKPAGES 100
 extern struct bcachestats bcstats;
-#define BUFPAGES_DEFICIT (bufpages - bcstats.numbufpages)
+extern long buflowpages, bufhighpages;
+#define BUFPAGES_DEFICIT (((buflowpages - bcstats.numbufpages) < 0) ? 0 \
+    : buflowpages - bcstats.numbufpages)
 extern int bufcachepercent;
 extern void bufadjust(int);
+extern int bufbackoff(void);
 #endif
 
 /*
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 3079110cb1c..2e1d2de709f 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_pdaemon.c,v 1.45 2009/06/01 19:54:02 oga Exp $	*/
+/*	$OpenBSD: uvm_pdaemon.c,v 1.46 2009/06/05 04:29:14 beck Exp $	*/
 /*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/
 
 /*
@@ -214,8 +214,8 @@ uvm_pageout(void *arg)
 	for (;;) {
 		uvm_lock_fpageq();
 		UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
-		msleep(&uvm.pagedaemon_proc, &uvm.fpageqlock, PVM | PNORELOCK,
-		    "pgdaemon", 0);
+		msleep(&uvm.pagedaemon_proc, &uvm.fpageqlock,
+		    PVM | PNORELOCK, "pgdaemon", 0);
 		uvmexp.pdwoke++;
 		UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
 
@@ -239,11 +239,13 @@ uvm_pageout(void *arg)
 		    uvmexp.inactarg);
 
 		/*
-		 * scan if needed
+		 * get pages from the buffer cache, or scan if needed
		 */
-		if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg ||
-		    uvmexp.inactive < uvmexp.inactarg) {
+		if (uvmexp.inactive < uvmexp.inactarg)
 			uvmpd_scan();
+		else if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) {
+			if (bufbackoff() == -1)
+				uvmpd_scan();
 		}
 
 		/*
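
The mechanism this commit introduces reads end to end like this: under memory pressure the page daemon calls bufbackoff() instead of immediately scanning; bufbackoff() shrinks bufpages in BACKPAGES steps toward the buflowpages floor and sets backoffpages so the next allocations recycle buffers rather than grow the cache; buf_get() then climbs back toward bufhighpages one BACKPAGES step per BACKPAGES allocations. The program below is a minimal standalone userspace model of that cycle, not the kernel code: the watermark constants are invented for the demo, bufadjust() is folded into a plain assignment, and backoffpages decays one unit per allocation here, where the kernel decrements it by atop(bp->b_bufsize) in buf_put().

	/*
	 * Userspace model of the buffer cache backoff/climb cycle.
	 * Names mirror the diff; constants and simplifications are ours.
	 */
	#include <stdio.h>

	#define BACKPAGES	100	/* step size, as in sys/sys/mount.h */

	long bufhighpages = 1000;	/* high water mark (initial bufpages) */
	long buflowpages = 300;		/* floor uvm may not push us below */
	long bufpages;			/* current buffer cache target */
	long backoffpages;		/* allocations to recycle before growing */

	/* Model of bufbackoff(): give BACKPAGES back to uvm, down to the floor. */
	int
	bufbackoff(void)
	{
		long d;

		if (bufpages <= buflowpages)
			return (-1);	/* at the floor; page daemon must scan */
		if (bufpages - BACKPAGES >= buflowpages)
			d = BACKPAGES;
		else
			d = bufpages - buflowpages;
		bufpages -= d;			/* stands in for bufadjust() */
		backoffpages = BACKPAGES;	/* next BACKPAGES gets recycle */
		return (0);
	}

	/* Model of buf_get()'s slow climb back to the high water mark. */
	void
	buf_get(void)
	{
		static int gcount;

		if (backoffpages > 0) {	/* still backed off: recycle, don't grow */
			backoffpages--;
			return;
		}
		if (bufpages < bufhighpages) {
			if (gcount == 0) {
				bufpages += BACKPAGES;	/* grow one step... */
				gcount = BACKPAGES;	/* ...then coast a while */
			} else
				gcount--;
		}
	}

	int
	main(void)
	{
		int i;

		bufpages = bufhighpages;
		while (bufbackoff() == 0)	/* pressure: shrink to the floor */
			printf("backed off to %ld pages\n", bufpages);
		for (i = 0; i < 2000; i++)	/* cache activity grows it back */
			buf_get();
		printf("climbed back to %ld pages\n", bufpages);
		return (0);
	}

Run, it prints the step-down from 1000 to 300 and the eventual climb back to 1000. That asymmetry is the point of the patch: buffer cache pages become free pages immediately when uvm asks, while the cache only re-expands gradually as it is actually used, and the clamped BUFPAGES_DEFICIT keeps the page daemon from counting on more than buflowpages worth of reclaimable cache.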