 sys/kern/kern_sysctl.c |   5 +++--
 sys/kern/vfs_bio.c     | 114 +++++++++++++++++++++++++++++++++++++++++++++--
 sys/sys/mount.h        |   8 ++++++--
 sys/uvm/uvm_pdaemon.c  |  14 ++++++++------
 4 files changed, 126 insertions(+), 15 deletions(-)
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 19112eff659..c1a0d54f5a1 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sysctl.c,v 1.170 2009/06/03 21:30:20 beck Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.171 2009/06/05 04:29:14 beck Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
@@ -548,7 +548,8 @@ kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
}
if (bufcachepercent != opct) {
pgs = bufcachepercent * physmem / 100;
- bufadjust(pgs);
+ bufadjust(pgs); /* adjust bufpages */
+ bufhighpages = bufpages; /* set high water mark */
}
return(0);
}
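To make the arithmetic concrete: a minimal userland sketch (not part of the commit) of the percent-to-pages conversion the handler performs, assuming a machine with 1 GB of RAM in 4 KB pages:

    #include <stdio.h>

    int
    main(void)
    {
        long physmem = 262144;        /* assumed: 1 GB of 4 KB pages */
        int bufcachepercent = 20;     /* assumed kern.bufcachepercent */
        long pgs = bufcachepercent * physmem / 100;

        /*
         * With this change the new size also becomes the high water
         * mark that buf_get() will climb back to after a backoff.
         */
        printf("bufpages = bufhighpages = %ld\n", pgs);   /* 52428 */
        return (0);
    }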
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c106a1a1b36..8e900669145 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,7 +1,7 @@
-/* $OpenBSD: vfs_bio.c,v 1.114 2009/06/03 21:30:20 beck Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.115 2009/06/05 04:29:14 beck Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
-/*-
+/*
* Copyright (c) 1994 Christopher G. Demetriou
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -109,6 +109,9 @@ long hidirtypages;
long locleanpages;
long hicleanpages;
long maxcleanpages;
+long backoffpages; /* backoff counter for page allocations */
+long buflowpages; /* bufpages low water mark */
+long bufhighpages; /* bufpages high water mark */
/* XXX - should be defined here. */
extern int bufcachepercent;
@@ -170,6 +173,11 @@ buf_put(struct buf *bp)
LIST_REMOVE(bp, b_list);
bcstats.numbufs--;
+	if (backoffpages) {	/* credit freed pages against the backoff */
+		backoffpages -= atop(bp->b_bufsize);
+		if (backoffpages < 0)
+			backoffpages = 0;
+	}
if (buf_dealloc_mem(bp) != 0)
return;
@@ -185,7 +193,7 @@ bufinit(void)
struct bqueues *dp;
/* XXX - for now */
- bufpages = bufcachepercent = bufkvm = 0;
+ bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0;
/*
* If MD code doesn't say otherwise, use 10% of kvm for mappings and
@@ -196,6 +204,16 @@ bufinit(void)
if (bufpages == 0)
bufpages = physmem * bufcachepercent / 100;
+ bufhighpages = bufpages;
+
+	/*
+	 * Set the low water mark for the buffer cache at 10% of
+	 * physical memory.  UVM may steal pages back from the cache
+	 * down to, but never below, this level.
+	 */
+	buflowpages = physmem * 10 / 100;
+
if (bufkvm == 0)
bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10;
@@ -245,7 +263,10 @@ bufadjust(int newbufpages)
* XXX - note, bufkvm was allocated once, based on 10% of physmem
* see above.
*/
+ struct buf *bp;
+ int s;
+ s = splbio();
bufpages = newbufpages;
hidirtypages = (bufpages / 4) * 3;
@@ -259,8 +280,76 @@ bufadjust(int newbufpages)
locleanpages = bufpages - (bufpages / 10);
maxcleanpages = locleanpages;
+
+	/*
+	 * If we now have more buffer cache pages in use than bufpages
+	 * allows, free buffers until we are back under the limit.
+	 * This may consume all of our clean pages...
+	 */
+ while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) &&
+ (bcstats.numbufpages > bufpages)) {
+ bremfree(bp);
+ if (bp->b_vp) {
+ RB_REMOVE(buf_rb_bufs,
+ &bp->b_vp->v_bufs_tree, bp);
+ brelvp(bp);
+ }
+ buf_put(bp);
+ }
+
+	/*
+	 * Wake up the cleaner if we are getting low on pages.  We
+	 * might now have too many dirty pages, or have fallen below
+	 * our low water mark on clean pages, so we need to free
+	 * more up.
+	 */
+ if (bcstats.numdirtypages >= hidirtypages ||
+ bcstats.numcleanpages <= locleanpages)
+ wakeup(&bd_req);
+
+	/*
+	 * If immediate action has not freed up enough space for us
+	 * to proceed, tsleep and wait for the cleaner above to do
+	 * its work and bring us back down to sanity.
+	 */
+ while (bcstats.numbufpages > bufpages) {
+ tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
+ }
+ splx(s);
}
+/*
+ * Make the buffer cache back off from its configured size toward
+ * the low water mark, returning pages to UVM.
+ */
+int
+bufbackoff(void)
+{
+ /*
+ * Back off the amount of buffer cache pages. Called by the page
+ * daemon to consume buffer cache pages rather than swapping.
+ *
+ * On success, it frees N pages from the buffer cache, and sets
+ * a flag so that the next N allocations from buf_get will recycle
+ * a buffer rather than allocate a new one. It then returns 0 to the
+ * caller.
+ *
+	 * On failure, it can free no pages from the buffer cache: it
+	 * does nothing and returns -1 to the caller.
+ */
+ long d;
+
+ if (bufpages <= buflowpages)
+ return(-1);
+
+ if (bufpages - BACKPAGES >= buflowpages)
+ d = BACKPAGES;
+ else
+ d = bufpages - buflowpages;
+	backoffpages = BACKPAGES;
+	bufadjust(bufpages - d);
+	backoffpages = BACKPAGES;	/* buf_put() consumed some above */
+	return(0);
+}
struct buf *
bio_doread(struct vnode *vp, daddr64_t blkno, int size, int async)
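The clamp in the hunk above guarantees bufpages never drops below buflowpages (10% of physmem). A standalone sketch of the same step calculation, using the BACKPAGES value of 100 this commit adds to sys/sys/mount.h; backoff_step is a hypothetical name for illustration:

    #include <stdio.h>

    #define BACKPAGES	100	/* as defined by this commit */

    /*
     * Pages one backoff step frees, never dipping below the low
     * water mark; -1 means the cache is already at the floor and
     * the page daemon must scan instead.
     */
    static long
    backoff_step(long bufpages, long buflowpages)
    {
        if (bufpages <= buflowpages)
            return (-1);
        if (bufpages - BACKPAGES >= buflowpages)
            return (BACKPAGES);
        return (bufpages - buflowpages);
    }

    int
    main(void)
    {
        printf("%ld\n", backoff_step(5000, 1000));   /* 100 */
        printf("%ld\n", backoff_step(1050, 1000));   /* 50  */
        printf("%ld\n", backoff_step(1000, 1000));   /* -1  */
        return (0);
    }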
@@ -862,11 +951,24 @@ geteblk(int size)
struct buf *
buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
{
+ static int gcount = 0;
struct buf *bp;
int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK;
int npages;
int s;
+	/*
+	 * If we were previously backed off, slowly climb back up
+	 * to the high water mark again: grow by BACKPAGES pages
+	 * roughly once every BACKPAGES calls.
+	 */
+	if ((backoffpages == 0) && (bufpages < bufhighpages)) {
+		if (gcount == 0) {
+			bufadjust(bufpages + BACKPAGES);
+			gcount += BACKPAGES;
+		} else
+			gcount--;
+	}
+
s = splbio();
if (size) {
/*
@@ -898,7 +1000,8 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
/*
* Free some buffers until we have enough space.
*/
- while (bcstats.numbufpages + npages > bufpages) {
+ while ((bcstats.numbufpages + npages > bufpages)
+ || backoffpages) {
int freemax = 5;
int i = freemax;
while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) {
@@ -910,7 +1013,8 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size)
}
buf_put(bp);
}
- if (freemax == i) {
+ if (freemax == i &&
+ (bcstats.numbufpages + npages > bufpages)) {
needbuffer++;
tsleep(&needbuffer, PRIBIO, "needbuffer", 0);
splx(s);
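The two buf_get() hunks above implement the slow climb back to the high water mark: once backoffpages has drained to zero, one call in roughly every BACKPAGES grows the cache by BACKPAGES pages until bufhighpages is reached. A userland simulation of that pacing, with assumed values (it mimics only the gcount throttle, not the bufadjust() side effects):

    #include <stdio.h>

    #define BACKPAGES	100

    int
    main(void)
    {
        long bufpages = 4000, bufhighpages = 4300;   /* assumed */
        int gcount = 0;
        long calls = 0;

        /* backoffpages is taken to have already drained to zero */
        while (bufpages < bufhighpages) {
            calls++;
            if (gcount == 0) {
                bufpages += BACKPAGES;   /* one growth step */
                gcount += BACKPAGES;
            } else
                gcount--;
        }
        /* prints: 203 calls, bufpages now 4300 (3 growth steps) */
        printf("%ld calls, bufpages now %ld\n", calls, bufpages);
        return (0);
    }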
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 5796fda878f..402a926e398 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mount.h,v 1.93 2009/06/03 21:30:20 beck Exp $ */
+/* $OpenBSD: mount.h,v 1.94 2009/06/05 04:29:14 beck Exp $ */
/* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */
/*
@@ -504,10 +504,14 @@ struct bcachestats {
int64_t cachehits; /* total reads found in cache */
};
#ifdef _KERNEL
+#define BACKPAGES	100	/* pages per buffer cache backoff step */
extern struct bcachestats bcstats;
-#define BUFPAGES_DEFICIT (bufpages - bcstats.numbufpages)
+extern long buflowpages, bufhighpages;
+#define BUFPAGES_DEFICIT (((buflowpages - bcstats.numbufpages) < 0) ? 0 \
+ : buflowpages - bcstats.numbufpages)
extern int bufcachepercent;
extern void bufadjust(int);
+extern int bufbackoff(void);
#endif
/*
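The reworked BUFPAGES_DEFICIT now measures how far the cache sits below its low water mark, clamped at zero, so a cache at or above the floor no longer inflates the page daemon's free target. A parameterized restatement for illustration; DEFICIT is a stand-in name, since the kernel macro reads the globals directly:

    #include <stdio.h>

    /* Clamped deficit: pages the cache sits below its floor, never negative. */
    #define DEFICIT(buflowpages, numbufpages)		\
        ((((buflowpages) - (numbufpages)) < 0) ? 0 :	\
            ((buflowpages) - (numbufpages)))

    int
    main(void)
    {
        printf("%ld\n", DEFICIT(1000L, 400L));    /* 600: under the floor */
        printf("%ld\n", DEFICIT(1000L, 4000L));   /* 0: cache is healthy */
        return (0);
    }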
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 3079110cb1c..2e1d2de709f 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pdaemon.c,v 1.45 2009/06/01 19:54:02 oga Exp $ */
+/* $OpenBSD: uvm_pdaemon.c,v 1.46 2009/06/05 04:29:14 beck Exp $ */
/* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
/*
@@ -214,8 +214,8 @@ uvm_pageout(void *arg)
for (;;) {
uvm_lock_fpageq();
UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
- msleep(&uvm.pagedaemon_proc, &uvm.fpageqlock, PVM | PNORELOCK,
- "pgdaemon", 0);
+ msleep(&uvm.pagedaemon_proc, &uvm.fpageqlock,
+ PVM | PNORELOCK, "pgdaemon", 0);
uvmexp.pdwoke++;
UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
@@ -239,11 +239,13 @@ uvm_pageout(void *arg)
uvmexp.inactarg);
/*
- * scan if needed
+ * get pages from the buffer cache, or scan if needed
*/
- if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg ||
- uvmexp.inactive < uvmexp.inactarg) {
+ if (uvmexp.inactive < uvmexp.inactarg)
uvmpd_scan();
+ else if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) {
+ if (bufbackoff() == -1)
+ uvmpd_scan();
}
/*
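The net effect in the page daemon: an inactive-page shortage still forces a full scan, but a pure free-page shortage now asks the buffer cache for its pages back first, scanning only when bufbackoff() reports the cache is already at its floor. A sketch of that decision order, with stub routines and a hypothetical pageout_pass() standing in for the kernel code:

    #include <stdio.h>

    /* Stubs standing in for the kernel routines, for illustration only. */
    static int
    bufbackoff(void)
    {
        printf("bufbackoff: buffer cache gave pages back\n");
        return (0);     /* 0 = pages freed, -1 = already at the floor */
    }

    static void
    uvmpd_scan(void)
    {
        printf("uvmpd_scan: reclaiming pages the hard way\n");
    }

    static void
    pageout_pass(long free, long deficit, long freetarg,
        long inactive, long inactarg)
    {
        if (inactive < inactarg)
            uvmpd_scan();
        else if (free - deficit < freetarg) {
            if (bufbackoff() == -1)
                uvmpd_scan();
        }
    }

    int
    main(void)
    {
        /* assumed values: free-page shortage, inactive target met */
        pageout_pass(100, 0, 512, 900, 800);
        return (0);
    }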