diff options
author | Bob Beck <beck@cvs.openbsd.org> | 2009-08-02 16:28:41 +0000 |
---|---|---|
committer | Bob Beck <beck@cvs.openbsd.org> | 2009-08-02 16:28:41 +0000 |
commit | dd3b8eacde77cc7df789cf96b6e56355467f18c5 (patch) | |
tree | 9c55348b269a51c1c38df3aa7b10ceaaae481bc7 /sys | |
parent | d39d8390343be0240365f612fe409018c18f13d5 (diff) |
Dynamic buffer cache support - a re-commit of what was backed out
after c2k9.
Allows the buffer cache to be extended and to grow/shrink dynamically.
Tested by many, ok oga@, "why not just commit it" deraadt@
Diffstat (limited to 'sys')
26 files changed, 241 insertions, 174 deletions
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index 14d257736fb..c1745a0f857 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.97 2009/07/22 20:33:12 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.98 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */ /*- @@ -399,12 +399,6 @@ setup_buffers() */ if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; } /* diff --git a/sys/arch/aviion/aviion/machdep.c b/sys/arch/aviion/aviion/machdep.c index ebff8ba9465..688d3e06389 100644 --- a/sys/arch/aviion/aviion/machdep.c +++ b/sys/arch/aviion/aviion/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.29 2009/06/15 17:01:25 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.30 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 2007 Miodrag Vallat. * @@ -293,12 +293,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/hp300/hp300/machdep.c b/sys/arch/hp300/hp300/machdep.c index c4b391071c6..8661452c4f8 100644 --- a/sys/arch/hp300/hp300/machdep.c +++ b/sys/arch/hp300/hp300/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.122 2009/06/15 17:01:25 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.123 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.121 1999/03/26 23:41:29 mycroft Exp $ */ /* @@ -290,12 +290,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/hppa/hppa/machdep.c b/sys/arch/hppa/hppa/machdep.c index d7053f805f0..68858a6f1bd 100644 --- a/sys/arch/hppa/hppa/machdep.c +++ b/sys/arch/hppa/hppa/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.174 2009/07/29 18:31:11 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.175 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 1999-2003 Michael Shalayeff @@ -656,12 +656,6 @@ cpu_startup(void) if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/hppa64/hppa64/machdep.c b/sys/arch/hppa64/hppa64/machdep.c index 33a408a7a02..028dc504818 100644 --- a/sys/arch/hppa64/hppa64/machdep.c +++ b/sys/arch/hppa64/hppa64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.15 2009/06/15 17:01:25 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.16 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 2005 Michael Shalayeff @@ -406,12 +406,6 @@ cpu_startup(void) if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - printf("here3\n"); /* * Allocate a submap for exec arguments. This map effectively diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index ea65c0cd522..98750e512b8 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.454 2009/07/27 11:28:55 dms Exp $ */ +/* $OpenBSD: machdep.c,v 1.455 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -547,11 +547,6 @@ setup_buffers() if (bufpages == 0) bufpages = atop(avail_end) * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; } /* diff --git a/sys/arch/luna88k/luna88k/machdep.c b/sys/arch/luna88k/luna88k/machdep.c index 03281cec0cb..ff95fd3e908 100644 --- a/sys/arch/luna88k/luna88k/machdep.c +++ b/sys/arch/luna88k/luna88k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.64 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.65 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 1998, 1999, 2000, 2001 Steve Murphree, Jr. 
* Copyright (c) 1996 Nivas Madhur @@ -470,12 +470,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/mac68k/mac68k/machdep.c b/sys/arch/mac68k/mac68k/machdep.c index 9f992582dca..1122c08d605 100644 --- a/sys/arch/mac68k/mac68k/machdep.c +++ b/sys/arch/mac68k/mac68k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.149 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.150 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.207 1998/07/08 04:39:34 thorpej Exp $ */ /* @@ -419,12 +419,6 @@ cpu_startup(void) if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/macppc/macppc/machdep.c b/sys/arch/macppc/macppc/machdep.c index f4c9ba24af1..e7cc2ed692f 100644 --- a/sys/arch/macppc/macppc/machdep.c +++ b/sys/arch/macppc/macppc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.109 2009/07/26 18:48:55 miod Exp $ */ +/* $OpenBSD: machdep.c,v 1.110 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.4 1996/10/16 19:33:11 ws Exp $ */ /* @@ -509,12 +509,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/mvme68k/mvme68k/machdep.c b/sys/arch/mvme68k/mvme68k/machdep.c index db6c36898db..a17d82742f5 100644 --- a/sys/arch/mvme68k/mvme68k/machdep.c +++ b/sys/arch/mvme68k/mvme68k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.116 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.117 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 1995 Theo de Raadt @@ -265,12 +265,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/mvme88k/mvme88k/machdep.c b/sys/arch/mvme88k/mvme88k/machdep.c index 063acd72a66..c9d52962de3 100644 --- a/sys/arch/mvme88k/mvme88k/machdep.c +++ b/sys/arch/mvme88k/mvme88k/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.231 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.232 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 1998, 1999, 2000, 2001 Steve Murphree, Jr. * Copyright (c) 1996 Nivas Madhur @@ -404,12 +404,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/mvmeppc/mvmeppc/machdep.c b/sys/arch/mvmeppc/mvmeppc/machdep.c index a2432cce471..ea3a62855c8 100644 --- a/sys/arch/mvmeppc/mvmeppc/machdep.c +++ b/sys/arch/mvmeppc/mvmeppc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.58 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.59 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.4 1996/10/16 19:33:11 ws Exp $ */ /* @@ -415,12 +415,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/sgi/sgi/machdep.c b/sys/arch/sgi/sgi/machdep.c index 0737cc91a60..af07a9de93b 100644 --- a/sys/arch/sgi/sgi/machdep.c +++ b/sys/arch/sgi/sgi/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.75 2009/07/22 21:29:05 miod Exp $ */ +/* $OpenBSD: machdep.c,v 1.76 2009/08/02 16:28:39 beck Exp $ */ /* * Copyright (c) 2003-2004 Opsycon AB (www.opsycon.se / www.opsycon.com) @@ -766,12 +766,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm. */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/socppc/socppc/machdep.c b/sys/arch/socppc/socppc/machdep.c index 509a5e6a9bb..b6229ea6ce1 100644 --- a/sys/arch/socppc/socppc/machdep.c +++ b/sys/arch/socppc/socppc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.14 2009/07/26 18:48:55 miod Exp $ */ +/* $OpenBSD: machdep.c,v 1.15 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.4 1996/10/16 19:33:11 ws Exp $ */ /* @@ -772,12 +772,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/solbourne/solbourne/machdep.c b/sys/arch/solbourne/solbourne/machdep.c index 0c90df97b4d..77d353bb009 100644 --- a/sys/arch/solbourne/solbourne/machdep.c +++ b/sys/arch/solbourne/solbourne/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.10 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: machdep.c,v 1.11 2009/08/02 16:28:39 beck Exp $ */ /* OpenBSD: machdep.c,v 1.105 2005/04/11 15:13:01 deraadt Exp */ /* @@ -190,12 +190,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c index 0cc40477def..b54c0096f4b 100644 --- a/sys/arch/sparc/sparc/machdep.c +++ b/sys/arch/sparc/sparc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.119 2009/07/13 19:50:00 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.120 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */ /* @@ -204,12 +204,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
diff --git a/sys/arch/vax/vax/machdep.c b/sys/arch/vax/vax/machdep.c index 7d0ac9ae3b4..c9ba240162b 100644 --- a/sys/arch/vax/vax/machdep.c +++ b/sys/arch/vax/vax/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.102 2009/06/20 21:02:15 miod Exp $ */ +/* $OpenBSD: machdep.c,v 1.103 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: machdep.c,v 1.108 2000/09/13 15:00:23 thorpej Exp $ */ /* @@ -212,12 +212,6 @@ cpu_startup() if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; - /* Restrict to at most 25% filled kvm */ - if (bufpages > - (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4) - bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / - PAGE_SIZE / 4; - /* * Allocate a submap for exec arguments. This map effectively limits * the number of processes exec'ing at any time. diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 82b2bd64b2f..41782ae2fba 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.177 2009/07/21 14:10:14 millert Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.178 2009/08/02 16:28:39 beck Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -555,12 +555,21 @@ kern_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, return (sysctl_cptime2(name + 1, namelen -1, oldp, oldlenp, newp, newlen)); case KERN_CACHEPCT: { - int opct = 0; - + int opct, pgs; + opct = bufcachepercent; error = sysctl_int(oldp, oldlenp, newp, newlen, - &opct); + &bufcachepercent); if (error) return(error); + if (bufcachepercent > 90 || bufcachepercent < 5) { + bufcachepercent = opct; + return (EINVAL); + } + if (bufcachepercent != opct) { + pgs = bufcachepercent * physmem / 100; + bufadjust(pgs); /* adjust bufpages */ + bufhighpages = bufpages; /* set high water mark */ + } return(0); } default: diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 1322bb93feb..045bd969ee3 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c 
@@ -1,7 +1,7 @@ -/* $OpenBSD: vfs_bio.c,v 1.118 2009/06/25 15:49:26 thib Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.119 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ -/*- +/* * Copyright (c) 1994 Christopher G. Demetriou * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -62,20 +62,6 @@ #include <miscfs/specfs/specdev.h> /* - * Definitions for the buffer hash lists. - */ -#define BUFHASH(dvp, lbn) \ - (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) -LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; -u_long bufhash; - -/* - * Insq/Remq for the buffer hash lists. - */ -#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) -#define bremhash(bp) LIST_REMOVE(bp, b_hash) - -/* * Definitions for the buffer free lists. */ #define BQUEUES 2 /* number of free buffer queues */ @@ -123,6 +109,9 @@ long hidirtypages; long locleanpages; long hicleanpages; long maxcleanpages; +long backoffpages; /* backoff counter for page allocations */ +long buflowpages; /* bufpages low water mark */ +long bufhighpages; /* bufpages high water mark */ /* XXX - should be defined here. */ extern int bufcachepercent; @@ -182,9 +171,13 @@ buf_put(struct buf *bp) panic("buf_put: b_dep is not empty"); #endif - bremhash(bp); LIST_REMOVE(bp, b_list); bcstats.numbufs--; + if (backoffpages) { + backoffpages -= atop(bp->b_bufsize); + if (backoffpages < 0) + backoffpages = 0; + } if (buf_dealloc_mem(bp) != 0) return; @@ -200,7 +193,7 @@ bufinit(void) struct bqueues *dp; /* XXX - for now */ - bufpages = bufcachepercent = bufkvm = 0; + bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0; /* * If MD code doesn't say otherwise, use 10% of kvm for mappings and @@ -211,6 +204,16 @@ bufinit(void) if (bufpages == 0) bufpages = physmem * bufcachepercent / 100; + bufhighpages = bufpages; + + /* + * set the base backoff level for the buffer cache to bufpages. 
+ * we will not allow uvm to steal back more than this number of + * pages + */ + buflowpages = physmem * 10 / 100; + + if (bufkvm == 0) bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10; @@ -237,7 +240,6 @@ bufinit(void) */ buf_mem_init(bufkvm); - bufhashtbl = hashinit(bufpages / 4, M_CACHE, M_WAITOK, &bufhash); hidirtypages = (bufpages / 4) * 3; lodirtypages = bufpages / 2; @@ -251,6 +253,104 @@ bufinit(void) maxcleanpages = locleanpages; } +/* + * Change cachepct + */ +void +bufadjust(int newbufpages) +{ + /* + * XXX - note, bufkvm was allocated once, based on 10% of physmem + * see above. + */ + struct buf *bp; + int s; + + s = splbio(); + bufpages = newbufpages; + + hidirtypages = (bufpages / 4) * 3; + lodirtypages = bufpages / 2; + + /* + * When we hit 95% of pages being clean, we bring them down to + * 90% to have some slack. + */ + hicleanpages = bufpages - (bufpages / 20); + locleanpages = bufpages - (bufpages / 10); + + maxcleanpages = locleanpages; + + /* + * If we we have more buffers allocated than bufpages, + * free them up to get back down. this may possibly consume + * all our clean pages... + */ + while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && + (bcstats.numbufpages > bufpages)) { + bremfree(bp); + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); + brelvp(bp); + } + buf_put(bp); + } + + /* + * Wake up cleaner if we're getting low on pages. We might + * now have too much dirty, or have fallen below our low + * water mark on clean pages so we need to free more stuff + * up. + */ + if (bcstats.numdirtypages >= hidirtypages || + bcstats.numcleanpages <= locleanpages) + wakeup(&bd_req); + + /* + * if immediate action has not freed up enough goo for us + * to proceed - we tsleep and wait for the cleaner above + * to do it's work and get us reduced down to sanity. 
+ */ + while (bcstats.numbufpages > bufpages) { + tsleep(&needbuffer, PRIBIO, "needbuffer", 0); + } + splx(s); +} + +/* + * Make the buffer cache back off from cachepct. + */ +int +bufbackoff() +{ + /* + * Back off the amount of buffer cache pages. Called by the page + * daemon to consume buffer cache pages rather than swapping. + * + * On success, it frees N pages from the buffer cache, and sets + * a flag so that the next N allocations from buf_get will recycle + * a buffer rather than allocate a new one. It then returns 0 to the + * caller. + * + * on failure, it could free no pages from the buffer cache, does + * nothing and returns -1 to the caller. + */ + long d; + + if (bufpages <= buflowpages) + return(-1); + + if (bufpages - BACKPAGES >= buflowpages) + d = BACKPAGES; + else + d = bufpages - buflowpages; + backoffpages = BACKPAGES; + bufadjust(bufpages - d); + backoffpages = BACKPAGES; + return(0); +} + struct buf * bio_doread(struct vnode *vp, daddr64_t blkno, int size, int async) { @@ -676,10 +776,12 @@ brelse(struct buf *bp) CLR(bp->b_flags, B_DELWRI); } - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, &bp->b_vp->v_bufs_tree, + bp); brelvp(bp); - bremhash(bp); - binshash(bp, &invalhash); + } + bp->b_vp = NULL; /* * If the buffer has no associated data, place it back in the @@ -697,6 +799,9 @@ brelse(struct buf *bp) CLR(bp->b_flags, B_WANTED); wakeup(bp); } + if (bp->b_vp != NULL) + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); buf_put(bp); splx(s); return; @@ -758,15 +863,14 @@ struct buf * incore(struct vnode *vp, daddr64_t blkno) { struct buf *bp; - - /* Search hash chain */ - LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { - if (bp->b_lblkno == blkno && bp->b_vp == vp && - !ISSET(bp->b_flags, B_INVAL)) - return (bp); - } - - return (NULL); + struct buf b; + + /* Search buf lookup tree */ + b.b_lblkno = blkno; + bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b); + if (bp && !ISSET(bp->b_flags, B_INVAL)) + return(bp); + return(NULL); } /* 
@@ -781,6 +885,7 @@ struct buf * getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; + struct buf b; int s, error; /* @@ -794,9 +899,9 @@ getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo) * the block until the write is finished. */ start: - LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { - if (bp->b_lblkno != blkno || bp->b_vp != vp) - continue; + b.b_lblkno = blkno; + bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &b); + if (bp != NULL) { s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { @@ -846,11 +951,24 @@ geteblk(int size) struct buf * buf_get(struct vnode *vp, daddr64_t blkno, size_t size) { + static int gcount = 0; struct buf *bp; int poolwait = size == 0 ? PR_NOWAIT : PR_WAITOK; int npages; int s; + /* + * if we were previously backed off, slowly climb back up + * to the high water mark again. + */ + if ((backoffpages == 0) && (bufpages < bufhighpages)) { + if ( gcount == 0 ) { + bufadjust(bufpages + BACKPAGES); + gcount += BACKPAGES; + } else + gcount--; + } + s = splbio(); if (size) { /* @@ -868,8 +986,11 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) while (bcstats.numcleanpages > locleanpages) { bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]); bremfree(bp); - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); brelvp(bp); + } buf_put(bp); } } @@ -879,16 +1000,21 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) /* * Free some buffers until we have enough space. 
*/ - while (bcstats.numbufpages + npages > bufpages) { + while ((bcstats.numbufpages + npages > bufpages) + || backoffpages) { int freemax = 5; int i = freemax; while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--) { bremfree(bp); - if (bp->b_vp) + if (bp->b_vp) { + RB_REMOVE(buf_rb_bufs, + &bp->b_vp->v_bufs_tree, bp); brelvp(bp); + } buf_put(bp); } - if (freemax == i) { + if (freemax == i && + (bcstats.numbufpages + npages > bufpages)) { needbuffer++; tsleep(&needbuffer, PRIBIO, "needbuffer", 0); splx(s); @@ -929,11 +1055,12 @@ buf_get(struct vnode *vp, daddr64_t blkno, size_t size) bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); - binshash(bp, BUFHASH(vp, blkno)); + if (RB_INSERT(buf_rb_bufs, &vp->v_bufs_tree, bp)) + panic("buf_get: dup lblk vp %p bp %p", vp, bp); } else { bp->b_vnbufs.le_next = NOLIST; SET(bp->b_flags, B_INVAL); - binshash(bp, &invalhash); + bp->b_vp = NULL; } LIST_INSERT_HEAD(&bufhead, bp, b_list); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 4b19f59ef2a..7a420752b26 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.179 2009/06/25 15:49:26 thib Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.180 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -59,6 +59,7 @@ #include <sys/mbuf.h> #include <sys/syscallargs.h> #include <sys/pool.h> +#include <sys/tree.h> #include <uvm/uvm_extern.h> #include <sys/sysctl.h> @@ -115,6 +116,19 @@ void printlockedvnodes(void); struct pool vnode_pool; +static int rb_buf_compare(struct buf *b1, struct buf *b2); +RB_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare); + +static int +rb_buf_compare(struct buf *b1, struct buf *b2) +{ + if (b1->b_lblkno < b2->b_lblkno) + return(-1); + if (b1->b_lblkno > b2->b_lblkno) + return(1); + return(0); +} + /* * Initialize the vnode management data structures. 
*/ @@ -345,6 +359,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { splx(s); vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO); + RB_INIT(&vp->v_bufs_tree); numvnodes++; } else { for (vp = TAILQ_FIRST(listhd); vp != NULLVP; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index ded9e03f319..3abc87d19d9 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.66 2009/06/17 01:30:32 thib Exp $ */ +/* $OpenBSD: buf.h,v 1.67 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -40,12 +40,16 @@ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include <sys/queue.h> +#include <sys/tree.h> #define NOLIST ((struct buf *)0x87654321) struct buf; struct vnode; +struct buf_rb_bufs; +RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare); + LIST_HEAD(bufhead, buf); /* @@ -72,8 +76,8 @@ extern struct bio_ops { * The buffer header describes an I/O operation in the kernel. */ struct buf { + RB_ENTRY(buf) b_rbbufs; /* vnode "hash" tree */ LIST_ENTRY(buf) b_list; /* All allocated buffers. */ - LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. 
*/ time_t b_synctime; /* Time this buffer should be flushed */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index b856a781827..ac3da0f4959 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mount.h,v 1.95 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: mount.h,v 1.96 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */ /* @@ -504,8 +504,14 @@ struct bcachestats { int64_t cachehits; /* total reads found in cache */ }; #ifdef _KERNEL +#define BACKPAGES 100 extern struct bcachestats bcstats; -#define BUFPAGES_DEFICIT (bufpages - bcstats.numbufpages) +extern long buflowpages, bufhighpages; +#define BUFPAGES_DEFICIT (((buflowpages - bcstats.numbufpages) < 0) ? 0 \ + : buflowpages - bcstats.numbufpages) +extern int bufcachepercent; +extern void bufadjust(int); +extern int bufbackoff(void); #endif /* diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 645d9314448..8dc92f61d30 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -1,4 +1,4 @@ -/* $OpenBSD: syscall.h,v 1.106 2009/06/15 17:31:49 deraadt Exp $ */ +/* $OpenBSD: syscall.h,v 1.107 2009/08/02 16:28:40 beck Exp $ */ /* * System call numbers. diff --git a/sys/sys/syscallargs.h b/sys/sys/syscallargs.h index e77e06394e1..ae8104b81b9 100644 --- a/sys/sys/syscallargs.h +++ b/sys/sys/syscallargs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: syscallargs.h,v 1.108 2009/06/15 17:31:49 deraadt Exp $ */ +/* $OpenBSD: syscallargs.h,v 1.109 2009/08/02 16:28:40 beck Exp $ */ /* * System call argument lists. 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index ffa752ca279..1608736d418 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.101 2009/07/09 22:29:56 thib Exp $ */ +/* $OpenBSD: vnode.h,v 1.102 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -32,10 +32,12 @@ * @(#)vnode.h 8.11 (Berkeley) 11/21/94 */ +#include <sys/buf.h> #include <sys/types.h> #include <sys/queue.h> #include <sys/lock.h> #include <sys/selinfo.h> +#include <sys/tree.h> #include <uvm/uvm.h> #include <uvm/uvm_vnode.h> @@ -79,6 +81,8 @@ enum vtagtype { */ LIST_HEAD(buflists, buf); +RB_HEAD(buf_rb_bufs, buf); + struct vnode { struct uvm_vnode v_uvm; /* uvm data */ int (**v_op)(void *); /* vnode operations vector */ @@ -94,6 +98,7 @@ struct vnode { struct mount *v_mount; /* ptr to vfs we are in */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ + struct buf_rb_bufs v_bufs_tree; /* lookup of all bufs */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ u_int v_numoutput; /* num of writes in progress */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index 046368e697a..000ef14d0a2 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pdaemon.c,v 1.52 2009/07/22 21:05:37 oga Exp $ */ +/* $OpenBSD: uvm_pdaemon.c,v 1.53 2009/08/02 16:28:40 beck Exp $ */ /* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */ /* @@ -239,11 +239,13 @@ uvm_pageout(void *arg) uvmexp.inactarg); /* - * scan if needed + * get pages from the buffer cache, or scan if needed */ - if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg || - uvmexp.inactive < uvmexp.inactarg) { + if (uvmexp.inactive < uvmexp.inactarg) uvmpd_scan(); + else if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) { + if (bufbackoff() == -1) + uvmpd_scan(); } /* |