path: root/sys/uvm
author     Bob Beck <beck@cvs.openbsd.org>  2011-07-06 19:50:39 +0000
committer  Bob Beck <beck@cvs.openbsd.org>  2011-07-06 19:50:39 +0000
commit     fe2320a5e93a9ef965f8f839c7771fb66f471c87 (patch)
tree       aaf63cc9837845352635f0bb173f5b1928b62261 /sys/uvm
parent     9bc7cbcf91abbd4993165a1fcb14af1eb0068dea (diff)
uvm changes for buffer cache improvements.
1) Make the pagedaemon aware of the memory ranges and size of allocations
   where memory is being requested, and pass this information on to
   bufbackoff(), which will later (not yet) be used to ensure that the
   buffer cache gets out of the way in the right area of memory.
   Note that this commit does not yet make it *do* that - as currently
   the buffer cache is all in dma-able memory and it will simply back off.

2) Add uvm_pagerealloc_multi - to be used by the buffer cache code for
   reallocating pages to particular regions.

Much of this work by ariane, with smatterings of me, art, and oga.

ok oga@, thib@, ariane@, deraadt@
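As a point of reference, here is a hedged sketch of how a buffer cache
caller might eventually use the new uvm_pagerealloc_multi() interface; it is
not part of this commit, and every name other than uvm_pagerealloc_multi()
and UVM_PLA_WAITOK is illustrative only.

    /*
     * Hypothetical caller sketch (not in this commit): migrate the wired
     * pages backing [off, off + size) of "obj" into the physical range
     * described by "range".  uvm_pagerealloc_multi() allocates replacement
     * pages within that range, copies the old contents over and frees the
     * old pages.
     */
    void
    buf_move_pages_sketch(struct uvm_object *obj, voff_t off, vsize_t size,
        struct uvm_constraint_range *range)
    {
            uvm_pagerealloc_multi(obj, off, size, UVM_PLA_WAITOK, range);
    }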
Diffstat (limited to 'sys/uvm')
-rw-r--r--  sys/uvm/uvm.h            20
-rw-r--r--  sys/uvm/uvm_extern.h     23
-rw-r--r--  sys/uvm/uvm_page.c       39
-rw-r--r--  sys/uvm/uvm_pdaemon.c    47
-rw-r--r--  sys/uvm/uvm_pmemrange.c 107
-rw-r--r--  sys/uvm/uvm_pmemrange.h  47
6 files changed, 244 insertions, 39 deletions
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index 561552b03cb..939738f47aa 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.45 2011/07/03 18:36:49 oga Exp $ */
+/* $OpenBSD: uvm.h,v 1.46 2011/07/06 19:50:38 beck Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -62,24 +62,6 @@
#include <machine/vmparam.h>
-/*
- * uvm_constraint_range's:
- * MD code is allowed to setup constraint ranges for memory allocators, the
- * primary use for this is to keep allocation for certain memory consumers
- * such as mbuf pools withing address ranges that are reachable by devices
- * that perform DMA.
- *
- * It is also to discourge memory allocations from being satisfied from ranges
- * such as the ISA memory range, if they can be satisfied with allocation
- * from other ranges.
- *
- * the MD ranges are defined in arch/ARCH/ARCH/machdep.c
- */
-struct uvm_constraint_range {
- paddr_t ucr_low;
- paddr_t ucr_high;
-};
-
/* Constraint ranges, set by MD code. */
extern struct uvm_constraint_range isa_constraint;
extern struct uvm_constraint_range dma_constraint;
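The comment block being moved to uvm_extern.h notes that the MD constraint
ranges are defined in arch/ARCH/ARCH/machdep.c.  As a hedged illustration
only (the bounds are architecture-specific and not taken from this diff),
such definitions look roughly like:

    /* Illustration only: per-architecture bounds vary and are not
     * part of this diff. */
    struct uvm_constraint_range isa_constraint = { 0x0, 0x00ffffffUL };
    struct uvm_constraint_range dma_constraint = { 0x0, 0xffffffffUL };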
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 0b852d2d223..72abe2a2540 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.100 2011/07/03 17:42:51 oga Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.101 2011/07/06 19:50:38 beck Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -221,6 +221,7 @@ typedef int vm_prot_t;
#define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */
#define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */
#define UVM_PLA_TRYCONTIG 0x0008 /* try to allocate contig physmem */
+#define UVM_PLA_FAILOK 0x0010 /* caller can handle failure */
/*
* lockflags that control the locking behavior of various functions.
@@ -254,6 +255,24 @@ struct vnode;
struct pool;
struct simplelock;
+/*
+ * uvm_constraint_range's:
+ * MD code is allowed to setup constraint ranges for memory allocators, the
+ * primary use for this is to keep allocation for certain memory consumers
+ * such as mbuf pools withing address ranges that are reachable by devices
+ * that perform DMA.
+ *
+ * It is also to discourge memory allocations from being satisfied from ranges
+ * such as the ISA memory range, if they can be satisfied with allocation
+ * from other ranges.
+ *
+ * the MD ranges are defined in arch/ARCH/ARCH/machdep.c
+ */
+struct uvm_constraint_range {
+ paddr_t ucr_low;
+ paddr_t ucr_high;
+};
+
extern struct pool *uvm_aiobuf_pool;
/*
@@ -666,6 +685,8 @@ void uvm_pagealloc_multi(struct uvm_object *, voff_t,
vsize_t, int);
void uvm_pagerealloc(struct vm_page *,
struct uvm_object *, voff_t);
+void uvm_pagerealloc_multi(struct uvm_object *, voff_t,
+ vsize_t, int, struct uvm_constraint_range *);
/* Actually, uvm_page_physload takes PF#s which need their own type */
void uvm_page_physload(paddr_t, paddr_t, paddr_t,
paddr_t, int);
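The new UVM_PLA_FAILOK flag lets a sleeping allocation give up instead of
retrying forever when the page daemon cannot free pages in the requested
range (see the uvm_pmr_getpages()/uvm_wait_pla() changes further down).
A minimal hedged usage sketch, with "size" standing in for a real caller's
request:

    /* Hypothetical caller: sleep for pages in the DMA-reachable range,
     * but tolerate failure if the page daemon cannot help. */
    struct pglist plist;
    int error;

    TAILQ_INIT(&plist);
    error = uvm_pglistalloc(size, dma_constraint.ucr_low,
        dma_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)),
        UVM_PLA_WAITOK | UVM_PLA_FAILOK);
    if (error)
            return (ENOMEM);        /* range could not be satisfied */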
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 625608e663a..48a62736256 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.111 2011/07/03 18:34:14 oga Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.112 2011/07/06 19:50:38 beck Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -846,7 +846,8 @@ uvm_pglistfree(struct pglist *list)
* The pages are allocated wired in DMA accessible memory
*/
void
-uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags)
+uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
+ int flags)
{
struct pglist plist;
struct vm_page *pg;
@@ -868,6 +869,40 @@ uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size, int flags)
}
/*
+ * interface used by the buffer cache to reallocate a buffer at a time.
+ * The pages are reallocated wired outside the DMA accessible region.
+ *
+ */
+void
+uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
+ int flags, struct uvm_constraint_range *where)
+{
+ struct pglist plist;
+ struct vm_page *pg, *tpg;
+ int i;
+ voff_t offset;
+
+
+ TAILQ_INIT(&plist);
+ if (size == 0)
+ panic("size 0 uvm_pagerealloc");
+ (void) uvm_pglistalloc(size, where->ucr_low, where->ucr_high, 0,
+ 0, &plist, atop(round_page(size)), UVM_PLA_WAITOK);
+ i = 0;
+ while((pg = TAILQ_FIRST(&plist)) != NULL) {
+ offset = off + ptoa(i++);
+ tpg = uvm_pagelookup(obj, offset);
+ pg->wire_count = 1;
+ atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
+ KASSERT((pg->pg_flags & PG_DEV) == 0);
+ TAILQ_REMOVE(&plist, pg, pageq);
+ uvm_pagecopy(tpg, pg);
+ uvm_pagefree(tpg);
+ uvm_pagealloc_pg(pg, obj, offset, NULL);
+ }
+}
+
+/*
* uvm_pagealloc_strat: allocate vm_page from a particular free list.
*
* => return null if no pages free
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 4182fe8dbeb..77b204ea52a 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pdaemon.c,v 1.58 2011/07/03 18:34:14 oga Exp $ */
+/* $OpenBSD: uvm_pdaemon.c,v 1.59 2011/07/06 19:50:38 beck Exp $ */
/* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
/*
@@ -147,7 +147,6 @@ uvm_wait(const char *wmsg)
msleep(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
-
/*
* uvmpd_tune: tune paging parameters
*
@@ -188,6 +187,9 @@ uvmpd_tune(void)
void
uvm_pageout(void *arg)
{
+ struct uvm_constraint_range constraint;
+ struct uvm_pmalloc *pma;
+ int work_done;
int npages = 0;
/*
@@ -206,10 +208,23 @@ uvm_pageout(void *arg)
*/
for (;;) {
+ work_done = 0; /* No work done this iteration. */
+
uvm_lock_fpageq();
- msleep(&uvm.pagedaemon, &uvm.fpageqlock, PVM | PNORELOCK,
- "pgdaemon", 0);
- uvmexp.pdwoke++;
+
+ if (TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
+ msleep(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
+ "pgdaemon", 0);
+ uvmexp.pdwoke++;
+ }
+
+ if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
+ pma->pm_flags |= UVM_PMA_BUSY;
+ constraint = pma->pm_constraint;
+ } else
+ constraint = no_constraint;
+
+ uvm_unlock_fpageq();
/*
* now lock page queues and recompute inactive count
@@ -229,10 +244,16 @@ uvm_pageout(void *arg)
/*
* get pages from the buffer cache, or scan if needed
*/
- if (((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
+ if (pma != NULL ||
+ ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
- if (bufbackoff() == -1)
+ if (bufbackoff(&constraint,
+ (pma ? pma->pm_size : -1)) == 0)
+ work_done = 1;
+ else {
uvmpd_scan();
+ work_done = 1; /* we hope... */
+ }
}
/*
@@ -244,6 +265,18 @@ uvm_pageout(void *arg)
uvmexp.paging == 0) {
wakeup(&uvmexp.free);
}
+
+ if (pma != NULL) {
+ pma->pm_flags &= ~UVM_PMA_BUSY;
+ if (!work_done)
+ pma->pm_flags |= UVM_PMA_FAIL;
+ if (pma->pm_flags & (UVM_PMA_FAIL | UVM_PMA_FREED)) {
+ pma->pm_flags &= ~UVM_PMA_LINKED;
+ TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
+ pmq);
+ }
+ wakeup(pma);
+ }
uvm_unlock_fpageq();
/*
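The hunk above hands the active constraint and request size to bufbackoff().
Its prototype lives in the buffer cache code outside sys/uvm and is therefore
not part of this diff; judging from the call site alone, the implied contract
is roughly the following sketch (return 0 once enough pages were released
within the range, non-zero otherwise, with a size of -1 meaning no specific
request):

    /* Sketch inferred from the call site above; the authoritative
     * declaration is in the buffer cache code, not in this diff. */
    int bufbackoff(struct uvm_constraint_range *range, long size);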
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
index 4d88bfeb2b8..114709b6ac4 100644
--- a/sys/uvm/uvm_pmemrange.c
+++ b/sys/uvm/uvm_pmemrange.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pmemrange.c,v 1.26 2011/07/05 19:48:02 ariane Exp $ */
+/* $OpenBSD: uvm_pmemrange.c,v 1.27 2011/07/06 19:50:38 beck Exp $ */
/*
* Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl>
@@ -17,9 +17,11 @@
*/
#include <sys/param.h>
+#include <sys/systm.h>
#include <uvm/uvm.h>
#include <sys/malloc.h>
#include <sys/proc.h> /* XXX for atomic */
+#include <sys/kernel.h>
/*
* 2 trees: addr tree and size tree.
@@ -828,12 +830,12 @@ uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
*/
desperate = 0;
+ uvm_lock_fpageq();
+
retry: /* Return point after sleeping. */
fcount = 0;
fnsegs = 0;
- uvm_lock_fpageq();
-
retry_desperate:
/*
* If we just want any page(s), go for the really fast option.
@@ -1029,13 +1031,15 @@ fail:
while (!TAILQ_EMPTY(result))
uvm_pmr_remove_1strange(result, 0, NULL, 0);
- uvm_unlock_fpageq();
if (flags & UVM_PLA_WAITOK) {
- uvm_wait("uvm_pmr_getpages");
- goto retry;
+ if (uvm_wait_pla(ptoa(start), ptoa(end) - 1, ptoa(count),
+ flags & UVM_PLA_FAILOK) == 0)
+ goto retry;
+ KASSERT(flags & UVM_PLA_FAILOK);
} else
wakeup(&uvm.pagedaemon);
+ uvm_unlock_fpageq();
return ENOMEM;
@@ -1156,6 +1160,8 @@ uvm_pmr_freepages(struct vm_page *pg, psize_t count)
}
wakeup(&uvmexp.free);
+ uvm_wakeup_pla(VM_PAGE_TO_PHYS(pg), ptoa(count));
+
uvm_unlock_fpageq();
}
@@ -1166,6 +1172,8 @@ void
uvm_pmr_freepageq(struct pglist *pgl)
{
struct vm_page *pg;
+ paddr_t pstart;
+ psize_t plen;
TAILQ_FOREACH(pg, pgl, pageq) {
if (!((pg->pg_flags & PQ_FREE) == 0 &&
@@ -1180,8 +1188,13 @@ uvm_pmr_freepageq(struct pglist *pgl)
}
uvm_lock_fpageq();
- while (!TAILQ_EMPTY(pgl))
- uvmexp.free += uvm_pmr_remove_1strange(pgl, 0, NULL, 0);
+ while (!TAILQ_EMPTY(pgl)) {
+ pstart = VM_PAGE_TO_PHYS(TAILQ_FIRST(pgl));
+ plen = uvm_pmr_remove_1strange(pgl, 0, NULL, 0);
+ uvmexp.free += plen;
+
+ uvm_wakeup_pla(pstart, ptoa(plen));
+ }
wakeup(&uvmexp.free);
uvm_unlock_fpageq();
@@ -1509,6 +1522,7 @@ uvm_pmr_init(void)
TAILQ_INIT(&uvm.pmr_control.use);
RB_INIT(&uvm.pmr_control.addr);
+ TAILQ_INIT(&uvm.pmr_control.allocs);
/* By default, one range for the entire address space. */
new_pmr = uvm_pmr_allocpmr();
@@ -1871,6 +1885,83 @@ uvm_pmr_print(void)
}
#endif
+/*
+ * uvm_wait_pla: wait (sleep) for the page daemon to free some pages
+ * in a specific physmem area.
+ *
+ * Returns ENOMEM if the pagedaemon failed to free any pages.
+ * If not failok, failure will lead to panic.
+ *
+ * Must be called with fpageq locked.
+ */
+int
+uvm_wait_pla(paddr_t low, paddr_t high, paddr_t size, int failok)
+{
+ struct uvm_pmalloc pma;
+ const char *wmsg = "pmrwait";
+
+ /*
+ * Prevent deadlock.
+ */
+ if (curproc == uvm.pagedaemon_proc) {
+ msleep(&uvmexp.free, &uvm.fpageqlock, PVM, wmsg, hz >> 3);
+ return 0;
+ }
+
+ for (;;) {
+ pma.pm_constraint.ucr_low = low;
+ pma.pm_constraint.ucr_high = high;
+ pma.pm_size = size;
+ pma.pm_flags = UVM_PMA_LINKED;
+ TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, &pma, pmq);
+
+ wakeup(&uvm.pagedaemon); /* wake the daemon! */
+ while (pma.pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY))
+ msleep(&pma, &uvm.fpageqlock, PVM, wmsg, 0);
+
+ if (!(pma.pm_flags & UVM_PMA_FREED) &&
+ pma.pm_flags & UVM_PMA_FAIL) {
+ if (failok)
+ return ENOMEM;
+ printf("uvm_wait: failed to free %ld pages between "
+ "0x%lx-0x%lx\n", atop(size), low, high);
+ } else
+ return 0;
+ }
+ /* UNREACHABLE */
+}
+
+/*
+ * Wake up uvm_pmalloc sleepers.
+ */
+void
+uvm_wakeup_pla(paddr_t low, psize_t len)
+{
+ struct uvm_pmalloc *pma, *pma_next;
+ paddr_t high;
+
+ high = low + len;
+
+ /*
+ * Wake specific allocations waiting for this memory.
+ */
+ for (pma = TAILQ_FIRST(&uvm.pmr_control.allocs); pma != NULL;
+ pma = pma_next) {
+ pma_next = TAILQ_NEXT(pma, pmq);
+
+ if (low < pma->pm_constraint.ucr_high &&
+ high > pma->pm_constraint.ucr_low) {
+ pma->pm_flags |= UVM_PMA_FREED;
+ if (!(pma->pm_flags & UVM_PMA_BUSY)) {
+ pma->pm_flags &= ~UVM_PMA_LINKED;
+ TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
+ pmq);
+ wakeup(pma);
+ }
+ }
+ }
+}
+
#ifndef SMALL_KERNEL
/*
* Zero all free memory.
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
index fa27540e77d..b80d6a3539d 100644
--- a/sys/uvm/uvm_pmemrange.h
+++ b/sys/uvm/uvm_pmemrange.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pmemrange.h,v 1.7 2011/04/03 22:07:37 ariane Exp $ */
+/* $OpenBSD: uvm_pmemrange.h,v 1.8 2011/07/06 19:50:38 beck Exp $ */
/*
* Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
@@ -59,6 +59,44 @@ struct uvm_pmemrange {
/* pmr, sorted by address */
};
+/*
+ * Description of failing memory allocation.
+ *
+ * Two ways new pages can become available:
+ * [1] page daemon drops them (we notice because they are freed)
+ * [2] a process calls free
+ *
+ * The buffer cache and page daemon can decide that they don't have the
+ * ability to make pages available in the requested range. In that case,
+ * the FAIL bit will be set.
+ * XXX There's a possibility that a page is no longer on the queues but
+ * XXX has not yet been freed, or that a page was busy.
+ * XXX Also, wired pages are not considered for paging, so they could
+ * XXX cause a failure that may be recoverable.
+ */
+struct uvm_pmalloc {
+ TAILQ_ENTRY(uvm_pmalloc) pmq;
+
+ /*
+ * Allocation request parameters.
+ */
+ struct uvm_constraint_range pm_constraint;
+ psize_t pm_size;
+
+ /*
+ * State flags.
+ */
+ int pm_flags;
+};
+
+/*
+ * uvm_pmalloc flags.
+ */
+#define UVM_PMA_LINKED 0x01 /* uvm_pmalloc is on list */
+#define UVM_PMA_BUSY 0x02 /* entry is busy with fpageq unlocked */
+#define UVM_PMA_FAIL 0x10 /* page daemon cannot free pages */
+#define UVM_PMA_FREED 0x20 /* at least one page in the range was freed */
+
RB_HEAD(uvm_pmemrange_addr, uvm_pmemrange);
TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
@@ -68,13 +106,18 @@ TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
struct uvm_pmr_control {
struct uvm_pmemrange_addr addr;
struct uvm_pmemrange_use use;
+
+ /* Only changed while fpageq is locked. */
+ TAILQ_HEAD(, uvm_pmalloc) allocs;
};
void uvm_pmr_freepages(struct vm_page *, psize_t);
-void uvm_pmr_freepageq(struct pglist *pgl);
+void uvm_pmr_freepageq(struct pglist *);
int uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
int, int, struct pglist *);
void uvm_pmr_init(void);
+int uvm_wait_pla(paddr_t, paddr_t, paddr_t, int);
+void uvm_wakeup_pla(paddr_t, psize_t);
#if defined(DDB) || defined(DEBUG)
int uvm_pmr_isfree(struct vm_page *pg);