-rw-r--r--  sys/arch/amd64/amd64/pmap.c       |   10
-rw-r--r--  sys/arch/amd64/include/vmparam.h  |    9
-rw-r--r--  sys/arch/i386/i386/pmap.c         |   11
-rw-r--r--  sys/arch/i386/i386/pmapae.c       |   15
-rw-r--r--  sys/arch/i386/include/vmparam.h   |    9
-rw-r--r--  sys/conf/files                    |    3
-rw-r--r--  sys/uvm/uvm.h                     |   31
-rw-r--r--  sys/uvm/uvm_extern.h              |    9
-rw-r--r--  sys/uvm/uvm_map.c                 |   15
-rw-r--r--  sys/uvm/uvm_page.c                |  173
-rw-r--r--  sys/uvm/uvm_page.h                |   17
-rw-r--r--  sys/uvm/uvm_pglist.c              |  328
-rw-r--r--  sys/uvm/uvm_pmemrange.c           | 1248
-rw-r--r--  sys/uvm/uvm_pmemrange.h           |   83
-rw-r--r--  sys/uvm/uvm_vnode.c               |    4
-rw-r--r--  sys/xfs/xfs_vnodeops-bsd.c        |    2
16 files changed, 1503 insertions(+), 464 deletions(-)
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 4bd4ba51f9f..fb46e417f84 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.42 2009/05/28 09:05:33 art Exp $ */
+/* $OpenBSD: pmap.c,v 1.43 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -834,7 +834,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
ptp->wire_count = 0;
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(pagelist, ptp, listq);
+ TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq);
}
void
@@ -1545,7 +1545,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
@@ -1617,7 +1617,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -1690,7 +1690,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/amd64/include/vmparam.h b/sys/arch/amd64/include/vmparam.h
index fd82b226db2..d3c5c9dd102 100644
--- a/sys/arch/amd64/include/vmparam.h
+++ b/sys/arch/amd64/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.10 2008/07/18 16:40:17 kurt Exp $ */
+/* $OpenBSD: vmparam.h,v 1.11 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.1 2003/04/26 18:39:49 fvdl Exp $ */
/*-
@@ -112,6 +112,13 @@
#define VM_FREELIST_LOW 1
#define VM_FREELIST_HIGH 2
+/* reserve ISA-DMA and 32-bit DMA memory */
+#define UVM_IO_RANGES \
+ { \
+ { 0, 0x00ffffffUL }, \
+ { 0, 0xffffffffUL }, \
+ }
+
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 753298eb42a..91fd6edb555 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.136 2009/02/05 01:13:21 oga Exp $ */
+/* $OpenBSD: pmap.c,v 1.137 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -2074,7 +2074,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
/* If PTP is no longer being used, free it. */
if (ptp && ptp->wire_count <= 1) {
pmap_drop_ptp(pmap, va, ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
}
if (!shootall)
@@ -2088,7 +2088,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -2145,7 +2145,8 @@ pmap_page_remove(struct vm_page *pg)
if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) {
pmap_drop_ptp(pve->pv_pmap, pve->pv_va,
pve->pv_ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
+ fq.queues.listq);
}
pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
@@ -2158,7 +2159,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index 683d282379a..b13ff7c9463 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmapae.c,v 1.15 2009/01/27 22:14:13 miod Exp $ */
+/* $OpenBSD: pmapae.c,v 1.16 2009/06/01 17:42:33 ariane Exp $ */
/*
* Copyright (c) 2006 Michael Shalayeff
@@ -1453,14 +1453,15 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp,
+ fq.queues.listq);
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
return;
@@ -1546,7 +1547,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
}
}
@@ -1554,7 +1555,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -1665,7 +1666,7 @@ pmap_page_remove_pae(struct vm_page *pg)
/* Postpone free to after shootdown. */
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
- listq);
+ fq.queues.listq);
}
}
pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */
@@ -1676,7 +1677,7 @@ pmap_page_remove_pae(struct vm_page *pg)
PMAP_HEAD_TO_MAP_UNLOCK();
pmap_tlb_shootnow(cpumask);
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h
index 9c2163e6c2d..41e95b3f418 100644
--- a/sys/arch/i386/include/vmparam.h
+++ b/sys/arch/i386/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.41 2008/07/18 16:40:17 kurt Exp $ */
+/* $OpenBSD: vmparam.h,v 1.42 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */
/*-
@@ -118,6 +118,13 @@
#define VM_FREELIST_DEFAULT 0
#define VM_FREELIST_FIRST16 1
+/* reserve ISA-DMA and 32-bit DMA memory */
+#define UVM_IO_RANGES \
+ { \
+ { 0, 0x00ffffffUL }, \
+ { 0, 0xffffffffUL }, \
+ }
+
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/conf/files b/sys/conf/files
index 38d4e2014c2..826593c211d 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.455 2009/05/06 18:21:23 stevesk Exp $
+# $OpenBSD: files,v 1.456 2009/06/01 17:42:33 ariane Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -971,6 +971,7 @@ file uvm/uvm_page.c
file uvm/uvm_pager.c
file uvm/uvm_pdaemon.c
file uvm/uvm_pglist.c
+file uvm/uvm_pmemrange.c
file uvm/uvm_stat.c
file uvm/uvm_swap.c
file uvm/uvm_swap_encrypt.c uvm_swap_encrypt
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index 3efcf89044c..087add79376 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.32 2009/05/04 18:08:06 oga Exp $ */
+/* $OpenBSD: uvm.h,v 1.33 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -57,6 +57,7 @@
#include <uvm/uvm_page.h>
#include <uvm/uvm_pager.h>
#include <uvm/uvm_pdaemon.h>
+#include <uvm/uvm_pmemrange.h>
#include <uvm/uvm_swap.h>
#ifdef UVM_SWAP_ENCRYPT
#include <uvm/uvm_swap_encrypt.h>
@@ -68,6 +69,32 @@
#include <machine/vmparam.h>
/*
+ * UVM_IO_RANGES: paddr_t pairs, describing the lowest and highest address
+ * that should be reserved. These ranges (which may overlap) will have their
+ * use counter increased, causing them to be avoided if an allocation can be
+ * satisfied from another range of memory.
+ *
+ * IO ranges need not overlap with physmem ranges: the uvm code splits ranges
+ * on demand to satisfy requests.
+ *
+ * UVM_IO_RANGES specified here actually translates into a call to
+ * uvm_pmr_use_inc() at uvm initialization time. uvm_pmr_use_inc() can also
+ * be called after uvm_init() has completed.
+ *
+ * Note: the upper bound is specified in the same way as to uvm_pglistalloc.
+ * E.g. a 16-bit memory range is specified as: { 0, 0xffff }.
+ */
+#ifndef UVM_IO_RANGES
+#define UVM_IO_RANGES {}
+#endif
+
+/* UVM IO ranges are described in an array of uvm_io_ranges. */
+struct uvm_io_ranges {
+ paddr_t low;
+ paddr_t high;
+};
+
+/*
* uvm structure (vm global state: collected in one structure for ease
* of reference...)
*/
@@ -76,7 +103,7 @@ struct uvm {
/* vm_page related parameters */
/* vm_page queues */
- struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
+ struct uvm_pmr_control pmr_control; /* pmemrange control data */
struct pglist page_active; /* allocated pages, in use */
struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */
struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */
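
The comment block added above states that UVM_IO_RANGES translates into uvm_pmr_use_inc() calls at UVM initialization time; the initialization code itself lives in uvm_pmemrange.c, whose tail is truncated at the end of this diff. A minimal sketch of such a table walk, assuming the nitems() macro from <sys/param.h>; example_mark_io_ranges() is a hypothetical name, while uvm_io_ranges, struct uvm_io_ranges and uvm_pmr_use_inc() are taken from this diff:

    /* Hypothetical sketch -- not part of the commit. */
    static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES;

    void
    example_mark_io_ranges(void)
    {
        int i;

        /* Raise the use counter on each reserved I/O range. */
        for (i = 0; i < nitems(uvm_io_ranges); i++)
            uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high);
    }
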
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 5ff1e2ddad2..bc6a766590a 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.76 2009/04/20 00:30:18 oga Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.77 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -221,11 +221,12 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zeroed */
/*
- * flags for uvm_pglistalloc()
+ * flags for uvm_pglistalloc() and uvm_pmr_getpages()
*/
#define UVM_PLA_WAITOK 0x0001 /* may sleep */
#define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */
#define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */
+#define UVM_PLA_TRY_CONTIG 0x0008 /* try to allocate a contig range */
/*
* lockflags that control the locking behavior of various functions.
@@ -589,6 +590,10 @@ int uvm_pglistalloc(psize_t, paddr_t,
struct pglist *, int, int);
void uvm_pglistfree(struct pglist *);
+/* uvm_pmemrange.c */
+
+void uvm_pmr_use_inc(paddr_t, paddr_t);
+
/* uvm_swap.c */
void uvm_swap_init(void);
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index 8858a585027..1b6f3262986 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.110 2009/05/02 12:54:42 oga Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.111 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -3822,7 +3822,7 @@ uvm_object_printit(uobj, full, pr)
(*pr)(" PAGES <pg,offset>:\n ");
for (pg = TAILQ_FIRST(&uobj->memq);
pg != NULL;
- pg = TAILQ_NEXT(pg, listq), cnt++) {
+ pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) {
(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
if ((cnt % 3) == 2) {
(*pr)("\n ");
@@ -3883,7 +3883,8 @@ uvm_page_printit(pg, full, pr)
uobj = pg->uobject;
if (uobj) {
(*pr)(" checking object list\n");
- TAILQ_FOREACH(tpg, &uobj->memq, listq) {
+ TAILQ_FOREACH(tpg, &uobj->memq,
+ fq.queues.listq) {
if (tpg == pg) {
break;
}
@@ -3898,9 +3899,11 @@ uvm_page_printit(pg, full, pr)
/* cross-verify page queue */
if (pg->pg_flags & PQ_FREE) {
- int fl = uvm_page_lookup_freelist(pg);
- pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ?
- PGFL_ZEROS : PGFL_UNKNOWN];
+ if (uvm_pmr_isfree(pg))
+ printf(" page found in uvm_pmemrange\n");
+ else
+ printf(" >>> page not found in uvm_pmemrange <<<\n");
+ pgl = NULL;
} else if (pg->pg_flags & PQ_INACTIVE) {
pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
&uvm.page_inactive_swp : &uvm.page_inactive_obj;
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 39008ac0c19..7c6e257ccb5 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.80 2009/05/08 15:10:35 ariane Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.81 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -159,10 +159,11 @@ uvm_pageinsert(struct vm_page *pg)
KASSERT((pg->pg_flags & PG_TABLED) == 0);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */
+ TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */
mtx_leave(&uvm.hashlock);
- TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
+ TAILQ_INSERT_TAIL(&pg->uobject->memq, pg,
+ fq.queues.listq); /* put in object */
atomic_setbits_int(&pg->pg_flags, PG_TABLED);
pg->uobject->uo_npages++;
}
@@ -183,7 +184,7 @@ uvm_pageremove(struct vm_page *pg)
KASSERT(pg->pg_flags & PG_TABLED);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_REMOVE(buck, pg, hashq);
+ TAILQ_REMOVE(buck, pg, fq.queues.hashq);
mtx_leave(&uvm.hashlock);
#ifdef UBC
@@ -193,7 +194,7 @@ uvm_pageremove(struct vm_page *pg)
#endif
/* object should be locked */
- TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
+ TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq);
atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ);
pg->uobject->uo_npages--;
@@ -226,15 +227,12 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
* init the page queues and page queue locks
*/
- for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
- for (i = 0; i < PGFL_NQUEUES; i++)
- TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
- }
TAILQ_INIT(&uvm.page_active);
TAILQ_INIT(&uvm.page_inactive_swp);
TAILQ_INIT(&uvm.page_inactive_obj);
simple_lock_init(&uvm.pageqlock);
mtx_init(&uvm.fpageqlock, IPL_VM);
+ uvm_pmr_init();
/*
* init the <obj,offset> => <page> hash table. for now
@@ -319,10 +317,13 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
if (atop(paddr) >= vm_physmem[lcv].avail_start &&
atop(paddr) <= vm_physmem[lcv].avail_end) {
uvmexp.npages++;
- /* add page to free pool */
- uvm_pagefree(&vm_physmem[lcv].pgs[i]);
}
}
+
+ /* add pages to free pool */
+ uvm_pmr_freepages(&vm_physmem[lcv].pgs[
+ vm_physmem[lcv].avail_start - vm_physmem[lcv].start],
+ vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
}
/*
@@ -811,10 +812,10 @@ uvm_page_rehash(void)
/* ... and rehash */
for (lcv = 0 ; lcv < oldcount ; lcv++) {
while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) {
- TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
+ TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq);
TAILQ_INSERT_TAIL(
&uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
- pg, hashq);
+ pg, fq.queues.hashq);
}
}
mtx_leave(&uvm.hashlock);
@@ -892,18 +893,15 @@ struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
int flags, int strat, int free_list)
{
- int lcv, try1, try2, zeroit = 0;
+ struct pglist pgl;
+ int pmr_flags;
struct vm_page *pg;
- struct pglist *freeq;
- struct pgfreelist *pgfl;
boolean_t use_reserve;
UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist);
KASSERT(obj == NULL || anon == NULL);
KASSERT(off == trunc_page(off));
- uvm_lock_fpageq();
-
/*
* check to see if we need to generate some free pages waking
* the pagedaemon.
@@ -930,95 +928,20 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
(curproc == syncerproc))))
goto fail;
-#if PGFL_NQUEUES != 2
-#error uvm_pagealloc_strat needs to be updated
-#endif
-
- /*
- * If we want a zero'd page, try the ZEROS queue first, otherwise
- * we try the UNKNOWN queue first.
- */
- if (flags & UVM_PGA_ZERO) {
- try1 = PGFL_ZEROS;
- try2 = PGFL_UNKNOWN;
- } else {
- try1 = PGFL_UNKNOWN;
- try2 = PGFL_ZEROS;
- }
-
- UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx",
- obj, (u_long)off, anon, flags);
- UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0);
- again:
- switch (strat) {
- case UVM_PGA_STRAT_NORMAL:
- /* Check all freelists in descending priority order. */
- for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
- pgfl = &uvm.page_free[lcv];
- if ((pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try1]))) != NULL ||
- (pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try2]))) != NULL)
- goto gotit;
- }
-
- /* No pages free! */
- goto fail;
-
- case UVM_PGA_STRAT_ONLY:
- case UVM_PGA_STRAT_FALLBACK:
- /* Attempt to allocate from the specified free list. */
- KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
- pgfl = &uvm.page_free[free_list];
- if ((pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try1]))) != NULL ||
- (pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try2]))) != NULL)
- goto gotit;
-
- /* Fall back, if possible. */
- if (strat == UVM_PGA_STRAT_FALLBACK) {
- strat = UVM_PGA_STRAT_NORMAL;
- goto again;
- }
-
- /* No pages free! */
+ pmr_flags = UVM_PLA_NOWAIT;
+ if (flags & UVM_PGA_ZERO)
+ pmr_flags |= UVM_PLA_ZERO;
+ TAILQ_INIT(&pgl);
+ if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
goto fail;
-
- default:
- panic("uvm_pagealloc_strat: bad strat %d", strat);
- /* NOTREACHED */
- }
-
- gotit:
- TAILQ_REMOVE(freeq, pg, pageq);
- uvmexp.free--;
-
- /* update zero'd page count */
- if (pg->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
-
- /*
- * update allocation statistics and remember if we have to
- * zero the page
- */
- if (flags & UVM_PGA_ZERO) {
- if (pg->pg_flags & PG_ZERO) {
- uvmexp.pga_zerohit++;
- zeroit = 0;
- } else {
- uvmexp.pga_zeromiss++;
- zeroit = 1;
- }
- }
-
- uvm_unlock_fpageq(); /* unlock free page queue */
+ pg = TAILQ_FIRST(&pgl);
+ KASSERT(pg != NULL);
+ KASSERT(TAILQ_NEXT(pg, pageq) == NULL);
pg->offset = off;
pg->uobject = obj;
pg->uanon = anon;
pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
- pg->pg_version++;
if (anon) {
anon->an_page = pg;
atomic_setbits_int(&pg->pg_flags, PQ_ANON);
@@ -1034,22 +957,11 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
#endif
UVM_PAGE_OWN(pg, "new alloc");
- if (flags & UVM_PGA_ZERO) {
- /*
- * A zero'd page is not clean. If we got a page not already
- * zero'd, then we have to zero it ourselves.
- */
- atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
- if (zeroit)
- pmap_zero_page(pg);
- }
-
UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg,
(u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
return(pg);
fail:
- uvm_unlock_fpageq();
UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0);
return (NULL);
}
@@ -1100,6 +1012,7 @@ uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
void
uvm_pagefree(struct vm_page *pg)
{
+ struct pglist pgl;
int saved_loan_count = pg->loan_count;
UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist);
@@ -1195,27 +1108,35 @@ uvm_pagefree(struct vm_page *pg)
}
/*
- * and put on free queue
+ * Clean page state bits.
*/
-
- atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
-
- uvm_lock_fpageq();
- TAILQ_INSERT_TAIL(&uvm.page_free[
- uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
- atomic_clearbits_int(&pg->pg_flags, PQ_MASK);
- atomic_setbits_int(&pg->pg_flags, PQ_FREE);
+ atomic_clearbits_int(&pg->pg_flags,
+ PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK);
+ /*
+ * Pmap flag cleaning.
+ * XXX: Shouldn't pmap do this?
+ */
+ atomic_clearbits_int(&pg->pg_flags,
+ PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
+
+#if defined(DIAGNOSTIC)
+ if (pg->pg_flags != 0) {
+ panic("uvm_pagefree: expected page %p pg_flags to be 0\n"
+ "uvm_pagefree: instead of pg->pg_flags = %x\n",
+ VM_PAGE_TO_PHYS(pg), pg->pg_flags);
+ }
+#endif
#ifdef DEBUG
pg->uobject = (void *)0xdeadbeef;
pg->offset = 0xdeadbeef;
pg->uanon = (void *)0xdeadbeef;
#endif
- uvmexp.free++;
+ TAILQ_INIT(&pgl);
+ TAILQ_INSERT_HEAD(&pgl, pg, pageq);
+ uvm_pmr_freepageq(&pgl);
if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
uvm.page_idle_zero = vm_page_zero_enable;
-
- uvm_unlock_fpageq();
}
/*
@@ -1308,6 +1229,7 @@ uvm_page_own(struct vm_page *pg, char *tag)
void
uvm_pageidlezero(void)
{
+#if 0 /* Disabled for now. */
struct vm_page *pg;
struct pgfreelist *pgfl;
int free_list;
@@ -1374,6 +1296,7 @@ uvm_pageidlezero(void)
uvmexp.zeropages++;
uvm_unlock_fpageq();
} while (curcpu_is_idle());
+#endif /* 0 */
}
/*
@@ -1476,7 +1399,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off)
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(obj,off)];
- TAILQ_FOREACH(pg, buck, hashq) {
+ TAILQ_FOREACH(pg, buck, fq.queues.hashq) {
if (pg->uobject == obj && pg->offset == off) {
break;
}
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index e21562cd030..e7991dce4a0 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.h,v 1.32 2009/04/28 16:06:07 miod Exp $ */
+/* $OpenBSD: uvm_page.h,v 1.33 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */
/*
@@ -106,11 +106,22 @@
#include <uvm/uvm_extern.h>
#include <uvm/uvm_pglist.h>
+union vm_page_fq {
+ struct {
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
+ } queues;
+
+ struct {
+ RB_ENTRY(vm_page) tree; /* Free chunks, addr/size */
+ psize_t pages;
+ } free;
+};
+
struct vm_page {
+ union vm_page_fq fq; /* free and queue management */
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
* queue or free list (P) */
- TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
struct vm_anon *uanon; /* anon (O,P) */
struct uvm_object *uobject; /* object (O,P) */
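
The union introduced above lets the object/hash queue links and the free-range bookkeeping share the same storage in struct vm_page, which is safe because a page is never simultaneously on an object's memq and in the free-page allocator. A hypothetical helper illustrating the two views; example_page_describe() is not part of the commit, and fq.free.pages is only meaningful on the first page of a free run:

    void
    example_page_describe(struct vm_page *pg)
    {
        if (pg->pg_flags & PQ_FREE)
            printf("head of a free run of %ld pages\n",
                (long)pg->fq.free.pages);
        else if (pg->uobject != NULL)
            printf("next page in the same object: %p\n",
                TAILQ_NEXT(pg, fq.queues.listq));
    }
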
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c
index 093cd134b7f..ff0f8d91f68 100644
--- a/sys/uvm/uvm_pglist.c
+++ b/sys/uvm/uvm_pglist.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pglist.c,v 1.29 2009/05/04 18:08:06 oga Exp $ */
+/* $OpenBSD: uvm_pglist.c,v 1.30 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */
/*-
@@ -56,112 +56,6 @@ u_long uvm_pglistalloc_npages;
#define STAT_DECR(v)
#endif
-int uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t, struct pglist *);
-
-/*
- * Simple page allocation: pages do not need to be contiguous. We just
- * attempt to find enough free pages in the given range.
- */
-int
-uvm_pglistalloc_simple(psize_t size, paddr_t low, paddr_t high,
- struct pglist *rlist)
-{
- psize_t todo;
- int psi;
- struct vm_page *pg;
- struct vm_physseg *seg;
- paddr_t slow, shigh;
- int pgflidx, error, free_list;
- UVMHIST_FUNC("uvm_pglistalloc_simple"); UVMHIST_CALLED(pghist);
-#ifdef DEBUG
- vm_page_t tp;
-#endif
-
- /* Default to "lose". */
- error = ENOMEM;
-
- todo = atop(size);
-
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- /* Are there even any free pages? */
- if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
- goto out;
-
- for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
- /*
- * Skip this segment if incompatible with the address range.
- */
- if (seg->avail_end <= atop(low))
- continue;
- if (seg->avail_start >= atop(high))
- continue;
-
- slow = MAX(atop(low), seg->avail_start);
- shigh = MIN(atop(high), seg->avail_end);
-
- /* we want to be able to allocate at least a page... */
- if (slow == shigh)
- continue;
-
- for (pg = &seg->pgs[slow - seg->start]; slow != shigh;
- slow++, pg++) {
- if (VM_PAGE_IS_FREE(pg) == 0)
- continue;
-
- free_list = uvm_page_lookup_freelist(pg);
- pgflidx = (pg->pg_flags & PG_ZERO) ?
- PGFL_ZEROS : PGFL_UNKNOWN;
-#ifdef DEBUG
- for (tp = TAILQ_FIRST(&uvm.page_free[free_list].pgfl_queues[pgflidx]);
- tp != NULL; tp = TAILQ_NEXT(tp, pageq)) {
- if (tp == pg)
- break;
- }
- if (tp == NULL)
- panic("uvm_pglistalloc_simple: page not on freelist");
-#endif
- TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
- pg, pageq);
- uvmexp.free--;
- if (pg->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
- pg->uobject = NULL;
- pg->uanon = NULL;
- pg->pg_version++;
- TAILQ_INSERT_TAIL(rlist, pg, pageq);
- STAT_INCR(uvm_pglistalloc_npages);
- if (--todo == 0) {
- error = 0;
- goto out;
- }
- }
-
- }
-
-out:
- /*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
-
- if (!error && (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
- (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg))) {
- wakeup(&uvm.pagedaemon_proc);
- }
-
- uvm_unlock_fpageq();
-
- if (error)
- uvm_pglistfree(rlist);
-
- return (error);
-}
-
/*
* uvm_pglistalloc: allocate a list of pages
*
@@ -179,202 +73,45 @@ out:
* alignment memory must be aligned to this power-of-two boundary.
* boundary no segment in the allocation may cross this
* power-of-two boundary (relative to zero).
+ * => flags:
+ * UVM_PLA_NOWAIT fail instead of sleeping if no memory is available
+ * UVM_PLA_WAITOK wait for memory to become available if the allocation fails
+ * UVM_PLA_ZERO return zeroed memory
+ * UVM_PLA_TRY_CONTIG device prefers physically contiguous memory
*/
int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
paddr_t boundary, struct pglist *rlist, int nsegs, int flags)
{
- int psi;
- struct vm_page *pgs;
- struct vm_physseg *seg;
- paddr_t slow, shigh;
- paddr_t try, idxpa, lastidxpa;
- int tryidx, idx, pgflidx, endidx, error, free_list;
- vm_page_t m;
- u_long pagemask;
-#ifdef DEBUG
- vm_page_t tp;
-#endif
UVMHIST_FUNC("uvm_pglistalloc"); UVMHIST_CALLED(pghist);
KASSERT((alignment & (alignment - 1)) == 0);
KASSERT((boundary & (boundary - 1)) == 0);
- /*
- * This argument is always ignored for now, but ensure drivers always
- * show intention.
- */
KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
-
- /*
- * Our allocations are always page granularity, so our alignment
- * must be, too.
- */
- if (alignment < PAGE_SIZE)
- alignment = PAGE_SIZE;
if (size == 0)
return (EINVAL);
- size = round_page(size);
- low = roundup(low, alignment);
-
/*
- * If we are allowed to allocate as many segments as pages,
- * no need to be smart.
+ * Convert byte addresses to page numbers.
*/
- if ((nsegs >= size / PAGE_SIZE) && (alignment == PAGE_SIZE) &&
- (boundary == 0)) {
- error = uvm_pglistalloc_simple(size, low, high, rlist);
- goto done;
- }
-
- if (boundary != 0 && boundary < size)
- return (EINVAL);
-
- pagemask = ~(boundary - 1);
-
- /* Default to "lose". */
- error = ENOMEM;
-
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- /* Are there even any free pages? */
- if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
- goto out;
-
- for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
- /*
- * Skip this segment if incompatible with the address range.
- */
- if (seg->avail_end <= atop(low))
- continue;
- if (seg->avail_start >= atop(high))
- continue;
-
- slow = MAX(low, ptoa(seg->avail_start));
- shigh = MIN(high, ptoa(seg->avail_end));
-
- try = roundup(slow, alignment);
- for (;; try += alignment) {
- if (try + size > shigh) {
- /*
- * We've run past the allowable range, or
- * the segment. Try another.
- */
- break;
- }
-
- tryidx = idx = atop(try) - seg->start;
- endidx = idx + atop(size);
- pgs = vm_physmem[psi].pgs;
-
- /*
- * Found a suitable starting page. See if the
- * range is free.
- */
-
- for (; idx < endidx; idx++) {
- if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
- break;
- }
- idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
- if (idx == tryidx)
- continue;
-
- /*
- * Check that the region is contiguous
- * (it really should...) and does not
- * cross an alignment boundary.
- */
- lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
- if ((lastidxpa + PAGE_SIZE) != idxpa)
- break;
-
- if (boundary != 0 &&
- ((lastidxpa ^ idxpa) & pagemask) != 0)
- break;
- }
-
- if (idx == endidx) {
- goto found;
- }
- }
- }
-
- /*
- * We could not allocate a contiguous range. This is where
- * we should try harder if nsegs > 1...
- */
- goto out;
-
-#if PGFL_NQUEUES != 2
-#error uvm_pglistalloc needs to be updated
-#endif
-
-found:
- /*
- * we have a chunk of memory that conforms to the requested constraints.
- */
- idx = tryidx;
- while (idx < endidx) {
- m = &pgs[idx];
- free_list = uvm_page_lookup_freelist(m);
- pgflidx = (m->pg_flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
-#ifdef DEBUG
- for (tp = TAILQ_FIRST(&uvm.page_free[
- free_list].pgfl_queues[pgflidx]);
- tp != NULL;
- tp = TAILQ_NEXT(tp, pageq)) {
- if (tp == m)
- break;
- }
- if (tp == NULL)
- panic("uvm_pglistalloc: page not on freelist");
-#endif
- TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
- m, pageq);
- uvmexp.free--;
- if (m->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
- m->uobject = NULL;
- m->uanon = NULL;
- m->pg_version++;
- TAILQ_INSERT_TAIL(rlist, m, pageq);
- idx++;
- STAT_INCR(uvm_pglistalloc_npages);
- }
- error = 0;
-
-out:
- /*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
-
- if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
- (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg)) {
- wakeup(&uvm.pagedaemon_proc);
- }
-
- uvm_unlock_fpageq();
-
-done:
- /* No locking needed here, pages are not on any queue. */
- if (error == 0) {
- TAILQ_FOREACH(m, rlist, pageq) {
- if (flags & UVM_PLA_ZERO &&
- (m->pg_flags & PG_ZERO) == 0)
- uvm_pagezero(m);
- m->pg_flags = PG_CLEAN;
- }
- }
-
- return (error);
+ if (alignment < PAGE_SIZE)
+ alignment = PAGE_SIZE;
+ low = atop(roundup(low, alignment));
+ /* Allows for overflow: 0xffff + 1 = 0x0000 */
+ if ((high & PAGE_MASK) == PAGE_MASK)
+ high = atop(high) + 1;
+ else
+ high = atop(high);
+ size = atop(round_page(size));
+ alignment = atop(alignment);
+ if (boundary < PAGE_SIZE && boundary != 0)
+ boundary = PAGE_SIZE;
+ boundary = atop(boundary);
+
+ return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs,
+ flags, rlist);
}
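
The address conversion above is the subtle part: the caller's high is an inclusive byte address, while uvm_pmr_getpages() takes an exclusive page number, and the PAGE_MASK test avoids computing high + 1 in byte space, where a high at the top of the address space would wrap to 0 (the "0xffff + 1 = 0x0000" note). A standalone restatement of the arithmetic for a typical ISA-DMA request, assuming 4K pages; the macros are simplified stand-ins for the kernel's own:

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_MASK       (PAGE_SIZE - 1)
    #define atop(x)         ((unsigned long)(x) >> PAGE_SHIFT)
    #define round_page(x)   (((x) + PAGE_MASK) & ~PAGE_MASK)
    #define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

    int
    main(void)
    {
        /* 64KB below 16MB, 64KB boundary, no explicit alignment. */
        unsigned long size = 0x10000, low = 0, high = 0x00ffffff;
        unsigned long alignment = 0, boundary = 0x10000;

        if (alignment < PAGE_SIZE)
            alignment = PAGE_SIZE;
        low = atop(roundup(low, alignment));
        /* high is inclusive on entry, exclusive as a page number. */
        high = ((high & PAGE_MASK) == PAGE_MASK) ?
            atop(high) + 1 : atop(high);
        size = atop(round_page(size));
        alignment = atop(alignment);
        if (boundary < PAGE_SIZE && boundary != 0)
            boundary = PAGE_SIZE;
        boundary = atop(boundary);

        /* Prints: size=16 low=0 high=4096 align=1 boundary=16 */
        printf("size=%lu low=%lu high=%lu align=%lu boundary=%lu\n",
            size, low, high, alignment, boundary);
        return 0;
    }
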
/*
@@ -389,14 +126,8 @@ uvm_pglistfree(struct pglist *list)
struct vm_page *m;
UVMHIST_FUNC("uvm_pglistfree"); UVMHIST_CALLED(pghist);
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- while ((m = TAILQ_FIRST(list)) != NULL) {
+ TAILQ_FOREACH(m, list, pageq) {
KASSERT((m->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
- TAILQ_REMOVE(list, m, pageq);
#ifdef DEBUG
if (m->uobject == (void *)0xdeadbeef &&
m->uanon == (void *)0xdeadbeef) {
@@ -408,15 +139,6 @@ uvm_pglistfree(struct pglist *list)
m->uanon = (void *)0xdeadbeef;
#endif
atomic_clearbits_int(&m->pg_flags, PQ_MASK);
- atomic_setbits_int(&m->pg_flags, PQ_FREE);
- TAILQ_INSERT_TAIL(&uvm.page_free[
- uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN],
- m, pageq);
- uvmexp.free++;
- if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
- uvm.page_idle_zero = vm_page_zero_enable;
- STAT_DECR(uvm_pglistalloc_npages);
}
-
- uvm_unlock_fpageq();
+ uvm_pmr_freepageq(list);
}
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
new file mode 100644
index 00000000000..86a0d137a97
--- /dev/null
+++ b/sys/uvm/uvm_pmemrange.c
@@ -0,0 +1,1248 @@
+/* $OpenBSD: uvm_pmemrange.c,v 1.1 2009/06/01 17:42:33 ariane Exp $ */
+
+/*
+ * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <uvm/uvm.h>
+#include <sys/malloc.h>
+
+/*
+ * 2 trees: addr tree and size tree.
+ *
+ * addr tree is vm_page[0].fq.free.tree
+ * size tree is vm_page[1].fq.free.tree
+ *
+ * The size tree is not used for memory ranges of 1 page; instead,
+ * single pages are kept on the queue vm_page[0].pageq
+ *
+ * uvm_page_init guarantees that every vm_physseg contains an array of
+ * struct vm_page. Also, uvm_page_physload allocates an array of struct
+ * vm_page. This code depends on that array.
+ */
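
Spelled out, the encoding used throughout this file for a free run of N contiguous pages starting at pg is (illustration only, not part of the commit):

    /*
     *   pg[0].fq.free.pages = N     run length, kept on the first page only
     *   pg[0].fq.free.tree          node in pmr->addr, the address tree
     *   pg[1].fq.free.tree          node in pmr->size[memtype], if N > 1
     *   pg[0].pageq                 on pmr->single[memtype], if N == 1
     *   pg[1..N-1].fq.free.pages    0
     *
     * Size lookups therefore land on the run's second page, which is why
     * uvm_pmr_size_cmp() and uvm_pmr_nfindsz() below compensate with pg - 1.
     */
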
+
+/* Tree comparators. */
+int uvm_pmemrange_addr_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
+int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
+int uvm_pmr_addr_cmp(struct vm_page *, struct vm_page *);
+int uvm_pmr_size_cmp(struct vm_page *, struct vm_page *);
+
+/* Memory types. The page flags are used to derive what the current memory
+ * type of a page is. */
+static __inline int
+uvm_pmr_pg_to_memtype(struct vm_page *pg)
+{
+ if (pg->pg_flags & PG_ZERO)
+ return UVM_PMR_MEMTYPE_ZERO;
+ /* Default: dirty memory. */
+ return UVM_PMR_MEMTYPE_DIRTY;
+}
+
+/* Cancel static calls (for profiling). */
+#define static
+#define __inline
+/* Trees. */
+RB_PROTOTYPE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
+RB_PROTOTYPE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
+RB_PROTOTYPE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
+ uvm_pmemrange_addr_cmp);
+RB_GENERATE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
+RB_GENERATE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
+RB_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
+ uvm_pmemrange_addr_cmp);
+#undef static
+#undef __inline
+
+/* Validation. */
+#ifdef DEBUG
+void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr);
+#else
+#define uvm_pmr_assertvalid(pmr) do {} while (0)
+#endif
+
+
+int uvm_pmr_get1page(psize_t, int, struct pglist *,
+ paddr_t, paddr_t);
+
+struct uvm_pmemrange *uvm_pmr_allocpmr(void);
+struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
+struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *,
+ struct vm_page *, int);
+void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr,
+ struct vm_page *pg, struct vm_page **pg_prev,
+ struct vm_page **pg_next);
+struct vm_page *uvm_pmr_insert(struct uvm_pmemrange *,
+ struct vm_page *, int);
+void uvm_pmr_remove(struct uvm_pmemrange *,
+ struct vm_page *);
+psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t,
+ struct vm_page **);
+void uvm_pmr_split(paddr_t);
+struct uvm_pmemrange *uvm_pmemrange_find(paddr_t);
+struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *,
+ struct uvm_pmemrange *);
+struct vm_page *uvm_pmr_extract_range(struct uvm_pmemrange *,
+ struct vm_page *, paddr_t, paddr_t,
+ struct pglist *);
+
+/*
+ * Computes num/denom and rounds it up to the next power-of-2.
+ */
+static __inline psize_t
+pow2divide(psize_t num, psize_t denom)
+{
+ int rshift = 0;
+
+ while (num > (denom << rshift))
+ rshift++;
+ return (paddr_t)1 << rshift;
+}
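
pow2divide() is used further down to size the initial search chunk (search[0]) when the caller allows up to maxseg segments. A self-contained userland restatement, with psize_t/paddr_t modeled as unsigned long, just to make the rounding behaviour concrete:

    #include <stdio.h>

    typedef unsigned long psize_t;      /* stand-ins for the kernel types */
    typedef unsigned long paddr_t;

    static psize_t
    pow2divide(psize_t num, psize_t denom)
    {
        int rshift = 0;

        while (num > (denom << rshift))
            rshift++;
        return (paddr_t)1 << rshift;
    }

    int
    main(void)
    {
        printf("%lu\n", pow2divide(10, 3));     /* 4: smallest 2^k >= 10/3 */
        printf("%lu\n", pow2divide(9, 2));      /* 8: smallest 2^k >= 4.5  */
        printf("%lu\n", pow2divide(4, 4));      /* 1 */
        return 0;
    }
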
+
+/*
+ * Predicate: lhs is a subrange of rhs.
+ */
+#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high) \
+ ((lhs_low) >= (rhs_low) && (lhs_high <= rhs_high))
+
+/*
+ * Align to power-of-2 alignment.
+ */
+#define PMR_ALIGN(pgno, align) \
+ (((pgno) + ((align) - 1)) & ~((align) - 1))
+
+
+/*
+ * Comparator: sort by address ascending.
+ */
+int
+uvm_pmemrange_addr_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
+{
+ return lhs->low < rhs->low ? -1 : lhs->low > rhs->low;
+}
+
+/*
+ * Comparator: sort by use ascending.
+ *
+ * The higher the use value of a range, the more devices need memory in
+ * this range. Therefore, allocate from the range with the lowest use first.
+ */
+int
+uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
+{
+ int result;
+
+ result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use;
+ if (result == 0)
+ result = uvm_pmemrange_addr_cmp(lhs, rhs);
+ return result;
+}
+
+int
+uvm_pmr_addr_cmp(struct vm_page *lhs, struct vm_page *rhs)
+{
+ paddr_t lhs_addr, rhs_addr;
+
+ lhs_addr = VM_PAGE_TO_PHYS(lhs);
+ rhs_addr = VM_PAGE_TO_PHYS(rhs);
+
+ return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr);
+}
+
+int
+uvm_pmr_size_cmp(struct vm_page *lhs, struct vm_page *rhs)
+{
+ psize_t lhs_size, rhs_size;
+ int cmp;
+
+ /* Using second tree, so we receive pg[1] instead of pg[0]. */
+ lhs_size = (lhs - 1)->fq.free.pages;
+ rhs_size = (rhs - 1)->fq.free.pages;
+
+ cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size);
+ if (cmp == 0)
+ cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1);
+ return cmp;
+}
+
+/*
+ * Find the first range of free pages that is at least sz pages long.
+ */
+struct vm_page *
+uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
+{
+ struct vm_page *node, *best;
+
+ KASSERT(sz >= 1);
+
+ if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti]))
+ return TAILQ_FIRST(&pmr->single[mti]);
+
+ node = RB_ROOT(&pmr->size[mti]);
+ best = NULL;
+ while (node != NULL) {
+ if ((node - 1)->fq.free.pages >= sz) {
+ best = (node - 1);
+ node = RB_LEFT(node, fq.free.tree);
+ } else
+ node = RB_RIGHT(node, fq.free.tree);
+ }
+ return best;
+}
+
+/*
+ * Finds the next range. The next range has a size >= pg->fq.free.pages.
+ * Returns NULL if no more ranges are available.
+ */
+struct vm_page *
+uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
+{
+ struct vm_page *npg;
+
+ KASSERT(pmr != NULL && pg != NULL);
+ if (pg->fq.free.pages == 1) {
+ if (TAILQ_NEXT(pg, pageq) != NULL)
+ return TAILQ_NEXT(pg, pageq);
+ else
+ npg = RB_MIN(uvm_pmr_size, &pmr->size[mt]);
+ } else
+ npg = RB_NEXT(uvm_pmr_size, &pmr->size[mt], pg + 1);
+
+ return npg == NULL ? NULL : npg - 1;
+}
+
+/*
+ * Finds the previous and next ranges relative to the (uninserted) pg range.
+ *
+ * *pg_prev == NULL if no previous range is available that can join with
+ * pg.
+ * *pg_next == NULL if no next range is available that can join with
+ * pg.
+ */
+void
+uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
+ struct vm_page **pg_prev, struct vm_page **pg_next)
+{
+ KASSERT(pg_prev != NULL && pg_next != NULL);
+
+ *pg_next = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
+ if (*pg_next == NULL)
+ *pg_prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
+ else
+ *pg_prev = RB_PREV(uvm_pmr_addr, &pmr->addr, *pg_next);
+
+ /* Reset if not contig. */
+ if (*pg_prev != NULL &&
+ (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fq.free.pages
+ != atop(VM_PAGE_TO_PHYS(pg)) ||
+ uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
+ *pg_prev = NULL;
+ if (*pg_next != NULL &&
+ (atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages
+ != atop(VM_PAGE_TO_PHYS(*pg_next)) ||
+ uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
+ *pg_next = NULL;
+ return;
+}
+
+/*
+ * Remove a range from the address tree.
+ * Address tree maintains pmr counters.
+ */
+static __inline void
+uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
+ KASSERT(pg->pg_flags & PQ_FREE);
+ RB_REMOVE(uvm_pmr_addr, &pmr->addr, pg);
+
+ pmr->nsegs--;
+}
+/*
+ * Remove a range from the size tree.
+ */
+static __inline void
+uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ int memtype;
+#ifdef DEBUG
+ struct vm_page *i;
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ memtype = uvm_pmr_pg_to_memtype(pg);
+
+ if (pg->fq.free.pages == 1) {
+#ifdef DEBUG
+ TAILQ_FOREACH(i, &pmr->single[memtype], pageq) {
+ if (i == pg)
+ break;
+ }
+ KDASSERT(i == pg);
+#endif
+ TAILQ_REMOVE(&pmr->single[memtype], pg, pageq);
+ } else {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[memtype],
+ pg + 1) == pg + 1);
+ RB_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1);
+ }
+}
+/* Remove from both trees. */
+void
+uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_remove_size(pmr, pg);
+ uvm_pmr_remove_addr(pmr, pg);
+ uvm_pmr_assertvalid(pmr);
+}
+
+/*
+ * Insert the range described in pg.
+ * Returns the range thus created (which may be joined with the previous and
+ * next ranges).
+ * If no_join, the caller guarantees that the range cannot possibly join
+ * with adjecent ranges.
+ */
+static __inline struct vm_page *
+uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
+{
+ struct vm_page *prev, *next;
+
+#ifdef DEBUG
+ struct vm_page *i;
+ int mt;
+
+ for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
+ TAILQ_FOREACH(i, &pmr->single[mt], pageq)
+ KDASSERT(i != pg);
+ if (pg->fq.free.pages > 1) {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mt],
+ pg + 1) == NULL);
+ }
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL);
+ }
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ KASSERT(pg->fq.free.pages >= 1);
+
+ if (!no_join) {
+ uvm_pmr_pnaddr(pmr, pg, &prev, &next);
+ if (next != NULL) {
+ uvm_pmr_remove_size(pmr, next);
+ uvm_pmr_remove_addr(pmr, next);
+ pg->fq.free.pages += next->fq.free.pages;
+ next->fq.free.pages = 0;
+ }
+ if (prev != NULL) {
+ uvm_pmr_remove_size(pmr, prev);
+ prev->fq.free.pages += pg->fq.free.pages;
+ pg->fq.free.pages = 0;
+ return prev;
+ }
+ }
+#ifdef DEBUG
+ else {
+ uvm_pmr_pnaddr(pmr, pg, &prev, &next);
+ KDASSERT(prev == NULL && next == NULL);
+ }
+#endif /* DEBUG */
+
+ RB_INSERT(uvm_pmr_addr, &pmr->addr, pg);
+
+ pmr->nsegs++;
+
+ return pg;
+}
+/*
+ * Insert the range described in pg into the size tree (or, for a
+ * 1-page range, onto the single-page queue).
+ * The page must already be in the address tree.
+ */
+static __inline void
+uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ int memtype;
+#ifdef DEBUG
+ struct vm_page *i;
+ int mti;
+#endif
+
+ memtype = uvm_pmr_pg_to_memtype(pg);
+#ifdef DEBUG
+ for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
+ TAILQ_FOREACH(i, &pmr->single[mti], pageq)
+ KDASSERT(i != pg);
+ if (pg->fq.free.pages > 1) {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mti],
+ pg + 1) == NULL);
+ }
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
+ }
+ for (i = pg; i < pg + pg->fq.free.pages; i++)
+ KASSERT(uvm_pmr_pg_to_memtype(i) == memtype);
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ KASSERT(pg->fq.free.pages >= 1);
+
+ if (pg->fq.free.pages == 1)
+ TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq);
+ else
+ RB_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1);
+}
+/* Insert in both trees. */
+struct vm_page *
+uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
+{
+ uvm_pmr_assertvalid(pmr);
+ pg = uvm_pmr_insert_addr(pmr, pg, no_join);
+ uvm_pmr_insert_size(pmr, pg);
+ uvm_pmr_assertvalid(pmr);
+ return pg;
+}
+
+/*
+ * Remove the first segment of contiguous pages from pgl.
+ * A segment ends if it crosses boundary (unless boundary = 0) or
+ * if it would enter a different uvm_pmemrange.
+ *
+ * Work: the page range that the caller is currently working with.
+ * May be null.
+ */
+psize_t
+uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary,
+ struct vm_page **work)
+{
+ struct vm_page *pg, *pre_last, *last, *inserted;
+ psize_t count;
+ struct uvm_pmemrange *pmr;
+ paddr_t first_boundary;
+
+ KASSERT(!TAILQ_EMPTY(pgl));
+
+ pg = TAILQ_FIRST(pgl);
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ KDASSERT(pmr != NULL);
+ if (boundary != 0) {
+ first_boundary =
+ PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary);
+ } else
+ first_boundary = 0;
+
+ /* Remove all pages in the first segment. */
+ pre_last = pg;
+ last = TAILQ_NEXT(pre_last, pageq);
+ TAILQ_REMOVE(pgl, pre_last, pageq);
+ count = 1;
+ /*
+ * While loop checks the following:
+ * - last != NULL we have not reached the end of pgs
+ * - boundary == 0 || last < first_boundary
+ * we do not cross a boundary
+ * - atop(pre_last) + 1 == atop(last)
+ * still in the same segment
+ * - low <= last
+ * - high > last still testing the same memory range
+ *
+ * At the end of the loop, last points at the next segment
+ * and each page [pg, pre_last] (inclusive range) has been removed
+ * and count is the number of pages that have been removed.
+ */
+ while (last != NULL &&
+ (boundary == 0 || atop(VM_PAGE_TO_PHYS(last)) < first_boundary) &&
+ atop(VM_PAGE_TO_PHYS(pre_last)) + 1 ==
+ atop(VM_PAGE_TO_PHYS(last)) &&
+ pmr->low <= atop(VM_PAGE_TO_PHYS(last)) &&
+ pmr->high > atop(VM_PAGE_TO_PHYS(last))) {
+ count++;
+ pre_last = last;
+ last = TAILQ_NEXT(last, pageq);
+ TAILQ_REMOVE(pgl, pre_last, pageq);
+ }
+ KDASSERT(TAILQ_FIRST(pgl) == last);
+ KDASSERT(pg + (count - 1) == pre_last);
+
+ pg->fq.free.pages = count;
+ inserted = uvm_pmr_insert(pmr, pg, 0);
+
+ if (work != NULL && *work != NULL &&
+ atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) &&
+ atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fq.free.pages >
+ atop(VM_PAGE_TO_PHYS(*work)))
+ *work = inserted;
+ return count;
+}
+
+/*
+ * Extract a number of pages from a segment of free pages.
+ * Called by uvm_pmr_getpages.
+ *
+ * Returns the segment that was created from pages left over at the tail
+ * of the removed set of pages, or NULL if no pages were left at the tail.
+ */
+struct vm_page *
+uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg,
+ paddr_t start, paddr_t end, struct pglist *result)
+{
+ struct vm_page *after, *pg_i;
+ psize_t before_sz, after_sz;
+#ifdef DEBUG
+ psize_t i;
+#endif
+
+ KASSERT(end > start);
+ KASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)));
+ KASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages);
+ KASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start);
+ KASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages >= end);
+
+ before_sz = start - atop(VM_PAGE_TO_PHYS(pg));
+ after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages - end;
+ KDASSERT(before_sz + after_sz + (end - start) == pg->fq.free.pages);
+ uvm_pmr_assertvalid(pmr);
+
+ uvm_pmr_remove_size(pmr, pg);
+ if (before_sz == 0)
+ uvm_pmr_remove_addr(pmr, pg);
+
+ /* Add selected pages to result. */
+ for (pg_i = pg + before_sz; atop(VM_PAGE_TO_PHYS(pg_i)) < end;
+ pg_i++) {
+ pg_i->fq.free.pages = 0;
+ TAILQ_INSERT_TAIL(result, pg_i, pageq);
+ KDASSERT(pg_i->pg_flags & PQ_FREE);
+ }
+
+ /* Before handling. */
+ if (before_sz > 0) {
+ pg->fq.free.pages = before_sz;
+ uvm_pmr_insert_size(pmr, pg);
+ }
+
+ /* After handling. */
+ after = NULL;
+ if (after_sz > 0) {
+ after = pg + before_sz + (end - start);
+#ifdef DEBUG
+ for (i = 0; i < after_sz; i++) {
+ KASSERT(!uvm_pmr_isfree(after + i));
+ }
+#endif
+ KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end);
+ after->fq.free.pages = after_sz;
+ after = uvm_pmr_insert_addr(pmr, after, 1);
+ uvm_pmr_insert_size(pmr, after);
+ }
+
+ uvm_pmr_assertvalid(pmr);
+ return after;
+}
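
To make the index arithmetic above concrete, a hypothetical worked example (numbers are page numbers, not part of the commit):

    /*
     * Worked example: extracting [start, end) = [16, 24) from a free
     * run pg spanning pages [10, 30):
     *
     *   before_sz = 16 - 10 = 6   -> pg stays as the 6-page run [10, 16)
     *   extracted = 24 - 16 = 8   -> pages appended to result
     *   after_sz  = 30 - 24 = 6   -> pg + 14 becomes the run [24, 30)
     *                                and is returned as "after"
     */
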
+
+/*
+ * Acquire a number of pages.
+ *
+ * count: the number of pages returned
+ * start: lowest page number
+ * end: highest page number +1
+ * (start = end = 0: no limitation)
+ * align: power-of-2 alignment constraint (align = 1: no alignment)
+ * boundary: power-of-2 boundary (boundary = 0: no boundary)
+ * maxseg: maximum number of segments to return
+ * flags: UVM_PLA_* flags
+ * result: returned pages storage (uses pageq)
+ */
+int
+uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
+ paddr_t boundary, int maxseg, int flags, struct pglist *result)
+{
+ struct uvm_pmemrange *pmr; /* Iterate memory ranges. */
+ struct vm_page *found, *f_next; /* Iterate chunks. */
+ psize_t fcount; /* Current found pages. */
+ int fnsegs; /* Current segment counter. */
+ int try, start_try;
+ psize_t search[2];
+ paddr_t fstart, fend; /* Pages to be taken from found. */
+ int memtype; /* Requested memtype. */
+ int desperate; /* True if allocation failed. */
+
+ /* Validate arguments. */
+ KASSERT(count > 0);
+ KASSERT((start == 0 && end == 0) || (start < end));
+ KASSERT(align >= 1 && powerof2(align));
+ KASSERT(maxseg > 0);
+ KASSERT(boundary == 0 || powerof2(boundary));
+ KDASSERT(boundary == 0 || maxseg * boundary >= count);
+ KASSERT(TAILQ_EMPTY(result));
+
+ /* Configure search. If start_try == 0, search[0] should be faster
+ * (because it will have to throw away fewer segments).
+ * search[1] is the worst case: start searching at the smallest
+ * possible range instead of starting at the range most likely to
+ * fulfill the allocation. */
+ start_try = 0;
+ search[0] = (flags & UVM_PLA_TRY_CONTIG ? count :
+ pow2divide(count, maxseg));
+ search[1] = 1;
+ if (maxseg == 1) {
+ start_try = 1;
+ search[1] = count;
+ } else if (search[1] >= search[0])
+ start_try = 1;
+
+ReTry: /* Return point after sleeping. */
+ fcount = 0;
+ fnsegs = 0;
+
+ /* Memory type: if zeroed memory is requested, traverse the zero set.
+ * Otherwise, traverse the dirty set. */
+ if (flags & UVM_PLA_ZERO)
+ memtype = UVM_PMR_MEMTYPE_ZERO;
+ else
+ memtype = UVM_PMR_MEMTYPE_DIRTY;
+ desperate = 0;
+
+ uvm_lock_fpageq();
+
+ReTryDesperate:
+ /*
+ * If we just want any page(s), go for the really fast option.
+ */
+ if (count <= maxseg && align == 1 && boundary == 0 &&
+ (flags & UVM_PLA_TRY_CONTIG) == 0) {
+ if (!desperate) {
+ KASSERT(fcount == 0);
+ fcount += uvm_pmr_get1page(count, memtype, result,
+ start, end);
+ } else {
+ for (memtype = 0; memtype < UVM_PMR_MEMTYPE_MAX &&
+ fcount < count; memtype++) {
+ fcount += uvm_pmr_get1page(count - fcount,
+ memtype, result, start, end);
+ }
+ }
+
+ if (fcount == count)
+ goto Out;
+ else
+ goto Fail;
+ }
+
+ TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
+ /* Empty range. */
+ if (pmr->nsegs == 0)
+ continue;
+
+ /* Outside requested range. */
+ if (!(start == 0 && end == 0) &&
+ !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end))
+ continue;
+
+ try = start_try;
+ReScan: /* Return point at try++. */
+
+ for (found = uvm_pmr_nfindsz(pmr, search[try], memtype);
+ found != NULL;
+ found = f_next) {
+ f_next = uvm_pmr_nextsz(pmr, found, memtype);
+
+ fstart = atop(VM_PAGE_TO_PHYS(found));
+DrainFound:
+ /* Throw away the first segment if fnsegs == maxseg */
+ if (fnsegs == maxseg) {
+ fnsegs--;
+ fcount -=
+ uvm_pmr_remove_1strange(result, boundary,
+ &found);
+ }
+
+ fstart = PMR_ALIGN(fstart, align);
+ fend = atop(VM_PAGE_TO_PHYS(found)) +
+ found->fq.free.pages;
+ if (fstart >= fend)
+ continue;
+ if (boundary != 0) {
+ fend =
+ MIN(fend, PMR_ALIGN(fstart + 1, boundary));
+ }
+ if (fend - fstart > count - fcount)
+ fend = fstart + (count - fcount);
+
+ fcount += fend - fstart;
+ fnsegs++;
+ found = uvm_pmr_extract_range(pmr, found,
+ fstart, fend, result);
+
+ if (fcount == count)
+ goto Out;
+
+ /* If there's still space left in found, try to
+ * fully drain it prior to continuing. */
+ if (found != NULL) {
+ fstart = fend;
+ goto DrainFound;
+ }
+ }
+
+ if (++try < nitems(search))
+ goto ReScan;
+ }
+
+ /*
+ * Not enough memory of the requested type available. Fall back to
+ * less good memory that we'll clean up better later.
+ *
+ * This algorithm is not very smart though, it just starts scanning
+ * a different typed range, but the nicer ranges of the previous
+ * iteration may fall out.
+ */
+ if (!desperate) {
+ desperate = 1;
+ memtype = 0;
+ goto ReTryDesperate;
+ } else if (++memtype < UVM_PMR_MEMTYPE_MAX)
+ goto ReTryDesperate;
+
+Fail:
+ /*
+ * Allocation failed.
+ */
+
+ /* XXX: claim from memory reserve here */
+
+ while (!TAILQ_EMPTY(result))
+ uvm_pmr_remove_1strange(result, 0, NULL);
+ uvm_unlock_fpageq();
+
+ if (flags & UVM_PLA_WAITOK) {
+ uvm_wait("uvm_pmr_getpages");
+ goto ReTry;
+ } else
+ wakeup(&uvm.pagedaemon_proc);
+
+ return ENOMEM;
+
+Out:
+
+ /*
+ * Allocation successful.
+ */
+
+ uvmexp.free -= fcount;
+
+ uvm_unlock_fpageq();
+
+ /* Update statistics and zero pages if UVM_PLA_ZERO. */
+ TAILQ_FOREACH(found, result, pageq) {
+ if (found->pg_flags & PG_ZERO) {
+ uvmexp.zeropages--;
+ }
+ if (flags & UVM_PLA_ZERO) {
+ if (found->pg_flags & PG_ZERO)
+ uvmexp.pga_zerohit++;
+ else {
+ uvmexp.pga_zeromiss++;
+ uvm_pagezero(found);
+ }
+ }
+ atomic_clearbits_int(&found->pg_flags, PG_ZERO | PQ_FREE);
+
+ found->uobject = NULL;
+ found->uanon = NULL;
+ found->pg_version++;
+ }
+
+ return 0;
+}
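
A hedged usage sketch of the new allocator entry point, matching the parameter description above: 16 zeroed, physically contiguous pages below 16MB (maxseg = 1 forces a single segment, and one of UVM_PLA_WAITOK/UVM_PLA_NOWAIT is given). example_alloc_isadma() is a hypothetical wrapper, not part of the commit:

    int
    example_alloc_isadma(struct pglist *pgl)
    {
        TAILQ_INIT(pgl);
        /* count=16, pages [0, atop(16MB)), align=1, boundary=0, maxseg=1 */
        return uvm_pmr_getpages(16, 0, atop(0x1000000), 1, 0, 1,
            UVM_PLA_ZERO | UVM_PLA_WAITOK, pgl);
    }

On failure the pages already gathered are released internally (the Fail path above); on success the caller eventually returns them with uvm_pmr_freepageq() or uvm_pglistfree().
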
+
+/*
+ * Free a number of contig pages (invoked by uvm_page_init).
+ */
+void
+uvm_pmr_freepages(struct vm_page *pg, psize_t count)
+{
+ struct uvm_pmemrange *pmr;
+ psize_t i, pmr_count;
+
+ uvm_lock_fpageq();
+
+ for (i = 0; i < count; i++) {
+ atomic_clearbits_int(&pg[i].pg_flags, pg[i].pg_flags);
+ atomic_setbits_int(&pg[i].pg_flags, PQ_FREE);
+ }
+
+ while (count > 0) {
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ KASSERT(pmr != NULL);
+
+ pmr_count = MIN(count, pmr->high - atop(VM_PAGE_TO_PHYS(pg)));
+ pg->fq.free.pages = pmr_count;
+ uvm_pmr_insert(pmr, pg, 0);
+
+ uvmexp.free += pmr_count;
+ count -= pmr_count;
+ pg += pmr_count;
+ }
+ wakeup(&uvmexp.free);
+
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Free all pages in the queue.
+ */
+void
+uvm_pmr_freepageq(struct pglist *pgl)
+{
+ struct vm_page *pg;
+
+ TAILQ_FOREACH(pg, pgl, pageq) {
+ atomic_clearbits_int(&pg->pg_flags, pg->pg_flags);
+ atomic_setbits_int(&pg->pg_flags, PQ_FREE);
+ }
+
+ uvm_lock_fpageq();
+ while (!TAILQ_EMPTY(pgl))
+ uvmexp.free += uvm_pmr_remove_1strange(pgl, 0, NULL);
+ wakeup(&uvmexp.free);
+ uvm_unlock_fpageq();
+
+ return;
+}
+
+/*
+ * Store a pmemrange in the list.
+ *
+ * The list is sorted by use.
+ */
+struct uvm_pmemrange *
+uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq,
+ struct uvm_pmemrange *pmr)
+{
+ struct uvm_pmemrange *iter;
+ int cmp = 1;
+
+ TAILQ_FOREACH(iter, useq, pmr_use) {
+ cmp = uvm_pmemrange_use_cmp(pmr, iter);
+ if (cmp == 0)
+ return iter;
+ if (cmp == -1)
+ break;
+ }
+ if (cmp == 0)
+ return iter;
+
+ if (iter == NULL)
+ TAILQ_INSERT_TAIL(useq, pmr, pmr_use);
+ else
+ TAILQ_INSERT_BEFORE(iter, pmr, pmr_use);
+ return NULL;
+}
+
+#ifdef DEBUG
+/*
+ * Validation of the whole pmemrange.
+ * Called with fpageq locked.
+ */
+void
+uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)
+{
+ struct vm_page *prev, *next, *i, *xref;
+ int lcv, mti;
+
+ /* Validate address tree. */
+ RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) {
+ /* Validate the range. */
+ KASSERT(i->fq.free.pages > 0);
+ KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low);
+ KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fq.free.pages
+ <= pmr->high);
+
+ /* Validate each page in this range. */
+ for (lcv = 0; lcv < i->fq.free.pages; lcv++) {
+ KASSERT(lcv == 0 || i[lcv].fq.free.pages == 0);
+ /* Flag check:
+ * - PG_ZERO: page is zeroed.
+ * - PQ_FREE: page is free.
+ * Any other flag is a mistake. */
+ if (i[lcv].pg_flags !=
+ (i[lcv].pg_flags & (PG_ZERO | PQ_FREE))) {
+ panic("i[%lu].pg_flags = %x, should be %x\n",
+ lcv, i[lcv].pg_flags, PG_ZERO | PQ_FREE);
+ }
+ /* Free pages are:
+ * - not wired
+ * - not loaned
+ * - have no vm_anon
+ * - have no uvm_object */
+ KASSERT(i[lcv].wire_count == 0);
+ KASSERT(i[lcv].loan_count == 0);
+ KASSERT(i[lcv].uanon == NULL);
+ KASSERT(i[lcv].uobject == NULL);
+ /* Pages in a single range always have the same
+ * memtype. */
+ KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==
+ uvm_pmr_pg_to_memtype(&i[lcv]));
+ }
+
+ /* Check that it shouldn't be joined with its predecessor. */
+ prev = RB_PREV(uvm_pmr_addr, &pmr->addr, i);
+ if (prev != NULL) {
+ KASSERT(uvm_pmr_pg_to_memtype(&i[0]) !=
+ uvm_pmr_pg_to_memtype(&i[lcv]) ||
+ atop(VM_PAGE_TO_PHYS(i)) >
+ atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages);
+ }
+
+ /* Assert i is in the size tree as well. */
+ if (i->fq.free.pages == 1) {
+ TAILQ_FOREACH(xref,
+ &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) {
+ if (xref == i)
+ break;
+ }
+ KASSERT(xref == i);
+ } else {
+ KASSERT(RB_FIND(uvm_pmr_size,
+ &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==
+ i + 1);
+ }
+ }
+
+ /* Validate size tree. */
+ for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
+ for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) {
+ next = uvm_pmr_nextsz(pmr, i, mti);
+ if (next != NULL) {
+ KASSERT(i->fq.free.pages <=
+ next->fq.free.pages);
+ }
+
+ /* Assert i is in the addr tree as well. */
+ KASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, i) == i);
+
+ /* Assert i is of the correct memory type. */
+ KASSERT(uvm_pmr_pg_to_memtype(i) == mti);
+ }
+ }
+
+ /* Validate nsegs statistic. */
+ lcv = 0;
+ RB_FOREACH(i, uvm_pmr_addr, &pmr->addr)
+ lcv++;
+ KASSERT(pmr->nsegs == lcv);
+}
+#endif /* DEBUG */
+
+/*
+ * Split pmr at split point pageno.
+ * Called with fpageq unlocked.
+ *
+ * Split is only applied if a pmemrange spans pageno.
+ */
+void
+uvm_pmr_split(paddr_t pageno)
+{
+ struct uvm_pmemrange *pmr, *drain;
+ struct vm_page *rebuild, *prev, *next;
+ psize_t prev_sz;
+
+ uvm_lock_fpageq();
+ pmr = uvm_pmemrange_find(pageno);
+ if (pmr == NULL || !(pmr->low < pageno)) {
+ /* No split required. */
+ uvm_unlock_fpageq();
+ return;
+ }
+
+ KASSERT(pmr->low < pageno);
+ KASSERT(pmr->high > pageno);
+
+ drain = uvm_pmr_allocpmr();
+ drain->low = pageno;
+ drain->high = pmr->high;
+ drain->use = pmr->use;
+
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_assertvalid(drain);
+ KASSERT(drain->nsegs == 0);
+
+ RB_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) {
+ if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno)
+ break;
+ }
+ if (rebuild == NULL)
+ prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
+ else
+ prev = RB_PREV(uvm_pmr_addr, &pmr->addr, rebuild);
+ KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno);
+
+ /*
+ * Handle free chunk that spans the split point.
+ */
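+	/*
+	 * For example (illustrative numbers only): a chunk of 8 free pages
+	 * starting at page 10, split at pageno 14, is cut into a 4 page
+	 * chunk [10, 14) that stays in pmr and a 4 page chunk [14, 18)
+	 * that is handed to drain ("before" and "after" below).
+	 */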
+ if (prev != NULL &&
+ atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages > pageno) {
+ psize_t before, after;
+
+ KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno);
+
+ uvm_pmr_remove(pmr, prev);
+ prev_sz = prev->fq.free.pages;
+ before = pageno - atop(VM_PAGE_TO_PHYS(prev));
+ after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno;
+
+ KASSERT(before > 0);
+ KASSERT(after > 0);
+
+ prev->fq.free.pages = before;
+ uvm_pmr_insert(pmr, prev, 1);
+ (prev + before)->fq.free.pages = after;
+ uvm_pmr_insert(drain, prev + before, 1);
+ }
+
+ /*
+ * Move free chunks that no longer fall in the range.
+ */
+ for (; rebuild != NULL; rebuild = next) {
+ next = RB_NEXT(uvm_pmr_addr, &pmr->addr, rebuild);
+
+ uvm_pmr_remove(pmr, rebuild);
+ uvm_pmr_insert(drain, rebuild, 1);
+ }
+
+ pmr->high = pageno;
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_assertvalid(drain);
+
+ RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain);
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain);
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Increase the usage counter for the given range of memory.
+ *
+ * The more usage counters a given range of memory has, the harder the
+ * allocator will try to avoid allocating from it.
+ *
+ * Addresses here are in paddr_t, not page-numbers.
+ * The lowest and highest allowed address are specified.
+ */
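+/*
+ * For example, a platform that wants to preserve ISA-DMA capable memory
+ * would call uvm_pmr_use_inc(0, 0x00ffffffUL), so that the low 16MB is
+ * only handed out after less constrained ranges have been tried (see
+ * uvm_pmr_init() and UVM_IO_RANGES).
+ */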
+void
+uvm_pmr_use_inc(paddr_t low, paddr_t high)
+{
+ struct uvm_pmemrange *pmr;
+
+ /*
+ * If high+1 == 0, then you are increasing use of the whole address
+ * space, which won't make any difference. Skip in that case.
+ */
+ high++;
+ if (high == 0)
+ return;
+
+ /*
+ * pmr uses page numbers, translate low and high.
+ */
+ low = atop(round_page(low));
+ high = atop(trunc_page(high));
+ uvm_pmr_split(low);
+ uvm_pmr_split(high);
+
+ uvm_lock_fpageq();
+
+ /* Increase use count on segments in range. */
+ RB_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) {
+ if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) {
+ TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use);
+ pmr->use++;
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr);
+ }
+ uvm_pmr_assertvalid(pmr);
+ }
+
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Allocate a pmemrange.
+ *
+ * If called from uvm_page_init, uvm_pageboot_alloc() is used.
+ * If called after uvm_init, malloc(9) is used.
+ * (And if called in between, you're dead.)
+ */
+struct uvm_pmemrange *
+uvm_pmr_allocpmr(void)
+{
+ struct uvm_pmemrange *nw;
+ int i;
+
+ if (!uvm.page_init_done) {
+ nw = (struct uvm_pmemrange *)
+ uvm_pageboot_alloc(sizeof(struct uvm_pmemrange));
+ bzero(nw, sizeof(struct uvm_pmemrange));
+ } else {
+		nw = malloc(sizeof(struct uvm_pmemrange),
+		    M_VMMAP, M_NOWAIT | M_ZERO);
+		KASSERT(nw != NULL);
+	}
+ RB_INIT(&nw->addr);
+ for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) {
+ RB_INIT(&nw->size[i]);
+ TAILQ_INIT(&nw->single[i]);
+ }
+ return nw;
+}
+
+static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES;
+
+/*
+ * Initialization of pmr.
+ * Called by uvm_page_init.
+ *
+ * Sets up pmemranges that map the vm_physmem data.
+ */
+void
+uvm_pmr_init(void)
+{
+ struct uvm_pmemrange *new_pmr;
+ int i;
+
+ TAILQ_INIT(&uvm.pmr_control.use);
+ RB_INIT(&uvm.pmr_control.addr);
+
+	for (i = 0; i < vm_nphysseg; i++) {
+ new_pmr = uvm_pmr_allocpmr();
+
+ new_pmr->low = vm_physmem[i].start;
+ new_pmr->high = vm_physmem[i].end;
+
+ RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr);
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr);
+ }
+
+ for (i = 0; i < nitems(uvm_io_ranges); i++)
+ uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high);
+}
+
+/*
+ * Find the pmemrange that contains the given page number.
+ *
+ * (Manually traverses the binary tree, because that is cheaper on stack
+ * usage.)
+ */
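+/*
+ * Note that ranges are half-open: low is inclusive, high is exclusive,
+ * hence the "pmr->high <= pageno" test below.
+ */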
+struct uvm_pmemrange *
+uvm_pmemrange_find(paddr_t pageno)
+{
+ struct uvm_pmemrange *pmr;
+
+ pmr = RB_ROOT(&uvm.pmr_control.addr);
+ while (pmr != NULL) {
+ if (pmr->low > pageno)
+ pmr = RB_LEFT(pmr, pmr_addr);
+ else if (pmr->high <= pageno)
+ pmr = RB_RIGHT(pmr, pmr_addr);
+ else
+ break;
+ }
+
+ return pmr;
+}
+
+#if defined(DDB) || defined(DEBUG)
+/*
+ * Return true if the given page is in any of the free lists.
+ * Used by uvm_page_printit.
+ * This function is safe, even if the page is not on the freeq.
+ * Note: does not apply locking, only called from ddb.
+ */
+int
+uvm_pmr_isfree(struct vm_page *pg)
+{
+ struct vm_page *r;
+ struct uvm_pmemrange *pmr;
+
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ if (pmr == NULL)
+ return 0;
+	r = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
+	if (r == NULL)
+		r = RB_MAX(uvm_pmr_addr, &pmr->addr);
+	else if (r != pg)
+		r = RB_PREV(uvm_pmr_addr, &pmr->addr, r);
+	if (r == NULL)
+		return 0; /* Empty tree or pg below lowest free chunk. */
+
+ KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)));
+ return atop(VM_PAGE_TO_PHYS(r)) + r->fq.free.pages >
+ atop(VM_PAGE_TO_PHYS(pg));
+}
+#endif /* DDB || DEBUG */
+
+/*
+ * Allocate any page, the fastest way. No constraints.
+ */
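+/* Returns the number of pages placed on result; may be fewer than count. */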
+int
+uvm_pmr_get1page(psize_t count, int memtype, struct pglist *result,
+ paddr_t start, paddr_t end)
+{
+ struct uvm_pmemrange *pmr;
+ struct vm_page *found;
+ psize_t fcount;
+
+ fcount = 0;
+ pmr = TAILQ_FIRST(&uvm.pmr_control.use);
+ while (pmr != NULL && fcount != count) {
+ /* Outside requested range. */
+ if (!(start == 0 && end == 0) &&
+ !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end)) {
+ pmr = TAILQ_NEXT(pmr, pmr_use);
+ continue;
+ }
+
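+		/*
+		 * No constraints apply, so grab whatever is cheapest to
+		 * find: a single-page chunk if one is available, otherwise
+		 * the chunk at the root of the size tree.
+		 */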
+ found = TAILQ_FIRST(&pmr->single[memtype]);
+ if (found == NULL) {
+ found = RB_ROOT(&pmr->size[memtype]);
+ /* Size tree gives pg[1] instead of pg[0] */
+ if (found != NULL)
+ found--;
+ }
+ if (found == NULL) {
+ pmr = TAILQ_NEXT(pmr, pmr_use);
+ continue;
+ }
+
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_remove_size(pmr, found);
+ while (found->fq.free.pages > 0 && fcount < count) {
+ found->fq.free.pages--;
+ fcount++;
+ TAILQ_INSERT_HEAD(result,
+ &found[found->fq.free.pages], pageq);
+ }
+ if (found->fq.free.pages > 0) {
+ uvm_pmr_insert_size(pmr, found);
+ KASSERT(fcount == count);
+ uvm_pmr_assertvalid(pmr);
+ return fcount;
+ } else
+ uvm_pmr_remove_addr(pmr, found);
+ uvm_pmr_assertvalid(pmr);
+ }
+
+ /* Ran out of ranges before enough pages were gathered. */
+ return fcount;
+}
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
new file mode 100644
index 00000000000..90219dc075b
--- /dev/null
+++ b/sys/uvm/uvm_pmemrange.h
@@ -0,0 +1,83 @@
+/* $OpenBSD: uvm_pmemrange.h,v 1.1 2009/06/01 17:42:33 ariane Exp $ */
+
+/*
+ * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * uvm_pmemrange.h: describe and manage free physical memory.
+ */
+
+#ifndef _UVM_UVM_PMEMRANGE_H_
+#define _UVM_UVM_PMEMRANGE_H_
+
+#include <uvm/uvm_extern.h>
+#include <uvm/uvm_page.h>
+
+RB_HEAD(uvm_pmr_addr, vm_page);
+RB_HEAD(uvm_pmr_size, vm_page);
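+
+/*
+ * Free pages are kept as runs of contiguous pages.  The first page of a
+ * run carries the run length in fq.free.pages and is linked into the
+ * address tree; runs of two or more pages are additionally linked into
+ * the size tree through their second page, while single-page runs live
+ * on the single[] queues instead.
+ */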
+
+/*
+ * Page types available:
+ * - DIRTY: this page may contain random data.
+ * - ZERO: this page has been zeroed.
+ */
+#define UVM_PMR_MEMTYPE_DIRTY	0
+#define UVM_PMR_MEMTYPE_ZERO 1
+#define UVM_PMR_MEMTYPE_MAX 2
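+
+/* The memtype is used as an index into the size[] and single[] arrays below. */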
+
+/*
+ * An address range of memory.
+ */
+struct uvm_pmemrange {
+ struct uvm_pmr_addr addr; /* Free page chunks, sorted by addr. */
+ struct uvm_pmr_size size[UVM_PMR_MEMTYPE_MAX];
+ /* Free page chunks, sorted by size. */
+ TAILQ_HEAD(, vm_page) single[UVM_PMR_MEMTYPE_MAX];
+ /* single page regions (uses pageq) */
+
+ paddr_t low; /* Start of address range (pgno). */
+ paddr_t high; /* End +1 (pgno). */
+ int use; /* Use counter. */
+ int nsegs; /* Current range count. */
+
+ TAILQ_ENTRY(uvm_pmemrange) pmr_use;
+ /* pmr, sorted by use */
+ RB_ENTRY(uvm_pmemrange) pmr_addr;
+ /* pmr, sorted by address */
+};
+
+RB_HEAD(uvm_pmemrange_addr, uvm_pmemrange);
+TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
+
+/*
+ * pmr control structure. Contained in uvm.pmr_control.
+ */
+struct uvm_pmr_control {
+ struct uvm_pmemrange_addr addr;
+ struct uvm_pmemrange_use use;
+};
+
+void uvm_pmr_freepages(struct vm_page *, psize_t);
+void uvm_pmr_freepageq(struct pglist *pgl);
+int uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
+ int, int, struct pglist *);
+void uvm_pmr_init(void);
+
+#ifdef DDB
+int uvm_pmr_isfree(struct vm_page *pg);
+#endif
+
+#endif /* _UVM_UVM_PMEMRANGE_H_ */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index 0ea3b01d07a..2e4870b745d 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.58 2009/05/23 14:06:37 oga Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.59 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
/*
@@ -561,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp)
while (uvn->u_obj.uo_npages) {
#ifdef DEBUG
struct vm_page *pp;
- TAILQ_FOREACH(pp, &uvn->u_obj.memq, listq) {
+ TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) {
if ((pp->pg_flags & PG_BUSY) == 0)
panic("uvm_vnp_terminate: detected unbusy pg");
}
diff --git a/sys/xfs/xfs_vnodeops-bsd.c b/sys/xfs/xfs_vnodeops-bsd.c
index ed74d6d478d..3256b1c0160 100644
--- a/sys/xfs/xfs_vnodeops-bsd.c
+++ b/sys/xfs/xfs_vnodeops-bsd.c
@@ -1119,7 +1119,7 @@ xfs_putpages (struct vop_putpages_args *ap)
while (pg && !dirty) {
dirty = pmap_is_modified(pg) || (pg->flags & PG_CLEAN) == 0;
- pg = TAILQ_NEXT(pg, listq);
+ pg = TAILQ_NEXT(pg, fq.queues.listq);
}
if (dirty)