-rw-r--r--  sys/arch/amd64/amd64/pmap.c       |   10
-rw-r--r--  sys/arch/amd64/include/vmparam.h  |    9
-rw-r--r--  sys/arch/i386/i386/pmap.c         |   11
-rw-r--r--  sys/arch/i386/i386/pmapae.c       |   15
-rw-r--r--  sys/arch/i386/include/vmparam.h   |    9
-rw-r--r--  sys/conf/files                    |    3
-rw-r--r--  sys/uvm/uvm.h                     |   31
-rw-r--r--  sys/uvm/uvm_extern.h              |    9
-rw-r--r--  sys/uvm/uvm_map.c                 |   15
-rw-r--r--  sys/uvm/uvm_page.c                |  173
-rw-r--r--  sys/uvm/uvm_page.h                |   17
-rw-r--r--  sys/uvm/uvm_pglist.c              |  328
-rw-r--r--  sys/uvm/uvm_pmemrange.c           | 1248
-rw-r--r--  sys/uvm/uvm_pmemrange.h           |   83
-rw-r--r--  sys/uvm/uvm_vnode.c               |    4
-rw-r--r--  sys/xfs/xfs_vnodeops-bsd.c        |    2
16 files changed, 1503 insertions(+), 464 deletions(-)
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 4bd4ba51f9f..fb46e417f84 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.42 2009/05/28 09:05:33 art Exp $ */
+/* $OpenBSD: pmap.c,v 1.43 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -834,7 +834,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
ptp->wire_count = 0;
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(pagelist, ptp, listq);
+ TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq);
}
void
@@ -1545,7 +1545,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
@@ -1617,7 +1617,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -1690,7 +1690,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/amd64/include/vmparam.h b/sys/arch/amd64/include/vmparam.h
index fd82b226db2..d3c5c9dd102 100644
--- a/sys/arch/amd64/include/vmparam.h
+++ b/sys/arch/amd64/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.10 2008/07/18 16:40:17 kurt Exp $ */
+/* $OpenBSD: vmparam.h,v 1.11 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.1 2003/04/26 18:39:49 fvdl Exp $ */
/*-
@@ -112,6 +112,13 @@
#define VM_FREELIST_LOW 1
#define VM_FREELIST_HIGH 2
+/* reserve ISA-DMA and 32-bit DMA memory */
+#define UVM_IO_RANGES \
+ { \
+ { 0, 0x00ffffffUL }, \
+ { 0, 0xffffffffUL }, \
+ }
+
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 753298eb42a..91fd6edb555 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.136 2009/02/05 01:13:21 oga Exp $ */
+/* $OpenBSD: pmap.c,v 1.137 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -2074,7 +2074,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
/* If PTP is no longer being used, free it. */
if (ptp && ptp->wire_count <= 1) {
pmap_drop_ptp(pmap, va, ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
}
if (!shootall)
@@ -2088,7 +2088,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -2145,7 +2145,8 @@ pmap_page_remove(struct vm_page *pg)
if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) {
pmap_drop_ptp(pve->pv_pmap, pve->pv_va,
pve->pv_ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
+ fq.queues.listq);
}
pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
@@ -2158,7 +2159,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index 683d282379a..b13ff7c9463 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmapae.c,v 1.15 2009/01/27 22:14:13 miod Exp $ */
+/* $OpenBSD: pmapae.c,v 1.16 2009/06/01 17:42:33 ariane Exp $ */
/*
* Copyright (c) 2006 Michael Shalayeff
@@ -1453,14 +1453,15 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp,
+ fq.queues.listq);
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
return;
@@ -1546,7 +1547,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
}
}
@@ -1554,7 +1555,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
@@ -1665,7 +1666,7 @@ pmap_page_remove_pae(struct vm_page *pg)
/* Postpone free to after shootdown. */
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
- listq);
+ fq.queues.listq);
}
}
pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */
@@ -1676,7 +1677,7 @@ pmap_page_remove_pae(struct vm_page *pg)
PMAP_HEAD_TO_MAP_UNLOCK();
pmap_tlb_shootnow(cpumask);
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h
index 9c2163e6c2d..41e95b3f418 100644
--- a/sys/arch/i386/include/vmparam.h
+++ b/sys/arch/i386/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.41 2008/07/18 16:40:17 kurt Exp $ */
+/* $OpenBSD: vmparam.h,v 1.42 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */
/*-
@@ -118,6 +118,13 @@
#define VM_FREELIST_DEFAULT 0
#define VM_FREELIST_FIRST16 1
+/* reserve ISA-DMA and 32-bit DMA memory */
+#define UVM_IO_RANGES \
+ { \
+ { 0, 0x00ffffffUL }, \
+ { 0, 0xffffffffUL }, \
+ }
+
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/conf/files b/sys/conf/files
index 38d4e2014c2..826593c211d 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.455 2009/05/06 18:21:23 stevesk Exp $
+# $OpenBSD: files,v 1.456 2009/06/01 17:42:33 ariane Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -971,6 +971,7 @@ file uvm/uvm_page.c
file uvm/uvm_pager.c
file uvm/uvm_pdaemon.c
file uvm/uvm_pglist.c
+file uvm/uvm_pmemrange.c
file uvm/uvm_stat.c
file uvm/uvm_swap.c
file uvm/uvm_swap_encrypt.c uvm_swap_encrypt
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index 3efcf89044c..087add79376 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.32 2009/05/04 18:08:06 oga Exp $ */
+/* $OpenBSD: uvm.h,v 1.33 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -57,6 +57,7 @@
#include <uvm/uvm_page.h>
#include <uvm/uvm_pager.h>
#include <uvm/uvm_pdaemon.h>
+#include <uvm/uvm_pmemrange.h>
#include <uvm/uvm_swap.h>
#ifdef UVM_SWAP_ENCRYPT
#include <uvm/uvm_swap_encrypt.h>
@@ -68,6 +69,32 @@
#include <machine/vmparam.h>
/*
+ * UVM_IO_RANGES: paddr_t pairs, describing the lowest and highest address
+ * that should be reserved. These ranges (which may overlap) will have their
+ * use counter increased, causing them to be avoided if an allocation can be
+ * satisfied from another range of memory.
+ *
+ * IO ranges need not overlap with physmem ranges: the uvm code splits ranges
+ * on demand to satisfy requests.
+ *
+ * UVM_IO_RANGES specified here actually translates into a call to
+ * uvm_pmr_use_inc() at uvm initialization time. uvm_pmr_use_inc() can also
+ * be called after uvm_init() has completed.
+ *
+ * Note: the upper bound is specified in the same way as to uvm_pglistalloc.
+ * E.g. a 16-bit memory range is specified as: { 0, 0xffff }.
+ */
+#ifndef UVM_IO_RANGES
+#define UVM_IO_RANGES {}
+#endif
+
+/* UVM IO ranges are described in an array of uvm_io_ranges. */
+struct uvm_io_ranges {
+ paddr_t low;
+ paddr_t high;
+};
+
+/*
* uvm structure (vm global state: collected in one structure for ease
* of reference...)
*/
@@ -76,7 +103,7 @@ struct uvm {
/* vm_page related parameters */
/* vm_page queues */
- struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
+ struct uvm_pmr_control pmr_control; /* pmemrange control data */
struct pglist page_active; /* allocated pages, in use */
struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */
struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */
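
The comment block added above states that UVM_IO_RANGES translates into uvm_pmr_use_inc() calls at UVM initialization time; the initialization code itself lives in uvm_pmemrange.c, whose tail is truncated at the end of this diff. A minimal sketch of such a table walk, assuming the nitems() macro from <sys/param.h>; example_mark_io_ranges() is a hypothetical name, while uvm_io_ranges, struct uvm_io_ranges and uvm_pmr_use_inc() are taken from this diff:

    /* Hypothetical sketch -- not part of the commit. */
    static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES;

    void
    example_mark_io_ranges(void)
    {
        int i;

        /* Raise the use counter on each reserved I/O range. */
        for (i = 0; i < nitems(uvm_io_ranges); i++)
            uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high);
    }
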
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 5ff1e2ddad2..bc6a766590a 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.76 2009/04/20 00:30:18 oga Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.77 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -221,11 +221,12 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zeroed */
/*
- * flags for uvm_pglistalloc()
+ * flags for uvm_pglistalloc() and uvm_pmr_getpages()
*/
#define UVM_PLA_WAITOK 0x0001 /* may sleep */
#define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */
#define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */
+#define UVM_PLA_TRY_CONTIG 0x0008 /* try to allocate a contig range */
/*
* lockflags that control the locking behavior of various functions.
@@ -589,6 +590,10 @@ int uvm_pglistalloc(psize_t, paddr_t,
struct pglist *, int, int);
void uvm_pglistfree(struct pglist *);
+/* uvm_pmemrange.c */
+
+void uvm_pmr_use_inc(paddr_t, paddr_t);
+
/* uvm_swap.c */
void uvm_swap_init(void);
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index 8858a585027..1b6f3262986 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.110 2009/05/02 12:54:42 oga Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.111 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -3822,7 +3822,7 @@ uvm_object_printit(uobj, full, pr)
(*pr)(" PAGES <pg,offset>:\n ");
for (pg = TAILQ_FIRST(&uobj->memq);
pg != NULL;
- pg = TAILQ_NEXT(pg, listq), cnt++) {
+ pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) {
(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
if ((cnt % 3) == 2) {
(*pr)("\n ");
@@ -3883,7 +3883,8 @@ uvm_page_printit(pg, full, pr)
uobj = pg->uobject;
if (uobj) {
(*pr)(" checking object list\n");
- TAILQ_FOREACH(tpg, &uobj->memq, listq) {
+ TAILQ_FOREACH(tpg, &uobj->memq,
+ fq.queues.listq) {
if (tpg == pg) {
break;
}
@@ -3898,9 +3899,11 @@ uvm_page_printit(pg, full, pr)
/* cross-verify page queue */
if (pg->pg_flags & PQ_FREE) {
- int fl = uvm_page_lookup_freelist(pg);
- pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ?
- PGFL_ZEROS : PGFL_UNKNOWN];
+ if (uvm_pmr_isfree(pg))
+ printf(" page found in uvm_pmemrange\n");
+ else
+ printf(" >>> page not found in uvm_pmemrange <<<\n");
+ pgl = NULL;
} else if (pg->pg_flags & PQ_INACTIVE) {
pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
&uvm.page_inactive_swp : &uvm.page_inactive_obj;
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 39008ac0c19..7c6e257ccb5 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.80 2009/05/08 15:10:35 ariane Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.81 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -159,10 +159,11 @@ uvm_pageinsert(struct vm_page *pg)
KASSERT((pg->pg_flags & PG_TABLED) == 0);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */
+ TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */
mtx_leave(&uvm.hashlock);
- TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
+ TAILQ_INSERT_TAIL(&pg->uobject->memq, pg,
+ fq.queues.listq); /* put in object */
atomic_setbits_int(&pg->pg_flags, PG_TABLED);
pg->uobject->uo_npages++;
}
@@ -183,7 +184,7 @@ uvm_pageremove(struct vm_page *pg)
KASSERT(pg->pg_flags & PG_TABLED);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_REMOVE(buck, pg, hashq);
+ TAILQ_REMOVE(buck, pg, fq.queues.hashq);
mtx_leave(&uvm.hashlock);
#ifdef UBC
@@ -193,7 +194,7 @@ uvm_pageremove(struct vm_page *pg)
#endif
/* object should be locked */
- TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
+ TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq);
atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ);
pg->uobject->uo_npages--;
@@ -226,15 +227,12 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
* init the page queues and page queue locks
*/
- for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
- for (i = 0; i < PGFL_NQUEUES; i++)
- TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
- }
TAILQ_INIT(&uvm.page_active);
TAILQ_INIT(&uvm.page_inactive_swp);
TAILQ_INIT(&uvm.page_inactive_obj);
simple_lock_init(&uvm.pageqlock);
mtx_init(&uvm.fpageqlock, IPL_VM);
+ uvm_pmr_init();
/*
* init the <obj,offset> => <page> hash table. for now
@@ -319,10 +317,13 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
if (atop(paddr) >= vm_physmem[lcv].avail_start &&
atop(paddr) <= vm_physmem[lcv].avail_end) {
uvmexp.npages++;
- /* add page to free pool */
- uvm_pagefree(&vm_physmem[lcv].pgs[i]);
}
}
+
+ /* add pages to free pool */
+ uvm_pmr_freepages(&vm_physmem[lcv].pgs[
+ vm_physmem[lcv].avail_start - vm_physmem[lcv].start],
+ vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
}
/*
@@ -811,10 +812,10 @@ uvm_page_rehash(void)
/* ... and rehash */
for (lcv = 0 ; lcv < oldcount ; lcv++) {
while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) {
- TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
+ TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq);
TAILQ_INSERT_TAIL(
&uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
- pg, hashq);
+ pg, fq.queues.hashq);
}
}
mtx_leave(&uvm.hashlock);
@@ -892,18 +893,15 @@ struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
int flags, int strat, int free_list)
{
- int lcv, try1, try2, zeroit = 0;
+ struct pglist pgl;
+ int pmr_flags;
struct vm_page *pg;
- struct pglist *freeq;
- struct pgfreelist *pgfl;
boolean_t use_reserve;
UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist);
KASSERT(obj == NULL || anon == NULL);
KASSERT(off == trunc_page(off));
- uvm_lock_fpageq();
-
/*
* check to see if we need to generate some free pages waking
* the pagedaemon.
@@ -930,95 +928,20 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
(curproc == syncerproc))))
goto fail;
-#if PGFL_NQUEUES != 2
-#error uvm_pagealloc_strat needs to be updated
-#endif
-
- /*
- * If we want a zero'd page, try the ZEROS queue first, otherwise
- * we try the UNKNOWN queue first.
- */
- if (flags & UVM_PGA_ZERO) {
- try1 = PGFL_ZEROS;
- try2 = PGFL_UNKNOWN;
- } else {
- try1 = PGFL_UNKNOWN;
- try2 = PGFL_ZEROS;
- }
-
- UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx",
- obj, (u_long)off, anon, flags);
- UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0);
- again:
- switch (strat) {
- case UVM_PGA_STRAT_NORMAL:
- /* Check all freelists in descending priority order. */
- for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
- pgfl = &uvm.page_free[lcv];
- if ((pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try1]))) != NULL ||
- (pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try2]))) != NULL)
- goto gotit;
- }
-
- /* No pages free! */
- goto fail;
-
- case UVM_PGA_STRAT_ONLY:
- case UVM_PGA_STRAT_FALLBACK:
- /* Attempt to allocate from the specified free list. */
- KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
- pgfl = &uvm.page_free[free_list];
- if ((pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try1]))) != NULL ||
- (pg = TAILQ_FIRST((freeq =
- &pgfl->pgfl_queues[try2]))) != NULL)
- goto gotit;
-
- /* Fall back, if possible. */
- if (strat == UVM_PGA_STRAT_FALLBACK) {
- strat = UVM_PGA_STRAT_NORMAL;
- goto again;
- }
-
- /* No pages free! */
+ pmr_flags = UVM_PLA_NOWAIT;
+ if (flags & UVM_PGA_ZERO)
+ pmr_flags |= UVM_PLA_ZERO;
+ TAILQ_INIT(&pgl);
+ if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
goto fail;
-
- default:
- panic("uvm_pagealloc_strat: bad strat %d", strat);
- /* NOTREACHED */
- }
-
- gotit:
- TAILQ_REMOVE(freeq, pg, pageq);
- uvmexp.free--;
-
- /* update zero'd page count */
- if (pg->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
-
- /*
- * update allocation statistics and remember if we have to
- * zero the page
- */
- if (flags & UVM_PGA_ZERO) {
- if (pg->pg_flags & PG_ZERO) {
- uvmexp.pga_zerohit++;
- zeroit = 0;
- } else {
- uvmexp.pga_zeromiss++;
- zeroit = 1;
- }
- }
-
- uvm_unlock_fpageq(); /* unlock free page queue */
+ pg = TAILQ_FIRST(&pgl);
+ KASSERT(pg != NULL);
+ KASSERT(TAILQ_NEXT(pg, pageq) == NULL);
pg->offset = off;
pg->uobject = obj;
pg->uanon = anon;
pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
- pg->pg_version++;
if (anon) {
anon->an_page = pg;
atomic_setbits_int(&pg->pg_flags, PQ_ANON);
@@ -1034,22 +957,11 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
#endif
UVM_PAGE_OWN(pg, "new alloc");
- if (flags & UVM_PGA_ZERO) {
- /*
- * A zero'd page is not clean. If we got a page not already
- * zero'd, then we have to zero it ourselves.
- */
- atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
- if (zeroit)
- pmap_zero_page(pg);
- }
-
UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg,
(u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
return(pg);
fail:
- uvm_unlock_fpageq();
UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0);
return (NULL);
}
@@ -1100,6 +1012,7 @@ uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
void
uvm_pagefree(struct vm_page *pg)
{
+ struct pglist pgl;
int saved_loan_count = pg->loan_count;
UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist);
@@ -1195,27 +1108,35 @@ uvm_pagefree(struct vm_page *pg)
}
/*
- * and put on free queue
+ * Clean page state bits.
*/
-
- atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
-
- uvm_lock_fpageq();
- TAILQ_INSERT_TAIL(&uvm.page_free[
- uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
- atomic_clearbits_int(&pg->pg_flags, PQ_MASK);
- atomic_setbits_int(&pg->pg_flags, PQ_FREE);
+ atomic_clearbits_int(&pg->pg_flags,
+ PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK);
+ /*
+ * Pmap flag cleaning.
+ * XXX: Shouldn't pmap do this?
+ */
+ atomic_clearbits_int(&pg->pg_flags,
+ PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
+
+#if defined(DIAGNOSTIC)
+ if (pg->pg_flags != 0) {
+ panic("uvm_pagefree: expected page %p pg_flags to be 0\n"
+ "uvm_pagefree: instead of pg->pg_flags = %x\n",
+ VM_PAGE_TO_PHYS(pg), pg->pg_flags);
+ }
+#endif
#ifdef DEBUG
pg->uobject = (void *)0xdeadbeef;
pg->offset = 0xdeadbeef;
pg->uanon = (void *)0xdeadbeef;
#endif
- uvmexp.free++;
+ TAILQ_INIT(&pgl);
+ TAILQ_INSERT_HEAD(&pgl, pg, pageq);
+ uvm_pmr_freepageq(&pgl);
if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
uvm.page_idle_zero = vm_page_zero_enable;
-
- uvm_unlock_fpageq();
}
/*
@@ -1308,6 +1229,7 @@ uvm_page_own(struct vm_page *pg, char *tag)
void
uvm_pageidlezero(void)
{
+#if 0 /* Disabled for now. */
struct vm_page *pg;
struct pgfreelist *pgfl;
int free_list;
@@ -1374,6 +1296,7 @@ uvm_pageidlezero(void)
uvmexp.zeropages++;
uvm_unlock_fpageq();
} while (curcpu_is_idle());
+#endif /* 0 */
}
/*
@@ -1476,7 +1399,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off)
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(obj,off)];
- TAILQ_FOREACH(pg, buck, hashq) {
+ TAILQ_FOREACH(pg, buck, fq.queues.hashq) {
if (pg->uobject == obj && pg->offset == off) {
break;
}
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index e21562cd030..e7991dce4a0 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.h,v 1.32 2009/04/28 16:06:07 miod Exp $ */
+/* $OpenBSD: uvm_page.h,v 1.33 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */
/*
@@ -106,11 +106,22 @@
#include <uvm/uvm_extern.h>
#include <uvm/uvm_pglist.h>
+union vm_page_fq {
+ struct {
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
+ } queues;
+
+ struct {
+ RB_ENTRY(vm_page) tree; /* Free chunks, addr/size */
+ psize_t pages;
+ } free;
+};
+
struct vm_page {
+ union vm_page_fq fq; /* free and queue management */
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
* queue or free list (P) */
- TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
struct vm_anon *uanon; /* anon (O,P) */
struct uvm_object *uobject; /* object (O,P) */
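
The union introduced above lets the object/hash queue links and the free-range bookkeeping share the same storage in struct vm_page, which is safe because a page is never simultaneously on an object's memq and in the free-page allocator. A hypothetical helper illustrating the two views; example_page_describe() is not part of the commit, and fq.free.pages is only meaningful on the first page of a free run:

    void
    example_page_describe(struct vm_page *pg)
    {
        if (pg->pg_flags & PQ_FREE)
            printf("head of a free run of %ld pages\n",
                (long)pg->fq.free.pages);
        else if (pg->uobject != NULL)
            printf("next page in the same object: %p\n",
                TAILQ_NEXT(pg, fq.queues.listq));
    }
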
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c
index 093cd134b7f..ff0f8d91f68 100644
--- a/sys/uvm/uvm_pglist.c
+++ b/sys/uvm/uvm_pglist.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pglist.c,v 1.29 2009/05/04 18:08:06 oga Exp $ */
+/* $OpenBSD: uvm_pglist.c,v 1.30 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */
/*-
@@ -56,112 +56,6 @@ u_long uvm_pglistalloc_npages;
#define STAT_DECR(v)
#endif
-int uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t, struct pglist *);
-
-/*
- * Simple page allocation: pages do not need to be contiguous. We just
- * attempt to find enough free pages in the given range.
- */
-int
-uvm_pglistalloc_simple(psize_t size, paddr_t low, paddr_t high,
- struct pglist *rlist)
-{
- psize_t todo;
- int psi;
- struct vm_page *pg;
- struct vm_physseg *seg;
- paddr_t slow, shigh;
- int pgflidx, error, free_list;
- UVMHIST_FUNC("uvm_pglistalloc_simple"); UVMHIST_CALLED(pghist);
-#ifdef DEBUG
- vm_page_t tp;
-#endif
-
- /* Default to "lose". */
- error = ENOMEM;
-
- todo = atop(size);
-
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- /* Are there even any free pages? */
- if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
- goto out;
-
- for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
- /*
- * Skip this segment if incompatible with the address range.
- */
- if (seg->avail_end <= atop(low))
- continue;
- if (seg->avail_start >= atop(high))
- continue;
-
- slow = MAX(atop(low), seg->avail_start);
- shigh = MIN(atop(high), seg->avail_end);
-
- /* we want to be able to allocate at least a page... */
- if (slow == shigh)
- continue;
-
- for (pg = &seg->pgs[slow - seg->start]; slow != shigh;
- slow++, pg++) {
- if (VM_PAGE_IS_FREE(pg) == 0)
- continue;
-
- free_list = uvm_page_lookup_freelist(pg);
- pgflidx = (pg->pg_flags & PG_ZERO) ?
- PGFL_ZEROS : PGFL_UNKNOWN;
-#ifdef DEBUG
- for (tp = TAILQ_FIRST(&uvm.page_free[free_list].pgfl_queues[pgflidx]);
- tp != NULL; tp = TAILQ_NEXT(tp, pageq)) {
- if (tp == pg)
- break;
- }
- if (tp == NULL)
- panic("uvm_pglistalloc_simple: page not on freelist");
-#endif
- TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
- pg, pageq);
- uvmexp.free--;
- if (pg->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
- pg->uobject = NULL;
- pg->uanon = NULL;
- pg->pg_version++;
- TAILQ_INSERT_TAIL(rlist, pg, pageq);
- STAT_INCR(uvm_pglistalloc_npages);
- if (--todo == 0) {
- error = 0;
- goto out;
- }
- }
-
- }
-
-out:
- /*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
-
- if (!error && (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
- (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg))) {
- wakeup(&uvm.pagedaemon_proc);
- }
-
- uvm_unlock_fpageq();
-
- if (error)
- uvm_pglistfree(rlist);
-
- return (error);
-}
-
/*
* uvm_pglistalloc: allocate a list of pages
*
@@ -179,202 +73,45 @@ out:
* alignment memory must be aligned to this power-of-two boundary.
* boundary no segment in the allocation may cross this
* power-of-two boundary (relative to zero).
+ * => flags:
+ * UVM_PLA_NOWAIT fail instead of sleeping if no memory is available
+ * UVM_PLA_WAITOK wait for memory to become available if the allocation fails
+ * UVM_PLA_ZERO return zeroed memory
+ * UVM_PLA_TRY_CONTIG device prefers physically contiguous memory
*/
int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
paddr_t boundary, struct pglist *rlist, int nsegs, int flags)
{
- int psi;
- struct vm_page *pgs;
- struct vm_physseg *seg;
- paddr_t slow, shigh;
- paddr_t try, idxpa, lastidxpa;
- int tryidx, idx, pgflidx, endidx, error, free_list;
- vm_page_t m;
- u_long pagemask;
-#ifdef DEBUG
- vm_page_t tp;
-#endif
UVMHIST_FUNC("uvm_pglistalloc"); UVMHIST_CALLED(pghist);
KASSERT((alignment & (alignment - 1)) == 0);
KASSERT((boundary & (boundary - 1)) == 0);
- /*
- * This argument is always ignored for now, but ensure drivers always
- * show intention.
- */
KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
-
- /*
- * Our allocations are always page granularity, so our alignment
- * must be, too.
- */
- if (alignment < PAGE_SIZE)
- alignment = PAGE_SIZE;
if (size == 0)
return (EINVAL);
- size = round_page(size);
- low = roundup(low, alignment);
-
/*
- * If we are allowed to allocate as many segments as pages,
- * no need to be smart.
+ * Convert byte addresses to page numbers.
*/
- if ((nsegs >= size / PAGE_SIZE) && (alignment == PAGE_SIZE) &&
- (boundary == 0)) {
- error = uvm_pglistalloc_simple(size, low, high, rlist);
- goto done;
- }
-
- if (boundary != 0 && boundary < size)
- return (EINVAL);
-
- pagemask = ~(boundary - 1);
-
- /* Default to "lose". */
- error = ENOMEM;
-
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- /* Are there even any free pages? */
- if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
- goto out;
-
- for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
- /*
- * Skip this segment if incompatible with the address range.
- */
- if (seg->avail_end <= atop(low))
- continue;
- if (seg->avail_start >= atop(high))
- continue;
-
- slow = MAX(low, ptoa(seg->avail_start));
- shigh = MIN(high, ptoa(seg->avail_end));
-
- try = roundup(slow, alignment);
- for (;; try += alignment) {
- if (try + size > shigh) {
- /*
- * We've run past the allowable range, or
- * the segment. Try another.
- */
- break;
- }
-
- tryidx = idx = atop(try) - seg->start;
- endidx = idx + atop(size);
- pgs = vm_physmem[psi].pgs;
-
- /*
- * Found a suitable starting page. See if the
- * range is free.
- */
-
- for (; idx < endidx; idx++) {
- if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
- break;
- }
- idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
- if (idx == tryidx)
- continue;
-
- /*
- * Check that the region is contiguous
- * (it really should...) and does not
- * cross an alignment boundary.
- */
- lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
- if ((lastidxpa + PAGE_SIZE) != idxpa)
- break;
-
- if (boundary != 0 &&
- ((lastidxpa ^ idxpa) & pagemask) != 0)
- break;
- }
-
- if (idx == endidx) {
- goto found;
- }
- }
- }
-
- /*
- * We could not allocate a contiguous range. This is where
- * we should try harder if nsegs > 1...
- */
- goto out;
-
-#if PGFL_NQUEUES != 2
-#error uvm_pglistalloc needs to be updated
-#endif
-
-found:
- /*
- * we have a chunk of memory that conforms to the requested constraints.
- */
- idx = tryidx;
- while (idx < endidx) {
- m = &pgs[idx];
- free_list = uvm_page_lookup_freelist(m);
- pgflidx = (m->pg_flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
-#ifdef DEBUG
- for (tp = TAILQ_FIRST(&uvm.page_free[
- free_list].pgfl_queues[pgflidx]);
- tp != NULL;
- tp = TAILQ_NEXT(tp, pageq)) {
- if (tp == m)
- break;
- }
- if (tp == NULL)
- panic("uvm_pglistalloc: page not on freelist");
-#endif
- TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
- m, pageq);
- uvmexp.free--;
- if (m->pg_flags & PG_ZERO)
- uvmexp.zeropages--;
- m->uobject = NULL;
- m->uanon = NULL;
- m->pg_version++;
- TAILQ_INSERT_TAIL(rlist, m, pageq);
- idx++;
- STAT_INCR(uvm_pglistalloc_npages);
- }
- error = 0;
-
-out:
- /*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
-
- if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
- (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg)) {
- wakeup(&uvm.pagedaemon_proc);
- }
-
- uvm_unlock_fpageq();
-
-done:
- /* No locking needed here, pages are not on any queue. */
- if (error == 0) {
- TAILQ_FOREACH(m, rlist, pageq) {
- if (flags & UVM_PLA_ZERO &&
- (m->pg_flags & PG_ZERO) == 0)
- uvm_pagezero(m);
- m->pg_flags = PG_CLEAN;
- }
- }
-
- return (error);
+ if (alignment < PAGE_SIZE)
+ alignment = PAGE_SIZE;
+ low = atop(roundup(low, alignment));
+ /* Allows for overflow: 0xffff + 1 = 0x0000 */
+ if ((high & PAGE_MASK) == PAGE_MASK)
+ high = atop(high) + 1;
+ else
+ high = atop(high);
+ size = atop(round_page(size));
+ alignment = atop(alignment);
+ if (boundary < PAGE_SIZE && boundary != 0)
+ boundary = PAGE_SIZE;
+ boundary = atop(boundary);
+
+ return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs,
+ flags, rlist);
}
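
The address conversion above is the subtle part: the caller's high is an inclusive byte address, while uvm_pmr_getpages() takes an exclusive page number, and the PAGE_MASK test avoids computing high + 1 in byte space, where a high at the top of the address space would wrap to 0 (the "0xffff + 1 = 0x0000" note). A standalone restatement of the arithmetic for a typical ISA-DMA request, assuming 4K pages; the macros are simplified stand-ins for the kernel's own:

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_MASK       (PAGE_SIZE - 1)
    #define atop(x)         ((unsigned long)(x) >> PAGE_SHIFT)
    #define round_page(x)   (((x) + PAGE_MASK) & ~PAGE_MASK)
    #define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

    int
    main(void)
    {
        /* 64KB below 16MB, 64KB boundary, no explicit alignment. */
        unsigned long size = 0x10000, low = 0, high = 0x00ffffff;
        unsigned long alignment = 0, boundary = 0x10000;

        if (alignment < PAGE_SIZE)
            alignment = PAGE_SIZE;
        low = atop(roundup(low, alignment));
        /* high is inclusive on entry, exclusive as a page number. */
        high = ((high & PAGE_MASK) == PAGE_MASK) ?
            atop(high) + 1 : atop(high);
        size = atop(round_page(size));
        alignment = atop(alignment);
        if (boundary < PAGE_SIZE && boundary != 0)
            boundary = PAGE_SIZE;
        boundary = atop(boundary);

        /* Prints: size=16 low=0 high=4096 align=1 boundary=16 */
        printf("size=%lu low=%lu high=%lu align=%lu boundary=%lu\n",
            size, low, high, alignment, boundary);
        return 0;
    }
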
/*
@@ -389,14 +126,8 @@ uvm_pglistfree(struct pglist *list)
struct vm_page *m;
UVMHIST_FUNC("uvm_pglistfree"); UVMHIST_CALLED(pghist);
- /*
- * Block all memory allocation and lock the free list.
- */
- uvm_lock_fpageq();
-
- while ((m = TAILQ_FIRST(list)) != NULL) {
+ TAILQ_FOREACH(m, list, pageq) {
KASSERT((m->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
- TAILQ_REMOVE(list, m, pageq);
#ifdef DEBUG
if (m->uobject == (void *)0xdeadbeef &&
m->uanon == (void *)0xdeadbeef) {
@@ -408,15 +139,6 @@ uvm_pglistfree(struct pglist *list)
m->uanon = (void *)0xdeadbeef;
#endif
atomic_clearbits_int(&m->pg_flags, PQ_MASK);
- atomic_setbits_int(&m->pg_flags, PQ_FREE);
- TAILQ_INSERT_TAIL(&uvm.page_free[
- uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN],
- m, pageq);
- uvmexp.free++;
- if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
- uvm.page_idle_zero = vm_page_zero_enable;
- STAT_DECR(uvm_pglistalloc_npages);
}
-
- uvm_unlock_fpageq();
+ uvm_pmr_freepageq(list);
}
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
new file mode 100644
index 00000000000..86a0d137a97
--- /dev/null
+++ b/sys/uvm/uvm_pmemrange.c
@@ -0,0 +1,1248 @@
+/* $OpenBSD: uvm_pmemrange.c,v 1.1 2009/06/01 17:42:33 ariane Exp $ */
+
+/*
+ * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <uvm/uvm.h>
+#include <sys/malloc.h>
+
+/*
+ * 2 trees: addr tree and size tree.
+ *
+ * addr tree is vm_page[0].fq.free.tree
+ * size tree is vm_page[1].fq.free.tree
+ *
+ * The size tree is not used for memory ranges of 1 page; instead,
+ * single pages are kept on the queue vm_page[0].pageq
+ *
+ * uvm_page_init guarantees that every vm_physseg contains an array of
+ * struct vm_page. Also, uvm_page_physload allocates an array of struct
+ * vm_page. This code depends on that array.
+ */
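
Spelled out, the encoding used throughout this file for a free run of N contiguous pages starting at pg is (illustration only, not part of the commit):

    /*
     *   pg[0].fq.free.pages = N     run length, kept on the first page only
     *   pg[0].fq.free.tree          node in pmr->addr, the address tree
     *   pg[1].fq.free.tree          node in pmr->size[memtype], if N > 1
     *   pg[0].pageq                 on pmr->single[memtype], if N == 1
     *   pg[1..N-1].fq.free.pages    0
     *
     * Size lookups therefore land on the run's second page, which is why
     * uvm_pmr_size_cmp() and uvm_pmr_nfindsz() below compensate with pg - 1.
     */
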
+
+/* Tree comparators. */
+int uvm_pmemrange_addr_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
+int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
+int uvm_pmr_addr_cmp(struct vm_page *, struct vm_page *);
+int uvm_pmr_size_cmp(struct vm_page *, struct vm_page *);
+
+/* Memory types. The page flags are used to derive what the current memory
+ * type of a page is. */
+static __inline int
+uvm_pmr_pg_to_memtype(struct vm_page *pg)
+{
+ if (pg->pg_flags & PG_ZERO)
+ return UVM_PMR_MEMTYPE_ZERO;
+ /* Default: dirty memory. */
+ return UVM_PMR_MEMTYPE_DIRTY;
+}
+
+/* Cancel static calls (for profiling). */
+#define static
+#define __inline
+/* Trees. */
+RB_PROTOTYPE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
+RB_PROTOTYPE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
+RB_PROTOTYPE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
+ uvm_pmemrange_addr_cmp);
+RB_GENERATE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
+RB_GENERATE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
+RB_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
+ uvm_pmemrange_addr_cmp);
+#undef static
+#undef __inline
+
+/* Validation. */
+#ifdef DEBUG
+void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr);
+#else
+#define uvm_pmr_assertvalid(pmr) do {} while (0)
+#endif
+
+
+int uvm_pmr_get1page(psize_t, int, struct pglist *,
+ paddr_t, paddr_t);
+
+struct uvm_pmemrange *uvm_pmr_allocpmr(void);
+struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
+struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *,
+ struct vm_page *, int);
+void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr,
+ struct vm_page *pg, struct vm_page **pg_prev,
+ struct vm_page **pg_next);
+struct vm_page *uvm_pmr_insert(struct uvm_pmemrange *,
+ struct vm_page *, int);
+void uvm_pmr_remove(struct uvm_pmemrange *,
+ struct vm_page *);
+psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t,
+ struct vm_page **);
+void uvm_pmr_split(paddr_t);
+struct uvm_pmemrange *uvm_pmemrange_find(paddr_t);
+struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *,
+ struct uvm_pmemrange *);
+struct vm_page *uvm_pmr_extract_range(struct uvm_pmemrange *,
+ struct vm_page *, paddr_t, paddr_t,
+ struct pglist *);
+
+/*
+ * Computes num/denom and rounds it up to the next power-of-2.
+ */
+static __inline psize_t
+pow2divide(psize_t num, psize_t denom)
+{
+ int rshift = 0;
+
+ while (num > (denom << rshift))
+ rshift++;
+ return (paddr_t)1 << rshift;
+}
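
pow2divide() is used further down to size the initial search chunk (search[0]) when the caller allows up to maxseg segments. A self-contained userland restatement, with psize_t/paddr_t modeled as unsigned long, just to make the rounding behaviour concrete:

    #include <stdio.h>

    typedef unsigned long psize_t;      /* stand-ins for the kernel types */
    typedef unsigned long paddr_t;

    static psize_t
    pow2divide(psize_t num, psize_t denom)
    {
        int rshift = 0;

        while (num > (denom << rshift))
            rshift++;
        return (paddr_t)1 << rshift;
    }

    int
    main(void)
    {
        printf("%lu\n", pow2divide(10, 3));     /* 4: smallest 2^k >= 10/3 */
        printf("%lu\n", pow2divide(9, 2));      /* 8: smallest 2^k >= 4.5  */
        printf("%lu\n", pow2divide(4, 4));      /* 1 */
        return 0;
    }
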
+
+/*
+ * Predicate: lhs is a subrange of rhs.
+ */
+#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high) \
+ ((lhs_low) >= (rhs_low) && (lhs_high <= rhs_high))
+
+/*
+ * Align to power-of-2 alignment.
+ */
+#define PMR_ALIGN(pgno, align) \
+ (((pgno) + ((align) - 1)) & ~((align) - 1))
+
+
+/*
+ * Comparator: sort by address ascending.
+ */
+int
+uvm_pmemrange_addr_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
+{
+ return lhs->low < rhs->low ? -1 : lhs->low > rhs->low;
+}
+
+/*
+ * Comparator: sort by use ascending.
+ *
+ * The higher the use value of a range, the more devices need memory in
+ * this range. Therefore, allocate from the range with the lowest use first.
+ */
+int
+uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
+{
+ int result;
+
+ result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use;
+ if (result == 0)
+ result = uvm_pmemrange_addr_cmp(lhs, rhs);
+ return result;
+}
+
+int
+uvm_pmr_addr_cmp(struct vm_page *lhs, struct vm_page *rhs)
+{
+ paddr_t lhs_addr, rhs_addr;
+
+ lhs_addr = VM_PAGE_TO_PHYS(lhs);
+ rhs_addr = VM_PAGE_TO_PHYS(rhs);
+
+ return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr);
+}
+
+int
+uvm_pmr_size_cmp(struct vm_page *lhs, struct vm_page *rhs)
+{
+ psize_t lhs_size, rhs_size;
+ int cmp;
+
+ /* Using second tree, so we receive pg[1] instead of pg[0]. */
+ lhs_size = (lhs - 1)->fq.free.pages;
+ rhs_size = (rhs - 1)->fq.free.pages;
+
+ cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size);
+ if (cmp == 0)
+ cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1);
+ return cmp;
+}
+
+/*
+ * Find the first range of free pages that is at least sz pages long.
+ */
+struct vm_page *
+uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
+{
+ struct vm_page *node, *best;
+
+ KASSERT(sz >= 1);
+
+ if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti]))
+ return TAILQ_FIRST(&pmr->single[mti]);
+
+ node = RB_ROOT(&pmr->size[mti]);
+ best = NULL;
+ while (node != NULL) {
+ if ((node - 1)->fq.free.pages >= sz) {
+ best = (node - 1);
+ node = RB_LEFT(node, fq.free.tree);
+ } else
+ node = RB_RIGHT(node, fq.free.tree);
+ }
+ return best;
+}
+
+/*
+ * Finds the next range. The next range has a size >= pg->fq.free.pages.
+ * Returns NULL if no more ranges are available.
+ */
+struct vm_page *
+uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
+{
+ struct vm_page *npg;
+
+ KASSERT(pmr != NULL && pg != NULL);
+ if (pg->fq.free.pages == 1) {
+ if (TAILQ_NEXT(pg, pageq) != NULL)
+ return TAILQ_NEXT(pg, pageq);
+ else
+ npg = RB_MIN(uvm_pmr_size, &pmr->size[mt]);
+ } else
+ npg = RB_NEXT(uvm_pmr_size, &pmr->size[mt], pg + 1);
+
+ return npg == NULL ? NULL : npg - 1;
+}
+
+/*
+ * Finds the previous and next ranges relative to the (uninserted) pg range.
+ *
+ * *pg_prev == NULL if no previous range is available that can join with
+ * pg.
+ * *pg_next == NULL if no next range is available that can join with
+ * pg.
+ */
+void
+uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
+ struct vm_page **pg_prev, struct vm_page **pg_next)
+{
+ KASSERT(pg_prev != NULL && pg_next != NULL);
+
+ *pg_next = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
+ if (*pg_next == NULL)
+ *pg_prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
+ else
+ *pg_prev = RB_PREV(uvm_pmr_addr, &pmr->addr, *pg_next);
+
+ /* Reset if not contig. */
+ if (*pg_prev != NULL &&
+ (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fq.free.pages
+ != atop(VM_PAGE_TO_PHYS(pg)) ||
+ uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
+ *pg_prev = NULL;
+ if (*pg_next != NULL &&
+ (atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages
+ != atop(VM_PAGE_TO_PHYS(*pg_next)) ||
+ uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
+ *pg_next = NULL;
+ return;
+}
+
+/*
+ * Remove a range from the address tree.
+ * Address tree maintains pmr counters.
+ */
+static __inline void
+uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
+ KASSERT(pg->pg_flags & PQ_FREE);
+ RB_REMOVE(uvm_pmr_addr, &pmr->addr, pg);
+
+ pmr->nsegs--;
+}
+/*
+ * Remove a range from the size tree.
+ */
+static __inline void
+uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ int memtype;
+#ifdef DEBUG
+ struct vm_page *i;
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ memtype = uvm_pmr_pg_to_memtype(pg);
+
+ if (pg->fq.free.pages == 1) {
+#ifdef DEBUG
+ TAILQ_FOREACH(i, &pmr->single[memtype], pageq) {
+ if (i == pg)
+ break;
+ }
+ KDASSERT(i == pg);
+#endif
+ TAILQ_REMOVE(&pmr->single[memtype], pg, pageq);
+ } else {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[memtype],
+ pg + 1) == pg + 1);
+ RB_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1);
+ }
+}
+/* Remove from both trees. */
+void
+uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_remove_size(pmr, pg);
+ uvm_pmr_remove_addr(pmr, pg);
+ uvm_pmr_assertvalid(pmr);
+}
+
+/*
+ * Insert the range described in pg.
+ * Returns the range thus created (which may be joined with the previous and
+ * next ranges).
+ * If no_join, the caller guarantees that the range cannot possibly join
+ * with adjecent ranges.
+ */
+static __inline struct vm_page *
+uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
+{
+ struct vm_page *prev, *next;
+
+#ifdef DEBUG
+ struct vm_page *i;
+ int mt;
+
+ for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
+ TAILQ_FOREACH(i, &pmr->single[mt], pageq)
+ KDASSERT(i != pg);
+ if (pg->fq.free.pages > 1) {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mt],
+ pg + 1) == NULL);
+ }
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL);
+ }
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ KASSERT(pg->fq.free.pages >= 1);
+
+ if (!no_join) {
+ uvm_pmr_pnaddr(pmr, pg, &prev, &next);
+ if (next != NULL) {
+ uvm_pmr_remove_size(pmr, next);
+ uvm_pmr_remove_addr(pmr, next);
+ pg->fq.free.pages += next->fq.free.pages;
+ next->fq.free.pages = 0;
+ }
+ if (prev != NULL) {
+ uvm_pmr_remove_size(pmr, prev);
+ prev->fq.free.pages += pg->fq.free.pages;
+ pg->fq.free.pages = 0;
+ return prev;
+ }
+ }
+#ifdef DEBUG
+ else {
+ uvm_pmr_pnaddr(pmr, pg, &prev, &next);
+ KDASSERT(prev == NULL && next == NULL);
+ }
+#endif /* DEBUG */
+
+ RB_INSERT(uvm_pmr_addr, &pmr->addr, pg);
+
+ pmr->nsegs++;
+
+ return pg;
+}
+/*
+ * Insert the range described in pg into the size tree (or, for a
+ * 1-page range, onto the single-page queue).
+ * The page must already be in the address tree.
+ */
+static __inline void
+uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
+{
+ int memtype;
+#ifdef DEBUG
+ struct vm_page *i;
+ int mti;
+#endif
+
+ memtype = uvm_pmr_pg_to_memtype(pg);
+#ifdef DEBUG
+ for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
+ TAILQ_FOREACH(i, &pmr->single[mti], pageq)
+ KDASSERT(i != pg);
+ if (pg->fq.free.pages > 1) {
+ KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mti],
+ pg + 1) == NULL);
+ }
+ KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
+ }
+ for (i = pg; i < pg + pg->fq.free.pages; i++)
+ KASSERT(uvm_pmr_pg_to_memtype(i) == memtype);
+#endif
+
+ KASSERT(pg->pg_flags & PQ_FREE);
+ KASSERT(pg->fq.free.pages >= 1);
+
+ if (pg->fq.free.pages == 1)
+ TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq);
+ else
+ RB_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1);
+}
+/* Insert in both trees. */
+struct vm_page *
+uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
+{
+ uvm_pmr_assertvalid(pmr);
+ pg = uvm_pmr_insert_addr(pmr, pg, no_join);
+ uvm_pmr_insert_size(pmr, pg);
+ uvm_pmr_assertvalid(pmr);
+ return pg;
+}
+
+/*
+ * Remove the first segment of contiguous pages from pgl.
+ * A segment ends if it crosses boundary (unless boundary = 0) or
+ * if it would enter a different uvm_pmemrange.
+ *
+ * Work: the page range that the caller is currently working with.
+ * May be null.
+ */
+psize_t
+uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary,
+ struct vm_page **work)
+{
+ struct vm_page *pg, *pre_last, *last, *inserted;
+ psize_t count;
+ struct uvm_pmemrange *pmr;
+ paddr_t first_boundary;
+
+ KASSERT(!TAILQ_EMPTY(pgl));
+
+ pg = TAILQ_FIRST(pgl);
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ KDASSERT(pmr != NULL);
+ if (boundary != 0) {
+ first_boundary =
+ PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary);
+ } else
+ first_boundary = 0;
+
+ /* Remove all pages in the first segment. */
+ pre_last = pg;
+ last = TAILQ_NEXT(pre_last, pageq);
+ TAILQ_REMOVE(pgl, pre_last, pageq);
+ count = 1;
+ /*
+ * While loop checks the following:
+ * - last != NULL we have not reached the end of pgs
+ * - boundary == 0 || last < first_boundary
+ * we do not cross a boundary
+ * - atop(pre_last) + 1 == atop(last)
+ * still in the same segment
+ * - low <= last
+ * - high > last still testing the same memory range
+ *
+ * At the end of the loop, last points at the next segment
+ * and each page [pg, pre_last] (inclusive range) has been removed
+ * and count is the number of pages that have been removed.
+ */
+ while (last != NULL &&
+ (boundary == 0 || atop(VM_PAGE_TO_PHYS(last)) < first_boundary) &&
+ atop(VM_PAGE_TO_PHYS(pre_last)) + 1 ==
+ atop(VM_PAGE_TO_PHYS(last)) &&
+ pmr->low <= atop(VM_PAGE_TO_PHYS(last)) &&
+ pmr->high > atop(VM_PAGE_TO_PHYS(last))) {
+ count++;
+ pre_last = last;
+ last = TAILQ_NEXT(last, pageq);
+ TAILQ_REMOVE(pgl, pre_last, pageq);
+ }
+ KDASSERT(TAILQ_FIRST(pgl) == last);
+ KDASSERT(pg + (count - 1) == pre_last);
+
+ pg->fq.free.pages = count;
+ inserted = uvm_pmr_insert(pmr, pg, 0);
+
+ if (work != NULL && *work != NULL &&
+ atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) &&
+ atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fq.free.pages >
+ atop(VM_PAGE_TO_PHYS(*work)))
+ *work = inserted;
+ return count;
+}
+
+/*
+ * Extract a number of pages from a segment of free pages.
+ * Called by uvm_pmr_getpages.
+ *
+ * Returns the segment that was created from pages left over at the tail
+ * of the removed set of pages, or NULL if no pages were left at the tail.
+ */
+struct vm_page *
+uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg,
+ paddr_t start, paddr_t end, struct pglist *result)
+{
+ struct vm_page *after, *pg_i;
+ psize_t before_sz, after_sz;
+#ifdef DEBUG
+ psize_t i;
+#endif
+
+ KASSERT(end > start);
+ KASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)));
+ KASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages);
+ KASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start);
+ KASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages >= end);
+
+ before_sz = start - atop(VM_PAGE_TO_PHYS(pg));
+ after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages - end;
+ KDASSERT(before_sz + after_sz + (end - start) == pg->fq.free.pages);
+ uvm_pmr_assertvalid(pmr);
+
+ uvm_pmr_remove_size(pmr, pg);
+ if (before_sz == 0)
+ uvm_pmr_remove_addr(pmr, pg);
+
+ /* Add selected pages to result. */
+ for (pg_i = pg + before_sz; atop(VM_PAGE_TO_PHYS(pg_i)) < end;
+ pg_i++) {
+ pg_i->fq.free.pages = 0;
+ TAILQ_INSERT_TAIL(result, pg_i, pageq);
+ KDASSERT(pg_i->pg_flags & PQ_FREE);
+ }
+
+ /* Before handling. */
+ if (before_sz > 0) {
+ pg->fq.free.pages = before_sz;
+ uvm_pmr_insert_size(pmr, pg);
+ }
+
+ /* After handling. */
+ after = NULL;
+ if (after_sz > 0) {
+ after = pg + before_sz + (end - start);
+#ifdef DEBUG
+ for (i = 0; i < after_sz; i++) {
+ KASSERT(!uvm_pmr_isfree(after + i));
+ }
+#endif
+ KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end);
+ after->fq.free.pages = after_sz;
+ after = uvm_pmr_insert_addr(pmr, after, 1);
+ uvm_pmr_insert_size(pmr, after);
+ }
+
+ uvm_pmr_assertvalid(pmr);
+ return after;
+}
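
To make the index arithmetic above concrete, a hypothetical worked example (numbers are page numbers, not part of the commit):

    /*
     * Worked example: extracting [start, end) = [16, 24) from a free
     * run pg spanning pages [10, 30):
     *
     *   before_sz = 16 - 10 = 6   -> pg stays as the 6-page run [10, 16)
     *   extracted = 24 - 16 = 8   -> pages appended to result
     *   after_sz  = 30 - 24 = 6   -> pg + 14 becomes the run [24, 30)
     *                                and is returned as "after"
     */
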
+
+/*
+ * Acquire a number of pages.
+ *
+ * count: the number of pages returned
+ * start: lowest page number
+ * end: highest page number +1
+ * (start = end = 0: no limitation)
+ * align: power-of-2 alignment constraint (align = 1: no alignment)
+ * boundary: power-of-2 boundary (boundary = 0: no boundary)
+ * maxseg: maximum number of segments to return
+ * flags: UVM_PLA_* flags
+ * result: returned pages storage (uses pageq)
+ */
+int
+uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
+ paddr_t boundary, int maxseg, int flags, struct pglist *result)
+{
+ struct uvm_pmemrange *pmr; /* Iterate memory ranges. */
+ struct vm_page *found, *f_next; /* Iterate chunks. */
+ psize_t fcount; /* Current found pages. */
+ int fnsegs; /* Current segment counter. */
+ int try, start_try;
+ psize_t search[2];
+ paddr_t fstart, fend; /* Pages to be taken from found. */
+ int memtype; /* Requested memtype. */
+ int desperate; /* True if allocation failed. */
+
+ /* Validate arguments. */
+ KASSERT(count > 0);
+ KASSERT((start == 0 && end == 0) || (start < end));
+ KASSERT(align >= 1 && powerof2(align));
+ KASSERT(maxseg > 0);
+ KASSERT(boundary == 0 || powerof2(boundary));
+ KDASSERT(boundary == 0 || maxseg * boundary >= count);
+ KASSERT(TAILQ_EMPTY(result));
+
+ /* Configure search. If start_try == 0, search[0] should be faster
+ * (because it will have to throw away fewer segments).
+ * search[1] is the worst case: start searching at the smallest
+ * possible range instead of starting at the range most likely to
+ * fulfill the allocation. */
+ start_try = 0;
+ search[0] = (flags & UVM_PLA_TRY_CONTIG ? count :
+ pow2divide(count, maxseg));
+ search[1] = 1;
+ if (maxseg == 1) {
+ start_try = 1;
+ search[1] = count;
+ } else if (search[1] >= search[0])
+ start_try = 1;
+
+ReTry: /* Return point after sleeping. */
+ fcount = 0;
+ fnsegs = 0;
+
+ /* Memory type: if zeroed memory is requested, traverse the zero set.
+ * Otherwise, traverse the dirty set. */
+ if (flags & UVM_PLA_ZERO)
+ memtype = UVM_PMR_MEMTYPE_ZERO;
+ else
+ memtype = UVM_PMR_MEMTYPE_DIRTY;
+ desperate = 0;
+
+ uvm_lock_fpageq();
+
+ReTryDesperate:
+ /*
+ * If we just want any page(s), go for the really fast option.
+ */
+ if (count <= maxseg && align == 1 && boundary == 0 &&
+ (flags & UVM_PLA_TRY_CONTIG) == 0) {
+ if (!desperate) {
+ KASSERT(fcount == 0);
+ fcount += uvm_pmr_get1page(count, memtype, result,
+ start, end);
+ } else {
+ for (memtype = 0; memtype < UVM_PMR_MEMTYPE_MAX &&
+ fcount < count; memtype++) {
+ fcount += uvm_pmr_get1page(count - fcount,
+ memtype, result, start, end);
+ }
+ }
+
+ if (fcount == count)
+ goto Out;
+ else
+ goto Fail;
+ }
+
+ TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
+ /* Empty range. */
+ if (pmr->nsegs == 0)
+ continue;
+
+ /* Outside requested range. */
+ if (!(start == 0 && end == 0) &&
+ !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end))
+ continue;
+
+ try = start_try;
+ReScan: /* Return point at try++. */
+
+ for (found = uvm_pmr_nfindsz(pmr, search[try], memtype);
+ found != NULL;
+ found = f_next) {
+ f_next = uvm_pmr_nextsz(pmr, found, memtype);
+
+ fstart = atop(VM_PAGE_TO_PHYS(found));
+DrainFound:
+ /* Throw away the first segment if fnsegs == maxseg */
+ if (fnsegs == maxseg) {
+ fnsegs--;
+ fcount -=
+ uvm_pmr_remove_1strange(result, boundary,
+ &found);
+ }
+
+ fstart = PMR_ALIGN(fstart, align);
+ fend = atop(VM_PAGE_TO_PHYS(found)) +
+ found->fq.free.pages;
+ if (fstart >= fend)
+ continue;
+ if (boundary != 0) {
+ fend =
+ MIN(fend, PMR_ALIGN(fstart + 1, boundary));
+ }
+ if (fend - fstart > count - fcount)
+ fend = fstart + (count - fcount);
+
+ fcount += fend - fstart;
+ fnsegs++;
+ found = uvm_pmr_extract_range(pmr, found,
+ fstart, fend, result);
+
+ if (fcount == count)
+ goto Out;
+
+ /* If there's still space left in found, try to
+ * fully drain it prior to continuing. */
+ if (found != NULL) {
+ fstart = fend;
+ goto DrainFound;
+ }
+ }
+
+ if (++try < nitems(search))
+ goto ReScan;
+ }
+
+ /*
+ * Not enough memory of the requested type available. Fall back to
+ * less good memory that we'll clean up better later.
+ *
+ * This algorithm is not very smart though, it just starts scanning
+ * a different typed range, but the nicer ranges of the previous
+ * iteration may fall out.
+ */
+ if (!desperate) {
+ desperate = 1;
+ memtype = 0;
+ goto ReTryDesperate;
+ } else if (++memtype < UVM_PMR_MEMTYPE_MAX)
+ goto ReTryDesperate;
+
+Fail:
+ /*
+ * Allocation failed.
+ */
+
+ /* XXX: claim from memory reserve here */
+
+ while (!TAILQ_EMPTY(result))
+ uvm_pmr_remove_1strange(result, 0, NULL);
+ uvm_unlock_fpageq();
+
+ if (flags & UVM_PLA_WAITOK) {
+ uvm_wait("uvm_pmr_getpages");
+ goto ReTry;
+ } else
+ wakeup(&uvm.pagedaemon_proc);
+
+ return ENOMEM;
+
+Out:
+
+ /*
+ * Allocation successful.
+ */
+
+ uvmexp.free -= fcount;
+
+ uvm_unlock_fpageq();
+
+ /* Update statistics and zero pages if UVM_PLA_ZERO. */
+ TAILQ_FOREACH(found, result, pageq) {
+ if (found->pg_flags & PG_ZERO) {
+ uvmexp.zeropages--;
+ }
+ if (flags & UVM_PLA_ZERO) {
+ if (found->pg_flags & PG_ZERO)
+ uvmexp.pga_zerohit++;
+ else {
+ uvmexp.pga_zeromiss++;
+ uvm_pagezero(found);
+ }
+ }
+ atomic_clearbits_int(&found->pg_flags, PG_ZERO | PQ_FREE);
+
+ found->uobject = NULL;
+ found->uanon = NULL;
+ found->pg_version++;
+ }
+
+ return 0;
+}
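
A hedged usage sketch of the new allocator entry point, matching the parameter description above: 16 zeroed, physically contiguous pages below 16MB (maxseg = 1 forces a single segment, and one of UVM_PLA_WAITOK/UVM_PLA_NOWAIT is given). example_alloc_isadma() is a hypothetical wrapper, not part of the commit:

    int
    example_alloc_isadma(struct pglist *pgl)
    {
        TAILQ_INIT(pgl);
        /* count=16, pages [0, atop(16MB)), align=1, boundary=0, maxseg=1 */
        return uvm_pmr_getpages(16, 0, atop(0x1000000), 1, 0, 1,
            UVM_PLA_ZERO | UVM_PLA_WAITOK, pgl);
    }

On failure the pages already gathered are released internally (the Fail path above); on success the caller eventually returns them with uvm_pmr_freepageq() or uvm_pglistfree().
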
+
+/*
+ * Free a number of contig pages (invoked by uvm_page_init).
+ */
+void
+uvm_pmr_freepages(struct vm_page *pg, psize_t count)
+{
+ struct uvm_pmemrange *pmr;
+ psize_t i, pmr_count;
+
+ uvm_lock_fpageq();
+
+ for (i = 0; i < count; i++) {
+ atomic_clearbits_int(&pg[i].pg_flags, pg[i].pg_flags);
+ atomic_setbits_int(&pg[i].pg_flags, PQ_FREE);
+ }
+
+ while (count > 0) {
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ KASSERT(pmr != NULL);
+
+ pmr_count = MIN(count, pmr->high - atop(VM_PAGE_TO_PHYS(pg)));
+ pg->fq.free.pages = pmr_count;
+ uvm_pmr_insert(pmr, pg, 0);
+
+ uvmexp.free += pmr_count;
+ count -= pmr_count;
+ pg += pmr_count;
+ }
+ wakeup(&uvmexp.free);
+
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Free all pages in the queue.
+ */
+void
+uvm_pmr_freepageq(struct pglist *pgl)
+{
+ struct vm_page *pg;
+
+ TAILQ_FOREACH(pg, pgl, pageq) {
+ atomic_clearbits_int(&pg->pg_flags, pg->pg_flags);
+ atomic_setbits_int(&pg->pg_flags, PQ_FREE);
+ }
+
+ uvm_lock_fpageq();
+ while (!TAILQ_EMPTY(pgl))
+ uvmexp.free += uvm_pmr_remove_1strange(pgl, 0, NULL);
+ wakeup(&uvmexp.free);
+ uvm_unlock_fpageq();
+
+ return;
+}
+
+/*
+ * Store a pmemrange in the list.
+ *
+ * The list is sorted by use.
+ */
+struct uvm_pmemrange *
+uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq,
+ struct uvm_pmemrange *pmr)
+{
+ struct uvm_pmemrange *iter;
+ int cmp = 1;
+
+ TAILQ_FOREACH(iter, useq, pmr_use) {
+ cmp = uvm_pmemrange_use_cmp(pmr, iter);
+ if (cmp == 0)
+ return iter;
+ if (cmp == -1)
+ break;
+ }
+ if (cmp == 0)
+ return iter;
+
+ if (iter == NULL)
+ TAILQ_INSERT_TAIL(useq, pmr, pmr_use);
+ else
+ TAILQ_INSERT_BEFORE(iter, pmr, pmr_use);
+ return NULL;
+}
+
+#ifdef DEBUG
+/*
+ * Validation of the whole pmemrange.
+ * Called with fpageq locked.
+ */
+void
+uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)
+{
+ struct vm_page *prev, *next, *i, *xref;
+ int lcv, mti;
+
+ /* Validate address tree. */
+ RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) {
+ /* Validate the range. */
+ KASSERT(i->fq.free.pages > 0);
+ KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low);
+ KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fq.free.pages
+ <= pmr->high);
+
+ /* Validate each page in this range. */
+ for (lcv = 0; lcv < i->fq.free.pages; lcv++) {
+ KASSERT(lcv == 0 || i[lcv].fq.free.pages == 0);
+ /* Flag check:
+ * - PG_ZERO: page is zeroed.
+ * - PQ_FREE: page is free.
+ * Any other flag is a mistake. */
+ if (i[lcv].pg_flags !=
+ (i[lcv].pg_flags & (PG_ZERO | PQ_FREE))) {
+ panic("i[%lu].pg_flags = %x, should be %x\n",
+ lcv, i[lcv].pg_flags, PG_ZERO | PQ_FREE);
+ }
+ /* Free pages are:
+ * - not wired
+ * - not loaned
+ * - have no vm_anon
+ * - have no uvm_object */
+ KASSERT(i[lcv].wire_count == 0);
+ KASSERT(i[lcv].loan_count == 0);
+ KASSERT(i[lcv].uanon == NULL);
+ KASSERT(i[lcv].uobject == NULL);
+ /* Pages in a single range always have the same
+ * memtype. */
+ KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==
+ uvm_pmr_pg_to_memtype(&i[lcv]));
+ }
+
+ /* Check that it shouldn't be joined with its predecessor. */
+ prev = RB_PREV(uvm_pmr_addr, &pmr->addr, i);
+ if (prev != NULL) {
+ KASSERT(uvm_pmr_pg_to_memtype(&i[0]) !=
+ uvm_pmr_pg_to_memtype(&i[lcv]) ||
+ atop(VM_PAGE_TO_PHYS(i)) >
+ atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages);
+ }
+
+ /* Assert i is in the size tree as well. */
+ if (i->fq.free.pages == 1) {
+ TAILQ_FOREACH(xref,
+ &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) {
+ if (xref == i)
+ break;
+ }
+ KASSERT(xref == i);
+ } else {
+ KASSERT(RB_FIND(uvm_pmr_size,
+ &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==
+ i + 1);
+ }
+ }
+
+ /* Validate size tree. */
+ for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
+ for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) {
+ next = uvm_pmr_nextsz(pmr, i, mti);
+ if (next != NULL) {
+ KASSERT(i->fq.free.pages <=
+ next->fq.free.pages);
+ }
+
+ /* Assert i is in the addr tree as well. */
+ KASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, i) == i);
+
+ /* Assert i is of the correct memory type. */
+ KASSERT(uvm_pmr_pg_to_memtype(i) == mti);
+ }
+ }
+
+ /* Validate nsegs statistic. */
+ lcv = 0;
+ RB_FOREACH(i, uvm_pmr_addr, &pmr->addr)
+ lcv++;
+ KASSERT(pmr->nsegs == lcv);
+}
+#endif /* DEBUG */
+
+/*
+ * Split pmr at split point pageno.
+ * Called with fpageq unlocked.
+ *
+ * Split is only applied if a pmemrange spans pageno.
+ */
+void
+uvm_pmr_split(paddr_t pageno)
+{
+ struct uvm_pmemrange *pmr, *drain;
+ struct vm_page *rebuild, *prev, *next;
+ psize_t prev_sz;
+
+ uvm_lock_fpageq();
+ pmr = uvm_pmemrange_find(pageno);
+ if (pmr == NULL || !(pmr->low < pageno)) {
+ /* No split required. */
+ uvm_unlock_fpageq();
+ return;
+ }
+
+ KASSERT(pmr->low < pageno);
+ KASSERT(pmr->high > pageno);
+
+ drain = uvm_pmr_allocpmr();
+ drain->low = pageno;
+ drain->high = pmr->high;
+ drain->use = pmr->use;
+
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_assertvalid(drain);
+ KASSERT(drain->nsegs == 0);
+
+ RB_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) {
+ if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno)
+ break;
+ }
+ if (rebuild == NULL)
+ prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
+ else
+ prev = RB_PREV(uvm_pmr_addr, &pmr->addr, rebuild);
+ KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno);
+
+ /*
+ * Handle free chunk that spans the split point.
+ */
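+	/*
+	 * For example (illustrative numbers only): a chunk of 8 free pages
+	 * starting at page 10, split at pageno 14, is cut into a 4 page
+	 * chunk [10, 14) that stays in pmr and a 4 page chunk [14, 18)
+	 * that is handed to drain ("before" and "after" below).
+	 */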
+ if (prev != NULL &&
+ atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages > pageno) {
+ psize_t before, after;
+
+ KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno);
+
+ uvm_pmr_remove(pmr, prev);
+ prev_sz = prev->fq.free.pages;
+ before = pageno - atop(VM_PAGE_TO_PHYS(prev));
+ after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno;
+
+ KASSERT(before > 0);
+ KASSERT(after > 0);
+
+ prev->fq.free.pages = before;
+ uvm_pmr_insert(pmr, prev, 1);
+ (prev + before)->fq.free.pages = after;
+ uvm_pmr_insert(drain, prev + before, 1);
+ }
+
+ /*
+ * Move free chunks that no longer fall in the range.
+ */
+ for (; rebuild != NULL; rebuild = next) {
+ next = RB_NEXT(uvm_pmr_addr, &pmr->addr, rebuild);
+
+ uvm_pmr_remove(pmr, rebuild);
+ uvm_pmr_insert(drain, rebuild, 1);
+ }
+
+ pmr->high = pageno;
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_assertvalid(drain);
+
+ RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain);
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain);
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Increase the usage counter for the given range of memory.
+ *
+ * The more usage counters a given range of memory has, the harder the
+ * allocator will try to avoid allocating from it.
+ *
+ * Addresses here are in paddr_t, not page-numbers.
+ * The lowest and highest allowed address are specified.
+ */
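+/*
+ * For example, a platform that wants to preserve ISA-DMA capable memory
+ * would call uvm_pmr_use_inc(0, 0x00ffffffUL), so that the low 16MB is
+ * only handed out after less constrained ranges have been tried (see
+ * uvm_pmr_init() and UVM_IO_RANGES).
+ */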
+void
+uvm_pmr_use_inc(paddr_t low, paddr_t high)
+{
+ struct uvm_pmemrange *pmr;
+
+ /*
+ * If high+1 == 0, then you are increasing use of the whole address
+ * space, which won't make any difference. Skip in that case.
+ */
+ high++;
+ if (high == 0)
+ return;
+
+ /*
+ * pmr uses page numbers, translate low and high.
+ */
+ low = atop(round_page(low));
+ high = atop(trunc_page(high));
+ uvm_pmr_split(low);
+ uvm_pmr_split(high);
+
+ uvm_lock_fpageq();
+
+ /* Increase use count on segments in range. */
+ RB_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) {
+ if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) {
+ TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use);
+ pmr->use++;
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr);
+ }
+ uvm_pmr_assertvalid(pmr);
+ }
+
+ uvm_unlock_fpageq();
+}
+
+/*
+ * Allocate a pmemrange.
+ *
+ * If called from uvm_page_init, uvm_pageboot_alloc() is used.
+ * If called after uvm_init, malloc(9) is used.
+ * (And if called in between, you're dead.)
+ */
+struct uvm_pmemrange *
+uvm_pmr_allocpmr(void)
+{
+ struct uvm_pmemrange *nw;
+ int i;
+
+ if (!uvm.page_init_done) {
+ nw = (struct uvm_pmemrange *)
+ uvm_pageboot_alloc(sizeof(struct uvm_pmemrange));
+ bzero(nw, sizeof(struct uvm_pmemrange));
+ } else {
+		nw = malloc(sizeof(struct uvm_pmemrange),
+		    M_VMMAP, M_NOWAIT | M_ZERO);
+		KASSERT(nw != NULL);
+	}
+ RB_INIT(&nw->addr);
+ for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) {
+ RB_INIT(&nw->size[i]);
+ TAILQ_INIT(&nw->single[i]);
+ }
+ return nw;
+}
+
+static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES;
+
+/*
+ * Initialization of pmr.
+ * Called by uvm_page_init.
+ *
+ * Sets up pmemranges that map the vm_physmem data.
+ */
+void
+uvm_pmr_init(void)
+{
+ struct uvm_pmemrange *new_pmr;
+ int i;
+
+ TAILQ_INIT(&uvm.pmr_control.use);
+ RB_INIT(&uvm.pmr_control.addr);
+
+	for (i = 0; i < vm_nphysseg; i++) {
+ new_pmr = uvm_pmr_allocpmr();
+
+ new_pmr->low = vm_physmem[i].start;
+ new_pmr->high = vm_physmem[i].end;
+
+ RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr);
+ uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr);
+ }
+
+ for (i = 0; i < nitems(uvm_io_ranges); i++)
+ uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high);
+}
+
+/*
+ * Find the pmemrange that contains the given page number.
+ *
+ * (Manually traverses the binary tree, because that is cheaper on stack
+ * usage.)
+ */
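+/*
+ * Note that ranges are half-open: low is inclusive, high is exclusive,
+ * hence the "pmr->high <= pageno" test below.
+ */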
+struct uvm_pmemrange *
+uvm_pmemrange_find(paddr_t pageno)
+{
+ struct uvm_pmemrange *pmr;
+
+ pmr = RB_ROOT(&uvm.pmr_control.addr);
+ while (pmr != NULL) {
+ if (pmr->low > pageno)
+ pmr = RB_LEFT(pmr, pmr_addr);
+ else if (pmr->high <= pageno)
+ pmr = RB_RIGHT(pmr, pmr_addr);
+ else
+ break;
+ }
+
+ return pmr;
+}
+
+#if defined(DDB) || defined(DEBUG)
+/*
+ * Return true if the given page is in any of the free lists.
+ * Used by uvm_page_printit.
+ * This function is safe, even if the page is not on the freeq.
+ * Note: does not apply locking, only called from ddb.
+ */
+int
+uvm_pmr_isfree(struct vm_page *pg)
+{
+ struct vm_page *r;
+ struct uvm_pmemrange *pmr;
+
+ pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
+ if (pmr == NULL)
+ return 0;
+	r = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
+	if (r == NULL)
+		r = RB_MAX(uvm_pmr_addr, &pmr->addr);
+	else if (r != pg)
+		r = RB_PREV(uvm_pmr_addr, &pmr->addr, r);
+	if (r == NULL)
+		return 0; /* Empty tree or pg below lowest free chunk. */
+
+ KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)));
+ return atop(VM_PAGE_TO_PHYS(r)) + r->fq.free.pages >
+ atop(VM_PAGE_TO_PHYS(pg));
+}
+#endif /* DDB || DEBUG */
+
+/*
+ * Allocate any page, the fastest way. No constraints.
+ */
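+/* Returns the number of pages placed on result; may be fewer than count. */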
+int
+uvm_pmr_get1page(psize_t count, int memtype, struct pglist *result,
+ paddr_t start, paddr_t end)
+{
+ struct uvm_pmemrange *pmr;
+ struct vm_page *found;
+ psize_t fcount;
+
+ fcount = 0;
+ pmr = TAILQ_FIRST(&uvm.pmr_control.use);
+ while (pmr != NULL && fcount != count) {
+ /* Outside requested range. */
+ if (!(start == 0 && end == 0) &&
+ !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end)) {
+ pmr = TAILQ_NEXT(pmr, pmr_use);
+ continue;
+ }
+
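+		/*
+		 * No constraints apply, so grab whatever is cheapest to
+		 * find: a single-page chunk if one is available, otherwise
+		 * the chunk at the root of the size tree.
+		 */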
+ found = TAILQ_FIRST(&pmr->single[memtype]);
+ if (found == NULL) {
+ found = RB_ROOT(&pmr->size[memtype]);
+ /* Size tree gives pg[1] instead of pg[0] */
+ if (found != NULL)
+ found--;
+ }
+ if (found == NULL) {
+ pmr = TAILQ_NEXT(pmr, pmr_use);
+ continue;
+ }
+
+ uvm_pmr_assertvalid(pmr);
+ uvm_pmr_remove_size(pmr, found);
+ while (found->fq.free.pages > 0 && fcount < count) {
+ found->fq.free.pages--;
+ fcount++;
+ TAILQ_INSERT_HEAD(result,
+ &found[found->fq.free.pages], pageq);
+ }
+ if (found->fq.free.pages > 0) {
+ uvm_pmr_insert_size(pmr, found);
+ KASSERT(fcount == count);
+ uvm_pmr_assertvalid(pmr);
+ return fcount;
+ } else
+ uvm_pmr_remove_addr(pmr, found);
+ uvm_pmr_assertvalid(pmr);
+ }
+
+ /* Ran out of ranges before enough pages were gathered. */
+ return fcount;
+}
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
new file mode 100644
index 00000000000..90219dc075b
--- /dev/null
+++ b/sys/uvm/uvm_pmemrange.h
@@ -0,0 +1,83 @@
+/* $OpenBSD: uvm_pmemrange.h,v 1.1 2009/06/01 17:42:33 ariane Exp $ */
+
+/*
+ * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * uvm_pmemrange.h: describe and manage free physical memory.
+ */
+
+#ifndef _UVM_UVM_PMEMRANGE_H_
+#define _UVM_UVM_PMEMRANGE_H_
+
+#include <uvm/uvm_extern.h>
+#include <uvm/uvm_page.h>
+
+RB_HEAD(uvm_pmr_addr, vm_page);
+RB_HEAD(uvm_pmr_size, vm_page);
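+
+/*
+ * Free pages are kept as runs of contiguous pages.  The first page of a
+ * run carries the run length in fq.free.pages and is linked into the
+ * address tree; runs of two or more pages are additionally linked into
+ * the size tree through their second page, while single-page runs live
+ * on the single[] queues instead.
+ */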
+
+/*
+ * Page types available:
+ * - DIRTY: this page may contain random data.
+ * - ZERO: this page has been zeroed.
+ */
+#define UVM_PMR_MEMTYPE_DIRTY	0
+#define UVM_PMR_MEMTYPE_ZERO 1
+#define UVM_PMR_MEMTYPE_MAX 2
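+
+/* The memtype is used as an index into the size[] and single[] arrays below. */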
+
+/*
+ * An address range of memory.
+ */
+struct uvm_pmemrange {
+ struct uvm_pmr_addr addr; /* Free page chunks, sorted by addr. */
+ struct uvm_pmr_size size[UVM_PMR_MEMTYPE_MAX];
+ /* Free page chunks, sorted by size. */
+ TAILQ_HEAD(, vm_page) single[UVM_PMR_MEMTYPE_MAX];
+ /* single page regions (uses pageq) */
+
+ paddr_t low; /* Start of address range (pgno). */
+ paddr_t high; /* End +1 (pgno). */
+ int use; /* Use counter. */
+ int nsegs; /* Current range count. */
+
+ TAILQ_ENTRY(uvm_pmemrange) pmr_use;
+ /* pmr, sorted by use */
+ RB_ENTRY(uvm_pmemrange) pmr_addr;
+ /* pmr, sorted by address */
+};
+
+RB_HEAD(uvm_pmemrange_addr, uvm_pmemrange);
+TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
+
+/*
+ * pmr control structure. Contained in uvm.pmr_control.
+ */
+struct uvm_pmr_control {
+ struct uvm_pmemrange_addr addr;
+ struct uvm_pmemrange_use use;
+};
+
+void uvm_pmr_freepages(struct vm_page *, psize_t);
+void uvm_pmr_freepageq(struct pglist *pgl);
+int uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
+ int, int, struct pglist *);
+void uvm_pmr_init(void);
+
+#ifdef DDB
+int uvm_pmr_isfree(struct vm_page *pg);
+#endif
+
+#endif /* _UVM_UVM_PMEMRANGE_H_ */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index 0ea3b01d07a..2e4870b745d 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.58 2009/05/23 14:06:37 oga Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.59 2009/06/01 17:42:33 ariane Exp $ */
/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
/*
@@ -561,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp)
while (uvn->u_obj.uo_npages) {
#ifdef DEBUG
struct vm_page *pp;
- TAILQ_FOREACH(pp, &uvn->u_obj.memq, listq) {
+ TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) {
if ((pp->pg_flags & PG_BUSY) == 0)
panic("uvm_vnp_terminate: detected unbusy pg");
}
diff --git a/sys/xfs/xfs_vnodeops-bsd.c b/sys/xfs/xfs_vnodeops-bsd.c
index ed74d6d478d..3256b1c0160 100644
--- a/sys/xfs/xfs_vnodeops-bsd.c
+++ b/sys/xfs/xfs_vnodeops-bsd.c
@@ -1119,7 +1119,7 @@ xfs_putpages (struct vop_putpages_args *ap)
while (pg && !dirty) {
dirty = pmap_is_modified(pg) || (pg->flags & PG_CLEAN) == 0;
- pg = TAILQ_NEXT(pg, listq);
+ pg = TAILQ_NEXT(pg, fq.queues.listq);
}
if (dirty)