author    Ariane van der Steldt <ariane@cvs.openbsd.org>	2009-06-16 16:42:42 +0000
committer Ariane van der Steldt <ariane@cvs.openbsd.org>	2009-06-16 16:42:42 +0000
commit    df471f546425532f5eb23a525aa177e7cebfc201 (patch)
tree      972d5345b01631db0b5c8763b0ddbc1ff716f769
parent    a3f40e2b55539a27c6990efc4ebff8036b9a4938 (diff)
Backout pmemrange (which to most people is more well known as physmem
allocator). "i can't see any obvious problems" oga
-rw-r--r--  sys/arch/amd64/amd64/pmap.c       |   10
-rw-r--r--  sys/arch/amd64/include/vmparam.h  |    9
-rw-r--r--  sys/arch/i386/i386/pmap.c         |   11
-rw-r--r--  sys/arch/i386/i386/pmapae.c       |   15
-rw-r--r--  sys/arch/i386/include/vmparam.h   |    9
-rw-r--r--  sys/conf/files                    |    3
-rw-r--r--  sys/nnpfs/nnpfs_vnodeops-bsd.c    |    2
-rw-r--r--  sys/uvm/uvm.h                     |   31
-rw-r--r--  sys/uvm/uvm_extern.h              |    9
-rw-r--r--  sys/uvm/uvm_map.c                 |   15
-rw-r--r--  sys/uvm/uvm_page.c                |  173
-rw-r--r--  sys/uvm/uvm_page.h                |   17
-rw-r--r--  sys/uvm/uvm_pglist.c              |  328
-rw-r--r--  sys/uvm/uvm_pmemrange.c           | 1243
-rw-r--r--  sys/uvm/uvm_pmemrange.h           |   83
-rw-r--r--  sys/uvm/uvm_vnode.c               |    4
16 files changed, 464 insertions(+), 1498 deletions(-)
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 165f3b365dd..b6ff88bef80 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.48 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: pmap.c,v 1.49 2009/06/16 16:42:40 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -835,7 +835,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
ptp->wire_count = 0;
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq);
+ TAILQ_INSERT_TAIL(pagelist, ptp, listq);
}
void
@@ -1537,7 +1537,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
@@ -1609,7 +1609,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
@@ -1682,7 +1682,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/amd64/include/vmparam.h b/sys/arch/amd64/include/vmparam.h
index d3c5c9dd102..16914446d34 100644
--- a/sys/arch/amd64/include/vmparam.h
+++ b/sys/arch/amd64/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.11 2009/06/01 17:42:33 ariane Exp $ */
+/* $OpenBSD: vmparam.h,v 1.12 2009/06/16 16:42:40 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.1 2003/04/26 18:39:49 fvdl Exp $ */
/*-
@@ -112,13 +112,6 @@
#define VM_FREELIST_LOW 1
#define VM_FREELIST_HIGH 2
-/* reserve ISA-DMA and 32-bit DMA memory */
-#define UVM_IO_RANGES \
- { \
- { 0, 0x00ffffffUL }, \
- { 0, 0xffffffffUL }, \
- }
-
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 240970dab3b..3619ae37e96 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.141 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: pmap.c,v 1.142 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -2009,7 +2009,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
/* If PTP is no longer being used, free it. */
if (ptp && ptp->wire_count <= 1) {
pmap_drop_ptp(pmap, va, ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
if (!shootall)
@@ -2023,7 +2023,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
@@ -2080,8 +2080,7 @@ pmap_page_remove(struct vm_page *pg)
if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) {
pmap_drop_ptp(pve->pv_pmap, pve->pv_va,
pve->pv_ptp, ptes);
- TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
- fq.queues.listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, listq);
}
pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
@@ -2094,7 +2093,7 @@ pmap_page_remove(struct vm_page *pg)
pmap_tlb_shootwait();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index 46fd0b40943..ed75721ad3c 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmapae.c,v 1.18 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: pmapae.c,v 1.19 2009/06/16 16:42:41 ariane Exp $ */
/*
* Copyright (c) 2006 Michael Shalayeff
@@ -1453,15 +1453,14 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp,
- fq.queues.listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
return;
@@ -1547,7 +1546,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
- TAILQ_INSERT_TAIL(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
@@ -1555,7 +1554,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
@@ -1666,7 +1665,7 @@ pmap_page_remove_pae(struct vm_page *pg)
/* Postpone free to after shootdown. */
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
- fq.queues.listq);
+ listq);
}
}
pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */
@@ -1677,7 +1676,7 @@ pmap_page_remove_pae(struct vm_page *pg)
PMAP_HEAD_TO_MAP_UNLOCK();
pmap_tlb_shootnow(cpumask);
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
- TAILQ_REMOVE(&empty_ptps, ptp, fq.queues.listq);
+ TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h
index 41e95b3f418..293879609de 100644
--- a/sys/arch/i386/include/vmparam.h
+++ b/sys/arch/i386/include/vmparam.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmparam.h,v 1.42 2009/06/01 17:42:33 ariane Exp $ */
+/* $OpenBSD: vmparam.h,v 1.43 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */
/*-
@@ -118,13 +118,6 @@
#define VM_FREELIST_DEFAULT 0
#define VM_FREELIST_FIRST16 1
-/* reserve ISA-DMA and 32-bit DMA memory */
-#define UVM_IO_RANGES \
- { \
- { 0, 0x00ffffffUL }, \
- { 0, 0xffffffffUL }, \
- }
-
#define __HAVE_VM_PAGE_MD
struct pv_entry;
struct vm_page_md {
diff --git a/sys/conf/files b/sys/conf/files
index 4e84684fa51..84d1f578eb8 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.465 2009/06/16 00:11:29 oga Exp $
+# $OpenBSD: files,v 1.466 2009/06/16 16:42:41 ariane Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -973,7 +973,6 @@ file uvm/uvm_page.c
file uvm/uvm_pager.c
file uvm/uvm_pdaemon.c
file uvm/uvm_pglist.c
-file uvm/uvm_pmemrange.c
file uvm/uvm_stat.c
file uvm/uvm_swap.c
file uvm/uvm_swap_encrypt.c uvm_swap_encrypt
diff --git a/sys/nnpfs/nnpfs_vnodeops-bsd.c b/sys/nnpfs/nnpfs_vnodeops-bsd.c
index 7e740d3f8f5..e293154a270 100644
--- a/sys/nnpfs/nnpfs_vnodeops-bsd.c
+++ b/sys/nnpfs/nnpfs_vnodeops-bsd.c
@@ -1119,7 +1119,7 @@ nnpfs_putpages (struct vop_putpages_args *ap)
while (pg && !dirty) {
dirty = pmap_is_modified(pg) || (pg->flags & PG_CLEAN) == 0;
- pg = TAILQ_NEXT(pg, fq.queues.listq);
+ pg = TAILQ_NEXT(pg, listq);
}
if (dirty)
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index d48c5f8026c..57ae90fc894 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.35 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: uvm.h,v 1.36 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -57,7 +57,6 @@
#include <uvm/uvm_page.h>
#include <uvm/uvm_pager.h>
#include <uvm/uvm_pdaemon.h>
-#include <uvm/uvm_pmemrange.h>
#include <uvm/uvm_swap.h>
#ifdef UVM_SWAP_ENCRYPT
#include <uvm/uvm_swap_encrypt.h>
@@ -69,32 +68,6 @@
#include <machine/vmparam.h>
/*
- * UVM_IO_RANGES: paddr_t pairs, describing the lowest and highest address
- * that should be reserved. These ranges (which may overlap) will have their
- * use counter increased, causing them to be avoided if an allocation can be
- * satisfied from another range of memory.
- *
- * IO ranges need not overlap with physmem ranges: the uvm code splits ranges
- * on demand to satisfy requests.
- *
- * UVM_IO_RANGES specified here actually translates into a call to
- * uvm_pmr_use_inc() at uvm initialization time. uvm_pmr_use_inc() can also
- * be called after uvm_init() has completed.
- *
- * Note: the upper bound is specified in the same way as to uvm_pglistalloc.
- * Ex: a memory range of 16 bit is specified as: { 0, 0xffff }.
- */
-#ifndef UVM_IO_RANGES
-#define UVM_IO_RANGES {}
-#endif
-
-/* UVM IO ranges are described in an array of uvm_io_ranges. */
-struct uvm_io_ranges {
- paddr_t low;
- paddr_t high;
-};
-
-/*
* uvm structure (vm global state: collected in one structure for ease
* of reference...)
*/
@@ -103,7 +76,7 @@ struct uvm {
/* vm_page related parameters */
/* vm_page queues */
- struct uvm_pmr_control pmr_control; /* pmemrange control data */
+ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
struct pglist page_active; /* allocated pages, in use */
struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */
struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 48255c1d8ad..de7dbb06b3b 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.79 2009/06/14 03:04:08 deraadt Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.80 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -221,12 +221,11 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zeroed */
/*
- * flags for uvm_pglistalloc() and uvm_pmr_getpages()
+ * flags for uvm_pglistalloc()
*/
#define UVM_PLA_WAITOK 0x0001 /* may sleep */
#define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */
#define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */
-#define UVM_PLA_TRY_CONTIG 0x0008 /* try to allocate a contig range */
/*
* lockflags that control the locking behavior of various functions.
@@ -590,10 +589,6 @@ int uvm_pglistalloc(psize_t, paddr_t,
struct pglist *, int, int);
void uvm_pglistfree(struct pglist *);
-/* uvm_pmemrange.c */
-
-void uvm_pmr_use_inc(paddr_t, paddr_t);
-
/* uvm_swap.c */
void uvm_swap_init(void);
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index b804d36bd6c..b82f161aa9f 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.116 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.117 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -3822,7 +3822,7 @@ uvm_object_printit(uobj, full, pr)
(*pr)(" PAGES <pg,offset>:\n ");
for (pg = TAILQ_FIRST(&uobj->memq);
pg != NULL;
- pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) {
+ pg = TAILQ_NEXT(pg, listq), cnt++) {
(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
if ((cnt % 3) == 2) {
(*pr)("\n ");
@@ -3883,8 +3883,7 @@ uvm_page_printit(pg, full, pr)
uobj = pg->uobject;
if (uobj) {
(*pr)(" checking object list\n");
- TAILQ_FOREACH(tpg, &uobj->memq,
- fq.queues.listq) {
+ TAILQ_FOREACH(tpg, &uobj->memq, listq) {
if (tpg == pg) {
break;
}
@@ -3899,11 +3898,9 @@ uvm_page_printit(pg, full, pr)
/* cross-verify page queue */
if (pg->pg_flags & PQ_FREE) {
- if (uvm_pmr_isfree(pg))
- printf(" page found in uvm_pmemrange\n");
- else
- printf(" >>> page not found in uvm_pmemrange <<<\n");
- pgl = NULL;
+ int fl = uvm_page_lookup_freelist(pg);
+ pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ?
+ PGFL_ZEROS : PGFL_UNKNOWN];
} else if (pg->pg_flags & PQ_INACTIVE) {
pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
&uvm.page_inactive_swp : &uvm.page_inactive_obj;
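
With the freelists back, the ddb cross-check restored in uvm_page_printit() can again compute which pglist a page should be on from its flags: a PQ_FREE page belongs to uvm.page_free[uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_ZEROS or PGFL_UNKNOWN], chosen by PG_ZERO, and a PQ_INACTIVE page to page_inactive_swp or page_inactive_obj, chosen by PQ_SWAPBACKED. A compact stand-alone sketch of that decision follows; the flag values are invented for the sketch, and the PQ_ACTIVE case is inferred from struct uvm rather than shown in this hunk.

#define PQ_FREE		0x01	/* stand-in flag values */
#define PQ_INACTIVE	0x02
#define PQ_ACTIVE	0x04
#define PQ_SWAPBACKED	0x08
#define PG_ZERO		0x10

enum which_queue {
	Q_FREE_ZEROS, Q_FREE_UNKNOWN,
	Q_INACTIVE_SWP, Q_INACTIVE_OBJ,
	Q_ACTIVE, Q_NONE
};

/* Mirrors the cross-verification in uvm_page_printit(). */
static enum which_queue
page_queue_for_flags(int pg_flags)
{
	if (pg_flags & PQ_FREE)
		return (pg_flags & PG_ZERO) ? Q_FREE_ZEROS : Q_FREE_UNKNOWN;
	if (pg_flags & PQ_INACTIVE)
		return (pg_flags & PQ_SWAPBACKED) ?
		    Q_INACTIVE_SWP : Q_INACTIVE_OBJ;
	if (pg_flags & PQ_ACTIVE)
		return Q_ACTIVE;
	return Q_NONE;
}

int
main(void)
{
	return page_queue_for_flags(PQ_FREE | PG_ZERO) == Q_FREE_ZEROS ? 0 : 1;
}
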
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 2cf45c11375..afd841ff331 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.89 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.90 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -159,11 +159,10 @@ uvm_pageinsert(struct vm_page *pg)
KASSERT((pg->pg_flags & PG_TABLED) == 0);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */
+ TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */
mtx_leave(&uvm.hashlock);
- TAILQ_INSERT_TAIL(&pg->uobject->memq, pg,
- fq.queues.listq); /* put in object */
+ TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
atomic_setbits_int(&pg->pg_flags, PG_TABLED);
pg->uobject->uo_npages++;
}
@@ -184,7 +183,7 @@ uvm_pageremove(struct vm_page *pg)
KASSERT(pg->pg_flags & PG_TABLED);
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
- TAILQ_REMOVE(buck, pg, fq.queues.hashq);
+ TAILQ_REMOVE(buck, pg, hashq);
mtx_leave(&uvm.hashlock);
#ifdef UBC
@@ -194,7 +193,7 @@ uvm_pageremove(struct vm_page *pg)
#endif
/* object should be locked */
- TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq);
+ TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ);
pg->uobject->uo_npages--;
@@ -227,12 +226,15 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
* init the page queues and page queue locks
*/
+ for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
+ for (i = 0; i < PGFL_NQUEUES; i++)
+ TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
+ }
TAILQ_INIT(&uvm.page_active);
TAILQ_INIT(&uvm.page_inactive_swp);
TAILQ_INIT(&uvm.page_inactive_obj);
simple_lock_init(&uvm.pageqlock);
mtx_init(&uvm.fpageqlock, IPL_VM);
- uvm_pmr_init();
/*
* init the <obj,offset> => <page> hash table. for now
@@ -317,13 +319,10 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
if (atop(paddr) >= vm_physmem[lcv].avail_start &&
atop(paddr) <= vm_physmem[lcv].avail_end) {
uvmexp.npages++;
+ /* add page to free pool */
+ uvm_pagefree(&vm_physmem[lcv].pgs[i]);
}
}
-
- /* add pages to free pool */
- uvm_pmr_freepages(&vm_physmem[lcv].pgs[
- vm_physmem[lcv].avail_start - vm_physmem[lcv].start],
- vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
}
/*
@@ -812,10 +811,10 @@ uvm_page_rehash(void)
/* ... and rehash */
for (lcv = 0 ; lcv < oldcount ; lcv++) {
while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) {
- TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq);
+ TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
TAILQ_INSERT_TAIL(
&uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
- pg, fq.queues.hashq);
+ pg, hashq);
}
}
mtx_leave(&uvm.hashlock);
@@ -893,15 +892,18 @@ struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
int flags, int strat, int free_list)
{
- struct pglist pgl;
- int pmr_flags;
+ int lcv, try1, try2, zeroit = 0;
struct vm_page *pg;
+ struct pglist *freeq;
+ struct pgfreelist *pgfl;
boolean_t use_reserve;
UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist);
KASSERT(obj == NULL || anon == NULL);
KASSERT(off == trunc_page(off));
+ uvm_lock_fpageq();
+
/*
* check to see if we need to generate some free pages waking
* the pagedaemon.
@@ -928,20 +930,95 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
(curproc == syncerproc))))
goto fail;
- pmr_flags = UVM_PLA_NOWAIT;
- if (flags & UVM_PGA_ZERO)
- pmr_flags |= UVM_PLA_ZERO;
- TAILQ_INIT(&pgl);
- if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
+#if PGFL_NQUEUES != 2
+#error uvm_pagealloc_strat needs to be updated
+#endif
+
+ /*
+ * If we want a zero'd page, try the ZEROS queue first, otherwise
+ * we try the UNKNOWN queue first.
+ */
+ if (flags & UVM_PGA_ZERO) {
+ try1 = PGFL_ZEROS;
+ try2 = PGFL_UNKNOWN;
+ } else {
+ try1 = PGFL_UNKNOWN;
+ try2 = PGFL_ZEROS;
+ }
+
+ UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx",
+ obj, (u_long)off, anon, flags);
+ UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0);
+ again:
+ switch (strat) {
+ case UVM_PGA_STRAT_NORMAL:
+ /* Check all freelists in descending priority order. */
+ for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
+ pgfl = &uvm.page_free[lcv];
+ if ((pg = TAILQ_FIRST((freeq =
+ &pgfl->pgfl_queues[try1]))) != NULL ||
+ (pg = TAILQ_FIRST((freeq =
+ &pgfl->pgfl_queues[try2]))) != NULL)
+ goto gotit;
+ }
+
+ /* No pages free! */
+ goto fail;
+
+ case UVM_PGA_STRAT_ONLY:
+ case UVM_PGA_STRAT_FALLBACK:
+ /* Attempt to allocate from the specified free list. */
+ KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
+ pgfl = &uvm.page_free[free_list];
+ if ((pg = TAILQ_FIRST((freeq =
+ &pgfl->pgfl_queues[try1]))) != NULL ||
+ (pg = TAILQ_FIRST((freeq =
+ &pgfl->pgfl_queues[try2]))) != NULL)
+ goto gotit;
+
+ /* Fall back, if possible. */
+ if (strat == UVM_PGA_STRAT_FALLBACK) {
+ strat = UVM_PGA_STRAT_NORMAL;
+ goto again;
+ }
+
+ /* No pages free! */
goto fail;
- pg = TAILQ_FIRST(&pgl);
- KASSERT(pg != NULL);
- KASSERT(TAILQ_NEXT(pg, pageq) == NULL);
+
+ default:
+ panic("uvm_pagealloc_strat: bad strat %d", strat);
+ /* NOTREACHED */
+ }
+
+ gotit:
+ TAILQ_REMOVE(freeq, pg, pageq);
+ uvmexp.free--;
+
+ /* update zero'd page count */
+ if (pg->pg_flags & PG_ZERO)
+ uvmexp.zeropages--;
+
+ /*
+ * update allocation statistics and remember if we have to
+ * zero the page
+ */
+ if (flags & UVM_PGA_ZERO) {
+ if (pg->pg_flags & PG_ZERO) {
+ uvmexp.pga_zerohit++;
+ zeroit = 0;
+ } else {
+ uvmexp.pga_zeromiss++;
+ zeroit = 1;
+ }
+ }
+
+ uvm_unlock_fpageq(); /* unlock free page queue */
pg->offset = off;
pg->uobject = obj;
pg->uanon = anon;
pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
+ pg->pg_version++;
if (anon) {
anon->an_page = pg;
atomic_setbits_int(&pg->pg_flags, PQ_ANON);
@@ -957,11 +1034,22 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
#endif
UVM_PAGE_OWN(pg, "new alloc");
+ if (flags & UVM_PGA_ZERO) {
+ /*
+ * A zero'd page is not clean. If we got a page not already
+ * zero'd, then we have to zero it ourselves.
+ */
+ atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
+ if (zeroit)
+ pmap_zero_page(pg);
+ }
+
UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg,
(u_long)VM_PAGE_TO_PHYS(pg), 0, 0);
return(pg);
fail:
+ uvm_unlock_fpageq();
UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0);
return (NULL);
}
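
The uvm_pagealloc_strat body restored above prefers whichever queue saves work: with UVM_PGA_ZERO it searches PGFL_ZEROS before PGFL_UNKNOWN, records a pga_zerohit or pga_zeromiss, and calls pmap_zero_page() only on a miss; without UVM_PGA_ZERO the order flips so pre-zeroed pages are not wasted. A minimal sketch of just that ordering decision, with stand-in queue indices:

#include <stdbool.h>
#include <stdio.h>

#define PGFL_UNKNOWN	0	/* stand-in queue indices */
#define PGFL_ZEROS	1

struct alloc_plan {
	int try1, try2;		/* queue search order */
};

/* Pick the search order the way the restored allocator does. */
static struct alloc_plan
plan_for(bool want_zeroed)
{
	struct alloc_plan p;

	if (want_zeroed) {
		p.try1 = PGFL_ZEROS;	/* a hit avoids zeroing the page */
		p.try2 = PGFL_UNKNOWN;	/* a miss means we zero it ourselves */
	} else {
		p.try1 = PGFL_UNKNOWN;	/* keep pre-zeroed pages in reserve */
		p.try2 = PGFL_ZEROS;
	}
	return p;
}

/* Whether the caller still has to zero the page it got. */
static bool
must_zero(bool want_zeroed, bool page_was_zero)
{
	return want_zeroed && !page_was_zero;
}

int
main(void)
{
	struct alloc_plan p = plan_for(true);

	printf("try queue %d then %d; zero on miss: %d\n",
	    p.try1, p.try2, must_zero(true, false));
	return 0;
}
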
@@ -1012,7 +1100,6 @@ uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
void
uvm_pagefree(struct vm_page *pg)
{
- struct pglist pgl;
int saved_loan_count = pg->loan_count;
UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist);
@@ -1108,35 +1195,27 @@ uvm_pagefree(struct vm_page *pg)
}
/*
- * Clean page state bits.
- */
- atomic_clearbits_int(&pg->pg_flags,
- PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK);
- /*
- * Pmap flag cleaning.
- * XXX: Shouldn't pmap do this?
+ * and put on free queue
*/
- atomic_clearbits_int(&pg->pg_flags,
- PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
-
-#if defined(DIAGNOSTIC)
- if (pg->pg_flags != 0) {
- panic("uvm_pagefree: expected page %p pg_flags to be 0\n"
- "uvm_pagefree: instead of pg->pg_flags = %x\n",
- VM_PAGE_TO_PHYS(pg), pg->pg_flags);
- }
-#endif
+
+ atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
+
+ uvm_lock_fpageq();
+ TAILQ_INSERT_TAIL(&uvm.page_free[
+ uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
+ atomic_clearbits_int(&pg->pg_flags, PQ_MASK);
+ atomic_setbits_int(&pg->pg_flags, PQ_FREE);
#ifdef DEBUG
pg->uobject = (void *)0xdeadbeef;
pg->offset = 0xdeadbeef;
pg->uanon = (void *)0xdeadbeef;
#endif
- TAILQ_INIT(&pgl);
- TAILQ_INSERT_HEAD(&pgl, pg, pageq);
- uvm_pmr_freepageq(&pgl);
+ uvmexp.free++;
if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
uvm.page_idle_zero = vm_page_zero_enable;
+
+ uvm_unlock_fpageq();
}
/*
@@ -1229,7 +1308,6 @@ uvm_page_own(struct vm_page *pg, char *tag)
void
uvm_pageidlezero(void)
{
-#if 0 /* Disabled for now. */
struct vm_page *pg;
struct pgfreelist *pgfl;
int free_list;
@@ -1296,7 +1374,6 @@ uvm_pageidlezero(void)
uvmexp.zeropages++;
uvm_unlock_fpageq();
} while (curcpu_is_idle());
-#endif /* 0 */
}
/*
@@ -1399,7 +1476,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off)
mtx_enter(&uvm.hashlock);
buck = &uvm.page_hash[uvm_pagehash(obj,off)];
- TAILQ_FOREACH(pg, buck, fq.queues.hashq) {
+ TAILQ_FOREACH(pg, buck, hashq) {
if (pg->uobject == obj && pg->offset == off) {
break;
}
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index 5896286c871..4da52e8e323 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.h,v 1.37 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: uvm_page.h,v 1.38 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */
/*
@@ -106,22 +106,11 @@
#include <uvm/uvm_extern.h>
#include <uvm/uvm_pglist.h>
-union vm_page_fq {
- struct {
- TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
- } queues;
-
- struct {
- RB_ENTRY(vm_page) tree; /* Free chunks, addr/size */
- psize_t pages;
- } free;
-};
-
struct vm_page {
- union vm_page_fq fq; /* free and queue management */
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
* queue or free list (P) */
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
struct vm_anon *uanon; /* anon (O,P) */
struct uvm_object *uobject; /* object (O,P) */
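
Dropping union vm_page_fq gives vm_page three independent link members again (pageq, hashq, listq), so a single page can be on a paging or free queue, a hash bucket, and its object's memq at the same time, with every TAILQ_INSERT/REMOVE naming the link it manipulates, as in the pmap and uvm hunks above. A self-contained <sys/queue.h> illustration of that pattern (struct and list names are invented for the sketch):

#include <sys/queue.h>
#include <stdio.h>

struct page {
	TAILQ_ENTRY(page) pageq;	/* free list / paging queue */
	TAILQ_ENTRY(page) hashq;	/* hash bucket */
	TAILQ_ENTRY(page) listq;	/* pages of the same object */
	int id;
};
TAILQ_HEAD(pagelist, page);

int
main(void)
{
	struct pagelist active, bucket, objpages;
	struct page pg = { .id = 1 };
	struct page *it;

	TAILQ_INIT(&active);
	TAILQ_INIT(&bucket);
	TAILQ_INIT(&objpages);

	/* The same page lives on three lists, one per link member. */
	TAILQ_INSERT_TAIL(&active, &pg, pageq);
	TAILQ_INSERT_TAIL(&bucket, &pg, hashq);
	TAILQ_INSERT_TAIL(&objpages, &pg, listq);

	TAILQ_FOREACH(it, &bucket, hashq)
		printf("page %d found in hash bucket\n", it->id);

	/* Removal must name the same link member the insert used. */
	TAILQ_REMOVE(&objpages, &pg, listq);
	return 0;
}
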
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c
index ff0f8d91f68..5abe87d9fb5 100644
--- a/sys/uvm/uvm_pglist.c
+++ b/sys/uvm/uvm_pglist.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pglist.c,v 1.30 2009/06/01 17:42:33 ariane Exp $ */
+/* $OpenBSD: uvm_pglist.c,v 1.31 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */
/*-
@@ -56,6 +56,112 @@ u_long uvm_pglistalloc_npages;
#define STAT_DECR(v)
#endif
+int uvm_pglistalloc_simple(psize_t, paddr_t, paddr_t, struct pglist *);
+
+/*
+ * Simple page allocation: pages do not need to be contiguous. We just
+ * attempt to find enough free pages in the given range.
+ */
+int
+uvm_pglistalloc_simple(psize_t size, paddr_t low, paddr_t high,
+ struct pglist *rlist)
+{
+ psize_t todo;
+ int psi;
+ struct vm_page *pg;
+ struct vm_physseg *seg;
+ paddr_t slow, shigh;
+ int pgflidx, error, free_list;
+ UVMHIST_FUNC("uvm_pglistalloc_simple"); UVMHIST_CALLED(pghist);
+#ifdef DEBUG
+ vm_page_t tp;
+#endif
+
+ /* Default to "lose". */
+ error = ENOMEM;
+
+ todo = atop(size);
+
+ /*
+ * Block all memory allocation and lock the free list.
+ */
+ uvm_lock_fpageq();
+
+ /* Are there even any free pages? */
+ if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
+ goto out;
+
+ for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
+ /*
+ * Skip this segment if incompatible with the address range.
+ */
+ if (seg->avail_end <= atop(low))
+ continue;
+ if (seg->avail_start >= atop(high))
+ continue;
+
+ slow = MAX(atop(low), seg->avail_start);
+ shigh = MIN(atop(high), seg->avail_end);
+
+ /* we want to be able to allocate at least a page... */
+ if (slow == shigh)
+ continue;
+
+ for (pg = &seg->pgs[slow - seg->start]; slow != shigh;
+ slow++, pg++) {
+ if (VM_PAGE_IS_FREE(pg) == 0)
+ continue;
+
+ free_list = uvm_page_lookup_freelist(pg);
+ pgflidx = (pg->pg_flags & PG_ZERO) ?
+ PGFL_ZEROS : PGFL_UNKNOWN;
+#ifdef DEBUG
+ for (tp = TAILQ_FIRST(&uvm.page_free[free_list].pgfl_queues[pgflidx]);
+ tp != NULL; tp = TAILQ_NEXT(tp, pageq)) {
+ if (tp == pg)
+ break;
+ }
+ if (tp == NULL)
+ panic("uvm_pglistalloc_simple: page not on freelist");
+#endif
+ TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
+ pg, pageq);
+ uvmexp.free--;
+ if (pg->pg_flags & PG_ZERO)
+ uvmexp.zeropages--;
+ pg->uobject = NULL;
+ pg->uanon = NULL;
+ pg->pg_version++;
+ TAILQ_INSERT_TAIL(rlist, pg, pageq);
+ STAT_INCR(uvm_pglistalloc_npages);
+ if (--todo == 0) {
+ error = 0;
+ goto out;
+ }
+ }
+
+ }
+
+out:
+ /*
+ * check to see if we need to generate some free pages waking
+ * the pagedaemon.
+ */
+
+ if (!error && (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
+ (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
+ uvmexp.inactive < uvmexp.inactarg))) {
+ wakeup(&uvm.pagedaemon_proc);
+ }
+
+ uvm_unlock_fpageq();
+
+ if (error)
+ uvm_pglistfree(rlist);
+
+ return (error);
+}
+
/*
* uvm_pglistalloc: allocate a list of pages
*
@@ -73,45 +179,202 @@ u_long uvm_pglistalloc_npages;
* alignment memory must be aligned to this power-of-two boundary.
* boundary no segment in the allocation may cross this
* power-of-two boundary (relative to zero).
- * => flags:
- * UVM_PLA_NOWAIT fail if allocation fails
- * UVM_PLA_WAITOK wait for memory to become avail if allocation fails
- * UVM_PLA_ZERO return zeroed memory
- * UVM_PLA_TRY_CONTIG device prefers p-lineair mem
*/
int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
paddr_t boundary, struct pglist *rlist, int nsegs, int flags)
{
+ int psi;
+ struct vm_page *pgs;
+ struct vm_physseg *seg;
+ paddr_t slow, shigh;
+ paddr_t try, idxpa, lastidxpa;
+ int tryidx, idx, pgflidx, endidx, error, free_list;
+ vm_page_t m;
+ u_long pagemask;
+#ifdef DEBUG
+ vm_page_t tp;
+#endif
UVMHIST_FUNC("uvm_pglistalloc"); UVMHIST_CALLED(pghist);
KASSERT((alignment & (alignment - 1)) == 0);
KASSERT((boundary & (boundary - 1)) == 0);
+ /*
+ * This argument is always ignored for now, but ensure drivers always
+ * show intention.
+ */
KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
+
+ /*
+ * Our allocations are always page granularity, so our alignment
+ * must be, too.
+ */
+ if (alignment < PAGE_SIZE)
+ alignment = PAGE_SIZE;
if (size == 0)
return (EINVAL);
+ size = round_page(size);
+ low = roundup(low, alignment);
+
/*
- * Convert byte addresses to page numbers.
+ * If we are allowed to allocate as many segments as pages,
+ * no need to be smart.
*/
- if (alignment < PAGE_SIZE)
- alignment = PAGE_SIZE;
- low = atop(roundup(low, alignment));
- /* Allows for overflow: 0xffff + 1 = 0x0000 */
- if ((high & PAGE_MASK) == PAGE_MASK)
- high = atop(high) + 1;
- else
- high = atop(high);
- size = atop(round_page(size));
- alignment = atop(alignment);
- if (boundary < PAGE_SIZE && boundary != 0)
- boundary = PAGE_SIZE;
- boundary = atop(boundary);
-
- return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs,
- flags, rlist);
+ if ((nsegs >= size / PAGE_SIZE) && (alignment == PAGE_SIZE) &&
+ (boundary == 0)) {
+ error = uvm_pglistalloc_simple(size, low, high, rlist);
+ goto done;
+ }
+
+ if (boundary != 0 && boundary < size)
+ return (EINVAL);
+
+ pagemask = ~(boundary - 1);
+
+ /* Default to "lose". */
+ error = ENOMEM;
+
+ /*
+ * Block all memory allocation and lock the free list.
+ */
+ uvm_lock_fpageq();
+
+ /* Are there even any free pages? */
+ if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
+ goto out;
+
+ for (psi = 0, seg = vm_physmem; psi < vm_nphysseg; psi++, seg++) {
+ /*
+ * Skip this segment if incompatible with the address range.
+ */
+ if (seg->avail_end <= atop(low))
+ continue;
+ if (seg->avail_start >= atop(high))
+ continue;
+
+ slow = MAX(low, ptoa(seg->avail_start));
+ shigh = MIN(high, ptoa(seg->avail_end));
+
+ try = roundup(slow, alignment);
+ for (;; try += alignment) {
+ if (try + size > shigh) {
+ /*
+ * We've run past the allowable range, or
+ * the segment. Try another.
+ */
+ break;
+ }
+
+ tryidx = idx = atop(try) - seg->start;
+ endidx = idx + atop(size);
+ pgs = vm_physmem[psi].pgs;
+
+ /*
+ * Found a suitable starting page. See if the
+ * range is free.
+ */
+
+ for (; idx < endidx; idx++) {
+ if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
+ break;
+ }
+ idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
+ if (idx == tryidx)
+ continue;
+
+ /*
+ * Check that the region is contiguous
+ * (it really should...) and does not
+ * cross an alignment boundary.
+ */
+ lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
+ if ((lastidxpa + PAGE_SIZE) != idxpa)
+ break;
+
+ if (boundary != 0 &&
+ ((lastidxpa ^ idxpa) & pagemask) != 0)
+ break;
+ }
+
+ if (idx == endidx) {
+ goto found;
+ }
+ }
+ }
+
+ /*
+ * We could not allocate a contiguous range. This is where
+ * we should try harder if nsegs > 1...
+ */
+ goto out;
+
+#if PGFL_NQUEUES != 2
+#error uvm_pglistalloc needs to be updated
+#endif
+
+found:
+ /*
+ * we have a chunk of memory that conforms to the requested constraints.
+ */
+ idx = tryidx;
+ while (idx < endidx) {
+ m = &pgs[idx];
+ free_list = uvm_page_lookup_freelist(m);
+ pgflidx = (m->pg_flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
+#ifdef DEBUG
+ for (tp = TAILQ_FIRST(&uvm.page_free[
+ free_list].pgfl_queues[pgflidx]);
+ tp != NULL;
+ tp = TAILQ_NEXT(tp, pageq)) {
+ if (tp == m)
+ break;
+ }
+ if (tp == NULL)
+ panic("uvm_pglistalloc: page not on freelist");
+#endif
+ TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx],
+ m, pageq);
+ uvmexp.free--;
+ if (m->pg_flags & PG_ZERO)
+ uvmexp.zeropages--;
+ m->uobject = NULL;
+ m->uanon = NULL;
+ m->pg_version++;
+ TAILQ_INSERT_TAIL(rlist, m, pageq);
+ idx++;
+ STAT_INCR(uvm_pglistalloc_npages);
+ }
+ error = 0;
+
+out:
+ /*
+ * check to see if we need to generate some free pages waking
+ * the pagedaemon.
+ */
+
+ if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
+ (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
+ uvmexp.inactive < uvmexp.inactarg)) {
+ wakeup(&uvm.pagedaemon_proc);
+ }
+
+ uvm_unlock_fpageq();
+
+done:
+ /* No locking needed here, pages are not on any queue. */
+ if (error == 0) {
+ TAILQ_FOREACH(m, rlist, pageq) {
+ if (flags & UVM_PLA_ZERO &&
+ (m->pg_flags & PG_ZERO) == 0)
+ uvm_pagezero(m);
+ m->pg_flags = PG_CLEAN;
+ }
+ }
+
+ return (error);
}
/*
@@ -126,8 +389,14 @@ uvm_pglistfree(struct pglist *list)
struct vm_page *m;
UVMHIST_FUNC("uvm_pglistfree"); UVMHIST_CALLED(pghist);
- TAILQ_FOREACH(m, list, pageq) {
+ /*
+ * Block all memory allocation and lock the free list.
+ */
+ uvm_lock_fpageq();
+
+ while ((m = TAILQ_FIRST(list)) != NULL) {
KASSERT((m->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
+ TAILQ_REMOVE(list, m, pageq);
#ifdef DEBUG
if (m->uobject == (void *)0xdeadbeef &&
m->uanon == (void *)0xdeadbeef) {
@@ -139,6 +408,15 @@ uvm_pglistfree(struct pglist *list)
m->uanon = (void *)0xdeadbeef;
#endif
atomic_clearbits_int(&m->pg_flags, PQ_MASK);
+ atomic_setbits_int(&m->pg_flags, PQ_FREE);
+ TAILQ_INSERT_TAIL(&uvm.page_free[
+ uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN],
+ m, pageq);
+ uvmexp.free++;
+ if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+ uvm.page_idle_zero = vm_page_zero_enable;
+ STAT_DECR(uvm_pglistalloc_npages);
}
- uvm_pmr_freepageq(list);
+
+ uvm_unlock_fpageq();
}
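
The contiguous scan restored in uvm_pglistalloc() extends a candidate run only while each page is physically adjacent to the previous one ((lastidxpa + PAGE_SIZE) == idxpa) and while the run stays inside one boundary-aligned window, tested as ((lastidxpa ^ idxpa) & pagemask) != 0 with pagemask = ~(boundary - 1): the bits above the boundary differ exactly when the two addresses fall in different boundary-aligned windows. A tiny stand-alone demonstration with concrete addresses (PAGE_SIZE assumed 4 KB):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	0x1000UL	/* assumed 4 KB for the example */

/* Does the step from prev to cur cross a power-of-two boundary? */
static int
crosses_boundary(uint64_t prev, uint64_t cur, uint64_t boundary)
{
	uint64_t pagemask = ~(boundary - 1);

	return boundary != 0 && ((prev ^ cur) & pagemask) != 0;
}

int
main(void)
{
	uint64_t boundary = 0x10000;	/* 64 KB windows */

	/* Adjacent pages inside the same 64 KB window: run may continue. */
	printf("%d\n", crosses_boundary(0x8000, 0x8000 + PAGE_SIZE, boundary));
	/* Adjacent pages straddling 0x10000: the run must stop here. */
	printf("%d\n", crosses_boundary(0xf000, 0xf000 + PAGE_SIZE, boundary));
	return 0;
}
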
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
deleted file mode 100644
index d25b780168b..00000000000
--- a/sys/uvm/uvm_pmemrange.c
+++ /dev/null
@@ -1,1243 +0,0 @@
-/* $OpenBSD: uvm_pmemrange.c,v 1.8 2009/06/14 03:04:08 deraadt Exp $ */
-
-/*
- * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <sys/param.h>
-#include <uvm/uvm.h>
-#include <sys/malloc.h>
-
-/*
- * 2 trees: addr tree and size tree.
- *
- * addr tree is vm_page[0].fq.free.tree
- * size tree is vm_page[1].fq.free.tree
- *
- * The size tree is not used for memory ranges of 1 page, instead,
- * single queue is vm_page[0].pageq
- *
- * uvm_page_init guarantees that every vm_physseg contains an array of
- * struct vm_page. Also, uvm_page_physload allocates an array of struct
- * vm_page. This code depends on that array.
- */
-
-/* Tree comparators. */
-int uvm_pmemrange_addr_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
-int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *);
-int uvm_pmr_addr_cmp(struct vm_page *, struct vm_page *);
-int uvm_pmr_size_cmp(struct vm_page *, struct vm_page *);
-
-/* Memory types. The page flags are used to derive what the current memory
- * type of a page is. */
-static __inline int
-uvm_pmr_pg_to_memtype(struct vm_page *pg)
-{
- if (pg->pg_flags & PG_ZERO)
- return UVM_PMR_MEMTYPE_ZERO;
- /* Default: dirty memory. */
- return UVM_PMR_MEMTYPE_DIRTY;
-}
-
-/* Trees. */
-RB_PROTOTYPE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
-RB_PROTOTYPE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
-RB_PROTOTYPE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
- uvm_pmemrange_addr_cmp);
-RB_GENERATE(uvm_pmr_addr, vm_page, fq.free.tree, uvm_pmr_addr_cmp);
-RB_GENERATE(uvm_pmr_size, vm_page, fq.free.tree, uvm_pmr_size_cmp);
-RB_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
- uvm_pmemrange_addr_cmp);
-
-/* Validation. */
-#ifdef DEBUG
-void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr);
-#else
-#define uvm_pmr_assertvalid(pmr) do {} while (0)
-#endif
-
-
-int uvm_pmr_get1page(psize_t, int, struct pglist *,
- paddr_t, paddr_t);
-
-struct uvm_pmemrange *uvm_pmr_allocpmr(void);
-struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
-struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *,
- struct vm_page *, int);
-void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr,
- struct vm_page *pg, struct vm_page **pg_prev,
- struct vm_page **pg_next);
-struct vm_page *uvm_pmr_insert(struct uvm_pmemrange *,
- struct vm_page *, int);
-void uvm_pmr_remove(struct uvm_pmemrange *,
- struct vm_page *);
-psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t,
- struct vm_page **);
-void uvm_pmr_split(paddr_t);
-struct uvm_pmemrange *uvm_pmemrange_find(paddr_t);
-struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *,
- struct uvm_pmemrange *);
-struct vm_page *uvm_pmr_extract_range(struct uvm_pmemrange *,
- struct vm_page *, paddr_t, paddr_t,
- struct pglist *);
-
-/*
- * Computes num/denom and rounds it up to the next power-of-2.
- */
-static __inline psize_t
-pow2divide(psize_t num, psize_t denom)
-{
- int rshift = 0;
-
- while (num > (denom << rshift))
- rshift++;
- return (paddr_t)1 << rshift;
-}
-
-/*
- * Predicate: lhs is a subrange or rhs.
- */
-#define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high) \
- ((lhs_low) >= (rhs_low) && (lhs_high <= rhs_high))
-
-/*
- * Align to power-of-2 alignment.
- */
-#define PMR_ALIGN(pgno, align) \
- (((pgno) + ((align) - 1)) & ~((align) - 1))
-
-
-/*
- * Comparator: sort by address ascending.
- */
-int
-uvm_pmemrange_addr_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
-{
- return lhs->low < rhs->low ? -1 : lhs->low > rhs->low;
-}
-
-/*
- * Comparator: sort by use ascending.
- *
- * The higher the use value of a range, the more devices need memory in
- * this range. Therefor allocate from the range with the lowest use first.
- */
-int
-uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs)
-{
- int result;
-
- result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use;
- if (result == 0)
- result = uvm_pmemrange_addr_cmp(lhs, rhs);
- return result;
-}
-
-int
-uvm_pmr_addr_cmp(struct vm_page *lhs, struct vm_page *rhs)
-{
- paddr_t lhs_addr, rhs_addr;
-
- lhs_addr = VM_PAGE_TO_PHYS(lhs);
- rhs_addr = VM_PAGE_TO_PHYS(rhs);
-
- return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr);
-}
-
-int
-uvm_pmr_size_cmp(struct vm_page *lhs, struct vm_page *rhs)
-{
- psize_t lhs_size, rhs_size;
- int cmp;
-
- /* Using second tree, so we receive pg[1] instead of pg[0]. */
- lhs_size = (lhs - 1)->fq.free.pages;
- rhs_size = (rhs - 1)->fq.free.pages;
-
- cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size);
- if (cmp == 0)
- cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1);
- return cmp;
-}
-
-/*
- * Find the first range of free pages that is at least sz pages long.
- */
-struct vm_page *
-uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti)
-{
- struct vm_page *node, *best;
-
- KASSERT(sz >= 1);
-
- if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti]))
- return TAILQ_FIRST(&pmr->single[mti]);
-
- node = RB_ROOT(&pmr->size[mti]);
- best = NULL;
- while (node != NULL) {
- if ((node - 1)->fq.free.pages >= sz) {
- best = (node - 1);
- node = RB_LEFT(node, fq.free.tree);
- } else
- node = RB_RIGHT(node, fq.free.tree);
- }
- return best;
-}
-
-/*
- * Finds the next range. The next range has a size >= pg->fq.free.pages.
- * Returns NULL if no more ranges are available.
- */
-struct vm_page *
-uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt)
-{
- struct vm_page *npg;
-
- KASSERT(pmr != NULL && pg != NULL);
- if (pg->fq.free.pages == 1) {
- if (TAILQ_NEXT(pg, pageq) != NULL)
- return TAILQ_NEXT(pg, pageq);
- else
- npg = RB_MIN(uvm_pmr_size, &pmr->size[mt]);
- } else
- npg = RB_NEXT(uvm_pmr_size, &pmr->size[mt], pg + 1);
-
- return npg == NULL ? NULL : npg - 1;
-}
-
-/*
- * Finds the previous and next ranges relative to the (uninserted) pg range.
- *
- * *pg_prev == NULL if no previous range is available, that can join with
- * pg.
- * *pg_next == NULL if no previous range is available, that can join with
- * pg.
- */
-void
-uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg,
- struct vm_page **pg_prev, struct vm_page **pg_next)
-{
- KASSERT(pg_prev != NULL && pg_next != NULL);
-
- *pg_next = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
- if (*pg_next == NULL)
- *pg_prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
- else
- *pg_prev = RB_PREV(uvm_pmr_addr, &pmr->addr, *pg_next);
-
- /* Reset if not contig. */
- if (*pg_prev != NULL &&
- (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fq.free.pages
- != atop(VM_PAGE_TO_PHYS(pg)) ||
- uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg)))
- *pg_prev = NULL;
- if (*pg_next != NULL &&
- (atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages
- != atop(VM_PAGE_TO_PHYS(*pg_next)) ||
- uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg)))
- *pg_next = NULL;
- return;
-}
-
-/*
- * Remove a range from the address tree.
- * Address tree maintains pmr counters.
- */
-static __inline void
-uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg)
-{
- KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
- KASSERT(pg->pg_flags & PQ_FREE);
- RB_REMOVE(uvm_pmr_addr, &pmr->addr, pg);
-
- pmr->nsegs--;
-}
-/*
- * Remove a range from the size tree.
- */
-static __inline void
-uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
-{
- int memtype;
-#ifdef DEBUG
- struct vm_page *i;
-#endif
-
- KASSERT(pg->pg_flags & PQ_FREE);
- memtype = uvm_pmr_pg_to_memtype(pg);
-
- if (pg->fq.free.pages == 1) {
-#ifdef DEBUG
- TAILQ_FOREACH(i, &pmr->single[memtype], pageq) {
- if (i == pg)
- break;
- }
- KDASSERT(i == pg);
-#endif
- TAILQ_REMOVE(&pmr->single[memtype], pg, pageq);
- } else {
- KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[memtype],
- pg + 1) == pg + 1);
- RB_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1);
- }
-}
-/* Remove from both trees. */
-void
-uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg)
-{
- uvm_pmr_assertvalid(pmr);
- uvm_pmr_remove_size(pmr, pg);
- uvm_pmr_remove_addr(pmr, pg);
- uvm_pmr_assertvalid(pmr);
-}
-
-/*
- * Insert the range described in pg.
- * Returns the range thus created (which may be joined with the previous and
- * next ranges).
- * If no_join, the caller guarantees that the range cannot possibly join
- * with adjecent ranges.
- */
-static __inline struct vm_page *
-uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
-{
- struct vm_page *prev, *next;
-
-#ifdef DEBUG
- struct vm_page *i;
- int mt;
-
- for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) {
- TAILQ_FOREACH(i, &pmr->single[mt], pageq)
- KDASSERT(i != pg);
- if (pg->fq.free.pages > 1) {
- KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mt],
- pg + 1) == NULL);
- }
- KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL);
- }
-#endif
-
- KASSERT(pg->pg_flags & PQ_FREE);
- KASSERT(pg->fq.free.pages >= 1);
-
- if (!no_join) {
- uvm_pmr_pnaddr(pmr, pg, &prev, &next);
- if (next != NULL) {
- uvm_pmr_remove_size(pmr, next);
- uvm_pmr_remove_addr(pmr, next);
- pg->fq.free.pages += next->fq.free.pages;
- next->fq.free.pages = 0;
- }
- if (prev != NULL) {
- uvm_pmr_remove_size(pmr, prev);
- prev->fq.free.pages += pg->fq.free.pages;
- pg->fq.free.pages = 0;
- return prev;
- }
- }
-#ifdef DEBUG
- else {
- uvm_pmr_pnaddr(pmr, pg, &prev, &next);
- KDASSERT(prev == NULL && next == NULL);
- }
-#endif /* DEBUG */
-
- RB_INSERT(uvm_pmr_addr, &pmr->addr, pg);
-
- pmr->nsegs++;
-
- return pg;
-}
-/*
- * Insert the range described in pg.
- * Returns the range thus created (which may be joined with the previous and
- * next ranges).
- * Page must already be in the address tree.
- */
-static __inline void
-uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg)
-{
- int memtype;
-#ifdef DEBUG
- struct vm_page *i;
- int mti;
-#endif
-
- memtype = uvm_pmr_pg_to_memtype(pg);
-#ifdef DEBUG
- for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
- TAILQ_FOREACH(i, &pmr->single[mti], pageq)
- KDASSERT(i != pg);
- if (pg->fq.free.pages > 1) {
- KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mti],
- pg + 1) == NULL);
- }
- KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg);
- }
- for (i = pg; i < pg + pg->fq.free.pages; i++)
- KASSERT(uvm_pmr_pg_to_memtype(i) == memtype);
-#endif
-
- KASSERT(pg->pg_flags & PQ_FREE);
- KASSERT(pg->fq.free.pages >= 1);
-
- if (pg->fq.free.pages == 1)
- TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq);
- else
- RB_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1);
-}
-/* Insert in both trees. */
-struct vm_page *
-uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join)
-{
- uvm_pmr_assertvalid(pmr);
- pg = uvm_pmr_insert_addr(pmr, pg, no_join);
- uvm_pmr_insert_size(pmr, pg);
- uvm_pmr_assertvalid(pmr);
- return pg;
-}
-
-/*
- * Remove the first segment of contiguous pages from pgl.
- * A segment ends if it crosses boundary (unless boundary = 0) or
- * if it would enter a different uvm_pmemrange.
- *
- * Work: the page range that the caller is currently working with.
- * May be null.
- */
-psize_t
-uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary,
- struct vm_page **work)
-{
- struct vm_page *pg, *pre_last, *last, *inserted;
- psize_t count;
- struct uvm_pmemrange *pmr;
- paddr_t first_boundary;
-
- KASSERT(!TAILQ_EMPTY(pgl));
-
- pg = TAILQ_FIRST(pgl);
- pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
- KDASSERT(pmr != NULL);
- if (boundary != 0) {
- first_boundary =
- PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary);
- } else
- first_boundary = 0;
-
- /* Remove all pages in the first segment. */
- pre_last = pg;
- last = TAILQ_NEXT(pre_last, pageq);
- TAILQ_REMOVE(pgl, pre_last, pageq);
- count = 1;
- /*
- * While loop checks the following:
- * - last != NULL we have not reached the end of pgs
- * - boundary == 0 || last < first_boundary
- * we do not cross a boundary
- * - atop(pre_last) + 1 == atop(last)
- * still in the same segment
- * - low <= last
- * - high > last still testing the same memory range
- *
- * At the end of the loop, last points at the next segment
- * and each page [pg, pre_last] (inclusive range) has been removed
- * and count is the number of pages that have been removed.
- */
- while (last != NULL &&
- (boundary == 0 || atop(VM_PAGE_TO_PHYS(last)) < first_boundary) &&
- atop(VM_PAGE_TO_PHYS(pre_last)) + 1 ==
- atop(VM_PAGE_TO_PHYS(last)) &&
- pmr->low <= atop(VM_PAGE_TO_PHYS(last)) &&
- pmr->high > atop(VM_PAGE_TO_PHYS(last))) {
- count++;
- pre_last = last;
- last = TAILQ_NEXT(last, pageq);
- TAILQ_REMOVE(pgl, pre_last, pageq);
- }
- KDASSERT(TAILQ_FIRST(pgl) == last);
- KDASSERT(pg + (count - 1) == pre_last);
-
- pg->fq.free.pages = count;
- inserted = uvm_pmr_insert(pmr, pg, 0);
-
- if (work != NULL && *work != NULL &&
- atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) &&
- atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fq.free.pages >
- atop(VM_PAGE_TO_PHYS(*work)))
- *work = inserted;
- return count;
-}
-
-/*
- * Extract a number of pages from a segment of free pages.
- * Called by uvm_pmr_getpages.
- *
- * Returns the segment that was created from pages left over at the tail
- * of the remove set of pages, or NULL if no pages were left at the tail.
- */
-struct vm_page *
-uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg,
- paddr_t start, paddr_t end, struct pglist *result)
-{
- struct vm_page *after, *pg_i;
- psize_t before_sz, after_sz;
-#ifdef DEBUG
- psize_t i;
-#endif
-
- KASSERT(end > start);
- KASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)));
- KASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages);
- KASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start);
- KASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages >= end);
-
- before_sz = start - atop(VM_PAGE_TO_PHYS(pg));
- after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fq.free.pages - end;
- KDASSERT(before_sz + after_sz + (end - start) == pg->fq.free.pages);
- uvm_pmr_assertvalid(pmr);
-
- uvm_pmr_remove_size(pmr, pg);
- if (before_sz == 0)
- uvm_pmr_remove_addr(pmr, pg);
-
- /* Add selected pages to result. */
- for (pg_i = pg + before_sz; atop(VM_PAGE_TO_PHYS(pg_i)) < end;
- pg_i++) {
- pg_i->fq.free.pages = 0;
- TAILQ_INSERT_TAIL(result, pg_i, pageq);
- KDASSERT(pg_i->pg_flags & PQ_FREE);
- }
-
- /* Before handling. */
- if (before_sz > 0) {
- pg->fq.free.pages = before_sz;
- uvm_pmr_insert_size(pmr, pg);
- }
-
- /* After handling. */
- after = NULL;
- if (after_sz > 0) {
- after = pg + before_sz + (end - start);
-#ifdef DEBUG
- for (i = 0; i < after_sz; i++) {
- KASSERT(!uvm_pmr_isfree(after + i));
- }
-#endif
- KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end);
- after->fq.free.pages = after_sz;
- after = uvm_pmr_insert_addr(pmr, after, 1);
- uvm_pmr_insert_size(pmr, after);
- }
-
- uvm_pmr_assertvalid(pmr);
- return after;
-}
-
-/*
- * Acquire a number of pages.
- *
- * count: the number of pages returned
- * start: lowest page number
- * end: highest page number +1
- * (start = end = 0: no limitation)
- * align: power-of-2 alignment constraint (align = 1: no alignment)
- * boundary: power-of-2 boundary (boundary = 0: no boundary)
- * maxseg: maximum number of segments to return
- * flags: UVM_PLA_* flags
- * result: returned pages storage (uses pageq)
- */
-int
-uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align,
- paddr_t boundary, int maxseg, int flags, struct pglist *result)
-{
- struct uvm_pmemrange *pmr; /* Iterate memory ranges. */
- struct vm_page *found, *f_next; /* Iterate chunks. */
- psize_t fcount; /* Current found pages. */
- int fnsegs; /* Current segment counter. */
- int try, start_try;
- psize_t search[2];
- paddr_t fstart, fend; /* Pages to be taken from found. */
- int memtype; /* Requested memtype. */
- int desperate; /* True if allocation failed. */
-
- /* Validate arguments. */
- KASSERT(count > 0);
- KASSERT((start == 0 && end == 0) || (start < end));
- KASSERT(align >= 1 && powerof2(align));
- KASSERT(maxseg > 0);
- KASSERT(boundary == 0 || powerof2(boundary));
- KDASSERT(boundary == 0 || maxseg * boundary >= count);
- KASSERT(TAILQ_EMPTY(result));
-
- /* Configure search. If start_try == 0, search[0] should be faster
- * (because it will have to throw away less segments).
- * search[1] is the worst case: start searching at the smallest
- * possible range instead of starting at the range most likely to
- * fulfill the allocation. */
- start_try = 0;
- search[0] = (flags & UVM_PLA_TRY_CONTIG ? count :
- pow2divide(count, maxseg));
- search[1] = 1;
- if (maxseg == 1) {
- start_try = 1;
- search[1] = count;
- } else if (search[1] >= search[0])
- start_try = 1;
-
-ReTry: /* Return point after sleeping. */
- fcount = 0;
- fnsegs = 0;
-
- /* Memory type: if zeroed memory is requested, traverse the zero set.
- * Otherwise, traverse the dirty set. */
- if (flags & UVM_PLA_ZERO)
- memtype = UVM_PMR_MEMTYPE_ZERO;
- else
- memtype = UVM_PMR_MEMTYPE_DIRTY;
- desperate = 0;
-
- uvm_lock_fpageq();
-
-ReTryDesperate:
- /*
- * If we just want any page(s), go for the really fast option.
- */
- if (count <= maxseg && align == 1 && boundary == 0 &&
- (flags & UVM_PLA_TRY_CONTIG) == 0) {
- if (!desperate) {
- KASSERT(fcount == 0);
- fcount += uvm_pmr_get1page(count, memtype, result,
- start, end);
- } else {
- for (memtype = 0; memtype < UVM_PMR_MEMTYPE_MAX &&
- fcount < count; memtype++) {
- fcount += uvm_pmr_get1page(count - fcount,
- memtype, result, start, end);
- }
- }
-
- if (fcount == count)
- goto Out;
- else
- goto Fail;
- }
-
- TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
- /* Empty range. */
- if (pmr->nsegs == 0)
- continue;
-
- /* Outside requested range. */
- if (!(start == 0 && end == 0) &&
- !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end))
- continue;
-
- try = start_try;
-ReScan: /* Return point at try++. */
-
- for (found = uvm_pmr_nfindsz(pmr, search[try], memtype);
- found != NULL;
- found = f_next) {
- f_next = uvm_pmr_nextsz(pmr, found, memtype);
-
- fstart = atop(VM_PAGE_TO_PHYS(found));
-DrainFound:
- /* Throw away the first segment if fnsegs == maxseg */
- if (fnsegs == maxseg) {
- fnsegs--;
- fcount -=
- uvm_pmr_remove_1strange(result, boundary,
- &found);
- }
-
- fstart = PMR_ALIGN(fstart, align);
- fend = atop(VM_PAGE_TO_PHYS(found)) +
- found->fq.free.pages;
- if (fstart >= fend)
- continue;
- if (boundary != 0) {
- fend =
- MIN(fend, PMR_ALIGN(fstart + 1, boundary));
- }
- if (fend - fstart > count - fcount)
- fend = fstart + (count - fcount);
-
- fcount += fend - fstart;
- fnsegs++;
- found = uvm_pmr_extract_range(pmr, found,
- fstart, fend, result);
-
- if (fcount == count)
- goto Out;
-
- /* If there's still space left in found, try to
- * fully drain it prior to continueing. */
- if (found != NULL) {
- fstart = fend;
- goto DrainFound;
- }
- }
-
- if (++try < nitems(search))
- goto ReScan;
- }
-
- /*
- * Not enough memory of the requested type available. Fall back to
- * less good memory that we'll clean up better later.
- *
- * This algorithm is not very smart though, it just starts scanning
- * a different typed range, but the nicer ranges of the previous
- * iteration may fall out.
- */
- if (!desperate) {
- desperate = 1;
- memtype = 0;
- goto ReTryDesperate;
- } else if (++memtype < UVM_PMR_MEMTYPE_MAX)
- goto ReTryDesperate;
-
-Fail:
- /*
- * Allocation failed.
- */
-
- /* XXX: claim from memory reserve here */
-
- while (!TAILQ_EMPTY(result))
- uvm_pmr_remove_1strange(result, 0, NULL);
- uvm_unlock_fpageq();
-
- if (flags & UVM_PLA_WAITOK) {
- uvm_wait("uvm_pmr_getpages");
- goto ReTry;
- } else
- wakeup(&uvm.pagedaemon_proc);
-
- return ENOMEM;
-
-Out:
-
- /*
- * Allocation succesful.
- */
-
- uvmexp.free -= fcount;
-
- uvm_unlock_fpageq();
-
- /* Update statistics and zero pages if UVM_PLA_ZERO. */
- TAILQ_FOREACH(found, result, pageq) {
- if (found->pg_flags & PG_ZERO) {
- uvmexp.zeropages--;
- }
- if (flags & UVM_PLA_ZERO) {
- if (found->pg_flags & PG_ZERO)
- uvmexp.pga_zerohit++;
- else {
- uvmexp.pga_zeromiss++;
- uvm_pagezero(found);
- }
- }
- atomic_clearbits_int(&found->pg_flags, PG_ZERO | PQ_FREE);
-
- found->uobject = NULL;
- found->uanon = NULL;
- found->pg_version++;
- }
-
- return 0;
-}
-
-/*
- * Free a number of contig pages (invoked by uvm_page_init).
- */
-void
-uvm_pmr_freepages(struct vm_page *pg, psize_t count)
-{
- struct uvm_pmemrange *pmr;
- psize_t i, pmr_count;
-
- uvm_lock_fpageq();
-
- for (i = 0; i < count; i++) {
- atomic_clearbits_int(&pg[i].pg_flags, pg[i].pg_flags);
- atomic_setbits_int(&pg[i].pg_flags, PQ_FREE);
- }
-
- while (count > 0) {
- pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
- KASSERT(pmr != NULL);
-
- pmr_count = MIN(count, pmr->high - atop(VM_PAGE_TO_PHYS(pg)));
- pg->fq.free.pages = pmr_count;
- uvm_pmr_insert(pmr, pg, 0);
-
- uvmexp.free += pmr_count;
- count -= pmr_count;
- pg += pmr_count;
- }
- wakeup(&uvmexp.free);
-
- uvm_unlock_fpageq();
-}
-
-/*
- * Free all pages in the queue.
- */
-void
-uvm_pmr_freepageq(struct pglist *pgl)
-{
- struct vm_page *pg;
-
- TAILQ_FOREACH(pg, pgl, pageq) {
- atomic_clearbits_int(&pg->pg_flags, pg->pg_flags);
- atomic_setbits_int(&pg->pg_flags, PQ_FREE);
- }
-
- uvm_lock_fpageq();
- while (!TAILQ_EMPTY(pgl))
- uvmexp.free += uvm_pmr_remove_1strange(pgl, 0, NULL);
- wakeup(&uvmexp.free);
- uvm_unlock_fpageq();
-
- return;
-}
-
-/*
- * Store a pmemrange in the list.
- *
- * The list is sorted by use.
- */
-struct uvm_pmemrange *
-uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq,
- struct uvm_pmemrange *pmr)
-{
- struct uvm_pmemrange *iter;
- int cmp = 1;
-
- TAILQ_FOREACH(iter, useq, pmr_use) {
- cmp = uvm_pmemrange_use_cmp(pmr, iter);
- if (cmp == 0)
- return iter;
- if (cmp == -1)
- break;
- }
- if (cmp == 0)
- return iter;
-
- if (iter == NULL)
- TAILQ_INSERT_TAIL(useq, pmr, pmr_use);
- else
- TAILQ_INSERT_BEFORE(iter, pmr, pmr_use);
- return NULL;
-}
-
-#ifdef DEBUG
-/*
- * Validation of the whole pmemrange.
- * Called with fpageq locked.
- */
-void
-uvm_pmr_assertvalid(struct uvm_pmemrange *pmr)
-{
- struct vm_page *prev, *next, *i, *xref;
- int lcv, mti;
-
- /* Validate address tree. */
- RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) {
- /* Validate the range. */
- KASSERT(i->fq.free.pages > 0);
- KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low);
- KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fq.free.pages
- <= pmr->high);
-
- /* Validate each page in this range. */
- for (lcv = 0; lcv < i->fq.free.pages; lcv++) {
- KASSERT(lcv == 0 || i[lcv].fq.free.pages == 0);
- /* Flag check:
- * - PG_ZERO: page is zeroed.
- * - PQ_FREE: page is free.
- * Any other flag is a mistake. */
- if (i[lcv].pg_flags !=
- (i[lcv].pg_flags & (PG_ZERO | PQ_FREE))) {
- panic("i[%lu].pg_flags = %x, should be %x\n",
- lcv, i[lcv].pg_flags, PG_ZERO | PQ_FREE);
- }
- /* Free pages are:
- * - not wired
- * - not loaned
- * - have no vm_anon
- * - have no uvm_object */
- KASSERT(i[lcv].wire_count == 0);
- KASSERT(i[lcv].loan_count == 0);
- KASSERT(i[lcv].uanon == NULL);
- KASSERT(i[lcv].uobject == NULL);
- /* Pages in a single range always have the same
- * memtype. */
- KASSERT(uvm_pmr_pg_to_memtype(&i[0]) ==
- uvm_pmr_pg_to_memtype(&i[lcv]));
- }
-
- /* Check that it shouldn't be joined with its predecessor. */
- prev = RB_PREV(uvm_pmr_addr, &pmr->addr, i);
- if (prev != NULL) {
- KASSERT(uvm_pmr_pg_to_memtype(&i[0]) !=
- uvm_pmr_pg_to_memtype(&i[lcv]) ||
- atop(VM_PAGE_TO_PHYS(i)) >
- atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages);
- }
-
- /* Assert i is in the size tree as well. */
- if (i->fq.free.pages == 1) {
- TAILQ_FOREACH(xref,
- &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) {
- if (xref == i)
- break;
- }
- KASSERT(xref == i);
- } else {
- KASSERT(RB_FIND(uvm_pmr_size,
- &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) ==
- i + 1);
- }
- }
-
- /* Validate size tree. */
- for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) {
- for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) {
- next = uvm_pmr_nextsz(pmr, i, mti);
- if (next != NULL) {
- KASSERT(i->fq.free.pages <=
- next->fq.free.pages);
- }
-
- /* Assert i is in the addr tree as well. */
- KASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, i) == i);
-
- /* Assert i is of the correct memory type. */
- KASSERT(uvm_pmr_pg_to_memtype(i) == mti);
- }
- }
-
- /* Validate nsegs statistic. */
- lcv = 0;
- RB_FOREACH(i, uvm_pmr_addr, &pmr->addr)
- lcv++;
- KASSERT(pmr->nsegs == lcv);
-}
-#endif /* DEBUG */
-
-/*
- * Split pmr at split point pageno.
- * Called with fpageq unlocked.
- *
- * Split is only applied if a pmemrange spans pageno.
- */
-void
-uvm_pmr_split(paddr_t pageno)
-{
- struct uvm_pmemrange *pmr, *drain;
- struct vm_page *rebuild, *prev, *next;
- psize_t prev_sz;
-
- uvm_lock_fpageq();
- pmr = uvm_pmemrange_find(pageno);
- if (pmr == NULL || !(pmr->low < pageno)) {
- /* No split required. */
- uvm_unlock_fpageq();
- return;
- }
-
- KASSERT(pmr->low < pageno);
- KASSERT(pmr->high > pageno);
-
- drain = uvm_pmr_allocpmr();
- drain->low = pageno;
- drain->high = pmr->high;
- drain->use = pmr->use;
-
- uvm_pmr_assertvalid(pmr);
- uvm_pmr_assertvalid(drain);
- KASSERT(drain->nsegs == 0);
-
- RB_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) {
- if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno)
- break;
- }
- if (rebuild == NULL)
- prev = RB_MAX(uvm_pmr_addr, &pmr->addr);
- else
- prev = RB_PREV(uvm_pmr_addr, &pmr->addr, rebuild);
- KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno);
-
- /*
- * Handle free chunk that spans the split point.
- */
- if (prev != NULL &&
- atop(VM_PAGE_TO_PHYS(prev)) + prev->fq.free.pages > pageno) {
- psize_t before, after;
-
- KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno);
-
- uvm_pmr_remove(pmr, prev);
- prev_sz = prev->fq.free.pages;
- before = pageno - atop(VM_PAGE_TO_PHYS(prev));
- after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno;
-
- KASSERT(before > 0);
- KASSERT(after > 0);
-
- prev->fq.free.pages = before;
- uvm_pmr_insert(pmr, prev, 1);
- (prev + before)->fq.free.pages = after;
- uvm_pmr_insert(drain, prev + before, 1);
- }
-
- /*
- * Move free chunks that no longer fall in the range.
- */
- for (; rebuild != NULL; rebuild = next) {
- next = RB_NEXT(uvm_pmr_addr, &pmr->addr, rebuild);
-
- uvm_pmr_remove(pmr, rebuild);
- uvm_pmr_insert(drain, rebuild, 1);
- }
-
- pmr->high = pageno;
- uvm_pmr_assertvalid(pmr);
- uvm_pmr_assertvalid(drain);
-
- RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain);
- uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain);
- uvm_unlock_fpageq();
-}
-
-/*
- * Increase the usage counter for the given range of memory.
- *
- * The higher the usage counter of a given range of memory, the harder the
- * allocator tries to avoid allocating from it.
- *
- * Addresses here are in paddr_t, not page-numbers.
- * The lowest and highest allowed address are specified.
- */
-void
-uvm_pmr_use_inc(paddr_t low, paddr_t high)
-{
- struct uvm_pmemrange *pmr;
-
- /*
- * If high+1 == 0, then you are increasing use of the whole address
- * space, which won't make any difference. Skip in that case.
- */
- high++;
- if (high == 0)
- return;
-
- /*
- * pmr uses page numbers, translate low and high.
- */
- low = atop(round_page(low));
- high = atop(trunc_page(high));
- uvm_pmr_split(low);
- uvm_pmr_split(high);
-
- uvm_lock_fpageq();
-
- /* Increase use count on segments in range. */
- RB_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) {
- if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) {
- TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use);
- pmr->use++;
- uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr);
- }
- uvm_pmr_assertvalid(pmr);
- }
-
- uvm_unlock_fpageq();
-}
-
-/*
- * Allocate a pmemrange.
- *
- * If called from uvm_page_init, the uvm_pageboot_alloc is used.
- * If called after uvm_init, malloc is used.
- * (And if called in between, you're dead.)
- */
-struct uvm_pmemrange *
-uvm_pmr_allocpmr()
-{
- struct uvm_pmemrange *nw;
- int i;
-
- if (!uvm.page_init_done) {
- nw = (struct uvm_pmemrange *)
- uvm_pageboot_alloc(sizeof(struct uvm_pmemrange));
- bzero(nw, sizeof(struct uvm_pmemrange));
- } else {
- nw = malloc(sizeof(struct uvm_pmemrange),
- M_VMMAP, M_NOWAIT | M_ZERO);
- }
- RB_INIT(&nw->addr);
- for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) {
- RB_INIT(&nw->size[i]);
- TAILQ_INIT(&nw->single[i]);
- }
- return nw;
-}
-
-static const struct uvm_io_ranges uvm_io_ranges[] = UVM_IO_RANGES;
-
-/*
- * Initialization of pmr.
- * Called by uvm_page_init.
- *
- * Sets up pmemranges that map the vm_physmem data.
- */
-void
-uvm_pmr_init(void)
-{
- struct uvm_pmemrange *new_pmr;
- int i;
-
- TAILQ_INIT(&uvm.pmr_control.use);
- RB_INIT(&uvm.pmr_control.addr);
-
- for (i = 0 ; i < vm_nphysseg ; i++) {
- new_pmr = uvm_pmr_allocpmr();
-
- new_pmr->low = vm_physmem[i].start;
- new_pmr->high = vm_physmem[i].end;
-
- RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr);
- uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr);
- }
-
- for (i = 0; i < nitems(uvm_io_ranges); i++)
- uvm_pmr_use_inc(uvm_io_ranges[i].low, uvm_io_ranges[i].high);
-}
-
-/*
- * Find the pmemrange that contains the given page number.
- *
- * (Manually traverses the binary tree, because that is cheaper on stack
- * usage.)
- */
-struct uvm_pmemrange *
-uvm_pmemrange_find(paddr_t pageno)
-{
- struct uvm_pmemrange *pmr;
-
- pmr = RB_ROOT(&uvm.pmr_control.addr);
- while (pmr != NULL) {
- if (pmr->low > pageno)
- pmr = RB_LEFT(pmr, pmr_addr);
- else if (pmr->high <= pageno)
- pmr = RB_RIGHT(pmr, pmr_addr);
- else
- break;
- }
-
- return pmr;
-}
-
-#if defined(DDB) || defined(DEBUG)
-/*
- * Return true if the given page is in any of the free lists.
- * Used by uvm_page_printit.
- * This function is safe, even if the page is not on the freeq.
- * Note: does not apply locking, only called from ddb.
- */
-int
-uvm_pmr_isfree(struct vm_page *pg)
-{
- struct vm_page *r;
- struct uvm_pmemrange *pmr;
-
- pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg)));
- if (pmr == NULL)
- return 0;
- r = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg);
- if (r == NULL)
- r = RB_MAX(uvm_pmr_addr, &pmr->addr);
- else
- r = RB_PREV(uvm_pmr_addr, &pmr->addr, r);
- if (r == NULL)
- return 0; /* Empty tree. */
-
- KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg)));
- return atop(VM_PAGE_TO_PHYS(r)) + r->fq.free.pages >
- atop(VM_PAGE_TO_PHYS(pg));
-}
-#endif /* DDB || DEBUG */
-
-/*
- * Allocate any page, the fastest way. No constraints.
- */
-int
-uvm_pmr_get1page(psize_t count, int memtype, struct pglist *result,
- paddr_t start, paddr_t end)
-{
- struct uvm_pmemrange *pmr;
- struct vm_page *found;
- psize_t fcount;
-
- fcount = 0;
- pmr = TAILQ_FIRST(&uvm.pmr_control.use);
- while (pmr != NULL && fcount != count) {
- /* Outside requested range. */
- if (!(start == 0 && end == 0) &&
- !PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, start, end)) {
- pmr = TAILQ_NEXT(pmr, pmr_use);
- continue;
- }
-
- found = TAILQ_FIRST(&pmr->single[memtype]);
- if (found == NULL) {
- found = RB_ROOT(&pmr->size[memtype]);
- /* Size tree gives pg[1] instead of pg[0] */
- if (found != NULL)
- found--;
- }
- if (found == NULL) {
- pmr = TAILQ_NEXT(pmr, pmr_use);
- continue;
- }
-
- uvm_pmr_assertvalid(pmr);
- uvm_pmr_remove_size(pmr, found);
- while (found->fq.free.pages > 0 && fcount < count) {
- found->fq.free.pages--;
- fcount++;
- TAILQ_INSERT_HEAD(result,
- &found[found->fq.free.pages], pageq);
- }
- if (found->fq.free.pages > 0) {
- uvm_pmr_insert_size(pmr, found);
- KASSERT(fcount == count);
- uvm_pmr_assertvalid(pmr);
- return fcount;
- } else
- uvm_pmr_remove_addr(pmr, found);
- uvm_pmr_assertvalid(pmr);
- }
-
- /* Ran out of ranges before enough pages were gathered. */
- return fcount;
-}
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
deleted file mode 100644
index 493961f1f9c..00000000000
--- a/sys/uvm/uvm_pmemrange.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* $OpenBSD: uvm_pmemrange.h,v 1.3 2009/06/14 02:20:23 deraadt Exp $ */
-
-/*
- * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * uvm_pmemrange.h: describe and manage free physical memory.
- */
-
-#ifndef _UVM_UVM_PMEMRANGE_H_
-#define _UVM_UVM_PMEMRANGE_H_
-
-#include <uvm/uvm_extern.h>
-#include <uvm/uvm_page.h>
-
-RB_HEAD(uvm_pmr_addr, vm_page);
-RB_HEAD(uvm_pmr_size, vm_page);
-
-/*
- * Page types available:
- * - DIRTY: this page may contain random data.
- * - ZERO: this page has been zeroed.
- */
-#define UVM_PMR_MEMTYPE_DIRTY 0
-#define UVM_PMR_MEMTYPE_ZERO 1
-#define UVM_PMR_MEMTYPE_MAX 2
-
-/*
- * An address range of memory.
- */
-struct uvm_pmemrange {
- struct uvm_pmr_addr addr; /* Free page chunks, sorted by addr. */
- struct uvm_pmr_size size[UVM_PMR_MEMTYPE_MAX];
- /* Free page chunks, sorted by size. */
- TAILQ_HEAD(, vm_page) single[UVM_PMR_MEMTYPE_MAX];
- /* single page regions (uses pageq) */
-
- paddr_t low; /* Start of address range (pgno). */
- paddr_t high; /* End +1 (pgno). */
- int use; /* Use counter. */
- int nsegs; /* Current range count. */
-
- TAILQ_ENTRY(uvm_pmemrange) pmr_use;
- /* pmr, sorted by use */
- RB_ENTRY(uvm_pmemrange) pmr_addr;
- /* pmr, sorted by address */
-};
-
-RB_HEAD(uvm_pmemrange_addr, uvm_pmemrange);
-TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
-
-/*
- * pmr control structure. Contained in uvm.pmr_control.
- */
-struct uvm_pmr_control {
- struct uvm_pmemrange_addr addr;
- struct uvm_pmemrange_use use;
-};
-
-void uvm_pmr_freepages(struct vm_page *, psize_t);
-void uvm_pmr_freepageq(struct pglist *pgl);
-int uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
- int, int, struct pglist *);
-void uvm_pmr_init(void);
-
-#ifdef DDB
-int uvm_pmr_isfree(struct vm_page *pg);
-#endif
-
-#endif /* _UVM_UVM_PMEMRANGE_H_ */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index e85e2c24e38..998f0fa0a62 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.63 2009/06/16 00:11:29 oga Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.64 2009/06/16 16:42:41 ariane Exp $ */
/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
/*
@@ -561,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp)
while (uvn->u_obj.uo_npages) {
#ifdef DEBUG
struct vm_page *pp;
- TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) {
+ TAILQ_FOREACH(pp, &uvn->u_obj.memq, listq) {
if ((pp->pg_flags & PG_BUSY) == 0)
panic("uvm_vnp_terminate: detected unbusy pg");
}