author    Owain Ainsworth <oga@cvs.openbsd.org>    2009-06-16 00:11:30 +0000
committer Owain Ainsworth <oga@cvs.openbsd.org>    2009-06-16 00:11:30 +0000
commit    b20700966027364e7e2e3cf5ca4613cbb4e2a25b (patch)
tree      dac29c9a1582e023159a8aabe2282775b21cbdc2 /sys
parent    ab37797a62467132f94babf9bc9d57cef8402599 (diff)
Backout all changes to uvm after pmemrange (which will be backed out
separately).

A change at or just before the hackathon has either exposed or added a
very, very nasty memory corruption bug that is giving us hell right now.
So in the interest of kernel stability these diffs are being backed out
until such time as that corruption bug has been found and squashed; the
ones that are proven good may then slowly return.

A quick hitlist of the main commits this backs out:

mine:
	uvm_objwire
	the lock change in uvm_swap.c
	using trees for uvm objects instead of the hash
	removing the pgo_releasepg callback

art@'s:
	putting pmap_page_protect(VM_PROT_NONE) in uvm_pagedeactivate(),
	since all callers called that just prior anyway

ok beck@, ariane@. prompted by deraadt@.
Diffstat (limited to 'sys')
-rw-r--r--  sys/arch/amd64/amd64/pmap.c  |  10
-rw-r--r--  sys/arch/hppa/hppa/pmap.c    |  10
-rw-r--r--  sys/arch/i386/i386/pmap.c    |  11
-rw-r--r--  sys/arch/i386/i386/pmapae.c  |   8
-rw-r--r--  sys/conf/files               |   3
-rw-r--r--  sys/kern/vfs_biomem.c        |   4
-rw-r--r--  sys/uvm/uvm.h                |   8
-rw-r--r--  sys/uvm/uvm_anon.c           |   3
-rw-r--r--  sys/uvm/uvm_aobj.c           |  57
-rw-r--r--  sys/uvm/uvm_device.c         |   6
-rw-r--r--  sys/uvm/uvm_fault.c          |  62
-rw-r--r--  sys/uvm/uvm_init.c           |   3
-rw-r--r--  sys/uvm/uvm_loan.c           |  26
-rw-r--r--  sys/uvm/uvm_map.c            |  20
-rw-r--r--  sys/uvm/uvm_mmap.c           |  10
-rw-r--r--  sys/uvm/uvm_object.c         | 159
-rw-r--r--  sys/uvm/uvm_object.h         |  17
-rw-r--r--  sys/uvm/uvm_page.c           | 181
-rw-r--r--  sys/uvm/uvm_page.h           |   6
-rw-r--r--  sys/uvm/uvm_pager.h          |   4
-rw-r--r--  sys/uvm/uvm_pdaemon.c        |  42
-rw-r--r--  sys/uvm/uvm_swap.c           |   6
-rw-r--r--  sys/uvm/uvm_vnode.c          |  57
23 files changed, 425 insertions, 288 deletions
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 78e72c6bd53..165f3b365dd 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.47 2009/06/09 02:56:38 krw Exp $ */
+/* $OpenBSD: pmap.c,v 1.48 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -567,7 +567,7 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
kpm = pmap_kernel();
for (i = 0; i < PTP_LEVELS - 1; i++) {
kpm->pm_obj[i].pgops = NULL;
- RB_INIT(&kpm->pm_obj[i].memt);
+ TAILQ_INIT(&kpm->pm_obj[i].memq);
kpm->pm_obj[i].uo_npages = 0;
kpm->pm_obj[i].uo_refs = 1;
kpm->pm_ptphint[i] = NULL;
@@ -832,7 +832,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level,
obj = &pmap->pm_obj[lidx];
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint[lidx] == ptp)
- pmap->pm_ptphint[lidx] = RB_ROOT(&obj->memt);
+ pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
ptp->wire_count = 0;
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq);
@@ -1019,7 +1019,7 @@ pmap_create(void)
/* init uvm_object */
for (i = 0; i < PTP_LEVELS - 1; i++) {
pmap->pm_obj[i].pgops = NULL; /* not a mappable object */
- RB_INIT(&pmap->pm_obj[i].memt);
+ TAILQ_INIT(&pmap->pm_obj[i].memq);
pmap->pm_obj[i].uo_npages = 0;
pmap->pm_obj[i].uo_refs = 1;
pmap->pm_ptphint[i] = NULL;
@@ -1091,7 +1091,7 @@ pmap_destroy(struct pmap *pmap)
*/
for (i = 0; i < PTP_LEVELS - 1; i++) {
- while ((pg = RB_ROOT(&pmap->pm_obj[i].memt)) != NULL) {
+ while ((pg = TAILQ_FIRST(&pmap->pm_obj[i].memq)) != NULL) {
KASSERT((pg->pg_flags & PG_BUSY) == 0);
pg->wire_count = 0;
diff --git a/sys/arch/hppa/hppa/pmap.c b/sys/arch/hppa/hppa/pmap.c
index 5f9d72ab8b9..34e5652adae 100644
--- a/sys/arch/hppa/hppa/pmap.c
+++ b/sys/arch/hppa/hppa/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.136 2009/06/11 20:10:51 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.137 2009/06/16 00:11:29 oga Exp $ */
/*
* Copyright (c) 1998-2004 Michael Shalayeff
@@ -235,7 +235,7 @@ pmap_pde_release(struct pmap *pmap, vaddr_t va, struct vm_page *ptp)
pmap_pde_set(pmap, va, 0);
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
- pmap->pm_ptphint = RB_ROOT(&pmap->pm_obj.memt);
+ pmap->pm_ptphint = TAILQ_FIRST(&pmap->pm_obj.memq);
ptp->wire_count = 0;
#ifdef DIAGNOSTIC
if (ptp->pg_flags & PG_BUSY)
@@ -470,7 +470,7 @@ pmap_bootstrap(vstart)
bzero(kpm, sizeof(*kpm));
simple_lock_init(&kpm->pm_lock);
kpm->pm_obj.pgops = NULL;
- RB_INIT(&kpm->pm_obj.memt);
+ TAILQ_INIT(&kpm->pm_obj.memq);
kpm->pm_obj.uo_npages = 0;
kpm->pm_obj.uo_refs = 1;
kpm->pm_space = HPPA_SID_KERNEL;
@@ -656,7 +656,7 @@ pmap_create()
simple_lock_init(&pmap->pm_lock);
pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
- RB_INIT(&pmap->pm_obj.memt);
+ TAILQ_INIT(&pmap->pm_obj.memq);
pmap->pm_obj.uo_npages = 0;
pmap->pm_obj.uo_refs = 1;
@@ -698,7 +698,7 @@ pmap_destroy(pmap)
return;
#ifdef DIAGNOSTIC
- while ((pg = RB_ROOT(&pmap->pm_obj.memt))) {
+ while ((pg = TAILQ_FIRST(&pmap->pm_obj.memq))) {
pt_entry_t *pde, *epde;
struct vm_page *sheep;
struct pv_entry *haggis;
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 9b502b9947f..240970dab3b 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.140 2009/06/03 02:31:48 art Exp $ */
+/* $OpenBSD: pmap.c,v 1.141 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -805,7 +805,7 @@ pmap_bootstrap(vaddr_t kva_start)
kpm = pmap_kernel();
simple_lock_init(&kpm->pm_obj.vmobjlock);
kpm->pm_obj.pgops = NULL;
- RB_INIT(&kpm->pm_obj.memt);
+ TAILQ_INIT(&kpm->pm_obj.memq);
kpm->pm_obj.uo_npages = 0;
kpm->pm_obj.uo_refs = 1;
bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */
@@ -1424,7 +1424,7 @@ pmap_drop_ptp(struct pmap *pm, vaddr_t va, struct vm_page *ptp,
pm->pm_stats.resident_count--;
/* update hint */
if (pm->pm_ptphint == ptp)
- pm->pm_ptphint = RB_ROOT(&pm->pm_obj.memt);
+ pm->pm_ptphint = TAILQ_FIRST(&pm->pm_obj.memq);
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
@@ -1461,7 +1461,7 @@ pmap_pinit(struct pmap *pmap)
/* init uvm_object */
simple_lock_init(&pmap->pm_obj.vmobjlock);
pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
- RB_INIT(&pmap->pm_obj.memt);
+ TAILQ_INIT(&pmap->pm_obj.memq);
pmap->pm_obj.uo_npages = 0;
pmap->pm_obj.uo_refs = 1;
pmap->pm_stats.wired_count = 0;
@@ -1533,7 +1533,8 @@ pmap_destroy(struct pmap *pmap)
simple_unlock(&pmaps_lock);
/* Free any remaining PTPs. */
- while ((pg = RB_ROOT(&pmap->pm_obj.memt)) != NULL) {
+ while (!TAILQ_EMPTY(&pmap->pm_obj.memq)) {
+ pg = TAILQ_FIRST(&pmap->pm_obj.memq);
pg->wire_count = 0;
uvm_pagefree(pg);
}
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index aeee7c19f66..46fd0b40943 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmapae.c,v 1.17 2009/06/02 23:00:19 oga Exp $ */
+/* $OpenBSD: pmapae.c,v 1.18 2009/06/16 00:11:29 oga Exp $ */
/*
* Copyright (c) 2006 Michael Shalayeff
@@ -1449,7 +1449,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
pmap->pm_ptphint =
- RB_ROOT(&pmap->pm_obj.memt);
+ TAILQ_FIRST(&pmap->pm_obj.memq);
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
@@ -1543,7 +1543,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
pmap->pm_ptphint =
- RB_ROOT(&pmap->pm_obj.memt);
+ TAILQ_FIRST(&pmap->pm_obj.memq);
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
@@ -1661,7 +1661,7 @@ pmap_page_remove_pae(struct vm_page *pg)
/* update hint? */
if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
pve->pv_pmap->pm_ptphint =
- RB_ROOT(&pve->pv_pmap->pm_obj.memt);
+ TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
pve->pv_ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
diff --git a/sys/conf/files b/sys/conf/files
index e3eeea9aee7..4e84684fa51 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.464 2009/06/10 03:24:02 marco Exp $
+# $OpenBSD: files,v 1.465 2009/06/16 00:11:29 oga Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -969,7 +969,6 @@ file uvm/uvm_km.c
file uvm/uvm_map.c
file uvm/uvm_meter.c
file uvm/uvm_mmap.c
-file uvm/uvm_object.c !small_kernel
file uvm/uvm_page.c
file uvm/uvm_pager.c
file uvm/uvm_pdaemon.c
diff --git a/sys/kern/vfs_biomem.c b/sys/kern/vfs_biomem.c
index eddaf9d58df..ac9eaf95e25 100644
--- a/sys/kern/vfs_biomem.c
+++ b/sys/kern/vfs_biomem.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_biomem.c,v 1.7 2009/06/06 18:06:22 art Exp $ */
+/* $OpenBSD: vfs_biomem.c,v 1.8 2009/06/16 00:11:29 oga Exp $ */
/*
* Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
*
@@ -64,7 +64,7 @@ buf_mem_init(vsize_t size)
buf_object = &buf_object_store;
buf_object->pgops = NULL;
- RB_INIT(&buf_object->memt);
+ TAILQ_INIT(&buf_object->memq);
buf_object->uo_npages = 0;
buf_object->uo_refs = 1;
}
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index cc29e56497e..d48c5f8026c 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.34 2009/06/02 23:00:19 oga Exp $ */
+/* $OpenBSD: uvm.h,v 1.35 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -121,6 +121,12 @@ struct uvm {
struct proc *aiodoned_proc;
struct mutex aiodoned_lock;
+ /* page hash */
+ struct pglist *page_hash; /* page hash table (vp/off->page) */
+ int page_nhash; /* number of buckets */
+ int page_hashmask; /* hash mask */
+ struct mutex hashlock; /* lock on page_hash array */
+
/* static kernel map entry pool */
vm_map_entry_t kentry_free; /* free page pool */
simple_lock_data_t kentry_lock;
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
index 4c75237f5c8..b6d25514ab3 100644
--- a/sys/uvm/uvm_anon.c
+++ b/sys/uvm/uvm_anon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_anon.c,v 1.32 2009/06/06 17:46:44 art Exp $ */
+/* $OpenBSD: uvm_anon.c,v 1.33 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_anon.c,v 1.10 2000/11/25 06:27:59 chs Exp $ */
/*
@@ -354,6 +354,7 @@ uvm_anon_pagein(struct vm_anon *anon)
*/
pmap_clear_reference(pg);
+ pmap_page_protect(pg, VM_PROT_NONE);
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index b651338c28a..b2a68d6d249 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_aobj.c,v 1.42 2009/06/06 17:46:44 art Exp $ */
+/* $OpenBSD: uvm_aobj.c,v 1.43 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -174,6 +174,7 @@ boolean_t uao_flush(struct uvm_object *, voff_t, voff_t, int);
void uao_free(struct uvm_aobj *);
int uao_get(struct uvm_object *, voff_t, vm_page_t *,
int *, int, vm_prot_t, int, int);
+boolean_t uao_releasepg(struct vm_page *, struct vm_page **);
boolean_t uao_pagein(struct uvm_aobj *, int, int);
boolean_t uao_pagein_page(struct uvm_aobj *, int);
@@ -190,6 +191,10 @@ struct uvm_pagerops aobj_pager = {
NULL, /* fault */
uao_flush, /* flush */
uao_get, /* get */
+ NULL, /* put (done by pagedaemon) */
+ NULL, /* cluster */
+ NULL, /* mk_pcluster */
+ uao_releasepg /* releasepg */
};
/*
@@ -521,7 +526,7 @@ uao_create(vsize_t size, int flags)
*/
simple_lock_init(&aobj->u_obj.vmobjlock);
aobj->u_obj.pgops = &aobj_pager;
- RB_INIT(&aobj->u_obj.memt);
+ TAILQ_INIT(&aobj->u_obj.memq);
aobj->u_obj.uo_npages = 0;
/*
@@ -665,7 +670,7 @@ uao_detach_locked(struct uvm_object *uobj)
* Release swap resources then free the page.
*/
uvm_lock_pageq();
- while((pg = RB_ROOT(&uobj->memt)) != NULL) {
+ while((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
if (pg->pg_flags & PG_BUSY) {
atomic_setbits_int(&pg->pg_flags, PG_WANTED);
uvm_unlock_pageq();
@@ -790,8 +795,10 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
continue;
uvm_lock_pageq();
+ /* zap all mappings for the page. */
+ pmap_page_protect(pp, VM_PROT_NONE);
- /* Deactivate the page. */
+ /* ...and deactivate the page. */
uvm_pagedeactivate(pp);
uvm_unlock_pageq();
@@ -1135,6 +1142,45 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
}
/*
+ * uao_releasepg: handle released page in an aobj
+ *
+ * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
+ * to dispose of.
+ * => caller must handle PG_WANTED case
+ * => called with page's object locked, pageq's unlocked
+ * => returns TRUE if page's object is still alive, FALSE if we
+ * killed the page's object. if we return TRUE, then we
+ * return with the object locked.
+ * => if (nextpgp != NULL) => we return the next page on the queue, and return
+ * with the page queues locked [for pagedaemon]
+ * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
+ * => we kill the aobj if it is not referenced and we are suppose to
+ * kill it ("KILLME").
+ */
+boolean_t
+uao_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */)
+{
+ struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject;
+
+ KASSERT(pg->pg_flags & PG_RELEASED);
+
+ /*
+ * dispose of the page [caller handles PG_WANTED] and swap slot.
+ */
+ pmap_page_protect(pg, VM_PROT_NONE);
+ uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
+ uvm_lock_pageq();
+ if (nextpgp)
+ *nextpgp = TAILQ_NEXT(pg, pageq); /* next page for daemon */
+ uvm_pagefree(pg);
+ if (!nextpgp)
+ uvm_unlock_pageq(); /* keep locked for daemon */
+
+ return TRUE;
+}
+
+
+/*
* uao_dropswap: release any swap resources from this aobj page.
*
* => aobj must be locked or have a reference count of 0.
@@ -1350,6 +1396,9 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
* deactivate the page (to put it on a page queue).
*/
pmap_clear_reference(pg);
+#ifndef UBC
+ pmap_page_protect(pg, VM_PROT_NONE);
+#endif
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
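The pgo_releasepg hook restored above has a two-mode contract worth
spelling out: called with a non-NULL nextpgp it hands back the next queue
entry and returns with the page queues still locked (the pagedaemon
case); called with NULL it unlocks them before returning. The return
value says whether the object survived. A condensed caller-side sketch,
lifted from the uvm_fault.c and uvm_pdaemon.c hunks later in this diff
(an editor's sketch; locking simplified):

	/* normal case (fault path): nextpgp == NULL, queues end unlocked */
	if (uobjpage->pg_flags & PG_RELEASED) {
		/* frees the page; TRUE means the object is still alive */
		if (uobj->pgops->pgo_releasepg(uobjpage, NULL))
			simple_unlock(&uobj->vmobjlock);
		goto ReFault;
	}

	/* pagedaemon case: ask for nextpg, queues stay locked on return */
	if (!uobj->pgops->pgo_releasepg(p, &nextpg))
		uobj = NULL;	/* object died with its last page */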
diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c
index a3743490068..26b6976b266 100644
--- a/sys/uvm/uvm_device.c
+++ b/sys/uvm/uvm_device.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_device.c,v 1.33 2009/06/02 23:00:19 oga Exp $ */
+/* $OpenBSD: uvm_device.c,v 1.34 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_device.c,v 1.30 2000/11/25 06:27:59 chs Exp $ */
/*
@@ -227,7 +227,7 @@ udv_attach(void *arg, vm_prot_t accessprot, voff_t off, vsize_t size)
simple_lock_init(&udv->u_obj.vmobjlock);
udv->u_obj.pgops = &uvm_deviceops;
- RB_INIT(&udv->u_obj.memt);
+ TAILQ_INIT(&udv->u_obj.memq);
udv->u_obj.uo_npages = 0;
udv->u_obj.uo_refs = 1;
udv->u_flags = 0;
@@ -287,7 +287,7 @@ again:
uobj,uobj->uo_refs,0,0);
return;
}
- KASSERT(uobj->uo_npages == 0 && RB_EMPTY(&uobj->memt));
+ KASSERT(uobj->uo_npages == 0 && TAILQ_EMPTY(&uobj->memq));
/*
* is it being held? if so, wait until others are done.
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index e0e8946c49c..0c7f05b6459 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_fault.c,v 1.55 2009/06/06 17:46:44 art Exp $ */
+/* $OpenBSD: uvm_fault.c,v 1.56 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */
/*
@@ -203,6 +203,11 @@ uvmfault_anonflush(struct vm_anon **anons, int n)
if (pg && (pg->pg_flags & PG_BUSY) == 0 && pg->loan_count == 0) {
uvm_lock_pageq();
if (pg->wire_count == 0) {
+#ifdef UBC
+ pmap_clear_reference(pg);
+#else
+ pmap_page_protect(pg, VM_PROT_NONE);
+#endif
uvm_pagedeactivate(pg);
}
uvm_unlock_pageq();
@@ -916,10 +921,10 @@ ReFault:
/*
* if center page is resident and not
- * PG_BUSY, then pgo_get made it PG_BUSY
- * for us and gave us a handle to it.
- * remember this page as "uobjpage."
- * (for later use).
+ * PG_BUSY|PG_RELEASED then pgo_get
+ * made it PG_BUSY for us and gave
+ * us a handle to it. remember this
+ * page as "uobjpage." (for later use).
*/
if (lcv == centeridx) {
@@ -961,8 +966,8 @@ ReFault:
(wired ? PMAP_WIRED : 0));
/*
- * NOTE: page can't be PG_WANTED because
- * we've held the lock the whole time
+ * NOTE: page can't be PG_WANTED or PG_RELEASED
+ * because we've held the lock the whole time
* we've had the handle.
*/
@@ -1366,12 +1371,15 @@ Case2:
/* locked(!locked): uobj, uobjpage */
/*
- * Re-verify that amap slot is still free. if there is
- * a problem, we unlock and clean up.
+ * verify that the page has not be released and re-verify
+ * that amap slot is still free. if there is a problem,
+ * we unlock and clean up.
*/
- if (locked && amap && amap_lookup(&ufi.entry->aref,
- ufi.orig_rvaddr - ufi.entry->start)) {
+ if ((uobjpage->pg_flags & PG_RELEASED) != 0 ||
+ (locked && amap &&
+ amap_lookup(&ufi.entry->aref,
+ ufi.orig_rvaddr - ufi.entry->start))) {
if (locked)
uvmfault_unlockall(&ufi, amap, NULL, NULL);
locked = FALSE;
@@ -1390,6 +1398,17 @@ Case2:
/* still holding object lock */
wakeup(uobjpage);
+ if (uobjpage->pg_flags & PG_RELEASED) {
+ uvmexp.fltpgrele++;
+ KASSERT(uobj->pgops->pgo_releasepg != NULL);
+
+ /* frees page */
+ if (uobj->pgops->pgo_releasepg(uobjpage,NULL))
+ /* unlock if still alive */
+ simple_unlock(&uobj->vmobjlock);
+ goto ReFault;
+ }
+
uvm_lock_pageq();
/* make sure it is in queues */
uvm_pageactivate(uobjpage);
@@ -1404,8 +1423,9 @@ Case2:
}
/*
- * we have the data in uobjpage which is PG_BUSY and we are
- * holding object lock.
+ * we have the data in uobjpage which is PG_BUSY and
+ * !PG_RELEASED. we are holding object lock (so the page
+ * can't be released on us).
*/
/* locked: maps(read), amap(if !null), uobj, uobjpage */
@@ -1419,6 +1439,8 @@ Case2:
/*
* notes:
* - at this point uobjpage can not be NULL
+ * - at this point uobjpage can not be PG_RELEASED (since we checked
+ * for it above)
* - at this point uobjpage could be PG_WANTED (handle later)
*/
@@ -1605,7 +1627,9 @@ Case2:
}
/*
- * dispose of uobjpage. drop handle to uobj as well.
+ * dispose of uobjpage. it can't be PG_RELEASED
+ * since we still hold the object lock.
+ * drop handle to uobj as well.
*/
if (uobjpage->pg_flags & PG_WANTED)
@@ -1668,6 +1692,11 @@ Case2:
if (pg->pg_flags & PG_WANTED)
wakeup(pg); /* lock still held */
+ /*
+ * note that pg can't be PG_RELEASED since we did not drop
+ * the object lock since the last time we checked.
+ */
+
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(&ufi, amap, uobj, NULL);
@@ -1707,6 +1736,11 @@ Case2:
if (pg->pg_flags & PG_WANTED)
wakeup(pg); /* lock still held */
+ /*
+ * note that pg can't be PG_RELEASED since we did not drop the object
+ * lock since the last time we checked.
+ */
+
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(&ufi, amap, uobj, NULL);
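Both the fault path above and the loan path below restore the same
post-sleep protocol: after sleeping on a busy page and retaking the
object lock, first check PG_RELEASED (the pager may have released the
page during the I/O), then re-verify the amap slot; either failure forces
a lookup refresh. The shared shape, condensed from the uvm_loan.c hunk
(an editor's sketch, names as in the diff):

	/* we slept on a PG_BUSY page and have just retaken uobj's lock */
	if ((pg->pg_flags & PG_RELEASED) != 0 ||
	    (locked && amap && amap_lookup(&ufi->entry->aref,
	    ufi->orig_rvaddr - ufi->entry->start))) {
		if (locked)
			uvmfault_unlockall(ufi, amap, NULL, NULL);
		locked = FALSE;		/* forces the caller to retry */
	}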
diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c
index 9d606516ee1..0c4244eec76 100644
--- a/sys/uvm/uvm_init.c
+++ b/sys/uvm/uvm_init.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_init.c,v 1.22 2009/06/02 23:00:19 oga Exp $ */
+/* $OpenBSD: uvm_init.c,v 1.23 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_init.c,v 1.14 2000/06/27 17:29:23 mrg Exp $ */
/*
@@ -148,6 +148,7 @@ uvm_init(void)
* of kernel objects.
*/
+ uvm_page_rehash();
uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
UAO_FLAG_KERNSWAP);
diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c
index b4f62568fd7..756ffb7c2ca 100644
--- a/sys/uvm/uvm_loan.c
+++ b/sys/uvm/uvm_loan.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_loan.c,v 1.31 2009/06/01 19:54:02 oga Exp $ */
+/* $OpenBSD: uvm_loan.c,v 1.32 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_loan.c,v 1.22 2000/06/27 17:29:25 mrg Exp $ */
/*
@@ -462,12 +462,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
simple_lock(&uobj->vmobjlock);
/*
- * Re-verify that amap slot is still free. if there is a
- * problem we drop our lock (thus force a lookup refresh/retry).
+ * verify that the page has not be released and re-verify
+ * that amap slot is still free. if there is a problem we
+ * drop our lock (thus force a lookup refresh/retry).
*/
- if (locked && amap && amap_lookup(&ufi->entry->aref,
- ufi->orig_rvaddr - ufi->entry->start)) {
+ if ((pg->pg_flags & PG_RELEASED) != 0 ||
+ (locked && amap && amap_lookup(&ufi->entry->aref,
+ ufi->orig_rvaddr - ufi->entry->start))) {
if (locked)
uvmfault_unlockall(ufi, amap, NULL, NULL);
@@ -484,6 +486,17 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
/* still holding object lock */
wakeup(pg);
+ if (pg->pg_flags & PG_RELEASED) {
+#ifdef DIAGNOSTIC
+ if (uobj->pgops->pgo_releasepg == NULL)
+ panic("uvm_loanuobj: object has no releasepg function");
+#endif
+ /* frees page */
+ if (uobj->pgops->pgo_releasepg(pg, NULL))
+ simple_unlock(&uobj->vmobjlock);
+ return (0);
+ }
+
uvm_lock_pageq();
uvm_pageactivate(pg); /* make sure it is in queues */
uvm_unlock_pageq();
@@ -496,7 +509,8 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
/*
* at this point we have the page we want ("pg") marked PG_BUSY for us
- * and we have all data structures locked. do the loanout.
+ * and we have all data structures locked. do the loanout. page can
+ * not be PG_RELEASED (we caught this above).
*/
if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index dc511476be3..b804d36bd6c 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.115 2009/06/14 02:53:09 deraadt Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.116 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -3058,7 +3058,15 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
}
KASSERT(pg->uanon == anon);
- /* Deactivate the page. */
+#ifdef UBC
+ /* ...and deactivate the page. */
+ pmap_clear_reference(pg);
+#else
+ /* zap all mappings for the page. */
+ pmap_page_protect(pg, VM_PROT_NONE);
+
+ /* ...and deactivate the page. */
+#endif
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
@@ -3812,8 +3820,9 @@ uvm_object_printit(uobj, full, pr)
return;
}
(*pr)(" PAGES <pg,offset>:\n ");
- RB_FOREACH(pg, uobj_pgs, &uobj->memt) {
- cnt++;
+ for (pg = TAILQ_FIRST(&uobj->memq);
+ pg != NULL;
+ pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) {
(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
if ((cnt % 3) == 2) {
(*pr)("\n ");
@@ -3874,7 +3883,8 @@ uvm_page_printit(pg, full, pr)
uobj = pg->uobject;
if (uobj) {
(*pr)(" checking object list\n");
- RB_FOREACH(pg, uobj_pgs, &uobj->memt) {
+ TAILQ_FOREACH(tpg, &uobj->memq,
+ fq.queues.listq) {
if (tpg == pg) {
break;
}
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 7075e42e515..9d13fc011af 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.74 2009/06/01 20:53:30 millert Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.75 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -298,7 +298,8 @@ sys_mincore(struct proc *p, void *v, register_t *retval)
*/
if (UVM_ET_ISOBJ(entry)) {
KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
- if (entry->object.uvm_obj->pgops->pgo_fault != NULL) {
+ if (entry->object.uvm_obj->pgops->pgo_releasepg
+ == NULL) {
pgi = 1;
for (/* nothing */; start < lim;
start += PAGE_SIZE, vec++)
@@ -577,9 +578,8 @@ sys_mmap(struct proc *p, void *v, register_t *retval)
if ((flags & MAP_ANON) != 0 ||
((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
- u_int64_t used = ptoa(p->p_vmspace->vm_dused);
- if (p->p_rlimit[RLIMIT_DATA].rlim_cur < used ||
- size > p->p_rlimit[RLIMIT_DATA].rlim_cur - used) {
+ if (size >
+ (p->p_rlimit[RLIMIT_DATA].rlim_cur - ptoa(p->p_vmspace->vm_dused))) {
error = ENOMEM;
goto out;
}
diff --git a/sys/uvm/uvm_object.c b/sys/uvm/uvm_object.c
deleted file mode 100644
index 9eaaf575f50..00000000000
--- a/sys/uvm/uvm_object.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/* $OpenBSD: uvm_object.c,v 1.1 2009/06/06 03:45:08 oga Exp $ */
-
-/*
- * Copyright (c) 2006 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Mindaugas Rasiukevicius.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * uvm_object.c: operate with memory objects
- *
- */
-
-#include <sys/cdefs.h>
-
-#include <sys/param.h>
-
-#include <uvm/uvm.h>
-
-/* We will fetch this page count per step */
-#define FETCH_PAGECOUNT 16
-
-/*
- * uvm_objwire: wire the pages of entire uobj
- *
- * => caller must pass page-aligned start and end values
- * => if the caller passes in a pageq pointer, we'll return a list of
- * wired pages.
- */
-
-int
-uvm_objwire(struct uvm_object *uobj, off_t start, off_t end,
- struct pglist *pageq)
-{
- int i, npages, error;
- struct vm_page *pgs[FETCH_PAGECOUNT];
- off_t offset = start, left;
-
- left = (end - start) >> PAGE_SHIFT;
-
- simple_lock(&uobj->vmobjlock);
- while (left) {
-
- npages = MIN(FETCH_PAGECOUNT, left);
-
- /* Get the pages */
- memset(pgs, 0, sizeof(pgs));
- error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, 0,
- VM_PROT_READ | VM_PROT_WRITE, UVM_ADV_SEQUENTIAL,
- PGO_ALLPAGES | PGO_SYNCIO);
-
- if (error)
- goto error;
-
- simple_lock(&uobj->vmobjlock);
- for (i = 0; i < npages; i++) {
-
- KASSERT(pgs[i] != NULL);
- KASSERT(!(pgs[i]->pg_flags & PG_RELEASED));
-
-#if 0
- /*
- * Loan break
- */
- if (pgs[i]->loan_count) {
- while (pgs[i]->loan_count) {
- pg = uvm_loanbreak(pgs[i]);
- if (!pg) {
- simple_unlock(&uobj->vmobjlock);
- uvm_wait("uobjwirepg");
- simple_lock(&uobj->vmobjlock);
- continue;
- }
- }
- pgs[i] = pg;
- }
-#endif
-
- if (pgs[i]->pg_flags & PQ_AOBJ) {
- atomic_clearbits_int(&pgs[i]->pg_flags,
- PG_CLEAN);
- uao_dropswap(uobj, i);
- }
- }
-
- /* Wire the pages */
- uvm_lock_pageq();
- for (i = 0; i < npages; i++) {
- uvm_pagewire(pgs[i]);
- if (pageq != NULL)
- TAILQ_INSERT_TAIL(pageq, pgs[i], pageq);
- }
- uvm_unlock_pageq();
-
- /* Unbusy the pages */
- uvm_page_unbusy(pgs, npages);
-
- left -= npages;
- offset += npages << PAGE_SHIFT;
- }
- simple_unlock(&uobj->vmobjlock);
-
- return 0;
-
-error:
- /* Unwire the pages which have been wired */
- uvm_objunwire(uobj, start, offset);
-
- return error;
-}
-
-/*
- * uobj_unwirepages: unwire the pages of entire uobj
- *
- * => caller must pass page-aligned start and end values
- */
-
-void
-uvm_objunwire(struct uvm_object *uobj, off_t start, off_t end)
-{
- struct vm_page *pg;
- off_t offset;
-
- simple_lock(&uobj->vmobjlock);
- uvm_lock_pageq();
- for (offset = start; offset < end; offset += PAGE_SIZE) {
- pg = uvm_pagelookup(uobj, offset);
-
- KASSERT(pg != NULL);
- KASSERT(!(pg->pg_flags & PG_RELEASED));
-
- uvm_pageunwire(pg);
- }
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
-}
diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h
index d6292ec0acb..3d5a091f462 100644
--- a/sys/uvm/uvm_object.h
+++ b/sys/uvm/uvm_object.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_object.h,v 1.12 2009/06/06 03:45:08 oga Exp $ */
+/* $OpenBSD: uvm_object.h,v 1.13 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_object.h,v 1.11 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -47,11 +47,11 @@
*/
struct uvm_object {
- simple_lock_data_t vmobjlock; /* lock on memq */
- struct uvm_pagerops *pgops; /* pager ops */
- RB_HEAD(uobj_pgs, vm_page) memt; /* pages in obj */
- int uo_npages; /* # of pages in memq */
- int uo_refs; /* reference count */
+ simple_lock_data_t vmobjlock; /* lock on memq */
+ struct uvm_pagerops *pgops; /* pager ops */
+ struct pglist memq; /* pages in this object */
+ int uo_npages; /* # of pages in memq */
+ int uo_refs; /* reference count */
};
/*
@@ -93,11 +93,6 @@ extern struct uvm_pagerops uvm_deviceops;
((uobj)->pgops == &uvm_vnodeops && \
((struct vnode *)uobj)->v_flag & VTEXT)
-int uvm_pagecmp(struct vm_page *, struct vm_page *);
-RB_PROTOTYPE(uobj_pgs, vm_page, fq.queues.tree, uvm_pagecmp);
-
-int uvm_objwire(struct uvm_object *, off_t, off_t, struct pglist *);
-void uvm_objunwire(struct uvm_object *, off_t, off_t);
#endif /* _KERNEL */
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 3758deb58e7..2cf45c11375 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.88 2009/06/14 03:04:08 deraadt Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.89 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
@@ -118,6 +118,14 @@ static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;
/*
+ * we use a hash table with only one bucket during bootup. we will
+ * later rehash (resize) the hash table once the allocator is ready.
+ * we static allocate the one bootstrap bucket below...
+ */
+
+static struct pglist uvm_bootbucket;
+
+/*
* History
*/
UVMHIST_DECL(pghist);
@@ -134,7 +142,7 @@ static void uvm_pageremove(struct vm_page *);
*/
/*
- * uvm_pageinsert: insert a page in the object
+ * uvm_pageinsert: insert a page in the object and the hash table
*
* => caller must lock object
* => caller must lock page queues
@@ -145,17 +153,23 @@ static void uvm_pageremove(struct vm_page *);
__inline static void
uvm_pageinsert(struct vm_page *pg)
{
+ struct pglist *buck;
UVMHIST_FUNC("uvm_pageinsert"); UVMHIST_CALLED(pghist);
KASSERT((pg->pg_flags & PG_TABLED) == 0);
+ mtx_enter(&uvm.hashlock);
+ buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
+ TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */
+ mtx_leave(&uvm.hashlock);
- RB_INSERT(uobj_pgs, &pg->uobject->memt, pg);
+ TAILQ_INSERT_TAIL(&pg->uobject->memq, pg,
+ fq.queues.listq); /* put in object */
atomic_setbits_int(&pg->pg_flags, PG_TABLED);
pg->uobject->uo_npages++;
}
/*
- * uvm_page_remove: remove page from object
+ * uvm_page_remove: remove page from object and hash
*
* => caller must lock object
* => caller must lock page queues
@@ -164,11 +178,23 @@ uvm_pageinsert(struct vm_page *pg)
static __inline void
uvm_pageremove(struct vm_page *pg)
{
+ struct pglist *buck;
UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist);
KASSERT(pg->pg_flags & PG_TABLED);
+ mtx_enter(&uvm.hashlock);
+ buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
+ TAILQ_REMOVE(buck, pg, fq.queues.hashq);
+ mtx_leave(&uvm.hashlock);
+
+#ifdef UBC
+ if (pg->uobject->pgops == &uvm_vnodeops) {
+ uvm_pgcnt_vnode--;
+ }
+#endif
+
/* object should be locked */
- RB_REMOVE(uobj_pgs, &pg->uobject->memt, pg);
+ TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq);
atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ);
pg->uobject->uo_npages--;
@@ -176,14 +202,6 @@ uvm_pageremove(struct vm_page *pg)
pg->pg_version++;
}
-int
-uvm_pagecmp(struct vm_page *a, struct vm_page *b)
-{
- return (a->offset < b->offset ? -1 : a->offset > b->offset);
-}
-
-RB_GENERATE(uobj_pgs, vm_page, fq.queues.tree, uvm_pagecmp);
-
/*
* uvm_page_init: init the page system. called from uvm_init().
*
@@ -216,6 +234,18 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
mtx_init(&uvm.fpageqlock, IPL_VM);
uvm_pmr_init();
+ /*
+ * init the <obj,offset> => <page> hash table. for now
+ * we just have one bucket (the bootstrap bucket). later on we
+ * will allocate new buckets as we dynamically resize the hash table.
+ */
+
+ uvm.page_nhash = 1; /* 1 bucket */
+ uvm.page_hashmask = 0; /* mask for hash function */
+ uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
+ TAILQ_INIT(uvm.page_hash); /* init hash table */
+ mtx_init(&uvm.hashlock, IPL_VM); /* init hash table lock */
+
/*
* allocate vm_page structures.
*/
@@ -713,9 +743,97 @@ uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
* done!
*/
+ if (!preload)
+ uvm_page_rehash();
+
+ return;
+}
+
+/*
+ * uvm_page_rehash: reallocate hash table based on number of free pages.
+ */
+
+void
+uvm_page_rehash(void)
+{
+ int freepages, lcv, bucketcount, oldcount;
+ struct pglist *newbuckets, *oldbuckets;
+ struct vm_page *pg;
+ size_t newsize, oldsize;
+
+ /*
+ * compute number of pages that can go in the free pool
+ */
+
+ freepages = 0;
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ freepages +=
+ (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
+
+ /*
+ * compute number of buckets needed for this number of pages
+ */
+
+ bucketcount = 1;
+ while (bucketcount < freepages)
+ bucketcount = bucketcount * 2;
+
+ /*
+ * compute the size of the current table and new table.
+ */
+
+ oldbuckets = uvm.page_hash;
+ oldcount = uvm.page_nhash;
+ oldsize = round_page(sizeof(struct pglist) * oldcount);
+ newsize = round_page(sizeof(struct pglist) * bucketcount);
+
+ /*
+ * allocate the new buckets
+ */
+
+ newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize);
+ if (newbuckets == NULL) {
+ printf("uvm_page_physrehash: WARNING: could not grow page "
+ "hash table\n");
+ return;
+ }
+ for (lcv = 0 ; lcv < bucketcount ; lcv++)
+ TAILQ_INIT(&newbuckets[lcv]);
+
+ /*
+ * now replace the old buckets with the new ones and rehash everything
+ */
+
+ mtx_enter(&uvm.hashlock);
+ uvm.page_hash = newbuckets;
+ uvm.page_nhash = bucketcount;
+ uvm.page_hashmask = bucketcount - 1; /* power of 2 */
+
+ /* ... and rehash */
+ for (lcv = 0 ; lcv < oldcount ; lcv++) {
+ while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) {
+ TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq);
+ TAILQ_INSERT_TAIL(
+ &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
+ pg, fq.queues.hashq);
+ }
+ }
+ mtx_leave(&uvm.hashlock);
+
+ /*
+ * free old bucket array if is not the boot-time table
+ */
+
+ if (oldbuckets != &uvm_bootbucket)
+ uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize);
+
+ /*
+ * done
+ */
return;
}
+
#ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
void uvm_page_physdump(void); /* SHUT UP GCC */
@@ -741,6 +859,7 @@ uvm_page_physdump(void)
case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
default: printf("<<UNKNOWN>>!!!!\n");
}
+ printf("number of buckets = %d\n", uvm.page_nhash);
}
#endif
@@ -822,9 +941,7 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
pg->offset = off;
pg->uobject = obj;
pg->uanon = anon;
- pg->pg_flags = PG_BUSY|PG_FAKE;
- if (!(flags & UVM_PGA_ZERO))
- atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
+ pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE;
if (anon) {
anon->an_page = pg;
atomic_setbits_int(&pg->pg_flags, PQ_ANON);
@@ -885,7 +1002,7 @@ uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
/*
* uvm_pagefree: free page
*
- * => erase page's identity (i.e. remove from object)
+ * => erase page's identity (i.e. remove from hash/object)
* => put page on free list
* => caller must lock owning object (either anon or uvm_object)
* => caller must lock page queues
@@ -993,8 +1110,8 @@ uvm_pagefree(struct vm_page *pg)
/*
* Clean page state bits.
*/
- atomic_clearbits_int(&pg->pg_flags, PG_ZERO|PG_FAKE|PG_BUSY|
- PG_RELEASED|PG_CLEAN|PG_CLEANCHK|PQ_ENCRYPT);
+ atomic_clearbits_int(&pg->pg_flags,
+ PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK);
/*
* Pmap flag cleaning.
* XXX: Shouldn't pmap do this?
@@ -1051,14 +1168,7 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs)
UVMHIST_LOG(pdhist, "releasing pg %p", pg,0,0,0);
uobj = pg->uobject;
if (uobj != NULL) {
- uvm_lock_pageq();
- pmap_page_protect(pg, VM_PROT_NONE);
- /* XXX won't happen right now */
- if (pg->pg_flags & PQ_ANON)
- uao_dropswap(uobj,
- pg->offset >> PAGE_SHIFT);
- uvm_pagefree(pg);
- uvm_unlock_pageq();
+ uobj->pgops->pgo_releasepg(pg, NULL);
} else {
atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
@@ -1283,10 +1393,19 @@ PHYS_TO_VM_PAGE(paddr_t pa)
struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
- struct vm_page find;
+ struct vm_page *pg;
+ struct pglist *buck;
- find.offset = off;
- return (RB_FIND(uobj_pgs, &obj->memt, &find));
+ mtx_enter(&uvm.hashlock);
+ buck = &uvm.page_hash[uvm_pagehash(obj,off)];
+
+ TAILQ_FOREACH(pg, buck, fq.queues.hashq) {
+ if (pg->uobject == obj && pg->offset == off) {
+ break;
+ }
+ }
+ mtx_leave(&uvm.hashlock);
+ return(pg);
}
/*
@@ -1344,8 +1463,6 @@ uvm_pageunwire(struct vm_page *pg)
void
uvm_pagedeactivate(struct vm_page *pg)
{
- pmap_page_protect(pg, VM_PROT_NONE);
-
if (pg->pg_flags & PQ_ACTIVE) {
TAILQ_REMOVE(&uvm.page_active, pg, pageq);
atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
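One sizing detail in the restored uvm_page_rehash() above: bucketcount is
doubled until it reaches the free-page count, so it is always a power of
two and page_hashmask can simply be bucketcount - 1. The hash function
itself is not part of this diff; the classic UVM definition in uvm_page.h
looks roughly like the following (an assumption for illustration, not
quoted from this commit):

	/*
	 * assumed definition: mix the object pointer with the page index,
	 * then mask down to a bucket. uvm.page_hashmask is bucketcount - 1.
	 */
	#define uvm_pagehash(obj, off) \
		(((unsigned long)(obj) + ((off) >> PAGE_SHIFT)) & \
		uvm.page_hashmask)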
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index e6d71c95ae9..5896286c871 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.h,v 1.36 2009/06/14 03:04:08 deraadt Exp $ */
+/* $OpenBSD: uvm_page.h,v 1.37 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */
/*
@@ -108,7 +108,7 @@
union vm_page_fq {
struct {
- RB_ENTRY(vm_page) tree; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
} queues;
@@ -122,6 +122,7 @@ struct vm_page {
union vm_page_fq fq; /* free and queue management */
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
* queue or free list (P) */
+
struct vm_anon *uanon; /* anon (O,P) */
struct uvm_object *uobject; /* object (O,P) */
voff_t offset; /* offset into object (O,P) */
@@ -252,6 +253,7 @@ void uvm_page_own(struct vm_page *, char *);
#if !defined(PMAP_STEAL_MEMORY)
boolean_t uvm_page_physget(paddr_t *);
#endif
+void uvm_page_rehash(void);
void uvm_pageidlezero(void);
void uvm_pageactivate(struct vm_page *);
diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
index 5b820ea65b3..8f720d4ccec 100644
--- a/sys/uvm/uvm_pager.h
+++ b/sys/uvm/uvm_pager.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pager.h,v 1.24 2009/06/01 19:54:02 oga Exp $ */
+/* $OpenBSD: uvm_pager.h,v 1.25 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */
/*
@@ -109,6 +109,8 @@ struct uvm_pagerops {
struct vm_page ** (*pgo_mk_pcluster)(struct uvm_object *,
struct vm_page **, int *, struct vm_page *,
int, voff_t, voff_t);
+ /* release page */
+ boolean_t (*pgo_releasepg)(struct vm_page *, struct vm_page **);
};
/* pager flags [mostly for flush] */
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index b30cf1e0a98..27cdc07ae73 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pdaemon.c,v 1.48 2009/06/15 17:01:26 beck Exp $ */
+/* $OpenBSD: uvm_pdaemon.c,v 1.49 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
/*
@@ -820,20 +820,35 @@ uvmpd_scan_inactive(struct pglist *pglst)
atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(p, NULL);
- /* released during I/O? Can only happen for anons */
+ /* released during I/O? */
if (p->pg_flags & PG_RELEASED) {
- KASSERT(anon != NULL);
- /* remove page so we can get nextpg */
- anon->an_page = NULL;
+ if (anon) {
+ /* remove page so we can get nextpg */
+ anon->an_page = NULL;
- simple_unlock(&anon->an_lock);
- uvm_anfree(anon); /* kills anon */
- pmap_page_protect(p, VM_PROT_NONE);
- anon = NULL;
- uvm_lock_pageq();
- nextpg = TAILQ_NEXT(p, pageq);
- /* free released page */
- uvm_pagefree(p);
+ simple_unlock(&anon->an_lock);
+ uvm_anfree(anon); /* kills anon */
+ pmap_page_protect(p, VM_PROT_NONE);
+ anon = NULL;
+ uvm_lock_pageq();
+ nextpg = TAILQ_NEXT(p, pageq);
+ /* free released page */
+ uvm_pagefree(p);
+
+ } else {
+
+ /*
+ * pgo_releasepg nukes the page and
+ * gets "nextpg" for us. it returns
+ * with the page queues locked (when
+ * given nextpg ptr).
+ */
+
+ if (!uobj->pgops->pgo_releasepg(p,
+ &nextpg))
+ /* uobj died after release */
+ uobj = NULL;
+ }
} else { /* page was not released during I/O */
uvm_lock_pageq();
nextpg = TAILQ_NEXT(p, pageq);
@@ -1042,6 +1057,7 @@ uvmpd_scan(void)
*/
if (inactive_shortage > 0) {
+ pmap_page_protect(p, VM_PROT_NONE);
/* no need to check wire_count as pg is "active" */
uvm_pagedeactivate(p);
uvmexp.pddeact++;
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index 43fd09128e2..f6a78511a70 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.c,v 1.90 2009/06/04 02:56:14 oga Exp $ */
+/* $OpenBSD: uvm_swap.c,v 1.91 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */
/*
@@ -358,8 +358,12 @@ uvm_swap_allocpages(struct vm_page **pps, int npages)
boolean_t fail;
/* Estimate if we will succeed */
+ uvm_lock_fpageq();
+
fail = uvmexp.free - npages < uvmexp.reserve_kernel;
+ uvm_unlock_fpageq();
+
if (fail)
return FALSE;
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index c15ebbf70a0..e85e2c24e38 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.62 2009/06/06 17:46:44 art Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.63 2009/06/16 00:11:29 oga Exp $ */
/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
/*
@@ -93,6 +93,7 @@ void uvn_init(void);
int uvn_io(struct uvm_vnode *, vm_page_t *, int, int, int);
int uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
void uvn_reference(struct uvm_object *);
+boolean_t uvn_releasepg(struct vm_page *, struct vm_page **);
/*
* master pager structure
@@ -108,6 +109,7 @@ struct uvm_pagerops uvm_vnodeops = {
uvn_put,
uvn_cluster,
uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */
+ uvn_releasepg,
};
/*
@@ -271,7 +273,7 @@ uvn_attach(void *arg, vm_prot_t accessprot)
* now set up the uvn.
*/
uvn->u_obj.pgops = &uvm_vnodeops;
- RB_INIT(&uvn->u_obj.memt);
+ TAILQ_INIT(&uvn->u_obj.memq);
uvn->u_obj.uo_npages = 0;
uvn->u_obj.uo_refs = 1; /* just us... */
oldflags = uvn->u_flags;
@@ -438,7 +440,11 @@ uvn_detach(struct uvm_object *uobj)
if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
LIST_REMOVE(uvn, u_wlist);
}
- KASSERT(RB_EMPTY(&uobj->memt));
+#ifdef DIAGNOSTIC
+ if (!TAILQ_EMPTY(&uobj->memq))
+ panic("uvn_deref: vnode VM object still has pages afer "
+ "syncio/free flush");
+#endif
oldflags = uvn->u_flags;
uvn->u_flags = 0;
simple_unlock(&uobj->vmobjlock);
@@ -520,8 +526,8 @@ uvm_vnp_terminate(struct vnode *vp)
/*
* it is possible that the uvn was detached and is in the relkill
- * state [i.e. waiting for async i/o to finish].
- * we take over the vnode now and cancel the relkill.
+ * state [i.e. waiting for async i/o to finish so that releasepg can
+ * kill object]. we take over the vnode now and cancel the relkill.
* we want to know when the i/o is done so we can recycle right
* away. note that a uvn can only be in the RELKILL state if it
* has a zero reference count.
@@ -555,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp)
while (uvn->u_obj.uo_npages) {
#ifdef DEBUG
struct vm_page *pp;
- RB_FOREACH(pp, uobj_pgs, &uvn->u_obj.memt) {
+ TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) {
if ((pp->pg_flags & PG_BUSY) == 0)
panic("uvm_vnp_terminate: detected unbusy pg");
}
@@ -615,6 +621,41 @@ uvm_vnp_terminate(struct vnode *vp)
}
/*
+ * uvn_releasepg: handled a released page in a uvn
+ *
+ * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
+ * to dispose of.
+ * => caller must handled PG_WANTED case
+ * => called with page's object locked, pageq's unlocked
+ * => returns TRUE if page's object is still alive, FALSE if we
+ * killed the page's object. if we return TRUE, then we
+ * return with the object locked.
+ * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
+ * with the page queues locked [for pagedaemon]
+ * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
+ * => we kill the uvn if it is not referenced and we are suppose to
+ * kill it ("relkill").
+ */
+
+boolean_t
+uvn_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */)
+{
+ KASSERT(pg->pg_flags & PG_RELEASED);
+
+ /*
+ * dispose of the page [caller handles PG_WANTED]
+ */
+ pmap_page_protect(pg, VM_PROT_NONE);
+ uvm_lock_pageq();
+ if (nextpgp)
+ *nextpgp = TAILQ_NEXT(pg, pageq); /* next page for daemon */
+ uvm_pagefree(pg);
+ if (!nextpgp)
+ uvm_unlock_pageq();
+ return (TRUE);
+}
+
+/*
* NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
* through the buffer cache and allow I/O in any size. These VOPs use
* synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't
@@ -648,6 +689,8 @@ uvm_vnp_terminate(struct vnode *vp)
* - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio }
* - get "page" structures (atop?).
* - handle "wanted" pages
+ * - handle "released" pages [using pgo_releasepg]
+ * >>> pgo_releasepg may kill the object
* dont forget to look at "object" wanted flag in all cases.
*/
@@ -802,6 +845,7 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
if (flags & PGO_DEACTIVATE) {
if ((pp->pg_flags & PQ_INACTIVE) == 0 &&
pp->wire_count == 0) {
+ pmap_page_protect(pp, VM_PROT_NONE);
uvm_pagedeactivate(pp);
}
} else if (flags & PGO_FREE) {
@@ -945,6 +989,7 @@ ReTry:
if (flags & PGO_DEACTIVATE) {
if ((pp->pg_flags & PQ_INACTIVE) == 0 &&
pp->wire_count == 0) {
+ pmap_page_protect(ptmp, VM_PROT_NONE);
uvm_pagedeactivate(ptmp);
}
} else if (flags & PGO_FREE &&