diff options
author | Owain Ainsworth <oga@cvs.openbsd.org> | 2009-06-16 00:11:30 +0000 |
---|---|---|
committer | Owain Ainsworth <oga@cvs.openbsd.org> | 2009-06-16 00:11:30 +0000 |
commit | b20700966027364e7e2e3cf5ca4613cbb4e2a25b (patch) | |
tree | dac29c9a1582e023159a8aabe2282775b21cbdc2 | |
parent | ab37797a62467132f94babf9bc9d57cef8402599 (diff) |
Back out all changes to uvm after pmemrange (which will be backed out
separately).
A change at or just before the hackathon has either exposed or added a
very very nasty memory corruption bug that is giving us hell right now.
So in the interest of kernel stability these diffs are being backed out
until such a time as that corruption bug has been found and squashed,
then the ones that are proven good may slowly return.
A quick hitlist of the main commits this backs out:
mine:
uvm_objwire
the lock change in uvm_swap.c
using trees for uvm objects instead of the hash
removing the pgo_releasepg callback.
art@'s:
putting pmap_page_protect(VM_PROT_NONE) in uvm_pagedeactivate() since
all callers called that just prior anyway.
ok beck@, ariane@.
prompted by deraadt@.
-rw-r--r-- | sys/arch/amd64/amd64/pmap.c | 10 | ||||
-rw-r--r-- | sys/arch/hppa/hppa/pmap.c | 10 | ||||
-rw-r--r-- | sys/arch/i386/i386/pmap.c | 11 | ||||
-rw-r--r-- | sys/arch/i386/i386/pmapae.c | 8 | ||||
-rw-r--r-- | sys/conf/files | 3 | ||||
-rw-r--r-- | sys/kern/vfs_biomem.c | 4 | ||||
-rw-r--r-- | sys/uvm/uvm.h | 8 | ||||
-rw-r--r-- | sys/uvm/uvm_anon.c | 3 | ||||
-rw-r--r-- | sys/uvm/uvm_aobj.c | 57 | ||||
-rw-r--r-- | sys/uvm/uvm_device.c | 6 | ||||
-rw-r--r-- | sys/uvm/uvm_fault.c | 62 | ||||
-rw-r--r-- | sys/uvm/uvm_init.c | 3 | ||||
-rw-r--r-- | sys/uvm/uvm_loan.c | 26 | ||||
-rw-r--r-- | sys/uvm/uvm_map.c | 20 | ||||
-rw-r--r-- | sys/uvm/uvm_mmap.c | 10 | ||||
-rw-r--r-- | sys/uvm/uvm_object.c | 159 | ||||
-rw-r--r-- | sys/uvm/uvm_object.h | 17 | ||||
-rw-r--r-- | sys/uvm/uvm_page.c | 181 | ||||
-rw-r--r-- | sys/uvm/uvm_page.h | 6 | ||||
-rw-r--r-- | sys/uvm/uvm_pager.h | 4 | ||||
-rw-r--r-- | sys/uvm/uvm_pdaemon.c | 42 | ||||
-rw-r--r-- | sys/uvm/uvm_swap.c | 6 | ||||
-rw-r--r-- | sys/uvm/uvm_vnode.c | 57 |
23 files changed, 425 insertions, 288 deletions
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 78e72c6bd53..165f3b365dd 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.47 2009/06/09 02:56:38 krw Exp $ */ +/* $OpenBSD: pmap.c,v 1.48 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ /* @@ -567,7 +567,7 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa) kpm = pmap_kernel(); for (i = 0; i < PTP_LEVELS - 1; i++) { kpm->pm_obj[i].pgops = NULL; - RB_INIT(&kpm->pm_obj[i].memt); + TAILQ_INIT(&kpm->pm_obj[i].memq); kpm->pm_obj[i].uo_npages = 0; kpm->pm_obj[i].uo_refs = 1; kpm->pm_ptphint[i] = NULL; @@ -832,7 +832,7 @@ pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level, obj = &pmap->pm_obj[lidx]; pmap->pm_stats.resident_count--; if (pmap->pm_ptphint[lidx] == ptp) - pmap->pm_ptphint[lidx] = RB_ROOT(&obj->memt); + pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq); ptp->wire_count = 0; uvm_pagerealloc(ptp, NULL, 0); TAILQ_INSERT_TAIL(pagelist, ptp, fq.queues.listq); @@ -1019,7 +1019,7 @@ pmap_create(void) /* init uvm_object */ for (i = 0; i < PTP_LEVELS - 1; i++) { pmap->pm_obj[i].pgops = NULL; /* not a mappable object */ - RB_INIT(&pmap->pm_obj[i].memt); + TAILQ_INIT(&pmap->pm_obj[i].memq); pmap->pm_obj[i].uo_npages = 0; pmap->pm_obj[i].uo_refs = 1; pmap->pm_ptphint[i] = NULL; @@ -1091,7 +1091,7 @@ pmap_destroy(struct pmap *pmap) */ for (i = 0; i < PTP_LEVELS - 1; i++) { - while ((pg = RB_ROOT(&pmap->pm_obj[i].memt)) != NULL) { + while ((pg = TAILQ_FIRST(&pmap->pm_obj[i].memq)) != NULL) { KASSERT((pg->pg_flags & PG_BUSY) == 0); pg->wire_count = 0; diff --git a/sys/arch/hppa/hppa/pmap.c b/sys/arch/hppa/hppa/pmap.c index 5f9d72ab8b9..34e5652adae 100644 --- a/sys/arch/hppa/hppa/pmap.c +++ b/sys/arch/hppa/hppa/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.136 2009/06/11 20:10:51 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.137 2009/06/16 00:11:29 oga Exp $ */ /* * 
Copyright (c) 1998-2004 Michael Shalayeff @@ -235,7 +235,7 @@ pmap_pde_release(struct pmap *pmap, vaddr_t va, struct vm_page *ptp) pmap_pde_set(pmap, va, 0); pmap->pm_stats.resident_count--; if (pmap->pm_ptphint == ptp) - pmap->pm_ptphint = RB_ROOT(&pmap->pm_obj.memt); + pmap->pm_ptphint = TAILQ_FIRST(&pmap->pm_obj.memq); ptp->wire_count = 0; #ifdef DIAGNOSTIC if (ptp->pg_flags & PG_BUSY) @@ -470,7 +470,7 @@ pmap_bootstrap(vstart) bzero(kpm, sizeof(*kpm)); simple_lock_init(&kpm->pm_lock); kpm->pm_obj.pgops = NULL; - RB_INIT(&kpm->pm_obj.memt); + TAILQ_INIT(&kpm->pm_obj.memq); kpm->pm_obj.uo_npages = 0; kpm->pm_obj.uo_refs = 1; kpm->pm_space = HPPA_SID_KERNEL; @@ -656,7 +656,7 @@ pmap_create() simple_lock_init(&pmap->pm_lock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ - RB_INIT(&pmap->pm_obj.memt); + TAILQ_INIT(&pmap->pm_obj.memq); pmap->pm_obj.uo_npages = 0; pmap->pm_obj.uo_refs = 1; @@ -698,7 +698,7 @@ pmap_destroy(pmap) return; #ifdef DIAGNOSTIC - while ((pg = RB_ROOT(&pmap->pm_obj.memt))) { + while ((pg = TAILQ_FIRST(&pmap->pm_obj.memq))) { pt_entry_t *pde, *epde; struct vm_page *sheep; struct pv_entry *haggis; diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 9b502b9947f..240970dab3b 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.140 2009/06/03 02:31:48 art Exp $ */ +/* $OpenBSD: pmap.c,v 1.141 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -805,7 +805,7 @@ pmap_bootstrap(vaddr_t kva_start) kpm = pmap_kernel(); simple_lock_init(&kpm->pm_obj.vmobjlock); kpm->pm_obj.pgops = NULL; - RB_INIT(&kpm->pm_obj.memt); + TAILQ_INIT(&kpm->pm_obj.memq); kpm->pm_obj.uo_npages = 0; kpm->pm_obj.uo_refs = 1; bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ @@ -1424,7 +1424,7 @@ pmap_drop_ptp(struct pmap *pm, vaddr_t va, struct vm_page *ptp, pm->pm_stats.resident_count--; /* update hint */ if 
(pm->pm_ptphint == ptp) - pm->pm_ptphint = RB_ROOT(&pm->pm_obj.memt); + pm->pm_ptphint = TAILQ_FIRST(&pm->pm_obj.memq); ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); @@ -1461,7 +1461,7 @@ pmap_pinit(struct pmap *pmap) /* init uvm_object */ simple_lock_init(&pmap->pm_obj.vmobjlock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ - RB_INIT(&pmap->pm_obj.memt); + TAILQ_INIT(&pmap->pm_obj.memq); pmap->pm_obj.uo_npages = 0; pmap->pm_obj.uo_refs = 1; pmap->pm_stats.wired_count = 0; @@ -1533,7 +1533,8 @@ pmap_destroy(struct pmap *pmap) simple_unlock(&pmaps_lock); /* Free any remaining PTPs. */ - while ((pg = RB_ROOT(&pmap->pm_obj.memt)) != NULL) { + while (!TAILQ_EMPTY(&pmap->pm_obj.memq)) { + pg = TAILQ_FIRST(&pmap->pm_obj.memq); pg->wire_count = 0; uvm_pagefree(pg); } diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c index aeee7c19f66..46fd0b40943 100644 --- a/sys/arch/i386/i386/pmapae.c +++ b/sys/arch/i386/i386/pmapae.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmapae.c,v 1.17 2009/06/02 23:00:19 oga Exp $ */ +/* $OpenBSD: pmapae.c,v 1.18 2009/06/16 00:11:29 oga Exp $ */ /* * Copyright (c) 2006 Michael Shalayeff @@ -1449,7 +1449,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) pmap->pm_stats.resident_count--; if (pmap->pm_ptphint == ptp) pmap->pm_ptphint = - RB_ROOT(&pmap->pm_obj.memt); + TAILQ_FIRST(&pmap->pm_obj.memq); ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); @@ -1543,7 +1543,7 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) pmap->pm_stats.resident_count--; if (pmap->pm_ptphint == ptp) /* update hint? */ pmap->pm_ptphint = - RB_ROOT(&pmap->pm_obj.memt); + TAILQ_FIRST(&pmap->pm_obj.memq); ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); @@ -1661,7 +1661,7 @@ pmap_page_remove_pae(struct vm_page *pg) /* update hint? 
*/ if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) pve->pv_pmap->pm_ptphint = - RB_ROOT(&pve->pv_pmap->pm_obj.memt); + TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq); pve->pv_ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(pve->pv_ptp, NULL, 0); diff --git a/sys/conf/files b/sys/conf/files index e3eeea9aee7..4e84684fa51 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.464 2009/06/10 03:24:02 marco Exp $ +# $OpenBSD: files,v 1.465 2009/06/16 00:11:29 oga Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -969,7 +969,6 @@ file uvm/uvm_km.c file uvm/uvm_map.c file uvm/uvm_meter.c file uvm/uvm_mmap.c -file uvm/uvm_object.c !small_kernel file uvm/uvm_page.c file uvm/uvm_pager.c file uvm/uvm_pdaemon.c diff --git a/sys/kern/vfs_biomem.c b/sys/kern/vfs_biomem.c index eddaf9d58df..ac9eaf95e25 100644 --- a/sys/kern/vfs_biomem.c +++ b/sys/kern/vfs_biomem.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_biomem.c,v 1.7 2009/06/06 18:06:22 art Exp $ */ +/* $OpenBSD: vfs_biomem.c,v 1.8 2009/06/16 00:11:29 oga Exp $ */ /* * Copyright (c) 2007 Artur Grabowski <art@openbsd.org> * @@ -64,7 +64,7 @@ buf_mem_init(vsize_t size) buf_object = &buf_object_store; buf_object->pgops = NULL; - RB_INIT(&buf_object->memt); + TAILQ_INIT(&buf_object->memq); buf_object->uo_npages = 0; buf_object->uo_refs = 1; } diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index cc29e56497e..d48c5f8026c 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm.h,v 1.34 2009/06/02 23:00:19 oga Exp $ */ +/* $OpenBSD: uvm.h,v 1.35 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* @@ -121,6 +121,12 @@ struct uvm { struct proc *aiodoned_proc; struct mutex aiodoned_lock; + /* page hash */ + struct pglist *page_hash; /* page hash table (vp/off->page) */ + int page_nhash; /* number of buckets */ + int page_hashmask; /* hash mask */ + struct mutex hashlock; 
/* lock on page_hash array */ + /* static kernel map entry pool */ vm_map_entry_t kentry_free; /* free page pool */ simple_lock_data_t kentry_lock; diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index 4c75237f5c8..b6d25514ab3 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_anon.c,v 1.32 2009/06/06 17:46:44 art Exp $ */ +/* $OpenBSD: uvm_anon.c,v 1.33 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_anon.c,v 1.10 2000/11/25 06:27:59 chs Exp $ */ /* @@ -354,6 +354,7 @@ uvm_anon_pagein(struct vm_anon *anon) */ pmap_clear_reference(pg); + pmap_page_protect(pg, VM_PROT_NONE); uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index b651338c28a..b2a68d6d249 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_aobj.c,v 1.42 2009/06/06 17:46:44 art Exp $ */ +/* $OpenBSD: uvm_aobj.c,v 1.43 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */ /* @@ -174,6 +174,7 @@ boolean_t uao_flush(struct uvm_object *, voff_t, voff_t, int); void uao_free(struct uvm_aobj *); int uao_get(struct uvm_object *, voff_t, vm_page_t *, int *, int, vm_prot_t, int, int); +boolean_t uao_releasepg(struct vm_page *, struct vm_page **); boolean_t uao_pagein(struct uvm_aobj *, int, int); boolean_t uao_pagein_page(struct uvm_aobj *, int); @@ -190,6 +191,10 @@ struct uvm_pagerops aobj_pager = { NULL, /* fault */ uao_flush, /* flush */ uao_get, /* get */ + NULL, /* put (done by pagedaemon) */ + NULL, /* cluster */ + NULL, /* mk_pcluster */ + uao_releasepg /* releasepg */ }; /* @@ -521,7 +526,7 @@ uao_create(vsize_t size, int flags) */ simple_lock_init(&aobj->u_obj.vmobjlock); aobj->u_obj.pgops = &aobj_pager; - RB_INIT(&aobj->u_obj.memt); + TAILQ_INIT(&aobj->u_obj.memq); aobj->u_obj.uo_npages = 0; /* @@ -665,7 +670,7 @@ uao_detach_locked(struct uvm_object *uobj) * Release swap resources then free the page. 
*/ uvm_lock_pageq(); - while((pg = RB_ROOT(&uobj->memt)) != NULL) { + while((pg = TAILQ_FIRST(&uobj->memq)) != NULL) { if (pg->pg_flags & PG_BUSY) { atomic_setbits_int(&pg->pg_flags, PG_WANTED); uvm_unlock_pageq(); @@ -790,8 +795,10 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) continue; uvm_lock_pageq(); + /* zap all mappings for the page. */ + pmap_page_protect(pp, VM_PROT_NONE); - /* Deactivate the page. */ + /* ...and deactivate the page. */ uvm_pagedeactivate(pp); uvm_unlock_pageq(); @@ -1135,6 +1142,45 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } /* + * uao_releasepg: handle released page in an aobj + * + * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need + * to dispose of. + * => caller must handle PG_WANTED case + * => called with page's object locked, pageq's unlocked + * => returns TRUE if page's object is still alive, FALSE if we + * killed the page's object. if we return TRUE, then we + * return with the object locked. + * => if (nextpgp != NULL) => we return the next page on the queue, and return + * with the page queues locked [for pagedaemon] + * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] + * => we kill the aobj if it is not referenced and we are suppose to + * kill it ("KILLME"). + */ +boolean_t +uao_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject; + + KASSERT(pg->pg_flags & PG_RELEASED); + + /* + * dispose of the page [caller handles PG_WANTED] and swap slot. + */ + pmap_page_protect(pg, VM_PROT_NONE); + uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); + uvm_lock_pageq(); + if (nextpgp) + *nextpgp = TAILQ_NEXT(pg, pageq); /* next page for daemon */ + uvm_pagefree(pg); + if (!nextpgp) + uvm_unlock_pageq(); /* keep locked for daemon */ + + return TRUE; +} + + +/* * uao_dropswap: release any swap resources from this aobj page. 
* * => aobj must be locked or have a reference count of 0. @@ -1350,6 +1396,9 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) * deactivate the page (to put it on a page queue). */ pmap_clear_reference(pg); +#ifndef UBC + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c index a3743490068..26b6976b266 100644 --- a/sys/uvm/uvm_device.c +++ b/sys/uvm/uvm_device.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_device.c,v 1.33 2009/06/02 23:00:19 oga Exp $ */ +/* $OpenBSD: uvm_device.c,v 1.34 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_device.c,v 1.30 2000/11/25 06:27:59 chs Exp $ */ /* @@ -227,7 +227,7 @@ udv_attach(void *arg, vm_prot_t accessprot, voff_t off, vsize_t size) simple_lock_init(&udv->u_obj.vmobjlock); udv->u_obj.pgops = &uvm_deviceops; - RB_INIT(&udv->u_obj.memt); + TAILQ_INIT(&udv->u_obj.memq); udv->u_obj.uo_npages = 0; udv->u_obj.uo_refs = 1; udv->u_flags = 0; @@ -287,7 +287,7 @@ again: uobj,uobj->uo_refs,0,0); return; } - KASSERT(uobj->uo_npages == 0 && RB_EMPTY(&uobj->memt)); + KASSERT(uobj->uo_npages == 0 && TAILQ_EMPTY(&uobj->memq)); /* * is it being held? if so, wait until others are done. 
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index e0e8946c49c..0c7f05b6459 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.55 2009/06/06 17:46:44 art Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.56 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -203,6 +203,11 @@ uvmfault_anonflush(struct vm_anon **anons, int n) if (pg && (pg->pg_flags & PG_BUSY) == 0 && pg->loan_count == 0) { uvm_lock_pageq(); if (pg->wire_count == 0) { +#ifdef UBC + pmap_clear_reference(pg); +#else + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_pagedeactivate(pg); } uvm_unlock_pageq(); @@ -916,10 +921,10 @@ ReFault: /* * if center page is resident and not - * PG_BUSY, then pgo_get made it PG_BUSY - * for us and gave us a handle to it. - * remember this page as "uobjpage." - * (for later use). + * PG_BUSY|PG_RELEASED then pgo_get + * made it PG_BUSY for us and gave + * us a handle to it. remember this + * page as "uobjpage." (for later use). */ if (lcv == centeridx) { @@ -961,8 +966,8 @@ ReFault: (wired ? PMAP_WIRED : 0)); /* - * NOTE: page can't be PG_WANTED because - * we've held the lock the whole time + * NOTE: page can't be PG_WANTED or PG_RELEASED + * because we've held the lock the whole time * we've had the handle. */ @@ -1366,12 +1371,15 @@ Case2: /* locked(!locked): uobj, uobjpage */ /* - * Re-verify that amap slot is still free. if there is - * a problem, we unlock and clean up. + * verify that the page has not be released and re-verify + * that amap slot is still free. if there is a problem, + * we unlock and clean up. 
*/ - if (locked && amap && amap_lookup(&ufi.entry->aref, - ufi.orig_rvaddr - ufi.entry->start)) { + if ((uobjpage->pg_flags & PG_RELEASED) != 0 || + (locked && amap && + amap_lookup(&ufi.entry->aref, + ufi.orig_rvaddr - ufi.entry->start))) { if (locked) uvmfault_unlockall(&ufi, amap, NULL, NULL); locked = FALSE; @@ -1390,6 +1398,17 @@ Case2: /* still holding object lock */ wakeup(uobjpage); + if (uobjpage->pg_flags & PG_RELEASED) { + uvmexp.fltpgrele++; + KASSERT(uobj->pgops->pgo_releasepg != NULL); + + /* frees page */ + if (uobj->pgops->pgo_releasepg(uobjpage,NULL)) + /* unlock if still alive */ + simple_unlock(&uobj->vmobjlock); + goto ReFault; + } + uvm_lock_pageq(); /* make sure it is in queues */ uvm_pageactivate(uobjpage); @@ -1404,8 +1423,9 @@ Case2: } /* - * we have the data in uobjpage which is PG_BUSY and we are - * holding object lock. + * we have the data in uobjpage which is PG_BUSY and + * !PG_RELEASED. we are holding object lock (so the page + * can't be released on us). */ /* locked: maps(read), amap(if !null), uobj, uobjpage */ @@ -1419,6 +1439,8 @@ Case2: /* * notes: * - at this point uobjpage can not be NULL + * - at this point uobjpage can not be PG_RELEASED (since we checked + * for it above) * - at this point uobjpage could be PG_WANTED (handle later) */ @@ -1605,7 +1627,9 @@ Case2: } /* - * dispose of uobjpage. drop handle to uobj as well. + * dispose of uobjpage. it can't be PG_RELEASED + * since we still hold the object lock. + * drop handle to uobj as well. */ if (uobjpage->pg_flags & PG_WANTED) @@ -1668,6 +1692,11 @@ Case2: if (pg->pg_flags & PG_WANTED) wakeup(pg); /* lock still held */ + /* + * note that pg can't be PG_RELEASED since we did not drop + * the object lock since the last time we checked. 
+ */ + atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, NULL); @@ -1707,6 +1736,11 @@ Case2: if (pg->pg_flags & PG_WANTED) wakeup(pg); /* lock still held */ + /* + * note that pg can't be PG_RELEASED since we did not drop the object + * lock since the last time we checked. + */ + atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, NULL); diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c index 9d606516ee1..0c4244eec76 100644 --- a/sys/uvm/uvm_init.c +++ b/sys/uvm/uvm_init.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_init.c,v 1.22 2009/06/02 23:00:19 oga Exp $ */ +/* $OpenBSD: uvm_init.c,v 1.23 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_init.c,v 1.14 2000/06/27 17:29:23 mrg Exp $ */ /* @@ -148,6 +148,7 @@ uvm_init(void) * of kernel objects. */ + uvm_page_rehash(); uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNSWAP); diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index b4f62568fd7..756ffb7c2ca 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_loan.c,v 1.31 2009/06/01 19:54:02 oga Exp $ */ +/* $OpenBSD: uvm_loan.c,v 1.32 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_loan.c,v 1.22 2000/06/27 17:29:25 mrg Exp $ */ /* @@ -462,12 +462,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) simple_lock(&uobj->vmobjlock); /* - * Re-verify that amap slot is still free. if there is a - * problem we drop our lock (thus force a lookup refresh/retry). + * verify that the page has not be released and re-verify + * that amap slot is still free. if there is a problem we + * drop our lock (thus force a lookup refresh/retry). 
*/ - if (locked && amap && amap_lookup(&ufi->entry->aref, - ufi->orig_rvaddr - ufi->entry->start)) { + if ((pg->pg_flags & PG_RELEASED) != 0 || + (locked && amap && amap_lookup(&ufi->entry->aref, + ufi->orig_rvaddr - ufi->entry->start))) { if (locked) uvmfault_unlockall(ufi, amap, NULL, NULL); @@ -484,6 +486,17 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) /* still holding object lock */ wakeup(pg); + if (pg->pg_flags & PG_RELEASED) { +#ifdef DIAGNOSTIC + if (uobj->pgops->pgo_releasepg == NULL) + panic("uvm_loanuobj: object has no releasepg function"); +#endif + /* frees page */ + if (uobj->pgops->pgo_releasepg(pg, NULL)) + simple_unlock(&uobj->vmobjlock); + return (0); + } + uvm_lock_pageq(); uvm_pageactivate(pg); /* make sure it is in queues */ uvm_unlock_pageq(); @@ -496,7 +509,8 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) /* * at this point we have the page we want ("pg") marked PG_BUSY for us - * and we have all data structures locked. do the loanout. + * and we have all data structures locked. do the loanout. page can + * not be PG_RELEASED (we caught this above). */ if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index dc511476be3..b804d36bd6c 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.c,v 1.115 2009/06/14 02:53:09 deraadt Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.116 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ /* @@ -3058,7 +3058,15 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } KASSERT(pg->uanon == anon); - /* Deactivate the page. */ +#ifdef UBC + /* ...and deactivate the page. */ + pmap_clear_reference(pg); +#else + /* zap all mappings for the page. */ + pmap_page_protect(pg, VM_PROT_NONE); + + /* ...and deactivate the page. 
*/ +#endif uvm_pagedeactivate(pg); uvm_unlock_pageq(); @@ -3812,8 +3820,9 @@ uvm_object_printit(uobj, full, pr) return; } (*pr)(" PAGES <pg,offset>:\n "); - RB_FOREACH(pg, uobj_pgs, &uobj->memt) { - cnt++; + for (pg = TAILQ_FIRST(&uobj->memq); + pg != NULL; + pg = TAILQ_NEXT(pg, fq.queues.listq), cnt++) { (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); if ((cnt % 3) == 2) { (*pr)("\n "); @@ -3874,7 +3883,8 @@ uvm_page_printit(pg, full, pr) uobj = pg->uobject; if (uobj) { (*pr)(" checking object list\n"); - RB_FOREACH(pg, uobj_pgs, &uobj->memt) { + TAILQ_FOREACH(tpg, &uobj->memq, + fq.queues.listq) { if (tpg == pg) { break; } diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 7075e42e515..9d13fc011af 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_mmap.c,v 1.74 2009/06/01 20:53:30 millert Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.75 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -298,7 +298,8 @@ sys_mincore(struct proc *p, void *v, register_t *retval) */ if (UVM_ET_ISOBJ(entry)) { KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); - if (entry->object.uvm_obj->pgops->pgo_fault != NULL) { + if (entry->object.uvm_obj->pgops->pgo_releasepg + == NULL) { pgi = 1; for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) @@ -577,9 +578,8 @@ sys_mmap(struct proc *p, void *v, register_t *retval) if ((flags & MAP_ANON) != 0 || ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) { - u_int64_t used = ptoa(p->p_vmspace->vm_dused); - if (p->p_rlimit[RLIMIT_DATA].rlim_cur < used || - size > p->p_rlimit[RLIMIT_DATA].rlim_cur - used) { + if (size > + (p->p_rlimit[RLIMIT_DATA].rlim_cur - ptoa(p->p_vmspace->vm_dused))) { error = ENOMEM; goto out; } diff --git a/sys/uvm/uvm_object.c b/sys/uvm/uvm_object.c deleted file mode 100644 index 9eaaf575f50..00000000000 --- a/sys/uvm/uvm_object.c +++ /dev/null @@ -1,159 +0,0 @@ -/* $OpenBSD: uvm_object.c,v 1.1 2009/06/06 03:45:08 
oga Exp $ */ - -/* - * Copyright (c) 2006 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Mindaugas Rasiukevicius. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * uvm_object.c: operate with memory objects - * - */ - -#include <sys/cdefs.h> - -#include <sys/param.h> - -#include <uvm/uvm.h> - -/* We will fetch this page count per step */ -#define FETCH_PAGECOUNT 16 - -/* - * uvm_objwire: wire the pages of entire uobj - * - * => caller must pass page-aligned start and end values - * => if the caller passes in a pageq pointer, we'll return a list of - * wired pages. 
- */ - -int -uvm_objwire(struct uvm_object *uobj, off_t start, off_t end, - struct pglist *pageq) -{ - int i, npages, error; - struct vm_page *pgs[FETCH_PAGECOUNT]; - off_t offset = start, left; - - left = (end - start) >> PAGE_SHIFT; - - simple_lock(&uobj->vmobjlock); - while (left) { - - npages = MIN(FETCH_PAGECOUNT, left); - - /* Get the pages */ - memset(pgs, 0, sizeof(pgs)); - error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, 0, - VM_PROT_READ | VM_PROT_WRITE, UVM_ADV_SEQUENTIAL, - PGO_ALLPAGES | PGO_SYNCIO); - - if (error) - goto error; - - simple_lock(&uobj->vmobjlock); - for (i = 0; i < npages; i++) { - - KASSERT(pgs[i] != NULL); - KASSERT(!(pgs[i]->pg_flags & PG_RELEASED)); - -#if 0 - /* - * Loan break - */ - if (pgs[i]->loan_count) { - while (pgs[i]->loan_count) { - pg = uvm_loanbreak(pgs[i]); - if (!pg) { - simple_unlock(&uobj->vmobjlock); - uvm_wait("uobjwirepg"); - simple_lock(&uobj->vmobjlock); - continue; - } - } - pgs[i] = pg; - } -#endif - - if (pgs[i]->pg_flags & PQ_AOBJ) { - atomic_clearbits_int(&pgs[i]->pg_flags, - PG_CLEAN); - uao_dropswap(uobj, i); - } - } - - /* Wire the pages */ - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - uvm_pagewire(pgs[i]); - if (pageq != NULL) - TAILQ_INSERT_TAIL(pageq, pgs[i], pageq); - } - uvm_unlock_pageq(); - - /* Unbusy the pages */ - uvm_page_unbusy(pgs, npages); - - left -= npages; - offset += npages << PAGE_SHIFT; - } - simple_unlock(&uobj->vmobjlock); - - return 0; - -error: - /* Unwire the pages which have been wired */ - uvm_objunwire(uobj, start, offset); - - return error; -} - -/* - * uobj_unwirepages: unwire the pages of entire uobj - * - * => caller must pass page-aligned start and end values - */ - -void -uvm_objunwire(struct uvm_object *uobj, off_t start, off_t end) -{ - struct vm_page *pg; - off_t offset; - - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - for (offset = start; offset < end; offset += PAGE_SIZE) { - pg = uvm_pagelookup(uobj, offset); - - KASSERT(pg != NULL); - 
KASSERT(!(pg->pg_flags & PG_RELEASED)); - - uvm_pageunwire(pg); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); -} diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index d6292ec0acb..3d5a091f462 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_object.h,v 1.12 2009/06/06 03:45:08 oga Exp $ */ +/* $OpenBSD: uvm_object.h,v 1.13 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_object.h,v 1.11 2001/03/09 01:02:12 chs Exp $ */ /* @@ -47,11 +47,11 @@ */ struct uvm_object { - simple_lock_data_t vmobjlock; /* lock on memq */ - struct uvm_pagerops *pgops; /* pager ops */ - RB_HEAD(uobj_pgs, vm_page) memt; /* pages in obj */ - int uo_npages; /* # of pages in memq */ - int uo_refs; /* reference count */ + simple_lock_data_t vmobjlock; /* lock on memq */ + struct uvm_pagerops *pgops; /* pager ops */ + struct pglist memq; /* pages in this object */ + int uo_npages; /* # of pages in memq */ + int uo_refs; /* reference count */ }; /* @@ -93,11 +93,6 @@ extern struct uvm_pagerops uvm_deviceops; ((uobj)->pgops == &uvm_vnodeops && \ ((struct vnode *)uobj)->v_flag & VTEXT) -int uvm_pagecmp(struct vm_page *, struct vm_page *); -RB_PROTOTYPE(uobj_pgs, vm_page, fq.queues.tree, uvm_pagecmp); - -int uvm_objwire(struct uvm_object *, off_t, off_t, struct pglist *); -void uvm_objunwire(struct uvm_object *, off_t, off_t); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 3758deb58e7..2cf45c11375 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_page.c,v 1.88 2009/06/14 03:04:08 deraadt Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.89 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ /* @@ -118,6 +118,14 @@ static vaddr_t virtual_space_start; static vaddr_t virtual_space_end; /* + * we use a hash table with only one bucket during bootup. we will + * later rehash (resize) the hash table once the allocator is ready. 
+ * we static allocate the one bootstrap bucket below... + */ + +static struct pglist uvm_bootbucket; + +/* * History */ UVMHIST_DECL(pghist); @@ -134,7 +142,7 @@ static void uvm_pageremove(struct vm_page *); */ /* - * uvm_pageinsert: insert a page in the object + * uvm_pageinsert: insert a page in the object and the hash table * * => caller must lock object * => caller must lock page queues @@ -145,17 +153,23 @@ static void uvm_pageremove(struct vm_page *); __inline static void uvm_pageinsert(struct vm_page *pg) { + struct pglist *buck; UVMHIST_FUNC("uvm_pageinsert"); UVMHIST_CALLED(pghist); KASSERT((pg->pg_flags & PG_TABLED) == 0); + mtx_enter(&uvm.hashlock); + buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; + TAILQ_INSERT_TAIL(buck, pg, fq.queues.hashq); /* put in hash */ + mtx_leave(&uvm.hashlock); - RB_INSERT(uobj_pgs, &pg->uobject->memt, pg); + TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, + fq.queues.listq); /* put in object */ atomic_setbits_int(&pg->pg_flags, PG_TABLED); pg->uobject->uo_npages++; } /* - * uvm_page_remove: remove page from object + * uvm_page_remove: remove page from object and hash * * => caller must lock object * => caller must lock page queues @@ -164,11 +178,23 @@ uvm_pageinsert(struct vm_page *pg) static __inline void uvm_pageremove(struct vm_page *pg) { + struct pglist *buck; UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist); KASSERT(pg->pg_flags & PG_TABLED); + mtx_enter(&uvm.hashlock); + buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; + TAILQ_REMOVE(buck, pg, fq.queues.hashq); + mtx_leave(&uvm.hashlock); + +#ifdef UBC + if (pg->uobject->pgops == &uvm_vnodeops) { + uvm_pgcnt_vnode--; + } +#endif + /* object should be locked */ - RB_REMOVE(uobj_pgs, &pg->uobject->memt, pg); + TAILQ_REMOVE(&pg->uobject->memq, pg, fq.queues.listq); atomic_clearbits_int(&pg->pg_flags, PG_TABLED|PQ_AOBJ); pg->uobject->uo_npages--; @@ -176,14 +202,6 @@ uvm_pageremove(struct vm_page *pg) pg->pg_version++; } -int 
-uvm_pagecmp(struct vm_page *a, struct vm_page *b) -{ - return (a->offset < b->offset ? -1 : a->offset > b->offset); -} - -RB_GENERATE(uobj_pgs, vm_page, fq.queues.tree, uvm_pagecmp); - /* * uvm_page_init: init the page system. called from uvm_init(). * @@ -216,6 +234,18 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) mtx_init(&uvm.fpageqlock, IPL_VM); uvm_pmr_init(); + /* + * init the <obj,offset> => <page> hash table. for now + * we just have one bucket (the bootstrap bucket). later on we + * will allocate new buckets as we dynamically resize the hash table. + */ + + uvm.page_nhash = 1; /* 1 bucket */ + uvm.page_hashmask = 0; /* mask for hash function */ + uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */ + TAILQ_INIT(uvm.page_hash); /* init hash table */ + mtx_init(&uvm.hashlock, IPL_VM); /* init hash table lock */ + /* * allocate vm_page structures. */ @@ -713,9 +743,97 @@ uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start, * done! */ + if (!preload) + uvm_page_rehash(); + + return; +} + +/* + * uvm_page_rehash: reallocate hash table based on number of free pages. + */ + +void +uvm_page_rehash(void) +{ + int freepages, lcv, bucketcount, oldcount; + struct pglist *newbuckets, *oldbuckets; + struct vm_page *pg; + size_t newsize, oldsize; + + /* + * compute number of pages that can go in the free pool + */ + + freepages = 0; + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + freepages += + (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start); + + /* + * compute number of buckets needed for this number of pages + */ + + bucketcount = 1; + while (bucketcount < freepages) + bucketcount = bucketcount * 2; + + /* + * compute the size of the current table and new table. 
+ */ + + oldbuckets = uvm.page_hash; + oldcount = uvm.page_nhash; + oldsize = round_page(sizeof(struct pglist) * oldcount); + newsize = round_page(sizeof(struct pglist) * bucketcount); + + /* + * allocate the new buckets + */ + + newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize); + if (newbuckets == NULL) { + printf("uvm_page_physrehash: WARNING: could not grow page " + "hash table\n"); + return; + } + for (lcv = 0 ; lcv < bucketcount ; lcv++) + TAILQ_INIT(&newbuckets[lcv]); + + /* + * now replace the old buckets with the new ones and rehash everything + */ + + mtx_enter(&uvm.hashlock); + uvm.page_hash = newbuckets; + uvm.page_nhash = bucketcount; + uvm.page_hashmask = bucketcount - 1; /* power of 2 */ + + /* ... and rehash */ + for (lcv = 0 ; lcv < oldcount ; lcv++) { + while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) { + TAILQ_REMOVE(&oldbuckets[lcv], pg, fq.queues.hashq); + TAILQ_INSERT_TAIL( + &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)], + pg, fq.queues.hashq); + } + } + mtx_leave(&uvm.hashlock); + + /* + * free old bucket array if is not the boot-time table + */ + + if (oldbuckets != &uvm_bootbucket) + uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize); + + /* + * done + */ return; } + #ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ void uvm_page_physdump(void); /* SHUT UP GCC */ @@ -741,6 +859,7 @@ uvm_page_physdump(void) case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break; default: printf("<<UNKNOWN>>!!!!\n"); } + printf("number of buckets = %d\n", uvm.page_nhash); } #endif @@ -822,9 +941,7 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, pg->offset = off; pg->uobject = obj; pg->uanon = anon; - pg->pg_flags = PG_BUSY|PG_FAKE; - if (!(flags & UVM_PGA_ZERO)) - atomic_setbits_int(&pg->pg_flags, PG_CLEAN); + pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE; if (anon) { anon->an_page = pg; atomic_setbits_int(&pg->pg_flags, PQ_ANON); @@ -885,7 +1002,7 @@ uvm_pagerealloc(struct vm_page *pg, struct 
uvm_object *newobj, voff_t newoff) /* * uvm_pagefree: free page * - * => erase page's identity (i.e. remove from object) + * => erase page's identity (i.e. remove from hash/object) * => put page on free list * => caller must lock owning object (either anon or uvm_object) * => caller must lock page queues @@ -993,8 +1110,8 @@ uvm_pagefree(struct vm_page *pg) /* * Clean page state bits. */ - atomic_clearbits_int(&pg->pg_flags, PG_ZERO|PG_FAKE|PG_BUSY| - PG_RELEASED|PG_CLEAN|PG_CLEANCHK|PQ_ENCRYPT); + atomic_clearbits_int(&pg->pg_flags, + PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK); /* * Pmap flag cleaning. * XXX: Shouldn't pmap do this? @@ -1051,14 +1168,7 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs) UVMHIST_LOG(pdhist, "releasing pg %p", pg,0,0,0); uobj = pg->uobject; if (uobj != NULL) { - uvm_lock_pageq(); - pmap_page_protect(pg, VM_PROT_NONE); - /* XXX won't happen right now */ - if (pg->pg_flags & PQ_ANON) - uao_dropswap(uobj, - pg->offset >> PAGE_SHIFT); - uvm_pagefree(pg); - uvm_unlock_pageq(); + uobj->pgops->pgo_releasepg(pg, NULL); } else { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); @@ -1283,10 +1393,19 @@ PHYS_TO_VM_PAGE(paddr_t pa) struct vm_page * uvm_pagelookup(struct uvm_object *obj, voff_t off) { - struct vm_page find; + struct vm_page *pg; + struct pglist *buck; - find.offset = off; - return (RB_FIND(uobj_pgs, &obj->memt, &find)); + mtx_enter(&uvm.hashlock); + buck = &uvm.page_hash[uvm_pagehash(obj,off)]; + + TAILQ_FOREACH(pg, buck, fq.queues.hashq) { + if (pg->uobject == obj && pg->offset == off) { + break; + } + } + mtx_leave(&uvm.hashlock); + return(pg); } /* @@ -1344,8 +1463,6 @@ uvm_pageunwire(struct vm_page *pg) void uvm_pagedeactivate(struct vm_page *pg) { - pmap_page_protect(pg, VM_PROT_NONE); - if (pg->pg_flags & PQ_ACTIVE) { TAILQ_REMOVE(&uvm.page_active, pg, pageq); atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE); diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index e6d71c95ae9..5896286c871 
100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_page.h,v 1.36 2009/06/14 03:04:08 deraadt Exp $ */ +/* $OpenBSD: uvm_page.h,v 1.37 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */ /* @@ -108,7 +108,7 @@ union vm_page_fq { struct { - RB_ENTRY(vm_page) tree; /* hash table links (O)*/ + TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ } queues; @@ -122,6 +122,7 @@ struct vm_page { union vm_page_fq fq; /* free and queue management */ TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO * queue or free list (P) */ + struct vm_anon *uanon; /* anon (O,P) */ struct uvm_object *uobject; /* object (O,P) */ voff_t offset; /* offset into object (O,P) */ @@ -252,6 +253,7 @@ void uvm_page_own(struct vm_page *, char *); #if !defined(PMAP_STEAL_MEMORY) boolean_t uvm_page_physget(paddr_t *); #endif +void uvm_page_rehash(void); void uvm_pageidlezero(void); void uvm_pageactivate(struct vm_page *); diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h index 5b820ea65b3..8f720d4ccec 100644 --- a/sys/uvm/uvm_pager.h +++ b/sys/uvm/uvm_pager.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pager.h,v 1.24 2009/06/01 19:54:02 oga Exp $ */ +/* $OpenBSD: uvm_pager.h,v 1.25 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */ /* @@ -109,6 +109,8 @@ struct uvm_pagerops { struct vm_page ** (*pgo_mk_pcluster)(struct uvm_object *, struct vm_page **, int *, struct vm_page *, int, voff_t, voff_t); + /* release page */ + boolean_t (*pgo_releasepg)(struct vm_page *, struct vm_page **); }; /* pager flags [mostly for flush] */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index b30cf1e0a98..27cdc07ae73 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pdaemon.c,v 1.48 2009/06/15 17:01:26 beck Exp $ */ +/* $OpenBSD: uvm_pdaemon.c,v 1.49 2009/06/16 00:11:29 oga Exp $ 
*/ /* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */ /* @@ -820,20 +820,35 @@ uvmpd_scan_inactive(struct pglist *pglst) atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED); UVM_PAGE_OWN(p, NULL); - /* released during I/O? Can only happen for anons */ + /* released during I/O? */ if (p->pg_flags & PG_RELEASED) { - KASSERT(anon != NULL); - /* remove page so we can get nextpg */ - anon->an_page = NULL; + if (anon) { + /* remove page so we can get nextpg */ + anon->an_page = NULL; - simple_unlock(&anon->an_lock); - uvm_anfree(anon); /* kills anon */ - pmap_page_protect(p, VM_PROT_NONE); - anon = NULL; - uvm_lock_pageq(); - nextpg = TAILQ_NEXT(p, pageq); - /* free released page */ - uvm_pagefree(p); + simple_unlock(&anon->an_lock); + uvm_anfree(anon); /* kills anon */ + pmap_page_protect(p, VM_PROT_NONE); + anon = NULL; + uvm_lock_pageq(); + nextpg = TAILQ_NEXT(p, pageq); + /* free released page */ + uvm_pagefree(p); + + } else { + + /* + * pgo_releasepg nukes the page and + * gets "nextpg" for us. it returns + * with the page queues locked (when + * given nextpg ptr). 
+ */ + + if (!uobj->pgops->pgo_releasepg(p, + &nextpg)) + /* uobj died after release */ + uobj = NULL; + } } else { /* page was not released during I/O */ uvm_lock_pageq(); nextpg = TAILQ_NEXT(p, pageq); @@ -1042,6 +1057,7 @@ uvmpd_scan(void) */ if (inactive_shortage > 0) { + pmap_page_protect(p, VM_PROT_NONE); /* no need to check wire_count as pg is "active" */ uvm_pagedeactivate(p); uvmexp.pddeact++; diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c index 43fd09128e2..f6a78511a70 100644 --- a/sys/uvm/uvm_swap.c +++ b/sys/uvm/uvm_swap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_swap.c,v 1.90 2009/06/04 02:56:14 oga Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.91 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ /* @@ -358,8 +358,12 @@ uvm_swap_allocpages(struct vm_page **pps, int npages) boolean_t fail; /* Estimate if we will succeed */ + uvm_lock_fpageq(); + fail = uvmexp.free - npages < uvmexp.reserve_kernel; + uvm_unlock_fpageq(); + if (fail) return FALSE; diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index c15ebbf70a0..e85e2c24e38 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_vnode.c,v 1.62 2009/06/06 17:46:44 art Exp $ */ +/* $OpenBSD: uvm_vnode.c,v 1.63 2009/06/16 00:11:29 oga Exp $ */ /* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */ /* @@ -93,6 +93,7 @@ void uvn_init(void); int uvn_io(struct uvm_vnode *, vm_page_t *, int, int, int); int uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t); void uvn_reference(struct uvm_object *); +boolean_t uvn_releasepg(struct vm_page *, struct vm_page **); /* * master pager structure @@ -108,6 +109,7 @@ struct uvm_pagerops uvm_vnodeops = { uvn_put, uvn_cluster, uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */ + uvn_releasepg, }; /* @@ -271,7 +273,7 @@ uvn_attach(void *arg, vm_prot_t accessprot) * now set up the uvn. 
*/ uvn->u_obj.pgops = &uvm_vnodeops; - RB_INIT(&uvn->u_obj.memt); + TAILQ_INIT(&uvn->u_obj.memq); uvn->u_obj.uo_npages = 0; uvn->u_obj.uo_refs = 1; /* just us... */ oldflags = uvn->u_flags; @@ -438,7 +440,11 @@ uvn_detach(struct uvm_object *uobj) if (uvn->u_flags & UVM_VNODE_WRITEABLE) { LIST_REMOVE(uvn, u_wlist); } - KASSERT(RB_EMPTY(&uobj->memt)); +#ifdef DIAGNOSTIC + if (!TAILQ_EMPTY(&uobj->memq)) + panic("uvn_deref: vnode VM object still has pages afer " + "syncio/free flush"); +#endif oldflags = uvn->u_flags; uvn->u_flags = 0; simple_unlock(&uobj->vmobjlock); @@ -520,8 +526,8 @@ uvm_vnp_terminate(struct vnode *vp) /* * it is possible that the uvn was detached and is in the relkill - * state [i.e. waiting for async i/o to finish]. - * we take over the vnode now and cancel the relkill. + * state [i.e. waiting for async i/o to finish so that releasepg can + * kill object]. we take over the vnode now and cancel the relkill. * we want to know when the i/o is done so we can recycle right * away. note that a uvn can only be in the RELKILL state if it * has a zero reference count. @@ -555,7 +561,7 @@ uvm_vnp_terminate(struct vnode *vp) while (uvn->u_obj.uo_npages) { #ifdef DEBUG struct vm_page *pp; - RB_FOREACH(pp, uobj_pgs, &uvn->u_obj.memt) { + TAILQ_FOREACH(pp, &uvn->u_obj.memq, fq.queues.listq) { if ((pp->pg_flags & PG_BUSY) == 0) panic("uvm_vnp_terminate: detected unbusy pg"); } @@ -615,6 +621,41 @@ uvm_vnp_terminate(struct vnode *vp) } /* + * uvn_releasepg: handled a released page in a uvn + * + * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need + * to dispose of. + * => caller must handled PG_WANTED case + * => called with page's object locked, pageq's unlocked + * => returns TRUE if page's object is still alive, FALSE if we + * killed the page's object. if we return TRUE, then we + * return with the object locked. 
+ * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return + * with the page queues locked [for pagedaemon] + * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] + * => we kill the uvn if it is not referenced and we are suppose to + * kill it ("relkill"). + */ + +boolean_t +uvn_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */) +{ + KASSERT(pg->pg_flags & PG_RELEASED); + + /* + * dispose of the page [caller handles PG_WANTED] + */ + pmap_page_protect(pg, VM_PROT_NONE); + uvm_lock_pageq(); + if (nextpgp) + *nextpgp = TAILQ_NEXT(pg, pageq); /* next page for daemon */ + uvm_pagefree(pg); + if (!nextpgp) + uvm_unlock_pageq(); + return (TRUE); +} + +/* * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go * through the buffer cache and allow I/O in any size. These VOPs use * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't @@ -648,6 +689,8 @@ uvm_vnp_terminate(struct vnode *vp) * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio } * - get "page" structures (atop?). * - handle "wanted" pages + * - handle "released" pages [using pgo_releasepg] + * >>> pgo_releasepg may kill the object * dont forget to look at "object" wanted flag in all cases. */ @@ -802,6 +845,7 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) if (flags & PGO_DEACTIVATE) { if ((pp->pg_flags & PQ_INACTIVE) == 0 && pp->wire_count == 0) { + pmap_page_protect(pp, VM_PROT_NONE); uvm_pagedeactivate(pp); } } else if (flags & PGO_FREE) { @@ -945,6 +989,7 @@ ReTry: if (flags & PGO_DEACTIVATE) { if ((pp->pg_flags & PQ_INACTIVE) == 0 && pp->wire_count == 0) { + pmap_page_protect(ptmp, VM_PROT_NONE); uvm_pagedeactivate(ptmp); } } else if (flags & PGO_FREE && |