author     Owain Ainsworth <oga@cvs.openbsd.org>    2009-06-16 17:14:16 +0000
committer  Owain Ainsworth <oga@cvs.openbsd.org>    2009-06-16 17:14:16 +0000
commit     42caf8629b478d48a8120571cf83377282e2c536 (patch)
tree       434bf60ed97f9ede587aa6d59131d935df1be782
parent     df471f546425532f5eb23a525aa177e7cebfc201 (diff)
Backout all the PG_RELEASED changes.
This is for the same reason as the earlier backouts, to avoid the bug
either added or exposed sometime around c2k9. This *should* be the last
one.
prompted by deraadt@
ok ariane@
-rw-r--r--  sys/uvm/uvm_aobj.c   | 192
-rw-r--r--  sys/uvm/uvm_aobj.h   |   4
-rw-r--r--  sys/uvm/uvm_km.c     |  25
-rw-r--r--  sys/uvm/uvm_pager.c  |  76
-rw-r--r--  sys/uvm/uvm_vnode.c  | 334
5 files changed, 474 insertions, 157 deletions
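
The hunks below re-introduce the PG_RELEASED convention that the earlier diffs had removed: instead of sleeping on a PG_BUSY page, the detach and flush paths tag it PG_RELEASED and let whoever owns the page free it when its I/O completes, marking the object for death (UAO_FLAG_KILLME) so the last releasepg call finishes the teardown. The following is a rough standalone sketch of that hand-off, not the real UVM code; the structs, flag bits, and helper names are made up for illustration.

```c
/*
 * Sketch of the restored PG_RELEASED / kill-me hand-off.
 * Simplified stand-ins, not the real UVM definitions.
 */
#include <stdbool.h>
#include <stdio.h>

#define PG_BUSY     0x01	/* page currently owned by an I/O path */
#define PG_RELEASED 0x02	/* owner should free the page when done */

struct page {
	int          flags;
	struct page *next;	/* object's page list */
};

struct object {
	struct page *pages;
	int          npages;
	bool         killme;	/* destroy object once the last page is gone */
};

/* unlink one page from the object's list and account for it */
static void
page_free(struct object *obj, struct page **prevp)
{
	struct page *pg = *prevp;

	*prevp = pg->next;
	obj->npages--;
}

/*
 * detach: rather than waiting for PG_BUSY pages (the code being backed
 * out), mark them PG_RELEASED and let the I/O owner dispose of them.
 */
static void
object_detach(struct object *obj)
{
	struct page **prevp = &obj->pages;
	struct page *pg;
	bool busybody = false;

	while ((pg = *prevp) != NULL) {
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_RELEASED;	/* owner frees it later */
			busybody = true;
			prevp = &pg->next;
			continue;
		}
		page_free(obj, prevp);
	}

	if (busybody) {
		/* pages still in flight: defer destruction to releasepg */
		obj->killme = true;
		return;
	}
	printf("object destroyed immediately\n");
}

/*
 * releasepg: called by the I/O owner when it un-busies a PG_RELEASED page;
 * returns false if dropping the page also destroyed the object.
 */
static bool
object_releasepg(struct object *obj, struct page **prevp)
{
	page_free(obj, prevp);
	if (obj->killme && obj->npages == 0) {
		printf("last released page gone, object destroyed\n");
		return false;
	}
	return true;
}

int
main(void)
{
	struct page p1 = { PG_BUSY, NULL };
	struct object obj = { &p1, 1, false };

	object_detach(&obj);			/* p1 busy: marked PG_RELEASED */
	object_releasepg(&obj, &obj.pages);	/* I/O done: object dies */
	return 0;
}
```

The real uao_detach_locked()/uao_releasepg() additionally juggle the object lock and the page queues; the sketch only shows the ownership hand-off and the deferred teardown.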
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index b2a68d6d249..11c0cc181bf 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_aobj.c,v 1.43 2009/06/16 00:11:29 oga Exp $ */ +/* $OpenBSD: uvm_aobj.c,v 1.44 2009/06/16 17:14:14 oga Exp $ */ /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */ /* @@ -139,7 +139,7 @@ struct pool uao_swhash_elt_pool; * uvm_aobj: the actual anon-backed uvm_object * * => the uvm_object is at the top of the structure, this allows - * (struct uvm_aobj *) == (struct uvm_object *) + * (struct uvm_device *) == (struct uvm_object *) * => only one of u_swslots and u_swhash is used in any given aobj */ @@ -562,7 +562,7 @@ uao_init(void) simple_lock_init(&uao_list_lock); /* - * NOTE: Pages for this pool must not come from a pageable + * NOTE: Pages fror this pool must not come from a pageable * kernel map! */ pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), @@ -638,7 +638,8 @@ void uao_detach_locked(struct uvm_object *uobj) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - struct vm_page *pg; + struct vm_page *pg, *next; + boolean_t busybody; UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist); /* @@ -665,26 +666,35 @@ uao_detach_locked(struct uvm_object *uobj) simple_unlock(&uao_list_lock); /* - * Free all pages left in the object. If they're busy, wait - * for them to become available before we kill it. - * Release swap resources then free the page. + * free all the pages that aren't PG_BUSY, + * mark for release any that are. */ - uvm_lock_pageq(); - while((pg = TAILQ_FIRST(&uobj->memq)) != NULL) { + busybody = FALSE; + for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; pg = next) { + next = TAILQ_NEXT(pg, listq); if (pg->pg_flags & PG_BUSY) { - atomic_setbits_int(&pg->pg_flags, PG_WANTED); - uvm_unlock_pageq(); - UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, - "uao_det", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + atomic_setbits_int(&pg->pg_flags, PG_RELEASED); + busybody = TRUE; continue; } + + /* zap the mappings, free the swap slot, free the page */ pmap_page_protect(pg, VM_PROT_NONE); uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); + uvm_lock_pageq(); uvm_pagefree(pg); + uvm_unlock_pageq(); + } + + /* + * if we found any busy pages, we're done for now. + * mark the aobj for death, releasepg will finish up for us. + */ + if (busybody) { + aobj->u_flags |= UAO_FLAG_KILLME; + simple_unlock(&aobj->u_obj.vmobjlock); + return; } - uvm_unlock_pageq(); /* * finally, free the rest. @@ -716,6 +726,35 @@ uao_detach_locked(struct uvm_object *uobj) * => we return TRUE unless we encountered some sort of I/O error * XXXJRT currently never happens, as we never directly initiate * XXXJRT I/O + * + * comment on "cleaning" object and PG_BUSY pages: + * this routine is holding the lock on the object. the only time + * that is can run into a PG_BUSY page that it does not own is if + * some other process has started I/O on the page (e.g. either + * a pagein or a pageout). if the PG_BUSY page is being paged + * in, then it can not be dirty (!PG_CLEAN) because no one has + * had a change to modify it yet. if the PG_BUSY page is being + * paged out then it means that someone else has already started + * cleaning the page for us (how nice!). in this case, if we + * have syncio specified, then after we make our pass through the + * object we need to wait for the other PG_BUSY pages to clear + * off (i.e. we need to do an iosync). 
also note that once a + * page is PG_BUSY is must stary in its object until it is un-busyed. + * XXXJRT We never actually do this, as we are "flushing" anonymous + * XXXJRT memory, which doesn't have persistent backing store. + * + * note on page traversal: + * we can traverse the pages in an object either by going down the + * linked list in "uobj->memq", or we can go over the address range + * by page doing hash table lookups for each address. depending + * on how many pages are in the object it may be cheaper to do one + * or the other. we set "by_list" to true if we are using memq. + * if the cost of a hash lookup was equal to the cost of the list + * traversal we could compare the number of pages in the start->stop + * range to the total number of pages in the object. however, it + * seems that a hash table lookup is more expensive than the linked + * list traversal, so we multiply the number of pages in the + * start->stop range by a penalty which we define below. */ #define UAO_HASH_PENALTY 4 /* XXX: a guess */ @@ -724,13 +763,19 @@ boolean_t uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) { struct uvm_aobj *aobj = (struct uvm_aobj *) uobj; - struct vm_page *pp; + struct vm_page *pp, *ppnext; + boolean_t retval, by_list; voff_t curoff; UVMHIST_FUNC("uao_flush"); UVMHIST_CALLED(maphist); + curoff = 0; /* XXX: shut up gcc */ + + retval = TRUE; /* default to success */ + if (flags & PGO_ALLPAGES) { start = 0; stop = aobj->u_pages << PAGE_SHIFT; + by_list = TRUE; /* always go by the list */ } else { start = trunc_page(start); stop = round_page(stop); @@ -739,10 +784,13 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) "flush (fixed)\n"); stop = aobj->u_pages << PAGE_SHIFT; } + by_list = (uobj->uo_npages <= + ((stop - start) >> PAGE_SHIFT) * UAO_HASH_PENALTY); } - UVMHIST_LOG(maphist, " flush start=0x%lx, stop=0x%lx, flags=0x%lx", - (u_long)start, (u_long)stop, flags, 0); + UVMHIST_LOG(maphist, + " flush start=0x%lx, stop=0x%lx, by_list=%ld, flags=0x%lx", + (u_long)start, (u_long)stop, by_list, flags); /* * Don't need to do any work here if we're not freeing @@ -751,31 +799,44 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { UVMHIST_LOG(maphist, "<- done (no work to do)",0,0,0,0); - return (TRUE); + return (retval); } - /* locked: uobj */ - curoff = start; - for (;;) { - if (curoff < stop) { - pp = uvm_pagelookup(uobj, curoff); - curoff += PAGE_SIZE; - if (pp == NULL) + /* + * now do it. note: we must update ppnext in the body of loop or we + * will get stuck. we need to use ppnext because we may free "pp" + * before doing the next loop. + */ + + if (by_list) { + pp = TAILQ_FIRST(&uobj->memq); + } else { + curoff = start; + pp = uvm_pagelookup(uobj, curoff); + } + + ppnext = NULL; /* XXX: shut up gcc */ + uvm_lock_pageq(); /* page queues locked */ + + /* locked: both page queues and uobj */ + for ( ; (by_list && pp != NULL) || + (!by_list && curoff < stop) ; pp = ppnext) { + if (by_list) { + ppnext = TAILQ_NEXT(pp, listq); + + /* range check */ + if (pp->offset < start || pp->offset >= stop) continue; } else { - break; - } + curoff += PAGE_SIZE; + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); - /* Make sure page is unbusy, else wait for it. 
*/ - if (pp->pg_flags & PG_BUSY) { - atomic_setbits_int(&pp->pg_flags, PG_WANTED); - UVM_UNLOCK_AND_WAIT(pp, &uobj->vmobjlock, 0, - "uaoflsh", 0); - simple_lock(&uobj->vmobjlock); - curoff -= PAGE_SIZE; - continue; + /* null check */ + if (pp == NULL) + continue; } - + switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { /* * XXX In these first 3 cases, we always just @@ -784,9 +845,7 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) * XXX in the future. */ case PGO_CLEANIT|PGO_FREE: - /* FALLTHROUGH */ case PGO_CLEANIT|PGO_DEACTIVATE: - /* FALLTHROUGH */ case PGO_DEACTIVATE: deactivate_it: /* skip the page if it's loaned or wired */ @@ -794,13 +853,16 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) pp->wire_count != 0) continue; - uvm_lock_pageq(); +#ifdef UBC + /* ...and deactivate the page. */ + pmap_clear_reference(pp); +#else /* zap all mappings for the page. */ pmap_page_protect(pp, VM_PROT_NONE); /* ...and deactivate the page. */ +#endif uvm_pagedeactivate(pp); - uvm_unlock_pageq(); continue; @@ -817,13 +879,19 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) pp->wire_count != 0) continue; + /* + * mark the page as released if its busy. + */ + if (pp->pg_flags & PG_BUSY) { + atomic_setbits_int(&pp->pg_flags, PG_RELEASED); + continue; + } + /* zap all mappings for the page. */ pmap_page_protect(pp, VM_PROT_NONE); uao_dropswap(uobj, pp->offset >> PAGE_SHIFT); - uvm_lock_pageq(); uvm_pagefree(pp); - uvm_unlock_pageq(); continue; @@ -832,9 +900,11 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) } } + uvm_unlock_pageq(); + UVMHIST_LOG(maphist, "<- done, rv=%ld",retval,0,0,0); - return (TRUE); + return (retval); } /* @@ -916,10 +986,10 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } /* - * to be useful must get a non-busy page + * to be useful must get a non-busy, non-released page */ if (ptmp == NULL || - (ptmp->pg_flags & PG_BUSY) != 0) { + (ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) { if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0) /* need to do a wait or I/O! */ @@ -1026,7 +1096,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } /* page is there, see if we need to wait on it */ - if ((ptmp->pg_flags & PG_BUSY) != 0) { + if ((ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) { atomic_setbits_int(&ptmp->pg_flags, PG_WANTED); UVMHIST_LOG(pdhist, "sleeping, ptmp->flags 0x%lx\n", @@ -1065,7 +1135,8 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, /* * just zero the page if there's nothing in swap. */ - if (swslot == 0) { + if (swslot == 0) + { /* * page hasn't existed before, just zero it. */ @@ -1176,7 +1247,27 @@ uao_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */) if (!nextpgp) uvm_unlock_pageq(); /* keep locked for daemon */ - return TRUE; + /* + * if we're not killing the object, we're done. + */ + if ((aobj->u_flags & UAO_FLAG_KILLME) == 0) + return TRUE; + KASSERT(aobj->u_obj.uo_refs == 0); + + /* + * if there are still pages in the object, we're done for now. + */ + if (aobj->u_obj.uo_npages != 0) + return TRUE; + + KASSERT(TAILQ_EMPTY(&aobj->u_obj.memq)); + + /* + * finally, free the rest. + */ + uao_free(aobj); + + return FALSE; } @@ -1382,6 +1473,7 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) return FALSE; } + KASSERT((pg->pg_flags & PG_RELEASED) == 0); /* * ok, we've got the page now. 
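
The restored uao_flush() above also brings back the by_list heuristic described in its block comment: walk uobj->memq when the object holds few pages, otherwise look pages up by offset, treating a hash lookup as roughly UAO_HASH_PENALTY times the cost of a list step. Below is a minimal sketch of that decision only, with illustrative constants and names rather than the kernel's.

```c
/* Sketch of the list-vs-lookup flush heuristic; values are illustrative. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   12
#define HASH_PENALTY 4	/* assumed relative cost of one offset lookup */

/*
 * Decide whether to flush by walking the object's page list or by doing
 * one offset lookup per page in [start, stop).
 */
static bool
flush_by_list(unsigned int obj_npages, uint64_t start, uint64_t stop,
    bool allpages)
{
	uint64_t range_pages;

	if (allpages)
		return true;	/* whole object: always walk the list */

	range_pages = (stop - start) >> PAGE_SHIFT;
	return obj_npages <= range_pages * HASH_PENALTY;
}

int
main(void)
{
	/* small range over a big object: per-offset lookups win */
	printf("%d\n", flush_by_list(10000, 0, 16 << PAGE_SHIFT, false));
	/* small object: walking the list wins */
	printf("%d\n", flush_by_list(8, 0, 16 << PAGE_SHIFT, false));
	return 0;
}
```

The kernel's constant is UAO_HASH_PENALTY, commented as a guess, so the cutoff is deliberately rough; the point is only to avoid per-page lookups on densely populated objects.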
diff --git a/sys/uvm/uvm_aobj.h b/sys/uvm/uvm_aobj.h index b30e9026b07..b97281011dd 100644 --- a/sys/uvm/uvm_aobj.h +++ b/sys/uvm/uvm_aobj.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_aobj.h,v 1.9 2009/05/05 05:12:17 oga Exp $ */ +/* $OpenBSD: uvm_aobj.h,v 1.10 2009/06/16 17:14:15 oga Exp $ */ /* $NetBSD: uvm_aobj.h,v 1.10 2000/01/11 06:57:49 chs Exp $ */ /* @@ -55,6 +55,8 @@ #define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */ /* internal flags */ +#define UAO_FLAG_KILLME 0x4 /* aobj should die when last released + * page is no longer PG_BUSY ... */ #define UAO_FLAG_NOSWAP 0x8 /* aobj can't swap (kernel obj only!) */ #ifdef _KERNEL diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index 895a9593173..962b41bfac1 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_km.c,v 1.71 2009/05/05 05:27:53 oga Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.72 2009/06/16 17:14:15 oga Exp $ */ /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ /* @@ -276,12 +276,8 @@ uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end) pp->pg_flags & PG_BUSY, 0, 0); if (pp->pg_flags & PG_BUSY) { - atomic_setbits_int(&pp->pg_flags, PG_WANTED); - UVM_UNLOCK_AND_WAIT(pp, &uobj->vmobjlock, 0, - "km_pgrm", 0); - simple_lock(&uobj->vmobjlock); - curoff -= PAGE_SIZE; /* loop back to us */ - continue; + /* owner must check for this when done */ + atomic_setbits_int(&pp->pg_flags, PG_RELEASED); } else { /* free the swap slot... */ uao_dropswap(uobj, curoff >> PAGE_SHIFT); @@ -515,6 +511,21 @@ uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) loopva = kva; while (size) { simple_lock(&uvm.kernel_object->vmobjlock); + pg = uvm_pagelookup(uvm.kernel_object, offset); + + /* + * if we found a page in an unallocated region, it must be + * released + */ + if (pg) { + if ((pg->pg_flags & PG_RELEASED) == 0) + panic("uvm_km_alloc1: non-released page"); + atomic_setbits_int(&pg->pg_flags, PG_WANTED); + UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock, + FALSE, "km_alloc", 0); + continue; /* retry */ + } + /* allocate ram */ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); if (pg) { diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index 292cc90e218..4aad8ee3738 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pager.c,v 1.51 2009/05/23 14:06:37 oga Exp $ */ +/* $OpenBSD: uvm_pager.c,v 1.52 2009/06/16 17:14:15 oga Exp $ */ /* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ /* @@ -339,8 +339,6 @@ uvm_pagermapout(vaddr_t kva, int npages) * PGO_ALLPAGES: all pages in object are valid targets * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster * PGO_DOACTCLUST: include active pages in cluster. - * PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed - * in async io (caller must clean on error). * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. * PG_CLEANCHK is only a hint, but clearing will help reduce * the number of calls we make to the pmap layer. @@ -442,14 +440,6 @@ uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, atomic_setbits_int(&pclust->pg_flags, PG_BUSY); UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); - /* - * If we want to free after io is done, and we're - * async, set the released flag - */ - if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE) - atomic_setbits_int(&pclust->pg_flags, - PG_RELEASED); - /* XXX: protect wired page? see above comment. 
*/ pmap_page_protect(pclust, VM_PROT_READ); if (!forward) { @@ -491,7 +481,6 @@ uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets * PGO_SYNCIO: do SYNC I/O (no async) * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O - * PGO_FREE: tell the aio daemon to free pages in the async case. * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range * if (!uobj) start is the (daddr64_t) of the starting swapblk * => return state: @@ -715,6 +704,8 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, struct vm_page **ppsp, int *npages, int flags) { int lcv; + boolean_t obj_is_alive; + struct uvm_object *saved_uobj; /* * drop all pages but "pg" @@ -756,8 +747,9 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, } /* if page was released, release it. otherwise un-busy it */ - if (ppsp[lcv]->pg_flags & PG_RELEASED && - ppsp[lcv]->pg_flags & PQ_ANON) { + if (ppsp[lcv]->pg_flags & PG_RELEASED) { + + if (ppsp[lcv]->pg_flags & PQ_ANON) { /* so that anfree will free */ atomic_clearbits_int(&ppsp[lcv]->pg_flags, PG_BUSY); @@ -769,13 +761,34 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, uvm_anfree(ppsp[lcv]->uanon); continue; - } else { + } + /* - * if we were planning on async io then we would - * have PG_RELEASED set, clear that with the others. + * pgo_releasepg will dump the page for us */ + + saved_uobj = ppsp[lcv]->uobject; + obj_is_alive = + saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL); + + /* for normal objects, "pg" is still PG_BUSY by us, + * so obj can't die */ + KASSERT(!uobj || obj_is_alive); + + /* only unlock the object if it is still alive... */ + if (obj_is_alive && saved_uobj != uobj) + simple_unlock(&saved_uobj->vmobjlock); + + /* + * XXXCDC: suppose uobj died in the pgo_releasepg? + * how pass that + * info up to caller. we are currently ignoring it... + */ + + continue; /* next page */ + } else { atomic_clearbits_int(&ppsp[lcv]->pg_flags, - PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED); + PG_BUSY|PG_WANTED|PG_FAKE); UVM_PAGE_OWN(ppsp[lcv], NULL); } @@ -799,6 +812,33 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, } } +#ifdef UBC +/* + * interrupt-context iodone handler for nested i/o bufs. + * + * => must be at splbio(). + */ + +void +uvm_aio_biodone1(struct buf *bp) +{ + struct buf *mbp = bp->b_private; + + splassert(IPL_BIO); + + KASSERT(mbp != bp); + if (bp->b_flags & B_ERROR) { + mbp->b_flags |= B_ERROR; + mbp->b_error = bp->b_error; + } + mbp->b_resid -= bp->b_bcount; + pool_put(&bufpool, bp); + if (mbp->b_resid == 0) { + biodone(mbp); + } +} +#endif + /* * interrupt-context iodone handler for single-buf i/os * or the top-level buf of a nested-buf i/o. diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index 998f0fa0a62..439cfe03712 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_vnode.c,v 1.64 2009/06/16 16:42:41 ariane Exp $ */ +/* $OpenBSD: uvm_vnode.c,v 1.65 2009/06/16 17:14:15 oga Exp $ */ /* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */ /* @@ -415,23 +415,30 @@ uvn_detach(struct uvm_object *uobj) /* * given the structure of this pager, the above flush request will * create the following state: all the pages that were in the object - * have either been free'd or they are marked PG_BUSY and in the - * middle of an async io. 
If we still have pages we set the "relkill" - * state, so that in the case the vnode gets terminated we know - * to leave it alone. Otherwise we'll kill the vnode when it's empty. + * have either been free'd or they are marked PG_BUSY|PG_RELEASED. + * the PG_BUSY bit was set either by us or the daemon for async I/O. + * in either case, if we have pages left we can't kill the object + * yet because i/o is pending. in this case we set the "relkill" + * flag which will cause pgo_releasepg to kill the object once all + * the I/O's are done [pgo_releasepg will be called from the aiodone + * routine or from the page daemon]. */ - uvn->u_flags |= UVM_VNODE_RELKILL; - /* wait on any outstanding io */ - while (uobj->uo_npages && uvn->u_flags & UVM_VNODE_RELKILL) { - uvn->u_flags |= UVM_VNODE_IOSYNC; - UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE, - "uvn_term",0); - simple_lock(&uvn->u_obj.vmobjlock); - } - - if ((uvn->u_flags & UVM_VNODE_RELKILL) == 0) + if (uobj->uo_npages) { /* I/O pending. iodone will free */ +#ifdef DEBUG + /* + * XXXCDC: very unlikely to happen until we have async i/o + * so print a little info message in case it does. + */ + printf("uvn_detach: vn %p has pages left after flush - " + "relkill mode\n", uobj); +#endif + uvn->u_flags |= UVM_VNODE_RELKILL; + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0, + 0, 0); return; + } /* * kill object now. note that we can't be on the sync q because @@ -483,6 +490,8 @@ uvn_detach(struct uvm_object *uobj) * => the caller must XLOCK and VOP_LOCK the vnode before calling us * [protects us from getting a vnode that is already in the DYING * state...] + * => unlike uvn_detach, this function must not return until all the + * uvn's pages are disposed of. * => in case [2] the uvn is still alive after this call, but all I/O * ops will fail (due to the backing vnode now being "dead"). this * will prob. kill any process using the uvn due to pgo_get failing. @@ -640,7 +649,12 @@ uvm_vnp_terminate(struct vnode *vp) boolean_t uvn_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */) { - KASSERT(pg->pg_flags & PG_RELEASED); + struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject; + struct vnode *vp = (struct vnode *)uvn; +#ifdef DIAGNOSTIC + if ((pg->pg_flags & PG_RELEASED) == 0) + panic("uvn_releasepg: page not released!"); +#endif /* * dispose of the page [caller handles PG_WANTED] @@ -652,6 +666,32 @@ uvn_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */) uvm_pagefree(pg); if (!nextpgp) uvm_unlock_pageq(); + + /* + * now see if we need to kill the object + */ + if (uvn->u_flags & UVM_VNODE_RELKILL) { + if (uvn->u_obj.uo_refs) + panic("uvn_releasepg: kill flag set on referenced " + "object!"); + if (uvn->u_obj.uo_npages == 0) { + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + LIST_REMOVE(uvn, u_wlist); + } +#ifdef DIAGNOSTIC + if (!TAILQ_EMPTY(&uvn->u_obj.memq)) + panic("uvn_releasepg: pages in object with npages == 0"); +#endif + if (uvn->u_flags & UVM_VNODE_WANTED) + /* still holding object lock */ + wakeup(uvn); + + uvn->u_flags = 0; /* DEAD! 
*/ + simple_unlock(&uvn->u_obj.vmobjlock); + vrele(vp); + return (FALSE); + } + } return (TRUE); } @@ -752,13 +792,15 @@ boolean_t uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) { struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; - struct vm_page *pp, *ptmp; + struct vm_page *pp, *ppnext, *ptmp; struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp; int npages, result, lcv; - boolean_t retval, need_iosync, needs_clean; + boolean_t retval, need_iosync, by_list, needs_clean, all; voff_t curoff; + u_short pp_version; UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist); + curoff = 0; /* XXX: shut up gcc */ /* * get init vals and determine how we are going to traverse object */ @@ -766,16 +808,24 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) need_iosync = FALSE; retval = TRUE; /* return value */ if (flags & PGO_ALLPAGES) { - start = 0; - stop = round_page(uvn->u_size); + all = TRUE; + by_list = TRUE; /* always go by the list */ } else { start = trunc_page(start); - stop = MIN(round_page(stop), round_page(uvn->u_size)); + stop = round_page(stop); +#ifdef DEBUG + if (stop > round_page(uvn->u_size)) + printf("uvn_flush: strange, got an out of range " + "flush (fixed)\n"); +#endif + all = FALSE; + by_list = (uobj->uo_npages <= + ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY); } UVMHIST_LOG(maphist, - " flush start=0x%lx, stop=0x%lx, flags=0x%lx", - (u_long)start, (u_long)stop, flags, 0); + " flush start=0x%lx, stop=0x%lx, by_list=%ld, flags=0x%lx", + (u_long)start, (u_long)stop, by_list, flags); /* * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as @@ -788,21 +838,75 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) * [borrowed PG_CLEANCHK idea from FreeBSD VM] */ - if ((flags & PGO_CLEANIT) != 0) { - KASSERT(uobj->pgops->pgo_mk_pcluster != 0); - for (curoff = start ; curoff < stop; curoff += PAGE_SIZE) { - if ((pp = uvm_pagelookup(uobj, curoff)) != NULL) + if ((flags & PGO_CLEANIT) != 0 && + uobj->pgops->pgo_mk_pcluster != NULL) { + if (by_list) { + TAILQ_FOREACH(pp, &uobj->memq, listq) { + if (!all && + (pp->offset < start || pp->offset >= stop)) + continue; atomic_clearbits_int(&pp->pg_flags, PG_CLEANCHK); + } + + } else { /* by hash */ + for (curoff = start ; curoff < stop; + curoff += PAGE_SIZE) { + pp = uvm_pagelookup(uobj, curoff); + if (pp) + atomic_clearbits_int(&pp->pg_flags, + PG_CLEANCHK); + } } } + /* + * now do it. note: we must update ppnext in body of loop or we + * will get stuck. we need to use ppnext because we may free "pp" + * before doing the next loop. 
+ */ + + if (by_list) { + pp = TAILQ_FIRST(&uobj->memq); + } else { + curoff = start; + pp = uvm_pagelookup(uobj, curoff); + } + + ppnext = NULL; /* XXX: shut up gcc */ ppsp = NULL; /* XXX: shut up gcc */ uvm_lock_pageq(); /* page queues locked */ + /* locked: both page queues and uobj */ - for (curoff = start; curoff < stop; curoff += PAGE_SIZE) { - if ((pp = uvm_pagelookup(uobj, curoff)) == NULL) - continue; + for ( ; (by_list && pp != NULL) || + (!by_list && curoff < stop) ; pp = ppnext) { + + if (by_list) { + + /* + * range check + */ + + if (!all && + (pp->offset < start || pp->offset >= stop)) { + ppnext = TAILQ_NEXT(pp, listq); + continue; + } + + } else { + + /* + * null check + */ + + curoff += PAGE_SIZE; + if (pp == NULL) { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + continue; + } + + } /* * handle case where we do not need to clean page (either @@ -839,32 +943,37 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) } /* - * if we don't need a clean... deactivate/free pages then cont. + * if we don't need a clean... load ppnext and dispose of pp */ if (!needs_clean) { + /* load ppnext */ + if (by_list) + ppnext = TAILQ_NEXT(pp, listq); + else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + } + + /* now dispose of pp */ if (flags & PGO_DEACTIVATE) { if ((pp->pg_flags & PQ_INACTIVE) == 0 && pp->wire_count == 0) { pmap_page_protect(pp, VM_PROT_NONE); uvm_pagedeactivate(pp); } + } else if (flags & PGO_FREE) { if (pp->pg_flags & PG_BUSY) { + /* release busy pages */ atomic_setbits_int(&pp->pg_flags, - PG_WANTED); - uvm_unlock_pageq(); - UVM_UNLOCK_AND_WAIT(pp, - &uobj->vmobjlock, 0, "uvn_flsh", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - curoff -= PAGE_SIZE; - continue; + PG_RELEASED); } else { pmap_page_protect(pp, VM_PROT_NONE); /* removed page from object */ uvm_pagefree(pp); } } + /* ppnext is valid so we can continue... */ continue; } @@ -880,9 +989,7 @@ uvn_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) atomic_setbits_int(&pp->pg_flags, PG_BUSY); UVM_PAGE_OWN(pp, "uvn_flush"); pmap_page_protect(pp, VM_PROT_READ); - /* if we're async, free the page in aiodoned */ - if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE) - atomic_setbits_int(&pp->pg_flags, PG_RELEASED); + pp_version = pp->pg_version; ReTry: ppsp = pps; npages = sizeof(pps) / sizeof(struct vm_page *); @@ -893,11 +1000,11 @@ ReTry: /* unlocked: page queues, uobj */ /* - * if we did an async I/O it is remotely possible for the - * async i/o to complete and the page "pp" be freed or what - * not before we get a chance to relock the object. Therefore, - * we only touch it when it won't be freed, RELEASED took care - * of the rest. + * at this point nothing is locked. if we did an async I/O + * it is remotely possible for the async i/o to complete and + * the page "pp" be freed or what not before we get a chance + * to relock the object. in order to detect this, we have + * saved the version number of the page in "pp_version". */ /* relock! */ @@ -906,7 +1013,7 @@ ReTry: /* * VM_PAGER_AGAIN: given the structure of this pager, this - * can only happen when we are doing async I/O and can't + * can only happen when we are doing async I/O and can't * map the pages into kernel memory (pager_map) due to lack * of vm space. if this happens we drop back to sync I/O. 
*/ @@ -924,10 +1031,6 @@ ReTry: panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)"); #endif flags |= PGO_SYNCIO; - if (flags & PGO_FREE) - atomic_clearbits_int(&pp->pg_flags, - PG_RELEASED); - goto ReTry; } @@ -939,20 +1042,66 @@ ReTry: */ /* - * for pending async i/o if we are not deactivating - * we can move on to the next page. aiodoned deals with - * the freeing case for us. + * for pending async i/o if we are not deactivating/freeing + * we can move on to the next page. */ - if (result == VM_PAGER_PEND && (flags & PGO_DEACTIVATE) == 0) - continue; + + if (result == VM_PAGER_PEND) { + + if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + /* + * no per-page ops: refresh ppnext and continue + */ + if (by_list) { + if (pp->pg_version == pp_version) + ppnext = TAILQ_NEXT(pp, listq); + else + /* reset */ + ppnext = TAILQ_FIRST(&uobj->memq); + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, + curoff); + } + continue; + } + + /* need to do anything here? */ + } /* - * need to look at each page of the I/O operation, and do what - * we gotta do. + * need to look at each page of the I/O operation. we defer + * processing "pp" until the last trip through this "for" loop + * so that we can load "ppnext" for the main loop after we + * play with the cluster pages [thus the "npages + 1" in the + * loop below]. */ - for (lcv = 0 ; lcv < npages; lcv++) { - ptmp = ppsp[lcv]; + for (lcv = 0 ; lcv < npages + 1 ; lcv++) { + + /* + * handle ppnext for outside loop, and saving pp + * until the end. + */ + if (lcv < npages) { + if (ppsp[lcv] == pp) + continue; /* skip pp until the end */ + ptmp = ppsp[lcv]; + } else { + ptmp = pp; + + /* set up next page for outer loop */ + if (by_list) { + if (pp->pg_version == pp_version) + ppnext = TAILQ_NEXT(pp, listq); + else + /* reset */ + ppnext = TAILQ_FIRST(&uobj->memq); + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + } + } /* * verify the page didn't get moved while obj was @@ -976,10 +1125,25 @@ ReTry: atomic_clearbits_int(&ptmp->pg_flags, PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); - atomic_setbits_int(&ptmp->pg_flags, - PG_CLEAN|PG_CLEANCHK); - if ((flags & PGO_FREE) == 0) - pmap_clear_modify(ptmp); + if (ptmp->pg_flags & PG_RELEASED) { + + /* + * pgo_releasepg needs to grab the + * pageq lock itself. + */ + uvm_unlock_pageq(); + if (!uvn_releasepg(ptmp, NULL)) + return (TRUE); + + uvm_lock_pageq(); /* relock */ + continue; /* next page */ + + } else { + atomic_setbits_int(&ptmp->pg_flags, + PG_CLEAN|PG_CLEANCHK); + if ((flags & PGO_FREE) == 0) + pmap_clear_modify(ptmp); + } } /* @@ -992,21 +1156,29 @@ ReTry: pmap_page_protect(ptmp, VM_PROT_NONE); uvm_pagedeactivate(ptmp); } - } else if (flags & PGO_FREE && - result != VM_PAGER_PEND) { - if (result != VM_PAGER_OK) { - printf("uvn_flush: obj=%p, " - "offset=0x%llx. error " - "during pageout.\n", - pp->uobject, - (long long)pp->offset); - printf("uvn_flush: WARNING: " - "changes to page may be " - "lost!\n"); - retval = FALSE; + + } else if (flags & PGO_FREE) { + if (result == VM_PAGER_PEND) { + if ((ptmp->pg_flags & PG_BUSY) != 0) + /* signal for i/o done */ + atomic_setbits_int( + &ptmp->pg_flags, + PG_RELEASED); + } else { + if (result != VM_PAGER_OK) { + printf("uvn_flush: obj=%p, " + "offset=0x%llx. 
error " + "during pageout.\n", + pp->uobject, + (long long)pp->offset); + printf("uvn_flush: WARNING: " + "changes to page may be " + "lost!\n"); + retval = FALSE; + } + pmap_page_protect(ptmp, VM_PROT_NONE); + uvm_pagefree(ptmp); } - pmap_page_protect(ptmp, VM_PROT_NONE); - uvm_pagefree(ptmp); } } /* end of "lcv" for loop */ @@ -1149,7 +1321,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, /* to be useful must get a non-busy, non-released pg */ if (ptmp == NULL || - (ptmp->pg_flags & PG_BUSY) != 0) { + (ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) { if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0) done = FALSE; /* need to do a wait or I/O! */ @@ -1255,7 +1427,7 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, } /* page is there, see if we need to wait on it */ - if ((ptmp->pg_flags & PG_BUSY) != 0) { + if ((ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) { atomic_setbits_int(&ptmp->pg_flags, PG_WANTED); UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock, FALSE, "uvn_get",0); |
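
A recurring pattern in the uvm_pager.c and uvm_vnode.c hunks above is the pgo_releasepg() contract: the callback frees a PG_RELEASED page and returns FALSE when doing so also destroyed the owning object, in which case the caller must not unlock or otherwise touch that object again. The sketch below shows only that calling convention, with simplified types and names that are not the UVM interfaces.

```c
/* Sketch of the releasepg calling convention; types are simplified. */
#include <stdbool.h>
#include <stdio.h>

struct object;		/* opaque pager object */
struct page;		/* opaque page */

/* frees a PG_RELEASED page; returns false if that destroyed the object */
typedef bool (*releasepg_fn)(struct object *, struct page *);

static void
object_unlock(struct object *obj)
{
	(void)obj;
	printf("object unlocked\n");
}

static void
drop_released_page(struct object *obj, struct page *pg, releasepg_fn releasepg)
{
	bool obj_is_alive = releasepg(obj, pg);

	/*
	 * Only touch the object's lock if releasepg left it alive; if the
	 * object died, its lock was torn down along with it.
	 */
	if (obj_is_alive)
		object_unlock(obj);
	else
		printf("object died in releasepg; caller must not touch it\n");
}

/* toy releasepg that reports the object died with its last page */
static bool
toy_releasepg(struct object *obj, struct page *pg)
{
	(void)obj;
	(void)pg;
	return false;
}

int
main(void)
{
	drop_released_page(NULL, NULL, toy_releasepg);
	return 0;
}
```

This mirrors the obj_is_alive / saved_uobj handling in uvm_pager_dropcluster() above, where the saved object is only unlocked when releasepg reports it survived.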