summaryrefslogtreecommitdiff
path: root/sys/uvm/uvm_aobj.c
diff options
context:
space:
mode:
authorOwain Ainsworth <oga@cvs.openbsd.org>2009-05-05 05:12:18 +0000
committerOwain Ainsworth <oga@cvs.openbsd.org>2009-05-05 05:12:18 +0000
commit2ca0420015a74e63df7b050b2128a444db29fdba (patch)
treea372e6a26ed8034c4fa9f1415dd057073476d657 /sys/uvm/uvm_aobj.c
parent71fba0a7a9467d4c3ad3de9a82987215f224e965 (diff)
The first step in cleaning up the use of PG_RELEASED for uvm objects.
Now, the PG_ RELEASED flag currently has two (maybe three) uses. The valid one is for use with async io where we want to free the page after we've paged it out. The other ones are "oh i'd like to free this, but someone else is playing with it". It's simpler to just sleep on the damned page instead and stop the fiddling. First step does uao's: in uao_detach, sleep on the object and free it when we're clean, instead of setting a flag so it's freed after. In uao_flush, do the same. Change the interation over the object in flush so that we don't have to add marker pages or other such voodoo to the list when we sleep (netbsd did that when they had a similar diff), just use the hash always. We can now change uao_releasepg() to just free the page, and not bother with the KILLME stuff. When the other objects are fixed this hook will vanish. Much discussion with art@ over the idea, and ariane@ over this specific diff. As mentioned, this one is based loosely on a similar idea in netbsd. Been in my tree for a while, survived many make builds, etc, and forcing paging using ariane's evil program. ok ariane@, beck@.
Diffstat (limited to 'sys/uvm/uvm_aobj.c')
-rw-r--r--sys/uvm/uvm_aobj.c192
1 files changed, 50 insertions, 142 deletions
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index fb131b0036c..77891de411c 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_aobj.c,v 1.37 2009/05/02 12:54:42 oga Exp $ */
+/* $OpenBSD: uvm_aobj.c,v 1.38 2009/05/05 05:12:17 oga Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -139,7 +139,7 @@ struct pool uao_swhash_elt_pool;
* uvm_aobj: the actual anon-backed uvm_object
*
* => the uvm_object is at the top of the structure, this allows
- * (struct uvm_device *) == (struct uvm_object *)
+ * (struct uvm_aobj *) == (struct uvm_object *)
* => only one of u_swslots and u_swhash is used in any given aobj
*/
@@ -565,7 +565,7 @@ uao_init(void)
simple_lock_init(&uao_list_lock);
/*
- * NOTE: Pages fror this pool must not come from a pageable
+ * NOTE: Pages for this pool must not come from a pageable
* kernel map!
*/
pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
@@ -641,8 +641,7 @@ void
uao_detach_locked(struct uvm_object *uobj)
{
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
- struct vm_page *pg, *next;
- boolean_t busybody;
+ struct vm_page *pg;
UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
/*
@@ -669,35 +668,26 @@ uao_detach_locked(struct uvm_object *uobj)
simple_unlock(&uao_list_lock);
/*
- * free all the pages that aren't PG_BUSY,
- * mark for release any that are.
+ * Free all pages left in the object. If they're busy, wait
+ * for them to become available before we kill it.
+ * Release swap resources then free the page.
*/
- busybody = FALSE;
- for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; pg = next) {
- next = TAILQ_NEXT(pg, listq);
+ uvm_lock_pageq();
+ while((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
if (pg->pg_flags & PG_BUSY) {
- atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
- busybody = TRUE;
+ atomic_setbits_int(&pg->pg_flags, PG_WANTED);
+ uvm_unlock_pageq();
+ UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
+ "uao_det", 0);
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
continue;
}
-
- /* zap the mappings, free the swap slot, free the page */
pmap_page_protect(pg, VM_PROT_NONE);
uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
- uvm_lock_pageq();
uvm_pagefree(pg);
- uvm_unlock_pageq();
- }
-
- /*
- * if we found any busy pages, we're done for now.
- * mark the aobj for death, releasepg will finish up for us.
- */
- if (busybody) {
- aobj->u_flags |= UAO_FLAG_KILLME;
- simple_unlock(&aobj->u_obj.vmobjlock);
- return;
}
+ uvm_unlock_pageq();
/*
* finally, free the rest.
@@ -729,35 +719,6 @@ uao_detach_locked(struct uvm_object *uobj)
* => we return TRUE unless we encountered some sort of I/O error
* XXXJRT currently never happens, as we never directly initiate
* XXXJRT I/O
- *
- * comment on "cleaning" object and PG_BUSY pages:
- * this routine is holding the lock on the object. the only time
- * that is can run into a PG_BUSY page that it does not own is if
- * some other process has started I/O on the page (e.g. either
- * a pagein or a pageout). if the PG_BUSY page is being paged
- * in, then it can not be dirty (!PG_CLEAN) because no one has
- * had a change to modify it yet. if the PG_BUSY page is being
- * paged out then it means that someone else has already started
- * cleaning the page for us (how nice!). in this case, if we
- * have syncio specified, then after we make our pass through the
- * object we need to wait for the other PG_BUSY pages to clear
- * off (i.e. we need to do an iosync). also note that once a
- * page is PG_BUSY is must stary in its object until it is un-busyed.
- * XXXJRT We never actually do this, as we are "flushing" anonymous
- * XXXJRT memory, which doesn't have persistent backing store.
- *
- * note on page traversal:
- * we can traverse the pages in an object either by going down the
- * linked list in "uobj->memq", or we can go over the address range
- * by page doing hash table lookups for each address. depending
- * on how many pages are in the object it may be cheaper to do one
- * or the other. we set "by_list" to true if we are using memq.
- * if the cost of a hash lookup was equal to the cost of the list
- * traversal we could compare the number of pages in the start->stop
- * range to the total number of pages in the object. however, it
- * seems that a hash table lookup is more expensive than the linked
- * list traversal, so we multiply the number of pages in the
- * start->stop range by a penalty which we define below.
*/
#define UAO_HASH_PENALTY 4 /* XXX: a guess */
@@ -766,19 +727,13 @@ boolean_t
uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
- struct vm_page *pp, *ppnext;
- boolean_t retval, by_list;
+ struct vm_page *pp;
voff_t curoff;
UVMHIST_FUNC("uao_flush"); UVMHIST_CALLED(maphist);
- curoff = 0; /* XXX: shut up gcc */
-
- retval = TRUE; /* default to success */
-
if (flags & PGO_ALLPAGES) {
start = 0;
stop = aobj->u_pages << PAGE_SHIFT;
- by_list = TRUE; /* always go by the list */
} else {
start = trunc_page(start);
stop = round_page(stop);
@@ -787,13 +742,10 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
"flush (fixed)\n");
stop = aobj->u_pages << PAGE_SHIFT;
}
- by_list = (uobj->uo_npages <=
- ((stop - start) >> PAGE_SHIFT) * UAO_HASH_PENALTY);
}
- UVMHIST_LOG(maphist,
- " flush start=0x%lx, stop=0x%lx, by_list=%ld, flags=0x%lx",
- (u_long)start, (u_long)stop, by_list, flags);
+ UVMHIST_LOG(maphist, " flush start=0x%lx, stop=0x%lx, flags=0x%lx",
+ (u_long)start, (u_long)stop, flags, 0);
/*
* Don't need to do any work here if we're not freeing
@@ -802,44 +754,31 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
UVMHIST_LOG(maphist,
"<- done (no work to do)",0,0,0,0);
- return (retval);
- }
-
- /*
- * now do it. note: we must update ppnext in the body of loop or we
- * will get stuck. we need to use ppnext because we may free "pp"
- * before doing the next loop.
- */
-
- if (by_list) {
- pp = TAILQ_FIRST(&uobj->memq);
- } else {
- curoff = start;
- pp = uvm_pagelookup(uobj, curoff);
+ return (TRUE);
}
- ppnext = NULL; /* XXX: shut up gcc */
- uvm_lock_pageq(); /* page queues locked */
-
- /* locked: both page queues and uobj */
- for ( ; (by_list && pp != NULL) ||
- (!by_list && curoff < stop) ; pp = ppnext) {
- if (by_list) {
- ppnext = TAILQ_NEXT(pp, listq);
-
- /* range check */
- if (pp->offset < start || pp->offset >= stop)
- continue;
- } else {
+ /* locked: uobj */
+ curoff = start;
+ for (;;) {
+ if (curoff < stop) {
+ pp = uvm_pagelookup(uobj, curoff);
curoff += PAGE_SIZE;
- if (curoff < stop)
- ppnext = uvm_pagelookup(uobj, curoff);
-
- /* null check */
if (pp == NULL)
continue;
+ } else {
+ break;
}
-
+
+ /* Make sure page is unbusy, else wait for it. */
+ if (pp->pg_flags & PG_BUSY) {
+ atomic_setbits_int(&pp->pg_flags, PG_WANTED);
+ UVM_UNLOCK_AND_WAIT(pp, &uobj->vmobjlock, 0,
+ "uaoflsh", 0);
+ simple_lock(&uobj->vmobjlock);
+ curoff -= PAGE_SIZE;
+ continue;
+ }
+
switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
/*
* XXX In these first 3 cases, we always just
@@ -848,7 +787,9 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
* XXX in the future.
*/
case PGO_CLEANIT|PGO_FREE:
+ /* FALLTHROUGH */
case PGO_CLEANIT|PGO_DEACTIVATE:
+ /* FALLTHROUGH */
case PGO_DEACTIVATE:
deactivate_it:
/* skip the page if it's loaned or wired */
@@ -856,16 +797,13 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
pp->wire_count != 0)
continue;
-#ifdef UBC
- /* ...and deactivate the page. */
- pmap_clear_reference(pp);
-#else
+ uvm_lock_pageq();
/* zap all mappings for the page. */
pmap_page_protect(pp, VM_PROT_NONE);
/* ...and deactivate the page. */
-#endif
uvm_pagedeactivate(pp);
+ uvm_unlock_pageq();
continue;
@@ -882,19 +820,13 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
pp->wire_count != 0)
continue;
- /*
- * mark the page as released if its busy.
- */
- if (pp->pg_flags & PG_BUSY) {
- atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
- continue;
- }
-
/* zap all mappings for the page. */
pmap_page_protect(pp, VM_PROT_NONE);
uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
+ uvm_lock_pageq();
uvm_pagefree(pp);
+ uvm_unlock_pageq();
continue;
@@ -903,11 +835,9 @@ uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
}
}
- uvm_unlock_pageq();
-
UVMHIST_LOG(maphist,
"<- done, rv=%ld",retval,0,0,0);
- return (retval);
+ return (TRUE);
}
/*
@@ -989,10 +919,10 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
}
/*
- * to be useful must get a non-busy, non-released page
+ * to be useful must get a non-busy page
*/
if (ptmp == NULL ||
- (ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ (ptmp->pg_flags & PG_BUSY) != 0) {
if (lcv == centeridx ||
(flags & PGO_ALLPAGES) != 0)
/* need to do a wait or I/O! */
@@ -1099,7 +1029,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
}
/* page is there, see if we need to wait on it */
- if ((ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if ((ptmp->pg_flags & PG_BUSY) != 0) {
atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
UVMHIST_LOG(pdhist,
"sleeping, ptmp->flags 0x%lx\n",
@@ -1138,8 +1068,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/*
* just zero the page if there's nothing in swap.
*/
- if (swslot == 0)
- {
+ if (swslot == 0) {
/*
* page hasn't existed before, just zero it.
*/
@@ -1250,27 +1179,7 @@ uao_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */)
if (!nextpgp)
uvm_unlock_pageq(); /* keep locked for daemon */
- /*
- * if we're not killing the object, we're done.
- */
- if ((aobj->u_flags & UAO_FLAG_KILLME) == 0)
- return TRUE;
- KASSERT(aobj->u_obj.uo_refs == 0);
-
- /*
- * if there are still pages in the object, we're done for now.
- */
- if (aobj->u_obj.uo_npages != 0)
- return TRUE;
-
- KASSERT(TAILQ_EMPTY(&aobj->u_obj.memq));
-
- /*
- * finally, free the rest.
- */
- uao_free(aobj);
-
- return FALSE;
+ return TRUE;
}
@@ -1476,7 +1385,6 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
return FALSE;
}
- KASSERT((pg->pg_flags & PG_RELEASED) == 0);
/*
* ok, we've got the page now.