author     Artur Grabowski <art@cvs.openbsd.org>   2001-07-26 19:37:14 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>   2001-07-26 19:37:14 +0000
commit     34736e0579123f60efbe00cf506fd4b82e5b769a (patch)
tree       1e8806da205009ef73327c70f28319fe7e5df798 /sys
parent     78e879ae8deee32b2668976fbffb5a125629ba39 (diff)
Add support for disabling swap devices (swapctl -d).
Improve error handling on I/O errors to swap. From NetBSD.
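
For context, the userland side of this is swapctl(8) -d, which issues the SWAP_OFF
command of swapctl(2) that this commit makes functional in the kernel (the old code
returned EINVAL). A minimal sketch of that call follows; the device path is
hypothetical and the exact headers/prototype are as documented in swapctl(2), not
taken from this commit:

#include <sys/types.h>
#include <sys/swap.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char path[] = "/dev/wd0b";	/* hypothetical swap partition */

	/*
	 * SWAP_OFF takes the pathname of the swap device as "arg";
	 * "misc" is unused for this command.  With this commit the
	 * kernel pages everything back in from the device before
	 * removing it, or fails with e.g. EBUSY or ENOMEM.
	 */
	if (swapctl(SWAP_OFF, path, 0) == -1)
		err(1, "swapctl(SWAP_OFF, %s)", path);

	printf("%s: swapping disabled\n", path);
	return (0);
}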
Diffstat (limited to 'sys')
-rw-r--r--   sys/uvm/uvm_anon.c     239
-rw-r--r--   sys/uvm/uvm_anon.h       7
-rw-r--r--   sys/uvm/uvm_aobj.c     371
-rw-r--r--   sys/uvm/uvm_aobj.h       6
-rw-r--r--   sys/uvm/uvm_extern.h     6
-rw-r--r--   sys/uvm/uvm_fault.c     49
-rw-r--r--   sys/uvm/uvm_fault_i.h   21
-rw-r--r--   sys/uvm/uvm_km.c         5
-rw-r--r--   sys/uvm/uvm_pager.c    114
-rw-r--r--   sys/uvm/uvm_pager.h      6
-rw-r--r--   sys/uvm/uvm_stat.c       9
-rw-r--r--   sys/uvm/uvm_swap.c     390
-rw-r--r--   sys/uvm/uvm_swap.h       8
13 files changed, 845 insertions(+), 386 deletions(-)
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
index c94fd494ac6..ab15cec84da 100644
--- a/sys/uvm/uvm_anon.c
+++ b/sys/uvm/uvm_anon.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_anon.c,v 1.8 2001/07/18 10:47:05 art Exp $ */
-/* $NetBSD: uvm_anon.c,v 1.4 1999/09/12 01:17:34 chs Exp $ */
+/* $OpenBSD: uvm_anon.c,v 1.9 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_anon.c,v 1.5 2000/01/11 06:57:49 chs Exp $ */
/*
*
@@ -42,6 +42,7 @@
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
+#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
@@ -51,61 +52,80 @@
#include <uvm/uvm_swap.h>
/*
+ * anonblock_list: global list of anon blocks,
+ * locked by swap_syscall_lock (since we never remove
+ * anything from this list and we only add to it via swapctl(2)).
+ */
+
+struct uvm_anonblock {
+ LIST_ENTRY(uvm_anonblock) list;
+ int count;
+ struct vm_anon *anons;
+};
+static LIST_HEAD(anonlist, uvm_anonblock) anonblock_list;
+
+
+static boolean_t anon_pagein __P((struct vm_anon *));
+
+
+/*
* allocate anons
*/
void
uvm_anon_init()
{
- struct vm_anon *anon;
int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
- int lcv;
+
+ simple_lock_init(&uvm.afreelock);
+ LIST_INIT(&anonblock_list);
/*
* Allocate the initial anons.
*/
- anon = (struct vm_anon *)uvm_km_alloc(kernel_map,
- sizeof(*anon) * nanon);
- if (anon == NULL) {
- printf("uvm_anon_init: can not allocate %d anons\n", nanon);
- panic("uvm_anon_init");
- }
-
- memset(anon, 0, sizeof(*anon) * nanon);
- uvm.afree = NULL;
- uvmexp.nanon = uvmexp.nfreeanon = nanon;
- for (lcv = 0 ; lcv < nanon ; lcv++) {
- anon[lcv].u.an_nxt = uvm.afree;
- uvm.afree = &anon[lcv];
- simple_lock_init(&uvm.afree->an_lock);
- }
- simple_lock_init(&uvm.afreelock);
+ uvm_anon_add(nanon);
}
/*
* add some more anons to the free pool. called when we add
* more swap space.
+ *
+ * => swap_syscall_lock should be held (protects anonblock_list).
*/
void
-uvm_anon_add(pages)
- int pages;
+uvm_anon_add(count)
+ int count;
{
+ struct uvm_anonblock *anonblock;
struct vm_anon *anon;
- int lcv;
+ int lcv, needed;
- anon = (struct vm_anon *)uvm_km_alloc(kernel_map,
- sizeof(*anon) * pages);
+ simple_lock(&uvm.afreelock);
+ uvmexp.nanonneeded += count;
+ needed = uvmexp.nanonneeded - uvmexp.nanon;
+ simple_unlock(&uvm.afreelock);
+
+ if (needed <= 0) {
+ return;
+ }
+
+ MALLOC(anonblock, void *, sizeof(*anonblock), M_UVMAMAP, M_WAITOK);
+ anon = (void *)uvm_km_alloc(kernel_map, sizeof(*anon) * needed);
/* XXX Should wait for VM to free up. */
- if (anon == NULL) {
- printf("uvm_anon_add: can not allocate %d anons\n", pages);
+ if (anonblock == NULL || anon == NULL) {
+ printf("uvm_anon_add: can not allocate %d anons\n", needed);
panic("uvm_anon_add");
}
+ anonblock->count = needed;
+ anonblock->anons = anon;
+ LIST_INSERT_HEAD(&anonblock_list, anonblock, list);
+ memset(anon, 0, sizeof(*anon) * needed);
+
simple_lock(&uvm.afreelock);
- memset(anon, 0, sizeof(*anon) * pages);
- uvmexp.nanon += pages;
- uvmexp.nfreeanon += pages;
- for (lcv = 0; lcv < pages; lcv++) {
+ uvmexp.nanon += needed;
+ uvmexp.nfreeanon += needed;
+ for (lcv = 0; lcv < needed; lcv++) {
simple_lock_init(&anon->an_lock);
anon[lcv].u.an_nxt = uvm.afree;
uvm.afree = &anon[lcv];
@@ -115,6 +135,23 @@ uvm_anon_add(pages)
}
/*
+ * remove anons from the free pool.
+ */
+void
+uvm_anon_remove(count)
+ int count;
+{
+ /*
+ * we never actually free any anons, to avoid allocation overhead.
+ * XXX someday we might want to try to free anons.
+ */
+
+ simple_lock(&uvm.afreelock);
+ uvmexp.nanonneeded -= count;
+ simple_unlock(&uvm.afreelock);
+}
+
+/*
* allocate an anon
*/
struct vm_anon *
@@ -362,3 +399,143 @@ uvm_anon_lockloanpg(anon)
return(pg);
}
+
+
+
+/*
+ * page in every anon that is paged out to a range of swslots.
+ *
+ * swap_syscall_lock should be held (protects anonblock_list).
+ */
+
+boolean_t
+anon_swap_off(startslot, endslot)
+ int startslot, endslot;
+{
+ struct uvm_anonblock *anonblock;
+
+ for (anonblock = LIST_FIRST(&anonblock_list);
+ anonblock != NULL;
+ anonblock = LIST_NEXT(anonblock, list)) {
+ int i;
+
+ /*
+ * loop thru all the anons in the anonblock,
+ * paging in where needed.
+ */
+
+ for (i = 0; i < anonblock->count; i++) {
+ struct vm_anon *anon = &anonblock->anons[i];
+ int slot;
+
+ /*
+ * lock anon to work on it.
+ */
+
+ simple_lock(&anon->an_lock);
+
+ /*
+ * is this anon's swap slot in range?
+ */
+
+ slot = anon->an_swslot;
+ if (slot >= startslot && slot < endslot) {
+ boolean_t rv;
+
+ /*
+ * yup, page it in.
+ */
+
+ /* locked: anon */
+ rv = anon_pagein(anon);
+ /* unlocked: anon */
+
+ if (rv) {
+ return rv;
+ }
+ } else {
+
+ /*
+ * nope, unlock and proceed.
+ */
+
+ simple_unlock(&anon->an_lock);
+ }
+ }
+ }
+ return FALSE;
+}
+
+
+/*
+ * fetch an anon's page.
+ *
+ * => anon must be locked, and is unlocked upon return.
+ * => returns TRUE if pagein was aborted due to lack of memory.
+ */
+
+static boolean_t
+anon_pagein(anon)
+ struct vm_anon *anon;
+{
+ struct vm_page *pg;
+ struct uvm_object *uobj;
+ int rv;
+ UVMHIST_FUNC("anon_pagein"); UVMHIST_CALLED(pdhist);
+
+ /* locked: anon */
+ rv = uvmfault_anonget(NULL, NULL, anon);
+ /* unlocked: anon */
+
+ switch (rv) {
+ case VM_PAGER_OK:
+ break;
+
+ case VM_PAGER_ERROR:
+ case VM_PAGER_REFAULT:
+
+ /*
+ * nothing more to do on errors.
+ * VM_PAGER_REFAULT can only mean that the anon was freed,
+ * so again there's nothing to do.
+ */
+
+ return FALSE;
+
+#ifdef DIAGNOSTIC
+ default:
+ panic("anon_pagein: uvmfault_anonget -> %d", rv);
+#endif
+ }
+
+ /*
+ * ok, we've got the page now.
+ * mark it as dirty, clear its swslot and un-busy it.
+ */
+
+ pg = anon->u.an_page;
+ uobj = pg->uobject;
+ uvm_swap_free(anon->an_swslot, 1);
+ anon->an_swslot = 0;
+ pg->flags &= ~(PG_CLEAN);
+
+ /*
+ * deactivate the page (to put it on a page queue)
+ */
+
+ pmap_clear_reference(pg);
+ pmap_page_protect(pg, VM_PROT_NONE);
+ uvm_lock_pageq();
+ uvm_pagedeactivate(pg);
+ uvm_unlock_pageq();
+
+ /*
+ * unlock the anon and we're done.
+ */
+
+ simple_unlock(&anon->an_lock);
+ if (uobj) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ return FALSE;
+}
diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h
index e4ef0b57561..c7a743f2d07 100644
--- a/sys/uvm/uvm_anon.h
+++ b/sys/uvm/uvm_anon.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_anon.h,v 1.6 2001/03/09 05:34:38 smart Exp $ */
-/* $NetBSD: uvm_anon.h,v 1.11 1999/06/21 17:25:11 thorpej Exp $ */
+/* $OpenBSD: uvm_anon.h,v 1.7 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_anon.h,v 1.12 2000/01/11 06:57:49 chs Exp $ */
/*
*
@@ -103,9 +103,10 @@ struct vm_anon *uvm_analloc __P((void));
void uvm_anfree __P((struct vm_anon *));
void uvm_anon_init __P((void));
void uvm_anon_add __P((int));
+void uvm_anon_remove __P((int));
struct vm_page *uvm_anon_lockloanpg __P((struct vm_anon *));
void uvm_anon_dropswap __P((struct vm_anon *));
-
+boolean_t anon_swap_off __P((int, int));
#endif /* _KERNEL */
#endif /* _UVM_UVM_ANON_H_ */
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index 82b1986f2d3..d54131f4178 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_aobj.c,v 1.11 2001/07/18 10:47:05 art Exp $ */
-/* $NetBSD: uvm_aobj.c,v 1.26 1999/09/12 01:17:34 chs Exp $ */
+/* $OpenBSD: uvm_aobj.c,v 1.12 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_aobj.c,v 1.27 2000/01/11 06:57:49 chs Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -48,6 +48,7 @@
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
+#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
@@ -120,7 +121,7 @@
struct uao_swhash_elt {
LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
- vaddr_t tag; /* our 'tag' */
+ vaddr_t tag; /* our 'tag' */
int count; /* our number of active slots */
int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
};
@@ -169,7 +170,6 @@ struct pool uvm_aobj_pool;
* local functions
*/
-static void uao_init __P((void));
static struct uao_swhash_elt *uao_find_swhash_elt __P((struct uvm_aobj *,
int, boolean_t));
static int uao_find_swslot __P((struct uvm_aobj *,
@@ -183,6 +183,8 @@ static int uao_get __P((struct uvm_object *, vaddr_t,
vm_prot_t, int, int));
static boolean_t uao_releasepg __P((struct vm_page *,
struct vm_page **));
+static boolean_t uao_pagein __P((struct uvm_aobj *, int, int));
+static boolean_t uao_pagein_page __P((struct uvm_aobj *, int));
@@ -193,7 +195,7 @@ static boolean_t uao_releasepg __P((struct vm_page *,
*/
struct uvm_pagerops aobj_pager = {
- uao_init, /* init */
+ NULL, /* init */
uao_reference, /* reference */
uao_detach, /* detach */
NULL, /* fault */
@@ -402,6 +404,8 @@ uao_free(aobj)
struct uvm_aobj *aobj;
{
+ simple_unlock(&aobj->u_obj.vmobjlock);
+
if (UAO_USES_SWHASH(aobj)) {
int i, hashbuckets = aobj->u_swhashmask + 1;
@@ -412,12 +416,12 @@ uao_free(aobj)
for (i = 0; i < hashbuckets; i++) {
struct uao_swhash_elt *elt, *next;
- for (elt = aobj->u_swhash[i].lh_first; elt != NULL;
- elt = next) {
+ for (elt = LIST_FIRST(&aobj->u_swhash[i]);
+ elt != NULL;
+ elt = next) {
int j;
- for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++)
- {
+ for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
int slot = elt->slots[j];
if (slot) {
@@ -433,7 +437,7 @@ uao_free(aobj)
}
}
- next = elt->list.le_next;
+ next = LIST_NEXT(elt, list);
pool_put(&uao_swhash_elt_pool, elt);
}
}
@@ -445,8 +449,7 @@ uao_free(aobj)
* free the array
*/
- for (i = 0; i < aobj->u_pages; i++)
- {
+ for (i = 0; i < aobj->u_pages; i++) {
int slot = aobj->u_swslots[i];
if (slot) {
@@ -484,24 +487,18 @@ uao_create(size, flags)
vsize_t size;
int flags;
{
- static struct uvm_aobj kernel_object_store; /* home of kernel_object */
+ static struct uvm_aobj kernel_object_store; /* home of kernel_object */
static int kobj_alloced = 0; /* not allocated yet */
int pages = round_page(size) >> PAGE_SHIFT;
struct uvm_aobj *aobj;
/*
- * malloc a new aobj unless we are asked for the kernel object
- */
+ * malloc a new aobj unless we are asked for the kernel object
+ */
if (flags & UAO_FLAG_KERNOBJ) { /* want kernel object? */
if (kobj_alloced)
panic("uao_create: kernel object already allocated");
- /*
- * XXXTHORPEJ: Need to call this now, so the pool gets
- * initialized!
- */
- uao_init();
-
aobj = &kernel_object_store;
aobj->u_pages = pages;
aobj->u_flags = UAO_FLAG_NOSWAP; /* no swap to start */
@@ -531,7 +528,7 @@ uao_create(size, flags)
M_NOWAIT : M_WAITOK;
/* allocate hash table or array depending on object size */
- if (UAO_USES_SWHASH(aobj)) {
+ if (UAO_USES_SWHASH(aobj)) {
aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
M_UVMAOBJ, mflags, &aobj->u_swhashmask);
if (aobj->u_swhash == NULL)
@@ -561,8 +558,6 @@ uao_create(size, flags)
/*
* now that aobj is ready, add it to the global list
- * XXXCHS: uao_init hasn't been called'd in the KERNOBJ case,
- * do we really need the kernel object on this list anyway?
*/
simple_lock(&uao_list_lock);
LIST_INSERT_HEAD(&uao_list, aobj, u_list);
@@ -581,7 +576,7 @@ uao_create(size, flags)
*
* => called at boot time from uvm_pager_init()
*/
-static void
+void
uao_init()
{
static int uao_initialized;
@@ -608,12 +603,30 @@ uao_init()
/*
* uao_reference: add a ref to an aobj
*
- * => aobj must be unlocked (we will lock it)
+ * => aobj must be unlocked
+ * => just lock it and call the locked version
*/
void
uao_reference(uobj)
struct uvm_object *uobj;
{
+ simple_lock(&uobj->vmobjlock);
+ uao_reference_locked(uobj);
+ simple_unlock(&uobj->vmobjlock);
+}
+
+/*
+ * uao_reference_locked: add a ref to an aobj that is already locked
+ *
+ * => aobj must be locked
+ * this needs to be separate from the normal routine
+ * since sometimes we need to add a reference to an aobj when
+ * it's already locked.
+ */
+void
+uao_reference_locked(uobj)
+ struct uvm_object *uobj;
+{
UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
/*
@@ -623,22 +636,39 @@ uao_reference(uobj)
if (UVM_OBJ_IS_KERN_OBJECT(uobj))
return;
- simple_lock(&uobj->vmobjlock);
uobj->uo_refs++; /* bump! */
UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
- uobj, uobj->uo_refs,0,0);
- simple_unlock(&uobj->vmobjlock);
+ uobj, uobj->uo_refs,0,0);
}
+
/*
* uao_detach: drop a reference to an aobj
*
- * => aobj must be unlocked, we will lock it
+ * => aobj must be unlocked
+ * => just lock it and call the locked version
*/
void
uao_detach(uobj)
struct uvm_object *uobj;
{
+ simple_lock(&uobj->vmobjlock);
+ uao_detach_locked(uobj);
+}
+
+
+/*
+ * uao_detach_locked: drop a reference to an aobj
+ *
+ * => aobj must be locked, and is unlocked (or freed) upon return.
+ * this needs to be separate from the normal routine
+ * since sometimes we need to detach from an aobj when
+ * it's already locked.
+ */
+void
+uao_detach_locked(uobj)
+ struct uvm_object *uobj;
+{
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
struct vm_page *pg;
boolean_t busybody;
@@ -647,10 +677,10 @@ uao_detach(uobj)
/*
* detaching from kernel_object is a noop.
*/
- if (UVM_OBJ_IS_KERN_OBJECT(uobj))
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
+ simple_unlock(&uobj->vmobjlock);
return;
-
- simple_lock(&uobj->vmobjlock);
+ }
UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
uobj->uo_refs--; /* drop ref! */
@@ -668,12 +698,13 @@ uao_detach(uobj)
simple_unlock(&uao_list_lock);
/*
- * free all the pages that aren't PG_BUSY, mark for release any that are.
+ * free all the pages that aren't PG_BUSY,
+ * mark for release any that are.
*/
-
busybody = FALSE;
- for (pg = uobj->memq.tqh_first ; pg != NULL ; pg = pg->listq.tqe_next) {
-
+ for (pg = TAILQ_FIRST(&uobj->memq);
+ pg != NULL;
+ pg = TAILQ_NEXT(pg, listq)) {
if (pg->flags & PG_BUSY) {
pg->flags |= PG_RELEASED;
busybody = TRUE;
@@ -941,16 +972,16 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
vaddr_t current_offset;
vm_page_t ptmp;
- int lcv, gotpages, maxpages, swslot, rv;
+ int lcv, gotpages, maxpages, swslot, rv, pageidx;
boolean_t done;
UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
- UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d", aobj, offset, flags,0);
+ UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
+ aobj, offset, flags,0);
/*
* get number of pages
*/
-
maxpages = *npagesp;
/*
@@ -958,7 +989,6 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
*/
if (flags & PGO_LOCKED) {
-
/*
* step 1a: get pages that are already resident. only do
* this if the data structures are locked (i.e. the first
@@ -1040,14 +1070,18 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
for (lcv = 0, current_offset = offset ; lcv < maxpages ;
lcv++, current_offset += PAGE_SIZE) {
+
/*
* - skip over pages we've already gotten or don't want
* - skip over pages we don't _have_ to get
*/
+
if (pps[lcv] != NULL ||
(lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
continue;
+ pageidx = current_offset >> PAGE_SHIFT;
+
/*
* we have yet to locate the current page (pps[lcv]). we
* first look for a page that is already at the current offset.
@@ -1134,7 +1168,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
* we have a "fake/busy/clean" page that we just allocated.
* do the needed "i/o", either reading from swap or zeroing.
*/
- swslot = uao_find_swslot(aobj, current_offset >> PAGE_SHIFT);
+ swslot = uao_find_swslot(aobj, pageidx);
/*
* just zero the page if there's nothing in swap.
@@ -1145,9 +1179,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
* page hasn't existed before, just zero it.
*/
uvm_pagezero(ptmp);
- }
- else
- {
+ } else {
UVMHIST_LOG(pdhist, "pagein from swslot %d",
swslot, 0,0,0);
@@ -1167,13 +1199,24 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
UVMHIST_LOG(pdhist, "<- done (error=%d)",
rv,0,0,0);
if (ptmp->flags & PG_WANTED)
- /* object lock still held */
wakeup(ptmp);
+
+ /*
+ * remove the swap slot from the aobj
+ * and mark the aobj as having no real slot.
+ * don't free the swap slot, thus preventing
+ * it from being used again.
+ */
+ swslot = uao_set_swslot(&aobj->u_obj, pageidx,
+ SWSLOT_BAD);
+ uvm_swap_markbad(swslot, 1);
+
ptmp->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(ptmp, NULL);
uvm_lock_pageq();
uvm_pagefree(ptmp);
uvm_unlock_pageq();
+
simple_unlock(&uobj->vmobjlock);
return (rv);
}
@@ -1221,7 +1264,8 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
* => we kill the aobj if it is not referenced and we are suppose to
* kill it ("KILLME").
*/
-static boolean_t uao_releasepg(pg, nextpgp)
+static boolean_t
+uao_releasepg(pg, nextpgp)
struct vm_page *pg;
struct vm_page **nextpgp; /* OUT */
{
@@ -1231,7 +1275,7 @@ static boolean_t uao_releasepg(pg, nextpgp)
if ((pg->flags & PG_RELEASED) == 0)
panic("uao_releasepg: page not released!");
#endif
-
+
/*
* dispose of the page [caller handles PG_WANTED] and swap slot.
*/
@@ -1242,7 +1286,7 @@ static boolean_t uao_releasepg(pg, nextpgp)
*nextpgp = pg->pageq.tqe_next; /* next page for daemon */
uvm_pagefree(pg);
if (!nextpgp)
- uvm_unlock_pageq(); /* keep locked for daemon */
+ uvm_unlock_pageq(); /* keep locked for daemon */
/*
* if we're not killing the object, we're done.
@@ -1262,7 +1306,7 @@ static boolean_t uao_releasepg(pg, nextpgp)
return TRUE;
#ifdef DIAGNOSTIC
- if (aobj->u_obj.memq.tqh_first)
+ if (TAILQ_FIRST(&aobj->u_obj.memq))
panic("uvn_releasepg: pages in object with npages == 0");
#endif
@@ -1274,6 +1318,7 @@ static boolean_t uao_releasepg(pg, nextpgp)
return FALSE;
}
+
/*
* uao_dropswap: release any swap resources from this aobj page.
*
@@ -1292,3 +1337,229 @@ uao_dropswap(uobj, pageidx)
uvm_swap_free(slot, 1);
}
}
+
+
+/*
+ * page in every page in every aobj that is paged-out to a range of swslots.
+ *
+ * => nothing should be locked.
+ * => returns TRUE if pagein was aborted due to lack of memory.
+ */
+boolean_t
+uao_swap_off(startslot, endslot)
+ int startslot, endslot;
+{
+ struct uvm_aobj *aobj, *nextaobj;
+
+ /*
+ * walk the list of all aobjs.
+ */
+
+restart:
+ simple_lock(&uao_list_lock);
+
+ for (aobj = LIST_FIRST(&uao_list);
+ aobj != NULL;
+ aobj = nextaobj) {
+ boolean_t rv;
+
+ /*
+ * try to get the object lock,
+ * start all over if we fail.
+ * most of the time we'll get the aobj lock,
+ * so this should be a rare case.
+ */
+ if (!simple_lock_try(&aobj->u_obj.vmobjlock)) {
+ simple_unlock(&uao_list_lock);
+ goto restart;
+ }
+
+ /*
+ * add a ref to the aobj so it doesn't disappear
+ * while we're working.
+ */
+ uao_reference_locked(&aobj->u_obj);
+
+ /*
+ * now it's safe to unlock the uao list.
+ */
+ simple_unlock(&uao_list_lock);
+
+ /*
+ * page in any pages in the swslot range.
+ * if there's an error, abort and return the error.
+ */
+ rv = uao_pagein(aobj, startslot, endslot);
+ if (rv) {
+ uao_detach_locked(&aobj->u_obj);
+ return rv;
+ }
+
+ /*
+ * we're done with this aobj.
+ * relock the list and drop our ref on the aobj.
+ */
+ simple_lock(&uao_list_lock);
+ nextaobj = LIST_NEXT(aobj, u_list);
+ uao_detach_locked(&aobj->u_obj);
+ }
+
+ /*
+ * done with traversal, unlock the list
+ */
+ simple_unlock(&uao_list_lock);
+ return FALSE;
+}
+
+
+/*
+ * page in any pages from aobj in the given range.
+ *
+ * => aobj must be locked and is returned locked.
+ * => returns TRUE if pagein was aborted due to lack of memory.
+ */
+static boolean_t
+uao_pagein(aobj, startslot, endslot)
+ struct uvm_aobj *aobj;
+ int startslot, endslot;
+{
+ boolean_t rv;
+
+ if (UAO_USES_SWHASH(aobj)) {
+ struct uao_swhash_elt *elt;
+ int bucket;
+
+restart:
+ for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
+ for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
+ elt != NULL;
+ elt = LIST_NEXT(elt, list)) {
+ int i;
+
+ for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
+ int slot = elt->slots[i];
+
+ /*
+ * if the slot isn't in range, skip it.
+ */
+ if (slot < startslot ||
+ slot >= endslot) {
+ continue;
+ }
+
+ /*
+ * process the page,
+ * the start over on this object
+ * since the swhash elt
+ * may have been freed.
+ */
+ rv = uao_pagein_page(aobj,
+ UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
+ if (rv) {
+ return rv;
+ }
+ goto restart;
+ }
+ }
+ }
+ } else {
+ int i;
+
+ for (i = 0; i < aobj->u_pages; i++) {
+ int slot = aobj->u_swslots[i];
+
+ /*
+ * if the slot isn't in range, skip it
+ */
+ if (slot < startslot || slot >= endslot) {
+ continue;
+ }
+
+ /*
+ * process the page.
+ */
+ rv = uao_pagein_page(aobj, i);
+ if (rv) {
+ return rv;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+/*
+ * page in a page from an aobj. used for swap_off.
+ * returns TRUE if pagein was aborted due to lack of memory.
+ *
+ * => aobj must be locked and is returned locked.
+ */
+static boolean_t
+uao_pagein_page(aobj, pageidx)
+ struct uvm_aobj *aobj;
+ int pageidx;
+{
+ struct vm_page *pg;
+ int rv, slot, npages;
+ UVMHIST_FUNC("uao_pagein_page"); UVMHIST_CALLED(pdhist);
+
+ pg = NULL;
+ npages = 1;
+ /* locked: aobj */
+ rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
+ &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, 0);
+ /* unlocked: aobj */
+
+ /*
+ * relock and finish up.
+ */
+ simple_lock(&aobj->u_obj.vmobjlock);
+
+ switch (rv) {
+ case VM_PAGER_OK:
+ break;
+
+ case VM_PAGER_ERROR:
+ case VM_PAGER_REFAULT:
+ /*
+ * nothing more to do on errors.
+ * VM_PAGER_REFAULT can only mean that the anon was freed,
+ * so again there's nothing to do.
+ */
+ return FALSE;
+
+#ifdef DIAGNOSTIC
+ default:
+ panic("uao_pagein_page: uao_get -> %d\n", rv);
+#endif
+ }
+
+#ifdef DIAGNOSTIC
+ /*
+ * this should never happen, since we have a reference on the aobj.
+ */
+ if (pg->flags & PG_RELEASED) {
+ panic("uao_pagein_page: found PG_RELEASED page?\n");
+ }
+#endif
+
+ /*
+ * ok, we've got the page now.
+ * mark it as dirty, clear its swslot and un-busy it.
+ */
+ slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
+ uvm_swap_free(slot, 1);
+ pg->flags &= ~(PG_BUSY|PG_CLEAN|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+
+ /*
+ * deactivate the page (to put it on a page queue).
+ */
+ pmap_clear_reference(pg);
+ pmap_page_protect(pg, VM_PROT_NONE);
+ uvm_lock_pageq();
+ uvm_pagedeactivate(pg);
+ uvm_unlock_pageq();
+
+ return FALSE;
+}
diff --git a/sys/uvm/uvm_aobj.h b/sys/uvm/uvm_aobj.h
index e9db0c97a5e..44a1a3498c0 100644
--- a/sys/uvm/uvm_aobj.h
+++ b/sys/uvm/uvm_aobj.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_aobj.h,v 1.6 2001/03/09 05:34:38 smart Exp $ */
-/* $NetBSD: uvm_aobj.h,v 1.9 1999/06/21 17:25:11 thorpej Exp $ */
+/* $OpenBSD: uvm_aobj.h,v 1.7 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_aobj.h,v 1.10 2000/01/11 06:57:49 chs Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -65,8 +65,10 @@
* prototypes
*/
+void uao_init __P((void));
int uao_set_swslot __P((struct uvm_object *, int, int));
void uao_dropswap __P((struct uvm_object *, int));
+int uao_swap_off __P((int, int));
/*
* globals
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index e669456e8e5..27c9b941c64 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_extern.h,v 1.14 2001/07/25 14:47:59 art Exp $ */
-/* $NetBSD: uvm_extern.h,v 1.35 1999/12/30 16:09:47 eeh Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.15 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_extern.h,v 1.36 2000/01/11 06:57:49 chs Exp $ */
/*
*
@@ -278,7 +278,9 @@ typedef int vm_fault_t;
/* uvm_aobj.c */
struct uvm_object *uao_create __P((vsize_t, int));
void uao_detach __P((struct uvm_object *));
+void uao_detach_locked __P((struct uvm_object *));
void uao_reference __P((struct uvm_object *));
+void uao_reference_locked __P((struct uvm_object *));
/* uvm_fault.c */
int uvm_fault __P((vm_map_t, vaddr_t,
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index 1583ab5d385..ab60ff2a081 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_fault.c,v 1.16 2001/07/25 13:25:33 art Exp $ */
-/* $NetBSD: uvm_fault.c,v 1.46 1999/11/13 00:24:38 thorpej Exp $ */
+/* $OpenBSD: uvm_fault.c,v 1.17 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_fault.c,v 1.47 2000/01/11 06:57:50 chs Exp $ */
/*
*
@@ -280,7 +280,7 @@ uvmfault_amapcopy(ufi)
* page in that anon.
*
* => maps, amap, and anon locked by caller.
- * => if we fail (result != VM_PAGER_OK) we unlock everything.
+ * => if we fail (result != VM_PAGER_OK) we unlock everything except anon.
* => if we are successful, we return with everything still locked.
* => we don't move the page on the queues [gets moved later]
* => if we allocate a new page [we_own], it gets put on the queues.
@@ -291,7 +291,8 @@ uvmfault_amapcopy(ufi)
* else.
*/
-int uvmfault_anonget(ufi, amap, anon)
+int
+uvmfault_anonget(ufi, amap, anon)
struct uvm_faultinfo *ufi;
struct vm_amap *amap;
struct vm_anon *anon;
@@ -415,7 +416,7 @@ int uvmfault_anonget(ufi, amap, anon)
*/
locked = uvmfault_relock(ufi);
- if (locked) {
+ if (locked && amap != NULL) {
amap_lock(amap);
}
if (locked || we_own)
@@ -452,7 +453,8 @@ int uvmfault_anonget(ufi, amap, anon)
simple_unlock(&anon->an_lock);
uvm_anfree(anon); /* frees page for us */
if (locked)
- uvmfault_unlockall(ufi, amap, NULL, NULL);
+ uvmfault_unlockall(ufi, amap, NULL,
+ NULL);
uvmexp.fltpgrele++;
UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
return (VM_PAGER_REFAULT); /* refault! */
@@ -460,13 +462,24 @@ int uvmfault_anonget(ufi, amap, anon)
if (result != VM_PAGER_OK) {
#ifdef DIAGNOSTIC
- if (result == VM_PAGER_PEND)
- panic("uvmfault_anonget: got PENDING for non-async I/O");
+ if (result == VM_PAGER_PEND) {
+ panic("uvmfault_anonget: "
+ "got PENDING for non-async I/O");
+ }
#endif
/* remove page from anon */
anon->u.an_page = NULL;
- /*
+ /*
+ * remove the swap slot from the anon
+ * and mark the anon as having no real slot.
+ * don't free the swap slot, thus preventing
+ * it from being used again.
+ */
+ uvm_swap_markbad(anon->an_swslot, 1);
+ anon->an_swslot = SWSLOT_BAD;
+
+ /*
* note: page was never !PG_BUSY, so it
* can't be mapped and thus no need to
* pmap_page_protect it...
@@ -509,8 +522,9 @@ int uvmfault_anonget(ufi, amap, anon)
* verify no one has touched the amap and moved the anon on us.
*/
- if (amap_lookup(&ufi->entry->aref,
- ufi->orig_rvaddr - ufi->entry->start) != anon) {
+ if (ufi != NULL &&
+ amap_lookup(&ufi->entry->aref,
+ ufi->orig_rvaddr - ufi->entry->start) != anon) {
uvmfault_unlockall(ufi, amap, NULL, anon);
UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
@@ -1051,14 +1065,18 @@ ReFault:
*/
/*
- * let uvmfault_anonget do the dirty work. if it fails (!OK) it will
- * unlock for us. if it is OK, locks are still valid and locked.
+ * let uvmfault_anonget do the dirty work.
+ * if it fails (!OK) it will unlock all but the anon for us.
+ * if it succeeds, locks are still valid and locked.
* also, if it is OK, then the anon's page is on the queues.
* if the page is on loan from a uvm_object, then anonget will
* lock that object for us if it does not fail.
*/
result = uvmfault_anonget(&ufi, amap, anon);
+ if (result != VM_PAGER_OK) {
+ simple_unlock(&anon->an_lock);
+ }
if (result == VM_PAGER_REFAULT)
goto ReFault;
@@ -1796,8 +1814,9 @@ uvm_fault_wire(map, start, end, access_type)
pmap = vm_map_pmap(map);
/*
- * fault it in page at a time. if the fault fails then we have
- * to undo what we have done.
+ * now fault it in a page at a time. if the fault fails then we have
+ * to undo what we have done. note that in uvm_fault VM_PROT_NONE
+ * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
*/
for (va = start ; va < end ; va += PAGE_SIZE) {
diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h
index 57636eec78c..d5b53c87713 100644
--- a/sys/uvm/uvm_fault_i.h
+++ b/sys/uvm/uvm_fault_i.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_fault_i.h,v 1.5 2001/01/29 02:07:44 niklas Exp $ */
-/* $NetBSD: uvm_fault_i.h,v 1.9 1999/06/04 23:38:41 thorpej Exp $ */
+/* $OpenBSD: uvm_fault_i.h,v 1.6 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_fault_i.h,v 1.10 2000/01/11 06:57:50 chs Exp $ */
/*
*
@@ -51,6 +51,14 @@ uvmfault_unlockmaps(ufi, write_locked)
struct uvm_faultinfo *ufi;
boolean_t write_locked;
{
+ /*
+ * ufi can be NULL when this isn't really a fault,
+ * but merely paging in anon data.
+ */
+
+ if (ufi == NULL) {
+ return;
+ }
if (write_locked) {
vm_map_unlock(ufi->map);
@@ -213,8 +221,17 @@ static __inline boolean_t
uvmfault_relock(ufi)
struct uvm_faultinfo *ufi;
{
+ /*
+ * ufi can be NULL when this isn't really a fault,
+ * but merely paging in anon data.
+ */
+
+ if (ufi == NULL) {
+ return TRUE;
+ }
uvmexp.fltrelck++;
+
/*
* relock map. fail if version mismatch (in which case nothing
* gets locked).
diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c
index 90b1cc3662d..2acbc0507f3 100644
--- a/sys/uvm/uvm_km.c
+++ b/sys/uvm/uvm_km.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_km.c,v 1.12 2001/07/25 13:25:33 art Exp $ */
-/* $NetBSD: uvm_km.c,v 1.33 1999/11/13 00:24:38 thorpej Exp $ */
+/* $OpenBSD: uvm_km.c,v 1.13 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_km.c,v 1.34 2000/01/11 06:57:50 chs Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -201,6 +201,7 @@ uvm_km_init(start, end)
*/
/* kernel_object: for pageable anonymous kernel memory */
+ uao_init();
uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index cb836859f90..266e63a69be 100644
--- a/sys/uvm/uvm_pager.c
+++ b/sys/uvm/uvm_pager.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_pager.c,v 1.11 2001/07/25 13:25:33 art Exp $ */
-/* $NetBSD: uvm_pager.c,v 1.24 1999/11/13 00:24:38 thorpej Exp $ */
+/* $OpenBSD: uvm_pager.c,v 1.12 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_pager.c,v 1.25 2000/01/11 06:57:50 chs Exp $ */
/*
*
@@ -529,7 +529,7 @@ ReTry:
simple_lock(&uobj->vmobjlock);
if (*npages > 1 || pg == NULL)
uvm_pager_dropcluster(uobj, pg, ppsp, npages,
- PGO_PDFREECLUST, 0);
+ PGO_PDFREECLUST);
/* if (uobj): object still locked, as per
* return-state item #3 */
}
@@ -543,12 +543,61 @@ ReTry:
*/
if (*npages > 1 || pg == NULL) {
- if (uobj)
+ if (uobj) {
simple_lock(&uobj->vmobjlock);
- uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP,
- swblk);
- if (pg != NULL)
- goto ReTry;
+ }
+ uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);
+
+ /*
+ * for failed swap-backed pageouts with a "pg",
+ * we need to reset pg's swslot to either:
+ * "swblk" (for transient errors, so we can retry),
+ * or 0 (for hard errors).
+ */
+
+ if (uobj == NULL && pg != NULL) {
+ int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
+ if (pg->pqflags & PQ_ANON) {
+ simple_lock(&pg->uanon->an_lock);
+ pg->uanon->an_swslot = nswblk;
+ simple_unlock(&pg->uanon->an_lock);
+ } else {
+ simple_lock(&pg->uobject->vmobjlock);
+ uao_set_swslot(pg->uobject,
+ pg->offset >> PAGE_SHIFT,
+ nswblk);
+ simple_unlock(&pg->uobject->vmobjlock);
+ }
+ }
+ if (result == VM_PAGER_AGAIN) {
+
+ /*
+ * for transient failures, free all the swslots that
+ * we're not going to retry with.
+ */
+
+ if (uobj == NULL) {
+ if (pg) {
+ uvm_swap_free(swblk + 1, *npages - 1);
+ } else {
+ uvm_swap_free(swblk, *npages);
+ }
+ }
+ if (pg) {
+ ppsp[0] = pg;
+ *npages = 1;
+ goto ReTry;
+ }
+ } else if (uobj == NULL) {
+
+ /*
+ * for hard errors on swap-backed pageouts,
+ * mark the swslots as bad. note that we do not
+ * free swslots that we mark bad.
+ */
+
+ uvm_swap_markbad(swblk, *npages);
+ }
}
/*
@@ -582,35 +631,17 @@ ReTry:
*/
void
-uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags, swblk)
+uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
struct uvm_object *uobj; /* IN */
struct vm_page *pg, **ppsp; /* IN, IN/OUT */
int *npages; /* IN/OUT */
int flags;
- int swblk; /* valid if
- (uobj == NULL && PGO_REALLOCSWAP) */
{
int lcv;
boolean_t obj_is_alive;
struct uvm_object *saved_uobj;
/*
- * if we need to reallocate swap space for the cluster we are dropping
- * (true if swap-backed and PGO_REALLOCSWAP) then free the old
- * allocation now. save a block for "pg" if it is non-NULL.
- *
- * note that we will zap the object's pointer to swap in the "for" loop
- * below...
- */
-
- if (uobj == NULL && (flags & PGO_REALLOCSWAP)) {
- if (pg)
- uvm_swap_free(swblk + 1, *npages - 1);
- else
- uvm_swap_free(swblk, *npages);
- }
-
- /*
* drop all pages but "pg"
*/
@@ -717,34 +748,5 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags, swblk)
else
simple_unlock(&ppsp[lcv]->uobject->vmobjlock);
}
-
- }
-
- /*
- * drop to a cluster of 1 page ("pg") if requested
- */
-
- if (pg && (flags & PGO_PDFREECLUST) == 0) {
- /*
- * if we are not a successful pageout, we make a 1 page cluster.
- */
- ppsp[0] = pg;
- *npages = 1;
-
- /*
- * assign new swap block to new cluster, if anon backed
- */
- if (uobj == NULL && (flags & PGO_REALLOCSWAP)) {
- if (pg->pqflags & PQ_ANON) {
- simple_lock(&pg->uanon->an_lock);
- pg->uanon->an_swslot = swblk; /* reassign */
- simple_unlock(&pg->uanon->an_lock);
- } else {
- simple_lock(&pg->uobject->vmobjlock);
- uao_set_swslot(pg->uobject,
- pg->offset >> PAGE_SHIFT, swblk);
- simple_unlock(&pg->uobject->vmobjlock);
- }
- }
}
}
diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
index 5c594595146..dae766f6883 100644
--- a/sys/uvm/uvm_pager.h
+++ b/sys/uvm/uvm_pager.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_pager.h,v 1.7 2001/03/22 03:05:56 smart Exp $ */
-/* $NetBSD: uvm_pager.h,v 1.10 1999/06/21 17:25:12 thorpej Exp $ */
+/* $OpenBSD: uvm_pager.h,v 1.8 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_pager.h,v 1.11 2000/01/11 06:57:50 chs Exp $ */
/*
*
@@ -134,7 +134,7 @@ struct uvm_pagerops {
void uvm_pager_dropcluster __P((struct uvm_object *,
struct vm_page *, struct vm_page **,
- int *, int, int));
+ int *, int));
void uvm_pager_init __P((void));
int uvm_pager_put __P((struct uvm_object *, struct vm_page *,
struct vm_page ***, int *, int,
diff --git a/sys/uvm/uvm_stat.c b/sys/uvm/uvm_stat.c
index 72a18d26589..5cc7045457d 100644
--- a/sys/uvm/uvm_stat.c
+++ b/sys/uvm/uvm_stat.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_stat.c,v 1.5 2001/01/29 02:07:48 niklas Exp $ */
-/* $NetBSD: uvm_stat.c,v 1.12 1999/03/26 17:34:16 chs Exp $ */
+/* $OpenBSD: uvm_stat.c,v 1.6 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_stat.c,v 1.13 2000/01/11 06:57:50 chs Exp $ */
/*
*
@@ -241,8 +241,9 @@ uvm_dump()
uvmexp.pdbusy, uvmexp.pdfreed, uvmexp.pdreact, uvmexp.pddeact);
printf(" pageouts=%d, pending=%d, nswget=%d\n", uvmexp.pdpageouts,
uvmexp.pdpending, uvmexp.nswget);
- printf(" nswapdev=%d, nanon=%d, nfreeanon=%d\n", uvmexp.nswapdev,
- uvmexp.nanon, uvmexp.nfreeanon);
+ printf(" nswapdev=%d, nanon=%d, nanonneeded=%d nfreeanon=%d\n",
+ uvmexp.nswapdev, uvmexp.nanon, uvmexp.nanonneeded,
+ uvmexp.nfreeanon);
printf(" swpages=%d, swpginuse=%d, swpgonly=%d paging=%d\n",
uvmexp.swpages, uvmexp.swpginuse, uvmexp.swpgonly, uvmexp.paging);
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index fe7e03bdf5b..0e39ca7a1e7 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_swap.c,v 1.30 2001/07/25 14:47:59 art Exp $ */
-/* $NetBSD: uvm_swap.c,v 1.31 2000/01/04 21:37:54 wrstuden Exp $ */
+/* $OpenBSD: uvm_swap.c,v 1.31 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_swap.c,v 1.32 2000/01/11 06:57:51 chs Exp $ */
/*
* Copyright (c) 1995, 1996, 1997 Matthew R. Green
@@ -119,12 +119,6 @@
*/
/*
- * SWAP_TO_FILES: allows swapping to plain files.
- */
-
-#define SWAP_TO_FILES
-
-/*
* swapdev: describes a single swap partition/file
*
* note the following should be true:
@@ -142,18 +136,17 @@ struct swapdev {
int swd_pathlen; /* length of pathname */
int swd_npages; /* #pages we can use */
int swd_npginuse; /* #pages in use */
+ int swd_npgbad; /* #pages bad */
int swd_drumoffset; /* page0 offset in drum */
int swd_drumsize; /* #pages in drum */
struct extent *swd_ex; /* extent for this swapdev */
struct vnode *swd_vp; /* backing vnode */
CIRCLEQ_ENTRY(swapdev) swd_next; /* priority circleq */
-#ifdef SWAP_TO_FILES
int swd_bsize; /* blocksize (bytes) */
int swd_maxactive; /* max active i/o reqs */
struct buf swd_tab; /* buffer list */
struct ucred *swd_cred; /* cred for file access */
-#endif
#ifdef UVM_SWAP_ENCRYPT
#define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */
#define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
@@ -219,8 +212,7 @@ struct pool vndbuf_pool;
#define getvndxfer(vnx) do { \
int s = splbio(); \
- vnx = (struct vndxfer *) \
- pool_get(&vndxfer_pool, PR_MALLOCOK|PR_WAITOK); \
+ vnx = pool_get(&vndxfer_pool, PR_MALLOCOK|PR_WAITOK); \
splx(s); \
} while (0)
@@ -230,8 +222,7 @@ struct pool vndbuf_pool;
#define getvndbuf(vbp) do { \
int s = splbio(); \
- vbp = (struct vndbuf *) \
- pool_get(&vndbuf_pool, PR_MALLOCOK|PR_WAITOK); \
+ vbp = pool_get(&vndbuf_pool, PR_MALLOCOK|PR_WAITOK); \
splx(s); \
} while (0)
@@ -266,15 +257,11 @@ static void swaplist_insert __P((struct swapdev *,
static void swaplist_trim __P((void));
static int swap_on __P((struct proc *, struct swapdev *));
-#ifdef SWAP_OFF_WORKS
static int swap_off __P((struct proc *, struct swapdev *));
-#endif
-#ifdef SWAP_TO_FILES
static void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
static void sw_reg_iodone __P((struct buf *));
static void sw_reg_start __P((struct swapdev *));
-#endif
static void uvm_swap_aiodone __P((struct uvm_aiodesc *));
static void uvm_swap_bufdone __P((struct buf *));
@@ -507,8 +494,8 @@ swaplist_insert(sdp, newspp, priority)
/*
* find entry at or after which to insert the new device.
*/
- for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
+ for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
+ spp = LIST_NEXT(spp, spi_swappri)) {
if (priority <= spp->spi_priority)
break;
pspp = spp;
@@ -519,7 +506,8 @@ swaplist_insert(sdp, newspp, priority)
*/
if (spp == NULL || spp->spi_priority != priority) {
spp = newspp; /* use newspp! */
- UVMHIST_LOG(pdhist, "created new swappri = %d", priority, 0, 0, 0);
+ UVMHIST_LOG(pdhist, "created new swappri = %d",
+ priority, 0, 0, 0);
spp->spi_priority = priority;
CIRCLEQ_INIT(&spp->spi_swapdev);
@@ -540,10 +528,6 @@ swaplist_insert(sdp, newspp, priority)
sdp->swd_priority = priority;
CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
uvmexp.nswapdev++;
-
- /*
- * done!
- */
}
/*
@@ -564,11 +548,11 @@ swaplist_find(vp, remove)
/*
* search the lists for the requested vp
*/
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
+ for (spp = LIST_FIRST(&swap_priority); spp != NULL;
+ spp = LIST_NEXT(spp, spi_swappri)) {
+ for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next)
+ sdp = CIRCLEQ_NEXT(sdp, swd_next))
if (sdp->swd_vp == vp) {
if (remove) {
CIRCLEQ_REMOVE(&spp->spi_swapdev,
@@ -593,12 +577,13 @@ swaplist_trim()
{
struct swappri *spp, *nextspp;
- for (spp = swap_priority.lh_first; spp != NULL; spp = nextspp) {
- nextspp = spp->spi_swappri.le_next;
- if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev)
+ for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = nextspp) {
+ nextspp = LIST_NEXT(spp, spi_swappri);
+ if (CIRCLEQ_FIRST(&spp->spi_swapdev) !=
+ (void *)&spp->spi_swapdev)
continue;
LIST_REMOVE(spp, spi_swappri);
- free((caddr_t)spp, M_VMSWAP);
+ free(spp, M_VMSWAP);
}
}
@@ -637,11 +622,11 @@ swapdrum_getsdp(pgno)
struct swapdev *sdp;
struct swappri *spp;
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next)
- for (sdp = spp->spi_swapdev.cqh_first;
+ for (spp = LIST_FIRST(&swap_priority); spp != NULL;
+ spp = LIST_NEXT(spp, spi_swappri))
+ for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next)
+ sdp = CIRCLEQ_NEXT(sdp, swd_next))
if (pgno >= sdp->swd_drumoffset &&
pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
return sdp;
@@ -681,7 +666,7 @@ sys_swapctl(p, v, retval)
/*
* ensure serialized syscall access by grabbing the swap_syscall_lock
*/
- lockmgr(&swap_syscall_lock, LK_EXCLUSIVE, (void *)0, p);
+ lockmgr(&swap_syscall_lock, LK_EXCLUSIVE, NULL, p);
/*
* we handle the non-priv NSWAP and STATS request first.
@@ -713,23 +698,15 @@ sys_swapctl(p, v, retval)
sep = (struct swapent *)SCARG(uap, arg);
count = 0;
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
+ for (spp = LIST_FIRST(&swap_priority); spp != NULL;
+ spp = LIST_NEXT(spp, spi_swappri)) {
+ for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
sdp != (void *)&spp->spi_swapdev && misc-- > 0;
- sdp = sdp->swd_next.cqe_next) {
- /*
- * backwards compatibility for system call.
- * note that we use 'struct oswapent' as an
- * overlay into both 'struct swapdev' and
- * the userland 'struct swapent', as we
- * want to retain backwards compatibility
- * with NetBSD 1.3.
- */
+ sdp = CIRCLEQ_NEXT(sdp, swd_next)) {
sdp->swd_inuse =
btodb(sdp->swd_npginuse << PAGE_SHIFT);
- error = copyout((caddr_t)&sdp->swd_se,
- (caddr_t)sep, sizeof(struct swapent));
+ error = copyout(&sdp->swd_se, sep,
+ sizeof(struct swapent));
/* now copy out the path if necessary */
#if defined(COMPAT_13)
@@ -737,9 +714,8 @@ sys_swapctl(p, v, retval)
#else
if (error == 0)
#endif
- error = copyout((caddr_t)sdp->swd_path,
- (caddr_t)&sep->se_path,
- sdp->swd_pathlen);
+ error = copyout(sdp->swd_path,
+ &sep->se_path, sdp->swd_pathlen);
if (error)
goto out;
@@ -822,8 +798,7 @@ sys_swapctl(p, v, retval)
* any empty priority structures.
*/
priority = SCARG(uap, misc);
- spp = (struct swappri *)
- malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
+ spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
simple_lock(&uvm.swap_data_lock);
if ((sdp = swaplist_find(vp, 1)) == NULL) {
error = ENOENT;
@@ -837,12 +812,14 @@ sys_swapctl(p, v, retval)
break;
case SWAP_ON:
+
/*
* check for duplicates. if none found, then insert a
* dummy entry on the list to prevent someone else from
* trying to enable this device while we are working on
* it.
*/
+
priority = SCARG(uap, misc);
simple_lock(&uvm.swap_data_lock);
if ((sdp = swaplist_find(vp, 0)) != NULL) {
@@ -850,21 +827,20 @@ sys_swapctl(p, v, retval)
simple_unlock(&uvm.swap_data_lock);
break;
}
- sdp = (struct swapdev *)
- malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
- spp = (struct swappri *)
- malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
+ sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+ spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
memset(sdp, 0, sizeof(*sdp));
sdp->swd_flags = SWF_FAKE; /* placeholder only */
sdp->swd_vp = vp;
sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
-#ifdef SWAP_TO_FILES
+
/*
* XXX Is NFS elaboration necessary?
*/
- if (vp->v_type == VREG)
+ if (vp->v_type == VREG) {
sdp->swd_cred = crdup(p->p_ucred);
-#endif
+ }
+
swaplist_insert(sdp, spp, priority);
simple_unlock(&uvm.swap_data_lock);
@@ -872,23 +848,24 @@ sys_swapctl(p, v, retval)
sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0)
panic("swapctl: copystr");
+
/*
* we've now got a FAKE placeholder in the swap list.
* now attempt to enable swap on it. if we fail, undo
* what we've done and kill the fake entry we just inserted.
* if swap_on is a success, it will clear the SWF_FAKE flag
*/
+
if ((error = swap_on(p, sdp)) != 0) {
simple_lock(&uvm.swap_data_lock);
(void) swaplist_find(vp, 1); /* kill fake entry */
swaplist_trim();
simple_unlock(&uvm.swap_data_lock);
-#ifdef SWAP_TO_FILES
- if (vp->v_type == VREG)
+ if (vp->v_type == VREG) {
crfree(sdp->swd_cred);
-#endif
+ }
free(sdp->swd_path, M_VMSWAP);
- free((caddr_t)sdp, M_VMSWAP);
+ free(sdp, M_VMSWAP);
break;
}
@@ -900,17 +877,13 @@ sys_swapctl(p, v, retval)
break;
case SWAP_OFF:
- UVMHIST_LOG(pdhist, "someone is using SWAP_OFF...??", 0,0,0,0);
-#ifdef SWAP_OFF_WORKS
- /*
- * find the entry of interest and ensure it is enabled.
- */
simple_lock(&uvm.swap_data_lock);
if ((sdp = swaplist_find(vp, 0)) == NULL) {
simple_unlock(&uvm.swap_data_lock);
error = ENXIO;
break;
}
+
/*
* If a device isn't in use or enabled, we
* can't stop swapping from it (again).
@@ -920,28 +893,16 @@ sys_swapctl(p, v, retval)
error = EBUSY;
break;
}
- /* XXXCDC: should we call with list locked or unlocked? */
- if ((error = swap_off(p, sdp)) != 0)
- break;
- /* XXXCDC: might need relock here */
/*
- * now we can kill the entry.
+ * do the real work.
*/
- if ((sdp = swaplist_find(vp, 1)) == NULL) {
- error = ENXIO;
- break;
- }
- simple_unlock(&uvm.swap_data_lock);
- free((caddr_t)sdp, M_VMSWAP);
-#else
- error = EINVAL;
-#endif
+ if ((error = swap_off(p, sdp)) != 0)
+ goto out;
+
break;
default:
- UVMHIST_LOG(pdhist, "unhandled command: %#x",
- SCARG(uap, cmd), 0, 0, 0);
error = EINVAL;
}
@@ -950,7 +911,7 @@ sys_swapctl(p, v, retval)
*/
vput(vp);
out:
- lockmgr(&swap_syscall_lock, LK_RELEASE, (void *)0, p);
+ lockmgr(&swap_syscall_lock, LK_RELEASE, NULL, p);
UVMHIST_LOG(pdhist, "<- done! error=%d", error, 0, 0, 0);
return (error);
@@ -975,9 +936,7 @@ swap_on(p, sdp)
struct vnode *vp;
int error, npages, nblocks, size;
long addr;
-#ifdef SWAP_TO_FILES
struct vattr va;
-#endif
#if defined(NFSCLIENT)
extern int (**nfsv2_vnodeop_p) __P((void *));
#endif /* defined(NFSCLIENT) */
@@ -1027,7 +986,6 @@ swap_on(p, sdp)
}
break;
-#ifdef SWAP_TO_FILES
case VREG:
if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
goto bad;
@@ -1048,7 +1006,6 @@ swap_on(p, sdp)
#endif /* defined(NFSCLIENT) */
sdp->swd_maxactive = 8; /* XXX */
break;
-#endif
default:
error = ENXIO;
@@ -1065,7 +1022,7 @@ swap_on(p, sdp)
/*
* for block special files, we want to make sure that leave
* the disklabel and bootblocks alone, so we arrange to skip
- * over them (randomly choosing to skip PAGE_SIZE bytes).
+ * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
* note that because of this the "size" can be less than the
* actual number of blocks on the device.
*/
@@ -1105,11 +1062,6 @@ swap_on(p, sdp)
if (addr) {
if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
panic("disklabel region");
- sdp->swd_npginuse += addr;
- simple_lock(&uvm.swap_data_lock);
- uvmexp.swpginuse += addr;
- uvmexp.swpgonly += addr;
- simple_unlock(&uvm.swap_data_lock);
}
/*
@@ -1127,23 +1079,23 @@ swap_on(p, sdp)
sp = &mp->mnt_stat;
rootblocks = sp->f_blocks * btodb(sp->f_bsize);
rootpages = round_page(dbtob(rootblocks)) >> PAGE_SHIFT;
- if (rootpages > npages)
+ if (rootpages > size)
panic("swap_on: miniroot larger than swap?");
if (extent_alloc_region(sdp->swd_ex, addr,
rootpages, EX_WAITOK))
panic("swap_on: unable to preserve miniroot");
- simple_lock(&uvm.swap_data_lock);
- sdp->swd_npginuse += (rootpages - addr);
- uvmexp.swpginuse += (rootpages - addr);
- uvmexp.swpgonly += (rootpages - addr);
- simple_unlock(&uvm.swap_data_lock);
-
+ size -= rootpages;
printf("Preserved %d pages of miniroot ", rootpages);
- printf("leaving %d pages of swap\n", size - rootpages);
+ printf("leaving %d pages of swap\n", size);
}
+ /*
+ * add anons to reflect the new swap space
+ */
+ uvm_anon_add(size);
+
#ifdef UVM_SWAP_ENCRYPT
if (uvm_doswapencrypt)
uvm_swap_initcrypt(sdp, npages);
@@ -1153,49 +1105,11 @@ swap_on(p, sdp)
*/
simple_lock(&uvm.swap_data_lock);
swapdrum_add(sdp, npages);
- sdp->swd_npages = npages;
+ sdp->swd_npages = size;
sdp->swd_flags &= ~SWF_FAKE; /* going live */
sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
simple_unlock(&uvm.swap_data_lock);
- uvmexp.swpages += npages;
-
- /*
- * add anon's to reflect the swap space we added
- */
- uvm_anon_add(size);
-
-#if 0
- /*
- * At this point we could arrange to reserve memory for the
- * swap buffer pools.
- *
- * I don't think this is necessary, since swapping starts well
- * ahead of serious memory deprivation and the memory resource
- * pools hold on to actively used memory. This should ensure
- * we always have some resources to continue operation.
- */
-
- int s = splbio();
- int n = 8 * sdp->swd_maxactive;
-
- (void)pool_prime(&swapbuf_pool, n, 0);
-
- if (vp->v_type == VREG) {
- /* Allocate additional vnx and vnd buffers */
- /*
- * Allocation Policy:
- * (8 * swd_maxactive) vnx headers per swap dev
- * (16 * swd_maxactive) vnd buffers per swap dev
- */
-
- n = 8 * sdp->swd_maxactive;
- (void)pool_prime(&vndxfer_pool, n, 0);
-
- n = 16 * sdp->swd_maxactive;
- (void)pool_prime(&vndbuf_pool, n, 0);
- }
- splx(s);
-#endif
+ uvmexp.swpages += size;
return (0);
@@ -1208,66 +1122,84 @@ bad:
return (error);
}
-#ifdef SWAP_OFF_WORKS
/*
* swap_off: stop swapping on swapdev
*
- * XXXCDC: what conditions go here?
+ * => swap data should be locked, we will unlock.
*/
static int
swap_off(p, sdp)
struct proc *p;
struct swapdev *sdp;
{
- char *name;
+ void *name;
UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
+ UVMHIST_LOG(pdhist, " dev=%x", sdp->swd_dev,0,0,0);
- /* turn off the enable flag */
+ /* disable the swap area being removed */
sdp->swd_flags &= ~SWF_ENABLE;
-
- UVMHIST_LOG(pdhist, " dev=%x", sdp->swd_dev);
+ simple_unlock(&uvm.swap_data_lock);
/*
- * XXX write me
- *
- * the idea is to find out which processes are using this swap
- * device, and page them all in.
- *
- * eventually, we should try to move them out to other swap areas
- * if available.
- *
- * The alternative is to create a redirection map for this swap
- * device. This should work by moving all the pages of data from
- * the ex-swap device to another one, and making an entry in the
- * redirection map for it. locking is going to be important for
- * this!
- *
- * XXXCDC: also need to shrink anon pool
+ * the idea is to find all the pages that are paged out to this
+ * device, and page them all in. in uvm, swap-backed pageable
+ * memory can take two forms: aobjs and anons. call the
+ * swapoff hook for each subsystem to bring in pages.
*/
- /* until the above code is written, we must ENODEV */
- return ENODEV;
+ if (uao_swap_off(sdp->swd_drumoffset,
+ sdp->swd_drumoffset + sdp->swd_drumsize) ||
+ anon_swap_off(sdp->swd_drumoffset,
+ sdp->swd_drumoffset + sdp->swd_drumsize)) {
+
+ simple_lock(&uvm.swap_data_lock);
+ sdp->swd_flags |= SWF_ENABLE;
+ simple_unlock(&uvm.swap_data_lock);
+ return ENOMEM;
+ }
-#ifdef UVM_SWAP_ENCRYPT
- if (sdp->swd_decrypt) {
- free(sdp->swd_decrypt);
- memset(sdp->swd_keys, 0, (sdp->swd_npages >> SWD_KEY_SHIFT) * sizeof(struct swap_key));
- free(sdp->swd_keys);
+#ifdef DIAGNOSTIC
+ if (sdp->swd_npginuse != sdp->swd_npgbad) {
+ panic("swap_off: sdp %p - %d pages still in use (%d bad)\n",
+ sdp, sdp->swd_npginuse, sdp->swd_npgbad);
}
#endif
- extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK);
- name = sdp->swd_ex->ex_name;
- extent_destroy(sdp->swd_ex);
- free(name, M_VMSWAP);
- free((caddr_t)sdp->swd_ex, M_VMSWAP);
- if (sdp->swp_vp != rootvp)
+
+ /*
+ * done with the vnode.
+ */
+ if (sdp->swd_vp->v_type == VREG) {
+ crfree(sdp->swd_cred);
+ }
+ if (sdp->swd_vp != rootvp) {
(void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
- if (sdp->swd_vp)
+ }
+ if (sdp->swd_vp) {
vrele(sdp->swd_vp);
- free((caddr_t)sdp, M_VMSWAP);
+ }
+
+ /* remove anons from the system */
+ uvm_anon_remove(sdp->swd_npages);
+
+ simple_lock(&uvm.swap_data_lock);
+ uvmexp.swpages -= sdp->swd_npages;
+
+ if (swaplist_find(sdp->swd_vp, 1) == NULL)
+ panic("swap_off: swapdev not in list\n");
+ swaplist_trim();
+
+ /*
+ * free all resources!
+ */
+ extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
+ EX_WAITOK);
+ name = (void *)sdp->swd_ex->ex_name;
+ extent_destroy(sdp->swd_ex);
+ free(name, M_VMSWAP);
+ free(sdp, M_VMSWAP);
+ simple_unlock(&uvm.swap_data_lock);
return (0);
}
-#endif
/*
* /dev/drum interface and i/o functions
@@ -1339,23 +1271,23 @@ swstrategy(bp)
* convert drum page number to block number on this swapdev.
*/
- pageno = pageno - sdp->swd_drumoffset; /* page # on swapdev */
+ pageno -= sdp->swd_drumoffset; /* page # on swapdev */
bn = btodb(pageno << PAGE_SHIFT); /* convert to diskblock */
UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld\n",
((bp->b_flags & B_READ) == 0) ? "write" : "read",
sdp->swd_drumoffset, bn, bp->b_bcount);
-
/*
* for block devices we finish up here.
- * for regular files we have to do more work which we deligate
+ * for regular files we have to do more work which we delegate
* to sw_reg_strategy().
*/
switch (sdp->swd_vp->v_type) {
default:
panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
+
case VBLK:
/*
@@ -1369,19 +1301,17 @@ swstrategy(bp)
splx(s);
VOP_STRATEGY(bp);
return;
-#ifdef SWAP_TO_FILES
+
case VREG:
/*
- * deligate to sw_reg_strategy function.
+ * delegate to sw_reg_strategy function.
*/
sw_reg_strategy(sdp, bp, bn);
return;
-#endif
}
/* NOTREACHED */
}
-#ifdef SWAP_TO_FILES
/*
* sw_reg_strategy: handle swap i/o to regular files
*/
@@ -1432,7 +1362,7 @@ sw_reg_strategy(sdp, bp, bn)
error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
&vp, &nbn, &nra);
- if (error == 0 && (long)nbn == -1) {
+ if (error == 0 && nbn == (daddr_t)-1) {
/*
* this used to just set error, but that doesn't
* do the right thing. Instead, it causes random
@@ -1483,9 +1413,6 @@ sw_reg_strategy(sdp, bp, bn)
getvndbuf(nbp);
nbp->vb_buf.b_flags = bp->b_flags | B_CALL;
nbp->vb_buf.b_bcount = sz;
-#if 0
- nbp->vb_buf.b_bufsize = bp->b_bufsize; /* XXXCDC: really? */
-#endif
nbp->vb_buf.b_bufsize = sz;
nbp->vb_buf.b_error = 0;
nbp->vb_buf.b_data = addr;
@@ -1641,11 +1568,11 @@ sw_reg_iodone(bp)
}
/*
- * drop "hold" reference to vnode (if one)
- * XXXCDC: always set to NULLVP, this is useless, right?
+ * disassociate this buffer from the vnode (if any).
*/
- if (vbp->vb_buf.b_vp != NULLVP)
+ if (vbp->vb_buf.b_vp != NULLVP) {
brelvp(&vbp->vb_buf);
+ }
/*
* kill vbp structure
@@ -1683,10 +1610,8 @@ sw_reg_iodone(bp)
*/
sdp->swd_tab.b_active--;
sw_reg_start(sdp);
-
splx(s);
}
-#endif /* SWAP_TO_FILES */
/*
@@ -1721,11 +1646,11 @@ uvm_swap_alloc(nslots, lessok)
simple_lock(&uvm.swap_data_lock);
ReTry: /* XXXMRG */
- for (spp = swap_priority.lh_first; spp != NULL;
- spp = spp->spi_swappri.le_next) {
- for (sdp = spp->spi_swapdev.cqh_first;
+ for (spp = LIST_FIRST(&swap_priority); spp != NULL;
+ spp = LIST_NEXT(spp, spi_swappri)) {
+ for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
sdp != (void *)&spp->spi_swapdev;
- sdp = sdp->swd_next.cqe_next) {
+ sdp = CIRCLEQ_NEXT(sdp,swd_next)) {
/* if it's not enabled, then we can't swap from it */
if ((sdp->swd_flags & SWF_ENABLE) == 0)
continue;
@@ -1765,6 +1690,33 @@ ReTry: /* XXXMRG */
}
/*
+ * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
+ *
+ * => we lock uvm.swap_data_lock
+ */
+void
+uvm_swap_markbad(startslot, nslots)
+ int startslot;
+ int nslots;
+{
+ struct swapdev *sdp;
+ UVMHIST_FUNC("uvm_swap_markbad"); UVMHIST_CALLED(pdhist);
+
+ simple_lock(&uvm.swap_data_lock);
+ sdp = swapdrum_getsdp(startslot);
+
+ /*
+ * we just keep track of how many pages have been marked bad
+ * in this device, to make everything add up in swap_off().
+ * we assume here that the range of slots will all be within
+ * one swap device.
+ */
+ sdp->swd_npgbad += nslots;
+
+ simple_unlock(&uvm.swap_data_lock);
+}
+
+/*
* uvm_swap_free: free swap slots
*
* => this can be all or part of an allocation made by uvm_swap_alloc
@@ -1780,6 +1732,14 @@ uvm_swap_free(startslot, nslots)
UVMHIST_LOG(pdhist, "freeing %d slots starting at %d", nslots,
startslot, 0, 0);
+
+ /*
+ * ignore attempts to free the "bad" slot.
+ */
+ if (startslot == SWSLOT_BAD) {
+ return;
+ }
+
/*
* convert drum slot offset back to sdp, free the blocks
* in the extent, and return. must hold pri lock to do
@@ -1798,9 +1758,10 @@ uvm_swap_free(startslot, nslots)
}
#endif
if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
- EX_MALLOCOK|EX_NOWAIT) != 0)
- printf("warning: resource shortage: %d slots of swap lost\n",
+ EX_MALLOCOK|EX_NOWAIT) != 0) {
+ printf("warning: resource shortage: %d pages of swap lost\n",
nslots);
+ }
sdp->swd_npginuse -= nslots;
uvmexp.swpginuse -= nslots;
@@ -1840,10 +1801,6 @@ uvm_swap_put(swslot, ppsp, npages, flags)
{
int result;
-#if 0
- flags |= PGO_SYNCIO; /* XXXMRG: tmp, force sync */
-#endif
-
result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
@@ -1869,6 +1826,10 @@ uvm_swap_get(page, swslot, flags)
printf("uvm_swap_get: ASYNC get requested?\n");
#endif
+ if (swslot == SWSLOT_BAD) {
+ return VM_PAGER_ERROR;
+ }
+
/*
* this page is (about to be) no longer only in swap.
*/
@@ -2000,7 +1961,7 @@ uvm_swap_io(pps, startslot, npages, flags)
/* dispose of pages we dont use anymore */
opages = npages;
uvm_pager_dropcluster(NULL, NULL, pps, &opages,
- PGO_PDFREECLUST, 0);
+ PGO_PDFREECLUST);
kva = dstkva;
}
@@ -2075,6 +2036,9 @@ uvm_swap_io(pps, startslot, npages, flags)
if (swap_encrypt_initalized)
uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
#endif
+ s = splbio();
+ swapdev_vp->v_numoutput++;
+ splx(s);
}
/*
@@ -2251,7 +2215,7 @@ uvm_swap_aiodone(aio)
else
#endif /* UVM_SWAP_ENCRYPT */
uvm_pager_dropcluster(NULL, NULL, pps, &aio->npages,
- PGO_PDFREECLUST, 0);
+ PGO_PDFREECLUST);
/*
* finally, we can dispose of the swapbuf
diff --git a/sys/uvm/uvm_swap.h b/sys/uvm/uvm_swap.h
index bbbbe2af391..3c9e9ad0311 100644
--- a/sys/uvm/uvm_swap.h
+++ b/sys/uvm/uvm_swap.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_swap.h,v 1.7 2001/03/22 03:05:57 smart Exp $ */
-/* $NetBSD: uvm_swap.h,v 1.4 1999/06/21 17:25:12 thorpej Exp $ */
+/* $OpenBSD: uvm_swap.h,v 1.8 2001/07/26 19:37:13 art Exp $ */
+/* $NetBSD: uvm_swap.h,v 1.5 2000/01/11 06:57:51 chs Exp $ */
/*
* Copyright (c) 1997 Matthew R. Green
@@ -34,13 +34,15 @@
#ifndef _UVM_UVM_SWAP_H_
#define _UVM_UVM_SWAP_H_
+#define SWSLOT_BAD (-1)
+
#ifdef _KERNEL
int uvm_swap_get __P((struct vm_page *, int, int));
int uvm_swap_put __P((int, struct vm_page **, int, int));
int uvm_swap_alloc __P((int *, boolean_t));
void uvm_swap_free __P((int, int));
-
+void uvm_swap_markbad __P((int, int));
#ifdef UVM_SWAP_ENCRYPT
void uvm_swap_initcrypt_all __P((void));
#endif