author    Artur Grabowski <art@cvs.openbsd.org>  2001-11-10 18:42:33 +0000
committer Artur Grabowski <art@cvs.openbsd.org>  2001-11-10 18:42:33 +0000
commit    bbe8f62ec65096ee1bb8589c0f3ade29b85553c4 (patch)
tree      4e43b942be0409169ada8cef2d0708a064d84fd9 /sys
parent    89424e542479723fbb4744a2c96edff04f29fb24 (diff)
Merge in some parts of the UBC work that has been done in NetBSD that are
not UBC itself, but prerequisites for it.
- Create a daemon that processes async I/O (swap and paging in the future)
  requests that need processing in process context and that were processed
  in the pagedaemon before.
- Convert some ugly ifdef DIAGNOSTIC code to less intrusive KASSERTs.
- Misc other cleanups.
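For readers skimming the diff below: the core of the new daemon is a classic
interrupt-to-thread handoff. uvm_aio_biodone() runs at splbio, queues the
completed buf on uvm.aio_done under uvm.aiodoned_lock, and wakes the daemon;
uvm_aiodone_daemon() grabs the whole list, zeroes the global head, drops the
lock, and then runs each buf's b_iodone handler in process context. The
following is a minimal, hypothetical user-space sketch of that pattern using
POSIX threads -- it is not the kernel API, and the names req_done/aiodoned/
struct req are invented for illustration only:

    #include <pthread.h>

    struct req {
            struct req *next;
            void (*done)(struct req *);     /* analogue of bp->b_iodone */
    };

    static struct req *done_head;           /* analogue of uvm.aio_done */
    static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;

    /* called from the completion context, like uvm_aio_biodone() */
    void
    req_done(struct req *r)
    {
            pthread_mutex_lock(&done_lock);
            r->next = done_head;
            done_head = r;
            pthread_cond_signal(&done_cv); /* like wakeup(&uvm.aiodoned) */
            pthread_mutex_unlock(&done_lock);
    }

    /* daemon main loop, like uvm_aiodone_daemon() */
    void *
    aiodoned(void *arg)
    {
            struct req *r, *list;

            for (;;) {
                    pthread_mutex_lock(&done_lock);
                    while (done_head == NULL)
                            pthread_cond_wait(&done_cv, &done_lock);
                    list = done_head;       /* take the entire list... */
                    done_head = NULL;       /* ...and reset the head, */
                    pthread_mutex_unlock(&done_lock);  /* then unlock */
                    while ((r = list) != NULL) {
                            list = r->next;
                            r->done(r);     /* process in thread context */
                    }
            }
            return (NULL);
    }

Draining the entire list under a single lock hold keeps the hold time
constant regardless of backlog; the kernel loop below uses the same trick
with TAILQ_INIT(&uvm.aio_done). The KASSERT conversion mentioned above is
visible in the hunks as multi-line "#ifdef DIAGNOSTIC ... panic()" blocks
collapsing into one-line invariant checks such as
KASSERT(obj == NULL || anon == NULL).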
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/init_main.c8
-rw-r--r--sys/uvm/uvm.h15
-rw-r--r--sys/uvm/uvm_extern.h11
-rw-r--r--sys/uvm/uvm_page.c163
-rw-r--r--sys/uvm/uvm_page.h80
-rw-r--r--sys/uvm/uvm_pager.c309
-rw-r--r--sys/uvm/uvm_pager.h37
-rw-r--r--sys/uvm/uvm_pdaemon.c312
-rw-r--r--sys/uvm/uvm_swap.c249
-rw-r--r--sys/uvm/uvm_swap.h3
-rw-r--r--sys/uvm/uvm_vnode.c6
11 files changed, 667 insertions, 526 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index e41148c9f95..c909a23141b 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_main.c,v 1.83 2001/11/07 01:18:01 art Exp $ */
+/* $OpenBSD: init_main.c,v 1.84 2001/11/10 18:42:31 art Exp $ */
/* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
/*
@@ -417,8 +417,12 @@ main(framep)
if (kthread_create(start_update, NULL, NULL, "update"))
panic("fork update");
+ /* Create process 6, the aiodone daemon kernel thread. */
+ if (kthread_create(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
+ panic("fork aiodoned");
+
#ifdef CRYPTO
- /* Create process 6, the crypto kernel thread. */
+ /* Create process 7, the crypto kernel thread. */
if (kthread_create(start_crypto, NULL, NULL, "crypto"))
panic("crypto thread");
#endif /* CRYPTO */
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index 1a61e80df34..f3ff4214dd5 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm.h,v 1.13 2001/11/05 22:14:54 art Exp $ */
-/* $NetBSD: uvm.h,v 1.23 2000/06/26 14:21:16 mrg Exp $ */
+/* $OpenBSD: uvm.h,v 1.14 2001/11/10 18:42:31 art Exp $ */
+/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
*
@@ -74,6 +74,7 @@
struct uvm {
/* vm_page related parameters */
+
/* vm_page queues */
struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
struct pglist page_active; /* allocated pages, in use */
@@ -84,10 +85,17 @@ struct uvm {
boolean_t page_init_done; /* TRUE if uvm_page_init() finished */
boolean_t page_idle_zero; /* TRUE if we should try to zero
pages in the idle loop */
+
/* page daemon trigger */
int pagedaemon; /* daemon sleeps on this */
struct proc *pagedaemon_proc; /* daemon's pid */
simple_lock_data_t pagedaemon_lock;
+
+ /* aiodone daemon trigger */
+ int aiodoned; /* daemon sleeps on this */
+ struct proc *aiodoned_proc; /* daemon's pid */
+ simple_lock_data_t aiodoned_lock;
+
/* page hash */
struct pglist *page_hash; /* page hash table (vp/off->page) */
int page_nhash; /* number of buckets */
@@ -103,7 +111,7 @@ struct uvm {
simple_lock_data_t kentry_lock;
/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
- struct uvm_aiohead aio_done; /* done async i/o reqs */
+ TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */
/* pager VM area bounds */
vaddr_t pager_sva; /* start of pager VA area */
@@ -143,6 +151,7 @@ extern struct uvm uvm;
UVMHIST_DECL(maphist);
UVMHIST_DECL(pdhist);
+UVMHIST_DECL(ubchist);
/*
* UVM_UNLOCK_AND_WAIT: atomic unlock+wait... wrapper around the
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 24102f0437c..9dd59bd23a8 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.31 2001/11/09 03:32:23 art Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.32 2001/11/10 18:42:31 art Exp $ */
/* $NetBSD: uvm_extern.h,v 1.52 2000/11/27 04:36:40 nisimura Exp $ */
/*
@@ -246,8 +246,11 @@ struct vm_map;
struct vmspace;
struct pmap;
struct vnode;
+struct pool;
struct simplelock;
+extern struct pool *uvm_aiobuf_pool;
+
/*
* uvmexp: global data structures that are exported to parts of the kernel
* other than the vm system.
@@ -549,8 +552,14 @@ void uvm_page_physload __P((paddr_t, paddr_t,
paddr_t, paddr_t, int));
void uvm_setpagesize __P((void));
+/* uvm_pager.c */
+void uvm_aio_biodone1 __P((struct buf *));
+void uvm_aio_biodone __P((struct buf *));
+void uvm_aio_aiodone __P((struct buf *));
+
/* uvm_pdaemon.c */
void uvm_pageout __P((void *));
+void uvm_aiodone_daemon __P((void *));
/* uvm_pglist.c */
int uvm_pglistalloc __P((psize_t, paddr_t,
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 4e46822894a..3207e33020c 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_page.c,v 1.29 2001/11/07 02:55:50 art Exp $ */
-/* $NetBSD: uvm_page.c,v 1.43 2000/11/09 19:15:28 christos Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.30 2001/11/10 18:42:31 art Exp $ */
+/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -75,6 +75,7 @@
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
+#include <sys/kernel.h>
#define UVM_PAGE /* pull in uvm_page.h functions */
#include <uvm/uvm.h>
@@ -96,8 +97,18 @@ int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
* We therefore provide a way to disable it from machdep code here.
*/
+/*
+ * XXX disabled until we can find a way to do this without causing
+ * problems for either cpu caches or DMA latency.
+ */
boolean_t vm_page_zero_enable = FALSE;
+#ifdef UBC
+u_long uvm_pgcnt_anon;
+u_long uvm_pgcnt_vnode;
+extern struct uvm_pagerops uvm_vnodeops;
+#endif
+
/*
* local variables
*/
@@ -124,7 +135,7 @@ static struct pglist uvm_bootbucket;
*/
static void uvm_pageinsert __P((struct vm_page *));
-
+static void uvm_pageremove __P((struct vm_page *));
/*
* inline functions
@@ -161,7 +172,6 @@ uvm_pageinsert(pg)
TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
pg->flags |= PG_TABLED;
pg->uobject->uo_npages++;
-
}
/*
@@ -171,21 +181,14 @@ uvm_pageinsert(pg)
* => caller must lock page queues
*/
-void __inline
+static __inline void
uvm_pageremove(pg)
struct vm_page *pg;
{
struct pglist *buck;
int s;
-#ifdef DIAGNOSTIC
- if ((pg->flags & (PG_FAULTING)) != 0)
- panic("uvm_pageremove: page is faulting");
-#endif
-
- if ((pg->flags & PG_TABLED) == 0)
- return; /* XXX: log */
-
+ KASSERT(pg->flags & PG_TABLED);
buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
s = splimp();
simple_lock(&uvm.hashlock);
@@ -193,6 +196,12 @@ uvm_pageremove(pg)
simple_unlock(&uvm.hashlock);
splx(s);
+#ifdef UBC
+ if (pg->uobject->pgops == &uvm_vnodeops) {
+ uvm_pgcnt_vnode--;
+ }
+#endif
+
/* object should be locked */
TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
@@ -200,7 +209,6 @@ uvm_pageremove(pg)
pg->uobject->uo_npages--;
pg->uobject = NULL;
pg->version++;
-
}
/*
@@ -218,7 +226,6 @@ uvm_page_init(kvm_startp, kvm_endp)
int lcv, i;
paddr_t paddr;
-
/*
* step 1: init the page queues and page queue locks
*/
@@ -239,7 +246,7 @@ uvm_page_init(kvm_startp, kvm_endp)
*/
uvm.page_nhash = 1; /* 1 bucket */
- uvm.page_hashmask = 0; /* mask for hash function */
+ uvm.page_hashmask = 0; /* mask for hash function */
uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
TAILQ_INIT(uvm.page_hash); /* init hash table */
simple_lock_init(&uvm.hashlock); /* init hash table lock */
@@ -292,7 +299,6 @@ uvm_page_init(kvm_startp, kvm_endp)
*/
for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
-
n = vm_physmem[lcv].end - vm_physmem[lcv].start;
if (n > pagecount) {
printf("uvm_page_init: lost %ld page(s) in init\n",
@@ -318,6 +324,7 @@ uvm_page_init(kvm_startp, kvm_endp)
}
}
}
+
/*
* step 5: pass up the values of virtual_space_start and
* virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
@@ -328,10 +335,11 @@ uvm_page_init(kvm_startp, kvm_endp)
*kvm_endp = trunc_page(virtual_space_end);
/*
- * step 6: init pagedaemon lock
+ * step 6: init locks for kernel threads
*/
simple_lock_init(&uvm.pagedaemon_lock);
+ simple_lock_init(&uvm.aiodoned_lock);
/*
* step 7: init reserve thresholds
@@ -343,10 +351,6 @@ uvm_page_init(kvm_startp, kvm_endp)
/*
* step 8: determine if we should zero pages in the idle
* loop.
- *
- * XXXJRT - might consider zero'ing up to the target *now*,
- * but that could take an awfully long time if you
- * have a lot of memory.
*/
uvm.page_idle_zero = vm_page_zero_enable;
@@ -361,7 +365,6 @@ uvm_page_init(kvm_startp, kvm_endp)
* uvm_setpagesize: set the page size
*
* => sets page_shift and page_mask from uvmexp.pagesize.
- * => XXXCDC: move global vars.
*/
void
@@ -892,22 +895,26 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
struct pgfreelist *pgfl;
boolean_t use_reserve;
-#ifdef DIAGNOSTIC
- /* sanity check */
- if (obj && anon)
- panic("uvm_pagealloc: obj and anon != NULL");
-#endif
-
- s = uvm_lock_fpageq(); /* lock free page queue */
+ KASSERT(obj == NULL || anon == NULL);
+ KASSERT(off == trunc_page(off));
+ s = uvm_lock_fpageq();
/*
* check to see if we need to generate some free pages waking
* the pagedaemon.
*/
+#ifdef UBC
+ if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
+ (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
+ uvmexp.inactive < uvmexp.inactarg)) {
+ wakeup(&uvm.pagedaemon);
+ }
+#else
if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
uvmexp.inactive < uvmexp.inactarg))
wakeup(&uvm.pagedaemon);
+#endif
/*
* fail if any of these conditions is true:
@@ -961,11 +968,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
case UVM_PGA_STRAT_ONLY:
case UVM_PGA_STRAT_FALLBACK:
/* Attempt to allocate from the specified free list. */
-#ifdef DIAGNOSTIC
- if (free_list >= VM_NFREELIST || free_list < 0)
- panic("uvm_pagealloc_strat: bad free list %d",
- free_list);
-#endif
+ KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
pgfl = &uvm.page_free[free_list];
if ((pg = TAILQ_FIRST((freeq =
&pgfl->pgfl_queues[try1]))) != NULL ||
@@ -1016,11 +1019,12 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
pg->uanon = anon;
pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
pg->version++;
- pg->wire_count = 0;
- pg->loan_count = 0;
if (anon) {
anon->u.an_page = pg;
pg->pqflags = PQ_ANON;
+#ifdef UBC
+ uvm_pgcnt_anon++;
+#endif
} else {
if (obj)
uvm_pageinsert(pg);
@@ -1121,8 +1125,6 @@ uvm_pagerealloc(pg, newobj, newoff)
pg->version++;
uvm_pageinsert(pg);
}
-
- return;
}
@@ -1136,14 +1138,20 @@ uvm_pagerealloc(pg, newobj, newoff)
* => assumes all valid mappings of pg are gone
*/
-void uvm_pagefree(pg)
-
-struct vm_page *pg;
-
+void
+uvm_pagefree(pg)
+ struct vm_page *pg;
{
int s;
int saved_loan_count = pg->loan_count;
+#ifdef DEBUG
+ if (pg->uobject == (void *)0xdeadbeef &&
+ pg->uanon == (void *)0xdeadbeef) {
+ panic("uvm_pagefree: freeing free page %p\n", pg);
+ }
+#endif
+
/*
* if the page was an object page (and thus "TABLED"), remove it
* from the object.
@@ -1152,7 +1160,7 @@ struct vm_page *pg;
if (pg->flags & PG_TABLED) {
/*
- * if the object page is on loan we are going to drop ownership.
+ * if the object page is on loan we are going to drop ownership.
* it is possible that an anon will take over as owner for this
* page later on. the anon will want a !PG_CLEAN page so that
* it knows it needs to allocate swap if it wants to page the
@@ -1161,7 +1169,6 @@ struct vm_page *pg;
if (saved_loan_count)
pg->flags &= ~PG_CLEAN; /* in case an anon takes over */
-
uvm_pageremove(pg);
/*
@@ -1172,9 +1179,9 @@ struct vm_page *pg;
* return (when the last loan is dropped, then the page can be
* freed by whatever was holding the last loan).
*/
+
if (saved_loan_count)
return;
-
} else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {
/*
@@ -1184,19 +1191,12 @@ struct vm_page *pg;
* note that the kernel can't change the loan status of our
* page as long as we are holding PQ lock.
*/
+
pg->pqflags &= ~PQ_ANON;
pg->uanon = NULL;
return;
}
-
-#ifdef DIAGNOSTIC
- if (saved_loan_count) {
- printf("uvm_pagefree: warning: freeing page with a loan "
- "count of %d\n", saved_loan_count);
- panic("uvm_pagefree: loan count");
- }
-#endif
-
+ KASSERT(saved_loan_count == 0);
/*
* now remove the page from the queues
@@ -1219,13 +1219,19 @@ struct vm_page *pg;
/*
* if the page was wired, unwire it now.
*/
+
if (pg->wire_count) {
pg->wire_count = 0;
uvmexp.wired--;
}
+#ifdef UBC
+ if (pg->uanon) {
+ uvm_pgcnt_anon--;
+ }
+#endif
/*
- * and put on free queue
+ * and put on free queue
*/
pg->flags &= ~PG_ZERO;
@@ -1247,6 +1253,51 @@ struct vm_page *pg;
uvm_unlock_fpageq(s);
}
+/*
+ * uvm_page_unbusy: unbusy an array of pages.
+ *
+ * => pages must either all belong to the same object, or all belong to anons.
+ * => if pages are object-owned, object must be locked.
+ * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
+ */
+
+void
+uvm_page_unbusy(pgs, npgs)
+ struct vm_page **pgs;
+ int npgs;
+{
+ struct vm_page *pg;
+ struct uvm_object *uobj;
+ int i;
+ UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
+
+ for (i = 0; i < npgs; i++) {
+ pg = pgs[i];
+
+ if (pg == NULL) {
+ continue;
+ }
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+ if (pg->flags & PG_RELEASED) {
+ UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
+ uobj = pg->uobject;
+ if (uobj != NULL) {
+ uobj->pgops->pgo_releasepg(pg, NULL);
+ } else {
+ pg->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ uvm_anfree(pg->uanon);
+ }
+ } else {
+ UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ }
+}
+
#if defined(UVM_PAGE_TRKOWN)
/*
* uvm_page_own: set or release page ownership
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index 564504a205f..aa5991137c2 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_page.h,v 1.11 2001/11/07 02:55:50 art Exp $ */
-/* $NetBSD: uvm_page.h,v 1.17 2000/10/03 20:50:49 mrg Exp $ */
+/* $OpenBSD: uvm_page.h,v 1.12 2001/11/10 18:42:31 art Exp $ */
+/* $NetBSD: uvm_page.h,v 1.18 2000/11/27 08:40:05 chs Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -119,27 +119,27 @@
#include <uvm/uvm_pglist.h>
struct vm_page {
- TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
- * queue or free list (P) */
- TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
-
- struct vm_anon *uanon; /* anon (O,P) */
- struct uvm_object *uobject; /* object (O,P) */
- voff_t offset; /* offset into object (O,P) */
-
- u_short flags; /* object flags [O] */
- u_short version; /* version count [O] */
- u_short wire_count; /* wired down map refs [P] */
- u_short pqflags; /* page queue flags [P] */
- u_int loan_count; /* number of active loans
- * to read: [O or P]
- * to modify: [O _and_ P] */
- paddr_t phys_addr; /* physical address of page */
+ TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
+ * queue or free list (P) */
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
+
+ struct vm_anon *uanon; /* anon (O,P) */
+ struct uvm_object *uobject; /* object (O,P) */
+ voff_t offset; /* offset into object (O,P) */
+
+ u_short flags; /* object flags [O] */
+ u_short version; /* version count [O] */
+ u_short wire_count; /* wired down map refs [P] */
+ u_short pqflags; /* page queue flags [P] */
+ u_int loan_count; /* number of active loans
+ * to read: [O or P]
+ * to modify: [O _and_ P] */
+ paddr_t phys_addr; /* physical address of page */
#if defined(UVM_PAGE_TRKOWN)
- /* debugging fields to track page ownership */
- pid_t owner; /* proc that set PG_BUSY */
- char *owner_tag; /* why it was set busy */
+ /* debugging fields to track page ownership */
+ pid_t owner; /* proc that set PG_BUSY */
+ char *owner_tag; /* why it was set busy */
#endif
};
@@ -158,25 +158,23 @@ struct vm_page {
* PG_ZERO is used to indicate that a page has been pre-zero'd. This flag
* is only set when the page is on no queues, and is cleared when the page
* is placed on the free list.
- *
- * possible deadwood: PG_FAULTING, PQ_LAUNDRY
*/
+
+#define PG_BUSY 0x0001 /* page is locked */
+#define PG_WANTED 0x0002 /* someone is waiting for page */
+#define PG_TABLED 0x0004 /* page is in VP table */
#define PG_CLEAN 0x0008 /* page has not been modified */
-#define PG_BUSY 0x0010 /* page is in transit */
-#define PG_WANTED 0x0020 /* someone is waiting for page */
-#define PG_TABLED 0x0040 /* page is in VP table */
-#define PG_ZERO 0x0100 /* page is pre-zero'd */
-#define PG_FAKE 0x0200 /* page is placeholder for pagein */
-#define PG_FILLED 0x0400 /* client flag to set when filled */
-#define PG_DIRTY 0x0800 /* client flag to set when dirty */
-#define PG_RELEASED 0x1000 /* page released while paging */
-#define PG_FAULTING 0x2000 /* page is being faulted in */
-#define PG_CLEANCHK 0x4000 /* clean bit has been checked */
+#define PG_CLEANCHK 0x0010 /* clean bit has been checked */
+#define PG_RELEASED 0x0020 /* page released while paging */
+#define PG_FAKE 0x0040 /* page is not yet initialized */
+#define PG_RDONLY 0x0080 /* page must be mapped read-only */
+#define PG_ZERO 0x0100 /* page is pre-zero'd */
+
+#define PG_PAGER1 0x1000 /* pager-specific flag */
#define PQ_FREE 0x0001 /* page is on free list */
#define PQ_INACTIVE 0x0002 /* page is in inactive list */
#define PQ_ACTIVE 0x0004 /* page is in active list */
-#define PQ_LAUNDRY 0x0008 /* page is being cleaned now */
#define PQ_ANON 0x0010 /* page is part of an anon, rather
than an uvm_object */
#define PQ_AOBJ 0x0020 /* page is part of an anonymous
@@ -239,12 +237,9 @@ extern boolean_t vm_page_zero_enable;
* ordered, in LRU-like fashion.
*/
-extern
-struct pglist vm_page_queue_free; /* memory free queue */
-extern
-struct pglist vm_page_queue_active; /* active memory queue */
-extern
-struct pglist vm_page_queue_inactive; /* inactive memory queue */
+extern struct pglist vm_page_queue_free; /* memory free queue */
+extern struct pglist vm_page_queue_active; /* active memory queue */
+extern struct pglist vm_page_queue_inactive; /* inactive memory queue */
/*
* physical memory config is stored in vm_physmem.
@@ -285,9 +280,8 @@ vaddr_t uvm_pageboot_alloc __P((vsize_t));
PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *));
PAGE_INLINE void uvm_pagedeactivate __P((struct vm_page *));
void uvm_pagefree __P((struct vm_page *));
+void uvm_page_unbusy __P((struct vm_page **, int));
PAGE_INLINE struct vm_page *uvm_pagelookup __P((struct uvm_object *, voff_t));
-void uvm_pageremove __P((struct vm_page *));
-/* uvm_pagerename: not needed */
PAGE_INLINE void uvm_pageunwire __P((struct vm_page *));
PAGE_INLINE void uvm_pagewait __P((struct vm_page *, int));
PAGE_INLINE void uvm_pagewake __P((struct vm_page *));
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index 29e305c57b3..57cd0b4fc3a 100644
--- a/sys/uvm/uvm_pager.c
+++ b/sys/uvm/uvm_pager.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_pager.c,v 1.20 2001/11/07 02:55:50 art Exp $ */
-/* $NetBSD: uvm_pager.c,v 1.34 2000/11/24 22:41:39 chs Exp $ */
+/* $OpenBSD: uvm_pager.c,v 1.21 2001/11/10 18:42:31 art Exp $ */
+/* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */
/*
*
@@ -39,25 +39,36 @@
* uvm_pager.c: generic functions used to assist the pagers.
*/
+#define UVM_PAGER
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
+#include <sys/pool.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
-#define UVM_PAGER
#include <uvm/uvm.h>
+struct pool *uvm_aiobuf_pool;
+
/*
* list of uvm pagers in the system
*/
extern struct uvm_pagerops uvm_deviceops;
extern struct uvm_pagerops uvm_vnodeops;
+#ifdef UBC
+extern struct uvm_pagerops ubc_pager;
+#endif
struct uvm_pagerops *uvmpagerops[] = {
&aobj_pager,
&uvm_deviceops,
&uvm_vnodeops,
+#ifdef UBC
+ &ubc_pager,
+#endif
};
/*
@@ -67,7 +78,8 @@ struct uvm_pagerops *uvmpagerops[] = {
vm_map_t pager_map; /* XXX */
simple_lock_data_t pager_map_wanted_lock;
boolean_t pager_map_wanted; /* locked by pager map */
-
+static vaddr_t emergva;
+static boolean_t emerginuse;
/*
* uvm_pager_init: init pagers (at boot time)
@@ -82,10 +94,12 @@ uvm_pager_init()
* init pager map
*/
- pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
- PAGER_MAP_SIZE, 0, FALSE, NULL);
- simple_lock_init(&pager_map_wanted_lock);
- pager_map_wanted = FALSE;
+ pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
+ PAGER_MAP_SIZE, 0, FALSE, NULL);
+ simple_lock_init(&pager_map_wanted_lock);
+ pager_map_wanted = FALSE;
+ emergva = uvm_km_valloc(kernel_map, MAXBSIZE);
+ emerginuse = FALSE;
/*
* init ASYNC I/O queue
@@ -111,22 +125,19 @@ uvm_pager_init()
*/
vaddr_t
-uvm_pagermapin(pps, npages, aiop, flags)
+uvm_pagermapin(pps, npages, flags)
struct vm_page **pps;
int npages;
- struct uvm_aiodesc **aiop; /* OUT */
int flags;
{
vsize_t size;
vaddr_t kva;
- struct uvm_aiodesc *aio;
vaddr_t cva;
struct vm_page *pp;
vm_prot_t prot;
UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);
- UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, aiop=0x%x, flags=0x%x)",
- pps, npages, aiop, flags);
+ UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d)", pps, npages,0,0);
/*
* compute protection. outgoing I/O only needs read
@@ -138,24 +149,26 @@ uvm_pagermapin(pps, npages, aiop, flags)
prot |= VM_PROT_WRITE;
ReStart:
- if (aiop) {
- MALLOC(aio, struct uvm_aiodesc *, sizeof(*aio), M_TEMP,
- (flags & UVMPAGER_MAPIN_WAITOK));
- if (aio == NULL)
- return(0);
- *aiop = aio;
- } else {
- aio = NULL;
- }
-
size = npages << PAGE_SHIFT;
kva = 0; /* let system choose VA */
if (uvm_map(pager_map, &kva, size, NULL,
UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
+ if (curproc == uvm.pagedaemon_proc) {
+ simple_lock(&pager_map_wanted_lock);
+ if (emerginuse) {
+ UVM_UNLOCK_AND_WAIT(&emergva,
+ &pager_map_wanted_lock, FALSE,
+ "emergva", 0);
+ goto ReStart;
+ }
+ emerginuse = TRUE;
+ simple_unlock(&pager_map_wanted_lock);
+ kva = emergva;
+ KASSERT(npages <= MAXBSIZE >> PAGE_SHIFT);
+ goto enter;
+ }
if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
- if (aio)
- FREE(aio, M_TEMP);
UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
return(0);
}
@@ -163,16 +176,17 @@ ReStart:
pager_map_wanted = TRUE;
UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0);
UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE,
- "pager_map",0);
+ "pager_map", 0);
goto ReStart;
}
+enter:
/* got it */
for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
pp = *pps++;
#ifdef DEBUG
if ((pp->flags & PG_BUSY) == 0)
- panic("uvm_pagermapin: page not busy");
+ panic("uvm_pagermapin: pg %p not busy", pp);
#endif
pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp),
prot, PMAP_WIRED | prot);
@@ -197,13 +211,22 @@ uvm_pagermapout(kva, npages)
vsize_t size = npages << PAGE_SHIFT;
vm_map_entry_t entries;
UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);
-
+
UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0);
/*
* duplicate uvm_unmap, but add in pager_map_wanted handling.
*/
+ if (kva == emergva) {
+ simple_lock(&pager_map_wanted_lock);
+ emerginuse = FALSE;
+ wakeup(&emergva);
+ simple_unlock(&pager_map_wanted_lock);
+ entries = NULL;
+ goto remove;
+ }
+
vm_map_lock(pager_map);
(void) uvm_unmap_remove(pager_map, kva, kva + size, &entries);
simple_lock(&pager_map_wanted_lock);
@@ -213,6 +236,8 @@ uvm_pagermapout(kva, npages)
}
simple_unlock(&pager_map_wanted_lock);
vm_map_unlock(pager_map);
+remove:
+ pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT));
if (entries)
uvm_unmap_detach(entries, 0);
@@ -250,7 +275,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
{
struct vm_page **ppsp, *pclust;
voff_t lo, hi, curoff;
- int center_idx, forward;
+ int center_idx, forward, incr;
UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist);
/*
@@ -272,9 +297,11 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
if (hi > mhi)
hi = mhi;
}
- if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
+ if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
#ifdef DIAGNOSTIC
- printf("uvm_mk_pcluster: provided page array too small (fixed)\n");
+ printf("uvm_mk_pcluster uobj %p npages %d lo 0x%llx hi 0x%llx "
+ "flags 0x%x\n", uobj, *npages, (long long)lo,
+ (long long)hi, flags);
#endif
pps[0] = center;
*npages = 1;
@@ -290,7 +317,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
pps[center_idx] = center; /* plug in the center page */
ppsp = &pps[center_idx];
*npages = 1;
-
+
/*
* attempt to cluster around the left [backward], and then
* the right side [forward].
@@ -302,21 +329,23 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
*/
for (forward = 0 ; forward <= 1 ; forward++) {
-
- curoff = center->offset + (forward ? PAGE_SIZE : -PAGE_SIZE);
+ incr = forward ? PAGE_SIZE : -PAGE_SIZE;
+ curoff = center->offset + incr;
for ( ;(forward == 0 && curoff >= lo) ||
(forward && curoff < hi);
- curoff += (forward ? 1 : -1) << PAGE_SHIFT) {
+ curoff += incr) {
pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
- if (pclust == NULL)
+ if (pclust == NULL) {
break; /* no page */
+ }
/* handle active pages */
/* NOTE: inactive pages don't have pmap mappings */
if ((pclust->pqflags & PQ_INACTIVE) == 0) {
- if ((flags & PGO_DOACTCLUST) == 0)
+ if ((flags & PGO_DOACTCLUST) == 0) {
/* dont want mapped pages at all */
break;
+ }
/* make sure "clean" bit is sync'd */
if ((pclust->flags & PG_CLEANCHK) == 0) {
@@ -328,13 +357,16 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
pclust->flags |= PG_CLEANCHK;
}
}
+
/* is page available for cleaning and does it need it */
- if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0)
+ if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0) {
break; /* page is already clean or is busy */
+ }
/* yes! enroll the page in our array */
pclust->flags |= PG_BUSY; /* busy! */
UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");
+
/* XXX: protect wired page? see above comment. */
pmap_page_protect(pclust, VM_PROT_READ);
if (!forward) {
@@ -344,7 +376,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
/* move forward one page */
ppsp[*npages] = pclust;
}
- *npages = *npages + 1;
+ (*npages)++;
}
}
@@ -407,6 +439,7 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop)
int result;
daddr_t swblk;
struct vm_page **ppsp = *ppsp_ptr;
+ UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(ubchist);
/*
* note that uobj is null if we are doing a swap-backed pageout.
@@ -457,12 +490,12 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop)
ReTry:
if (uobj) {
/* object is locked */
- result = uobj->pgops->pgo_put(uobj, ppsp, *npages,
- flags & PGO_SYNCIO);
+ result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
+ UVMHIST_LOG(ubchist, "put -> %d", result, 0,0,0);
/* object is now unlocked */
} else {
/* nothing locked */
- result = uvm_swap_put(swblk, ppsp, *npages, flags & PGO_SYNCIO);
+ result = uvm_swap_put(swblk, ppsp, *npages, flags);
/* nothing locked */
}
@@ -498,9 +531,9 @@ ReTry:
}
/*
- * a pager error occurred.
- * for transient errors, drop to a cluster of 1 page ("pg")
- * and try again. for hard errors, don't bother retrying.
+ * a pager error occurred (even after dropping the cluster, if there
+ * was one). give up! the caller only has one page ("pg")
+ * to worry about.
*/
if (*npages > 1 || pg == NULL) {
@@ -608,7 +641,8 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
for (lcv = 0 ; lcv < *npages ; lcv++) {
- if (ppsp[lcv] == pg) /* skip "pg" */
+ /* skip "pg" or empty slot */
+ if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
continue;
/*
@@ -635,9 +669,10 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
}
/* did someone want the page while we had it busy-locked? */
- if (ppsp[lcv]->flags & PG_WANTED)
+ if (ppsp[lcv]->flags & PG_WANTED) {
/* still holding obj lock */
wakeup(ppsp[lcv]);
+ }
/* if page was released, release it. otherwise un-busy it */
if (ppsp[lcv]->flags & PG_RELEASED) {
@@ -688,7 +723,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
continue; /* next page */
} else {
- ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED);
+ ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED|PG_FAKE);
UVM_PAGE_OWN(ppsp[lcv], NULL);
}
@@ -711,3 +746,181 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
}
}
}
+
+#ifdef UBC
+/*
+ * interrupt-context iodone handler for nested i/o bufs.
+ *
+ * => must be at splbio().
+ */
+
+void
+uvm_aio_biodone1(bp)
+ struct buf *bp;
+{
+ struct buf *mbp = bp->b_private;
+
+ KASSERT(mbp != bp);
+ if (bp->b_flags & B_ERROR) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = bp->b_error;
+ }
+ mbp->b_resid -= bp->b_bcount;
+ pool_put(&bufpool, bp);
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+}
+#endif
+
+/*
+ * interrupt-context iodone handler for single-buf i/os
+ * or the top-level buf of a nested-buf i/o.
+ *
+ * => must be at splbio().
+ */
+
+void
+uvm_aio_biodone(bp)
+ struct buf *bp;
+{
+ /* reset b_iodone for when this is a single-buf i/o. */
+ bp->b_iodone = uvm_aio_aiodone;
+
+ simple_lock(&uvm.aiodoned_lock); /* locks uvm.aio_done */
+ TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
+ wakeup(&uvm.aiodoned);
+ simple_unlock(&uvm.aiodoned_lock);
+}
+
+/*
+ * uvm_aio_aiodone: do iodone processing for async i/os.
+ * this should be called in thread context, not interrupt context.
+ */
+
+void
+uvm_aio_aiodone(bp)
+ struct buf *bp;
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vm_page *pg, *pgs[npages];
+ struct uvm_object *uobj;
+ int s, i;
+ boolean_t release, write, swap;
+ UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+
+ release = (bp->b_flags & (B_ERROR|B_READ)) == (B_ERROR|B_READ);
+ write = (bp->b_flags & B_READ) == 0;
+#ifdef UBC
+ /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
+ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(bp);
+ }
+#endif
+
+ uobj = NULL;
+ for (i = 0; i < npages; i++) {
+ pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ }
+ uvm_pagermapout((vaddr_t)bp->b_data, npages);
+#ifdef UVM_SWAP_ENCRYPT
+ /*
+ * XXX - assumes that we only get ASYNC writes. used to be above.
+ */
+ if (pgs[0]->pqflags & PQ_ENCRYPT) {
+ uvm_swap_freepages(pgs, npages);
+ goto freed;
+ }
+#endif /* UVM_SWAP_ENCRYPT */
+ for (i = 0; i < npages; i++) {
+ pg = pgs[i];
+
+ if (i == 0) {
+ swap = (pg->pqflags & PQ_SWAPBACKED) != 0;
+ if (!swap) {
+ uobj = pg->uobject;
+ simple_lock(&uobj->vmobjlock);
+ }
+ }
+ KASSERT(swap || pg->uobject == uobj);
+ if (swap) {
+ if (pg->pqflags & PQ_ANON) {
+ simple_lock(&pg->uanon->an_lock);
+ } else {
+ simple_lock(&pg->uobject->vmobjlock);
+ }
+ }
+
+ /*
+ * if this is a read and we got an error, mark the pages
+ * PG_RELEASED so that uvm_page_unbusy() will free them.
+ */
+
+ if (release) {
+ pg->flags |= PG_RELEASED;
+ continue;
+ }
+ KASSERT(!write || (pgs[i]->flags & PG_FAKE) == 0);
+
+ /*
+ * if this is a read and the page is PG_FAKE
+ * or this was a write, mark the page PG_CLEAN and not PG_FAKE.
+ */
+
+ if (pgs[i]->flags & PG_FAKE || write) {
+ pmap_clear_reference(pgs[i]);
+ pmap_clear_modify(pgs[i]);
+ pgs[i]->flags |= PG_CLEAN;
+ pgs[i]->flags &= ~PG_FAKE;
+ }
+ if (swap) {
+ if (pg->pqflags & PQ_ANON) {
+ simple_unlock(&pg->uanon->an_lock);
+ } else {
+ simple_unlock(&pg->uobject->vmobjlock);
+ }
+ }
+ }
+ uvm_page_unbusy(pgs, npages);
+ if (!swap) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+#ifdef UVM_SWAP_ENCRYPT
+freed:
+#endif
+ s = splbio();
+ if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) {
+ vwakeup(bp->b_vp);
+ }
+ pool_put(&bufpool, bp);
+ splx(s);
+}
+
+/*
+ * translate unix errno values to VM_PAGER_*.
+ */
+
+int
+uvm_errno2vmerror(errno)
+ int errno;
+{
+ switch (errno) {
+ case 0:
+ return VM_PAGER_OK;
+ case EINVAL:
+ return VM_PAGER_BAD;
+ case EINPROGRESS:
+ return VM_PAGER_PEND;
+ case EIO:
+ return VM_PAGER_ERROR;
+ case EAGAIN:
+ return VM_PAGER_AGAIN;
+ case EBUSY:
+ return VM_PAGER_UNLOCK;
+ default:
+ return VM_PAGER_ERROR;
+ }
+}
diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
index 208693227d3..b246e815e04 100644
--- a/sys/uvm/uvm_pager.h
+++ b/sys/uvm/uvm_pager.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_pager.h,v 1.13 2001/11/07 02:55:50 art Exp $ */
-/* $NetBSD: uvm_pager.h,v 1.18 2000/11/24 22:41:39 chs Exp $ */
+/* $OpenBSD: uvm_pager.h,v 1.14 2001/11/10 18:42:31 art Exp $ */
+/* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */
/*
*
@@ -83,21 +83,6 @@
*/
/*
- * async pager i/o descriptor structure
- */
-
-TAILQ_HEAD(uvm_aiohead, uvm_aiodesc);
-
-struct uvm_aiodesc {
- void (*aiodone) __P((struct uvm_aiodesc *));
- /* aio done function */
- vaddr_t kva; /* KVA of mapped page(s) */
- int npages; /* # of pages in I/O req */
- void *pd_ptr; /* pager-dependent pointer */
- TAILQ_ENTRY(uvm_aiodesc) aioq; /* linked list of aio's */
-};
-
-/*
* pager ops
*/
@@ -133,22 +118,22 @@ struct uvm_pagerops {
/* pager flags [mostly for flush] */
#define PGO_CLEANIT 0x001 /* write dirty pages to backing store */
-#define PGO_SYNCIO 0x002 /* if PGO_CLEAN: use sync I/O? */
-/*
- * obviously if neither PGO_INVALIDATE or PGO_FREE are set then the pages
- * stay where they are.
- */
+#define PGO_SYNCIO 0x002 /* if PGO_CLEANIT: use sync I/O? */
#define PGO_DEACTIVATE 0x004 /* deactivate flushed pages */
#define PGO_FREE 0x008 /* free flushed pages */
+/* if PGO_FREE is not set then the pages stay where they are. */
#define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */
#define PGO_DOACTCLUST 0x020 /* flag to mk_pcluster to include active */
#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
#define PGO_PDFREECLUST 0x080 /* daemon's free cluster flag [uvm_pager_put] */
#define PGO_REALLOCSWAP 0x100 /* reallocate swap area [pager_dropcluster] */
+#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */
+#define PGO_WEAK 0x400 /* "weak" put, for nfs */
+#define PGO_PASTEOF 0x800 /* allow allocation of pages past EOF */
/* page we are not interested in getting */
-#define PGO_DONTCARE ((struct vm_page *) -1) /* [get only] */
+#define PGO_DONTCARE ((struct vm_page *) -1L) /* [get only] */
#ifdef _KERNEL
@@ -176,12 +161,12 @@ int uvm_pager_put __P((struct uvm_object *, struct vm_page *,
PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t));
-vaddr_t uvm_pagermapin __P((struct vm_page **, int,
- struct uvm_aiodesc **, int));
+vaddr_t uvm_pagermapin __P((struct vm_page **, int, int));
void uvm_pagermapout __P((vaddr_t, int));
struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **,
int *, struct vm_page *, int,
voff_t, voff_t));
+int uvm_errno2vmerror __P((int));
/* Flags to uvm_pagermapin() */
#define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */
@@ -215,7 +200,9 @@ struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **,
* is changed to do physically-addressed i/o.
*/
+#ifndef PAGER_MAP_SIZE
#define PAGER_MAP_SIZE (16 * 1024 * 1024)
+#endif
#endif /* _KERNEL */
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 42f141b17c0..e40281deb9d 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pdaemon.c,v 1.16 2001/11/06 13:36:52 art Exp $ */
+/* $OpenBSD: uvm_pdaemon.c,v 1.17 2001/11/10 18:42:31 art Exp $ */
/* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
/*
@@ -76,6 +76,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
+#include <sys/buf.h>
#include <uvm/uvm.h>
@@ -193,10 +194,8 @@ void
uvm_pageout(void *arg)
{
int npages = 0;
- int s;
- struct uvm_aiodesc *aio, *nextaio;
UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
-
+
UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
/*
@@ -213,73 +212,33 @@ uvm_pageout(void *arg)
/*
* main loop
*/
- while (TRUE) {
-
- /*
- * carefully attempt to go to sleep (without losing "wakeups"!).
- * we need splbio because we want to make sure the aio_done list
- * is totally empty before we go to sleep.
- */
- s = splbio();
+ for (;;) {
simple_lock(&uvm.pagedaemon_lock);
- /*
- * if we've got done aio's, then bypass the sleep
- */
-
- if (uvm.aio_done.tqh_first == NULL) {
- UVMHIST_LOG(maphist," <<SLEEPING>>",0,0,0,0);
- UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
- &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
- uvmexp.pdwoke++;
- UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
-
- /* relock pagedaemon_lock, still at splbio */
- simple_lock(&uvm.pagedaemon_lock);
- }
-
- /*
- * check for done aio structures
- */
-
- aio = uvm.aio_done.tqh_first; /* save current list (if any)*/
- if (aio) {
- TAILQ_INIT(&uvm.aio_done); /* zero global list */
- }
-
- simple_unlock(&uvm.pagedaemon_lock); /* unlock */
- splx(s); /* drop splbio */
-
- /*
- * first clear out any pending aios (to free space in case we
- * want to pageout more stuff).
- */
-
- for (/*null*/; aio != NULL ; aio = nextaio) {
-
- uvmexp.paging -= aio->npages;
- nextaio = aio->aioq.tqe_next;
- aio->aiodone(aio);
+ UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
+ UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
+ &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0);
+ uvmexp.pdwoke++;
+ UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
- }
-
- /* Next, drain pool resources */
+ /* drain pool resources */
pool_drain(0);
/*
* now lock page queues and recompute inactive count
*/
- uvm_lock_pageq();
+ uvm_lock_pageq();
if (npages != uvmexp.npages) { /* check for new pages? */
npages = uvmexp.npages;
uvmpd_tune();
}
uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
- if (uvmexp.inactarg <= uvmexp.freetarg)
+ if (uvmexp.inactarg <= uvmexp.freetarg) {
uvmexp.inactarg = uvmexp.freetarg + 1;
+ }
UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d",
uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
@@ -287,30 +246,113 @@ uvm_pageout(void *arg)
/*
* scan if needed
- * [XXX: note we are reading uvm.free without locking]
*/
+
+#ifdef UBC
+ if (uvmexp.free + uvmexp.paging < uvmexp.freetarg ||
+ uvmexp.inactive < uvmexp.inactarg ||
+ uvm_pgcnt_vnode >
+ (uvmexp.active + uvmexp.inactive + uvmexp.wired +
+ uvmexp.free) * 13 / 16) {
+#else
if (uvmexp.free < uvmexp.freetarg ||
- uvmexp.inactive < uvmexp.inactarg)
+ uvmexp.inactive < uvmexp.inactarg) {
+#endif
uvmpd_scan();
+ }
/*
- * done scan. unlock page queues (the only lock we are holding)
+ * if there's any free memory to be had,
+ * wake up any waiters.
*/
+
+ if (uvmexp.free > uvmexp.reserve_kernel ||
+ uvmexp.paging == 0) {
+ wakeup(&uvmexp.free);
+ }
+
+ /*
+ * scan done. unlock page queues (the only lock we are holding)
+ */
+
uvm_unlock_pageq();
+ }
+ /*NOTREACHED*/
+}
+
+
+/*
+ * uvm_aiodone_daemon: main loop for the aiodone daemon.
+ */
+
+void
+uvm_aiodone_daemon(void *arg)
+{
+ int s, free;
+ struct buf *bp, *nbp;
+ UVMHIST_FUNC("uvm_aiodoned"); UVMHIST_CALLED(pdhist);
+
+ for (;;) {
/*
- * done! restart loop.
+ * carefully attempt to go to sleep (without losing "wakeups"!).
+ * we need splbio because we want to make sure the aio_done list
+ * is totally empty before we go to sleep.
*/
- if (uvmexp.free > uvmexp.reserve_kernel ||
- uvmexp.paging == 0)
+
+ s = splbio();
+ simple_lock(&uvm.aiodoned_lock);
+ if (TAILQ_FIRST(&uvm.aio_done) == NULL) {
+ UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
+ UVM_UNLOCK_AND_WAIT(&uvm.aiodoned,
+ &uvm.aiodoned_lock, FALSE, "aiodoned", 0);
+ UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
+
+ /* relock aiodoned_lock, still at splbio */
+ simple_lock(&uvm.aiodoned_lock);
+ }
+
+ /*
+ * check for done aio structures
+ */
+
+ bp = TAILQ_FIRST(&uvm.aio_done);
+ if (bp) {
+ TAILQ_INIT(&uvm.aio_done);
+ }
+
+ simple_unlock(&uvm.aiodoned_lock);
+ splx(s);
+
+ /*
+ * process each i/o that's done.
+ */
+
+ free = uvmexp.free;
+ while (bp != NULL) {
+ if (bp->b_flags & B_PDAEMON) {
+ uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
+ }
+ nbp = TAILQ_NEXT(bp, b_freelist);
+ (*bp->b_iodone)(bp);
+ bp = nbp;
+ }
+ if (free <= uvmexp.reserve_kernel) {
+ s = uvm_lock_fpageq();
+ wakeup(&uvm.pagedaemon);
+ uvm_unlock_fpageq(s);
+ } else {
+ simple_lock(&uvm.pagedaemon_lock);
wakeup(&uvmexp.free);
+ simple_unlock(&uvm.pagedaemon_lock);
+ }
}
- /*NOTREACHED*/
}
+
+
/*
- * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
- * its own function for ease of reading.
+ * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
*
* => called with page queues locked
* => we work on meeting our free target by converting inactive pages
@@ -527,6 +569,7 @@ uvmpd_scan_inactive(pglst)
* this page is dirty, skip it if we'll have met our
* free target when all the current pageouts complete.
*/
+
if (free + uvmexp.paging > uvmexp.freetarg << 2) {
if (anon) {
simple_unlock(&anon->an_lock);
@@ -542,11 +585,8 @@ uvmpd_scan_inactive(pglst)
* reactivate it so that we eventually cycle
* all pages thru the inactive queue.
*/
-#ifdef DIAGNOSTIC
- if (uvmexp.swpgonly > uvmexp.swpages) {
- panic("uvmexp.swpgonly botch");
- }
-#endif
+
+ KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
if ((p->pqflags & PQ_SWAPBACKED) &&
uvmexp.swpgonly == uvmexp.swpages) {
dirtyreacts++;
@@ -564,11 +604,8 @@ uvmpd_scan_inactive(pglst)
* is full, free any swap allocated to the page
* so that other pages can be paged out.
*/
-#ifdef DIAGNOSTIC
- if (uvmexp.swpginuse > uvmexp.swpages) {
- panic("uvmexp.swpginuse botch");
- }
-#endif
+
+ KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
if ((p->pqflags & PQ_SWAPBACKED) &&
uvmexp.swpginuse == uvmexp.swpages) {
@@ -621,13 +658,11 @@ uvmpd_scan_inactive(pglst)
/*
* start new cluster (if necessary)
*/
+
if (swslot == 0) {
- /* want this much */
swnpages = MAXBSIZE >> PAGE_SHIFT;
-
swslot = uvm_swap_alloc(&swnpages,
TRUE);
-
if (swslot == 0) {
/* no swap? give up! */
p->flags &= ~PG_BUSY;
@@ -646,6 +681,7 @@ uvmpd_scan_inactive(pglst)
/*
* add block to cluster
*/
+
swpps[swcpages] = p;
if (anon)
anon->an_swslot = swslot + swcpages;
@@ -654,11 +690,7 @@ uvmpd_scan_inactive(pglst)
p->offset >> PAGE_SHIFT,
swslot + swcpages);
swcpages++;
-
- /* done (swap-backed) */
}
-
- /* end: if (p) ["if we have new page to consider"] */
} else {
/* if p == NULL we must be doing a last swap i/o */
@@ -666,16 +698,16 @@ uvmpd_scan_inactive(pglst)
}
/*
- * now consider doing the pageout.
+ * now consider doing the pageout.
*
- * for swap-backed pages, we do the pageout if we have either
- * filled the cluster (in which case (swnpages == swcpages) or
+ * for swap-backed pages, we do the pageout if we have either
+ * filled the cluster (in which case (swnpages == swcpages) or
* run out of pages (p == NULL).
*
* for object pages, we always do the pageout.
*/
- if (swap_backed) {
+ if (swap_backed) {
if (p) { /* if we just added a page to cluster */
if (anon)
simple_unlock(&anon->an_lock);
@@ -698,21 +730,18 @@ uvmpd_scan_inactive(pglst)
if (swcpages < swnpages) {
uvm_swap_free(swslot + swcpages,
(swnpages - swcpages));
- }
-
+ }
} else {
-
/* normal object pageout */
ppsp = pps;
npages = sizeof(pps) / sizeof(struct vm_page *);
/* not looked at because PGO_ALLPAGES is set */
start = 0;
-
}
/*
* now do the pageout.
- *
+ *
* for swap_backed pages we have already built the cluster.
* for !swap_backed pages, uvm_pager_put will call the object's
* "make put cluster" function to build a cluster on our behalf.
@@ -733,7 +762,7 @@ uvmpd_scan_inactive(pglst)
/* locked: uobj (if !swap_backed), page queues */
uvmexp.pdpageouts++;
- result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
+ result = uvm_pager_put(swap_backed ? NULL : uobj, p,
&ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
/* locked: uobj (if !swap_backed && result != PEND) */
/* unlocked: pageqs, object (if swap_backed ||result == PEND) */
@@ -761,21 +790,29 @@ uvmpd_scan_inactive(pglst)
if (result == VM_PAGER_PEND) {
uvmexp.paging += npages;
- uvm_lock_pageq(); /* relock page queues */
+ uvm_lock_pageq();
uvmexp.pdpending++;
if (p) {
if (p->pqflags & PQ_INACTIVE)
- /* reload! */
- nextpg = p->pageq.tqe_next;
+ nextpg = TAILQ_NEXT(p, pageq);
else
- /* reload! */
- nextpg = pglst->tqh_first;
- } else {
- nextpg = NULL; /* done list */
+ nextpg = TAILQ_FIRST(pglst);
+ } else {
+ nextpg = NULL;
}
continue;
}
+#ifdef UBC
+ if (result == VM_PAGER_ERROR &&
+ curproc == uvm.pagedaemon_proc) {
+ uvm_lock_pageq();
+ nextpg = TAILQ_NEXT(p, pageq);
+ uvm_pageactivate(p);
+ continue;
+ }
+#endif
+
/*
* clean up "p" if we have one
*/
@@ -836,24 +873,19 @@ uvmpd_scan_inactive(pglst)
pmap_page_protect(p, VM_PROT_NONE);
anon = NULL;
uvm_lock_pageq();
- nextpg = p->pageq.tqe_next;
+ nextpg = TAILQ_NEXT(p, pageq);
/* free released page */
uvm_pagefree(p);
} else {
-#ifdef DIAGNOSTIC
- if (uobj->pgops->pgo_releasepg == NULL)
- panic("pagedaemon: no "
- "pgo_releasepg function");
-#endif
-
- /*
+ /*
* pgo_releasepg nukes the page and
* gets "nextpg" for us. it returns
* with the page queues locked (when
* given nextpg ptr).
*/
+
if (!uobj->pgops->pgo_releasepg(p,
&nextpg))
/* uobj died after release */
@@ -863,35 +895,27 @@ uvmpd_scan_inactive(pglst)
* lock page queues here so that they're
* always locked at the end of the loop.
*/
+
uvm_lock_pageq();
}
-
} else { /* page was not released during I/O */
-
uvm_lock_pageq();
- nextpg = p->pageq.tqe_next;
-
+ nextpg = TAILQ_NEXT(p, pageq);
if (result != VM_PAGER_OK) {
-
/* pageout was a failure... */
if (result != VM_PAGER_AGAIN)
uvm_pageactivate(p);
pmap_clear_reference(p);
/* XXXCDC: if (swap_backed) FREE p's
* swap block? */
-
} else {
-
/* pageout was a success... */
pmap_clear_reference(p);
pmap_clear_modify(p);
p->flags |= PG_CLEAN;
- /* XXX: could free page here, but old
- * pagedaemon does not */
-
}
}
-
+
/*
* drop object lock (if there is an object left). do
* a safety check of nextpg to make sure it is on the
@@ -905,26 +929,27 @@ uvmpd_scan_inactive(pglst)
else if (uobj)
simple_unlock(&uobj->vmobjlock);
- } /* if (p) */ else {
+ } else {
+
+ /*
+ * if p is null in this loop, make sure it stays null
+ * in the next loop.
+ */
- /* if p is null in this loop, make sure it stays null
- * in next loop */
nextpg = NULL;
/*
* lock page queues here just so they're always locked
* at the end of the loop.
*/
+
uvm_lock_pageq();
}
if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
- printf("pagedaemon: invalid nextpg! reverting to "
- "queue head\n");
- nextpg = pglst->tqh_first; /* reload! */
+ nextpg = TAILQ_FIRST(pglst); /* reload! */
}
-
- } /* end of "inactive" 'for' loop */
+ }
return (retval);
}
@@ -944,10 +969,8 @@ uvmpd_scan()
UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
uvmexp.pdrevs++; /* counter */
+ uobj = NULL;
-#ifdef __GNUC__
- uobj = NULL; /* XXX gcc */
-#endif
/*
* get current "free" page count
*/
@@ -961,13 +984,11 @@ uvmpd_scan()
* we need to unlock the page queues for this.
*/
if (free < uvmexp.freetarg) {
-
uvmexp.pdswout++;
UVMHIST_LOG(pdhist," free %d < target %d: swapout", free,
uvmexp.freetarg, 0, 0);
uvm_unlock_pageq();
uvm_swapout_threads();
- pmap_update(); /* update so we can scan inactive q */
uvm_lock_pageq();
}
@@ -983,8 +1004,8 @@ uvmpd_scan()
UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
/*
- * do loop #1! alternate starting queue between swap and object based
- * on the low bit of uvmexp.pdrevs (which we bump by one each call).
+ * alternate starting queue between swap and object based on the
+ * low bit of uvmexp.pdrevs (which we bump by one each call).
*/
got_it = FALSE;
@@ -1008,6 +1029,7 @@ uvmpd_scan()
* detect if we're not going to be able to page anything out
* until we free some swap resources from active pages.
*/
+
swap_shortage = 0;
if (uvmexp.free < uvmexp.freetarg &&
uvmexp.swpginuse == uvmexp.swpages &&
@@ -1015,13 +1037,13 @@ uvmpd_scan()
pages_freed == 0) {
swap_shortage = uvmexp.freetarg - uvmexp.free;
}
-
+
UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%d swap_shortage=%d",
inactive_shortage, swap_shortage,0,0);
- for (p = TAILQ_FIRST(&uvm.page_active);
+ for (p = TAILQ_FIRST(&uvm.page_active);
p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
p = nextpg) {
- nextpg = p->pageq.tqe_next;
+ nextpg = TAILQ_NEXT(p, pageq);
if (p->flags & PG_BUSY)
continue; /* quick check before trying to lock */
@@ -1030,22 +1052,13 @@ uvmpd_scan()
*/
/* is page anon owned or ownerless? */
if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
-
-#ifdef DIAGNOSTIC
- if (p->uanon == NULL)
- panic("pagedaemon: page with no anon or "
- "object detected - loop 2");
-#endif
+ KASSERT(p->uanon != NULL);
if (!simple_lock_try(&p->uanon->an_lock))
continue;
/* take over the page? */
if ((p->pqflags & PQ_ANON) == 0) {
-#ifdef DIAGNOSTIC
- if (p->loan_count < 1)
- panic("pagedaemon: non-loaned "
- "ownerless page detected - loop 2");
-#endif
+ KASSERT(p->loan_count > 0);
p->loan_count--;
p->pqflags |= PQ_ANON;
}
@@ -1053,9 +1066,11 @@ uvmpd_scan()
if (!simple_lock_try(&p->uobject->vmobjlock))
continue;
}
+
/*
* skip this page if it's busy.
*/
+
if ((p->flags & PG_BUSY) != 0) {
if (p->pqflags & PQ_ANON)
simple_unlock(&p->uanon->an_lock);
@@ -1063,11 +1078,12 @@ uvmpd_scan()
simple_unlock(&p->uobject->vmobjlock);
continue;
}
-
+
/*
* if there's a shortage of swap, free any swap allocated
* to this page so that other pages can be paged out.
*/
+
if (swap_shortage > 0) {
if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) {
uvm_swap_free(p->uanon->an_swslot, 1);
@@ -1085,11 +1101,12 @@ uvmpd_scan()
}
}
}
-
+
/*
* deactivate this page if there's a shortage of
* inactive pages.
*/
+
if (inactive_shortage > 0) {
pmap_page_protect(p, VM_PROT_NONE);
/* no need to check wire_count as pg is "active" */
@@ -1097,7 +1114,6 @@ uvmpd_scan()
uvmexp.pddeact++;
inactive_shortage--;
}
-
if (p->pqflags & PQ_ANON)
simple_unlock(&p->uanon->an_lock);
else
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index e4bd678b122..e3447538d97 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.c,v 1.38 2001/11/07 02:55:50 art Exp $ */
+/* $OpenBSD: uvm_swap.c,v 1.39 2001/11/10 18:42:32 art Exp $ */
/* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */
/*
@@ -77,11 +77,6 @@
* by the "swap_priority" global var. each "swappri" contains a
* CIRCLEQ of "swapdev" structures at that priority.
*
- * the system maintains a fixed pool of "swapbuf" structures for use
- * at swap i/o time. a swapbuf includes a "buf" structure and an
- * "aiodone" [we want to avoid malloc()'ing anything at swapout time
- * since memory may be low].
- *
* locking:
* - swap_syscall_lock (sleep lock): this lock serializes the swapctl
* system call and prevents the swap priority list from changing
@@ -89,8 +84,6 @@
* - uvm.swap_data_lock (simple_lock): this lock protects all swap data
* structures including the priority list, the swapdev structures,
* and the swapmap extent.
- * - swap_buf_lock (simple_lock): this lock protects the free swapbuf
- * pool.
*
* each swap device has the following info:
* - swap device in use (could be disabled, preventing future use)
@@ -172,15 +165,6 @@ struct swappri {
};
/*
- * swapbuf, swapbuffer plus async i/o info
- */
-struct swapbuf {
- struct buf sw_buf; /* a buffer structure */
- struct uvm_aiodesc sw_aio; /* aiodesc structure, used if ASYNC */
- SIMPLEQ_ENTRY(swapbuf) sw_sq; /* free list pointer */
-};
-
-/*
* The following two structures are used to keep track of data transfers
* on swap devices associated with regular files.
* NOTE: this code is more or less a copy of vnd.c; we use the same
@@ -236,8 +220,6 @@ cdev_decl(sw);
* local variables
*/
static struct extent *swapmap; /* controls the mapping of /dev/drum */
-SIMPLEQ_HEAD(swapbufhead, swapbuf);
-struct pool swapbuf_pool;
/* list of all active swap devices [by priority] */
LIST_HEAD(swap_priority, swappri);
@@ -264,8 +246,6 @@ static void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
static void sw_reg_iodone __P((struct buf *));
static void sw_reg_start __P((struct swapdev *));
-static void uvm_swap_aiodone __P((struct uvm_aiodesc *));
-static void uvm_swap_bufdone __P((struct buf *));
static int uvm_swap_io __P((struct vm_page **, int, int, int));
static void swapmount __P((void));
@@ -273,7 +253,6 @@ static void swapmount __P((void));
#ifdef UVM_SWAP_ENCRYPT
/* for swap encrypt */
boolean_t uvm_swap_allocpages __P((struct vm_page **, int));
-void uvm_swap_freepages __P((struct vm_page **, int));
void uvm_swap_markdecrypt __P((struct swapdev *, int, int, int));
boolean_t uvm_swap_needdecrypt __P((struct swapdev *, int));
void uvm_swap_initcrypt __P((struct swapdev *, int));
@@ -317,16 +296,10 @@ uvm_swap_init()
panic("uvm_swap_init: extent_create failed");
/*
- * allocate our private pool of "swapbuf" structures (includes
- * a "buf" structure). ["nswbuf" comes from param.c and can
- * be adjusted by MD code before we get here].
+ * allocate pools for structures used for swapping to files.
*/
- pool_init(&swapbuf_pool, sizeof(struct swapbuf), 0, 0, 0, "swp buf", 0,
- NULL, NULL, 0);
- /* XXX - set a maximum on swapbuf_pool? */
-
pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx",
0, NULL, NULL, 0);
@@ -1256,7 +1229,7 @@ swstrategy(bp)
* be yanked out from under us because we are holding resources
* in it (i.e. the blocks we are doing I/O on).
*/
- pageno = dbtob(bp->b_blkno) >> PAGE_SHIFT;
+ pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
simple_lock(&uvm.swap_data_lock);
sdp = swapdrum_getsdp(pageno);
simple_unlock(&uvm.swap_data_lock);
@@ -1275,7 +1248,7 @@ swstrategy(bp)
pageno -= sdp->swd_drumoffset; /* page # on swapdev */
bn = btodb(pageno << PAGE_SHIFT); /* convert to diskblock */
- UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld\n",
+ UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld",
((bp->b_flags & B_READ) == 0) ? "write" : "read",
sdp->swd_drumoffset, bn, bp->b_bcount);
@@ -1393,18 +1366,15 @@ sw_reg_strategy(sdp, bp, bn)
/*
* compute the size ("sz") of this transfer (in bytes).
- * XXXCDC: ignores read-ahead for non-zero offset
*/
- if ((off = (byteoff % sdp->swd_bsize)) != 0)
- sz = sdp->swd_bsize - off;
- else
- sz = (1 + nra) * sdp->swd_bsize;
-
- if (resid < sz)
+ off = byteoff % sdp->swd_bsize;
+ sz = (1 + nra) * sdp->swd_bsize - off;
+ if (sz > resid)
sz = resid;
- UVMHIST_LOG(pdhist, "sw_reg_strategy: vp %p/%p offset 0x%x/0x%x",
- sdp->swd_vp, vp, byteoff, nbn);
+ UVMHIST_LOG(pdhist, "sw_reg_strategy: "
+ "vp %p/%p offset 0x%x/0x%x",
+ sdp->swd_vp, vp, byteoff, nbn);
/*
* now get a buf structure. note that the vb_buf is
@@ -1466,7 +1436,7 @@ sw_reg_strategy(sdp, bp, bn)
vnx->vx_pending++;
/* assoc new buffer with underlying vnode */
- bgetvp(vp, &nbp->vb_buf);
+ bgetvp(vp, &nbp->vb_buf);
/* sort it in and start I/O if we are not over our limit */
disksort(&sdp->swd_tab, &nbp->vb_buf);
@@ -1525,6 +1495,7 @@ sw_reg_start(sdp)
bp, bp->b_vp, bp->b_blkno, bp->b_bcount);
if ((bp->b_flags & B_READ) == 0)
bp->b_vp->v_numoutput++;
+
VOP_STRATEGY(bp);
}
sdp->swd_flags &= ~SWF_BUSY;
@@ -1571,14 +1542,7 @@ sw_reg_iodone(bp)
/*
* disassociate this buffer from the vnode (if any).
*/
- if (vbp->vb_buf.b_vp != NULLVP) {
- brelvp(&vbp->vb_buf);
- }
-
- /*
- * disassociate this buffer from the vnode (if any).
- */
- if (vbp->vb_buf.b_vp != NULLVP) {
+ if (vbp->vb_buf.b_vp != NULL) {
brelvp(&vbp->vb_buf);
}
@@ -1719,8 +1683,9 @@ uvm_swap_markbad(startslot, nslots)
* we assume here that the range of slots will all be within
* one swap device.
*/
- sdp->swd_npgbad += nslots;
+ sdp->swd_npgbad += nslots;
+ UVMHIST_LOG(pdhist, "now %d bad", sdp->swd_npgbad, 0,0,0);
simple_unlock(&uvm.swap_data_lock);
}
@@ -1870,10 +1835,10 @@ uvm_swap_io(pps, startslot, npages, flags)
int startslot, npages, flags;
{
daddr_t startblk;
- struct swapbuf *sbp;
struct buf *bp;
vaddr_t kva;
int result, s, mapinflags, pflag;
+ boolean_t write, async;
#ifdef UVM_SWAP_ENCRYPT
vaddr_t dstkva;
struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
@@ -1885,6 +1850,9 @@ uvm_swap_io(pps, startslot, npages, flags)
UVMHIST_LOG(pdhist, "<- called, startslot=%d, npages=%d, flags=%d",
startslot, npages, flags, 0);
+ write = (flags & B_READ) == 0;
+ async = (flags & B_ASYNC) != 0;
+
/*
* convert starting drum slot to block number
*/
@@ -1892,20 +1860,17 @@ uvm_swap_io(pps, startslot, npages, flags)
/*
* first, map the pages into the kernel (XXX: currently required
- * by buffer system). note that we don't let pagermapin alloc
- * an aiodesc structure because we don't want to chance a malloc.
- * we've got our own pool of aiodesc structures (in swapbuf).
+ * by buffer system).
*/
- mapinflags = (flags & B_READ) ? UVMPAGER_MAPIN_READ :
- UVMPAGER_MAPIN_WRITE;
- if ((flags & B_ASYNC) == 0)
+ mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
+ if (!async)
mapinflags |= UVMPAGER_MAPIN_WAITOK;
- kva = uvm_pagermapin(pps, npages, NULL, mapinflags);
+ kva = uvm_pagermapin(pps, npages, mapinflags);
if (kva == 0)
return (VM_PAGER_AGAIN);
#ifdef UVM_SWAP_ENCRYPT
- if ((flags & B_READ) == 0) {
+ if (write) {
/*
* Check if we need to do swap encryption on old pages.
* Later we need a different scheme, that swap encrypts
@@ -1934,7 +1899,7 @@ uvm_swap_io(pps, startslot, npages, flags)
/*
* encrypt to swap
*/
- if ((flags & B_READ) == 0 && encrypt) {
+ if (write && encrypt) {
int i, opages;
caddr_t src, dst;
struct swap_key *key;
@@ -1943,7 +1908,7 @@ uvm_swap_io(pps, startslot, npages, flags)
/* We always need write access. */
swmapflags = UVMPAGER_MAPIN_READ;
- if ((flags & B_ASYNC) == 0)
+ if (!async)
swmapflags |= UVMPAGER_MAPIN_WAITOK;
if (!uvm_swap_allocpages(tpps, npages)) {
@@ -1951,7 +1916,7 @@ uvm_swap_io(pps, startslot, npages, flags)
return (VM_PAGER_AGAIN);
}
- dstkva = uvm_pagermapin(tpps, npages, NULL, swmapflags);
+ dstkva = uvm_pagermapin(tpps, npages, swmapflags);
if (dstkva == NULL) {
uvm_pagermapout(kva, npages);
uvm_swap_freepages(tpps, npages);
@@ -1985,22 +1950,20 @@ uvm_swap_io(pps, startslot, npages, flags)
#endif /* UVM_SWAP_ENCRYPT */
/*
- * now allocate a swap buffer off of freesbufs
+ * now allocate a buf for the i/o.
* [make sure we don't put the pagedaemon to sleep...]
*/
s = splbio();
- pflag = ((flags & B_ASYNC) != 0 || curproc == uvm.pagedaemon_proc)
- ? 0
- : PR_WAITOK;
- sbp = pool_get(&swapbuf_pool, pflag);
- splx(s); /* drop splbio */
+ pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK;
+ bp = pool_get(&bufpool, pflag);
+ splx(s);
/*
* if we failed to get a swapbuf, return "try again"
*/
- if (sbp == NULL) {
+ if (bp == NULL) {
#ifdef UVM_SWAP_ENCRYPT
- if ((flags & B_READ) == 0 && encrypt) {
+ if (write && encrypt) {
int i;
/* swap encrypt needs cleanup */
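
The pflag choice above is a deadlock guard: an async caller, and in
particular the pagedaemon itself, must never sleep in pool_get()
waiting for memory that only the pagedaemon can free, so the
allocation fails fast and the operation backs out with
VM_PAGER_AGAIN. A condensed sketch of the pattern, with the unwind
step (uvm_pagermapout of the earlier mapping) assumed from context:

	s = splbio();
	pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK;
	bp = pool_get(&bufpool, pflag);	/* may be NULL when pflag == 0 */
	splx(s);
	if (bp == NULL) {
		/* undo what we took so far; caller retries later */
		uvm_pagermapout(kva, npages);
		return (VM_PAGER_AGAIN);
	}
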
@@ -2019,15 +1982,17 @@ uvm_swap_io(pps, startslot, npages, flags)
* prevent ASYNC reads.
* uvm_swap_io is only called from uvm_swap_get, uvm_swap_get
* assumes that all gets are SYNCIO. Just make sure here.
+ * XXXARTUBC - might not be true anymore.
*/
- if (flags & B_READ)
+ if (!write) {
flags &= ~B_ASYNC;
+ async = 0;
+ }
#endif
/*
- * fill in the bp/sbp. we currently route our i/o through
+ * fill in the bp. we currently route our i/o through
* /dev/drum's vnode [swapdev_vp].
*/
- bp = &sbp->sw_buf;
bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC));
bp->b_proc = &proc0; /* XXX */
bp->b_rcred = bp->b_wcred = proc0.p_ucred;
@@ -2039,13 +2004,13 @@ uvm_swap_io(pps, startslot, npages, flags)
bp->b_vp = NULL;
buf_replacevnode(bp, swapdev_vp);
splx(s);
- bp->b_bcount = npages << PAGE_SHIFT;
+ bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT;
/*
* for pageouts we must set "dirtyoff" [NFS client code needs it].
* and we bump v_numoutput (counter of number of active outputs).
*/
- if ((bp->b_flags & B_READ) == 0) {
+ if (write) {
bp->b_dirtyoff = 0;
bp->b_dirtyend = npages << PAGE_SHIFT;
#ifdef UVM_SWAP_ENCRYPT
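
A buf taken straight from bufpool arrives uninitialized, so every
field the strategy and completion paths will read has to be filled in
by hand, b_bufsize included. A sketch of the setup these hunks build
up; the b_data and b_blkno assignments are assumed context, not shown
above:

	bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ | B_ASYNC));
	bp->b_proc = &proc0;		/* XXX */
	bp->b_data = (caddr_t)kva;	/* assumed: kernel mapping of pps */
	bp->b_blkno = startblk;		/* assumed: drum block of startslot */
	bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT;
	if (write) {
		/* NFS needs an explicit dirty range on pageouts */
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = npages << PAGE_SHIFT;
	}
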
@@ -2059,33 +2024,29 @@ uvm_swap_io(pps, startslot, npages, flags)
}
/*
- * for async ops we must set up the aiodesc and setup the callback
- * XXX: we expect no async-reads, but we don't prevent it here.
- */
- if (flags & B_ASYNC) {
- sbp->sw_aio.aiodone = uvm_swap_aiodone;
- sbp->sw_aio.kva = kva;
- sbp->sw_aio.npages = npages;
- sbp->sw_aio.pd_ptr = sbp; /* backpointer */
- bp->b_flags |= B_CALL; /* set callback */
- bp->b_iodone = uvm_swap_bufdone;/* "buf" iodone function */
+ * for async ops we must set up the iodone handler.
+ */
+ if (async) {
+ bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
+ B_PDAEMON : 0);
+ bp->b_iodone = uvm_aio_biodone;
UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
}
UVMHIST_LOG(pdhist,
- "about to start io: data = 0x%p blkno = 0x%x, bcount = %ld",
+ "about to start io: data = %p blkno = 0x%x, bcount = %ld",
bp->b_data, bp->b_blkno, bp->b_bcount, 0);
/*
* now we start the I/O, and if async, return.
*/
VOP_STRATEGY(bp);
- if (flags & B_ASYNC)
+ if (async)
return (VM_PAGER_PEND);
/*
* must be sync i/o. wait for it to finish
*/
- bp->b_error = biowait(bp);
+ (void) biowait(bp);
result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
#ifdef UVM_SWAP_ENCRYPT
@@ -2124,13 +2085,15 @@ uvm_swap_io(pps, startslot, npages, flags)
uvm_swap_freepages(tpps, npages);
#endif
/*
- * now dispose of the swap buffer
+ * now dispose of the buf
*/
s = splbio();
if (bp->b_vp)
brelvp(bp);
- pool_put(&swapbuf_pool, sbp);
+ if (write && bp->b_vp)
+ vwakeup(bp->b_vp);
+ pool_put(&bufpool, bp);
splx(s);
/*
@@ -2140,112 +2103,6 @@ uvm_swap_io(pps, startslot, npages, flags)
return (result);
}
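
Taken together, the tail of uvm_swap_io() now has one completion
pattern per mode: async I/O arms a B_CALL callback and returns
VM_PAGER_PEND immediately, while sync I/O sleeps in biowait() and
converts B_ERROR into the pager result. In outline (the B_PDAEMON
marking of pagedaemon-issued I/O is left out of this sketch):

	if (async) {
		bp->b_flags |= B_CALL;		/* biodone() calls b_iodone */
		bp->b_iodone = uvm_aio_biodone;
	}
	VOP_STRATEGY(bp);
	if (async)
		return (VM_PAGER_PEND);		/* aiodoned finishes it */

	(void) biowait(bp);			/* sync: sleep until done */
	result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
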
-/*
- * uvm_swap_bufdone: called from the buffer system when the i/o is done
- */
-static void
-uvm_swap_bufdone(bp)
- struct buf *bp;
-{
- struct swapbuf *sbp = (struct swapbuf *) bp;
- int s = splbio();
- UVMHIST_FUNC("uvm_swap_bufdone"); UVMHIST_CALLED(pdhist);
-
- UVMHIST_LOG(pdhist, "cleaning buf %p", buf, 0, 0, 0);
-#ifdef DIAGNOSTIC
- /*
- * sanity check: swapbufs are private, so they shouldn't be wanted
- */
- if (bp->b_flags & B_WANTED)
- panic("uvm_swap_bufdone: private buf wanted");
-#endif
-
- /*
- * drop the buffer's reference to the vnode.
- */
- if (bp->b_vp)
- brelvp(bp);
-
- /*
- * now put the aio on the uvm.aio_done list and wake the
- * pagedaemon (which will finish up our job in its context).
- */
- simple_lock(&uvm.pagedaemon_lock); /* locks uvm.aio_done */
- TAILQ_INSERT_TAIL(&uvm.aio_done, &sbp->sw_aio, aioq);
- simple_unlock(&uvm.pagedaemon_lock);
-
- wakeup(&uvm.pagedaemon);
- splx(s);
-}
-
-/*
- * uvm_swap_aiodone: aiodone function for anonymous memory
- *
- * => this is called in the context of the pagedaemon (but with the
- * page queues unlocked!)
- * => our "aio" structure must be part of a "swapbuf"
- */
-static void
-uvm_swap_aiodone(aio)
- struct uvm_aiodesc *aio;
-{
- struct swapbuf *sbp = aio->pd_ptr;
- struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT];
- int lcv, s;
- vaddr_t addr;
- UVMHIST_FUNC("uvm_swap_aiodone"); UVMHIST_CALLED(pdhist);
-
- UVMHIST_LOG(pdhist, "done with aio %p", aio, 0, 0, 0);
-#ifdef DIAGNOSTIC
- /*
- * sanity check
- */
- if (aio->npages > (MAXBSIZE >> PAGE_SHIFT))
- panic("uvm_swap_aiodone: aio too big!");
-#endif
-
- /*
- * first, we have to recover the page pointers (pps) by poking in the
- * kernel pmap (XXX: should be saved in the buf structure).
- */
- for (addr = aio->kva, lcv = 0 ; lcv < aio->npages ;
- addr += PAGE_SIZE, lcv++) {
- pps[lcv] = uvm_pageratop(addr);
- }
-
- /*
- * now we can dispose of the kernel mappings of the buffer
- */
- uvm_pagermapout(aio->kva, aio->npages);
-
- /*
- * now we can dispose of the pages by using the dropcluster function
- * [note that we have no "page of interest" so we pass in null]
- */
-
-#ifdef UVM_SWAP_ENCRYPT
- /*
- * XXX - assumes that we only get ASYNC writes. used to be above.
- */
- if (pps[0]->pqflags & PQ_ENCRYPT)
- uvm_swap_freepages(pps, aio->npages);
- else
-#endif /* UVM_SWAP_ENCRYPT */
- uvm_pager_dropcluster(NULL, NULL, pps, &aio->npages,
- PGO_PDFREECLUST);
-
- /*
- * finally, we can dispose of the swapbuf
- */
- s = splbio();
- pool_put(&swapbuf_pool, sbp);
- splx(s);
-
- /*
- * done!
- */
-}
-
static void
swapmount()
{
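
The two deleted functions are subsumed by the generic
uvm_aio_biodone() handler in uvm_pager.c (not part of this file's
diff), which queues the buf itself instead of a separate aiodesc and
wakes the new aiodoned thread rather than the pagedaemon. A hedged
sketch of that handoff, assuming the done list is chained through
b_freelist and guarded by the aiodoned lock and wait channel added
alongside this change:

void
uvm_aio_biodone_sketch(struct buf *bp)	/* hypothetical stand-in */
{
	int s = splbio();

	/* no aiodesc anymore: the buf itself carries everything */
	simple_lock(&uvm.aiodoned_lock);
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);	/* aiodoned completes it in thread context */
	simple_unlock(&uvm.aiodoned_lock);
	splx(s);
}
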
diff --git a/sys/uvm/uvm_swap.h b/sys/uvm/uvm_swap.h
index 3108dd10194..12db66a657b 100644
--- a/sys/uvm/uvm_swap.h
+++ b/sys/uvm/uvm_swap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.h,v 1.9 2001/11/07 02:55:51 art Exp $ */
+/* $OpenBSD: uvm_swap.h,v 1.10 2001/11/10 18:42:32 art Exp $ */
/* $NetBSD: uvm_swap.h,v 1.5 2000/01/11 06:57:51 chs Exp $ */
/*
@@ -45,6 +45,7 @@ void uvm_swap_free __P((int, int));
void uvm_swap_markbad __P((int, int));
#ifdef UVM_SWAP_ENCRYPT
void uvm_swap_initcrypt_all __P((void));
+void uvm_swap_freepages __P((struct vm_page **, int));
#endif
#endif /* _KERNEL */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index e5e954eb9df..e921e4fb846 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.23 2001/11/07 02:55:51 art Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.24 2001/11/10 18:42:32 art Exp $ */
/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
/*
@@ -1609,7 +1609,7 @@ uvn_io(uvn, pps, npages, flags, rw)
mapinflags = (rw == UIO_READ) ?
UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
- kva = uvm_pagermapin(pps, npages, NULL, mapinflags);
+ kva = uvm_pagermapin(pps, npages, mapinflags);
if (kva == 0 && waitf == M_NOWAIT) {
simple_unlock(&uvn->u_obj.vmobjlock);
UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
@@ -1626,7 +1626,7 @@ uvn_io(uvn, pps, npages, flags, rw)
simple_unlock(&uvn->u_obj.vmobjlock);
/* NOTE: object now unlocked */
if (kva == 0)
- kva = uvm_pagermapin(pps, npages, NULL,
+ kva = uvm_pagermapin(pps, npages,
mapinflags | UVMPAGER_MAPIN_WAITOK);
/*
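
With the aiodesc argument gone, uvm_pagermapin() takes only the
pages, the count, and the flags. uvn_io() keeps its two-step pattern:
a non-sleeping attempt while the object lock is held, then, once the
lock has been dropped, a retry with UVMPAGER_MAPIN_WAITOK. In
outline:

	/* object locked: must not sleep, so no WAITOK yet */
	kva = uvm_pagermapin(pps, npages, mapinflags);
	if (kva == 0 && waitf == M_NOWAIT) {
		simple_unlock(&uvn->u_obj.vmobjlock);
		return (VM_PAGER_AGAIN);
	}

	simple_unlock(&uvn->u_obj.vmobjlock);
	/* unlocked: now it is safe to wait for pager va */
	if (kva == 0)
		kva = uvm_pagermapin(pps, npages,
		    mapinflags | UVMPAGER_MAPIN_WAITOK);
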