author     Artur Grabowski <art@cvs.openbsd.org>    2001-11-10 18:42:33 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>    2001-11-10 18:42:33 +0000
commit     bbe8f62ec65096ee1bb8589c0f3ade29b85553c4 (patch)
tree       4e43b942be0409169ada8cef2d0708a064d84fd9 /sys
parent     89424e542479723fbb4744a2c96edff04f29fb24 (diff)
Merge in some parts of the UBC work that has been done in NetBSD that are not
UBC itself, but prerequisites for it.
- Create a daemon that processes async I/O requests (swap now; paging in
  the future) that must run in process context and that were previously
  handled by the pagedaemon (sketched below).
- Convert some ugly ifdef DIAGNOSTIC code to less intrusive KASSERTs.
- Misc other cleanups.
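For context, the new daemon follows the usual split-completion pattern: the
interrupt-level biodone hook only queues the finished buf and wakes the
thread, and the thread then runs the real completion handler with a process
context available (so it can sleep, take locks, and free resources). A
condensed sketch of the two halves, taken from the uvm_pager.c and
uvm_pdaemon.c hunks below, with the UVMHIST tracing and the
pagedaemon/free-page accounting trimmed for brevity:

/* interrupt context, at splbio(): just queue the buf and wake the thread */
void
uvm_aio_biodone(struct buf *bp)
{
	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	simple_lock(&uvm.aiodoned_lock);	/* locks uvm.aio_done */
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	simple_unlock(&uvm.aiodoned_lock);
}

/* process context: main loop of the "aiodoned" kernel thread */
void
uvm_aiodone_daemon(void *arg)
{
	struct buf *bp, *nbp;
	int s;

	for (;;) {
		/*
		 * sleep until uvm_aio_biodone() queues something;
		 * splbio keeps the list stable against interrupts.
		 */
		s = splbio();
		simple_lock(&uvm.aiodoned_lock);
		if (TAILQ_FIRST(&uvm.aio_done) == NULL) {
			UVM_UNLOCK_AND_WAIT(&uvm.aiodoned,
			    &uvm.aiodoned_lock, FALSE, "aiodoned", 0);
			simple_lock(&uvm.aiodoned_lock);
		}

		/* steal the whole done list in one step */
		bp = TAILQ_FIRST(&uvm.aio_done);
		if (bp != NULL)
			TAILQ_INIT(&uvm.aio_done);
		simple_unlock(&uvm.aiodoned_lock);
		splx(s);

		/* run each completion where it is allowed to sleep */
		while (bp != NULL) {
			nbp = TAILQ_NEXT(bp, b_freelist);
			(*bp->b_iodone)(bp);
			bp = nbp;
		}
	}
}

The thread itself is created as process 6 ("aiodoned") in init_main.c,
right after the update daemon, bumping the crypto thread to process 7.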
Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/init_main.c  |   8
-rw-r--r--  sys/uvm/uvm.h         |  15
-rw-r--r--  sys/uvm/uvm_extern.h  |  11
-rw-r--r--  sys/uvm/uvm_page.c    | 163
-rw-r--r--  sys/uvm/uvm_page.h    |  80
-rw-r--r--  sys/uvm/uvm_pager.c   | 309
-rw-r--r--  sys/uvm/uvm_pager.h   |  37
-rw-r--r--  sys/uvm/uvm_pdaemon.c | 312
-rw-r--r--  sys/uvm/uvm_swap.c    | 249
-rw-r--r--  sys/uvm/uvm_swap.h    |   3
-rw-r--r--  sys/uvm/uvm_vnode.c   |   6
11 files changed, 667 insertions, 526 deletions
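One pattern that recurs throughout the diff below is the
DIAGNOSTIC-to-KASSERT conversion mentioned above. Its shape, using the
argument check from uvm_pagealloc_strat() in sys/uvm/uvm_page.c:

/* before: verbose, and only checked under 'options DIAGNOSTIC' */
#ifdef DIAGNOSTIC
	if (obj && anon)
		panic("uvm_pagealloc: obj and anon != NULL");
#endif

/* after: a single self-describing assertion */
	KASSERT(obj == NULL || anon == NULL);

KASSERT expands to nothing in kernels built without DIAGNOSTIC, so the
checks still cost nothing in normal builds while the source stays readable.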
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index e41148c9f95..c909a23141b 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.83 2001/11/07 01:18:01 art Exp $ */ +/* $OpenBSD: init_main.c,v 1.84 2001/11/10 18:42:31 art Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -417,8 +417,12 @@ main(framep) if (kthread_create(start_update, NULL, NULL, "update")) panic("fork update"); + /* Create process 6, the aiodone daemon kernel thread. */ + if (kthread_create(uvm_aiodone_daemon, NULL, NULL, "aiodoned")) + panic("fork aiodoned"); + #ifdef CRYPTO - /* Create process 6, the crypto kernel thread. */ + /* Create process 7, the crypto kernel thread. */ if (kthread_create(start_crypto, NULL, NULL, "crypto")) panic("crypto thread"); #endif /* CRYPTO */ diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 1a61e80df34..f3ff4214dd5 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm.h,v 1.13 2001/11/05 22:14:54 art Exp $ */ -/* $NetBSD: uvm.h,v 1.23 2000/06/26 14:21:16 mrg Exp $ */ +/* $OpenBSD: uvm.h,v 1.14 2001/11/10 18:42:31 art Exp $ */ +/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* * @@ -74,6 +74,7 @@ struct uvm { /* vm_page related parameters */ + /* vm_page queues */ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ struct pglist page_active; /* allocated pages, in use */ @@ -84,10 +85,17 @@ struct uvm { boolean_t page_init_done; /* TRUE if uvm_page_init() finished */ boolean_t page_idle_zero; /* TRUE if we should try to zero pages in the idle loop */ + /* page daemon trigger */ int pagedaemon; /* daemon sleeps on this */ struct proc *pagedaemon_proc; /* daemon's pid */ simple_lock_data_t pagedaemon_lock; + + /* aiodone daemon trigger */ + int aiodoned; /* daemon sleeps on this */ + struct proc *aiodoned_proc; /* daemon's pid */ + simple_lock_data_t aiodoned_lock; + /* page hash */ struct pglist *page_hash; /* page hash table (vp/off->page) */ int page_nhash; /* number of buckets */ @@ -103,7 +111,7 @@ struct uvm { simple_lock_data_t kentry_lock; /* aio_done is locked by uvm.pagedaemon_lock and splbio! */ - struct uvm_aiohead aio_done; /* done async i/o reqs */ + TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */ /* pager VM area bounds */ vaddr_t pager_sva; /* start of pager VA area */ @@ -143,6 +151,7 @@ extern struct uvm uvm; UVMHIST_DECL(maphist); UVMHIST_DECL(pdhist); +UVMHIST_DECL(ubchist); /* * UVM_UNLOCK_AND_WAIT: atomic unlock+wait... wrapper around the diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 24102f0437c..9dd59bd23a8 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.31 2001/11/09 03:32:23 art Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.32 2001/11/10 18:42:31 art Exp $ */ /* $NetBSD: uvm_extern.h,v 1.52 2000/11/27 04:36:40 nisimura Exp $ */ /* @@ -246,8 +246,11 @@ struct vm_map; struct vmspace; struct pmap; struct vnode; +struct pool; struct simplelock; +extern struct pool *uvm_aiobuf_pool; + /* * uvmexp: global data structures that are exported to parts of the kernel * other than the vm system. 
@@ -549,8 +552,14 @@ void uvm_page_physload __P((paddr_t, paddr_t, paddr_t, paddr_t, int)); void uvm_setpagesize __P((void)); +/* uvm_pager.c */ +void uvm_aio_biodone1 __P((struct buf *)); +void uvm_aio_biodone __P((struct buf *)); +void uvm_aio_aiodone __P((struct buf *)); + /* uvm_pdaemon.c */ void uvm_pageout __P((void *)); +void uvm_aiodone_daemon __P((void *)); /* uvm_pglist.c */ int uvm_pglistalloc __P((psize_t, paddr_t, diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 4e46822894a..3207e33020c 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_page.c,v 1.29 2001/11/07 02:55:50 art Exp $ */ -/* $NetBSD: uvm_page.c,v 1.43 2000/11/09 19:15:28 christos Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.30 2001/11/10 18:42:31 art Exp $ */ +/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -75,6 +75,7 @@ #include <sys/systm.h> #include <sys/malloc.h> #include <sys/sched.h> +#include <sys/kernel.h> #define UVM_PAGE /* pull in uvm_page.h functions */ #include <uvm/uvm.h> @@ -96,8 +97,18 @@ int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ * We therefore provide a way to disable it from machdep code here. */ +/* + * XXX disabled until we can find a way to do this without causing + * problems for either cpu caches or DMA latency. + */ boolean_t vm_page_zero_enable = FALSE; +#ifdef UBC +u_long uvm_pgcnt_anon; +u_long uvm_pgcnt_vnode; +extern struct uvm_pagerops uvm_vnodeops; +#endif + /* * local variables */ @@ -124,7 +135,7 @@ static struct pglist uvm_bootbucket; */ static void uvm_pageinsert __P((struct vm_page *)); - +static void uvm_pageremove __P((struct vm_page *)); /* * inline functions @@ -161,7 +172,6 @@ uvm_pageinsert(pg) TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */ pg->flags |= PG_TABLED; pg->uobject->uo_npages++; - } /* @@ -171,21 +181,14 @@ uvm_pageinsert(pg) * => caller must lock page queues */ -void __inline +static __inline void uvm_pageremove(pg) struct vm_page *pg; { struct pglist *buck; int s; -#ifdef DIAGNOSTIC - if ((pg->flags & (PG_FAULTING)) != 0) - panic("uvm_pageremove: page is faulting"); -#endif - - if ((pg->flags & PG_TABLED) == 0) - return; /* XXX: log */ - + KASSERT(pg->flags & PG_TABLED); buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; s = splimp(); simple_lock(&uvm.hashlock); @@ -193,6 +196,12 @@ uvm_pageremove(pg) simple_unlock(&uvm.hashlock); splx(s); +#ifdef UBC + if (pg->uobject->pgops == &uvm_vnodeops) { + uvm_pgcnt_vnode--; + } +#endif + /* object should be locked */ TAILQ_REMOVE(&pg->uobject->memq, pg, listq); @@ -200,7 +209,6 @@ uvm_pageremove(pg) pg->uobject->uo_npages--; pg->uobject = NULL; pg->version++; - } /* @@ -218,7 +226,6 @@ uvm_page_init(kvm_startp, kvm_endp) int lcv, i; paddr_t paddr; - /* * step 1: init the page queues and page queue locks */ @@ -239,7 +246,7 @@ uvm_page_init(kvm_startp, kvm_endp) */ uvm.page_nhash = 1; /* 1 bucket */ - uvm.page_hashmask = 0; /* mask for hash function */ + uvm.page_hashmask = 0; /* mask for hash function */ uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */ TAILQ_INIT(uvm.page_hash); /* init hash table */ simple_lock_init(&uvm.hashlock); /* init hash table lock */ @@ -292,7 +299,6 @@ uvm_page_init(kvm_startp, kvm_endp) */ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { - n = vm_physmem[lcv].end - vm_physmem[lcv].start; if (n > pagecount) { printf("uvm_page_init: lost %ld page(s) in init\n", @@ -318,6 +324,7 @@ uvm_page_init(kvm_startp, kvm_endp) 
} } } + /* * step 5: pass up the values of virtual_space_start and * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper @@ -328,10 +335,11 @@ uvm_page_init(kvm_startp, kvm_endp) *kvm_endp = trunc_page(virtual_space_end); /* - * step 6: init pagedaemon lock + * step 6: init locks for kernel threads */ simple_lock_init(&uvm.pagedaemon_lock); + simple_lock_init(&uvm.aiodoned_lock); /* * step 7: init reserve thresholds @@ -343,10 +351,6 @@ uvm_page_init(kvm_startp, kvm_endp) /* * step 8: determine if we should zero pages in the idle * loop. - * - * XXXJRT - might consider zero'ing up to the target *now*, - * but that could take an awfully long time if you - * have a lot of memory. */ uvm.page_idle_zero = vm_page_zero_enable; @@ -361,7 +365,6 @@ uvm_page_init(kvm_startp, kvm_endp) * uvm_setpagesize: set the page size * * => sets page_shift and page_mask from uvmexp.pagesize. - * => XXXCDC: move global vars. */ void @@ -892,22 +895,26 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) struct pgfreelist *pgfl; boolean_t use_reserve; -#ifdef DIAGNOSTIC - /* sanity check */ - if (obj && anon) - panic("uvm_pagealloc: obj and anon != NULL"); -#endif - - s = uvm_lock_fpageq(); /* lock free page queue */ + KASSERT(obj == NULL || anon == NULL); + KASSERT(off == trunc_page(off)); + s = uvm_lock_fpageq(); /* * check to see if we need to generate some free pages waking * the pagedaemon. */ +#ifdef UBC + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon); + } +#else if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg && uvmexp.inactive < uvmexp.inactarg)) wakeup(&uvm.pagedaemon); +#endif /* * fail if any of these conditions is true: @@ -961,11 +968,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) case UVM_PGA_STRAT_ONLY: case UVM_PGA_STRAT_FALLBACK: /* Attempt to allocate from the specified free list. */ -#ifdef DIAGNOSTIC - if (free_list >= VM_NFREELIST || free_list < 0) - panic("uvm_pagealloc_strat: bad free list %d", - free_list); -#endif + KASSERT(free_list >= 0 && free_list < VM_NFREELIST); pgfl = &uvm.page_free[free_list]; if ((pg = TAILQ_FIRST((freeq = &pgfl->pgfl_queues[try1]))) != NULL || @@ -1016,11 +1019,12 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) pg->uanon = anon; pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; pg->version++; - pg->wire_count = 0; - pg->loan_count = 0; if (anon) { anon->u.an_page = pg; pg->pqflags = PQ_ANON; +#ifdef UBC + uvm_pgcnt_anon++; +#endif } else { if (obj) uvm_pageinsert(pg); @@ -1121,8 +1125,6 @@ uvm_pagerealloc(pg, newobj, newoff) pg->version++; uvm_pageinsert(pg); } - - return; } @@ -1136,14 +1138,20 @@ uvm_pagerealloc(pg, newobj, newoff) * => assumes all valid mappings of pg are gone */ -void uvm_pagefree(pg) - -struct vm_page *pg; - +void +uvm_pagefree(pg) + struct vm_page *pg; { int s; int saved_loan_count = pg->loan_count; +#ifdef DEBUG + if (pg->uobject == (void *)0xdeadbeef && + pg->uanon == (void *)0xdeadbeef) { + panic("uvm_pagefree: freeing free page %p\n", pg); + } +#endif + /* * if the page was an object page (and thus "TABLED"), remove it * from the object. @@ -1152,7 +1160,7 @@ struct vm_page *pg; if (pg->flags & PG_TABLED) { /* - * if the object page is on loan we are going to drop ownership. + * if the object page is on loan we are going to drop ownership. * it is possible that an anon will take over as owner for this * page later on. 
the anon will want a !PG_CLEAN page so that * it knows it needs to allocate swap if it wants to page the @@ -1161,7 +1169,6 @@ struct vm_page *pg; if (saved_loan_count) pg->flags &= ~PG_CLEAN; /* in case an anon takes over */ - uvm_pageremove(pg); /* @@ -1172,9 +1179,9 @@ struct vm_page *pg; * return (when the last loan is dropped, then the page can be * freed by whatever was holding the last loan). */ + if (saved_loan_count) return; - } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) { /* @@ -1184,19 +1191,12 @@ struct vm_page *pg; * note that the kernel can't change the loan status of our * page as long as we are holding PQ lock. */ + pg->pqflags &= ~PQ_ANON; pg->uanon = NULL; return; } - -#ifdef DIAGNOSTIC - if (saved_loan_count) { - printf("uvm_pagefree: warning: freeing page with a loan " - "count of %d\n", saved_loan_count); - panic("uvm_pagefree: loan count"); - } -#endif - + KASSERT(saved_loan_count == 0); /* * now remove the page from the queues @@ -1219,13 +1219,19 @@ struct vm_page *pg; /* * if the page was wired, unwire it now. */ + if (pg->wire_count) { pg->wire_count = 0; uvmexp.wired--; } +#ifdef UBC + if (pg->uanon) { + uvm_pgcnt_anon--; + } +#endif /* - * and put on free queue + * and put on free queue */ pg->flags &= ~PG_ZERO; @@ -1247,6 +1253,51 @@ struct vm_page *pg; uvm_unlock_fpageq(s); } +/* + * uvm_page_unbusy: unbusy an array of pages. + * + * => pages must either all belong to the same object, or all belong to anons. + * => if pages are object-owned, object must be locked. + * => if pages are anon-owned, anons must be unlockd and have 0 refcount. + */ + +void +uvm_page_unbusy(pgs, npgs) + struct vm_page **pgs; + int npgs; +{ + struct vm_page *pg; + struct uvm_object *uobj; + int i; + UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist); + + for (i = 0; i < npgs; i++) { + pg = pgs[i]; + + if (pg == NULL) { + continue; + } + if (pg->flags & PG_WANTED) { + wakeup(pg); + } + if (pg->flags & PG_RELEASED) { + UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0); + uobj = pg->uobject; + if (uobj != NULL) { + uobj->pgops->pgo_releasepg(pg, NULL); + } else { + pg->flags &= ~(PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + uvm_anfree(pg->uanon); + } + } else { + UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0); + pg->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + } + } +} + #if defined(UVM_PAGE_TRKOWN) /* * uvm_page_own: set or release page ownership diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 564504a205f..aa5991137c2 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_page.h,v 1.11 2001/11/07 02:55:50 art Exp $ */ -/* $NetBSD: uvm_page.h,v 1.17 2000/10/03 20:50:49 mrg Exp $ */ +/* $OpenBSD: uvm_page.h,v 1.12 2001/11/10 18:42:31 art Exp $ */ +/* $NetBSD: uvm_page.h,v 1.18 2000/11/27 08:40:05 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
@@ -119,27 +119,27 @@ #include <uvm/uvm_pglist.h> struct vm_page { - TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO - * queue or free list (P) */ - TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ - TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ - - struct vm_anon *uanon; /* anon (O,P) */ - struct uvm_object *uobject; /* object (O,P) */ - voff_t offset; /* offset into object (O,P) */ - - u_short flags; /* object flags [O] */ - u_short version; /* version count [O] */ - u_short wire_count; /* wired down map refs [P] */ - u_short pqflags; /* page queue flags [P] */ - u_int loan_count; /* number of active loans - * to read: [O or P] - * to modify: [O _and_ P] */ - paddr_t phys_addr; /* physical address of page */ + TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO + * queue or free list (P) */ + TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/ + TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/ + + struct vm_anon *uanon; /* anon (O,P) */ + struct uvm_object *uobject; /* object (O,P) */ + voff_t offset; /* offset into object (O,P) */ + + u_short flags; /* object flags [O] */ + u_short version; /* version count [O] */ + u_short wire_count; /* wired down map refs [P] */ + u_short pqflags; /* page queue flags [P] */ + u_int loan_count; /* number of active loans + * to read: [O or P] + * to modify: [O _and_ P] */ + paddr_t phys_addr; /* physical address of page */ #if defined(UVM_PAGE_TRKOWN) - /* debugging fields to track page ownership */ - pid_t owner; /* proc that set PG_BUSY */ - char *owner_tag; /* why it was set busy */ + /* debugging fields to track page ownership */ + pid_t owner; /* proc that set PG_BUSY */ + char *owner_tag; /* why it was set busy */ #endif }; @@ -158,25 +158,23 @@ struct vm_page { * PG_ZERO is used to indicate that a page has been pre-zero'd. This flag * is only set when the page is on no queues, and is cleared when the page * is placed on the free list. 
- * - * possible deadwood: PG_FAULTING, PQ_LAUNDRY */ + +#define PG_BUSY 0x0001 /* page is locked */ +#define PG_WANTED 0x0002 /* someone is waiting for page */ +#define PG_TABLED 0x0004 /* page is in VP table */ #define PG_CLEAN 0x0008 /* page has not been modified */ -#define PG_BUSY 0x0010 /* page is in transit */ -#define PG_WANTED 0x0020 /* someone is waiting for page */ -#define PG_TABLED 0x0040 /* page is in VP table */ -#define PG_ZERO 0x0100 /* page is pre-zero'd */ -#define PG_FAKE 0x0200 /* page is placeholder for pagein */ -#define PG_FILLED 0x0400 /* client flag to set when filled */ -#define PG_DIRTY 0x0800 /* client flag to set when dirty */ -#define PG_RELEASED 0x1000 /* page released while paging */ -#define PG_FAULTING 0x2000 /* page is being faulted in */ -#define PG_CLEANCHK 0x4000 /* clean bit has been checked */ +#define PG_CLEANCHK 0x0010 /* clean bit has been checked */ +#define PG_RELEASED 0x0020 /* page released while paging */ +#define PG_FAKE 0x0040 /* page is not yet initialized */ +#define PG_RDONLY 0x0080 /* page must be mapped read-only */ +#define PG_ZERO 0x0100 /* page is pre-zero'd */ + +#define PG_PAGER1 0x1000 /* pager-specific flag */ #define PQ_FREE 0x0001 /* page is on free list */ #define PQ_INACTIVE 0x0002 /* page is in inactive list */ #define PQ_ACTIVE 0x0004 /* page is in active list */ -#define PQ_LAUNDRY 0x0008 /* page is being cleaned now */ #define PQ_ANON 0x0010 /* page is part of an anon, rather than an uvm_object */ #define PQ_AOBJ 0x0020 /* page is part of an anonymous @@ -239,12 +237,9 @@ extern boolean_t vm_page_zero_enable; * ordered, in LRU-like fashion. */ -extern -struct pglist vm_page_queue_free; /* memory free queue */ -extern -struct pglist vm_page_queue_active; /* active memory queue */ -extern -struct pglist vm_page_queue_inactive; /* inactive memory queue */ +extern struct pglist vm_page_queue_free; /* memory free queue */ +extern struct pglist vm_page_queue_active; /* active memory queue */ +extern struct pglist vm_page_queue_inactive; /* inactive memory queue */ /* * physical memory config is stored in vm_physmem. @@ -285,9 +280,8 @@ vaddr_t uvm_pageboot_alloc __P((vsize_t)); PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *)); PAGE_INLINE void uvm_pagedeactivate __P((struct vm_page *)); void uvm_pagefree __P((struct vm_page *)); +void uvm_page_unbusy __P((struct vm_page **, int)); PAGE_INLINE struct vm_page *uvm_pagelookup __P((struct uvm_object *, voff_t)); -void uvm_pageremove __P((struct vm_page *)); -/* uvm_pagerename: not needed */ PAGE_INLINE void uvm_pageunwire __P((struct vm_page *)); PAGE_INLINE void uvm_pagewait __P((struct vm_page *, int)); PAGE_INLINE void uvm_pagewake __P((struct vm_page *)); diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index 29e305c57b3..57cd0b4fc3a 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.c,v 1.20 2001/11/07 02:55:50 art Exp $ */ -/* $NetBSD: uvm_pager.c,v 1.34 2000/11/24 22:41:39 chs Exp $ */ +/* $OpenBSD: uvm_pager.c,v 1.21 2001/11/10 18:42:31 art Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ /* * @@ -39,25 +39,36 @@ * uvm_pager.c: generic functions used to assist the pagers. 
*/ +#define UVM_PAGER #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/malloc.h> +#include <sys/pool.h> +#include <sys/vnode.h> +#include <sys/buf.h> -#define UVM_PAGER #include <uvm/uvm.h> +struct pool *uvm_aiobuf_pool; + /* * list of uvm pagers in the system */ extern struct uvm_pagerops uvm_deviceops; extern struct uvm_pagerops uvm_vnodeops; +#ifdef UBC +extern struct uvm_pagerops ubc_pager; +#endif struct uvm_pagerops *uvmpagerops[] = { &aobj_pager, &uvm_deviceops, &uvm_vnodeops, +#ifdef UBC + &ubc_pager, +#endif }; /* @@ -67,7 +78,8 @@ struct uvm_pagerops *uvmpagerops[] = { vm_map_t pager_map; /* XXX */ simple_lock_data_t pager_map_wanted_lock; boolean_t pager_map_wanted; /* locked by pager map */ - +static vaddr_t emergva; +static boolean_t emerginuse; /* * uvm_pager_init: init pagers (at boot time) @@ -82,10 +94,12 @@ uvm_pager_init() * init pager map */ - pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva, - PAGER_MAP_SIZE, 0, FALSE, NULL); - simple_lock_init(&pager_map_wanted_lock); - pager_map_wanted = FALSE; + pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva, + PAGER_MAP_SIZE, 0, FALSE, NULL); + simple_lock_init(&pager_map_wanted_lock); + pager_map_wanted = FALSE; + emergva = uvm_km_valloc(kernel_map, MAXBSIZE); + emerginuse = FALSE; /* * init ASYNC I/O queue @@ -111,22 +125,19 @@ uvm_pager_init() */ vaddr_t -uvm_pagermapin(pps, npages, aiop, flags) +uvm_pagermapin(pps, npages, flags) struct vm_page **pps; int npages; - struct uvm_aiodesc **aiop; /* OUT */ int flags; { vsize_t size; vaddr_t kva; - struct uvm_aiodesc *aio; vaddr_t cva; struct vm_page *pp; vm_prot_t prot; UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, aiop=0x%x, flags=0x%x)", - pps, npages, aiop, flags); + UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d)", pps, npages,0,0); /* * compute protection. 
outgoing I/O only needs read @@ -138,24 +149,26 @@ uvm_pagermapin(pps, npages, aiop, flags) prot |= VM_PROT_WRITE; ReStart: - if (aiop) { - MALLOC(aio, struct uvm_aiodesc *, sizeof(*aio), M_TEMP, - (flags & UVMPAGER_MAPIN_WAITOK)); - if (aio == NULL) - return(0); - *aiop = aio; - } else { - aio = NULL; - } - size = npages << PAGE_SHIFT; kva = 0; /* let system choose VA */ if (uvm_map(pager_map, &kva, size, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) { + if (curproc == uvm.pagedaemon_proc) { + simple_lock(&pager_map_wanted_lock); + if (emerginuse) { + UVM_UNLOCK_AND_WAIT(&emergva, + &pager_map_wanted_lock, FALSE, + "emergva", 0); + goto ReStart; + } + emerginuse = TRUE; + simple_unlock(&pager_map_wanted_lock); + kva = emergva; + KASSERT(npages <= MAXBSIZE >> PAGE_SHIFT); + goto enter; + } if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) { - if (aio) - FREE(aio, M_TEMP); UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0); return(0); } @@ -163,16 +176,17 @@ ReStart: pager_map_wanted = TRUE; UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, - "pager_map",0); + "pager_map", 0); goto ReStart; } +enter: /* got it */ for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { pp = *pps++; #ifdef DEBUG if ((pp->flags & PG_BUSY) == 0) - panic("uvm_pagermapin: page not busy"); + panic("uvm_pagermapin: pg %p not busy", pp); #endif pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp), prot, PMAP_WIRED | prot); @@ -197,13 +211,22 @@ uvm_pagermapout(kva, npages) vsize_t size = npages << PAGE_SHIFT; vm_map_entry_t entries; UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist); - + UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0); /* * duplicate uvm_unmap, but add in pager_map_wanted handling. */ + if (kva == emergva) { + simple_lock(&pager_map_wanted_lock); + emerginuse = FALSE; + wakeup(&emergva); + simple_unlock(&pager_map_wanted_lock); + entries = NULL; + goto remove; + } + vm_map_lock(pager_map); (void) uvm_unmap_remove(pager_map, kva, kva + size, &entries); simple_lock(&pager_map_wanted_lock); @@ -213,6 +236,8 @@ uvm_pagermapout(kva, npages) } simple_unlock(&pager_map_wanted_lock); vm_map_unlock(pager_map); +remove: + pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT)); if (entries) uvm_unmap_detach(entries, 0); @@ -250,7 +275,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) { struct vm_page **ppsp, *pclust; voff_t lo, hi, curoff; - int center_idx, forward; + int center_idx, forward, incr; UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist); /* @@ -272,9 +297,11 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) if (hi > mhi) hi = mhi; } - if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ + if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ #ifdef DIAGNOSTIC - printf("uvm_mk_pcluster: provided page array too small (fixed)\n"); + printf("uvm_mk_pcluster uobj %p npages %d lo 0x%llx hi 0x%llx " + "flags 0x%x\n", uobj, *npages, (long long)lo, + (long long)hi, flags); #endif pps[0] = center; *npages = 1; @@ -290,7 +317,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) pps[center_idx] = center; /* plug in the center page */ ppsp = &pps[center_idx]; *npages = 1; - + /* * attempt to cluster around the left [backward], and then * the right side [forward]. 
@@ -302,21 +329,23 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) */ for (forward = 0 ; forward <= 1 ; forward++) { - - curoff = center->offset + (forward ? PAGE_SIZE : -PAGE_SIZE); + incr = forward ? PAGE_SIZE : -PAGE_SIZE; + curoff = center->offset + incr; for ( ;(forward == 0 && curoff >= lo) || (forward && curoff < hi); - curoff += (forward ? 1 : -1) << PAGE_SHIFT) { + curoff += incr) { pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ - if (pclust == NULL) + if (pclust == NULL) { break; /* no page */ + } /* handle active pages */ /* NOTE: inactive pages don't have pmap mappings */ if ((pclust->pqflags & PQ_INACTIVE) == 0) { - if ((flags & PGO_DOACTCLUST) == 0) + if ((flags & PGO_DOACTCLUST) == 0) { /* dont want mapped pages at all */ break; + } /* make sure "clean" bit is sync'd */ if ((pclust->flags & PG_CLEANCHK) == 0) { @@ -328,13 +357,16 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) pclust->flags |= PG_CLEANCHK; } } + /* is page available for cleaning and does it need it */ - if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0) + if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0) { break; /* page is already clean or is busy */ + } /* yes! enroll the page in our array */ pclust->flags |= PG_BUSY; /* busy! */ UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); + /* XXX: protect wired page? see above comment. */ pmap_page_protect(pclust, VM_PROT_READ); if (!forward) { @@ -344,7 +376,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) /* move forward one page */ ppsp[*npages] = pclust; } - *npages = *npages + 1; + (*npages)++; } } @@ -407,6 +439,7 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop) int result; daddr_t swblk; struct vm_page **ppsp = *ppsp_ptr; + UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(ubchist); /* * note that uobj is null if we are doing a swap-backed pageout. @@ -457,12 +490,12 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop) ReTry: if (uobj) { /* object is locked */ - result = uobj->pgops->pgo_put(uobj, ppsp, *npages, - flags & PGO_SYNCIO); + result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags); + UVMHIST_LOG(ubchist, "put -> %d", result, 0,0,0); /* object is now unlocked */ } else { /* nothing locked */ - result = uvm_swap_put(swblk, ppsp, *npages, flags & PGO_SYNCIO); + result = uvm_swap_put(swblk, ppsp, *npages, flags); /* nothing locked */ } @@ -498,9 +531,9 @@ ReTry: } /* - * a pager error occurred. - * for transient errors, drop to a cluster of 1 page ("pg") - * and try again. for hard errors, don't bother retrying. + * a pager error occured (even after dropping the cluster, if there + * was one). give up! the caller only has one page ("pg") + * to worry about. */ if (*npages > 1 || pg == NULL) { @@ -608,7 +641,8 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) for (lcv = 0 ; lcv < *npages ; lcv++) { - if (ppsp[lcv] == pg) /* skip "pg" */ + /* skip "pg" or empty slot */ + if (ppsp[lcv] == pg || ppsp[lcv] == NULL) continue; /* @@ -635,9 +669,10 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } /* did someone want the page while we had it busy-locked? */ - if (ppsp[lcv]->flags & PG_WANTED) + if (ppsp[lcv]->flags & PG_WANTED) { /* still holding obj lock */ wakeup(ppsp[lcv]); + } /* if page was released, release it. 
otherwise un-busy it */ if (ppsp[lcv]->flags & PG_RELEASED) { @@ -688,7 +723,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) continue; /* next page */ } else { - ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED); + ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED|PG_FAKE); UVM_PAGE_OWN(ppsp[lcv], NULL); } @@ -711,3 +746,181 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } } } + +#ifdef UBC +/* + * interrupt-context iodone handler for nested i/o bufs. + * + * => must be at splbio(). + */ + +void +uvm_aio_biodone1(bp) + struct buf *bp; +{ + struct buf *mbp = bp->b_private; + + KASSERT(mbp != bp); + if (bp->b_flags & B_ERROR) { + mbp->b_flags |= B_ERROR; + mbp->b_error = bp->b_error; + } + mbp->b_resid -= bp->b_bcount; + pool_put(&bufpool, bp); + if (mbp->b_resid == 0) { + biodone(mbp); + } +} +#endif + +/* + * interrupt-context iodone handler for single-buf i/os + * or the top-level buf of a nested-buf i/o. + * + * => must be at splbio(). + */ + +void +uvm_aio_biodone(bp) + struct buf *bp; +{ + /* reset b_iodone for when this is a single-buf i/o. */ + bp->b_iodone = uvm_aio_aiodone; + + simple_lock(&uvm.aiodoned_lock); /* locks uvm.aio_done */ + TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist); + wakeup(&uvm.aiodoned); + simple_unlock(&uvm.aiodoned_lock); +} + +/* + * uvm_aio_aiodone: do iodone processing for async i/os. + * this should be called in thread context, not interrupt context. + */ + +void +uvm_aio_aiodone(bp) + struct buf *bp; +{ + int npages = bp->b_bufsize >> PAGE_SHIFT; + struct vm_page *pg, *pgs[npages]; + struct uvm_object *uobj; + int s, i; + boolean_t release, write, swap; + UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist); + UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0); + + release = (bp->b_flags & (B_ERROR|B_READ)) == (B_ERROR|B_READ); + write = (bp->b_flags & B_READ) == 0; +#ifdef UBC + /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ + if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) { + (*bioops.io_pageiodone)(bp); + } +#endif + + uobj = NULL; + for (i = 0; i < npages; i++) { + pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); + UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0); + } + uvm_pagermapout((vaddr_t)bp->b_data, npages); +#ifdef UVM_SWAP_ENCRYPT + /* + * XXX - assumes that we only get ASYNC writes. used to be above. + */ + if (pgs[0]->pqflags & PQ_ENCRYPT) { + uvm_swap_freepages(pgs, npages); + goto freed; + } +#endif /* UVM_SWAP_ENCRYPT */ + for (i = 0; i < npages; i++) { + pg = pgs[i]; + + if (i == 0) { + swap = (pg->pqflags & PQ_SWAPBACKED) != 0; + if (!swap) { + uobj = pg->uobject; + simple_lock(&uobj->vmobjlock); + } + } + KASSERT(swap || pg->uobject == uobj); + if (swap) { + if (pg->pqflags & PQ_ANON) { + simple_lock(&pg->uanon->an_lock); + } else { + simple_lock(&pg->uobject->vmobjlock); + } + } + + /* + * if this is a read and we got an error, mark the pages + * PG_RELEASED so that uvm_page_unbusy() will free them. + */ + + if (release) { + pg->flags |= PG_RELEASED; + continue; + } + KASSERT(!write || (pgs[i]->flags & PG_FAKE) == 0); + + /* + * if this is a read and the page is PG_FAKE + * or this was a write, mark the page PG_CLEAN and not PG_FAKE. 
+ */ + + if (pgs[i]->flags & PG_FAKE || write) { + pmap_clear_reference(pgs[i]); + pmap_clear_modify(pgs[i]); + pgs[i]->flags |= PG_CLEAN; + pgs[i]->flags &= ~PG_FAKE; + } + if (swap) { + if (pg->pqflags & PQ_ANON) { + simple_unlock(&pg->uanon->an_lock); + } else { + simple_unlock(&pg->uobject->vmobjlock); + } + } + } + uvm_page_unbusy(pgs, npages); + if (!swap) { + simple_unlock(&uobj->vmobjlock); + } + +#ifdef UVM_SWAP_ENCRYPT +freed: +#endif + s = splbio(); + if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) { + vwakeup(bp->b_vp); + } + pool_put(&bufpool, bp); + splx(s); +} + +/* + * translate unix errno values to VM_PAGER_*. + */ + +int +uvm_errno2vmerror(errno) + int errno; +{ + switch (errno) { + case 0: + return VM_PAGER_OK; + case EINVAL: + return VM_PAGER_BAD; + case EINPROGRESS: + return VM_PAGER_PEND; + case EIO: + return VM_PAGER_ERROR; + case EAGAIN: + return VM_PAGER_AGAIN; + case EBUSY: + return VM_PAGER_UNLOCK; + default: + return VM_PAGER_ERROR; + } +} diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h index 208693227d3..b246e815e04 100644 --- a/sys/uvm/uvm_pager.h +++ b/sys/uvm/uvm_pager.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.h,v 1.13 2001/11/07 02:55:50 art Exp $ */ -/* $NetBSD: uvm_pager.h,v 1.18 2000/11/24 22:41:39 chs Exp $ */ +/* $OpenBSD: uvm_pager.h,v 1.14 2001/11/10 18:42:31 art Exp $ */ +/* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */ /* * @@ -83,21 +83,6 @@ */ /* - * async pager i/o descriptor structure - */ - -TAILQ_HEAD(uvm_aiohead, uvm_aiodesc); - -struct uvm_aiodesc { - void (*aiodone) __P((struct uvm_aiodesc *)); - /* aio done function */ - vaddr_t kva; /* KVA of mapped page(s) */ - int npages; /* # of pages in I/O req */ - void *pd_ptr; /* pager-dependent pointer */ - TAILQ_ENTRY(uvm_aiodesc) aioq; /* linked list of aio's */ -}; - -/* * pager ops */ @@ -133,22 +118,22 @@ struct uvm_pagerops { /* pager flags [mostly for flush] */ #define PGO_CLEANIT 0x001 /* write dirty pages to backing store */ -#define PGO_SYNCIO 0x002 /* if PGO_CLEAN: use sync I/O? */ -/* - * obviously if neither PGO_INVALIDATE or PGO_FREE are set then the pages - * stay where they are. - */ +#define PGO_SYNCIO 0x002 /* if PGO_CLEANIT: use sync I/O? */ #define PGO_DEACTIVATE 0x004 /* deactivate flushed pages */ #define PGO_FREE 0x008 /* free flushed pages */ +/* if PGO_FREE is not set then the pages stay where they are. 
*/ #define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */ #define PGO_DOACTCLUST 0x020 /* flag to mk_pcluster to include active */ #define PGO_LOCKED 0x040 /* fault data structures are locked [get] */ #define PGO_PDFREECLUST 0x080 /* daemon's free cluster flag [uvm_pager_put] */ #define PGO_REALLOCSWAP 0x100 /* reallocate swap area [pager_dropcluster] */ +#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */ +#define PGO_WEAK 0x400 /* "weak" put, for nfs */ +#define PGO_PASTEOF 0x800 /* allow allocation of pages past EOF */ /* page we are not interested in getting */ -#define PGO_DONTCARE ((struct vm_page *) -1) /* [get only] */ +#define PGO_DONTCARE ((struct vm_page *) -1L) /* [get only] */ #ifdef _KERNEL @@ -176,12 +161,12 @@ int uvm_pager_put __P((struct uvm_object *, struct vm_page *, PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); -vaddr_t uvm_pagermapin __P((struct vm_page **, int, - struct uvm_aiodesc **, int)); +vaddr_t uvm_pagermapin __P((struct vm_page **, int, int)); void uvm_pagermapout __P((vaddr_t, int)); struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, int *, struct vm_page *, int, voff_t, voff_t)); +int uvm_errno2vmerror __P((int)); /* Flags to uvm_pagermapin() */ #define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */ @@ -215,7 +200,9 @@ struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, * is changed to do physically-addressed i/o. */ +#ifndef PAGER_MAP_SIZE #define PAGER_MAP_SIZE (16 * 1024 * 1024) +#endif #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index 42f141b17c0..e40281deb9d 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_pdaemon.c,v 1.16 2001/11/06 13:36:52 art Exp $ */ +/* $OpenBSD: uvm_pdaemon.c,v 1.17 2001/11/10 18:42:31 art Exp $ */ /* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */ /* @@ -76,6 +76,7 @@ #include <sys/systm.h> #include <sys/kernel.h> #include <sys/pool.h> +#include <sys/buf.h> #include <uvm/uvm.h> @@ -193,10 +194,8 @@ void uvm_pageout(void *arg) { int npages = 0; - int s; - struct uvm_aiodesc *aio, *nextaio; UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist); - + UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0); /* @@ -213,73 +212,33 @@ uvm_pageout(void *arg) /* * main loop */ - while (TRUE) { - - /* - * carefully attempt to go to sleep (without losing "wakeups"!). - * we need splbio because we want to make sure the aio_done list - * is totally empty before we go to sleep. - */ - s = splbio(); + for (;;) { simple_lock(&uvm.pagedaemon_lock); - /* - * if we've got done aio's, then bypass the sleep - */ - - if (uvm.aio_done.tqh_first == NULL) { - UVMHIST_LOG(maphist," <<SLEEPING>>",0,0,0,0); - UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, - &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0); - uvmexp.pdwoke++; - UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0); - - /* relock pagedaemon_lock, still at splbio */ - simple_lock(&uvm.pagedaemon_lock); - } - - /* - * check for done aio structures - */ - - aio = uvm.aio_done.tqh_first; /* save current list (if any)*/ - if (aio) { - TAILQ_INIT(&uvm.aio_done); /* zero global list */ - } - - simple_unlock(&uvm.pagedaemon_lock); /* unlock */ - splx(s); /* drop splbio */ - - /* - * first clear out any pending aios (to free space in case we - * want to pageout more stuff). 
- */ - - for (/*null*/; aio != NULL ; aio = nextaio) { - - uvmexp.paging -= aio->npages; - nextaio = aio->aioq.tqe_next; - aio->aiodone(aio); + UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0); + UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, + &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0); + uvmexp.pdwoke++; + UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0); - } - - /* Next, drain pool resources */ + /* drain pool resources */ pool_drain(0); /* * now lock page queues and recompute inactive count */ - uvm_lock_pageq(); + uvm_lock_pageq(); if (npages != uvmexp.npages) { /* check for new pages? */ npages = uvmexp.npages; uvmpd_tune(); } uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3; - if (uvmexp.inactarg <= uvmexp.freetarg) + if (uvmexp.inactarg <= uvmexp.freetarg) { uvmexp.inactarg = uvmexp.freetarg + 1; + } UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d", uvmexp.free, uvmexp.freetarg, uvmexp.inactive, @@ -287,30 +246,113 @@ uvm_pageout(void *arg) /* * scan if needed - * [XXX: note we are reading uvm.free without locking] */ + +#ifdef UBC + if (uvmexp.free + uvmexp.paging < uvmexp.freetarg || + uvmexp.inactive < uvmexp.inactarg || + uvm_pgcnt_vnode > + (uvmexp.active + uvmexp.inactive + uvmexp.wired + + uvmexp.free) * 13 / 16) { +#else if (uvmexp.free < uvmexp.freetarg || - uvmexp.inactive < uvmexp.inactarg) + uvmexp.inactive < uvmexp.inactarg) { +#endif uvmpd_scan(); + } /* - * done scan. unlock page queues (the only lock we are holding) + * if there's any free memory to be had, + * wake up any waiters. */ + + if (uvmexp.free > uvmexp.reserve_kernel || + uvmexp.paging == 0) { + wakeup(&uvmexp.free); + } + + /* + * scan done. unlock page queues (the only lock we are holding) + */ + uvm_unlock_pageq(); + } + /*NOTREACHED*/ +} + + +/* + * uvm_aiodone_daemon: main loop for the aiodone daemon. + */ + +void +uvm_aiodone_daemon(void *arg) +{ + int s, free; + struct buf *bp, *nbp; + UVMHIST_FUNC("uvm_aiodoned"); UVMHIST_CALLED(pdhist); + + for (;;) { /* - * done! restart loop. + * carefully attempt to go to sleep (without losing "wakeups"!). + * we need splbio because we want to make sure the aio_done list + * is totally empty before we go to sleep. */ - if (uvmexp.free > uvmexp.reserve_kernel || - uvmexp.paging == 0) + + s = splbio(); + simple_lock(&uvm.aiodoned_lock); + if (TAILQ_FIRST(&uvm.aio_done) == NULL) { + UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0); + UVM_UNLOCK_AND_WAIT(&uvm.aiodoned, + &uvm.aiodoned_lock, FALSE, "aiodoned", 0); + UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0); + + /* relock aiodoned_lock, still at splbio */ + simple_lock(&uvm.aiodoned_lock); + } + + /* + * check for done aio structures + */ + + bp = TAILQ_FIRST(&uvm.aio_done); + if (bp) { + TAILQ_INIT(&uvm.aio_done); + } + + simple_unlock(&uvm.aiodoned_lock); + splx(s); + + /* + * process each i/o that's done. + */ + + free = uvmexp.free; + while (bp != NULL) { + if (bp->b_flags & B_PDAEMON) { + uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT; + } + nbp = TAILQ_NEXT(bp, b_freelist); + (*bp->b_iodone)(bp); + bp = nbp; + } + if (free <= uvmexp.reserve_kernel) { + s = uvm_lock_fpageq(); + wakeup(&uvm.pagedaemon); + uvm_unlock_fpageq(s); + } else { + simple_lock(&uvm.pagedaemon_lock); wakeup(&uvmexp.free); + simple_unlock(&uvm.pagedaemon_lock); + } } - /*NOTREACHED*/ } + + /* - * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into - * its own function for ease of reading. + * uvmpd_scan_inactive: scan an inactive list for pages to clean or free. 
* * => called with page queues locked * => we work on meeting our free target by converting inactive pages @@ -527,6 +569,7 @@ uvmpd_scan_inactive(pglst) * this page is dirty, skip it if we'll have met our * free target when all the current pageouts complete. */ + if (free + uvmexp.paging > uvmexp.freetarg << 2) { if (anon) { simple_unlock(&anon->an_lock); @@ -542,11 +585,8 @@ uvmpd_scan_inactive(pglst) * reactivate it so that we eventually cycle * all pages thru the inactive queue. */ -#ifdef DIAGNOSTIC - if (uvmexp.swpgonly > uvmexp.swpages) { - panic("uvmexp.swpgonly botch"); - } -#endif + + KASSERT(uvmexp.swpgonly <= uvmexp.swpages); if ((p->pqflags & PQ_SWAPBACKED) && uvmexp.swpgonly == uvmexp.swpages) { dirtyreacts++; @@ -564,11 +604,8 @@ uvmpd_scan_inactive(pglst) * is full, free any swap allocated to the page * so that other pages can be paged out. */ -#ifdef DIAGNOSTIC - if (uvmexp.swpginuse > uvmexp.swpages) { - panic("uvmexp.swpginuse botch"); - } -#endif + + KASSERT(uvmexp.swpginuse <= uvmexp.swpages); if ((p->pqflags & PQ_SWAPBACKED) && uvmexp.swpginuse == uvmexp.swpages) { @@ -621,13 +658,11 @@ uvmpd_scan_inactive(pglst) /* * start new cluster (if necessary) */ + if (swslot == 0) { - /* want this much */ swnpages = MAXBSIZE >> PAGE_SHIFT; - swslot = uvm_swap_alloc(&swnpages, TRUE); - if (swslot == 0) { /* no swap? give up! */ p->flags &= ~PG_BUSY; @@ -646,6 +681,7 @@ uvmpd_scan_inactive(pglst) /* * add block to cluster */ + swpps[swcpages] = p; if (anon) anon->an_swslot = swslot + swcpages; @@ -654,11 +690,7 @@ uvmpd_scan_inactive(pglst) p->offset >> PAGE_SHIFT, swslot + swcpages); swcpages++; - - /* done (swap-backed) */ } - - /* end: if (p) ["if we have new page to consider"] */ } else { /* if p == NULL we must be doing a last swap i/o */ @@ -666,16 +698,16 @@ uvmpd_scan_inactive(pglst) } /* - * now consider doing the pageout. + * now consider doing the pageout. * - * for swap-backed pages, we do the pageout if we have either - * filled the cluster (in which case (swnpages == swcpages) or + * for swap-backed pages, we do the pageout if we have either + * filled the cluster (in which case (swnpages == swcpages) or * run out of pages (p == NULL). * * for object pages, we always do the pageout. */ - if (swap_backed) { + if (swap_backed) { if (p) { /* if we just added a page to cluster */ if (anon) simple_unlock(&anon->an_lock); @@ -698,21 +730,18 @@ uvmpd_scan_inactive(pglst) if (swcpages < swnpages) { uvm_swap_free(swslot + swcpages, (swnpages - swcpages)); - } - + } } else { - /* normal object pageout */ ppsp = pps; npages = sizeof(pps) / sizeof(struct vm_page *); /* not looked at because PGO_ALLPAGES is set */ start = 0; - } /* * now do the pageout. - * + * * for swap_backed pages we have already built the cluster. * for !swap_backed pages, uvm_pager_put will call the object's * "make put cluster" function to build a cluster on our behalf. @@ -733,7 +762,7 @@ uvmpd_scan_inactive(pglst) /* locked: uobj (if !swap_backed), page queues */ uvmexp.pdpageouts++; - result = uvm_pager_put((swap_backed) ? NULL : uobj, p, + result = uvm_pager_put(swap_backed ? 
NULL : uobj, p, &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0); /* locked: uobj (if !swap_backed && result != PEND) */ /* unlocked: pageqs, object (if swap_backed ||result == PEND) */ @@ -761,21 +790,29 @@ uvmpd_scan_inactive(pglst) if (result == VM_PAGER_PEND) { uvmexp.paging += npages; - uvm_lock_pageq(); /* relock page queues */ + uvm_lock_pageq(); uvmexp.pdpending++; if (p) { if (p->pqflags & PQ_INACTIVE) - /* reload! */ - nextpg = p->pageq.tqe_next; + nextpg = TAILQ_NEXT(p, pageq); else - /* reload! */ - nextpg = pglst->tqh_first; - } else { - nextpg = NULL; /* done list */ + nextpg = TAILQ_FIRST(pglst); + } else { + nextpg = NULL; } continue; } +#ifdef UBC + if (result == VM_PAGER_ERROR && + curproc == uvm.pagedaemon_proc) { + uvm_lock_pageq(); + nextpg = TAILQ_NEXT(p, pageq); + uvm_pageactivate(p); + continue; + } +#endif + /* * clean up "p" if we have one */ @@ -836,24 +873,19 @@ uvmpd_scan_inactive(pglst) pmap_page_protect(p, VM_PROT_NONE); anon = NULL; uvm_lock_pageq(); - nextpg = p->pageq.tqe_next; + nextpg = TAILQ_NEXT(p, pageq); /* free released page */ uvm_pagefree(p); } else { -#ifdef DIAGNOSTIC - if (uobj->pgops->pgo_releasepg == NULL) - panic("pagedaemon: no " - "pgo_releasepg function"); -#endif - - /* + /* * pgo_releasepg nukes the page and * gets "nextpg" for us. it returns * with the page queues locked (when * given nextpg ptr). */ + if (!uobj->pgops->pgo_releasepg(p, &nextpg)) /* uobj died after release */ @@ -863,35 +895,27 @@ uvmpd_scan_inactive(pglst) * lock page queues here so that they're * always locked at the end of the loop. */ + uvm_lock_pageq(); } - } else { /* page was not released during I/O */ - uvm_lock_pageq(); - nextpg = p->pageq.tqe_next; - + nextpg = TAILQ_NEXT(p, pageq); if (result != VM_PAGER_OK) { - /* pageout was a failure... */ if (result != VM_PAGER_AGAIN) uvm_pageactivate(p); pmap_clear_reference(p); /* XXXCDC: if (swap_backed) FREE p's * swap block? */ - } else { - /* pageout was a success... */ pmap_clear_reference(p); pmap_clear_modify(p); p->flags |= PG_CLEAN; - /* XXX: could free page here, but old - * pagedaemon does not */ - } } - + /* * drop object lock (if there is an object left). do * a safety check of nextpg to make sure it is on the @@ -905,26 +929,27 @@ uvmpd_scan_inactive(pglst) else if (uobj) simple_unlock(&uobj->vmobjlock); - } /* if (p) */ else { + } else { + + /* + * if p is null in this loop, make sure it stays null + * in the next loop. + */ - /* if p is null in this loop, make sure it stays null - * in next loop */ nextpg = NULL; /* * lock page queues here just so they're always locked * at the end of the loop. */ + uvm_lock_pageq(); } if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) { - printf("pagedaemon: invalid nextpg! reverting to " - "queue head\n"); - nextpg = pglst->tqh_first; /* reload! */ + nextpg = TAILQ_FIRST(pglst); /* reload! */ } - - } /* end of "inactive" 'for' loop */ + } return (retval); } @@ -944,10 +969,8 @@ uvmpd_scan() UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist); uvmexp.pdrevs++; /* counter */ + uobj = NULL; -#ifdef __GNUC__ - uobj = NULL; /* XXX gcc */ -#endif /* * get current "free" page count */ @@ -961,13 +984,11 @@ uvmpd_scan() * we need to unlock the page queues for this. 
*/ if (free < uvmexp.freetarg) { - uvmexp.pdswout++; UVMHIST_LOG(pdhist," free %d < target %d: swapout", free, uvmexp.freetarg, 0, 0); uvm_unlock_pageq(); uvm_swapout_threads(); - pmap_update(); /* update so we can scan inactive q */ uvm_lock_pageq(); } @@ -983,8 +1004,8 @@ uvmpd_scan() UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0); /* - * do loop #1! alternate starting queue between swap and object based - * on the low bit of uvmexp.pdrevs (which we bump by one each call). + * alternate starting queue between swap and object based on the + * low bit of uvmexp.pdrevs (which we bump by one each call). */ got_it = FALSE; @@ -1008,6 +1029,7 @@ uvmpd_scan() * detect if we're not going to be able to page anything out * until we free some swap resources from active pages. */ + swap_shortage = 0; if (uvmexp.free < uvmexp.freetarg && uvmexp.swpginuse == uvmexp.swpages && @@ -1015,13 +1037,13 @@ uvmpd_scan() pages_freed == 0) { swap_shortage = uvmexp.freetarg - uvmexp.free; } - + UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%d swap_shortage=%d", inactive_shortage, swap_shortage,0,0); - for (p = TAILQ_FIRST(&uvm.page_active); + for (p = TAILQ_FIRST(&uvm.page_active); p != NULL && (inactive_shortage > 0 || swap_shortage > 0); p = nextpg) { - nextpg = p->pageq.tqe_next; + nextpg = TAILQ_NEXT(p, pageq); if (p->flags & PG_BUSY) continue; /* quick check before trying to lock */ @@ -1030,22 +1052,13 @@ uvmpd_scan() */ /* is page anon owned or ownerless? */ if ((p->pqflags & PQ_ANON) || p->uobject == NULL) { - -#ifdef DIAGNOSTIC - if (p->uanon == NULL) - panic("pagedaemon: page with no anon or " - "object detected - loop 2"); -#endif + KASSERT(p->uanon != NULL); if (!simple_lock_try(&p->uanon->an_lock)) continue; /* take over the page? */ if ((p->pqflags & PQ_ANON) == 0) { -#ifdef DIAGNOSTIC - if (p->loan_count < 1) - panic("pagedaemon: non-loaned " - "ownerless page detected - loop 2"); -#endif + KASSERT(p->loan_count > 0); p->loan_count--; p->pqflags |= PQ_ANON; } @@ -1053,9 +1066,11 @@ uvmpd_scan() if (!simple_lock_try(&p->uobject->vmobjlock)) continue; } + /* * skip this page if it's busy. */ + if ((p->flags & PG_BUSY) != 0) { if (p->pqflags & PQ_ANON) simple_unlock(&p->uanon->an_lock); @@ -1063,11 +1078,12 @@ uvmpd_scan() simple_unlock(&p->uobject->vmobjlock); continue; } - + /* * if there's a shortage of swap, free any swap allocated * to this page so that other pages can be paged out. */ + if (swap_shortage > 0) { if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) { uvm_swap_free(p->uanon->an_swslot, 1); @@ -1085,11 +1101,12 @@ uvmpd_scan() } } } - + /* * deactivate this page if there's a shortage of * inactive pages. */ + if (inactive_shortage > 0) { pmap_page_protect(p, VM_PROT_NONE); /* no need to check wire_count as pg is "active" */ @@ -1097,7 +1114,6 @@ uvmpd_scan() uvmexp.pddeact++; inactive_shortage--; } - if (p->pqflags & PQ_ANON) simple_unlock(&p->uanon->an_lock); else diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c index e4bd678b122..e3447538d97 100644 --- a/sys/uvm/uvm_swap.c +++ b/sys/uvm/uvm_swap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_swap.c,v 1.38 2001/11/07 02:55:50 art Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.39 2001/11/10 18:42:32 art Exp $ */ /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ /* @@ -77,11 +77,6 @@ * by the "swap_priority" global var. each "swappri" contains a * CIRCLEQ of "swapdev" structures at that priority. * - * the system maintains a fixed pool of "swapbuf" structures for use - * at swap i/o time. 
a swapbuf includes a "buf" structure and an - * "aiodone" [we want to avoid malloc()'ing anything at swapout time - * since memory may be low]. - * * locking: * - swap_syscall_lock (sleep lock): this lock serializes the swapctl * system call and prevents the swap priority list from changing @@ -89,8 +84,6 @@ * - uvm.swap_data_lock (simple_lock): this lock protects all swap data * structures including the priority list, the swapdev structures, * and the swapmap extent. - * - swap_buf_lock (simple_lock): this lock protects the free swapbuf - * pool. * * each swap device has the following info: * - swap device in use (could be disabled, preventing future use) @@ -172,15 +165,6 @@ struct swappri { }; /* - * swapbuf, swapbuffer plus async i/o info - */ -struct swapbuf { - struct buf sw_buf; /* a buffer structure */ - struct uvm_aiodesc sw_aio; /* aiodesc structure, used if ASYNC */ - SIMPLEQ_ENTRY(swapbuf) sw_sq; /* free list pointer */ -}; - -/* * The following two structures are used to keep track of data transfers * on swap devices associated with regular files. * NOTE: this code is more or less a copy of vnd.c; we use the same @@ -236,8 +220,6 @@ cdev_decl(sw); * local variables */ static struct extent *swapmap; /* controls the mapping of /dev/drum */ -SIMPLEQ_HEAD(swapbufhead, swapbuf); -struct pool swapbuf_pool; /* list of all active swap devices [by priority] */ LIST_HEAD(swap_priority, swappri); @@ -264,8 +246,6 @@ static void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); static void sw_reg_iodone __P((struct buf *)); static void sw_reg_start __P((struct swapdev *)); -static void uvm_swap_aiodone __P((struct uvm_aiodesc *)); -static void uvm_swap_bufdone __P((struct buf *)); static int uvm_swap_io __P((struct vm_page **, int, int, int)); static void swapmount __P((void)); @@ -273,7 +253,6 @@ static void swapmount __P((void)); #ifdef UVM_SWAP_ENCRYPT /* for swap encrypt */ boolean_t uvm_swap_allocpages __P((struct vm_page **, int)); -void uvm_swap_freepages __P((struct vm_page **, int)); void uvm_swap_markdecrypt __P((struct swapdev *, int, int, int)); boolean_t uvm_swap_needdecrypt __P((struct swapdev *, int)); void uvm_swap_initcrypt __P((struct swapdev *, int)); @@ -317,16 +296,10 @@ uvm_swap_init() panic("uvm_swap_init: extent_create failed"); /* - * allocate our private pool of "swapbuf" structures (includes - * a "buf" structure). ["nswbuf" comes from param.c and can - * be adjusted by MD code before we get here]. + * allocate pools for structures used for swapping to files. */ - pool_init(&swapbuf_pool, sizeof(struct swapbuf), 0, 0, 0, "swp buf", 0, - NULL, NULL, 0); - /* XXX - set a maximum on swapbuf_pool? */ - pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", 0, NULL, NULL, 0); @@ -1256,7 +1229,7 @@ swstrategy(bp) * be yanked out from under us because we are holding resources * in it (i.e. the blocks we are doing I/O on). */ - pageno = dbtob(bp->b_blkno) >> PAGE_SHIFT; + pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT; simple_lock(&uvm.swap_data_lock); sdp = swapdrum_getsdp(pageno); simple_unlock(&uvm.swap_data_lock); @@ -1275,7 +1248,7 @@ swstrategy(bp) pageno -= sdp->swd_drumoffset; /* page # on swapdev */ bn = btodb(pageno << PAGE_SHIFT); /* convert to diskblock */ - UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld\n", + UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld", ((bp->b_flags & B_READ) == 0) ? 
"write" : "read", sdp->swd_drumoffset, bn, bp->b_bcount); @@ -1393,18 +1366,15 @@ sw_reg_strategy(sdp, bp, bn) /* * compute the size ("sz") of this transfer (in bytes). - * XXXCDC: ignores read-ahead for non-zero offset */ - if ((off = (byteoff % sdp->swd_bsize)) != 0) - sz = sdp->swd_bsize - off; - else - sz = (1 + nra) * sdp->swd_bsize; - - if (resid < sz) + off = byteoff % sdp->swd_bsize; + sz = (1 + nra) * sdp->swd_bsize - off; + if (sz > resid) sz = resid; - UVMHIST_LOG(pdhist, "sw_reg_strategy: vp %p/%p offset 0x%x/0x%x", - sdp->swd_vp, vp, byteoff, nbn); + UVMHIST_LOG(pdhist, "sw_reg_strategy: " + "vp %p/%p offset 0x%x/0x%x", + sdp->swd_vp, vp, byteoff, nbn); /* * now get a buf structure. note that the vb_buf is @@ -1466,7 +1436,7 @@ sw_reg_strategy(sdp, bp, bn) vnx->vx_pending++; /* assoc new buffer with underlying vnode */ - bgetvp(vp, &nbp->vb_buf); + bgetvp(vp, &nbp->vb_buf); /* sort it in and start I/O if we are not over our limit */ disksort(&sdp->swd_tab, &nbp->vb_buf); @@ -1525,6 +1495,7 @@ sw_reg_start(sdp) bp, bp->b_vp, bp->b_blkno, bp->b_bcount); if ((bp->b_flags & B_READ) == 0) bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); } sdp->swd_flags &= ~SWF_BUSY; @@ -1571,14 +1542,7 @@ sw_reg_iodone(bp) /* * disassociate this buffer from the vnode (if any). */ - if (vbp->vb_buf.b_vp != NULLVP) { - brelvp(&vbp->vb_buf); - } - - /* - * disassociate this buffer from the vnode (if any). - */ - if (vbp->vb_buf.b_vp != NULLVP) { + if (vbp->vb_buf.b_vp != NULL) { brelvp(&vbp->vb_buf); } @@ -1719,8 +1683,9 @@ uvm_swap_markbad(startslot, nslots) * we assume here that the range of slots will all be within * one swap device. */ - sdp->swd_npgbad += nslots; + sdp->swd_npgbad += nslots; + UVMHIST_LOG(pdhist, "now %d bad", sdp->swd_npgbad, 0,0,0); simple_unlock(&uvm.swap_data_lock); } @@ -1870,10 +1835,10 @@ uvm_swap_io(pps, startslot, npages, flags) int startslot, npages, flags; { daddr_t startblk; - struct swapbuf *sbp; struct buf *bp; vaddr_t kva; int result, s, mapinflags, pflag; + boolean_t write, async; #ifdef UVM_SWAP_ENCRYPT vaddr_t dstkva; struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT]; @@ -1885,6 +1850,9 @@ uvm_swap_io(pps, startslot, npages, flags) UVMHIST_LOG(pdhist, "<- called, startslot=%d, npages=%d, flags=%d", startslot, npages, flags, 0); + write = (flags & B_READ) == 0; + async = (flags & B_ASYNC) != 0; + /* * convert starting drum slot to block number */ @@ -1892,20 +1860,17 @@ uvm_swap_io(pps, startslot, npages, flags) /* * first, map the pages into the kernel (XXX: currently required - * by buffer system). note that we don't let pagermapin alloc - * an aiodesc structure because we don't want to chance a malloc. - * we've got our own pool of aiodesc structures (in swapbuf). + * by buffer system). */ - mapinflags = (flags & B_READ) ? UVMPAGER_MAPIN_READ : - UVMPAGER_MAPIN_WRITE; - if ((flags & B_ASYNC) == 0) + mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE; + if (!async) mapinflags |= UVMPAGER_MAPIN_WAITOK; - kva = uvm_pagermapin(pps, npages, NULL, mapinflags); + kva = uvm_pagermapin(pps, npages, mapinflags); if (kva == 0) return (VM_PAGER_AGAIN); #ifdef UVM_SWAP_ENCRYPT - if ((flags & B_READ) == 0) { + if (write) { /* * Check if we need to do swap encryption on old pages. 
* Later we need a different scheme, that swap encrypts @@ -1934,7 +1899,7 @@ uvm_swap_io(pps, startslot, npages, flags) /* * encrypt to swap */ - if ((flags & B_READ) == 0 && encrypt) { + if (write && encrypt) { int i, opages; caddr_t src, dst; struct swap_key *key; @@ -1943,7 +1908,7 @@ uvm_swap_io(pps, startslot, npages, flags) /* We always need write access. */ swmapflags = UVMPAGER_MAPIN_READ; - if ((flags & B_ASYNC) == 0) + if (!async) swmapflags |= UVMPAGER_MAPIN_WAITOK; if (!uvm_swap_allocpages(tpps, npages)) { @@ -1951,7 +1916,7 @@ uvm_swap_io(pps, startslot, npages, flags) return (VM_PAGER_AGAIN); } - dstkva = uvm_pagermapin(tpps, npages, NULL, swmapflags); + dstkva = uvm_pagermapin(tpps, npages, swmapflags); if (dstkva == NULL) { uvm_pagermapout(kva, npages); uvm_swap_freepages(tpps, npages); @@ -1985,22 +1950,20 @@ uvm_swap_io(pps, startslot, npages, flags) #endif /* UVM_SWAP_ENCRYPT */ /* - * now allocate a swap buffer off of freesbufs + * now allocate a buf for the i/o. * [make sure we don't put the pagedaemon to sleep...] */ s = splbio(); - pflag = ((flags & B_ASYNC) != 0 || curproc == uvm.pagedaemon_proc) - ? 0 - : PR_WAITOK; - sbp = pool_get(&swapbuf_pool, pflag); - splx(s); /* drop splbio */ + pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK; + bp = pool_get(&bufpool, pflag); + splx(s); /* * if we failed to get a swapbuf, return "try again" */ - if (sbp == NULL) { + if (bp == NULL) { #ifdef UVM_SWAP_ENCRYPT - if ((flags & B_READ) == 0 && encrypt) { + if (write && encrypt) { int i; /* swap encrypt needs cleanup */ @@ -2019,15 +1982,17 @@ uvm_swap_io(pps, startslot, npages, flags) * prevent ASYNC reads. * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get * assumes that all gets are SYNCIO. Just make sure here. + * XXXARTUBC - might not be true anymore. */ - if (flags & B_READ) + if (!write) { flags &= ~B_ASYNC; + async = 0; + } #endif /* - * fill in the bp/sbp. we currently route our i/o through + * fill in the bp. we currently route our i/o through * /dev/drum's vnode [swapdev_vp]. */ - bp = &sbp->sw_buf; bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC)); bp->b_proc = &proc0; /* XXX */ bp->b_rcred = bp->b_wcred = proc0.p_ucred; @@ -2039,13 +2004,13 @@ uvm_swap_io(pps, startslot, npages, flags) bp->b_vp = NULL; buf_replacevnode(bp, swapdev_vp); splx(s); - bp->b_bcount = npages << PAGE_SHIFT; + bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; /* * for pageouts we must set "dirtyoff" [NFS client code needs it]. * and we bump v_numoutput (counter of number of active outputs). */ - if ((bp->b_flags & B_READ) == 0) { + if (write) { bp->b_dirtyoff = 0; bp->b_dirtyend = npages << PAGE_SHIFT; #ifdef UVM_SWAP_ENCRYPT @@ -2059,33 +2024,29 @@ uvm_swap_io(pps, startslot, npages, flags) } /* - * for async ops we must set up the aiodesc and setup the callback - * XXX: we expect no async-reads, but we don't prevent it here. - */ - if (flags & B_ASYNC) { - sbp->sw_aio.aiodone = uvm_swap_aiodone; - sbp->sw_aio.kva = kva; - sbp->sw_aio.npages = npages; - sbp->sw_aio.pd_ptr = sbp; /* backpointer */ - bp->b_flags |= B_CALL; /* set callback */ - bp->b_iodone = uvm_swap_bufdone;/* "buf" iodone function */ + * for async ops we must set up the iodone handler. + */ + if (async) { + bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ? 
+ B_PDAEMON : 0); + bp->b_iodone = uvm_aio_biodone; UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0); } UVMHIST_LOG(pdhist, - "about to start io: data = 0x%p blkno = 0x%x, bcount = %ld", + "about to start io: data = %p blkno = 0x%x, bcount = %ld", bp->b_data, bp->b_blkno, bp->b_bcount, 0); /* * now we start the I/O, and if async, return. */ VOP_STRATEGY(bp); - if (flags & B_ASYNC) + if (async) return (VM_PAGER_PEND); /* * must be sync i/o. wait for it to finish */ - bp->b_error = biowait(bp); + (void) biowait(bp); result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; #ifdef UVM_SWAP_ENCRYPT @@ -2124,13 +2085,15 @@ uvm_swap_io(pps, startslot, npages, flags) uvm_swap_freepages(tpps, npages); #endif /* - * now dispose of the swap buffer + * now dispose of the buf */ s = splbio(); if (bp->b_vp) brelvp(bp); - pool_put(&swapbuf_pool, sbp); + if (write && bp->b_vp) + vwakeup(bp->b_vp); + pool_put(&bufpool, bp); splx(s); /* @@ -2140,112 +2103,6 @@ uvm_swap_io(pps, startslot, npages, flags) return (result); } -/* - * uvm_swap_bufdone: called from the buffer system when the i/o is done - */ -static void -uvm_swap_bufdone(bp) - struct buf *bp; -{ - struct swapbuf *sbp = (struct swapbuf *) bp; - int s = splbio(); - UVMHIST_FUNC("uvm_swap_bufdone"); UVMHIST_CALLED(pdhist); - - UVMHIST_LOG(pdhist, "cleaning buf %p", buf, 0, 0, 0); -#ifdef DIAGNOSTIC - /* - * sanity check: swapbufs are private, so they shouldn't be wanted - */ - if (bp->b_flags & B_WANTED) - panic("uvm_swap_bufdone: private buf wanted"); -#endif - - /* - * drop the buffer's reference to the vnode. - */ - if (bp->b_vp) - brelvp(bp); - - /* - * now put the aio on the uvm.aio_done list and wake the - * pagedaemon (which will finish up our job in its context). - */ - simple_lock(&uvm.pagedaemon_lock); /* locks uvm.aio_done */ - TAILQ_INSERT_TAIL(&uvm.aio_done, &sbp->sw_aio, aioq); - simple_unlock(&uvm.pagedaemon_lock); - - wakeup(&uvm.pagedaemon); - splx(s); -} - -/* - * uvm_swap_aiodone: aiodone function for anonymous memory - * - * => this is called in the context of the pagedaemon (but with the - * page queues unlocked!) - * => our "aio" structure must be part of a "swapbuf" - */ -static void -uvm_swap_aiodone(aio) - struct uvm_aiodesc *aio; -{ - struct swapbuf *sbp = aio->pd_ptr; - struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT]; - int lcv, s; - vaddr_t addr; - UVMHIST_FUNC("uvm_swap_aiodone"); UVMHIST_CALLED(pdhist); - - UVMHIST_LOG(pdhist, "done with aio %p", aio, 0, 0, 0); -#ifdef DIAGNOSTIC - /* - * sanity check - */ - if (aio->npages > (MAXBSIZE >> PAGE_SHIFT)) - panic("uvm_swap_aiodone: aio too big!"); -#endif - - /* - * first, we have to recover the page pointers (pps) by poking in the - * kernel pmap (XXX: should be saved in the buf structure). - */ - for (addr = aio->kva, lcv = 0 ; lcv < aio->npages ; - addr += PAGE_SIZE, lcv++) { - pps[lcv] = uvm_pageratop(addr); - } - - /* - * now we can dispose of the kernel mappings of the buffer - */ - uvm_pagermapout(aio->kva, aio->npages); - - /* - * now we can dispose of the pages by using the dropcluster function - * [note that we have no "page of interest" so we pass in null] - */ - -#ifdef UVM_SWAP_ENCRYPT - /* - * XXX - assumes that we only get ASYNC writes. used to be above. 
- */
-	if (pps[0]->pqflags & PQ_ENCRYPT)
-		uvm_swap_freepages(pps, aio->npages);
-	else
-#endif /* UVM_SWAP_ENCRYPT */
-	uvm_pager_dropcluster(NULL, NULL, pps, &aio->npages,
-				PGO_PDFREECLUST);
-
-	/*
-	 * finally, we can dispose of the swapbuf
-	 */
-	s = splbio();
-	pool_put(&swapbuf_pool, sbp);
-	splx(s);
-
-	/*
-	 * done!
-	 */
-}
-
 static void
 swapmount()
 {
diff --git a/sys/uvm/uvm_swap.h b/sys/uvm/uvm_swap.h
index 3108dd10194..12db66a657b 100644
--- a/sys/uvm/uvm_swap.h
+++ b/sys/uvm/uvm_swap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.h,v 1.9 2001/11/07 02:55:51 art Exp $ */
+/* $OpenBSD: uvm_swap.h,v 1.10 2001/11/10 18:42:32 art Exp $ */
 /* $NetBSD: uvm_swap.h,v 1.5 2000/01/11 06:57:51 chs Exp $ */
 
 /*
@@ -45,6 +45,7 @@ void uvm_swap_free __P((int, int));
 void uvm_swap_markbad __P((int, int));
 #ifdef UVM_SWAP_ENCRYPT
 void uvm_swap_initcrypt_all __P((void));
+void uvm_swap_freepages __P((struct vm_page **, int));
 #endif
 
 #endif /* _KERNEL */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index e5e954eb9df..e921e4fb846 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.c,v 1.23 2001/11/07 02:55:51 art Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.24 2001/11/10 18:42:32 art Exp $ */
 /* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
 
 /*
@@ -1609,7 +1609,7 @@ uvn_io(uvn, pps, npages, flags, rw)
 
 	mapinflags = (rw == UIO_READ) ?
 	    UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
-	kva = uvm_pagermapin(pps, npages, NULL, mapinflags);
+	kva = uvm_pagermapin(pps, npages, mapinflags);
 	if (kva == 0 && waitf == M_NOWAIT) {
 		simple_unlock(&uvn->u_obj.vmobjlock);
 		UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
@@ -1626,7 +1626,7 @@ uvn_io(uvn, pps, npages, flags, rw)
 
 	simple_unlock(&uvn->u_obj.vmobjlock);
 /* NOTE: object now unlocked */
 	if (kva == 0)
-		kva = uvm_pagermapin(pps, npages, NULL,
+		kva = uvm_pagermapin(pps, npages,
 			mapinflags | UVMPAGER_MAPIN_WAITOK);
 /*
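
A note on the swstrategy() hunk above: the cast in dbtob((int64_t)bp->b_blkno) is an overflow fix, not a style change. dbtob() is a plain left shift by DEV_BSHIFT, so with a 32-bit daddr_t the shift is performed in 32-bit arithmetic and wraps once the byte offset into the drum reaches 4GB. A minimal userland sketch of the failure mode; the DEV_BSHIFT and PAGE_SHIFT values here are assumptions, not taken from this diff:

#include <stdint.h>
#include <stdio.h>

#define DEV_BSHIFT	9			/* assumed: 512-byte disk blocks */
#define PAGE_SHIFT	12			/* assumed: 4k pages */
#define dbtob(x)	((x) << DEV_BSHIFT)	/* blocks to bytes, as in sys/param.h */

int
main(void)
{
	uint32_t blkno = 0x00800000;	/* 8M blocks: 4GB into the drum */
	long long bad, good;

	/* without the cast the shift is done in 32 bits and wraps to 0 */
	bad = dbtob(blkno) >> PAGE_SHIFT;

	/* the new code widens first, so the shift is done in 64 bits */
	good = dbtob((int64_t)blkno) >> PAGE_SHIFT;

	printf("bad=%lld good=%lld\n", bad, good);	/* bad=0 good=1048576 */
	return (0);
}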
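
The transfer-size rewrite in sw_reg_strategy() is also a behavior change, not just a cleanup: the old code threw away the read-ahead that VOP_BMAP reported in nra whenever the transfer started unaligned within a swd_bsize block (the deleted XXXCDC comment), while the new code keeps the read-ahead and only trims the unaligned head. A standalone model of both computations; swd_bsize, nra, byteoff and resid are stand-in values, not from the diff:

#include <stdio.h>

static long
old_sz(long bsize, long nra, long byteoff, long resid)
{
	long off = byteoff % bsize, sz;

	if (off != 0)
		sz = bsize - off;		/* read-ahead dropped */
	else
		sz = (1 + nra) * bsize;
	return (sz < resid ? sz : resid);
}

static long
new_sz(long bsize, long nra, long byteoff, long resid)
{
	long off = byteoff % bsize;
	long sz = (1 + nra) * bsize - off;	/* read-ahead kept */

	return (sz < resid ? sz : resid);
}

int
main(void)
{
	/* 64kB blocks, 7 blocks of read-ahead, start 16kB into a block */
	printf("old=%ld new=%ld\n",
	    old_sz(65536, 7, 16384, 1L << 20),
	    new_sz(65536, 7, 16384, 1L << 20));
	return (0);
}

With those numbers the old computation issues a 48kB transfer (old=49152) where the new one issues 496kB (new=507904).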
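
In uvm_swap_io(), the private swapbuf pool is gone and the buf now comes straight out of bufpool, but the old sleeping rule carries over: only a synchronous caller other than the pagedaemon may pass PR_WAITOK. An async caller is not going to wait around, and the pagedaemon must never sleep waiting for memory that it is itself responsible for freeing up. A userland model of just that decision; the two booleans stand in for the B_ASYNC flag and the curproc == uvm.pagedaemon_proc check:

#include <stdio.h>

#define PR_WAITOK	1	/* stands in for "allowed to sleep in pool_get()" */

static int
pflag_for(int async, int is_pagedaemon)
{
	/* sleeping is only safe for sync callers other than the pagedaemon */
	return ((async || is_pagedaemon) ? 0 : PR_WAITOK);
}

int
main(void)
{
	printf("sync user:  %d\n", pflag_for(0, 0));	/* 1: may sleep */
	printf("async:      %d\n", pflag_for(1, 0));	/* 0: may not */
	printf("pagedaemon: %d\n", pflag_for(0, 1));	/* 0: may not */
	return (0);
}

When pool_get() then returns NULL, uvm_swap_io() hands VM_PAGER_AGAIN back to the caller rather than deadlocking.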
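
Finally, the async completion path: uvm_swap_bufdone() and uvm_swap_aiodone() are deleted, and an async buf is instead marked B_CALL with b_iodone = uvm_aio_biodone, whose body is not part of this diff. Modeled on the removed uvm_swap_bufdone(), the handoff presumably looks roughly like the sketch below: the buf is queued on uvm.aio_done and the new aiodoned thread (created in init_main.c) is woken, so completion work runs in process context instead of in the pagedaemon. This is guesswork in kernel context, not compilable on its own, and treating b_freelist as the free TAILQ linkage that threads uvm.aio_done is an assumption:

/*
 * sketch only, NOT from this diff: hand a finished async buf to aiodoned.
 * called from biodone() at splbio via the B_CALL mechanism.
 */
void
sketch_aio_biodone(bp)
	struct buf *bp;
{
	int s = splbio();

	simple_lock(&uvm.aiodoned_lock);	/* protects uvm.aio_done */
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);			/* wake aiodoned, not the pagedaemon */
	simple_unlock(&uvm.aiodoned_lock);
	splx(s);
}

The aiodoned loop would then drain uvm.aio_done at splbio under the same lock and finish each request, unmapping the pager window and releasing the pages much as the deleted uvm_swap_aiodone() used to.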