133 files changed, 6236 insertions, 7293 deletions
diff --git a/sys/adosfs/adosfs.h b/sys/adosfs/adosfs.h index c608fba701b..5c0b9e3c893 100644 --- a/sys/adosfs/adosfs.h +++ b/sys/adosfs/adosfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: adosfs.h,v 1.9 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: adosfs.h,v 1.10 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: adosfs.h,v 1.12 1996/10/08 22:18:02 thorpej Exp $ */ /* @@ -32,8 +32,6 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <miscfs/genfs/genfs.h> - /* * Amigados datestamp. (from 1/1/1978 00:00:00 local) */ @@ -53,7 +51,6 @@ enum anode_type { AROOT, ADIR, AFILE, ALDIR, ALFILE, ASLINK }; * table for f/e. it is always ANODETABSZ(ap) bytes in size. */ struct anode { - struct genfs_node gnode; LIST_ENTRY(anode) link; enum anode_type type; char name[31]; /* (r/d/f) name for object */ diff --git a/sys/adosfs/advfsops.c b/sys/adosfs/advfsops.c index 5cd599c4f8e..43a673042b5 100644 --- a/sys/adosfs/advfsops.c +++ b/sys/adosfs/advfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: advfsops.c,v 1.20 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: advfsops.c,v 1.21 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: advfsops.c,v 1.24 1996/12/22 10:10:12 cgd Exp $ */ /* @@ -557,7 +557,6 @@ adosfs_vget(mp, an, vpp) *vpp = vp; /* return vp */ brelse(bp); /* release buffer */ - vp->v_size = ap->fsize; return (0); } diff --git a/sys/adosfs/advnops.c b/sys/adosfs/advnops.c index 4928e2b1b93..27f806f3bce 100644 --- a/sys/adosfs/advnops.c +++ b/sys/adosfs/advnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: advnops.c,v 1.21 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: advnops.c,v 1.22 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: advnops.c,v 1.32 1996/10/13 02:52:09 christos Exp $ */ /* @@ -131,9 +131,7 @@ struct vnodeopv_entry_desc adosfs_vnodeop_entries[] = { { &vop_pathconf_desc, adosfs_pathconf }, /* pathconf */ { &vop_advlock_desc, adosfs_advlock }, /* advlock */ { &vop_bwrite_desc, adosfs_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc adosfs_vnodeop_opv_desc = @@ -274,28 +272,6 @@ adosfs_read(v) /* * taken from ufs_read() */ - - if (sp->a_vp->v_type == VREG) { - error = 0; - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = min(ap->fsize - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) { - break; - } - win = ubc_alloc(&sp->a_vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - } - goto reterr; - } - do { /* * we are only supporting ADosFFS currently diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index 8306df273c4..22fb769b976 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.31 2001/12/08 02:24:05 art Exp $ */ +/* $OpenBSD: pmap.c,v 1.32 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */ /*- @@ -809,7 +809,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Figure out how many PTE's are necessary to map the kernel. 
*/ - lev3mapsize = (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + + lev3mapsize = (VM_PHYS_SIZE + nbuf * MAXBSIZE + 16 * NCARGS + PAGER_MAP_SIZE) / NBPG + (maxproc * UPAGES) + nkmempages; diff --git a/sys/arch/amiga/dev/clock.c b/sys/arch/amiga/dev/clock.c index 213c3fa01e2..a9665e25289 100644 --- a/sys/arch/amiga/dev/clock.c +++ b/sys/arch/amiga/dev/clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: clock.c,v 1.12 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: clock.c,v 1.13 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: clock.c,v 1.25 1997/01/02 20:59:42 is Exp $ */ /* @@ -521,8 +521,8 @@ clockunmmap(dev, addr, p) if (addr == 0) return(EINVAL); /* XXX: how do we deal with this? */ - uvm_deallocate(p->p_vmspace->vm_map, (vm_offset_t)addr, PAGE_SIZE); - return (0); + rv = vm_deallocate(p->p_vmspace->vm_map, (vm_offset_t)addr, PAGE_SIZE); + return(rv == KERN_SUCCESS ? 0 : EINVAL); } startclock() diff --git a/sys/arch/hp300/dev/grf.c b/sys/arch/hp300/dev/grf.c index 3b1801f6a28..322cc2e78e9 100644 --- a/sys/arch/hp300/dev/grf.c +++ b/sys/arch/hp300/dev/grf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: grf.c,v 1.17 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: grf.c,v 1.18 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: grf.c,v 1.30 1998/08/20 08:33:41 kleink Exp $ */ /* @@ -654,6 +654,7 @@ grfunmap(dev, addr, p) struct grf_softc *sc = grf_cd.cd_devs[GRFUNIT(dev)]; struct grf_data *gp = sc->sc_data; vsize_t size; + int rv; #ifdef DEBUG if (grfdebug & GDB_MMAP) @@ -663,8 +664,9 @@ grfunmap(dev, addr, p) return(EINVAL); /* XXX: how do we deal with this? */ (void) (*gp->g_sw->gd_mode)(gp, GM_UNMAP, 0); size = round_page(gp->g_display.gd_regsize + gp->g_display.gd_fbsize); - uvm_unmap(&p->p_vmspace->vm_map, (vaddr_t)addr, (vaddr_t)addr + size); - return (0); + rv = uvm_unmap(&p->p_vmspace->vm_map, (vaddr_t)addr, + (vaddr_t)addr + size); + return(rv == KERN_SUCCESS ? 0 : EINVAL); } #ifdef COMPAT_HPUX diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 587bd8c8f97..d1b677fc309 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: pmap.c,v 1.56 2001/12/11 18:49:25 art Exp $ */ -/* $NetBSD: pmap.c,v 1.120 2001/04/22 23:42:14 thorpej Exp $ */ +/* $OpenBSD: pmap.c,v 1.57 2001/12/19 08:58:05 art Exp $ */ +/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* * @@ -290,15 +290,6 @@ int nkpde = NKPTP; int pmap_pg_g = 0; -#ifdef LARGEPAGES -/* - * pmap_largepages: if our processor supports PG_PS and we are - * using it, this is set to TRUE. - */ - -int pmap_largepages; -#endif - /* * i386 physical memory comes in a big contig chunk with a small * hole toward the front of it... 
the following 4 paddr_t's @@ -307,6 +298,8 @@ int pmap_largepages; */ paddr_t avail_start; /* PA of first available physical page */ paddr_t avail_end; /* PA of last available physical page */ +paddr_t hole_start; /* PA of start of "hole" */ +paddr_t hole_end; /* PA of end of "hole" */ /* * other data structures @@ -346,6 +339,7 @@ static vaddr_t pv_cachedva; /* cached VA for later use */ */ static struct pmap_head pmaps; +static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ /* * pool that pmap structures are allocated from @@ -354,15 +348,6 @@ static struct pmap_head pmaps; struct pool pmap_pmap_pool; /* - * pool and cache that PDPs are allocated from - */ - -struct pool pmap_pdp_pool; -struct pool_cache pmap_pdp_cache; - -int pmap_pdp_ctor(void *, void *, int); - -/* * special VAs and the PTEs that map them */ @@ -389,7 +374,7 @@ extern vaddr_t pentium_idt_vaddr; */ static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t)); -static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int)); +static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t)); static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */ #define ALLOCPV_NEED 0 /* need PV now */ #define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ @@ -402,27 +387,36 @@ static void pmap_free_pv __P((struct pmap *, struct pv_entry *)); static void pmap_free_pvs __P((struct pmap *, struct pv_entry *)); static void pmap_free_pv_doit __P((struct pv_entry *)); static void pmap_free_pvpage __P((void)); -static struct vm_page *pmap_get_ptp __P((struct pmap *, int)); +static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t)); static boolean_t pmap_is_curpmap __P((struct pmap *)); static pt_entry_t *pmap_map_ptes __P((struct pmap *)); static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *, vaddr_t)); -static void pmap_do_remove __P((struct pmap *, vaddr_t, - vaddr_t, int)); static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *, - pt_entry_t *, vaddr_t, int)); + pt_entry_t *, vaddr_t)); static void pmap_remove_ptes __P((struct pmap *, struct pmap_remove_record *, struct vm_page *, vaddr_t, - vaddr_t, vaddr_t, int)); -#define PMAP_REMOVE_ALL 0 /* remove all mappings */ -#define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */ + vaddr_t, vaddr_t)); +static struct vm_page *pmap_steal_ptp __P((struct uvm_object *, + vaddr_t)); static vaddr_t pmap_tmpmap_pa __P((paddr_t)); static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *)); static void pmap_tmpunmap_pa __P((void)); static void pmap_tmpunmap_pvepte __P((struct pv_entry *)); +static boolean_t pmap_transfer_ptes __P((struct pmap *, + struct pmap_transfer_location *, + struct pmap *, + struct pmap_transfer_location *, + int, boolean_t)); +static boolean_t pmap_try_steal_pv __P((struct pv_head *, + struct pv_entry *, + struct pv_entry *)); static void pmap_unmap_ptes __P((struct pmap *)); +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); + /* * p m a p i n l i n e h e l p e r f u n c t i o n s */ @@ -603,16 +597,8 @@ pmap_kenter_pa(va, pa, prot) { pt_entry_t *pte, opte; - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); + pte = vtopte(va); opte = *pte; -#ifdef LARGEPAGES - /* XXX For now... */ - if (opte & PG_PS) - panic("pmap_kenter_pa: PG_PS"); -#endif *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g; /* zap! 
*/ if (pmap_valid_entry(opte)) @@ -638,16 +624,8 @@ pmap_kremove(va, len) pt_entry_t *pte; len >>= PAGE_SHIFT; - for ( /* null */ ; len ; len--, va += PAGE_SIZE) { - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); -#ifdef LARGEPAGES - /* XXX For now... */ - if (*pte & PG_PS) - panic("pmap_kremove: PG_PS"); -#endif + for ( /* null */ ; len ; len--, va += NBPG) { + pte = vtopte(va); #ifdef DIAGNOSTIC if (*pte & PG_PVLIST) panic("pmap_kremove: PG_PVLIST mapping for 0x%lx\n", @@ -782,44 +760,6 @@ pmap_bootstrap(kva_start) PTE_BASE[i386_btop(kva)] |= PG_G; } -#ifdef LARGEPAGES - /* - * enable large pages of they are supported. - */ - - if (cpu_feature & CPUID_PSE) { - paddr_t pa; - vaddr_t kva_end; - pd_entry_t *pde; - extern char _etext; - - lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */ - pmap_largepages = 1; /* enable software */ - - /* - * the TLB must be flushed after enabling large pages - * on Pentium CPUs, according to section 3.6.2.2 of - * "Intel Architecture Software Developer's Manual, - * Volume 3: System Programming". - */ - tlbflush(); - - /* - * now, remap the kernel text using large pages. we - * assume that the linker has properly aligned the - * .data segment to a 4MB boundary. - */ - kva_end = roundup((vaddr_t)&_etext, NBPD); - for (pa = 0, kva = KERNBASE; kva < kva_end; - kva += NBPD, pa += NBPD) { - pde = &kpm->pm_pdir[pdei(kva)]; - *pde = pa | pmap_pg_g | PG_PS | - PG_KR | PG_V; /* zap! */ - tlbflush(); - } - } -#endif /* LARGEPAGES */ - /* * now we allocate the "special" VAs which are used for tmp mappings * by the pmap (and other modules). we allocate the VAs by advancing @@ -899,14 +839,41 @@ pmap_bootstrap(kva_start) pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); - /* - * initialize the PDE pool and cache. +#ifdef __NetBSD__ + /* + * we must call uvm_page_physload() after we are done playing with + * virtual_avail but before we call pmap_steal_memory. [i.e. here] + * this call tells the VM system how much physical memory it + * controls. If we have 16M of RAM or less, just put it all on + * the default free list. Otherwise, put the first 16M of RAM + * on a lower priority free list (so that all of the ISA DMA'able + * memory won't be eaten up first-off). */ - pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", - 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); - pool_cache_init(&pmap_pdp_cache, &pmap_pdp_pool, - pmap_pdp_ctor, NULL, NULL); + if (avail_end <= (16 * 1024 * 1024)) + first16q = VM_FREELIST_DEFAULT; + else + first16q = VM_FREELIST_FIRST16; + + if (avail_start < hole_start) /* any free memory before the hole? */ + uvm_page_physload(atop(avail_start), atop(hole_start), + atop(avail_start), atop(hole_start), + first16q); + + if (first16q != VM_FREELIST_DEFAULT && + hole_end < 16 * 1024 * 1024) { + uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024), + atop(hole_end), atop(16 * 1024 * 1024), + first16q); + uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end), + atop(16 * 1024 * 1024), atop(avail_end), + VM_FREELIST_DEFAULT); + } else { + uvm_page_physload(atop(hole_end), atop(avail_end), + atop(hole_end), atop(avail_end), + VM_FREELIST_DEFAULT); + } +#endif /* * ensure the TLB is sync'd with reality by flushing it... 
@@ -1025,8 +992,8 @@ pmap_alloc_pv(pmap, mode) simple_lock(&pvalloc_lock); - pvpage = TAILQ_FIRST(&pv_freepages); - if (pvpage != NULL) { + if (pv_freepages.tqh_first != NULL) { + pvpage = pv_freepages.tqh_first; pvpage->pvinfo.pvpi_nfree--; if (pvpage->pvinfo.pvpi_nfree == 0) { /* nothing left in this one? */ @@ -1079,17 +1046,17 @@ pmap_alloc_pvpage(pmap, mode) { struct vm_page *pg; struct pv_page *pvpage; - struct pv_entry *pv; - int s; + int lcv, idx, npg, s; + struct pv_entry *pv, *cpv, *prevpv; /* * if we need_entry and we've got unused pv_pages, allocate from there */ - pvpage = TAILQ_FIRST(&pv_unusedpgs); - if (mode != ALLOCPV_NONEED && pvpage != NULL) { + if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) { /* move it to pv_freepages list */ + pvpage = pv_unusedpgs.tqh_first; TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); @@ -1111,24 +1078,24 @@ pmap_alloc_pvpage(pmap, mode) * if not, try to allocate one. */ - s = splvm(); /* must protect kmem_map/kmem_object with splvm! */ + s = splimp(); /* must protect kmem_map/kmem_object with splimp! */ if (pv_cachedva == 0) { pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, - PAGE_SIZE, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); + NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); if (pv_cachedva == 0) { splx(s); - return (NULL); + goto steal_one; } } /* * we have a VA, now let's try and allocate a page in the object - * note: we are still holding splvm to protect kmem_object + * note: we are still holding splimp to protect kmem_object */ if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) { splx(s); - return (NULL); + goto steal_one; } pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva - @@ -1139,10 +1106,10 @@ pmap_alloc_pvpage(pmap, mode) simple_unlock(&uvmexp.kmem_object->vmobjlock); splx(s); - /* splvm now dropped */ + /* splimp now dropped */ if (pg == NULL) - return (NULL); + goto steal_one; /* * add a mapping for our new pv_page and free its entrys (save one!) @@ -1151,12 +1118,113 @@ pmap_alloc_pvpage(pmap, mode) * pmap is already locked! (...but entering the mapping is safe...) */ - pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - pvpage = (struct pv_page *)pv_cachedva; + pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pvpage = (struct pv_page *) pv_cachedva; pv_cachedva = 0; - return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + +steal_one: + /* + * if we don't really need a pv_entry right now, we can just return. + */ + + if (mode != ALLOCPV_NEED) + return(NULL); + + /* + * last ditch effort! we couldn't allocate a free page to make + * more pv_entrys so we try and steal one from someone else. + */ + + pv = NULL; + for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) { + npg = vm_physmem[lcv].end - vm_physmem[lcv].start; + for (idx = 0 ; idx < npg ; idx++) { + struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead; + + if (pvhead->pvh_list == NULL) + continue; /* spot check */ + if (!simple_lock_try(&pvhead->pvh_lock)) + continue; + cpv = prevpv = pvhead->pvh_list; + while (cpv) { + if (pmap_try_steal_pv(pvhead, cpv, prevpv)) + break; + prevpv = cpv; + cpv = cpv->pv_next; + } + simple_unlock(&pvhead->pvh_lock); + /* got one? break out of the loop! */ + if (cpv) { + pv = cpv; + break; + } + } + } + + return(pv); +} + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! 
+ */ + +static boolean_t +pmap_try_steal_pv(pvh, cpv, prevpv) + struct pv_head *pvh; + struct pv_entry *cpv, *prevpv; +{ + pt_entry_t *ptep; /* pointer to a PTE */ + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + *ptep = 0; /* zap! */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); + pmap_tmpunmap_pvepte(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); } /* @@ -1417,19 +1485,33 @@ pmap_remove_pv(pvh, pmap, va) * => we use the ptp's wire_count to count the number of active mappings * in the PTP (we start it at one to prevent any chance this PTP * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) */ __inline static struct vm_page * -pmap_alloc_ptp(pmap, pde_index) +pmap_alloc_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; + boolean_t just_try; { struct vm_page *ptp; ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); - if (ptp == NULL) - return(NULL); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + /* stole one; zero it. */ + pmap_zero_page(VM_PAGE_TO_PHYS(ptp)); + } /* got one! */ ptp->flags &= ~PG_BUSY; /* never busy */ @@ -1442,6 +1524,111 @@ pmap_alloc_ptp(pmap, pde_index) } /* + * pmap_steal_ptp: steal a PTP from any pmap that we can access + * + * => obj is locked by caller. 
+ * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +static struct vm_page * +pmap_steal_ptp(obj, offset) + struct uvm_object *obj; + vaddr_t offset; +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + ptp = curobj->memq.tqh_first; + for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). + */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes(pmaps_hand, NULL, ptp, + (vaddr_t)ptes, + ptp_i2v(idx), + ptp_i2v(idx+1)); + pmap_tmpunmap_pa(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; + if (pmap_is_curpmap(pmaps_hand)) + tlbflush(); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdirpa) { + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); + } + + /* put it in our pmap! */ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + return(ptp); +} + +/* * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) * * => pmap should NOT be pmap_kernel() @@ -1449,9 +1636,10 @@ pmap_alloc_ptp(pmap, pde_index) */ static struct vm_page * -pmap_get_ptp(pmap, pde_index) +pmap_get_ptp(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; + boolean_t just_try; { struct vm_page *ptp; @@ -1473,7 +1661,7 @@ pmap_get_ptp(pmap, pde_index) } /* allocate a new PTP (updates ptphint) */ - return(pmap_alloc_ptp(pmap, pde_index)); + return(pmap_alloc_ptp(pmap, pde_index, just_try)); } /* @@ -1481,41 +1669,6 @@ pmap_get_ptp(pmap, pde_index) */ /* - * pmap_pdp_ctor: constructor for the PDP cache. - */ - -int -pmap_pdp_ctor(void *arg, void *object, int flags) -{ - pd_entry_t *pdir = object; - paddr_t pdirpa; - - /* - * NOTE: The `pmap_lock' is held when the PDP is allocated. - * WE MUST NOT BLOCK! - */ - - /* fetch the physical address of the page directory. 
*/ - (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); - - /* zero init area */ - memset(pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t)); - - /* put in recursibve PDE to map the PTEs */ - pdir[PDSLOT_PTE] = pdirpa | PG_V | PG_KW; - - /* put in kernel VM PDEs */ - memcpy(&pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN], - nkpde * sizeof(pd_entry_t)); - - /* zero the rest */ - memset(&pdir[PDSLOT_KERN + nkpde], 0, - PAGE_SIZE - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); - - return (0); -} - -/* * pmap_create: create a pmap * * => note: old pmap interface took a "size" args which allowed for @@ -1528,7 +1681,18 @@ pmap_create() struct pmap *pmap; pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); + pmap_pinit(pmap); + return(pmap); +} + +/* + * pmap_pinit: given a zero'd pmap structure, init it. + */ +void +pmap_pinit(pmap) + struct pmap *pmap; +{ /* init uvm_object */ simple_lock_init(&pmap->pm_obj.vmobjlock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ @@ -1540,35 +1704,39 @@ pmap_create() pmap->pm_ptphint = NULL; pmap->pm_flags = 0; + /* allocate PDP */ + pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, + (paddr_t *)&pmap->pm_pdirpa); + + /* init PDP */ + /* zero init area */ + bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; + /* init the LDT */ pmap->pm_ldt = NULL; pmap->pm_ldt_len = 0; pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); - /* allocate PDP */ - /* * we need to lock pmaps_lock to prevent nkpde from changing on - * us. note that there is no need to splvm to protect us from - * malloc since malloc allocates out of a submap and we should - * have already allocated kernel PTPs to cover the range... - * - * NOTE: WE MUST NOT BLOCK WHILE HOLDING THE `pmap_lock'! + * us. note that there is no need to splimp to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... */ simple_lock(&pmaps_lock); - - /* XXX Need a generic "I want memory" wchan */ - while ((pmap->pm_pdir = - pool_cache_get(&pmap_pdp_cache, PR_NOWAIT)) == NULL) - (void) ltsleep(&lbolt, PVM, "pmapcr", hz >> 3, &pmaps_lock); - - pmap->pm_pdirpa = pmap->pm_pdir[PDSLOT_PTE] & PG_FRAME; - + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); LIST_INSERT_HEAD(&pmaps, pmap, pm_list); - simple_unlock(&pmaps_lock); - - return (pmap); } /* @@ -1580,7 +1748,6 @@ void pmap_destroy(pmap) struct pmap *pmap; { - struct vm_page *pg; int refs; /* @@ -1598,11 +1765,32 @@ pmap_destroy(pmap) * reference count is zero, free pmap resources and then free pmap. */ + pmap_release(pmap); + pool_put(&pmap_pmap_pool, pmap); +} + +/* + * pmap_release: release all resources held by a pmap + * + * => if pmap is still referenced it should be locked + * => XXX: we currently don't expect any busy PTPs because we don't + * allow anything to map them (except for the kernel's private + * recursive mapping) or make them busy. 
+ */ + +void +pmap_release(pmap) + struct pmap *pmap; +{ + struct vm_page *pg; + /* * remove it from global list of pmaps */ simple_lock(&pmaps_lock); + if (pmap == pmaps_hand) + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); LIST_REMOVE(pmap, pm_list); simple_unlock(&pmaps_lock); @@ -1610,7 +1798,8 @@ pmap_destroy(pmap) * free any remaining PTPs */ - while ((pg = TAILQ_FIRST(&pmap->pm_obj.memq)) != NULL) { + while (pmap->pm_obj.memq.tqh_first != NULL) { + pg = pmap->pm_obj.memq.tqh_first; #ifdef DIAGNOSTIC if (pg->flags & PG_BUSY) panic("pmap_release: busy page table page"); @@ -1622,7 +1811,7 @@ pmap_destroy(pmap) } /* XXX: need to flush it out of other processor's APTE space? */ - pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir); + uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); #ifdef USER_LDT if (pmap->pm_flags & PMF_USER_LDT) { @@ -1635,8 +1824,6 @@ pmap_destroy(pmap) pmap->pm_ldt_len * sizeof(union descriptor)); } #endif - - pool_put(&pmap_pmap_pool, pmap); } /* @@ -1776,49 +1963,20 @@ pmap_extract(pmap, va, pap) vaddr_t va; paddr_t *pap; { - pt_entry_t *ptes, pte; - pd_entry_t pde; - - if (__predict_true((pde = pmap->pm_pdir[pdei(va)]) != 0)) { -#ifdef LARGEPAGES - if (pde & PG_PS) { - if (pap != NULL) - *pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME); - return (TRUE); - } -#endif + paddr_t retval; + pt_entry_t *ptes; + if (pmap->pm_pdir[pdei(va)]) { ptes = pmap_map_ptes(pmap); - pte = ptes[i386_btop(va)]; + retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME); pmap_unmap_ptes(pmap); - - if (__predict_true((pte & PG_V) != 0)) { - if (pap != NULL) - *pap = (pte & PG_FRAME) | (va & ~PG_FRAME); - return (TRUE); - } + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); } return (FALSE); } -#ifdef LARGEPAGES -/* - * vtophys: virtual address to physical address. For use by - * machine-dependent code only. - */ - -paddr_t -vtophys(va) - vaddr_t va; -{ - paddr_t pa; - - if (pmap_extract(pmap_kernel(), va, &pa) == TRUE) - return (pa); - return (0); -} -#endif - /* * pmap_virtual_space: used during bootup [pmap_steal_memory] to * determine the bounds of the kernel virtual addess space. @@ -1841,10 +1999,15 @@ void pmap_zero_page(pa) paddr_t pa; { - simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page: lock botch"); +#endif + *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ - bzero(zerop, PAGE_SIZE); /* zero */ + bzero(zerop, NBPG); /* zero */ + *zero_pte = 0; /* zap! */ pmap_update_pg((vaddr_t)zerop); /* flush TLB */ simple_unlock(&pmap_zero_page_lock); } @@ -1857,31 +2020,20 @@ boolean_t pmap_zero_page_uncached(pa) paddr_t pa; { - int i, *ptr; - boolean_t rv = TRUE; - simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page_uncached: lock botch"); +#endif *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ ((cpu_class != CPUCLASS_386) ? PG_N : 0); - pmap_update_pg((vaddr_t)zerop); - for (i = 0, ptr = (int *) zerop; i < PAGE_SIZE / sizeof(int); i++) { - if (whichqs != 0) { - /* - * A process has become ready. Abort now, - * so we don't keep it waiting while we - * do slow memory access to finish this - * page. - */ - rv = FALSE; - break; - } - *ptr++ = 0; - } - + memset(zerop, 0, NBPG); /* zero */ + *zero_pte = 0; /* zap! 
*/ + pmap_update_pg((vaddr_t)zerop); /* flush TLB */ simple_unlock(&pmap_zero_page_lock); - return (rv); + return (TRUE); } /* @@ -1922,13 +2074,12 @@ pmap_copy_page(srcpa, dstpa) */ static void -pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) +pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) struct pmap *pmap; struct pmap_remove_record *pmap_rr; struct vm_page *ptp; vaddr_t ptpva; vaddr_t startva, endva; - int flags; { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; @@ -1946,12 +2097,9 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) */ for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) - ; pte++, startva += PAGE_SIZE) { + ; pte++, startva += NBPG) { if (!pmap_valid_entry(*pte)) continue; /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - continue; - } opte = *pte; /* save the old PTE */ *pte = 0; /* zap! */ @@ -2029,12 +2177,11 @@ pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva, flags) */ static boolean_t -pmap_remove_pte(pmap, ptp, pte, va, flags) +pmap_remove_pte(pmap, ptp, pte, va) struct pmap *pmap; struct vm_page *ptp; pt_entry_t *pte; vaddr_t va; - int flags; { pt_entry_t opte; int bank, off; @@ -2042,9 +2189,6 @@ pmap_remove_pte(pmap, ptp, pte, va, flags) if (!pmap_valid_entry(*pte)) return(FALSE); /* VA not mapped */ - if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { - return(FALSE); - } opte = *pte; /* save the old PTE */ *pte = 0; /* zap! */ @@ -2102,21 +2246,6 @@ pmap_remove(pmap, sva, eva) struct pmap *pmap; vaddr_t sva, eva; { - pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); -} - -/* - * pmap_do_remove: mapping removal guts - * - * => caller should not be holding any pmap locks - */ - -static void -pmap_do_remove(pmap, sva, eva, flags) - struct pmap *pmap; - vaddr_t sva, eva; - int flags; -{ pt_entry_t *ptes; boolean_t result; paddr_t ptppa; @@ -2164,7 +2293,7 @@ pmap_do_remove(pmap, sva, eva, flags) /* do it! */ result = pmap_remove_pte(pmap, ptp, - &ptes[i386_btop(sva)], sva, flags); + &ptes[i386_btop(sva)], sva); /* * if mapping removed and the PTP is no longer @@ -2259,7 +2388,7 @@ pmap_do_remove(pmap, sva, eva, flags) } } pmap_remove_ptes(pmap, prr, ptp, - (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva, flags); + (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva); /* if PTP is no longer being used, free it! */ if (ptp && ptp->wire_count <= 1) { @@ -2756,7 +2885,449 @@ pmap_collect(pmap) */ pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS); - pmap_update(pmap); +} + +/* + * pmap_transfer: transfer (move or copy) mapping from one pmap + * to another. + * + * => this function is optional, it doesn't have to do anything + * => we assume that the mapping in the src pmap is valid (i.e. that + * it doesn't run off the end of the map's virtual space). + * => we assume saddr, daddr, and len are page aligned/lengthed + */ + +void +pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move) + struct pmap *dstpmap, *srcpmap; + vaddr_t daddr, saddr; + vsize_t len; + boolean_t move; +{ + /* base address of PTEs, dst could be NULL */ + pt_entry_t *srcptes, *dstptes; + + struct pmap_transfer_location srcl, dstl; + int dstvalid; /* # of PTEs left in dst's current PTP */ + struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */ + vsize_t blklen; + int blkpgs, toxfer; + boolean_t ok; + +#ifdef DIAGNOSTIC + /* + * sanity check: let's make sure our len doesn't overflow our dst + * space. 
+ */ + + if (daddr < VM_MAXUSER_ADDRESS) { + if (VM_MAXUSER_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in user pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } else if (daddr < VM_MIN_KERNEL_ADDRESS || + daddr >= VM_MAX_KERNEL_ADDRESS) { + printf("pmap_transfer: invalid transfer address 0x%lx\n", + daddr); + } else { + if (VM_MAX_KERNEL_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in kernel pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } +#endif + + /* + * ideally we would like to have either src or dst pmap's be the + * current pmap so that we can map the other one in APTE space + * (if needed... one of the maps could be the kernel's pmap). + * + * however, if we can't get this, then we have to use the tmpmap + * (alternately we could punt). + */ + + if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) { + dstptes = NULL; /* dstptes NOT mapped */ + srcptes = pmap_map_ptes(srcpmap); /* let's map the source */ + mapped_pmap = srcpmap; + } else { + if (!pmap_is_curpmap(srcpmap)) { + srcptes = pmap_map_ptes(srcpmap); /* possible APTE */ + dstptes = PTE_BASE; + mapped_pmap = srcpmap; + } else { + dstptes = pmap_map_ptes(dstpmap); /* possible APTE */ + srcptes = PTE_BASE; + mapped_pmap = dstpmap; + } + } + + /* + * at this point we know that the srcptes are mapped. the dstptes + * are mapped if (dstptes != NULL). if (dstptes == NULL) then we + * will have to map the dst PTPs page at a time using the tmpmap. + * [XXX: is it worth the effort, or should we just punt?] + */ + + srcl.addr = saddr; + srcl.pte = &srcptes[i386_btop(srcl.addr)]; + srcl.ptp = NULL; + dstl.addr = daddr; + if (dstptes) + dstl.pte = &dstptes[i386_btop(dstl.addr)]; + else + dstl.pte = NULL; /* we map page at a time */ + dstl.ptp = NULL; + dstvalid = 0; /* force us to load a new dst PTP to start */ + + while (len) { + + /* + * compute the size of this block. + */ + + /* length in bytes */ + blklen = i386_round_pdr(srcl.addr+1) - srcl.addr; + if (blklen > len) + blklen = len; + blkpgs = i386_btop(blklen); + + /* + * if the block is not valid in the src pmap, + * then we can skip it! + */ + + if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) { + len = len - blklen; + srcl.pte = srcl.pte + blkpgs; + srcl.addr += blklen; + dstl.addr += blklen; + if (blkpgs > dstvalid) { + dstvalid = 0; + dstl.ptp = NULL; + } else { + dstvalid = dstvalid - blkpgs; + } + if (dstptes == NULL && (len == 0 || dstvalid == 0)) { + if (dstl.pte) { + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + } else { + dstl.pte += blkpgs; + } + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * if we don't have any dst PTEs ready, then get some. + */ + + if (dstvalid == 0) { + if (!pmap_valid_entry(dstpmap-> + pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel " + "PTP at 0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, + pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) - + dstl.addr); + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap-> + pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid); + } + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * we have a valid dst block of "dstvalid" PTEs ready. + * thus we can transfer min(blkpgs, dstvalid) PTEs now. 
+ */ + + srcl.ptp = NULL; /* don't know source PTP yet */ + if (dstvalid < blkpgs) + toxfer = dstvalid; + else + toxfer = blkpgs; + + if (toxfer > 0) { + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, + toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + if (blkpgs == 0) /* out of src PTEs? restart */ + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we have just used up our "dstvalid" + * PTEs, and thus must obtain more dst PTEs to finish + * off the src block. since we are now going to + * obtain a brand new dst PTP, we know we can finish + * the src block in one more transfer. + */ + +#ifdef DIAGNOSTIC + if (dstvalid) + panic("pmap_transfer: dstvalid non-zero after drain"); + if ((dstl.addr & (NBPD-1)) != 0) + panic("pmap_transfer: dstaddr not on PD boundary " + "(0x%lx)\n", dstl.addr); +#endif + + if (dstptes == NULL && dstl.pte != NULL) { + /* dispose of old PT mapping */ + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + + /* + * get new dst PTP + */ + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at " + "0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + + dstvalid = PTES_PER_PTP; /* new PTP */ + + /* + * if the dstptes are un-mapped, then we need to tmpmap in the + * dstl.ptp. + */ + + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we just got a brand new dst PTP to + * receive these PTEs. + */ + +#ifdef DIAGNOSTIC + if (dstvalid < blkpgs) + panic("pmap_transfer: too many blkpgs?"); +#endif + toxfer = blkpgs; + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, + move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + + /* + * done src pte block + */ + } + if (dstptes == NULL && dstl.pte != NULL) + pmap_tmpunmap_pa(); /* dst PTP still mapped? */ + pmap_unmap_ptes(mapped_pmap); +} + +/* + * pmap_transfer_ptes: transfer PTEs from one pmap to another + * + * => we assume that the needed PTPs are mapped and that we will + * not cross a block boundary. + * => we return TRUE if we transfered all PTEs, FALSE if we were + * unable to allocate a pv_entry + */ + +static boolean_t +pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move) + struct pmap *srcpmap, *dstpmap; + struct pmap_transfer_location *srcl, *dstl; + int toxfer; + boolean_t move; +{ + pt_entry_t dstproto, opte; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve, *lpve; + + /* + * generate "prototype" dst PTE + */ + + if (dstl->addr < VM_MAX_ADDRESS) + dstproto = PG_u; /* "user" page */ + else + dstproto = pmap_pg_g; /* kernel page */ + + /* + * ensure we have dst PTP for user addresses. 
+ */ + + if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS) + dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] & + PG_FRAME); + + /* + * main loop over range + */ + + for (/*null*/; toxfer > 0 ; toxfer--, + srcl->addr += NBPG, dstl->addr += NBPG, + srcl->pte++, dstl->pte++) { + + if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entrys */ + continue; + +#ifdef DIAGNOSTIC + if (pmap_valid_entry(*dstl->pte)) + panic("pmap_transfer_ptes: attempt to overwrite " + "active entry"); +#endif + + /* + * let's not worry about non-pvlist mappings (typically device + * pager mappings). + */ + + opte = *srcl->pte; + + if ((opte & PG_PVLIST) == 0) + continue; + + /* + * if we are moving the mapping, then we can just adjust the + * current pv_entry. if we are copying the mapping, then we + * need to allocate a new pv_entry to account for it. + */ + + if (move == FALSE) { + pve = pmap_alloc_pv(dstpmap, ALLOCPV_TRY); + if (pve == NULL) + return(FALSE); /* punt! */ + } else { + pve = NULL; /* XXX: quiet gcc warning */ + } + + /* + * find the pv_head for this mapping. since our mapping is + * on the pvlist (PG_PVLIST), there must be a pv_head. + */ + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_transfer_ptes: PG_PVLIST PTE and " + "no pv_head!"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + + /* + * now lock down the pvhead and find the current entry (there + * must be one). + */ + + simple_lock(&pvh->pvh_lock); + for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next) + if (lpve->pv_pmap == srcpmap && + lpve->pv_va == srcl->addr) + break; +#ifdef DIAGNOSTIC + if (lpve == NULL) + panic("pmap_transfer_ptes: PG_PVLIST PTE, but " + "entry not found"); +#endif + + /* + * update src ptp. if the ptp is null in the pventry, then + * we are not counting valid entrys for this ptp (this is only + * true for kernel PTPs). + */ + + if (srcl->ptp == NULL) + srcl->ptp = lpve->pv_ptp; +#ifdef DIAGNOSTIC + if (srcl->ptp && + (srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) != + VM_PAGE_TO_PHYS(srcl->ptp)) + panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!"); +#endif + + /* + * for move, update the pve we just found (lpve) to + * point to its new mapping. for copy, init the new + * pve and put it in the list. + */ + + if (move == TRUE) { + pve = lpve; + } + pve->pv_pmap = dstpmap; + pve->pv_va = dstl->addr; + pve->pv_ptp = dstl->ptp; + if (move == FALSE) { /* link in copy */ + pve->pv_next = lpve->pv_next; + lpve->pv_next = pve; + } + + /* + * sync the R/M bits while we are here. + */ + + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* + * now actually update the ptes and unlock the pvlist. + */ + + if (move) { + *srcl->pte = 0; /* zap! 
*/ + if (pmap_is_curpmap(srcpmap)) + pmap_update_pg(srcl->addr); + if (srcl->ptp) + /* don't bother trying to free PTP */ + srcl->ptp->wire_count--; + srcpmap->pm_stats.resident_count--; + if (opte & PG_W) + srcpmap->pm_stats.wired_count--; + } + *dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto; + dstpmap->pm_stats.resident_count++; + if (dstl->ptp) + dstl->ptp->wire_count++; + simple_unlock(&pvh->pvh_lock); + } + return(TRUE); } /* @@ -2817,10 +3388,10 @@ pmap_enter(pmap, va, pa, prot, flags) if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp(pmap, pdei(va)); + ptp = pmap_get_ptp(pmap, pdei(va), FALSE); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } panic("pmap_enter: get ptp failed"); } @@ -2920,7 +3491,7 @@ pmap_enter(pmap, va, pa, prot, flags) pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); if (pve == NULL) { if (flags & PMAP_CANFAIL) { - error = ENOMEM; + error = KERN_RESOURCE_SHORTAGE; goto out; } panic("pmap_enter: no pv entries available"); @@ -3029,7 +3600,7 @@ pmap_growkernel(maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde) == NULL) { + if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { panic("pmap_growkernel: alloc ptp failed"); } @@ -3038,14 +3609,11 @@ pmap_growkernel(maxkvaddr) /* distribute new kernel PTP to all active pmaps */ simple_lock(&pmaps_lock); - LIST_FOREACH(pm, &pmaps, pm_list) { + for (pm = pmaps.lh_first; pm != NULL; + pm = pm->pm_list.le_next) { pm->pm_pdir[PDSLOT_KERN + nkpde] = kpm->pm_pdir[PDSLOT_KERN + nkpde]; } - - /* Invalidate the PDP cache. */ - pool_cache_invalidate(&pmap_pdp_cache); - simple_unlock(&pmaps_lock); } @@ -3104,7 +3672,7 @@ pmap_dump(pmap, sva, eva) continue; pte = &ptes[i386_btop(sva)]; - for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { if (!pmap_valid_entry(*pte)) continue; printf("va %#lx -> pa %#x (pte=%#x)\n", diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index b9950057cef..ff7cca16a22 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.24 2001/12/11 17:24:34 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.25 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -204,12 +204,8 @@ #define vtopte(VA) (PTE_BASE + i386_btop(VA)) #define kvtopte(VA) vtopte(VA) #define ptetov(PT) (i386_ptob(PT - PTE_BASE)) -#ifdef LARGEPAGES -paddr_t vtophys(vaddr_t); -#else #define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ ((unsigned)(VA) & ~PG_FRAME)) -#endif #define avtopte(VA) (APTE_BASE + i386_btop(VA)) #define ptetoav(PT) (i386_ptob(PT - APTE_BASE)) #define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ diff --git a/sys/arch/mac68k/dev/grf.c b/sys/arch/mac68k/dev/grf.c index c11cbc5da35..34aeb896c27 100644 --- a/sys/arch/mac68k/dev/grf.c +++ b/sys/arch/mac68k/dev/grf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: grf.c,v 1.19 2001/11/28 13:47:38 art Exp $ */ +/* $OpenBSD: grf.c,v 1.20 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: grf.c,v 1.41 1997/02/24 06:20:04 scottr Exp $ */ /* @@ -391,6 +391,7 @@ grfunmap(dev, addr, p) { struct grf_softc *gp; vm_size_t size; + int rv; gp = grf_cd.cd_devs[GRFUNIT(dev)]; @@ -404,8 +405,8 @@ grfunmap(dev, addr, p) size = round_page(gp->sc_grfmode->fbsize); - uvm_unmap(&p->p_vmspace->vm_map, (vm_offset_t)addr, + rv = uvm_unmap(&p->p_vmspace->vm_map, (vm_offset_t)addr, (vm_offset_t)addr + size); - 
return (0); + return (rv == 0 ? 0 : EINVAL); } diff --git a/sys/arch/sparc/include/pmap.h b/sys/arch/sparc/include/pmap.h index a67cbb80097..ff2f0d93710 100644 --- a/sys/arch/sparc/include/pmap.h +++ b/sys/arch/sparc/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.29 2001/12/07 10:52:25 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.30 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.h,v 1.30 1997/08/04 20:00:47 pk Exp $ */ /* @@ -295,17 +295,22 @@ void pmap_pinit __P((pmap_t)); void pmap_reference __P((pmap_t)); void pmap_release __P((pmap_t)); void pmap_remove __P((pmap_t, vaddr_t, vaddr_t)); -#define pmap_update(pm) /* nothing */ void pmap_init __P((void)); int pmap_page_index __P((paddr_t)); void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); void pmap_redzone __P((void)); -void kvm_uncache __P((caddr_t, int)); +void kvm_setcache __P((caddr_t, int, int)); +#define kvm_uncache(addr, npages) kvm_setcache(addr, npages, 0) +#define kvm_recache(addr, npages) kvm_setcache(addr, npages, 1) +void pmap_cache_enable __P((void)); struct user; void switchexit __P((struct proc *)); int mmu_pagein __P((struct pmap *pm, vaddr_t, int)); void pmap_writetext __P((unsigned char *, int)); +#define pmap_update(pm) /* nothing */ +#define pmap_copy(DP,SP,D,L,S) /* nothing */ + /* SUN4/SUN4C SPECIFIC DECLARATIONS */ #if defined(SUN4) || defined(SUN4C) diff --git a/sys/arch/sparc/include/psl.h b/sys/arch/sparc/include/psl.h index 7b1139f03c0..8959f9f1072 100644 --- a/sys/arch/sparc/include/psl.h +++ b/sys/arch/sparc/include/psl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: psl.h,v 1.7 2001/12/07 10:38:11 art Exp $ */ +/* $OpenBSD: psl.h,v 1.8 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: psl.h,v 1.12 1997/03/10 21:49:11 pk Exp $ */ /* @@ -203,6 +203,11 @@ SPLHOLD(spltty, PIL_TTY) SPLHOLD(splimp, 7) SPLHOLD(splvm, 7) +/* + * remove. + */ +SPLHOLD(splpmap, 7) + SPLHOLD(splclock, PIL_CLOCK) /* fd hardware interrupts are at level 11 */ diff --git a/sys/arch/sparc/include/vmparam.h b/sys/arch/sparc/include/vmparam.h index 40858973242..15e06e9ad4c 100644 --- a/sys/arch/sparc/include/vmparam.h +++ b/sys/arch/sparc/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.21 2001/12/05 16:25:44 art Exp $ */ +/* $OpenBSD: vmparam.h,v 1.22 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: vmparam.h,v 1.13 1997/07/12 16:20:03 perry Exp $ */ /* @@ -109,6 +109,7 @@ /* virtual sizes (bytes) for various kernel submaps */ #define VM_MBUF_SIZE (NMBCLUSTERS*MCLBYTES) +#define VM_KMEM_SIZE (NKMEMCLUSTERS*PAGE_SIZE) #define VM_PHYSSEG_MAX 32 /* we only have one "hole" */ #define VM_PHYSSEG_STRAT VM_PSTRAT_BSEARCH @@ -117,7 +118,6 @@ /* * pmap specific data stored in the vm_physmem[] array */ -#define __HAVE_PMAP_PHYSSEG struct pmap_physseg { struct pvlist *pv_head; }; diff --git a/sys/arch/sparc/sparc/cache.c b/sys/arch/sparc/sparc/cache.c index be40c5ae1c8..4fe55973023 100644 --- a/sys/arch/sparc/sparc/cache.c +++ b/sys/arch/sparc/sparc/cache.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cache.c,v 1.14 2001/12/05 14:40:48 art Exp $ */ +/* $OpenBSD: cache.c,v 1.15 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cache.c,v 1.34 1997/09/26 22:17:23 pk Exp $ */ /* @@ -768,102 +768,72 @@ viking_cache_flush(base, len) } void -viking_pcache_flush_page(pa, invalidate_only) - paddr_t pa; - int invalidate_only; +viking_pcache_flush_line(va, pa) + int va; + int pa; { - int set, i; + /* + * Flush cache line corresponding to virtual address `va' + * which is mapped at physical address `pa'. 
+ */ + extern char etext[]; + static char *base; + int i; + char *v; /* - * The viking's on-chip data cache is 4-way set associative, - * consisting of 128 sets, each holding 4 lines of 32 bytes. - * Note that one 4096 byte page exactly covers all 128 sets - * in the cache. + * Construct a virtual address that hits the same cache line + * as PA, then read from 2*ASSOCIATIVITY-1 different physical + * locations (all different from PA). */ - if (invalidate_only) { - u_int pa_tag = (pa >> 12); - u_int tagaddr; - u_int64_t tag; - - /* - * Loop over all sets and invalidate all entries tagged - * with the given physical address by resetting the cache - * tag in ASI_DCACHETAG control space. - * - * The address format for accessing a tag is: - * - * 31 30 27 26 11 5 4 3 2 0 - * +------+-----+------+-------//--------+--------+----+-----+ - * | type | xxx | line | xxx | set | xx | 0 | - * +------+-----+------+-------//--------+--------+----+-----+ - * - * set: the cache set tag to be read (0-127) - * line: the line within the set (0-3) - * type: 1: read set tag; 2: read physical tag - * - * The (type 2) tag read from this address is a 64-bit word - * formatted as follows: - * - * 5 4 4 - * 63 6 8 0 23 0 - * +-------+-+-------+-+-------+-+-----------+----------------+ - * | xxx |V| xxx |D| xxx |S| xxx | PA[35-12] | - * +-------+-+-------+-+-------+-+-----------+----------------+ - * - * PA: bits 12-35 of the physical address - * S: line shared bit - * D: line dirty bit - * V: line valid bit - */ - -#define VIKING_DCACHETAG_S 0x0000010000000000UL /* line valid bit */ -#define VIKING_DCACHETAG_D 0x0001000000000000UL /* line dirty bit */ -#define VIKING_DCACHETAG_V 0x0100000000000000UL /* line shared bit */ -#define VIKING_DCACHETAG_PAMASK 0x0000000000ffffffUL /* PA tag field */ - - for (set = 0; set < 128; set++) { - /* Set set number and access type */ - tagaddr = (set << 5) | (2 << 30); - - /* Examine the tag for each line in the set */ - for (i = 0 ; i < 4; i++) { - tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG); - /* - * If this is a valid tag and the PA field - * matches clear the tag. - */ - if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag && - (tag & VIKING_DCACHETAG_V) != 0) - stda(tagaddr | (i << 26), - ASI_DCACHETAG, 0); - } - } - } else { - extern char kernel_text[]; - - /* - * Force the cache to validate its backing memory - * by displacing all cache lines with known read-only - * content from the start of kernel text. - * - * Note that this thrashes the entire cache. However, - * we currently only need to call upon this code - * once at boot time. - */ - for (set = 0; set < 128; set++) { - int *v = (int *)(kernel_text + (set << 5)); - - /* - * We need to read (2*associativity-1) different - * locations to be sure to displace the entire set. 
- */ - i = 2 * 4 - 1; - while (i--) { - (*(volatile int *)v); - v += 4096; - } - } +#if 0 + if (base == 0) { + cshift = CACHEINFO.ic_l2linesize; + csize = CACHEINFO.ic_nlines << cshift; + cmask = csize - 1; + base = (char *)roundup((int)etext, csize); + } + + v = base + (((va & cmask) >> cshift) << cshift); + i = CACHEINFO.dc_associativity * 2 - 1; + + while (i--) { + (*(volatile int *)v); + v += csize; + } +#else +#define cshift 5 /* CACHEINFO.ic_l2linesize */ +#define csize (128 << cshift) /* CACHEINFO.ic_nlines << cshift */ +#define cmask (csize - 1) +#define cass 4 /* CACHEINFO.dc_associativity */ + + if (base == 0) + base = (char *)roundup((unsigned int)etext, csize); + + v = base + (((pa & cmask) >> cshift) << cshift); + i = 2 * cass - 1; + + while (i--) { + (*(volatile int *)v); + v += csize; } +#undef cass +#undef cmask +#undef csize +#undef cshift +#endif +} + +void +srmmu_pcache_flush_line(va, pa) + int va; + int pa; +{ + /* + * Flush cache line corresponding to virtual address `va' + * which is mapped at physical address `pa'. + */ + sta(va, ASI_IDCACHELFP, 0); } #endif /* SUN4M */ diff --git a/sys/arch/sparc/sparc/cache.h b/sys/arch/sparc/sparc/cache.h index 445ec2369f8..e4c58416831 100644 --- a/sys/arch/sparc/sparc/cache.h +++ b/sys/arch/sparc/sparc/cache.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cache.h,v 1.5 2001/12/05 14:40:48 art Exp $ */ +/* $OpenBSD: cache.h,v 1.6 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cache.h,v 1.16 1997/07/06 21:15:14 pk Exp $ */ /* @@ -178,7 +178,8 @@ void hypersparc_cache_flush_all __P((void)); void ms1_cache_flush __P((caddr_t, u_int)); void viking_cache_flush __P((caddr_t, u_int)); -void viking_pcache_flush_page __P((paddr_t, int)); +void viking_pcache_flush_line __P((int, int)); +void srmmu_pcache_flush_line __P((int, int)); extern void sparc_noop __P((void)); @@ -192,8 +193,8 @@ extern void sparc_noop __P((void)); (void (*)__P((int))) sparc_noop #define noop_cache_flush \ (void (*)__P((caddr_t, u_int))) sparc_noop -#define noop_pcache_flush_page \ - (void (*)__P((paddr_t, int))) sparc_noop +#define noop_pcache_flush_line \ + (void (*)__P((int, int))) sparc_noop #define noop_pure_vcache_flush \ (void (*)__P((void))) sparc_noop #define noop_cache_flush_all \ @@ -203,7 +204,6 @@ extern void sparc_noop __P((void)); #define cache_flush_segment(vr,vs) cpuinfo.vcache_flush_segment(vr,vs) #define cache_flush_region(vr) cpuinfo.vcache_flush_region(vr) #define cache_flush_context() cpuinfo.vcache_flush_context() -#define pcache_flush_page(pa,flag) cpuinfo.pcache_flush_page(pa,flag) /* * Cache control information. 
diff --git a/sys/arch/sparc/sparc/cpu.c b/sys/arch/sparc/sparc/cpu.c index 66425e45b3b..e2662c99a49 100644 --- a/sys/arch/sparc/sparc/cpu.c +++ b/sys/arch/sparc/sparc/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.35 2001/12/07 10:44:52 art Exp $ */ +/* $OpenBSD: cpu.c,v 1.36 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cpu.c,v 1.56 1997/09/15 20:52:36 pk Exp $ */ /* @@ -252,6 +252,7 @@ cpu_attach(parent, self, aux) */ s = splhigh(); sc->cache_enable(); + pmap_cache_enable(); splx(s); return; } @@ -428,7 +429,7 @@ struct module_info module_sun4 = { sun4_vcache_flush_segment, sun4_vcache_flush_region, sun4_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, 0 @@ -554,7 +555,7 @@ struct module_info module_sun4c = { sun4_vcache_flush_segment, sun4_vcache_flush_region, sun4_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, 0 @@ -752,7 +753,7 @@ struct module_info module_ms1 = { noop_vcache_flush_segment, noop_vcache_flush_region, noop_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, ms1_cache_flush_all, memerr4m @@ -780,7 +781,7 @@ struct module_info module_ms2 = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + noop_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -803,7 +804,7 @@ struct module_info module_swift = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -851,7 +852,7 @@ struct module_info module_viking = { noop_vcache_flush_segment, noop_vcache_flush_region, noop_vcache_flush_context, - viking_pcache_flush_page, + viking_pcache_flush_line, noop_pure_vcache_flush, noop_cache_flush_all, viking_memerr @@ -889,7 +890,7 @@ viking_hotfix(sc) sc->flags |= CPUFLG_CACHEPAGETABLES; } else { sc->cache_flush = viking_cache_flush; - sc->pcache_flush_page = viking_pcache_flush_page; + sc->pcache_flush_line = viking_pcache_flush_line; } /* XXX! 
*/ @@ -934,7 +935,7 @@ struct module_info module_hypersparc = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, hypersparc_pure_vcache_flush, hypersparc_cache_flush_all, hypersparc_memerr @@ -985,7 +986,7 @@ struct module_info module_cypress = { srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, cypress_cache_flush_all, memerr4m @@ -1008,7 +1009,7 @@ struct module_info module_turbosparc = { /* UNTESTED */ srmmu_vcache_flush_segment, srmmu_vcache_flush_region, srmmu_vcache_flush_context, - noop_pcache_flush_page, + srmmu_pcache_flush_line, noop_pure_vcache_flush, srmmu_cache_flush_all, memerr4m @@ -1046,7 +1047,7 @@ cpumatch_turbosparc(sc, mp, node) sc->vcache_flush_segment = 0; sc->vcache_flush_region = 0; sc->vcache_flush_context = 0; - sc->pcache_flush_page = 0; + sc->pcache_flush_line = 0; replacemul(); } @@ -1233,7 +1234,7 @@ getcpuinfo(sc, node) MPCOPY(vcache_flush_segment); MPCOPY(vcache_flush_region); MPCOPY(vcache_flush_context); - MPCOPY(pcache_flush_page); + MPCOPY(pcache_flush_line); MPCOPY(pure_vcache_flush); MPCOPY(cache_flush_all); MPCOPY(memerr); diff --git a/sys/arch/sparc/sparc/cpuvar.h b/sys/arch/sparc/sparc/cpuvar.h index 45acc703a3e..fc6e4de2778 100644 --- a/sys/arch/sparc/sparc/cpuvar.h +++ b/sys/arch/sparc/sparc/cpuvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpuvar.h,v 1.6 2001/12/07 10:39:47 art Exp $ */ +/* $OpenBSD: cpuvar.h,v 1.7 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: cpuvar.h,v 1.4 1997/07/06 21:14:25 pk Exp $ */ /* @@ -70,7 +70,7 @@ struct module_info { void (*vcache_flush_segment) __P((int, int)); void (*vcache_flush_region) __P((int)); void (*vcache_flush_context) __P((void)); - void (*pcache_flush_page) __P((paddr_t, int)); + void (*pcache_flush_line) __P((int, int)); void (*pure_vcache_flush) __P((void)); void (*cache_flush_all)__P((void)); void (*memerr) __P((unsigned, u_int, u_int, struct trapframe *)); @@ -183,7 +183,7 @@ struct cpu_softc { void (*vcache_flush_segment)__P((int, int)); void (*vcache_flush_region)__P((int)); void (*vcache_flush_context)__P((void)); - void (*pcache_flush_page)__P((paddr_t, int)); + void (*pcache_flush_line)__P((int, int)); void (*pure_vcache_flush) __P((void)); void (*cache_flush_all)__P((void)); @@ -283,6 +283,7 @@ struct cpu_softc { */ void getcpuinfo __P((struct cpu_softc *sc, int node)); void mmu_install_tables __P((struct cpu_softc *)); +void pmap_alloc_cpu __P((struct cpu_softc *)); #define cpuinfo (*(struct cpu_softc *)CPUINFO_VA) #endif /* _SPARC_CPUVAR_H */ diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c index f240f3218cb..e4383c71bbb 100644 --- a/sys/arch/sparc/sparc/machdep.c +++ b/sys/arch/sparc/sparc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.74 2001/12/08 02:24:07 art Exp $ */ +/* $OpenBSD: machdep.c,v 1.75 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */ /* @@ -245,7 +245,6 @@ cpu_startup() curbufsize -= PAGE_SIZE; } } - pmap_update(pmap_kernel()); /* * Allocate a submap for exec arguments. This map effectively * limits the number of processes exec'ing at any time. 
@@ -869,11 +868,9 @@ dumpsys() (void) pmap_map(dumpspace, maddr, maddr + n, VM_PROT_READ); - pmap_update(pmap_kernel()); error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, (int)n); pmap_remove(pmap_kernel(), dumpspace, dumpspace + n); - pmap_update(pmap_kernel()); if (error) break; maddr += n; @@ -978,7 +975,6 @@ mapdev(phys, virt, offset, size) va += PAGE_SIZE; pa += PAGE_SIZE; } while ((size -= PAGE_SIZE) > 0); - pmap_update(pmap_kernel()); return (ret); } diff --git a/sys/arch/sparc/sparc/pmap.c b/sys/arch/sparc/sparc/pmap.c index e23610b67a6..f0ef4375d49 100644 --- a/sys/arch/sparc/sparc/pmap.c +++ b/sys/arch/sparc/sparc/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.116 2001/12/09 04:51:35 art Exp $ */ +/* $OpenBSD: pmap.c,v 1.117 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: pmap.c,v 1.118 1998/05/19 19:00:18 thorpej Exp $ */ /* @@ -162,13 +162,20 @@ int pmapdebug = 0; /* * Internal helpers. */ -static __inline struct pvlist *pvhead(int); +static __inline struct pvlist *pvhead __P((int)); +static __inline struct pvlist *pvalloc __P((void)); +static __inline void pvfree __P((struct pvlist *)); + +#if defined(SUN4M) +static u_int VA2PA __P((caddr_t)); +#endif /* * Given a page number, return the head of its pvlist. */ static __inline struct pvlist * -pvhead(int pnum) +pvhead(pnum) + int pnum; { int bank, off; @@ -181,6 +188,24 @@ pvhead(int pnum) struct pool pvpool; +/* + * Wrappers around some memory allocation. + * XXX - the plan is to make them non-sleeping. + */ + +static __inline struct pvlist * +pvalloc() +{ + return pool_get(&pvpool, PR_WAITOK); +} + +static __inline void +pvfree(pv) + struct pvlist *pv; +{ + pool_put(&pvpool, pv); +} + #if defined(SUN4M) /* * Memory pools and back-end supplier for SRMMU page tables. @@ -189,49 +214,49 @@ struct pool pvpool; */ static struct pool L1_pool; static struct pool L23_pool; -void *pgt_page_alloc(unsigned long, int, int); -void pgt_page_free(void *, unsigned long, int); +void *pgt_page_alloc __P((unsigned long, int, int)); +void pgt_page_free __P((void *, unsigned long, int)); + +void pcache_flush __P((caddr_t, caddr_t, int)); +void +pcache_flush(va, pa, n) + caddr_t va, pa; + int n; +{ + void (*f)__P((int,int)) = cpuinfo.pcache_flush_line; + + while ((n -= 4) >= 0) + (*f)((u_int)va+n, (u_int)pa+n); +} /* * Page table pool back-end. */ void * -pgt_page_alloc(unsigned long sz, int flags, int mtype) +pgt_page_alloc(sz, flags, mtype) + unsigned long sz; + int flags; + int mtype; { - struct vm_page *pg; - int nocache = (cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0; - vaddr_t va; - paddr_t pa; - - if ((pg = uvm_pagealloc(NULL, 0, NULL, 0)) == NULL) - return (NULL); - - if ((va = uvm_km_valloc(kernel_map, PAGE_SIZE)) == 0) { - uvm_pagefree(pg); - return (NULL); - } + caddr_t p; - pa = VM_PAGE_TO_PHYS(pg); - if (nocache) - pcache_flush_page(pa, 1); + p = (caddr_t)uvm_km_kmemalloc(kernel_map, uvm.kernel_object, + (vsize_t)sz, UVM_KMF_NOWAIT); - pmap_kenter_pa(va, pa | (nocache ? 
PMAP_NC : 0), - VM_PROT_READ|VM_PROT_WRITE); - pmap_update(pmap_kernel()); - return ((void *)va); + if (p != NULL && ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0)) { + pcache_flush(p, (caddr_t)VA2PA(p), sz); + kvm_uncache(p, atop(sz)); + } + return (p); } void -pgt_page_free(void *v, unsigned long sz, int mtype) +pgt_page_free(v, sz, mtype) + void *v; + unsigned long sz; + int mtype; { - vaddr_t va = (vaddr_t)v; - paddr_t pa; - - if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) - panic("pgt_page_free"); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - pmap_kremove(va, sz); - uvm_km_free(kernel_map, (vaddr_t)v, sz); + uvm_km_free(kernel_map, (vaddr_t)v, sz); } #endif /* SUN4M */ @@ -380,9 +405,9 @@ vaddr_t pagetables_start, pagetables_end; struct memarr pmemarr[MA_SIZE];/* physical memory regions */ int npmemarr; /* number of entries in pmemarr */ -static void pmap_page_upload(paddr_t); -void pmap_pinit(pmap_t); -void pmap_release(pmap_t); +static void pmap_page_upload __P((paddr_t)); +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); int mmu_has_hole; @@ -440,9 +465,9 @@ static u_long segfixmask = 0xffffffff; /* all bits valid to start */ #if defined(SUN4M) #define getpte4m(va) lda((va & 0xFFFFF000) | ASI_SRMMUFP_L3, \ ASI_SRMMUFP) -u_int *getptep4m(struct pmap *, vaddr_t); -static __inline void setpgt4m(int *, int); -void setpte4m(vaddr_t va, int pte); +u_int *getptep4m __P((struct pmap *, vaddr_t)); +static __inline void setpgt4m __P((int *, int)); +void setpte4m __P((vaddr_t va, int pte)); #endif #if defined(SUN4) || defined(SUN4C) @@ -464,35 +489,35 @@ void setpte4m(vaddr_t va, int pte); */ #if defined(SUN4M) -static void mmu_setup4m_L1(int, struct pmap *); -static void mmu_setup4m_L2(int, struct regmap *); -static void mmu_setup4m_L3(int, struct segmap *); -void mmu_reservemon4m(struct pmap *); - -void pmap_rmk4m(struct pmap *, vaddr_t, vaddr_t, int, int); -void pmap_rmu4m(struct pmap *, vaddr_t, vaddr_t, int, int); -int pmap_enk4m(struct pmap *, vaddr_t, vm_prot_t, - int, struct pvlist *, int); -int pmap_enu4m(struct pmap *, vaddr_t, vm_prot_t, - int, struct pvlist *, int); -void pv_changepte4m(struct pvlist *, int, int); -int pv_syncflags4m(struct pvlist *); -int pv_link4m(struct pvlist *, struct pmap *, vaddr_t, int); -void pv_unlink4m(struct pvlist *, struct pmap *, vaddr_t); +static void mmu_setup4m_L1 __P((int, struct pmap *)); +static void mmu_setup4m_L2 __P((int, struct regmap *)); +static void mmu_setup4m_L3 __P((int, struct segmap *)); +void mmu_reservemon4m __P((struct pmap *)); + +void pmap_rmk4m __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void pmap_rmu4m __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +int pmap_enk4m __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +int pmap_enu4m __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +void pv_changepte4m __P((struct pvlist *, int, int)); +int pv_syncflags4m __P((struct pvlist *)); +int pv_link4m __P((struct pvlist *, struct pmap *, vaddr_t, int)); +void pv_unlink4m __P((struct pvlist *, struct pmap *, vaddr_t)); #endif #if defined(SUN4) || defined(SUN4C) -void mmu_reservemon4_4c(int *, int *); -void pmap_rmk4_4c(struct pmap *, vaddr_t, vaddr_t, int, int); -void pmap_rmu4_4c(struct pmap *, vaddr_t, vaddr_t, int, int); -int pmap_enk4_4c(struct pmap *, vaddr_t, vm_prot_t, int, struct pvlist *, - int); -int pmap_enu4_4c(struct pmap *, vaddr_t, vm_prot_t, int, struct pvlist *, - int); -void pv_changepte4_4c(struct pvlist *, int, int); -int pv_syncflags4_4c(struct pvlist 
*); -int pv_link4_4c(struct pvlist *, struct pmap *, vaddr_t, int); -void pv_unlink4_4c(struct pvlist *, struct pmap *, vaddr_t); +void mmu_reservemon4_4c __P((int *, int *)); +void pmap_rmk4_4c __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void pmap_rmu4_4c __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +int pmap_enk4_4c __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +int pmap_enu4_4c __P((struct pmap *, vaddr_t, vm_prot_t, + int, struct pvlist *, int)); +void pv_changepte4_4c __P((struct pvlist *, int, int)); +int pv_syncflags4_4c __P((struct pvlist *)); +int pv_link4_4c __P((struct pvlist *, struct pmap *, vaddr_t, int)); +void pv_unlink4_4c __P((struct pvlist *, struct pmap *, vaddr_t)); #endif #if !defined(SUN4M) && (defined(SUN4) || defined(SUN4C)) @@ -507,22 +532,22 @@ void pv_unlink4_4c(struct pvlist *, struct pmap *, vaddr_t); /* function pointer declarations */ /* from pmap.h: */ -boolean_t (*pmap_clear_modify_p)(struct vm_page *); -boolean_t (*pmap_clear_reference_p)(struct vm_page *); -void (*pmap_copy_page_p)(paddr_t, paddr_t); -int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); -boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); -boolean_t (*pmap_is_modified_p)(struct vm_page *); -boolean_t (*pmap_is_referenced_p)(struct vm_page *); -void (*pmap_kenter_pa_p)(vaddr_t, paddr_t, vm_prot_t); -void (*pmap_kremove_p)(vaddr_t, vsize_t); -void (*pmap_page_protect_p)(struct vm_page *, vm_prot_t); -void (*pmap_protect_p)(pmap_t, vaddr_t, vaddr_t, vm_prot_t); -void (*pmap_zero_page_p)(paddr_t); -void (*pmap_changeprot_p)(pmap_t, vaddr_t, vm_prot_t, int); +boolean_t (*pmap_clear_modify_p) __P((struct vm_page *)); +boolean_t (*pmap_clear_reference_p) __P((struct vm_page *)); +void (*pmap_copy_page_p) __P((paddr_t, paddr_t)); +int (*pmap_enter_p) __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +boolean_t (*pmap_extract_p) __P((pmap_t, vaddr_t, paddr_t *)); +boolean_t (*pmap_is_modified_p) __P((struct vm_page *)); +boolean_t (*pmap_is_referenced_p) __P((struct vm_page *)); +void (*pmap_kenter_pa_p) __P((vaddr_t, paddr_t, vm_prot_t)); +void (*pmap_kremove_p) __P((vaddr_t, vsize_t)); +void (*pmap_page_protect_p) __P((struct vm_page *, vm_prot_t)); +void (*pmap_protect_p) __P((pmap_t, vaddr_t, vaddr_t, vm_prot_t)); +void (*pmap_zero_page_p) __P((paddr_t)); +void (*pmap_changeprot_p) __P((pmap_t, vaddr_t, vm_prot_t, int)); /* local: */ -void (*pmap_rmk_p)(struct pmap *, vaddr_t, vaddr_t, int, int); -void (*pmap_rmu_p)(struct pmap *, vaddr_t, vaddr_t, int, int); +void (*pmap_rmk_p) __P((struct pmap *, vaddr_t, vaddr_t, int, int)); +void (*pmap_rmu_p) __P((struct pmap *, vaddr_t, vaddr_t, int, int)); #define pmap_rmk (*pmap_rmk_p) #define pmap_rmu (*pmap_rmu_p) @@ -537,7 +562,6 @@ void (*pmap_rmu_p)(struct pmap *, vaddr_t, vaddr_t, int, int); */ #if defined(SUN4M) -static u_int VA2PA(caddr_t); /* * Macros which implement SRMMU TLB flushing/invalidation @@ -560,7 +584,8 @@ static u_int VA2PA(caddr_t); * during bootup to interact with the ROM's initial L1 mapping of the kernel. */ static u_int -VA2PA(caddr_t addr) +VA2PA(addr) + caddr_t addr; { u_int pte; @@ -599,7 +624,9 @@ VA2PA(caddr_t addr) * Assumes level 3 mapping (for now). */ u_int * -getptep4m(struct pmap *pm, vaddr_t va) +getptep4m(pm, va) + struct pmap *pm; + vaddr_t va; { struct regmap *rm; struct segmap *sm; @@ -627,7 +654,9 @@ getptep4m(struct pmap *pm, vaddr_t va) * Set the pte at "ptep" to "pte". 
*/ static __inline void -setpgt4m(int *ptep, int pte) +setpgt4m(ptep, pte) + int *ptep; + int pte; { swap(ptep, pte); } @@ -636,7 +665,9 @@ setpgt4m(int *ptep, int pte) * Set the page table entry for va to pte. Only legal for kernel mappings. */ void -setpte4m(vaddr_t va, int pte) +setpte4m(va, pte) + vaddr_t va; + int pte; { int *ptep; @@ -681,21 +712,24 @@ setpte4m(vaddr_t va, int pte) } while (0) -static void sortm(struct memarr *, int); -void ctx_alloc(struct pmap *); -void ctx_free(struct pmap *); -void pv_flushcache(struct pvlist *); +static void sortm __P((struct memarr *, int)); +void ctx_alloc __P((struct pmap *)); +void ctx_free __P((struct pmap *)); +void pv_flushcache __P((struct pvlist *)); +void kvm_iocache __P((caddr_t, int)); #ifdef DEBUG -void pm_check(char *, struct pmap *); -void pm_check_k(char *, struct pmap *); -void pm_check_u(char *, struct pmap *); +void pm_check __P((char *, struct pmap *)); +void pm_check_k __P((char *, struct pmap *)); +void pm_check_u __P((char *, struct pmap *)); #endif /* * Sort a memory array by address. */ static void -sortm(struct memarr *mp, int n) +sortm(mp, n) + struct memarr *mp; + int n; { struct memarr *mpj; int i, j; @@ -719,11 +753,21 @@ sortm(struct memarr *mp, int n) } /* + * For our convenience, vm_page.c implements: + * vm_bootstrap_steal_memory() + * using the functions: + * pmap_virtual_space(), pmap_free_pages(), pmap_next_page(), + * which are much simpler to implement. + */ + +/* * How much virtual space does this kernel have? * (After mapping kernel text, data, etc.) */ void -pmap_virtual_space(vaddr_t *v_start, vaddr_t *v_end) +pmap_virtual_space(v_start, v_end) + vaddr_t *v_start; + vaddr_t *v_end; { *v_start = virtual_avail; *v_end = virtual_end; @@ -733,7 +777,8 @@ pmap_virtual_space(vaddr_t *v_start, vaddr_t *v_end) * Helper routine that hands off available physical pages to the VM system. */ static void -pmap_page_upload(paddr_t first_pa) +pmap_page_upload(first_pa) + paddr_t first_pa; { int n = 0; paddr_t start, end; @@ -766,7 +811,8 @@ pmap_page_upload(paddr_t first_pa) } int -pmap_pa_exists(paddr_t pa) +pmap_pa_exists(pa) + paddr_t pa; { return (pa < phys_avail || (pvhead(atop(pa)) != NULL)); } @@ -788,7 +834,8 @@ pmap_pa_exists(paddr_t pa) */ #if defined(SUN4) || defined(SUN4C) void -mmu_reservemon4_4c(int *nrp, int *nsp) +mmu_reservemon4_4c(nrp, nsp) + int *nrp, *nsp; { u_int va = 0, eva = 0; int mmuseg, i, nr, ns, vr, lastvr; @@ -886,12 +933,15 @@ mmu_reservemon4_4c(int *nrp, int *nsp) * NOTE: This also revokes all user-mode access to the mapped regions. 
*/ void -mmu_reservemon4m(struct pmap *kpmap) +mmu_reservemon4m(kpmap) + struct pmap *kpmap; { unsigned int rom_ctxtbl; int te; unsigned int mmupcrsave; +/*XXX-GCC!*/mmupcrsave = 0; + /* * XXX: although the Sun4M can handle 36 bits of physical * address space, we assume that all these page tables, etc @@ -942,7 +992,9 @@ mmu_reservemon4m(struct pmap *kpmap) } void -mmu_setup4m_L1(int regtblptd, struct pmap *kpmap) +mmu_setup4m_L1(regtblptd, kpmap) + int regtblptd; /* PTD for region table to be remapped */ + struct pmap *kpmap; { unsigned int regtblrover; int i; @@ -1013,7 +1065,9 @@ mmu_setup4m_L1(int regtblptd, struct pmap *kpmap) } void -mmu_setup4m_L2(int segtblptd, struct regmap *rp) +mmu_setup4m_L2(segtblptd, rp) + int segtblptd; + struct regmap *rp; { unsigned int segtblrover; int i, k; @@ -1536,7 +1590,7 @@ printf("mmu_pagein: kernel wants map at va 0x%x, vr %d, vs %d\n", va, vr, vs); unsigned int tva = VA_ROUNDDOWNTOREG(va); struct segmap *sp = rp->rg_segmap; - s = splvm(); /* paranoid */ + s = splpmap(); /* paranoid */ smeg = region_alloc(®ion_lru, pm, vr)->me_cookie; setregmap(tva, smeg); i = NSEGRG; @@ -1559,7 +1613,7 @@ printf("mmu_pagein: kernel wants map at va 0x%x, vr %d, vs %d\n", va, vr, vs); /* reload segment: write PTEs into a new LRU entry */ va = VA_ROUNDDOWNTOSEG(va); - s = splvm(); /* paranoid */ + s = splpmap(); /* paranoid */ pmeg = me_alloc(&segm_lru, pm, vr, vs)->me_cookie; setsegmap(va, pmeg); i = NPTESG; @@ -1601,7 +1655,7 @@ ctx_alloc(pm) gap_end = pm->pm_gap_end; } - s = splvm(); + s = splpmap(); if ((c = ctx_freelist) != NULL) { ctx_freelist = c->c_nextfree; cnum = c - cpuinfo.ctxinfo; @@ -1831,7 +1885,7 @@ pv_changepte4_4c(pv0, bis, bic) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { splx(s); return; @@ -1930,7 +1984,7 @@ pv_syncflags4_4c(pv0) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { /* paranoid */ splx(s); return (0); @@ -2016,7 +2070,7 @@ pv_unlink4_4c(pv, pm, va) pv->pv_va = npv->pv_va; pv->pv_flags &= ~PV_NC; pv->pv_flags |= npv->pv_flags & PV_NC; - pool_put(&pvpool, npv); + pvfree(npv); } else { /* * No mappings left; we still need to maintain @@ -2040,7 +2094,7 @@ pv_unlink4_4c(pv, pm, va) break; } prev->pv_next = npv->pv_next; - pool_put(&pvpool, npv); + pvfree(npv); } if (pv->pv_flags & PV_ANC && (pv->pv_flags & PV_NC) == 0) { /* @@ -2113,9 +2167,7 @@ pv_link4_4c(pv, pm, va, nc) } } } - npv = pool_get(&pvpool, PR_NOWAIT); - if (npv == NULL) - panic("pvpool exhausted"); + npv = pvalloc(); npv->pv_next = pv->pv_next; npv->pv_pmap = pm; npv->pv_va = va; @@ -2139,9 +2191,16 @@ pv_link4_4c(pv, pm, va, nc) * as long as the process has a context; this is overly conservative. * It also copies ref and mod bits to the pvlist, on the theory that * this might save work later. (XXX should test this theory) + * + * In addition, if the cacheable bit (SRMMU_PG_C) is updated in the PTE + * the corresponding PV_C4M flag is also updated in each pv entry. This + * is done so kvm_uncache() can use this routine and have the uncached + * status stick. */ void -pv_changepte4m(struct pvlist *pv0, int bis, int bic) +pv_changepte4m(pv0, bis, bic) + struct pvlist *pv0; + int bis, bic; { struct pvlist *pv; struct pmap *pm; @@ -2150,7 +2209,7 @@ pv_changepte4m(struct pvlist *pv0, int bis, int bic) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? 
*/ if (pv0->pv_pmap == NULL) { splx(s); return; @@ -2203,6 +2262,19 @@ pv_changepte4m(struct pvlist *pv0, int bis, int bic) pv0->pv_flags |= MR4M(tpte); tpte = (tpte | bis) & ~bic; setpgt4m(ptep, tpte); + + /* Update PV_C4M flag if required */ + /* + * XXX - this is incorrect. The PV_C4M means that _this_ + * mapping should be kept uncached. This way we + * effectively uncache this pa until all mappings + * to it are gone (see also the XXX in pv_link4m and + * pv_unlink4m). + */ + if (bis & SRMMU_PG_C) + pv->pv_flags |= PV_C4M; + if (bic & SRMMU_PG_C) + pv->pv_flags &= ~PV_C4M; } setcontext4m(ctx); splx(s); @@ -2225,7 +2297,7 @@ pv_syncflags4m(pv0) write_user_windows(); /* paranoid? */ - s = splvm(); /* paranoid? */ + s = splpmap(); /* paranoid? */ if (pv0->pv_pmap == NULL) { /* paranoid */ splx(s); return (0); @@ -2309,7 +2381,7 @@ pv_unlink4m(pv, pm, va) pv->pv_va = npv->pv_va; pv->pv_flags &= ~PV_C4M; pv->pv_flags |= (npv->pv_flags & PV_C4M); - pool_put(&pvpool, npv); + pvfree(npv); } else { /* * No mappings left; we still need to maintain @@ -2333,7 +2405,7 @@ pv_unlink4m(pv, pm, va) break; } prev->pv_next = npv->pv_next; - pool_put(&pvpool, npv); + pvfree(npv); } if ((pv->pv_flags & (PV_C4M|PV_ANC)) == (PV_C4M|PV_ANC)) { /* @@ -2367,11 +2439,12 @@ pv_link4m(pv, pm, va, nc) vaddr_t va; int nc; { - struct pvlist *npv; + struct pvlist *npv, *mpv; int ret; ret = nc ? SRMMU_PG_C : 0; +retry: if (pv->pv_pmap == NULL) { /* no pvlist entries yet */ pmap_stats.ps_enter_firstpv++; @@ -2386,6 +2459,21 @@ pv_link4m(pv, pm, va, nc) } /* + * We do the malloc early so that we catch all changes that happen + * during the (possible) sleep. + */ + mpv = pvalloc(); + if (pv->pv_pmap == NULL) { + /* + * XXX - remove this printf some day when we know that + * can/can't happen. + */ + printf("pv_link4m: pv changed during sleep!\n"); + pvfree(mpv); + goto retry; + } + + /* * Before entering the new mapping, see if * it will cause old mappings to become aliased * and thus need to be `discached'. @@ -2423,14 +2511,11 @@ pv_link4m(pv, pm, va, nc) } } - npv = pool_get(&pvpool, PR_NOWAIT); - if (npv == NULL) - panic("pvpool exhausted"); - npv->pv_next = pv->pv_next; - npv->pv_pmap = pm; - npv->pv_va = va; - npv->pv_flags = nc ? 0 : PV_C4M; - pv->pv_next = npv; + mpv->pv_next = pv->pv_next; + mpv->pv_pmap = pm; + mpv->pv_va = va; + mpv->pv_flags = nc ? 0 : PV_C4M; + pv->pv_next = mpv; return (ret); } #endif @@ -2448,7 +2533,7 @@ pv_flushcache(pv) write_user_windows(); /* paranoia? */ - s = splvm(); /* XXX extreme paranoia */ + s = splpmap(); /* XXX extreme paranoia */ if ((pm = pv->pv_pmap) != NULL) { ctx = getcontext(); for (;;) { @@ -3148,32 +3233,11 @@ pmap_bootstrap4m(void) cpuinfo.ctx_tbl[i] = cpuinfo.ctx_tbl[0]; #endif - if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0) { - /* - * The page tables have been setup. Since we're still - * running on the PROM's memory map, the memory we - * allocated for our page tables might still be cached. - * Flush it now, and don't touch it again until we - * switch to our own tables (will be done immediately below). 
- */ - int size = pagetables_end - pagetables_start; - - if (CACHEINFO.c_vactype != VAC_NONE) { - int va = (vaddr_t)pagetables_start; - while (size != 0) { - cache_flush_page(va); - va += NBPG; - size -= NBPG; - } - } else if (cpuinfo.pcache_flush_page != NULL) { - int pa = pagetables_start; - while (size != 0) { - pcache_flush_page(pa, 0); - pa += NBPG; - size -= NBPG; - } - } - } + if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) == 0) + /* Flush page tables from cache */ + pcache_flush((caddr_t)pagetables_start, + (caddr_t)VA2PA((caddr_t)pagetables_start), + pagetables_end - pagetables_start); /* * Now switch to kernel pagetables (finally!) @@ -3209,6 +3273,66 @@ mmu_install_tables(sc) #endif } +/* + * Allocate per-CPU page tables. + * Note: this routine is called in the context of the boot CPU + * during autoconfig. + */ +void +pmap_alloc_cpu(sc) + struct cpu_softc *sc; +{ + caddr_t cpustore; + int *ctxtable; + int *regtable; + int *segtable; + int *pagtable; + int vr, vs, vpg; + struct regmap *rp; + struct segmap *sp; + + /* Allocate properly aligned and physically contiguous memory here */ + cpustore = 0; + ctxtable = 0; + regtable = 0; + segtable = 0; + pagtable = 0; + + vr = VA_VREG(CPUINFO_VA); + vs = VA_VSEG(CPUINFO_VA); + vpg = VA_VPG(CPUINFO_VA); + rp = &pmap_kernel()->pm_regmap[vr]; + sp = &rp->rg_segmap[vs]; + + /* + * Copy page tables, then modify entry for CPUINFO_VA so that + * it points at the per-CPU pages. + */ + bcopy(cpuinfo.L1_ptps, regtable, SRMMU_L1SIZE * sizeof(int)); + regtable[vr] = + (VA2PA((caddr_t)segtable) >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + bcopy(rp->rg_seg_ptps, segtable, SRMMU_L2SIZE * sizeof(int)); + segtable[vs] = + (VA2PA((caddr_t)pagtable) >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + bcopy(sp->sg_pte, pagtable, SRMMU_L3SIZE * sizeof(int)); + pagtable[vpg] = + (VA2PA((caddr_t)cpustore) >> SRMMU_PPNPASHIFT) | + (SRMMU_TEPTE | PPROT_RWX_RWX | SRMMU_PG_C); + + /* Install L1 table in context 0 */ + ctxtable[0] = ((u_int)regtable >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD; + + sc->ctx_tbl = ctxtable; + sc->L1_ptps = regtable; + +#if 0 + if ((sc->flags & CPUFLG_CACHEPAGETABLES) == 0) { + kvm_uncache((caddr_t)0, 1); + } +#endif +} #endif /* defined sun4m */ @@ -3265,6 +3389,35 @@ pmap_init() } /* + * Called just after enabling cache (so that CPUFLG_CACHEPAGETABLES is + * set correctly). + */ +void +pmap_cache_enable() +{ +#ifdef SUN4M + if (CPU_ISSUN4M) { + int pte; + + /* + * Deal with changed CPUFLG_CACHEPAGETABLES. + * + * If the tables were uncached during the initial mapping + * and cache_enable set the flag we recache the tables. + */ + + pte = getpte4m(pagetables_start); + + if ((cpuinfo.flags & CPUFLG_CACHEPAGETABLES) != 0 && + (pte & SRMMU_PG_C) == 0) + kvm_recache((caddr_t)pagetables_start, + atop(pagetables_end - pagetables_start)); + } +#endif +} + + +/* * Map physical addresses into kernel VM. 
*/ vaddr_t @@ -3280,7 +3433,6 @@ pmap_map(va, pa, endpa, prot) va += pgsize; pa += pgsize; } - pmap_update(pmap_kernel()); return (va); } @@ -3409,7 +3561,7 @@ pmap_release(pm) struct pmap *pm; { union ctxinfo *c; - int s = splvm(); /* paranoia */ + int s = splpmap(); /* paranoia */ #ifdef DEBUG if (pmapdebug & PDB_DESTROY) @@ -3527,7 +3679,7 @@ pmap_remove(pm, va, endva) } ctx = getcontext(); - s = splvm(); /* XXX conservative */ + s = splpmap(); /* XXX conservative */ simple_lock(&pm->pm_lock); for (; va < endva; va = nva) { /* do one virtual segment at a time */ @@ -4074,7 +4226,7 @@ pmap_page_protect4_4c(pg, prot) * The logic is much like that for pmap_remove, * but we know we are removing exactly one page. */ - s = splvm(); + s = splpmap(); if ((pm = pv->pv_pmap) == NULL) { splx(s); return; @@ -4202,7 +4354,7 @@ pmap_page_protect4_4c(pg, prot) nextpv: npv = pv->pv_next; if (pv != pv0) - pool_put(&pvpool, pv); + pvfree(pv); if ((pv = npv) == NULL) break; } @@ -4243,7 +4395,7 @@ pmap_protect4_4c(pm, sva, eva, prot) write_user_windows(); ctx = getcontext4(); - s = splvm(); + s = splpmap(); simple_lock(&pm->pm_lock); for (va = sva; va < eva;) { @@ -4352,7 +4504,7 @@ pmap_changeprot4_4c(pm, va, prot, wired) newprot = prot & VM_PROT_WRITE ? PG_W : 0; vr = VA_VREG(va); vs = VA_VSEG(va); - s = splvm(); /* conservative */ + s = splpmap(); /* conservative */ rp = &pm->pm_regmap[vr]; if (rp->rg_nsegmap == 0) { printf("pmap_changeprot: no segments in %d\n", vr); @@ -4472,7 +4624,7 @@ pmap_page_protect4m(pg, prot) * The logic is much like that for pmap_remove, * but we know we are removing exactly one page. */ - s = splvm(); + s = splpmap(); if ((pm = pv->pv_pmap) == NULL) { splx(s); return; @@ -4535,7 +4687,7 @@ pmap_page_protect4m(pg, prot) npv = pv->pv_next; if (pv != pv0) - pool_put(&pvpool, pv); + pvfree(pv); pv = npv; } pv0->pv_pmap = NULL; @@ -4575,7 +4727,7 @@ pmap_protect4m(pm, sva, eva, prot) write_user_windows(); ctx = getcontext4m(); - s = splvm(); + s = splpmap(); simple_lock(&pm->pm_lock); for (va = sva; va < eva;) { @@ -4665,7 +4817,7 @@ pmap_changeprot4m(pm, va, prot, wired) pmap_stats.ps_changeprots++; - s = splvm(); /* conservative */ + s = splpmap(); /* conservative */ ptep = getptep4m(pm, va); if (pm->pm_ctx) { ctx = getcontext4m(); @@ -4786,7 +4938,7 @@ pmap_enk4_4c(pm, va, prot, flags, pv, pteproto) vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; sp = &rp->rg_segmap[vs]; - s = splvm(); /* XXX way too conservative */ + s = splpmap(); /* XXX way too conservative */ #if defined(SUN4_MMU3L) if (HASSUN4_MMU3L && rp->rg_smeg == reginval) { @@ -4803,7 +4955,7 @@ pmap_enk4_4c(pm, va, prot, flags, pv, pteproto) for (i = 0; i < NSEGRG; i++) { setsegmap(tva, rp->rg_segmap[i].sg_pmeg); tva += NBPSG; - } + }; } #endif if (sp->sg_pmeg != seginval && (tpte = getpte4(va)) & PG_V) { @@ -4909,7 +5061,7 @@ pmap_enu4_4c(pm, va, prot, flags, pv, pteproto) vr = VA_VREG(va); vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; - s = splvm(); /* XXX conservative */ + s = splpmap(); /* XXX conservative */ /* * If there is no space in which the PTEs can be written @@ -5048,200 +5200,26 @@ pmap_enu4_4c(pm, va, prot, flags, pv, pteproto) splx(s); - return (0); + return (KERN_SUCCESS); } void -pmap_kenter_pa4_4c(vaddr_t va, paddr_t pa, vm_prot_t prot) +pmap_kenter_pa4_4c(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; { - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - int vr, vs, i, s; - int pteproto, ctx; - - pteproto = PG_V | PMAP_T2PTE_4(pa); - pa &= ~PMAP_TNC_4; - pteproto 
|= atop(pa) & PG_PFNUM; - if (prot & VM_PROT_WRITE) - pteproto |= PG_W; - - vr = VA_VREG(va); - vs = VA_VSEG(va); - rp = &pm->pm_regmap[vr]; - sp = &rp->rg_segmap[vs]; - - ctx = getcontext4(); - s = splvm(); -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L && rp->rg_smeg == reginval) { - vaddr_t tva; - rp->rg_smeg = region_alloc(®ion_locked, pm, vr)->me_cookie; - i = ncontext - 1; - do { - setcontext4(i); - setregmap(va, rp->rg_smeg); - } while (--i >= 0); - - /* set all PTEs to invalid, then overwrite one PTE below */ - tva = VA_ROUNDDOWNTOREG(va); - for (i = 0; i < NSEGRG; i++) { - setsegmap(tva, rp->rg_segmap[i].sg_pmeg); - tva += NBPSG; - } - } -#endif - KASSERT(sp->sg_pmeg == seginval || (getpte4(va) & PG_V) == 0); - if (sp->sg_pmeg == seginval) { - int tva; - - /* - * Allocate an MMU entry now (on locked list), - * and map it into every context. Set all its - * PTEs invalid (we will then overwrite one, but - * this is more efficient than looping twice). - */ - - sp->sg_pmeg = me_alloc(&segm_locked, pm, vr, vs)->me_cookie; - rp->rg_nsegmap++; - -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) - setsegmap(va, sp->sg_pmeg); - else -#endif - { - i = ncontext - 1; - do { - setcontext4(i); - setsegmap(va, sp->sg_pmeg); - } while (--i >= 0); - } - - /* set all PTEs to invalid, then overwrite one PTE below */ - tva = VA_ROUNDDOWNTOSEG(va); - i = NPTESG; - do { - setpte4(tva, 0); - tva += NBPG; - } while (--i > 0); - } - - /* ptes kept in hardware only */ - setpte4(va, pteproto); - sp->sg_npte++; - splx(s); - setcontext4(ctx); + pmap_enter4_4c(pmap_kernel(), va, pa, prot, PMAP_WIRED); } void -pmap_kremove4_4c(vaddr_t va, vsize_t len) +pmap_kremove4_4c(va, len) + vaddr_t va; + vsize_t len; { - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - vaddr_t nva, endva; - int i, tpte, perpage, npg; - int nleft, pmeg; - int vr, vs, s, ctx; - - endva = va + len; -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("pmap_kremove(0x%lx, 0x%lx)\n", va, endva); -#endif - - s = splvm(); - ctx = getcontext(); - simple_lock(&pm->pm_lock); - for (; va < endva; va = nva) { - /* do one virtual segment at a time */ - vr = VA_VREG(va); - vs = VA_VSEG(va); - nva = VSTOVA(vr, vs + 1); - if (nva == 0 || nva > endva) - nva = endva; - - rp = &pm->pm_regmap[vr]; - sp = &rp->rg_segmap[vs]; - - if (rp->rg_nsegmap == 0) - continue; - nleft = sp->sg_npte; - if (nleft == 0) - continue; - pmeg = sp->sg_pmeg; - KASSERT(pmeg != seginval); - setcontext4(0); - /* decide how to flush cache */ - npg = (endva - va) >> PGSHIFT; - if (npg > PMAP_RMK_MAGIC) { - /* flush the whole segment */ - perpage = 0; - cache_flush_segment(vr, vs); - } else { - /* - * flush each page individually; - * some never need flushing - */ - perpage = (CACHEINFO.c_vactype != VAC_NONE); - } - while (va < nva) { - tpte = getpte4(va); - if ((tpte & PG_V) == 0) { - va += NBPG; - continue; - } - if ((tpte & PG_TYPE) == PG_OBMEM) { - /* if cacheable, flush page as needed */ - if (perpage && (tpte & PG_NC) == 0) - cache_flush_page(va); - } - nleft--; -#ifdef DIAGNOSTIC - if (nleft < 0) - panic("pmap_kremove: too many PTEs in segment; " - "va 0x%lx; endva 0x%lx", va, endva); -#endif - setpte4(va, 0); - va += NBPG; - } - - /* - * If the segment is all gone, remove it from everyone and - * free the MMU entry. 
- */ - - sp->sg_npte = nleft; - if (nleft == 0) { - va = VSTOVA(vr, vs); -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) - setsegmap(va, seginval); - else -#endif - for (i = ncontext; --i >= 0;) { - setcontext4(i); - setsegmap(va, seginval); - } - me_free(pm, pmeg); - if (--rp->rg_nsegmap == 0) { -#if defined(SUN4_MMU3L) - if (HASSUN4_MMU3L) { - for (i = ncontext; --i >= 0;) { - setcontext4(i); - setregmap(va, reginval); - } - /* note: context is 0 */ - region_free(pm, rp->rg_smeg); - } -#endif - } - } + for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) { + pmap_remove(pmap_kernel(), va, va + PAGE_SIZE); } - simple_unlock(&pm->pm_lock); - setcontext(ctx); - splx(s); } #endif /*sun4,4c*/ @@ -5350,7 +5328,7 @@ pmap_enk4m(pm, va, prot, flags, pv, pteproto) rp = &pm->pm_regmap[VA_VREG(va)]; sp = &rp->rg_segmap[VA_VSEG(va)]; - s = splvm(); /* XXX way too conservative */ + s = splpmap(); /* XXX way too conservative */ #ifdef DEBUG if (rp->rg_seg_ptps == NULL) /* enter new region */ @@ -5538,102 +5516,41 @@ pmap_enu4m(pm, va, prot, flags, pv, pteproto) splx(s); - return (0); + return (KERN_SUCCESS); } void -pmap_kenter_pa4m(vaddr_t va, paddr_t pa, vm_prot_t prot) +pmap_kenter_pa4m(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; { - int pteproto; - struct regmap *rp; - struct segmap *sp; - int *ptep; + struct pvlist *pv; + int pteproto, ctx; pteproto = ((pa & PMAP_NC) == 0 ? SRMMU_PG_C : 0) | - PMAP_T2PTE_SRMMU(pa) | SRMMU_TEPTE | - ((prot & VM_PROT_WRITE) ? PPROT_N_RWX : PPROT_N_RX) | - (atop((pa & ~PMAP_TNC_SRMMU)) << SRMMU_PPNSHIFT); - rp = &pmap_kernel()->pm_regmap[VA_VREG(va)]; - sp = &rp->rg_segmap[VA_VSEG(va)]; - ptep = &sp->sg_pte[VA_SUN4M_VPG(va)]; -#ifdef DIAGNOSTIC - if ((*ptep & SRMMU_TETYPE) == SRMMU_TEPTE) - panic("pmap_kenter_pa4m: mapping exists"); -#endif - sp->sg_npte++; - setpgt4m(ptep, pteproto); -} + PMAP_T2PTE_SRMMU(pa) | SRMMU_TEPTE | + ((prot & VM_PROT_WRITE) ? 
PPROT_N_RWX : PPROT_N_RX); -void -pmap_kremove4m(vaddr_t va, vsize_t len) -{ - struct pmap *pm = pmap_kernel(); - struct regmap *rp; - struct segmap *sp; - vaddr_t endva, nva; - int vr, vs, ctx; - int tpte, perpage, npg; - int nleft; - - endva = va + len; - ctx = getcontext(); - for (; va < endva; va = nva) { - /* do one virtual segment at a time */ - vr = VA_VREG(va); - vs = VA_VSEG(va); - nva = VSTOVA(vr, vs + 1); - if (nva == 0 || nva > endva) { - nva = endva; - } - - rp = &pm->pm_regmap[vr]; - if (rp->rg_nsegmap == 0) { - continue; - } - - sp = &rp->rg_segmap[vs]; - nleft = sp->sg_npte; - if (nleft == 0) { - continue; - } + pa &= ~PMAP_TNC_SRMMU; - setcontext4m(0); - /* decide how to flush cache */ - npg = (nva - va) >> PGSHIFT; - if (npg > PMAP_RMK_MAGIC) { - /* flush the whole segment */ - perpage = 0; - if (CACHEINFO.c_vactype != VAC_NONE) { - cache_flush_segment(vr, vs); - } - } else { + pteproto |= atop(pa) << SRMMU_PPNSHIFT; - /* - * flush each page individually; - * some never need flushing - */ + pv = pvhead(atop(pa)); - perpage = (CACHEINFO.c_vactype != VAC_NONE); - } - for (; va < nva; va += NBPG) { - tpte = sp->sg_pte[VA_SUN4M_VPG(va)]; - if ((tpte & SRMMU_TETYPE) != SRMMU_TEPTE) { - continue; - } - if ((tpte & SRMMU_PGTYPE) == PG_SUN4M_OBMEM) { - /* if cacheable, flush page as needed */ - if (perpage && (tpte & SRMMU_PG_C)) - cache_flush_page(va); - } - setpgt4m(&sp->sg_pte[VA_SUN4M_VPG(va)], - SRMMU_TEINVALID); - nleft--; - } - sp->sg_npte = nleft; - } + ctx = getcontext4m(); + pmap_enk4m(pmap_kernel(), va, prot, TRUE, pv, pteproto); setcontext(ctx); } +void +pmap_kremove4m(va, len) + vaddr_t va; + vsize_t len; +{ + pmap_remove(pmap_kernel(), va, va + len); +} + #endif /* sun4m */ /* @@ -5792,24 +5709,6 @@ pmap_extract4m(pm, va, pa) #endif /* sun4m */ /* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. - */ -/* ARGSUSED */ -int pmap_copy_disabled=0; -void -pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) - struct pmap *dst_pmap, *src_pmap; - vaddr_t dst_addr; - vsize_t len; - vaddr_t src_addr; -{ -} - -/* * Garbage collects the physical map system for * pages which are no longer used. * Success need not be guaranteed -- that is, there @@ -5995,15 +5894,14 @@ pmap_is_referenced4m(pg) #if defined(SUN4) || defined(SUN4C) void -pmap_zero_page4_4c(paddr_t pa) +pmap_zero_page4_4c(pa) + paddr_t pa; { - struct pvlist *pv; caddr_t va; - u_int pfn; int pte; + struct pvlist *pv; - pfn = atop(pa); - if (pmap_initialized && (pv = pvhead(pfn)) != NULL) { + if (pmap_initialized && (pv = pvhead(atop(pa))) != NULL) { /* * The following might not be necessary since the page * is being cleared because it is about to be allocated, @@ -6011,7 +5909,7 @@ pmap_zero_page4_4c(paddr_t pa) */ pv_flushcache(pv); } - pte = PG_V | PG_S | PG_W | PG_NC | (pfn & PG_PFNUM); + pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM); va = vpage[0]; setpte4(va, pte); @@ -6029,22 +5927,24 @@ pmap_zero_page4_4c(paddr_t pa) * the processor. 
*/ void -pmap_copy_page4_4c(paddr_t src, paddr_t dst) +pmap_copy_page4_4c(src, dst) + paddr_t src, dst; { - struct pvlist *pv; caddr_t sva, dva; int spte, dpte; - u_int pfn; + struct pvlist *pv; - pfn = atop(src); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype == VAC_WRITEBACK) + pv = pvhead(atop(src)); + if (pv && CACHEINFO.c_vactype == VAC_WRITEBACK) pv_flushcache(pv); - spte = PG_V | PG_S | (pfn & PG_PFNUM); - pfn = atop(dst); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype != VAC_NONE) + spte = PG_V | PG_S | (atop(src) & PG_PFNUM); + + pv = pvhead(atop(dst)); + if (pv && CACHEINFO.c_vactype != VAC_NONE) pv_flushcache(pv); - dpte = PG_V | PG_S | PG_W | PG_NC | (pfn & PG_PFNUM); + + dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM); sva = vpage[0]; dva = vpage[1]; @@ -6065,26 +5965,29 @@ pmap_copy_page4_4c(paddr_t src, paddr_t dst) * XXX might be faster to use destination's context and allow cache to fill? */ void -pmap_zero_page4m(paddr_t pa) +pmap_zero_page4m(pa) + paddr_t pa; { + int pte; struct pvlist *pv; static int *ptep; static vaddr_t va; - u_int pfn; - int pte; if (ptep == NULL) ptep = getptep4m(pmap_kernel(), (va = (vaddr_t)vpage[0])); - pfn = atop(pa); - if (pmap_initialized && (pv = pvhead(pfn)) != NULL) { - if (CACHEINFO.c_vactype != VAC_NONE) - pv_flushcache(pv); - else - pcache_flush_page(pa, 1); + if (pmap_initialized && (pv = pvhead(atop(pa))) != NULL && + CACHEINFO.c_vactype != VAC_NONE) { + /* + * The following might not be necessary since the page + * is being cleared because it is about to be allocated, + * i.e., is in use by no one. + */ + pv_flushcache(pv); } - pte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | (pfn << SRMMU_PPNSHIFT)); + pte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | + (atop(pa) << SRMMU_PPNSHIFT)); if (cpuinfo.flags & CPUFLG_CACHE_MANDATORY) pte |= SRMMU_PG_C; else @@ -6107,29 +6010,32 @@ pmap_zero_page4m(paddr_t pa) * the processor. */ void -pmap_copy_page4m(paddr_t src, paddr_t dst) +pmap_copy_page4m(src, dst) + paddr_t src, dst; { int spte, dpte; struct pvlist *pv; static int *sptep, *dptep; static vaddr_t sva, dva; - u_int pfn; if (sptep == NULL) { sptep = getptep4m(pmap_kernel(), (sva = (vaddr_t)vpage[0])); dptep = getptep4m(pmap_kernel(), (dva = (vaddr_t)vpage[1])); } - pfn = atop(src); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype == VAC_WRITEBACK) + pv = pvhead(atop(src)); + if (pv && CACHEINFO.c_vactype == VAC_WRITEBACK) pv_flushcache(pv); - spte = SRMMU_TEPTE | SRMMU_PG_C | PPROT_S | (pfn << SRMMU_PPNSHIFT); - pfn = atop(dst); - if ((pv = pvhead(pfn)) != NULL && CACHEINFO.c_vactype != VAC_NONE) + spte = SRMMU_TEPTE | SRMMU_PG_C | PPROT_S | + (atop(src) << SRMMU_PPNSHIFT); + + pv = pvhead(atop(dst)); + if (pv && CACHEINFO.c_vactype != VAC_NONE) pv_flushcache(pv); - dpte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | (pfn << SRMMU_PPNSHIFT)); + dpte = (SRMMU_TEPTE | PPROT_S | PPROT_WRITE | + (atop(dst) << SRMMU_PPNSHIFT)); if (cpuinfo.flags & CPUFLG_CACHE_MANDATORY) dpte |= SRMMU_PG_C; else @@ -6154,8 +6060,10 @@ pmap_copy_page4m(paddr_t src, paddr_t dst) * elsewhere, or even not at all */ paddr_t -pmap_phys_address(int x) +pmap_phys_address(x) + int x; { + return (x); } @@ -6164,31 +6072,70 @@ pmap_phys_address(int x) * * We just assert PG_NC for each PTE; the addresses must reside * in locked kernel space. A cache flush is also done. - * Please do not use this function in new code. - * Doesn't work on sun4m, nor for pages with multiple mappings. 
*/ void -kvm_uncache(caddr_t va, int npages) +kvm_setcache(va, npages, cached) + caddr_t va; + int npages; + int cached; { -#if defined(SUN4) || defined(SUN4C) int pte; struct pvlist *pv; -#endif if (CPU_ISSUN4M) { - panic("kvm_uncache on 4m"); +#if defined(SUN4M) + int ctx = getcontext4m(); + + setcontext4m(0); + for (; --npages >= 0; va += NBPG) { + int *ptep; + + ptep = getptep4m(pmap_kernel(), (vaddr_t)va); + pte = *ptep; +#ifdef DIAGNOSTIC + if ((pte & SRMMU_TETYPE) != SRMMU_TEPTE) + panic("kvm_uncache: table entry not pte"); +#endif + pv = pvhead((pte & SRMMU_PPNMASK) >> SRMMU_PPNSHIFT); + if (pv) { + if (cached) + pv_changepte4m(pv, SRMMU_PG_C, 0); + else + pv_changepte4m(pv, 0, SRMMU_PG_C); + } + if (cached) + pte |= SRMMU_PG_C; + else + pte &= ~SRMMU_PG_C; + tlb_flush_page((vaddr_t)va); + setpgt4m(ptep, pte); + + if ((pte & SRMMU_PGTYPE) == PG_SUN4M_OBMEM) + cache_flush_page((int)va); + + } + setcontext4m(ctx); + +#endif } else { #if defined(SUN4) || defined(SUN4C) for (; --npages >= 0; va += NBPG) { pte = getpte4(va); -#ifdef DIAGNOSTIC if ((pte & PG_V) == 0) panic("kvm_uncache !pg_v"); -#endif - if ((pv = pvhead(pte & PG_PFNUM)) != NULL) { - pv_changepte4_4c(pv, PG_NC, 0); + + pv = pvhead(pte & PG_PFNUM); + /* XXX - we probably don't need to check for OBMEM */ + if ((pte & PG_TYPE) == PG_OBMEM && pv) { + if (cached) + pv_changepte4_4c(pv, 0, PG_NC); + else + pv_changepte4_4c(pv, PG_NC, 0); } - pte |= PG_NC; + if (cached) + pte &= ~PG_NC; + else + pte |= PG_NC; setpte4(va, pte); if ((pte & PG_TYPE) == PG_OBMEM) cache_flush_page((int)va); @@ -6197,8 +6144,36 @@ kvm_uncache(caddr_t va, int npages) } } +/* + * Turn on IO cache for a given (va, number of pages). + * + * We just assert PG_NC for each PTE; the addresses must reside + * in locked kernel space. A cache flush is also done. + */ +void +kvm_iocache(va, npages) + caddr_t va; + int npages; +{ + +#ifdef SUN4M + if (CPU_ISSUN4M) /* %%%: Implement! */ + panic("kvm_iocache: 4m iocache not implemented"); +#endif +#if defined(SUN4) || defined(SUN4C) + for (; --npages >= 0; va += NBPG) { + int pte = getpte4(va); + if ((pte & PG_V) == 0) + panic("kvm_iocache !pg_v"); + pte |= PG_IOC; + setpte4(va, pte); + } +#endif +} + int -pmap_count_ptes(struct pmap *pm) +pmap_count_ptes(pm) + struct pmap *pm; { int idx, total; struct regmap *rp; @@ -6224,7 +6199,9 @@ pmap_count_ptes(struct pmap *pm) * (This will just seg-align mappings.) */ void -pmap_prefer(vaddr_t foff, vaddr_t *vap) +pmap_prefer(foff, vap) + vaddr_t foff; + vaddr_t *vap; { vaddr_t va = *vap; long d, m; @@ -6242,7 +6219,7 @@ pmap_prefer(vaddr_t foff, vaddr_t *vap) } void -pmap_redzone(void) +pmap_redzone() { #if defined(SUN4M) if (CPU_ISSUN4M) { @@ -6263,7 +6240,8 @@ pmap_redzone(void) * process is the current process, load the new MMU context. */ void -pmap_activate(struct proc *p) +pmap_activate(p) + struct proc *p; { pmap_t pmap = p->p_vmspace->vm_map.pmap; int s; @@ -6275,7 +6253,7 @@ pmap_activate(struct proc *p) * the new context. */ - s = splvm(); + s = splpmap(); if (p == curproc) { write_user_windows(); if (pmap->pm_ctx == NULL) { @@ -6293,7 +6271,8 @@ pmap_activate(struct proc *p) * Deactivate the address space of the specified process. */ void -pmap_deactivate(struct proc *p) +pmap_deactivate(p) + struct proc *p; { } @@ -6302,7 +6281,9 @@ pmap_deactivate(struct proc *p) * Check consistency of a pmap (time consuming!). 
*/ void -pm_check(char *s, struct pmap *pm) +pm_check(s, pm) + char *s; + struct pmap *pm; { if (pm == pmap_kernel()) pm_check_k(s, pm); @@ -6311,7 +6292,9 @@ pm_check(char *s, struct pmap *pm) } void -pm_check_u(char *s, struct pmap *pm) +pm_check_u(s, pm) + char *s; + struct pmap *pm; { struct regmap *rp; struct segmap *sp; @@ -6397,7 +6380,9 @@ pm_check_u(char *s, struct pmap *pm) } void -pm_check_k(char *s, struct pmap *pm) /* Note: not as extensive as pm_check_u. */ +pm_check_k(s, pm) /* Note: not as extensive as pm_check_u. */ + char *s; + struct pmap *pm; { struct regmap *rp; int vr, vs, n; @@ -6457,7 +6442,7 @@ pm_check_k(char *s, struct pmap *pm) /* Note: not as extensive as pm_check_u. */ * The last page or two contains stuff so libkvm can bootstrap. */ int -pmap_dumpsize(void) +pmap_dumpsize() { long sz; @@ -6476,15 +6461,17 @@ pmap_dumpsize(void) * there is no in-core copy of kernel memory mappings on a 4/4c machine. */ int -pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno) +pmap_dumpmmu(dump, blkno) + daddr_t blkno; + int (*dump) __P((dev_t, daddr_t, caddr_t, size_t)); { kcore_seg_t *ksegp; cpu_kcore_hdr_t *kcpup; phys_ram_seg_t memseg; int error = 0; int i, memsegoffset, pmegoffset; - int buffer[dbtob(1) / sizeof(int)]; - int *bp, *ep; + int buffer[dbtob(1) / sizeof(int)]; + int *bp, *ep; #if defined(SUN4C) || defined(SUN4) int pmeg; #endif @@ -6583,12 +6570,14 @@ out: * Helper function for debuggers. */ void -pmap_writetext(unsigned char *dst, int ch) +pmap_writetext(dst, ch) + unsigned char *dst; + int ch; { int s, pte0, pte, ctx; vaddr_t va; - s = splvm(); + s = splpmap(); va = (unsigned long)dst & (~PGOFSET); cpuinfo.cache_flush(dst, 1); @@ -6633,10 +6622,10 @@ pmap_writetext(unsigned char *dst, int ch) #ifdef EXTREME_DEBUG -static void test_region(int, int, int); +static void test_region __P((int, int, int)); void -debug_pagetables(void) +debug_pagetables() { int i; int *regtbl; @@ -6683,7 +6672,10 @@ debug_pagetables(void) } static u_int -VA2PAsw(int ctx, caddr_t addr, int *pte) +VA2PAsw(ctx, addr, pte) + int ctx; + caddr_t addr; + int *pte; { int *curtbl; int curpte; @@ -6750,8 +6742,9 @@ VA2PAsw(int ctx, caddr_t addr, int *pte) printf("Bizarreness with address 0x%x!\n",addr); } -void -test_region(int reg, int start, int stop) +void test_region(reg, start, stop) + int reg; + int start, stop; { int i; int addr; @@ -6789,8 +6782,7 @@ test_region(int reg, int start, int stop) } -void -print_fe_map(void) +void print_fe_map(void) { u_int i, pte; diff --git a/sys/arch/sparc/sparc/vm_machdep.c b/sys/arch/sparc/sparc/vm_machdep.c index fe0b1314cc1..e3492f7edae 100644 --- a/sys/arch/sparc/sparc/vm_machdep.c +++ b/sys/arch/sparc/sparc/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.40 2001/12/09 01:45:32 art Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.41 2001/12/19 08:58:05 art Exp $ */ /* $NetBSD: vm_machdep.c,v 1.30 1997/03/10 23:55:40 pk Exp $ */ /* @@ -115,87 +115,54 @@ extern int has_iocache; #endif caddr_t -dvma_malloc_space(size_t len, void *kaddr, int flags, int space) +dvma_malloc_space(len, kaddr, flags, space) + size_t len; + void *kaddr; + int flags; { - int waitok = (flags & M_NOWAIT) == 0; - vsize_t maplen, tmplen; vaddr_t kva; vaddr_t dva; - int s; len = round_page(len); - /* XXX - verify if maybe uvm_km_valloc from kernel_map would be ok. 
*/ - s = splvm(); - kva = uvm_km_valloc(kmem_map, len); - splx(s); - if (kva == 0) + kva = (vaddr_t)malloc(len, M_DEVBUF, flags); + if (kva == NULL) return (NULL); - for (maplen = 0; maplen < len; maplen += PAGE_SIZE) { - struct vm_page *pg; - paddr_t pa; - -again: - pg = uvm_pagealloc(NULL, 0, NULL, 0); - if (pg == NULL) { - if (waitok) { - uvm_wait("dvmapg"); - goto again; - } - goto dropit; - } - pa = VM_PAGE_TO_PHYS(pg); #if defined(SUN4M) - if (!has_iocache) + if (!has_iocache) #endif - pa |= PMAP_NC; - pmap_kenter_pa(kva + maplen, pa, VM_PROT_ALL); - } - pmap_update(pmap_kernel()); + kvm_uncache((caddr_t)kva, atop(len)); *(vaddr_t *)kaddr = kva; - dva = dvma_mapin_space(kernel_map, kva, len, waitok ? 1 : 0, space); + dva = dvma_mapin_space(kernel_map, kva, len, (flags & M_NOWAIT) ? 0 : 1, space); if (dva == NULL) { - goto dropit; + free((void *)kva, M_DEVBUF); + return (NULL); } return (caddr_t)dva; -dropit: - for (tmplen = 0; tmplen < maplen; tmplen += PAGE_SIZE) { - paddr_t pa; - - if (pmap_extract(pmap_kernel(), kva + tmplen, &pa) == FALSE) - panic("dvma_malloc_space: pmap_extract"); - - pmap_kremove(kva + tmplen, PAGE_SIZE); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - } - pmap_update(pmap_kernel()); - - uvm_km_free(kmem_map, kva, len); - - return (NULL); } void -dvma_free(caddr_t dva, size_t len, void *kaddr) +dvma_free(dva, len, kaddr) + caddr_t dva; + size_t len; + void *kaddr; { - size_t tmplen; vaddr_t kva = *(vaddr_t *)kaddr; len = round_page(len); dvma_mapout((vaddr_t)dva, kva, len); - for (tmplen = 0; tmplen < len; tmplen += PAGE_SIZE) { - paddr_t pa; - - if (pmap_extract(pmap_kernel(), kva + tmplen, &pa) == FALSE) - panic("dvma_malloc_space: pmap_extract"); - - pmap_kremove(kva + tmplen, PAGE_SIZE); - uvm_pagefree(PHYS_TO_VM_PAGE(pa)); - } - - uvm_km_free(kmem_map, kva, len); + /* + * Even if we're freeing memory here, we can't be sure that it will + * be unmapped, so we must recache the memory range to avoid impact + * on other kernel subsystems. 
+ */ +#if defined(SUN4M) + if (!has_iocache) +#endif + kvm_recache(kaddr, atop(len)); + free((void *)kva, M_DEVBUF); } u_long dvma_cachealign = 0; diff --git a/sys/conf/files b/sys/conf/files index b7331c270ea..fe292932daa 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.233 2001/12/10 04:45:31 art Exp $ +# $OpenBSD: files,v 1.234 2001/12/19 08:58:05 art Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -533,7 +533,6 @@ file kern/vfs_syscalls.c file kern/vfs_vnops.c file kern/vnode_if.c file miscfs/deadfs/dead_vnops.c -file miscfs/genfs/genfs_vnops.c file miscfs/fdesc/fdesc_vfsops.c fdesc file miscfs/fdesc/fdesc_vnops.c fdesc file miscfs/fifofs/fifo_vnops.c fifo @@ -802,7 +801,6 @@ file xfs/xfs_syscalls-dummy.c !xfs file uvm/uvm_amap.c file uvm/uvm_anon.c file uvm/uvm_aobj.c -file uvm/uvm_bio.c file uvm/uvm_device.c file uvm/uvm_fault.c file uvm/uvm_glue.c diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c index 8a62beb64d3..58064eab3c2 100644 --- a/sys/dev/vnd.c +++ b/sys/dev/vnd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vnd.c,v 1.30 2001/12/07 00:11:14 niklas Exp $ */ +/* $OpenBSD: vnd.c,v 1.31 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ /* @@ -559,6 +559,10 @@ vndstrategy(bp) nbp->vb_buf.b_proc = bp->b_proc; nbp->vb_buf.b_iodone = vndiodone; nbp->vb_buf.b_vp = vp; + nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; + nbp->vb_buf.b_dirtyend = bp->b_dirtyend; + nbp->vb_buf.b_validoff = bp->b_validoff; + nbp->vb_buf.b_validend = bp->b_validend; LIST_INIT(&nbp->vb_buf.b_dep); /* save a reference to the old buffer */ diff --git a/sys/isofs/cd9660/cd9660_node.h b/sys/isofs/cd9660/cd9660_node.h index 31c112e6a16..a1b70cf4a79 100644 --- a/sys/isofs/cd9660/cd9660_node.h +++ b/sys/isofs/cd9660/cd9660_node.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_node.h,v 1.12 2001/12/10 18:49:51 art Exp $ */ +/* $OpenBSD: cd9660_node.h,v 1.13 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_node.h,v 1.15 1997/04/11 21:52:01 kleink Exp $ */ /*- @@ -42,7 +42,6 @@ */ #include <sys/buf.h> -#include <miscfs/genfs/genfs.h> /* * Theoretically, directories can be more than 2Gb in length, @@ -77,7 +76,6 @@ struct iso_dnode { #endif struct iso_node { - struct genfs_node i_gnode; struct iso_node *i_next, **i_prev; /* hash chain */ struct vnode *i_vnode; /* vnode associated with this inode */ struct vnode *i_devvp; /* vnode for block I/O */ @@ -98,6 +96,7 @@ struct iso_node { long iso_start; /* actual start of data of file (may be different */ /* from iso_extent, if file has extended attributes) */ ISO_RRIP_INODE inode; + struct cluster_info i_ci; }; #define i_forw i_chain[0] diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c index 75eb9be50a5..f91662cc09f 100644 --- a/sys/isofs/cd9660/cd9660_vfsops.c +++ b/sys/isofs/cd9660/cd9660_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vfsops.c,v 1.26 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: cd9660_vfsops.c,v 1.27 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_vfsops.c,v 1.26 1997/06/13 15:38:58 pk Exp $ */ /*- @@ -80,10 +80,6 @@ struct vfsops cd9660_vfsops = { cd9660_check_export }; -struct genfs_ops cd9660_genfsops = { - genfs_size, -}; - /* * Called by vfs_mountroot when iso is going to be mounted as root. 
*/ @@ -363,8 +359,6 @@ iso_mountfs(devvp, mp, p, argp) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; - mp->mnt_dev_bshift = iso_bsize; - mp->mnt_fs_bshift = isomp->im_bshift; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; @@ -943,8 +937,7 @@ retry: /* * XXX need generation number? */ - - genfs_node_init(vp, &cd9660_genfsops); + *vpp = vp; return (0); } diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c index f95775a9261..537878c775c 100644 --- a/sys/isofs/cd9660/cd9660_vnops.c +++ b/sys/isofs/cd9660/cd9660_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vnops.c,v 1.19 2001/12/10 18:49:51 art Exp $ */ +/* $OpenBSD: cd9660_vnops.c,v 1.20 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: cd9660_vnops.c,v 1.42 1997/10/16 23:56:57 christos Exp $ */ /*- @@ -290,6 +290,16 @@ cd9660_getattr(v) return (0); } +#ifdef DEBUG +extern int doclusterread; +#else +#define doclusterread 1 +#endif + +/* XXX until cluster routines can handle block sizes less than one page */ +#define cd9660_doclusterread \ + (doclusterread && (ISO_DEFAULT_BLOCK_SIZE >= NBPG)) + /* * Vnode op for reading. */ @@ -304,40 +314,63 @@ cd9660_read(v) struct ucred *a_cred; } */ *ap = v; struct vnode *vp = ap->a_vp; - struct uio *uio = ap->a_uio; - struct iso_node *ip = VTOI(vp); - int error; + register struct uio *uio = ap->a_uio; + register struct iso_node *ip = VTOI(vp); + register struct iso_mnt *imp; + struct buf *bp; + daddr_t lbn, rablock; + off_t diff; + int rasize, error = 0; + long size, n, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); - - if (vp->v_type != VREG) { - /* - * XXXART - maybe we should just panic? this is not possible - * unless vn_rdwr is called with VDIR and that's an error. 
- */ - return (EISDIR); - } - ip->i_flag |= IN_ACCESS; - - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = MIN(ip->i_size - uio->uio_offset, - uio->uio_resid); - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen, - UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) + imp = ip->i_mnt; + do { + lbn = lblkno(imp, uio->uio_offset); + on = blkoff(imp, uio->uio_offset); + n = min((u_int)(imp->logical_block_size - on), + uio->uio_resid); + diff = (off_t)ip->i_size - uio->uio_offset; + if (diff <= 0) + return (0); + if (diff < n) + n = diff; + size = blksize(imp, ip, lbn); + rablock = lbn + 1; + if (cd9660_doclusterread) { + if (lblktosize(imp, rablock) <= ip->i_size) + error = cluster_read(vp, &ip->i_ci, + (off_t)ip->i_size, lbn, size, NOCRED, &bp); + else + error = bread(vp, lbn, size, NOCRED, &bp); + } else { + if (ip->i_ci.ci_lastr + 1 == lbn && + lblktosize(imp, rablock) < ip->i_size) { + rasize = blksize(imp, ip, rablock); + error = breadn(vp, lbn, size, &rablock, + &rasize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + } + ip->i_ci.ci_lastr = lbn; + n = min(n, size - bp->b_resid); + if (error) { + brelse(bp); return (error); - } + } - return (0); + error = uiomove(bp->b_data + on, (int)n, uio); + + if (n + on == imp->logical_block_size || + uio->uio_offset == (off_t)ip->i_size) + bp->b_flags |= B_AGE; + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); } /* ARGSUSED */ @@ -1012,9 +1045,7 @@ struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { { &vop_pathconf_desc, cd9660_pathconf },/* pathconf */ { &vop_advlock_desc, cd9660_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_getpages_desc, genfs_getpages }, - { &vop_mmap_desc, cd9660_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_vnodeop_opv_desc = { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; @@ -1060,8 +1091,7 @@ struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_specop_opv_desc = { &cd9660_specop_p, cd9660_specop_entries }; @@ -1105,8 +1135,7 @@ struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc cd9660_fifoop_opv_desc = { &cd9660_fifoop_p, cd9660_fifoop_entries }; diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c index 1d816ded073..d1ba66f4a82 100644 --- a/sys/kern/exec_subr.c +++ b/sys/kern/exec_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_subr.c,v 1.16 2001/11/28 13:47:39 art Exp $ */ +/* $OpenBSD: exec_subr.c,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: exec_subr.c,v 1.9 1994/12/04 03:10:42 mycroft Exp $ */ /* @@ -138,8 +138,14 @@ vmcmd_map_pagedvn(p, cmd) struct proc *p; struct exec_vmcmd *cmd; { + /* + * note that if you're going to map part of an process as being + * paged from a vnode, that vnode had damn well better be marked as + * VTEXT. 
that's handled in the routine which sets up the vmcmd to + * call this routine. + */ struct uvm_object *uobj; - int error; + int retval; /* * map the vnode in using uvm_map. @@ -161,22 +167,29 @@ vmcmd_map_pagedvn(p, cmd) uobj = uvn_attach((void *) cmd->ev_vp, VM_PROT_READ|VM_PROT_EXECUTE); if (uobj == NULL) return(ENOMEM); - VREF(cmd->ev_vp); /* * do the map */ - error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len, + retval = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len, uobj, cmd->ev_offset, 0, UVM_MAPFLAG(cmd->ev_prot, VM_PROT_ALL, UVM_INH_COPY, UVM_ADV_NORMAL, UVM_FLAG_COPYONW|UVM_FLAG_FIXED)); - if (error) { - uobj->pgops->pgo_detach(uobj); - } + /* + * check for error + */ + + if (retval == KERN_SUCCESS) + return(0); + + /* + * error: detach from object + */ - return(error); + uobj->pgops->pgo_detach(uobj); + return(EINVAL); } /* diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index d1eeb637489..eb68a253268 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.88 2001/12/04 23:22:42 art Exp $ */ +/* $OpenBSD: init_main.c,v 1.89 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -221,8 +221,6 @@ main(framep) cpu_configure(); - ubc_init(); /* Initialize the unified buffer cache */ - /* Initialize sysctls (must be done before any processes run) */ sysctl_init(); @@ -297,7 +295,7 @@ main(framep) /* Allocate a prototype map so we have something to fork. */ uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS), - trunc_page(VM_MAX_ADDRESS)); + trunc_page(VM_MAX_ADDRESS), TRUE); p->p_vmspace = &vmspace0; p->p_addr = proc0paddr; /* XXX */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 9f621da43d2..6bec610b8e2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.61 2001/11/27 05:27:11 art Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.62 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -150,7 +150,6 @@ check_exec(p, epp) goto bad1; /* now we have the file, get the exec header */ - uvn_attach(vp, VM_PROT_READ); error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); if (error) diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index aee61ff48d6..4584481ba7d 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.44 2001/12/05 17:49:06 art Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.45 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -46,7 +46,7 @@ #include <uvm/uvm_extern.h> -static struct vm_map kmem_map_store; +static struct vm_map_intrsafe kmem_map_store; struct vm_map *kmem_map = NULL; #ifdef NKMEMCLUSTERS @@ -492,7 +492,7 @@ kmeminit() kmem_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&kmembase, (vaddr_t *)&kmemlimit, (vsize_t)(nkmempages * PAGE_SIZE), - VM_MAP_INTRSAFE, FALSE, &kmem_map_store); + VM_MAP_INTRSAFE, FALSE, &kmem_map_store.vmi_map); kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, (vsize_t)(nkmempages * sizeof(struct kmemusage))); #ifdef KMEMSTATS diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c index 2022279f6d8..e24cde8096b 100644 --- a/sys/kern/spec_vnops.c +++ b/sys/kern/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.21 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: spec_vnops.c,v 
1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -104,8 +104,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 817d7512b6c..d787fde7c5b 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sysv_shm.c,v 1.23 2001/11/28 13:47:39 art Exp $ */ +/* $OpenBSD: sysv_shm.c,v 1.24 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */ /* @@ -144,13 +144,15 @@ shm_delete_mapping(vm, shmmap_s) struct shmmap_state *shmmap_s; { struct shmid_ds *shmseg; - int segnum; + int segnum, result; size_t size; segnum = IPCID_TO_IX(shmmap_s->shmid); shmseg = &shmsegs[segnum]; size = round_page(shmseg->shm_segsz); - uvm_deallocate(&vm->vm_map, shmmap_s->va, size); + result = uvm_deallocate(&vm->vm_map, shmmap_s->va, size); + if (result != KERN_SUCCESS) + return EINVAL; shmmap_s->shmid = -1; shmseg->shm_dtime = time.tv_sec; if ((--shmseg->shm_nattch <= 0) && @@ -205,6 +207,7 @@ sys_shmat(p, v, retval) vaddr_t attach_va; vm_prot_t prot; vsize_t size; + int rv; shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (shmmap_s == NULL) { @@ -249,11 +252,11 @@ sys_shmat(p, v, retval) } shm_handle = shmseg->shm_internal; uao_reference(shm_handle->shm_object); - error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, + rv = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, shm_handle->shm_object, 0, 0, UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0)); - if (error) { - return error; + if (rv != KERN_SUCCESS) { + return ENOMEM; } shmmap_s->va = attach_va; diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 52be1533b43..22ef4dfb385 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.54 2001/11/30 05:45:33 csapuntz Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.55 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -327,6 +327,23 @@ breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp) } /* + * Read with single-block read-ahead. Defined in Bach (p.55), but + * implemented as a call to breadn(). + * XXX for compatibility with old file systems. + */ +int +breada(vp, blkno, size, rablkno, rabsize, cred, bpp) + struct vnode *vp; + daddr_t blkno; int size; + daddr_t rablkno; int rabsize; + struct ucred *cred; + struct buf **bpp; +{ + + return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp)); +} + +/* * Block write. Described in Bach (p.56) */ int @@ -389,6 +406,7 @@ bwrite(bp) /* Initiate disk write. Make sure the appropriate party is charged. */ bp->b_vp->v_numoutput++; splx(s); + SET(bp->b_flags, B_WRITEINPROG); VOP_STRATEGY(bp); if (async) @@ -448,6 +466,7 @@ bdwrite(bp) } /* Otherwise, the "write" is done, so mark and release the buffer. */ + CLR(bp->b_flags, B_NEEDCOMMIT); SET(bp->b_flags, B_DONE); brelse(bp); } @@ -569,7 +588,6 @@ brelse(bp) /* Unlock the buffer. */ CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE | B_DEFERRED)); - SET(bp->b_flags, B_CACHE); /* Allow disk interrupts. 
*/ splx(s); @@ -633,30 +651,44 @@ getblk(vp, blkno, size, slpflag, slptimeo) daddr_t blkno; int size, slpflag, slptimeo; { + struct bufhashhdr *bh; struct buf *bp, *nbp = NULL; int s, err; + /* + * XXX + * The following is an inlined version of 'incore()', but with + * the 'invalid' test moved to after the 'busy' test. It's + * necessary because there are some cases in which the NFS + * code sets B_INVAL prior to writing data to the server, but + * in which the buffers actually contain valid data. In this + * case, we can't allow the system to allocate a new buffer for + * the block until the write is finished. + */ + bh = BUFHASH(vp, blkno); start: - bp = incore(vp, blkno); - if (bp != NULL) { + bp = bh->lh_first; + for (; bp != NULL; bp = bp->b_hash.le_next) { + if (bp->b_lblkno != blkno || bp->b_vp != vp) + continue; + s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { SET(bp->b_flags, B_WANTED); err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk", slptimeo); splx(s); - if (err) { - if (nbp != NULL) { - SET(nbp->b_flags, B_AGE); - brelse(nbp); - } + if (err) return (NULL); - } goto start; } - SET(bp->b_flags, (B_BUSY | B_CACHE)); - bremfree(bp); + if (!ISSET(bp->b_flags, B_INVAL)) { + SET(bp->b_flags, (B_BUSY | B_CACHE)); + bremfree(bp); + splx(s); + break; + } splx(s); } @@ -665,7 +697,7 @@ start: goto start; } bp = nbp; - binshash(bp, BUFHASH(vp, blkno)); + binshash(bp, bh); bp->b_blkno = bp->b_lblkno = blkno; s = splbio(); bgetvp(vp, bp); @@ -868,6 +900,8 @@ start: bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_validoff = bp->b_validend = 0; bremhash(bp); *bpp = bp; @@ -988,6 +1022,7 @@ biodone(bp) buf_complete(bp); if (!ISSET(bp->b_flags, B_READ)) { + CLR(bp->b_flags, B_WRITEINPROG); vwakeup(bp->b_vp); } @@ -1063,16 +1098,3 @@ vfs_bufstats() splx(s); } #endif /* DEBUG */ - -int -buf_cleanout(struct buf *bp) { - if (bp->b_vp != NULL) - brelvp(bp); - - if (bp->b_flags & B_WANTED) { - bp->b_flags &= ~B_WANTED; - wakeup(bp); - } - - return (0); -} diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 73414b79b42..d987b1a78ec 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1,9 +1,10 @@ -/* $OpenBSD: vfs_default.c,v 1.15 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: vfs_default.c,v 1.16 2001/12/19 08:58:06 art Exp $ */ + /* * Portions of this code are: * - * Copyright (c) 1982, 1986, 1989, 1993 + * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed @@ -48,11 +49,9 @@ #include <sys/vnode.h> #include <sys/namei.h> #include <sys/malloc.h> -#include <sys/pool.h> #include <sys/event.h> #include <miscfs/specfs/specdev.h> -#include <uvm/uvm.h> extern struct simplelock spechash_slock; @@ -311,10 +310,3 @@ lease_check(void *v) { return (0); } - -int -vop_generic_mmap(v) - void *v; -{ - return 0; -} diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 7d000db08bf..aaff1342b67 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.79 2001/12/10 18:47:16 art Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.80 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -377,8 +377,6 @@ getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { - extern struct uvm_pagerops uvm_vnodeops; - struct uvm_object *uobj; struct proc *p = curproc; /* XXX */ struct freelst *listhd; static int toggle; @@ -412,17 +410,11 @@ getnewvnode(tag, mp, vops, vpp) splx(s); simple_unlock(&vnode_free_list_slock); vp = pool_get(&vnode_pool, PR_WAITOK); - bzero(vp, sizeof *vp); - /* - * initialize uvm_object within vnode. - */ - uobj = &vp->v_uobj; - uobj->pgops = &uvm_vnodeops; - uobj->uo_npages = 0; - TAILQ_INIT(&uobj->memq); + bzero((char *)vp, sizeof *vp); numvnodes++; } else { - TAILQ_FOREACH(vp, listhd, v_freelist) { + for (vp = TAILQ_FIRST(listhd); vp != NULLVP; + vp = TAILQ_NEXT(vp, v_freelist)) { if (simple_lock_try(&vp->v_interlock)) break; } @@ -453,13 +445,14 @@ getnewvnode(tag, mp, vops, vpp) else simple_unlock(&vp->v_interlock); #ifdef DIAGNOSTIC - if (vp->v_data || vp->v_uobj.uo_npages || - TAILQ_FIRST(&vp->v_uobj.memq)) { + if (vp->v_data) { vprint("cleaned vnode", vp); panic("cleaned vnode isn't"); } + s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); + splx(s); #endif vp->v_flag = 0; vp->v_socket = 0; @@ -472,10 +465,7 @@ getnewvnode(tag, mp, vops, vpp) *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; - simple_lock_init(&vp->v_uobj.vmobjlock); - - vp->v_size = VSIZENOTSET; - + simple_lock_init(&vp->v_uvm.u_obj.vmobjlock); return (0); } @@ -679,10 +669,6 @@ vget(vp, flags, p) flags |= LK_INTERLOCK; } if (vp->v_flag & VXLOCK) { - if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); - return (EBUSY); - } vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); @@ -801,11 +787,6 @@ vput(vp) #endif vputonfreelist(vp); - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); @@ -846,21 +827,18 @@ vrele(vp) #endif vputonfreelist(vp); - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0) VOP_INACTIVE(vp, p); } +void vhold __P((struct vnode *vp)); + /* * Page or buffer structure gets a reference. - * Must be called at splbio(); */ void -vhold(struct vnode *vp) +vhold(vp) + register struct vnode *vp; { /* @@ -880,34 +858,6 @@ vhold(struct vnode *vp) } /* - * Release a vhold reference. 
- * Must be called at splbio(); - */ -void -vholdrele(struct vnode *vp) -{ - simple_lock(&vp->v_interlock); -#ifdef DIAGNOSTIC - if (vp->v_holdcnt == 0) - panic("vholdrele: holdcnt"); -#endif - vp->v_holdcnt--; - - /* - * If it is on the holdlist and the hold count drops to - * zero, move it to the free list. - */ - if ((vp->v_bioflag & VBIOONFREELIST) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } - simple_unlock(&vp->v_interlock); -} - -/* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, @@ -1059,12 +1009,6 @@ vclean(vp, flags, p) if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; - if (vp->v_flag & VTEXT) { - uvmexp.vtextpages -= vp->v_uobj.uo_npages; - uvmexp.vnodepages += vp->v_uobj.uo_npages; - } - vp->v_flag &= ~VTEXT; - /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK @@ -1075,7 +1019,11 @@ vclean(vp, flags, p) VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* - * Clean out any cached data associated with the vnode. + * clean out any VM data associated with the vnode. + */ + uvm_vnp_terminate(vp); + /* + * Clean out any buffers associated with the vnode. */ if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); @@ -2020,22 +1968,9 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) struct proc *p; int slpflag, slptimeo; { - struct uvm_object *uobj = &vp->v_uobj; - struct buf *bp; + register struct buf *bp; struct buf *nbp, *blist; - int s, error, rv; - int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO| - (flags & V_SAVE ? PGO_CLEANIT : 0); - - /* XXXUBC this doesn't look at flags or slp* */ - if (vp->v_type == VREG) { - simple_lock(&uobj->vmobjlock); - rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags); - simple_unlock(&uobj->vmobjlock); - if (!rv) { - return EIO; - } - } + int s, error; if (flags & V_SAVE) { s = splbio(); @@ -2105,21 +2040,12 @@ loop: void vflushbuf(vp, sync) - struct vnode *vp; + register struct vnode *vp; int sync; { - struct uvm_object *uobj = &vp->v_uobj; - struct buf *bp, *nbp; + register struct buf *bp, *nbp; int s; - if (vp->v_type == VREG) { - int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0); - - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, flags); - simple_unlock(&uobj->vmobjlock); - } - loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { @@ -2185,27 +2111,44 @@ bgetvp(vp, bp) * Manipulates vnode buffer queues. Must be called at splbio(). */ void -brelvp(struct buf *bp) +brelvp(bp) + register struct buf *bp; { struct vnode *vp; - if ((vp = bp->b_vp) == NULL) + if ((vp = bp->b_vp) == (struct vnode *) 0) panic("brelvp: NULL"); - /* * Delete from old vnode list, if on one. 
*/ if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); - if (TAILQ_EMPTY(&vp->v_uobj.memq) && - (vp->v_bioflag & VBIOONSYNCLIST) && + if ((vp->v_bioflag & VBIOONSYNCLIST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_bioflag &= ~VBIOONSYNCLIST; LIST_REMOVE(vp, v_synclist); } - bp->b_vp = NULL; + bp->b_vp = (struct vnode *) 0; + + simple_lock(&vp->v_interlock); +#ifdef DIAGNOSTIC + if (vp->v_holdcnt == 0) + panic("brelvp: holdcnt"); +#endif + vp->v_holdcnt--; - vholdrele(vp); + /* + * If it is on the holdlist and the hold count drops to + * zero, move it to the free list. + */ + if ((vp->v_bioflag & VBIOONFREELIST) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + simple_unlock(&vp->v_interlock); } /* @@ -2262,8 +2205,7 @@ reassignbuf(bp) */ if ((bp->b_flags & B_DELWRI) == 0) { listheadp = &vp->v_cleanblkhd; - if (TAILQ_EMPTY(&vp->v_uobj.memq) && - (vp->v_bioflag & VBIOONSYNCLIST) && + if ((vp->v_bioflag & VBIOONSYNCLIST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_bioflag &= ~VBIOONSYNCLIST; LIST_REMOVE(vp, v_synclist); diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index 0adeb2f3065..33a8f9b6633 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_sync.c,v 1.21 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: vfs_sync.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* * Portions of this code are: @@ -176,12 +176,15 @@ sched_sync(p) VOP_UNLOCK(vp, 0, p); s = splbio(); if (LIST_FIRST(slp) == vp) { -#ifdef DIAGNOSTIC - if (!(vp->v_bioflag & VBIOONSYNCLIST)) { - vprint("vnode", vp); - panic("sched_fsync: on synclist, but no flag"); - } -#endif + /* + * Note: disk vps can remain on the + * worklist too with no dirty blocks, but + * since sync_fsync() moves it to a different + * slot we are safe. + */ + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); /* * Put us back on the worklist. The worklist * routine will remove us from our current diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 5433711decd..a74fd9eedf9 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_syscalls.c,v 1.83 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: vfs_syscalls.c,v 1.84 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */ /* @@ -493,6 +493,7 @@ sys_sync(p, v, retval) if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; + uvm_vnp_sync(mp); VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; @@ -1063,13 +1064,6 @@ sys_fhopen(p, v, retval) } if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) goto bad; - - if (vp->v_type == VREG && - uvn_attach(vp, flags & FWRITE ? 
VM_PROT_WRITE : 0) == NULL) { - error = EIO; - goto bad; - } - if (flags & FWRITE) vp->v_writecount++; @@ -1481,6 +1475,8 @@ sys_unlink(p, v, retval) goto out; } + (void)uvm_vnp_uncache(vp); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); @@ -2342,6 +2338,7 @@ out: if (fromnd.ni_dvp != tdvp) VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (tvp) { + (void)uvm_vnp_uncache(tvp); VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index a788a93c9df..8314a049da7 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.38 2001/12/10 18:45:34 art Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.39 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -165,11 +165,6 @@ vn_open(ndp, fmode, cmode) } if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) goto bad; - if (vp->v_type == VREG && - uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) { - error = EIO; - goto bad; - } if (fmode & FWRITE) vp->v_writecount++; return (0); @@ -202,10 +197,11 @@ vn_writechk(vp) } } /* - * If the vnode is in use as a process's text, - * we can't allow writing. + * If there's shared text associated with + * the vnode, try to free it up once. If + * we fail, we can't allow writing. */ - if (vp->v_flag & VTEXT) + if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp)) return (ETXTBSY); return (0); @@ -218,23 +214,6 @@ void vn_marktext(vp) struct vnode *vp; { - if ((vp->v_flag & VTEXT) == 0) { - uvmexp.vnodepages -= vp->v_uobj.uo_npages; - uvmexp.vtextpages += vp->v_uobj.uo_npages; -#if 0 - /* - * Doesn't help much because the pager is borked and ubc_flush is - * slow. - */ -#ifdef PMAP_PREFER - /* - * Get rid of any cached reads from this vnode. - * exec can't respect PMAP_PREFER when mapping the text. - */ - ubc_flush(&vp->v_uobj, 0, 0); -#endif -#endif - } vp->v_flag |= VTEXT; } @@ -521,10 +500,6 @@ vn_lock(vp, flags, p) if ((flags & LK_INTERLOCK) == 0) simple_lock(&vp->v_interlock); if (vp->v_flag & VXLOCK) { - if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); - return (EBUSY); - } vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vn_lock", 0); diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index dbb2b7a438d..1f30d85c507 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -3,7 +3,7 @@ * (Modifications made here may easily be lost!) 
* * Created from the file: - * OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp + * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp * by the script: * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp */ @@ -1230,109 +1230,6 @@ int VOP_WHITEOUT(dvp, cnp, flags) return (VCALL(dvp, VOFFSET(vop_whiteout), &a)); } -int vop_getpages_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_getpages_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_getpages_desc = { - 0, - "vop_getpages", - 0, - vop_getpages_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_GETPAGES(vp, offset, m, count, centeridx, access_type, advice, flags) - struct vnode *vp; - voff_t offset; - struct vm_page **m; - int *count; - int centeridx; - vm_prot_t access_type; - int advice; - int flags; -{ - struct vop_getpages_args a; - a.a_desc = VDESC(vop_getpages); - a.a_vp = vp; - a.a_offset = offset; - a.a_m = m; - a.a_count = count; - a.a_centeridx = centeridx; - a.a_access_type = access_type; - a.a_advice = advice; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_getpages), &a)); -} - -int vop_putpages_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_putpages_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_putpages_desc = { - 0, - "vop_putpages", - 0, - vop_putpages_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_PUTPAGES(vp, m, count, flags, rtvals) - struct vnode *vp; - struct vm_page **m; - int count; - int flags; - int *rtvals; -{ - struct vop_putpages_args a; - a.a_desc = VDESC(vop_putpages); - a.a_vp = vp; - a.a_m = m; - a.a_count = count; - a.a_flags = flags; - a.a_rtvals = rtvals; - return (VCALL(vp, VOFFSET(vop_putpages), &a)); -} - -int vop_mmap_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mmap_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_mmap_desc = { - 0, - "vop_mmap", - 0, - vop_mmap_vp_offsets, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_mmap_args, a_cred), - VOPARG_OFFSETOF(struct vop_mmap_args, a_p), - VDESC_NO_OFFSET, - NULL, -}; - -int VOP_MMAP(vp, fflags, cred, p) - struct vnode *vp; - int fflags; - struct ucred *cred; - struct proc *p; -{ - struct vop_mmap_args a; - a.a_desc = VDESC(vop_mmap); - a.a_vp = vp; - a.a_fflags = fflags; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_mmap), &a)); -} - /* Special cases: */ int vop_strategy_vp_offsets[] = { @@ -1426,9 +1323,6 @@ struct vnodeop_desc *vfs_op_descs[] = { &vop_advlock_desc, &vop_reallocblks_desc, &vop_whiteout_desc, - &vop_getpages_desc, - &vop_putpages_desc, - &vop_mmap_desc, NULL }; diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 0efb5afdd4f..a1cd5c5b9c5 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -1,4 +1,4 @@ -# $OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp $ +# $OpenBSD: vnode_if.src,v 1.18 2001/12/19 08:58:06 art Exp $ # $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $ # # Copyright (c) 1992, 1993 @@ -467,38 +467,3 @@ vop_whiteout { #vop_bwrite { # IN struct buf *bp; #}; - -# -#% getpages vp L L L -# -vop_getpages { - IN struct vnode *vp; - IN voff_t offset; - IN struct vm_page **m; - IN int *count; - IN int centeridx; - IN vm_prot_t access_type; - IN int advice; - IN int flags; -}; - -# -#% putpages vp L L L -# -vop_putpages { - IN struct vnode *vp; - IN struct vm_page **m; - IN int count; - IN int flags; - IN int *rtvals; -}; - -# -#% mmap vp = = = -# -vop_mmap { - IN struct vnode *vp; - 
IN int fflags; - IN struct ucred *cred; - IN struct proc *p; -}; diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c index 97dc05349c0..820e9b4bf22 100644 --- a/sys/miscfs/deadfs/dead_vnops.c +++ b/sys/miscfs/deadfs/dead_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dead_vnops.c,v 1.9 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: dead_vnops.c,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: dead_vnops.c,v 1.16 1996/02/13 13:12:48 mycroft Exp $ */ /* @@ -84,7 +84,6 @@ int dead_print __P((void *)); #define dead_pathconf dead_ebadf #define dead_advlock dead_ebadf #define dead_bwrite nullop -#define dead_mmap dead_badop int chkvnlock __P((struct vnode *)); @@ -125,8 +124,7 @@ struct vnodeopv_entry_desc dead_vnodeop_entries[] = { { &vop_pathconf_desc, dead_pathconf }, /* pathconf */ { &vop_advlock_desc, dead_advlock }, /* advlock */ { &vop_bwrite_desc, dead_bwrite }, /* bwrite */ - { &vop_mmap_desc, dead_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc dead_vnodeop_opv_desc = { &dead_vnodeop_p, dead_vnodeop_entries }; diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c index 9196ed10067..b5c22156ecf 100644 --- a/sys/miscfs/fdesc/fdesc_vnops.c +++ b/sys/miscfs/fdesc/fdesc_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fdesc_vnops.c,v 1.26 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fdesc_vnops.c,v 1.27 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fdesc_vnops.c,v 1.32 1996/04/11 11:24:29 mrg Exp $ */ /* @@ -123,7 +123,6 @@ int fdesc_pathconf __P((void *)); #define fdesc_islocked vop_generic_islocked #define fdesc_advlock eopnotsupp #define fdesc_bwrite eopnotsupp -#define fdesc_mmap eopnotsupp int (**fdesc_vnodeop_p) __P((void *)); struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { @@ -162,8 +161,7 @@ struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { { &vop_pathconf_desc, fdesc_pathconf }, /* pathconf */ { &vop_advlock_desc, fdesc_advlock }, /* advlock */ { &vop_bwrite_desc, fdesc_bwrite }, /* bwrite */ - { &vop_mmap_desc, fdesc_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc fdesc_vnodeop_opv_desc = diff --git a/sys/miscfs/fifofs/fifo.h b/sys/miscfs/fifofs/fifo.h index 6e8bae9a060..94b0807d568 100644 --- a/sys/miscfs/fifofs/fifo.h +++ b/sys/miscfs/fifofs/fifo.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo.h,v 1.9 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fifo.h,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fifo.h,v 1.10 1996/02/09 22:40:15 christos Exp $ */ /* @@ -80,7 +80,6 @@ int fifo_pathconf __P((void *)); int fifo_advlock __P((void *)); #define fifo_reallocblks fifo_badop #define fifo_bwrite nullop -#define fifo_mmap fifo_badop void fifo_printinfo __P((struct vnode *)); diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 174f02cca6f..42da5773e63 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.11 2001/12/04 22:44:31 art Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -103,8 +103,7 @@ struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, fifo_bwrite }, /* bwrite */ - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; void 
filt_fifordetach(struct knote *kn); diff --git a/sys/miscfs/genfs/genfs.h b/sys/miscfs/genfs/genfs.h deleted file mode 100644 index 8cb886b5074..00000000000 --- a/sys/miscfs/genfs/genfs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* $OpenBSD: genfs.h,v 1.1 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: genfs_node.h,v 1.2 2001/09/15 22:38:40 chs Exp $ */ - -/* - * Copyright (c) 2001 Chuck Silvers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Chuck Silvers. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _MISCFS_GENFS_GENFS_NODE_H_ -#define _MISCFS_GENFS_GENFS_NODE_H_ - -struct vm_page; - -struct genfs_ops { - void (*gop_size)(struct vnode *, off_t, off_t *); - int (*gop_alloc)(struct vnode *, off_t, off_t, int, struct ucred *); -#ifdef notyet - int (*gop_write)(struct vnode *, struct vm_page **, int, int); -#endif -}; - -#define GOP_SIZE(vp, size, eobp) \ - (*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp)) -#define GOP_ALLOC(vp, off, len, flags, cred) \ - (*VTOG(vp)->g_op->gop_alloc)((vp), (off), (len), (flags), (cred)) -#ifdef notyet -#define GOP_WRITE(vp, pgs, npages, flags) \ - (*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags)) -#endif -struct genfs_node { - struct genfs_ops *g_op; /* ops vector */ - struct lock g_glock; /* getpages lock */ -}; - -#define VTOG(vp) ((struct genfs_node *)(vp)->v_data) - -void genfs_size(struct vnode *, off_t, off_t *); -void genfs_node_init(struct vnode *, struct genfs_ops *); -#ifdef notyet -int genfs_gop_write(struct vnode *, struct vm_page **, int, int); -#endif - -int genfs_getpages __P((void *)); -int genfs_putpages __P((void *)); - -#endif /* _MISCFS_GENFS_GENFS_NODE_H_ */ diff --git a/sys/miscfs/genfs/genfs_vnops.c b/sys/miscfs/genfs/genfs_vnops.c deleted file mode 100644 index 7630caa60e7..00000000000 --- a/sys/miscfs/genfs/genfs_vnops.c +++ /dev/null @@ -1,723 +0,0 @@ -/* $OpenBSD: genfs_vnops.c,v 1.1 2001/12/10 04:45:31 art Exp $ */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/mount.h> -#include <sys/namei.h> -#include <sys/vnode.h> -#include <sys/fcntl.h> -#include <sys/malloc.h> -#include <sys/poll.h> -#include <sys/mman.h> -#include <sys/pool.h> - -#include <miscfs/genfs/genfs.h> -#include <miscfs/specfs/specdev.h> - -#include <uvm/uvm.h> -#include <uvm/uvm_pager.h> - -/* - * generic VM getpages routine. - * Return PG_BUSY pages for the given range, - * reading from backing store if necessary. 
- */ - -int -genfs_getpages(v) - void *v; -{ - struct vop_getpages_args /* { - struct vnode *a_vp; - voff_t a_offset; - vm_page_t *a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; - } */ *ap = v; - - off_t newsize, diskeof, memeof; - off_t offset, origoffset, startoffset, endoffset, raoffset; - daddr_t lbn, blkno; - int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount; - int fs_bshift, fs_bsize, dev_bshift; - int flags = ap->a_flags; - size_t bytes, iobytes, tailbytes, totalbytes, skipbytes; - vaddr_t kva; - struct buf *bp, *mbp; - struct vnode *vp = ap->a_vp; - struct uvm_object *uobj = &vp->v_uobj; - struct vm_page *pgs[16]; /* XXXUBC 16 */ - struct genfs_node *gp = VTOG(vp); - struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */ - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; - boolean_t sawhole = FALSE; - struct proc *p = curproc; - UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d", - vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count); - - /* XXXUBC temp limit */ - if (*ap->a_count > 16) { - return EINVAL; - } - - error = 0; - origoffset = ap->a_offset; - orignpages = *ap->a_count; - GOP_SIZE(vp, vp->v_size, &diskeof); - if (flags & PGO_PASTEOF) { - newsize = MAX(vp->v_size, - origoffset + (orignpages << PAGE_SHIFT)); - GOP_SIZE(vp, newsize, &memeof); - } else { - memeof = diskeof; - } - KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages); - KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0); - KASSERT(orignpages > 0); - - /* - * Bounds-check the request. - */ - - if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) { - if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); - } - UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x", - origoffset, *ap->a_count, memeof,0); - return EINVAL; - } - - /* - * For PGO_LOCKED requests, just return whatever's in memory. - */ - - if (flags & PGO_LOCKED) { - uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, - UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY); - - return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0; - } - - /* vnode is VOP_LOCKed, uobj is locked */ - - if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) { - vn_syncer_add_to_worklist(vp, syncdelay); - } - - /* - * find the requested pages and make some simple checks. - * leave space in the page array for a whole block. - */ - - fs_bshift = vp->v_mount->mnt_fs_bshift; - fs_bsize = 1 << fs_bshift; - dev_bshift = vp->v_mount->mnt_dev_bshift; - - orignpages = MIN(orignpages, - round_page(memeof - origoffset) >> PAGE_SHIFT); - npages = orignpages; - startoffset = origoffset & ~(fs_bsize - 1); - endoffset = round_page((origoffset + (npages << PAGE_SHIFT) - + fs_bsize - 1) & ~(fs_bsize - 1)); - endoffset = MIN(endoffset, round_page(memeof)); - ridx = (origoffset - startoffset) >> PAGE_SHIFT; - - memset(pgs, 0, sizeof(pgs)); - uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL); - - /* - * if PGO_OVERWRITE is set, don't bother reading the pages. - * PGO_OVERWRITE also means that the caller guarantees - * that the pages already have backing store allocated. 
- */ - - if (flags & PGO_OVERWRITE) { - UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - uvm_pagezero(pg); - pg->flags &= ~(PG_FAKE); - } - pg->flags &= ~(PG_RDONLY); - } - npages += ridx; - goto out; - } - - /* - * if the pages are already resident, just return them. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if ((pg->flags & PG_FAKE) || - (write && (pg->flags & PG_RDONLY))) { - break; - } - } - if (i == npages) { - UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); - raoffset = origoffset + (orignpages << PAGE_SHIFT); - npages += ridx; - goto raout; - } - - /* - * the page wasn't resident and we're not overwriting, - * so we're going to have to do some i/o. - * find any additional pages needed to cover the expanded range. - */ - - npages = (endoffset - startoffset) >> PAGE_SHIFT; - if (startoffset != origoffset || npages != orignpages) { - - /* - * XXXUBC we need to avoid deadlocks caused by locking - * additional pages at lower offsets than pages we - * already have locked. for now, unlock them all and - * start over. - */ - - for (i = 0; i < orignpages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - pg->flags |= PG_RELEASED; - } - } - uvm_page_unbusy(&pgs[ridx], orignpages); - memset(pgs, 0, sizeof(pgs)); - - UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x", - startoffset, endoffset, 0,0); - npgs = npages; - uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL); - } - simple_unlock(&uobj->vmobjlock); - - /* - * read the desired page(s). - */ - - totalbytes = npages << PAGE_SHIFT; - bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0)); - tailbytes = totalbytes - bytes; - skipbytes = 0; - - kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK | - UVMPAGER_MAPIN_READ); - - s = splbio(); - mbp = pool_get(&bufpool, PR_WAITOK); - splx(s); - mbp->b_bufsize = totalbytes; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - /* - * if EOF is in the middle of the range, zero the part past EOF. - */ - - if (tailbytes > 0) { - memset((void *)(kva + bytes), 0, tailbytes); - } - - /* - * now loop over the pages, reading as needed. - */ - - if (write) { - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, p); - } else { - lockmgr(&gp->g_glock, LK_SHARED, NULL, p); - } - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - - /* - * skip pages which don't need to be read. - */ - - pidx = (offset - startoffset) >> PAGE_SHIFT; - while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) { - size_t b; - - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - b = MIN(PAGE_SIZE, bytes); - offset += b; - bytes -= b; - skipbytes += b; - pidx++; - UVMHIST_LOG(ubchist, "skipping, new offset 0x%x", - offset, 0,0,0); - if (bytes == 0) { - goto loopdone; - } - } - - /* - * bmap the file to find out the blkno to read from and - * how much we can read in one i/o. if bmap returns an error, - * skip the rest of the top-level i/o. - */ - - lbn = offset >> fs_bshift; - error = VOP_BMAP(vp, lbn, NULL, &blkno, &run); - if (error) { - UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n", - lbn, error,0,0); - skipbytes += bytes; - goto loopdone; - } - - /* - * see how many pages can be read with this i/o. 
- * reduce the i/o size if necessary to avoid - * overwriting pages with valid data. - */ - - iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, - bytes); - if (offset + iobytes > round_page(offset)) { - pcount = 1; - while (pidx + pcount < npages && - pgs[pidx + pcount]->flags & PG_FAKE) { - pcount++; - } - iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) - - (offset - trunc_page(offset))); - } - - /* - * if this block isn't allocated, zero it instead of reading it. - * if this is a read access, mark the pages we zeroed PG_RDONLY. - */ - - if (blkno < 0) { - int holepages = (round_page(offset + iobytes) - - trunc_page(offset)) >> PAGE_SHIFT; - UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0); - - sawhole = TRUE; - memset((char *)kva + (offset - startoffset), 0, - iobytes); - skipbytes += iobytes; - - for (i = 0; i < holepages; i++) { - if (write) { - pgs[pidx + i]->flags &= ~PG_CLEAN; - } else { - pgs[pidx + i]->flags |= PG_RDONLY; - } - } - continue; - } - - /* - * allocate a sub-buf for this piece of the i/o - * (or just use mbp if there's only 1 piece), - * and start it going. - */ - - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - bp = pool_get(&bufpool, PR_WAITOK); - splx(s); - bp->b_data = (char *)kva + offset - startoffset; - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_READ|B_CALL; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - LIST_INIT(&bp->b_dep); - } - bp->b_lblkno = 0; - bp->b_private = mbp; - - /* adjust physical blkno for partial blocks */ - bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> - dev_bshift); - - UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x", - bp, offset, iobytes, bp->b_blkno); - - VOP_STRATEGY(bp); - } - -loopdone: - if (skipbytes) { - s = splbio(); - if (error) { - mbp->b_flags |= B_ERROR; - mbp->b_error = error; - } - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - s = splbio(); - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - raoffset = startoffset + totalbytes; - - /* - * if this we encountered a hole then we have to do a little more work. - * for read faults, we marked the page PG_RDONLY so that future - * write accesses to the page will fault again. - * for write faults, we must make sure that the backing store for - * the page is completely allocated while the pages are locked. - */ - - if (error == 0 && sawhole && write) { - error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0, - cred); - if (error) { - UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d", - lbn, error,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - simple_lock(&uobj->vmobjlock); - goto out; - } - } - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); - simple_lock(&uobj->vmobjlock); - - /* - * see if we want to start any readahead. - * XXXUBC for now, just read the next 128k on 64k boundaries. - * this is pretty nonsensical, but it is 50% faster than reading - * just the next 64k. 
- */ - -raout: - if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 && - PAGE_SHIFT <= 16) { - int racount; - - racount = 1 << (16 - PAGE_SHIFT); - (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - - racount = 1 << (16 - PAGE_SHIFT); - (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - } - - /* - * we're almost done! release the pages... - * for errors, we free the pages. - * otherwise we activate them and mark them as valid and clean. - * also, unbusy pages that were not actually requested. - */ - -out: - if (error) { - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - if (pgs[i]->flags & PG_FAKE) { - uvm_pagefree(pgs[i]); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); - return error; - } - - UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0); - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_FAKE) { - UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x", - pgs[i], pgs[i]->offset,0,0); - pgs[i]->flags &= ~(PG_FAKE); - pmap_clear_modify(pgs[i]); - pmap_clear_reference(pgs[i]); - } - if (write) { - pgs[i]->flags &= ~(PG_RDONLY); - } - if (i < ridx || i >= ridx + orignpages || async) { - UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x", - pgs[i], pgs[i]->offset,0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - if (ap->a_m != NULL) { - memcpy(ap->a_m, &pgs[ridx], - orignpages * sizeof(struct vm_page *)); - } - return 0; -} - -/* - * generic VM putpages routine. - * Write the given range of pages to backing store. 
- */ - -int -genfs_putpages(v) - void *v; -{ - struct vop_putpages_args /* { - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; - } */ *ap = v; - - int s, error, npages, run; - int fs_bshift, dev_bshift; - vaddr_t kva; - off_t eof, offset, startoffset; - size_t bytes, iobytes, skipbytes; - daddr_t lbn, blkno; - struct vm_page *pg; - struct buf *mbp, *bp; - struct vnode *vp = ap->a_vp; - boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0; - UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p offset 0x%x count %d", - vp, ap->a_m[0]->offset, ap->a_count, 0); - - simple_unlock(&vp->v_uobj.vmobjlock); - - GOP_SIZE(vp, vp->v_size, &eof); - - error = 0; - npages = ap->a_count; - fs_bshift = vp->v_mount->mnt_fs_bshift; - dev_bshift = vp->v_mount->mnt_dev_bshift; - - pg = ap->a_m[0]; - startoffset = pg->offset; - bytes = MIN(npages << PAGE_SHIFT, eof - startoffset); - skipbytes = 0; - KASSERT(bytes != 0); - - kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK); - - s = splbio(); - vp->v_numoutput += 2; - mbp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", - vp, mbp, vp->v_numoutput, bytes); - splx(s); - mbp->b_bufsize = npages << PAGE_SHIFT; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE | - (async ? B_CALL : 0) | - (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - lbn = offset >> fs_bshift; - error = VOP_BMAP(vp, lbn, NULL, &blkno, &run); - if (error) { - UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0); - skipbytes += bytes; - bytes = 0; - break; - } - - iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, - bytes); - if (blkno == (daddr_t)-1) { - skipbytes += iobytes; - continue; - } - - /* if it's really one i/o, don't make a second buf */ - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - vp->v_numoutput++; - bp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", - vp, bp, vp->v_numoutput, 0); - splx(s); - bp->b_data = (char *)kva + - (vaddr_t)(offset - pg->offset); - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - LIST_INIT(&bp->b_dep); - } - bp->b_lblkno = 0; - bp->b_private = mbp; - - /* adjust physical blkno for partial blocks */ - bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> - dev_bshift); - UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x", - vp, offset, bp->b_bcount, bp->b_blkno); - VOP_STRATEGY(bp); - } - if (skipbytes) { - UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); - s = splbio(); - mbp->b_resid -= skipbytes; - if (error) { - mbp->b_flags |= B_ERROR; - mbp->b_error = error; - } - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0); - return 0; - } - if (bp != NULL) { - UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0); - error = biowait(mbp); - } - if (bioops.io_pageiodone) { - (*bioops.io_pageiodone)(mbp); - } - s = splbio(); - if (mbp->b_vp) { - vwakeup(mbp->b_vp); - } - buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - UVMHIST_LOG(ubchist, "returning, error 
%d", error,0,0,0); - return error; -} - -void -genfs_size(struct vnode *vp, off_t size, off_t *eobp) -{ - int bsize; - - bsize = 1 << vp->v_mount->mnt_fs_bshift; - *eobp = (size + bsize - 1) & ~(bsize - 1); -} - -void -genfs_node_init(struct vnode *vp, struct genfs_ops *ops) -{ - struct genfs_node *gp = VTOG(vp); - - lockinit(&gp->g_glock, PINOD, "glock", 0, 0); - gp->g_op = ops; -} diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 2022279f6d8..e24cde8096b 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.21 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: spec_vnops.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -104,8 +104,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 51fb9564c51..bdd2008545f 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.11 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: specdev.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -121,4 +121,3 @@ int spec_advlock __P((void *)); #define spec_reallocblks spec_badop #define spec_bwrite vop_generic_bwrite #define spec_revoke vop_generic_revoke -#define spec_mmap spec_badop diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c index 46f27a40e62..df3fb4efc01 100644 --- a/sys/miscfs/union/union_vnops.c +++ b/sys/miscfs/union/union_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: union_vnops.c,v 1.16 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: union_vnops.c,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: union_vnops.c,v 1.30.4.1 1996/05/25 22:10:14 jtc Exp $ */ /* @@ -94,7 +94,6 @@ int union_islocked __P((void *)); int union_pathconf __P((void *)); int union_advlock __P((void *)); int union_strategy __P((void *)); -int union_mmap __P((void *)); int (**union_vnodeop_p) __P((void *)); struct vnodeopv_entry_desc union_vnodeop_entries[] = { @@ -134,8 +133,7 @@ struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_islocked_desc, union_islocked }, /* islocked */ { &vop_pathconf_desc, union_pathconf }, /* pathconf */ { &vop_advlock_desc, union_advlock }, /* advlock */ - { &vop_mmap_desc, union_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc union_vnodeop_opv_desc = { &union_vnodeop_p, union_vnodeop_entries }; @@ -1844,13 +1842,3 @@ union_strategy(v) return (error); } -int -union_mmap(v) - void *v; -{ - struct vop_mmap_args *ap = v; - struct vnode *vp = OTHERVP(ap->a_vp); - - ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_mmap), ap)); -}
\ No newline at end of file diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h index db945e35bcd..33ebeb8b122 100644 --- a/sys/msdosfs/denode.h +++ b/sys/msdosfs/denode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: denode.h,v 1.11 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: denode.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: denode.h,v 1.24 1997/10/17 11:23:39 ws Exp $ */ /*- @@ -91,8 +91,6 @@ * things. */ -#include <miscfs/genfs/genfs.h> - /* * Internal pseudo-offset for (nonexistent) directory entry for the root * dir in the root dir @@ -137,7 +135,6 @@ struct fatcache { * contained within a vnode. */ struct denode { - struct genfs_node de_gnode; struct denode *de_next; /* Hash chain forward */ struct denode **de_prev; /* Hash chain back */ struct vnode *de_vnode; /* addr of vnode we are part of */ @@ -314,5 +311,4 @@ void reinsert __P((struct denode *)); int removede __P((struct denode *, struct denode *)); int uniqdosname __P((struct denode *, struct componentname *, u_char *)); int findwin95 __P((struct denode *)); -int msdosfs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *)); #endif /* _KERNEL */ diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c index e01491efc85..dbc1f372fa2 100644 --- a/sys/msdosfs/msdosfs_denode.c +++ b/sys/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_denode.c,v 1.22 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: msdosfs_denode.c,v 1.23 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.23 1997/10/17 11:23:58 ws Exp $ */ /*- @@ -72,13 +72,6 @@ u_long dehash; /* size of hash table - 1 */ #define DEHASH(dev, dcl, doff) (((dev) + (dcl) + (doff) / sizeof(struct direntry)) \ & dehash) -extern int prtactive; - -struct genfs_ops msdosfs_genfsops = { - genfs_size, - msdosfs_gop_alloc, -}; - static struct denode *msdosfs_hashget __P((dev_t, u_long, u_long)); static int msdosfs_hashins __P((struct denode *)); static void msdosfs_hashrem __P((struct denode *)); @@ -337,10 +330,8 @@ retry: } } else nvp->v_type = VREG; - genfs_node_init(nvp, &msdosfs_genfsops); VREF(ldep->de_devvp); *depp = ldep; - nvp->v_size = ldep->de_FileSize; return (0); } @@ -470,7 +461,7 @@ detrunc(dep, length, flags, cred, p) #endif return (error); } - + uvm_vnp_uncache(DETOV(dep)); /* * is this the right place for it? 
*/ @@ -533,7 +524,7 @@ deextend(dep, length, cred) struct ucred *cred; { struct msdosfsmount *pmp = dep->de_pmp; - u_long count, osize; + u_long count; int error; /* @@ -566,12 +557,8 @@ deextend(dep, length, cred) } } - osize = dep->de_FileSize; dep->de_FileSize = length; - uvm_vnp_setsize(DETOV(dep), (voff_t)dep->de_FileSize); dep->de_flag |= DE_UPDATE|DE_MODIFIED; - uvm_vnp_zerorange(DETOV(dep), (off_t)osize, - (size_t)(dep->de_FileSize - osize)); return (deupdat(dep, 1)); } @@ -606,6 +593,7 @@ msdosfs_reclaim(v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); + extern int prtactive; #ifdef MSDOSFS_DEBUG printf("msdosfs_reclaim(): dep %08x, file %s, refcnt %d\n", @@ -646,6 +634,7 @@ msdosfs_inactive(v) struct denode *dep = VTODE(vp); struct proc *p = ap->a_p; int error; + extern int prtactive; #ifdef MSDOSFS_DEBUG printf("msdosfs_inactive(): dep %08x, de_Name[0] %x\n", dep, dep->de_Name[0]); @@ -672,9 +661,7 @@ msdosfs_inactive(v) dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY); #endif if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - if (dep->de_FileSize != 0) { - error = detrunc(dep, (u_long)0, 0, NOCRED, NULL); - } + error = detrunc(dep, (u_long)0, 0, NOCRED, NULL); dep->de_Name[0] = SLOT_DELETED; } deupdat(dep, 0); @@ -693,10 +680,3 @@ out: vrecycle(vp, (struct simplelock *)0, p); return (error); } - -int -msdosfs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - return 0; -} diff --git a/sys/msdosfs/msdosfs_fat.c b/sys/msdosfs/msdosfs_fat.c index 3576a663cdc..d01e16eb89f 100644 --- a/sys/msdosfs/msdosfs_fat.c +++ b/sys/msdosfs/msdosfs_fat.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_fat.c,v 1.9 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: msdosfs_fat.c,v 1.10 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_fat.c,v 1.26 1997/10/17 11:24:02 ws Exp $ */ /*- @@ -988,7 +988,8 @@ extendfile(dep, count, bpp, ncp, flags) int flags; { int error; - u_long frcn = 0, cn, got; + u_long frcn; + u_long cn, got; struct msdosfsmount *pmp = dep->de_pmp; struct buf *bp; @@ -1059,26 +1060,41 @@ extendfile(dep, count, bpp, ncp, flags) } /* - * Update the "last cluster of the file" entry in the - * denode's fat cache. + * Update the "last cluster of the file" entry in the denode's fat + * cache. */ - fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1); - if (flags & DE_CLEAR && - (dep->de_Attributes & ATTR_DIRECTORY)) { + + if (flags & DE_CLEAR) { while (got-- > 0) { - bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++), - pmp->pm_bpcluster, 0, 0); + /* + * Get the buf header for the new block of the file. 
+ */ + if (dep->de_Attributes & ATTR_DIRECTORY) + bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++), + pmp->pm_bpcluster, 0, 0); + else { + bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++), + pmp->pm_bpcluster, 0, 0); + /* + * Do the bmap now, as in msdosfs_write + */ + if (pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0)) + bp->b_blkno = -1; + if (bp->b_blkno == -1) + panic("extendfile: pcbmap"); + } clrbuf(bp); if (bpp) { *bpp = bp; bpp = NULL; - } else { + } else bdwrite(bp); - } } } } - + return (0); } diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c index 60162fa8af9..6aa2d72423b 100644 --- a/sys/msdosfs/msdosfs_vfsops.c +++ b/sys/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vfsops.c,v 1.27 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: msdosfs_vfsops.c,v 1.28 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.48 1997/10/18 02:54:57 briggs Exp $ */ /*- @@ -584,9 +584,15 @@ msdosfs_mountfs(devvp, mp, p, argp) mp->mnt_data = (qaddr_t)pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; - mp->mnt_dev_bshift = pmp->pm_bnshift; - mp->mnt_fs_bshift = pmp->pm_cnshift; - +#ifdef QUOTA + /* + * If we ever do quotas for DOS filesystems this would be a place + * to fill in the info in the msdosfsmount structure. You dolt, + * quotas on dos filesystems make no sense because files have no + * owners on dos filesystems. of course there is some empty space + * in the directory entry where we could put uid's and gid's. + */ +#endif devvp->v_specmountpoint = mp; return (0); @@ -714,11 +720,10 @@ msdosfs_sync_vnode(struct vnode *vp, void *arg) struct denode *dep; dep = VTODE(vp); - if (msa->waitfor == MNT_LAZY || vp->v_type == VNON || - (((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) && - (LIST_EMPTY(&vp->v_dirtyblkhd) && - vp->v_uobj.uo_npages == 0))) { + if (vp->v_type == VNON || + ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 + && vp->v_dirtyblkhd.lh_first == NULL) || + msa->waitfor == MNT_LAZY) { simple_unlock(&vp->v_interlock); return (0); } diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index 8d08da9e229..3fccaf27353 100644 --- a/sys/msdosfs/msdosfs_vnops.c +++ b/sys/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vnops.c,v 1.34 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: msdosfs_vnops.c,v 1.35 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */ /*- @@ -320,7 +320,6 @@ msdosfs_setattr(v) } */ *ap = v; int error = 0; struct denode *dep = VTODE(ap->a_vp); - struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; @@ -332,8 +331,7 @@ msdosfs_setattr(v) (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL) || - (vap->va_uid != VNOVAL && vap->va_uid != pmp->pm_uid) || - (vap->va_gid != VNOVAL && vap->va_gid != pmp->pm_gid)) { + (vap->va_uid != VNOVAL) || (vap->va_gid != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %x, va_fileid %x\n", @@ -415,11 +413,11 @@ msdosfs_read(v) int error = 0; int diff; int blsize; + int isadir; long n; long on; daddr_t lbn; - void *win; - vsize_t bytelen; + daddr_t rablock; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); @@ -434,45 +432,42 @@ 
msdosfs_read(v) if (uio->uio_offset < 0) return (EINVAL); - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - bytelen = MIN(dep->de_FileSize - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) - break; - } - dep->de_flag |= DE_ACCESS; - goto out; - } - - /* this loop is only for directories now */ + isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; - n = MIN((pmp->pm_bpcluster - on), uio->uio_resid); + n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; /* convert cluster # to block # if a directory */ - error = pcbmap(dep, lbn, &lbn, 0, &blsize); - if (error) - return (error); + if (isadir) { + error = pcbmap(dep, lbn, &lbn, 0, &blsize); + if (error) + return (error); + } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ - error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); - n = MIN(n, pmp->pm_bpcluster - bp->b_resid); + if (isadir) { + error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); + } else { + rablock = lbn + 1; + if (dep->de_lastr + 1 == lbn && + de_cn2off(pmp, rablock) < dep->de_FileSize) + error = breada(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, de_cn2bn(pmp, rablock), + pmp->pm_bpcluster, NOCRED, &bp); + else + error = bread(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, NOCRED, &bp); + dep->de_lastr = lbn; + } + n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); return (error); @@ -480,10 +475,8 @@ msdosfs_read(v) error = uiomove(bp->b_data + on, (int) n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); - -out: - if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) - error = deupdat(dep, 1); + if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME)) + dep->de_flag |= DE_ACCESS; return (error); } @@ -500,19 +493,19 @@ msdosfs_write(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; + int n; + int croffset; int resid; u_long osize; int error = 0; u_long count; - daddr_t lastcn; + daddr_t bn, lastcn; + struct buf *bp; int ioflag = ap->a_ioflag; - void *win; - vsize_t bytelen; - off_t oldoff; - boolean_t rv; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; + struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; @@ -528,6 +521,7 @@ msdosfs_write(v) case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; + thisvp = vp; break; case VDIR: return EISDIR; @@ -582,52 +576,84 @@ msdosfs_write(v) } else lastcn = de_clcount(pmp, osize) - 1; - if (dep->de_FileSize < uio->uio_offset + resid) { - dep->de_FileSize = uio->uio_offset + resid; - uvm_vnp_setsize(vp, dep->de_FileSize); - } - do { - oldoff = uio->uio_offset; - if (de_cluster(pmp, oldoff) > lastcn) { + if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } - bytelen = MIN(dep->de_FileSize - oldoff, uio->uio_resid); + bn = de_blk(pmp, uio->uio_offset); + if ((uio->uio_offset & pmp->pm_crbomask) == 0 + && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset) + || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { + /* + * If either the whole cluster gets written, + * or we 
write the cluster from its start beyond EOF, + * then no need to read data from disk. + */ + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); + clrbuf(bp); + /* + * Do the bmap now, since pcbmap needs buffers + * for the fat table. (see msdosfs_strategy) + */ + if (bp->b_blkno == bp->b_lblkno) { + error = pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); + if (error) + bp->b_blkno = -1; + } + if (bp->b_blkno == -1) { + brelse(bp); + if (!error) + error = EIO; /* XXX */ + break; + } + } else { + /* + * The block we need to write into exists, so read it in. + */ + error = bread(thisvp, bn, pmp->pm_bpcluster, + NOCRED, &bp); + if (error) { + brelse(bp); + break; + } + } + + croffset = uio->uio_offset & pmp->pm_crbomask; + n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); + if (uio->uio_offset + n > dep->de_FileSize) { + dep->de_FileSize = uio->uio_offset + n; + uvm_vnp_setsize(vp, dep->de_FileSize); + } + uvm_vnp_uncache(vp); /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. + * Should these vnode_pager_* functions be done on dir + * files? */ - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, oldoff, &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. + * Copy the data from user space into the buf header. */ - if (ioflag & IO_SYNC) { - - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, oldoff, - oldoff + bytelen, PGO_CLEANIT|PGO_SYNCIO); - simple_unlock(&vp->v_uobj.vmobjlock); - } else if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&vp->v_uobj.vmobjlock); - } + error = uiomove(bp->b_data + croffset, n, uio); + + /* + * If they want this synchronous then write it and wait for + * it. Otherwise, if on a cluster boundary write it + * asynchronously so we can move on to the next block + * without delay. Otherwise do a delayed write because we + * may want to write somemore into the block later. 
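
The write-strategy choice described in the comment above (and implemented immediately below) is three-way: IO_SYNC forces a synchronous bwrite(), a write that exactly fills the cluster is pushed asynchronously with bawrite(), and anything else is left as a delayed write with bdwrite(). The sketch below is a stand-alone user-space model of that decision; the IO_SYNC value, the enum names and the cluster size are illustrative stand-ins, not the kernel definitions.

/*
 * User-space model of the write-strategy choice in the restored
 * msdosfs_write(): sync write, async write on a full cluster,
 * delayed write otherwise.  Constants and types are stand-ins.
 */
#include <stdio.h>

#define IO_SYNC 0x04            /* stand-in for the kernel flag */

enum wstrategy { W_SYNC, W_ASYNC, W_DELAYED };

static enum wstrategy
choose_write(int ioflag, long croffset, long n, long bpcluster)
{
        if (ioflag & IO_SYNC)
                return (W_SYNC);        /* bwrite(): write and wait */
        if (croffset + n == bpcluster)
                return (W_ASYNC);       /* bawrite(): cluster is full */
        return (W_DELAYED);             /* bdwrite(): more may follow */
}

int
main(void)
{
        /* 4k clusters; a partial write of 1024 bytes at offset 512. */
        printf("%d\n", choose_write(0, 512, 1024, 4096));      /* 2: delayed */
        printf("%d\n", choose_write(0, 0, 4096, 4096));        /* 1: async */
        printf("%d\n", choose_write(IO_SYNC, 0, 4096, 4096));  /* 0: sync */
        return (0);
}
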
+ */ + if (ioflag & IO_SYNC) + (void) bwrite(bp); + else if (n + croffset == pmp->pm_bpcluster) + bawrite(bp); + else + bdwrite(bp); + dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); - dep->de_flag |= DE_UPDATE; /* * If the write failed and they want us to, truncate the file back @@ -640,8 +666,7 @@ errexit: uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { - detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, - NULL); + detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } @@ -1481,11 +1506,11 @@ msdosfs_readdir(v) while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; - n = MIN(pmp->pm_bpcluster - on, uio->uio_resid); + n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; - n = MIN(n, diff); + n = min(n, diff); if ((error = pcbmap(dep, lbn, &bn, &cn, &blsize)) != 0) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); @@ -1493,7 +1518,7 @@ msdosfs_readdir(v) brelse(bp); return (error); } - n = MIN(n, blsize - bp->b_resid); + n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent @@ -1692,6 +1717,7 @@ msdosfs_bmap(v) int *a_runp; } */ *ap = v; struct denode *dep = VTODE(ap->a_vp); + struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; @@ -1703,7 +1729,7 @@ msdosfs_bmap(v) */ *ap->a_runp = 0; } - return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); + return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0)); } int @@ -1876,10 +1902,7 @@ struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_advlock_desc, msdosfs_advlock }, /* advlock */ { &vop_reallocblks_desc, msdosfs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL } }; struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h index b86819902f2..6956ce54b26 100644 --- a/sys/nfs/nfs.h +++ b/sys/nfs/nfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs.h,v 1.14 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs.h,v 1.15 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs.h,v 1.10.4.1 1996/05/27 11:23:56 fvdl Exp $ */ /* @@ -78,18 +78,8 @@ * Ideally, NFS_DIRBLKSIZ should be bigger, but I've seen servers with * broken NFS/ethernet drivers that won't work with anything bigger (Linux..) */ -#if 1 -/* - * XXXUBC temp hack because of the removal of b_validend. - * eventually we'll store NFS VDIR data in the page cache as well, - * we'll fix this at that point. - */ -#define NFS_DIRBLKSIZ PAGE_SIZE -#define NFS_READDIRBLKSIZ PAGE_SIZE -#else -#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */ +#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */ #define NFS_READDIRBLKSIZ 512 /* Size of read dir blocks. XXX */ -#endif /* * Oddballs @@ -121,10 +111,10 @@ #endif /* - * Use the vm_page flag reserved for pager use to indicate pages - * which have been written to the server but not yet committed. + * The B_INVAFTERWRITE flag should be set to whatever is required by the + * buffer cache code to say "Invalidate the block after it is written back". 
*/ -#define PG_NEEDCOMMIT PG_PAGER1 +#define B_INVAFTERWRITE B_INVAL /* * The IO_METASYNC flag should be implemented for local file systems. diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index e1f17ed2482..fb26a59aeae 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_bio.c,v 1.32 2001/12/14 03:16:02 art Exp $ */ +/* $OpenBSD: nfs_bio.c,v 1.33 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */ /* @@ -50,9 +50,8 @@ #include <sys/mount.h> #include <sys/kernel.h> #include <sys/namei.h> -#include <sys/pool.h> -#include <uvm/uvm.h> +#include <uvm/uvm_extern.h> #include <nfs/rpcv2.h> #include <nfs/nfsproto.h> @@ -71,19 +70,20 @@ struct nfsstats nfsstats; */ int nfs_bioread(vp, uio, ioflag, cred) - struct vnode *vp; - struct uio *uio; + register struct vnode *vp; + register struct uio *uio; int ioflag; struct ucred *cred; { - struct nfsnode *np = VTONFS(vp); - int biosize; - struct buf *bp = NULL; + register struct nfsnode *np = VTONFS(vp); + register int biosize, diff; + struct buf *bp = NULL, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); + daddr_t lbn, bn, rabn; caddr_t baddr; - int got_buf = 0, error = 0, n = 0, on = 0; + int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) @@ -153,25 +153,87 @@ nfs_bioread(vp, uio, ioflag, cred) switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; - error = 0; - while (uio->uio_resid > 0) { - void *win; - vsize_t bytelen = MIN(np->n_size - uio->uio_offset, - uio->uio_resid); + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize - 1); + bn = lbn * (biosize / DEV_BSIZE); + not_readin = 1; - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; + /* + * Start the read ahead(s), as required. + */ + if (nfs_numasync > 0 && nmp->nm_readahead > 0) { + for (nra = 0; nra < nmp->nm_readahead && + (lbn + 1 + nra) * biosize < np->n_size; nra++) { + rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); + if (!incore(vp, rabn)) { + rabp = nfs_getcacheblk(vp, rabn, biosize, p); + if (!rabp) + return (EINTR); + if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { + rabp->b_flags |= (B_READ | B_ASYNC); + if (nfs_asyncio(rabp)) { + rabp->b_flags |= B_INVAL; + brelse(rabp); + } + } else + brelse(rabp); } + } } - n = 0; - break; + /* + * If the block is in the cache and has the required data + * in a valid region, just copy it out. + * Otherwise, get the block and write back/read in, + * as required. 
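
The cached-block test described above hinges on the b_validoff/b_validend pair that this change brings back into struct buf: the copy can be satisfied from the cache only when the requested range lies inside the valid region. A minimal user-space sketch of that containment check, with plain ints standing in for the buffer fields:

/*
 * Model of the valid-region test used by the restored nfs_bioread():
 * a cached block satisfies the copy only if [on, on + n) lies inside
 * [validoff, validend).
 */
#include <stdio.h>

static int
range_is_valid(int validoff, int validend, int on, int n)
{
        return (on >= validoff && on + n <= validend);
}

int
main(void)
{
        /* Whole 4k block valid; 512 bytes requested at offset 1024. */
        printf("%d\n", range_is_valid(0, 4096, 1024, 512));    /* 1: copy out */
        /* Only the first 2048 bytes valid; the request misses. */
        printf("%d\n", range_is_valid(0, 2048, 1536, 1024));   /* 0: re-read */
        return (0);
}
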
+ */ + if ((bp = incore(vp, bn)) && + (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == + (B_BUSY | B_WRITEINPROG)) + got_buf = 0; + else { +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { + bp->b_flags |= B_READ; + not_readin = 0; + error = nfs_doio(bp, p); + if (error) { + brelse(bp); + return (error); + } + } + } + n = min((unsigned)(biosize - on), uio->uio_resid); + diff = np->n_size - uio->uio_offset; + if (diff < n) + n = diff; + if (not_readin && n > 0) { + if (on < bp->b_validoff || (on + n) > bp->b_validend) { + if (!got_buf) { + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); + got_buf = 1; + } + bp->b_flags |= B_INVAFTERWRITE; + if (bp->b_dirtyend > 0) { + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfsbioread"); + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + } else + brelse(bp); + goto again; + } + } + diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); + if (diff < n) + n = diff; + break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); @@ -185,7 +247,7 @@ nfs_bioread(vp, uio, ioflag, cred) return (error); } } - n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); + n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); got_buf = 1; on = 0; break; @@ -227,17 +289,18 @@ nfs_write(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - int biosize; - struct uio *uio = ap->a_uio; + register int biosize; + register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; - struct vnode *vp = ap->a_vp; + register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); - struct ucred *cred = ap->a_cred; + register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; + struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int error = 0; - int rv; + daddr_t lbn, bn; + int n, on, error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -297,52 +360,85 @@ nfs_write(v) */ biosize = nmp->nm_rsize; do { - void *win; - voff_t oldoff = uio->uio_offset; - vsize_t bytelen; /* - * XXXART - workaround for compiler bug on 68k. Wieee! 
+ * XXX make sure we aren't cached in the VM page cache */ - *((volatile vsize_t *)&bytelen) = uio->uio_resid; + uvm_vnp_uncache(vp); nfsstats.biocache_writes++; + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize-1); + n = min((unsigned)(biosize - on), uio->uio_resid); + bn = lbn * (biosize / DEV_BSIZE); +again: + bp = nfs_getcacheblk(vp, bn, biosize, p); + if (!bp) + return (EINTR); np->n_flag |= NMODIFIED; - if (np->n_size < uio->uio_offset + bytelen) { - np->n_size = uio->uio_offset + bytelen; - uvm_vnp_setsize(vp, np->n_size); - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen, - UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - rv = 1; - if ((ioflag & IO_SYNC)) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, - oldoff & ~(nmp->nm_wsize - 1), - uio->uio_offset & ~(nmp->nm_wsize - 1), - PGO_CLEANIT|PGO_SYNCIO); - simple_unlock(&vp->v_uobj.vmobjlock); - } else if ((oldoff & ~(nmp->nm_wsize - 1)) != - (uio->uio_offset & ~(nmp->nm_wsize - 1))) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, - oldoff & ~(nmp->nm_wsize - 1), - uio->uio_offset & ~(nmp->nm_wsize - 1), - PGO_CLEANIT|PGO_WEAK); - simple_unlock(&vp->v_uobj.vmobjlock); + if (uio->uio_offset + n > np->n_size) { + np->n_size = uio->uio_offset + n; + uvm_vnp_setsize(vp, (u_long)np->n_size); } - if (!rv) { - error = EIO; + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise force a write rpc of the old dirty area. + */ + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + bp->b_proc = p; + if (VOP_BWRITE(bp) == EINTR) + return (EINTR); + goto again; } + + error = uiomove((char *)bp->b_data + on, n, uio); if (error) { - break; + bp->b_flags |= B_ERROR; + brelse(bp); + return (error); } - } while (uio->uio_resid > 0); - return (error); + if (bp->b_dirtyend > 0) { + bp->b_dirtyoff = min(on, bp->b_dirtyoff); + bp->b_dirtyend = max((on + n), bp->b_dirtyend); + } else { + bp->b_dirtyoff = on; + bp->b_dirtyend = on + n; + } + if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || + bp->b_validoff > bp->b_dirtyend) { + bp->b_validoff = bp->b_dirtyoff; + bp->b_validend = bp->b_dirtyend; + } else { + bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); + bp->b_validend = max(bp->b_validend, bp->b_dirtyend); + } + + /* + * Since this block is being modified, it must be written + * again and not just committed. + */ + bp->b_flags &= ~B_NEEDCOMMIT; + + /* + * If the lease is non-cachable or IO_SYNC do bwrite(). 
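
The dirty-region handling restored in nfs_write() above either grows the buffer's b_dirtyoff/b_dirtyend pair or, when the new write would leave a hole, forces the old region out with a write RPC first. A self-contained model of that bookkeeping, using a simplified stand-in struct rather than struct buf:

/*
 * Model of the dirty-region merge in the restored nfs_write().
 * Returns 1 when the new write [on, on + n) was merged into the
 * buffer's dirty region, 0 when the old region must be written first
 * because merging would leave a hole.
 */
#include <stdio.h>

struct dirty {
        int off;        /* b_dirtyoff: start of dirty region */
        int end;        /* b_dirtyend: end of dirty region, 0 if clean */
};

static int
merge_dirty(struct dirty *d, int on, int n)
{
        if (d->end > 0 && (on > d->end || on + n < d->off))
                return (0);             /* would leave a hole: flush first */
        if (d->end == 0) {
                d->off = on;
                d->end = on + n;
        } else {
                if (on < d->off)
                        d->off = on;
                if (on + n > d->end)
                        d->end = on + n;
        }
        return (1);
}

int
main(void)
{
        struct dirty d = { 0, 0 };

        printf("%d [%d,%d)\n", merge_dirty(&d, 100, 50), d.off, d.end);
        printf("%d [%d,%d)\n", merge_dirty(&d, 150, 50), d.off, d.end);
        printf("%d [%d,%d)\n", merge_dirty(&d, 400, 10), d.off, d.end); /* hole */
        return (0);
}
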
+ */ + if (ioflag & IO_SYNC) { + bp->b_proc = p; + error = VOP_BWRITE(bp); + if (error) + return (error); + } else if ((n + on) == biosize) { + bp->b_proc = (struct proc *)0; + bp->b_flags |= B_ASYNC; + (void)nfs_writebp(bp, 0); + } else { + bdwrite(bp); + } + } while (uio->uio_resid > 0 && n > 0); + return (0); } /* @@ -364,9 +460,9 @@ nfs_getcacheblk(vp, bn, size, p) if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); - while (bp == NULL) { - if (nfs_sigintr(nmp, NULL, p)) - return (NULL); + while (bp == (struct buf *)0) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else @@ -406,7 +502,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); - if (error && intrflg && nfs_sigintr(nmp, NULL, p)) + if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } @@ -416,7 +512,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { - if (intrflg && nfs_sigintr(nmp, NULL, p)) { + if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; @@ -443,20 +539,41 @@ int nfs_asyncio(bp) struct buf *bp; { - int i; + int i,s; if (nfs_numasync == 0) return (EIO); - for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { + if ((bp->b_flags & B_READ) == 0) { + bp->b_flags |= B_WRITEINPROG; + } + TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); - nfs_iodwant[i] = NULL; + nfs_iodwant[i] = (struct proc *)0; wakeup((caddr_t)&nfs_iodwant[i]); return (0); } - } - return (EIO); + /* + * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE + * return EIO so the process will call nfs_doio() and do it + * synchronously. + */ + if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE)) + return (EIO); + + /* + * Just turn the async write into a delayed write, instead of + * doing in synchronously. Hopefully, at least one of the nfsiods + * is currently doing a write for this file and will pick up the + * delayed writes before going back to sleep. 
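
nfs_asyncio(), restored above, prefers to hand the buffer to an idle nfsiod and otherwise falls back as the comment describes: reads (and writes already in progress or marked no-cache) are bounced back to the caller with EIO so they run synchronously, while an ordinary async write is simply turned into a delayed write. A small user-space model of that policy; the flag values and return codes are illustrative stand-ins.

/*
 * Model of the fallback policy in the restored nfs_asyncio().
 */
#include <stdio.h>

#define B_READ          0x01    /* stand-in flag values */
#define B_WRITEINPROG   0x02
#define B_NOCACHE       0x04

enum outcome { QUEUED_TO_IOD, SYNC_EIO, DELAYED_WRITE };

static enum outcome
asyncio_policy(int idle_iods, int bflags)
{
        if (idle_iods > 0)
                return (QUEUED_TO_IOD);
        if (bflags & (B_READ | B_WRITEINPROG | B_NOCACHE))
                return (SYNC_EIO);      /* caller falls back to nfs_doio() */
        return (DELAYED_WRITE);         /* buf_dirty() + biodone() */
}

int
main(void)
{
        printf("%d\n", asyncio_policy(1, B_READ));      /* 0: queued */
        printf("%d\n", asyncio_policy(0, B_READ));      /* 1: sync */
        printf("%d\n", asyncio_policy(0, 0));           /* 2: delayed */
        return (0);
}
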
+ */ + s = splbio(); + buf_dirty(bp); + splx(s); + biodone(bp); + return (0); } /* @@ -472,7 +589,7 @@ nfs_doio(bp, p) register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; - int error = 0, diff, len, iomode, must_commit = 0; + int s, error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; @@ -519,7 +636,9 @@ nfs_doio(bp, p) uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop); - if (!error && uiop->uio_resid) { + if (!error) { + bp->b_validoff = 0; + if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to @@ -530,9 +649,13 @@ nfs_doio(bp, p) len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT) + diff); if (len > 0) { - len = MIN(len, uiop->uio_resid); - memset((char *)bp->b_data + diff, 0, len); - } + len = min(len, uiop->uio_resid); + bzero((char *)bp->b_data + diff, len); + bp->b_validend = diff + len; + } else + bp->b_validend = diff; + } else + bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) { @@ -549,19 +672,62 @@ nfs_doio(bp, p) default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; - } + }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { - io.iov_base = bp->b_data; - io.iov_len = uiop->uio_resid = bp->b_bcount; - uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; + io.iov_len = uiop->uio_resid = bp->b_dirtyend + - bp->b_dirtyoff; + uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; - iomode = NFSV3WRITE_UNSTABLE; + if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC) + iomode = NFSV3WRITE_UNSTABLE; + else + iomode = NFSV3WRITE_FILESYNC; + bp->b_flags |= B_WRITEINPROG; +#ifdef fvdl_debug + printf("nfs_doio(%x): bp %x doff %d dend %d\n", + vp, bp, bp->b_dirtyoff, bp->b_dirtyend); +#endif error = nfs_writerpc(vp, uiop, &iomode, &must_commit); + if (!error && iomode == NFSV3WRITE_UNSTABLE) + bp->b_flags |= B_NEEDCOMMIT; + else + bp->b_flags &= ~B_NEEDCOMMIT; + bp->b_flags &= ~B_WRITEINPROG; + + /* + * For an interrupted write, the buffer is still valid and the + * write hasn't been pushed to the server yet, so we can't set + * B_ERROR and report the interruption by setting B_EINTR. For + * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt + * is essentially a noop. + * For the case of a V3 write rpc not being committed to stable + * storage, the block is still dirty and requires either a commit + * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC + * before the block is reused. This is indicated by setting the + * B_DELWRI and B_NEEDCOMMIT flags. + */ + if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + s = splbio(); + buf_dirty(bp); + splx(s); + + if (!(bp->b_flags & B_ASYNC) && error) + bp->b_flags |= B_EINTR; + } else { + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = np->n_error = error; + np->n_flag |= NWRITEERR; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; + } } bp->b_resid = uiop->uio_resid; if (must_commit) @@ -569,597 +735,3 @@ nfs_doio(bp, p) biodone(bp); return (error); } - -/* - * Vnode op for VM getpages. 
- */ -int -nfs_getpages(v) - void *v; -{ - struct vop_getpages_args /* { - struct vnode *a_vp; - voff_t a_offset; - vm_page_t *a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; - } */ *ap = v; - - off_t eof, offset, origoffset, startoffset, endoffset; - int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount; - vaddr_t kva; - struct buf *bp, *mbp; - struct vnode *vp = ap->a_vp; - struct nfsnode *np = VTONFS(vp); - struct uvm_object *uobj = &vp->v_uobj; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - size_t bytes, iobytes, tailbytes, totalbytes, skipbytes; - int flags = ap->a_flags; - int bsize; - struct vm_page *pgs[16]; /* XXXUBC 16 */ - boolean_t v3 = NFS_ISV3(vp); - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; - struct proc *p = curproc; - - UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset, - *ap->a_count,0); - -#ifdef DIAGNOSTIC - if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) { - panic("nfs_getpages: centeridx %d out of range", - ap->a_centeridx); - } -#endif - - error = 0; - origoffset = ap->a_offset; - eof = vp->v_size; - if (origoffset >= eof) { - if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); - } - UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x", - (int)origoffset, (int)eof,0,0); - return EINVAL; - } - - if (flags & PGO_LOCKED) { - uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, - UFP_NOWAIT|UFP_NOALLOC); - return 0; - } - - /* vnode is VOP_LOCKed, uobj is locked */ - if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) { - vn_syncer_add_to_worklist(vp, syncdelay); - } - bsize = nmp->nm_rsize; - orignpages = MIN(*ap->a_count, - round_page(eof - origoffset) >> PAGE_SHIFT); - npages = orignpages; - startoffset = origoffset & ~(bsize - 1); - endoffset = round_page((origoffset + (npages << PAGE_SHIFT) - + bsize - 1) & ~(bsize - 1)); - endoffset = MIN(endoffset, round_page(eof)); - ridx = (origoffset - startoffset) >> PAGE_SHIFT; - - if (!async && !write) { - int rapages = MAX(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT; - - (void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0, - VM_PROT_READ, 0, 0); - simple_lock(&uobj->vmobjlock); - } - - UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages, - (int)origoffset, 0,0); - memset(pgs, 0, sizeof(pgs)); - uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL); - - if (flags & PGO_OVERWRITE) { - UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); - - /* XXXUBC for now, zero the page if we allocated it */ - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - uvm_pagezero(pg); - pg->flags &= ~(PG_FAKE); - } - } - npages += ridx; - if (v3) { - simple_unlock(&uobj->vmobjlock); - goto uncommit; - } - goto out; - } - - /* - * if the pages are already resident, just return them. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if ((pg->flags & PG_FAKE) != 0 || - ((ap->a_access_type & VM_PROT_WRITE) && - (pg->flags & PG_RDONLY))) { - break; - } - } - if (i == npages) { - UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); - npages += ridx; - goto out; - } - - /* - * the page wasn't resident and we're not overwriting, - * so we're going to have to do some i/o. - * find any additional pages needed to cover the expanded range. 
- */ - - if (startoffset != origoffset || - startoffset + (npages << PAGE_SHIFT) != endoffset) { - - /* - * XXXUBC we need to avoid deadlocks caused by locking - * additional pages at lower offsets than pages we - * already have locked. for now, unlock them all and - * start over. - */ - - for (i = 0; i < npages; i++) { - struct vm_page *pg = pgs[ridx + i]; - - if (pg->flags & PG_FAKE) { - pg->flags |= PG_RELEASED; - } - } - uvm_page_unbusy(&pgs[ridx], npages); - memset(pgs, 0, sizeof(pgs)); - - UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x", - startoffset, endoffset, 0,0); - npages = (endoffset - startoffset) >> PAGE_SHIFT; - npgs = npages; - uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL); - } - simple_unlock(&uobj->vmobjlock); - - /* - * update the cached read creds for this node. - */ - - if (np->n_rcred) { - crfree(np->n_rcred); - } - np->n_rcred = curproc->p_ucred; - crhold(np->n_rcred); - - /* - * read the desired page(s). - */ - - totalbytes = npages << PAGE_SHIFT; - bytes = MIN(totalbytes, vp->v_size - startoffset); - tailbytes = totalbytes - bytes; - skipbytes = 0; - - kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK | - UVMPAGER_MAPIN_READ); - - s = splbio(); - mbp = pool_get(&bufpool, PR_WAITOK); - splx(s); - mbp->b_bufsize = totalbytes; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - mbp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - /* - * if EOF is in the middle of the last page, zero the part past EOF. - */ - - if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) { - memset((char *)kva + bytes, 0, tailbytes); - } - - /* - * now loop over the pages, reading as needed. - */ - - bp = NULL; - for (offset = startoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - - /* - * skip pages which don't need to be read. - */ - - pidx = (offset - startoffset) >> PAGE_SHIFT; - UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x", - pidx, (int)offset, (int)startoffset,0); - while ((pgs[pidx]->flags & PG_FAKE) == 0) { - size_t b; - - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - b = MIN(PAGE_SIZE, bytes); - offset += b; - bytes -= b; - skipbytes += b; - pidx++; - UVMHIST_LOG(ubchist, "skipping, new offset 0x%x", - (int)offset, 0,0,0); - if (bytes == 0) { - goto loopdone; - } - } - - /* - * see how many pages can be read with this i/o. - * reduce the i/o size if necessary. - */ - - iobytes = bytes; - if (offset + iobytes > round_page(offset)) { - pcount = 1; - while (pidx + pcount < npages && - pgs[pidx + pcount]->flags & PG_FAKE) { - pcount++; - } - iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) - - (offset - trunc_page(offset))); - } - iobytes = MIN(iobytes, nmp->nm_rsize); - - /* - * allocate a sub-buf for this piece of the i/o - * (or just use mbp if there's only 1 piece), - * and start it going. 
- */ - - if (offset == startoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - bp = pool_get(&bufpool, PR_WAITOK); - splx(s); - bp->b_data = (char *)kva + offset - startoffset; - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - bp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&bp->b_dep); - } - bp->b_private = mbp; - bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT; - - UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x", - bp, offset, iobytes, bp->b_blkno); - - VOP_STRATEGY(bp); - } - -loopdone: - if (skipbytes) { - s = splbio(); - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0); - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - s = splbio(); - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - uvm_pagermapout(kva, npages); - - if (write && v3) { -uncommit: - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - nfs_del_committed_range(vp, origoffset, npages); - nfs_del_tobecommitted_range(vp, origoffset, npages); - simple_lock(&uobj->vmobjlock); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY); - } - simple_unlock(&uobj->vmobjlock); - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - } - - simple_lock(&uobj->vmobjlock); - -out: - if (error) { - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - if (pgs[i]->flags & PG_FAKE) { - uvm_pagefree(pgs[i]); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); - return error; - } - - UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0,0); - uvm_lock_pageq(); - for (i = 0; i < npages; i++) { - if (pgs[i] == NULL) { - continue; - } - UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", - pgs[i], pgs[i]->flags, 0,0); - if (pgs[i]->flags & PG_FAKE) { - UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x", - pgs[i], (int)pgs[i]->offset,0,0); - pgs[i]->flags &= ~(PG_FAKE); - pmap_clear_modify(pgs[i]); - pmap_clear_reference(pgs[i]); - } - if (i < ridx || i >= ridx + orignpages || async) { - UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x", - pgs[i], (int)pgs[i]->offset,0,0); - if (pgs[i]->flags & PG_WANTED) { - wakeup(pgs[i]); - } - if (pgs[i]->flags & PG_RELEASED) { - uvm_unlock_pageq(); - (uobj->pgops->pgo_releasepg)(pgs[i], NULL); - uvm_lock_pageq(); - continue; - } - uvm_pageactivate(pgs[i]); - pgs[i]->flags &= ~(PG_WANTED|PG_BUSY); - UVM_PAGE_OWN(pgs[i], NULL); - } - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - if (ap->a_m != NULL) { - memcpy(ap->a_m, &pgs[ridx], - *ap->a_count * sizeof(struct vm_page *)); - } - return 0; -} - -/* - * Vnode op for VM putpages. 
- */ -int -nfs_putpages(v) - void *v; -{ - struct vop_putpages_args /* { - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; - } */ *ap = v; - - struct vnode *vp = ap->a_vp; - struct nfsnode *np = VTONFS(vp); - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - struct buf *bp, *mbp; - struct vm_page **pgs = ap->a_m; - int flags = ap->a_flags; - int npages = ap->a_count; - int s, error, i; - size_t bytes, iobytes, skipbytes; - vaddr_t kva; - off_t offset, origoffset, commitoff; - uint32_t commitbytes; - boolean_t v3 = NFS_ISV3(vp); - boolean_t async = (flags & PGO_SYNCIO) == 0; - boolean_t weak = (flags & PGO_WEAK) && v3; - struct proc *p = curproc; - UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p pgp %p count %d", - vp, ap->a_m, ap->a_count,0); - - simple_unlock(&vp->v_uobj.vmobjlock); - - error = 0; - origoffset = pgs[0]->offset; - bytes = MIN(ap->a_count << PAGE_SHIFT, vp->v_size - origoffset); - skipbytes = 0; - - /* - * if the range has been committed already, mark the pages thus. - * if the range just needs to be committed, we're done - * if it's a weak putpage, otherwise commit the range. - */ - - if (v3) { - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - if (nfs_in_committed_range(vp, origoffset, bytes)) { - goto committed; - } - if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) { - if (weak) { - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - return 0; - } else { - commitoff = np->n_pushlo; - commitbytes = (uint32_t)(np->n_pushhi - - np->n_pushlo); - goto commit; - } - } - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - } - - /* - * otherwise write or commit all the pages. - */ - - kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK| - UVMPAGER_MAPIN_WRITE); - - s = splbio(); - vp->v_numoutput += 2; - mbp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", - vp, mbp, vp->v_numoutput, bytes); - splx(s); - mbp->b_bufsize = npages << PAGE_SHIFT; - mbp->b_data = (void *)kva; - mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE | - (async ? B_CALL|B_ASYNC : 0) | - (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0); - mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = NULL; - mbp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&mbp->b_dep); - bgetvp(vp, mbp); - - for (offset = origoffset; - bytes > 0; - offset += iobytes, bytes -= iobytes) { - iobytes = MIN(nmp->nm_wsize, bytes); - - /* - * skip writing any pages which only need a commit. 
- */ - - if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags & - PG_NEEDCOMMIT) != 0) { - KASSERT((offset & (PAGE_SIZE - 1)) == 0); - iobytes = MIN(PAGE_SIZE, bytes); - skipbytes += iobytes; - continue; - } - - /* if it's really one i/o, don't make a second buf */ - if (offset == origoffset && iobytes == bytes) { - bp = mbp; - } else { - s = splbio(); - vp->v_numoutput++; - bp = pool_get(&bufpool, PR_WAITOK); - UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", - vp, bp, vp->v_numoutput, 0); - splx(s); - bp->b_data = (char *)kva + (offset - origoffset); - bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC; - bp->b_iodone = uvm_aio_biodone1; - bp->b_vp = vp; - bp->b_proc = NULL; /* XXXUBC */ - LIST_INIT(&bp->b_dep); - } - bp->b_private = mbp; - bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT); - UVMHIST_LOG(ubchist, "bp %p numout %d", - bp, vp->v_numoutput,0,0); - VOP_STRATEGY(bp); - } - if (skipbytes) { - UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0); - s = splbio(); - mbp->b_resid -= skipbytes; - if (mbp->b_resid == 0) { - biodone(mbp); - } - splx(s); - } - if (async) { - return 0; - } - if (bp != NULL) { - error = biowait(mbp); - } - - s = splbio(); - if (mbp->b_vp) { - vwakeup(mbp->b_vp); - } - (void) buf_cleanout(mbp); - pool_put(&bufpool, mbp); - splx(s); - - uvm_pagermapout(kva, ap->a_count); - if (error || !v3) { - UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0); - return error; - } - - /* - * for a weak put, mark the range as "to be committed" - * and mark the pages read-only so that we will be notified - * to remove the pages from the "to be committed" range - * if they are made dirty again. - * for a strong put, commit the pages and remove them from the - * "to be committed" range. also, mark them as writable - * and not cleanable with just a commit. - */ - - lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p); - if (weak) { - nfs_add_tobecommitted_range(vp, origoffset, - npages << PAGE_SHIFT); - for (i = 0; i < npages; i++) { - pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY; - } - } else { - commitoff = origoffset; - commitbytes = npages << PAGE_SHIFT; -commit: - error = nfs_commit(vp, commitoff, commitbytes, curproc); - nfs_del_tobecommitted_range(vp, commitoff, commitbytes); -committed: - for (i = 0; i < npages; i++) { - pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY); - } - } - lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p); - return error; -} diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index f0cebcb4566..d88a7649524 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_node.c,v 1.18 2001/12/01 01:44:35 art Exp $ */ +/* $OpenBSD: nfs_node.c,v 1.19 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */ /* @@ -145,7 +145,6 @@ loop: vp = nvp; np = pool_get(&nfs_node_pool, PR_WAITOK); bzero((caddr_t)np, sizeof *np); - lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0); vp->v_data = np; np->n_vnode = vp; @@ -170,19 +169,6 @@ loop: np->n_fhp = &np->n_fh; bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize); np->n_fhsize = fhsize; - - /* - * XXXUBC doing this while holding the nfs_hashlock is bad, - * but there's no alternative at the moment. 
- */ - error = VOP_GETATTR(vp, &np->n_vattr, curproc->p_ucred, curproc); - if (error) { - lockmgr(&nfs_hashlock, LK_RELEASE, 0, p); - vrele(vp); - return error; - } - uvm_vnp_setsize(vp, np->n_vattr.va_size); - lockmgr(&nfs_hashlock, LK_RELEASE, 0, p); *npp = np; return (0); @@ -199,12 +185,11 @@ nfs_inactive(v) struct nfsnode *np; struct sillyrename *sp; struct proc *p = curproc; /* XXX */ - struct vnode *vp = ap->a_vp; - np = VTONFS(vp); - if (prtactive && vp->v_usecount != 0) - vprint("nfs_inactive: pushing active", vp); - if (vp->v_type != VDIR) { + np = VTONFS(ap->a_vp); + if (prtactive && ap->a_vp->v_usecount != 0) + vprint("nfs_inactive: pushing active", ap->a_vp); + if (ap->a_vp->v_type != VDIR) { sp = np->n_sillyrename; np->n_sillyrename = (struct sillyrename *)0; } else @@ -213,7 +198,7 @@ nfs_inactive(v) /* * Remove the silly file that was rename'd earlier */ - (void) nfs_vinvalbuf(vp, 0, sp->s_cred, p, 1); + (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); nfs_removeit(sp); crfree(sp->s_cred); vrele(sp->s_dvp); @@ -221,7 +206,7 @@ nfs_inactive(v) } np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT); - VOP_UNLOCK(vp, 0, ap->a_p); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c index 9534e7221da..a66f457ceeb 100644 --- a/sys/nfs/nfs_serv.c +++ b/sys/nfs/nfs_serv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_serv.c,v 1.28 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_serv.c,v 1.29 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */ /* @@ -1663,6 +1663,8 @@ nfsrv_remove(nfsd, slp, procp, mrq) error = EBUSY; goto out; } + if (vp->v_flag & VTEXT) + uvm_vnp_uncache(vp); out: if (!error) { error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); @@ -3274,10 +3276,11 @@ nfsrv_access(vp, flags, cred, rdonly, p, override) } } /* - * If the vnode is in use as a process's text, - * we can't allow writing. + * If there's shared text associated with + * the inode, try to free it up once. If + * we fail, we can't allow writing. */ - if ((vp->v_flag & VTEXT)) + if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp)) return (ETXTBSY); } error = VOP_ACCESS(vp, flags, cred, p); diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index efee3069743..38a91d45245 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_subs.c,v 1.37 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: nfs_subs.c,v 1.38 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */ /* @@ -39,40 +39,6 @@ * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 */ -/* - * Copyright 2000 Wasabi Systems, Inc. - * All rights reserved. - * - * Written by Frank van der Linden for Wasabi Systems, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed for the NetBSD Project by - * Wasabi Systems, Inc. - * 4. The name of Wasabi Systems, Inc. 
may not be used to endorse - * or promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ /* * These functions support the macros and help fiddle mbuf chains for @@ -1275,14 +1241,17 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) vap->va_filerev = 0; } if (vap->va_size != np->n_size) { - if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) { - vap->va_size = np->n_size; - } else { + if (vap->va_type == VREG) { + if (np->n_flag & NMODIFIED) { + if (vap->va_size < np->n_size) + vap->va_size = np->n_size; + else + np->n_size = vap->va_size; + } else + np->n_size = vap->va_size; + uvm_vnp_setsize(vp, np->n_size); + } else np->n_size = vap->va_size; - if (vap->va_type == VREG) { - uvm_vnp_setsize(vp, np->n_size); - } - } } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { @@ -1772,216 +1741,26 @@ void nfs_clearcommit(mp) struct mount *mp; { - struct vnode *vp; - struct vm_page *pg; - struct nfsnode *np; + register struct vnode *vp, *nvp; + register struct buf *bp, *nbp; int s; s = splbio(); - LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - if (vp->v_type == VNON) - continue; - np = VTONFS(vp); - np->n_pushlo = np->n_pushhi = np->n_pushedlo = - np->n_pushedhi = 0; - np->n_commitflags &= - ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); - simple_lock(&vp->v_uobj.vmobjlock); - TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { - pg->flags &= ~PG_NEEDCOMMIT; +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) /* Paranoia */ + goto loop; + nvp = vp->v_mntvnodes.le_next; + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) + == (B_DELWRI | B_NEEDCOMMIT)) + bp->b_flags &= ~B_NEEDCOMMIT; } - simple_unlock(&vp->v_uobj.vmobjlock); } splx(s); } -void -nfs_merge_commit_ranges(vp) - struct vnode *vp; -{ - struct nfsnode *np = VTONFS(vp); - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { - np->n_pushedlo = np->n_pushlo; - np->n_pushedhi = np->n_pushhi; - np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; - } else { - if (np->n_pushlo < np->n_pushedlo) - np->n_pushedlo = np->n_pushlo; - if (np->n_pushhi > np->n_pushedhi) - np->n_pushedhi = np->n_pushhi; - } - - np->n_pushlo = np->n_pushhi = 0; - np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID; - -#ifdef fvdl_debug - printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -int -nfs_in_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) - return 0; - lo = off; - hi = lo + len; - - return (lo >= np->n_pushedlo && hi <= np->n_pushedhi); -} - -int 
-nfs_in_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) - return 0; - lo = off; - hi = lo + len; - - return (lo >= np->n_pushlo && hi <= np->n_pushhi); -} - -void -nfs_add_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - lo = off; - hi = lo + len; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { - np->n_pushedlo = lo; - np->n_pushedhi = hi; - np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; - } else { - if (hi > np->n_pushedhi) - np->n_pushedhi = hi; - if (lo < np->n_pushedlo) - np->n_pushedlo = lo; - } -#ifdef fvdl_debug - printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -void -nfs_del_committed_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) - return; - - lo = off; - hi = lo + len; - - if (lo > np->n_pushedhi || hi < np->n_pushedlo) - return; - if (lo <= np->n_pushedlo) - np->n_pushedlo = hi; - else if (hi >= np->n_pushedhi) - np->n_pushedhi = lo; - else { - /* - * XXX There's only one range. If the deleted range - * is in the middle, pick the largest of the - * contiguous ranges that it leaves. - */ - if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi)) - np->n_pushedhi = lo; - else - np->n_pushedlo = hi; - } -#ifdef fvdl_debug - printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo, - (unsigned)np->n_pushedhi); -#endif -} - -void -nfs_add_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - lo = off; - hi = lo + len; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) { - np->n_pushlo = lo; - np->n_pushhi = hi; - np->n_commitflags |= NFS_COMMIT_PUSH_VALID; - } else { - if (lo < np->n_pushlo) - np->n_pushlo = lo; - if (hi > np->n_pushhi) - np->n_pushhi = hi; - } -#ifdef fvdl_debug - printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, - (unsigned)np->n_pushhi); -#endif -} - -void -nfs_del_tobecommitted_range(vp, off, len) - struct vnode *vp; - off_t off, len; -{ - struct nfsnode *np = VTONFS(vp); - off_t lo, hi; - - if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) - return; - - lo = off; - hi = lo + len; - - if (lo > np->n_pushhi || hi < np->n_pushlo) - return; - - if (lo <= np->n_pushlo) - np->n_pushlo = hi; - else if (hi >= np->n_pushhi) - np->n_pushhi = lo; - else { - /* - * XXX There's only one range. If the deleted range - * is in the middle, pick the largest of the - * contiguous ranges that it leaves. - */ - if ((np->n_pushlo - lo) > (hi - np->n_pushhi)) - np->n_pushhi = lo; - else - np->n_pushlo = hi; - } -#ifdef fvdl_debug - printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, - (unsigned)np->n_pushhi); -#endif -} - /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. 
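
The helpers deleted from nfs_subs.c above tracked a single committed/to-be-committed byte range per nfsnode, growing it on additions and, when a piece is deleted from the middle, keeping the larger leftover as their comments describe. A stand-alone sketch of that interval arithmetic, with plain longs in place of the off_t nfsnode fields:

/*
 * Model of the single-interval bookkeeping performed by the removed
 * nfs_add_committed_range()/nfs_del_committed_range() helpers.  On a
 * middle deletion only one range can be kept, so the larger leftover
 * piece survives (the intent stated in the removed code's comments).
 */
#include <stdio.h>

struct crange {
        int valid;
        long lo, hi;            /* current [lo, hi) range */
};

static void
add_range(struct crange *r, long off, long len)
{
        long lo = off, hi = off + len;

        if (!r->valid) {
                r->lo = lo;
                r->hi = hi;
                r->valid = 1;
                return;
        }
        if (lo < r->lo)
                r->lo = lo;
        if (hi > r->hi)
                r->hi = hi;
}

static void
del_range(struct crange *r, long off, long len)
{
        long lo = off, hi = off + len;

        if (!r->valid || lo > r->hi || hi < r->lo)
                return;
        if (lo <= r->lo)
                r->lo = hi;
        else if (hi >= r->hi)
                r->hi = lo;
        else if (lo - r->lo > r->hi - hi)       /* keep the larger leftover */
                r->hi = lo;
        else
                r->lo = hi;
}

int
main(void)
{
        struct crange r = { 0, 0, 0 };

        add_range(&r, 0, 8192);
        add_range(&r, 8192, 4096);
        del_range(&r, 4096, 1024);      /* middle piece removed */
        printf("[%ld, %ld)\n", r.lo, r.hi);
        return (0);
}

With the page-based commit tracking gone, the reverted code goes back to marking individual buffers B_NEEDCOMMIT (see the buf.h hunk below) and clearing that flag per buffer in nfs_clearcommit(), so the per-vnode ranges are no longer needed.
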
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index 5a189ba344d..87c1618a4a4 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_syscalls.c,v 1.21 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_syscalls.c,v 1.22 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */ /* @@ -913,9 +913,10 @@ int nfssvc_iod(p) struct proc *p; { - struct buf *bp; - int i, myiod; - int error = 0; + register struct buf *bp, *nbp; + register int i, myiod; + struct vnode *vp; + int error = 0, s; /* * Assign my position or return error if too many already running @@ -943,7 +944,39 @@ nfssvc_iod(p) while ((bp = nfs_bufq.tqh_first) != NULL) { /* Take one off the front of the list */ TAILQ_REMOVE(&nfs_bufq, bp, b_freelist); - (void) nfs_doio(bp, NULL); + if (bp->b_flags & B_READ) + (void) nfs_doio(bp, NULL); + else do { + /* + * Look for a delayed write for the same vnode, so I can do + * it now. We must grab it before calling nfs_doio() to + * avoid any risk of the vnode getting vclean()'d while + * we are doing the write rpc. + */ + vp = bp->b_vp; + s = splbio(); + for (nbp = vp->v_dirtyblkhd.lh_first; nbp; + nbp = nbp->b_vnbufs.le_next) { + if ((nbp->b_flags & + (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI) + continue; + bremfree(nbp); + nbp->b_flags |= (B_BUSY|B_ASYNC); + break; + } + /* + * For the delayed write, do the first part of nfs_bwrite() + * up to, but not including nfs_strategy(). + */ + if (nbp) { + nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + buf_undirty(bp); + nbp->b_vp->v_numoutput++; + } + splx(s); + + (void) nfs_doio(bp, NULL); + } while ((bp = nbp) != NULL); } if (error) { PRELE(p); diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h index 71985e581a8..bf2c5376815 100644 --- a/sys/nfs/nfs_var.h +++ b/sys/nfs/nfs_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_var.h,v 1.16 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfs_var.h,v 1.17 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */ /* @@ -119,7 +119,7 @@ int nfs_sillyrename __P((struct vnode *, struct vnode *, struct componentname *)); int nfs_lookitup __P((struct vnode *, char *, int, struct ucred *, struct proc *, struct nfsnode **)); -int nfs_commit __P((struct vnode *, u_quad_t, unsigned, struct proc *)); +int nfs_commit __P((struct vnode *, u_quad_t, int, struct proc *)); int nfs_bmap __P((void *)); int nfs_strategy __P((void *)); int nfs_mmap __P((void *)); @@ -134,6 +134,7 @@ int nfs_vfree __P((void *)); int nfs_truncate __P((void *)); int nfs_update __P((void *)); int nfs_bwrite __P((void *)); +int nfs_writebp __P((struct buf *, int)); int nfsspec_access __P((void *)); int nfsspec_read __P((void *)); int nfsspec_write __P((void *)); @@ -257,16 +258,7 @@ void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *, int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *, struct nfssvc_sock *, struct mbuf *, int *, int)); int netaddr_match __P((int, union nethostaddr *, struct mbuf *)); - void nfs_clearcommit __P((struct mount *)); -void nfs_merge_commit_ranges __P((struct vnode *)); -int nfs_in_committed_range __P((struct vnode *, off_t, off_t)); -int nfs_in_tobecommitted_range __P((struct vnode *, off_t, off_t)); -void nfs_add_committed_range __P((struct vnode *, off_t, off_t)); -void nfs_del_committed_range __P((struct vnode *, off_t, off_t)); -void nfs_add_tobecommitted_range __P((struct vnode *, off_t, off_t)); -void nfs_del_tobecommitted_range __P((struct vnode 
*, off_t, off_t)); - int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrvw_sort __P((gid_t *, int)); void nfsrv_setcred __P((struct ucred *, struct ucred *)); diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c index 069783e6bf9..4b7733156c8 100644 --- a/sys/nfs/nfs_vfsops.c +++ b/sys/nfs/nfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vfsops.c,v 1.41 2001/12/11 09:32:46 art Exp $ */ +/* $OpenBSD: nfs_vfsops.c,v 1.42 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */ /* @@ -748,8 +748,6 @@ mountnfs(argp, mp, nam, pth, hst) * point. */ mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA; - mp->mnt_fs_bshift = DEV_BSHIFT; - mp->mnt_dev_bshift = DEV_BSHIFT; return (0); bad: @@ -858,9 +856,8 @@ loop: */ if (vp->v_mount != mp) goto loop; - if (waitfor == MNT_LAZY || - (LIST_EMPTY(&vp->v_dirtyblkhd) && - vp->v_uobj.uo_npages == 0)) + if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL || + waitfor == MNT_LAZY) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto loop; diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 1af7a6bd1d4..44cceab8a1f 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vnops.c,v 1.44 2001/12/11 09:32:46 art Exp $ */ +/* $OpenBSD: nfs_vnops.c,v 1.45 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */ /* @@ -126,10 +126,7 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_advlock_desc, nfs_advlock }, /* advlock */ { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, nfs_bwrite }, - { &vop_getpages_desc, nfs_getpages }, /* getpages */ - { &vop_putpages_desc, nfs_putpages }, /* putpages */ - { &vop_mmap_desc, vop_generic_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; @@ -154,7 +151,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ { &vop_revoke_desc, spec_revoke }, /* revoke */ - { &vop_fsync_desc, spec_fsync }, /* fsync */ + { &vop_fsync_desc, nfs_fsync }, /* fsync */ { &vop_remove_desc, spec_remove }, /* remove */ { &vop_link_desc, spec_link }, /* link */ { &vop_rename_desc, spec_rename }, /* rename */ @@ -176,8 +173,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries }; @@ -222,8 +218,7 @@ struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = { { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries }; @@ -378,30 +373,11 @@ nfs_open(v) return (EACCES); } - /* - * Initialize read and write creds here, for swapfiles - * and other paths that don't set the creds themselves. 
- */ - - if (ap->a_mode & FREAD) { - if (np->n_rcred) { - crfree(np->n_rcred); - } - np->n_rcred = ap->a_cred; - crhold(np->n_rcred); - } - if (ap->a_mode & FWRITE) { - if (np->n_wcred) { - crfree(np->n_wcred); - } - np->n_wcred = ap->a_cred; - crhold(np->n_wcred); - } - if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); + uvm_vnp_uncache(vp); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; @@ -419,6 +395,7 @@ nfs_open(v) if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); + uvm_vnp_uncache(vp); np->n_mtime = vattr.va_mtime.tv_sec; } } @@ -2534,7 +2511,7 @@ int nfs_commit(vp, offset, cnt, procp) struct vnode *vp; u_quad_t offset; - unsigned cnt; + int cnt; struct proc *procp; { caddr_t cp; @@ -2589,7 +2566,7 @@ nfs_bmap(v) daddr_t *a_bnp; int *a_runp; } */ *ap = v; - struct vnode *vp = ap->a_vp; + register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; @@ -2649,7 +2626,9 @@ nfs_fsync(v) } /* - * Flush all the data associated with a vnode. + * Flush all the blocks associated with a vnode. + * Walk through the buffer pool and push any dirty pages + * associated with the vnode. */ int nfs_flush(vp, cred, waitfor, p, commit) @@ -2659,19 +2638,154 @@ nfs_flush(vp, cred, waitfor, p, commit) struct proc *p; int commit; { - struct uvm_object *uobj = &vp->v_uobj; struct nfsnode *np = VTONFS(vp); - int error; - int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO; - int rv; + struct buf *bp; + int i; + struct buf *nbp; + struct nfsmount *nmp = VFSTONFS(vp->v_mount); + int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; + int passone = 1; + u_quad_t off = (u_quad_t)-1, endoff = 0, toff; +#ifndef NFS_COMMITBVECSIZ +#define NFS_COMMITBVECSIZ 20 +#endif + struct buf *bvec[NFS_COMMITBVECSIZ]; - error = 0; + if (nmp->nm_flag & NFSMNT_INT) + slpflag = PCATCH; + if (!commit) + passone = 0; + /* + * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the + * server, but nas not been committed to stable storage on the server + * yet. On the first pass, the byte range is worked out and the commit + * rpc is done. On the second pass, nfs_writebp() is called to do the + * job. + */ +again: + bvecpos = 0; + if (NFS_ISV3(vp) && commit) { + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (bvecpos >= NFS_COMMITBVECSIZ) + break; + if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) + != (B_DELWRI | B_NEEDCOMMIT)) + continue; + bremfree(bp); + bp->b_flags |= (B_BUSY | B_WRITEINPROG); + /* + * A list of these buffers is kept so that the + * second loop knows which buffers have actually + * been committed. This is necessary, since there + * may be a race between the commit rpc and new + * uncommitted writes on the file. + */ + bvec[bvecpos++] = bp; + toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + + bp->b_dirtyoff; + if (toff < off) + off = toff; + toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); + if (toff > endoff) + endoff = toff; + } + splx(s); + } + if (bvecpos > 0) { + /* + * Commit data on the server, as required. + */ + retv = nfs_commit(vp, off, (int)(endoff - off), p); + if (retv == NFSERR_STALEWRITEVERF) + nfs_clearcommit(vp->v_mount); + /* + * Now, either mark the blocks I/O done or mark the + * blocks dirty, depending on whether the commit + * succeeded. 
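
The first pass of the restored nfs_flush() collects every B_DELWRI|B_NEEDCOMMIT buffer and derives a single byte range to hand to the commit RPC. The sketch below models that range computation in user space; DEV_BSIZE and the buffer records are simplified stand-ins for the kernel structures.

/*
 * Model of the commit-range computation in the restored nfs_flush():
 * the smallest [off, end) covering the dirty bytes of every buffer
 * that only needs committing.
 */
#include <stdio.h>

#define DEV_BSIZE 512

struct dbuf {
        long blkno;             /* b_blkno */
        int dirtyoff, dirtyend; /* dirty region within the buffer */
};

static void
commit_range(const struct dbuf *b, int n, long long *off, long long *end)
{
        long long toff;
        int i;

        *off = -1;              /* sentinel: no buffer seen yet */
        *end = 0;
        for (i = 0; i < n; i++) {
                toff = (long long)b[i].blkno * DEV_BSIZE + b[i].dirtyoff;
                if (*off == -1 || toff < *off)
                        *off = toff;
                toff += b[i].dirtyend - b[i].dirtyoff;
                if (toff > *end)
                        *end = toff;
        }
}

int
main(void)
{
        struct dbuf bufs[] = {
                { 16, 0, 4096 },        /* bytes 8192..12288 */
                { 32, 512, 2048 },      /* bytes 16896..18432 */
        };
        long long off, end;

        commit_range(bufs, 2, &off, &end);
        printf("commit %lld bytes at offset %lld\n", end - off, off);
        return (0);
}
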
+ */ + for (i = 0; i < bvecpos; i++) { + bp = bvec[i]; + bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); + if (retv) + brelse(bp); + else { + s = splbio(); + buf_undirty(bp); + vp->v_numoutput++; + bp->b_flags |= B_ASYNC; + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + bp->b_dirtyoff = bp->b_dirtyend = 0; + splx(s); + biodone(bp); + } + } + } - simple_lock(&uobj->vmobjlock); - rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags); - simple_unlock(&uobj->vmobjlock); - if (!rv) { - error = EIO; + /* + * Start/do any write(s) that are required. + */ +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if (bp->b_flags & B_BUSY) { + if (waitfor != MNT_WAIT || passone) + continue; + bp->b_flags |= B_WANTED; + error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), + "nfsfsync", slptimeo); + splx(s); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + } + goto loop; + } + if ((bp->b_flags & B_DELWRI) == 0) + panic("nfs_fsync: not dirty"); + if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) + continue; + bremfree(bp); + if (passone || !commit) + bp->b_flags |= (B_BUSY|B_ASYNC); + else + bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); + splx(s); + VOP_BWRITE(bp); + goto loop; + } + splx(s); + if (passone) { + passone = 0; + goto again; + } + if (waitfor == MNT_WAIT) { + loop2: + s = splbio(); + error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo); + splx(s); + if (error) { + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) + return (EINTR); + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } + goto loop2; + } + + if (vp->v_dirtyblkhd.lh_first && commit) { +#if 0 + vprint("nfs_fsync: dirty", vp); +#endif + goto loop; + } } if (np->n_flag & NWRITEERR) { error = np->n_error; @@ -2746,7 +2860,7 @@ nfs_print(v) } /* - * Just call bwrite(). + * Just call nfs_writebp() with the force argument set to 1. */ int nfs_bwrite(v) @@ -2756,7 +2870,76 @@ nfs_bwrite(v) struct buf *a_bp; } */ *ap = v; - return (bwrite(ap->a_bp)); + return (nfs_writebp(ap->a_bp, 1)); +} + +/* + * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless + * the force flag is one and it also handles the B_NEEDCOMMIT flag. + */ +int +nfs_writebp(bp, force) + register struct buf *bp; + int force; +{ + register int oldflags = bp->b_flags, retv = 1; + register struct proc *p = curproc; /* XXX */ + off_t off; + int s; + + if(!(bp->b_flags & B_BUSY)) + panic("bwrite: buffer is not busy???"); + +#ifdef fvdl_debug + printf("nfs_writebp(%x): vp %x voff %d vend %d doff %d dend %d\n", + bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff, + bp->b_dirtyend); +#endif + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + + s = splbio(); + buf_undirty(bp); + + if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p) + ++p->p_stats->p_ru.ru_oublock; + + bp->b_vp->v_numoutput++; + splx(s); + + /* + * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not + * an actual write will have to be scheduled via. VOP_STRATEGY(). + * If B_WRITEINPROG is already set, then push it with a write anyhow. 
+ */ + if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { + off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; + bp->b_flags |= B_WRITEINPROG; + retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, + bp->b_proc); + bp->b_flags &= ~B_WRITEINPROG; + if (!retv) { + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_flags &= ~B_NEEDCOMMIT; + biodone(bp); + } else if (retv == NFSERR_STALEWRITEVERF) + nfs_clearcommit(bp->b_vp->v_mount); + } + if (retv) { + if (force) + bp->b_flags |= B_WRITEINPROG; + VOP_STRATEGY(bp); + } + + if( (oldflags & B_ASYNC) == 0) { + int rtval = biowait(bp); + if (!(oldflags & B_DELWRI) && p) { + ++p->p_stats->p_ru.ru_oublock; + } + brelse(bp); + return (rtval); + } + + return (0); } /* diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h index 42aaddfa637..17c02979154 100644 --- a/sys/nfs/nfsnode.h +++ b/sys/nfs/nfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfsnode.h,v 1.12 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: nfsnode.h,v 1.13 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */ /* @@ -119,20 +119,8 @@ struct nfsnode { nfsfh_t n_fh; /* Small File Handle */ struct ucred *n_rcred; struct ucred *n_wcred; - off_t n_pushedlo; /* 1st blk in commited range */ - off_t n_pushedhi; /* Last block in range */ - off_t n_pushlo; /* 1st block in commit range */ - off_t n_pushhi; /* Last block in range */ - struct lock n_commitlock; /* Serialize commits XXX */ - int n_commitflags; }; -/* - * Values for n_commitflags - */ -#define NFS_COMMIT_PUSH_VALID 0x0001 /* push range valid */ -#define NFS_COMMIT_PUSHED_VALID 0x0002 /* pushed range valid */ - #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly @@ -211,8 +199,6 @@ int nfs_bwrite __P((void *)); int nfs_vget __P((struct mount *, ino_t, struct vnode **)); #define nfs_reallocblks \ ((int (*) __P((void *)))eopnotsupp) -int nfs_getpages __P((void *)); -int nfs_putpages __P((void *)); /* other stuff */ int nfs_removeit __P((struct sillyrename *)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index bf752691c3c..ede1b021c65 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.36 2001/11/30 05:45:33 csapuntz Exp $ */ +/* $OpenBSD: buf.h,v 1.37 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -68,7 +68,6 @@ extern struct bio_ops { void (*io_deallocate) __P((struct buf *)); void (*io_movedeps) __P((struct buf *, struct buf *)); int (*io_countdeps) __P((struct buf *, int, int)); - void (*io_pageiodone) __P((struct buf *)); } bioops; /* @@ -97,7 +96,10 @@ struct buf { /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); struct vnode *b_vp; /* Device vnode. */ - void *b_private; + int b_dirtyoff; /* Offset in buffer of dirty region. */ + int b_dirtyend; /* Offset of end of dirty region. */ + int b_validoff; /* Offset in buffer of valid region. */ + int b_validend; /* Offset of end of valid region. */ struct workhead b_dep; /* List of filesystem dependencies. */ }; @@ -118,6 +120,7 @@ struct buf { * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define B_NEEDCOMMIT 0x00000002 /* Needs committing to stable storage */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. 
*/ @@ -141,6 +144,7 @@ struct buf { #define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ +#define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. */ #define B_DEFERRED 0x04000000 /* Skipped over for cleaning */ #define B_SCANNED 0x08000000 /* Block already pushed during sync */ @@ -199,6 +203,8 @@ void biodone __P((struct buf *)); int biowait __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); +int breada __P((struct vnode *, daddr_t, int, daddr_t, int, + struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); void brelse __P((struct buf *)); @@ -266,8 +272,6 @@ int cluster_read __P((struct vnode *, struct cluster_info *, u_quad_t, daddr_t, long, struct ucred *, struct buf **)); void cluster_write __P((struct buf *, struct cluster_info *, u_quad_t)); -int buf_cleanout(struct buf *bp); - __END_DECLS #endif #endif /* !_SYS_BUF_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 50f59e4a532..6709ef88a7b 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mount.h,v 1.41 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: mount.h,v 1.42 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */ /* @@ -336,8 +336,6 @@ struct mount { struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags */ int mnt_maxsymlinklen; /* max size of short symlink */ - int mnt_fs_bshift; /* offset shift for lblkno */ - int mnt_dev_bshift; /* shift for device sectors */ struct statfs mnt_stat; /* cache of filesystem stats */ qaddr_t mnt_data; /* private data */ }; diff --git a/sys/sys/param.h b/sys/sys/param.h index 998594a680a..0dbb101ed1b 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.44 2001/12/10 03:03:10 art Exp $ */ +/* $OpenBSD: param.h,v 1.45 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */ /*- @@ -227,17 +227,3 @@ #define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */ #define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */ #define RFCFDG (1<<12) /* zero fd table */ - -#ifdef _KERNEL -/* - * Defaults for Unified Buffer Cache parameters. 
- * May be overridden in <machine/param.h> - */ - -#ifndef UBC_WINSHIFT -#define UBC_WINSHIFT 13 -#endif -#ifndef UBC_NWINS -#define UBC_NWINS 1024 -#endif -#endif /* _KERNEL */ diff --git a/sys/sys/specdev.h b/sys/sys/specdev.h index 51fb9564c51..bdd2008545f 100644 --- a/sys/sys/specdev.h +++ b/sys/sys/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.11 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: specdev.h,v 1.12 2001/12/19 08:58:06 art Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -121,4 +121,3 @@ int spec_advlock __P((void *)); #define spec_reallocblks spec_badop #define spec_bwrite vop_generic_bwrite #define spec_revoke vop_generic_revoke -#define spec_mmap spec_badop diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index e97fa77b696..051ddaee942 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.46 2001/12/10 04:45:31 art Exp $ */ +/* $OpenBSD: vnode.h,v 1.47 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -45,6 +45,7 @@ #include <uvm/uvm_pglist.h> /* XXX */ #include <sys/lock.h> /* XXX */ #include <uvm/uvm.h> /* XXX */ +#include <uvm/uvm_vnode.h> /* XXX */ /* * The vnode is the focus of all file activity in UNIX. There is a @@ -86,14 +87,11 @@ LIST_HEAD(buflists, buf); */ struct vnode { - struct uvm_object v_uobj; /* the VM object */ -#define v_usecount v_uobj.uo_refs -#define v_interlock v_uobj.vmobjlock - voff_t v_size; - int v_flag; - int v_numoutput; + struct uvm_vnode v_uvm; /* uvm data */ int (**v_op) __P((void *)); /* vnode operations vector */ enum vtype v_type; /* vnode type */ + u_int v_flag; /* vnode flags (see below) */ + u_int v_usecount; /* reference count of users */ /* reference count of writers */ u_int v_writecount; /* Flags that can be read/written in interrupts */ @@ -105,6 +103,7 @@ struct vnode { LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + u_int v_numoutput; /* num of writes in progress */ LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ @@ -113,6 +112,7 @@ struct vnode { struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; + struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ @@ -137,9 +137,6 @@ struct vnode { #define VXWANT 0x0200 /* process is waiting for vnode */ #define VALIASED 0x0800 /* vnode has an alias */ #define VLOCKSWORK 0x4000 /* FS supports locking discipline */ -#define VDIRTY 0x8000 /* vnode possibly has dirty pages */ - -#define VSIZENOTSET ((voff_t)-1) /* * (v_bioflag) Flags that may be manipulated by interrupt handlers @@ -252,9 +249,6 @@ vref(vp) } #endif /* DIAGNOSTIC */ -void vhold __P((struct vnode *)); -void vholdrele __P((struct vnode *)); - #define NULLVP ((struct vnode *)NULL) /* @@ -451,7 +445,6 @@ int vop_generic_lock __P((void *)); int vop_generic_unlock __P((void *)); int vop_generic_revoke __P((void *)); int vop_generic_kqfilter __P((void *)); -int vop_generic_mmap __P((void *)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p)); diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h index d64945fa5a4..00cdadabe25 100644 --- 
a/sys/sys/vnode_if.h +++ b/sys/sys/vnode_if.h @@ -3,7 +3,7 @@ * (Modifications made here may easily be lost!) * * Created from the file: - * OpenBSD: vnode_if.src,v 1.17 2001/12/10 04:45:31 art Exp + * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp * by the script: * OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp */ @@ -397,42 +397,6 @@ struct vop_whiteout_args { extern struct vnodeop_desc vop_whiteout_desc; int VOP_WHITEOUT __P((struct vnode *, struct componentname *, int)); -struct vop_getpages_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - voff_t a_offset; - struct vm_page **a_m; - int *a_count; - int a_centeridx; - vm_prot_t a_access_type; - int a_advice; - int a_flags; -}; -extern struct vnodeop_desc vop_getpages_desc; -int VOP_GETPAGES __P((struct vnode *, voff_t, struct vm_page **, int *, int, - vm_prot_t, int, int)); - -struct vop_putpages_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vm_page **a_m; - int a_count; - int a_flags; - int *a_rtvals; -}; -extern struct vnodeop_desc vop_putpages_desc; -int VOP_PUTPAGES __P((struct vnode *, struct vm_page **, int, int, int *)); - -struct vop_mmap_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_mmap_desc; -int VOP_MMAP __P((struct vnode *, int, struct ucred *, struct proc *)); - /* Special cases: */ #include <sys/buf.h> diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c index 390f02dc13f..eb2d7a6f414 100644 --- a/sys/ufs/ext2fs/ext2fs_balloc.c +++ b/sys/ufs/ext2fs/ext2fs_balloc.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */ +/* $OpenBSD: ext2fs_balloc.c,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -43,9 +44,8 @@ #include <sys/proc.h> #include <sys/file.h> #include <sys/vnode.h> -#include <sys/mount.h> -#include <uvm/uvm.h> +#include <uvm/uvm_extern.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> @@ -73,13 +73,8 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, u_int deallocated; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int unwindidx = -1; - UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); - - if (bpp != NULL) { - *bpp = NULL; - } + *bpp = NULL; if (bn < 0) return (EFBIG); fs = ip->i_e2fs; @@ -91,29 +86,20 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, if (bn < NDADDR) { nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { - - /* - * the block is already allocated, just read it. - */ - - if (bpp != NULL) { - error = bread(vp, bn, fs->e2fs_bsize, NOCRED, - &bp); - if (error) { - brelse(bp); - return (error); - } - *bpp = bp; + error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); } + *bpp = bp; return (0); } /* * allocate a new direct block. 
*/ - error = ext2fs_alloc(ip, bn, - ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), + ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); @@ -121,13 +107,11 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, ip->i_e2fs_last_blk = newb; ip->i_e2fs_blocks[bn] = h2fs32(newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (bpp != NULL) { - bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & B_CLRBUF) - clrbuf(bp); - *bpp = bp; - } + bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) + clrbuf(bp); + *bpp = bp; return (0); } /* @@ -245,30 +229,26 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, } else { bdwrite(bp); } - if (bpp != NULL) { - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - if (flags & B_CLRBUF) - clrbuf(nbp); - *bpp = nbp; - } + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) + clrbuf(nbp); + *bpp = nbp; return (0); } brelse(bp); - if (bpp != NULL) { - if (flags & B_CLRBUF) { - error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, - &nbp); - if (error) { - brelse(nbp); - goto fail; - } - } else { - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); + if (error) { + brelse(nbp); + goto fail; } - *bpp = nbp; + } else { + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); } + + *bpp = nbp; return (0); fail: /* @@ -312,142 +292,3 @@ fail: } return error; } - -int -ext2fs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - struct inode *ip = VTOI(vp); - struct m_ext2fs *fs = ip->i_e2fs; - int error, delta, bshift, bsize; - UVMHIST_FUNC("ext2fs_gop_alloc"); UVMHIST_CALLED(ubchist); - - bshift = fs->e2fs_bshift; - bsize = 1 << bshift; - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - while (len > 0) { - bsize = min(bsize, len); - UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x", - off, len, bsize, 0); - - error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, cred, - NULL, flags); - if (error) { - UVMHIST_LOG(ubchist, "error %d", error, 0,0,0); - return error; - } - - /* - * increase file size now, VOP_BALLOC() requires that - * EOF be up-to-date before each call. - */ - - if (ip->i_e2fs_size < off + bsize) { - UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", - ip->i_e2fs_size, off + bsize,0,0); - ip->i_e2fs_size = off + bsize; - if (vp->v_size < ip->i_e2fs_size) { - uvm_vnp_setsize(vp, ip->i_e2fs_size); - } - } - - off += bsize; - len -= bsize; - } - return 0; -} - -/* - * allocate a range of blocks in a file. - * after this function returns, any page entirely contained within the range - * will map to invalid data and thus must be overwritten before it is made - * accessible to others. 
- */ - -int -ext2fs_balloc_range(vp, off, len, cred, flags) - struct vnode *vp; - off_t off, len; - struct ucred *cred; - int flags; -{ - off_t oldeof, eof, pagestart; - struct uvm_object *uobj; - struct genfs_node *gp = VTOG(vp); - int i, delta, error, npages; - int bshift = vp->v_mount->mnt_fs_bshift; - int bsize = 1 << bshift; - int ppb = max(bsize >> PAGE_SHIFT, 1); - struct vm_page *pgs[ppb]; - UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", - vp, off, len, vp->v_size); - - error = 0; - uobj = &vp->v_uobj; - oldeof = vp->v_size; - eof = max(oldeof, off + len); - UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0); - pgs[0] = NULL; - - /* - * cache the new range of the file. this will create zeroed pages - * where the new block will be and keep them locked until the - * new block is allocated, so there will be no window where - * the old contents of the new block is visible to racing threads. - */ - - pagestart = trunc_page(off) & ~(bsize - 1); - npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT); - memset(pgs, 0, npages); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, - VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF); - if (error) { - UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0); - goto errout; - } - for (i = 0; i < npages; i++) { - UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); - KASSERT((pgs[i]->flags & PG_RELEASED) == 0); - pgs[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs[i]); - } - - /* - * adjust off to be block-aligned. - */ - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - /* - * now allocate the range. - */ - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, curproc); - error = GOP_ALLOC(vp, off, len, flags, cred); - UVMHIST_LOG(ubchist, "alloc %d", error,0,0,0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, curproc); - - /* - * unbusy any pages we are holding. - */ - -errout: - simple_lock(&uobj->vmobjlock); - if (error) { - (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb, - PGO_FREE); - } - if (pgs[0] != NULL) { - uvm_page_unbusy(pgs, npages); - } - simple_unlock(&uobj->vmobjlock); - return (error); -} diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h index 5f5b2c3a47b..5063d34427a 100644 --- a/sys/ufs/ext2fs/ext2fs_extern.h +++ b/sys/ufs/ext2fs/ext2fs_extern.h @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_extern.h,v 1.12 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_extern.h,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */ /*- * Copyright (c) 1997 Manuel Bouyer. 
@@ -74,9 +74,6 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode); /* ext2fs_balloc.c */ int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *, struct buf **, int); -int ext2fs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *)); -int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, - int)); /* ext2fs_bmap.c */ int ext2fs_bmap __P((void *)); diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c index f77c99c47b5..0e2a975e333 100644 --- a/sys/ufs/ext2fs/ext2fs_inode.c +++ b/sys/ufs/ext2fs/ext2fs_inode.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */ +/* $OpenBSD: ext2fs_inode.c,v 1.19 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -58,10 +59,8 @@ #include <ufs/ext2fs/ext2fs.h> #include <ufs/ext2fs/ext2fs_extern.h> -extern int prtactive; - static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, - ufs_daddr_t, int, long *)); + ufs_daddr_t, int, long *)); /* * Last reference to an inode. If necessary, write or delete it. @@ -79,6 +78,7 @@ ext2fs_inactive(v) struct proc *p = ap->a_p; struct timespec ts; int error = 0; + extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ext2fs_inactive: pushing active", vp); @@ -171,13 +171,14 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp = ITOV(oip); ufs_daddr_t lastblock; - ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; + ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct m_ext2fs *fs; + struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; int i; - int error, allerror; + int aflags, error, allerror; off_t osize; if (length < 0) @@ -218,8 +219,22 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if (length > fs->fs_maxfilesize) return (EFBIG); #endif - ext2fs_balloc_range(ovp, length - 1, 1, cred, - flags & IO_SYNC ? B_SYNC : 0); + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp, + aflags); + if (error) + return (error); + oip->i_e2fs_size = length; + uvm_vnp_setsize(ovp, length); + uvm_vnp_uncache(ovp); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ext2fs_update(oip, NULL, NULL, 1)); } @@ -231,15 +246,28 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * of subsequent file growth. 
*/ offset = blkoff(fs, length); - if (offset != 0) { + if (offset == 0) { + oip->i_e2fs_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp, + aflags); + if (error) + return (error); + oip->i_e2fs_size = length; size = fs->e2fs_bsize; - - /* XXXUBC we should handle more than just VREG */ - uvm_vnp_zerorange(ovp, length, size - offset); + uvm_vnp_setsize(ovp, length); + uvm_vnp_uncache(ovp); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); } - oip->i_e2fs_size = length; - uvm_vnp_setsize(ovp, length); - /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c index 03768d06b42..9ae4322756f 100644 --- a/sys/ufs/ext2fs/ext2fs_readwrite.c +++ b/sys/ufs/ext2fs/ext2fs_readwrite.c @@ -79,8 +79,6 @@ ext2fs_read(v) struct uio *uio; struct m_ext2fs *fs; struct buf *bp; - void *win; - vsize_t bytelen; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; @@ -109,27 +107,6 @@ ext2fs_read(v) if (uio->uio_resid == 0) return (0); - if (vp->v_type == VREG) { - error = 0; - while (uio->uio_resid > 0) { - - bytelen = MIN(ip->i_e2fs_size - uio->uio_offset, - uio->uio_resid); - - if (bytelen == 0) { - break; - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - } - goto out; - } - for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0) break; @@ -179,11 +156,8 @@ ext2fs_read(v) if (bp != NULL) brelse(bp); -out: if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { ip->i_flag |= IN_ACCESS; - if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) - error = ext2fs_update(ip, NULL, NULL, 1); } return (error); } @@ -209,17 +183,12 @@ ext2fs_write(v) struct proc *p; ufs_daddr_t lbn; off_t osize; - int blkoffset, error, flags, ioflag, resid, xfersize; - vsize_t bytelen; - void *win; - off_t oldoff; - boolean_t rv; + int blkoffset, error, flags, ioflag, resid, size, xfersize; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); - error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -263,65 +232,35 @@ ext2fs_write(v) resid = uio->uio_resid; osize = ip->i_e2fs_size; - - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - oldoff = uio->uio_offset; - blkoffset = blkoff(fs, uio->uio_offset); - bytelen = MIN(fs->e2fs_bsize - blkoffset, - uio->uio_resid); - - /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. - */ - - error = ext2fs_balloc_range(vp, uio->uio_offset, - bytelen, ap->a_cred, 0); - if (error) { - break; - } - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) { - break; - } - - /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. - */ - - if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_uobj.vmobjlock); - rv = vp->v_uobj.pgops->pgo_flush( - &vp->v_uobj, (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&vp->v_uobj.vmobjlock); - } - } - goto out; - } - flags = ioflag & IO_SYNC ? 
B_SYNC : 0; + for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); - xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid); - if (xfersize < fs->e2fs_bsize) + xfersize = fs->e2fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (fs->e2fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; + error = ext2fs_buf_alloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); if (error) break; - if (ip->i_e2fs_size < uio->uio_offset + xfersize) { + if (uio->uio_offset + xfersize > ip->i_e2fs_size) { ip->i_e2fs_size = uio->uio_offset + xfersize; + uvm_vnp_setsize(vp, ip->i_e2fs_size); } - error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); + uvm_vnp_uncache(vp); + + size = fs->e2fs_bsize - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->e2fs_bsize) { @@ -333,14 +272,13 @@ ext2fs_write(v) bdwrite(bp); if (error || xfersize == 0) break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ -out: - ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_e2fs_mode &= ~(ISUID | ISGID); if (error) { @@ -350,7 +288,8 @@ out: uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { error = ext2fs_update(ip, NULL, NULL, 1); + } return (error); } diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c index 3263f7e5391..02d84be4302 100644 --- a/sys/ufs/ext2fs/ext2fs_subr.c +++ b/sys/ufs/ext2fs/ext2fs_subr.c @@ -1,4 +1,5 @@ -/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */ +/* $OpenBSD: ext2fs_subr.c,v 1.8 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -95,7 +96,7 @@ ext2fs_checkoverlap(bp, ip) if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; - if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL)) + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) continue; if (vp != ip->i_devvp) continue; diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c index b77f4edec62..6f404d98157 100644 --- a/sys/ufs/ext2fs/ext2fs_vfsops.c +++ b/sys/ufs/ext2fs/ext2fs_vfsops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vfsops.c,v 1.19 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_vfsops.c,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -100,11 +100,6 @@ struct vfsops ext2fs_vfsops = { ufs_check_export }; -struct genfs_ops ext2fs_genfsops = { - genfs_size, - ext2fs_gop_alloc, -}; - struct pool ext2fs_inode_pool; extern u_long ext2gennumber; @@ -407,11 +402,9 @@ ext2fs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mountp)->um_devvp; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - error = vinvalbuf(devvp, 0, cred, p, 0, 0); - VOP_UNLOCK(devvp, 0, p); - if (error) + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2fs_reload: dirty1"); + /* * Step 2: re-read superblock from disk. */ @@ -590,18 +583,14 @@ ext2fs_mountfs(devvp, mp, p) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; - mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ - mp->mnt_fs_bshift = m_fs->e2fs_bshift; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = NINDIR(m_fs); - ump->um_lognindir = ffs(NINDIR(m_fs)) - 1; ump->um_bptrtodb = m_fs->e2fs_fsbtodb; ump->um_seqinc = 1; /* no frags */ devvp->v_specmountpoint = mp; return (0); - out: if (bp) brelse(bp); @@ -921,7 +910,6 @@ ext2fs_vget(mp, ino, vpp) /* * Finish inode initialization now that aliasing has been resolved. */ - genfs_node_init(vp, &ext2fs_genfsops); ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* @@ -936,7 +924,6 @@ ext2fs_vget(mp, ino, vpp) ip->i_flag |= IN_MODIFIED; } - vp->v_size = ip->i_e2fs_size; *vpp = vp; return (0); } diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c index 6e82f66279a..d85d4eba5d9 100644 --- a/sys/ufs/ext2fs/ext2fs_vnops.c +++ b/sys/ufs/ext2fs/ext2fs_vnops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vnops.c,v 1.20 2001/12/10 04:45:31 art Exp $ */ -/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */ +/* $OpenBSD: ext2fs_vnops.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -402,6 +402,8 @@ ext2fs_chmod(vp, mode, cred, p) ip->i_e2fs_mode &= ~ALLPERMS; ip->i_e2fs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; + if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0) + (void) uvm_vnp_uncache(vp); return (0); } @@ -1467,10 +1469,7 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = { { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ { &vop_advlock_desc, ext2fs_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, ufs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } }; struct vnodeopv_desc ext2fs_vnodeop_opv_desc = { &ext2fs_vnodeop_p, ext2fs_vnodeop_entries }; @@ -1513,8 +1512,7 @@ struct vnodeopv_entry_desc ext2fs_specop_entries[] = { { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ { &vop_advlock_desc, spec_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_mmap_desc, spec_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc ext2fs_specop_opv_desc = { &ext2fs_specop_p, ext2fs_specop_entries }; @@ -1558,8 +1556,7 @@ struct vnodeopv_entry_desc ext2fs_fifoop_entries[] = { { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, fifo_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { &vop_mmap_desc, fifo_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc ext2fs_fifoop_opv_desc = { &ext2fs_fifoop_p, ext2fs_fifoop_entries }; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 92b4d993c2d..c42897ac4d4 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: 
ffs_alloc.c,v 1.37 2001/11/30 16:37:57 art Exp $ */ +/* $OpenBSD: ffs_alloc.c,v 1.38 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */ /* @@ -169,7 +169,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) struct buf **bpp; ufs_daddr_t *blknop; { - struct fs *fs; + register struct fs *fs; struct buf *bp = NULL; ufs_daddr_t quota_updated = 0; int cg, request, error; @@ -177,7 +177,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) if (bpp != NULL) *bpp = NULL; - fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || @@ -283,6 +282,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) if (bno <= 0) goto nospace; + (void) uvm_vnp_uncache(ITOV(ip)); if (!DOINGSOFTDEP(ITOV(ip))) ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) @@ -362,8 +362,7 @@ ffs_reallocblks(v) struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; int i, len, start_lvl, end_lvl, pref, ssize; - /* XXXUBC - don't reallocblks for now */ - if (1 || doreallocblks == 0) + if (doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index aa452edeabb..daf5b86082b 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_balloc.c,v 1.21 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_balloc.c,v 1.22 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */ /* @@ -402,47 +402,3 @@ fail: return (error); } - -int -ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, - struct ucred *cred) -{ - struct inode *ip = VTOI(vp); - struct fs *fs = ip->i_fs; - int error, delta, bshift, bsize; - - error = 0; - bshift = fs->fs_bshift; - bsize = 1 << bshift; - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - while (len > 0) { - bsize = MIN(bsize, len); - - error = ffs_balloc(ip, off, bsize, cred, flags, NULL); - if (error) { - goto out; - } - - /* - * increase file size now, VOP_BALLOC() requires that - * EOF be up-to-date before each call. - */ - - if (ip->i_ffs_size < off + bsize) { - ip->i_ffs_size = off + bsize; - if (vp->v_size < ip->i_ffs_size) { - uvm_vnp_setsize(vp, ip->i_ffs_size); - } - } - - off += bsize; - len -= bsize; - } - -out: - return error; - } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 7aac0f33de9..15eb204efd3 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.16 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -87,7 +87,6 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int)); /* ffs_balloc.c */ int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **); -int ffs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *); /* ffs_inode.c */ int ffs_init __P((struct vfsconf *)); @@ -129,7 +128,7 @@ int ffs_read __P((void *)); int ffs_write __P((void *)); int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); -void ffs_gop_size __P((struct vnode *, off_t, off_t *)); + /* * Soft dependency function prototypes. 
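[Editor's note] The ffs_gop_alloc() routine removed in the ffs_balloc.c hunk above follows a simple pattern: round the starting offset down to a filesystem block boundary, widen the requested length by the same amount, then allocate one block-sized chunk at a time while keeping the recorded EOF ahead of the next allocation. The standalone sketch below models only that arithmetic so the hunk is easier to read; it is not kernel code, and the names sketch_gop_alloc and fake_balloc, the global file_size, and the example block shift are illustrative assumptions, with locking, credentials, and real error handling omitted.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative model of the block-aligned allocation loop performed by
 * the removed ffs_gop_alloc() (see the ffs_balloc.c hunk above).
 * fake_balloc() stands in for ffs_balloc(); it only reports the range.
 */

static int64_t file_size = 0;            /* plays the role of ip->i_ffs_size */

static int
fake_balloc(int64_t off, int64_t size)
{
    printf("allocate backing for [%lld, %lld)\n",
        (long long)off, (long long)(off + size));
    return 0;                            /* pretend the allocation succeeded */
}

static int
sketch_gop_alloc(int64_t off, int64_t len, int bshift)
{
    int64_t bsize = (int64_t)1 << bshift;
    int64_t delta = off & (bsize - 1);   /* distance into the current block */
    int error;

    off -= delta;                        /* round down to a block boundary */
    len += delta;                        /* ...and widen the request to match */

    while (len > 0) {
        int64_t chunk = bsize < len ? bsize : len;   /* MIN(bsize, len) */

        error = fake_balloc(off, chunk);
        if (error)
            return error;

        /* keep the recorded EOF up to date before the next allocation */
        if (file_size < off + chunk)
            file_size = off + chunk;

        off += chunk;
        len -= chunk;
    }
    return 0;
}

int
main(void)
{
    /* e.g. allocate 5000 bytes starting at offset 10000 with 8 KB blocks */
    sketch_gop_alloc(10000, 5000, 13);
    printf("recorded size: %lld\n", (long long)file_size);
    return 0;
}

After this commit the same job is done again by the filesystem's own buffer-allocation path (UFS_BUF_ALLOC / ext2fs_buf_alloc in the later hunks) rather than by a genfs_ops hook, which is why both ffs_gop_alloc() and its prototype in ffs_extern.h disappear here.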
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 3bec117a700..fecb1fbed77 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_inode.c,v 1.28 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_inode.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ /* @@ -148,21 +148,21 @@ ffs_update(struct inode *ip, struct timespec *atime, int ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { - struct vnode *ovp = ITOV(oip); - struct genfs_node *gp = VTOG(ovp); + struct vnode *ovp; daddr_t lastblock; - daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; - struct proc *p = curproc; + struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; register int i; - int error, allerror; + int aflags, error, allerror; off_t osize; if (length < 0) return (EINVAL); + ovp = ITOV(oip); if (ovp->v_type != VREG && ovp->v_type != VDIR && @@ -188,55 +188,10 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if ((error = getinoquota(oip)) != 0) return (error); - fs = oip->i_fs; - if (length > fs->fs_maxfilesize) - return (EFBIG); - osize = oip->i_ffs_size; + uvm_vnp_setsize(ovp, length); oip->i_ci.ci_lasta = oip->i_ci.ci_clen = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; - /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. - */ - - if (osize < length) { - ufs_balloc_range(ovp, length - 1, 1, cred, - flags & IO_SYNC ? B_SYNC : 0); - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (UFS_UPDATE(oip, 1)); - } - - /* - * When truncating a regular file down to a non-block-aligned size, - * we must zero the part of last block which is past the new EOF. - * We must synchronously flush the zeroed pages to disk - * since the new pages will be invalidated as soon as we - * inform the VM system of the new, smaller size. - * We must to this before acquiring the GLOCK, since fetching - * the pages will acquire the GLOCK internally. - * So there is a window where another thread could see a whole - * zeroed page past EOF, but that's life. - */ - - offset = blkoff(fs, length); - if (ovp->v_type == VREG && length < osize && offset != 0) { - struct uvm_object *uobj; - voff_t eoz; - - size = blksize(fs, oip, lblkno(fs, length)); - eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize); - uvm_vnp_zerorange(ovp, length, eoz - length); - uobj = &ovp->v_uobj; - simple_lock(&uobj->vmobjlock); - uobj->pgops->pgo_flush(uobj, length, eoz, - PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO); - simple_unlock(&uobj->vmobjlock); - } - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, p); - if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* @@ -249,29 +204,80 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * so that it will have no data structures left. 
*/ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, - curproc)) != 0) { - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); + curproc)) != 0) return (error); - } } else { - uvm_vnp_setsize(ovp, length); (void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks, NOCRED); softdep_setup_freeblocks(oip, length); (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, 0)); } } + fs = oip->i_fs; + osize = oip->i_ffs_size; /* - * Reduce the size of the file. + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of osize is 0, length will be at least 1. */ - oip->i_ffs_size = length; + if (osize < length) { + if (length > fs->fs_maxfilesize) + return (EFBIG); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = UFS_BUF_ALLOC(oip, length - 1, 1, + cred, aflags, &bp); + if (error) + return (error); + oip->i_ffs_size = length; + uvm_vnp_setsize(ovp, length); + (void) uvm_vnp_uncache(ovp); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (UFS_UPDATE(oip, MNT_WAIT)); + } uvm_vnp_setsize(ovp, length); /* + * Shorten the size of the file. If the file is not being + * truncated to a block boundary, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever becomes accessible again because + * of subsequent file growth. Directories however are not + * zero'ed as they should grow back initialized to empty. + */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_ffs_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (flags & IO_SYNC) + aflags |= B_SYNC; + error = UFS_BUF_ALLOC(oip, length - 1, 1, + cred, aflags, &bp); + if (error) + return (error); + oip->i_ffs_size = length; + size = blksize(fs, oip, lbn); + (void) uvm_vnp_uncache(ovp); + if (ovp->v_type != VDIR) + bzero((char *)bp->b_data + offset, + (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & B_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when @@ -396,7 +402,6 @@ done: oip->i_ffs_blocks -= blocksreleased; if (oip->i_ffs_blocks < 0) /* sanity */ oip->i_ffs_blocks = 0; - lockmgr(&gp->g_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE; (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 2dfed4d83bd..dcb58550fc1 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_softdep.c,v 1.30 2001/12/10 02:19:34 art Exp $ */ +/* $OpenBSD: ffs_softdep.c,v 1.31 2001/12/19 08:58:07 art Exp $ */ /* * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. * @@ -53,7 +53,6 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> -#include <sys/pool.h> #include <sys/syslog.h> #include <sys/systm.h> #include <sys/vnode.h> @@ -67,10 +66,6 @@ #include <ufs/ffs/ffs_extern.h> #include <ufs/ufs/ufs_extern.h> -#include <uvm/uvm.h> -struct pool sdpcpool; -int softdep_lockedbufs; - #define STATIC /* @@ -111,13 +106,6 @@ extern char *memname[]; */ /* - * Definitions for page cache info hashtable. 
- */ -#define PCBPHASHSIZE 1024 -LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; -#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1)) - -/* * Internal function prototypes. */ STATIC void softdep_error __P((char *, int)); @@ -169,13 +157,6 @@ STATIC void pause_timer __P((void *)); STATIC int request_cleanup __P((int, int)); STATIC int process_worklist_item __P((struct mount *, int)); STATIC void add_to_worklist __P((struct worklist *)); -STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t, - long)); -STATIC void softdep_collect_pagecache __P((struct inode *)); -STATIC void softdep_free_pagecache __P((struct inode *)); -STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t); -STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t)); -void softdep_pageiodone __P((struct buf *)); /* * Exported softdep operations. @@ -192,7 +173,6 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_move_dependencies, /* io_movedeps */ softdep_count_dependencies, /* io_countdeps */ - softdep_pageiodone, /* io_pagedone */ }; /* @@ -1081,7 +1061,6 @@ top: void softdep_initialize() { - int i; LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); @@ -1100,11 +1079,6 @@ softdep_initialize() newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); timeout_set(&proc_waiting_timeout, pause_timer, 0); - pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", - 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP); - for (i = 0; i < PCBPHASHSIZE; i++) { - LIST_INIT(&pcbphashhead[i]); - } } /* @@ -1357,16 +1331,11 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); - /* - * If we were not passed a bp to attach the dep to, - * then this must be for a regular file. - * Allocate a buffer to represent the page cache pages - * that are the real dependency. The pages themselves - * cannot refer to the dependency since we don't want to - * add a field to struct vm_page for this. - */ if (bp == NULL) { - bp = softdep_setup_pagecache(ip, lbn, newsize); + /* + * XXXUBC - Yes, I know how to fix this, but not right now. + */ + panic("softdep_setup_allocdirect: Bonk art in the head\n"); } WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { @@ -1600,7 +1569,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); if (nbp == NULL) { - nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); + /* + * XXXUBC - Yes, I know how to fix this, but not right now. + */ + panic("softdep_setup_allocindir_page: Bonk art in the head\n"); } WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); @@ -1779,7 +1751,6 @@ softdep_setup_freeblocks(ip, length) int i, delay, error; fs = ip->i_fs; - vp = ITOV(ip); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), @@ -1839,15 +1810,9 @@ softdep_setup_freeblocks(ip, length) * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. 
- * We must remove any pagecache markers from the pagecache - * hashtable first because any I/Os in flight will want to see - * dependencies attached to their pagecache markers. We cannot - * free the pagecache markers until after we've freed all the - * dependencies that reference them later. * If we still have a bitmap dependency, then the inode has never * been written to disk, so we can free any fragments without delay. */ - softdep_collect_pagecache(ip); merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delay); @@ -1859,6 +1824,7 @@ softdep_setup_freeblocks(ip, length) * Once they are all there, walk the list and get rid of * any dependencies. */ + vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { @@ -1870,7 +1836,6 @@ softdep_setup_freeblocks(ip, length) brelse(bp); ACQUIRE_LOCK(&lk); } - softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); @@ -2939,6 +2904,7 @@ handle_workitem_freefile(freefile) struct freefile *freefile; { struct fs *fs; + struct vnode vp; struct inode tip; #ifdef DEBUG struct inodedep *idp; @@ -2956,7 +2922,8 @@ handle_workitem_freefile(freefile) tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = fs; - tip.i_vnode = NULL; + tip.i_vnode = &vp; + vp.v_data = &tip; if ((error = ffs_freefile(&tip, freefile->fx_oldinum, freefile->fx_mode)) != 0) { @@ -4354,15 +4321,6 @@ flush_inodedep_deps(fs, ino) struct allocdirect *adp; int error, waitfor; struct buf *bp; - struct vnode *vp; - struct uvm_object *uobj; - - vp = softdep_lookupvp(fs, ino); -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("flush_inodedep_deps: null vp"); -#endif - uobj = &vp->v_uobj; /* * This work is done in two passes. The first pass grabs most @@ -4382,26 +4340,6 @@ flush_inodedep_deps(fs, ino) ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); - - /* - * When file data was in the buffer cache, - * softdep_sync_metadata() would start i/o on - * file data buffers itself. But now that - * we're using the page cache to hold file data, - * we need something else to trigger those flushes. - * let's just do it here. - */ - FREE_LOCK(&lk); - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, - PGO_ALLPAGES|PGO_CLEANIT| - (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); - simple_unlock(&uobj->vmobjlock); - if (waitfor == MNT_WAIT) { - drain_output(vp, 0); - } - ACQUIRE_LOCK(&lk); - TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) { if (adp->ad_state & DEPCOMPLETE) continue; @@ -5017,194 +4955,3 @@ softdep_error(func, error) /* XXX should do something better! */ printf("%s: got error %d while accessing filesystem\n", func, error); } - -/* - * Allocate a buffer on which to attach a dependency. - */ -STATIC struct buf * -softdep_setup_pagecache(ip, lbn, size) - struct inode *ip; - ufs_lbn_t lbn; - long size; -{ - struct vnode *vp = ITOV(ip); - struct buf *bp; - int s; - - /* - * Enter pagecache dependency buf in hash. - * Always reset b_resid to be the full amount of data in the block - * since the caller has the corresponding pages locked and dirty. 
- */ - - bp = softdep_lookup_pcbp(vp, lbn); - if (bp == NULL) { - s = splbio(); - bp = pool_get(&sdpcpool, PR_WAITOK); - splx(s); - - bp->b_vp = vp; - bp->b_lblkno = lbn; - LIST_INIT(&bp->b_dep); - LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash); - LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs); - } - bp->b_bcount = bp->b_resid = size; - return bp; -} - -/* - * softdep_collect_pagecache() and softdep_free_pagecache() - * are used to remove page cache dependency buffers when - * a file is being truncated to 0. - */ - -STATIC void -softdep_collect_pagecache(ip) - struct inode *ip; -{ - struct buf *bp; - - LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) { - LIST_REMOVE(bp, b_hash); - } -} - -STATIC void -softdep_free_pagecache(ip) - struct inode *ip; -{ - struct buf *bp, *nextbp; - - for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) { - nextbp = LIST_NEXT(bp, b_vnbufs); - LIST_REMOVE(bp, b_vnbufs); - KASSERT(LIST_FIRST(&bp->b_dep) == NULL); - pool_put(&sdpcpool, bp); - } -} - -STATIC struct vnode * -softdep_lookupvp(fs, ino) - struct fs *fs; - ino_t ino; -{ - struct mount *mp; - extern struct vfsops ffs_vfsops; - - CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { - if (mp->mnt_op == &ffs_vfsops && - VFSTOUFS(mp)->um_fs == fs) { - break; - } - } - if (mp == NULL) { - return NULL; - } - return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino); -} - -STATIC struct buf * -softdep_lookup_pcbp(vp, lbn) - struct vnode *vp; - ufs_lbn_t lbn; -{ - struct buf *bp; - - LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) { - if (bp->b_vp == vp && bp->b_lblkno == lbn) { - break; - } - } - return bp; -} - -/* - * Do softdep i/o completion processing for page cache writes. - */ - -void -softdep_pageiodone(bp) - struct buf *bp; -{ - int npages = bp->b_bufsize >> PAGE_SHIFT; - struct vnode *vp = bp->b_vp; - struct vm_page *pg; - struct buf *pcbp = NULL; - struct allocdirect *adp; - struct allocindir *aip; - struct worklist *wk; - ufs_lbn_t lbn; - voff_t off; - long iosize = bp->b_bcount; - int size, asize, bshift, bsize; - int i; - - KASSERT(!(bp->b_flags & B_READ)); - bshift = vp->v_mount->mnt_fs_bshift; - bsize = 1 << bshift; - asize = min(PAGE_SIZE, bsize); - ACQUIRE_LOCK(&lk); - for (i = 0; i < npages; i++) { - pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); - if (pg == NULL) { - continue; - } - - for (off = pg->offset; - off < pg->offset + PAGE_SIZE; - off += bsize) { - size = min(asize, iosize); - iosize -= size; - lbn = off >> bshift; - if (pcbp == NULL || pcbp->b_lblkno != lbn) { - pcbp = softdep_lookup_pcbp(vp, lbn); - } - if (pcbp == NULL) { - continue; - } - pcbp->b_resid -= size; - if (pcbp->b_resid < 0) { - panic("softdep_pageiodone: " - "resid < 0, vp %p lbn 0x%lx pcbp %p", - vp, lbn, pcbp); - } - if (pcbp->b_resid > 0) { - continue; - } - - /* - * We've completed all the i/o for this block. - * mark the dep complete. 
- */ - - KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL); - while ((wk = LIST_FIRST(&pcbp->b_dep))) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - adp->ad_state |= COMPLETE; - handle_allocdirect_partdone(adp); - break; - - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - aip->ai_state |= COMPLETE; - handle_allocindir_partdone(aip); - break; - - default: - panic("softdep_pageiodone: " - "bad type %d, pcbp %p wk %p", - wk->wk_type, pcbp, wk); - } - } - LIST_REMOVE(pcbp, b_hash); - LIST_REMOVE(pcbp, b_vnbufs); - pool_put(&sdpcpool, pcbp); - pcbp = NULL; - } - } - FREE_LOCK(&lk); -} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 4aac12f8752..8aec3d7de20 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vfsops.c,v 1.47 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_vfsops.c,v 1.48 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */ /* @@ -96,11 +96,6 @@ struct inode_vtbl ffs_vtbl = { ffs_bufatoff }; -struct genfs_ops ffs_genfsops = { - ffs_gop_size, - ffs_gop_alloc, -}; - extern u_long nextgennumber; /* @@ -742,14 +737,11 @@ ffs_mountfs(devvp, mp, p) else mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - mp->mnt_fs_bshift = fs->fs_bshift; - mp->mnt_dev_bshift = DEV_BSHIFT; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; - ump->um_lognindir = ffs(fs->fs_nindir) - 1; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) @@ -1127,7 +1119,6 @@ retry: ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; - LIST_INIT(&ip->i_pcbufhd); ip->i_vtbl = &ffs_vtbl; /* @@ -1187,8 +1178,6 @@ retry: /* * Finish inode initialization now that aliasing has been resolved. */ - - genfs_node_init(vp, &ffs_genfsops); ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* @@ -1210,7 +1199,6 @@ retry: ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */ ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */ } /* XXX */ - uvm_vnp_setsize(vp, ip->i_ffs_size); *vpp = vp; return (0); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 3794d5e8049..1020b14a2bb 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.24 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.25 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -107,12 +107,8 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { &vop_getpages_desc, genfs_getpages }, - { &vop_putpages_desc, genfs_putpages }, - { &vop_mmap_desc, ufs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } }; - struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; @@ -233,7 +229,6 @@ ffs_fsync(v) struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; int s, error, passes, skipmeta; - struct uvm_object *uobj; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && @@ -241,22 +236,13 @@ ffs_fsync(v) softdep_fsync_mountdev(vp); /* - * Flush all dirty data associated with a vnode. + * Flush all dirty buffers associated with a vnode. 
*/ passes = NIADDR + 1; skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; s = splbio(); - - if (vp->v_type == VREG) { - uobj = &vp->v_uobj; - simple_lock(&uobj->vmobjlock); - (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT| - ((ap->a_waitfor == MNT_WAIT) ? PGO_SYNCIO : 0)); - simple_unlock(&uobj->vmobjlock); - } - loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = LIST_NEXT(bp, b_vnbufs)) @@ -295,10 +281,8 @@ loop: */ if (passes > 0 || ap->a_waitfor != MNT_WAIT) (void) bawrite(bp); - else if ((error = bwrite(bp)) != 0) { - printf("ffs_fsync: bwrite failed %d\n", error); + else if ((error = bwrite(bp)) != 0) return (error); - } s = splbio(); /* * Since we may have slept during the I/O, we need @@ -341,11 +325,7 @@ loop: } } splx(s); - - error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); - if (error) - printf("ffs_fsync: UFS_UPDATE failed. %d\n", error); - return (error); + return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); } /* @@ -369,23 +349,3 @@ ffs_reclaim(v) vp->v_data = NULL; return (0); } - -/* - * Return the last logical file offset that should be written for this file - * if we're doing a write that ends at "size". - */ -void -ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp) -{ - struct inode *ip = VTOI(vp); - struct fs *fs = ip->i_fs; - ufs_lbn_t olbn, nlbn; - - olbn = lblkno(fs, ip->i_ffs_size); - nlbn = lblkno(fs, size); - if (nlbn < NDADDR && olbn <= nlbn) { - *eobp = fragroundup(fs, size); - } else { - *eobp = blkroundup(fs, size); - } -} diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c index cacf6fce8ee..619e5327c26 100644 --- a/sys/ufs/mfs/mfs_vnops.c +++ b/sys/ufs/mfs/mfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vnops.c,v 1.17 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: mfs_vnops.c,v 1.18 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $ */ /* @@ -93,8 +93,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { { &vop_pathconf_desc, mfs_pathconf }, /* pathconf */ { &vop_advlock_desc, mfs_advlock }, /* advlock */ { &vop_bwrite_desc, mfs_bwrite }, /* bwrite */ - { &vop_mmap_desc, mfs_mmap }, - { NULL, NULL } + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } }; struct vnodeopv_desc mfs_vnodeop_opv_desc = { &mfs_vnodeop_p, mfs_vnodeop_entries }; diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h index dbf8b7dd2c2..0ea03f72d90 100644 --- a/sys/ufs/mfs/mfsnode.h +++ b/sys/ufs/mfs/mfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfsnode.h,v 1.8 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: mfsnode.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $ */ /* @@ -87,4 +87,3 @@ struct mfsnode { #define mfs_truncate mfs_badop #define mfs_update nullop #define mfs_bwrite vop_generic_bwrite -#define mfs_mmap mfs_badop
\ No newline at end of file diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index eb3f0069790..52a78783351 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.18 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: inode.h,v 1.19 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -45,7 +45,6 @@ #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ext2fs/ext2fs_dinode.h> -#include <miscfs/genfs/genfs.h> typedef long ufs_lbn_t; @@ -67,7 +66,6 @@ struct ext2fs_inode_ext { * active, and is put back when the file is no longer being used. */ struct inode { - struct genfs_node i_gnode; LIST_ENTRY(inode) i_hash; /* Hash chain */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ @@ -86,7 +84,6 @@ struct inode { #define i_e2fs inode_u.e2fs struct cluster_info i_ci; - LIST_HEAD(,buf) i_pcbufhd; struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index fdf5c1be055..fa060e3c6b8 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: ufs_bmap.c,v 1.12 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */ /* @@ -233,7 +233,6 @@ ufs_getlbns(vp, bn, ap, nump) long metalbn, realbn; struct ufsmount *ump; int64_t blockcnt; - int lbc; int i, numlevels, off; ump = VFSTOUFS(vp->v_mount); @@ -261,14 +260,10 @@ ufs_getlbns(vp, bn, ap, nump) * at the given level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. 
*/ - bn -= NDADDR; - for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) { + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); - - lbc += ump->um_lognindir; - blockcnt = (int64_t)1 << lbc; - + blockcnt *= MNINDIR(ump); if (bn < blockcnt) break; } @@ -294,9 +289,8 @@ ufs_getlbns(vp, bn, ap, nump) if (metalbn == realbn) break; - lbc -= ump->um_lognindir; - blockcnt = (int64_t)1 << lbc; - off = (bn >> lbc) & (MNINDIR(ump) - 1); + blockcnt /= MNINDIR(ump); + off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 85df8cf99ec..894187d0b7b 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_extern.h,v 1.15 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ufs_extern.h,v 1.16 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */ /*- @@ -78,6 +78,7 @@ int ufs_lock __P((void *)); int ufs_lookup __P((void *)); int ufs_mkdir __P((void *)); int ufs_mknod __P((void *)); +int ufs_mmap __P((void *)); int ufs_open __P((void *)); int ufs_pathconf __P((void *)); int ufs_print __P((void *)); @@ -98,7 +99,6 @@ int ufs_whiteout __P((void *)); int ufsspec_close __P((void *)); int ufsspec_read __P((void *)); int ufsspec_write __P((void *)); -#define ufs_mmap vop_generic_mmap #ifdef FIFO int ufsfifo_read __P((void *)); @@ -121,7 +121,6 @@ void ufs_ihashrem __P((struct inode *)); /* ufs_inode.c */ int ufs_init __P((struct vfsconf *)); int ufs_reclaim __P((struct vnode *, struct proc *)); -int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int)); /* ufs_lookup.c */ void ufs_dirbad __P((struct inode *, doff_t, char *)); diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 61ec4eeede9..dd2c6574d30 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_inode.c,v 1.15 2001/12/10 04:45:32 art Exp $ */ +/* $OpenBSD: ufs_inode.c,v 1.16 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */ /* @@ -101,9 +101,7 @@ ufs_inactive(v) if (getinoquota(ip) != 0) (void)ufs_quota_free_inode(ip, NOCRED); - if (ip->i_ffs_size != 0) { - (void) UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); - } + (void) UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); ip->i_ffs_rdev = 0; mode = ip->i_ffs_mode; ip->i_ffs_mode = 0; @@ -153,153 +151,3 @@ ufs_reclaim(vp, p) ufs_quota_delete(ip); return (0); } - -/* - * allocate a range of blocks in a file. - * after this function returns, any page entirely contained within the range - * will map to invalid data and thus must be overwritten before it is made - * accessible to others. 
- */ - -int -ufs_balloc_range(vp, off, len, cred, flags) - struct vnode *vp; - off_t off, len; - struct ucred *cred; - int flags; -{ - off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart; - struct uvm_object *uobj; - struct genfs_node *gp = VTOG(vp); - int i, delta, error, npages1, npages2; - int bshift = vp->v_mount->mnt_fs_bshift; - int bsize = 1 << bshift; - int ppb = MAX(bsize >> PAGE_SHIFT, 1); - struct vm_page *pgs1[ppb], *pgs2[ppb]; - UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", - vp, off, len, vp->v_size); - - oldeof = vp->v_size; - GOP_SIZE(vp, oldeof, &oldeob); - - neweof = MAX(vp->v_size, off + len); - GOP_SIZE(vp, neweof, &neweob); - - error = 0; - uobj = &vp->v_uobj; - pgs1[0] = pgs2[0] = NULL; - - /* - * if the last block in the file is not a full block (ie. it is a - * fragment), and this allocation is causing the fragment to change - * size (either to expand the fragment or promote it to a full block), - * cache the old last block (at its new size). - */ - - oldpagestart = trunc_page(oldeof) & ~(bsize - 1); - if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) { - npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >> - PAGE_SHIFT); - memset(pgs1, 0, npages1 * sizeof(struct vm_page *)); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1, - 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); - if (error) { - goto out; - } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - for (i = 0; i < npages1; i++) { - UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0); - KASSERT((pgs1[i]->flags & PG_RELEASED) == 0); - pgs1[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs1[i]); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - } - - /* - * cache the new range as well. this will create zeroed pages - * where the new block will be and keep them locked until the - * new block is allocated, so there will be no window where - * the old contents of the new block is visible to racing threads. - */ - - pagestart = trunc_page(off) & ~(bsize - 1); - if (pagestart != oldpagestart || pgs1[0] == NULL) { - npages2 = MIN(ppb, (round_page(neweob) - pagestart) >> - PAGE_SHIFT); - memset(pgs2, 0, npages2 * sizeof(struct vm_page *)); - simple_lock(&uobj->vmobjlock); - error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0, - VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); - if (error) { - goto out; - } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); - for (i = 0; i < npages2; i++) { - UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0); - KASSERT((pgs2[i]->flags & PG_RELEASED) == 0); - pgs2[i]->flags &= ~PG_CLEAN; - uvm_pageactivate(pgs2[i]); - } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); - } - - /* - * adjust off to be block-aligned. - */ - - delta = off & (bsize - 1); - off -= delta; - len += delta; - - /* - * now allocate the range. - */ - - lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL, curproc); - error = GOP_ALLOC(vp, off, len, flags, cred); - lockmgr(&gp->g_glock, LK_RELEASE, NULL, curproc); - - /* - * clear PG_RDONLY on any pages we are holding - * (since they now have backing store) and unbusy them. - * if we got an error, free any pages we created past the old eob. 
- */ - -out: - simple_lock(&uobj->vmobjlock); - if (error) { - (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0, - PGO_FREE); - } - if (pgs1[0] != NULL) { - for (i = 0; i < npages1; i++) { - pgs1[i]->flags &= ~PG_RDONLY; - } - uvm_page_unbusy(pgs1, npages1); - - /* - * The data in the frag might be moving to a new disk location. - * We need to flush pages to the new disk locations. - */ - - if ((flags & B_SYNC) == 0) - (*uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1), - MIN((oldeof + bsize) & ~(bsize - 1), neweof), - PGO_CLEANIT | PGO_SYNCIO); - } - if (pgs2[0] != NULL) { - for (i = 0; i < npages2; i++) { - pgs2[i]->flags &= ~PG_RDONLY; - } - uvm_page_unbusy(pgs2, npages2); - } - simple_unlock(&uobj->vmobjlock); - return error; -} diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 50e4657c6e7..5b562568de7 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_readwrite.c,v 1.22 2001/12/10 03:04:58 art Exp $ */ +/* $OpenBSD: ufs_readwrite.c,v 1.23 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */ /*- @@ -76,22 +76,21 @@ READ(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - struct vnode *vp; - struct inode *ip; - struct uio *uio; - FS *fs; - void *win; - vsize_t bytelen; + register struct vnode *vp; + register struct inode *ip; + register struct uio *uio; + register FS *fs; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error; + u_short mode; vp = ap->a_vp; ip = VTOI(vp); + mode = ip->i_ffs_mode; uio = ap->a_uio; - error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) @@ -111,24 +110,6 @@ READ(v) if (uio->uio_resid == 0) return (0); - if (uio->uio_offset >= ip->i_ffs_size) - goto out; - - if (vp->v_type == VREG) { - while (uio->uio_resid > 0) { - bytelen = min(ip->i_ffs_size - uio->uio_offset, - uio->uio_resid); - if (bytelen == 0) - break; - win = ubc_alloc(&vp->v_uobj, uio->uio_offset, - &bytelen, UBC_READ); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - if (error) - break; - } - goto out; - } for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0) @@ -143,14 +124,23 @@ READ(v) if (bytesinfile < xfersize) xfersize = bytesinfile; +#ifdef LFS_READWRITE + (void)lfs_check(vp, lbn); + error = cluster_read(vp, &ip->i_ci, ip->i_ffs_size, lbn, + size, NOCRED, &bp); +#else if (lblktosize(fs, nextlbn) >= ip->i_ffs_size) error = bread(vp, lbn, size, NOCRED, &bp); + else if (doclusterread) + error = cluster_read(vp, &ip->i_ci, + ip->i_ffs_size, lbn, size, NOCRED, &bp); else if (lbn - 1 == ip->i_ci.ci_lastr) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); +#endif if (error) break; ip->i_ci.ci_lastr = lbn; @@ -168,7 +158,7 @@ READ(v) break; xfersize = size; } - error = uiomove((char *)bp->b_data + blkoffset, xfersize, + error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; @@ -176,7 +166,6 @@ READ(v) } if (bp != NULL) brelse(bp); -out: ip->i_flag |= IN_ACCESS; return (error); } @@ -194,19 +183,15 @@ WRITE(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - struct vnode *vp; - struct uio *uio; - struct inode *ip; - FS *fs; + register struct vnode *vp; + register struct uio *uio; + register struct inode *ip; + register FS *fs; struct buf *bp; struct proc *p; daddr_t lbn; off_t 
osize; int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; - void *win; - vsize_t bytelen; - off_t oldoff; - boolean_t rv; extended = 0; ioflag = ap->a_ioflag; @@ -254,76 +239,9 @@ WRITE(v) resid = uio->uio_resid; osize = ip->i_ffs_size; - error = 0; - - if (vp->v_type != VREG) - goto bcache; - - while (uio->uio_resid > 0) { - struct uvm_object *uobj = &vp->v_uobj; - oldoff = uio->uio_offset; - blkoffset = blkoff(fs, uio->uio_offset); - bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid); - - /* - * XXXUBC if file is mapped and this is the last block, - * process one page at a time. - */ - - error = ufs_balloc_range(vp, uio->uio_offset, bytelen, - ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0); - if (error) { - return error; - } - - win = ubc_alloc(uobj, uio->uio_offset, &bytelen, UBC_WRITE); - error = uiomove(win, bytelen, uio); - ubc_release(win, 0); - - /* - * flush what we just wrote if necessary. - * XXXUBC simplistic async flushing. - */ - - if (ioflag & IO_SYNC) { - simple_lock(&uobj->vmobjlock); -#if 1 - /* - * XXX - * flush whole blocks in case there are deps. - * otherwise we can dirty and flush part of - * a block multiple times and the softdep code - * will get confused. fixing this the right way - * is complicated so we'll work around it for now. - */ - - rv = uobj->pgops->pgo_flush( - uobj, oldoff & ~(fs->fs_bsize - 1), - (oldoff + bytelen + fs->fs_bsize - 1) & - ~(fs->fs_bsize - 1), - PGO_CLEANIT|PGO_SYNCIO); -#else - rv = uobj->pgops->pgo_flush( - uobj, oldoff, oldoff + bytelen, - PGO_CLEANIT|PGO_SYNCIO); -#endif - simple_unlock(uobj->vmobjlock); - } else if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&uobj->vmobjlock); - rv = uobj->pgops->pgo_flush(uobj, - (oldoff >> 16) << 16, - (uio->uio_offset >> 16) << 16, PGO_CLEANIT); - simple_unlock(&uobj->vmobjlock); - } - if (error) { - break; - } - } - goto out; - -bcache: flags = ioflag & IO_SYNC ? B_SYNC : 0; - while (uio->uio_resid > 0) { + + for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; @@ -342,16 +260,21 @@ bcache: uvm_vnp_setsize(vp, ip->i_ffs_size); extended = 1; } + (void)uvm_vnp_uncache(vp); size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; - error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); + error = + uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if (error != 0) bzero((char *)bp->b_data + blkoffset, xfersize); +#ifdef LFS_READWRITE + (void)VOP_BWRITE(bp); +#else if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->fs_bsize) { @@ -361,16 +284,16 @@ bcache: bawrite(bp); } else bdwrite(bp); +#endif if (error || xfersize == 0) break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. 
*/ -out: - ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_ffs_mode &= ~(ISUID | ISGID); if (resid > uio->uio_resid) diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index e61259fa820..cb6060f1cc7 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vnops.c,v 1.41 2001/12/04 22:44:32 art Exp $ */ +/* $OpenBSD: ufs_vnops.c,v 1.42 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ /* @@ -469,6 +469,8 @@ ufs_chmod(vp, mode, cred, p) ip->i_ffs_mode &= ~ALLPERMS; ip->i_ffs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; + if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0) + (void) uvm_vnp_uncache(vp); return (0); } diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 981eb21474b..847ee1558e8 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */ +/* $OpenBSD: ufsmount.h,v 1.7 2001/12/19 08:58:07 art Exp $ */ /* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */ /* @@ -64,7 +64,6 @@ struct ufsmount { struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ u_long um_nindir; /* indirect ptrs per block */ - u_long um_lognindir; /* log2 of um_nindir */ u_long um_bptrtodb; /* indir ptr to disk block */ u_long um_seqinc; /* inc between seq blocks */ time_t um_btime[MAXQUOTAS]; /* block quota time limit */ diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 92d420cd160..b2216fcc92f 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm.h,v 1.15 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm.h,v 1.30 2001/06/27 21:18:34 thorpej Exp $ */ +/* $OpenBSD: uvm.h,v 1.16 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* * @@ -38,12 +38,6 @@ #ifndef _UVM_UVM_H_ #define _UVM_UVM_H_ -#if defined(_KERNEL_OPT) -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" -#include "opt_uvmhist.h" -#endif - #include <uvm/uvm_extern.h> #include <uvm/uvm_stat.h> @@ -83,11 +77,11 @@ struct uvm { /* vm_page queues */ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ - int page_free_nextcolor; /* next color to allocate from */ struct pglist page_active; /* allocated pages, in use */ - struct pglist page_inactive; /* pages between the clock hands */ - struct simplelock pageqlock; /* lock for active/inactive page q */ - struct simplelock fpageqlock; /* lock for free page q */ + struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */ + struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */ + simple_lock_data_t pageqlock; /* lock for active/inactive page q */ + simple_lock_data_t fpageqlock; /* lock for free page q */ boolean_t page_init_done; /* TRUE if uvm_page_init() finished */ boolean_t page_idle_zero; /* TRUE if we should try to zero pages in the idle loop */ @@ -95,26 +89,26 @@ struct uvm { /* page daemon trigger */ int pagedaemon; /* daemon sleeps on this */ struct proc *pagedaemon_proc; /* daemon's pid */ - struct simplelock pagedaemon_lock; + simple_lock_data_t pagedaemon_lock; /* aiodone daemon trigger */ int aiodoned; /* daemon sleeps on this */ struct proc *aiodoned_proc; /* daemon's pid */ - struct simplelock aiodoned_lock; + simple_lock_data_t aiodoned_lock; /* page hash */ struct pglist *page_hash; /* page hash table 
(vp/off->page) */ int page_nhash; /* number of buckets */ int page_hashmask; /* hash mask */ - struct simplelock hashlock; /* lock on page_hash array */ + simple_lock_data_t hashlock; /* lock on page_hash array */ /* anon stuff */ struct vm_anon *afree; /* anon free list */ - struct simplelock afreelock; /* lock on anon free list */ + simple_lock_data_t afreelock; /* lock on anon free list */ /* static kernel map entry pool */ - struct vm_map_entry *kentry_free; /* free page pool */ - struct simplelock kentry_lock; + vm_map_entry_t kentry_free; /* free page pool */ + simple_lock_data_t kentry_lock; /* aio_done is locked by uvm.pagedaemon_lock and splbio! */ TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */ @@ -124,7 +118,7 @@ struct uvm { vaddr_t pager_eva; /* end of pager VA area */ /* swap-related items */ - struct simplelock swap_data_lock; + simple_lock_data_t swap_data_lock; /* kernel object: to support anonymous pageable kernel memory */ struct uvm_object *kernel_object; @@ -171,20 +165,6 @@ do { \ } while (0) /* - * UVM_KICK_PDAEMON: perform checks to determine if we need to - * give the pagedaemon a nudge, and do so if necessary. - */ - -#define UVM_KICK_PDAEMON() \ -do { \ - if (uvmexp.free + uvmexp.paging < uvmexp.freemin || \ - (uvmexp.free + uvmexp.paging < uvmexp.freetarg && \ - uvmexp.inactive < uvmexp.inactarg)) { \ - wakeup(&uvm.pagedaemon); \ - } \ -} while (/*CONSTCOND*/0) - -/* * UVM_PAGE_OWN: track page ownership (only if UVM_PAGE_TRKOWN) */ diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c index a8a1a527367..29263bf7d60 100644 --- a/sys/uvm/uvm_amap.c +++ b/sys/uvm/uvm_amap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap.c,v 1.17 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_amap.c,v 1.33 2001/07/22 13:34:12 wiz Exp $ */ +/* $OpenBSD: uvm_amap.c,v 1.18 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap.c,v 1.30 2001/02/18 21:19:09 chs Exp $ */ /* * @@ -101,7 +101,7 @@ static struct vm_amap *amap_alloc1 __P((int, int, int)); * chunk. note that the "plus one" part is needed because a reference * count of zero is neither positive or negative (need a way to tell * if we've got one zero or a bunch of them). - * + * * here are some in-line functions to help us. */ @@ -157,7 +157,7 @@ amap_init() * Initialize the vm_amap pool. */ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, - "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, + "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_UVMAMAP); } @@ -283,7 +283,7 @@ amap_free(amap) */ void amap_extend(entry, addsize) - struct vm_map_entry *entry; + vm_map_entry_t entry; vsize_t addsize; { struct vm_amap *amap = entry->aref.ar_amap; @@ -324,7 +324,7 @@ amap_extend(entry, addsize) } #endif amap_unlock(amap); - UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", + UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", amap, slotneed, 0, 0); return; /* done! */ } @@ -337,10 +337,10 @@ amap_extend(entry, addsize) #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { if ((slotoff + slotmapped) < amap->am_nslot) - amap_pp_adjref(amap, slotoff + slotmapped, + amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); - pp_setreflen(amap->am_ppref, amap->am_nslot, 1, + pp_setreflen(amap->am_ppref, amap->am_nslot, 1, slotneed - amap->am_nslot); } #endif @@ -350,7 +350,7 @@ amap_extend(entry, addsize) * no need to zero am_anon since that was done at * alloc time and we never shrink an allocation. 
*/ - UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", + UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", amap, slotneed, 0, 0); return; } @@ -359,7 +359,7 @@ amap_extend(entry, addsize) * case 3: we need to malloc a new amap and copy all the amap * data over from old amap to the new one. * - * XXXCDC: could we take advantage of a kernel realloc()? + * XXXCDC: could we take advantage of a kernel realloc()? */ amap_unlock(amap); /* unlock in case we sleep in malloc */ @@ -412,7 +412,7 @@ amap_extend(entry, addsize) memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded); amap->am_ppref = newppref; if ((slotoff + slotmapped) < amap->am_nslot) - amap_pp_adjref(amap, slotoff + slotmapped, + amap_pp_adjref(amap, slotoff + slotmapped, (amap->am_nslot - (slotoff + slotmapped)), 1); pp_setreflen(newppref, amap->am_nslot, 1, slotadded); } @@ -433,7 +433,7 @@ amap_extend(entry, addsize) if (oldppref && oldppref != PPREF_NONE) free(oldppref, M_UVMAMAP); #endif - UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", + UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", amap, slotneed, 0, 0); } @@ -452,7 +452,7 @@ amap_extend(entry, addsize) */ void amap_share_protect(entry, prot) - struct vm_map_entry *entry; + vm_map_entry_t entry; vm_prot_t prot; { struct vm_amap *amap = entry->aref.ar_amap; @@ -489,7 +489,7 @@ amap_share_protect(entry, prot) /* * amap_wipeout: wipeout all anon's in an amap; then free the amap! * - * => called from amap_unref when the final reference to an amap is + * => called from amap_unref when the final reference to an amap is * discarded (i.e. when reference count == 1) * => the amap should be locked (by the caller) */ @@ -511,12 +511,12 @@ amap_wipeout(amap) slot = amap->am_slots[lcv]; anon = amap->am_anon[slot]; - if (anon == NULL || anon->an_ref == 0) + if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); simple_lock(&anon->an_lock); /* lock anon */ - UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, + UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, anon->an_ref, 0, 0); refs = --anon->an_ref; @@ -542,7 +542,7 @@ amap_wipeout(amap) /* * amap_copy: ensure that a map entry's "needs_copy" flag is false * by copying the amap if necessary. - * + * * => an entry with a null amap pointer will get a new (blank) one. * => the map that the map entry belongs to must be locked by caller. * => the amap currently attached to "entry" (if any) must be unlocked. @@ -555,8 +555,8 @@ amap_wipeout(amap) void amap_copy(map, entry, waitf, canchunk, startva, endva) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; int waitf; boolean_t canchunk; vaddr_t startva, endva; @@ -595,7 +595,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) UVM_MAP_CLIP_END(map, entry, endva); } - UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", + UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", entry->start, entry->end, 0, 0); entry->aref.ar_pageoff = 0; entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, @@ -626,7 +626,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) * looks like we need to copy the map. 
*/ - UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", + UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0); AMAP_B2SLOT(slots, entry->end - entry->start); amap = amap_alloc1(slots, 0, waitf); @@ -683,7 +683,7 @@ amap_copy(map, entry, waitf, canchunk, startva, endva) srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ #ifdef UVM_AMAP_PPREF if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) { - amap_pp_adjref(srcamap, entry->aref.ar_pageoff, + amap_pp_adjref(srcamap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, -1); } #endif @@ -813,7 +813,7 @@ ReStart: uvm_wait("cownowpage"); goto ReStart; } - + /* * got it... now we can copy the data and replace anon * with our new one... diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h index e6b071d5b63..811f121ea9e 100644 --- a/sys/uvm/uvm_amap.h +++ b/sys/uvm/uvm_amap.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap.h,v 1.9 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_amap.h,v 1.17 2001/06/02 18:09:25 chs Exp $ */ +/* $OpenBSD: uvm_amap.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap.h,v 1.14 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -60,7 +60,7 @@ /* * forward definition of vm_amap structure. only amap * implementation-specific code should directly access the fields of - * this structure. + * this structure. */ struct vm_amap; @@ -72,13 +72,13 @@ struct vm_amap; #ifdef UVM_AMAP_INLINE /* defined/undef'd in uvm_amap.c */ #define AMAP_INLINE static __inline /* inline enabled */ -#else +#else #define AMAP_INLINE /* inline disabled */ #endif /* UVM_AMAP_INLINE */ /* - * prototypes for the amap interface + * prototypes for the amap interface */ AMAP_INLINE @@ -88,16 +88,16 @@ void amap_add /* add an anon to an amap */ struct vm_amap *amap_alloc /* allocate a new amap */ __P((vaddr_t, vaddr_t, int)); void amap_copy /* clear amap needs-copy flag */ - __P((struct vm_map *, struct vm_map_entry *, int, + __P((vm_map_t, vm_map_entry_t, int, boolean_t, vaddr_t, vaddr_t)); void amap_cow_now /* resolve all COW faults now */ - __P((struct vm_map *, struct vm_map_entry *)); + __P((vm_map_t, vm_map_entry_t)); void amap_extend /* make amap larger */ - __P((struct vm_map_entry *, vsize_t)); + __P((vm_map_entry_t, vsize_t)); int amap_flags /* get amap's flags */ __P((struct vm_amap *)); void amap_free /* free amap */ - __P((struct vm_amap *)); + __P((struct vm_amap *)); void amap_init /* init amap module (at boot time) */ __P((void)); void amap_lock /* lock amap */ @@ -107,7 +107,7 @@ struct vm_anon *amap_lookup /* lookup an anon @ offset in amap */ __P((struct vm_aref *, vaddr_t)); AMAP_INLINE void amap_lookups /* lookup multiple anons */ - __P((struct vm_aref *, vaddr_t, + __P((struct vm_aref *, vaddr_t, struct vm_anon **, int)); AMAP_INLINE void amap_ref /* add a reference to an amap */ @@ -115,9 +115,9 @@ void amap_ref /* add a reference to an amap */ int amap_refs /* get number of references of amap */ __P((struct vm_amap *)); void amap_share_protect /* protect pages in a shared amap */ - __P((struct vm_map_entry *, vm_prot_t)); + __P((vm_map_entry_t, vm_prot_t)); void amap_splitref /* split reference to amap into two */ - __P((struct vm_aref *, struct vm_aref *, + __P((struct vm_aref *, struct vm_aref *, vaddr_t)); AMAP_INLINE void amap_unadd /* remove an anon from an amap */ @@ -159,7 +159,7 @@ void amap_wipeout /* remove all anons from amap */ */ struct vm_amap { - struct simplelock am_l; /* simple lock [locks all vm_amap fields] */ + 
simple_lock_data_t am_l; /* simple lock [locks all vm_amap fields] */ int am_ref; /* reference count */ int am_flags; /* flags */ int am_maxslot; /* max # of slots allocated */ @@ -177,7 +177,7 @@ struct vm_amap { * note that am_slots, am_bckptr, and am_anon are arrays. this allows * fast lookup of pages based on their virual address at the expense of * some extra memory. in the future we should be smarter about memory - * usage and fall back to a non-array based implementation on systems + * usage and fall back to a non-array based implementation on systems * that are short of memory (XXXCDC). * * the entries in the array are called slots... for example an amap that @@ -185,13 +185,13 @@ struct vm_amap { * is an example of the array usage for a four slot amap. note that only * slots one and three have anons assigned to them. "D/C" means that we * "don't care" about the value. - * + * * 0 1 2 3 * am_anon: NULL, anon0, NULL, anon1 (actual pointers to anons) * am_bckptr: D/C, 1, D/C, 0 (points to am_slots entry) * * am_slots: 3, 1, D/C, D/C (says slots 3 and 1 are in use) - * + * * note that am_bckptr is D/C if the slot in am_anon is set to NULL. * to find the entry in am_slots for an anon, look at am_bckptr[slot], * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]]. @@ -203,7 +203,7 @@ struct vm_amap { /* * defines for handling of large sparce amaps: - * + * * one of the problems of array-based amaps is that if you allocate a * large sparcely-used area of virtual memory you end up allocating * large arrays that, for the most part, don't get used. this is a @@ -216,15 +216,15 @@ struct vm_amap { * it makes sense for it to live in an amap, but if we allocated an * amap for the entire stack range we could end up wasting a large * amount of malloc'd KVM. - * - * for example, on the i386 at boot time we allocate two amaps for the stack - * of /sbin/init: + * + * for example, on the i386 at boot time we allocate two amaps for the stack + * of /sbin/init: * 1. a 7680 slot amap at protection 0 (reserve space for stack) * 2. a 512 slot amap at protection 7 (top of stack) * - * most of the array allocated for the amaps for this is never used. + * most of the array allocated for the amaps for this is never used. * the amap interface provides a way for us to avoid this problem by - * allowing amap_copy() to break larger amaps up into smaller sized + * allowing amap_copy() to break larger amaps up into smaller sized * chunks (controlled by the "canchunk" option). we use this feature * to reduce our memory usage with the BSD stack management. if we * are asked to create an amap with more than UVM_AMAP_LARGE slots in it, diff --git a/sys/uvm/uvm_amap_i.h b/sys/uvm/uvm_amap_i.h index c88f7916bae..d2d8f73d350 100644 --- a/sys/uvm/uvm_amap_i.h +++ b/sys/uvm/uvm_amap_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_amap_i.h,v 1.11 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_amap_i.h,v 1.17 2001/05/25 04:06:11 chs Exp $ */ +/* $OpenBSD: uvm_amap_i.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_amap_i.h,v 1.15 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -109,9 +109,10 @@ amap_lookups(aref, offset, anons, npages) /* * amap_add: add (or replace) a page to an amap * - * => caller must lock amap. + * => caller must lock amap. * => if (replace) caller must lock anon because we might have to call * pmap_page_protect on the anon's page. + * => returns an "offset" which is meaningful to amap_unadd(). 
*/ AMAP_INLINE void amap_add(aref, offset, anon, replace) @@ -134,7 +135,7 @@ amap_add(aref, offset, anon, replace) if (amap->am_anon[slot] == NULL) panic("amap_add: replacing null anon"); - if (amap->am_anon[slot]->u.an_page != NULL && + if (amap->am_anon[slot]->u.an_page != NULL && (amap->am_flags & AMAP_SHARED) != 0) { pmap_page_protect(amap->am_anon[slot]->u.an_page, VM_PROT_NONE); diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index b05abc32642..9cf22f1f21f 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_anon.c,v 1.18 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_anon.c,v 1.17 2001/05/25 04:06:12 chs Exp $ */ +/* $OpenBSD: uvm_anon.c,v 1.19 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_anon.c,v 1.15 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -116,7 +116,7 @@ uvm_anon_add(count) anonblock->anons = anon; LIST_INSERT_HEAD(&anonblock_list, anonblock, list); memset(anon, 0, sizeof(*anon) * needed); - + simple_lock(&uvm.afreelock); uvmexp.nanon += needed; uvmexp.nfreeanon += needed; @@ -214,7 +214,7 @@ uvm_anfree(anon) if (pg) { /* - * if the page is owned by a uobject (now locked), then we must + * if the page is owned by a uobject (now locked), then we must * kill the loan on the page rather than free it. */ @@ -240,10 +240,10 @@ uvm_anfree(anon) /* tell them to dump it when done */ pg->flags |= PG_RELEASED; UVMHIST_LOG(maphist, - " anon 0x%x, page 0x%x: BUSY (released!)", + " anon 0x%x, page 0x%x: BUSY (released!)", anon, pg, 0, 0); return; - } + } pmap_page_protect(pg, VM_PROT_NONE); uvm_lock_pageq(); /* lock out pagedaemon */ uvm_pagefree(pg); /* bye bye */ @@ -272,7 +272,7 @@ uvm_anfree(anon) /* * uvm_anon_dropswap: release any swap resources from this anon. - * + * * => anon must be locked or have a reference count of 0. */ void @@ -294,7 +294,7 @@ uvm_anon_dropswap(anon) simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly--; simple_unlock(&uvm.swap_data_lock); - } + } } /* @@ -398,7 +398,7 @@ uvm_anon_lockloanpg(anon) /* * page in every anon that is paged out to a range of swslots. - * + * * swap_syscall_lock should be held (protects anonblock_list). */ @@ -482,20 +482,20 @@ anon_pagein(anon) rv = uvmfault_anonget(NULL, NULL, anon); /* - * if rv == 0, anon is still locked, else anon + * if rv == VM_PAGER_OK, anon is still locked, else anon * is unlocked */ switch (rv) { - case 0: + case VM_PAGER_OK: break; - case EIO: - case ERESTART: + case VM_PAGER_ERROR: + case VM_PAGER_REFAULT: /* * nothing more to do on errors. - * ERESTART can only mean that the anon was freed, + * VM_PAGER_REFAULT can only mean that the anon was freed, * so again there's nothing to do. 
*/ @@ -518,6 +518,9 @@ anon_pagein(anon) */ pmap_clear_reference(pg); +#ifndef UBC + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h index 1dc9ff7b566..702b5dc4e62 100644 --- a/sys/uvm/uvm_anon.h +++ b/sys/uvm/uvm_anon.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_anon.h,v 1.9 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_anon.h,v 1.15 2001/05/26 16:32:46 chs Exp $ */ +/* $OpenBSD: uvm_anon.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_anon.h,v 1.13 2000/12/27 09:17:04 chs Exp $ */ /* * @@ -50,12 +50,12 @@ struct vm_anon { int an_ref; /* reference count [an_lock] */ - struct simplelock an_lock; /* lock for an_ref */ + simple_lock_data_t an_lock; /* lock for an_ref */ union { struct vm_anon *an_nxt; /* if on free list [afreelock] */ struct vm_page *an_page;/* if in RAM [an_lock] */ } u; - int an_swslot; /* drum swap slot # (if != 0) + int an_swslot; /* drum swap slot # (if != 0) [an_lock. also, it is ok to read an_swslot if we hold an_page PG_BUSY] */ }; @@ -79,7 +79,7 @@ struct vm_anon { */ /* - * processes reference anonymous virtual memory maps with an anonymous + * processes reference anonymous virtual memory maps with an anonymous * reference structure: */ diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 924769d66bf..9a7f135cb98 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_aobj.c,v 1.23 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_aobj.c,v 1.45 2001/06/23 20:52:03 chs Exp $ */ +/* $OpenBSD: uvm_aobj.c,v 1.24 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -174,7 +174,7 @@ static boolean_t uao_flush __P((struct uvm_object *, voff_t, voff_t, int)); static void uao_free __P((struct uvm_aobj *)); static int uao_get __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, + vm_page_t *, int *, int, vm_prot_t, int, int)); static boolean_t uao_releasepg __P((struct vm_page *, struct vm_page **)); @@ -183,7 +183,7 @@ static boolean_t uao_pagein_page __P((struct uvm_aobj *, int)); /* * aobj_pager - * + * * note that some functions (e.g. 
put) are handled elsewhere */ @@ -205,7 +205,7 @@ struct uvm_pagerops aobj_pager = { */ static LIST_HEAD(aobjlist, uvm_aobj) uao_list; -static struct simplelock uao_list_lock; +static simple_lock_data_t uao_list_lock; /* @@ -233,41 +233,38 @@ uao_find_swhash_elt(aobj, pageidx, create) struct uao_swhash_elt *elt; voff_t page_tag; - swhash = UAO_SWHASH_HASH(aobj, pageidx); - page_tag = UAO_SWHASH_ELT_TAG(pageidx); + swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */ + page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */ /* * now search the bucket for the requested tag */ - LIST_FOREACH(elt, swhash, list) { - if (elt->tag == page_tag) { - return elt; - } + if (elt->tag == page_tag) + return(elt); } - if (!create) { + + /* fail now if we are not allowed to create a new entry in the bucket */ + if (!create) return NULL; - } + /* * allocate a new entry for the bucket and init/insert it in */ - - elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT); - if (elt == NULL) { - return NULL; - } + elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK); LIST_INSERT_HEAD(swhash, elt, list); elt->tag = page_tag; elt->count = 0; memset(elt->slots, 0, sizeof(elt->slots)); - return elt; + + return(elt); } /* * uao_find_swslot: find the swap slot number for an aobj/pageidx * - * => object must be locked by caller + * => object must be locked by caller */ __inline static int uao_find_swslot(aobj, pageidx) @@ -296,7 +293,7 @@ uao_find_swslot(aobj, pageidx) return(0); } - /* + /* * otherwise, look in the array */ return(aobj->u_swslots[pageidx]); @@ -307,8 +304,6 @@ uao_find_swslot(aobj, pageidx) * * => setting a slot to zero frees the slot * => object must be locked by caller - * => we return the old slot number, or -1 if we failed to allocate - * memory to record the new slot number */ int uao_set_swslot(uobj, pageidx, slot) @@ -316,7 +311,6 @@ uao_set_swslot(uobj, pageidx, slot) int pageidx, slot; { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - struct uao_swhash_elt *elt; int oldslot; UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist); UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d", @@ -348,9 +342,11 @@ uao_set_swslot(uobj, pageidx, slot) * we are freeing. */ - elt = uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE); + struct uao_swhash_elt *elt = + uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE); if (elt == NULL) { - return slot ? -1 : 0; + KASSERT(slot == 0); + return (0); } oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); @@ -365,8 +361,8 @@ uao_set_swslot(uobj, pageidx, slot) if (slot) { if (oldslot == 0) elt->count++; - } else { - if (oldslot) + } else { /* freeing slot ... */ + if (oldslot) /* to be safe */ elt->count--; if (elt->count == 0) { @@ -374,7 +370,7 @@ uao_set_swslot(uobj, pageidx, slot) pool_put(&uao_swhash_elt_pool, elt); } } - } else { + } else { /* we are using an array */ oldslot = aobj->u_swslots[pageidx]; aobj->u_swslots[pageidx] = slot; @@ -630,7 +626,7 @@ uao_reference_locked(uobj) return; uobj->uo_refs++; /* bump! */ - UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); } @@ -663,7 +659,7 @@ uao_detach_locked(uobj) struct uvm_object *uobj; { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - struct vm_page *pg, *nextpg; + struct vm_page *pg; boolean_t busybody; UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist); @@ -695,8 +691,9 @@ uao_detach_locked(uobj) * mark for release any that are. 
*/ busybody = FALSE; - for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; pg = nextpg) { - nextpg = TAILQ_NEXT(pg, listq); + for (pg = TAILQ_FIRST(&uobj->memq); + pg != NULL; + pg = TAILQ_NEXT(pg, listq)) { if (pg->flags & PG_BUSY) { pg->flags |= PG_RELEASED; busybody = TRUE; @@ -864,7 +861,7 @@ uao_flush(uobj, start, stop, flags) if (pp == NULL) continue; } - + switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { /* * XXX In these first 3 cases, we always just @@ -881,8 +878,15 @@ uao_flush(uobj, start, stop, flags) pp->wire_count != 0) continue; +#ifdef UBC /* ...and deactivate the page. */ pmap_clear_reference(pp); +#else + /* zap all mappings for the page. */ + pmap_page_protect(pp, VM_PROT_NONE); + + /* ...and deactivate the page. */ +#endif uvm_pagedeactivate(pp); continue; @@ -938,7 +942,7 @@ uao_flush(uobj, start, stop, flags) * * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot. * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), - * then we will need to return EBUSY. + * then we will need to return VM_PAGER_UNLOCK. * * => prefer map unlocked (not required) * => object must be locked! we will _unlock_ it before starting any I/O. @@ -958,7 +962,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; voff_t current_offset; - struct vm_page *ptmp; + vm_page_t ptmp; int lcv, gotpages, maxpages, swslot, rv, pageidx; boolean_t done; UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist); @@ -1017,7 +1021,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0) /* need to do a wait or I/O! */ - done = FALSE; + done = FALSE; continue; } @@ -1026,7 +1030,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) * result array */ /* caller must un-busy this page */ - ptmp->flags |= PG_BUSY; + ptmp->flags |= PG_BUSY; UVM_PAGE_OWN(ptmp, "uao_get1"); pps[lcv] = ptmp; gotpages++; @@ -1043,10 +1047,10 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) *npagesp = gotpages; if (done) /* bingo! */ - return(0); + return(VM_PAGER_OK); else /* EEK! Need to unlock and I/O */ - return(EBUSY); + return(VM_PAGER_UNLOCK); } /* @@ -1103,7 +1107,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) uvm_wait("uao_getpage"); simple_lock(&uobj->vmobjlock); /* goto top of pps while loop */ - continue; + continue; } /* @@ -1112,7 +1116,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) */ ptmp->pqflags |= PQ_AOBJ; - /* + /* * got new page ready for I/O. break pps while * loop. pps[lcv] is still NULL. */ @@ -1130,8 +1134,8 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) simple_lock(&uobj->vmobjlock); continue; /* goto top of pps while loop */ } - - /* + + /* * if we get here then the page has become resident and * unbusy between steps 1 and 2. we busy it now (so we * own it) and set pps[lcv] (so that we exit the while @@ -1151,7 +1155,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) continue; /* next lcv */ /* - * we have a "fake/busy/clean" page that we just allocated. + * we have a "fake/busy/clean" page that we just allocated. * do the needed "i/o", either reading from swap or zeroing. */ swslot = uao_find_swslot(aobj, pageidx); @@ -1180,7 +1184,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) /* * I/O done. check for errors. 
*/ - if (rv != 0) + if (rv != VM_PAGER_OK) { UVMHIST_LOG(pdhist, "<- done (error=%d)", rv,0,0,0); @@ -1195,9 +1199,7 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) */ swslot = uao_set_swslot(&aobj->u_obj, pageidx, SWSLOT_BAD); - if (swslot != -1) { - uvm_swap_markbad(swslot, 1); - } + uvm_swap_markbad(swslot, 1); ptmp->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); @@ -1210,10 +1212,10 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) } } - /* + /* * we got the page! clear the fake flag (indicates valid * data now in page) and plug into our result array. note - * that page is still busy. + * that page is still busy. * * it is the callers job to: * => check if the page is released @@ -1233,12 +1235,12 @@ uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) simple_unlock(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0); - return(0); + return(VM_PAGER_OK); } /* * uao_releasepg: handle released page in an aobj - * + * * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need * to dispose of. * => caller must handle PG_WANTED case @@ -1299,7 +1301,7 @@ uao_releasepg(pg, nextpgp) /* * uao_dropswap: release any swap resources from this aobj page. - * + * * => aobj must be locked or have a reference count of 0. */ @@ -1319,7 +1321,7 @@ uao_dropswap(uobj, pageidx) /* * page in every page in every aobj that is paged-out to a range of swslots. - * + * * => nothing should be locked. * => returns TRUE if pagein was aborted due to lack of memory. */ @@ -1420,7 +1422,7 @@ restart: /* * if the slot isn't in range, skip it. */ - if (slot < startslot || + if (slot < startslot || slot >= endslot) { continue; } @@ -1493,14 +1495,14 @@ uao_pagein_page(aobj, pageidx) simple_lock(&aobj->u_obj.vmobjlock); switch (rv) { - case 0: + case VM_PAGER_OK: break; - case EIO: - case ERESTART: + case VM_PAGER_ERROR: + case VM_PAGER_REFAULT: /* * nothing more to do on errors. - * ERESTART can only mean that the anon was freed, + * VM_PAGER_REFAULT can only mean that the anon was freed, * so again there's nothing to do. */ return FALSE; @@ -1521,6 +1523,9 @@ uao_pagein_page(aobj, pageidx) * deactivate the page (to put it on a page queue). */ pmap_clear_reference(pg); +#ifndef UBC + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_lock_pageq(); uvm_pagedeactivate(pg); uvm_unlock_pageq(); diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c deleted file mode 100644 index f6ce9852451..00000000000 --- a/sys/uvm/uvm_bio.c +++ /dev/null @@ -1,558 +0,0 @@ -/* $NetBSD: uvm_bio.c,v 1.17 2001/09/10 21:19:43 chris Exp $ */ - -/* - * Copyright (c) 1998 Chuck Silvers. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -/* - * uvm_bio.c: buffered i/o vnode mapping cache - */ - - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/vnode.h> - -#include <uvm/uvm.h> -#include <uvm/uvm_page.h> - -/* - * global data structures - */ - -/* - * local functions - */ - -static int ubc_fault __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, vm_prot_t, int)); -static struct ubc_map *ubc_find_mapping __P((struct uvm_object *, voff_t)); - -/* - * local data structues - */ - -#define UBC_HASH(uobj, offset) (((((u_long)(uobj)) >> 8) + \ - (((u_long)(offset)) >> PAGE_SHIFT)) & \ - ubc_object.hashmask) - -#define UBC_QUEUE(offset) (&ubc_object.inactive[((offset) >> ubc_winshift) & \ - (UBC_NQUEUES - 1)]) - -struct ubc_map -{ - struct uvm_object * uobj; /* mapped object */ - voff_t offset; /* offset into uobj */ - int refcount; /* refcount on mapping */ - voff_t writeoff; /* overwrite offset */ - vsize_t writelen; /* overwrite len */ - - LIST_ENTRY(ubc_map) hash; /* hash table */ - TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */ -}; - -static struct ubc_object -{ - struct uvm_object uobj; /* glue for uvm_map() */ - char *kva; /* where ubc_object is mapped */ - struct ubc_map *umap; /* array of ubc_map's */ - - LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */ - u_long hashmask; /* mask for hashtable */ - - TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive; - /* inactive queues for ubc_map's */ - -} ubc_object; - -struct uvm_pagerops ubc_pager = -{ - NULL, /* init */ - NULL, /* reference */ - NULL, /* detach */ - ubc_fault, /* fault */ - /* ... rest are NULL */ -}; - -int ubc_nwins = UBC_NWINS; -int ubc_winshift = UBC_WINSHIFT; -int ubc_winsize; -#ifdef PMAP_PREFER -int ubc_nqueues; -boolean_t ubc_release_unmap = FALSE; -#define UBC_NQUEUES ubc_nqueues -#define UBC_RELEASE_UNMAP ubc_release_unmap -#else -#define UBC_NQUEUES 1 -#define UBC_RELEASE_UNMAP FALSE -#endif - -/* - * ubc_init - * - * init pager private data structures. - */ - -void -ubc_init(void) -{ - struct ubc_map *umap; - vaddr_t va; - int i; - - /* - * Make sure ubc_winshift is sane. - */ - if (ubc_winshift < PAGE_SHIFT) - ubc_winshift = PAGE_SHIFT; - - /* - * init ubc_object. - * alloc and init ubc_map's. - * init inactive queues. - * alloc and init hashtable. - * map in ubc_object. 
- */ - - simple_lock_init(&ubc_object.uobj.vmobjlock); - ubc_object.uobj.pgops = &ubc_pager; - TAILQ_INIT(&ubc_object.uobj.memq); - ubc_object.uobj.uo_npages = 0; - ubc_object.uobj.uo_refs = UVM_OBJ_KERN; - - ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map), - M_TEMP, M_NOWAIT); - if (ubc_object.umap == NULL) - panic("ubc_init: failed to allocate ubc_map"); - memset(ubc_object.umap, 0, ubc_nwins * sizeof(struct ubc_map)); - - va = (vaddr_t)1L; -#ifdef PMAP_PREFER - PMAP_PREFER(0, &va); - ubc_nqueues = va >> ubc_winshift; - if (ubc_nqueues == 0) { - ubc_nqueues = 1; - } - if (ubc_nqueues != 1) { - ubc_release_unmap = TRUE; - } -#endif - ubc_winsize = 1 << ubc_winshift; - ubc_object.inactive = malloc(UBC_NQUEUES * - sizeof(struct ubc_inactive_head), - M_TEMP, M_NOWAIT); - if (ubc_object.inactive == NULL) - panic("ubc_init: failed to allocate inactive queue heads"); - for (i = 0; i < UBC_NQUEUES; i++) { - TAILQ_INIT(&ubc_object.inactive[i]); - } - for (i = 0; i < ubc_nwins; i++) { - umap = &ubc_object.umap[i]; - TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)], - umap, inactive); - } - - ubc_object.hash = hashinit(ubc_nwins, M_TEMP, M_NOWAIT, - &ubc_object.hashmask); - for (i = 0; i <= ubc_object.hashmask; i++) { - LIST_INIT(&ubc_object.hash[i]); - } - - if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva, - ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va, - UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { - panic("ubc_init: failed to map ubc_object\n"); - } - UVMHIST_INIT(ubchist, 300); -} - - -/* - * ubc_fault: fault routine for ubc mapping - */ -int -ubc_fault(ufi, ign1, ign2, ign3, ign4, fault_type, access_type, flags) - struct uvm_faultinfo *ufi; - vaddr_t ign1; - struct vm_page **ign2; - int ign3, ign4; - vm_fault_t fault_type; - vm_prot_t access_type; - int flags; -{ - struct uvm_object *uobj; - struct vnode *vp; - struct ubc_map *umap; - vaddr_t va, eva, ubc_offset, slot_offset; - int i, error, rv, npages; - struct vm_page *pgs[(1 << ubc_winshift) >> PAGE_SHIFT], *pg; - UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist); - - /* - * no need to try with PGO_LOCKED... - * we don't need to have the map locked since we know that - * no one will mess with it until our reference is released. - */ - if (flags & PGO_LOCKED) { -#if 0 - return EBUSY; -#else - uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL); - flags &= ~PGO_LOCKED; -#endif - } - - va = ufi->orig_rvaddr; - ubc_offset = va - (vaddr_t)ubc_object.kva; - - UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx at %d", - va, ubc_offset, access_type,0); - - umap = &ubc_object.umap[ubc_offset >> ubc_winshift]; - KASSERT(umap->refcount != 0); - slot_offset = trunc_page(ubc_offset & (ubc_winsize - 1)); - - /* no umap locking needed since we have a ref on the umap */ - uobj = umap->uobj; - vp = (struct vnode *)uobj; - KASSERT(uobj != NULL); - - npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT; - - /* - * XXXUBC - * if npages is more than 1 we have to be sure that - * we set PGO_OVERWRITE correctly. 
- */ - if (access_type == VM_PROT_WRITE) { - npages = 1; - } - -again: - memset(pgs, 0, sizeof (pgs)); - simple_lock(&uobj->vmobjlock); - - UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x " - "u_size 0x%x", slot_offset, umap->writeoff, umap->writelen, - vp->v_size); - - if (access_type & VM_PROT_WRITE && - slot_offset >= umap->writeoff && - (slot_offset + PAGE_SIZE <= umap->writeoff + umap->writelen || - slot_offset + PAGE_SIZE >= vp->v_size - umap->offset)) { - UVMHIST_LOG(ubchist, "setting PGO_OVERWRITE", 0,0,0,0); - flags |= PGO_OVERWRITE; - } - else { UVMHIST_LOG(ubchist, "NOT setting PGO_OVERWRITE", 0,0,0,0); } - /* XXX be sure to zero any part of the page past EOF */ - - /* - * XXX - * ideally we'd like to pre-fault all of the pages we're overwriting. - * so for PGO_OVERWRITE, we should call VOP_GETPAGES() with all of the - * pages in [writeoff, writeoff+writesize] instead of just the one. - */ - - UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d", - uobj, umap->offset + slot_offset, npages, 0); - - error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0, - access_type, 0, flags); - UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages,0,0); - - if (error == EAGAIN) { - tsleep(&lbolt, PVM, "ubc_fault", 0); - goto again; - } - if (error) { - return error; - } - if (npages == 0) { - return 0; - } - - va = ufi->orig_rvaddr; - eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT); - - UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0,0); - simple_lock(&uobj->vmobjlock); - for (i = 0; va < eva; i++, va += PAGE_SIZE) { - UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0); - pg = pgs[i]; - - if (pg == NULL || pg == PGO_DONTCARE) { - continue; - } - if (pg->flags & PG_WANTED) { - wakeup(pg); - } - KASSERT((pg->flags & PG_FAKE) == 0); - if (pg->flags & PG_RELEASED) { - rv = uobj->pgops->pgo_releasepg(pg, NULL); - KASSERT(rv); - continue; - } - KASSERT(access_type == VM_PROT_READ || - (pg->flags & PG_RDONLY) == 0); - - uvm_lock_pageq(); - uvm_pageactivate(pg); - uvm_unlock_pageq(); - - pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), - VM_PROT_READ | VM_PROT_WRITE, access_type); - - pg->flags &= ~(PG_BUSY); - UVM_PAGE_OWN(pg, NULL); - } - simple_unlock(&uobj->vmobjlock); - pmap_update(ufi->orig_map->pmap); - return 0; -} - -/* - * local functions - */ - -struct ubc_map * -ubc_find_mapping(uobj, offset) - struct uvm_object *uobj; - voff_t offset; -{ - struct ubc_map *umap; - - LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) { - if (umap->uobj == uobj && umap->offset == offset) { - return umap; - } - } - return NULL; -} - - -/* - * ubc interface functions - */ - -/* - * ubc_alloc: allocate a buffer mapping - */ -void * -ubc_alloc(uobj, offset, lenp, flags) - struct uvm_object *uobj; - voff_t offset; - vsize_t *lenp; - int flags; -{ - int s; - vaddr_t slot_offset, va; - struct ubc_map *umap; - voff_t umap_offset; - UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x", - uobj, offset, *lenp, ((struct vnode *)vp)->v_size); - - umap_offset = (offset & ~((voff_t)ubc_winsize - 1)); - slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1)); - *lenp = min(*lenp, ubc_winsize - slot_offset); - - /* - * the vnode is always locked here, so we don't need to add a ref. 
- */ - - s = splbio(); - -again: - simple_lock(&ubc_object.uobj.vmobjlock); - umap = ubc_find_mapping(uobj, umap_offset); - if (umap == NULL) { - umap = TAILQ_FIRST(UBC_QUEUE(offset)); - if (umap == NULL) { - simple_unlock(&ubc_object.uobj.vmobjlock); - tsleep(&lbolt, PVM, "ubc_alloc", 0); - goto again; - } - - /* - * remove from old hash (if any), - * add to new hash. - */ - - if (umap->uobj != NULL) { - LIST_REMOVE(umap, hash); - } - - umap->uobj = uobj; - umap->offset = umap_offset; - - LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)], - umap, hash); - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - } - - if (umap->refcount == 0) { - TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive); - } - -#ifdef DIAGNOSTIC - if ((flags & UBC_WRITE) && - (umap->writeoff || umap->writelen)) { - panic("ubc_fault: concurrent writes vp %p", uobj); - } -#endif - if (flags & UBC_WRITE) { - umap->writeoff = slot_offset; - umap->writelen = *lenp; - } - - umap->refcount++; - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); - UVMHIST_LOG(ubchist, "umap %p refs %d va %p", - umap, umap->refcount, - ubc_object.kva + ((umap - ubc_object.umap) << ubc_winshift), - 0); - - return ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift) + slot_offset; -} - - -void -ubc_release(va, wlen) - void *va; - vsize_t wlen; -{ - struct ubc_map *umap; - struct uvm_object *uobj; - int s; - UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "va %p", va,0,0,0); - - s = splbio(); - simple_lock(&ubc_object.uobj.vmobjlock); - - umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift]; - uobj = umap->uobj; - KASSERT(uobj != NULL); - - umap->writeoff = 0; - umap->writelen = 0; - umap->refcount--; - if (umap->refcount == 0) { - if (UBC_RELEASE_UNMAP && - (((struct vnode *)uobj)->v_flag & VTEXT)) { - vaddr_t va; - - /* - * if this file is the executable image of - * some process, that process will likely have - * the file mapped at an alignment other than - * what PMAP_PREFER() would like. we'd like - * to have process text be able to use the - * cache even if someone is also reading the - * file, so invalidate mappings of such files - * as soon as possible. - */ - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - LIST_REMOVE(umap, hash); - umap->uobj = NULL; - TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, - inactive); - } else { - TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap, - inactive); - } - } - UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount,0,0); - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); -} - - -/* - * removing a range of mappings from the ubc mapping cache. - */ - -void -ubc_flush(uobj, start, end) - struct uvm_object *uobj; - voff_t start, end; -{ - struct ubc_map *umap; - vaddr_t va; - int s; - UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx", - uobj, start, end,0); - - s = splbio(); - simple_lock(&ubc_object.uobj.vmobjlock); - for (umap = ubc_object.umap; - umap < &ubc_object.umap[ubc_nwins]; - umap++) { - - if (umap->uobj != uobj || - umap->offset < start || - (umap->offset >= end && end != 0) || - umap->refcount > 0) { - continue; - } - - /* - * remove from hash, - * move to head of inactive queue. 
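[editor's note] The removed ubc_alloc()/ubc_release() pair above manages a fixed pool of mappings on an inactive queue: a lookup miss steals the mapping at the head of the queue and rebinds it, and a release puts the mapping back on the tail (or the head, when it should be reused quickly). A sketch of that recycling idea with sys/queue.h; the names are local to the example, not kernel interfaces.

#include <stdio.h>
#include <sys/queue.h>

struct win {
	int			obj;	/* stand-in for the uvm_object */
	long			off;	/* window-aligned offset */
	TAILQ_ENTRY(win)	link;
};

TAILQ_HEAD(winq, win);

int
main(void)
{
	struct winq inactive = TAILQ_HEAD_INITIALIZER(inactive);
	struct win pool[3];
	int i;

	for (i = 0; i < 3; i++) {
		pool[i].obj = -1;		/* unbound */
		pool[i].off = 0;
		TAILQ_INSERT_TAIL(&inactive, &pool[i], link);
	}

	/* lookup miss: steal the least recently released window */
	struct win *w = TAILQ_FIRST(&inactive);
	TAILQ_REMOVE(&inactive, w, link);
	w->obj = 42;			/* rebind to a new object/offset */
	w->off = 8192;

	/* release: back on the tail so it ages before being recycled */
	TAILQ_INSERT_TAIL(&inactive, w, link);

	TAILQ_FOREACH(w, &inactive, link)
		printf("window bound to obj %d off %ld\n", w->obj, w->off);
	return 0;
}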
- */ - - va = (vaddr_t)(ubc_object.kva + - ((umap - ubc_object.umap) << ubc_winshift)); - pmap_remove(pmap_kernel(), va, va + ubc_winsize); - pmap_update(pmap_kernel()); - - LIST_REMOVE(umap, hash); - umap->uobj = NULL; - TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive); - TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive); - } - simple_unlock(&ubc_object.uobj.vmobjlock); - splx(s); -} diff --git a/sys/uvm/uvm_ddb.h b/sys/uvm/uvm_ddb.h index f2de2a1c9e8..469b381a6df 100644 --- a/sys/uvm/uvm_ddb.h +++ b/sys/uvm/uvm_ddb.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_ddb.h,v 1.8 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_ddb.h,v 1.7 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_ddb.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_ddb.h,v 1.5 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -41,7 +41,7 @@ #ifdef _KERNEL #ifdef DDB -void uvm_map_printit __P((struct vm_map *, boolean_t, +void uvm_map_printit __P((vm_map_t, boolean_t, int (*) __P((const char *, ...)))); void uvm_object_printit __P((struct uvm_object *, boolean_t, int (*) __P((const char *, ...)))); diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c index 0f5f2214ec8..08bdccca0d0 100644 --- a/sys/uvm/uvm_device.c +++ b/sys/uvm/uvm_device.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_device.c,v 1.20 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_device.c,v 1.37 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_device.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_device.c,v 1.30 2000/11/25 06:27:59 chs Exp $ */ /* * @@ -57,7 +57,7 @@ LIST_HEAD(udv_list_struct, uvm_device); static struct udv_list_struct udv_list; -static struct simplelock udv_lock; +static simple_lock_data_t udv_lock; /* * functions @@ -67,7 +67,7 @@ static void udv_init __P((void)); static void udv_reference __P((struct uvm_object *)); static void udv_detach __P((struct uvm_object *)); static int udv_fault __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, + vm_page_t *, int, int, vm_fault_t, vm_prot_t, int)); static boolean_t udv_flush __P((struct uvm_object *, voff_t, voff_t, int)); @@ -145,7 +145,7 @@ udv_attach(arg, accessprot, off, size) /* * Check that the specified range of the device allows the * desired protection. - * + * * XXX assumes VM_PROT_* == PROT_* * XXX clobbers off and size, but nothing else here needs them. 
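[editor's note] The removed ubc_flush() above walks the whole window array and tears down only the idle windows that belong to the object and whose offset falls in [start, end), with end == 0 meaning "no upper bound". A sketch of that filter in plain C, with a local struct standing in for struct ubc_map.

#include <stdio.h>

struct win {
	int	obj;
	long	off;
	int	refcount;
};

static int
should_flush(const struct win *w, int obj, long start, long end)
{
	if (w->obj != obj || w->off < start ||
	    (w->off >= end && end != 0) || w->refcount > 0)
		return 0;
	return 1;
}

int
main(void)
{
	struct win busy = { 1, 8192, 1 };
	struct win idle = { 1, 8192, 0 };

	printf("busy window flushed? %d\n", should_flush(&busy, 1, 0, 16384));
	printf("idle window flushed? %d\n", should_flush(&idle, 1, 0, 16384));
	return 0;
}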
*/ @@ -163,7 +163,7 @@ udv_attach(arg, accessprot, off, size) for (;;) { /* - * first, attempt to find it on the main list + * first, attempt to find it on the main list */ simple_lock(&udv_lock); @@ -259,7 +259,7 @@ udv_attach(arg, accessprot, off, size) } /*NOTREACHED*/ } - + /* * udv_reference * @@ -278,7 +278,7 @@ udv_reference(uobj) simple_lock(&uobj->vmobjlock); uobj->uo_refs++; - UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); simple_unlock(&uobj->vmobjlock); } @@ -306,7 +306,7 @@ again: if (uobj->uo_refs > 1) { uobj->uo_refs--; simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", + UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", uobj,uobj->uo_refs,0,0); return; } @@ -374,7 +374,7 @@ static int udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) struct uvm_faultinfo *ufi; vaddr_t vaddr; - struct vm_page **pps; + vm_page_t *pps; int npages, centeridx, flags; vm_fault_t fault_type; vm_prot_t access_type; @@ -396,16 +396,16 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) * we do not allow device mappings to be mapped copy-on-write * so we kill any attempt to do so here. */ - + if (UVM_ET_ISCOPYONWRITE(entry)) { - UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", + UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", entry->etype, 0,0,0); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - return(EIO); + return(VM_PAGER_ERROR); } /* - * get device map function. + * get device map function. */ device = udv->u_device; @@ -422,12 +422,12 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) curr_offset = entry->offset + (vaddr - entry->start); /* pmap va = vaddr (virtual address of pps[0]) */ curr_va = vaddr; - + /* * loop over the page range entering in as needed */ - retval = 0; + retval = VM_PAGER_OK; for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, curr_va += PAGE_SIZE) { if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) @@ -438,7 +438,7 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) mdpgno = (*mapfn)(device, curr_offset, access_type); if (mdpgno == -1) { - retval = EIO; + retval = VM_PAGER_ERROR; break; } paddr = pmap_phys_address(mdpgno); @@ -447,7 +447,7 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d", ufi->orig_map->pmap, curr_va, paddr, mapprot); if (pmap_enter(ufi->orig_map->pmap, curr_va, paddr, - mapprot, PMAP_CANFAIL | mapprot) != 0) { + mapprot, PMAP_CANFAIL | mapprot) != KERN_SUCCESS) { /* * pmap_enter() didn't have the resource to * enter this mapping. 
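[editor's note] The changes in this hunk (and in uvm_fault.c further below) revert the error convention: the restored code reports pager-style VM_PAGER_* results, while the newer code used plain errno values (0, EIO, EAGAIN, ERESTART). A sketch of that mapping; the enum values here are local stand-ins, not the kernel's definitions.

#include <stdio.h>
#include <errno.h>

enum pager_result {		/* stand-ins, not the kernel's values */
	PAGER_OK,
	PAGER_ERROR,
	PAGER_AGAIN,
	PAGER_REFAULT
};

#ifndef ERESTART
#define ERESTART	(-1)	/* BSD kernels define this; assumed here */
#endif

static int
pager_to_errno(enum pager_result r)
{
	switch (r) {
	case PAGER_OK:		return 0;
	case PAGER_AGAIN:	return EAGAIN;
	case PAGER_REFAULT:	return ERESTART;
	default:		return EIO;	/* error cases */
	}
}

int
main(void)
{
	printf("PAGER_ERROR maps to errno %d\n", pager_to_errno(PAGER_ERROR));
	return 0;
}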
Unlock everything, @@ -460,13 +460,11 @@ udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) */ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ uvm_wait("udv_fault"); - return (ERESTART); + return (VM_PAGER_REFAULT); } } uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); - pmap_update(ufi->orig_map->pmap); return (retval); } diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index ac984530ff3..39d6fcb6767 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_extern.h,v 1.39 2001/12/06 23:01:07 niklas Exp $ */ -/* $NetBSD: uvm_extern.h,v 1.66 2001/08/16 01:37:50 chs Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.40 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* * @@ -88,12 +88,24 @@ * typedefs, necessary for standard UVM headers. */ -typedef unsigned int uvm_flag_t; +typedef unsigned int uvm_flag_t; typedef int vm_fault_t; typedef int vm_inherit_t; /* XXX: inheritance codes */ typedef off_t voff_t; /* XXX: offset within a uvm_object */ +union vm_map_object; +typedef union vm_map_object vm_map_object_t; + +struct vm_map_entry; +typedef struct vm_map_entry *vm_map_entry_t; + +struct vm_map; +typedef struct vm_map *vm_map_t; + +struct vm_page; +typedef struct vm_page *vm_page_t; + /* * defines */ @@ -211,21 +223,6 @@ typedef int vm_prot_t; #define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */ /* - * the following defines are for ubc_alloc's flags - */ -#define UBC_READ 0 -#define UBC_WRITE 1 - -/* - * flags for uvn_findpages(). - */ -#define UFP_ALL 0x0 -#define UFP_NOWAIT 0x1 -#define UFP_NOALLOC 0x2 -#define UFP_NOCACHE 0x4 -#define UFP_NORDONLY 0x8 - -/* * lockflags that control the locking behavior of various functions. 
*/ #define UVM_LK_ENTER 0x00000001 /* map locked on entry */ @@ -251,9 +248,6 @@ struct pmap; struct vnode; struct pool; struct simplelock; -struct vm_map_entry; -struct vm_map; -struct vm_page; extern struct pool *uvm_aiobuf_pool; @@ -276,9 +270,6 @@ struct uvmexp { int paging; /* number of pages in the process of being paged out */ int wired; /* number of wired pages */ - int ncolors; /* number of page color buckets: must be p-o-2 */ - int colormask; /* color bucket mask */ - int zeropages; /* number of zero'd pages */ int reserve_pagedaemon; /* number of pages reserved for pagedaemon */ int reserve_kernel; /* number of pages reserved for kernel */ @@ -328,9 +319,8 @@ struct uvmexp { was available */ int pga_zeromiss; /* pagealloc where zero wanted and zero not available */ - int zeroaborts; /* number of times page zeroing was aborted */ - int colorhit; /* pagealloc where we got optimal color */ - int colormiss; /* pagealloc where we didn't */ + int zeroaborts; /* number of times page zeroing was + aborted */ /* fault subcounters */ int fltnoram; /* number of times fault was out of ram */ @@ -402,7 +392,7 @@ struct vmspace { caddr_t vm_shm; /* SYS5 shared memory private data XXX */ /* we copy from vm_startcopy to the end of the structure on fork */ #define vm_startcopy vm_rssize - segsz_t vm_rssize; /* current resident set size in pages */ + segsz_t vm_rssize; /* current resident set size in pages */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ @@ -424,6 +414,7 @@ extern struct vm_map *kmem_map; extern struct vm_map *mb_map; extern struct vm_map *phys_map; + /* * macros */ @@ -434,7 +425,11 @@ extern struct vm_map *phys_map; #endif /* _KERNEL */ +#ifdef pmap_resident_count #define vm_resident_count(vm) (pmap_resident_count((vm)->vm_map.pmap)) +#else +#define vm_resident_count(vm) ((vm)->vm_rssize) +#endif /* XXX clean up later */ struct buf; @@ -469,16 +464,9 @@ void uao_detach_locked __P((struct uvm_object *)); void uao_reference __P((struct uvm_object *)); void uao_reference_locked __P((struct uvm_object *)); -/* uvm_bio.c */ -void ubc_init __P((void)); -void * ubc_alloc __P((struct uvm_object *, voff_t, vsize_t *, - int)); -void ubc_release __P((void *, vsize_t)); -void ubc_flush __P((struct uvm_object *, voff_t, voff_t)); - /* uvm_fault.c */ -int uvm_fault __P((struct vm_map *, vaddr_t, vm_fault_t, - vm_prot_t)); +int uvm_fault __P((vm_map_t, vaddr_t, + vm_fault_t, vm_prot_t)); /* handle a page fault */ /* uvm_glue.c */ @@ -499,53 +487,50 @@ void uvm_vsunlock __P((struct proc *, caddr_t, size_t)); /* uvm_init.c */ -void uvm_init __P((void)); +void uvm_init __P((void)); /* init the uvm system */ /* uvm_io.c */ -int uvm_io __P((struct vm_map *, struct uio *)); +int uvm_io __P((vm_map_t, struct uio *)); /* uvm_km.c */ -vaddr_t uvm_km_alloc1 __P((struct vm_map *, vsize_t, - boolean_t)); -void uvm_km_free __P((struct vm_map *, vaddr_t, vsize_t)); -void uvm_km_free_wakeup __P((struct vm_map *, vaddr_t, - vsize_t)); -vaddr_t uvm_km_kmemalloc __P((struct vm_map *, struct - uvm_object *, vsize_t, int)); -struct vm_map *uvm_km_suballoc __P((struct vm_map *, vaddr_t *, - vaddr_t *, vsize_t, int, boolean_t, - struct vm_map *)); -vaddr_t uvm_km_valloc __P((struct vm_map *, vsize_t)); -vaddr_t uvm_km_valloc_align __P((struct vm_map *, vsize_t, - vsize_t)); -vaddr_t uvm_km_valloc_wait __P((struct vm_map *, vsize_t)); -vaddr_t uvm_km_valloc_prefer_wait __P((struct vm_map *, vsize_t, - voff_t)); 
-vaddr_t uvm_km_alloc_poolpage1 __P((struct vm_map *, - struct uvm_object *, boolean_t)); -void uvm_km_free_poolpage1 __P((struct vm_map *, vaddr_t)); - -#define uvm_km_alloc_poolpage(waitok) \ - uvm_km_alloc_poolpage1(kmem_map, uvmexp.kmem_object, (waitok)) -#define uvm_km_free_poolpage(addr) \ - uvm_km_free_poolpage1(kmem_map, (addr)) +vaddr_t uvm_km_alloc1 __P((vm_map_t, vsize_t, boolean_t)); +void uvm_km_free __P((vm_map_t, vaddr_t, vsize_t)); +void uvm_km_free_wakeup __P((vm_map_t, vaddr_t, + vsize_t)); +vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *, + vsize_t, int)); +struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *, + vaddr_t *, vsize_t, int, + boolean_t, vm_map_t)); +vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_valloc_align __P((vm_map_t, vsize_t, vsize_t)); +vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_valloc_prefer_wait __P((vm_map_t, vsize_t, + voff_t)); +vaddr_t uvm_km_alloc_poolpage1 __P((vm_map_t, + struct uvm_object *, boolean_t)); +void uvm_km_free_poolpage1 __P((vm_map_t, vaddr_t)); + +#define uvm_km_alloc_poolpage(waitok) uvm_km_alloc_poolpage1(kmem_map, \ + uvmexp.kmem_object, (waitok)) +#define uvm_km_free_poolpage(addr) uvm_km_free_poolpage1(kmem_map, (addr)) /* uvm_map.c */ -int uvm_map __P((struct vm_map *, vaddr_t *, vsize_t, +int uvm_map __P((vm_map_t, vaddr_t *, vsize_t, struct uvm_object *, voff_t, vsize_t, uvm_flag_t)); -int uvm_map_pageable __P((struct vm_map *, vaddr_t, +int uvm_map_pageable __P((vm_map_t, vaddr_t, vaddr_t, boolean_t, int)); -int uvm_map_pageable_all __P((struct vm_map *, int, - vsize_t)); -boolean_t uvm_map_checkprot __P((struct vm_map *, vaddr_t, +int uvm_map_pageable_all __P((vm_map_t, int, vsize_t)); +boolean_t uvm_map_checkprot __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t)); -int uvm_map_protect __P((struct vm_map *, vaddr_t, +int uvm_map_protect __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t, boolean_t)); -struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t)); +struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t, + boolean_t)); void uvmspace_init __P((struct vmspace *, struct pmap *, - vaddr_t, vaddr_t)); + vaddr_t, vaddr_t, boolean_t)); void uvmspace_exec __P((struct proc *, vaddr_t, vaddr_t)); struct vmspace *uvmspace_fork __P((struct vmspace *)); void uvmspace_free __P((struct vmspace *)); @@ -555,14 +540,14 @@ void uvmspace_unshare __P((struct proc *)); /* uvm_meter.c */ void uvm_meter __P((void)); -int uvm_sysctl __P((int *, u_int, void *, size_t *, +int uvm_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); void uvm_total __P((struct vmtotal *)); /* uvm_mmap.c */ -int uvm_mmap __P((struct vm_map *, vaddr_t *, vsize_t, - vm_prot_t, vm_prot_t, int, - void *, voff_t, vsize_t)); +int uvm_mmap __P((vm_map_t, vaddr_t *, vsize_t, + vm_prot_t, vm_prot_t, int, + caddr_t, voff_t, vsize_t)); /* uvm_page.c */ struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *, @@ -570,7 +555,9 @@ struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *, #define uvm_pagealloc(obj, off, anon, flags) \ uvm_pagealloc_strat((obj), (off), (anon), (flags), \ UVM_PGA_STRAT_NORMAL, 0) -void uvm_pagerealloc __P((struct vm_page *, +vaddr_t uvm_pagealloc_contig __P((vaddr_t, vaddr_t, + vaddr_t, vaddr_t)); +void uvm_pagerealloc __P((struct vm_page *, struct uvm_object *, voff_t)); /* Actually, uvm_page_physload takes PF#s which need their own type */ void uvm_page_physload __P((paddr_t, paddr_t, @@ -589,28 +576,27 @@ void uvm_aiodone_daemon __P((void *)); /* uvm_pglist.c */ 
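[editor's note] Much of the prototype churn in this header is a spelling change: the revert brings back the vm_map_t / vm_map_entry_t / vm_page_t pointer typedefs (added earlier in this hunk) and uses them in place of the explicit struct-pointer form. A minimal illustration that the two spellings name the same type; the struct body here is a dummy for the example.

#include <stdio.h>

struct vm_map { int dummy; };
typedef struct vm_map *vm_map_t;

/* these two parameter spellings describe exactly the same type */
static void take_map_struct(struct vm_map *map) { (void)map; }
static void take_map_typedef(vm_map_t map) { (void)map; }

int
main(void)
{
	struct vm_map m;

	take_map_struct(&m);
	take_map_typedef(&m);
	printf("vm_map_t is just struct vm_map *\n");
	return 0;
}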
int uvm_pglistalloc __P((psize_t, paddr_t, paddr_t, paddr_t, paddr_t, - struct pglist *, int, int)); + struct pglist *, int, int)); void uvm_pglistfree __P((struct pglist *)); /* uvm_swap.c */ void uvm_swap_init __P((void)); /* uvm_unix.c */ -int uvm_coredump __P((struct proc *, struct vnode *, +int uvm_coredump __P((struct proc *, struct vnode *, struct ucred *, struct core *)); int uvm_grow __P((struct proc *, vaddr_t)); /* uvm_user.c */ -void uvm_deallocate __P((struct vm_map *, vaddr_t, vsize_t)); +int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t)); /* uvm_vnode.c */ void uvm_vnp_setsize __P((struct vnode *, voff_t)); void uvm_vnp_sync __P((struct mount *)); +void uvm_vnp_terminate __P((struct vnode *)); + /* terminate a uvm/uvn object */ +boolean_t uvm_vnp_uncache __P((struct vnode *)); struct uvm_object *uvn_attach __P((void *, vm_prot_t)); -void uvn_findpages __P((struct uvm_object *, voff_t, - int *, struct vm_page **, int)); -void uvm_vnp_zerorange __P((struct vnode *, off_t, size_t)); -void uvm_vnp_asyncget __P((struct vnode *, off_t, size_t)); /* kern_malloc.c */ void kmeminit_nkmempages __P((void)); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index 4e08eaa63a4..6736aa6a8d5 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault.c,v 1.28 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_fault.c,v 1.68 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.56 2001/02/18 21:19:08 chs Exp $ */ /* * @@ -59,7 +59,7 @@ * * CASE 1A CASE 1B CASE 2A CASE 2B * read/write1 write>1 read/write +-cow_write/zero - * | | | | + * | | | | * +--|--+ +--|--+ +-----+ + | + | +-----+ * amap | V | | ----------->new| | | | ^ | * +-----+ +-----+ +-----+ + | + | +--|--+ @@ -69,7 +69,7 @@ * +-----+ +-----+ +-----+ +-----+ * * d/c = don't care - * + * * case [0]: layerless fault * no amap or uobj is present. this is an error. * @@ -83,17 +83,17 @@ * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] * I/O takes place directly in object. * 2B: [write to copy_on_write] or [read on NULL uobj] - * data is "promoted" from uobj to a new anon. + * data is "promoted" from uobj to a new anon. * if uobj is null, then we zero fill. * * we follow the standard UVM locking protocol ordering: * - * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) + * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) * we hold a PG_BUSY page if we unlock for I/O * * * the code is structured as follows: - * + * * - init the "IN" params in the ufi structure * ReFault: * - do lookups [locks maps], check protection, handle needs_copy @@ -125,7 +125,7 @@ * * alternative 1: unbusy the page in question and restart the page fault * from the top (ReFault). this is easy but does not take advantage - * of the information that we already have from our previous lookup, + * of the information that we already have from our previous lookup, * although it is possible that the "hints" in the vm_map will help here. * * alternative 2: the system already keeps track of a "version" number of @@ -159,7 +159,7 @@ struct uvm_advice { /* * page range array: - * note: index in array must match "advice" value + * note: index in array must match "advice" value * XXX: borrowed numbers from freebsd. do they work well for us? 
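[editor's note] The big comment above classifies faults into cases 1A/1B/2A/2B. A plain-C decision table capturing that taxonomy; the inputs stand for "is there an amap", "is there an anon at the faulting address", "is there a backing object", "is this a write", and "is the mapping copy-on-write".

#include <stdio.h>

static const char *
classify(int have_amap, int have_anon, int have_uobj, int is_write,
    int is_cow, int anon_refs)
{
	if (!have_amap && !have_uobj)
		return "case 0: layerless fault (error)";
	if (have_anon) {
		if (is_write && anon_refs > 1)
			return "case 1B: copy the anon, then write the copy";
		return "case 1A: handled entirely in the anon layer";
	}
	if (!have_uobj || (is_write && is_cow))
		return "case 2B: promote into a new anon (zero-fill if no uobj)";
	return "case 2A: handled directly in the backing object";
}

int
main(void)
{
	printf("%s\n", classify(1, 0, 1, 1, 1, 0));	/* write to a COW file mapping */
	printf("%s\n", classify(1, 1, 0, 1, 0, 2));	/* write to a shared anon */
	printf("%s\n", classify(0, 0, 1, 0, 0, 0));	/* read from a file mapping */
	return 0;
}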
*/ @@ -195,7 +195,7 @@ uvmfault_anonflush(anons, n) { int lcv; struct vm_page *pg; - + for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; @@ -204,7 +204,11 @@ uvmfault_anonflush(anons, n) if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) { uvm_lock_pageq(); if (pg->wire_count == 0) { +#ifdef UBC pmap_clear_reference(pg); +#else + pmap_page_protect(pg, VM_PROT_NONE); +#endif uvm_pagedeactivate(pg); } uvm_unlock_pageq(); @@ -248,7 +252,7 @@ uvmfault_amapcopy(ufi) */ if (UVM_ET_ISNEEDSCOPY(ufi->entry)) - amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, + amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, ufi->orig_rvaddr, ufi->orig_rvaddr + 1); /* @@ -264,7 +268,7 @@ uvmfault_amapcopy(ufi) /* * got it! unlock and return. */ - + uvmfault_unlockmaps(ufi, TRUE); return; } @@ -276,7 +280,7 @@ uvmfault_amapcopy(ufi) * page in that anon. * * => maps, amap, and anon locked by caller. - * => if we fail (result != 0) we unlock everything. + * => if we fail (result != VM_PAGER_OK) we unlock everything. * => if we are successful, we return with everything still locked. * => we don't move the page on the queues [gets moved later] * => if we allocate a new page [we_own], it gets put on the queues. @@ -296,12 +300,12 @@ uvmfault_anonget(ufi, amap, anon) boolean_t we_own; /* we own anon's page? */ boolean_t locked; /* did we relock? */ struct vm_page *pg; - int error; + int result; UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - error = 0; + result = 0; /* XXX shut up gcc */ uvmexp.fltanget++; /* bump rusage counters */ if (anon->u.an_page) @@ -309,7 +313,7 @@ uvmfault_anonget(ufi, amap, anon) else curproc->p_addr->u_stats.p_ru.ru_majflt++; - /* + /* * loop until we get it, or fail. */ @@ -342,7 +346,7 @@ uvmfault_anonget(ufi, amap, anon) if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) { UVMHIST_LOG(maphist, "<- OK",0,0,0,0); - return (0); + return (VM_PAGER_OK); } pg->flags |= PG_WANTED; uvmexp.fltpgwait++; @@ -369,7 +373,7 @@ uvmfault_anonget(ufi, amap, anon) /* ready to relock and try again */ } else { - + /* * no page, we must try and bring it in. */ @@ -385,9 +389,9 @@ uvmfault_anonget(ufi, amap, anon) /* ready to relock and try again */ } else { - + /* we set the PG_BUSY bit */ - we_own = TRUE; + we_own = TRUE; uvmfault_unlockall(ufi, amap, NULL, anon); /* @@ -398,7 +402,7 @@ uvmfault_anonget(ufi, amap, anon) * we hold PG_BUSY on the page. */ uvmexp.pageins++; - error = uvm_swap_get(pg, anon->an_swslot, + result = uvm_swap_get(pg, anon->an_swslot, PGO_SYNCIO); /* @@ -425,23 +429,23 @@ uvmfault_anonget(ufi, amap, anon) * to clean up after the I/O. there are three cases to * consider: * [1] page released during I/O: free anon and ReFault. - * [2] I/O not OK. free the page and cause the fault + * [2] I/O not OK. free the page and cause the fault * to fail. * [3] I/O OK! activate the page and sync with the * non-we_own case (i.e. drop anon lock if not locked). */ - + if (we_own) { if (pg->flags & PG_WANTED) { /* still holding object lock */ - wakeup(pg); + wakeup(pg); } /* un-busy! */ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); - /* + /* * if we were RELEASED during I/O, then our anon is * no longer part of an amap. we need to free the * anon and try again. @@ -455,10 +459,12 @@ uvmfault_anonget(ufi, amap, anon) NULL); uvmexp.fltpgrele++; UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); /* refault! */ + return (VM_PAGER_REFAULT); /* refault! 
*/ } - if (error) { + if (result != VM_PAGER_OK) { + KASSERT(result != VM_PAGER_PEND); + /* remove page from anon */ anon->u.an_page = NULL; @@ -486,9 +492,9 @@ uvmfault_anonget(ufi, amap, anon) else simple_unlock(&anon->an_lock); UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); - return error; + return (VM_PAGER_ERROR); } - + /* * must be OK, clear modify (already PG_CLEAN) * and activate @@ -507,7 +513,7 @@ uvmfault_anonget(ufi, amap, anon) if (!locked) { UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); + return (VM_PAGER_REFAULT); } /* @@ -515,16 +521,16 @@ uvmfault_anonget(ufi, amap, anon) */ if (ufi != NULL && - amap_lookup(&ufi->entry->aref, + amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start) != anon) { - + uvmfault_unlockall(ufi, amap, NULL, anon); UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); - return (ERESTART); + return (VM_PAGER_REFAULT); } - + /* - * try it again! + * try it again! */ uvmexp.fltanretry++; @@ -543,12 +549,11 @@ uvmfault_anonget(ufi, amap, anon) * uvm_fault: page fault handler * * => called from MD code to resolve a page fault - * => VM data structures usually should be unlocked. however, it is + * => VM data structures usually should be unlocked. however, it is * possible to call here with the main map locked if the caller * gets a write lock, sets it recusive, and then calls us (c.f. * uvm_map_pageable). this should be avoided because it keeps * the map locked off during I/O. - * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT */ #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ @@ -556,7 +561,7 @@ uvmfault_anonget(ufi, amap, anon) int uvm_fault(orig_map, vaddr, fault_type, access_type) - struct vm_map *orig_map; + vm_map_t orig_map; vaddr_t vaddr; vm_fault_t fault_type; vm_prot_t access_type; @@ -564,9 +569,9 @@ uvm_fault(orig_map, vaddr, fault_type, access_type) struct uvm_faultinfo ufi; vm_prot_t enter_prot; boolean_t wired, narrow, promote, locked, shadowed; - int npages, nback, nforw, centeridx, error, lcv, gotpages; + int npages, nback, nforw, centeridx, result, lcv, gotpages; vaddr_t startva, objaddr, currva, offset, uoff; - paddr_t pa; + paddr_t pa; struct vm_amap *amap; struct uvm_object *uobj; struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon; @@ -595,6 +600,19 @@ uvm_fault(orig_map, vaddr, fault_type, access_type) narrow = FALSE; /* normal fault */ /* + * before we do anything else, if this is a fault on a kernel + * address, check to see if the address is managed by an + * interrupt-safe map. If it is, we fail immediately. Intrsafe + * maps are never pageable, and this approach avoids an evil + * locking mess. + */ + if (orig_map == kernel_map && uvmfault_check_intrsafe(&ufi)) { + UVMHIST_LOG(maphist, "<- VA 0x%lx in intrsafe map %p", + ufi.orig_rvaddr, ufi.map, 0, 0); + return (KERN_FAILURE); + } + + /* * "goto ReFault" means restart the page fault from ground zero. 
*/ ReFault: @@ -605,20 +623,10 @@ ReFault: if (uvmfault_lookup(&ufi, FALSE) == FALSE) { UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0); - return (EFAULT); + return (KERN_INVALID_ADDRESS); } /* locked: maps(read) */ -#ifdef DIAGNOSTIC - if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { - printf("Page fault on non-pageable map:\n"); - printf("ufi.map = %p\n", ufi.map); - printf("ufi.orig_map = %p\n", ufi.orig_map); - printf("ufi.orig_rvaddr = 0x%lx\n", (u_long) ufi.orig_rvaddr); - panic("uvm_fault: (ufi.map->flags & VM_MAP_PAGEABLE) == 0"); - } -#endif - /* * check protection */ @@ -628,7 +636,18 @@ ReFault: "<- protection failure (prot=0x%x, access=0x%x)", ufi.entry->protection, access_type, 0, 0); uvmfault_unlockmaps(&ufi, FALSE); - return EACCES; + return (KERN_PROTECTION_FAILURE); + } + + /* + * if the map is not a pageable map, a page fault always fails. + */ + + if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { + UVMHIST_LOG(maphist, + "<- map %p not pageable", ufi.map, 0, 0, 0); + uvmfault_unlockmaps(&ufi, FALSE); + return (KERN_FAILURE); } /* @@ -667,7 +686,7 @@ ReFault: * ensure that we pmap_enter page R/O since * needs_copy is still true */ - enter_prot &= ~VM_PROT_WRITE; + enter_prot &= ~VM_PROT_WRITE; } } @@ -687,13 +706,13 @@ ReFault: if (amap == NULL && uobj == NULL) { uvmfault_unlockmaps(&ufi, FALSE); UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); - return (EFAULT); + return (KERN_INVALID_ADDRESS); } /* * establish range of interest based on advice from mapper * and then clip to fit map entry. note that we only want - * to do this the first time through the fault. if we + * to do this the first time through the fault. if we * ReFault we will disable this by setting "narrow" to true. */ @@ -718,7 +737,7 @@ ReFault: narrow = TRUE; /* ensure only once per-fault */ } else { - + /* narrow fault! */ nback = nforw = 0; startva = ufi.orig_rvaddr; @@ -758,7 +777,7 @@ ReFault: UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", 0,0,0,0); /* flush back-page anons? */ - if (amap) + if (amap) uvmfault_anonflush(anons, nback); /* flush object? */ @@ -766,7 +785,7 @@ ReFault: objaddr = (startva - ufi.entry->start) + ufi.entry->offset; simple_lock(&uobj->vmobjlock); - (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr + + (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr + (nback << PAGE_SHIFT), PGO_DEACTIVATE); simple_unlock(&uobj->vmobjlock); } @@ -845,12 +864,11 @@ ReFault: (VM_MAPENT_ISWIRED(ufi.entry) ? PMAP_WIRED : 0)); } simple_unlock(&anon->an_lock); - pmap_update(ufi.orig_map->pmap); } /* locked: maps(read), amap(if there) */ /* (shadowed == TRUE) if there is an anon at the faulting address */ - UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, + UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, (uobj && shadowed == FALSE),0,0); /* @@ -860,7 +878,7 @@ ReFault: * XXX Actually, that is bad; pmap_enter() should just fail in that * XXX case. 
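[editor's note] A rough sketch of the "range of interest" computation the code above refers to: the madvise-style advice suggests how many pages to bring in behind (nback) and ahead of (nforw) the faulting page, and both are clipped so the range stays inside the map entry. The advice numbers below are assumptions for the example.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int
main(void)
{
	unsigned long entry_start = 0x10000000UL;
	unsigned long entry_end   = 0x10004000UL;	/* a 4-page entry */
	unsigned long fault_va    = 0x10001000UL;	/* fault on its 2nd page */
	unsigned long adv_nback = 3, adv_nforw = 4;	/* assumed advice */

	unsigned long room_back = (fault_va - entry_start) >> PAGE_SHIFT;
	unsigned long room_forw = ((entry_end - fault_va) >> PAGE_SHIFT) - 1;
	unsigned long nback = adv_nback < room_back ? adv_nback : room_back;
	unsigned long nforw = adv_nforw < room_forw ? adv_nforw : room_forw;
	unsigned long startva = fault_va - (nback << PAGE_SHIFT);
	unsigned long npages = nback + nforw + 1;

	printf("fault at 0x%lx -> look at %lu pages starting at 0x%lx\n",
	    fault_va, npages, startva);
	return 0;
}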
--thorpej */ - + /* * if the desired page is not shadowed by the amap and we have a * backing object, then we check to see if the backing object would @@ -873,17 +891,18 @@ ReFault: simple_lock(&uobj->vmobjlock); /* locked: maps(read), amap (if there), uobj */ - error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, - centeridx, fault_type, access_type, PGO_LOCKED|PGO_SYNCIO); + result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, + centeridx, fault_type, access_type, + PGO_LOCKED|PGO_SYNCIO); /* locked: nothing, pgo_fault has unlocked everything */ - if (error == ERESTART) + if (result == VM_PAGER_OK) + return (KERN_SUCCESS); /* pgo_fault did pmap enter */ + else if (result == VM_PAGER_REFAULT) goto ReFault; /* try again! */ - /* - * object fault routine responsible for pmap_update(). - */ - return error; + else + return (KERN_PROTECTION_FAILURE); } /* @@ -936,16 +955,16 @@ ReFault: * us a handle to it. remember this * page as "uobjpage." (for later use). */ - + if (lcv == centeridx) { uobjpage = pages[lcv]; UVMHIST_LOG(maphist, " got uobjpage " - "(0x%x) with locked get", + "(0x%x) with locked get", uobjpage, 0,0,0); continue; } - - /* + + /* * note: calling pgo_get with locked data * structures returns us pages which are * neither busy nor released, so we don't @@ -976,7 +995,7 @@ ReFault: PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)); - /* + /* * NOTE: page can't be PG_WANTED or PG_RELEASED * because we've held the lock the whole time * we've had the handle. @@ -985,7 +1004,6 @@ ReFault: pages[lcv]->flags &= ~(PG_BUSY); /* un-busy! */ UVM_PAGE_OWN(pages[lcv], NULL); } /* for "lcv" loop */ - pmap_update(ufi.orig_map->pmap); } /* "gotpages" != 0 */ /* note: object still _locked_ */ } else { @@ -993,7 +1011,7 @@ ReFault: } /* locked (shadowed): maps(read), amap */ - /* locked (!shadowed): maps(read), amap(if there), + /* locked (!shadowed): maps(read), amap(if there), uobj(if !null), uobjpage(if !null) */ /* @@ -1015,7 +1033,7 @@ ReFault: * redirect case 2: if we are not shadowed, go to case 2. */ - if (shadowed == FALSE) + if (shadowed == FALSE) goto Case2; /* locked: maps(read), amap */ @@ -1044,20 +1062,24 @@ ReFault: * lock that object for us if it does not fail. */ - error = uvmfault_anonget(&ufi, amap, anon); - switch (error) { - case 0: - break; + result = uvmfault_anonget(&ufi, amap, anon); + switch (result) { + case VM_PAGER_OK: + break; - case ERESTART: + case VM_PAGER_REFAULT: goto ReFault; - case EAGAIN: + case VM_PAGER_AGAIN: tsleep(&lbolt, PVM, "fltagain1", 0); goto ReFault; default: - return error; +#ifdef DIAGNOSTIC + panic("uvm_fault: uvmfault_anonget -> %d", result); +#else + return (KERN_PROTECTION_FAILURE); +#endif } /* @@ -1069,13 +1091,13 @@ ReFault: /* locked: maps(read), amap, anon, uobj(if one) */ /* - * special handling for loaned pages + * special handling for loaned pages */ if (anon->u.an_page->loan_count) { if ((access_type & VM_PROT_WRITE) == 0) { - + /* * for read faults on loaned pages we just cap the * protection at read-only. @@ -1151,8 +1173,8 @@ ReFault: * also note that the ref count can't drop to zero here because * it is > 1 and we are only dropping one ref. * - * in the (hopefully very rare) case that we are out of RAM we - * will unlock, wait for more RAM, and refault. + * in the (hopefully very rare) case that we are out of RAM we + * will unlock, wait for more RAM, and refault. * * if we are out of anon VM we kill the process (XXX: could wait?). */ @@ -1181,7 +1203,7 @@ ReFault: UVMHIST_LOG(maphist, "<- failed. 
out of VM",0,0,0,0); uvmexp.fltnoanon++; - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } uvmexp.fltnoram++; @@ -1229,7 +1251,7 @@ ReFault: ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0); if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) - != 0) { + != KERN_SUCCESS) { /* * No need to undo what we did; we can simply think of * this as the pmap throwing away the mapping information. @@ -1245,7 +1267,7 @@ ReFault: UVMHIST_LOG(maphist, "<- failed. out of VM",0,0,0,0); /* XXX instrumentation */ - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } /* XXX instrumentation */ uvm_wait("flt_pmfail1"); @@ -1284,8 +1306,7 @@ ReFault: if (anon != oanon) simple_unlock(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); - pmap_update(ufi.orig_map->pmap); - return 0; + return (KERN_SUCCESS); Case2: @@ -1306,7 +1327,7 @@ Case2: */ if (uobj == NULL) { - uobjpage = PGO_DONTCARE; + uobjpage = PGO_DONTCARE; promote = TRUE; /* always need anon here */ } else { KASSERT(uobjpage != PGO_DONTCARE); @@ -1320,7 +1341,7 @@ Case2: * if uobjpage is not null then we do not need to do I/O to get the * uobjpage. * - * if uobjpage is null, then we need to unlock and ask the pager to + * if uobjpage is null, then we need to unlock and ask the pager to * get the data for us. once we have the data, we need to reverify * the state the world. we are currently not holding any resources. */ @@ -1331,7 +1352,7 @@ Case2: } else { /* update rusage counters */ curproc->p_addr->u_stats.p_ru.ru_majflt++; - + /* locked: maps(read), amap(if there), uobj */ uvmfault_unlockall(&ufi, amap, NULL, NULL); /* locked: uobj */ @@ -1339,27 +1360,29 @@ Case2: uvmexp.fltget++; gotpages = 1; uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset; - error = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, + result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, 0, access_type & MASK(ufi.entry), ufi.entry->advice, PGO_SYNCIO); - /* locked: uobjpage(if no error) */ + /* locked: uobjpage(if result OK) */ /* * recover from I/O */ - if (error) { - if (error == EAGAIN) { + if (result != VM_PAGER_OK) { + KASSERT(result != VM_PAGER_PEND); + + if (result == VM_PAGER_AGAIN) { UVMHIST_LOG(maphist, " pgo_get says TRY AGAIN!",0,0,0,0); - tsleep(&lbolt, PVM, "fltagain2", 0); + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); goto ReFault; } UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)", - error, 0,0,0); - return error; + result, 0,0,0); + return (KERN_PROTECTION_FAILURE); /* XXX i/o error */ } /* locked: uobjpage */ @@ -1373,7 +1396,7 @@ Case2: if (locked && amap) amap_lock(amap); simple_lock(&uobj->vmobjlock); - + /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ /* locked(!locked): uobj, uobjpage */ @@ -1384,10 +1407,10 @@ Case2: */ if ((uobjpage->flags & PG_RELEASED) != 0 || - (locked && amap && + (locked && amap && amap_lookup(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start))) { - if (locked) + if (locked) uvmfault_unlockall(&ufi, amap, NULL, NULL); locked = FALSE; } @@ -1399,7 +1422,7 @@ Case2: if (locked == FALSE) { UVMHIST_LOG(maphist, - " wasn't able to relock after fault: retry", + " wasn't able to relock after fault: retry", 0,0,0,0); if (uobjpage->flags & PG_WANTED) /* still holding object lock */ @@ -1449,7 +1472,7 @@ Case2: * for it above) * - at this point uobjpage could be PG_WANTED (handle later) */ - + if (promote == FALSE) { /* @@ -1553,7 +1576,7 @@ Case2: } /* if loan_count */ } else { - + /* * if we 
are going to promote the data to an anon we * allocate a blank anon here and plug it into our amap. @@ -1610,7 +1633,7 @@ Case2: UVMHIST_LOG(maphist, " promote: out of VM", 0,0,0,0); uvmexp.fltnoanon++; - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } UVMHIST_LOG(maphist, " out of RAM, waiting for more", @@ -1635,11 +1658,8 @@ Case2: */ if ((amap_flags(amap) & AMAP_SHARED) != 0) { pmap_page_protect(uobjpage, VM_PROT_NONE); - /* - * XXX: PAGE MIGHT BE WIRED! - */ } - + /* * dispose of uobjpage. it can't be PG_RELEASED * since we still hold the object lock. @@ -1694,7 +1714,8 @@ Case2: KASSERT(access_type == VM_PROT_READ || (pg->flags & PG_RDONLY) == 0); if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), pg->flags & PG_RDONLY ? VM_PROT_READ : enter_prot, - access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) != 0) { + access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) + != KERN_SUCCESS) { /* * No need to undo what we did; we can simply think of @@ -1707,11 +1728,11 @@ Case2: if (pg->flags & PG_WANTED) wakeup(pg); /* lock still held */ - /* + /* * note that pg can't be PG_RELEASED since we did not drop * the object lock since the last time we checked. */ - + pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, anon); @@ -1720,7 +1741,7 @@ Case2: UVMHIST_LOG(maphist, "<- failed. out of VM",0,0,0,0); /* XXX instrumentation */ - return ENOMEM; + return (KERN_RESOURCE_SHORTAGE); } /* XXX instrumentation */ uvm_wait("flt_pmfail2"); @@ -1752,19 +1773,17 @@ Case2: if (pg->flags & PG_WANTED) wakeup(pg); /* lock still held */ - /* - * note that pg can't be PG_RELEASED since we did not drop the object + /* + * note that pg can't be PG_RELEASED since we did not drop the object * lock since the last time we checked. */ - + pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); UVM_PAGE_OWN(pg, NULL); uvmfault_unlockall(&ufi, amap, uobj, anon); - pmap_update(ufi.orig_map->pmap); - UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } @@ -1779,37 +1798,33 @@ Case2: int uvm_fault_wire(map, start, end, access_type) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; vm_prot_t access_type; { vaddr_t va; - int error; + pmap_t pmap; + int rv; + + pmap = vm_map_pmap(map); /* * now fault it in a page at a time. if the fault fails then we have - * to undo what we have done. note that in uvm_fault VM_PROT_NONE + * to undo what we have done. note that in uvm_fault VM_PROT_NONE * is replaced with the max protection if fault_type is VM_FAULT_WIRE. */ - /* - * XXX work around overflowing a vaddr_t. this prevents us from - * wiring the last page in the address space, though. 
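[editor's note] The guard removed just below ("if (start > end)") worked around the exclusive end address overflowing a vaddr_t. A small illustration with a 32-bit "address" type: if a caller asks to wire up to and including the last page of the address space, end wraps to 0 and the per-page loop does nothing, which is exactly what the start > end check caught.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096u

int
main(void)
{
	uint32_t start = 0xfffff000u;		/* last page of a 32-bit space */
	uint32_t end = start + PAGE_SIZE;	/* wraps around to 0 */
	uint32_t va;
	int pages = 0;

	for (va = start; va < end; va += PAGE_SIZE)
		pages++;

	printf("end = 0x%08x, pages visited = %d%s\n", end, pages,
	    start > end ? " (caught by the start > end check)" : "");
	return 0;
}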
- */ - if (start > end) { - return EFAULT; - } - for (va = start ; va < end ; va += PAGE_SIZE) { - error = uvm_fault(map, va, VM_FAULT_WIRE, access_type); - if (error) { + rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type); + if (rv) { if (va != start) { uvm_fault_unwire(map, start, va); } - return error; + return (rv); } } - return 0; + + return (KERN_SUCCESS); } /* @@ -1818,7 +1833,7 @@ uvm_fault_wire(map, start, end, access_type) void uvm_fault_unwire(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; { @@ -1835,10 +1850,10 @@ uvm_fault_unwire(map, start, end) void uvm_fault_unwire_locked(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; { - struct vm_map_entry *entry; + vm_map_entry_t entry; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h index 8bb25b00b12..a0a80dca0a2 100644 --- a/sys/uvm/uvm_fault.h +++ b/sys/uvm/uvm_fault.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault.h,v 1.10 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_fault.h,v 1.15 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_fault.h,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault.h,v 1.14 2000/06/26 14:21:17 mrg Exp $ */ /* * @@ -57,12 +57,12 @@ struct uvm_faultinfo { - struct vm_map *orig_map; /* IN: original map */ + vm_map_t orig_map; /* IN: original map */ vaddr_t orig_rvaddr; /* IN: original rounded VA */ vsize_t orig_size; /* IN: original size of interest */ - struct vm_map *map; /* map (could be a submap) */ + vm_map_t map; /* map (could be a submap) */ unsigned int mapv; /* map's version number */ - struct vm_map_entry *entry; /* map entry (from 'map') */ + vm_map_entry_t entry; /* map entry (from 'map') */ vsize_t size; /* size of interest */ }; @@ -76,9 +76,9 @@ struct uvm_faultinfo { int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *, struct vm_anon *)); -int uvm_fault_wire __P((struct vm_map *, vaddr_t, vaddr_t, vm_prot_t)); -void uvm_fault_unwire __P((struct vm_map *, vaddr_t, vaddr_t)); -void uvm_fault_unwire_locked __P((struct vm_map *, vaddr_t, vaddr_t)); +int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t)); +void uvm_fault_unwire __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_fault_unwire_locked __P((vm_map_t, vaddr_t, vaddr_t)); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h index f262e48f09f..8f8edb93d6a 100644 --- a/sys/uvm/uvm_fault_i.h +++ b/sys/uvm/uvm_fault_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_fault_i.h,v 1.8 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_fault_i.h,v 1.14 2001/06/26 17:55:15 thorpej Exp $ */ +/* $OpenBSD: uvm_fault_i.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_fault_i.h,v 1.11 2000/06/26 14:21:17 mrg Exp $ */ /* * @@ -41,6 +41,7 @@ /* * uvm_fault_i.h: fault inline functions */ +static boolean_t uvmfault_check_intrsafe __P((struct uvm_faultinfo *)); static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t)); static boolean_t uvmfault_relock __P((struct uvm_faultinfo *)); static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *, @@ -96,6 +97,39 @@ uvmfault_unlockall(ufi, amap, uobj, anon) } /* + * uvmfault_check_intrsafe: check for a virtual address managed by + * an interrupt-safe map. 
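[editor's note] The check being brought back here (uvmfault_check_intrsafe, continued just below) walks the registered interrupt-safe maps and refuses to handle a fault on any address inside one of them. A user-space sketch of the same range test, with the "maps" reduced to min/max pairs in an array; the addresses are hypothetical.

#include <stdio.h>

struct range {
	unsigned long min, max;		/* [min, max) */
};

static int
in_intrsafe_map(const struct range *maps, int n, unsigned long va)
{
	int i;

	for (i = 0; i < n; i++)
		if (va >= maps[i].min && va < maps[i].max)
			return 1;
	return 0;
}

int
main(void)
{
	struct range intrsafe[] = {
		{ 0xd0000000UL, 0xd0100000UL },	/* hypothetical ranges */
		{ 0xd0200000UL, 0xd0280000UL },
	};

	printf("fault at 0xd0040000 rejected? %d\n",
	    in_intrsafe_map(intrsafe, 2, 0xd0040000UL));
	printf("fault at 0xd0180000 rejected? %d\n",
	    in_intrsafe_map(intrsafe, 2, 0xd0180000UL));
	return 0;
}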
+ * + * => caller must provide a uvm_faultinfo structure with the IN + * params properly filled in + * => if we find an intersafe VA, we fill in ufi->map, and return TRUE + */ + +static __inline boolean_t +uvmfault_check_intrsafe(ufi) + struct uvm_faultinfo *ufi; +{ + struct vm_map_intrsafe *vmi; + int s; + + s = vmi_list_lock(); + for (vmi = LIST_FIRST(&vmi_list); vmi != NULL; + vmi = LIST_NEXT(vmi, vmi_list)) { + if (ufi->orig_rvaddr >= vm_map_min(&vmi->vmi_map) && + ufi->orig_rvaddr < vm_map_max(&vmi->vmi_map)) + break; + } + vmi_list_unlock(s); + + if (vmi != NULL) { + ufi->map = &vmi->vmi_map; + return (TRUE); + } + + return (FALSE); +} + +/* * uvmfault_lookup: lookup a virtual address in a map * * => caller must provide a uvm_faultinfo structure with the IN @@ -104,7 +138,7 @@ uvmfault_unlockall(ufi, amap, uobj, anon) * => if the lookup is a success we will return with the maps locked * => if "write_lock" is TRUE, we write_lock the map, otherwise we only * get a read lock. - * => note that submaps can only appear in the kernel and they are + * => note that submaps can only appear in the kernel and they are * required to use the same virtual addresses as the map they * are referenced by (thus address translation between the main * map and the submap is unnecessary). @@ -115,7 +149,7 @@ uvmfault_lookup(ufi, write_lock) struct uvm_faultinfo *ufi; boolean_t write_lock; { - struct vm_map *tmpmap; + vm_map_t tmpmap; /* * init ufi values for lookup. @@ -130,13 +164,6 @@ uvmfault_lookup(ufi, write_lock) */ while (1) { - /* - * Make sure this is not an "interrupt safe" map. - * Such maps are never supposed to be involved in - * a fault. - */ - if (ufi->map->flags & VM_MAP_INTRSAFE) - return (FALSE); /* * lock map @@ -150,7 +177,7 @@ uvmfault_lookup(ufi, write_lock) /* * lookup */ - if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, + if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, &ufi->entry)) { uvmfault_unlockmaps(ufi, write_lock); return(FALSE); @@ -212,7 +239,7 @@ uvmfault_relock(ufi) uvmexp.fltrelck++; /* - * relock map. fail if version mismatch (in which case nothing + * relock map. fail if version mismatch (in which case nothing * gets locked). */ diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c index 7e6057194f1..b8840cf8f92 100644 --- a/sys/uvm/uvm_glue.c +++ b/sys/uvm/uvm_glue.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_glue.c,v 1.30 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_glue.c,v 1.51 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_glue.c,v 1.31 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
- * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -148,7 +148,7 @@ uvm_useracc(addr, len, rw) size_t len; int rw; { - struct vm_map *map; + vm_map_t map; boolean_t rv; vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; @@ -191,12 +191,14 @@ uvm_chgkprot(addr, len, rw) for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) { /* * Extract physical address for the page. + * We use a cheezy hack to differentiate physical + * page 0 from an invalid mapping, not that it + * really matters... */ if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE) panic("chgkprot: invalid page"); pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED); } - pmap_update(pmap_kernel()); } #endif @@ -214,15 +216,17 @@ uvm_vslock(p, addr, len, access_type) size_t len; vm_prot_t access_type; { - struct vm_map *map; + vm_map_t map; vaddr_t start, end; - int error; + int rv; map = &p->p_vmspace->vm_map; start = trunc_page((vaddr_t)addr); end = round_page((vaddr_t)addr + len); - error = uvm_fault_wire(map, start, end, access_type); - return error; + + rv = uvm_fault_wire(map, start, end, access_type); + + return (rv); } /* @@ -267,7 +271,7 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) void *arg; { struct user *up = p2->p_addr; - int error; + int rv; if (shared == TRUE) { p2->p_vmspace = NULL; @@ -284,10 +288,10 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) * Note the kernel stack gets read/write accesses right off * the bat. */ - error = uvm_fault_wire(kernel_map, (vaddr_t)up, + rv = uvm_fault_wire(kernel_map, (vaddr_t)up, (vaddr_t)up + USPACE, VM_PROT_READ | VM_PROT_WRITE); - if (error) - panic("uvm_fork: uvm_fault_wire failed: %d", error); + if (rv != KERN_SUCCESS) + panic("uvm_fork: uvm_fault_wire failed: %d", rv); /* * p_stats currently points at a field in the user struct. Copy @@ -300,7 +304,7 @@ uvm_fork(p1, p2, shared, stack, stacksize, func, arg) memcpy(&up->u_stats.pstat_startcopy, &p1->p_stats->pstat_startcopy, ((caddr_t)&up->u_stats.pstat_endcopy - (caddr_t)&up->u_stats.pstat_startcopy)); - + /* * cpu_fork() copy and update the pcb, and make the child ready * to run. If this is a normal user fork, the child will exit @@ -500,7 +504,7 @@ uvm_swapout_threads() struct proc *outp, *outp2; int outpri, outpri2; int didswap = 0; - extern int maxslp; + extern int maxslp; /* XXXCDC: should move off to uvmexp. 
or uvm., also in uvm_meter */ #ifdef DEBUG @@ -524,7 +528,7 @@ uvm_swapout_threads() outpri2 = p->p_swtime; } continue; - + case SSLEEP: case SSTOP: if (p->p_slptime >= maxslp) { @@ -559,7 +563,7 @@ uvm_swapout_threads() /* * uvm_swapout: swap out process "p" * - * - currently "swapout" means "unwire U-area" and "pmap_collect()" + * - currently "swapout" means "unwire U-area" and "pmap_collect()" * the pmap. * - XXXCDC: should deactivate all process' private anonymous memory */ diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c index 100e82cfe3b..ecb007827dc 100644 --- a/sys/uvm/uvm_io.c +++ b/sys/uvm/uvm_io.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_io.c,v 1.12 2001/11/28 19:28:14 art Exp $ */ -/* $NetBSD: uvm_io.c,v 1.15 2001/06/02 18:09:26 chs Exp $ */ +/* $OpenBSD: uvm_io.c,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */ /* * @@ -61,12 +61,12 @@ int uvm_io(map, uio) - struct vm_map *map; + vm_map_t map; struct uio *uio; { vaddr_t baseva, endva, pageoffset, kva; vsize_t chunksz, togo, sz; - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; int error; /* @@ -106,7 +106,7 @@ uvm_io(map, uio) */ error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, - UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | + UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT); if (error) { @@ -138,7 +138,8 @@ uvm_io(map, uio) */ vm_map_lock(kernel_map); - uvm_unmap_remove(kernel_map, kva, kva + chunksz, &dead_entries); + (void)uvm_unmap_remove(kernel_map, kva, kva+chunksz, + &dead_entries); vm_map_unlock(kernel_map); if (dead_entries != NULL) diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index afc2ac92d10..652ddafcc77 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_km.c,v 1.26 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_km.c,v 1.51 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.27 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -78,11 +78,11 @@ * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS. * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map). * - * the kernel_map has several "submaps." submaps can only appear in + * the kernel_map has several "submaps." submaps can only appear in * the kernel_map (user processes can't use them). submaps "take over" * the management of a sub-range of the kernel's address space. submaps * are typically allocated at boot time and are never released. kernel - * virtual address space that is mapped by a submap is locked by the + * virtual address space that is mapped by a submap is locked by the * submap's lock -- not the kernel_map's lock. * * thus, the useful feature of submaps is that they allow us to break @@ -102,19 +102,19 @@ * the kernel allocates its private memory out of special uvm_objects whose * reference count is set to UVM_OBJ_KERN (thus indicating that the objects * are "special" and never die). all kernel objects should be thought of - * as large, fixed-sized, sparsely populated uvm_objects. each kernel + * as large, fixed-sized, sparsely populated uvm_objects. each kernel * object is equal to the size of kernel virtual address space (i.e. the * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS"). * * most kernel private memory lives in kernel_object. the only exception * to this is for memory that belongs to submaps that must be protected - * by splvm(). each of these submaps has their own private kernel + * by splvm(). each of these submaps has their own private kernel * object (e.g. kmem_object, mb_object). * * note that just because a kernel object spans the entire kernel virutal * address space doesn't mean that it has to be mapped into the entire space. - * large chunks of a kernel object's space go unused either because - * that area of kernel VM is unmapped, or there is some other type of + * large chunks of a kernel object's space go unused either because + * that area of kernel VM is unmapped, or there is some other type of * object mapped into that range (e.g. a vnode). for submap's kernel * objects, the only part of the object that can ever be populated is the * offsets that are managed by the submap. @@ -126,7 +126,7 @@ * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, * then that means that the page at offset 0x235000 in kernel_object is - * mapped at 0xf8235000. + * mapped at 0xf8235000. * * note that the offsets in kmem_object and mb_object also follow this * rule. this means that the offsets for kmem_object must fall in the @@ -151,7 +151,10 @@ * global data structures */ -struct vm_map *kernel_map = NULL; +vm_map_t kernel_map = NULL; + +struct vmi_list vmi_list; +simple_lock_data_t vmi_list_slock; /* * local data structues @@ -184,6 +187,12 @@ uvm_km_init(start, end) vaddr_t base = VM_MIN_KERNEL_ADDRESS; /* + * first, initialize the interrupt-safe map list. + */ + LIST_INIT(&vmi_list); + simple_lock_init(&vmi_list_slock); + + /* * next, init kernel memory objects. */ @@ -202,7 +211,7 @@ uvm_km_init(start, end) TAILQ_INIT(&kmem_object_store.memq); kmem_object_store.uo_npages = 0; /* we are special. 
we never die */ - kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; + kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.kmem_object = &kmem_object_store; /* @@ -215,11 +224,11 @@ uvm_km_init(start, end) TAILQ_INIT(&mb_object_store.memq); mb_object_store.uo_npages = 0; /* we are special. we never die */ - mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; + mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.mb_object = &mb_object_store; /* - * init the map and reserve allready allocated kernel space + * init the map and reserve allready allocated kernel space * before installing. */ @@ -227,9 +236,9 @@ uvm_km_init(start, end) kernel_map_store.pmap = pmap_kernel(); if (uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, - UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != 0) + UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != KERN_SUCCESS) panic("uvm_km_init: could not reserve space for kernel"); - + /* * install! */ @@ -266,7 +275,7 @@ uvm_km_suballoc(map, min, max, size, flags, fixed, submap) if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, mapflags)) != 0) { + UVM_ADV_RANDOM, mapflags)) != KERN_SUCCESS) { panic("uvm_km_suballoc: unable to allocate space in parent map"); } @@ -294,7 +303,7 @@ uvm_km_suballoc(map, min, max, size, flags, fixed, submap) * now let uvm_map_submap plug in it... */ - if (uvm_map_submap(map, *min, *max, submap) != 0) + if (uvm_map_submap(map, *min, *max, submap) != KERN_SUCCESS) panic("uvm_km_suballoc: submap allocation failed"); return(submap); @@ -325,7 +334,7 @@ uvm_km_pgremove(uobj, start, end) /* choose cheapest traversal */ by_list = (uobj->uo_npages <= ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); - + if (by_list) goto loop_by_list; @@ -417,7 +426,7 @@ uvm_km_pgremove_intrsafe(uobj, start, end) /* choose cheapest traversal */ by_list = (uobj->uo_npages <= ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); - + if (by_list) goto loop_by_list; @@ -472,14 +481,13 @@ loop_by_list: vaddr_t uvm_km_kmemalloc(map, obj, size, flags) - struct vm_map *map; + vm_map_t map; struct uvm_object *obj; vsize_t size; int flags; { vaddr_t kva, loopva; vaddr_t offset; - vsize_t loopsize; struct vm_page *pg; UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist); @@ -500,8 +508,8 @@ uvm_km_kmemalloc(map, obj, size, flags) if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, - UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) - != 0)) { + UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) + != KERN_SUCCESS)) { UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0); return(0); } @@ -528,16 +536,15 @@ uvm_km_kmemalloc(map, obj, size, flags) */ loopva = kva; - loopsize = size; - while (loopsize) { + while (size) { simple_lock(&obj->vmobjlock); pg = uvm_pagealloc(obj, offset, NULL, 0); - if (__predict_true(pg != NULL)) { + if (pg) { pg->flags &= ~PG_BUSY; /* new page */ UVM_PAGE_OWN(pg, NULL); } simple_unlock(&obj->vmobjlock); - + /* * out of memory? 
*/ @@ -552,7 +559,7 @@ uvm_km_kmemalloc(map, obj, size, flags) continue; } } - + /* * map it in: note that we call pmap_enter with the map and * object unlocked in case we are kmem_map/kmem_object @@ -570,11 +577,8 @@ uvm_km_kmemalloc(map, obj, size, flags) } loopva += PAGE_SIZE; offset += PAGE_SIZE; - loopsize -= PAGE_SIZE; + size -= PAGE_SIZE; } - - pmap_update(pmap_kernel()); - UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); return(kva); } @@ -585,7 +589,7 @@ uvm_km_kmemalloc(map, obj, size, flags) void uvm_km_free(map, addr, size) - struct vm_map *map; + vm_map_t map; vaddr_t addr; vsize_t size; { @@ -601,17 +605,18 @@ uvm_km_free(map, addr, size) void uvm_km_free_wakeup(map, addr, size) - struct vm_map *map; + vm_map_t map; vaddr_t addr; vsize_t size; { - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; vm_map_lock(map); - uvm_unmap_remove(map, trunc_page(addr), round_page(addr + size), - &dead_entries); + (void)uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), + &dead_entries); wakeup(map); vm_map_unlock(map); + if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); } @@ -624,7 +629,7 @@ uvm_km_free_wakeup(map, addr, size) vaddr_t uvm_km_alloc1(map, size, zeroit) - struct vm_map *map; + vm_map_t map; vsize_t size; boolean_t zeroit; { @@ -645,7 +650,7 @@ uvm_km_alloc1(map, size, zeroit) if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, - 0)) != 0)) { + 0)) != KERN_SUCCESS)) { UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0); return(0); } @@ -678,7 +683,7 @@ uvm_km_alloc1(map, size, zeroit) FALSE, "km_alloc", 0); continue; /* retry */ } - + /* allocate ram */ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); if (pg) { @@ -690,7 +695,7 @@ uvm_km_alloc1(map, size, zeroit) uvm_wait("km_alloc1w"); /* wait for memory */ continue; } - + /* * map it in; note we're never called with an intrsafe * object, so we always use regular old pmap_enter(). @@ -702,9 +707,7 @@ uvm_km_alloc1(map, size, zeroit) offset += PAGE_SIZE; size -= PAGE_SIZE; } - - pmap_update(map->pmap); - + /* * zero on request (note that "size" is now zero due to the above loop * so we need to subtract kva from loopva to reconstruct the size). 
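For orientation, this is roughly how a caller consumes the allocator being reverted here: uvm_km_kmemalloc() hands back a wired, kernel-mapped VA (or 0 on failure) and uvm_km_free_wakeup() returns the range and wakes anyone sleeping on the map. A minimal sketch assuming the kmem_map and uvmexp.kmem_object globals this file already references and the pre-UBC prototypes in this hunk; km_scratch_get/put are illustrative names only.

#include <sys/param.h>
#include <uvm/uvm.h>

/* allocate one wired, kernel-mapped scratch page */
static vaddr_t
km_scratch_get()
{
        vaddr_t va;

        /* flags 0: sleep for VA and pages; UVM_KMF_TRYLOCK would fail instead */
        va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, PAGE_SIZE, 0);
        return (va);                    /* 0 means "no VM" */
}

/* give the page back and wake anyone sleeping on kmem_map */
static void
km_scratch_put(va)
        vaddr_t va;
{
        uvm_km_free_wakeup(kmem_map, va, PAGE_SIZE);
}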
@@ -725,7 +728,7 @@ uvm_km_alloc1(map, size, zeroit) vaddr_t uvm_km_valloc(map, size) - struct vm_map *map; + vm_map_t map; vsize_t size; { return(uvm_km_valloc_align(map, size, 0)); @@ -733,7 +736,7 @@ uvm_km_valloc(map, size) vaddr_t uvm_km_valloc_align(map, size, align) - struct vm_map *map; + vm_map_t map; vsize_t size; vsize_t align; { @@ -753,7 +756,7 @@ uvm_km_valloc_align(map, size, align) if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, - 0)) != 0)) { + 0)) != KERN_SUCCESS)) { UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); return(0); } @@ -772,7 +775,7 @@ uvm_km_valloc_align(map, size, align) vaddr_t uvm_km_valloc_prefer_wait(map, size, prefer) - struct vm_map *map; + vm_map_t map; vsize_t size; voff_t prefer; { @@ -797,7 +800,7 @@ uvm_km_valloc_prefer_wait(map, size, prefer) if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object, prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) - == 0)) { + == KERN_SUCCESS)) { UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); return(kva); } @@ -814,7 +817,7 @@ uvm_km_valloc_prefer_wait(map, size, prefer) vaddr_t uvm_km_valloc_wait(map, size) - struct vm_map *map; + vm_map_t map; vsize_t size; { return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET); @@ -835,7 +838,7 @@ uvm_km_valloc_wait(map, size) /* ARGSUSED */ vaddr_t uvm_km_alloc_poolpage1(map, obj, waitok) - struct vm_map *map; + vm_map_t map; struct uvm_object *obj; boolean_t waitok; { @@ -886,7 +889,7 @@ uvm_km_alloc_poolpage1(map, obj, waitok) /* ARGSUSED */ void uvm_km_free_poolpage1(map, addr) - struct vm_map *map; + vm_map_t map; vaddr_t addr; { #if defined(PMAP_UNMAP_POOLPAGE) diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index cc82286e91b..e3c99ea8bb9 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_loan.c,v 1.16 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_loan.c,v 1.31 2001/08/27 02:34:29 chuck Exp $ */ +/* $OpenBSD: uvm_loan.c,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.23 2001/01/23 02:27:39 thorpej Exp $ */ /* * @@ -49,7 +49,7 @@ #include <uvm/uvm.h> /* - * "loaned" pages are pages which are (read-only, copy-on-write) loaned + * "loaned" pages are pages which are (read-only, copy-on-write) loaned * from the VM system to other parts of the kernel. this allows page * copying to be avoided (e.g. you can loan pages from objs/anons to * the mbuf system). @@ -75,7 +75,7 @@ * object/anon which the page is owned by. this is a good side-effect, * since a kernel write to a loaned page is an error. * - * owners that want to free their pages and discover that they are + * owners that want to free their pages and discover that they are * loaned out simply "disown" them (the page becomes an orphan). these * pages should be freed when the last loan is dropped. in some cases * an anon may "adopt" an orphaned page. @@ -92,7 +92,7 @@ * use "try" locking. * * loans are typically broken by the following events: - * 1. user-level xwrite fault to a loaned page + * 1. write fault to a loaned page * 2. pageout of clean+inactive O->A loaned page * 3. owner frees page (e.g. 
pager flush) * @@ -105,10 +105,10 @@ * local prototypes */ -static int uvm_loananon __P((struct uvm_faultinfo *, void ***, +static int uvm_loananon __P((struct uvm_faultinfo *, void ***, int, struct vm_anon *)); static int uvm_loanentry __P((struct uvm_faultinfo *, void ***, int)); -static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***, +static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***, int, vaddr_t)); static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int)); @@ -120,14 +120,10 @@ static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int)); * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan()) * * => "ufi" is the result of a successful map lookup (meaning that - * on entry the map is locked by the caller) - * => we may unlock and then relock the map if needed (for I/O) + * the maps are locked by the caller) + * => we may unlock the maps if needed (for I/O) * => we put our output result in "output" - * => we always return with the map unlocked - * => possible return values: - * -1 == error, map is unlocked - * 0 == map relock error (try again!), map is unlocked - * >0 == number of pages we loaned, map is unlocked + * => we return the number of pages we loaned, or -1 if we had an error */ static __inline int @@ -144,7 +140,7 @@ uvm_loanentry(ufi, output, flags) int rv, result = 0; /* - * lock us the rest of the way down (we unlock before return) + * lock us the rest of the way down */ if (aref->ar_amap) amap_lock(aref->ar_amap); @@ -166,7 +162,6 @@ uvm_loanentry(ufi, output, flags) anon = NULL; } - /* locked: map, amap, uobj */ if (anon) { rv = uvm_loananon(ufi, output, flags, anon); } else if (uobj) { @@ -174,17 +169,16 @@ uvm_loanentry(ufi, output, flags) } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) { rv = uvm_loanzero(ufi, output, flags); } else { - rv = -1; /* null map entry... fail now */ + rv = -1; /* null map entry... fail now */ } - /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ /* total failure */ if (rv < 0) - return(-1); /* everything unlocked */ + return(-1); /* relock failed, need to do another lookup */ if (rv == 0) - return(result); /* everything unlocked */ + return(result); /* * got it... advance to next page @@ -195,13 +189,9 @@ uvm_loanentry(ufi, output, flags) } /* - * unlock what we locked, unlock the maps and return + * unlock everything and return */ - if (aref->ar_amap) - amap_unlock(aref->ar_amap); - if (uobj) - simple_unlock(&uobj->vmobjlock); - uvmfault_unlockmaps(ufi, FALSE); + uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL); return(result); } @@ -210,15 +200,14 @@ uvm_loanentry(ufi, output, flags) */ /* - * uvm_loan: loan pages in a map out to anons or to the kernel - * + * uvm_loan: loan pages out to anons or to the kernel + * * => map should be unlocked * => start and len should be multiples of PAGE_SIZE * => result is either an array of anon's or vm_pages (depending on flags) * => flag values: UVM_LOAN_TOANON - loan to anons * UVM_LOAN_TOPAGE - loan to wired kernel page * one and only one of these flags must be set! 
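A caller-side sketch of the flag rule just stated: loaning to the kernel uses UVM_LOAN_TOPAGE and an array of vm_page pointers, and the loan is dropped again with uvm_unloanpage(). The function name, the errno choice, and the page-aligned arguments are assumptions for illustration; the KERN_* return is the convention this hunk restores.

#include <sys/param.h>
#include <sys/errno.h>
#include <uvm/uvm.h>

/*
 * loan the pages backing [uva, uva+len) in "map" to the kernel as
 * wired pages; uva and len are assumed to be multiples of PAGE_SIZE.
 */
static int
loan_user_buf(map, uva, len, pages)
        vm_map_t map;
        vaddr_t uva;
        vsize_t len;
        struct vm_page **pages;         /* len >> PAGE_SHIFT slots */
{
        if (uvm_loan(map, uva, len, (void **)pages, UVM_LOAN_TOPAGE) !=
            KERN_SUCCESS)
                return (EFAULT);        /* illustrative errno choice */

        /* pages[] are wired and write-protected until given back */

        uvm_unloanpage(pages, (int)(len >> PAGE_SHIFT));
        return (0);
}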
- * => returns 0 (success), or an appropriate error number */ int @@ -231,15 +220,21 @@ uvm_loan(map, start, len, result, flags) { struct uvm_faultinfo ufi; void **output; - int rv, error; + int rv; + +#ifdef DIAGNOSTIC + if (map->flags & VM_MAP_INTRSAFE) + panic("uvm_loan: intrsafe map"); +#endif /* * ensure that one and only one of the flags is set */ - KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^ - ((flags & UVM_LOAN_TOPAGE) == 0)); - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + if ((flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == + (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE) || + (flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == 0) + return(KERN_FAILURE); /* * "output" is a pointer to the current place to put the loaned @@ -261,51 +256,40 @@ uvm_loan(map, start, len, result, flags) ufi.orig_map = map; ufi.orig_rvaddr = start; ufi.orig_size = len; - + /* * do the lookup, the only time this will fail is if we hit on * an unmapped region (an error) */ - if (!uvmfault_lookup(&ufi, FALSE)) { - error = ENOENT; + if (!uvmfault_lookup(&ufi, FALSE)) goto fail; - } /* - * map now locked. now do the loanout... + * now do the loanout */ rv = uvm_loanentry(&ufi, &output, flags); - if (rv < 0) { - /* all unlocked due to error */ - error = EINVAL; + if (rv < 0) goto fail; - } /* - * done! the map is unlocked. advance, if possible. - * - * XXXCDC: could be recoded to hold the map lock with - * smarter code (but it only happens on map entry - * boundaries, so it isn't that bad). + * done! advance pointers and unlock. */ - if (rv) { - rv <<= PAGE_SHIFT; - len -= rv; - start += rv; - } + rv <<= PAGE_SHIFT; + len -= rv; + start += rv; + uvmfault_unlockmaps(&ufi, FALSE); } - + /* * got it! return success. */ - return 0; + return(KERN_SUCCESS); fail: /* * fail: failed to do it. drop our loans and return failure code. - * map is already unlocked. */ if (output - result) { if (flags & UVM_LOAN_TOANON) @@ -315,13 +299,12 @@ fail: uvm_unloanpage((struct vm_page **)result, output - result); } - return (error); + return(KERN_FAILURE); } /* * uvm_loananon: loan a page from an anon out - * - * => called with map, amap, uobj locked + * * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -340,16 +323,15 @@ uvm_loananon(ufi, output, flags, anon) int result; /* - * if we are loaning to "another" anon then it is easy, we just + * if we are loaning to another anon then it is easy, we just * bump the reference count on the current anon and return a - * pointer to it (it becomes copy-on-write shared). + * pointer to it. */ if (flags & UVM_LOAN_TOANON) { simple_lock(&anon->an_lock); pg = anon->u.an_page; - /* if (in RAM) and (owned by this anon) and (only 1 ref) */ if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) - /* write-protect it */ + /* read protect it */ pmap_page_protect(pg, VM_PROT_READ); anon->an_ref++; **output = anon; @@ -371,15 +353,16 @@ uvm_loananon(ufi, output, flags, anon) * if we were unable to get the anon, then uvmfault_anonget has * unlocked everything and returned an error code. */ - if (result != 0) { + + if (result != VM_PAGER_OK) { /* need to refault (i.e. refresh our lookup) ? */ - if (result == ERESTART) + if (result == VM_PAGER_REFAULT) return(0); /* "try again"? sleep a bit and retry ... 
*/ - if (result == EAGAIN) { - tsleep(&lbolt, PVM, "loanagain", 0); + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "loanagain", 0); return(0); } @@ -402,7 +385,7 @@ uvm_loananon(ufi, output, flags, anon) *output = (*output) + 1; /* unlock anon and return success */ - if (pg->uobject) /* XXXCDC: what if this is our uobj? bad */ + if (pg->uobject) simple_unlock(&pg->uobject->vmobjlock); simple_unlock(&anon->an_lock); return(1); @@ -411,7 +394,6 @@ uvm_loananon(ufi, output, flags, anon) /* * uvm_loanuobj: loan a page from a uobj out * - * => called with map, amap, uobj locked * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -439,13 +421,13 @@ uvm_loanuobj(ufi, output, flags, va) * XXXCDC: duplicate code with uvm_fault(). */ - if (uobj->pgops->pgo_get) { /* try locked pgo_get */ + if (uobj->pgops->pgo_get) { npages = 1; pg = NULL; result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED); } else { - result = EIO; /* must have pgo_get op */ + result = VM_PAGER_ERROR; } /* @@ -453,7 +435,7 @@ uvm_loanuobj(ufi, output, flags, va) * then we fail the loan. */ - if (result != 0 && result != EBUSY) { + if (result != VM_PAGER_OK && result != VM_PAGER_UNLOCK) { uvmfault_unlockall(ufi, amap, uobj, NULL); return(-1); } @@ -462,24 +444,24 @@ uvm_loanuobj(ufi, output, flags, va) * if we need to unlock for I/O, do so now. */ - if (result == EBUSY) { + if (result == VM_PAGER_UNLOCK) { uvmfault_unlockall(ufi, amap, NULL, NULL); - + npages = 1; /* locked: uobj */ result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, - &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO); + &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, 0); /* locked: <nothing> */ - + /* * check for errors */ - if (result != 0) { - if (result == EAGAIN) { - tsleep(&lbolt, PVM, "fltagain2", 0); + if (result != VM_PAGER_OK) { + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); return(0); /* redo the lookup and try again */ - } + } return(-1); /* total failure */ } @@ -497,15 +479,15 @@ uvm_loanuobj(ufi, output, flags, va) * that amap slot is still free. if there is a problem we * drop our lock (thus force a lookup refresh/retry). */ - + if ((pg->flags & PG_RELEASED) != 0 || (locked && amap && amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start))) { - + if (locked) uvmfault_unlockall(ufi, amap, NULL, NULL); locked = FALSE; - } + } /* * didn't get the lock? release the page and retry. @@ -544,7 +526,7 @@ uvm_loanuobj(ufi, output, flags, va) * not be PG_RELEASED (we caught this above). */ - if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ + if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ uvm_lock_pageq(); if (pg->loan_count == 0) pmap_page_protect(pg, VM_PROT_READ); @@ -563,7 +545,7 @@ uvm_loanuobj(ufi, output, flags, va) /* * must be a loan to an anon. check to see if there is already * an anon associated with this page. if so, then just return - * a reference to this object. the page should already be + * a reference to this object. the page should already be * mapped read-only because it is already on loan. 
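The read-only rule above is the central loan invariant: the first outstanding loan write-protects the page in every pmap, and loan_count tracks how many loans exist. A condensed sketch of that step with an illustrative helper name; the real code does this inline in uvm_loananon()/uvm_loanuobj(), wiring the page only for kernel (TOPAGE) loans, and assumes the headers already included by uvm_loan.c.

/*
 * start one more loan of "pg"; the object or anon owning the page is
 * locked by the caller, as in the functions above.
 */
static void
loan_start(pg, wire)
        struct vm_page *pg;
        boolean_t wire;         /* TRUE for UVM_LOAN_TOPAGE-style loans */
{
        uvm_lock_pageq();
        if (pg->loan_count == 0)
                pmap_page_protect(pg, VM_PROT_READ);    /* first loan: no writers */
        pg->loan_count++;
        if (wire)
                uvm_pagewire(pg);       /* kernel borrowers need the page resident */
        uvm_unlock_pageq();
}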
*/ @@ -583,7 +565,7 @@ uvm_loanuobj(ufi, output, flags, va) UVM_PAGE_OWN(pg, NULL); return(1); } - + /* * need to allocate a new anon */ @@ -619,7 +601,6 @@ uvm_loanuobj(ufi, output, flags, va) /* * uvm_loanzero: "loan" a zero-fill page out * - * => called with map, amap, uobj locked * => return value: * -1 = fatal error, everything is unlocked, abort. * 0 = lookup in ufi went stale, everything unlocked, relookup and @@ -640,7 +621,7 @@ uvm_loanzero(ufi, output, flags) while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL) { - uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, ufi->entry->object.uvm_obj, NULL); uvm_wait("loanzero1"); if (!uvmfault_relock(ufi)) @@ -652,7 +633,7 @@ uvm_loanzero(ufi, output, flags) &ufi->entry->object.uvm_obj->vmobjlock); /* ... and try again */ } - + /* got a zero'd page; return */ pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -667,7 +648,7 @@ uvm_loanzero(ufi, output, flags) } /* loaning to an anon */ - while ((anon = uvm_analloc()) == NULL || + while ((anon = uvm_analloc()) == NULL || (pg = uvm_pagealloc(NULL, 0, anon, UVM_PGA_ZERO)) == NULL) { /* unlock everything */ @@ -761,7 +742,7 @@ uvm_unloanpage(ploans, npages) panic("uvm_unloanpage: page %p isn't loaned", pg); pg->loan_count--; /* drop loan */ - uvm_pageunwire(pg); /* and unwire */ + uvm_pageunwire(pg); /* and wire */ /* * if page is unowned and we killed last loan, then we can diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index f2ebe948eb9..058d8e53d80 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map.c,v 1.34 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_map.c,v 1.105 2001/09/10 21:19:42 chris Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.35 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map.c,v 1.93 2001/02/11 01:34:23 eeh Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -77,7 +77,6 @@ #include <sys/proc.h> #include <sys/malloc.h> #include <sys/pool.h> -#include <sys/kernel.h> #ifdef SYSVSHM #include <sys/shm.h> @@ -106,7 +105,6 @@ struct pool uvm_vmspace_pool; */ struct pool uvm_map_entry_pool; -struct pool uvm_map_entry_kmem_pool; #ifdef PMAP_GROWKERNEL /* @@ -178,12 +176,12 @@ vaddr_t uvm_maxkaddr; * local prototypes */ -static struct vm_map_entry *uvm_mapent_alloc __P((struct vm_map *)); -static void uvm_mapent_copy __P((struct vm_map_entry *, struct vm_map_entry *)); -static void uvm_mapent_free __P((struct vm_map_entry *)); -static void uvm_map_entry_unwire __P((struct vm_map *, struct vm_map_entry *)); -static void uvm_map_reference_amap __P((struct vm_map_entry *, int)); -static void uvm_map_unreference_amap __P((struct vm_map_entry *, int)); +static vm_map_entry_t uvm_mapent_alloc __P((vm_map_t)); +static void uvm_mapent_copy __P((vm_map_entry_t,vm_map_entry_t)); +static void uvm_mapent_free __P((vm_map_entry_t)); +static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); +static void uvm_map_reference_amap __P((vm_map_entry_t, int)); +static void uvm_map_unreference_amap __P((vm_map_entry_t, int)); /* * local inlines @@ -191,66 +189,66 @@ static void uvm_map_unreference_amap __P((struct vm_map_entry *, int)); /* * uvm_mapent_alloc: allocate a map entry + * + * => XXX: static pool for kernel map? */ -static __inline struct vm_map_entry * +static __inline vm_map_entry_t uvm_mapent_alloc(map) - struct vm_map *map; + vm_map_t map; { - struct vm_map_entry *me; + vm_map_entry_t me; int s; - UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_mapent_alloc"); + UVMHIST_CALLED(maphist); - if (map->flags & VM_MAP_INTRSAFE || cold) { - s = splvm(); + if ((map->flags & VM_MAP_INTRSAFE) == 0 && + map != kernel_map && kernel_map != NULL /* XXX */) { + me = pool_get(&uvm_map_entry_pool, PR_WAITOK); + me->flags = 0; + /* me can't be null, wait ok */ + } else { + s = splvm(); /* protect kentry_free list with splvm */ simple_lock(&uvm.kentry_lock); me = uvm.kentry_free; if (me) uvm.kentry_free = me->next; simple_unlock(&uvm.kentry_lock); splx(s); - if (me == NULL) { - panic("uvm_mapent_alloc: out of static map entries, " - "check MAX_KMAPENT (currently %d)", - MAX_KMAPENT); - } + if (!me) + panic("mapent_alloc: out of static map entries, check MAX_KMAPENT"); me->flags = UVM_MAP_STATIC; - } else if (map == kernel_map) { - me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK); - me->flags = UVM_MAP_KMEM; - } else { - me = pool_get(&uvm_map_entry_pool, PR_WAITOK); - me->flags = 0; } - UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, - ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); + UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", + me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map) + ? TRUE : FALSE, 0, 0); return(me); } /* * uvm_mapent_free: free map entry + * + * => XXX: static pool for kernel map? 
*/ static __inline void uvm_mapent_free(me) - struct vm_map_entry *me; + vm_map_entry_t me; { int s; - UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); - - UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", + UVMHIST_FUNC("uvm_mapent_free"); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", me, me->flags, 0, 0); - if (me->flags & UVM_MAP_STATIC) { - s = splvm(); + if ((me->flags & UVM_MAP_STATIC) == 0) { + pool_put(&uvm_map_entry_pool, me); + } else { + s = splvm(); /* protect kentry_free list with splvm */ simple_lock(&uvm.kentry_lock); me->next = uvm.kentry_free; uvm.kentry_free = me; simple_unlock(&uvm.kentry_lock); splx(s); - } else if (me->flags & UVM_MAP_KMEM) { - pool_put(&uvm_map_entry_kmem_pool, me); - } else { - pool_put(&uvm_map_entry_pool, me); } } @@ -260,11 +258,11 @@ uvm_mapent_free(me) static __inline void uvm_mapent_copy(src, dst) - struct vm_map_entry *src; - struct vm_map_entry *dst; + vm_map_entry_t src; + vm_map_entry_t dst; { - memcpy(dst, src, - ((char *)&src->uvm_map_entry_stop_copy) - ((char *)src)); + + memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - ((char*)src)); } /* @@ -275,9 +273,10 @@ uvm_mapent_copy(src, dst) static __inline void uvm_map_entry_unwire(map, entry) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; { + entry->wired_count = 0; uvm_fault_unwire_locked(map, entry->start, entry->end); } @@ -288,34 +287,34 @@ uvm_map_entry_unwire(map, entry) */ static __inline void uvm_map_reference_amap(entry, flags) - struct vm_map_entry *entry; + vm_map_entry_t entry; int flags; { - amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, + amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, flags); } /* - * wrapper for calling amap_unref() + * wrapper for calling amap_unref() */ static __inline void uvm_map_unreference_amap(entry, flags) - struct vm_map_entry *entry; + vm_map_entry_t entry; int flags; { - amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, + amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, (entry->end - entry->start) >> PAGE_SHIFT, flags); } /* * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of struct vm_map_entry *'s for the kernel here. + * and init the static pool of vm_map_entry_t's for the kernel here. */ void -uvm_map_init() +uvm_map_init() { static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; #if defined(UVMHIST) @@ -361,8 +360,6 @@ uvm_map_init() pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl", 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP); - pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpekpl", 0, NULL, NULL, M_VMMAP); } /* @@ -372,19 +369,18 @@ uvm_map_init() /* * uvm_map_clip_start: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. 
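Clipping exists so range operations can work on whole entries: the first and last entries overlapping [start, end) are split at the boundaries, and the walk then never touches anything outside the range. A schematic of the pattern the range functions further down (protect, inherit, advice, pageable) all follow, assuming a page-aligned range; range_walk is an illustrative name and the loop body is a placeholder.

static void
range_walk(map, start, end)
        vm_map_t map;
        vaddr_t start, end;
{
        vm_map_entry_t entry;

        vm_map_lock(map);
        if (uvm_map_lookup_entry(map, start, &entry)) {
                UVM_MAP_CLIP_START(map, entry, start);  /* may split entry */
        } else {
                entry = entry->next;    /* start falls in a hole */
        }

        while (entry != &map->header && entry->start < end) {
                UVM_MAP_CLIP_END(map, entry, end);      /* split last entry */
                /* ... operate on exactly [entry->start, entry->end) ... */
                entry = entry->next;
        }
        vm_map_unlock(map);
}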
- * + * * => caller should use UVM_MAP_CLIP_START macro rather than calling * this directly * => map must be locked by caller */ -void -uvm_map_clip_start(map, entry, start) - struct vm_map *map; - struct vm_map_entry *entry; - vaddr_t start; +void uvm_map_clip_start(map, entry, start) + vm_map_t map; + vm_map_entry_t entry; + vaddr_t start; { - struct vm_map_entry *new_entry; + vm_map_entry_t new_entry; vaddr_t new_adj; /* uvm_map_simplify_entry(map, entry); */ /* XXX */ @@ -398,7 +394,7 @@ uvm_map_clip_start(map, entry, start) new_entry = uvm_mapent_alloc(map); uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ - new_entry->end = start; + new_entry->end = start; new_adj = start - new_entry->start; if (entry->object.uvm_obj) entry->offset += new_adj; /* shift start over */ @@ -414,7 +410,7 @@ uvm_map_clip_start(map, entry, start) /* ... unlikely to happen, but play it safe */ uvm_map_reference(new_entry->object.sub_map); } else { - if (UVM_ET_ISOBJ(entry) && + if (UVM_ET_ISOBJ(entry) && entry->object.uvm_obj->pgops && entry->object.uvm_obj->pgops->pgo_reference) entry->object.uvm_obj->pgops->pgo_reference( @@ -425,7 +421,7 @@ uvm_map_clip_start(map, entry, start) /* * uvm_map_clip_end: ensure that the entry ends at or before * the ending address, if it does't we split the reference - * + * * => caller should use UVM_MAP_CLIP_END macro rather than calling * this directly * => map must be locked by caller @@ -433,11 +429,11 @@ uvm_map_clip_start(map, entry, start) void uvm_map_clip_end(map, entry, end) - struct vm_map *map; - struct vm_map_entry *entry; + vm_map_t map; + vm_map_entry_t entry; vaddr_t end; { - struct vm_map_entry * new_entry; + vm_map_entry_t new_entry; vaddr_t new_adj; /* #bytes we move start forward */ /* @@ -487,7 +483,7 @@ uvm_map_clip_end(map, entry, end) * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER * [3] <uobj,uoffset> == normal mapping * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA - * + * * case [4] is for kernel mappings where we don't know the offset until * we've found a virtual address. note that kernel object offsets are * always relative to vm_map_min(kernel_map). @@ -502,7 +498,7 @@ uvm_map_clip_end(map, entry, end) int uvm_map(map, startp, size, uobj, uoffset, align, flags) - struct vm_map *map; + vm_map_t map; vaddr_t *startp; /* IN/OUT */ vsize_t size; struct uvm_object *uobj; @@ -510,7 +506,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) vsize_t align; uvm_flag_t flags; { - struct vm_map_entry *prev_entry, *new_entry; + vm_map_entry_t prev_entry, new_entry; vm_prot_t prot = UVM_PROTECTION(flags), maxprot = UVM_MAXPROTECTION(flags); vm_inherit_t inherit = UVM_INHERIT(flags); @@ -527,9 +523,9 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) */ if ((prot & maxprot) != prot) { - UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", + UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%x, max=0x%x", prot, maxprot,0,0); - return EACCES; + return(KERN_PROTECTION_FAILURE); } /* @@ -538,14 +534,14 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) if (vm_map_lock_try(map) == FALSE) { if (flags & UVM_FLAG_TRYLOCK) - return EAGAIN; + return(KERN_FAILURE); vm_map_lock(map); /* could sleep here */ } - if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, + if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, uobj, uoffset, align, flags)) == NULL) { UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); } #ifdef PMAP_GROWKERNEL @@ -563,11 +559,11 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) /* * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER - * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in - * either case we want to zero it before storing it in the map entry + * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in + * either case we want to zero it before storing it in the map entry * (because it looks strange and confusing when debugging...) - * - * if uobj is not null + * + * if uobj is not null * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping * and we do not need to change uoffset. * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset @@ -593,7 +589,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) * for a stack, but we are currently allocating our stack in advance. */ - if ((flags & UVM_FLAG_NOMERGE) == 0 && + if ((flags & UVM_FLAG_NOMERGE) == 0 && prev_entry->end == *startp && prev_entry != &map->header && prev_entry->object.uvm_obj == uobj) { @@ -604,7 +600,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) if (UVM_ET_ISSUBMAP(prev_entry)) goto step3; - if (prev_entry->protection != prot || + if (prev_entry->protection != prot || prev_entry->max_protection != maxprot) goto step3; @@ -614,10 +610,10 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) /* wiring status must match (new area is unwired) */ if (VM_MAPENT_ISWIRED(prev_entry)) - goto step3; + goto step3; /* - * can't extend a shared amap. note: no need to lock amap to + * can't extend a shared amap. note: no need to lock amap to * look at refs since we don't care about its exact value. * if it is one (i.e. we have only reference) it will stay there */ @@ -648,7 +644,7 @@ uvm_map(map, startp, size, uobj, uoffset, align, flags) UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); vm_map_unlock(map); - return 0; + return (KERN_SUCCESS); } step3: @@ -656,11 +652,11 @@ step3: /* * check for possible forward merge (which we don't do) and count - * the number of times we missed a *possible* chance to merge more + * the number of times we missed a *possible* chance to merge more */ if ((flags & UVM_FLAG_NOMERGE) == 0 && - prev_entry->next != &map->header && + prev_entry->next != &map->header && prev_entry->next->start == (*startp + size)) UVMCNT_INCR(map_forwmerge); @@ -674,7 +670,7 @@ step3: new_entry->object.uvm_obj = uobj; new_entry->offset = uoffset; - if (uobj) + if (uobj) new_entry->etype = UVM_ET_OBJ; else new_entry->etype = 0; @@ -695,7 +691,7 @@ step3: * to_add: for BSS we overallocate a little since we * are likely to extend */ - vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? + vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
UVM_AMAP_CHUNK << PAGE_SHIFT : 0; struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); new_entry->aref.ar_pageoff = 0; @@ -704,7 +700,9 @@ step3: new_entry->aref.ar_pageoff = 0; new_entry->aref.ar_amap = NULL; } + uvm_map_entry_link(map, prev_entry, new_entry); + map->size += size; /* @@ -717,7 +715,7 @@ step3: UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); vm_map_unlock(map); - return 0; + return(KERN_SUCCESS); } /* @@ -730,12 +728,12 @@ step3: boolean_t uvm_map_lookup_entry(map, address, entry) - struct vm_map *map; + vm_map_t map; vaddr_t address; - struct vm_map_entry **entry; /* OUT */ + vm_map_entry_t *entry; /* OUT */ { - struct vm_map_entry *cur; - struct vm_map_entry *last; + vm_map_entry_t cur; + vm_map_entry_t last; UVMHIST_FUNC("uvm_map_lookup_entry"); UVMHIST_CALLED(maphist); @@ -756,7 +754,6 @@ uvm_map_lookup_entry(map, address, entry) UVMCNT_INCR(uvm_mlk_call); if (address >= cur->start) { - /* * go from hint to end of list. * @@ -768,7 +765,6 @@ uvm_map_lookup_entry(map, address, entry) * at the header, in which case the hint didn't * buy us anything anyway). */ - last = &map->header; if ((cur != last) && (cur->end > address)) { UVMCNT_INCR(uvm_mlk_hint); @@ -778,11 +774,9 @@ uvm_map_lookup_entry(map, address, entry) return (TRUE); } } else { - /* * go from start to hint, *inclusively* */ - last = cur->next; cur = map->header.next; } @@ -828,9 +822,9 @@ uvm_map_lookup_entry(map, address, entry) * => note this is a cross between the old vm_map_findspace and vm_map_find */ -struct vm_map_entry * +vm_map_entry_t uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) - struct vm_map *map; + vm_map_t map; vaddr_t hint; vsize_t length; vaddr_t *result; /* OUT */ @@ -839,12 +833,12 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) vsize_t align; int flags; { - struct vm_map_entry *entry, *next, *tmp; + vm_map_entry_t entry, next, tmp; vaddr_t end, orig_hint; UVMHIST_FUNC("uvm_map_findspace"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", + UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", map, hint, length, flags); KASSERT((align & (align - 1)) == 0); KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); @@ -875,7 +869,7 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) */ if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { - if ((entry = map->first_free) != &map->header) + if ((entry = map->first_free) != &map->header) hint = entry->end; } else { if (uvm_map_lookup_entry(map, hint, &tmp)) { @@ -896,9 +890,7 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) * note: entry->end = base VA of current gap, * next->start = VA of end of current gap */ - for (;; hint = (entry = next)->end) { - /* * Find the end of the proposed new region. Be sure we didn't * go beyond the end of the map, or wrap around the address; @@ -912,7 +904,6 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) * push hint forward as needed to avoid VAC alias problems. * we only do this if a valid offset is specified. 
*/ - if ((flags & UVM_FLAG_FIXED) == 0 && uoffset != UVM_UNKNOWN_OFFSET) PMAP_PREFER(uoffset, &hint); @@ -957,21 +948,22 @@ uvm_map_findspace(map, hint, length, result, uobj, uoffset, align, flags) /* * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") * - * => caller must check alignment and size + * => caller must check alignment and size * => map must be locked by caller * => we return a list of map entries that we've remove from the map * in "entry_list" */ -void +int uvm_unmap_remove(map, start, end, entry_list) - struct vm_map *map; - vaddr_t start, end; - struct vm_map_entry **entry_list; /* OUT */ + vm_map_t map; + vaddr_t start,end; + vm_map_entry_t *entry_list; /* OUT */ { - struct vm_map_entry *entry, *first_entry, *next; + vm_map_entry_t entry, first_entry, next; vaddr_t len; - UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_unmap_remove"); + UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", map, start, end, 0); @@ -981,13 +973,13 @@ uvm_unmap_remove(map, start, end, entry_list) /* * find first entry */ - if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { /* clip and go... */ entry = first_entry; UVM_MAP_CLIP_START(map, entry, start); /* critical! prevents stale hint */ SAVE_HINT(map, entry, entry->prev); + } else { entry = first_entry->next; } @@ -1011,14 +1003,13 @@ uvm_unmap_remove(map, start, end, entry_list) * [3] dropping references may trigger pager I/O, and if we hit * a pager that does synchronous I/O we may have to wait for it. * [4] we would like all waiting for I/O to occur with maps unlocked - * so that we don't block other threads. + * so that we don't block other threads. */ - first_entry = NULL; *entry_list = NULL; /* to be safe */ /* - * break up the area into map entry sized regions and unmap. note + * break up the area into map entry sized regions and unmap. note * that all mappings have to be removed before we can even consider * dropping references to amaps or VM objects (otherwise we could end * up with a mapping to a page on the free list which would be very bad) @@ -1026,7 +1017,7 @@ uvm_unmap_remove(map, start, end, entry_list) while ((entry != &map->header) && (entry->start < end)) { - UVM_MAP_CLIP_END(map, entry, end); + UVM_MAP_CLIP_END(map, entry, end); next = entry->next; len = entry->end - entry->start; @@ -1042,7 +1033,6 @@ uvm_unmap_remove(map, start, end, entry_list) * special case: handle mappings to anonymous kernel objects. * we want to free these pages right away... */ - if (UVM_ET_ISOBJ(entry) && UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { KASSERT(vm_map_pmap(map) == pmap_kernel()); @@ -1068,8 +1058,8 @@ uvm_unmap_remove(map, start, end, entry_list) * doesn't hurt to call uvm_km_pgremove just to be * safe?] * - * uvm_km_pgremove currently does the following: - * for pages in the kernel object in range: + * uvm_km_pgremove currently does the following: + * for pages in the kernel object in range: * - drops the swap slot * - uvm_pagefree the page * @@ -1082,7 +1072,6 @@ uvm_unmap_remove(map, start, end, entry_list) * from the object. offsets are always relative * to vm_map_min(kernel_map). 
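To make the offset rule concrete: a kernel virtual address corresponds to the kernel-object page at (va - vm_map_min(kernel_map)), which is exactly the arithmetic the pgremove calls here pass along. A tiny sketch with an illustrative helper name:

/* kernel-object offset backing a kernel virtual address */
static vaddr_t
kva_to_kobj_off(kva)
        vaddr_t kva;
{
        return (kva - vm_map_min(kernel_map));
}

/*
 * e.g. the uvm_km.c example earlier in this diff: a page mapped at
 * 0xf8235000 sits at offset 0x235000 in kernel_object when
 * VM_MIN_KERNEL_ADDRESS is 0xf8000000.
 */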
*/ - if (UVM_OBJ_IS_INTRSAFE_OBJECT(entry->object.uvm_obj)) { pmap_kremove(entry->start, len); uvm_km_pgremove_intrsafe(entry->object.uvm_obj, @@ -1100,24 +1089,20 @@ uvm_unmap_remove(map, start, end, entry_list) * null out kernel_object reference, we've just * dropped it */ - entry->etype &= ~UVM_ET_OBJ; entry->object.uvm_obj = NULL; /* to be safe */ } else { - /* * remove mappings the standard way. */ - pmap_remove(map->pmap, entry->start, entry->end); } /* - * remove entry from map and put it on our list of entries + * remove entry from map and put it on our list of entries * that we've nuked. then go do next entry. */ - UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); /* critical! prevents stale hint */ @@ -1129,15 +1114,15 @@ uvm_unmap_remove(map, start, end, entry_list) first_entry = entry; entry = next; /* next entry, please */ } - pmap_update(vm_map_pmap(map)); /* * now we've cleaned up the map and are ready for the caller to drop - * references to the mapped objects. + * references to the mapped objects. */ *entry_list = first_entry; UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + return(KERN_SUCCESS); } /* @@ -1148,17 +1133,17 @@ uvm_unmap_remove(map, start, end, entry_list) void uvm_unmap_detach(first_entry, flags) - struct vm_map_entry *first_entry; + vm_map_entry_t first_entry; int flags; { - struct vm_map_entry *next_entry; + vm_map_entry_t next_entry; UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); while (first_entry) { KASSERT(!VM_MAPENT_ISWIRED(first_entry)); UVMHIST_LOG(maphist, - " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", - first_entry, first_entry->aref.ar_amap, + " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", + first_entry, first_entry->aref.ar_amap, first_entry->object.uvm_obj, UVM_ET_ISSUBMAP(first_entry)); @@ -1182,6 +1167,7 @@ uvm_unmap_detach(first_entry, flags) first_entry->object.uvm_obj->pgops-> pgo_detach(first_entry->object.uvm_obj); } + next_entry = first_entry->next; uvm_mapent_free(first_entry); first_entry = next_entry; @@ -1193,10 +1179,10 @@ uvm_unmap_detach(first_entry, flags) * E X T R A C T I O N F U N C T I O N S */ -/* +/* * uvm_map_reserve: reserve space in a vm_map for future use. 
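A sketch of what a reservation looks like from the caller's side, e.g. for a two-step transfer like uvm_map_extract() further down: grab a blank VM_PROT_NONE entry covering the range now, and later fill it with uvm_map_replace() or give it back with uvm_unmap(). The wrapper name and the errno are illustrative; the TRUE/FALSE return is as declared in this hunk.

static int
reserve_blank_range(dstmap, len, dstaddrp)
        vm_map_t dstmap;
        vsize_t len;
        vaddr_t *dstaddrp;
{
        *dstaddrp = 0;          /* no address hint */
        if (uvm_map_reserve(dstmap, len, UVM_UNKNOWN_OFFSET, 0, dstaddrp)
            == FALSE)
                return (ENOMEM);        /* illustrative errno */

        /*
         * [*dstaddrp, *dstaddrp + len) is now a blank VM_PROT_NONE entry;
         * replace it with real entries or unmap it later.
         */
        return (0);
}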
* - * => we reserve space in a map by putting a dummy map entry in the + * => we reserve space in a map by putting a dummy map entry in the * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) * => map should be unlocked (we will write lock it) * => we return true if we were able to reserve space @@ -1205,13 +1191,13 @@ uvm_unmap_detach(first_entry, flags) int uvm_map_reserve(map, size, offset, align, raddr) - struct vm_map *map; + vm_map_t map; vsize_t size; vaddr_t offset; /* hint for pmap_prefer */ vsize_t align; /* alignment hint */ vaddr_t *raddr; /* IN:hint, OUT: reserved VA */ { - UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", map,size,offset,raddr); @@ -1226,20 +1212,20 @@ uvm_map_reserve(map, size, offset, align, raddr) if (uvm_map(map, raddr, size, NULL, offset, 0, UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { + UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != KERN_SUCCESS) { UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); return (FALSE); - } + } UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); return (TRUE); } /* - * uvm_map_replace: replace a reserved (blank) area of memory with + * uvm_map_replace: replace a reserved (blank) area of memory with * real mappings. * - * => caller must WRITE-LOCK the map + * => caller must WRITE-LOCK the map * => we return TRUE if replacement was a success * => we expect the newents chain to have nnewents entrys on it and * we expect newents->prev to point to the last entry on the list @@ -1250,10 +1236,10 @@ int uvm_map_replace(map, start, end, newents, nnewents) struct vm_map *map; vaddr_t start, end; - struct vm_map_entry *newents; + vm_map_entry_t newents; int nnewents; { - struct vm_map_entry *oldent, *last; + vm_map_entry_t oldent, last; /* * first find the blank map entry at the specified address @@ -1267,19 +1253,17 @@ uvm_map_replace(map, start, end, newents, nnewents) * check to make sure we have a proper blank entry */ - if (oldent->start != start || oldent->end != end || + if (oldent->start != start || oldent->end != end || oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { return (FALSE); } #ifdef DIAGNOSTIC - /* * sanity check the newents chain */ - { - struct vm_map_entry *tmpent = newents; + vm_map_entry_t tmpent = newents; int nent = 0; vaddr_t cur = start; @@ -1313,7 +1297,8 @@ uvm_map_replace(map, start, end, newents, nnewents) */ if (newents) { - last = newents->prev; + + last = newents->prev; /* we expect this */ /* critical: flush stale hints out of map */ SAVE_HINT(map, map->hint, newents); @@ -1366,15 +1351,15 @@ uvm_map_replace(map, start, end, newents, nnewents) int uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) - struct vm_map *srcmap, *dstmap; + vm_map_t srcmap, dstmap; vaddr_t start, *dstaddrp; vsize_t len; int flags; { vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, oldstart; - struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, - *deadentry, *oldentry; + vm_map_entry_t chain, endchain, entry, orig_entry, newentry, deadentry; + vm_map_entry_t oldentry; vsize_t elen; int nchain, error, copy_ok; UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); @@ -1404,7 +1389,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); /* - * step 2: setup for the extraction process loop by init'ing the + * 
step 2: setup for the extraction process loop by init'ing the * map entry chain, locking src map, and looking up the first useful * entry in the map. */ @@ -1514,8 +1499,8 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) newentry->offset = 0; } newentry->etype = entry->etype; - newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? - entry->max_protection : entry->protection; + newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? + entry->max_protection : entry->protection; newentry->max_protection = entry->max_protection; newentry->inheritance = entry->inheritance; newentry->wired_count = 0; @@ -1540,7 +1525,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) } /* end of 'while' loop! */ - if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && + if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && (entry->next == &srcmap->header || entry->next->start != entry->end)) { error = EINVAL; @@ -1559,7 +1544,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* * step 5: attempt to lock the dest map so we can pmap_copy. - * note usage of copy_ok: + * note usage of copy_ok: * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 */ @@ -1610,7 +1595,7 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* we advance "entry" in the following if statement */ if (flags & UVM_EXTRACT_REMOVE) { - pmap_remove(srcmap->pmap, entry->start, + pmap_remove(srcmap->pmap, entry->start, entry->end); oldentry = entry; /* save entry */ entry = entry->next; /* advance */ @@ -1625,7 +1610,6 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) /* end of 'while' loop */ fudge = 0; } - pmap_update(srcmap->pmap); /* * unlock dstmap. we will dispose of deadentry in @@ -1635,9 +1619,9 @@ uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) if (copy_ok && srcmap != dstmap) vm_map_unlock(dstmap); - } else { - deadentry = NULL; } + else + deadentry = NULL; /* XXX: gcc */ /* * step 7: we are done with the source map, unlock. if copy_ok @@ -1688,7 +1672,7 @@ bad2: /* src already unlocked */ * call [with uobj==NULL] to create a blank map entry in the main map. * [And it had better still be blank!] * => maps which contain submaps should never be copied or forked. - * => to remove a submap, use uvm_unmap() on the main map + * => to remove a submap, use uvm_unmap() on the main map * and then uvm_map_deallocate() the submap. * => main map must be unlocked. * => submap must have been init'd and have a zero reference count. 
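To make the submap rules concrete: submaps are normally created once at boot through uvm_km_suballoc(), which carves the VA range out of the parent and installs it via uvm_map_submap(), panicking rather than failing. A boot-time sketch with illustrative names; the size and the flags value (0 here, could request e.g. an interrupt-safe map) are arbitrary.

#include <sys/param.h>
#include <uvm/uvm.h>

vm_map_t my_submap;             /* illustrative global */

void
my_submap_init()
{
        vaddr_t minaddr, maxaddr;

        /*
         * carve 16 pages of KVA out of kernel_map and manage that range
         * with a private map.  FALSE == let the parent pick the VA;
         * NULL == allocate the vm_map structure for us.
         */
        my_submap = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
            16 * PAGE_SIZE, 0, FALSE, NULL);
}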
@@ -1697,11 +1681,11 @@ bad2: /* src already unlocked */ int uvm_map_submap(map, start, end, submap) - struct vm_map *map, *submap; + vm_map_t map, submap; vaddr_t start, end; { - struct vm_map_entry *entry; - int error; + vm_map_entry_t entry; + int result; vm_map_lock(map); @@ -1714,7 +1698,7 @@ uvm_map_submap(map, start, end, submap) entry = NULL; } - if (entry != NULL && + if (entry != NULL && entry->start == start && entry->end == end && entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { @@ -1722,12 +1706,12 @@ uvm_map_submap(map, start, end, submap) entry->object.sub_map = submap; entry->offset = 0; uvm_map_reference(submap); - error = 0; + result = KERN_SUCCESS; } else { - error = EINVAL; + result = KERN_INVALID_ARGUMENT; } vm_map_unlock(map); - return error; + return(result); } @@ -1740,22 +1724,25 @@ uvm_map_submap(map, start, end, submap) #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ ~VM_PROT_WRITE : VM_PROT_ALL) +#define max(a,b) ((a) > (b) ? (a) : (b)) int uvm_map_protect(map, start, end, new_prot, set_max) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; vm_prot_t new_prot; boolean_t set_max; { - struct vm_map_entry *current, *entry; - int error = 0; + vm_map_entry_t current, entry; + int rv = KERN_SUCCESS; UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", map, start, end, new_prot); vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); } else { @@ -1769,11 +1756,11 @@ uvm_map_protect(map, start, end, new_prot, set_max) current = entry; while ((current != &map->header) && (current->start < end)) { if (UVM_ET_ISSUBMAP(current)) { - error = EINVAL; + rv = KERN_INVALID_ARGUMENT; goto out; } if ((new_prot & current->max_protection) != new_prot) { - error = EACCES; + rv = KERN_PROTECTION_FAILURE; goto out; } current = current->next; @@ -1782,10 +1769,12 @@ uvm_map_protect(map, start, end, new_prot, set_max) /* go back and fix up protections (no need to clip this time). */ current = entry; + while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; UVM_MAP_CLIP_END(map, current, end); + old_prot = current->protection; if (set_max) current->protection = @@ -1794,7 +1783,7 @@ uvm_map_protect(map, start, end, new_prot, set_max) current->protection = new_prot; /* - * update physical map if necessary. worry about copy-on-write + * update physical map if necessary. worry about copy-on-write * here -- CHECK THIS XXX */ @@ -1816,14 +1805,13 @@ uvm_map_protect(map, start, end, new_prot, set_max) new_prot != VM_PROT_NONE) { if (uvm_map_pageable(map, entry->start, entry->end, FALSE, - UVM_LK_ENTER|UVM_LK_EXIT) != 0) { - + UVM_LK_ENTER|UVM_LK_EXIT) != KERN_SUCCESS) { /* * If locking the entry fails, remember the * error if it's the first one. Note we * still continue setting the protection in - * the map, but will return the error - * condition regardless. + * the map, but will return the resource + * shortage condition regardless. * * XXX Ignore what the actual error is, * XXX just call it a resource shortage @@ -1831,23 +1819,23 @@ uvm_map_protect(map, start, end, new_prot, set_max) * XXX what uvm_map_protect() itself would * XXX normally return. 
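Most of this hunk and the ones that follow are mechanical: errno-style returns go back to the Mach-style KERN_* codes. The pairs visible in these diffs line up as sketched below; the helper is purely illustrative and assumes the usual uvm headers supply the KERN_* constants.

#include <sys/param.h>
#include <sys/errno.h>
#include <uvm/uvm_extern.h>

/* rough errno view of the KERN_* codes swapped in throughout this diff */
static int
kern2errno(rv)
        int rv;
{
        switch (rv) {
        case KERN_SUCCESS:              return (0);
        case KERN_INVALID_ADDRESS:      return (EFAULT);
        case KERN_INVALID_ARGUMENT:     return (EINVAL);
        case KERN_PROTECTION_FAILURE:   return (EACCES);
        case KERN_NO_SPACE:             /* FALLTHROUGH */
        case KERN_RESOURCE_SHORTAGE:    return (ENOMEM);
        default:                        return (EINVAL);  /* KERN_FAILURE etc. */
        }
}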
*/ - - error = ENOMEM; + rv = KERN_RESOURCE_SHORTAGE; } } + current = current->next; } - pmap_update(map->pmap); out: vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); - return error; + UVMHIST_LOG(maphist, "<- done, rv=%d",rv,0,0,0); + return (rv); } +#undef max #undef MASK -/* +/* * uvm_map_inherit: set inheritance code for range of addrs in map. * * => map must be unlocked @@ -1857,12 +1845,12 @@ uvm_map_protect(map, start, end, new_prot, set_max) int uvm_map_inherit(map, start, end, new_inheritance) - struct vm_map *map; + vm_map_t map; vaddr_t start; vaddr_t end; vm_inherit_t new_inheritance; { - struct vm_map_entry *entry, *temp_entry; + vm_map_entry_t entry, temp_entry; UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", map, start, end, new_inheritance); @@ -1874,11 +1862,13 @@ uvm_map_inherit(map, start, end, new_inheritance) break; default: UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } - vm_map_lock(map); + vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; UVM_MAP_CLIP_START(map, entry, start); @@ -1891,12 +1881,13 @@ uvm_map_inherit(map, start, end, new_inheritance) entry->inheritance = new_inheritance; entry = entry->next; } + vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } -/* +/* * uvm_map_advice: set advice code for range of addrs in map. * * => map must be unlocked @@ -1904,12 +1895,12 @@ uvm_map_inherit(map, start, end, new_inheritance) int uvm_map_advice(map, start, end, new_advice) - struct vm_map *map; + vm_map_t map; vaddr_t start; vaddr_t end; int new_advice; { - struct vm_map_entry *entry, *temp_entry; + vm_map_entry_t entry, temp_entry; UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", map, start, end, new_advice); @@ -1940,7 +1931,7 @@ uvm_map_advice(map, start, end, new_advice) default: vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } entry->advice = new_advice; entry = entry->next; @@ -1948,7 +1939,7 @@ uvm_map_advice(map, start, end, new_advice) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } /* @@ -1967,12 +1958,12 @@ uvm_map_advice(map, start, end, new_advice) int uvm_map_pageable(map, start, end, new_pageable, lockflags) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; boolean_t new_pageable; int lockflags; { - struct vm_map_entry *entry, *start_entry, *failed_entry; + vm_map_entry_t entry, start_entry, failed_entry; int rv; #ifdef DIAGNOSTIC u_int timestamp_save; @@ -1984,26 +1975,27 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_ENTER) == 0) vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); - /* + /* * only one pageability change may take place at one time, since * uvm_fault_wire assumes it will be called only once for each * wiring/unwiring. therefore, we have to make sure we're actually * changing the pageability for the entire region. we do so before - * making any changes. + * making any changes. 
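The rule stated here is a pattern used throughout this file: one read-only pass over the range to reject submaps and holes, then a second pass that actually modifies entries, so a mid-range failure never leaves the map half-changed. Schematically, with the map locked by the caller and the loop bodies reduced to placeholders:

/* map is locked by the caller; "entry" is the first entry in the range */
static int
apply_to_range(map, entry, end)
        vm_map_t map;
        vm_map_entry_t entry;
        vaddr_t end;
{
        vm_map_entry_t current;

        /* pass 1: validate the whole range before touching anything */
        for (current = entry;
            current != &map->header && current->start < end;
            current = current->next) {
                if (UVM_ET_ISSUBMAP(current))
                        return (KERN_INVALID_ARGUMENT);
                if (current->end < end &&
                    current->end != current->next->start)
                        return (KERN_INVALID_ADDRESS);  /* hole in range */
        }

        /* pass 2: no failure below can leave the range half-modified */
        for (current = entry;
            current != &map->header && current->start < end;
            current = current->next) {
                /* ... change protection/inheritance/wiring of "current" ... */
        }
        return (KERN_SUCCESS);
}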
*/ if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); - return EFAULT; + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return (KERN_INVALID_ADDRESS); } entry = start_entry; - /* + /* * handle wiring and unwiring separately. */ @@ -2012,7 +2004,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) /* * unwiring. first ensure that the range to be unwired is - * really wired down and that there are no holes. + * really wired down and that there are no holes. */ while ((entry != &map->header) && (entry->start < end)) { @@ -2022,13 +2014,14 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) entry->next->start > entry->end))) { if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); - return EINVAL; + UVMHIST_LOG(maphist, + "<- done (INVALID UNWIRE ARG)",0,0,0,0); + return (KERN_INVALID_ARGUMENT); } entry = entry->next; } - /* + /* * POSIX 1003.1b - a single munlock call unlocks a region, * regardless of the number of mlock calls made on that * region. @@ -2044,7 +2037,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } /* @@ -2052,7 +2045,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) * * 1: holding the write lock, we create any anonymous maps that need * to be created. then we clip each map entry to the region to - * be wired and increment its wiring count. + * be wired and increment its wiring count. * * 2: we downgrade to a read lock, and call uvm_fault_wire to fault * in the pages for any newly wired area (wired_count == 1). @@ -2080,11 +2073,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) */ if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && + if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, M_WAITOK, TRUE, - start, end); + start, end); /* XXXCDC: wait OK? */ } } @@ -2094,7 +2087,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) entry->wired_count++; /* - * Check for holes + * Check for holes */ if (entry->protection == VM_PROT_NONE || @@ -2104,7 +2097,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) /* * found one. amap creation actions do not need to - * be undone, but the wired counts need to be restored. + * be undone, but the wired counts need to be restored. */ while (entry != &map->header && entry->end > start) { @@ -2114,7 +2107,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) if ((lockflags & UVM_LK_EXIT) == 0) vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } entry = entry->next; } @@ -2136,13 +2129,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) rv = uvm_fault_wire(map, entry->start, entry->end, entry->protection); if (rv) { - /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. 
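Condensed, the wiring pass that follows the comment above looks like this: with the map read-locked and busied, fault in each newly-wired entry, and stop at the first failure so the caller can re-take the write lock and roll the wired counts back. A sketch with an illustrative wrapper name, assuming the headers already included by uvm_map.c:

/*
 * fault in the pages for every newly-wired entry (wired_count was
 * bumped to 1 by the first pass); map is read-locked and busied by
 * the caller.
 */
static int
wire_entries(map, start_entry, end)
        vm_map_t map;
        vm_map_entry_t start_entry;
        vaddr_t end;
{
        vm_map_entry_t entry;
        int rv;

        rv = KERN_SUCCESS;
        for (entry = start_entry;
            entry != &map->header && entry->start < end;
            entry = entry->next) {
                if (entry->wired_count == 1) {
                        rv = uvm_fault_wire(map, entry->start, entry->end,
                            entry->protection);
                        if (rv)
                                break;  /* caller re-takes the write lock
                                         * and rolls the counts back */
                }
        }
        return (rv);
}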
*/ - break; } } @@ -2207,7 +2198,7 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) } UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return 0; + return(KERN_SUCCESS); } /* @@ -2221,11 +2212,11 @@ uvm_map_pageable(map, start, end, new_pageable, lockflags) int uvm_map_pageable_all(map, flags, limit) - struct vm_map *map; + vm_map_t map; int flags; vsize_t limit; { - struct vm_map_entry *entry, *failed_entry; + vm_map_entry_t entry, failed_entry; vsize_t size; int rv; #ifdef DIAGNOSTIC @@ -2243,12 +2234,10 @@ uvm_map_pageable_all(map, flags, limit) */ if (flags == 0) { /* unwire */ - /* * POSIX 1003.1b -- munlockall unlocks all regions, * regardless of how many times mlockall has been called. */ - for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (VM_MAPENT_ISWIRED(entry)) @@ -2257,27 +2246,27 @@ uvm_map_pageable_all(map, flags, limit) vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return 0; + return (KERN_SUCCESS); + + /* + * end of unwire case! + */ } if (flags & MCL_FUTURE) { - /* * must wire all future mappings; remember this. */ - vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); } if ((flags & MCL_CURRENT) == 0) { - /* * no more work to do! */ - UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); vm_map_unlock(map); - return 0; + return (KERN_SUCCESS); } /* @@ -2313,7 +2302,7 @@ uvm_map_pageable_all(map, flags, limit) if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); /* XXX overloaded */ } /* XXX non-pmap_wired_count case must be handled by caller */ @@ -2321,7 +2310,7 @@ uvm_map_pageable_all(map, flags, limit) if (limit != 0 && (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { vm_map_unlock(map); - return ENOMEM; + return (KERN_NO_SPACE); /* XXX overloaded */ } #endif @@ -2334,7 +2323,6 @@ uvm_map_pageable_all(map, flags, limit) if (entry->protection == VM_PROT_NONE) continue; if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - /* * perform actions of vm_map_lookup that need the * write lock on the map: create an anonymous map @@ -2342,9 +2330,8 @@ uvm_map_pageable_all(map, flags, limit) * for a zero-fill region. (XXXCDC: submap case * ok?) */ - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && + if (UVM_ET_ISNEEDSCOPY(entry) && ((entry->protection & VM_PROT_WRITE) || (entry->object.uvm_obj == NULL))) { amap_copy(map, entry, M_WAITOK, TRUE, @@ -2366,31 +2353,27 @@ uvm_map_pageable_all(map, flags, limit) vm_map_busy(map); vm_map_downgrade(map); - rv = 0; + rv = KERN_SUCCESS; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry->wired_count == 1) { rv = uvm_fault_wire(map, entry->start, entry->end, entry->protection); if (rv) { - /* * wiring failed. break out of the loop. * we'll clean up the map below, once we * have a write lock again. */ - break; } } } - if (rv) { - + if (rv) { /* failed? */ /* * Get back an exclusive (write) lock. */ - vm_map_upgrade(map); vm_map_unbusy(map); @@ -2405,7 +2388,6 @@ uvm_map_pageable_all(map, flags, limit) * * Skip VM_PROT_NONE entries like we did above. */ - failed_entry = entry; for (/* nothing */; entry != &map->header; entry = entry->next) { @@ -2420,7 +2402,6 @@ uvm_map_pageable_all(map, flags, limit) * * Skip VM_PROT_NONE entries like we did above. 
*/ - for (entry = map->header.next; entry != failed_entry; entry = entry->next) { if (entry->protection == VM_PROT_NONE) @@ -2439,7 +2420,7 @@ uvm_map_pageable_all(map, flags, limit) vm_map_unlock_read(map); UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return 0; + return (KERN_SUCCESS); } /* @@ -2451,7 +2432,7 @@ uvm_map_pageable_all(map, flags, limit) * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean * if (flags & PGO_FREE): any cached pages are freed after clean * => returns an error if any part of the specified range isn't mapped - * => never a need to flush amap layer since the anonymous memory has + * => never a need to flush amap layer since the anonymous memory has * no permanent home, but may deactivate pages there * => called from sys_msync() and sys_madvise() * => caller must not write-lock map (read OK). @@ -2460,11 +2441,11 @@ uvm_map_pageable_all(map, flags, limit) int uvm_map_clean(map, start, end, flags) - struct vm_map *map; + vm_map_t map; vaddr_t start, end; int flags; { - struct vm_map_entry *current, *entry; + vm_map_entry_t current, entry; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; @@ -2483,7 +2464,7 @@ uvm_map_clean(map, start, end, flags) VM_MAP_RANGE_CHECK(map, start, end); if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { vm_map_unlock_read(map); - return EFAULT; + return(KERN_INVALID_ADDRESS); } /* @@ -2493,18 +2474,19 @@ uvm_map_clean(map, start, end, flags) for (current = entry; current->start < end; current = current->next) { if (UVM_ET_ISSUBMAP(current)) { vm_map_unlock_read(map); - return EINVAL; + return (KERN_INVALID_ARGUMENT); } if (end <= current->end) { break; } if (current->end != current->next->start) { vm_map_unlock_read(map); - return EFAULT; + return (KERN_INVALID_ADDRESS); } } - error = 0; + error = KERN_SUCCESS; + for (current = entry; start < end; current = current->next) { amap = current->aref.ar_amap; /* top layer */ uobj = current->object.uvm_obj; /* bottom layer */ @@ -2604,7 +2586,7 @@ uvm_map_clean(map, start, end, flags) continue; default: - panic("uvm_map_clean: weird flags"); + panic("uvm_map_clean: wierd flags"); } } amap_unlock(amap); @@ -2623,12 +2605,12 @@ uvm_map_clean(map, start, end, flags) simple_unlock(&uobj->vmobjlock); if (rv == FALSE) - error = EIO; + error = KERN_FAILURE; } start += size; } vm_map_unlock_read(map); - return (error); + return (error); } @@ -2641,41 +2623,44 @@ uvm_map_clean(map, start, end, flags) boolean_t uvm_map_checkprot(map, start, end, protection) - struct vm_map * map; - vaddr_t start, end; - vm_prot_t protection; + vm_map_t map; + vaddr_t start, end; + vm_prot_t protection; { - struct vm_map_entry *entry; - struct vm_map_entry *tmp_entry; - - if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); - } - entry = tmp_entry; - while (start < end) { - if (entry == &map->header) { - return(FALSE); - } + vm_map_entry_t entry; + vm_map_entry_t tmp_entry; + + if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { + return(FALSE); + } + entry = tmp_entry; + while (start < end) { + if (entry == &map->header) { + return(FALSE); + } /* * no holes allowed */ - if (start < entry->start) { - return(FALSE); - } + if (start < entry->start) { + return(FALSE); + } /* * check protection associated with entry */ - if ((entry->protection & protection) != protection) { - return(FALSE); - } - start = entry->end; - entry = entry->next; - } - return(TRUE); + if ((entry->protection & protection) != protection) { + return(FALSE); + } + + /* go to next 
entry */ + + start = entry->end; + entry = entry->next; + } + return(TRUE); } /* @@ -2686,14 +2671,15 @@ uvm_map_checkprot(map, start, end, protection) * - refcnt set to 1, rest must be init'd by caller */ struct vmspace * -uvmspace_alloc(min, max) +uvmspace_alloc(min, max, pageable) vaddr_t min, max; + int pageable; { struct vmspace *vm; UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); - uvmspace_init(vm, NULL, min, max); + uvmspace_init(vm, NULL, min, max, pageable); UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); return (vm); } @@ -2705,20 +2691,24 @@ uvmspace_alloc(min, max) * - refcnt set to 1, rest must me init'd by caller */ void -uvmspace_init(vm, pmap, min, max) +uvmspace_init(vm, pmap, min, max, pageable) struct vmspace *vm; struct pmap *pmap; vaddr_t min, max; + boolean_t pageable; { UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); memset(vm, 0, sizeof(*vm)); - uvm_map_setup(&vm->vm_map, min, max, VM_MAP_PAGEABLE); + + uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); + if (pmap) pmap_reference(pmap); else pmap = pmap_create(); vm->vm_map.pmap = pmap; + vm->vm_refcnt = 1; UVMHIST_LOG(maphist,"<- done",0,0,0,0); } @@ -2746,7 +2736,7 @@ uvmspace_share(p1, p2) void uvmspace_unshare(p) - struct proc *p; + struct proc *p; { struct vmspace *nvm, *ovm = p->p_vmspace; @@ -2758,7 +2748,7 @@ uvmspace_unshare(p) nvm = uvmspace_fork(ovm); pmap_deactivate(p); /* unbind old vmspace */ - p->p_vmspace = nvm; + p->p_vmspace = nvm; pmap_activate(p); /* switch to new vmspace */ uvmspace_free(ovm); /* drop reference to old vmspace */ @@ -2776,7 +2766,7 @@ uvmspace_exec(p, start, end) vaddr_t start, end; { struct vmspace *nvm, *ovm = p->p_vmspace; - struct vm_map *map = &ovm->vm_map; + vm_map_t map = &ovm->vm_map; #ifdef __sparc__ /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ @@ -2798,7 +2788,6 @@ uvmspace_exec(p, start, end) /* * SYSV SHM semantics require us to kill all segments on an exec */ - if (ovm->vm_shm) shmexit(ovm); #endif @@ -2807,7 +2796,6 @@ uvmspace_exec(p, start, end) * POSIX 1003.1b -- "lock future mappings" is revoked * when a process execs another program image. */ - vm_map_lock(map); vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); vm_map_unlock(map); @@ -2815,17 +2803,17 @@ uvmspace_exec(p, start, end) /* * now unmap the old program */ - uvm_unmap(map, map->min_offset, map->max_offset); /* * resize the map */ - vm_map_lock(map); map->min_offset = start; map->max_offset = end; vm_map_unlock(map); + + } else { /* @@ -2833,8 +2821,8 @@ uvmspace_exec(p, start, end) * it is still being used for others. allocate a new vmspace * for p */ - - nvm = uvmspace_alloc(start, end); + nvm = uvmspace_alloc(start, end, + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); /* * install new vmspace and drop our ref to the old one. @@ -2858,18 +2846,16 @@ void uvmspace_free(vm) struct vmspace *vm; { - struct vm_map_entry *dead_entries; + vm_map_entry_t dead_entries; UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); if (--vm->vm_refcnt == 0) { - /* * lock the map, to wait out all other references to it. delete * all of the mappings and pages they hold, then call the pmap * module to reclaim anything left. */ - #ifdef SYSVSHM /* Get rid of any SYSV shared memory segments. 
*/ if (vm->vm_shm != NULL) @@ -2877,7 +2863,7 @@ uvmspace_free(vm) #endif vm_map_lock(&vm->vm_map); if (vm->vm_map.nentries) { - uvm_unmap_remove(&vm->vm_map, + (void)uvm_unmap_remove(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset, &dead_entries); if (dead_entries != NULL) @@ -2905,17 +2891,18 @@ uvmspace_fork(vm1) struct vmspace *vm1; { struct vmspace *vm2; - struct vm_map *old_map = &vm1->vm_map; - struct vm_map *new_map; - struct vm_map_entry *old_entry; - struct vm_map_entry *new_entry; - pmap_t new_pmap; - boolean_t protect_child; + vm_map_t old_map = &vm1->vm_map; + vm_map_t new_map; + vm_map_entry_t old_entry; + vm_map_entry_t new_entry; + pmap_t new_pmap; + boolean_t protect_child; UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); vm_map_lock(old_map); - vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset); + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); new_map = &vm2->vm_map; /* XXX */ @@ -2932,26 +2919,27 @@ uvmspace_fork(vm1) /* * first, some sanity checks on the old entry */ + if (UVM_ET_ISSUBMAP(old_entry)) + panic("fork: encountered a submap during fork (illegal)"); + + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) + panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); - KASSERT(!UVM_ET_ISSUBMAP(old_entry)); - KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || - !UVM_ET_ISNEEDSCOPY(old_entry)); switch (old_entry->inheritance) { case MAP_INHERIT_NONE: - /* * drop the mapping */ - break; case MAP_INHERIT_SHARE: - /* * share the mapping: this means we want the old and * new entries to share amaps and backing objects. */ + /* * if the old_entry needs a new amap (due to prev fork) * then we need to allocate it now so that we have @@ -2962,7 +2950,7 @@ uvmspace_fork(vm1) if (UVM_ET_ISNEEDSCOPY(old_entry)) { /* get our own amap, clears needs_copy */ amap_copy(old_map, old_entry, M_WAITOK, FALSE, - 0, 0); + 0, 0); /* XXXCDC: WAITOK??? */ } @@ -2977,8 +2965,8 @@ uvmspace_fork(vm1) * gain reference to object backing the map (can't * be a submap, already checked this case). */ - if (new_entry->aref.ar_amap) + /* share reference */ uvm_map_reference_amap(new_entry, AMAP_SHARED); if (new_entry->object.uvm_obj && @@ -2991,7 +2979,7 @@ uvmspace_fork(vm1) uvm_map_entry_link(new_map, new_map->header.prev, new_entry); - /* + /* * pmap_copy the mappings: this routine is optional * but if it is there it will reduce the number of * page faults in the new proc. @@ -3009,7 +2997,7 @@ uvmspace_fork(vm1) * copy-on-write the mapping (using mmap's * MAP_PRIVATE semantics) * - * allocate new_entry, adjust reference counts. + * allocate new_entry, adjust reference counts. * (note that new references are read-only). */ @@ -3045,20 +3033,20 @@ uvmspace_fork(vm1) * conditions hold: * 1. the old entry has an amap and that amap is * being shared. this means that the old (parent) - * process is sharing the amap with another + * process is sharing the amap with another * process. if we do not clear needs_copy here * we will end up in a situation where both the * parent and child process are refering to the - * same amap with "needs_copy" set. if the + * same amap with "needs_copy" set. if the * parent write-faults, the fault routine will * clear "needs_copy" in the parent by allocating - * a new amap. this is wrong because the + * a new amap. 
this is wrong because the * parent is supposed to be sharing the old amap * and the new amap will break that. * * 2. if the old entry has an amap and a non-zero * wire count then we are going to have to call - * amap_cow_now to avoid page faults in the + * amap_cow_now to avoid page faults in the * parent process. since amap_cow_now requires * "needs_copy" to be clear we might as well * clear it here as well. @@ -3066,14 +3054,15 @@ uvmspace_fork(vm1) */ if (old_entry->aref.ar_amap != NULL) { - if ((amap_flags(old_entry->aref.ar_amap) & - AMAP_SHARED) != 0 || - VM_MAPENT_ISWIRED(old_entry)) { - amap_copy(new_map, new_entry, M_WAITOK, - FALSE, 0, 0); - /* XXXCDC: M_WAITOK ... ok? */ - } + if ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + VM_MAPENT_ISWIRED(old_entry)) { + + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } } /* @@ -3089,9 +3078,9 @@ uvmspace_fork(vm1) if (VM_MAPENT_ISWIRED(old_entry)) { - /* + /* * resolve all copy-on-write faults now - * (note that there is nothing to do if + * (note that there is nothing to do if * the old mapping does not have an amap). * XXX: is it worthwhile to bother with pmap_copy * in this case? @@ -3099,7 +3088,7 @@ uvmspace_fork(vm1) if (old_entry->aref.ar_amap) amap_cow_now(new_map, new_entry); - } else { + } else { /* * setup mappings to trigger copy-on-write faults @@ -3127,7 +3116,6 @@ uvmspace_fork(vm1) old_entry->end, old_entry->protection & ~VM_PROT_WRITE); - pmap_update(old_map->pmap); } old_entry->etype |= UVM_ET_NEEDSCOPY; } @@ -3139,7 +3127,7 @@ uvmspace_fork(vm1) } else { /* - * we only need to protect the child if the + * we only need to protect the child if the * parent has write access. */ if (old_entry->max_protection & VM_PROT_WRITE) @@ -3164,10 +3152,9 @@ uvmspace_fork(vm1) */ if (protect_child) { pmap_protect(new_pmap, new_entry->start, - new_entry->end, - new_entry->protection & + new_entry->end, + new_entry->protection & ~VM_PROT_WRITE); - pmap_update(new_pmap); } } @@ -3177,7 +3164,7 @@ uvmspace_fork(vm1) } new_map->size = old_map->size; - vm_map_unlock(old_map); + vm_map_unlock(old_map); #ifdef SYSVSHM if (vm1->vm_shm) @@ -3189,7 +3176,7 @@ uvmspace_fork(vm1) #endif UVMHIST_LOG(maphist,"<- done",0,0,0,0); - return(vm2); + return(vm2); } @@ -3205,11 +3192,11 @@ uvmspace_fork(vm1) void uvm_map_printit(map, full, pr) - struct vm_map *map; + vm_map_t map; boolean_t full; int (*pr) __P((const char *, ...)); { - struct vm_map_entry *entry; + vm_map_entry_t entry; (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", @@ -3234,12 +3221,12 @@ uvm_map_printit(map, full, pr) "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " "wc=%d, adv=%d\n", (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', - (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', entry->protection, entry->max_protection, entry->inheritance, entry->wired_count, entry->advice); } -} +} /* * uvm_object_printit: actually prints the object @@ -3276,7 +3263,7 @@ uvm_object_printit(uobj, full, pr) if ((cnt % 3) != 2) { (*pr)("\n"); } -} +} /* * uvm_page_printit: actually print the page @@ -3348,11 +3335,11 @@ uvm_page_printit(pg, full, pr) /* cross-verify page queue */ if (pg->pqflags & PQ_FREE) { int fl = uvm_page_lookup_freelist(pg); - int color = VM_PGCOLOR_BUCKET(pg); - pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ - ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; + pgl = &uvm.page_free[fl].pgfl_queues[((pg)->flags & PG_ZERO) ? + PGFL_ZEROS : PGFL_UNKNOWN]; } else if (pg->pqflags & PQ_INACTIVE) { - pgl = &uvm.page_inactive; + pgl = (pg->pqflags & PQ_SWAPBACKED) ? + &uvm.page_inactive_swp : &uvm.page_inactive_obj; } else if (pg->pqflags & PQ_ACTIVE) { pgl = &uvm.page_active; } else { diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index d0d1509fc4c..724bd78ab23 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map.h,v 1.19 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_map.h,v 1.30 2001/09/09 19:38:23 chs Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -83,7 +83,7 @@ /* * UVM_MAP_CLIP_START: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. - * + * * => map must be locked by caller */ @@ -113,6 +113,26 @@ #include <uvm/uvm_anon.h> /* + * types defined: + * + * vm_map_t the high-level address map data structure. + * vm_map_entry_t an entry in an address map. 
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup + */ + +/* + * Objects which live in maps may be either VM objects, or another map + * (called a "sharing map") which denotes read-write sharing with other maps. + * + * XXXCDC: private pager data goes here now + */ + +union vm_map_object { + struct uvm_object *uvm_obj; /* UVM OBJECT */ + struct vm_map *sub_map; /* belongs to another map */ +}; + +/* * Address map entries consist of start and end addresses, * a VM object (or sharing map) and offset into that object, * and user-exported inheritance and protection information. @@ -123,10 +143,7 @@ struct vm_map_entry { struct vm_map_entry *next; /* next entry */ vaddr_t start; /* start address */ vaddr_t end; /* end address */ - union { - struct uvm_object *uvm_obj; /* uvm object */ - struct vm_map *sub_map; /* belongs to another map */ - } object; /* object I point to */ + union vm_map_object object; /* object I point to */ voff_t offset; /* offset into object */ int etype; /* entry type */ vm_prot_t protection; /* protection code */ @@ -139,7 +156,6 @@ struct vm_map_entry { u_int8_t flags; /* flags */ #define UVM_MAP_STATIC 0x01 /* static map entry */ -#define UVM_MAP_KMEM 0x02 /* from kmem entry pool */ }; @@ -199,17 +215,17 @@ struct vm_map_entry { */ struct vm_map { struct pmap * pmap; /* Physical map */ - struct lock lock; /* Lock for map data */ + lock_data_t lock; /* Lock for map data */ struct vm_map_entry header; /* List of entries */ int nentries; /* Number of entries */ vsize_t size; /* virtual size */ int ref_count; /* Reference count */ - struct simplelock ref_lock; /* Lock for ref_count field */ - struct vm_map_entry * hint; /* hint for quick lookups */ - struct simplelock hint_lock; /* lock for hint storage */ - struct vm_map_entry * first_free; /* First free space hint */ + simple_lock_data_t ref_lock; /* Lock for ref_count field */ + vm_map_entry_t hint; /* hint for quick lookups */ + simple_lock_data_t hint_lock; /* lock for hint storage */ + vm_map_entry_t first_free; /* First free space hint */ int flags; /* flags */ - struct simplelock flags_lock; /* Lock for flags field */ + simple_lock_data_t flags_lock; /* Lock for flags field */ unsigned int timestamp; /* Version number */ #define min_offset header.start #define max_offset header.end @@ -242,12 +258,49 @@ do { \ #endif /* _KERNEL */ /* + * Interrupt-safe maps must also be kept on a special list, + * to assist uvm_fault() in avoiding locking problems. 
+ */ +struct vm_map_intrsafe { + struct vm_map vmi_map; + LIST_ENTRY(vm_map_intrsafe) vmi_list; +}; + +LIST_HEAD(vmi_list, vm_map_intrsafe); +#ifdef _KERNEL +extern simple_lock_data_t vmi_list_slock; +extern struct vmi_list vmi_list; + +static __inline int vmi_list_lock __P((void)); +static __inline void vmi_list_unlock __P((int)); + +static __inline int +vmi_list_lock() +{ + int s; + + s = splhigh(); + simple_lock(&vmi_list_slock); + return (s); +} + +static __inline void +vmi_list_unlock(s) + int s; +{ + + simple_unlock(&vmi_list_slock); + splx(s); +} +#endif /* _KERNEL */ + +/* * handle inline options */ #ifdef UVM_MAP_INLINE #define MAP_INLINE static __inline -#else +#else #define MAP_INLINE /* nothing */ #endif /* UVM_MAP_INLINE */ @@ -266,39 +319,34 @@ extern vaddr_t uvm_maxkaddr; */ MAP_INLINE -void uvm_map_deallocate __P((struct vm_map *)); +void uvm_map_deallocate __P((vm_map_t)); -int uvm_map_clean __P((struct vm_map *, vaddr_t, vaddr_t, int)); -void uvm_map_clip_start __P((struct vm_map *, struct vm_map_entry *, - vaddr_t)); -void uvm_map_clip_end __P((struct vm_map *, struct vm_map_entry *, - vaddr_t)); +int uvm_map_clean __P((vm_map_t, vaddr_t, vaddr_t, int)); +void uvm_map_clip_start __P((vm_map_t, vm_map_entry_t, vaddr_t)); +void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t, vaddr_t)); MAP_INLINE -struct vm_map *uvm_map_create __P((pmap_t, vaddr_t, vaddr_t, int)); -int uvm_map_extract __P((struct vm_map *, vaddr_t, vsize_t, - struct vm_map *, vaddr_t *, int)); -struct vm_map_entry *uvm_map_findspace __P((struct vm_map *, vaddr_t, vsize_t, - vaddr_t *, struct uvm_object *, voff_t, vsize_t, int)); -int uvm_map_inherit __P((struct vm_map *, vaddr_t, vaddr_t, - vm_inherit_t)); -int uvm_map_advice __P((struct vm_map *, vaddr_t, vaddr_t, int)); +vm_map_t uvm_map_create __P((pmap_t, vaddr_t, vaddr_t, int)); +int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t, + vm_map_t, vaddr_t *, int)); +vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t, vaddr_t *, + struct uvm_object *, voff_t, vsize_t, int)); +int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t, vm_inherit_t)); +int uvm_map_advice __P((vm_map_t, vaddr_t, vaddr_t, int)); void uvm_map_init __P((void)); -boolean_t uvm_map_lookup_entry __P((struct vm_map *, vaddr_t, - struct vm_map_entry **)); +boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t, vm_map_entry_t *)); MAP_INLINE -void uvm_map_reference __P((struct vm_map *)); -int uvm_map_replace __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map_entry *, int)); -int uvm_map_reserve __P((struct vm_map *, vsize_t, vaddr_t, vsize_t, - vaddr_t *)); -void uvm_map_setup __P((struct vm_map *, vaddr_t, vaddr_t, int)); -int uvm_map_submap __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map *)); +void uvm_map_reference __P((vm_map_t)); +int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t, int)); +int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t, vsize_t, + vaddr_t *)); +void uvm_map_setup __P((vm_map_t, vaddr_t, vaddr_t, int)); +int uvm_map_submap __P((vm_map_t, vaddr_t, vaddr_t, vm_map_t)); MAP_INLINE -void uvm_unmap __P((struct vm_map *, vaddr_t, vaddr_t)); -void uvm_unmap_detach __P((struct vm_map_entry *,int)); -void uvm_unmap_remove __P((struct vm_map *, vaddr_t, vaddr_t, - struct vm_map_entry **)); +int uvm_unmap __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_unmap_detach __P((vm_map_entry_t,int)); +int uvm_unmap_remove __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t *)); #endif /* _KERNEL */ @@ -336,13 +384,13 @@ void uvm_unmap_remove 
__P((struct vm_map *, vaddr_t, vaddr_t, #include <sys/proc.h> /* for tsleep(), wakeup() */ #include <sys/systm.h> /* for panic() */ -static __inline boolean_t vm_map_lock_try __P((struct vm_map *)); -static __inline void vm_map_lock __P((struct vm_map *)); +static __inline boolean_t vm_map_lock_try __P((vm_map_t)); +static __inline void vm_map_lock __P((vm_map_t)); extern const char vmmapbsy[]; static __inline boolean_t vm_map_lock_try(map) - struct vm_map *map; + vm_map_t map; { boolean_t rv; @@ -366,7 +414,7 @@ vm_map_lock_try(map) static __inline void vm_map_lock(map) - struct vm_map *map; + vm_map_t map; { int error; @@ -379,7 +427,7 @@ vm_map_lock(map) simple_lock(&map->flags_lock); while (map->flags & VM_MAP_BUSY) { map->flags |= VM_MAP_WANTLOCK; - ltsleep(&map->flags, PVM, vmmapbsy, 0, &map->flags_lock); + ltsleep(&map->flags, PVM, (char *)vmmapbsy, 0, &map->flags_lock); } error = lockmgr(&map->lock, LK_EXCLUSIVE|LK_SLEEPFAIL|LK_INTERLOCK, diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h index 069cbd5f125..54625e7fb4e 100644 --- a/sys/uvm/uvm_map_i.h +++ b/sys/uvm/uvm_map_i.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_map_i.h,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_map_i.h,v 1.22 2001/06/26 17:55:15 thorpej Exp $ */ +/* $OpenBSD: uvm_map_i.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_map_i.h,v 1.18 2000/11/27 08:40:04 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -84,15 +84,17 @@ * uvm_map_create: create map */ -MAP_INLINE struct vm_map * +MAP_INLINE vm_map_t uvm_map_create(pmap, min, max, flags) pmap_t pmap; vaddr_t min, max; int flags; { - struct vm_map *result; + vm_map_t result; - MALLOC(result, struct vm_map *, sizeof(struct vm_map), + MALLOC(result, vm_map_t, + (flags & VM_MAP_INTRSAFE) ? 
sizeof(struct vm_map_intrsafe) : + sizeof(struct vm_map), M_VMMAP, M_WAITOK); uvm_map_setup(result, min, max, flags); result->pmap = pmap; @@ -107,7 +109,7 @@ uvm_map_create(pmap, min, max, flags) MAP_INLINE void uvm_map_setup(map, min, max, flags) - struct vm_map *map; + vm_map_t map; vaddr_t min, max; int flags; { @@ -126,6 +128,23 @@ uvm_map_setup(map, min, max, flags) simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); simple_lock_init(&map->flags_lock); + + /* + * If the map is interrupt safe, place it on the list + * of interrupt safe maps, for uvm_fault(). + * + * We almost never set up an interrupt-safe map, but we set + * up quite a few regular ones (at every fork!), so put + * interrupt-safe map setup in the slow path. + */ + if (__predict_false(flags & VM_MAP_INTRSAFE)) { + struct vm_map_intrsafe *vmi = (struct vm_map_intrsafe *)map; + int s; + + s = vmi_list_lock(); + LIST_INSERT_HEAD(&vmi_list, vmi, vmi_list); + vmi_list_unlock(s); + } } @@ -136,16 +155,17 @@ uvm_map_setup(map, min, max, flags) /* * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") * - * => caller must check alignment and size + * => caller must check alignment and size * => map must be unlocked (we will lock it) */ -MAP_INLINE void +MAP_INLINE int uvm_unmap(map, start, end) - struct vm_map *map; + vm_map_t map; vaddr_t start,end; { - struct vm_map_entry *dead_entries; + int result; + vm_map_entry_t dead_entries; UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", @@ -155,13 +175,14 @@ uvm_unmap(map, start, end) * detach from the dead entries... */ vm_map_lock(map); - uvm_unmap_remove(map, start, end, &dead_entries); + result = uvm_unmap_remove(map, start, end, &dead_entries); vm_map_unlock(map); if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + return(result); } @@ -173,10 +194,10 @@ uvm_unmap(map, start, end) MAP_INLINE void uvm_map_reference(map) - struct vm_map *map; + vm_map_t map; { simple_lock(&map->ref_lock); - map->ref_count++; + map->ref_count++; simple_unlock(&map->ref_lock); } @@ -189,7 +210,7 @@ uvm_map_reference(map) MAP_INLINE void uvm_map_deallocate(map) - struct vm_map *map; + vm_map_t map; { int c; diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 8ff16e98351..c5a49768f7a 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,11 +1,11 @@ -/* $OpenBSD: uvm_mmap.c,v 1.32 2001/12/10 02:19:34 art Exp $ */ -/* $NetBSD: uvm_mmap.c,v 1.55 2001/08/17 05:52:46 chs Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.33 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1991, 1993 The Regents of the University of California. * Copyright (c) 1988 University of Utah. - * + * * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -23,7 +23,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Charles D. Cranor, - * Washington University, University of California, Berkeley and + * Washington University, University of California, Berkeley and * its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -69,6 +69,7 @@ #include <uvm/uvm.h> #include <uvm/uvm_device.h> +#include <uvm/uvm_vnode.h> /* @@ -131,14 +132,14 @@ sys_mincore(p, v, retval) syscallarg(size_t) len; syscallarg(char *) vec; } */ *uap = v; - struct vm_page *m; + vm_page_t m; char *vec, pgi; struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; - struct vm_map_entry *entry; + vm_map_entry_t entry; vaddr_t start, end, lim; - struct vm_map *map; + vm_map_t map; vsize_t len; int error = 0, npgs; @@ -164,8 +165,8 @@ sys_mincore(p, v, retval) * Lock down vec, so our returned status isn't outdated by * storing the status byte for a page. */ - uvm_vslock(p, vec, npgs, VM_PROT_WRITE); + vm_map_lock_read(map); if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { @@ -193,7 +194,6 @@ sys_mincore(p, v, retval) * Special case for objects with no "real" pages. Those * are always considered resident (mapped devices). */ - if (UVM_ET_ISOBJ(entry)) { KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)); if (entry->object.uvm_obj->pgops->pgo_releasepg @@ -221,31 +221,30 @@ sys_mincore(p, v, retval) start - entry->start); /* Don't need to lock anon here. */ if (anon != NULL && anon->u.an_page != NULL) { - /* * Anon has the page for this entry * offset. */ - pgi = 1; } } + if (uobj != NULL && pgi == 0) { /* Check the bottom layer. */ m = uvm_pagelookup(uobj, entry->offset + (start - entry->start)); if (m != NULL) { - /* * Object has the page for this entry * offset. */ - pgi = 1; } } + (void) subyte(vec, pgi); } + if (uobj != NULL) simple_unlock(&uobj->vmobjlock); if (amap != NULL) @@ -292,15 +291,15 @@ sys_mmap(p, v, retval) struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; - void *handle; + caddr_t handle; int error; /* * first, extract syscall args from the uap. */ - addr = (vaddr_t)SCARG(uap, addr); - size = (vsize_t)SCARG(uap, len); + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); prot = SCARG(uap, prot) & VM_PROT_ALL; flags = SCARG(uap, flags); fd = SCARG(uap, fd); @@ -322,12 +321,12 @@ sys_mmap(p, v, retval) pageoff = (pos & PAGE_MASK); pos -= pageoff; size += pageoff; /* add offset */ - size = (vsize_t)round_page(size); /* round up */ + size = (vsize_t) round_page(size); /* round up */ if ((ssize_t) size < 0) return (EINVAL); /* don't allow wrap */ /* - * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" + * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */ if (flags & MAP_FIXED) { @@ -352,8 +351,10 @@ sys_mmap(p, v, retval) * we will refine our guess later (e.g. to account for VAC, etc) */ - addr = MAX(addr, round_page((vaddr_t)p->p_vmspace->vm_daddr + - MAXDSIZ)); + if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr + + MAXDSIZ)) + addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + + MAXDSIZ); } /* @@ -401,7 +402,7 @@ sys_mmap(p, v, retval) flags |= MAP_PRIVATE; /* for a file */ } - /* + /* * MAP_PRIVATE device mappings don't make sense (and aren't * supported anyway). However, some programs rely on this, * so just change it to MAP_SHARED. 
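/*
 * Editor's illustrative sketch -- not part of the patch above or below.
 * sys_mmap() just above slides the file offset back to a page boundary
 * and grows the length to match, and the wrappers that follow
 * (sys_msync, sys_munmap, sys_mprotect, sys_minherit, ...) repeat the
 * same arithmetic on the user-supplied address.  The helper below shows
 * that arithmetic in isolation; page_align_range() is a hypothetical
 * name, and passing page_size explicitly merely stands in for the
 * machine-dependent PAGE_SIZE/PAGE_MASK/round_page() used in the tree.
 */
#include <stddef.h>
#include <stdint.h>

static void
page_align_range(uintptr_t *addr, size_t *size, uintptr_t page_size)
{
	uintptr_t page_mask = page_size - 1;	/* page_size is a power of two */
	uintptr_t pageoff = *addr & page_mask;	/* offset into the first page */

	*addr -= pageoff;			/* truncate addr to a page boundary */
	*size += pageoff;			/* still cover the original start */
	*size = (*size + page_mask) & ~page_mask; /* round up to whole pages */
}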
@@ -445,7 +446,12 @@ sys_mmap(p, v, retval) /* MAP_PRIVATE mappings can always write to */ maxprot |= VM_PROT_WRITE; } - handle = vp; + + /* + * set handle to vnode + */ + + handle = (caddr_t)vp; } else { /* MAP_ANON case */ /* @@ -470,8 +476,7 @@ sys_mmap(p, v, retval) if ((flags & MAP_ANON) != 0 || ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) { if (size > - (p->p_rlimit[RLIMIT_DATA].rlim_cur - - ctob(p->p_vmspace->vm_dsize))) { + (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) { return (ENOMEM); } } @@ -507,8 +512,8 @@ sys_msync(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - struct vm_map *map; - int error, rv, flags, uvmflags; + vm_map_t map; + int rv, flags, uvmflags; /* * extract syscall args from the uap @@ -527,13 +532,13 @@ sys_msync(p, v, retval) flags |= MS_SYNC; /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); /* disallow wrap-around. */ if (addr + size < addr) @@ -555,10 +560,9 @@ sys_msync(p, v, retval) * This can be incorrect if the region splits or is coalesced * with a neighbor. */ - if (size == 0) { - struct vm_map_entry *entry; - + vm_map_entry_t entry; + vm_map_lock_read(map); rv = uvm_map_lookup_entry(map, addr, &entry); if (rv == TRUE) { @@ -573,7 +577,6 @@ sys_msync(p, v, retval) /* * translate MS_ flags into PGO_ flags */ - uvmflags = PGO_CLEANIT; if (flags & MS_INVALIDATE) uvmflags |= PGO_FREE; @@ -582,8 +585,15 @@ sys_msync(p, v, retval) else uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */ - error = uvm_map_clean(map, addr, addr+size, uvmflags); - return error; + /* + * doit! + */ + rv = uvm_map_clean(map, addr, addr+size, uvmflags); + + /* + * and return... + */ + return (rv); } /* @@ -602,25 +612,25 @@ sys_munmap(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - struct vm_map *map; + vm_map_t map; vaddr_t vm_min_address = VM_MIN_ADDRESS; struct vm_map_entry *dead_entries; /* - * get syscall args. + * get syscall args... */ - addr = (vaddr_t)SCARG(uap, addr); - size = (vsize_t)SCARG(uap, len); - + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); + /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); @@ -639,20 +649,29 @@ sys_munmap(p, v, retval) return (EINVAL); map = &p->p_vmspace->vm_map; + + vm_map_lock(map); /* lock map so we can checkprot */ + /* - * interesting system call semantic: make sure entire range is + * interesting system call semantic: make sure entire range is * allocated before allowing an unmap. */ - vm_map_lock(map); if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) { vm_map_unlock(map); return (EINVAL); } - uvm_unmap_remove(map, addr, addr + size, &dead_entries); - vm_map_unlock(map); + + /* + * doit! 
+ */ + (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries); + + vm_map_unlock(map); /* and unlock */ + if (dead_entries != NULL) uvm_unmap_detach(dead_entries, 0); + return (0); } @@ -674,7 +693,7 @@ sys_mprotect(p, v, retval) vaddr_t addr; vsize_t size, pageoff; vm_prot_t prot; - int error; + int rv; /* * extract syscall args from uap @@ -685,19 +704,27 @@ sys_mprotect(p, v, retval) prot = SCARG(uap, prot) & VM_PROT_ALL; /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); - + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); - error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, - FALSE); - return error; + + /* + * doit + */ + + rv = uvm_map_protect(&p->p_vmspace->vm_map, + addr, addr+size, prot, FALSE); + + if (rv == KERN_SUCCESS) + return (0); + if (rv == KERN_PROTECTION_FAILURE) + return (EACCES); + return (EINVAL); } /* @@ -718,26 +745,30 @@ sys_minherit(p, v, retval) vaddr_t addr; vsize_t size, pageoff; vm_inherit_t inherit; - int error; - + addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); inherit = SCARG(uap, inherit); - /* - * align the address to a page boundary and adjust the size accordingly. + * align the address to a page boundary, and adjust the size accordingly */ pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((int)size < 0) return (EINVAL); - error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size, - inherit); - return error; + + switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, + inherit)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); } /* @@ -758,8 +789,8 @@ sys_madvise(p, v, retval) } */ *uap = v; vaddr_t addr; vsize_t size, pageoff; - int advice, error; - + int advice, rv;; + addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); advice = SCARG(uap, behav); @@ -767,11 +798,10 @@ sys_madvise(p, v, retval) /* * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); if ((ssize_t)size <= 0) return (EINVAL); @@ -780,12 +810,11 @@ sys_madvise(p, v, retval) case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: - error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size, advice); break; case MADV_WILLNEED: - /* * Activate all these pages, pre-faulting them in if * necessary. @@ -795,35 +824,29 @@ sys_madvise(p, v, retval) * Should invent a "weak" mode for uvm_fault() * which would only do the PGO_LOCKED pgo_get(). */ - return (0); case MADV_DONTNEED: - /* * Deactivate all these pages. We don't need them * any more. We don't, however, toss the data in * the pages. */ - - error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, PGO_DEACTIVATE); break; case MADV_FREE: - /* * These pages contain no valid data, and may be * garbage-collected. Toss all resources, including * any swap space in use. 
*/ - - error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, + rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size, PGO_FREE); break; case MADV_SPACEAVAIL: - /* * XXXMRG What is this? I think it's: * @@ -834,14 +857,13 @@ sys_madvise(p, v, retval) * as it will free swap space allocated to pages in core. * There's also what to do for device/file/anonymous memory. */ - return (EINVAL); default: return (EINVAL); } - return error; + return (rv); } /* @@ -865,21 +887,19 @@ sys_mlock(p, v, retval) /* * extract syscall args from uap */ - addr = (vaddr_t)SCARG(uap, addr); size = (vsize_t)SCARG(uap, len); /* * align the address to a page boundary and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); - + size = (vsize_t) round_page(size); + /* disallow wrap-around. */ - if (addr + size < addr) + if (addr + (int)size < addr) return (EINVAL); if (atop(size) + uvmexp.wired > uvmexp.wiredmax) @@ -896,7 +916,7 @@ sys_mlock(p, v, retval) error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE, 0); - return error; + return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* @@ -927,14 +947,13 @@ sys_munlock(p, v, retval) /* * align the address to a page boundary, and adjust the size accordingly */ - pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; - size = (vsize_t)round_page(size); + size = (vsize_t) round_page(size); /* disallow wrap-around. */ - if (addr + size < addr) + if (addr + (int)size < addr) return (EINVAL); #ifndef pmap_wired_count @@ -944,7 +963,7 @@ sys_munlock(p, v, retval) error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE, 0); - return error; + return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* @@ -975,6 +994,23 @@ sys_mlockall(p, v, retval) error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); + switch (error) { + case KERN_SUCCESS: + error = 0; + break; + + case KERN_NO_SPACE: /* XXX overloaded */ + error = ENOMEM; + break; + + default: + /* + * "Some or all of the memory could not be locked when + * the call was made." + */ + error = EAGAIN; + } + return (error); } @@ -1004,18 +1040,18 @@ sys_munlockall(p, v, retval) int uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) - struct vm_map *map; + vm_map_t map; vaddr_t *addr; vsize_t size; vm_prot_t prot, maxprot; int flags; - void *handle; + caddr_t handle; /* XXX: VNODE? */ voff_t foff; vsize_t locklimit; { struct uvm_object *uobj; struct vnode *vp; - int error; + int retval; int advice = UVM_ADV_NORMAL; uvm_flag_t uvmflag = 0; @@ -1038,6 +1074,7 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) if ((flags & MAP_FIXED) == 0) { *addr = round_page(*addr); /* round */ } else { + if (*addr & PAGE_MASK) return(EINVAL); uvmflag |= UVM_FLAG_FIXED; @@ -1060,18 +1097,46 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) uvmflag |= UVM_FLAG_OVERLAY; } else { - vp = (struct vnode *)handle; - if (vp->v_type != VCHR) { - error = VOP_MMAP(vp, 0, curproc->p_ucred, curproc); - if (error) { - return error; - } - uobj = uvn_attach((void *)vp, (flags & MAP_SHARED) ? + vp = (struct vnode *) handle; /* get vnode */ + if (vp->v_type != VCHR) { + uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ? 
maxprot : (maxprot & ~VM_PROT_WRITE)); +#ifndef UBC + /* + * XXXCDC: hack from old code + * don't allow vnodes which have been mapped + * shared-writeable to persist [forces them to be + * flushed out when last reference goes]. + * XXXCDC: interesting side effect: avoids a bug. + * note that in WRITE [ufs_readwrite.c] that we + * allocate buffer, uncache, and then do the write. + * the problem with this is that if the uncache causes + * VM data to be flushed to the same area of the file + * we are writing to... in that case we've got the + * buffer locked and our process goes to sleep forever. + * + * XXXCDC: checking maxprot protects us from the + * "persistbug" program but this is not a long term + * solution. + * + * XXXCDC: we don't bother calling uncache with the vp + * VOP_LOCKed since we know that we are already + * holding a valid reference to the uvn (from the + * uvn_attach above), and thus it is impossible for + * the uncache to kill the uvn and trigger I/O. + */ + if (flags & MAP_SHARED) { + if ((prot & VM_PROT_WRITE) || + (maxprot & VM_PROT_WRITE)) { + uvm_vnp_uncache(vp); + } + } +#else /* XXX for now, attach doesn't gain a ref */ VREF(vp); +#endif } else { uobj = udv_attach((void *) &vp->v_rdev, (flags & MAP_SHARED) ? maxprot : @@ -1083,67 +1148,88 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) */ if (uobj == NULL && (prot & PROT_EXEC) == 0) { maxprot &= ~VM_PROT_EXECUTE; - uobj = udv_attach((void *)&vp->v_rdev, + uobj = udv_attach((void *) &vp->v_rdev, (flags & MAP_SHARED) ? maxprot : (maxprot & ~VM_PROT_WRITE), foff, size); } advice = UVM_ADV_RANDOM; } + if (uobj == NULL) return((vp->v_type == VREG) ? ENOMEM : EINVAL); + if ((flags & MAP_SHARED) == 0) uvmflag |= UVM_FLAG_COPYONW; } - uvmflag = UVM_MAPFLAG(prot, maxprot, + /* + * set up mapping flags + */ + + uvmflag = UVM_MAPFLAG(prot, maxprot, (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, uvmflag); - error = uvm_map(map, addr, size, uobj, foff, 0, uvmflag); - if (error) { - if (uobj) - uobj->pgops->pgo_detach(uobj); - return error; - } /* - * POSIX 1003.1b -- if our address space was configured - * to lock all future mappings, wire the one we just made. + * do it! */ - if (prot == VM_PROT_NONE) { + retval = uvm_map(map, addr, size, uobj, foff, 0, uvmflag); + if (retval == KERN_SUCCESS) { /* - * No more work to do in this case. + * POSIX 1003.1b -- if our address space was configured + * to lock all future mappings, wire the one we just made. */ + if (prot == VM_PROT_NONE) { + /* + * No more work to do in this case. + */ + return (0); + } + + vm_map_lock(map); - return (0); - } - vm_map_lock(map); - if (map->flags & VM_MAP_WIREFUTURE) { - if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax + if (map->flags & VM_MAP_WIREFUTURE) { + if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax #ifdef pmap_wired_count - || (locklimit != 0 && (size + - ptoa(pmap_wired_count(vm_map_pmap(map)))) > - locklimit) + || (locklimit != 0 && (size + + ptoa(pmap_wired_count(vm_map_pmap(map)))) > + locklimit) #endif - ) { - vm_map_unlock(map); - uvm_unmap(map, *addr, *addr + size); - return ENOMEM; + ) { + retval = KERN_RESOURCE_SHORTAGE; + vm_map_unlock(map); + /* unmap the region! */ + (void) uvm_unmap(map, *addr, *addr + size); + goto bad; + } + /* + * uvm_map_pageable() always returns the map + * unlocked. + */ + retval = uvm_map_pageable(map, *addr, *addr + size, + FALSE, UVM_LK_ENTER); + if (retval != KERN_SUCCESS) { + /* unmap the region! 
*/ + (void) uvm_unmap(map, *addr, *addr + size); + goto bad; + } + return (0); } - /* - * uvm_map_pageable() always returns the map unlocked. - */ + vm_map_unlock(map); - error = uvm_map_pageable(map, *addr, *addr + size, - FALSE, UVM_LK_ENTER); - if (error) { - uvm_unmap(map, *addr, *addr + size); - return error; - } return (0); } - vm_map_unlock(map); - return 0; + + /* + * errors: first detach from the uobj, if any. + */ + + if (uobj) + uobj->pgops->pgo_detach(uobj); + + bad: + return (retval); } diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index 239152fb5fe..b1b1daa9490 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_object.h,v 1.7 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_object.h,v 1.12 2001/05/26 16:32:47 chs Exp $ */ +/* $OpenBSD: uvm_object.h,v 1.8 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_object.h,v 1.11 2001/03/09 01:02:12 chs Exp $ */ /* * @@ -47,7 +47,7 @@ */ struct uvm_object { - struct simplelock vmobjlock; /* lock on memq */ + simple_lock_data_t vmobjlock; /* lock on memq */ struct uvm_pagerops *pgops; /* pager ops */ struct pglist memq; /* pages in this object */ int uo_npages; /* # of pages in memq */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index ed2a8c6f601..edfb5b1ca31 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page.c,v 1.38 2001/12/06 12:43:20 art Exp $ */ -/* $NetBSD: uvm_page.c,v 1.66 2001/09/10 21:19:43 chris Exp $ */ +/* $OpenBSD: uvm_page.c,v 1.39 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page.c,v 1.51 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -126,16 +126,6 @@ static vaddr_t virtual_space_end; static struct pglist uvm_bootbucket; /* - * we allocate an initial number of page colors in uvm_page_init(), - * and remember them. 
We may re-color pages as cache sizes are - * discovered during the autoconfiguration phase. But we can never - * free the initial set of buckets, since they are allocated using - * uvm_pageboot_alloc(). - */ - -static boolean_t have_recolored_pages /* = FALSE */; - -/* * local prototypes */ @@ -197,14 +187,10 @@ uvm_pageremove(pg) simple_unlock(&uvm.hashlock); splx(s); - if (UVM_OBJ_IS_VTEXT(pg->uobject) || UVM_OBJ_IS_VNODE(pg->uobject)) { - if (UVM_OBJ_IS_VNODE(pg->uobject)) - uvmexp.vnodepages--; - else - uvmexp.vtextpages--; - s = splbio(); - vholdrele((struct vnode *)pg->uobject); - splx(s); + if (UVM_OBJ_IS_VTEXT(pg->uobject)) { + uvmexp.vtextpages--; + } else if (UVM_OBJ_IS_VNODE(pg->uobject)) { + uvmexp.vnodepages--; } /* object should be locked */ @@ -216,22 +202,9 @@ uvm_pageremove(pg) pg->version++; } -static void -uvm_page_init_buckets(struct pgfreelist *pgfl) -{ - int color, i; - - for (color = 0; color < uvmexp.ncolors; color++) { - for (i = 0; i < PGFL_NQUEUES; i++) { - TAILQ_INIT(&pgfl->pgfl_buckets[ - color].pgfl_queues[i]); - } - } -} - /* * uvm_page_init: init the page system. called from uvm_init(). - * + * * => we return the range of kernel virtual memory in kvm_startp/kvm_endp */ @@ -239,20 +212,22 @@ void uvm_page_init(kvm_startp, kvm_endp) vaddr_t *kvm_startp, *kvm_endp; { - vsize_t freepages, pagecount, bucketcount, n; - struct pgflbucket *bucketarray; - struct vm_page *pagearray; - int lcv, i; + vsize_t freepages, pagecount, n; + vm_page_t pagearray; + int lcv, i; paddr_t paddr; /* - * init the page queues and page queue locks, except the free - * list; we allocate that later (with the initial vm_page - * structures). + * init the page queues and page queue locks */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) { + for (i = 0; i < PGFL_NQUEUES; i++) + TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]); + } TAILQ_INIT(&uvm.page_active); - TAILQ_INIT(&uvm.page_inactive); + TAILQ_INIT(&uvm.page_inactive_swp); + TAILQ_INIT(&uvm.page_inactive_obj); simple_lock_init(&uvm.pageqlock); simple_lock_init(&uvm.fpageqlock); @@ -268,7 +243,7 @@ uvm_page_init(kvm_startp, kvm_endp) TAILQ_INIT(uvm.page_hash); /* init hash table */ simple_lock_init(&uvm.hashlock); /* init hash table lock */ - /* + /* * allocate vm_page structures. */ @@ -281,28 +256,20 @@ uvm_page_init(kvm_startp, kvm_endp) if (vm_nphysseg == 0) panic("uvm_page_bootstrap: no memory pre-allocated"); - + /* - * first calculate the number of free pages... + * first calculate the number of free pages... * * note that we use start/end rather than avail_start/avail_end. * this allows us to allocate extra vm_page structures in case we * want to return some memory to the pool after booting. */ - + freepages = 0; for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start); /* - * Let MD code initialize the number of colors, or default - * to 1 color if MD code doesn't care. - */ - if (uvmexp.ncolors == 0) - uvmexp.ncolors = 1; - uvmexp.colormask = uvmexp.ncolors - 1; - - /* * we now know we have (PAGE_SIZE * freepages) bytes of memory we can * use. for each page of memory we use we need a vm_page structure. * thus, the total number of pages we can use is the total size of @@ -311,24 +278,13 @@ uvm_page_init(kvm_startp, kvm_endp) * truncation errors (since we can only allocate in terms of whole * pages). 
*/ - - bucketcount = uvmexp.ncolors * VM_NFREELIST; + pagecount = ((freepages + 1) << PAGE_SHIFT) / (PAGE_SIZE + sizeof(struct vm_page)); - - bucketarray = (void *) uvm_pageboot_alloc((bucketcount * - sizeof(struct pgflbucket)) + (pagecount * - sizeof(struct vm_page))); - pagearray = (struct vm_page *)(bucketarray + bucketcount); - - for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - uvm.page_free[lcv].pgfl_buckets = - (bucketarray + (lcv * uvmexp.ncolors)); - uvm_page_init_buckets(&uvm.page_free[lcv]); - } - + pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount * + sizeof(struct vm_page)); memset(pagearray, 0, pagecount * sizeof(struct vm_page)); - + /* * init the vm_page structures and put them in the correct place. */ @@ -352,9 +308,6 @@ uvm_page_init(kvm_startp, kvm_endp) paddr = ptoa(vm_physmem[lcv].start); for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { vm_physmem[lcv].pgs[i].phys_addr = paddr; -#ifdef __HAVE_VM_PAGE_MD - VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]); -#endif if (atop(paddr) >= vm_physmem[lcv].avail_start && atop(paddr) <= vm_physmem[lcv].avail_end) { uvmexp.npages++; @@ -408,9 +361,9 @@ uvm_page_init(kvm_startp, kvm_endp) /* * uvm_setpagesize: set the page size - * + * * => sets page_shift and page_mask from uvmexp.pagesize. - */ + */ void uvm_setpagesize() @@ -433,12 +386,28 @@ vaddr_t uvm_pageboot_alloc(size) vsize_t size; { - static boolean_t initialized = FALSE; +#if defined(PMAP_STEAL_MEMORY) vaddr_t addr; -#if !defined(PMAP_STEAL_MEMORY) - vaddr_t vaddr; + + /* + * defer bootstrap allocation to MD code (it may want to allocate + * from a direct-mapped segment). pmap_steal_memory should round + * off virtual_space_start/virtual_space_end. + */ + + addr = pmap_steal_memory(size, &virtual_space_start, + &virtual_space_end); + + return(addr); + +#else /* !PMAP_STEAL_MEMORY */ + + static boolean_t initialized = FALSE; + vaddr_t addr, vaddr; paddr_t paddr; -#endif + + /* round to page size */ + size = round_page(size); /* * on first call to this function, initialize ourselves. @@ -453,24 +422,6 @@ uvm_pageboot_alloc(size) initialized = TRUE; } - /* round to page size */ - size = round_page(size); - -#if defined(PMAP_STEAL_MEMORY) - - /* - * defer bootstrap allocation to MD code (it may want to allocate - * from a direct-mapped segment). pmap_steal_memory should adjust - * virtual_space_start/virtual_space_end if necessary. - */ - - addr = pmap_steal_memory(size, &virtual_space_start, - &virtual_space_end); - - return(addr); - -#else /* !PMAP_STEAL_MEMORY */ - /* * allocate virtual memory for this request */ @@ -510,7 +461,6 @@ uvm_pageboot_alloc(size) */ pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE); } - pmap_update(pmap_kernel()); return(addr); #endif /* PMAP_STEAL_MEMORY */ } @@ -881,76 +831,6 @@ uvm_page_rehash() return; } -/* - * uvm_page_recolor: Recolor the pages if the new bucket count is - * larger than the old one. - */ - -void -uvm_page_recolor(int newncolors) -{ - struct pgflbucket *bucketarray, *oldbucketarray; - struct pgfreelist pgfl; - struct vm_page *pg; - vsize_t bucketcount; - int s, lcv, color, i, ocolors; - - if (newncolors <= uvmexp.ncolors) - return; - - bucketcount = newncolors * VM_NFREELIST; - bucketarray = malloc(bucketcount * sizeof(struct pgflbucket), - M_VMPAGE, M_NOWAIT); - if (bucketarray == NULL) { - printf("WARNING: unable to allocate %ld page color buckets\n", - (long) bucketcount); - return; - } - - s = uvm_lock_fpageq(); - - /* Make sure we should still do this. 
*/ - if (newncolors <= uvmexp.ncolors) { - uvm_unlock_fpageq(s); - free(bucketarray, M_VMPAGE); - return; - } - - oldbucketarray = uvm.page_free[0].pgfl_buckets; - ocolors = uvmexp.ncolors; - - uvmexp.ncolors = newncolors; - uvmexp.colormask = uvmexp.ncolors - 1; - - for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors)); - uvm_page_init_buckets(&pgfl); - for (color = 0; color < ocolors; color++) { - for (i = 0; i < PGFL_NQUEUES; i++) { - while ((pg = TAILQ_FIRST(&uvm.page_free[ - lcv].pgfl_buckets[color].pgfl_queues[i])) - != NULL) { - TAILQ_REMOVE(&uvm.page_free[ - lcv].pgfl_buckets[ - color].pgfl_queues[i], pg, pageq); - TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[ - VM_PGCOLOR_BUCKET(pg)].pgfl_queues[ - i], pg, pageq); - } - } - } - uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets; - } - - if (have_recolored_pages) { - uvm_unlock_fpageq(s); - free(oldbucketarray, M_VMPAGE); - return; - } - - have_recolored_pages = TRUE; - uvm_unlock_fpageq(s); -} #if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ @@ -982,49 +862,6 @@ uvm_page_physdump() #endif /* - * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat - */ - -static __inline struct vm_page * -uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2, - unsigned int *trycolorp) -{ - struct pglist *freeq; - struct vm_page *pg; - int color, trycolor = *trycolorp; - - color = trycolor; - do { - if ((pg = TAILQ_FIRST((freeq = - &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) - goto gotit; - if ((pg = TAILQ_FIRST((freeq = - &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) - goto gotit; - color = (color + 1) & uvmexp.colormask; - } while (color != trycolor); - - return (NULL); - - gotit: - TAILQ_REMOVE(freeq, pg, pageq); - uvmexp.free--; - - /* update zero'd page count */ - if (pg->flags & PG_ZERO) - uvmexp.zeropages--; - - if (color == trycolor) - uvmexp.colorhit++; - else { - uvmexp.colormiss++; - *trycolorp = color; - } - - return (pg); -} - -/* * uvm_pagealloc_strat: allocate vm_page from a particular free list. * * => return null if no pages free @@ -1050,8 +887,10 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) struct vm_anon *anon; int strat, free_list; { - int lcv, try1, try2, s, zeroit = 0, color; + int lcv, try1, try2, s, zeroit = 0; struct vm_page *pg; + struct pglist *freeq; + struct pgfreelist *pgfl; boolean_t use_reserve; KASSERT(obj == NULL || anon == NULL); @@ -1063,20 +902,21 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) s = uvm_lock_fpageq(); /* - * This implements a global round-robin page coloring - * algorithm. - * - * XXXJRT: Should we make the `nextcolor' per-cpu? - * XXXJRT: What about virtually-indexed caches? - */ - color = uvm.page_free_nextcolor; - - /* * check to see if we need to generate some free pages waking * the pagedaemon. */ - UVM_KICK_PDAEMON(); +#ifdef UBC + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon); + } +#else + if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) + wakeup(&uvm.pagedaemon); +#endif /* * fail if any of these conditions is true: @@ -1116,9 +956,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) case UVM_PGA_STRAT_NORMAL: /* Check all freelists in descending priority order. 
*/ for (lcv = 0; lcv < VM_NFREELIST; lcv++) { - pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv], - try1, try2, &color); - if (pg != NULL) + pgfl = &uvm.page_free[lcv]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) goto gotit; } @@ -1129,9 +971,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) case UVM_PGA_STRAT_FALLBACK: /* Attempt to allocate from the specified free list. */ KASSERT(free_list >= 0 && free_list < VM_NFREELIST); - pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list], - try1, try2, &color); - if (pg != NULL) + pgfl = &uvm.page_free[free_list]; + if ((pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try1]))) != NULL || + (pg = TAILQ_FIRST((freeq = + &pgfl->pgfl_queues[try2]))) != NULL) goto gotit; /* Fall back, if possible. */ @@ -1149,11 +993,12 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) } gotit: - /* - * We now know which color we actually allocated from; set - * the next color accordingly. - */ - uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask; + TAILQ_REMOVE(freeq, pg, pageq); + uvmexp.free--; + + /* update zero'd page count */ + if (pg->flags & PG_ZERO) + uvmexp.zeropages--; /* * update allocation statistics and remember if we have to @@ -1275,24 +1120,24 @@ uvm_pagefree(pg) * if the object page is on loan we are going to drop ownership. * it is possible that an anon will take over as owner for this * page later on. the anon will want a !PG_CLEAN page so that - * it knows it needs to allocate swap if it wants to page the - * page out. + * it knows it needs to allocate swap if it wants to page the + * page out. */ if (saved_loan_count) pg->flags &= ~PG_CLEAN; /* in case an anon takes over */ uvm_pageremove(pg); - + /* * if our page was on loan, then we just lost control over it * (in fact, if it was loaned to an anon, the anon may have * already taken over ownership of the page by now and thus - * changed the loan_count [e.g. in uvmfault_anonget()]) we just - * return (when the last loan is dropped, then the page can be + * changed the loan_count [e.g. in uvmfault_anonget()]) we just + * return (when the last loan is dropped, then the page can be * freed by whatever was holding the last loan). */ - if (saved_loan_count) + if (saved_loan_count) return; } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) { @@ -1318,8 +1163,12 @@ uvm_pagefree(pg) TAILQ_REMOVE(&uvm.page_active, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; uvmexp.active--; - } else if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } @@ -1345,8 +1194,7 @@ uvm_pagefree(pg) s = uvm_lock_fpageq(); TAILQ_INSERT_TAIL(&uvm.page_free[ - uvm_page_lookup_freelist(pg)].pgfl_buckets[ - VM_PGCOLOR_BUCKET(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); + uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); pg->pqflags = PQ_FREE; #ifdef DEBUG pg->uobject = (void *)0xdeadbeef; @@ -1450,8 +1298,7 @@ uvm_page_own(pg, tag) /* * uvm_pageidlezero: zero free pages while the system is idle. * - * => try to complete one color bucket at a time, to reduce our impact - * on the CPU cache. + * => we do at least one iteration per call, if we are below the target. 
* => we loop until we either reach the target or whichqs indicates that * there is a process ready to run. */ @@ -1460,17 +1307,10 @@ uvm_pageidlezero() { struct vm_page *pg; struct pgfreelist *pgfl; - int free_list, s, firstbucket; - static int nextbucket; + int free_list, s; - s = uvm_lock_fpageq(); - - firstbucket = nextbucket; do { - if (whichqs != 0) { - uvm_unlock_fpageq(s); - return; - } + s = uvm_lock_fpageq(); if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) { uvm.page_idle_zero = FALSE; @@ -1480,52 +1320,54 @@ uvm_pageidlezero() for (free_list = 0; free_list < VM_NFREELIST; free_list++) { pgfl = &uvm.page_free[free_list]; - while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) { - if (whichqs != 0) { - uvm_unlock_fpageq(s); - return; - } - - TAILQ_REMOVE(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_UNKNOWN], - pg, pageq); - uvmexp.free--; - uvm_unlock_fpageq(s); -#ifdef PMAP_PAGEIDLEZERO - if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == - FALSE) { - /* - * The machine-dependent code detected - * some reason for us to abort zeroing - * pages, probably because there is a - * process now ready to run. - */ - s = uvm_lock_fpageq(); - TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[ - PGFL_UNKNOWN], pg, pageq); - uvmexp.free++; - uvmexp.zeroaborts++; - uvm_unlock_fpageq(s); - return; - } -#else - pmap_zero_page(VM_PAGE_TO_PHYS(pg)); -#endif /* PMAP_PAGEIDLEZERO */ - pg->flags |= PG_ZERO; - - s = uvm_lock_fpageq(); - TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ - nextbucket].pgfl_queues[PGFL_ZEROS], - pg, pageq); - uvmexp.free++; - uvmexp.zeropages++; - } + if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[ + PGFL_UNKNOWN])) != NULL) + break; } - nextbucket = (nextbucket + 1) & uvmexp.colormask; - } while (nextbucket != firstbucket); + if (pg == NULL) { + /* + * No non-zero'd pages; don't bother trying again + * until we know we have non-zero'd pages free. + */ + uvm.page_idle_zero = FALSE; + uvm_unlock_fpageq(s); + return; + } - uvm_unlock_fpageq(s); + TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq); + uvmexp.free--; + uvm_unlock_fpageq(s); + +#ifdef PMAP_PAGEIDLEZERO + if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == FALSE) { + /* + * The machine-dependent code detected some + * reason for us to abort zeroing pages, + * probably because there is a process now + * ready to run. + */ + s = uvm_lock_fpageq(); + TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN], + pg, pageq); + uvmexp.free++; + uvmexp.zeroaborts++; + uvm_unlock_fpageq(s); + return; + } +#else + /* + * XXX This will toast the cache unless the pmap_zero_page() + * XXX implementation does uncached access. + */ + pmap_zero_page(VM_PAGE_TO_PHYS(pg)); +#endif + pg->flags |= PG_ZERO; + + s = uvm_lock_fpageq(); + TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq); + uvmexp.free++; + uvmexp.zeropages++; + uvm_unlock_fpageq(s); + } while (whichqs == 0); } diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 45b26021f3e..d1f531cbff4 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page.h,v 1.16 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_page.h,v 1.30 2001/07/25 23:05:04 thorpej Exp $ */ +/* $OpenBSD: uvm_page.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page.h,v 1.19 2000/12/28 08:24:55 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. 
+ * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -110,7 +110,7 @@ * fields were dumped and all the flags were lumped into one short. * that is fine for a single threaded uniprocessor OS, but bad if you * want to actual make use of locking (simple_lock's). so, we've - * separated things back out again. + * seperated things back out again. * * note the page structure has no lock of its own. */ @@ -128,22 +128,14 @@ struct vm_page { struct uvm_object *uobject; /* object (O,P) */ voff_t offset; /* offset into object (O,P) */ - u_int flags: 16, /* object flags [O] */ - version: 16; /* version count [O] */ - - u_int wire_count: 16, /* wired down map refs [P] */ - pqflags: 8, /* page queue flags [P] */ - : 8; - + u_short flags; /* object flags [O] */ + u_short version; /* version count [O] */ + u_short wire_count; /* wired down map refs [P] */ + u_short pqflags; /* page queue flags [P] */ u_int loan_count; /* number of active loans * to read: [O or P] * to modify: [O _and_ P] */ paddr_t phys_addr; /* physical address of page */ - -#ifdef __HAVE_VM_PAGE_MD - struct vm_page_md mdpage; /* pmap-specific data */ -#endif - #if defined(UVM_PAGE_TRKOWN) /* debugging fields to track page ownership */ pid_t owner; /* proc that set PG_BUSY */ @@ -153,12 +145,14 @@ struct vm_page { /* * These are the flags defined for vm_page. + * + * Note: PG_FILLED and PG_DIRTY are added for the filesystems. */ /* * locking rules: * PG_ ==> locked by object lock - * PQ_ ==> lock by page queue lock + * PQ_ ==> lock by page queue lock * PQ_FREE is locked by free queue lock and is mutex with all other PQs * * PG_ZERO is used to indicate that a page has been pre-zero'd. 
This flag @@ -178,12 +172,12 @@ struct vm_page { #define PG_PAGER1 0x1000 /* pager-specific flag */ -#define PQ_FREE 0x01 /* page is on free list */ -#define PQ_INACTIVE 0x02 /* page is in inactive list */ -#define PQ_ACTIVE 0x04 /* page is in active list */ -#define PQ_ANON 0x10 /* page is part of an anon, rather +#define PQ_FREE 0x0001 /* page is on free list */ +#define PQ_INACTIVE 0x0002 /* page is in inactive list */ +#define PQ_ACTIVE 0x0004 /* page is in active list */ +#define PQ_ANON 0x0010 /* page is part of an anon, rather than an uvm_object */ -#define PQ_AOBJ 0x20 /* page is part of an anonymous +#define PQ_AOBJ 0x0020 /* page is part of an anonymous uvm_object */ #define PQ_SWAPBACKED (PQ_ANON|PQ_AOBJ) #define PQ_ENCRYPT 0x0040 /* page needs {en,de}cryption */ @@ -216,9 +210,7 @@ struct vm_physseg { int free_list; /* which free list they belong on */ struct vm_page *pgs; /* vm_page structures (from start) */ struct vm_page *lastpg; /* vm_page structure for end */ -#ifdef __HAVE_PMAP_PHYSSEG struct pmap_physseg pmseg; /* pmap specific (MD) data */ -#endif }; #ifdef _KERNEL @@ -232,7 +224,7 @@ extern boolean_t vm_page_zero_enable; /* * Each pageable resident page falls into one of three lists: * - * free + * free * Available for allocation now. * inactive * Not referenced in any map, but still has an @@ -262,7 +254,7 @@ extern int vm_nphysseg; #ifdef UVM_PAGE_INLINE #define PAGE_INLINE static __inline -#else +#else #define PAGE_INLINE /* nothing */ #endif /* UVM_PAGE_INLINE */ @@ -278,7 +270,6 @@ void uvm_page_own __P((struct vm_page *, char *)); boolean_t uvm_page_physget __P((paddr_t *)); #endif void uvm_page_rehash __P((void)); -void uvm_page_recolor __P((int)); void uvm_pageidlezero __P((void)); PAGE_INLINE int uvm_lock_fpageq __P((void)); @@ -317,12 +308,6 @@ static int vm_physseg_find __P((paddr_t, int *)); #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) /* - * Compute the page color bucket for a given page. - */ -#define VM_PGCOLOR_BUCKET(pg) \ - (atop(VM_PAGE_TO_PHYS((pg))) & uvmexp.colormask) - -/* * when VM_PHYSSEG_MAX is 1, we can simplify these functions */ diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h index cf8636bb42d..024c692b5b9 100644 --- a/sys/uvm/uvm_page_i.h +++ b/sys/uvm/uvm_page_i.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_page_i.h,v 1.12 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_page_i.h,v 1.19 2001/06/27 23:57:17 thorpej Exp $ */ +/* $OpenBSD: uvm_page_i.h,v 1.13 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_page_i.h,v 1.16 2001/01/28 23:30:45 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
- * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -159,8 +159,12 @@ uvm_pagewire(pg) TAILQ_REMOVE(&uvm.page_active, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; uvmexp.active--; - } else if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } @@ -170,12 +174,12 @@ uvm_pagewire(pg) } /* - * uvm_pageunwire: unwire the page. + * uvm_pageunwire: unwire the page. * * => activate if wire count goes to zero. * => caller must lock page queues */ - + PAGE_INLINE void uvm_pageunwire(pg) struct vm_page *pg; @@ -209,9 +213,15 @@ uvm_pagedeactivate(pg) } if ((pg->pqflags & PQ_INACTIVE) == 0) { KASSERT(pg->wire_count == 0); - TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq); + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq); pg->pqflags |= PQ_INACTIVE; uvmexp.inactive++; +#ifndef UBC + pmap_clear_reference(pg); +#endif /* * update the "clean" bit. this isn't 100% * accurate, and doesn't have to be. 
we'll @@ -235,7 +245,10 @@ uvm_pageactivate(pg) struct vm_page *pg; { if (pg->pqflags & PQ_INACTIVE) { - TAILQ_REMOVE(&uvm.page_inactive, pg, pageq); + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; uvmexp.inactive--; } diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index 8259df56237..5c93fe9f9db 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.c,v 1.28 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_pager.c,v 1.49 2001/09/10 21:19:43 chris Exp $ */ +/* $OpenBSD: uvm_pager.c,v 1.29 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.41 2001/02/18 19:26:50 chs Exp $ */ /* * @@ -58,21 +58,25 @@ struct pool *uvm_aiobuf_pool; extern struct uvm_pagerops uvm_deviceops; extern struct uvm_pagerops uvm_vnodeops; +#ifdef UBC extern struct uvm_pagerops ubc_pager; +#endif struct uvm_pagerops *uvmpagerops[] = { &aobj_pager, &uvm_deviceops, &uvm_vnodeops, +#ifdef UBC &ubc_pager, +#endif }; /* * the pager map: provides KVA for I/O */ -struct vm_map *pager_map; /* XXX */ -struct simplelock pager_map_wanted_lock; +vm_map_t pager_map; /* XXX */ +simple_lock_data_t pager_map_wanted_lock; boolean_t pager_map_wanted; /* locked by pager map */ static vaddr_t emergva; static boolean_t emerginuse; @@ -100,7 +104,7 @@ uvm_pager_init() /* * init ASYNC I/O queue */ - + TAILQ_INIT(&uvm.aio_done); /* @@ -148,8 +152,8 @@ ReStart: size = npages << PAGE_SHIFT; kva = 0; /* let system choose VA */ - if (uvm_map(pager_map, &kva, size, NULL, - UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != 0) { + if (uvm_map(pager_map, &kva, size, NULL, + UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) { if (curproc == uvm.pagedaemon_proc) { simple_lock(&pager_map_wanted_lock); if (emerginuse) { @@ -169,9 +173,9 @@ ReStart: return(0); } simple_lock(&pager_map_wanted_lock); - pager_map_wanted = TRUE; + pager_map_wanted = TRUE; UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); - UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, + UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, "pager_map", 0); goto ReStart; } @@ -186,7 +190,6 @@ enter: prot, PMAP_WIRED | ((pp->flags & PG_FAKE) ? prot : VM_PROT_READ)); } - pmap_update(vm_map_pmap(pager_map)); UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0); return(kva); @@ -205,7 +208,7 @@ uvm_pagermapout(kva, npages) int npages; { vsize_t size = npages << PAGE_SHIFT; - struct vm_map_entry *entries; + vm_map_entry_t entries; UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0); @@ -224,7 +227,7 @@ uvm_pagermapout(kva, npages) } vm_map_lock(pager_map); - uvm_unmap_remove(pager_map, kva, kva + size, &entries); + (void) uvm_unmap_remove(pager_map, kva, kva + size, &entries); simple_lock(&pager_map_wanted_lock); if (pager_map_wanted) { pager_map_wanted = FALSE; @@ -232,12 +235,11 @@ uvm_pagermapout(kva, npages) } simple_unlock(&pager_map_wanted_lock); vm_map_unlock(pager_map); - remove: pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT)); if (entries) uvm_unmap_detach(entries, 0); - pmap_update(pmap_kernel()); + UVMHIST_LOG(maphist,"<- done",0,0,0,0); } @@ -275,7 +277,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) int center_idx, forward, incr; UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist); - /* + /* * center page should already be busy and write protected. 
XXX: * suppose page is wired? if we lock, then a process could * fault/block on it. if we don't lock, a process could write the @@ -311,8 +313,8 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) *npages = 1; /* - * attempt to cluster around the left [backward], and then - * the right side [forward]. + * attempt to cluster around the left [backward], and then + * the right side [forward]. */ for (forward = 0 ; forward <= 1 ; forward++) { @@ -371,7 +373,7 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) (*npages)++; } } - + /* * done! return the cluster array to the caller!!! */ @@ -398,22 +400,22 @@ uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) * => flags (first two for non-swap-backed pages) * PGO_ALLPAGES: all pages in uobj are valid targets * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets - * PGO_SYNCIO: wait for i/o to complete + * PGO_SYNCIO: do SYNC I/O (no async) * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range * if (!uobj) start is the (daddr_t) of the starting swapblk * => return state: - * 1. we return the error code of the pageout + * 1. we return the VM_PAGER status code of the pageout * 2. we return with the page queues unlocked * 3. if (uobj != NULL) [!swap_backed] we return with - * uobj locked _only_ if PGO_PDFREECLUST is set - * AND result == 0 AND async. in all other cases + * uobj locked _only_ if PGO_PDFREECLUST is set + * AND result != VM_PAGER_PEND. in all other cases * we return with uobj unlocked. [this is a hack * that allows the pagedaemon to save one lock/unlock * pair in the !swap_backed case since we have to * lock the uobj to drop the cluster anyway] * 4. on errors we always drop the cluster. thus, if we return - * an error, then the caller only has to worry about + * !PEND, !OK, then the caller only has to worry about * un-busying the main page (not the cluster pages). * 5. on success, if !PGO_PDFREECLUST, we return the cluster * with all pages busy (caller must un-busy and check @@ -430,7 +432,6 @@ uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop) { int result; daddr_t swblk; - boolean_t async = (flags & PGO_SYNCIO) == 0; struct vm_page **ppsp = *ppsp_ptr; UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(ubchist); @@ -496,7 +497,7 @@ ReTry: * we have attempted the I/O. * * if the I/O was a success then: - * if !PGO_PDFREECLUST, we return the cluster to the + * if !PGO_PDFREECLUST, we return the cluster to the * caller (who must un-busy all pages) * else we un-busy cluster pages for the pagedaemon * @@ -505,21 +506,20 @@ ReTry: * i/o is done...] */ - if (result == 0) { - if (flags & PGO_PDFREECLUST && !async) { - + if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { + if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { /* - * drop cluster and relock object for sync i/o. + * drop cluster and relock object (only if I/O is + * not pending) */ - if (uobj) /* required for dropcluster */ simple_lock(&uobj->vmobjlock); if (*npages > 1 || pg == NULL) uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_PDFREECLUST); - - /* if (uobj): object still locked, as per #3 */ + /* if (uobj): object still locked, as per + * return-state item #3 */ } return (result); } @@ -537,24 +537,27 @@ ReTry: uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); /* - * for hard failures on swap-backed pageouts with a "pg" - * we need to clear pg's swslot since uvm_pager_dropcluster() - * didn't do it and we aren't going to retry. 
+ * for failed swap-backed pageouts with a "pg", + * we need to reset pg's swslot to either: + * "swblk" (for transient errors, so we can retry), + * or 0 (for hard errors). */ - if (uobj == NULL && pg != NULL && result != EAGAIN) { + if (uobj == NULL && pg != NULL) { + int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; if (pg->pqflags & PQ_ANON) { simple_lock(&pg->uanon->an_lock); - pg->uanon->an_swslot = 0; + pg->uanon->an_swslot = nswblk; simple_unlock(&pg->uanon->an_lock); } else { simple_lock(&pg->uobject->vmobjlock); uao_set_swslot(pg->uobject, - pg->offset >> PAGE_SHIFT, 0); + pg->offset >> PAGE_SHIFT, + nswblk); simple_unlock(&pg->uobject->vmobjlock); } } - if (result == EAGAIN) { + if (result == VM_PAGER_AGAIN) { /* * for transient failures, free all the swslots that @@ -590,18 +593,18 @@ ReTry: * was one). give up! the caller only has one page ("pg") * to worry about. */ - + if (uobj && (flags & PGO_PDFREECLUST) != 0) simple_lock(&uobj->vmobjlock); return(result); } /* - * uvm_pager_dropcluster: drop a cluster we have built (because we + * uvm_pager_dropcluster: drop a cluster we have built (because we * got an error, or, if PGO_PDFREECLUST we are un-busying the * cluster pages on behalf of the pagedaemon). * - * => uobj, if non-null, is a non-swap-backed object that is + * => uobj, if non-null, is a non-swap-backed object that is * locked by the caller. we return with this object still * locked. * => page queues are not locked @@ -609,7 +612,7 @@ ReTry: * => ppsp/npages is our current cluster * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster * pages on behalf of the pagedaemon. - * PGO_REALLOCSWAP: drop previously allocated swap slots for + * PGO_REALLOCSWAP: drop previously allocated swap slots for * clustered swap-backed pages (except for "pg" if !NULL) * "swblk" is the start of swap alloc (e.g. for ppsp[0]) * [only meaningful if swap-backed (uobj == NULL)] @@ -623,7 +626,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) int flags; { int lcv; - boolean_t obj_is_alive; + boolean_t obj_is_alive; struct uvm_object *saved_uobj; /* @@ -635,7 +638,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) /* skip "pg" or empty slot */ if (ppsp[lcv] == pg || ppsp[lcv] == NULL) continue; - + /* * if swap-backed, gain lock on object that owns page. note * that PQ_ANON bit can't change as long as we are holding @@ -688,7 +691,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) saved_uobj = ppsp[lcv]->uobject; obj_is_alive = saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL); - + /* for normal objects, "pg" is still PG_BUSY by us, * so obj can't die */ KASSERT(!uobj || obj_is_alive); @@ -711,7 +714,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } /* - * if we are operating on behalf of the pagedaemon and we + * if we are operating on behalf of the pagedaemon and we * had a successful pageout update the page! */ if (flags & PGO_PDFREECLUST) { @@ -730,6 +733,7 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags) } } +#ifdef UBC /* * interrupt-context iodone handler for nested i/o bufs. * @@ -753,6 +757,7 @@ uvm_aio_biodone1(bp) biodone(mbp); } } +#endif /* * interrupt-context iodone handler for single-buf i/os @@ -793,10 +798,12 @@ uvm_aio_aiodone(bp) error = (bp->b_flags & B_ERROR) ? (bp->b_error ? 
bp->b_error : EIO) : 0; write = (bp->b_flags & B_READ) == 0; +#ifdef UBC /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) { (*bioops.io_pageiodone)(bp); } +#endif uobj = NULL; for (i = 0; i < npages; i++) { @@ -873,12 +880,35 @@ uvm_aio_aiodone(bp) freed: #endif s = splbio(); - if (bp->b_vp != NULL) { - if (write && (bp->b_flags & B_AGE) != 0) { - vwakeup(bp->b_vp); - } + if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) { + vwakeup(bp->b_vp); } - (void) buf_cleanout(bp); pool_put(&bufpool, bp); splx(s); } + +/* + * translate unix errno values to VM_PAGER_*. + */ + +int +uvm_errno2vmerror(errno) + int errno; +{ + switch (errno) { + case 0: + return VM_PAGER_OK; + case EINVAL: + return VM_PAGER_BAD; + case EINPROGRESS: + return VM_PAGER_PEND; + case EIO: + return VM_PAGER_ERROR; + case EAGAIN: + return VM_PAGER_AGAIN; + case EBUSY: + return VM_PAGER_UNLOCK; + default: + return VM_PAGER_ERROR; + } +} diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h index 6b7ddc02d24..37592460b28 100644 --- a/sys/uvm/uvm_pager.h +++ b/sys/uvm/uvm_pager.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager.h,v 1.16 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pager.h,v 1.23 2001/05/26 21:27:21 chs Exp $ */ +/* $OpenBSD: uvm_pager.h,v 1.17 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $ */ /* * @@ -89,21 +89,20 @@ struct uvm_pagerops { void (*pgo_init) __P((void));/* init pager */ void (*pgo_reference) /* add reference to obj */ - __P((struct uvm_object *)); + __P((struct uvm_object *)); void (*pgo_detach) /* drop reference to obj */ __P((struct uvm_object *)); int (*pgo_fault) /* special nonstd fault fn */ __P((struct uvm_faultinfo *, vaddr_t, - struct vm_page **, int, int, vm_fault_t, + vm_page_t *, int, int, vm_fault_t, vm_prot_t, int)); boolean_t (*pgo_flush) /* flush pages out of obj */ __P((struct uvm_object *, voff_t, voff_t, int)); int (*pgo_get) /* get/read page */ __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, vm_prot_t, int, - int)); + vm_page_t *, int *, int, vm_prot_t, int, int)); int (*pgo_put) /* put/write page */ - __P((struct uvm_object *, struct vm_page **, + __P((struct uvm_object *, vm_page_t *, int, boolean_t)); void (*pgo_cluster) /* return range of cluster */ __P((struct uvm_object *, voff_t, voff_t *, @@ -144,7 +143,7 @@ struct uvm_pagerops { #ifdef UVM_PAGER_INLINE #define PAGER_INLINE static __inline -#else +#else #define PAGER_INLINE /* nothing */ #endif /* UVM_PAGER_INLINE */ @@ -152,12 +151,12 @@ struct uvm_pagerops { * prototypes */ -void uvm_pager_dropcluster __P((struct uvm_object *, - struct vm_page *, struct vm_page **, +void uvm_pager_dropcluster __P((struct uvm_object *, + struct vm_page *, struct vm_page **, int *, int)); void uvm_pager_init __P((void)); -int uvm_pager_put __P((struct uvm_object *, struct vm_page *, - struct vm_page ***, int *, int, +int uvm_pager_put __P((struct uvm_object *, struct vm_page *, + struct vm_page ***, int *, int, voff_t, voff_t)); PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); @@ -165,8 +164,9 @@ PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); vaddr_t uvm_pagermapin __P((struct vm_page **, int, int)); void uvm_pagermapout __P((vaddr_t, int)); struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, - int *, struct vm_page *, int, + int *, struct vm_page *, int, voff_t, voff_t)); +int uvm_errno2vmerror __P((int)); /* Flags to uvm_pagermapin() 
*/ #define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */ @@ -174,6 +174,27 @@ struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, #define UVMPAGER_MAPIN_WRITE 0x00 /* device -> host (pseudo flag) */ /* + * get/put return values + * OK operation was successful + * BAD specified data was out of the accepted range + * FAIL specified data was in range, but doesn't exist + * PEND operations was initiated but not completed + * ERROR error while accessing data that is in range and exists + * AGAIN temporary resource shortage prevented operation from happening + * UNLOCK unlock the map and try again + * REFAULT [uvm_fault internal use only!] unable to relock data structures, + * thus the mapping needs to be reverified before we can procede + */ +#define VM_PAGER_OK 0 +#define VM_PAGER_BAD 1 +#define VM_PAGER_FAIL 2 +#define VM_PAGER_PEND 3 +#define VM_PAGER_ERROR 4 +#define VM_PAGER_AGAIN 5 +#define VM_PAGER_UNLOCK 6 +#define VM_PAGER_REFAULT 7 + +/* * XXX * this is needed until the device strategy interface * is changed to do physically-addressed i/o. diff --git a/sys/uvm/uvm_pager_i.h b/sys/uvm/uvm_pager_i.h index f1b9f5e42f2..c027cd17fb2 100644 --- a/sys/uvm/uvm_pager_i.h +++ b/sys/uvm/uvm_pager_i.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pager_i.h,v 1.9 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pager_i.h,v 1.11 2001/05/25 04:06:16 chs Exp $ */ +/* $OpenBSD: uvm_pager_i.h,v 1.10 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pager_i.h,v 1.10 2000/11/25 06:28:00 chs Exp $ */ /* * @@ -32,7 +32,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp + * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp */ #ifndef _UVM_UVM_PAGER_I_H_ @@ -60,13 +60,13 @@ uvm_pageratop(kva) struct vm_page *pg; paddr_t pa; boolean_t rv; - + rv = pmap_extract(pmap_kernel(), kva, &pa); KASSERT(rv); pg = PHYS_TO_VM_PAGE(pa); KASSERT(pg != NULL); return (pg); -} +} #endif /* defined(UVM_PAGER_INLINE) || defined(UVM_PAGER) */ diff --git a/sys/uvm/uvm_param.h b/sys/uvm/uvm_param.h index 5cc1be262cc..60e5296d90b 100644 --- a/sys/uvm/uvm_param.h +++ b/sys/uvm/uvm_param.h @@ -1,7 +1,7 @@ -/* $OpenBSD: uvm_param.h,v 1.6 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_param.h,v 1.12 2001/08/05 03:33:16 matt Exp $ */ +/* $OpenBSD: uvm_param.h,v 1.7 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_param.h,v 1.5 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -72,9 +72,7 @@ #ifndef _VM_PARAM_ #define _VM_PARAM_ -#ifdef _KERNEL #include <machine/vmparam.h> -#endif /* * This belongs in types.h, but breaks too many existing programs. @@ -141,6 +139,18 @@ struct _ps_strings { #define SWAPSKIPBYTES 8192 /* never use at the start of a swap space */ +/* + * Return values from the VM routines. + */ +#define KERN_SUCCESS 0 +#define KERN_INVALID_ADDRESS EFAULT +#define KERN_PROTECTION_FAILURE EACCES +#define KERN_NO_SPACE ENOMEM +#define KERN_INVALID_ARGUMENT EINVAL +#define KERN_FAILURE EFAULT +#define KERN_RESOURCE_SHORTAGE ENOMEM +#define KERN_PAGES_LOCKED 9 /* XXX never returned */ + #ifndef ASSEMBLER /* * Convert addresses to pages and vice versa. 
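The VM_PAGER_* status codes and uvm_errno2vmerror() reintroduced above translate unix errno values into the pager return codes used throughout this patch. The following standalone sketch (illustrative only; errno2vmerror and main are stand-ins, not kernel code) mirrors that mapping so the table can be checked in isolation:

/*
 * Minimal, self-contained sketch of the errno -> VM_PAGER_* translation
 * added in uvm_pager.c above.  Constant values mirror the #defines in
 * uvm_pager.h; this is not the kernel function itself.
 */
#include <errno.h>
#include <stdio.h>

#define VM_PAGER_OK      0
#define VM_PAGER_BAD     1
#define VM_PAGER_FAIL    2
#define VM_PAGER_PEND    3
#define VM_PAGER_ERROR   4
#define VM_PAGER_AGAIN   5
#define VM_PAGER_UNLOCK  6
#define VM_PAGER_REFAULT 7

static int
errno2vmerror(int err)
{
	switch (err) {
	case 0:           return VM_PAGER_OK;
	case EINVAL:      return VM_PAGER_BAD;
	case EINPROGRESS: return VM_PAGER_PEND;	/* async i/o still pending */
	case EIO:         return VM_PAGER_ERROR;
	case EAGAIN:      return VM_PAGER_AGAIN;	/* transient shortage */
	case EBUSY:       return VM_PAGER_UNLOCK;
	default:          return VM_PAGER_ERROR;
	}
}

int
main(void)
{
	/* e.g. an async pageout reporting EINPROGRESS maps to PEND (3) */
	printf("EINPROGRESS -> %d\n", errno2vmerror(EINPROGRESS));
	printf("EIO         -> %d\n", errno2vmerror(EIO));
	return 0;
}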
@@ -158,8 +168,10 @@ struct _ps_strings { #define trunc_page(x) ((x) & ~PAGE_MASK) extern psize_t mem_size; /* size of physical memory (bytes) */ +#ifdef UBC extern int ubc_nwins; /* number of UBC mapping windows */ -extern int ubc_winshift; /* shift for a UBC mapping window */ +extern int ubc_winsize; /* size of a UBC mapping window */ +#endif #else /* out-of-kernel versions of round_page and trunc_page */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index 2e46a28ec7d..42fa8b0809d 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_pdaemon.c,v 1.20 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pdaemon.c,v 1.36 2001/06/27 18:52:10 thorpej Exp $ */ +/* $OpenBSD: uvm_pdaemon.c,v 1.21 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.30 2001/03/09 01:02:12 chs Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -373,6 +373,14 @@ uvmpd_scan_inactive(pglst) UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist); /* + * note: we currently keep swap-backed pages on a seperate inactive + * list from object-backed pages. however, merging the two lists + * back together again hasn't been ruled out. thus, we keep our + * swap cluster in "swpps" rather than in pps (allows us to mix + * clustering types in the event of a mixed inactive queue). + */ + + /* * swslot is non-zero if we are building a swap cluster. we want * to stay in the loop while we have a page to scan or we have * a swap-cluster to build. 
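The comment above notes that swap-backed and object-backed pages are kept on separate inactive queues. A minimal sketch of the selection rule follows, using the PQ_* flag values from the uvm_page.h hunk earlier in this diff (the helper name and queue enum are illustrative stand-ins, not kernel code):

/*
 * Sketch only: a page whose pqflags include PQ_ANON or PQ_AOBJ
 * (i.e. PQ_SWAPBACKED) goes on page_inactive_swp, everything else
 * on page_inactive_obj, as in uvm_pagedeactivate() above.
 */
#include <stdio.h>

#define PQ_ANON       0x0010
#define PQ_AOBJ       0x0020
#define PQ_SWAPBACKED (PQ_ANON | PQ_AOBJ)

enum inactive_queue { INACTIVE_SWP, INACTIVE_OBJ };

static enum inactive_queue
inactive_queue_for(unsigned short pqflags)
{
	/* swap-backed (anon or aobj) pages are scanned separately */
	return (pqflags & PQ_SWAPBACKED) ? INACTIVE_SWP : INACTIVE_OBJ;
}

int
main(void)
{
	printf("anon page  -> %s\n",
	    inactive_queue_for(PQ_ANON) == INACTIVE_SWP ? "swp" : "obj");
	printf("vnode page -> %s\n",
	    inactive_queue_for(0) == INACTIVE_SWP ? "swp" : "obj");
	return 0;
}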
@@ -687,20 +695,13 @@ uvmpd_scan_inactive(pglst) * add block to cluster */ - if (anon) { + swpps[swcpages] = p; + if (anon) anon->an_swslot = swslot + swcpages; - } else { - result = uao_set_swslot(uobj, + else + uao_set_swslot(uobj, p->offset >> PAGE_SHIFT, swslot + swcpages); - if (result == -1) { - p->flags &= ~PG_BUSY; - UVM_PAGE_OWN(p, NULL); - simple_unlock(&uobj->vmobjlock); - continue; - } - } - swpps[swcpages] = p; swcpages++; } } else { @@ -766,14 +767,18 @@ uvmpd_scan_inactive(pglst) * * note locking semantics of uvm_pager_put with PGO_PDFREECLUST: * IN: locked: uobj (if !swap_backed), page queues - * OUT:!locked: pageqs, uobj + * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND) + * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND) + * + * [the bit about VM_PAGER_PEND saves us one lock-unlock pair] */ /* locked: uobj (if !swap_backed), page queues */ uvmexp.pdpageouts++; result = uvm_pager_put(swap_backed ? NULL : uobj, p, &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0); - /* unlocked: pageqs, uobj */ + /* locked: uobj (if !swap_backed && result != PEND) */ + /* unlocked: pageqs, object (if swap_backed ||result == PEND) */ /* * if we did i/o to swap, zero swslot to indicate that we are @@ -784,10 +789,35 @@ uvmpd_scan_inactive(pglst) swslot = 0; /* done with this cluster */ /* - * if the pageout failed, reactivate the page and continue. + * first, we check for VM_PAGER_PEND which means that the + * async I/O is in progress and the async I/O done routine + * will clean up after us. in this case we move on to the + * next page. + * + * there is a very remote chance that the pending async i/o can + * finish _before_ we get here. if that happens, our page "p" + * may no longer be on the inactive queue. so we verify this + * when determining the next page (starting over at the head if + * we've lost our inactive page). */ - if (result == EIO && curproc == uvm.pagedaemon_proc) { + if (result == VM_PAGER_PEND) { + uvmexp.paging += npages; + uvm_lock_pageq(); + uvmexp.pdpending++; + if (p) { + if (p->pqflags & PQ_INACTIVE) + nextpg = TAILQ_NEXT(p, pageq); + else + nextpg = TAILQ_FIRST(pglst); + } else { + nextpg = NULL; + } + continue; + } + + if (result == VM_PAGER_ERROR && + curproc == uvm.pagedaemon_proc) { uvm_lock_pageq(); nextpg = TAILQ_NEXT(p, pageq); uvm_pageactivate(p); @@ -795,20 +825,134 @@ uvmpd_scan_inactive(pglst) } /* - * the pageout is in progress. bump counters and set up - * for the next loop. + * clean up "p" if we have one */ - uvm_lock_pageq(); - uvmexp.paging += npages; - uvmexp.pdpending++; if (p) { - if (p->pqflags & PQ_INACTIVE) + /* + * the I/O request to "p" is done and uvm_pager_put + * has freed any cluster pages it may have allocated + * during I/O. all that is left for us to do is + * clean up page "p" (which is still PG_BUSY). + * + * our result could be one of the following: + * VM_PAGER_OK: successful pageout + * + * VM_PAGER_AGAIN: tmp resource shortage, we skip + * to next page + * VM_PAGER_{FAIL,ERROR,BAD}: an error. we + * "reactivate" page to get it out of the way (it + * will eventually drift back into the inactive + * queue for a retry). + * VM_PAGER_UNLOCK: should never see this as it is + * only valid for "get" operations + */ + + /* relock p's object: page queues not lock yet, so + * no need for "try" */ + + /* !swap_backed case: already locked... 
*/ + if (swap_backed) { + if (anon) + simple_lock(&anon->an_lock); + else + simple_lock(&uobj->vmobjlock); + } + + /* handle PG_WANTED now */ + if (p->flags & PG_WANTED) + /* still holding object lock */ + wakeup(p); + + p->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(p, NULL); + + /* released during I/O? */ + if (p->flags & PG_RELEASED) { + if (anon) { + /* remove page so we can get nextpg */ + anon->u.an_page = NULL; + + simple_unlock(&anon->an_lock); + uvm_anfree(anon); /* kills anon */ + pmap_page_protect(p, VM_PROT_NONE); + anon = NULL; + uvm_lock_pageq(); + nextpg = TAILQ_NEXT(p, pageq); + /* free released page */ + uvm_pagefree(p); + + } else { + + /* + * pgo_releasepg nukes the page and + * gets "nextpg" for us. it returns + * with the page queues locked (when + * given nextpg ptr). + */ + + if (!uobj->pgops->pgo_releasepg(p, + &nextpg)) + /* uobj died after release */ + uobj = NULL; + + /* + * lock page queues here so that they're + * always locked at the end of the loop. + */ + + uvm_lock_pageq(); + } + } else { /* page was not released during I/O */ + uvm_lock_pageq(); nextpg = TAILQ_NEXT(p, pageq); - else - nextpg = TAILQ_FIRST(pglst); + if (result != VM_PAGER_OK) { + /* pageout was a failure... */ + if (result != VM_PAGER_AGAIN) + uvm_pageactivate(p); + pmap_clear_reference(p); + /* XXXCDC: if (swap_backed) FREE p's + * swap block? */ + } else { + /* pageout was a success... */ + pmap_clear_reference(p); + pmap_clear_modify(p); + p->flags |= PG_CLEAN; + } + } + + /* + * drop object lock (if there is an object left). do + * a safety check of nextpg to make sure it is on the + * inactive queue (it should be since PG_BUSY pages on + * the inactive queue can't be re-queued [note: not + * true for active queue]). + */ + + if (anon) + simple_unlock(&anon->an_lock); + else if (uobj) + simple_unlock(&uobj->vmobjlock); + } else { + + /* + * if p is null in this loop, make sure it stays null + * in the next loop. + */ + nextpg = NULL; + + /* + * lock page queues here just so they're always locked + * at the end of the loop. + */ + + uvm_lock_pageq(); + } + + if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) { + nextpg = TAILQ_FIRST(pglst); /* reload! */ } } return (retval); @@ -871,7 +1015,12 @@ uvmpd_scan() got_it = FALSE; pages_freed = uvmexp.pdfreed; - (void) uvmpd_scan_inactive(&uvm.page_inactive); + if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp); + if (!got_it) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj); + if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0) + (void) uvmpd_scan_inactive(&uvm.page_inactive_swp); pages_freed = uvmexp.pdfreed - pages_freed; /* @@ -959,14 +1108,13 @@ uvmpd_scan() } /* - * If we're short on inactive pages, move this over - * to the inactive list. The second hand will sweep - * it later, and if it has been referenced again, it - * will be moved back to active. + * If the page has not been referenced since the + * last scan, deactivate the page if there is a + * shortage of inactive pages. 
*/ - if (inactive_shortage > 0) { - pmap_clear_reference(p); + if (inactive_shortage > 0 && + pmap_clear_reference(p) == FALSE) { /* no need to check wire_count as pg is "active" */ uvm_pagedeactivate(p); uvmexp.pddeact++; diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h index bc6b96f5a07..dbae4b6ba97 100644 --- a/sys/uvm/uvm_pdaemon.h +++ b/sys/uvm/uvm_pdaemon.h @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_pdaemon.h,v 1.8 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pdaemon.h,v 1.9 2001/05/25 04:06:17 chs Exp $ */ +/* $OpenBSD: uvm_pdaemon.h,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pdaemon.h,v 1.8 1999/11/04 21:51:42 thorpej Exp $ */ -/* +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993, The Regents of the University of California. + * Copyright (c) 1991, 1993, The Regents of the University of California. * * All rights reserved. * @@ -21,7 +21,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -45,17 +45,17 @@ * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c index e747f827e6b..7d89a04c969 100644 --- a/sys/uvm/uvm_pglist.c +++ b/sys/uvm/uvm_pglist.c @@ -1,20 +1,20 @@ -/* $OpenBSD: uvm_pglist.c,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_pglist.c,v 1.17 2001/06/27 21:18:34 thorpej Exp $ */ +/* $OpenBSD: uvm_pglist.c,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */ /*- * Copyright (c) 1997 The NetBSD Foundation, Inc. * All rights reserved. - * + * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright + * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software @@ -24,7 +24,7 @@ * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -78,7 +78,7 @@ u_long uvm_pglistalloc_npages; * low the low address of the allowed allocation range. * high the high address of the allowed allocation range. * alignment memory must be aligned to this power-of-two boundary. - * boundary no segment in the allocation may cross this + * boundary no segment in the allocation may cross this * power-of-two boundary (relative to zero). */ @@ -92,16 +92,16 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) paddr_t try, idxpa, lastidxpa; int psi; struct vm_page *pgs; - int s, tryidx, idx, pgflidx, end, error, free_list, color; - struct vm_page *m; + int s, tryidx, idx, pgflidx, end, error, free_list; + vm_page_t m; u_long pagemask; #ifdef DEBUG - struct vm_page *tp; + vm_page_t tp; #endif KASSERT((alignment & (alignment - 1)) == 0); KASSERT((boundary & (boundary - 1)) == 0); - + /* * Our allocations are always page granularity, so our alignment * must be, too. @@ -198,11 +198,10 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) while (idx < end) { m = &pgs[idx]; free_list = uvm_page_lookup_freelist(m); - color = VM_PGCOLOR_BUCKET(m); pgflidx = (m->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; #ifdef DEBUG for (tp = TAILQ_FIRST(&uvm.page_free[ - free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); + free_list].pgfl_queues[pgflidx]); tp != NULL; tp = TAILQ_NEXT(tp, pageq)) { if (tp == m) @@ -211,8 +210,8 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) if (tp == NULL) panic("uvm_pglistalloc: page not on freelist"); #endif - TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ - color].pgfl_queues[pgflidx], m, pageq); + TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_queues[pgflidx], + m, pageq); uvmexp.free--; if (m->flags & PG_ZERO) uvmexp.zeropages--; @@ -232,8 +231,12 @@ out: * check to see if we need to generate some free pages waking * the pagedaemon. 
*/ - - UVM_KICK_PDAEMON(); + + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || + (uvmexp.free + uvmexp.paging < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) { + wakeup(&uvm.pagedaemon); + } uvm_unlock_fpageq(s); @@ -250,7 +253,7 @@ void uvm_pglistfree(list) struct pglist *list; { - struct vm_page *m; + vm_page_t m; int s; /* @@ -263,8 +266,8 @@ uvm_pglistfree(list) TAILQ_REMOVE(list, m, pageq); m->pqflags = PQ_FREE; TAILQ_INSERT_TAIL(&uvm.page_free[ - uvm_page_lookup_freelist(m)].pgfl_buckets[ - VM_PGCOLOR_BUCKET(m)].pgfl_queues[PGFL_UNKNOWN], m, pageq); + uvm_page_lookup_freelist(m)].pgfl_queues[PGFL_UNKNOWN], + m, pageq); uvmexp.free++; if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) uvm.page_idle_zero = vm_page_zero_enable; diff --git a/sys/uvm/uvm_pglist.h b/sys/uvm/uvm_pglist.h index 883171ebb86..3020df4d5b0 100644 --- a/sys/uvm/uvm_pglist.h +++ b/sys/uvm/uvm_pglist.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_pglist.h,v 1.4 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_pglist.h,v 1.5 2001/08/25 20:37:46 chs Exp $ */ +/* $OpenBSD: uvm_pglist.h,v 1.5 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_pglist.h,v 1.3 2001/05/02 01:22:20 thorpej Exp $ */ /*- * Copyright (c) 2000, 2001 The NetBSD Foundation, Inc. @@ -37,8 +37,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _UVM_UVM_PGLIST_H_ -#define _UVM_UVM_PGLIST_H_ +#ifndef _PGLIST_H_ +#define _PGLIST_H_ /* * This defines the type of a page queue, e.g. active list, inactive @@ -54,12 +54,8 @@ TAILQ_HEAD(pglist, vm_page); #define PGFL_ZEROS 1 #define PGFL_NQUEUES 2 -struct pgflbucket { - struct pglist pgfl_queues[PGFL_NQUEUES]; -}; - struct pgfreelist { - struct pgflbucket *pgfl_buckets; + struct pglist pgfl_queues[PGFL_NQUEUES]; }; -#endif /* _UVM_UVM_PGLIST_H_ */ +#endif diff --git a/sys/uvm/uvm_pmap.h b/sys/uvm/uvm_pmap.h index f4f2e4ce0ea..5e9617bc624 100644 --- a/sys/uvm/uvm_pmap.h +++ b/sys/uvm/uvm_pmap.h @@ -1,6 +1,6 @@ -/* $NetBSD: uvm_pmap.h,v 1.9 2001/09/10 21:19:43 chris Exp $ */ +/* $NetBSD: uvm_pmap.h,v 1.1 2000/06/27 09:00:14 mrg Exp $ */ -/* +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -42,17 +42,17 @@ * All rights reserved. * * Author: Avadis Tevanian, Jr. - * + * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * + * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU @@ -86,9 +86,7 @@ struct pmap_statistics { }; typedef struct pmap_statistics *pmap_statistics_t; -#ifdef _KERNEL #include <machine/pmap.h> -#endif /* * Flags passed to pmap_enter(). 
Note the bottom 3 bits are VM_PROT_* @@ -101,10 +99,7 @@ typedef struct pmap_statistics *pmap_statistics_t; #ifndef PMAP_EXCLUDE_DECLS /* Used in Sparc port to virtualize pmap mod */ #ifdef _KERNEL __BEGIN_DECLS -#if !defined(pmap_kernel) -struct pmap *pmap_kernel __P((void)); -#endif - +void *pmap_bootstrap_alloc __P((int)); void pmap_activate __P((struct proc *)); void pmap_deactivate __P((struct proc *)); void pmap_unwire __P((pmap_t, vaddr_t)); @@ -143,19 +138,13 @@ boolean_t pmap_is_referenced __P((struct vm_page *)); void pmap_page_protect __P((struct vm_page *, vm_prot_t)); #if !defined(pmap_phys_address) -paddr_t pmap_phys_address __P((int)); +paddr_t pmap_phys_address __P((int)); #endif void pmap_protect __P((pmap_t, vaddr_t, vaddr_t, vm_prot_t)); void pmap_reference __P((pmap_t)); void pmap_remove __P((pmap_t, vaddr_t, vaddr_t)); -void pmap_update __P((pmap_t)); -#if !defined(pmap_resident_count) -long pmap_resident_count __P((pmap_t)); -#endif -#if !defined(pmap_wired_count) -long pmap_wired_count __P((pmap_t)); -#endif +void pmap_update __P((void)); void pmap_zero_page __P((paddr_t)); void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); diff --git a/sys/uvm/uvm_stat.c b/sys/uvm/uvm_stat.c index 4746b59f6df..801d240fdf0 100644 --- a/sys/uvm/uvm_stat.c +++ b/sys/uvm/uvm_stat.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_stat.c,v 1.10 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_stat.c,v 1.19 2001/05/25 04:06:17 chs Exp $ */ +/* $OpenBSD: uvm_stat.c,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_stat.c,v 1.18 2001/03/09 01:02:13 chs Exp $ */ /* * @@ -135,7 +135,7 @@ restart: cur[lcv] = -1; goto restart; } - + /* * if the time hasn't been set yet, or this entry is * earlier than the current tv, set the time and history @@ -158,7 +158,7 @@ restart: if (cur[hi] == hists[hi]->f) cur[hi] = -1; } - + /* done! */ splx(s); } diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h index 2644314f99c..94dc3bb1a39 100644 --- a/sys/uvm/uvm_stat.h +++ b/sys/uvm/uvm_stat.h @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_stat.h,v 1.11 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_stat.h,v 1.22 2001/05/30 11:57:17 mrg Exp $ */ +/* $OpenBSD: uvm_stat.h,v 1.12 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_stat.h,v 1.19 2001/02/04 10:55:58 mrg Exp $ */ /* * @@ -38,10 +38,6 @@ #ifndef _UVM_UVM_STAT_H_ #define _UVM_UVM_STAT_H_ -#if defined(_KERNEL_OPT) -#include "opt_uvmhist.h" -#endif - #include <sys/queue.h> /* @@ -117,7 +113,7 @@ struct uvm_history { LIST_ENTRY(uvm_history) list; /* link on list of all histories */ int n; /* number of entries */ int f; /* next free one */ - struct simplelock l; /* lock on this history */ + simple_lock_data_t l; /* lock on this history */ struct uvm_history_ent *e; /* the malloc'd entries */ }; @@ -232,7 +228,7 @@ do { \ #define UVMHIST_FUNC(FNAME) \ static int _uvmhist_cnt = 0; \ static char *_uvmhist_name = FNAME; \ - int _uvmhist_call; + int _uvmhist_call; static __inline void uvmhist_print __P((struct uvm_history_ent *)); diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c index 02d7901ba9f..3ed77ab3555 100644 --- a/sys/uvm/uvm_swap.c +++ b/sys/uvm/uvm_swap.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_swap.c,v 1.46 2001/12/04 23:22:42 art Exp $ */ -/* $NetBSD: uvm_swap.c,v 1.53 2001/08/26 00:43:53 chs Exp $ */ +/* $OpenBSD: uvm_swap.c,v 1.47 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_swap.c,v 1.46 2001/02/18 21:19:08 chs Exp $ */ /* * Copyright (c) 1995, 1996, 1997 Matthew R. 
Green @@ -63,7 +63,7 @@ /* * swap space is managed in the following way: - * + * * each swap partition or file is described by a "swapdev" structure. * each "swapdev" structure contains a "swapent" structure which contains * information that is passed up to the user (via system calls). @@ -74,7 +74,7 @@ * the system maintains a global data structure describing all swap * partitions/files. there is a sorted LIST of "swappri" structures * which describe "swapdev"'s at that priority. this LIST is headed - * by the "swap_priority" global var. each "swappri" contains a + * by the "swap_priority" global var. each "swappri" contains a * CIRCLEQ of "swapdev" structures at that priority. * * locking: @@ -99,7 +99,7 @@ * userland controls and configures swap with the swapctl(2) system call. * the sys_swapctl performs the following operations: * [1] SWAP_NSWAP: returns the number of swap devices currently configured - * [2] SWAP_STATS: given a pointer to an array of swapent structures + * [2] SWAP_STATS: given a pointer to an array of swapent structures * (passed in via "arg") of a size passed in via "misc" ... we load * the current swap config into the array. * [3] SWAP_ON: given a pathname in arg (could be device or file) and a @@ -227,15 +227,16 @@ LIST_HEAD(swap_priority, swappri); static struct swap_priority swap_priority; /* locks */ -struct lock swap_syscall_lock; +lock_data_t swap_syscall_lock; /* * prototypes */ +static void swapdrum_add __P((struct swapdev *, int)); static struct swapdev *swapdrum_getsdp __P((int)); static struct swapdev *swaplist_find __P((struct vnode *, int)); -static void swaplist_insert __P((struct swapdev *, +static void swaplist_insert __P((struct swapdev *, struct swappri *, int)); static void swaplist_trim __P((void)); @@ -261,7 +262,7 @@ void uvm_swap_initcrypt __P((struct swapdev *, int)); /* * uvm_swap_init: init the swap system data structures and locks * - * => called at boot time from init_main.c after the filesystems + * => called at boot time from init_main.c after the filesystems * are brought up (which happens after uvm_init()) */ void @@ -287,7 +288,7 @@ uvm_swap_init() /* * create swap block resource map to map /dev/drum. the range * from 1 to INT_MAX allows 2 gigablocks of swap space. note - * that block 0 is reserved (used to indicate an allocation + * that block 0 is reserved (used to indicate an allocation * failure, or no allocation). */ swapmap = extent_create("swapmap", 1, INT_MAX, @@ -562,6 +563,27 @@ swaplist_trim() } /* + * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area. + * + * => caller must hold swap_syscall_lock + * => uvm.swap_data_lock should be unlocked (we may sleep) + */ +static void +swapdrum_add(sdp, npages) + struct swapdev *sdp; + int npages; +{ + u_long result; + + if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swapdrum_add"); + + sdp->swd_drumoffset = result; + sdp->swd_drumsize = npages; +} + +/* * swapdrum_getsdp: given a page offset in /dev/drum, convert it back * to the "swapdev" that maps that section of the drum. 
* @@ -574,19 +596,16 @@ swapdrum_getsdp(pgno) { struct swapdev *sdp; struct swappri *spp; - + for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = LIST_NEXT(spp, spi_swappri)) for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); sdp != (void *)&spp->spi_swapdev; - sdp = CIRCLEQ_NEXT(sdp, swd_next)) { - if (sdp->swd_flags & SWF_FAKE) - continue; + sdp = CIRCLEQ_NEXT(sdp, swd_next)) if (pgno >= sdp->swd_drumoffset && pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) { return sdp; } - } return NULL; } @@ -627,7 +646,7 @@ sys_swapctl(p, v, retval) /* * we handle the non-priv NSWAP and STATS request first. * - * SWAP_NSWAP: return number of config'd swap devices + * SWAP_NSWAP: return number of config'd swap devices * [can also be obtained with uvmexp sysctl] */ if (SCARG(uap, cmd) == SWAP_NSWAP) { @@ -641,9 +660,9 @@ sys_swapctl(p, v, retval) /* * SWAP_STATS: get stats on current # of configured swap devs * - * note that the swap_priority list can't change as long + * note that the swap_priority list can't change as long * as we are holding the swap_syscall_lock. we don't want - * to grab the uvm.swap_data_lock because we may fault&sleep during + * to grab the uvm.swap_data_lock because we may fault&sleep during * copyout() and we don't want to be holding that lock then! */ if (SCARG(uap, cmd) == SWAP_STATS @@ -659,7 +678,7 @@ sys_swapctl(p, v, retval) for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); sdp != (void *)&spp->spi_swapdev && misc-- > 0; sdp = CIRCLEQ_NEXT(sdp, swd_next)) { - sdp->swd_inuse = + sdp->swd_inuse = btodb((u_int64_t)sdp->swd_npginuse << PAGE_SHIFT); error = copyout(&sdp->swd_se, sep, @@ -679,8 +698,7 @@ sys_swapctl(p, v, retval) count++; #if defined(COMPAT_13) if (SCARG(uap, cmd) == SWAP_OSTATS) - sep = (struct swapent *) - ((struct oswapent *)sep + 1); + ((struct oswapent *)sep)++; else #endif sep++; @@ -692,7 +710,7 @@ sys_swapctl(p, v, retval) *retval = count; error = 0; goto out; - } + } /* * all other requests require superuser privs. verify. @@ -779,16 +797,14 @@ sys_swapctl(p, v, retval) */ priority = SCARG(uap, misc); - sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); - spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); simple_lock(&uvm.swap_data_lock); - if (swaplist_find(vp, 0) != NULL) { + if ((sdp = swaplist_find(vp, 0)) != NULL) { error = EBUSY; simple_unlock(&uvm.swap_data_lock); - free(sdp, M_VMSWAP); - free(spp, M_VMSWAP); break; } + sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); memset(sdp, 0, sizeof(*sdp)); sdp->swd_flags = SWF_FAKE; /* placeholder only */ sdp->swd_vp = vp; @@ -889,7 +905,6 @@ swap_on(p, sdp) struct vnode *vp; int error, npages, nblocks, size; long addr; - u_long result; struct vattr va; #if defined(NFSCLIENT) extern int (**nfsv2_vnodeop_p) __P((void *)); @@ -1018,9 +1033,9 @@ swap_on(p, sdp) } /* - * if the vnode we are swapping to is the root vnode + * if the vnode we are swapping to is the root vnode * (i.e. we are swapping to the miniroot) then we want - * to make sure we don't overwrite it. do a statfs to + * to make sure we don't overwrite it. do a statfs to * find its size and skip over it. */ if (vp == rootvp) { @@ -1035,7 +1050,7 @@ swap_on(p, sdp) if (rootpages > size) panic("swap_on: miniroot larger than swap?"); - if (extent_alloc_region(sdp->swd_ex, addr, + if (extent_alloc_region(sdp->swd_ex, addr, rootpages, EX_WAITOK)) panic("swap_on: unable to preserve miniroot"); @@ -1065,14 +1080,9 @@ swap_on(p, sdp) /* * now add the new swapdev to the drum and enable. 
*/ - if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, - EX_WAITOK, &result)) - panic("swapdrum_add"); - - sdp->swd_drumoffset = (int)result; - sdp->swd_drumsize = npages; - sdp->swd_npages = size; simple_lock(&uvm.swap_data_lock); + swapdrum_add(sdp, npages); + sdp->swd_npages = size; sdp->swd_flags &= ~SWF_FAKE; /* going live */ sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE); uvmexp.swpages += size; @@ -1121,7 +1131,7 @@ swap_off(p, sdp) sdp->swd_drumoffset + sdp->swd_drumsize) || anon_swap_off(sdp->swd_drumoffset, sdp->swd_drumoffset + sdp->swd_drumsize)) { - + simple_lock(&uvm.swap_data_lock); sdp->swd_flags |= SWF_ENABLE; simple_unlock(&uvm.swap_data_lock); @@ -1151,7 +1161,6 @@ swap_off(p, sdp) if (swaplist_find(sdp->swd_vp, 1) == NULL) panic("swap_off: swapdev not in list\n"); swaplist_trim(); - simple_unlock(&uvm.swap_data_lock); /* * free all resources! @@ -1160,6 +1169,7 @@ swap_off(p, sdp) EX_WAITOK); extent_destroy(sdp->swd_ex); free(sdp, M_VMSWAP); + simple_unlock(&uvm.swap_data_lock); return (0); } @@ -1326,7 +1336,7 @@ sw_reg_strategy(sdp, bp, bn) &vp, &nbn, &nra); if (error == 0 && nbn == (daddr_t)-1) { - /* + /* * this used to just set error, but that doesn't * do the right thing. Instead, it causes random * memory errors. The panic() should remain until @@ -1383,6 +1393,32 @@ sw_reg_strategy(sdp, bp, bn) nbp->vb_buf.b_vnbufs.le_next = NOLIST; LIST_INIT(&nbp->vb_buf.b_dep); + /* + * set b_dirtyoff/end and b_validoff/end. this is + * required by the NFS client code (otherwise it will + * just discard our I/O request). + */ + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); + } + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; + } else { + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); + } + nbp->vb_xfer = vnx; /* patch it back in to vnx */ /* @@ -1503,7 +1539,9 @@ sw_reg_iodone(bp) /* * disassociate this buffer from the vnode (if any). */ - (void) buf_cleanout(&vbp->vb_buf); + if (vbp->vb_buf.b_vp != NULL) { + brelvp(&vbp->vb_buf); + } /* * kill vbp structure @@ -1566,7 +1604,7 @@ uvm_swap_alloc(nslots, lessok) */ if (uvmexp.nswapdev < 1) return 0; - + /* * lock data lock, convert slots into blocks, and enter loop */ @@ -1670,8 +1708,8 @@ uvm_swap_free(startslot, nslots) } /* - * convert drum slot offset back to sdp, free the blocks - * in the extent, and return. must hold pri lock to do + * convert drum slot offset back to sdp, free the blocks + * in the extent, and return. must hold pri lock to do * lookup and access the extent. */ @@ -1742,26 +1780,23 @@ uvm_swap_get(page, swslot, flags) uvmexp.nswget++; KASSERT(flags & PGO_SYNCIO); if (swslot == SWSLOT_BAD) { - return EIO; + return VM_PAGER_ERROR; } /* * this page is (about to be) no longer only in swap. */ - simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly--; simple_unlock(&uvm.swap_data_lock); - result = uvm_swap_io(&page, swslot, 1, B_READ | + result = uvm_swap_io(&page, swslot, 1, B_READ | ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); - if (result != 0) { - + if (result != VM_PAGER_OK && result != VM_PAGER_PEND) { /* * oops, the read failed so it really is still only in swap. 
*/ - simple_lock(&uvm.swap_data_lock); uvmexp.swpgonly++; simple_unlock(&uvm.swap_data_lock); @@ -1782,7 +1817,7 @@ uvm_swap_io(pps, startslot, npages, flags) daddr_t startblk; struct buf *bp; vaddr_t kva; - int error, s, mapinflags, pflag; + int result, s, mapinflags, pflag; boolean_t write, async; #ifdef UVM_SWAP_ENCRYPT vaddr_t dstkva; @@ -1812,7 +1847,7 @@ uvm_swap_io(pps, startslot, npages, flags) mapinflags |= UVMPAGER_MAPIN_WAITOK; kva = uvm_pagermapin(pps, npages, mapinflags); if (kva == 0) - return (EAGAIN); + return (VM_PAGER_AGAIN); #ifdef UVM_SWAP_ENCRYPT if (write) { @@ -1858,14 +1893,14 @@ uvm_swap_io(pps, startslot, npages, flags) if (!uvm_swap_allocpages(tpps, npages)) { uvm_pagermapout(kva, npages); - return (EAGAIN); + return (VM_PAGER_AGAIN); } dstkva = uvm_pagermapin(tpps, npages, swmapflags); if (dstkva == NULL) { uvm_pagermapout(kva, npages); uvm_swap_freepages(tpps, npages); - return (EAGAIN); + return (VM_PAGER_AGAIN); } src = (caddr_t) kva; @@ -1894,7 +1929,7 @@ uvm_swap_io(pps, startslot, npages, flags) } #endif /* UVM_SWAP_ENCRYPT */ - /* + /* * now allocate a buf for the i/o. * [make sure we don't put the pagedaemon to sleep...] */ @@ -1919,7 +1954,7 @@ uvm_swap_io(pps, startslot, npages, flags) uvm_swap_freepages(tpps, npages); } #endif - return (EAGAIN); + return (VM_PAGER_AGAIN); } #ifdef UVM_SWAP_ENCRYPT @@ -1950,10 +1985,13 @@ uvm_swap_io(pps, startslot, npages, flags) splx(s); bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; - /* - * bump v_numoutput (counter of number of active outputs). + /* + * for pageouts we must set "dirtyoff" [NFS client code needs it]. + * and we bump v_numoutput (counter of number of active outputs). */ if (write) { + bp->b_dirtyoff = 0; + bp->b_dirtyend = npages << PAGE_SHIFT; #ifdef UVM_SWAP_ENCRYPT /* mark the pages in the drum for decryption */ if (swap_encrypt_initalized) @@ -1982,12 +2020,13 @@ uvm_swap_io(pps, startslot, npages, flags) */ VOP_STRATEGY(bp); if (async) - return 0; + return (VM_PAGER_PEND); /* * must be sync i/o. wait for it to finish */ - error = biowait(bp); + (void) biowait(bp); + result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; #ifdef UVM_SWAP_ENCRYPT /* @@ -2028,18 +2067,19 @@ uvm_swap_io(pps, startslot, npages, flags) * now dispose of the buf */ s = splbio(); + if (bp->b_vp) + brelvp(bp); + if (write && bp->b_vp) vwakeup(bp->b_vp); - - (void) buf_cleanout(bp); pool_put(&bufpool, bp); splx(s); /* * finally return. */ - UVMHIST_LOG(pdhist, "<- done (sync) error=%d", error, 0, 0, 0); - return (error); + UVMHIST_LOG(pdhist, "<- done (sync) result=%d", result, 0, 0, 0); + return (result); } static void diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index a6debf6ff8d..98724938298 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,9 +1,9 @@ -/* $OpenBSD: uvm_unix.c,v 1.19 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_unix.c,v 1.24 2001/06/06 21:28:51 mrg Exp $ */ +/* $OpenBSD: uvm_unix.c,v 1.20 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. - * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1991, 1993 The Regents of the University of California. * Copyright (c) 1988 University of Utah. * * All rights reserved. @@ -23,7 +23,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. 
Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -77,39 +77,44 @@ sys_obreak(p, v, retval) } */ *uap = v; struct vmspace *vm = p->p_vmspace; vaddr_t new, old; - int error; + ssize_t diff; + int rv; old = (vaddr_t)vm->vm_daddr; new = round_page((vaddr_t)SCARG(uap, nsize)); - if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) + if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur) return (ENOMEM); old = round_page(old + ptoa(vm->vm_dsize)); + diff = new - old; - if (new == old) + if (diff == 0) return (0); /* * grow or shrink? */ - if (new > old) { - error = uvm_map(&vm->vm_map, &old, new - old, NULL, - UVM_UNKNOWN_OFFSET, 0, - UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, + if (diff > 0) { + rv = uvm_map(&vm->vm_map, &old, diff, NULL, UVM_UNKNOWN_OFFSET, + 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, UVM_ADV_NORMAL, UVM_FLAG_AMAPPAD|UVM_FLAG_FIXED| UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)); - if (error) { - uprintf("sbrk: grow %ld failed, error = %d\n", - new - old, error); - return error; + if (rv == KERN_SUCCESS) { + vm->vm_dsize += atop(diff); + return (0); } - vm->vm_dsize += atop(new - old); } else { - uvm_deallocate(&vm->vm_map, new, old - new); - vm->vm_dsize -= atop(old - new); + rv = uvm_deallocate(&vm->vm_map, new, -diff); + if (rv == KERN_SUCCESS) { + vm->vm_dsize -= atop(-diff); + return (0); + } } - return (0); + uprintf("sbrk: %s %ld failed, return = %d\n", + diff > 0 ? "grow" : "shrink", + (long)(diff > 0 ? diff : -diff), rv); + return (ENOMEM); } /* @@ -190,8 +195,8 @@ uvm_coredump(p, vp, cred, chdr) struct core *chdr; { struct vmspace *vm = p->p_vmspace; - struct vm_map *map = &vm->vm_map; - struct vm_map_entry *entry; + vm_map_t map = &vm->vm_map; + vm_map_entry_t entry; vaddr_t start, end, maxstack; struct coreseg cseg; off_t offset; diff --git a/sys/uvm/uvm_user.c b/sys/uvm/uvm_user.c index 502d2aca440..01677547711 100644 --- a/sys/uvm/uvm_user.c +++ b/sys/uvm/uvm_user.c @@ -1,5 +1,5 @@ -/* $OpenBSD: uvm_user.c,v 1.8 2001/11/28 19:28:15 art Exp $ */ -/* $NetBSD: uvm_user.c,v 1.10 2001/06/02 18:09:27 chs Exp $ */ +/* $OpenBSD: uvm_user.c,v 1.9 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_user.c,v 1.8 2000/06/27 17:29:37 mrg Exp $ */ /* * @@ -50,15 +50,19 @@ * uvm_deallocate: deallocate memory (unmap) */ -void +int uvm_deallocate(map, start, size) - struct vm_map *map; + vm_map_t map; vaddr_t start; vsize_t size; { - if (size == 0) - return; + if (map == NULL) + panic("uvm_deallocate with null map"); + + if (size == (vaddr_t) 0) + return (KERN_SUCCESS); + + return(uvm_unmap(map, trunc_page(start), round_page(start+size))); - uvm_unmap(map, trunc_page(start), round_page(start + size)); } diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index d58d0cf93f4..4783597df3d 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,10 +1,10 @@ -/* $OpenBSD: uvm_vnode.c,v 1.31 2001/12/10 02:19:34 art Exp $ */ -/* $NetBSD: uvm_vnode.c,v 1.51 2001/08/17 05:53:02 chs Exp $ */ +/* $OpenBSD: uvm_vnode.c,v 1.32 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993 - * The Regents of the University of California. 
+ * The Regents of the University of California. * Copyright (c) 1990 University of Utah. * * All rights reserved. @@ -24,7 +24,7 @@ * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor, - * Washington University, the University of California, Berkeley and + * Washington University, the University of California, Berkeley and * its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software @@ -52,7 +52,6 @@ #include <sys/param.h> #include <sys/systm.h> -#include <sys/kernel.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/vnode.h> @@ -60,47 +59,62 @@ #include <sys/ioctl.h> #include <sys/fcntl.h> #include <sys/conf.h> -#include <sys/pool.h> -#include <sys/mount.h> #include <miscfs/specfs/specdev.h> #include <uvm/uvm.h> +#include <uvm/uvm_vnode.h> + +/* + * private global data structure + * + * we keep a list of writeable active vnode-backed VM objects for sync op. + * we keep a simpleq of vnodes that are currently being sync'd. + */ + +LIST_HEAD(uvn_list_struct, uvm_vnode); +static struct uvn_list_struct uvn_wlist; /* writeable uvns */ +static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */ + +SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode); +static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */ +lock_data_t uvn_sync_lock; /* locks sync operation */ /* * functions */ -static void uvn_cluster __P((struct uvm_object *, voff_t, voff_t *, - voff_t *)); -static void uvn_detach __P((struct uvm_object *)); -static int uvn_findpage __P((struct uvm_object *, voff_t, - struct vm_page **, int)); -boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t, - int)); -int uvn_get __P((struct uvm_object *, voff_t, - struct vm_page **, int *, int, vm_prot_t, - int, int)); -int uvn_put __P((struct uvm_object *, struct vm_page **, - int, boolean_t)); -static void uvn_reference __P((struct uvm_object *)); -static boolean_t uvn_releasepg __P((struct vm_page *, - struct vm_page **)); +static void uvn_cluster __P((struct uvm_object *, voff_t, + voff_t *, voff_t *)); +static void uvn_detach __P((struct uvm_object *)); +static boolean_t uvn_flush __P((struct uvm_object *, voff_t, + voff_t, int)); +static int uvn_get __P((struct uvm_object *, voff_t, + vm_page_t *, int *, int, + vm_prot_t, int, int)); +static void uvn_init __P((void)); +static int uvn_io __P((struct uvm_vnode *, vm_page_t *, + int, int, int)); +static int uvn_put __P((struct uvm_object *, vm_page_t *, + int, boolean_t)); +static void uvn_reference __P((struct uvm_object *)); +static boolean_t uvn_releasepg __P((struct vm_page *, + struct vm_page **)); /* * master pager structure */ struct uvm_pagerops uvm_vnodeops = { - NULL, + uvn_init, uvn_reference, uvn_detach, - NULL, + NULL, /* no specialized fault routine required */ uvn_flush, uvn_get, uvn_put, uvn_cluster, - uvm_mk_pcluster, + uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */ uvn_releasepg, }; @@ -109,6 +123,22 @@ struct uvm_pagerops uvm_vnodeops = { */ /* + * uvn_init + * + * init pager private data structures. + */ + +static void +uvn_init() +{ + + LIST_INIT(&uvn_wlist); + simple_lock_init(&uvn_wl_lock); + /* note: uvn_sync_q init'd in uvm_vnp_sync() */ + lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0); +} + +/* * uvn_attach * * attach a vnode structure to a VM object. 
if the vnode is already @@ -129,26 +159,29 @@ uvn_attach(arg, accessprot) vm_prot_t accessprot; { struct vnode *vp = arg; - struct uvm_object *uobj = &vp->v_uobj; + struct uvm_vnode *uvn = &vp->v_uvm; struct vattr vattr; - int result; + int oldflags, result; struct partinfo pi; - voff_t used_vnode_size; + u_quad_t used_vnode_size; UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0); - used_vnode_size = (voff_t)0; + + used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */ /* * first get a lock on the uvn. */ - simple_lock(uobj->vmobjlock); - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; + simple_lock(&uvn->u_obj.vmobjlock); + while (uvn->u_flags & UVM_VNODE_BLOCKED) { + printf("uvn_attach: blocked at 0x%p flags 0x%x\n", + uvn, uvn->u_flags); + uvn->u_flags |= UVM_VNODE_WANTED; UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0); - UVM_UNLOCK_AND_WAIT(vp, &uobj->vmobjlock, FALSE, + UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE, "uvn_attach", 0); - simple_lock(&uobj->vmobjlock); + simple_lock(&uvn->u_obj.vmobjlock); UVMHIST_LOG(maphist," WOKE UP",0,0,0,0); } @@ -156,21 +189,56 @@ uvn_attach(arg, accessprot) * if we're mapping a BLK device, make sure it is a disk. */ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) { - simple_unlock(&uobj->vmobjlock); + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0); return(NULL); } - KASSERT(vp->v_type == VREG || vp->v_type == VBLK); /* - * set up our idea of the size - * if this hasn't been done already. + * now we have lock and uvn must not be in a blocked state. + * first check to see if it is already active, in which case + * we can bump the reference count, check to see if we need to + * add it to the writeable list, and then return. */ - if (vp->v_size == VSIZENOTSET) { + if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */ + + /* regain VREF if we were persisting */ + if (uvn->u_obj.uo_refs == 0) { + VREF(vp); + UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)", + 0,0,0,0); + } + uvn->u_obj.uo_refs++; /* bump uvn ref! */ + + /* check for new writeable uvn */ + if ((accessprot & VM_PROT_WRITE) != 0 && + (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + /* we are now on wlist! */ + uvn->u_flags |= UVM_VNODE_WRITEABLE; + } + + /* unlock and return */ + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, + 0, 0, 0); + return (&uvn->u_obj); + } - vp->v_flag |= VXLOCK; - simple_unlock(&uobj->vmobjlock); /* drop lock in case we sleep */ + /* + * need to call VOP_GETATTR() to get the attributes, but that could + * block (due to I/O), so we want to unlock the object before calling. + * however, we want to keep anyone else from playing with the object + * while it is unlocked. to do this we set UVM_VNODE_ALOCK which + * prevents anyone from attaching to the vnode until we are done with + * it. + */ + uvn->u_flags = UVM_VNODE_ALOCK; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */ /* XXX: curproc? 
*/ + if (vp->v_type == VBLK) { /* * We could implement this as a specfs getattr call, but: @@ -184,8 +252,8 @@ uvn_attach(arg, accessprot) DIOCGPART, (caddr_t)&pi, FREAD, curproc); if (result == 0) { /* XXX should remember blocksize */ - used_vnode_size = (voff_t)pi.disklab->d_secsize * - (voff_t)pi.part->p_size; + used_vnode_size = (u_quad_t)pi.disklab->d_secsize * + (u_quad_t)pi.part->p_size; } } else { result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc); @@ -194,26 +262,58 @@ uvn_attach(arg, accessprot) } /* relock object */ - simple_lock(&uobj->vmobjlock); - - if (vp->v_flag & VXWANT) - wakeup(vp); - vp->v_flag &= ~(VXLOCK|VXWANT); + simple_lock(&uvn->u_obj.vmobjlock); if (result != 0) { - simple_unlock(&uobj->vmobjlock); /* drop lock */ + if (uvn->u_flags & UVM_VNODE_WANTED) + wakeup(uvn); + uvn->u_flags = 0; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0); return(NULL); } - vp->v_size = used_vnode_size; + /* + * make sure that the newsize fits within a vaddr_t + * XXX: need to revise addressing data types + */ +#ifdef DEBUG + if (vp->v_type == VBLK) + printf("used_vnode_size = %llu\n", (long long)used_vnode_size); +#endif + + /* + * now set up the uvn. + */ + uvn->u_obj.pgops = &uvm_vnodeops; + TAILQ_INIT(&uvn->u_obj.memq); + uvn->u_obj.uo_npages = 0; + uvn->u_obj.uo_refs = 1; /* just us... */ + oldflags = uvn->u_flags; + uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST; + uvn->u_nio = 0; + uvn->u_size = used_vnode_size; + + /* if write access, we need to add it to the wlist */ + if (accessprot & VM_PROT_WRITE) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */ } - /* unlock and return */ - simple_unlock(&uobj->vmobjlock); - UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, - 0, 0, 0); - return (uobj); + /* + * add a reference to the vnode. this reference will stay as long + * as there is a valid mapping of the vnode. dropped when the + * reference count goes to zero [and we either free or persist]. + */ + VREF(vp); + simple_unlock(&uvn->u_obj.vmobjlock); + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0); + return(&uvn->u_obj); } @@ -221,10 +321,10 @@ uvn_attach(arg, accessprot) * uvn_reference * * duplicate a reference to a VM object. Note that the reference - * count must already be at least one (the passed in reference) so + * count must already be at least one (the passed in reference) so * there is no chance of the uvn being killed or locked out here. * - * => caller must call with object unlocked. + * => caller must call with object unlocked. * => caller must be using the same accessprot as was used at attach time */ @@ -233,7 +333,23 @@ static void uvn_reference(uobj) struct uvm_object *uobj; { - VREF((struct vnode *)uobj); +#ifdef DEBUG + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; +#endif + UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); +#ifdef DEBUG + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags, + uobj->uo_refs); + panic("uvn_reference: invalid state"); + } +#endif + uobj->uo_refs++; + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + uobj, uobj->uo_refs,0,0); + simple_unlock(&uobj->vmobjlock); } /* @@ -242,12 +358,298 @@ uvn_reference(uobj) * remove a reference to a VM object. 
* * => caller must call with object unlocked and map locked. + * => this starts the detach process, but doesn't have to finish it + * (async i/o could still be pending). */ static void uvn_detach(uobj) struct uvm_object *uobj; { - vrele((struct vnode *)uobj); + struct uvm_vnode *uvn; + struct vnode *vp; + int oldflags; + UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); + + UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); + uobj->uo_refs--; /* drop ref! */ + if (uobj->uo_refs) { /* still more refs */ + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); + return; + } + + /* + * get other pointers ... + */ + + uvn = (struct uvm_vnode *) uobj; + vp = (struct vnode *) uobj; + + /* + * clear VTEXT flag now that there are no mappings left (VTEXT is used + * to keep an active text file from being overwritten). + */ + vp->v_flag &= ~VTEXT; + + /* + * we just dropped the last reference to the uvn. see if we can + * let it "stick around". + */ + + if (uvn->u_flags & UVM_VNODE_CANPERSIST) { + /* won't block */ + uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES); + simple_unlock(&uobj->vmobjlock); + vrele(vp); /* drop vnode reference */ + UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0); + return; + } + + /* + * its a goner! + */ + + UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0); + + uvn->u_flags |= UVM_VNODE_DYING; + + /* + * even though we may unlock in flush, no one can gain a reference + * to us until we clear the "dying" flag [because it blocks + * attaches]. we will not do that until after we've disposed of all + * the pages with uvn_flush(). note that before the flush the only + * pages that could be marked PG_BUSY are ones that are in async + * pageout by the daemon. (there can't be any pending "get"'s + * because there are no references to the object). + */ + + (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0); + + /* + * given the structure of this pager, the above flush request will + * create the following state: all the pages that were in the object + * have either been free'd or they are marked PG_BUSY|PG_RELEASED. + * the PG_BUSY bit was set either by us or the daemon for async I/O. + * in either case, if we have pages left we can't kill the object + * yet because i/o is pending. in this case we set the "relkill" + * flag which will cause pgo_releasepg to kill the object once all + * the I/O's are done [pgo_releasepg will be called from the aiodone + * routine or from the page daemon]. + */ + + if (uobj->uo_npages) { /* I/O pending. iodone will free */ +#ifdef DEBUG + /* + * XXXCDC: very unlikely to happen until we have async i/o + * so print a little info message in case it does. + */ + printf("uvn_detach: vn %p has pages left after flush - " + "relkill mode\n", uobj); +#endif + uvn->u_flags |= UVM_VNODE_RELKILL; + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0, + 0, 0); + return; + } + + /* + * kill object now. note that we can't be on the sync q because + * all references are gone. 
+ */ + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); /* protect uvn_wlist */ + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uobj->memq.tqh_first != NULL) + panic("uvn_deref: vnode VM object still has pages afer " + "syncio/free flush"); +#endif + oldflags = uvn->u_flags; + uvn->u_flags = 0; + simple_unlock(&uobj->vmobjlock); + + /* wake up any sleepers */ + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + /* + * drop our reference to the vnode. + */ + vrele(vp); + UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0); + + return; +} + +/* + * uvm_vnp_terminate: external hook to clear out a vnode's VM + * + * called in two cases: + * [1] when a persisting vnode vm object (i.e. one with a zero reference + * count) needs to be freed so that a vnode can be reused. this + * happens under "getnewvnode" in vfs_subr.c. if the vnode from + * the free list is still attached (i.e. not VBAD) then vgone is + * called. as part of the vgone trace this should get called to + * free the vm object. this is the common case. + * [2] when a filesystem is being unmounted by force (MNT_FORCE, + * "umount -f") the vgone() function is called on active vnodes + * on the mounted file systems to kill their data (the vnodes become + * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a + * call here (even if the uvn is still in use -- i.e. has a non-zero + * reference count). this case happens at "umount -f" and during a + * "reboot/halt" operation. + * + * => the caller must XLOCK and VOP_LOCK the vnode before calling us + * [protects us from getting a vnode that is already in the DYING + * state...] + * => unlike uvn_detach, this function must not return until all the + * uvn's pages are disposed of. + * => in case [2] the uvn is still alive after this call, but all I/O + * ops will fail (due to the backing vnode now being "dead"). this + * will prob. kill any process using the uvn due to pgo_get failing. + */ + +void +uvm_vnp_terminate(vp) + struct vnode *vp; +{ + struct uvm_vnode *uvn = &vp->v_uvm; + int oldflags; + UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist); + + /* + * lock object and check if it is valid + */ + simple_lock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp, + uvn->u_obj.uo_refs, uvn->u_flags, 0); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0); + return; + } + + /* + * must be a valid uvn that is not already dying (because XLOCK + * protects us from that). the uvn can't in the ALOCK state + * because it is valid, and uvn's that are in the ALOCK state haven't + * been marked valid yet. + */ + +#ifdef DEBUG + /* + * debug check: are we yanking the vnode out from under our uvn? + */ + if (uvn->u_obj.uo_refs) { + printf("uvm_vnp_terminate(%p): terminating active vnode " + "(refs=%d)\n", uvn, uvn->u_obj.uo_refs); + } +#endif + + /* + * it is possible that the uvn was detached and is in the relkill + * state [i.e. waiting for async i/o to finish so that releasepg can + * kill object]. we take over the vnode now and cancel the relkill. + * we want to know when the i/o is done so we can recycle right + * away. note that a uvn can only be in the RELKILL state if it + * has a zero reference count. + */ + + if (uvn->u_flags & UVM_VNODE_RELKILL) + uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */ + + /* + * block the uvn by setting the dying flag, and then flush the + * pages. 
(note that flush may unlock object while doing I/O, but + * it will re-lock it before it returns control here). + * + * also, note that we tell I/O that we are already VOP_LOCK'd so + * that uvn_io doesn't attempt to VOP_LOCK again. + * + * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated + * due to a forceful unmount might not be a good idea. maybe we + * need a way to pass in this info to uvn_flush through a + * pager-defined PGO_ constant [currently there are none]. + */ + uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED; + + (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + /* + * as we just did a flush we expect all the pages to be gone or in + * the process of going. sleep to wait for the rest to go [via iosync]. + */ + + while (uvn->u_obj.uo_npages) { +#ifdef DEBUG + struct vm_page *pp; + for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { + if ((pp->flags & PG_BUSY) == 0) + panic("uvm_vnp_terminate: detected unbusy pg"); + } + if (uvn->u_nio == 0) + panic("uvm_vnp_terminate: no I/O to wait for?"); + printf("uvm_vnp_terminate: waiting for I/O to fin.\n"); + /* + * XXXCDC: this is unlikely to happen without async i/o so we + * put a printf in just to keep an eye on it. + */ +#endif + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE, + "uvn_term",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * done. now we free the uvn if its reference count is zero + * (true if we are zapping a persisting uvn). however, if we are + * terminating a uvn with active mappings we let it live ... future + * calls down to the vnode layer will fail. + */ + + oldflags = uvn->u_flags; + if (uvn->u_obj.uo_refs) { + + /* + * uvn must live on it is dead-vnode state until all references + * are gone. restore flags. clear CANPERSIST state. + */ + + uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED| + UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST); + + } else { + + /* + * free the uvn now. note that the VREF reference is already + * gone [it is dropped when we enter the persist state]. + */ + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + panic("uvm_vnp_terminate: io sync wanted bit set"); + + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } + uvn->u_flags = 0; /* uvn is history, clear all bits */ + } + + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); /* object lock still held */ + + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0); + } /* @@ -260,7 +662,7 @@ uvn_detach(uobj) * => returns TRUE if page's object is still alive, FALSE if we * killed the page's object. if we return TRUE, then we * return with the object locked. 
- * => if (nextpgp != NULL) => we return the next page on the queue, and return + * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return * with the page queues locked [for pagedaemon] * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] * => we kill the uvn if it is not referenced and we are suppose to @@ -272,33 +674,76 @@ uvn_releasepg(pg, nextpgp) struct vm_page *pg; struct vm_page **nextpgp; /* OUT */ { - KASSERT(pg->flags & PG_RELEASED); - + struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject; +#ifdef DIAGNOSTIC + if ((pg->flags & PG_RELEASED) == 0) + panic("uvn_releasepg: page not released!"); +#endif + /* * dispose of the page [caller handles PG_WANTED] */ pmap_page_protect(pg, VM_PROT_NONE); uvm_lock_pageq(); if (nextpgp) - *nextpgp = TAILQ_NEXT(pg, pageq); + *nextpgp = pg->pageq.tqe_next; /* next page for daemon */ uvm_pagefree(pg); if (!nextpgp) uvm_unlock_pageq(); + /* + * now see if we need to kill the object + */ + if (uvn->u_flags & UVM_VNODE_RELKILL) { + if (uvn->u_obj.uo_refs) + panic("uvn_releasepg: kill flag set on referenced " + "object!"); + if (uvn->u_obj.uo_npages == 0) { + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uvn->u_obj.memq.tqh_first) + panic("uvn_releasepg: pages in object with npages == 0"); +#endif + if (uvn->u_flags & UVM_VNODE_WANTED) + /* still holding object lock */ + wakeup(uvn); + + uvn->u_flags = 0; /* DEAD! */ + simple_unlock(&uvn->u_obj.vmobjlock); + return (FALSE); + } + } return (TRUE); } /* + * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go + * through the buffer cache and allow I/O in any size. These VOPs use + * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't + * go through the buffer cache or allow I/O sizes larger than a + * block]. we will eventually want to change this. + * * issues to consider: + * uvm provides the uvm_aiodesc structure for async i/o management. * there are two tailq's in the uvm. structure... one for pending async * i/o and one for "done" async i/o. to do an async i/o one puts - * a buf on the "pending" list (protected by splbio()), starts the - * i/o and returns 0. when the i/o is done, we expect + * an aiodesc on the "pending" list (protected by splbio()), starts the + * i/o and returns VM_PAGER_PEND. when the i/o is done, we expect * some sort of "i/o done" function to be called (at splbio(), interrupt - * time). this function should remove the buf from the pending list + * time). this function should remove the aiodesc from the pending list * and place it on the "done" list and wakeup the daemon. the daemon * will run at normal spl() and will remove all items from the "done" - * list and call the iodone hook for each done request (see uvm_pager.c). + * list and call the "aiodone" hook for each done request (see uvm_pager.c). + * [in the old vm code, this was done by calling the "put" routine with + * null arguments which made the code harder to read and understand because + * you had one function ("put") doing two things.] + * + * so the current pager needs: + * int uvn_aiodone(struct uvm_aiodesc *) * * => return KERN_SUCCESS (aio finished, free it). otherwise requeue for * later collection. @@ -319,17 +764,15 @@ uvn_releasepg(pg, nextpgp) /* * uvn_flush: flush pages out of a uvm object. * - * => "stop == 0" means flush all pages at or after "start". * => object should be locked by caller. 
we may _unlock_ the object - * if (and only if) we need to clean a page (PGO_CLEANIT), or - * if PGO_SYNCIO is set and there are pages busy. + * if (and only if) we need to clean a page (PGO_CLEANIT). * we return with the object locked. - * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O). - * thus, a caller might want to unlock higher level resources - * (e.g. vm_map) before calling flush. - * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither - * unlock the object nor block. - * => if PGO_ALLPAGES is set, then all pages in the object are valid targets + * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller + * might want to unlock higher level resources (e.g. vm_map) + * before calling flush. + * => if PGO_CLEANIT is not set, then we will neither unlock the object + * or block. + * => if PGO_ALLPAGE is set, then all pages in the object are valid targets * for flushing. * => NOTE: we rely on the fact that the object's memq is a TAILQ and * that new pages are inserted on the tail end of the list. thus, @@ -349,9 +792,9 @@ uvn_releasepg(pg, nextpgp) * in, then it can not be dirty (!PG_CLEAN) because no one has * had a chance to modify it yet. if the PG_BUSY page is being * paged out then it means that someone else has already started - * cleaning the page for us (how nice!). in this case, if we + * cleaning the page for us (how nice!). in this case, if we * have syncio specified, then after we make our pass through the - * object we need to wait for the other PG_BUSY pages to clear + * object we need to wait for the other PG_BUSY pages to clear * off (i.e. we need to do an iosync). also note that once a * page is PG_BUSY it must stay in its object until it is un-busyed. * @@ -359,76 +802,53 @@ uvn_releasepg(pg, nextpgp) * we can traverse the pages in an object either by going down the * linked list in "uobj->memq", or we can go over the address range * by page doing hash table lookups for each address. depending - * on how many pages are in the object it may be cheaper to do one + * on how many pages are in the object it may be cheaper to do one * or the other. we set "by_list" to true if we are using memq. * if the cost of a hash lookup was equal to the cost of the list * traversal we could compare the number of pages in the start->stop * range to the total number of pages in the object. however, it * seems that a hash table lookup is more expensive than the linked - * list traversal, so we multiply the number of pages in the + * list traversal, so we multiply the number of pages in the * start->stop range by a penalty which we define below. 
*/ #define UVN_HASH_PENALTY 4 /* XXX: a guess */ -boolean_t +static boolean_t uvn_flush(uobj, start, stop, flags) struct uvm_object *uobj; voff_t start, stop; int flags; { - struct vnode *vp = (struct vnode *)uobj; + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; struct vm_page *pp, *ppnext, *ptmp; - struct vm_page *pps[256], **ppsp; - int s; + struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp; int npages, result, lcv; - boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean; - boolean_t async = (flags & PGO_SYNCIO) == 0; + boolean_t retval, need_iosync, by_list, needs_clean, all; voff_t curoff; u_short pp_version; UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x", - uobj, start, stop, flags); - KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)); - - if (uobj->uo_npages == 0) { - s = splbio(); - if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && - (vp->v_bioflag & VBIOONSYNCLIST)) { - vp->v_bioflag &= ~VBIOONSYNCLIST; - LIST_REMOVE(vp, v_synclist); - } - splx(s); - return TRUE; - } - -#ifdef DIAGNOSTIC - if (vp->v_size == VSIZENOTSET) { - printf("uvn_flush: size not set vp %p\n", vp); - vprint("uvn_flush VSIZENOTSET", vp); - flags |= PGO_ALLPAGES; - } -#endif + curoff = 0; /* XXX: shut up gcc */ /* * get init vals and determine how we are going to traverse object */ - if (stop == 0) { - stop = trunc_page(LLONG_MAX); - } - curoff = 0; need_iosync = FALSE; - retval = TRUE; - wasclean = TRUE; + retval = TRUE; /* return value */ if (flags & PGO_ALLPAGES) { all = TRUE; - by_list = TRUE; + by_list = TRUE; /* always go by the list */ } else { start = trunc_page(start); stop = round_page(stop); +#ifdef DEBUG + if (stop > round_page(uvn->u_size)) + printf("uvn_flush: strange, got an out of range " + "flush (fixed)\n"); +#endif all = FALSE; - by_list = (uobj->uo_npages <= + by_list = (uobj->uo_npages <= ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY); } @@ -450,7 +870,8 @@ uvn_flush(uobj, start, stop, flags) if ((flags & PGO_CLEANIT) != 0 && uobj->pgops->pgo_mk_pcluster != NULL) { if (by_list) { - TAILQ_FOREACH(pp, &uobj->memq, listq) { + for (pp = uobj->memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { if (!all && (pp->offset < start || pp->offset >= stop)) continue; @@ -474,39 +895,52 @@ uvn_flush(uobj, start, stop, flags) */ if (by_list) { - pp = TAILQ_FIRST(&uobj->memq); + pp = uobj->memq.tqh_first; } else { curoff = start; pp = uvm_pagelookup(uobj, curoff); } - ppnext = NULL; - ppsp = NULL; - uvm_lock_pageq(); + ppnext = NULL; /* XXX: shut up gcc */ + ppsp = NULL; /* XXX: shut up gcc */ + uvm_lock_pageq(); /* page queues locked */ /* locked: both page queues and uobj */ - for ( ; (by_list && pp != NULL) || - (!by_list && curoff < stop) ; pp = ppnext) { + for ( ; (by_list && pp != NULL) || + (!by_list && curoff < stop) ; pp = ppnext) { + if (by_list) { + + /* + * range check + */ + if (!all && (pp->offset < start || pp->offset >= stop)) { - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; continue; } + } else { + + /* + * null check + */ + curoff += PAGE_SIZE; if (pp == NULL) { if (curoff < stop) ppnext = uvm_pagelookup(uobj, curoff); continue; } + } /* * handle case where we do not need to clean page (either * because we are not clean or because page is not dirty or * is busy): - * + * * NOTE: we are allowed to deactivate a non-wired active * PG_BUSY page, but once a PG_BUSY page is on the inactive * queue it must stay put until it is !PG_BUSY (so as not to @@ -515,23 +949,24 @@ 
uvn_flush(uobj, start, stop, flags) if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) { needs_clean = FALSE; - if (!async) + if ((pp->flags & PG_BUSY) != 0 && + (flags & (PGO_CLEANIT|PGO_SYNCIO)) == + (PGO_CLEANIT|PGO_SYNCIO)) need_iosync = TRUE; } else { - /* * freeing: nuke all mappings so we can sync * PG_CLEAN bit with no race */ - if ((pp->flags & PG_CLEAN) != 0 && + if ((pp->flags & PG_CLEAN) != 0 && (flags & PGO_FREE) != 0 && - /* XXX ACTIVE|INACTIVE test unnecessary? */ - (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0) + (pp->pqflags & PQ_ACTIVE) != 0) pmap_page_protect(pp, VM_PROT_NONE); if ((pp->flags & PG_CLEAN) != 0 && pmap_is_modified(pp)) pp->flags &= ~(PG_CLEAN); - pp->flags |= PG_CLEANCHK; + pp->flags |= PG_CLEANCHK; /* update "hint" */ + needs_clean = ((pp->flags & PG_CLEAN) == 0); } @@ -539,26 +974,29 @@ uvn_flush(uobj, start, stop, flags) * if we don't need a clean... load ppnext and dispose of pp */ if (!needs_clean) { + /* load ppnext */ if (by_list) - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; else { if (curoff < stop) ppnext = uvm_pagelookup(uobj, curoff); } + /* now dispose of pp */ if (flags & PGO_DEACTIVATE) { if ((pp->pqflags & PQ_INACTIVE) == 0 && - (pp->flags & PG_BUSY) == 0 && pp->wire_count == 0) { - pmap_clear_reference(pp); + pmap_page_protect(pp, VM_PROT_NONE); uvm_pagedeactivate(pp); } } else if (flags & PGO_FREE) { if (pp->flags & PG_BUSY) { + /* release busy pages */ pp->flags |= PG_RELEASED; } else { pmap_page_protect(pp, VM_PROT_NONE); + /* removed page from object */ uvm_pagefree(pp); } } @@ -575,23 +1013,23 @@ uvn_flush(uobj, start, stop, flags) * note: locked: uobj and page queues. */ - wasclean = FALSE; pp->flags |= PG_BUSY; /* we 'own' page now */ UVM_PAGE_OWN(pp, "uvn_flush"); pmap_page_protect(pp, VM_PROT_READ); pp_version = pp->version; +ReTry: ppsp = pps; npages = sizeof(pps) / sizeof(struct vm_page *); /* locked: page queues, uobj */ - result = uvm_pager_put(uobj, pp, &ppsp, &npages, - flags | PGO_DOACTCLUST, start, stop); + result = uvm_pager_put(uobj, pp, &ppsp, &npages, + flags | PGO_DOACTCLUST, start, stop); /* unlocked: page queues, uobj */ /* * at this point nothing is locked. if we did an async I/O - * it is remotely possible for the async i/o to complete and - * the page "pp" be freed or what not before we get a chance + * it is remotely possible for the async i/o to complete and + * the page "pp" be freed or what not before we get a chance * to relock the object. in order to detect this, we have * saved the version number of the page in "pp_version". */ @@ -601,10 +1039,33 @@ uvn_flush(uobj, start, stop, flags) uvm_lock_pageq(); /* - * the cleaning operation is now done. finish up. note that - * on error uvm_pager_put drops the cluster for us. - * on success uvm_pager_put returns the cluster to us in - * ppsp/npages. + * VM_PAGER_AGAIN: given the structure of this pager, this + * can only happen when we are doing async I/O and can't + * map the pages into kernel memory (pager_map) due to lack + * of vm space. if this happens we drop back to sync I/O. + */ + + if (result == VM_PAGER_AGAIN) { + /* + * it is unlikely, but page could have been released + * while we had the object lock dropped. we ignore + * this now and retry the I/O. we will detect and + * handle the released page after the syncio I/O + * completes. 
+ */ +#ifdef DIAGNOSTIC + if (flags & PGO_SYNCIO) + panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)"); +#endif + flags |= PGO_SYNCIO; + goto ReTry; + } + + /* + * the cleaning operation is now done. finish up. note that + * on error (!OK, !PEND) uvm_pager_put drops the cluster for us. + * if success (OK, PEND) then uvm_pager_put returns the cluster + * to us in ppsp/npages. */ /* @@ -612,29 +1073,34 @@ uvn_flush(uobj, start, stop, flags) * we can move on to the next page. */ - if (result == 0 && async && - (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + if (result == VM_PAGER_PEND) { - /* - * no per-page ops: refresh ppnext and continue - */ - if (by_list) { - if (pp->version == pp_version) - ppnext = TAILQ_NEXT(pp, listq); - else - ppnext = TAILQ_FIRST(&uobj->memq); - } else { - if (curoff < stop) - ppnext = uvm_pagelookup(uobj, curoff); + if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + /* + * no per-page ops: refresh ppnext and continue + */ + if (by_list) { + if (pp->version == pp_version) + ppnext = pp->listq.tqe_next; + else + /* reset */ + ppnext = uobj->memq.tqh_first; + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, + curoff); + } + continue; } - continue; + + /* need to do anything here? */ } /* - * need to look at each page of the I/O operation. we defer - * processing "pp" until the last trip through this "for" loop + * need to look at each page of the I/O operation. we defer + * processing "pp" until the last trip through this "for" loop * so that we can load "ppnext" for the main loop after we - * play with the cluster pages [thus the "npages + 1" in the + * play with the cluster pages [thus the "npages + 1" in the * loop below]. */ @@ -654,84 +1120,77 @@ uvn_flush(uobj, start, stop, flags) /* set up next page for outer loop */ if (by_list) { if (pp->version == pp_version) - ppnext = TAILQ_NEXT(pp, listq); + ppnext = pp->listq.tqe_next; else - ppnext = TAILQ_FIRST( - &uobj->memq); + /* reset */ + ppnext = uobj->memq.tqh_first; } else { if (curoff < stop) - ppnext = uvm_pagelookup(uobj, - curoff); + ppnext = uvm_pagelookup(uobj, curoff); } } /* - * verify the page wasn't moved while obj was + * verify the page didn't get moved while obj was * unlocked */ - if (result == 0 && async && ptmp->uobject != uobj) + if (result == VM_PAGER_PEND && ptmp->uobject != uobj) continue; /* * unbusy the page if I/O is done. note that for - * async I/O it is possible that the I/O op + * pending I/O it is possible that the I/O op * finished before we relocked the object (in * which case the page is no longer busy). 
*/ - if (result != 0 || !async) { - if (ptmp->flags & PG_WANTED) { + if (result != VM_PAGER_PEND) { + if (ptmp->flags & PG_WANTED) /* still holding object lock */ wakeup(ptmp); - } + ptmp->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(ptmp, NULL); if (ptmp->flags & PG_RELEASED) { + + /* pgo_releasepg wants this */ uvm_unlock_pageq(); - if (!uvn_releasepg(ptmp, NULL)) { - UVMHIST_LOG(maphist, - "released %p", - ptmp, 0,0,0); + if (!uvn_releasepg(ptmp, NULL)) return (TRUE); - } - uvm_lock_pageq(); - continue; + + uvm_lock_pageq(); /* relock */ + continue; /* next page */ + } else { - if ((flags & PGO_WEAK) == 0 && - !(result == EIO && - curproc == uvm.pagedaemon_proc)) { - ptmp->flags |= - (PG_CLEAN|PG_CLEANCHK); - if ((flags & PGO_FREE) == 0) { - pmap_clear_modify(ptmp); - } - } + ptmp->flags |= (PG_CLEAN|PG_CLEANCHK); + if ((flags & PGO_FREE) == 0) + pmap_clear_modify(ptmp); } } - + /* * dispose of page */ if (flags & PGO_DEACTIVATE) { if ((pp->pqflags & PQ_INACTIVE) == 0 && - (pp->flags & PG_BUSY) == 0 && pp->wire_count == 0) { - pmap_clear_reference(ptmp); + pmap_page_protect(ptmp, VM_PROT_NONE); uvm_pagedeactivate(ptmp); } + } else if (flags & PGO_FREE) { - if (result == 0 && async) { + if (result == VM_PAGER_PEND) { if ((ptmp->flags & PG_BUSY) != 0) /* signal for i/o done */ ptmp->flags |= PG_RELEASED; } else { - if (result != 0) { + if (result != VM_PAGER_OK) { printf("uvn_flush: obj=%p, " - "offset=0x%llx. error %d\n", + "offset=0x%llx. error " + "during pageout.\n", pp->uobject, - (long long)pp->offset, - result); + (long long)pp->offset); printf("uvn_flush: WARNING: " "changes to page may be " "lost!\n"); @@ -741,38 +1200,31 @@ uvn_flush(uobj, start, stop, flags) uvm_pagefree(ptmp); } } + } /* end of "lcv" for loop */ + } /* end of "pp" for loop */ + /* + * done with pagequeues: unlock + */ uvm_unlock_pageq(); - s = splbio(); - if ((flags & PGO_CLEANIT) && all && wasclean && - LIST_FIRST(&vp->v_dirtyblkhd) == NULL && - (vp->v_bioflag & VBIOONSYNCLIST)) { - vp->v_bioflag &= ~VBIOONSYNCLIST; - LIST_REMOVE(vp, v_synclist); - } - splx(s); - if (need_iosync) { - UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); - - /* - * XXX this doesn't use the new two-flag scheme, - * but to use that, all i/o initiators will have to change. - */ - s = splbio(); - while (vp->v_numoutput != 0) { - UVMHIST_LOG(ubchist, "waiting for vp %p num %d", - vp, vp->v_numoutput,0,0); + /* + * now wait for all I/O if required. + */ + if (need_iosync) { - vp->v_bioflag |= VBIOWAIT; - UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, - &uobj->vmobjlock, - FALSE, "uvn_flush",0); - simple_lock(&uobj->vmobjlock); + UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); + while (uvn->u_nio != 0) { + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, + FALSE, "uvn_flush",0); + simple_lock(&uvn->u_obj.vmobjlock); } - splx(s); + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + wakeup(&uvn->u_flags); + uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED); } /* return, with object locked! */ @@ -796,31 +1248,46 @@ uvn_cluster(uobj, offset, loffset, hoffset) voff_t offset; voff_t *loffset, *hoffset; /* OUT */ { - struct vnode *vp = (struct vnode *)uobj; - + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; *loffset = offset; - *hoffset = MIN(offset + MAXBSIZE, round_page(vp->v_size)); + + if (*loffset >= uvn->u_size) + panic("uvn_cluster: offset out of range"); + + /* + * XXX: old pager claims we could use VOP_BMAP to get maxcontig value. 
+ */ + *hoffset = *loffset + MAXBSIZE; + if (*hoffset > round_page(uvn->u_size)) /* past end? */ + *hoffset = round_page(uvn->u_size); + + return; } /* * uvn_put: flush page data to backing store. * + * => prefer map unlocked (not required) * => object must be locked! we will _unlock_ it before starting I/O. * => flags: PGO_SYNCIO -- use sync. I/O * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] */ -int +static int uvn_put(uobj, pps, npages, flags) struct uvm_object *uobj; struct vm_page **pps; int npages, flags; { - struct vnode *vp = (struct vnode *)uobj; - int error; + int retval; + + /* note: object locked */ + retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE); + /* note: object unlocked */ - error = VOP_PUTPAGES(vp, pps, npages, flags, NULL); - return error; + return(retval); } @@ -834,140 +1301,558 @@ uvn_put(uobj, pps, npages, flags) * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] * => NOTE: caller must check for released pages!! */ - -int + +static int uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) struct uvm_object *uobj; voff_t offset; struct vm_page **pps; /* IN/OUT */ int *npagesp; /* IN (OUT if PGO_LOCKED) */ - int centeridx; + int centeridx, advice, flags; vm_prot_t access_type; - int advice, flags; { - struct vnode *vp = (struct vnode *)uobj; - struct proc *p = curproc; - int error; - UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist); - - UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0); - error = vn_lock(vp, LK_EXCLUSIVE|LK_RECURSEFAIL|LK_NOWAIT, p); - if (error) { - if (error == EBUSY) - return EAGAIN; - return error; + voff_t current_offset; + struct vm_page *ptmp; + int lcv, result, gotpages; + boolean_t done; + UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0); + + /* + * step 1: handled the case where fault data structures are locked. + */ + + if (flags & PGO_LOCKED) { + + /* + * gotpages is the current number of pages we've gotten (which + * we pass back up to caller via *npagesp. + */ + + gotpages = 0; + + /* + * step 1a: get pages that are already resident. only do this + * if the data structures are locked (i.e. the first time + * through). + */ + + done = TRUE; /* be optimistic */ + + for (lcv = 0, current_offset = offset ; lcv < *npagesp ; + lcv++, current_offset += PAGE_SIZE) { + + /* do we care about this page? if not, skip it */ + if (pps[lcv] == PGO_DONTCARE) + continue; + + /* lookup page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* to be useful must get a non-busy, non-released pg */ + if (ptmp == NULL || + (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + if (lcv == centeridx || (flags & PGO_ALLPAGES) + != 0) + done = FALSE; /* need to do a wait or I/O! */ + continue; + } + + /* + * useful page: busy/lock it and plug it in our + * result array + */ + ptmp->flags |= PG_BUSY; /* loan up to caller */ + UVM_PAGE_OWN(ptmp, "uvn_get1"); + pps[lcv] = ptmp; + gotpages++; + + } /* "for" lcv loop */ + + /* + * XXX: given the "advice", should we consider async read-ahead? + * XXX: fault current does deactive of pages behind us. is + * this good (other callers might now). + */ + /* + * XXX: read-ahead currently handled by buffer cache (bread) + * level. + * XXX: no async i/o available. + * XXX: so we don't do anything now. 
+ */ + + /* + * step 1c: now we've either done everything needed or we to + * unlock and do some waiting or I/O. + */ + + *npagesp = gotpages; /* let caller know */ + if (done) + return(VM_PAGER_OK); /* bingo! */ + else + /* EEK! Need to unlock and I/O */ + return(VM_PAGER_UNLOCK); } - error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, - access_type, advice, flags); - VOP_UNLOCK(vp, LK_RELEASE, p); - return error; -} + /* + * step 2: get non-resident or busy pages. + * object is locked. data structures are unlocked. + * + * XXX: because we can't do async I/O at this level we get things + * page at a time (otherwise we'd chunk). the VOP_READ() will do + * async-read-ahead for us at a lower level. + */ + + for (lcv = 0, current_offset = offset ; + lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) { + + /* skip over pages we've already gotten or don't want */ + /* skip over pages we don't _have_ to get */ + if (pps[lcv] != NULL || (lcv != centeridx && + (flags & PGO_ALLPAGES) == 0)) + continue; + + /* + * we have yet to locate the current page (pps[lcv]). we first + * look for a page that is already at the current offset. if + * we fine a page, we check to see if it is busy or released. + * if that is the case, then we sleep on the page until it is + * no longer busy or released and repeat the lookup. if the + * page we found is neither busy nor released, then we busy it + * (so we own it) and plug it into pps[lcv]. this breaks the + * following while loop and indicates we are ready to move on + * to the next page in the "lcv" loop above. + * + * if we exit the while loop with pps[lcv] still set to NULL, + * then it means that we allocated a new busy/fake/clean page + * ptmp in the object and we need to do I/O to fill in the data. + */ + + while (pps[lcv] == NULL) { /* top of "pps" while loop */ + + /* look for a current page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* nope? allocate one now (if we can) */ + if (ptmp == NULL) { + + ptmp = uvm_pagealloc(uobj, current_offset, + NULL, 0); + + /* out of RAM? */ + if (ptmp == NULL) { + simple_unlock(&uobj->vmobjlock); + uvm_wait("uvn_getpage"); + simple_lock(&uobj->vmobjlock); + + /* goto top of pps while loop */ + continue; + } + + /* + * got new page ready for I/O. break pps + * while loop. pps[lcv] is still NULL. + */ + break; + } + + /* page is there, see if we need to wait on it */ + if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + ptmp->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(ptmp, + &uobj->vmobjlock, FALSE, "uvn_get",0); + simple_lock(&uobj->vmobjlock); + continue; /* goto top of pps while loop */ + } + + /* + * if we get here then the page has become resident + * and unbusy between steps 1 and 2. we busy it + * now (so we own it) and set pps[lcv] (so that we + * exit the while loop). + */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uvn_get2"); + pps[lcv] = ptmp; + } + + /* + * if we own the a valid page at the correct offset, pps[lcv] + * will point to it. nothing more to do except go to the + * next page. + */ + + if (pps[lcv]) + continue; /* next lcv */ + + /* + * we have a "fake/busy/clean" page that we just allocated. do + * I/O to fill it with valid data. note that object must be + * locked going into uvn_io, but will be unlocked afterwards. + */ + + result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1, + PGO_SYNCIO, UIO_READ); + + /* + * I/O done. object is unlocked (by uvn_io). because we used + * syncio the result can not be PEND or AGAIN. we must relock + * and check for errors. 
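The PGO_LOCKED pass of uvn_get() (step 1 above) only picks up pages that are already resident and idle, busies them for the caller, and answers VM_PAGER_UNLOCK when the page at centeridx was missed, so the fault code knows it must drop its locks and return for real I/O. A toy single-threaded model of that gather follows; the page table, get_locked(), and the flag values are illustrative stand-ins, not kernel structures.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE   4096
#define PG_BUSY     0x01
#define PG_RELEASED 0x02

struct page { long long offset; int flags; };

/* A toy "object": the pages that happen to be resident right now. */
static struct page resident[] = {
    { 0 * PAGE_SIZE, 0 },
    { 1 * PAGE_SIZE, PG_BUSY },     /* busy: the locked pass must skip it */
    { 3 * PAGE_SIZE, 0 },
};

static struct page *
page_lookup(long long offset)
{
    for (size_t i = 0; i < sizeof(resident) / sizeof(resident[0]); i++)
        if (resident[i].offset == offset)
            return &resident[i];
    return NULL;
}

/*
 * Model of the PGO_LOCKED pass: collect only resident, idle pages and
 * busy them for the caller; *done reports whether the center page was
 * found (if not, the real code returns VM_PAGER_UNLOCK).
 */
static int
get_locked(long long base, struct page **pps, int npages, int centeridx,
    bool *done)
{
    int gotpages = 0;

    *done = true;
    for (int i = 0; i < npages; i++) {
        struct page *p = page_lookup(base + (long long)i * PAGE_SIZE);

        if (p == NULL || (p->flags & (PG_BUSY | PG_RELEASED)) != 0) {
            if (i == centeridx)
                *done = false;          /* caller must unlock and do I/O */
            pps[i] = NULL;
            continue;
        }
        p->flags |= PG_BUSY;            /* loan the page to the caller */
        pps[i] = p;
        gotpages++;
    }
    return gotpages;
}

int
main(void)
{
    struct page *pps[4];
    bool done;
    int got = get_locked(0, pps, 4, 2, &done);

    printf("got %d resident pages, done=%d\n", got, done);
    return 0;
}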
+ */ + + /* lock object. check for errors. */ + simple_lock(&uobj->vmobjlock); + if (result != VM_PAGER_OK) { + if (ptmp->flags & PG_WANTED) + /* object lock still held */ + wakeup(ptmp); + + ptmp->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(ptmp, NULL); + uvm_lock_pageq(); + uvm_pagefree(ptmp); + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + return(result); + } + + /* + * we got the page! clear the fake flag (indicates valid + * data now in page) and plug into our result array. note + * that page is still busy. + * + * it is the callers job to: + * => check if the page is released + * => unbusy the page + * => activate the page + */ + + ptmp->flags &= ~PG_FAKE; /* data is valid ... */ + pmap_clear_modify(ptmp); /* ... and clean */ + pps[lcv] = ptmp; + + } /* lcv loop */ + + /* + * finally, unlock object and return. + */ + + simple_unlock(&uobj->vmobjlock); + return (VM_PAGER_OK); +} /* - * uvn_findpages: - * return the page for the uobj and offset requested, allocating if needed. - * => uobj must be locked. - * => returned page will be BUSY. + * uvn_io: do I/O to a vnode + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting I/O. + * => flags: PGO_SYNCIO -- use sync. I/O + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] */ -void -uvn_findpages(uobj, offset, npagesp, pps, flags) - struct uvm_object *uobj; - voff_t offset; - int *npagesp; - struct vm_page **pps; - int flags; +static int +uvn_io(uvn, pps, npages, flags, rw) + struct uvm_vnode *uvn; + vm_page_t *pps; + int npages, flags, rw; { - int i, rv, npages; + struct vnode *vn; + struct uio uio; + struct iovec iov; + vaddr_t kva; + off_t file_offset; + int waitf, result, mapinflags; + size_t got, wanted; + UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0); + + /* + * init values + */ + + waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT; + vn = (struct vnode *) uvn; + file_offset = pps[0]->offset; + + /* + * check for sync'ing I/O. + */ + + while (uvn->u_flags & UVM_VNODE_IOSYNC) { + if (waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + uvn->u_flags |= UVM_VNODE_IOSYNCWANTED; + UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock, + FALSE, "uvn_iosync",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * check size + */ + + if (file_offset >= uvn->u_size) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0); + return(VM_PAGER_BAD); + } + + /* + * first try and map the pages in (without waiting) + */ + + mapinflags = (rw == UIO_READ) ? + UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE; + + kva = uvm_pagermapin(pps, npages, mapinflags); + if (kva == 0 && waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + + /* + * ok, now bump u_nio up. at this point we are done with uvn + * and can unlock it. if we still don't have a kva, try again + * (this time with sleep ok). + */ + + uvn->u_nio++; /* we have an I/O in progress! */ + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked */ + if (kva == 0) + kva = uvm_pagermapin(pps, npages, + mapinflags | UVMPAGER_MAPIN_WAITOK); + + /* + * ok, mapped in. 
our pages are PG_BUSY so they are not going to + * get touched (so we can look at "offset" without having to lock + * the object). set up for I/O. + */ + + /* + * fill out uio/iov + */ + + iov.iov_base = (caddr_t) kva; + wanted = npages << PAGE_SHIFT; + if (file_offset + wanted > uvn->u_size) + wanted = uvn->u_size - file_offset; /* XXX: needed? */ + iov.iov_len = wanted; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = file_offset; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = rw; + uio.uio_resid = wanted; + uio.uio_procp = curproc; + + /* + * do the I/O! (XXX: curproc?) + */ + + UVMHIST_LOG(maphist, "calling VOP",0,0,0,0); + + /* + * This process may already have this vnode locked, if we faulted in + * copyin() or copyout() on a region backed by this vnode + * while doing I/O to the vnode. If this is the case, don't + * panic.. instead, return the error to the user. + * + * XXX this is a stopgap to prevent a panic. + * Ideally, this kind of operation *should* work. + */ + result = 0; + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + result = vn_lock(vn, LK_EXCLUSIVE | LK_RETRY | LK_RECURSEFAIL, curproc); + + if (result == 0) { + /* NOTE: vnode now locked! */ + + if (rw == UIO_READ) + result = VOP_READ(vn, &uio, 0, curproc->p_ucred); + else + result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred); - rv = 0; - npages = *npagesp; - for (i = 0; i < npages; i++, offset += PAGE_SIZE) { - rv += uvn_findpage(uobj, offset, &pps[i], flags); + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + VOP_UNLOCK(vn, 0, curproc); + } + + /* NOTE: vnode now unlocked (unless vnislocked) */ + + UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0); + + /* + * result == unix style errno (0 == OK!) + * + * zero out rest of buffer (if needed) + */ + + if (result == 0) { + got = wanted - uio.uio_resid; + + if (wanted && got == 0) { + result = EIO; /* XXX: error? */ + } else if (got < PAGE_SIZE * npages && rw == UIO_READ) { + memset((void *) (kva + got), 0, + (npages << PAGE_SHIFT) - got); + } } - *npagesp = rv; + + /* + * now remove pager mapping + */ + uvm_pagermapout(kva, npages); + + /* + * now clean up the object (i.e. drop I/O count) + */ + + simple_lock(&uvn->u_obj.vmobjlock); + /* NOTE: object now locked! */ + + uvn->u_nio--; /* I/O DONE! */ + if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) { + wakeup(&uvn->u_nio); + } + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked! */ + + /* + * done! + */ + + UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0); + if (result == 0) + return(VM_PAGER_OK); + else + return(VM_PAGER_ERROR); } -static int -uvn_findpage(uobj, offset, pgp, flags) - struct uvm_object *uobj; - voff_t offset; - struct vm_page **pgp; - int flags; +/* + * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference + * is gone we will kill the object (flushing dirty pages back to the vnode + * if needed). + * + * => returns TRUE if there was no uvm_object attached or if there was + * one and we killed it [i.e. if there is no active uvn] + * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if + * needed] + * + * => XXX: given that we now kill uvn's when a vnode is recycled (without + * having to hold a reference on the vnode) and given a working + * uvm_vnp_sync(), how does that effect the need for this function? + * [XXXCDC: seems like it can die?] + * + * => XXX: this function should DIE once we merge the VM and buffer + * cache. 
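uvn_io(), shown above, builds a uio/iovec over the pager-mapped pages, hands it to VOP_READ()/VOP_WRITE(), and then memset()s whatever part of the mapped window a short read could not fill. A userspace analogue using pread(2) on an ordinary file descriptor is sketched below; read_window(), the 4 KB window size, and the sample file path are assumptions for the sketch, and the error returns only loosely mirror the VM_PAGER_ERROR/EIO handling in the kernel code.

#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define PAGE_SIZE 4096      /* assumption: page-sized I/O window */

/*
 * Fill a page-sized buffer from "fd" at "offset" and zero whatever the
 * file could not supply, the way uvn_io() memset()s the tail of a
 * short read.
 */
int
read_window(int fd, off_t offset, void *buf, size_t len)
{
    ssize_t got = pread(fd, buf, len, offset);

    if (got < 0)
        return -1;                  /* cf. VM_PAGER_ERROR */
    if (len > 0 && got == 0)
        return -1;                  /* nothing at all: treat as EIO */
    if ((size_t)got < len)
        memset((char *)buf + got, 0, len - got);
    return 0;
}

int
main(int argc, char **argv)
{
    static char buf[PAGE_SIZE];
    /* any readable file will do; /etc/hosts is just a convenient default */
    int fd = open(argc > 1 ? argv[1] : "/etc/hosts", O_RDONLY);

    if (fd < 0 || read_window(fd, 0, buf, sizeof(buf)) < 0) {
        perror("read_window");
        return 1;
    }
    printf("first byte: 0x%02x\n", (unsigned char)buf[0]);
    close(fd);
    return 0;
}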
+ * + * research shows that this is called in the following places: + * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode + * changes sizes + * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we + * are written to + * ex2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit + * is off + * ffs_realloccg: when we can't extend the current block and have + * to allocate a new one we call this [XXX: why?] + * nfsrv_rename, rename_files: called when the target filename is there + * and we want to remove it + * nfsrv_remove, sys_unlink: called on file we are removing + * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache + * then return "text busy" + * nfs_open: seems to uncache any file opened with nfs + * vn_writechk: if VTEXT vnode and can't uncache return "text busy" + */ + +boolean_t +uvm_vnp_uncache(vp) + struct vnode *vp; { - struct vm_page *pg; - int s; - UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist); - UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0); + struct uvm_vnode *uvn = &vp->v_uvm; - if (*pgp != NULL) { - UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0); - return 0; + /* + * lock uvn part of the vnode and check to see if we need to do anything + */ + + simple_lock(&uvn->u_obj.vmobjlock); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0 || + (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(TRUE); } - for (;;) { - /* look for an existing page */ - pg = uvm_pagelookup(uobj, offset); - - /* nope? allocate one now */ - if (pg == NULL) { - if (flags & UFP_NOALLOC) { - UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0); - return 0; - } - pg = uvm_pagealloc(uobj, offset, NULL, 0); - if (pg == NULL) { - if (flags & UFP_NOWAIT) { - UVMHIST_LOG(ubchist, "nowait",0,0,0,0); - return 0; - } - simple_unlock(&uobj->vmobjlock); - uvm_wait("uvn_fp1"); - simple_lock(&uobj->vmobjlock); - continue; - } - if (UVM_OBJ_IS_VTEXT(uobj)) { - uvmexp.vtextpages++; - } else { - uvmexp.vnodepages++; - } - s = splbio(); - vhold((struct vnode *)uobj); - splx(s); - UVMHIST_LOG(ubchist, "alloced",0,0,0,0); - break; - } else if (flags & UFP_NOCACHE) { - UVMHIST_LOG(ubchist, "nocache",0,0,0,0); - return 0; - } - /* page is there, see if we need to wait on it */ - if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) { - if (flags & UFP_NOWAIT) { - UVMHIST_LOG(ubchist, "nowait",0,0,0,0); - return 0; - } - pg->flags |= PG_WANTED; - UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, - "uvn_fp2", 0); - simple_lock(&uobj->vmobjlock); - continue; - } + /* + * we have a valid, non-blocked uvn. clear persist flag. + * if uvn is currently active we can return now. + */ - /* skip PG_RDONLY pages if requested */ - if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) { - UVMHIST_LOG(ubchist, "nordonly",0,0,0,0); - return 0; - } + uvn->u_flags &= ~UVM_VNODE_CANPERSIST; + if (uvn->u_obj.uo_refs) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(FALSE); + } - /* mark the page BUSY and we're done. */ - pg->flags |= PG_BUSY; - UVM_PAGE_OWN(pg, "uvn_findpage"); - UVMHIST_LOG(ubchist, "found",0,0,0,0); - break; + /* + * uvn is currently persisting! we have to gain a reference to + * it so that we can call uvn_detach to kill the uvn. + */ + + VREF(vp); /* seems ok, even with VOP_LOCK */ + uvn->u_obj.uo_refs++; /* value is now 1 */ + simple_unlock(&uvn->u_obj.vmobjlock); + + +#ifdef DEBUG + /* + * carry over sanity check from old vnode pager: the vnode should + * be VOP_LOCK'd, and we confirm it here. 
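uvm_vnp_uncache(), above, boils down to a small decision table: no valid (or an already-blocked) uvn means there is nothing to do, an actively referenced uvn only loses its persist flag, and a persisting-but-unreferenced uvn must be detached and killed. The sketch below captures just that logic, without the locking, VREF(), or the actual uvn_detach() call; uvn_state and uncache_decision() are illustrative names, while the flag values are copied from the uvm_vnode.h hunk later in this diff.

#include <stdbool.h>
#include <stdio.h>

#define UVN_VALID      0x001
#define UVN_CANPERSIST 0x002
#define UVN_BLOCKED    0x01c    /* ALOCK (0x004) | DYING (0x008) | RELKILL (0x010) */

struct uvn_state { int flags; int refs; };

/*
 * Returns true when no active uvn remains (or none existed); sets
 * *must_detach when the caller still has to gain a reference and kill
 * a persisting, unreferenced uvn.
 */
static bool
uncache_decision(struct uvn_state *u, bool *must_detach)
{
    *must_detach = false;
    if ((u->flags & UVN_VALID) == 0 || (u->flags & UVN_BLOCKED) != 0)
        return true;                /* nothing attached / already dying */
    u->flags &= ~UVN_CANPERSIST;    /* never allow persisting again */
    if (u->refs > 0)
        return false;               /* still actively used: "text busy" */
    *must_detach = true;            /* persisting only: caller kills it */
    return true;
}

int
main(void)
{
    struct uvn_state busy = { UVN_VALID | UVN_CANPERSIST, 2 };
    struct uvn_state idle = { UVN_VALID | UVN_CANPERSIST, 0 };
    bool detach, gone;

    gone = uncache_decision(&busy, &detach);
    printf("busy: gone=%d detach=%d\n", gone, detach);
    gone = uncache_decision(&idle, &detach);
    printf("idle: gone=%d detach=%d\n", gone, detach);
    return 0;
}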
+ */ + if (!VOP_ISLOCKED(vp)) { + boolean_t is_ok_anyway = FALSE; +#if defined(NFSCLIENT) + extern int (**nfsv2_vnodeop_p) __P((void *)); + extern int (**spec_nfsv2nodeop_p) __P((void *)); + extern int (**fifo_nfsv2nodeop_p) __P((void *)); + + /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */ + if (vp->v_op == nfsv2_vnodeop_p || + vp->v_op == spec_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } + if (vp->v_op == fifo_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } +#endif /* defined(NFSSERVER) || defined(NFSCLIENT) */ + if (!is_ok_anyway) + panic("uvm_vnp_uncache: vnode not locked!"); } - *pgp = pg; - return 1; +#endif /* DEBUG */ + + /* + * now drop our reference to the vnode. if we have the sole + * reference to the vnode then this will cause it to die [as we + * just cleared the persist flag]. we have to unlock the vnode + * while we are doing this as it may trigger I/O. + * + * XXX: it might be possible for uvn to get reclaimed while we are + * unlocked causing us to return TRUE when we should not. we ignore + * this as a false-positive return value doesn't hurt us. + */ + VOP_UNLOCK(vp, 0, curproc); + uvn_detach(&uvn->u_obj); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); + + /* + * and return... + */ + + return(TRUE); } /* @@ -976,7 +1861,7 @@ uvn_findpage(uobj, offset, pgp, flags) * grow => just update size value * shrink => toss un-needed pages * - * => we assume that the caller has a reference of some sort to the + * => we assume that the caller has a reference of some sort to the * vnode in question so that it will not be yanked out from under * us. * @@ -993,50 +1878,151 @@ uvm_vnp_setsize(vp, newsize) struct vnode *vp; voff_t newsize; { - struct uvm_object *uobj = &vp->v_uobj; - voff_t pgend = round_page(newsize); - UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist); - - simple_lock(&uobj->vmobjlock); - - UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", vp->v_size, newsize, 0,0); + struct uvm_vnode *uvn = &vp->v_uvm; /* - * now check if the size has changed: if we shrink we had better - * toss some pages... + * lock uvn and check for valid object, and if valid: do it! */ + simple_lock(&uvn->u_obj.vmobjlock); + if (uvn->u_flags & UVM_VNODE_VALID) { + + /* + * now check if the size has changed: if we shrink we had better + * toss some pages... + */ - if (vp->v_size > pgend && vp->v_size != VSIZENOTSET) { - (void) uvn_flush(uobj, pgend, 0, PGO_FREE); + if (uvn->u_size > newsize) { + (void)uvn_flush(&uvn->u_obj, newsize, + uvn->u_size, PGO_FREE); + } + uvn->u_size = newsize; } - vp->v_size = newsize; - simple_unlock(&uobj->vmobjlock); + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * done + */ + return; } /* - * uvm_vnp_zerorange: set a range of bytes in a file to zero. + * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes. + * + * => called from sys_sync with no VM structures locked + * => only one process can do a sync at a time (because the uvn + * structure only has one queue for sync'ing). we ensure this + * by holding the uvn_sync_lock while the sync is in progress. + * other processes attempting a sync will sleep on this lock + * until we are done. */ void -uvm_vnp_zerorange(vp, off, len) - struct vnode *vp; - off_t off; - size_t len; +uvm_vnp_sync(mp) + struct mount *mp; { - void *win; + struct uvm_vnode *uvn; + struct vnode *vp; + boolean_t got_lock; + + /* + * step 1: ensure we are only ones using the uvn_sync_q by locking + * our lock... 
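uvm_vnp_setsize(), restored above, treats growth as a bare size update but flushes the range from the new size up to the old one with PGO_FREE when the object shrinks. The rule in isolation, as a small self-contained function (setsize_flush_range() is an illustrative name, not a kernel routine):

#include <stdint.h>
#include <stdio.h>

/*
 * Returns 1 and sets [*lo, *hi) when a PGO_FREE flush is required,
 * i.e. only when the object shrinks.
 */
static int
setsize_flush_range(uint64_t oldsize, uint64_t newsize,
    uint64_t *lo, uint64_t *hi)
{
    if (oldsize <= newsize)
        return 0;           /* grow (or no change): nothing to toss */
    *lo = newsize;          /* free everything from the new EOF ... */
    *hi = oldsize;          /* ... up to the old end of the object */
    return 1;
}

int
main(void)
{
    uint64_t lo, hi;

    if (setsize_flush_range(3 * 4096 + 100, 4096, &lo, &hi))
        printf("flush [%llu, %llu)\n",
            (unsigned long long)lo, (unsigned long long)hi);
    return 0;
}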
+ */ + lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, NULL, curproc); - /* - * XXXUBC invent kzero() and use it - */ + /* + * step 2: build up a simpleq of uvns of interest based on the + * write list. we gain a reference to uvns of interest. must + * be careful about locking uvn's since we will be holding uvn_wl_lock + * in the body of the loop. + */ + SIMPLEQ_INIT(&uvn_sync_q); + simple_lock(&uvn_wl_lock); + for (uvn = uvn_wlist.lh_first ; uvn != NULL ; + uvn = uvn->u_wlist.le_next) { + + vp = (struct vnode *) uvn; + if (mp && vp->v_mount != mp) + continue; + + /* attempt to gain reference */ + while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) == + FALSE && + (uvn->u_flags & UVM_VNODE_BLOCKED) == 0) + /* spin */ ; + + /* + * we will exit the loop if either if the following are true: + * - we got the lock [always true if NCPU == 1] + * - we failed to get the lock but noticed the vnode was + * "blocked" -- in this case the vnode must be a dying + * vnode, and since dying vnodes are in the process of + * being flushed out, we can safely skip this one + * + * we want to skip over the vnode if we did not get the lock, + * or if the vnode is already dying (due to the above logic). + * + * note that uvn must already be valid because we found it on + * the wlist (this also means it can't be ALOCK'd). + */ + if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + if (got_lock) + simple_unlock(&uvn->u_obj.vmobjlock); + continue; /* skip it */ + } + + /* + * gain reference. watch out for persisting uvns (need to + * regain vnode REF). + */ + if (uvn->u_obj.uo_refs == 0) + VREF(vp); + uvn->u_obj.uo_refs++; + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * got it! + */ + SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq); + } + simple_unlock(&uvn_wl_lock); - while (len) { - vsize_t bytelen = len; + /* + * step 3: we now have a list of uvn's that may need cleaning. + * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock + * (so we can now safely lock uvn's again). + */ - win = ubc_alloc(&vp->v_uobj, off, &bytelen, UBC_WRITE); - memset(win, 0, bytelen); - ubc_release(win, 0); + for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) { + simple_lock(&uvn->u_obj.vmobjlock); +#ifdef DEBUG + if (uvn->u_flags & UVM_VNODE_DYING) { + printf("uvm_vnp_sync: dying vnode on sync list\n"); + } +#endif + uvn_flush(&uvn->u_obj, 0, 0, + PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST); - off += bytelen; - len -= bytelen; - } + /* + * if we have the only reference and we just cleaned the uvn, + * then we can pull it out of the UVM_VNODE_WRITEABLE state + * thus allowing us to avoid thinking about flushing it again + * on later sync ops. + */ + if (uvn->u_obj.uo_refs == 1 && + (uvn->u_flags & UVM_VNODE_WRITEABLE)) { + LIST_REMOVE(uvn, u_wlist); + uvn->u_flags &= ~UVM_VNODE_WRITEABLE; + } + + simple_unlock(&uvn->u_obj.vmobjlock); + + /* now drop our reference to the uvn */ + uvn_detach(&uvn->u_obj); + } + + /* + * done! release sync lock + */ + lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc); } diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h new file mode 100644 index 00000000000..64636bc15a3 --- /dev/null +++ b/sys/uvm/uvm_vnode.h @@ -0,0 +1,110 @@ +/* $OpenBSD: uvm_vnode.h,v 1.11 2001/12/19 08:58:07 art Exp $ */ +/* $NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_vnode.h,v 1.1.2.4 1997/10/03 21:18:24 chuck Exp + */ + +#ifndef _UVM_UVM_VNODE_H_ +#define _UVM_UVM_VNODE_H_ + +/* + * uvm_vnode.h + * + * vnode handle into the VM system. + */ + +/* + * the uvm_vnode structure. put at the top of the vnode data structure. + * this allows: + * (struct vnode *) == (struct uvm_vnode *) == (struct uvm_object *) + */ + +struct uvm_vnode { + struct uvm_object u_obj; /* the actual VM object */ + int u_flags; /* flags */ + int u_nio; /* number of running I/O requests */ + voff_t u_size; /* size of object */ + + /* the following entry is locked by uvn_wl_lock */ + LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */ + + /* the following entry is locked by uvn_sync_lock */ + SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */ +}; + +/* + * u_flags values + */ +#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */ +#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */ +#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */ +#define UVM_VNODE_DYING 0x008 /* final detach/terminate in + progress */ +#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg + when final i/o is done */ +#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock, + dying, or relkill to clear */ +#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked + (valid when DYING is true) */ +#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... 
setter + sleeps on &uvn->u_nio */ +#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the + i/o sync to clear so it can do + i/o */ +#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */ + +/* + * UVM_VNODE_BLOCKED: any condition that should new processes from + * touching the vnode [set WANTED and sleep to wait for it to clear] + */ +#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL) + +#ifdef _KERNEL + +/* + * prototypes + */ + +#if 0 +/* + * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to + * include sys/vnode.h, and files that include sys/vnode.h don't know + * what a vm_prot_t is. + */ +struct uvm_object *uvn_attach __P((void *, vm_prot_t)); +#endif + +#endif /* _KERNEL */ + +#endif /* _UVM_UVM_VNODE_H_ */ |