author    | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2011-06-06 17:10:24 +0000
committer | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2011-06-06 17:10:24 +0000
commit    | 100bcabc80107e37b8efd45890258b7852cfc054 (patch)
tree      | e8b5e2a61d1db84e157951ce84c29f939b88c036
parent    | d2a285457d0c89bdbfc900ff091099f892e0e4f4 (diff)
Backout vmmap in order to repair virtual address selection algorithms
outside the tree.
-rw-r--r-- | sys/arch/i386/i386/pmap.c  |   25
-rw-r--r-- | sys/dev/pci/drm/i915_drv.c |    7
-rw-r--r-- | sys/kern/exec_elf.c        |   24
-rw-r--r-- | sys/kern/kern_exec.c       |    3
-rw-r--r-- | sys/kern/kern_malloc.c     |   11
-rw-r--r-- | sys/kern/sysv_shm.c        |    5
-rw-r--r-- | sys/uvm/uvm_extern.h       |    8
-rw-r--r-- | sys/uvm/uvm_fault.c        |   13
-rw-r--r-- | sys/uvm/uvm_io.c           |   15
-rw-r--r-- | sys/uvm/uvm_km.c           |   18
-rw-r--r-- | sys/uvm/uvm_map.c          | 6727
-rw-r--r-- | sys/uvm/uvm_map.h          |  291
-rw-r--r-- | sys/uvm/uvm_mmap.c         |   82
-rw-r--r-- | sys/uvm/uvm_unix.c         |   10
14 files changed, 3173 insertions, 4066 deletions
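For orientation before the per-file diffs: the hunks in sysv_shm.c, kern_exec.c and i915_drv.c all follow the same caller-side pattern this backout restores, where the caller picks its own start address via uvm_map_hint() and uvm_map() is again a macro around uvm_map_p() (see the uvm_extern.h hunk). The fragment below is only a hedged sketch of that pattern, not code from the tree; it assumes kernel context and the declarations shown in the diff, and the function name map_with_hint and its parameters are illustrative placeholders.

/*
 * Sketch only: mirrors the restored uvm_map_hint()/uvm_map() calling
 * convention visible in the hunks below.  Builds only inside the kernel;
 * map_with_hint(), uobj, size and prot are illustrative placeholders.
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <uvm/uvm_extern.h>

int
map_with_hint(struct proc *p, struct uvm_object *uobj, vsize_t size,
    vm_prot_t prot)
{
	vaddr_t va;

	/* Ask for a start address hint based on the process and protection. */
	va = uvm_map_hint(p, prot);

	/*
	 * uvm_map() is a macro around uvm_map_p() that passes 0 for the
	 * struct proc * argument (per the uvm_extern.h hunk); callers that
	 * hold a proc pointer, like the i915 ioctl, call uvm_map_p() directly.
	 */
	return (uvm_map(&p->p_vmspace->vm_map, &va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0)));
}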
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 37a85c977e8..2d08e0bd239 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.153 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: pmap.c,v 1.154 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -604,16 +604,14 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) vaddr_t va = 0; vm_map_lock(map); - RB_FOREACH_REVERSE(ent, uvm_map_addr, &map->addr) { + for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { + /* + * This entry has greater va than the entries before. + * We need to make it point to the last page, not past it. + */ if (ent->protection & VM_PROT_EXECUTE) - break; + va = trunc_page(ent->end - 1); } - /* - * This entry has greater va than the entries before. - * We need to make it point to the last page, not past it. - */ - if (ent) - va = trunc_page(ent->end - 1); vm_map_unlock(map); if (va <= pm->pm_hiexec) { @@ -1248,7 +1246,7 @@ pmap_free_pvpage(void) { int s; struct vm_map *map; - struct uvm_map_deadq dead_entries; + struct vm_map_entry *dead_entries; struct pv_page *pvp; s = splvm(); /* protect kmem_map */ @@ -1269,12 +1267,13 @@ pmap_free_pvpage(void) TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); /* unmap the page */ - TAILQ_INIT(&dead_entries); + dead_entries = NULL; uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE, - &dead_entries, FALSE, TRUE); + &dead_entries, NULL, FALSE); vm_map_unlock(map); - uvm_unmap_detach(&dead_entries, 0); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ } diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c index f42d09b1047..6b579639ac1 100644 --- a/sys/dev/pci/drm/i915_drv.c +++ b/sys/dev/pci/drm/i915_drv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: i915_drv.c,v 1.110 2011/06/02 18:22:00 weerd Exp $ */ +/* $OpenBSD: i915_drv.c,v 1.111 2011/06/06 17:10:23 ariane Exp $ */ /* * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org> * @@ -1422,9 +1422,10 @@ i915_gem_gtt_map_ioctl(struct drm_device *dev, void *data, * We give our reference from object_lookup to the mmap, so only * must free it in the case that the map fails. 
*/ - ret = uvm_map(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj, + addr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE); + ret = uvm_map_p(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj, offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, - UVM_INH_SHARE, UVM_ADV_RANDOM, 0)); + UVM_INH_SHARE, UVM_ADV_RANDOM, 0), curproc); done: if (ret == 0) diff --git a/sys/kern/exec_elf.c b/sys/kern/exec_elf.c index d4bdfd1ba7e..542d9295839 100644 --- a/sys/kern/exec_elf.c +++ b/sys/kern/exec_elf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_elf.c,v 1.82 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: exec_elf.c,v 1.83 2011/06/06 17:10:23 ariane Exp $ */ /* * Copyright (c) 1996 Per Fogelstrom @@ -333,7 +333,6 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, int nload, idx = 0; Elf_Addr pos = *last; int file_align; - int loop; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p); if ((error = namei(&nd)) != 0) { @@ -390,7 +389,6 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, pos = ELF_ROUND(pos, file_align); *last = epp->ep_interp_pos = pos; - loop = 0; for (i = 0; i < nload;/**/) { vaddr_t addr; struct uvm_object *uobj; @@ -418,17 +416,17 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); - if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size, - (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) { - if (loop == 0) { - loop = 1; - i = 0; - *last = epp->ep_interp_pos = pos = 0; - continue; + vm_map_lock(&p->p_vmspace->vm_map); + if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size, + &addr, uobj, uoff, 0, UVM_FLAG_FIXED) == NULL) { + if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size, + &addr, uobj, uoff, 0, 0) == NULL) { + error = ENOMEM; /* XXX */ + vm_map_unlock(&p->p_vmspace->vm_map); + goto bad1; } - error = ENOMEM; - goto bad1; - } + } + vm_map_unlock(&p->p_vmspace->vm_map); if (addr != pos + loadmap[i].vaddr) { /* base changed. */ pos = addr - trunc_page(loadmap[i].vaddr); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 5518cf41929..92db43a6f3b 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.118 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.119 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -810,6 +810,7 @@ exec_sigcode_map(struct proc *p, struct emul *e) } /* Just a hint to uvm_mmap where to put it. 
*/ + p->p_sigcode = uvm_map_hint(p, VM_PROT_READ|VM_PROT_EXECUTE); uao_reference(e->e_sigobject); if (uvm_map(&p->p_vmspace->vm_map, &p->p_sigcode, round_page(sz), e->e_sigobject, 0, 0, UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 2912fd97c4c..adce9cc787d 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.88 2011/06/06 17:05:46 deraadt Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.89 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -576,13 +576,8 @@ kmeminit(void) kmeminit_nkmempages(); base = vm_map_min(kernel_map); kmem_map = uvm_km_suballoc(kernel_map, &base, &limit, - (vsize_t)(nkmempages * PAGE_SIZE), -#ifdef KVA_GUARDPAGES - VM_MAP_INTRSAFE | VM_MAP_GUARDPAGES, -#else - VM_MAP_INTRSAFE, -#endif - FALSE, &kmem_map_store); + (vsize_t)(nkmempages * PAGE_SIZE), VM_MAP_INTRSAFE, FALSE, + &kmem_map_store); kmembase = (char *)base; kmemlimit = (char *)limit; kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 228e1da7654..18535cb9320 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sysv_shm.c,v 1.52 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: sysv_shm.c,v 1.53 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */ /* @@ -264,6 +264,9 @@ sys_shmat(struct proc *p, void *v, register_t *retval) attach_va = (vaddr_t)SCARG(uap, shmaddr); else return (EINVAL); + } else { + /* This is just a hint to uvm_map() about where to put it. */ + attach_va = uvm_map_hint(p, prot); } shm_handle = shmseg->shm_internal; uao_reference(shm_handle->shm_object); diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 0a326605652..80d97ca82f8 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.97 2011/05/30 22:25:24 oga Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.98 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* @@ -185,7 +185,6 @@ typedef int vm_prot_t; #define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ #define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ #define UVM_FLAG_HOLE 0x400000 /* no backend */ -#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) @@ -632,9 +631,10 @@ void km_free(void *, size_t, const struct kmem_va_mode *, const struct kmem_pa_mode *); /* uvm_map.c */ -int uvm_map(vm_map_t, vaddr_t *, vsize_t, +#define uvm_map(_m, _a, _sz, _u, _f, _al, _fl) uvm_map_p(_m, _a, _sz, _u, _f, _al, _fl, 0) +int uvm_map_p(vm_map_t, vaddr_t *, vsize_t, struct uvm_object *, voff_t, vsize_t, - uvm_flag_t); + uvm_flag_t, struct proc *); int uvm_map_pageable(vm_map_t, vaddr_t, vaddr_t, boolean_t, int); int uvm_map_pageable_all(vm_map_t, int, vsize_t); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index 59725637315..5c4d3b5d65e 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.59 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.60 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -1781,7 +1781,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end) void uvm_fault_unwire_locked(vm_map_t 
map, vaddr_t start, vaddr_t end) { - vm_map_entry_t entry, next; + vm_map_entry_t entry; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; @@ -1814,9 +1814,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) */ KASSERT(va >= entry->start); while (va >= entry->end) { - next = RB_NEXT(uvm_map_addr, &map->addr, entry); - KASSERT(next != NULL && next->start <= entry->end); - entry = next; + KASSERT(entry->next != &map->header && + entry->next->start <= entry->end); + entry = entry->next; } /* @@ -1905,6 +1905,7 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) */ while (1) { + /* * lock map */ @@ -1918,7 +1919,7 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) * lookup */ if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, - &ufi->entry)) { + &ufi->entry)) { uvmfault_unlockmaps(ufi, write_lock); return(FALSE); } diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c index 5e42d6b85f9..876b5420b6f 100644 --- a/sys/uvm/uvm_io.c +++ b/sys/uvm/uvm_io.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_io.c,v 1.18 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_io.c,v 1.19 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */ /* @@ -64,7 +64,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) { vaddr_t baseva, endva, pageoffset, kva; vsize_t chunksz, togo, sz; - struct uvm_map_deadq dead_entries; + vm_map_entry_t dead_entries; int error, extractflags; /* @@ -93,7 +93,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) chunksz = min(round_page(togo + pageoffset), MAXBSIZE); error = 0; - extractflags = 0; + extractflags = UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG; if (flags & UVM_IO_FIXPROT) extractflags |= UVM_EXTRACT_FIXPROT; @@ -107,7 +107,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) * step 2: extract mappings from the map into kernel_map */ - error = uvm_map_extract(map, baseva, chunksz, &kva, + error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, extractflags); if (error) { @@ -139,11 +139,12 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) */ vm_map_lock(kernel_map); - TAILQ_INIT(&dead_entries); uvm_unmap_remove(kernel_map, kva, kva+chunksz, - &dead_entries, FALSE, TRUE); + &dead_entries, NULL, FALSE); vm_map_unlock(kernel_map); - uvm_unmap_detach(&dead_entries, AMAP_REFALL); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, AMAP_REFALL); /* * We defer checking the error return from uiomove until diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index 2779c7e6b4f..d9bf54bbbc4 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_km.c,v 1.102 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.103 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ /* @@ -184,13 +184,7 @@ uvm_km_init(vaddr_t start, vaddr_t end) * before installing. 
*/ - uvm_map_setup(&kernel_map_store, base, end, -#ifdef KVA_GUARDPAGES - VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES -#else - VM_MAP_PAGEABLE -#endif - ); + uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE); kernel_map_store.pmap = pmap_kernel(); if (base != start && uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, @@ -479,16 +473,16 @@ uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) void uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) { - struct uvm_map_deadq dead_entries; + struct vm_map_entry *dead_entries; vm_map_lock(map); - TAILQ_INIT(&dead_entries); uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), - &dead_entries, FALSE, TRUE); + &dead_entries, NULL, FALSE); wakeup(map); vm_map_unlock(map); - uvm_unmap_detach(&dead_entries, 0); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); } /* diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 2f4359c49d6..1e01f2c5a8b 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,22 +1,7 @@ -/* $OpenBSD: uvm_map.c,v 1.140 2011/06/03 16:20:29 deraadt Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.141 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ -/* - * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. 
* @@ -86,9 +71,6 @@ * uvm_map.c: uvm map operations */ -/* #define DEBUG */ -#define VMMAP_MIN_ADDR PAGE_SIZE /* auto-allocate address lower bound */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/mman.h> @@ -104,196 +86,13 @@ #endif #include <uvm/uvm.h> +#undef RB_AUGMENT +#define RB_AUGMENT(x) uvm_rb_augment(x) #ifdef DDB #include <uvm/uvm_ddb.h> #endif - -vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); -struct vm_map_entry *uvm_map_entrybyaddr(struct uvm_map_addr*, vaddr_t); -struct vm_map_entry *uvm_map_findspace_entry(struct uvm_map_free*, vsize_t); -struct vm_map_entry *uvm_map_findspace_tree(struct uvm_map_free*, vsize_t, - voff_t, vsize_t, int, vaddr_t*, struct vm_map*); -int uvm_map_isavail(struct uvm_map_addr*, - struct vm_map_entry**, struct vm_map_entry**, - vaddr_t, vsize_t); -int uvm_mapent_isjoinable(struct vm_map*, - struct vm_map_entry*, struct vm_map_entry*); -struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, - struct vm_map_entry*, struct uvm_map_deadq*); -struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, - struct vm_map_entry*, struct uvm_map_deadq*); -struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, - struct vm_map_entry*, vaddr_t, vsize_t, int, - struct uvm_map_deadq*); -struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); -void uvm_mapent_free(struct vm_map_entry*); -void uvm_mapent_mkfree(struct vm_map*, - struct vm_map_entry*, struct vm_map_entry**, - struct uvm_map_deadq*, boolean_t); -void uvm_map_pageable_pgon(struct vm_map*, - struct vm_map_entry*, struct vm_map_entry*, - vaddr_t, vaddr_t); -int uvm_map_pageable_wire(struct vm_map*, - struct vm_map_entry*, struct vm_map_entry*, - vaddr_t, vaddr_t, int); -void uvm_map_setup_entries(struct vm_map*); -void uvm_map_vmspace_update(struct vm_map*, - struct uvm_map_deadq*, int); -void uvm_map_kmem_grow(struct vm_map*, - struct uvm_map_deadq*, vsize_t, int); -void uvm_map_freelist_update_clear(struct vm_map*, - struct uvm_map_deadq*); -void uvm_map_freelist_update_refill(struct vm_map *, int); -void uvm_map_freelist_update(struct vm_map*, - struct uvm_map_deadq*, vaddr_t, vaddr_t, - vaddr_t, vaddr_t, int); -struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, - vaddr_t, vaddr_t, int); -int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, - struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, - int); - -/* - * Tree management functions. - */ - -static __inline void uvm_mapent_copy(struct vm_map_entry*, - struct vm_map_entry*); -static int uvm_mapentry_addrcmp(struct vm_map_entry*, - struct vm_map_entry*); -static int uvm_mapentry_freecmp(struct vm_map_entry*, - struct vm_map_entry*); -void uvm_mapent_free_insert(struct vm_map*, - struct uvm_map_free*, struct vm_map_entry*); -void uvm_mapent_free_remove(struct vm_map*, - struct uvm_map_free*, struct vm_map_entry*); -void uvm_mapent_addr_insert(struct vm_map*, - struct vm_map_entry*); -void uvm_mapent_addr_remove(struct vm_map*, - struct vm_map_entry*); -void uvm_map_splitentry(struct vm_map*, - struct vm_map_entry*, struct vm_map_entry*, - vaddr_t); -vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); -struct uvm_map_free *uvm_free(struct vm_map*, vaddr_t); -int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); - -/* Find freelist for containing addr. */ -#define UVM_FREE(_map, _addr) uvm_free((_map), (_addr)) -/* Size of the free tree. */ -#define uvm_mapfree_size(_free) ((_free)->treesz) - -/* - * uvm_vmspace_fork helper functions. 
- */ -struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, - vsize_t, struct vm_map_entry*, - struct uvm_map_deadq*, int, int); -void uvm_mapent_forkshared(struct vmspace*, struct vm_map*, - struct vm_map*, struct vm_map_entry*, - struct uvm_map_deadq*); -void uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, - struct vm_map*, struct vm_map_entry*, - struct uvm_map_deadq*); - -/* - * Tree validation. - */ - -#ifdef DEBUG -void uvm_tree_assert(struct vm_map*, int, char*, - char*, int); -#define UVM_ASSERT(map, cond, file, line) \ - uvm_tree_assert((map), (cond), #cond, (file), (line)) -void uvm_tree_sanity_free(struct vm_map*, - struct uvm_map_free*, char*, int); -void uvm_tree_sanity(struct vm_map*, char*, int); -void uvm_tree_size_chk(struct vm_map*, char*, int); -void vmspace_validate(struct vm_map*); -#else -#define uvm_tree_sanity_free(_map, _free, _file, _line) do {} while (0) -#define uvm_tree_sanity(_map, _file, _line) do {} while (0) -#define uvm_tree_size_chk(_map, _file, _line) do {} while (0) -#define vmspace_validate(_map) do {} while (0) -#endif - - -/* - * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. - * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. - * - * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size - * each time. - */ -#define VM_MAP_KSIZE_INIT (512 * PAGE_SIZE) -#define VM_MAP_KSIZE_DELTA (256 * PAGE_SIZE) -#define VM_MAP_KSIZE_ALLOCMUL 4 -/* - * When selecting a random free-space block, look at most FSPACE_DELTA blocks - * ahead. - */ -#define FSPACE_DELTA 8 -/* - * Put allocations adjecent to previous allocations when the free-space tree - * is larger than FSPACE_COMPACT entries. - * - * Alignment and PMAP_PREFER may still cause the entry to not be fully - * adjecent. Note that this strategy reduces memory fragmentation (by leaving - * a large space before or after the allocation). - */ -#define FSPACE_COMPACT 128 -/* - * Make the address selection skip at most this many bytes from the start of - * the free space in which the allocation takes place. - * - * The main idea behind a randomized address space is that an attacker cannot - * know where to target his attack. Therefore, the location of objects must be - * as random as possible. However, the goal is not to create the most sparse - * map that is possible. - * FSPACE_MAXOFF pushes the considered range in bytes down to less insane - * sizes, thereby reducing the sparseness. The biggest randomization comes - * from fragmentation, i.e. FSPACE_COMPACT. - */ -#define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) -/* - * Allow for small gaps in the overflow areas. - * Gap size is in bytes and does not have to be a multiple of page-size. 
- */ -#define FSPACE_BIASGAP ((vaddr_t)32 * 1024) - - -#define FREE_START(_entry) ((_entry)->end + (_entry)->guard) -#define FREE_END(_entry) ((_entry)->end + (_entry)->guard + \ - (_entry)->fspace) - -#ifdef DEADBEEF0 -#define UVMMAP_DEADBEEF ((void*)DEADBEEF0) -#else -#define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) -#endif - -#ifdef DEBUG -int uvm_map_dprintf = 0; -int uvm_map_printlocks = 0; - -#define DPRINTF(_args) \ - do { \ - if (uvm_map_dprintf) \ - printf _args; \ - } while (0) - -#define LPRINTF(_args) \ - do { \ - if (uvm_map_printlocks) \ - printf _args; \ - } while (0) -#else -#define DPRINTF(_args) do {} while (0) -#define LPRINTF(_args) do {} while (0) -#endif - static struct timeval uvm_kmapent_last_warn_time; static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; @@ -303,1259 +102,295 @@ struct uvm_cnt uvm_mlk_call, uvm_mlk_hint; const char vmmapbsy[] = "vmmapbsy"; /* + * Da history books + */ +UVMHIST_DECL(maphist); +UVMHIST_DECL(pdhist); + +/* * pool for vmspace structures. */ + struct pool uvm_vmspace_pool; /* * pool for dynamically-allocated map entries. */ + struct pool uvm_map_entry_pool; struct pool uvm_map_entry_kmem_pool; +#ifdef PMAP_GROWKERNEL /* * This global represents the end of the kernel virtual address - * space. If we want to exceed this, we must grow the kernel + * space. If we want to exceed this, we must grow the kernel * virtual address space dynamically. * * Note, this variable is locked by kernel_map's lock. */ vaddr_t uvm_maxkaddr; +#endif /* - * Locking predicate. + * macros */ -#define UVM_MAP_REQ_WRITE(_map) \ - do { \ - if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ - rw_assert_wrlock(&(_map)->lock); \ - } while (0) /* - * Tree describing entries by address. + * uvm_map_entry_link: insert entry into a map * - * Addresses are unique. - * Entries with start == end may only exist if they are the first entry - * (sorted by address) within a free-memory tree. + * => map must be locked */ - -static __inline int -uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) -{ - return e1->start < e2->start ? -1 : e1->start > e2->start; -} +#define uvm_map_entry_link(map, after_where, entry) do { \ + (map)->nentries++; \ + (entry)->prev = (after_where); \ + (entry)->next = (after_where)->next; \ + (entry)->prev->next = (entry); \ + (entry)->next->prev = (entry); \ + uvm_rb_insert(map, entry); \ +} while (0) /* - * Tree describing free memory. + * uvm_map_entry_unlink: remove entry from a map * - * Free memory is indexed (so we can use array semantics in O(log N). - * Free memory is ordered by size (so we can reduce fragmentation). - * - * The address range in the tree can be limited, having part of the - * free memory not in the free-memory tree. Only free memory in the - * tree will be considered during 'any address' allocations. + * => map must be locked */ - -static __inline int -uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2) -{ - int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace; - return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2); -} +#define uvm_map_entry_unlink(map, entry) do { \ + (map)->nentries--; \ + (entry)->next->prev = (entry)->prev; \ + (entry)->prev->next = (entry)->next; \ + uvm_rb_remove(map, entry); \ +} while (0) /* - * Copy mapentry. + * SAVE_HINT: saves the specified entry as the hint for future lookups. + * + * => map need not be locked (protected by hint_lock). 
*/ -static __inline void -uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) -{ - caddr_t csrc, cdst; - size_t sz; - - csrc = (caddr_t)src; - cdst = (caddr_t)dst; - csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); - cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); - - sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - - offsetof(struct vm_map_entry, uvm_map_entry_start_copy); - memcpy(cdst, csrc, sz); -} +#define SAVE_HINT(map,check,value) do { \ + simple_lock(&(map)->hint_lock); \ + if ((map)->hint == (check)) \ + (map)->hint = (value); \ + simple_unlock(&(map)->hint_lock); \ +} while (0) /* - * Handle free-list insertion. + * VM_MAP_RANGE_CHECK: check and correct range + * + * => map must at least be read locked */ -void -uvm_mapent_free_insert(struct vm_map *map, struct uvm_map_free *free, - struct vm_map_entry *entry) -{ - struct vm_map_entry *res; -#ifdef DEBUG - vaddr_t min, max, bound; -#endif - - if (RB_LEFT(entry, free_entry) != UVMMAP_DEADBEEF || - RB_RIGHT(entry, free_entry) != UVMMAP_DEADBEEF || - RB_PARENT(entry, free_entry) != UVMMAP_DEADBEEF) - panic("uvm_mapent_addr_insert: entry still in free list"); - -#ifdef DEBUG - /* - * Boundary check. - * Boundaries are folded if they go on the same free list. - */ - min = FREE_START(entry); - max = FREE_END(entry); - while (min < max && (bound = uvm_map_boundary(map, min, max)) != max) { - KASSERT(UVM_FREE(map, min) == free); - min = bound; - } -#endif - KDASSERT(entry->fspace > 0 && (entry->fspace & PAGE_MASK) == 0); - - UVM_MAP_REQ_WRITE(map); - res = RB_INSERT(uvm_map_free_int, &free->tree, entry); - free->treesz++; - if (res != NULL) - panic("uvm_mapent_free_insert"); -} +#define VM_MAP_RANGE_CHECK(map, start, end) do { \ + if (start < vm_map_min(map)) \ + start = vm_map_min(map); \ + if (end > vm_map_max(map)) \ + end = vm_map_max(map); \ + if (start > end) \ + start = end; \ +} while (0) /* - * Handle free-list removal. + * local prototypes */ -void -uvm_mapent_free_remove(struct vm_map *map, struct uvm_map_free *free, - struct vm_map_entry *entry) -{ - struct vm_map_entry *res; - - UVM_MAP_REQ_WRITE(map); - res = RB_REMOVE(uvm_map_free_int, &free->tree, entry); - free->treesz--; - if (res != entry) - panic("uvm_mapent_free_remove"); - RB_LEFT(entry, free_entry) = RB_RIGHT(entry, free_entry) = - RB_PARENT(entry, free_entry) = UVMMAP_DEADBEEF; -} -/* - * Handle address tree insertion. - */ -void -uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) -{ - struct vm_map_entry *res; - - if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || - RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || - RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) - panic("uvm_mapent_addr_insert: entry still in addr list"); - KDASSERT(entry->start <= entry->end); - KDASSERT((entry->start & PAGE_MASK) == 0 && - (entry->end & PAGE_MASK) == 0); - - UVM_MAP_REQ_WRITE(map); - res = RB_INSERT(uvm_map_addr, &map->addr, entry); - if (res != NULL) - panic("uvm_mapent_addr_insert"); -} +void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); +void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); +void uvm_map_reference_amap(struct vm_map_entry *, int); +void uvm_map_unreference_amap(struct vm_map_entry *, int); +int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t, + struct vm_map_entry *, voff_t, vsize_t); -/* - * Handle address tree removal. 
- */ -void -uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) -{ - struct vm_map_entry *res; - - UVM_MAP_REQ_WRITE(map); - res = RB_REMOVE(uvm_map_addr, &map->addr, entry); - if (res != entry) - panic("uvm_mapent_addr_remove"); - RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = - RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; -} +struct vm_map_entry *uvm_mapent_alloc(struct vm_map *, int); +void uvm_mapent_free(struct vm_map_entry *); +#ifdef KVA_GUARDPAGES /* - * uvm_map_reference: add reference to a map - * - * XXX check map reference counter lock + * Number of kva guardpages in use. */ -#define uvm_map_reference(_map) \ - do { \ - simple_lock(&map->ref_lock); \ - map->ref_count++; \ - simple_unlock(&map->ref_lock); \ - } while (0) +int kva_guardpages; +#endif + /* - * Calculate the dused delta. + * Tree manipulation. */ -vsize_t -uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) -{ - struct vmspace *vm; - vsize_t sz; - vaddr_t lmax; - vaddr_t stack_begin, stack_end; /* Position of stack. */ - - KASSERT(map->flags & VM_MAP_ISVMSPACE); - vm = (struct vmspace *)map; - stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); - stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); - - sz = 0; - while (min != max) { - lmax = max; - if (min < stack_begin && lmax > stack_begin) - lmax = stack_begin; - else if (min < stack_end && lmax > stack_end) - lmax = stack_end; - - if (min >= stack_begin && min < stack_end) { - /* nothing */ - } else - sz += lmax - min; - min = lmax; - } +void uvm_rb_insert(struct vm_map *, struct vm_map_entry *); +void uvm_rb_remove(struct vm_map *, struct vm_map_entry *); +vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *); - return sz >> PAGE_SHIFT; -} +#ifdef DEBUG +int _uvm_tree_sanity(struct vm_map *map, const char *name); +#endif +vsize_t uvm_rb_subtree_space(struct vm_map_entry *); +void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *); -/* - * Find the entry describing the given address. - */ -struct vm_map_entry* -uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) +static __inline int +uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b) { - struct vm_map_entry *iter; - - iter = RB_ROOT(atree); - while (iter != NULL) { - if (iter->start > addr) - iter = RB_LEFT(iter, daddrs.addr_entry); - else if (FREE_END(iter) <= addr) - iter = RB_RIGHT(iter, daddrs.addr_entry); - else - return iter; - } - return NULL; + if (a->start < b->start) + return (-1); + else if (a->start > b->start) + return (1); + + return (0); } -/* - * Find the first entry with at least sz bytes free. - */ -struct vm_map_entry* -uvm_map_findspace_entry(struct uvm_map_free *free, vsize_t sz) -{ - struct vm_map_entry *iter; - struct vm_map_entry *res; - - iter = RB_ROOT(&free->tree); - res = NULL; - - while (iter) { - if (iter->fspace >= sz) { - res = iter; - iter = RB_LEFT(iter, free_entry); - } else - iter = RB_RIGHT(iter, free_entry); - } - return res; -} -/* - * DEAD_ENTRY_PUSH(struct vm_map_entry**head, struct vm_map_entry *entry) - * - * Push dead entries into a linked list. - * Since the linked list abuses the address tree for storage, the entry - * may not be linked in a map. - * - * *head must be initialized to NULL before the first call to this macro. - * uvm_unmap_detach(*head, 0) will remove dead entries. 
- */ static __inline void -dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) +uvm_rb_augment(struct vm_map_entry *entry) { - TAILQ_INSERT_TAIL(deadq, entry, daddrs.deadq); + entry->space = uvm_rb_subtree_space(entry); } -#define DEAD_ENTRY_PUSH(_headptr, _entry) \ - dead_entry_push((_headptr), (_entry)) - -/* - * Helper function for uvm_map_findspace_tree. - * - * Given allocation constraints and pmap constraints, finds the - * lowest and highest address in a range that can be used for the - * allocation. - * - * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. - * - * - * Big chunk of math with a seasoning of dragons. - */ -int -uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, - struct vm_map_entry *sel, vaddr_t align, - vaddr_t pmap_align, vaddr_t pmap_off, int bias) -{ - vaddr_t sel_min, sel_max; -#ifdef PMAP_PREFER - vaddr_t pmap_min, pmap_max; -#endif /* PMAP_PREFER */ -#ifdef DIAGNOSTIC - int bad; -#endif /* DIAGNOSTIC */ - sel_min = FREE_START(sel); - sel_max = FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); +RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); -#ifdef PMAP_PREFER +RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); - /* - * There are two special cases, in which we can satisfy the align - * requirement and the pmap_prefer requirement. - * - when pmap_off == 0, we always select the largest of the two - * - when pmap_off % align == 0 and pmap_align > align, we simply - * satisfy the pmap_align requirement and automatically - * satisfy the align requirement. - */ - if (align > PAGE_SIZE && - !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { - /* - * Simple case: only use align. - */ - sel_min = roundup(sel_min, align); - sel_max &= ~(align - 1); - - if (sel_min > sel_max) - return ENOMEM; - - /* - * Correct for bias. - */ - if (sel_max - sel_min > FSPACE_BIASGAP) { - if (bias > 0) { - sel_min = sel_max - FSPACE_BIASGAP; - sel_min = roundup(sel_min, align); - } else if (bias < 0) { - sel_max = sel_min + FSPACE_BIASGAP; - sel_max &= ~(align - 1); - } - } - } else if (pmap_align != 0) { - /* - * Special case: satisfy both pmap_prefer and - * align argument. - */ - pmap_max = sel_max & ~(pmap_align - 1); - pmap_min = sel_min; - if (pmap_max < sel_min) - return ENOMEM; - - /* Adjust pmap_min for BIASGAP for top-addr bias. */ - if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) - pmap_min = pmap_max - FSPACE_BIASGAP; - /* Align pmap_min. */ - pmap_min &= ~(pmap_align - 1); - if (pmap_min < sel_min) - pmap_min += pmap_align; - if (pmap_min > pmap_max) - return ENOMEM; - - /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ - if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { - pmap_max = (pmap_min + FSPACE_BIASGAP) & - ~(pmap_align - 1); - } - if (pmap_min > pmap_max) - return ENOMEM; - - /* Apply pmap prefer offset. */ - pmap_max |= pmap_off; - if (pmap_max > sel_max) - pmap_max -= pmap_align; - pmap_min |= pmap_off; - if (pmap_min < sel_min) - pmap_min += pmap_align; - - /* - * Fixup: it's possible that pmap_min and pmap_max - * cross eachother. In this case, try to find one - * address that is allowed. - * (This usually happens in biased case.) - */ - if (pmap_min > pmap_max) { - if (pmap_min < sel_max) - pmap_max = pmap_min; - else if (pmap_max > sel_min) - pmap_min = pmap_max; - else - return ENOMEM; - } - - /* Internal validation. 
*/ - KDASSERT(pmap_min <= pmap_max); - - sel_min = pmap_min; - sel_max = pmap_max; - } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) - sel_min = sel_max - FSPACE_BIASGAP; - else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) - sel_max = sel_min + FSPACE_BIASGAP; - -#else - - if (align > PAGE_SIZE) { - sel_min = roundup(sel_min, align); - sel_max &= ~(align - 1); - if (sel_min > sel_max) - return ENOMEM; - - if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { - if (bias > 0) { - sel_min = roundup(sel_max - FSPACE_BIASGAP, - align); - } else { - sel_max = (sel_min + FSPACE_BIASGAP) & - ~(align - 1); - } - } - } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) - sel_min = sel_max - FSPACE_BIASGAP; - else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) - sel_max = sel_min + FSPACE_BIASGAP; - -#endif - - if (sel_min > sel_max) - return ENOMEM; - -#ifdef DIAGNOSTIC - bad = 0; - /* Lower boundary check. */ - if (sel_min < FREE_START(sel)) { - printf("sel_min: 0x%lx, but should be at least 0x%lx\n", - sel_min, FREE_START(sel)); - bad++; - } - /* Upper boundary check. */ - if (sel_max > FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { - printf("sel_max: 0x%lx, but should be at most 0x%lx\n", - sel_max, FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); - bad++; - } - /* Lower boundary alignment. */ - if (align != 0 && (sel_min & (align - 1)) != 0) { - printf("sel_min: 0x%lx, not aligned to 0x%lx\n", - sel_min, align); - bad++; - } - /* Upper boundary alignment. */ - if (align != 0 && (sel_max & (align - 1)) != 0) { - printf("sel_max: 0x%lx, not aligned to 0x%lx\n", - sel_max, align); - bad++; - } - /* Lower boundary PMAP_PREFER check. */ - if (pmap_align != 0 && align == 0 && - (sel_min & (pmap_align - 1)) != pmap_off) { - printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", - sel_min, sel_min & (pmap_align - 1), pmap_off); - bad++; - } - /* Upper boundary PMAP_PREFER check. */ - if (pmap_align != 0 && align == 0 && - (sel_max & (pmap_align - 1)) != pmap_off) { - printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", - sel_max, sel_max & (pmap_align - 1), pmap_off); - bad++; - } +vsize_t +uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry) +{ + struct vm_map_entry *next; + vaddr_t space; - if (bad) { - panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " - "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " - "bias = %d, " - "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", - sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, - bias, FREE_START(sel), FREE_END(sel)); + if ((next = entry->next) == &map->header) + space = map->max_offset - entry->end; + else { + KASSERT(next); + space = next->start - entry->end; } -#endif /* DIAGNOSTIC */ - - *min = sel_min; - *max = sel_max; - return 0; + return (space); } - -/* - * Find address and free space for sz bytes. - * - * free: tree of free space - * sz: size in bytes - * align: preferred alignment - * guardpg: if true, keep free space guards on both ends - * addr: fill in found address - * - * align is a hard requirement to align to virtual addresses. - * PMAP_PREFER is a soft requirement that is dropped if - * no memory can be found that will be acceptable. - * - * align overrules PMAP_PREFER, but if both can be satisfied, the code - * will attempt to find a range that does this. - * - * Returns NULL on failure. 
- */ -struct vm_map_entry* -uvm_map_findspace_tree(struct uvm_map_free *free, vsize_t sz, voff_t uoffset, - vsize_t align, int guardpg, vaddr_t *addr, struct vm_map *map) + +vsize_t +uvm_rb_subtree_space(struct vm_map_entry *entry) { - struct vm_map_entry *sfe; /* Start free entry. */ - struct vm_map_entry *sel; /* Selected free entry. */ - struct vm_map_entry *search_start, *fail_start; - size_t sel_idx, i; - vaddr_t sel_min, sel_max, sel_addr; - vaddr_t pmap_off, pmap_align; /* pmap_prefer variables */ - int bias; - -#ifdef PMAP_PREFER - /* Fix pmap prefer parameters. */ - pmap_off = 0; - pmap_align = PMAP_PREFER_ALIGN(); - if (uoffset != UVM_UNKNOWN_OFFSET && pmap_align > PAGE_SIZE) - pmap_off = PMAP_PREFER_OFFSET(uoffset); - else - pmap_align = 0; - KDASSERT(pmap_align == 0 || pmap_off < pmap_align); - - if (align > PAGE_SIZE || (pmap_off != 0 && pmap_off < align)) { - /* - * We're doomed. - * - * This allocation will never be able to fulfil the pmap_off - * requirement. - */ - pmap_off = 0; - pmap_align = 0; - } -#else - pmap_off = pmap_align = 0; -#endif - - /* Set up alignment argument. */ - if (align < PAGE_SIZE) - align = PAGE_SIZE; - - /* - * First entry that meets requirements. - */ - sfe = uvm_map_findspace_entry(free, sz + (guardpg ? PAGE_SIZE : 0)); - if (sfe == NULL) - return NULL; - - /* Select the entry from which we will allocate. */ - sel_idx = arc4random_uniform(FSPACE_DELTA); - sel = sfe; - for (i = 0; i < sel_idx; i++) { - sel = RB_NEXT(uvm_map_free_int, free->tree, sel); - /* - * This has a slight bias at the top of the tree (largest - * segments) towards the smaller elements. - * This may be nice. - */ - if (sel == NULL) { - sel_idx -= i; - i = 0; - sel = sfe; - } - } - search_start = sel; - fail_start = NULL; - -#ifdef PMAP_PREFER -pmap_prefer_retry: -#endif /* PMAP_PREFER */ - while (sel != NULL) { - bias = uvm_mapent_bias(map, sel); - if (bias == 0 && free->treesz >= FSPACE_COMPACT) - bias = (arc4random() & 0x1) ? 1 : -1; - - if (uvm_map_sel_limits(&sel_min, &sel_max, sz, guardpg, sel, - align, pmap_align, pmap_off, bias) == 0) { - if (bias > 0) - sel_addr = sel_max; - else if (bias < 0) - sel_addr = sel_min; - else if (sel_min == sel_max) - sel_addr = sel_min; - else { - /* - * Select a random address. - * - * Use sel_addr to limit the arc4random range. - */ - sel_addr = sel_max - sel_min; - if (align <= PAGE_SIZE && pmap_align != 0) - sel_addr += pmap_align; - else - sel_addr += align; - sel_addr = MIN(sel_addr, FSPACE_MAXOFF); - - /* - * Shift down, so arc4random can deal with - * the number. - * arc4random wants a 32-bit number. Therefore, - * handle 64-bit overflow. - */ - sel_addr >>= PAGE_SHIFT; - if (sel_addr > 0xffffffff) - sel_addr = 0xffffffff; - sel_addr = arc4random_uniform(sel_addr); - /* - * Shift back up. - */ - sel_addr <<= PAGE_SHIFT; - - /* - * Cancel bits that violate our alignment. - * - * This also cancels bits that are in - * PAGE_MASK, because align is at least - * a page. - */ - sel_addr &= ~(align - 1); - sel_addr &= ~(pmap_align - 1); - - KDASSERT(sel_addr <= sel_max - sel_min); - /* - * Change sel_addr from an offset relative - * to sel_min, to the actual selected address. - */ - sel_addr += sel_min; - } - - *addr = sel_addr; - return sel; - } - - /* Next entry. 
*/ - sel_idx++; - sel = RB_NEXT(uvm_map_free_int, &free->tree, sel); - if (sel_idx == FSPACE_DELTA || - (sel == NULL && sel_idx <= FSPACE_DELTA)) { - if (fail_start == NULL) - fail_start = sel; - - sel_idx = 0; - sel = sfe; - } + vaddr_t space, tmp; - /* - * sel == search_start -> we made a full loop through the - * first FSPACE_DELTA items and couldn't find anything. - * - * We now restart the loop, at the first entry after - * FSPACE_DELTA (which we stored in fail_start during - * the first iteration). - * - * In the case that fail_start == NULL, we will stop - * immediately. - */ - if (sel == search_start) { - sel_idx = FSPACE_DELTA; - sel = fail_start; - } + space = entry->ownspace; + if (RB_LEFT(entry, rb_entry)) { + tmp = RB_LEFT(entry, rb_entry)->space; + if (tmp > space) + space = tmp; } -#ifdef PMAP_PREFER - /* - * If we can't satisfy pmap_prefer, we try without. - * - * We retry even in the case align is specified, since - * uvm_map_sel_limits() always attempts to take it into - * account. - */ - if (pmap_align != 0) { - printf("pmap_prefer aligned allocation failed -> " - "going for unaligned mapping\n"); /* DEBUG, for now */ - pmap_align = 0; - pmap_off = 0; - goto pmap_prefer_retry; + if (RB_RIGHT(entry, rb_entry)) { + tmp = RB_RIGHT(entry, rb_entry)->space; + if (tmp > space) + space = tmp; } -#endif /* PMAP_PREFER */ - /* - * Iterated everything, but nothing was good enough. - */ - return NULL; + return (space); } -/* - * Test if memory starting at addr with sz bytes is free. - * - * Fills in *start_ptr and *end_ptr to be the first and last entry describing - * the space. - * If called with prefilled *start_ptr and *end_ptr, they are to be correct. - */ -int -uvm_map_isavail(struct uvm_map_addr *atree, struct vm_map_entry **start_ptr, - struct vm_map_entry **end_ptr, vaddr_t addr, vsize_t sz) +void +uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) { - struct vm_map_entry *i, *i_end; - - KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); - if (*start_ptr == NULL) { - *start_ptr = uvm_map_entrybyaddr(atree, addr); - if (*start_ptr == NULL) - return 0; - } else - KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); - if (*end_ptr == NULL) { - if (FREE_END(*start_ptr) >= addr + sz) - *end_ptr = *start_ptr; - else { - *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); - if (*end_ptr == NULL) - return 0; - } - } else - KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); - - KDASSERT(*start_ptr != NULL && *end_ptr != NULL); - KDASSERT((*start_ptr)->start <= addr && FREE_END(*start_ptr) > addr && - (*end_ptr)->start < addr + sz && FREE_END(*end_ptr) >= addr + sz); - - i = *start_ptr; - if (i->end > addr) - return 0; - i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); - for (i = RB_NEXT(uvm_map_addr, atree, i); i != i_end; - i = RB_NEXT(uvm_map_addr, atree, i)) { - if (i->start != i->end) - return 0; - } - - return -1; + /* We need to traverse to the very top */ + do { + entry->ownspace = uvm_rb_space(map, entry); + entry->space = uvm_rb_subtree_space(entry); + } while ((entry = RB_PARENT(entry, rb_entry)) != NULL); } -/* - * uvm_map: establish a valid mapping in map - * - * => *addr and sz must be a multiple of PAGE_SIZE. - * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. - * => map must be unlocked. 
- * => <uobj,uoffset> value meanings (4 cases): - * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER - * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER - * [3] <uobj,uoffset> == normal mapping - * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA - * - * case [4] is for kernel mappings where we don't know the offset until - * we've found a virtual address. note that kernel object offsets are - * always relative to vm_map_min(kernel_map). - * - * => align: align vaddr, must be a power-of-2. - * Align is only a hint and will be ignored if the alignemnt fails. - */ -int -uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, - struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) +void +uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) { - struct vm_map_entry *first, *last, *entry; - struct uvm_map_deadq dead; - struct uvm_map_free *free; - vm_prot_t prot; - vm_prot_t maxprot; - vm_inherit_t inherit; - int advice; - int error; - - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); - - /* - * Decode parameters. - */ - prot = UVM_PROTECTION(flags); - maxprot = UVM_MAXPROTECTION(flags); - advice = UVM_ADVICE(flags); - inherit = UVM_INHERIT(flags); - error = 0; - TAILQ_INIT(&dead); - KASSERT((sz & PAGE_MASK) == 0); - KASSERT((align & (align - 1)) == 0); - - /* - * Holes are incompatible with other types of mappings. - */ - if (flags & UVM_FLAG_HOLE) { - KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && - (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); - } - - /* - * Check protection. - */ - if ((prot & maxprot) != prot) - return EACCES; - - if (flags & UVM_FLAG_TRYLOCK) { - if (vm_map_lock_try(map) == FALSE) - return EFAULT; - } else - vm_map_lock(map); - - first = last = NULL; - if (flags & UVM_FLAG_FIXED) { - /* - * Fixed location. - * - * Note: we ignore align, pmap_prefer. - * Fill in first, last and *addr. - */ - KASSERT((*addr & PAGE_MASK) == 0); - if (!uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) { - error = ENOMEM; - goto unlock; - } - - /* - * Grow pmap to include allocated address. - * XXX not possible in kernel? - */ - if ((map->flags & VM_MAP_ISVMSPACE) == 0 && - uvm_maxkaddr < (*addr + sz)) { - uvm_map_kmem_grow(map, &dead, - *addr + sz - uvm_maxkaddr, flags); - - /* - * Reload first, last, since uvm_map_kmem_grow likely - * moved them around. - */ - first = last = NULL; - if (!uvm_map_isavail(&map->addr, &first, &last, - *addr, sz)) - panic("uvm_map: opened box, cat died"); - } - } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && - (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && - (align == 0 || (*addr & (align - 1)) == 0) && - uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) { - /* - * Address used as hint. - * - * Note: we enforce the alignment restriction, - * but ignore the pmap_prefer. - */ - } else { - /* - * Update freelists from vmspace. - */ - if (map->flags & VM_MAP_ISVMSPACE) - uvm_map_vmspace_update(map, &dead, flags); - - /* - * Allocation for sz bytes at any address on the - * freelist. - */ - free = &map->free; - first = uvm_map_findspace_tree(free, sz, uoffset, align, - map->flags & VM_MAP_GUARDPAGES, addr, map); - last = NULL; /* May get set in previous test (by isavail). */ - - /* - * Fall back into brk() space if the initial attempt failed. 
- */ - if (first == NULL) { - if (map->flags & VM_MAP_ISVMSPACE) - free = &map->bfree; - else - uvm_map_kmem_grow(map, &dead, sz, flags); - - first = uvm_map_findspace_tree(free, sz, uoffset, align, - map->flags & VM_MAP_GUARDPAGES, addr, map); - if (first == NULL) { - error = ENOMEM; - goto unlock; - } - } - - /* - * Fill in last. - */ - if (!uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) - panic("uvm_map: findspace and isavail disagree"); - } - - KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || - uvm_maxkaddr >= *addr + sz); - - /* - * If we only want a query, return now. - */ - if (flags & UVM_FLAG_QUERY) { - error = 0; - goto unlock; - } - - if (uobj == NULL) - uoffset = 0; - else if (uoffset == UVM_UNKNOWN_OFFSET) { - KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); - uoffset = *addr - vm_map_min(kernel_map); - } - - /* - * Create new entry. - * first and last may be invalidated after this call. - */ - entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead); - if (entry == NULL) { - error = ENOMEM; - goto unlock; - } - KDASSERT(entry->start == *addr && entry->end == *addr + sz); - entry->object.uvm_obj = uobj; - entry->offset = uoffset; - entry->protection = prot; - entry->max_protection = maxprot; - entry->inheritance = inherit; - entry->wired_count = 0; - entry->advice = advice; - if (uobj) - entry->etype = UVM_ET_OBJ; - else if (flags & UVM_FLAG_HOLE) - entry->etype = UVM_ET_HOLE; - else - entry->etype = 0; - if (flags & UVM_FLAG_COPYONW) { - entry->etype |= UVM_ET_COPYONWRITE; - if ((flags & UVM_FLAG_OVERLAY) == 0) - entry->etype |= UVM_ET_NEEDSCOPY; - } - if (flags & UVM_FLAG_OVERLAY) { - entry->aref.ar_pageoff = 0; - entry->aref.ar_amap = amap_alloc(sz, - ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0), - M_WAITOK); - } - - /* - * Update map and process statistics. - */ - if (!(flags & UVM_FLAG_HOLE)) - map->size += sz; - if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && - !(flags & UVM_FLAG_HOLE)) { - ((struct vmspace *)map)->vm_dused += - uvmspace_dused(map, *addr, *addr + sz); - } - - /* - * Try to merge entry. - * - * XXX: I can't think of a good reason to only merge kernel map entries, - * but it's what the old code did. I'll look at it later. - */ - if ((flags & UVM_FLAG_NOMERGE) == 0) - entry = uvm_mapent_tryjoin(map, entry, &dead); - -unlock: - vm_map_unlock(map); - - if (error == 0) { - DPRINTF(("uvm_map: 0x%lx-0x%lx (query=%c) map=%p\n", - *addr, *addr + sz, - (flags & UVM_FLAG_QUERY ? 'T' : 'F'), map)); - } + vaddr_t space = uvm_rb_space(map, entry); + struct vm_map_entry *tmp; - /* - * Remove dead entries. - * - * Dead entries may be the result of merging. - * uvm_map_mkentry may also create dead entries, when it attempts to - * destroy free-space entries. - */ - uvm_unmap_detach(&dead, 0); - return error; + entry->ownspace = entry->space = space; + tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry); +#ifdef DIAGNOSTIC + if (tmp != NULL) + panic("uvm_rb_insert: duplicate entry?"); +#endif + uvm_rb_fixup(map, entry); + if (entry->prev != &map->header) + uvm_rb_fixup(map, entry->prev); } -/* - * True iff e1 and e2 can be joined together. - */ -int -uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, - struct vm_map_entry *e2) +void +uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) { - KDASSERT(e1 != NULL && e2 != NULL); - - /* - * Must be the same entry type and not have free memory between. - */ - if (e1->etype != e2->etype || e1->end != e2->start) - return 0; - - /* - * Submaps are never joined. 
- */ - if (UVM_ET_ISSUBMAP(e1)) - return 0; - - /* - * Never merge wired memory. - */ - if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) - return 0; - - /* - * Protection, inheritance and advice must be equal. - */ - if (e1->protection != e2->protection || - e1->max_protection != e2->max_protection || - e1->inheritance != e2->inheritance || - e1->advice != e2->advice) - return 0; - - /* - * If uvm_object: objects itself and offsets within object must match. - */ - if (UVM_ET_ISOBJ(e1)) { - if (e1->object.uvm_obj != e2->object.uvm_obj) - return 0; - if (e1->offset + (e1->end - e1->start) != e2->offset) - return 0; - } + struct vm_map_entry *parent; - /* - * Cannot join shared amaps. - * Note: no need to lock amap to look at refs, since we don't care - * about its exact value. - * If it is 1 (i.e. we have the only reference) it will stay there. - */ - if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) - return 0; - if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) - return 0; - - /* - * Apprently, e1 and e2 match. - */ - return 1; + parent = RB_PARENT(entry, rb_entry); + RB_REMOVE(uvm_tree, &(map)->rbhead, entry); + if (entry->prev != &map->header) + uvm_rb_fixup(map, entry->prev); + if (parent) + uvm_rb_fixup(map, parent); } -/* - * Join support function. - * - * Returns the merged entry on succes. - * Returns NULL if the merge failed. - */ -struct vm_map_entry* -uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, - struct vm_map_entry *e2, struct uvm_map_deadq *dead) -{ - struct uvm_map_free *free; +#ifdef DEBUG +#define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y) +#else +#define uvm_tree_sanity(x,y) +#endif - /* - * Amap of e1 must be extended to include e2. - * e2 contains no real information in its amap, - * so it can be erased immediately. - */ - if (e1->aref.ar_amap) { - if (amap_extend(e1, e2->end - e2->start)) - return NULL; +#ifdef DEBUG +int +_uvm_tree_sanity(struct vm_map *map, const char *name) +{ + struct vm_map_entry *tmp, *trtmp; + int n = 0, i = 1; + + RB_FOREACH(tmp, uvm_tree, &map->rbhead) { + if (tmp->ownspace != uvm_rb_space(map, tmp)) { + printf("%s: %d/%d ownspace %x != %x %s\n", + name, n + 1, map->nentries, + tmp->ownspace, uvm_rb_space(map, tmp), + tmp->next == &map->header ? "(last)" : ""); + goto error; + } } + trtmp = NULL; + RB_FOREACH(tmp, uvm_tree, &map->rbhead) { + if (tmp->space != uvm_rb_subtree_space(tmp)) { + printf("%s: space %d != %d\n", + name, tmp->space, uvm_rb_subtree_space(tmp)); + goto error; + } + if (trtmp != NULL && trtmp->start >= tmp->start) { + printf("%s: corrupt: 0x%lx >= 0x%lx\n", + name, trtmp->start, tmp->start); + goto error; + } + n++; - /* - * Don't drop obj reference: - * uvm_unmap_detach will do this for us. - */ - - free = UVM_FREE(map, FREE_START(e2)); - if (e2->fspace > 0 && free) - uvm_mapent_free_remove(map, free, e2); - uvm_mapent_addr_remove(map, e2); - e1->end = e2->end; - e1->guard = e2->guard; - e1->fspace = e2->fspace; - if (e1->fspace > 0 && free) - uvm_mapent_free_insert(map, free, e1); - - DEAD_ENTRY_PUSH(dead, e2); - return e1; -} - -/* - * Attempt forward and backward joining of entry. - * - * Returns entry after joins. - * We are guaranteed that the amap of entry is either non-existant or - * has never been used. - */ -struct vm_map_entry* -uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, - struct uvm_map_deadq *dead) -{ - struct vm_map_entry *other; - struct vm_map_entry *merged; - - /* - * Merge with previous entry. 
- */ - other = RB_PREV(uvm_map_addr, &map->addr, entry); - if (other && uvm_mapent_isjoinable(map, other, entry)) { - merged = uvm_mapent_merge(map, other, entry, dead); - DPRINTF(("prev merge: %p + %p -> %p\n", other, entry, merged)); - if (merged) - entry = merged; + trtmp = tmp; } - /* - * Merge with next entry. - * - * Because amap can only extend forward and the next entry - * probably contains sensible info, only perform forward merging - * in the absence of an amap. - */ - other = RB_NEXT(uvm_map_addr, &map->addr, entry); - if (other && entry->aref.ar_amap == NULL && - other->aref.ar_amap == NULL && - uvm_mapent_isjoinable(map, entry, other)) { - merged = uvm_mapent_merge(map, entry, other, dead); - DPRINTF(("next merge: %p + %p -> %p\n", entry, other, merged)); - if (merged) - entry = merged; + if (n != map->nentries) { + printf("%s: nentries: %d vs %d\n", + name, n, map->nentries); + goto error; } - return entry; -} - -/* - * Kill entries that are no longer in a map. - */ -void -uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) -{ - struct vm_map_entry *entry; - - while ((entry = TAILQ_FIRST(deadq)) != NULL) { - /* - * Drop reference to amap, if we've got one. - */ - if (entry->aref.ar_amap) - amap_unref(entry->aref.ar_amap, - entry->aref.ar_pageoff, - atop(entry->end - entry->start), - flags); - - /* - * Drop reference to our backing object, if we've got one. - */ - if (UVM_ET_ISSUBMAP(entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_deallocate(entry->object.sub_map); - } else if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->pgops->pgo_detach) { - entry->object.uvm_obj->pgops->pgo_detach( - entry->object.uvm_obj); + for (tmp = map->header.next; tmp && tmp != &map->header; + tmp = tmp->next, i++) { + trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp); + if (trtmp != tmp) { + printf("%s: lookup: %d: %p - %p: %p\n", + name, i, tmp, trtmp, + RB_PARENT(tmp, rb_entry)); + goto error; } - - /* - * Step to next. - */ - TAILQ_REMOVE(deadq, entry, daddrs.deadq); - uvm_mapent_free(entry); } -} - -/* - * Create and insert new entry. - * - * Returned entry contains new addresses and is inserted properly in the tree. - * first and last are (probably) no longer valid. - */ -struct vm_map_entry* -uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, - struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, - struct uvm_map_deadq *dead) -{ - struct vm_map_entry *entry, *prev; - struct uvm_map_free *free; - vaddr_t min, max; /* free space boundaries for new entry */ - - KDASSERT(map != NULL && first != NULL && last != NULL && dead != NULL && - sz > 0 && addr + sz > addr); - KDASSERT(first->end <= addr && FREE_END(first) > addr); - KDASSERT(last->start < addr + sz && FREE_END(last) >= addr + sz); - KDASSERT(uvm_map_isavail(&map->addr, &first, &last, addr, sz)); - uvm_tree_sanity(map, __FILE__, __LINE__); - min = addr + sz; - max = FREE_END(last); - - /* - * Initialize new entry. - */ - entry = uvm_mapent_alloc(map, flags); - if (entry == NULL) - return NULL; - entry->offset = 0; - entry->etype = 0; - entry->wired_count = 0; - entry->aref.ar_pageoff = 0; - entry->aref.ar_amap = NULL; - - entry->start = addr; - entry->end = min; - entry->guard = 0; - entry->fspace = 0; - - /* - * Reset free space in first. - */ - free = UVM_FREE(map, FREE_START(first)); - if (free) - uvm_mapent_free_remove(map, free, first); - first->guard = 0; - first->fspace = 0; - - /* - * Remove all entries that are fully replaced. - * We are iterating using last in reverse order. 
- */ - for (; first != last; last = prev) { - prev = RB_PREV(uvm_map_addr, &map->addr, last); - - KDASSERT(last->start == last->end); - free = UVM_FREE(map, FREE_START(last)); - if (free && last->fspace > 0) - uvm_mapent_free_remove(map, free, last); - uvm_mapent_addr_remove(map, last); - DEAD_ENTRY_PUSH(dead, last); - } - /* - * Remove first if it is entirely inside <addr, addr+sz>. - */ - if (first->start == addr) { - uvm_mapent_addr_remove(map, first); - DEAD_ENTRY_PUSH(dead, first); - } else - uvm_map_fix_space(map, first, FREE_START(first), addr, flags); - - /* - * Finally, link in entry. - */ - uvm_mapent_addr_insert(map, entry); - uvm_map_fix_space(map, entry, min, max, flags); - - uvm_tree_sanity(map, __FILE__, __LINE__); - return entry; + return (0); + error: +#ifdef DDB + /* handy breakpoint location for error case */ + __asm(".globl treesanity_label\ntreesanity_label:"); +#endif + return (-1); } +#endif /* * uvm_mapent_alloc: allocate a map entry */ + struct vm_map_entry * uvm_mapent_alloc(struct vm_map *map, int flags) { @@ -1581,15 +416,15 @@ uvm_mapent_alloc(struct vm_map *map, int flags) for (i = 0; i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; i++) - RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; - RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; + ne[i].next = &ne[i + 1]; + ne[i].next = NULL; me = ne; if (ratecheck(&uvm_kmapent_last_warn_time, &uvm_kmapent_warn_rate)) printf("uvm_mapent_alloc: out of static " "map entries\n"); } - uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); + uvm.kentry_free = me->next; uvmexp.kmapent++; simple_unlock(&uvm.kentry_lock); splx(s); @@ -1608,14 +443,6 @@ uvm_mapent_alloc(struct vm_map *map, int flags) me->flags = 0; } - if (me != NULL) { - RB_LEFT(me, free_entry) = RB_RIGHT(me, free_entry) = - RB_PARENT(me, free_entry) = UVMMAP_DEADBEEF; - RB_LEFT(me, daddrs.addr_entry) = - RB_RIGHT(me, daddrs.addr_entry) = - RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; - } - out: UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%ld]", me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); @@ -1627,6 +454,7 @@ out: * * => XXX: static pool for kernel map? */ + void uvm_mapent_free(struct vm_map_entry *me) { @@ -1635,16 +463,10 @@ uvm_mapent_free(struct vm_map_entry *me) UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%ld]", me, me->flags, 0, 0); - - if (RB_LEFT(me, free_entry) != UVMMAP_DEADBEEF || - RB_RIGHT(me, free_entry) != UVMMAP_DEADBEEF || - RB_PARENT(me, free_entry) != UVMMAP_DEADBEEF) - panic("uvm_mapent_free: mapent %p still in free list\n", me); - if (me->flags & UVM_MAP_STATIC) { s = splvm(); simple_lock(&uvm.kentry_lock); - RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; + me->next = uvm.kentry_free; uvm.kentry_free = me; uvmexp.kmapent--; simple_unlock(&uvm.kentry_lock); @@ -1659,2009 +481,1838 @@ uvm_mapent_free(struct vm_map_entry *me) } /* - * uvm_map_lookup_entry: find map entry at or before an address. - * - * => map must at least be read-locked by caller - * => entry is returned in "entry" - * => return value is true if address is in the returned entry - * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is - * returned for those mappings. 
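The uvm_mapent_alloc()/uvm_mapent_free() hunks above restore the static pool of kernel map entries to a plain intrusive free list threaded through each entry's own next pointer, instead of reusing an RB-tree link field. A self-contained sketch of that push/pop scheme; struct kent and the pool size are made up for illustration.

/*
 * Intrusive free list over a static pool (illustrative types only).
 */
#include <stddef.h>
#include <stdio.h>

struct kent {
	struct kent *next;
	/* ... payload omitted ... */
};

static struct kent *kentry_free;

static void
kent_pool_init(struct kent *pool, int n)
{
	int i;

	/* Chain the array together; the last element ends the list. */
	for (i = 0; i < n - 1; i++)
		pool[i].next = &pool[i + 1];
	pool[n - 1].next = NULL;
	kentry_free = pool;
}

static struct kent *
kent_get(void)
{
	struct kent *me = kentry_free;

	if (me != NULL)
		kentry_free = me->next;		/* pop the head */
	return me;
}

static void
kent_put(struct kent *me)
{
	me->next = kentry_free;			/* push back onto the head */
	kentry_free = me;
}

int
main(void)
{
	static struct kent pool[4];
	struct kent *a, *b;

	kent_pool_init(pool, 4);
	a = kent_get();
	b = kent_get();
	kent_put(a);
	printf("got %p and %p from the pool\n", (void *)a, (void *)b);
	return 0;
}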
+ * uvm_mapent_copy: copy a map entry, preserving flags */ -boolean_t -uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, - struct vm_map_entry **entry) + +void +uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) { - *entry = uvm_map_entrybyaddr(&map->addr, address); - return *entry != NULL && !UVM_ET_ISHOLE(*entry) && - (*entry)->start <= address && (*entry)->end > address; + memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - + ((char *)src)); } /* - * uvm_map_pie: return a random load address for a PIE executable - * properly aligned. + * uvm_map_entry_unwire: unwire a map entry + * + * => map should be locked by caller */ -#ifndef VM_PIE_MAX_ADDR -#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) -#endif - -#ifndef VM_PIE_MIN_ADDR -#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS -#endif - -#ifndef VM_PIE_MIN_ALIGN -#define VM_PIE_MIN_ALIGN PAGE_SIZE -#endif - -vaddr_t -uvm_map_pie(vaddr_t align) +void +uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) { - vaddr_t addr, space, min; - - align = MAX(align, VM_PIE_MIN_ALIGN); - - /* round up to next alignment */ - min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); - - if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) - return (align); - - space = (VM_PIE_MAX_ADDR - min) / align; - space = MIN(space, (u_int32_t)-1); - addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; - addr += min; - - return (addr); + entry->wired_count = 0; + uvm_fault_unwire_locked(map, entry->start, entry->end); } + +/* + * wrapper for calling amap_ref() + */ void -uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) +uvm_map_reference_amap(struct vm_map_entry *entry, int flags) { - struct uvm_map_deadq dead; - - KASSERT((start & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0); - TAILQ_INIT(&dead); - vm_map_lock(map); - uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); - vm_map_unlock(map); - - uvm_unmap_detach(&dead, 0); + amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, + (entry->end - entry->start) >> PAGE_SHIFT, flags); } + /* - * Mark entry as free. - * - * entry will be put on the dead list. - * The free space will be merged into the previous or a new entry, - * unless markfree is false. + * wrapper for calling amap_unref() */ void -uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, - struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, - boolean_t markfree) +uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) { - struct uvm_map_free *free; - struct vm_map_entry *prev; - vaddr_t addr; /* Start of freed range. */ - vaddr_t end; /* End of freed range. */ - - prev = *prev_ptr; - if (prev == entry) - *prev_ptr = prev = NULL; - - if (prev == NULL || - FREE_END(prev) != entry->start) - prev = RB_PREV(uvm_map_addr, &map->addr, entry); - /* - * Entry is describing only free memory and has nothing to drain into. - */ - if (prev == NULL && entry->start == entry->end && markfree) { - *prev_ptr = entry; - return; - } - - addr = entry->start; - end = FREE_END(entry); - free = UVM_FREE(map, FREE_START(entry)); - if (entry->fspace > 0 && free) - uvm_mapent_free_remove(map, free, entry); - uvm_mapent_addr_remove(map, entry); - DEAD_ENTRY_PUSH(dead, entry); - - if (markfree) - *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); + amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, + (entry->end - entry->start) >> PAGE_SHIFT, flags); } + /* - * Remove all entries from start to end. - * - * If remove_holes, then remove ET_HOLE entries as well. 
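The uvm_mapent_copy() restored at the top of the hunk above copies only the bytes that precede the uvm_map_entry_stop_copy marker, so per-map linkage fields in the destination stay untouched. A tiny illustration of that copy-up-to-a-marker idiom with a hypothetical struct layout:

/* Copy-up-to-a-marker idiom (hypothetical struct layout). */
#include <stddef.h>
#include <string.h>

struct item {
	/* everything above the marker is copied */
	unsigned long start, end, offset;
	int flags;
	int stop_copy;			/* marker field, never read */
	/* everything below must not be copied (linkage, tree pointers) */
	struct item *next, *prev;
};

static void
item_copy(const struct item *src, struct item *dst)
{
	/* Copies start..flags and leaves dst->next/dst->prev alone. */
	memcpy(dst, src, offsetof(struct item, stop_copy));
}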
- * If markfree, entry will be properly marked free, otherwise, no replacement - * entry will be put in the tree (corrupting the tree). + * uvm_map_init: init mapping system at boot time. note that we allocate + * and init the static pool of structs vm_map_entry for the kernel here. */ + void -uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, - struct uvm_map_deadq *dead, boolean_t remove_holes, - boolean_t markfree) +uvm_map_init(void) { - struct vm_map_entry *prev_hint, *next, *entry; + static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; +#if defined(UVMHIST) + static struct uvm_history_ent maphistbuf[100]; + static struct uvm_history_ent pdhistbuf[100]; +#endif + int lcv; - start = MAX(start, map->min_offset); - end = MIN(end, map->max_offset); - if (start >= end) - return; + /* + * first, init logging system. + */ - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); + UVMHIST_FUNC("uvm_map_init"); + UVMHIST_INIT_STATIC(maphist, maphistbuf); + UVMHIST_INIT_STATIC(pdhist, pdhistbuf); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); + UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, + "# uvm_map() successful calls", 0); + UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); + UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", + 0); + UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0); + UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); + UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); /* - * Find first affected entry. + * now set up static pool of kernel map entries ... */ - entry = uvm_map_entrybyaddr(&map->addr, start); - KDASSERT(entry != NULL && entry->start <= start); - if (entry->end <= start && markfree) - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); - else - UVM_MAP_CLIP_START(map, entry, start); - DPRINTF(("uvm_unmap_p: 0x%lx-0x%lx\n" - "\tfirst 0x%lx-0x%lx\n", - start, end, - entry->start, entry->end)); + simple_lock_init(&uvm.kentry_lock); + uvm.kentry_free = NULL; + for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { + kernel_map_entry[lcv].next = uvm.kentry_free; + uvm.kentry_free = &kernel_map_entry[lcv]; + } /* - * Iterate entries until we reach end address. - * prev_hint hints where the freed space can be appended to. + * initialize the map-related pools. */ - prev_hint = NULL; - for (; entry != NULL && entry->start < end; entry = next) { - KDASSERT(entry->start >= start); - if (entry->end > end || !markfree) - UVM_MAP_CLIP_END(map, entry, end); - KDASSERT(entry->start >= start && entry->end <= end); - next = RB_NEXT(uvm_map_addr, &map->addr, entry); - DPRINTF(("\tunmap 0x%lx-0x%lx used 0x%lx-0x%lx free\n", - entry->start, entry->end, - FREE_START(entry), FREE_END(entry))); + pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpekpl", NULL); + pool_sethiwat(&uvm_map_entry_pool, 8192); +} - /* - * Unwire removed map entry. - */ - if (VM_MAPENT_ISWIRED(entry)) { - entry->wired_count = 0; - uvm_fault_unwire_locked(map, entry->start, entry->end); - } +/* + * clippers + */ - /* - * Entry-type specific code. - */ - if (UVM_ET_ISHOLE(entry)) { - /* - * Skip holes unless remove_holes. 
- */ - if (!remove_holes) { - prev_hint = entry; - continue; - } - } else if (map->flags & VM_MAP_INTRSAFE) { - KASSERT(vm_map_pmap(map) == pmap_kernel()); - uvm_km_pgremove_intrsafe(entry->start, entry->end); - pmap_kremove(entry->start, entry->end - entry->start); - } else if (UVM_ET_ISOBJ(entry) && - UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { - KASSERT(vm_map_pmap(map) == pmap_kernel()); +/* + * uvm_map_clip_start: ensure that the entry begins at or after + * the starting address, if it doesn't we split the entry. + * + * => caller should use UVM_MAP_CLIP_START macro rather than calling + * this directly + * => map must be locked by caller + */ - /* - * Note: kernel object mappings are currently used in - * two ways: - * [1] "normal" mappings of pages in the kernel object - * [2] uvm_km_valloc'd allocations in which we - * pmap_enter in some non-kernel-object page - * (e.g. vmapbuf). - * - * for case [1], we need to remove the mapping from - * the pmap and then remove the page from the kernel - * object (because, once pages in a kernel object are - * unmapped they are no longer needed, unlike, say, - * a vnode where you might want the data to persist - * until flushed out of a queue). - * - * for case [2], we need to remove the mapping from - * the pmap. there shouldn't be any pages at the - * specified offset in the kernel object [but it - * doesn't hurt to call uvm_km_pgremove just to be - * safe?] - * - * uvm_km_pgremove currently does the following: - * for pages in the kernel object range: - * - drops the swap slot - * - uvm_pagefree the page - * - * note there is version of uvm_km_pgremove() that - * is used for "intrsafe" objects. - */ +void +uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, + vaddr_t start) +{ + struct vm_map_entry *new_entry; + vaddr_t new_adj; - /* - * remove mappings from pmap and drop the pages - * from the object. offsets are always relative - * to vm_map_min(kernel_map). - */ - pmap_remove(pmap_kernel(), entry->start, entry->end); - uvm_km_pgremove(entry->object.uvm_obj, - entry->start - vm_map_min(kernel_map), - entry->end - vm_map_min(kernel_map)); + /* uvm_map_simplify_entry(map, entry); */ /* XXX */ - /* - * null out kernel_object reference, we've just - * dropped it - */ - entry->etype &= ~UVM_ET_OBJ; - entry->object.uvm_obj = NULL; /* to be safe */ - } else { - /* - * remove mappings the standard way. - */ - pmap_remove(map->pmap, entry->start, entry->end); - } + uvm_tree_sanity(map, "clip_start entry"); - /* - * Update space usage. - */ - if ((map->flags & VM_MAP_ISVMSPACE) && - entry->object.uvm_obj == NULL && - !UVM_ET_ISHOLE(entry)) { - ((struct vmspace *)map)->vm_dused -= - uvmspace_dused(map, entry->start, entry->end); - } - if (!UVM_ET_ISHOLE(entry)) - map->size -= entry->end - entry->start; + /* + * Split off the front portion. note that we must insert the new + * entry BEFORE this one, so that this entry has the specified + * starting address. + */ - /* - * Actual removal of entry. 
- */ - uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); - } + new_entry = uvm_mapent_alloc(map, 0); + uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ - pmap_update(vm_map_pmap(map)); + new_entry->end = start; + new_adj = start - new_entry->start; + if (entry->object.uvm_obj) + entry->offset += new_adj; /* shift start over */ - DPRINTF(("uvm_unmap_p: 0x%lx-0x%lx map=%p\n", start, end, - map)); + /* Does not change order for the RB tree */ + entry->start = start; -#ifdef DEBUG - if (markfree) { - for (entry = uvm_map_entrybyaddr(&map->addr, start); - entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - KDASSERT(entry->end <= start || - entry->start == entry->end || - UVM_ET_ISHOLE(entry)); - } + if (new_entry->aref.ar_amap) { + amap_splitref(&new_entry->aref, &entry->aref, new_adj); + } + + uvm_map_entry_link(map, entry->prev, new_entry); + + if (UVM_ET_ISSUBMAP(entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_reference(new_entry->object.sub_map); } else { - vaddr_t a; - for (a = start; a < end; a += PAGE_SIZE) - KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); + if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops && + entry->object.uvm_obj->pgops->pgo_reference) + entry->object.uvm_obj->pgops->pgo_reference( + entry->object.uvm_obj); } -#endif + + uvm_tree_sanity(map, "clip_start leave"); } /* - * Mark all entries from first until end (exclusive) as pageable. - * - * Lock must be exclusive on entry and will not be touched. + * uvm_map_clip_end: ensure that the entry ends at or before + * the ending address, if it doesn't we split the reference + * + * => caller should use UVM_MAP_CLIP_END macro rather than calling + * this directly + * => map must be locked by caller */ + void -uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, - struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) +uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) { - struct vm_map_entry *iter; + struct vm_map_entry *new_entry; + vaddr_t new_adj; /* #bytes we move start forward */ - for (iter = first; iter != end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - KDASSERT(iter->start >= start_addr && iter->end <= end_addr); - if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) - continue; + uvm_tree_sanity(map, "clip_end entry"); + /* + * Create a new entry and insert it + * AFTER the specified entry + */ - iter->wired_count = 0; - uvm_fault_unwire_locked(map, iter->start, iter->end); + new_entry = uvm_mapent_alloc(map, 0); + uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ + + new_entry->start = entry->end = end; + new_adj = end - entry->start; + if (new_entry->object.uvm_obj) + new_entry->offset += new_adj; + + if (entry->aref.ar_amap) + amap_splitref(&entry->aref, &new_entry->aref, new_adj); + + uvm_rb_fixup(map, entry); + + uvm_map_entry_link(map, entry, new_entry); + + if (UVM_ET_ISSUBMAP(entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_reference(new_entry->object.sub_map); + } else { + if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops && + entry->object.uvm_obj->pgops->pgo_reference) + entry->object.uvm_obj->pgops->pgo_reference( + entry->object.uvm_obj); } + uvm_tree_sanity(map, "clip_end leave"); } + /* - * Mark all entries from first until end (exclusive) as wired. + * M A P - m a i n e n t r y p o i n t + */ +/* + * uvm_map: establish a valid mapping in a map + * + * => assume startp is page aligned. 
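The restored uvm_map_clip_start()/uvm_map_clip_end() above split one entry in two at a given address: whichever piece now begins later in the range has its object offset advanced by the split distance, and any amap reference is split the same way. A minimal model of just the boundary arithmetic, with hypothetical types and no amap or object reference handling:

/*
 * Boundary arithmetic of entry clipping (hypothetical types).
 */
#include <assert.h>

struct range {
	unsigned long start, end;	/* half-open, page aligned */
	unsigned long offset;		/* offset into the backing object */
};

/* After the call, *front covers [start, at) and *e covers [at, end). */
static void
clip_start(struct range *e, unsigned long at, struct range *front)
{
	unsigned long adj = at - e->start;

	assert(e->start < at && at < e->end);
	*front = *e;
	front->end = at;
	e->start = at;
	e->offset += adj;	/* e now begins adj bytes further in */
}

/* After the call, *e covers [start, at) and *back covers [at, end). */
static void
clip_end(struct range *e, unsigned long at, struct range *back)
{
	unsigned long adj = at - e->start;

	assert(e->start < at && at < e->end);
	*back = *e;
	back->start = at;
	back->offset += adj;	/* the tail starts adj bytes into the object */
	e->end = at;
}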
+ * => assume size is a multiple of PAGE_SIZE. + * => assume sys_mmap provides enough of a "hint" to have us skip + * over text/data/bss area. + * => map must be unlocked (we will lock it) + * => <uobj,uoffset> value meanings (4 cases): + * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER + * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER + * [3] <uobj,uoffset> == normal mapping + * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA + * + * case [4] is for kernel mappings where we don't know the offset until + * we've found a virtual address. note that kernel object offsets are + * always relative to vm_map_min(kernel_map). + * + * => if `align' is non-zero, we try to align the virtual address to + * the specified alignment. this is only a hint; if we can't + * do it, the address will be unaligned. this is provided as + * a mechanism for large pages. * - * Lockflags determines the lock state on return from this function. - * Lock must be exclusive on entry. + * => XXXCDC: need way to map in external amap? */ + int -uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, - struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, - int lockflags) +uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size, + struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, + struct proc *p) { - struct vm_map_entry *iter; -#ifdef DIAGNOSTIC - unsigned int timestamp_save; + struct vm_map_entry *prev_entry, *new_entry; +#ifdef KVA_GUARDPAGES + struct vm_map_entry *guard_entry; #endif + vm_prot_t prot = UVM_PROTECTION(flags), maxprot = + UVM_MAXPROTECTION(flags); + vm_inherit_t inherit = UVM_INHERIT(flags); + int advice = UVM_ADVICE(flags); int error; + UVMHIST_FUNC("uvm_map"); + UVMHIST_CALLED(maphist); - /* - * Wire pages in two passes: - * - * 1: holding the write lock, we create any anonymous maps that need - * to be created. then we clip each map entry to the region to - * be wired and increment its wiring count. - * - * 2: we downgrade to a read lock, and call uvm_fault_wire to fault - * in the pages for any newly wired area (wired_count == 1). - * - * downgrading to a read lock for uvm_fault_wire avoids a possible - * deadlock with another thread that may have faulted on one of - * the pages to be wired (it would mark the page busy, blocking - * us, then in turn block on the map lock that we hold). - * because we keep the read lock on the map, the copy-on-write - * status of the entries we modify here cannot change. - */ - for (iter = first; iter != end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - KDASSERT(iter->start >= start_addr && iter->end <= end_addr); - if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) - continue; - - /* - * Perform actions of vm_map_lookup that need the write lock. - * - create an anonymous map for copy-on-write - * - anonymous map for zero-fill - * Skip submaps. - */ - if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && - UVM_ET_ISNEEDSCOPY(iter) && - ((iter->protection & VM_PROT_WRITE) || - iter->object.uvm_obj == NULL)) { - amap_copy(map, iter, M_WAITOK, TRUE, - iter->start, iter->end); - } - iter->wired_count++; - } + UVMHIST_LOG(maphist, "(map=%p, *startp=0x%lx, size=%ld, flags=0x%lx)", + map, *startp, size, flags); + UVMHIST_LOG(maphist, " uobj/offset %p/%ld", uobj, (u_long)uoffset,0,0); /* - * Pass 2. + * Holes are incompatible with other types of mappings. 
*/ -#ifdef DIAGNOSTIC - timestamp_save = map->timestamp; -#endif - vm_map_busy(map); - vm_map_downgrade(map); - - error = 0; - for (iter = first; error == 0 && iter != end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) - continue; - - error = uvm_fault_wire(map, iter->start, iter->end, - iter->protection); + if (flags & UVM_FLAG_HOLE) { + KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 && + (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); } - if (error) { +#ifdef KVA_GUARDPAGES + if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { /* - * uvm_fault_wire failure - * - * Reacquire lock and undo our work. + * kva_guardstart is initialized to the start of the kernelmap + * and cycles through the kva space. + * This way we should have a long time between re-use of kva. */ - vm_map_upgrade(map); - vm_map_unbusy(map); -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable_wire: stale map"); -#endif - + static vaddr_t kva_guardstart = 0; + if (kva_guardstart == 0) { + kva_guardstart = vm_map_min(map); + printf("uvm_map: kva guard pages enabled: %p\n", + kva_guardstart); + } + size += PAGE_SIZE; /* Add guard page at the end. */ /* - * first is no longer needed to restart loops. - * Use it as iterator to unmap successful mappings. + * Try to fully exhaust kva prior to wrap-around. + * (This may eat your ram!) */ - for (; first != iter; - first = RB_NEXT(uvm_map_addr, &map->addr, first)) { - if (UVM_ET_ISHOLE(first) || first->start == first->end) - continue; - - first->wired_count--; - if (!VM_MAPENT_ISWIRED(first)) { - uvm_fault_unwire_locked(map, - iter->start, iter->end); - } + if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) { + static int wrap_counter = 0; + printf("uvm_map: kva guard page wrap-around %d\n", + ++wrap_counter); + kva_guardstart = vm_map_min(map); } - + *startp = kva_guardstart; /* - * decrease counter in the rest of the entries + * Prepare for next round. */ - for (; iter != end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) - continue; + kva_guardstart += size; + } +#endif - iter->wired_count--; - } + uvm_tree_sanity(map, "map entry"); - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return error; - } + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); /* - * We are currently holding a read lock. + * step 0: sanity check of protection code */ - if ((lockflags & UVM_LK_EXIT) == 0) { - vm_map_unbusy(map); - vm_map_unlock_read(map); - } else { - vm_map_upgrade(map); - vm_map_unbusy(map); -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable_wire: stale map"); -#endif - } - return 0; -} -/* - * uvm_map_pageable: set pageability of a range in a map. - * - * Flags: - * UVM_LK_ENTER: map is already locked by caller - * UVM_LK_EXIT: don't unlock map on exit - * - * The full range must be in use (entries may not have fspace != 0). - * UVM_ET_HOLE counts as unmapped. - */ -int -uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, - boolean_t new_pageable, int lockflags) -{ - struct vm_map_entry *first, *last, *tmp; - int error; - - if (start > end) - return EINVAL; - if (start < map->min_offset) - return EFAULT; /* why? see first XXX below */ - if (end > map->max_offset) - return EINVAL; /* why? 
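The uvm_map_pageable_wire() comment removed above explains the two-pass approach: pass one, under the write lock, creates any needed amaps and bumps the wire counts; pass two downgrades to a read lock before faulting the pages in, so a thread that already holds one of those pages busy and is waiting for the map lock cannot deadlock against us. A schematic of that control flow; every helper here is a hypothetical no-op stand-in and the error path is simplified to dropping the wire counts.

/*
 * Schematic of the two-pass wiring (stand-in helpers, control flow only).
 */
struct went {
	struct went *next;
	int wired_count;
	int hole;
};

struct wmap { int locked_for_write; };

static void map_downgrade(struct wmap *m) { m->locked_for_write = 0; }
static void map_upgrade(struct wmap *m)   { m->locked_for_write = 1; }
static void map_unlock(struct wmap *m)    { (void)m; }
static void make_amap(struct went *e)     { (void)e; }	/* may sleep */
static int  fault_wire(struct went *e)    { (void)e; return 0; }

/* The map is write-locked by the caller and released before return. */
static int
wire_range(struct wmap *map, struct went *first, struct went *end)
{
	struct went *e;
	int error = 0;

	/* Pass 1 (write lock): create amaps, bump the wire counts. */
	for (e = first; e != end; e = e->next) {
		if (e->hole)
			continue;
		if (e->wired_count == 0)
			make_amap(e);
		e->wired_count++;
	}

	/*
	 * Pass 2 (read lock): fault the pages in.  Only the read lock is
	 * needed here, and holding only that avoids the deadlock described
	 * in the removed comment.
	 */
	map_downgrade(map);
	for (e = first; e != end && error == 0; e = e->next) {
		if (!e->hole)
			error = fault_wire(e);
	}

	if (error) {
		/* Re-take the write lock and back out the wire counts. */
		map_upgrade(map);
		for (e = first; e != end; e = e->next) {
			if (!e->hole)
				e->wired_count--;
		}
	}
	map_unlock(map);
	return error;
}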
see second XXX below */ - - KASSERT(map->flags & VM_MAP_PAGEABLE); - if ((lockflags & UVM_LK_ENTER) == 0) - vm_map_lock(map); + if ((prot & maxprot) != prot) { + UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%lx, max=0x%lx", + prot, maxprot,0,0); + return (EACCES); + } /* - * Find first entry. - * - * Initial test on start is different, because of the different - * error returned. Rest is tested further down. + * step 1: figure out where to put new VM range */ - first = uvm_map_entrybyaddr(&map->addr, start); - if (first->end <= start || UVM_ET_ISHOLE(first)) { + + if (vm_map_lock_try(map) == FALSE) { + if (flags & UVM_FLAG_TRYLOCK) + return (EFAULT); + vm_map_lock(map); /* could sleep here */ + } + if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, + uobj, uoffset, align, flags)) == NULL) { + UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); + vm_map_unlock(map); + return (ENOMEM); + } + +#ifdef PMAP_GROWKERNEL + { /* - * XXX if the first address is not mapped, it is EFAULT? + * If the kernel pmap can't map the requested space, + * then allocate more resources for it. */ - error = EFAULT; - goto out; + if (map == kernel_map && !(flags & UVM_FLAG_FIXED) && + uvm_maxkaddr < (*startp + size)) + uvm_maxkaddr = pmap_growkernel(*startp + size); } +#endif + + UVMCNT_INCR(uvm_map_call); /* - * Check that the range has no holes. + * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER + * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in + * either case we want to zero it before storing it in the map entry + * (because it looks strange and confusing when debugging...) + * + * if uobj is not null + * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping + * and we do not need to change uoffset. + * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset + * now (based on the starting address of the map). this case is + * for kernel object mappings where we don't know the offset until + * the virtual address is found (with uvm_map_findspace). the + * offset is the distance we are from the start of the map. */ - for (last = first; last != NULL && last->start < end; - last = RB_NEXT(uvm_map_addr, &map->addr, last)) { - if (UVM_ET_ISHOLE(last) || - (last->end < end && FREE_END(last) != last->end)) { - /* - * XXX unmapped memory in range, why is it EINVAL - * instead of EFAULT? - */ - error = EINVAL; - goto out; + + if (uobj == NULL) { + uoffset = 0; + } else { + if (uoffset == UVM_UNKNOWN_OFFSET) { + KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); + uoffset = *startp - vm_map_min(kernel_map); } } /* - * Last ended at the first entry after the range. - * Move back one step. - * - * Note that last may be NULL. + * step 2: try and insert in map by extending previous entry, if + * possible + * XXX: we don't try and pull back the next entry. might be useful + * for a stack, but we are currently allocating our stack in advance. */ - if (last == NULL) { - last = RB_MAX(uvm_map_addr, &map->addr); - if (last->end < end) { - error = EINVAL; - goto out; - } - } else - last = RB_PREV(uvm_map_addr, &map->addr, last); - /* - * Wire/unwire pages here. - */ - if (new_pageable) { - /* - * Mark pageable. - * entries that are not wired are untouched. 
- */ - if (VM_MAPENT_ISWIRED(first)) - UVM_MAP_CLIP_START(map, first, start); + if ((flags & UVM_FLAG_NOMERGE) == 0 && + prev_entry->end == *startp && prev_entry != &map->header && + prev_entry->object.uvm_obj == uobj) { + + if (uobj && prev_entry->offset + + (prev_entry->end - prev_entry->start) != uoffset) + goto step3; + + if (UVM_ET_ISSUBMAP(prev_entry)) + goto step3; + + if (prev_entry->protection != prot || + prev_entry->max_protection != maxprot) + goto step3; + + if (prev_entry->inheritance != inherit || + prev_entry->advice != advice) + goto step3; + + /* wiring status must match (new area is unwired) */ + if (VM_MAPENT_ISWIRED(prev_entry)) + goto step3; + /* - * Split last at end. - * Make tmp be the first entry after what is to be touched. - * If last is not wired, don't touch it. + * can't extend a shared amap. note: no need to lock amap to + * look at refs since we don't care about its exact value. + * if it is one (i.e. we have only reference) it will stay there */ - if (VM_MAPENT_ISWIRED(last)) { - UVM_MAP_CLIP_END(map, last, end); - tmp = RB_NEXT(uvm_map_addr, &map->addr, last); - } else - tmp = last; - uvm_map_pageable_pgon(map, first, tmp, start, end); - error = 0; + if (prev_entry->aref.ar_amap && + amap_refs(prev_entry->aref.ar_amap) != 1) { + goto step3; + } -out: - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return error; - } else { /* - * Mark entries wired. - * entries are always touched (because recovery needs this). + * Only merge kernel mappings, but keep track + * of how much we skipped. */ - if (!VM_MAPENT_ISWIRED(first)) - UVM_MAP_CLIP_START(map, first, start); + if (map != kernel_map && map != kmem_map) { + UVMCNT_INCR(map_nousermerge); + goto step3; + } + + if (prev_entry->aref.ar_amap) { + error = amap_extend(prev_entry, size); + if (error) + goto step3; + } + + UVMCNT_INCR(map_backmerge); + UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); + /* - * Split last at end. - * Make tmp be the first entry after what is to be touched. - * If last is not wired, don't touch it. + * drop our reference to uobj since we are extending a reference + * that we already have (the ref count can not drop to zero). */ - if (!VM_MAPENT_ISWIRED(last)) { - UVM_MAP_CLIP_END(map, last, end); - tmp = RB_NEXT(uvm_map_addr, &map->addr, last); - } else - tmp = last; - - return uvm_map_pageable_wire(map, first, tmp, start, end, - lockflags); - } -} -/* - * uvm_map_pageable_all: special case of uvm_map_pageable - affects - * all mapped regions. - * - * Map must not be locked. - * If no flags are specified, all ragions are unwired. 
- */ -int -uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) -{ - vsize_t size; - struct vm_map_entry *iter; + if (uobj && uobj->pgops->pgo_detach) + uobj->pgops->pgo_detach(uobj); - KASSERT(map->flags & VM_MAP_PAGEABLE); - vm_map_lock(map); + prev_entry->end += size; + uvm_rb_fixup(map, prev_entry); + map->size += size; + if (p && uobj == NULL) + p->p_vmspace->vm_dused += atop(size); - if (flags == 0) { - uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), - NULL, map->min_offset, map->max_offset); + uvm_tree_sanity(map, "map leave 2"); - atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE); + UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); vm_map_unlock(map); - return 0; - } + return (0); - if (flags & MCL_FUTURE) - atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE); - if (!(flags & MCL_CURRENT)) { - vm_map_unlock(map); - return 0; } +step3: + UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); /* - * Count number of pages in all non-wired entries. - * If the number exceeds the limit, abort. + * check for possible forward merge (which we don't do) and count + * the number of times we missed a *possible* chance to merge more */ - size = 0; - RB_FOREACH(iter, uvm_map_addr, &map->addr) { - if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) - continue; - - size += iter->end - iter->start; - } - if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { - vm_map_unlock(map); - return ENOMEM; - } - - /* XXX non-pmap_wired_count case must be handled by caller */ -#ifdef pmap_wired_count - if (limit != 0 && - size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { - vm_map_unlock(map); - return ENOMEM; - } -#endif + if ((flags & UVM_FLAG_NOMERGE) == 0 && + prev_entry->next != &map->header && + prev_entry->next->start == (*startp + size)) + UVMCNT_INCR(map_forwmerge); /* - * uvm_map_pageable_wire will release lcok + * step 3: allocate new entry and link it in */ - return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), - NULL, map->min_offset, map->max_offset, 0); -} -/* - * Initialize map. - * - * Allocates sufficient entries to describe the free memory in the map. - */ -void -uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) -{ - KASSERT((min & PAGE_MASK) == 0); - KASSERT((max & PAGE_MASK) == 0 || (max & PAGE_MASK) == PAGE_MASK); +#ifdef KVA_GUARDPAGES + if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) + size -= PAGE_SIZE; +#endif - /* - * Update parameters. - * - * This code handles (vaddr_t)-1 and other page mask ending addresses - * properly. - * We lose the top page if the full virtual address space is used. - */ - if (max & PAGE_MASK) { - max += 1; - if (max == 0) /* overflow */ - max -= PAGE_SIZE; + new_entry = uvm_mapent_alloc(map, flags); + if (new_entry == NULL) { + vm_map_unlock(map); + return (ENOMEM); } + new_entry->start = *startp; + new_entry->end = new_entry->start + size; + new_entry->object.uvm_obj = uobj; + new_entry->offset = uoffset; - RB_INIT(&map->addr); - RB_INIT(&map->free.tree); - map->free.treesz = 0; - RB_INIT(&map->bfree.tree); - map->bfree.treesz = 0; + if (uobj) + new_entry->etype = UVM_ET_OBJ; + else + new_entry->etype = 0; - map->size = 0; - map->ref_count = 1; - map->min_offset = min; - map->max_offset = max; - map->b_start = map->b_end = 0; /* Empty brk() area by default. */ - map->s_start = map->s_end = 0; /* Empty stack area by default. 
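The uvm_map_pageable_all() being removed above sizes an mlockall()-style request up front: it sums the entries that are not yet wired and refuses with ENOMEM when that many extra pages would exceed the system-wide wired-page limit (and, where pmap_wired_count exists, a per-map limit). A simplified model of that accounting; the types are hypothetical and the limits are expressed in pages.

/*
 * Pre-wire accounting (hypothetical types; limits are in pages).
 */
#include <errno.h>
#include <stddef.h>

#define PAGE_SHIFT	12

struct lent {
	unsigned long start, end;
	int wired;
	int hole;
	struct lent *next;
};

static int
check_wire_limit(const struct lent *head, unsigned long wired_now,
    unsigned long wired_max)
{
	const struct lent *e;
	unsigned long bytes = 0;

	/* Only pages that are not already wired count against the limit. */
	for (e = head; e != NULL; e = e->next) {
		if (e->wired || e->hole)
			continue;
		bytes += e->end - e->start;
	}
	if ((bytes >> PAGE_SHIFT) + wired_now > wired_max)
		return ENOMEM;
	return 0;
}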
*/ - map->flags = flags; - map->timestamp = 0; - rw_init(&map->lock, "vmmaplk"); - simple_lock_init(&map->ref_lock); + if (flags & UVM_FLAG_COPYONW) { + new_entry->etype |= UVM_ET_COPYONWRITE; + if ((flags & UVM_FLAG_OVERLAY) == 0) + new_entry->etype |= UVM_ET_NEEDSCOPY; + } + if (flags & UVM_FLAG_HOLE) + new_entry->etype |= UVM_ET_HOLE; - /* - * Fill map entries. - * This requires a write-locked map (because of diagnostic assertions - * in insert code). - */ - if ((map->flags & VM_MAP_INTRSAFE) == 0) { - if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0) - panic("uvm_map_setup: rw_enter failed on new map"); + new_entry->protection = prot; + new_entry->max_protection = maxprot; + new_entry->inheritance = inherit; + new_entry->wired_count = 0; + new_entry->advice = advice; + if (flags & UVM_FLAG_OVERLAY) { + /* + * to_add: for BSS we overallocate a little since we + * are likely to extend + */ + vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? + UVM_AMAP_CHUNK << PAGE_SHIFT : 0; + struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); + new_entry->aref.ar_pageoff = 0; + new_entry->aref.ar_amap = amap; + } else { + new_entry->aref.ar_pageoff = 0; + new_entry->aref.ar_amap = NULL; } - uvm_map_setup_entries(map); - uvm_tree_sanity(map, __FILE__, __LINE__); - if ((map->flags & VM_MAP_INTRSAFE) == 0) - rw_exit(&map->lock); -} -/* - * Populate map with free-memory entries. - * - * Map must be initialized and empty. - */ -void -uvm_map_setup_entries(struct vm_map *map) -{ - KDASSERT(RB_EMPTY(&map->addr)); - KDASSERT(RB_EMPTY(&map->free.tree) && map->free.treesz == 0); - KDASSERT(RB_EMPTY(&map->bfree.tree) && map->bfree.treesz == 0); + uvm_map_entry_link(map, prev_entry, new_entry); - uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); -} + map->size += size; + if (p && uobj == NULL) + p->p_vmspace->vm_dused += atop(size); -/* - * Split entry at given address. - * - * orig: entry that is to be split. - * next: a newly allocated map entry that is not linked. - * split: address at which the split is done. - */ -void -uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, - struct vm_map_entry *next, vaddr_t split) -{ - struct uvm_map_free *free; - vsize_t adj; - KDASSERT(map != NULL && orig != NULL && next != NULL); - uvm_tree_sanity(map, __FILE__, __LINE__); - KASSERT(orig->start < split && FREE_END(orig) > split); + /* + * Update the free space hint + */ - adj = split - orig->start; - free = UVM_FREE(map, FREE_START(orig)); - KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); - KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); - KDASSERT(orig->fspace == 0 || free == NULL || - RB_FIND(uvm_map_free_int, &free->tree, orig) == orig); + if ((map->first_free == prev_entry) && + (prev_entry->end >= new_entry->start)) + map->first_free = new_entry; +#ifdef KVA_GUARDPAGES /* - * Free space will change, unlink from free space tree. + * Create the guard entry. 
*/ - if (orig->fspace > 0 && free) - uvm_mapent_free_remove(map, free, orig); - - uvm_mapent_copy(orig, next); - if (split >= orig->end) { - next->etype = 0; - next->offset = 0; - next->wired_count = 0; - next->start = next->end = split; - next->guard = 0; - next->fspace = FREE_END(orig) - split; - next->aref.ar_amap = NULL; - next->aref.ar_pageoff = 0; - orig->guard = MIN(orig->guard, split - orig->end); - orig->fspace = split - FREE_START(orig); - } else { - orig->fspace = 0; - orig->guard = 0; - orig->end = next->start = split; - - if (next->aref.ar_amap) - amap_splitref(&orig->aref, &next->aref, adj); - if (UVM_ET_ISSUBMAP(orig)) { - uvm_map_reference(next->object.sub_map); - next->offset += adj; - } else if (UVM_ET_ISOBJ(orig)) { - if (next->object.uvm_obj->pgops && - next->object.uvm_obj->pgops->pgo_reference) { - next->object.uvm_obj->pgops->pgo_reference( - next->object.uvm_obj); - } - next->offset += adj; + if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { + guard_entry = uvm_mapent_alloc(map, flags); + if (guard_entry != NULL { + guard_entry->start = new_entry->end; + guard_entry->end = guard_entry->start + PAGE_SIZE; + guard_entry->object.uvm_obj = uobj; + guard_entry->offset = uoffset; + guard_entry->etype = MAP_ET_KVAGUARD; + guard_entry->protection = prot; + guard_entry->max_protection = maxprot; + guard_entry->inheritance = inherit; + guard_entry->wired_count = 0; + guard_entry->advice = advice; + guard_entry->aref.ar_pageoff = 0; + guard_entry->aref.ar_amap = NULL; + uvm_map_entry_link(map, new_entry, guard_entry); + map->size += PAGE_SIZE; + kva_guardpages++; } } +#endif - /* - * Link next into address tree. - * Link orig and next into free-space tree. - */ - uvm_mapent_addr_insert(map, next); - if (orig->fspace > 0 && free) - uvm_mapent_free_insert(map, free, orig); - if (next->fspace > 0 && free) - uvm_mapent_free_insert(map, free, next); + uvm_tree_sanity(map, "map leave"); - uvm_tree_sanity(map, __FILE__, __LINE__); + UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + vm_map_unlock(map); + return (0); } +/* + * uvm_map_lookup_entry: find map entry at or before an address + * + * => map must at least be read-locked by caller + * => entry is returned in "entry" + * => return value is true if address is in the returned entry + */ -#ifdef DEBUG - -void -uvm_tree_assert(struct vm_map *map, int test, char *test_str, - char *file, int line) +boolean_t +uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, + struct vm_map_entry **entry) { - char* map_special; + struct vm_map_entry *cur; + struct vm_map_entry *last; + int use_tree = 0; + UVMHIST_FUNC("uvm_map_lookup_entry"); + UVMHIST_CALLED(maphist); - if (test) - return; + UVMHIST_LOG(maphist,"(map=%p,addr=0x%lx,ent=%p)", + map, address, entry, 0); - if (map == kernel_map) - map_special = " (kernel_map)"; - else if (map == kmem_map) - map_special = " (kmem_map)"; - else - map_special = ""; - panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, - line, test_str); -} + /* + * start looking either from the head of the + * list, or from the hint. + */ -/* - * Check that free space tree is sane. 
- */ -void -uvm_tree_sanity_free(struct vm_map *map, struct uvm_map_free *free, - char *file, int line) -{ - struct vm_map_entry *iter; - vsize_t space, sz; + simple_lock(&map->hint_lock); + cur = map->hint; + simple_unlock(&map->hint_lock); - space = PAGE_SIZE; - sz = 0; - RB_FOREACH(iter, uvm_map_free_int, &free->tree) { - sz++; + if (cur == &map->header) + cur = cur->next; - UVM_ASSERT(map, iter->fspace >= space, file, line); - space = iter->fspace; + UVMCNT_INCR(uvm_mlk_call); + if (address >= cur->start) { + /* + * go from hint to end of list. + * + * but first, make a quick check to see if + * we are already looking at the entry we + * want (which is usually the case). + * note also that we don't need to save the hint + * here... it is the same hint (unless we are + * at the header, in which case the hint didn't + * buy us anything anyway). + */ + last = &map->header; + if ((cur != last) && (cur->end > address)) { + UVMCNT_INCR(uvm_mlk_hint); + *entry = cur; + UVMHIST_LOG(maphist,"<- got it via hint (%p)", + cur, 0, 0, 0); + return (TRUE); + } - UVM_ASSERT(map, RB_FIND(uvm_map_addr, &map->addr, iter) == iter, - file, line); + if (map->nentries > 30) + use_tree = 1; + } else { + /* + * go from start to hint, *inclusively* + */ + last = cur->next; + cur = map->header.next; + use_tree = 1; } - UVM_ASSERT(map, free->treesz == sz, file, line); -} -/* - * Check that map is sane. - */ -void -uvm_tree_sanity(struct vm_map *map, char *file, int line) -{ - struct vm_map_entry *iter; - struct uvm_map_free *free; - vaddr_t addr; - vaddr_t min, max, bound; /* Bounds checker. */ + uvm_tree_sanity(map, __func__); - addr = vm_map_min(map); - RB_FOREACH(iter, uvm_map_addr, &map->addr) { - /* - * Valid start, end. - * Catch overflow for end+fspace. - */ - UVM_ASSERT(map, iter->end >= iter->start, file, line); - UVM_ASSERT(map, FREE_END(iter) >= iter->end, file, line); - /* - * May not be empty. - */ - UVM_ASSERT(map, iter->start < FREE_END(iter), file, line); + if (use_tree) { + struct vm_map_entry *prev = &map->header; + cur = RB_ROOT(&map->rbhead); /* - * Addresses for entry must lie within map boundaries. + * Simple lookup in the tree. Happens when the hint is + * invalid, or nentries reach a threshold. */ - UVM_ASSERT(map, iter->start >= vm_map_min(map) && - FREE_END(iter) <= vm_map_max(map), file, line); + while (cur) { + if (address >= cur->start) { + if (address < cur->end) { + *entry = cur; + SAVE_HINT(map, map->hint, cur); + return (TRUE); + } + prev = cur; + cur = RB_RIGHT(cur, rb_entry); + } else + cur = RB_LEFT(cur, rb_entry); + } + *entry = prev; + UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); + return (FALSE); + } - /* - * Tree may not have gaps. - */ - UVM_ASSERT(map, iter->start == addr, file, line); - addr = FREE_END(iter); + /* + * search linearly + */ - /* - * Free space may not cross boundaries, unless the same - * free list is used on both sides of the border. - */ - min = FREE_START(iter); - max = FREE_END(iter); - - while (min < max && - (bound = uvm_map_boundary(map, min, max)) != max) { - UVM_ASSERT(map, - UVM_FREE(map, min) == UVM_FREE(map, bound), - file, line); - min = bound; - } + while (cur != last) { + if (cur->end > address) { + if (address >= cur->start) { + /* + * save this lookup for future + * hints, and return + */ - /* - * Entries with free space must appear in the free list. 
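The uvm_map_lookup_entry() brought back above first tries the per-map hint, then either walks the list or, for larger maps, descends the address tree looking for the entry that contains the address or, failing that, its closest predecessor. A compact model of that tree descent with a hypothetical node type:

/*
 * "Entry at or before an address" via a BST keyed on start.
 */
#include <stdbool.h>
#include <stddef.h>

struct node {
	unsigned long start, end;	/* [start, end) */
	struct node *left, *right;
};

/*
 * Returns true when *out contains addr; otherwise *out is the closest
 * entry before addr, or NULL if addr precedes every entry.
 */
static bool
lookup(struct node *root, unsigned long addr, struct node **out)
{
	struct node *prev = NULL;

	while (root != NULL) {
		if (addr >= root->start) {
			if (addr < root->end) {
				*out = root;
				return true;
			}
			prev = root;	/* best predecessor so far */
			root = root->right;
		} else {
			root = root->left;
		}
	}
	*out = prev;
	return false;
}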
- */ - free = UVM_FREE(map, FREE_START(iter)); - if (iter->fspace > 0 && free) { - UVM_ASSERT(map, - RB_FIND(uvm_map_free_int, &free->tree, iter) == - iter, file, line); + *entry = cur; + SAVE_HINT(map, map->hint, cur); + UVMHIST_LOG(maphist,"<- search got it (%p)", + cur, 0, 0, 0); + return (TRUE); + } + break; } + cur = cur->next; } - UVM_ASSERT(map, addr == vm_map_max(map), file, line); - uvm_tree_sanity_free(map, &map->free, file, line); - uvm_tree_sanity_free(map, &map->bfree, file, line); + *entry = cur->prev; + SAVE_HINT(map, map->hint, *entry); + UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); + return (FALSE); } -void -uvm_tree_size_chk(struct vm_map *map, char *file, int line) -{ - struct vm_map_entry *iter; - vsize_t size; +/* + * Checks if address pointed to by phint fits into the empty + * space before the vm_map_entry after. Takes alignment and + * offset into consideration. + */ - size = 0; - RB_FOREACH(iter, uvm_map_addr, &map->addr) { - if (!UVM_ET_ISHOLE(iter)) - size += iter->end - iter->start; - } +int +uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length, + struct vm_map_entry *after, voff_t uoffset, vsize_t align) +{ + vaddr_t hint = *phint; + vaddr_t end; - if (map->size != size) - printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); - UVM_ASSERT(map, map->size == size, file, line); +#ifdef PMAP_PREFER + /* + * push hint forward as needed to avoid VAC alias problems. + * we only do this if a valid offset is specified. + */ + if (uoffset != UVM_UNKNOWN_OFFSET) + hint = PMAP_PREFER(uoffset, hint); +#endif + if (align != 0) + if ((hint & (align - 1)) != 0) + hint = roundup(hint, align); + *phint = hint; - vmspace_validate(map); + end = hint + length; + if (end > map->max_offset || end < hint) + return (FALSE); + if (after != NULL && after != &map->header && after->start < end) + return (FALSE); + + return (TRUE); } /* - * This function validates the statistics on vmspace. + * uvm_map_pie: return a random load address for a PIE executable + * properly aligned. */ -void -vmspace_validate(struct vm_map *map) + +#ifndef VM_PIE_MAX_ADDR +#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) +#endif + +#ifndef VM_PIE_MIN_ADDR +#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS +#endif + +#ifndef VM_PIE_MIN_ALIGN +#define VM_PIE_MIN_ALIGN PAGE_SIZE +#endif + +vaddr_t +uvm_map_pie(vaddr_t align) { - struct vmspace *vm; - struct vm_map_entry *iter; - vaddr_t imin, imax; - vaddr_t stack_begin, stack_end; /* Position of stack. */ - vsize_t stack, heap; /* Measured sizes. */ + vaddr_t addr, space, min; - if (!(map->flags & VM_MAP_ISVMSPACE)) - return; + align = MAX(align, VM_PIE_MIN_ALIGN); - vm = (struct vmspace *)map; - stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); - stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + /* round up to next alignment */ + min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); - stack = heap = 0; - RB_FOREACH(iter, uvm_map_addr, &map->addr) { - imin = imax = iter->start; + if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) + return (align); - if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) - continue; + space = (VM_PIE_MAX_ADDR - min) / align; + space = MIN(space, (u_int32_t)-1); - /* - * Update stack, heap. - * Keep in mind that (theoretically) the entries of - * userspace and stack may be joined. - */ - while (imin != iter->end) { - /* - * Set imax to the first boundary crossed between - * imin and stack addresses. 
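The uvm_map_spacefits() added above decides whether a candidate hint can hold the requested length: it may push the hint forward for PMAP_PREFER and alignment, then rejects the spot on address wrap-around, on running past the end of the map, or on colliding with the following entry. A minimal stand-alone version of that test; PMAP_PREFER is left out, align is assumed to be a power of two (as the caller asserts), and map_max should be passed as next_start when there is no following entry.

/*
 * Stand-alone space-fit test (power-of-two align assumed).
 */
#include <stdbool.h>

static bool
space_fits(unsigned long *hint, unsigned long length, unsigned long align,
    unsigned long next_start, unsigned long map_max)
{
	unsigned long h = *hint, end;

	if (align != 0 && (h & (align - 1)) != 0)
		h = (h + align - 1) & ~(align - 1);	/* round up */
	*hint = h;

	end = h + length;
	if (end < h || end > map_max)	/* wrapped, or off the end of the map */
		return false;
	return next_start >= end;	/* no collision with the next entry */
}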
- */ - imax = iter->end; - if (imin < stack_begin && imax > stack_begin) - imax = stack_begin; - else if (imin < stack_end && imax > stack_end) - imax = stack_end; - - if (imin >= stack_begin && imin < stack_end) - stack += imax - imin; - else - heap += imax - imin; - imin = imax; - } - } + addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; + addr += min; - heap >>= PAGE_SHIFT; - if (heap != vm->vm_dused) { - printf("vmspace stack range: 0x%lx-0x%lx\n", - stack_begin, stack_end); - panic("vmspace_validate: vmspace.vm_dused invalid, " - "expected %ld pgs, got %ld pgs in map %p", - heap, vm->vm_dused, - map); - } + return (addr); } -#endif /* DEBUG */ - /* - * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of structs vm_map_entry for the kernel here. + * uvm_map_hint: return the beginning of the best area suitable for + * creating a new mapping with "prot" protection. */ -void -uvm_map_init(void) +vaddr_t +uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap) { - static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; -#if defined(UVMHIST) - static struct uvm_history_ent maphistbuf[100]; - static struct uvm_history_ent pdhistbuf[100]; -#endif - int lcv; - - /* - * first, init logging system. - */ - - UVMHIST_FUNC("uvm_map_init"); - UVMHIST_INIT_STATIC(maphist, maphistbuf); - UVMHIST_INIT_STATIC(pdhist, pdhistbuf); - UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); - UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, - "# uvm_map() successful calls", 0); - UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); - UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", - 0); - UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0); - UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); - UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); + vaddr_t addr; +#ifdef __i386__ /* - * now set up static pool of kernel map entries ... + * If executable skip first two pages, otherwise start + * after data + heap region. */ - - simple_lock_init(&uvm.kentry_lock); - uvm.kentry_free = NULL; - for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { - RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = - uvm.kentry_free; - uvm.kentry_free = &kernel_map_entry[lcv]; + if ((prot & VM_PROT_EXECUTE) && + ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { + addr = (PAGE_SIZE*2) + + (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); + return (round_page(addr)); } - - /* - * initialize the map-related pools. - */ - pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), - 0, 0, 0, "vmsppl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpepl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpekpl", NULL); - pool_sethiwat(&uvm_map_entry_pool, 8192); +#endif + /* start malloc/mmap after the brk */ + addr = (vaddr_t)p->p_vmspace->vm_daddr; + if (skipheap) + addr += BRKSIZ; +#if !defined(__vax__) + addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); +#endif + return (round_page(addr)); } -#if defined(DDB) - /* - * DDB hooks + * uvm_map_findspace: find "length" sized space in "map". + * + * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is + * set (in which case we insist on using "hint"). 
+ * => "result" is VA returned + * => uobj/uoffset are to be used to handle VAC alignment, if required + * => if `align' is non-zero, we attempt to align to that value. + * => caller must at least have read-locked map + * => returns NULL on failure, or pointer to prev. map entry if success + * => note this is a cross between the old vm_map_findspace and vm_map_find */ -/* - * uvm_map_printit: actually prints the map - */ -void -uvm_map_printit(struct vm_map *map, boolean_t full, - int (*pr)(const char *, ...)) +struct vm_map_entry * +uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, + vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align, + int flags) { - struct vmspace *vm; - struct vm_map_entry *entry; - struct uvm_map_free *free; - int in_free; - - (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); - (*pr)("\tbrk() allocate range: 0x%lx-0x%lx %ld segments\n", - map->b_start, map->b_end, uvm_mapfree_size(&map->bfree)); - (*pr)("\tstack allocate range: 0x%lx-0x%lx %ld segments\n", - map->s_start, map->s_end, uvm_mapfree_size(&map->bfree)); - (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", - map->size, map->ref_count, map->timestamp, - map->flags); -#ifdef pmap_resident_count - (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, - pmap_resident_count(map->pmap)); -#else - /* XXXCDC: this should be required ... */ - (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); -#endif - - /* - * struct vmspace handling. - */ - if (map->flags & VM_MAP_ISVMSPACE) { - vm = (struct vmspace *)map; - - (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", - vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); - (*pr)("\tvm_tsize=%u vm_dsize=%u\n", - vm->vm_tsize, vm->vm_dsize); - (*pr)("\tvm_taddr=%p vm_daddr=%p\n", - vm->vm_taddr, vm->vm_daddr); - (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", - vm->vm_maxsaddr, vm->vm_minsaddr); - } + struct vm_map_entry *entry, *next, *tmp; + struct vm_map_entry *child, *prev = NULL; - if (!full) - return; - RB_FOREACH(entry, uvm_map_addr, &map->addr) { - (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", - entry, entry->start, entry->end, entry->object.uvm_obj, - (long long)entry->offset, entry->aref.ar_amap, - entry->aref.ar_pageoff); - (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " - "wc=%d, adv=%d\n", - (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', - (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', - (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', - entry->protection, entry->max_protection, - entry->inheritance, entry->wired_count, entry->advice); + vaddr_t end, orig_hint; + UVMHIST_FUNC("uvm_map_findspace"); + UVMHIST_CALLED(maphist); - free = UVM_FREE(map, FREE_START(entry)); - in_free = (free != NULL) && - (RB_FIND(uvm_map_free_int, &free->tree, entry) == entry); - (*pr)("\thole=%c, free=%c, guard=0x%lx, " - "free=0x%lx-0x%lx\n", - (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', - in_free ? 
'T' : 'F', - entry->guard, - FREE_START(entry), FREE_END(entry)); - } -} + UVMHIST_LOG(maphist, "(map=%p, hint=0x%lx, len=%ld, flags=0x%lx)", + map, hint, length, flags); + KASSERT((align & (align - 1)) == 0); + KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); -/* - * uvm_object_printit: actually prints the object - */ -void -uvm_object_printit(uobj, full, pr) - struct uvm_object *uobj; - boolean_t full; - int (*pr)(const char *, ...); -{ - struct vm_page *pg; - int cnt = 0; + uvm_tree_sanity(map, "map_findspace entry"); - (*pr)("OBJECT %p: pgops=%p, npages=%d, ", - uobj, uobj->pgops, uobj->uo_npages); - if (UVM_OBJ_IS_KERN_OBJECT(uobj)) - (*pr)("refs=<SYSTEM>\n"); - else - (*pr)("refs=%d\n", uobj->uo_refs); + /* + * remember the original hint. if we are aligning, then we + * may have to try again with no alignment constraint if + * we fail the first time. + */ - if (!full) { - return; - } - (*pr)(" PAGES <pg,offset>:\n "); - RB_FOREACH(pg, uvm_objtree, &uobj->memt) { - (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); - if ((cnt % 3) == 2) { - (*pr)("\n "); + orig_hint = hint; + if (hint < map->min_offset) { /* check ranges ... */ + if (flags & UVM_FLAG_FIXED) { + UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); + return(NULL); } - cnt++; + hint = map->min_offset; } - if ((cnt % 3) != 2) { - (*pr)("\n"); + if (hint > map->max_offset) { + UVMHIST_LOG(maphist,"<- VA 0x%lx > range [0x%lx->0x%lx]", + hint, map->min_offset, map->max_offset, 0); + return(NULL); } -} - -/* - * uvm_page_printit: actually print the page - */ -static const char page_flagbits[] = - "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" - "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" - "\31PMAP1\32PMAP2\33PMAP3"; - -void -uvm_page_printit(pg, full, pr) - struct vm_page *pg; - boolean_t full; - int (*pr)(const char *, ...); -{ - struct vm_page *tpg; - struct uvm_object *uobj; - struct pglist *pgl; - - (*pr)("PAGE %p:\n", pg); - (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", - pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, - (long long)pg->phys_addr); - (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", - pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); -#if defined(UVM_PAGE_TRKOWN) - if (pg->pg_flags & PG_BUSY) - (*pr)(" owning process = %d, tag=%s\n", - pg->owner, pg->owner_tag); - else - (*pr)(" page not busy, no owner\n"); -#else - (*pr)(" [page ownership tracking disabled]\n"); -#endif - if (!full) - return; + /* + * Look for the first possible address; if there's already + * something at this address, we have to start after it. + */ - /* cross-verify object/anon */ - if ((pg->pg_flags & PQ_FREE) == 0) { - if (pg->pg_flags & PQ_ANON) { - if (pg->uanon == NULL || pg->uanon->an_page != pg) - (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", - (pg->uanon) ? pg->uanon->an_page : NULL); - else - (*pr)(" anon backpointer is OK\n"); - } else { - uobj = pg->uobject; - if (uobj) { - (*pr)(" checking object list\n"); - RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { - if (tpg == pg) { - break; - } - } - if (tpg) - (*pr)(" page found on object list\n"); - else - (*pr)(" >>> PAGE NOT FOUND " - "ON OBJECT LIST! <<<\n"); + if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { + if ((entry = map->first_free) != &map->header) + hint = entry->end; + } else { + if (uvm_map_lookup_entry(map, hint, &tmp)) { + /* "hint" address already in use ... 
*/ + if (flags & UVM_FLAG_FIXED) { + UVMHIST_LOG(maphist,"<- fixed & VA in use", + 0, 0, 0, 0); + return(NULL); } + hint = tmp->end; } + entry = tmp; } - /* cross-verify page queue */ - if (pg->pg_flags & PQ_FREE) { - if (uvm_pmr_isfree(pg)) - (*pr)(" page found in uvm_pmemrange\n"); - else - (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); - pgl = NULL; - } else if (pg->pg_flags & PQ_INACTIVE) { - pgl = (pg->pg_flags & PQ_SWAPBACKED) ? - &uvm.page_inactive_swp : &uvm.page_inactive_obj; - } else if (pg->pg_flags & PQ_ACTIVE) { - pgl = &uvm.page_active; - } else { - pgl = NULL; - } - - if (pgl) { - (*pr)(" checking pageq list\n"); - TAILQ_FOREACH(tpg, pgl, pageq) { - if (tpg == pg) { + if (flags & UVM_FLAG_FIXED) { + end = hint + length; + if (end > map->max_offset || end < hint) { + UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); + goto error; + } + next = entry->next; + if (next == &map->header || next->start >= end) + goto found; + UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0); + return(NULL); /* only one shot at it ... */ + } + + /* Try to find the space in the red-black tree */ + + /* Check slot before any entry */ + if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align)) + goto found; + + /* If there is not enough space in the whole tree, we fail */ + tmp = RB_ROOT(&map->rbhead); + if (tmp == NULL || tmp->space < length) + goto error; + + /* Find an entry close to hint that has enough space */ + for (; tmp;) { + if (tmp->end >= hint && + (prev == NULL || tmp->end < prev->end)) { + if (tmp->ownspace >= length) + prev = tmp; + else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL && + child->space >= length) + prev = tmp; + } + if (tmp->end < hint) + child = RB_RIGHT(tmp, rb_entry); + else if (tmp->end > hint) + child = RB_LEFT(tmp, rb_entry); + else { + if (tmp->ownspace >= length) break; - } + child = RB_RIGHT(tmp, rb_entry); } - if (tpg) - (*pr)(" page found on pageq list\n"); - else - (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); + if (child == NULL || child->space < length) + break; + tmp = child; } -} -#endif - -/* - * uvm_map_protect: change map protection - * - * => set_max means set max_protection. - * => map must be unlocked. - */ -int -uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t new_prot, boolean_t set_max) -{ - struct vm_map_entry *first, *iter; - vm_prot_t old_prot; - vm_prot_t mask; - int error; - - if (start > end) - return EINVAL; - start = MAX(start, map->min_offset); - end = MIN(end, map->max_offset); - if (start >= end) - return 0; - - error = 0; - vm_map_lock(map); - - /* - * Set up first and last. - * - first will contain first entry at or after start. - */ - first = uvm_map_entrybyaddr(&map->addr, start); - KDASSERT(first != NULL); - if (first->end < start) - first = RB_NEXT(uvm_map_addr, &map->addr, first); - - /* - * First, check for protection violations. - */ - for (iter = first; iter != NULL && iter->start < end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - /* Treat memory holes as free space. 
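The red-black tree search restored in uvm_map_findspace() above relies on two cached sizes per entry: ownspace, the free gap immediately after that entry, and space, the largest such gap anywhere in its subtree, which lets whole subtrees be skipped. A simplified sketch that ignores the hint preference of the real code and simply descends to the lowest-addressed gap that is large enough; the node type is hypothetical.

/*
 * Augmented-tree gap search (hypothetical node type).
 */
#include <stddef.h>

struct fnode {
	unsigned long end;		/* end of this mapping */
	unsigned long ownspace;		/* free bytes directly after it */
	unsigned long space;		/* max ownspace within this subtree */
	struct fnode *left, *right;
};

/* Return the lowest-addressed node whose own gap can hold length. */
static struct fnode *
find_gap(struct fnode *root, unsigned long length)
{
	if (root == NULL || root->space < length)
		return NULL;		/* nothing big enough down here */

	for (;;) {
		/* Prefer the leftmost (lowest address) suitable gap. */
		if (root->left != NULL && root->left->space >= length)
			root = root->left;
		else if (root->ownspace >= length)
			return root;
		else
			root = root->right;	/* space says it must be here */
	}
}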
*/ - if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) + + if (tmp != NULL && hint < tmp->end + tmp->ownspace) { + /* + * Check if the entry that we found satifies the + * space requirement + */ + if (hint < tmp->end) + hint = tmp->end; + if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, + align)) { + entry = tmp; + goto found; + } else if (tmp->ownspace >= length) + goto listsearch; + } + if (prev == NULL) + goto error; + + hint = prev->end; + if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset, + align)) { + entry = prev; + goto found; + } else if (prev->ownspace >= length) + goto listsearch; + + tmp = RB_RIGHT(prev, rb_entry); + for (;;) { + KASSERT(tmp && tmp->space >= length); + child = RB_LEFT(tmp, rb_entry); + if (child && child->space >= length) { + tmp = child; continue; - - if (UVM_ET_ISSUBMAP(iter)) { - error = EINVAL; - goto out; - } - if ((new_prot & iter->max_protection) != new_prot) { - error = EACCES; - goto out; } + if (tmp->ownspace >= length) + break; + tmp = RB_RIGHT(tmp, rb_entry); + } + + hint = tmp->end; + if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) { + entry = tmp; + goto found; } + /* + * The tree fails to find an entry because of offset or alignment + * restrictions. Search the list instead. + */ + listsearch: /* - * Fix protections. + * Look through the rest of the map, trying to fit a new region in + * the gap between existing regions, or after the very last region. + * note: entry->end = base VA of current gap, + * next->start = VA of end of current gap */ - for (iter = first; iter != NULL && iter->start < end; - iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { - /* Treat memory holes as free space. */ - if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) - continue; - - old_prot = iter->protection; - + for (;; hint = (entry = next)->end) { /* - * Skip adapting protection iff old and new protection - * are equal. + * Find the end of the proposed new region. Be sure we didn't + * go beyond the end of the map, or wrap around the address; + * if so, we lose. Otherwise, if this is the last entry, or + * if the proposed new region fits before the next entry, we + * win. */ - if (set_max) { - if (old_prot == (new_prot & old_prot) && - iter->max_protection == new_prot) - continue; - } else { - if (old_prot == new_prot) - continue; - } - - UVM_MAP_CLIP_START(map, iter, start); - UVM_MAP_CLIP_END(map, iter, end); - - if (set_max) { - iter->max_protection = new_prot; - iter->protection &= new_prot; - } else - iter->protection = new_prot; +#ifdef PMAP_PREFER /* - * update physical map if necessary. worry about copy-on-write - * here -- CHECK THIS XXX + * push hint forward as needed to avoid VAC alias problems. + * we only do this if a valid offset is specified. */ - if (iter->protection != old_prot) { - mask = UVM_ET_ISCOPYONWRITE(iter) ? - ~VM_PROT_WRITE : VM_PROT_ALL; - - /* update pmap */ - if ((iter->protection & mask) == PROT_NONE && - VM_MAPENT_ISWIRED(iter)) { - /* - * TODO(ariane) this is stupid. wired_count - * is 0 if not wired, otherwise anything - * larger than 0 (incremented once each time - * wire is called). - * Mostly to be able to undo the damage on - * failure. Not the actually be a wired - * refcounter... - * Originally: iter->wired_count--; - * (don't we have to unwire this in the pmap - * as well?) 
- */ - iter->wired_count = 0; - } - pmap_protect(map->pmap, iter->start, iter->end, - iter->protection & mask); + if (uoffset != UVM_UNKNOWN_OFFSET) + hint = PMAP_PREFER(uoffset, hint); +#endif + if (align != 0) { + if ((hint & (align - 1)) != 0) + hint = roundup(hint, align); + /* + * XXX Should we PMAP_PREFER() here again? + */ } - - /* - * If the map is configured to lock any future mappings, - * wire this entry now if the old protection was VM_PROT_NONE - * and the new protection is not VM_PROT_NONE. - */ - if ((map->flags & VM_MAP_WIREFUTURE) != 0 && - VM_MAPENT_ISWIRED(iter) == 0 && - old_prot == VM_PROT_NONE && - new_prot != VM_PROT_NONE) { - if (uvm_map_pageable(map, iter->start, iter->end, - FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { - /* - * If locking the entry fails, remember the - * error if it's the first one. Note we - * still continue setting the protection in - * the map, but it will return the resource - * storage condition regardless. - * - * XXX Ignore what the actual error is, - * XXX just call it a resource shortage - * XXX so that it doesn't get confused - * XXX what uvm_map_protect() itself would - * XXX normally return. - */ - error = ENOMEM; - } + end = hint + length; + if (end > map->max_offset || end < hint) { + UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); + goto error; } + next = entry->next; + if (next == &map->header || next->start >= end) + break; } - pmap_update(map->pmap); + found: + SAVE_HINT(map, map->hint, entry); + *result = hint; + UVMHIST_LOG(maphist,"<- got it! (result=0x%lx)", hint, 0,0,0); + return (entry); -out: - vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); - return error; + error: + if (align != 0) { + UVMHIST_LOG(maphist, + "calling recursively, no align", + 0,0,0,0); + return (uvm_map_findspace(map, orig_hint, + length, result, uobj, uoffset, 0, flags)); + } + return (NULL); } /* - * uvmspace_alloc: allocate a vmspace structure. - * - * - structure includes vm_map and pmap - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * U N M A P - m a i n e n t r y p o i n t */ -struct vmspace * -uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, - boolean_t remove_holes) -{ - struct vmspace *vm; - UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); - - vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); - uvmspace_init(vm, NULL, min, max, pageable, remove_holes); - UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0); - return (vm); -} /* - * uvmspace_init: initialize a vmspace structure. + * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") * - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * => caller must check alignment and size + * => map must be unlocked (we will lock it) */ void -uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, - boolean_t pageable, boolean_t remove_holes) +uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p) { - UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); - - if (pmap) - pmap_reference(pmap); - else - pmap = pmap_create(); - vm->vm_map.pmap = pmap; + vm_map_entry_t dead_entries; + UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); - uvm_map_setup(&vm->vm_map, min, max, - (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); + UVMHIST_LOG(maphist, " (map=%p, start=0x%lx, end=0x%lx)", + map, start, end, 0); - vm->vm_refcnt = 1; + /* + * work now done by helper functions. 
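
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * restored uvm_map_findspace() above leans on two per-entry fields:
 * "ownspace" (the free gap directly after an entry) and "space" (as I
 * read the restored code, the largest such gap anywhere in the entry's
 * subtree), so whole subtrees can be skipped when they cannot hold a
 * gap of the requested size.  A minimal user-space model of that
 * descent, ignoring the hint and alignment handling (struct gapnode
 * and find_gap are hypothetical names):
 */
#include <stddef.h>

struct gapnode {
    struct gapnode  *left, *right;
    unsigned long    end;       /* end of this mapping */
    unsigned long    ownspace;  /* free space directly after it */
    unsigned long    space;     /* max ownspace in this subtree */
};

/* Return the lowest entry whose following gap is at least len. */
static struct gapnode *
find_gap(struct gapnode *n, unsigned long len)
{
    while (n != NULL && n->space >= len) {
        if (n->left != NULL && n->left->space >= len) {
            n = n->left;        /* a lower candidate exists */
            continue;
        }
        if (n->ownspace >= len)
            return (n);         /* this entry's own gap fits */
        n = n->right;           /* otherwise it must be to the right */
    }
    return (NULL);              /* no gap of that size anywhere */
}
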
wipe the pmap's and then + * detach from the dead entries... + */ + vm_map_lock(map); + uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE); + vm_map_unlock(map); - if (remove_holes) - pmap_remove_holes(&vm->vm_map); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); - UVMHIST_LOG(maphist,"<- done",0,0,0,0); + UVMHIST_LOG(maphist, "<- done", 0,0,0,0); } + /* - * uvmspace_share: share a vmspace between two proceses - * - * - XXX: no locking on vmspace - * - used for vfork, threads(?) + * U N M A P - m a i n h e l p e r f u n c t i o n s */ -void -uvmspace_share(p1, p2) - struct proc *p1, *p2; -{ - p2->p_vmspace = p1->p_vmspace; - p1->p_vmspace->vm_refcnt++; -} - /* - * uvmspace_exec: the process wants to exec a new program + * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") * - * - XXX: no locking on vmspace + * => caller must check alignment and size + * => map must be locked by caller + * => we return a list of map entries that we've remove from the map + * in "entry_list" */ void -uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) +uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, + struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes) { - struct vmspace *nvm, *ovm = p->p_vmspace; - struct vm_map *map = &ovm->vm_map; - struct uvm_map_deadq dead_entries; + struct vm_map_entry *entry, *first_entry, *next; + vaddr_t len; + UVMHIST_FUNC("uvm_unmap_remove"); + UVMHIST_CALLED(maphist); - KASSERT((start & PAGE_MASK) == 0); - KASSERT((end & PAGE_MASK) == 0 || (end & PAGE_MASK) == PAGE_MASK); + UVMHIST_LOG(maphist,"(map=%p, start=0x%lx, end=0x%lx)", + map, start, end, 0); - pmap_unuse_final(p); /* before stack addresses go away */ - TAILQ_INIT(&dead_entries); + VM_MAP_RANGE_CHECK(map, start, end); + + uvm_tree_sanity(map, "unmap_remove entry"); + + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); /* - * see if more than one process is using this vmspace... + * find first entry */ + if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { + /* clip and go... */ + entry = first_entry; + UVM_MAP_CLIP_START(map, entry, start); + /* critical! prevents stale hint */ + SAVE_HINT(map, entry, entry->prev); - if (ovm->vm_refcnt == 1) { - /* - * if p is the only process using its vmspace then we can safely - * recycle that vmspace for the program that is being exec'd. - */ + } else { + entry = first_entry->next; + } -#ifdef SYSVSHM - /* - * SYSV SHM semantics require us to kill all segments on an exec - */ - if (ovm->vm_shm) - shmexit(ovm); -#endif + /* + * Save the free space hint + */ - /* - * POSIX 1003.1b -- "lock future mappings" is revoked - * when a process execs another program image. - */ - vm_map_lock(map); - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); + if (map->first_free->start >= start) + map->first_free = entry->prev; + + /* + * note: we now re-use first_entry for a different task. we remove + * a number of map entries from the map and save them in a linked + * list headed by "first_entry". once we remove them from the map + * the caller should unlock the map and drop the references to the + * backing objects [c.f. uvm_unmap_detach]. the object is to + * separate unmapping from reference dropping. why? + * [1] the map has to be locked for unmapping + * [2] the map need not be locked for reference dropping + * [3] dropping references may trigger pager I/O, and if we hit + * a pager that does synchronous I/O we may have to wait for it. 
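
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * restored uvm_unmap_p() above is a two-phase pattern: unlink the
 * entries while the map is locked (uvm_unmap_remove), then drop their
 * amap/object references only after the lock is released
 * (uvm_unmap_detach), so that slow reference dropping -- which may
 * sleep on pager I/O -- never happens with the map lock held.  A
 * generic user-space version of the same shape (all names
 * hypothetical):
 */
#include <pthread.h>
#include <stdlib.h>

struct item {
    struct item *next;
    void        *resource;
};

static pthread_mutex_t   list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item      *the_list;

void
remove_then_detach(int (*match)(struct item *))
{
    struct item *it, **prevp, *dead = NULL;

    pthread_mutex_lock(&list_lock);         /* "remove" phase, locked */
    for (prevp = &the_list; (it = *prevp) != NULL; ) {
        if (match(it)) {
            *prevp = it->next;
            it->next = dead;                /* collect on a dead list */
            dead = it;
        } else
            prevp = &it->next;
    }
    pthread_mutex_unlock(&list_lock);

    while (dead != NULL) {                  /* "detach" phase, unlocked */
        it = dead;
        dead = it->next;
        free(it->resource);                 /* stands in for dropping refs */
        free(it);
    }
}
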
+ * [4] we would like all waiting for I/O to occur with maps unlocked + * so that we don't block other threads. + */ + first_entry = NULL; + *entry_list = NULL; /* to be safe */ + + /* + * break up the area into map entry sized regions and unmap. note + * that all mappings have to be removed before we can even consider + * dropping references to amaps or VM objects (otherwise we could end + * up with a mapping to a page on the free list which would be very bad) + */ + + while ((entry != &map->header) && (entry->start < end)) { + + UVM_MAP_CLIP_END(map, entry, end); + next = entry->next; + len = entry->end - entry->start; + if (p && entry->object.uvm_obj == NULL) + p->p_vmspace->vm_dused -= atop(len); /* - * now unmap the old program - * - * Instead of attempting to keep the map valid, we simply - * nuke all entries and ask uvm_map_setup to reinitialize - * the map to the new boundaries. - * - * uvm_unmap_remove will actually nuke all entries for us - * (as in, not replace them with free-memory entries). + * unwire before removing addresses from the pmap; otherwise + * unwiring will put the entries back into the pmap (XXX). */ - uvm_unmap_remove(map, map->min_offset, map->max_offset, - &dead_entries, TRUE, FALSE); - KDASSERT(RB_EMPTY(&map->addr)); + if (VM_MAPENT_ISWIRED(entry)) + uvm_map_entry_unwire(map, entry); /* - * Nuke statistics and boundaries. + * special case: handle mappings to anonymous kernel objects. + * we want to free these pages right away... */ - bzero(&ovm->vm_startcopy, - (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); +#ifdef KVA_GUARDPAGES + if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) { + entry->etype &= ~MAP_ET_KVAGUARD; + kva_guardpages--; + } else /* (code continues across line-break) */ +#endif + if (UVM_ET_ISHOLE(entry)) { + if (!remove_holes) { + entry = next; + continue; + } + } else if (map->flags & VM_MAP_INTRSAFE) { + uvm_km_pgremove_intrsafe(entry->start, entry->end); + pmap_kremove(entry->start, len); + } else if (UVM_ET_ISOBJ(entry) && + UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { + KASSERT(vm_map_pmap(map) == pmap_kernel()); + /* + * note: kernel object mappings are currently used in + * two ways: + * [1] "normal" mappings of pages in the kernel object + * [2] uvm_km_valloc'd allocations in which we + * pmap_enter in some non-kernel-object page + * (e.g. vmapbuf). + * + * for case [1], we need to remove the mapping from + * the pmap and then remove the page from the kernel + * object (because, once pages in a kernel object are + * unmapped they are no longer needed, unlike, say, + * a vnode where you might want the data to persist + * until flushed out of a queue). + * + * for case [2], we need to remove the mapping from + * the pmap. there shouldn't be any pages at the + * specified offset in the kernel object [but it + * doesn't hurt to call uvm_km_pgremove just to be + * safe?] + * + * uvm_km_pgremove currently does the following: + * for pages in the kernel object in range: + * - drops the swap slot + * - uvm_pagefree the page + * + * note there is version of uvm_km_pgremove() that + * is used for "intrsafe" objects. + */ - if (end & PAGE_MASK) { - end += 1; - if (end == 0) /* overflow */ - end -= PAGE_SIZE; - } + /* + * remove mappings from pmap and drop the pages + * from the object. offsets are always relative + * to vm_map_min(kernel_map). 
+ */ + pmap_remove(pmap_kernel(), entry->start, entry->end); + uvm_km_pgremove(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); - /* - * Setup new boundaries and populate map with entries. - */ - map->min_offset = start; - map->max_offset = end; - uvm_map_setup_entries(map); - vm_map_unlock(map); + /* + * null out kernel_object reference, we've just + * dropped it + */ + entry->etype &= ~UVM_ET_OBJ; + entry->object.uvm_obj = NULL; /* to be safe */ + + } else { + /* + * remove mappings the standard way. + */ + pmap_remove(map->pmap, entry->start, entry->end); + } /* - * but keep MMU holes unavailable + * remove entry from map and put it on our list of entries + * that we've nuked. then go do next entry. */ - pmap_remove_holes(map); + UVMHIST_LOG(maphist, " removed map entry %p", entry, 0, 0,0); - } else { + /* critical! prevents stale hint */ + SAVE_HINT(map, entry, entry->prev); + uvm_map_entry_unlink(map, entry); + map->size -= len; + entry->next = first_entry; + first_entry = entry; + entry = next; /* next entry, please */ + } +#ifdef KVA_GUARDPAGES + /* + * entry points at the map-entry after the last-removed map-entry. + */ + if (map == kernel_map && entry != &map->header && + entry->etype & MAP_ET_KVAGUARD && entry->start == end) { /* - * p's vmspace is being shared, so we can't reuse it for p since - * it is still being used for others. allocate a new vmspace - * for p + * Removed range is followed by guard page; + * remove that guard page now (or it will stay forever). */ - nvm = uvmspace_alloc(start, end, - (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); + entry->etype &= ~MAP_ET_KVAGUARD; + kva_guardpages--; - /* - * install new vmspace and drop our ref to the old one. - */ + uvm_map_entry_unlink(map, entry); + map->size -= len; + entry->next = first_entry; + first_entry = entry; + entry = next; /* next entry, please */ + } +#endif + /* if ((map->flags & VM_MAP_DYING) == 0) { */ + pmap_update(vm_map_pmap(map)); + /* } */ - pmap_deactivate(p); - p->p_vmspace = nvm; - pmap_activate(p); - uvmspace_free(ovm); - } + uvm_tree_sanity(map, "unmap_remove leave"); /* - * Release dead entries + * now we've cleaned up the map and are ready for the caller to drop + * references to the mapped objects. */ - uvm_unmap_detach(&dead_entries, 0); + + *entry_list = first_entry; + UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); } /* - * uvmspace_free: free a vmspace data structure + * uvm_unmap_detach: drop references in a chain of map entries * - * - XXX: no locking on vmspace + * => we will free the map entries as we traverse the list. */ void -uvmspace_free(struct vmspace *vm) +uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) { - struct uvm_map_deadq dead_entries; + struct vm_map_entry *next_entry; + UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); - UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); + while (first_entry) { + KASSERT(!VM_MAPENT_ISWIRED(first_entry)); + UVMHIST_LOG(maphist, + " detach 0x%lx: amap=%p, obj=%p, submap?=%ld", + first_entry, first_entry->aref.ar_amap, + first_entry->object.uvm_obj, + UVM_ET_ISSUBMAP(first_entry)); - UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); - if (--vm->vm_refcnt == 0) { /* - * lock the map, to wait out all other references to it. delete - * all of the mappings and pages they hold, then call the pmap - * module to reclaim anything left. + * drop reference to amap, if we've got one */ -#ifdef SYSVSHM - /* Get rid of any SYSV shared memory segments. 
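
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * UVM_MAP_CLIP_START/UVM_MAP_CLIP_END macros used throughout the code
 * above split one map entry in two at a boundary so that only the
 * affected part of a mapping is modified or unlinked.  A simplified
 * model of such a split on a doubly linked list (hypothetical names;
 * the real macros also fix up amap offsets and the map's lookup
 * structures):
 */
#include <assert.h>
#include <stdlib.h>

struct entry {
    struct entry    *prev, *next;
    unsigned long    start, end;
    unsigned long    offset;    /* offset into the backing object */
};

/* Split e at addr (start < addr < end); return the new second half. */
struct entry *
clip_entry(struct entry *e, unsigned long addr)
{
    struct entry *tail;

    assert(addr > e->start && addr < e->end);
    if ((tail = malloc(sizeof(*tail))) == NULL)
        return (NULL);
    *tail = *e;                             /* inherit all attributes */
    tail->start = addr;
    tail->offset += addr - e->start;        /* keep the offset in sync */
    e->end = addr;

    tail->prev = e;                         /* link tail right after e */
    tail->next = e->next;
    if (e->next != NULL)
        e->next->prev = tail;
    e->next = tail;
    return (tail);
}
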
*/ - if (vm->vm_shm != NULL) - shmexit(vm); -#endif - if ((vm->vm_map.flags & VM_MAP_INTRSAFE) == 0) { - if (rw_enter(&vm->vm_map.lock, RW_NOSLEEP|RW_WRITE) != - 0) { - panic("uvm_map_setup: " - "rw_enter failed on free map"); - } + + if (first_entry->aref.ar_amap) + uvm_map_unreference_amap(first_entry, flags); + + /* + * drop reference to our backing object, if we've got one + */ + + if (UVM_ET_ISSUBMAP(first_entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_deallocate(first_entry->object.sub_map); + } else { + if (UVM_ET_ISOBJ(first_entry) && + first_entry->object.uvm_obj->pgops->pgo_detach) + first_entry->object.uvm_obj->pgops-> + pgo_detach(first_entry->object.uvm_obj); } - uvm_tree_sanity(&vm->vm_map, __FILE__, __LINE__); - TAILQ_INIT(&dead_entries); - uvm_unmap_remove(&vm->vm_map, - vm->vm_map.min_offset, vm->vm_map.max_offset, - &dead_entries, TRUE, FALSE); - if ((vm->vm_map.flags & VM_MAP_INTRSAFE) == 0) - rw_exit(&vm->vm_map.lock); - KDASSERT(RB_EMPTY(&vm->vm_map.addr)); - uvm_unmap_detach(&dead_entries, 0); - pmap_destroy(vm->vm_map.pmap); - vm->vm_map.pmap = NULL; - pool_put(&uvm_vmspace_pool, vm); + + next_entry = first_entry->next; + uvm_mapent_free(first_entry); + first_entry = next_entry; } - UVMHIST_LOG(maphist,"<- done", 0,0,0,0); + UVMHIST_LOG(maphist, "<- done", 0,0,0,0); } /* - * Clone map entry into other map. + * E X T R A C T I O N F U N C T I O N S + */ + +/* + * uvm_map_reserve: reserve space in a vm_map for future use. * - * Mapping will be placed at dstaddr, for the same length. - * Space must be available. - * Reference counters are incremented. + * => we reserve space in a map by putting a dummy map entry in the + * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) + * => map should be unlocked (we will write lock it) + * => we return true if we were able to reserve space + * => XXXCDC: should be inline? */ -struct vm_map_entry* -uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, - vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, - int mapent_flags, int amap_share_flags) + +int +uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset, + vsize_t align, vaddr_t *raddr) { - struct vm_map_entry *new_entry, *first, *last; + UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); - KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); + UVMHIST_LOG(maphist, "(map=%p, size=0x%lx, offset=0x%lx,addr=0x%lx)", + map,size,offset,raddr); - /* - * Create new entry (linked in on creation). - * Fill in first, last. - */ - first = last = NULL; - if (!uvm_map_isavail(&dstmap->addr, &first, &last, dstaddr, dstlen)) { - panic("uvmspace_fork: no space in map for " - "entry in empty map"); - } - new_entry = uvm_map_mkentry(dstmap, first, last, - dstaddr, dstlen, mapent_flags, dead); - if (new_entry == NULL) - return NULL; - /* old_entry -> new_entry */ - new_entry->object = old_entry->object; - new_entry->offset = old_entry->offset; - new_entry->aref = old_entry->aref; - new_entry->etype = old_entry->etype; - new_entry->protection = old_entry->protection; - new_entry->max_protection = old_entry->max_protection; - new_entry->inheritance = old_entry->inheritance; - new_entry->advice = old_entry->advice; + size = round_page(size); + if (*raddr < vm_map_min(map)) + *raddr = vm_map_min(map); /* hint */ /* - * gain reference to object backing the map (can't - * be a submap). + * reserve some virtual space. 
*/ - if (new_entry->aref.ar_amap) { - new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; - amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, - (new_entry->end - new_entry->start) >> PAGE_SHIFT, - amap_share_flags); - } - if (UVM_ET_ISOBJ(new_entry) && - new_entry->object.uvm_obj->pgops->pgo_reference) { - new_entry->offset += off; - new_entry->object.uvm_obj->pgops->pgo_reference - (new_entry->object.uvm_obj); - } + if (uvm_map(map, raddr, size, NULL, offset, 0, + UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, + UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { + UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); + return (FALSE); + } - return new_entry; + UVMHIST_LOG(maphist, "<- done (*raddr=0x%lx)", *raddr,0,0,0); + return (TRUE); } /* - * share the mapping: this means we want the old and - * new entries to share amaps and backing objects. + * uvm_map_replace: replace a reserved (blank) area of memory with + * real mappings. + * + * => caller must WRITE-LOCK the map + * => we return TRUE if replacement was a success + * => we expect the newents chain to have nnewents entries on it and + * we expect newents->prev to point to the last entry on the list + * => note newents is allowed to be NULL */ -void -uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, - struct vm_map *old_map, - struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) + +int +uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, + struct vm_map_entry *newents, int nnewents) { - struct vm_map_entry *new_entry; + struct vm_map_entry *oldent, *last; + + uvm_tree_sanity(map, "map_replace entry"); /* - * if the old_entry needs a new amap (due to prev fork) - * then we need to allocate it now so that we have - * something we own to share with the new_entry. [in - * other words, we need to clear needs_copy] + * first find the blank map entry at the specified address */ - if (UVM_ET_ISNEEDSCOPY(old_entry)) { - /* get our own amap, clears needs_copy */ - amap_copy(old_map, old_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: WAITOK??? */ + if (!uvm_map_lookup_entry(map, start, &oldent)) { + return(FALSE); } - new_entry = uvm_mapent_clone(new_map, old_entry->start, - old_entry->end - old_entry->start, 0, old_entry, - dead, 0, AMAP_SHARED); - - /* - * pmap_copy the mappings: this routine is optional - * but if it is there it will reduce the number of - * page faults in the new proc. + /* + * check to make sure we have a proper blank entry */ - pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, - (new_entry->end - new_entry->start), new_entry->start); + + if (oldent->start != start || oldent->end != end || + oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { + return (FALSE); + } + +#ifdef DIAGNOSTIC + /* + * sanity check the newents chain + */ + { + struct vm_map_entry *tmpent = newents; + int nent = 0; + vaddr_t cur = start; + + while (tmpent) { + nent++; + if (tmpent->start < cur) + panic("uvm_map_replace1"); + if (tmpent->start > tmpent->end || tmpent->end > end) { + printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", + tmpent->start, tmpent->end, end); + panic("uvm_map_replace2"); + } + cur = tmpent->end; + if (tmpent->next) { + if (tmpent->next->prev != tmpent) + panic("uvm_map_replace3"); + } else { + if (newents->prev != tmpent) + panic("uvm_map_replace4"); + } + tmpent = tmpent->next; + } + if (nent != nnewents) + panic("uvm_map_replace5"); + } +#endif /* - * Update process statistics. + * map entry is a valid blank! replace it. 
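
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * DIAGNOSTIC block just above walks the "newents" chain and panics if
 * it is unsorted, runs outside [start, end), is mis-linked, or has the
 * wrong length (note the convention that newents->prev points at the
 * last entry of the chain).  The same invariants as a user-space
 * checker that returns an error instead of panicking (hypothetical
 * names):
 */
#include <stddef.h>

struct chain_ent {
    struct chain_ent    *prev, *next;
    unsigned long        start, end;
};

static int
chain_valid(struct chain_ent *head, int expected,
    unsigned long start, unsigned long end)
{
    struct chain_ent *e;
    unsigned long cur = start;
    int n = 0;

    for (e = head; e != NULL; e = e->next) {
        n++;
        if (e->start < cur || e->start > e->end || e->end > end)
            return (-1);        /* unsorted or out of range */
        cur = e->end;
        if (e->next != NULL) {
            if (e->next->prev != e)
                return (-1);    /* broken back link */
        } else if (head->prev != e)
            return (-1);        /* head->prev must be the tail */
    }
    return (n == expected ? 0 : -1);
}
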
(this does all the + * work of map entry link/unlink...). */ - if (!UVM_ET_ISHOLE(new_entry)) - new_map->size += new_entry->end - new_entry->start; - if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { - new_vm->vm_dused += - uvmspace_dused(new_map, new_entry->start, new_entry->end); + + if (newents) { + last = newents->prev; /* we expect this */ + + /* critical: flush stale hints out of map */ + SAVE_HINT(map, map->hint, newents); + if (map->first_free == oldent) + map->first_free = last; + + last->next = oldent->next; + last->next->prev = last; + + /* Fix RB tree */ + uvm_rb_remove(map, oldent); + + newents->prev = oldent->prev; + newents->prev->next = newents; + map->nentries = map->nentries + (nnewents - 1); + + /* Fixup the RB tree */ + { + int i; + struct vm_map_entry *tmp; + + tmp = newents; + for (i = 0; i < nnewents && tmp; i++) { + uvm_rb_insert(map, tmp); + tmp = tmp->next; + } + } + } else { + + /* critical: flush stale hints out of map */ + SAVE_HINT(map, map->hint, oldent->prev); + if (map->first_free == oldent) + map->first_free = oldent->prev; + + /* NULL list of new entries: just remove the old one */ + uvm_map_entry_unlink(map, oldent); } + + + uvm_tree_sanity(map, "map_replace leave"); + + /* + * now we can free the old blank entry, unlock the map and return. + */ + + uvm_mapent_free(oldent); + return(TRUE); } /* - * copy-on-write the mapping (using mmap's - * MAP_PRIVATE semantics) + * uvm_map_extract: extract a mapping from a map and put it somewhere + * (maybe removing the old mapping) * - * allocate new_entry, adjust reference counts. - * (note that new references are read-only). + * => maps should be unlocked (we will write lock them) + * => returns 0 on success, error code otherwise + * => start must be page aligned + * => len must be page sized + * => flags: + * UVM_EXTRACT_REMOVE: remove mappings from srcmap + * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) + * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs + * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go + * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< + * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only + * be used from within the kernel in a kernel level map <<< */ -void -uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, - struct vm_map *old_map, - struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) + +int +uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, + struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) { - struct vm_map_entry *new_entry; - boolean_t protect_child; + vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, + oldstart; + struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry; + struct vm_map_entry *deadentry, *oldentry; + vsize_t elen; + int nchain, error, copy_ok; + UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); - new_entry = uvm_mapent_clone(new_map, old_entry->start, - old_entry->end - old_entry->start, 0, old_entry, - dead, 0, 0); + UVMHIST_LOG(maphist,"(srcmap=%p,start=0x%lx, len=0x%lx", srcmap, start, + len,0); + UVMHIST_LOG(maphist," ...,dstmap=%p, flags=0x%lx)", dstmap,flags,0,0); - new_entry->etype |= - (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); + uvm_tree_sanity(srcmap, "map_extract src enter"); + uvm_tree_sanity(dstmap, "map_extract dst enter"); /* - * the new entry will need an amap. it will either - * need to be copied from the old entry or created - * from scratch (if the old entry does not have an - * amap). 
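
/*
 * Editor's note: illustrative sketch, not part of the diff.  Taken
 * together, uvm_map_reserve() and uvm_map_replace() above form a
 * two-step pattern: grab address space with a blank entry first
 * (obj=NULL, amap=NULL, prot=VM_PROT_NONE), build the real entries
 * without holding the target map locked, then swap them in over the
 * reservation.  A much-simplified user-space model of the same idea
 * (all names hypothetical):
 */
#include <pthread.h>
#include <stddef.h>

#define NSLOT   64

static pthread_mutex_t   table_lock = PTHREAD_MUTEX_INITIALIZER;
static void             *table[NSLOT];
static char              placeholder;       /* the "blank entry" */

/* step 1: reserve a free slot by parking the placeholder in it */
static int
slot_reserve(void)
{
    int i;

    pthread_mutex_lock(&table_lock);
    for (i = 0; i < NSLOT; i++) {
        if (table[i] == NULL) {
            table[i] = &placeholder;
            pthread_mutex_unlock(&table_lock);
            return (i);
        }
    }
    pthread_mutex_unlock(&table_lock);
    return (-1);                            /* "no VM" */
}

/* step 2: replace the placeholder with the real object */
static int
slot_replace(int i, void *real)
{
    int ok;

    pthread_mutex_lock(&table_lock);
    ok = (table[i] == &placeholder);        /* must still be the blank */
    if (ok)
        table[i] = real;
    pthread_mutex_unlock(&table_lock);
    return (ok ? 0 : -1);
}
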
can we defer this process until later - * (by setting "needs_copy") or do we need to copy - * the amap now? - * - * we must copy the amap now if any of the following - * conditions hold: - * 1. the old entry has an amap and that amap is - * being shared. this means that the old (parent) - * process is sharing the amap with another - * process. if we do not clear needs_copy here - * we will end up in a situation where both the - * parent and child process are referring to the - * same amap with "needs_copy" set. if the - * parent write-faults, the fault routine will - * clear "needs_copy" in the parent by allocating - * a new amap. this is wrong because the - * parent is supposed to be sharing the old amap - * and the new amap will break that. - * - * 2. if the old entry has an amap and a non-zero - * wire count then we are going to have to call - * amap_cow_now to avoid page faults in the - * parent process. since amap_cow_now requires - * "needs_copy" to be clear we might as well - * clear it here as well. - * + * step 0: sanity check: start must be on a page boundary, length + * must be page sized. can't ask for CONTIG/QREF if you asked for + * REMOVE. */ - if (old_entry->aref.ar_amap != NULL && - ((amap_flags(old_entry->aref.ar_amap) & - AMAP_SHARED) != 0 || - VM_MAPENT_ISWIRED(old_entry))) { - amap_copy(new_map, new_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: M_WAITOK ... ok? */ - } + KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); + KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || + (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); /* - * if the parent's entry is wired down, then the - * parent process does not want page faults on - * access to that memory. this means that we - * cannot do copy-on-write because we can't write - * protect the old entry. in this case we - * resolve all copy-on-write faults now, using - * amap_cow_now. note that we have already - * allocated any needed amap (above). + * step 1: reserve space in the target map for the extracted area */ - if (VM_MAPENT_ISWIRED(old_entry)) { + dstaddr = vm_map_min(dstmap); + if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE) + return(ENOMEM); + *dstaddrp = dstaddr; /* pass address back to caller */ + UVMHIST_LOG(maphist, " dstaddr=0x%lx", dstaddr,0,0,0); - /* - * resolve all copy-on-write faults now - * (note that there is nothing to do if - * the old mapping does not have an amap). - * XXX: is it worthwhile to bother with - * pmap_copy in this case? - */ - if (old_entry->aref.ar_amap) - amap_cow_now(new_map, new_entry); + /* + * step 2: setup for the extraction process loop by init'ing the + * map entry chain, locking src map, and looking up the first useful + * entry in the map. + */ - } else { - if (old_entry->aref.ar_amap) { + end = start + len; + newend = dstaddr + len; + chain = endchain = NULL; + nchain = 0; + vm_map_lock(srcmap); + + if (uvm_map_lookup_entry(srcmap, start, &entry)) { + + /* "start" is within an entry */ + if (flags & UVM_EXTRACT_QREF) { /* - * setup mappings to trigger copy-on-write faults - * we must write-protect the parent if it has - * an amap and it is not already "needs_copy"... - * if it is already "needs_copy" then the parent - * has already been write-protected by a previous - * fork operation. - * - * if we do not write-protect the parent, then - * we must be sure to write-protect the child - * after the pmap_copy() operation. 
- * - * XXX: pmap_copy should have some way of telling - * us that it didn't do anything so we can avoid - * calling pmap_protect needlessly. + * for quick references we don't clip the entry, so + * the entry may map space "before" the starting + * virtual address... this is the "fudge" factor + * (which can be non-zero only the first time + * through the "while" loop in step 3). */ - if (!UVM_ET_ISNEEDSCOPY(old_entry)) { - if (old_entry->max_protection & - VM_PROT_WRITE) { - pmap_protect(old_map->pmap, - old_entry->start, - old_entry->end, - old_entry->protection & - ~VM_PROT_WRITE); - pmap_update(old_map->pmap); - } - old_entry->etype |= UVM_ET_NEEDSCOPY; - } - /* - * parent must now be write-protected - */ - protect_child = FALSE; + fudge = start - entry->start; } else { /* - * we only need to protect the child if the - * parent has write access. + * normal reference: we clip the map to fit (thus + * fudge is zero) */ - if (old_entry->max_protection & VM_PROT_WRITE) - protect_child = TRUE; + + UVM_MAP_CLIP_START(srcmap, entry, start); + SAVE_HINT(srcmap, srcmap->hint, entry->prev); + fudge = 0; + } + } else { + + /* "start" is not within an entry ... skip to next entry */ + if (flags & UVM_EXTRACT_CONTIG) { + error = EINVAL; + goto bad; /* definite hole here ... */ + } + + entry = entry->next; + fudge = 0; + } + + /* save values from srcmap for step 6 */ + orig_entry = entry; + orig_fudge = fudge; + + /* + * step 3: now start looping through the map entries, extracting + * as we go. + */ + + while (entry->start < end && entry != &srcmap->header) { + + /* if we are not doing a quick reference, clip it */ + if ((flags & UVM_EXTRACT_QREF) == 0) + UVM_MAP_CLIP_END(srcmap, entry, end); + + /* clear needs_copy (allow chunking) */ + if (UVM_ET_ISNEEDSCOPY(entry)) { + if (fudge) + oldstart = entry->start; else - protect_child = FALSE; + oldstart = 0; /* XXX: gcc */ + amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); + if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ + error = ENOMEM; + goto bad; + } + /* amap_copy could clip (during chunk)! update fudge */ + if (fudge) { + fudge = fudge - (entry->start - oldstart); + orig_fudge = fudge; + } } - /* - * copy the mappings - * XXX: need a way to tell if this does anything - */ + /* calculate the offset of this from "start" */ + oldoffset = (entry->start + fudge) - start; - pmap_copy(new_map->pmap, old_map->pmap, - new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); + /* allocate a new map entry */ + newentry = uvm_mapent_alloc(dstmap, flags); + if (newentry == NULL) { + error = ENOMEM; + goto bad; + } - /* - * protect the child's mappings if necessary - */ - if (protect_child) { - pmap_protect(new_map->pmap, new_entry->start, - new_entry->end, - new_entry->protection & - ~VM_PROT_WRITE); + /* set up new map entry */ + newentry->next = NULL; + newentry->prev = endchain; + newentry->start = dstaddr + oldoffset; + newentry->end = + newentry->start + (entry->end - (entry->start + fudge)); + if (newentry->end > newend || newentry->end < newentry->start) + newentry->end = newend; + newentry->object.uvm_obj = entry->object.uvm_obj; + if (newentry->object.uvm_obj) { + if (newentry->object.uvm_obj->pgops->pgo_reference) + newentry->object.uvm_obj->pgops-> + pgo_reference(newentry->object.uvm_obj); + newentry->offset = entry->offset + fudge; + } else { + newentry->offset = 0; + } + newentry->etype = entry->etype; + newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 
+ entry->max_protection : entry->protection; + newentry->max_protection = entry->max_protection; + newentry->inheritance = entry->inheritance; + newentry->wired_count = 0; + newentry->aref.ar_amap = entry->aref.ar_amap; + if (newentry->aref.ar_amap) { + newentry->aref.ar_pageoff = + entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); + uvm_map_reference_amap(newentry, AMAP_SHARED | + ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); + } else { + newentry->aref.ar_pageoff = 0; + } + newentry->advice = entry->advice; + + /* now link it on the chain */ + nchain++; + if (endchain == NULL) { + chain = endchain = newentry; + } else { + endchain->next = newentry; + endchain = newentry; + } + + /* end of 'while' loop! */ + if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && + (entry->next == &srcmap->header || + entry->next->start != entry->end)) { + error = EINVAL; + goto bad; } + entry = entry->next; + fudge = 0; } /* - * Update process statistics. + * step 4: close off chain (in format expected by uvm_map_replace) */ - if (!UVM_ET_ISHOLE(new_entry)) - new_map->size += new_entry->end - new_entry->start; - if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { - new_vm->vm_dused += - uvmspace_dused(new_map, new_entry->start, new_entry->end); - } -} -/* - * uvmspace_fork: fork a process' main map - * - * => create a new vmspace for child process from parent. - * => parent's map must not be locked. - */ -struct vmspace * -uvmspace_fork(struct vmspace *vm1) -{ - struct vmspace *vm2; - struct vm_map *old_map = &vm1->vm_map; - struct vm_map *new_map; - struct vm_map_entry *old_entry; - struct uvm_map_deadq dead; - UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); + if (chain) + chain->prev = endchain; - vm_map_lock(old_map); + /* + * step 5: attempt to lock the dest map so we can pmap_copy. + * note usage of copy_ok: + * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) + * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 + */ - vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, - (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); - memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, - (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); - vm2->vm_dused = 0; /* Statistic managed by us. 
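
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * step-3 arithmetic above places each extracted piece at the same
 * offset inside the destination window that it had inside the source
 * window (oldoffset = (entry->start + fudge) - start) and clamps the
 * end so it never runs past dstaddr + len.  The same computation as a
 * stand-alone helper (hypothetical names):
 */
struct piece {
    unsigned long src_start, src_end;       /* source entry, page aligned */
};

static void
extract_place(const struct piece *p, unsigned long fudge,
    unsigned long start, unsigned long dstaddr, unsigned long len,
    unsigned long *dst_start, unsigned long *dst_end)
{
    unsigned long oldoffset = (p->src_start + fudge) - start;
    unsigned long newend = dstaddr + len;

    *dst_start = dstaddr + oldoffset;
    *dst_end = *dst_start + (p->src_end - (p->src_start + fudge));
    if (*dst_end > newend || *dst_end < *dst_start)
        *dst_end = newend;                  /* clamp, as the loop above does */
}

/*
 * For example, extracting [0x2000, 0x5000) into a destination window
 * at 0x9000: an entry [0x3000, 0x4000) with fudge 0 lands at
 * [0xa000, 0xb000), i.e. at the same relative offset 0x1000.
 */
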
*/ - new_map = &vm2->vm_map; - vm_map_lock(new_map); + if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { + copy_ok = 1; + if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, + nchain)) { + if (srcmap != dstmap) + vm_map_unlock(dstmap); + error = EIO; + goto bad; + } + } else { + copy_ok = 0; + /* replace defered until step 7 */ + } /* - * go entry-by-entry + * step 6: traverse the srcmap a second time to do the following: + * - if we got a lock on the dstmap do pmap_copy + * - if UVM_EXTRACT_REMOVE remove the entries + * we make use of orig_entry and orig_fudge (saved in step 2) */ - TAILQ_INIT(&dead); - RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { - if (old_entry->start == old_entry->end) - continue; + if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { - /* - * first, some sanity checks on the old entry - */ - if (UVM_ET_ISSUBMAP(old_entry)) { - panic("fork: encountered a submap during fork " - "(illegal)"); + /* purge possible stale hints from srcmap */ + if (flags & UVM_EXTRACT_REMOVE) { + SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); + if (srcmap->first_free->start >= start) + srcmap->first_free = orig_entry->prev; } - if (!UVM_ET_ISCOPYONWRITE(old_entry) && - UVM_ET_ISNEEDSCOPY(old_entry)) { - panic("fork: non-copy_on_write map entry marked " - "needs_copy (illegal)"); + entry = orig_entry; + fudge = orig_fudge; + deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ + + while (entry->start < end && entry != &srcmap->header) { + if (copy_ok) { + oldoffset = (entry->start + fudge) - start; + elen = MIN(end, entry->end) - + (entry->start + fudge); + pmap_copy(dstmap->pmap, srcmap->pmap, + dstaddr + oldoffset, elen, + entry->start + fudge); + } + + /* we advance "entry" in the following if statement */ + if (flags & UVM_EXTRACT_REMOVE) { + pmap_remove(srcmap->pmap, entry->start, + entry->end); + oldentry = entry; /* save entry */ + entry = entry->next; /* advance */ + uvm_map_entry_unlink(srcmap, oldentry); + /* add to dead list */ + oldentry->next = deadentry; + deadentry = oldentry; + } else { + entry = entry->next; /* advance */ + } + + /* end of 'while' loop */ + fudge = 0; } + pmap_update(srcmap->pmap); /* - * Apply inheritance. + * unlock dstmap. we will dispose of deadentry in + * step 7 if needed */ - if (old_entry->inheritance == MAP_INHERIT_SHARE) { - uvm_mapent_forkshared(vm2, new_map, - old_map, old_entry, &dead); - } - if (old_entry->inheritance == MAP_INHERIT_COPY) { - uvm_mapent_forkcopy(vm2, new_map, - old_map, old_entry, &dead); - } - } - vm_map_unlock(old_map); - vm_map_unlock(new_map); + if (copy_ok && srcmap != dstmap) + vm_map_unlock(dstmap); + + } + else + deadentry = NULL; /* XXX: gcc */ /* - * This can actually happen, if multiple entries described a - * space in which an entry was inherited. + * step 7: we are done with the source map, unlock. if copy_ok + * is 0 then we have not replaced the dummy mapping in dstmap yet + * and we need to do so now. 
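
/*
 * Editor's note: illustrative sketch, not part of the diff.  Steps 5
 * and 7 above use a "try-lock or defer" pattern: if the destination
 * map is the source map or vm_map_lock_try() succeeds, the replacement
 * (and pmap_copy) happens immediately with copy_ok = 1; otherwise the
 * replacement is deferred until the source map has been unlocked.  A
 * generic user-space version of that decision (hypothetical names;
 * the caller is assumed to already hold "held"):
 */
#include <pthread.h>

static int
do_with_second_lock(pthread_mutex_t *held, pthread_mutex_t *other,
    void (*work)(void))
{
    int now = (held == other || pthread_mutex_trylock(other) == 0);

    if (now) {
        work();                             /* fast path: both locks held */
        if (held != other)
            pthread_mutex_unlock(other);
        return (1);                         /* "copy_ok" */
    }
    return (0);     /* caller re-runs work() after dropping "held" */
}
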
*/ - uvm_unmap_detach(&dead, 0); -#ifdef SYSVSHM - if (vm1->vm_shm) - shmfork(vm1, vm2); -#endif + vm_map_unlock(srcmap); + if ((flags & UVM_EXTRACT_REMOVE) && deadentry) + uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ + + /* now do the replacement if we didn't do it in step 5 */ + if (copy_ok == 0) { + vm_map_lock(dstmap); + error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, + nchain); + vm_map_unlock(dstmap); + + if (error == FALSE) { + error = EIO; + goto bad2; + } + } -#ifdef PMAP_FORK - pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); -#endif + uvm_tree_sanity(srcmap, "map_extract src leave"); + uvm_tree_sanity(dstmap, "map_extract dst leave"); - UVMHIST_LOG(maphist,"<- done",0,0,0,0); - return vm2; -} + return(0); -/* - * uvm_map_hint: return the beginning of the best area suitable for - * creating a new mapping with "prot" protection. - */ -vaddr_t -uvm_map_hint(struct proc *p, vm_prot_t prot) -{ - vaddr_t addr; - -#ifdef __i386__ /* - * If executable skip first two pages, otherwise start - * after data + heap region. + * bad: failure recovery */ - if ((prot & VM_PROT_EXECUTE) && - ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { - addr = (PAGE_SIZE*2) + - (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); - return (round_page(addr)); - } -#endif - /* start malloc/mmap after the brk */ - addr = (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ; -#if !defined(__vax__) - addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); -#endif - return (round_page(addr)); +bad: + vm_map_unlock(srcmap); +bad2: /* src already unlocked */ + if (chain) + uvm_unmap_detach(chain, + (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); + + uvm_tree_sanity(srcmap, "map_extract src err leave"); + uvm_tree_sanity(dstmap, "map_extract dst err leave"); + + uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ + return(error); } +/* end of extraction functions */ + /* * uvm_map_submap: punch down part of a map into a submap * @@ -3678,6 +2329,7 @@ uvm_map_hint(struct proc *p, vm_prot_t prot) * => submap must have been init'd and have a zero reference count. * [need not be locked as we don't actually reference it] */ + int uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, struct vm_map *submap) @@ -3685,17 +2337,16 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, struct vm_map_entry *entry; int result; - if (start > map->max_offset || end > map->max_offset || - start < map->min_offset || end < map->min_offset) - return EINVAL; - vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); - UVM_MAP_CLIP_END(map, entry, end); - } else + UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ + } else { entry = NULL; + } if (entry != NULL && entry->start == start && entry->end == end && @@ -3706,102 +2357,134 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, entry->offset = 0; uvm_map_reference(submap); result = 0; - } else + } else { result = EINVAL; - + } vm_map_unlock(map); return(result); } + /* - * uvm_map_checkprot: check protection in map + * uvm_map_protect: change map protection * - * => must allow specific protection in a fully allocated region. - * => map mut be read or write locked by caller. + * => set_max means set max_protection. + * => map must be unlocked. */ -boolean_t -uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t protection) + +#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? 
\ + ~VM_PROT_WRITE : VM_PROT_ALL) +#define max(a,b) ((a) > (b) ? (a) : (b)) + +int +uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t new_prot, boolean_t set_max) { - struct vm_map_entry *entry; + struct vm_map_entry *current, *entry; + int error = 0; + UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_prot=0x%lx)", + map, start, end, new_prot); - if (start < map->min_offset || end > map->max_offset || start > end) - return FALSE; - if (start == end) - return TRUE; + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &entry)) { + UVM_MAP_CLIP_START(map, entry, start); + } else { + entry = entry->next; + } /* - * Iterate entries. + * make a first pass to check for protection violations. */ - for (entry = uvm_map_entrybyaddr(&map->addr, start); - entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - /* - * Fail if a hole is found. - */ - if (UVM_ET_ISHOLE(entry) || - (entry->end < end && entry->end != FREE_END(entry))) - return FALSE; - /* - * Check protection. - */ - if ((entry->protection & protection) != protection) - return FALSE; + current = entry; + while ((current != &map->header) && (current->start < end)) { + if (UVM_ET_ISSUBMAP(current)) { + error = EINVAL; + goto out; + } + if ((new_prot & current->max_protection) != new_prot) { + error = EACCES; + goto out; + } + current = current->next; } - return TRUE; -} -/* - * uvm_map_create: create map - */ -vm_map_t -uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) -{ - vm_map_t result; + /* go back and fix up protections (no need to clip this time). */ - result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); - result->pmap = pmap; - uvm_map_setup(result, min, max, flags); - return(result); -} + current = entry; -/* - * uvm_map_deallocate: drop reference to a map - * - * => caller must not lock map - * => we will zap map if ref count goes to zero - */ -void -uvm_map_deallocate(vm_map_t map) -{ - int c; - struct uvm_map_deadq dead; + while ((current != &map->header) && (current->start < end)) { + vm_prot_t old_prot; - simple_lock(&map->ref_lock); - c = --map->ref_count; - simple_unlock(&map->ref_lock); - if (c > 0) { - return; - } + UVM_MAP_CLIP_END(map, current, end); - /* - * all references gone. unmap and free. - * - * No lock required: we are only one to access this map. - */ + old_prot = current->protection; + if (set_max) + current->protection = + (current->max_protection = new_prot) & old_prot; + else + current->protection = new_prot; - TAILQ_INIT(&dead); - uvm_tree_sanity(map, __FILE__, __LINE__); - uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, - TRUE, FALSE); - pmap_destroy(map->pmap); - KASSERT(RB_EMPTY(&map->addr)); - free(map, M_VMMAP); + /* + * update physical map if necessary. worry about copy-on-write + * here -- CHECK THIS XXX + */ - uvm_unmap_detach(&dead, 0); + if (current->protection != old_prot) { + /* update pmap! */ + if ((current->protection & MASK(entry)) == PROT_NONE && + VM_MAPENT_ISWIRED(entry)) + current->wired_count--; + pmap_protect(map->pmap, current->start, current->end, + current->protection & MASK(entry)); + } + + /* + * If the map is configured to lock any future mappings, + * wire this entry now if the old protection was VM_PROT_NONE + * and the new protection is not VM_PROT_NONE. 
+ */ + + if ((map->flags & VM_MAP_WIREFUTURE) != 0 && + VM_MAPENT_ISWIRED(entry) == 0 && + old_prot == VM_PROT_NONE && + new_prot != VM_PROT_NONE) { + if (uvm_map_pageable(map, entry->start, entry->end, + FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { + /* + * If locking the entry fails, remember the + * error if it's the first one. Note we + * still continue setting the protection in + * the map, but will return the resource + * shortage condition regardless. + * + * XXX Ignore what the actual error is, + * XXX just call it a resource shortage + * XXX so that it doesn't get confused + * XXX what uvm_map_protect() itself would + * XXX normally return. + */ + error = ENOMEM; + } + } + + current = current->next; + } + pmap_update(map->pmap); + + out: + vm_map_unlock(map); + UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); + return (error); } +#undef max +#undef MASK + /* * uvm_map_inherit: set inheritance code for range of addrs in map. * @@ -3809,6 +2492,7 @@ uvm_map_deallocate(vm_map_t map) * => note that the inherit code is used during a "fork". see fork * code for details. */ + int uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, vm_inherit_t new_inheritance) @@ -3828,25 +2512,20 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, return (EINVAL); } - if (start > end) - return EINVAL; - start = MAX(start, map->min_offset); - end = MIN(end, map->max_offset); - if (start >= end) - return 0; - vm_map_lock(map); - - entry = uvm_map_entrybyaddr(&map->addr, start); - if (entry->end > start) + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); - else - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + } else { + entry = entry->next; + } - while (entry != NULL && entry->start < end) { + while ((entry != &map->header) && (entry->start < end)) { UVM_MAP_CLIP_END(map, entry, end); entry->inheritance = new_inheritance; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + entry = entry->next; } vm_map_unlock(map); @@ -3859,6 +2538,7 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, * * => map must be unlocked */ + int uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) { @@ -3871,35 +2551,30 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: + /* nothing special here */ break; + default: UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); return (EINVAL); } - - if (start > end) - return EINVAL; - start = MAX(start, map->min_offset); - end = MIN(end, map->max_offset); - if (start >= end) - return 0; - vm_map_lock(map); - - entry = uvm_map_entrybyaddr(&map->addr, start); - if (entry != NULL && entry->end > start) + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); - else if (entry!= NULL) - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + } else { + entry = entry->next; + } /* * XXXJRT: disallow holes? 
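
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * MASK() macro in the restored uvm_map_protect() above strips
 * VM_PROT_WRITE from whatever is loaded into the pmap for
 * copy-on-write entries, so the first write still faults and the
 * fault handler gets a chance to copy the page.  The same decision as
 * a small helper, using stand-in protection bits:
 */
#define X_PROT_READ     0x1
#define X_PROT_WRITE    0x2
#define X_PROT_EXEC     0x4
#define X_PROT_ALL      (X_PROT_READ | X_PROT_WRITE | X_PROT_EXEC)

static int
pmap_prot(int entry_prot, int copy_on_write)
{
    int mask = copy_on_write ? ~X_PROT_WRITE : X_PROT_ALL;

    /* e.g. entry_prot = RW on a COW entry => the pmap gets R only */
    return (entry_prot & mask);
}
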
*/ - while (entry != NULL && entry->start < end) { + while ((entry != &map->header) && (entry->start < end)) { UVM_MAP_CLIP_END(map, entry, end); + entry->advice = new_advice; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + entry = entry->next; } vm_map_unlock(map); @@ -3908,187 +2583,470 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) } /* - * uvm_map_extract: extract a mapping from a map and put it somewhere - * in the kernel_map, setting protection to max_prot. + * uvm_map_pageable: sets the pageability of a range in a map. * - * => map should be unlocked (we will write lock it and kernel_map) - * => returns 0 on success, error code otherwise - * => start must be page aligned - * => len must be page sized - * => flags: - * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go - * Mappings are QREF's. + * => wires map entries. should not be used for transient page locking. + * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). + * => regions sepcified as not pageable require lock-down (wired) memory + * and page tables. + * => map must never be read-locked + * => if islocked is TRUE, map is already write-locked + * => we always unlock the map, since we must downgrade to a read-lock + * to call uvm_fault_wire() + * => XXXCDC: check this and try and clean it up. */ + int -uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, - vaddr_t *dstaddrp, int flags) +uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, + boolean_t new_pageable, int lockflags) { - struct uvm_map_deadq dead; - struct vm_map_entry *first, *entry, *newentry; - vaddr_t dstaddr; - vaddr_t end; - vaddr_t cp_start; - vsize_t cp_len, cp_off; - int error; + struct vm_map_entry *entry, *start_entry, *failed_entry; + int rv; +#ifdef DIAGNOSTIC + u_int timestamp_save; +#endif + UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_pageable=0x%lx)", + map, start, end, new_pageable); + KASSERT(map->flags & VM_MAP_PAGEABLE); - TAILQ_INIT(&dead); - end = start + len; + if ((lockflags & UVM_LK_ENTER) == 0) + vm_map_lock(map); - /* - * Sanity check on the parameters. - * Also, since the mapping may not contain gaps, error out if the - * mapped area is not in source map. + VM_MAP_RANGE_CHECK(map, start, end); + + /* + * only one pageability change may take place at one time, since + * uvm_fault_wire assumes it will be called only once for each + * wiring/unwiring. therefore, we have to make sure we're actually + * changing the pageability for the entire region. we do so before + * making any changes. */ - if ((start & PAGE_MASK) != 0 || (end & PAGE_MASK) != 0 || end < start) - return EINVAL; - if (start < srcmap->min_offset || end > srcmap->max_offset) - return EINVAL; + if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); - /* - * Initialize dead entries. - * Handle len == 0 case. + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return (EFAULT); + } + entry = start_entry; + + /* + * handle wiring and unwiring separately. */ - if (len == 0) - return 0; + if (new_pageable) { /* unwire */ + UVM_MAP_CLIP_START(map, entry, start); - /* - * Acquire lock on srcmap. - */ - vm_map_lock(srcmap); + /* + * unwiring. first ensure that the range to be unwired is + * really wired down and that there are no holes. + */ - /* - * Lock srcmap, lookup first and last entry in <start,len>. 
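
/*
 * Editor's note: illustrative sketch, not part of the diff.  The
 * wiring code above treats wired_count as a counter: an entry is wired
 * while the count is non-zero, the 0 -> 1 transition is what actually
 * faults the pages in, and (per the POSIX 1003.1b note above) a single
 * munlock drops the region all the way back to unwired no matter how
 * many mlock calls preceded it.  A toy model of that bookkeeping
 * (hypothetical names; fault_in()/release() stand in for
 * uvm_fault_wire() and uvm_map_entry_unwire()):
 */
struct wire_state {
    int wired_count;
};

static void fault_in(void) { /* pin the pages */ }
static void release(void)  { /* unpin the pages */ }

static void
wire(struct wire_state *w)
{
    if (w->wired_count++ == 0)
        fault_in();         /* only the first wiring faults pages in */
}

static void
unwire_all(struct wire_state *w)    /* munlock semantics */
{
    if (w->wired_count != 0) {
        w->wired_count = 0;
        release();
    }
}
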
- */ - first = uvm_map_entrybyaddr(&srcmap->addr, start); + while ((entry != &map->header) && (entry->start < end)) { + if (entry->wired_count == 0 || + (entry->end < end && + (entry->next == &map->header || + entry->next->start > entry->end))) { + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + UVMHIST_LOG(maphist, + "<- done (INVALID UNWIRE ARG)",0,0,0,0); + return (EINVAL); + } + entry = entry->next; + } - /* - * Check that the range is contiguous. - */ - for (entry = first; entry != NULL && entry->end < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - if (FREE_END(entry) != entry->end || UVM_ET_ISHOLE(entry)) { - error = EINVAL; - goto fail; + /* + * POSIX 1003.1b - a single munlock call unlocks a region, + * regardless of the number of mlock calls made on that + * region. + */ + + entry = start_entry; + while ((entry != &map->header) && (entry->start < end)) { + UVM_MAP_CLIP_END(map, entry, end); + if (VM_MAPENT_ISWIRED(entry)) + uvm_map_entry_unwire(map, entry); + entry = entry->next; } - } - if (entry == NULL || UVM_ET_ISHOLE(entry)) { - error = EINVAL; - goto fail; + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); + return (0); } /* - * Handle need-copy flag. - * This may invalidate last, hence the re-initialization during the - * loop. + * wire case: in two passes [XXXCDC: ugly block of code here] + * + * 1: holding the write lock, we create any anonymous maps that need + * to be created. then we clip each map entry to the region to + * be wired and increment its wiring count. * - * Also, perform clipping of last if not UVM_EXTRACT_QREF. + * 2: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the pages for any newly wired area (wired_count == 1). + * + * downgrading to a read lock for uvm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one of + * the pages to be wired (it would mark the page busy, blocking + * us, then in turn block on the map lock that we hold). because + * of problems in the recursive lock package, we cannot upgrade + * to a write lock in vm_map_lookup. thus, any actions that + * require the write lock must be done beforehand. because we + * keep the read lock on the map, the copy-on-write status of the + * entries we modify here cannot change. */ - for (entry = first; entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - if (UVM_ET_ISNEEDSCOPY(entry)) - amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); - if (UVM_ET_ISNEEDSCOPY(entry)) { + + while ((entry != &map->header) && (entry->start < end)) { + if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ + /* - * amap_copy failure + * perform actions of vm_map_lookup that need the + * write lock on the map: create an anonymous map + * for a copy-on-write region, or an anonymous map + * for a zero-fill region. (XXXCDC: submap case + * ok?) */ - error = ENOMEM; - goto fail; + + if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ + if (UVM_ET_ISNEEDSCOPY(entry) && + ((entry->protection & VM_PROT_WRITE) || + (entry->object.uvm_obj == NULL))) { + amap_copy(map, entry, M_WAITOK, TRUE, + start, end); + /* XXXCDC: wait OK? */ + } + } + } + UVM_MAP_CLIP_START(map, entry, start); + UVM_MAP_CLIP_END(map, entry, end); + entry->wired_count++; + + /* + * Check for holes + */ + + if (entry->protection == VM_PROT_NONE || + (entry->end < end && + (entry->next == &map->header || + entry->next->start > entry->end))) { + + /* + * found one. 
amap creation actions do not need to + * be undone, but the wired counts need to be restored. + */ + + while (entry != &map->header && entry->end > start) { + entry->wired_count--; + entry = entry->prev; + } + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); + return (EINVAL); } + entry = entry->next; } /* - * Lock destination map (kernel_map). + * Pass 2. */ - vm_map_lock(kernel_map); - if (uvm_map_findspace_tree(&kernel_map->free, len, UVM_UNKNOWN_OFFSET, - 0, kernel_map->flags & VM_MAP_GUARDPAGES, &dstaddr, kernel_map) == - NULL) { - error = ENOMEM; - goto fail2; +#ifdef DIAGNOSTIC + timestamp_save = map->timestamp; +#endif + vm_map_busy(map); + vm_map_downgrade(map); + + rv = 0; + entry = start_entry; + while (entry != &map->header && entry->start < end) { + if (entry->wired_count == 1) { + rv = uvm_fault_wire(map, entry->start, entry->end, + entry->protection); + if (rv) { + /* + * wiring failed. break out of the loop. + * we'll clean up the map below, once we + * have a write lock again. + */ + break; + } + } + entry = entry->next; } - *dstaddrp = dstaddr; - /* - * We now have srcmap and kernel_map locked. - * dstaddr contains the destination offset in dstmap. - */ + if (rv) { /* failed? */ - /* - * step 1: start looping through map entries, performing extraction. - */ - for (entry = first; entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); - if (UVM_ET_ISHOLE(entry)) - continue; + /* + * Get back to an exclusive (write) lock. + */ + + vm_map_upgrade(map); + vm_map_unbusy(map); + +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable: stale map"); +#endif /* - * Calculate uvm_mapent_clone parameters. + * first drop the wiring count on all the entries + * which haven't actually been wired yet. */ - cp_start = entry->start; - if (cp_start < start) { - cp_off = start - cp_start; - cp_start = start; - } else - cp_off = 0; - cp_len = MIN(entry->end, end) - cp_start; - - newentry = uvm_mapent_clone(kernel_map, - cp_start - start + dstaddr, cp_len, cp_off, - entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); - if (newentry == NULL) { - error = ENOMEM; - goto fail2_unmap; + + failed_entry = entry; + while (entry != &map->header && entry->start < end) { + entry->wired_count--; + entry = entry->next; } - kernel_map->size += cp_len; - if (flags & UVM_EXTRACT_FIXPROT) - newentry->protection = newentry->max_protection; + + /* + * now, unwire all the entries that were successfully + * wired above. + */ + + entry = start_entry; + while (entry != failed_entry) { + entry->wired_count--; + if (VM_MAPENT_ISWIRED(entry) == 0) + uvm_map_entry_unwire(map, entry); + entry = entry->next; + } + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + UVMHIST_LOG(maphist, "<- done (RV=%ld)", rv,0,0,0); + return(rv); + } + + /* We are holding a read lock here. */ + if ((lockflags & UVM_LK_EXIT) == 0) { + vm_map_unbusy(map); + vm_map_unlock_read(map); + } else { + + /* + * Get back to an exclusive (write) lock. + */ + + vm_map_upgrade(map); + vm_map_unbusy(map); } + UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); + return (0); +} + +/* + * uvm_map_pageable_all: special case of uvm_map_pageable - affects + * all mapped regions. + * + * => map must not be locked. + * => if no flags are specified, all regions are unwired. + * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 
+ */ + +int +uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) +{ + struct vm_map_entry *entry, *failed_entry; + vsize_t size; + int error; +#ifdef DIAGNOSTIC + u_int timestamp_save; +#endif + UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=%p,flags=0x%lx)", map, flags, 0, 0); + + KASSERT(map->flags & VM_MAP_PAGEABLE); + + vm_map_lock(map); + /* - * step 2: perform pmap copy. + * handle wiring and unwiring separately. */ - for (entry = first; entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + + if (flags == 0) { /* unwire */ /* - * Calculate uvm_mapent_clone parameters (again). + * POSIX 1003.1b -- munlockall unlocks all regions, + * regardless of how many times mlockall has been called. */ - cp_start = entry->start; - if (cp_start < start) - cp_start = start; - cp_len = MIN(entry->end, end) - cp_start; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + if (VM_MAPENT_ISWIRED(entry)) + uvm_map_entry_unwire(map, entry); + } + vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); + return (0); - pmap_copy(kernel_map->pmap, srcmap->pmap, - cp_start - start + dstaddr, cp_len, cp_start); + /* + * end of unwire case! + */ } - pmap_update(kernel_map->pmap); - error = 0; + if (flags & MCL_FUTURE) { + /* + * must wire all future mappings; remember this. + */ + vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); + } + + if ((flags & MCL_CURRENT) == 0) { + /* + * no more work to do! + */ + UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); + vm_map_unlock(map); + return (0); + } /* - * Unmap copied entries on failure. + * wire case: in three passes [XXXCDC: ugly block of code here] + * + * 1: holding the write lock, count all pages mapped by non-wired + * entries. if this would cause us to go over our limit, we fail. + * + * 2: still holding the write lock, we create any anonymous maps that + * need to be created. then we increment its wiring count. + * + * 3: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the pages for any newly wired area (wired_count == 1). + * + * downgrading to a read lock for uvm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one of + * the pages to be wired (it would mark the page busy, blocking + * us, then in turn block on the map lock that we hold). because + * of problems in the recursive lock package, we cannot upgrade + * to a write lock in vm_map_lookup. thus, any actions that + * require the write lock must be done beforehand. because we + * keep the read lock on the map, the copy-on-write status of the + * entries we modify here cannot change. + */ + + for (size = 0, entry = map->header.next; entry != &map->header; + entry = entry->next) { + if (entry->protection != VM_PROT_NONE && + VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ + size += entry->end - entry->start; + } + } + + if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { + vm_map_unlock(map); + return (ENOMEM); /* XXX overloaded */ + } + + /* XXX non-pmap_wired_count case must be handled by caller */ +#ifdef pmap_wired_count + if (limit != 0 && + (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { + vm_map_unlock(map); + return (ENOMEM); /* XXX overloaded */ + } +#endif + + /* + * Pass 2. 
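Illustrative sketch, not from this commit: uvm_map_pageable_all() is reached through mlockall(2)/munlockall(2), and MCL_FUTURE corresponds to the VM_MAP_WIREFUTURE flag handled above. The program below may fail under the default memory-locking limits.

#include <sys/mman.h>
#include <err.h>
#include <stdlib.h>

int
main(void)
{
        void *p;

        /* wire everything currently mapped and all future mappings */
        if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
                err(1, "mlockall");

        p = malloc(1 << 20);            /* new mapping arrives wired (MCL_FUTURE) */
        if (p == NULL)
                err(1, "malloc");

        if (munlockall() == -1)         /* clears wire-future and unwires everything */
                err(1, "munlockall");
        free(p);
        return 0;
}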
*/ -fail2_unmap: - if (error) { - uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, - FALSE, TRUE); + + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + if (entry->protection == VM_PROT_NONE) + continue; + if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ + /* + * perform actions of vm_map_lookup that need the + * write lock on the map: create an anonymous map + * for a copy-on-write region, or an anonymous map + * for a zero-fill region. (XXXCDC: submap case + * ok?) + */ + if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ + if (UVM_ET_ISNEEDSCOPY(entry) && + ((entry->protection & VM_PROT_WRITE) || + (entry->object.uvm_obj == NULL))) { + amap_copy(map, entry, M_WAITOK, TRUE, + entry->start, entry->end); + /* XXXCDC: wait OK? */ + } + } + } + entry->wired_count++; } /* - * Release maps, release dead entries. + * Pass 3. */ -fail2: - vm_map_unlock(kernel_map); -fail: - vm_map_unlock(srcmap); +#ifdef DIAGNOSTIC + timestamp_save = map->timestamp; +#endif + vm_map_busy(map); + vm_map_downgrade(map); - uvm_unmap_detach(&dead, 0); + for (error = 0, entry = map->header.next; + entry != &map->header && error == 0; + entry = entry->next) { + if (entry->wired_count == 1) { + error = uvm_fault_wire(map, entry->start, entry->end, + entry->protection); + } + } + + if (error) { /* failed? */ + /* + * Get back an exclusive (write) lock. + */ + vm_map_upgrade(map); + vm_map_unbusy(map); + +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable_all: stale map"); +#endif - return error; + /* + * first drop the wiring count on all the entries + * which haven't actually been wired yet. + * + * Skip VM_PROT_NONE entries like we did above. + */ + failed_entry = entry; + for (/* nothing */; entry != &map->header; + entry = entry->next) { + if (entry->protection == VM_PROT_NONE) + continue; + entry->wired_count--; + } + + /* + * now, unwire all the entries that were successfully + * wired above. + * + * Skip VM_PROT_NONE entries like we did above. + */ + for (entry = map->header.next; entry != failed_entry; + entry = entry->next) { + if (entry->protection == VM_PROT_NONE) + continue; + entry->wired_count--; + if (VM_MAPENT_ISWIRED(entry)) + uvm_map_entry_unwire(map, entry); + } + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (RV=%ld)", error,0,0,0); + return (error); + } + + /* We are holding a read lock here. */ + vm_map_unbusy(map); + vm_map_unlock_read(map); + + UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); + return (0); } /* @@ -4112,75 +3070,74 @@ int amap_clean_works = 1; /* XXX for now, just in case... 
*/ int uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) { - struct vm_map_entry *first, *entry; + struct vm_map_entry *current, *entry; + struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; struct vm_page *pg; - struct uvm_object *uobj; - vaddr_t cp_start, cp_end; - int refs; - int error; - boolean_t rv; - + vaddr_t offset; + vsize_t size; + int rv, error, refs; UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,flags=0x%lx)", - map, start, end, flags); + map, start, end, flags); KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != - (PGO_FREE|PGO_DEACTIVATE)); - - if (start > end || start < map->min_offset || end > map->max_offset) - return EINVAL; + (PGO_FREE|PGO_DEACTIVATE)); vm_map_lock_read(map); - first = uvm_map_entrybyaddr(&map->addr, start); + VM_MAP_RANGE_CHECK(map, start, end); + if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { + vm_map_unlock_read(map); + return (EFAULT); + } /* * Make a first pass to check for holes. */ - for (entry = first; entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - if (UVM_ET_ISSUBMAP(entry)) { + + for (current = entry; current->start < end; current = current->next) { + if (UVM_ET_ISSUBMAP(current)) { vm_map_unlock_read(map); - return EINVAL; + return (EINVAL); } - if (UVM_ET_ISSUBMAP(entry) || - UVM_ET_ISHOLE(entry) || - (entry->end < end && FREE_END(entry) != entry->end)) { + if (end > current->end && (current->next == &map->header || + current->end != current->next->start)) { vm_map_unlock_read(map); - return EFAULT; + return (EFAULT); } } error = 0; - for (entry = first; entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - amap = entry->aref.ar_amap; /* top layer */ - if (UVM_ET_ISOBJ(entry)) - uobj = entry->object.uvm_obj; - else - uobj = NULL; + + for (current = entry; current->start < end; current = current->next) { + amap = current->aref.ar_amap; /* top layer */ + uobj = current->object.uvm_obj; /* bottom layer */ + KASSERT(start >= current->start); /* * No amap cleaning necessary if: - * - there's no amap - * - we're not deactivating or freeing pages. + * + * (1) There's no amap. + * + * (2) We're not deactivating or freeing pages. */ + if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) goto flush_object; - if (!amap_clean_works) - goto flush_object; - cp_start = MAX(entry->start, start); - cp_end = MIN(entry->end, end); + /* XXX for now, just in case... */ + if (amap_clean_works == 0) + goto flush_object; - for (; cp_start != cp_end; cp_start += PAGE_SIZE) { - anon = amap_lookup(&entry->aref, - cp_start - entry->start); + offset = start - current->start; + size = MIN(end, current->end) - start; + for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { + anon = amap_lookup(¤t->aref, offset); if (anon == NULL) continue; - simple_lock(&anon->an_lock); /* XXX */ + simple_lock(&anon->an_lock); pg = anon->an_page; if (pg == NULL) { @@ -4189,21 +3146,23 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { + /* * XXX In these first 3 cases, we always just * XXX deactivate the page. We may want to * XXX handle the different cases more * XXX specifically, in the future. 
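Illustrative sketch, not from this commit: uvm_map_clean() with PGO_CLEANIT is the flush path that msync(2) normally takes; the file path below is made up for the example.

#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        size_t pg = (size_t)sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/msync-demo", O_RDWR | O_CREAT, 0600);
        char *p;

        if (fd == -1 || ftruncate(fd, pg) == -1)
                err(1, "open/ftruncate");
        p = mmap(NULL, pg, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");
        memcpy(p, "dirty", 5);                  /* dirty the shared file mapping */
        if (msync(p, pg, MS_SYNC) == -1)        /* flush the dirty pages to the file */
                err(1, "msync");
        munmap(p, pg);
        close(fd);
        return 0;
}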
*/ + case PGO_CLEANIT|PGO_FREE: case PGO_CLEANIT|PGO_DEACTIVATE: case PGO_DEACTIVATE: -deactivate_it: + deactivate_it: /* skip the page if it's loaned or wired */ if (pg->loan_count != 0 || pg->wire_count != 0) { simple_unlock(&anon->an_lock); - break; + continue; } uvm_lock_pageq(); @@ -4213,45 +3172,51 @@ deactivate_it: * by the anon (may simply be loaned to the * anon). */ + if ((pg->pg_flags & PQ_ANON) == 0) { KASSERT(pg->uobject == NULL); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - break; + continue; } KASSERT(pg->uanon == anon); +#ifdef UBC + /* ...and deactivate the page. */ + pmap_clear_reference(pg); +#else /* zap all mappings for the page. */ pmap_page_protect(pg, VM_PROT_NONE); /* ...and deactivate the page. */ +#endif uvm_pagedeactivate(pg); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - break; + continue; case PGO_FREE: /* - * If there are mutliple references to + * If there are multiple references to * the amap, just deactivate the page. */ + if (amap_refs(amap) > 1) goto deactivate_it; /* XXX skip the page if it's wired */ if (pg->wire_count != 0) { simple_unlock(&anon->an_lock); - break; + continue; } - amap_unadd(&entry->aref, - cp_start - entry->start); + amap_unadd(¤t->aref, offset); refs = --anon->an_ref; simple_unlock(&anon->an_lock); if (refs == 0) uvm_anfree(anon); - break; + continue; default: panic("uvm_map_clean: weird flags"); @@ -4259,677 +3224,827 @@ deactivate_it: } flush_object: - cp_start = MAX(entry->start, start); - cp_end = MIN(entry->end, end); - /* * flush pages if we've got a valid backing object. * * Don't PGO_FREE if we don't have write permission - * and don't flush if this is a copy-on-write object + * and don't flush if this is a copy-on-write object * since we can't know our permissions on it. */ + + offset = current->offset + (start - current->start); + size = MIN(end, current->end) - start; if (uobj != NULL && ((flags & PGO_FREE) == 0 || ((entry->max_protection & VM_PROT_WRITE) != 0 && (entry->etype & UVM_ET_COPYONWRITE) == 0))) { simple_lock(&uobj->vmobjlock); - rv = uobj->pgops->pgo_flush(uobj, - cp_start - entry->start + entry->offset, - cp_end - entry->start + entry->offset, flags); + rv = uobj->pgops->pgo_flush(uobj, offset, + offset + size, flags); simple_unlock(&uobj->vmobjlock); if (rv == FALSE) error = EFAULT; } + start += size; } - vm_map_unlock_read(map); - return error; + return (error); } + /* - * UVM_MAP_CLIP_END implementation + * uvm_map_checkprot: check protection in map + * + * => must allow specified protection in a fully allocated region. + * => map must be read or write locked by caller. */ -void -uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) + +boolean_t +uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t protection) { - struct vm_map_entry *tmp; + struct vm_map_entry *entry; + struct vm_map_entry *tmp_entry; - KASSERT(entry->start < addr && FREE_END(entry) > addr); - tmp = uvm_mapent_alloc(map, 0); + if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { + return(FALSE); + } + entry = tmp_entry; + while (start < end) { + if (entry == &map->header) { + return(FALSE); + } - /* - * Invoke splitentry. - */ - uvm_map_splitentry(map, entry, tmp, addr); -} + /* + * no holes allowed + */ -/* - * UVM_MAP_CLIP_START implementation - * - * Clippers are required to not change the pointers to the entry they are - * clipping on. 
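Illustrative sketch, not from this commit: the PGO_DEACTIVATE and PGO_FREE cases above are typically what the madvise(2) hints MADV_DONTNEED and MADV_FREE turn into, while the access-pattern hints go through uvm_map_advice() earlier in this file.

#include <sys/mman.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        size_t len = 16 * (size_t)sysconf(_SC_PAGESIZE);
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);

        if (p == MAP_FAILED)
                err(1, "mmap");
        memset(p, 1, len);
        if (madvise(p, len, MADV_RANDOM) == -1)         /* sets entry->advice */
                err(1, "madvise RANDOM");
        if (madvise(p, len, MADV_DONTNEED) == -1)       /* pages may be deactivated */
                err(1, "madvise DONTNEED");
        if (madvise(p, len, MADV_FREE) == -1)           /* contents may be discarded */
                err(1, "madvise FREE");
        munmap(p, len);
        return 0;
}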
- * Since uvm_map_splitentry turns the original entry into the lowest - * entry (address wise) we do a swap between the new entry and the original - * entry, prior to calling uvm_map_splitentry. - */ -void -uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) -{ - struct vm_map_entry *tmp; - struct uvm_map_free *free; + if (start < entry->start) { + return(FALSE); + } - /* - * Copy entry. - */ - KASSERT(entry->start < addr && FREE_END(entry) > addr); - tmp = uvm_mapent_alloc(map, 0); - uvm_mapent_copy(entry, tmp); + /* + * check protection associated with entry + */ - /* - * Put new entry in place of original entry. - */ - free = UVM_FREE(map, FREE_START(entry)); - uvm_mapent_addr_remove(map, entry); - if (entry->fspace > 0 && free) { - uvm_mapent_free_remove(map, free, entry); - uvm_mapent_free_insert(map, free, tmp); - } - uvm_mapent_addr_insert(map, tmp); + if ((entry->protection & protection) != protection) { + return(FALSE); + } - /* - * Invoke splitentry. - */ - uvm_map_splitentry(map, tmp, entry, addr); + /* go to next entry */ + + start = entry->end; + entry = entry->next; + } + return(TRUE); } /* - * Boundary fixer. + * uvmspace_alloc: allocate a vmspace structure. + * + * - structure includes vm_map and pmap + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ -static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); -static __inline vaddr_t -uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) +struct vmspace * +uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, + boolean_t remove_holes) { - return (min < bound && max > bound) ? bound : max; + struct vmspace *vm; + UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); + + vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); + uvmspace_init(vm, NULL, min, max, pageable, remove_holes); + UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0); + return (vm); } /* - * Choose free list based on address at start of free space. + * uvmspace_init: initialize a vmspace structure. + * + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ -struct uvm_map_free* -uvm_free(struct vm_map *map, vaddr_t addr) +void +uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, + boolean_t pageable, boolean_t remove_holes) { - /* Special case the first page, to prevent mmap from returning 0. */ - if (addr < VMMAP_MIN_ADDR) - return NULL; + UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); - if ((map->flags & VM_MAP_ISVMSPACE) == 0) { - if (addr >= uvm_maxkaddr) - return NULL; - } else { - /* addr falls within brk() area. */ - if (addr >= map->b_start && addr < map->b_end) - return &map->bfree; - /* addr falls within stack area. */ - if (addr >= map->s_start && addr < map->s_end) - return &map->bfree; - } - return &map->free; + uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); + + if (pmap) + pmap_reference(pmap); + else + pmap = pmap_create(); + vm->vm_map.pmap = pmap; + + vm->vm_refcnt = 1; + + if (remove_holes) + pmap_remove_holes(&vm->vm_map); + + UVMHIST_LOG(maphist,"<- done",0,0,0,0); } /* - * Returns the first free-memory boundary that is crossed by [min-max]. + * uvmspace_share: share a vmspace between two proceses + * + * - XXX: no locking on vmspace + * - used for vfork, threads(?) */ -vsize_t -uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) -{ - /* Treat the first page special, mmap returning 0 breaks too much. 
*/ - max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); - if ((map->flags & VM_MAP_ISVMSPACE) == 0) { - max = uvm_map_boundfix(min, max, uvm_maxkaddr); - } else { - max = uvm_map_boundfix(min, max, map->b_start); - max = uvm_map_boundfix(min, max, map->b_end); - max = uvm_map_boundfix(min, max, map->s_start); - max = uvm_map_boundfix(min, max, map->s_end); - } - return max; +void +uvmspace_share(p1, p2) + struct proc *p1, *p2; +{ + p2->p_vmspace = p1->p_vmspace; + p1->p_vmspace->vm_refcnt++; } /* - * Update map allocation start and end addresses from proc vmspace. + * uvmspace_exec: the process wants to exec a new program + * + * - XXX: no locking on vmspace */ + void -uvm_map_vmspace_update(struct vm_map *map, - struct uvm_map_deadq *dead, int flags) +uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) { - struct vmspace *vm; - vaddr_t b_start, b_end, s_start, s_end; + struct vmspace *nvm, *ovm = p->p_vmspace; + struct vm_map *map = &ovm->vm_map; - KASSERT(map->flags & VM_MAP_ISVMSPACE); - KASSERT(offsetof(struct vmspace, vm_map) == 0); + pmap_unuse_final(p); /* before stack addresses go away */ /* - * Derive actual allocation boundaries from vmspace. + * see if more than one process is using this vmspace... */ - vm = (struct vmspace *)map; - b_start = (vaddr_t)vm->vm_daddr; - b_end = b_start + BRKSIZ; - s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); - s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); -#ifdef DIAGNOSTIC - if ((b_start & PAGE_MASK) != 0 || (b_end & PAGE_MASK) != 0 || - (s_start & PAGE_MASK) != 0 || (s_end & PAGE_MASK) != 0) { - panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " - "b=0x%lx-0x%lx s=0x%lx-0x%lx", - vm, b_start, b_end, s_start, s_end); - } + + if (ovm->vm_refcnt == 1) { + + /* + * if p is the only process using its vmspace then we can safely + * recycle that vmspace for the program that is being exec'd. + */ + +#ifdef SYSVSHM + /* + * SYSV SHM semantics require us to kill all segments on an exec + */ + if (ovm->vm_shm) + shmexit(ovm); #endif - if (__predict_true(map->b_start == b_start && map->b_end == b_end && - map->s_start == s_start && map->s_end == s_end)) - return; + /* + * POSIX 1003.1b -- "lock future mappings" is revoked + * when a process execs another program image. + */ + vm_map_lock(map); + vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); + vm_map_unlock(map); + + /* + * now unmap the old program + */ + uvm_unmap(map, map->min_offset, map->max_offset); + + /* + * but keep MMU holes unavailable + */ + pmap_remove_holes(map); + + /* + * resize the map + */ + vm_map_lock(map); + map->min_offset = start; + uvm_tree_sanity(map, "resize enter"); + map->max_offset = end; + if (map->header.prev != &map->header) + uvm_rb_fixup(map, map->header.prev); + uvm_tree_sanity(map, "resize leave"); + vm_map_unlock(map); + + + } else { + + /* + * p's vmspace is being shared, so we can't reuse it for p since + * it is still being used for others. allocate a new vmspace + * for p + */ + nvm = uvmspace_alloc(start, end, + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); - uvm_map_freelist_update(map, dead, b_start, b_end, - s_start, s_end, flags); + /* + * install new vmspace and drop our ref to the old one. + */ + + pmap_deactivate(p); + p->p_vmspace = nvm; + pmap_activate(p); + + uvmspace_free(ovm); + } } /* - * Grow kernel memory. - * - * This function is only called for kernel maps when an allocation fails. 
+ * uvmspace_free: free a vmspace data structure * - * If the map has a gap that is large enough to accomodate alloc_sz, this - * function will make sure map->free will include it. + * - XXX: no locking on vmspace */ + void -uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, - vsize_t alloc_sz, int flags) +uvmspace_free(struct vmspace *vm) { - vsize_t sz; - vaddr_t end; - struct vm_map_entry *entry; - - /* Kernel memory only. */ - KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); - /* Destroy free list. */ - uvm_map_freelist_update_clear(map, dead); - - /* - * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. - * - * Don't handle the case where the multiplication overflows: - * if that happens, the allocation is probably too big anyway. - */ - sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); - - /* - * Include the guard page in the hard minimum requirement of alloc_sz. - */ - if (map->flags & VM_MAP_GUARDPAGES) - alloc_sz += PAGE_SIZE; + struct vm_map_entry *dead_entries; + UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); - /* - * Walk forward until a gap large enough for alloc_sz shows up. - * - * We assume the kernel map has no boundaries. - * uvm_maxkaddr may be zero. - */ - end = MAX(uvm_maxkaddr, map->min_offset); - entry = uvm_map_entrybyaddr(&map->addr, end); - while (entry && entry->fspace < alloc_sz) - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); - if (entry) { - end = MAX(FREE_START(entry), end); - end += MIN(sz, map->max_offset - end); - } else - end = map->max_offset; - - /* Reserve pmap entries. */ -#ifdef PMAP_GROWKERNEL - uvm_maxkaddr = pmap_growkernel(end); -#else - uvm_maxkaddr = end; + UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); + if (--vm->vm_refcnt == 0) { + /* + * lock the map, to wait out all other references to it. delete + * all of the mappings and pages they hold, then call the pmap + * module to reclaim anything left. + */ +#ifdef SYSVSHM + /* Get rid of any SYSV shared memory segments. */ + if (vm->vm_shm != NULL) + shmexit(vm); #endif - /* Rebuild free list. */ - uvm_map_freelist_update_refill(map, flags); + vm_map_lock(&vm->vm_map); + if (vm->vm_map.nentries) { + uvm_unmap_remove(&vm->vm_map, + vm->vm_map.min_offset, vm->vm_map.max_offset, + &dead_entries, NULL, TRUE); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + } + pmap_destroy(vm->vm_map.pmap); + vm->vm_map.pmap = NULL; + pool_put(&uvm_vmspace_pool, vm); + } + UVMHIST_LOG(maphist,"<- done", 0,0,0,0); } /* - * Freelist update subfunction: unlink all entries from freelists. 
+ * uvm_map_create: create map */ -void -uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) +vm_map_t +uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) { - struct uvm_map_free *free; - struct vm_map_entry *entry, *prev, *next; - - prev = NULL; - for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; - entry = next) { - next = RB_NEXT(uvm_map_addr, &map->addr, entry); - - free = UVM_FREE(map, FREE_START(entry)); - if (entry->fspace > 0 && free) - uvm_mapent_free_remove(map, free, entry); - - if (prev != NULL && entry->start == entry->end) { - prev->fspace += FREE_END(entry) - entry->end; - uvm_mapent_addr_remove(map, entry); - DEAD_ENTRY_PUSH(dead, entry); - } else - prev = entry; - } + vm_map_t result; + + result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); + uvm_map_setup(result, min, max, flags); + result->pmap = pmap; + return(result); } /* - * Freelist update subfunction: refill the freelists with entries. + * uvm_map_setup: init map + * + * => map must not be in service yet. */ void -uvm_map_freelist_update_refill(struct vm_map *map, int flags) +uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags) { - struct vm_map_entry *entry; - vaddr_t min, max; - RB_FOREACH(entry, uvm_map_addr, &map->addr) { - min = FREE_START(entry); - max = FREE_END(entry); - entry->fspace = 0; + RB_INIT(&map->rbhead); + map->header.next = map->header.prev = &map->header; + map->nentries = 0; + map->size = 0; + map->ref_count = 1; + map->min_offset = min; + map->max_offset = max; + map->flags = flags; + map->first_free = &map->header; + map->hint = &map->header; + map->timestamp = 0; + rw_init(&map->lock, "vmmaplk"); + simple_lock_init(&map->ref_lock); + simple_lock_init(&map->hint_lock); +} + - entry = uvm_map_fix_space(map, entry, min, max, flags); - } - uvm_tree_sanity(map, __FILE__, __LINE__); +/* + * uvm_map_reference: add reference to a map + * + * => map need not be locked (we use ref_lock). + */ +void +uvm_map_reference(vm_map_t map) +{ + simple_lock(&map->ref_lock); + map->ref_count++; + simple_unlock(&map->ref_lock); } /* - * Change {a,b}_{start,end} allocation ranges and associated free lists. + * uvm_map_deallocate: drop reference to a map + * + * => caller must not lock map + * => we will zap map if ref count goes to zero */ void -uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, - vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) +uvm_map_deallocate(vm_map_t map) { - KDASSERT(b_end >= b_start && s_end >= s_start); + int c; - /* Clear all free lists. */ - uvm_map_freelist_update_clear(map, dead); + simple_lock(&map->ref_lock); + c = --map->ref_count; + simple_unlock(&map->ref_lock); + if (c > 0) { + return; + } - /* Apply new bounds. */ - map->b_start = b_start; - map->b_end = b_end; - map->s_start = s_start; - map->s_end = s_end; + /* + * all references gone. unmap and free. + */ - /* Refill free lists. */ - uvm_map_freelist_update_refill(map, flags); + uvm_unmap(map, map->min_offset, map->max_offset); + pmap_destroy(map->pmap); + free(map, M_VMMAP); } /* - * Correct space insert. + * F O R K - m a i n e n t r y p o i n t + */ +/* + * uvmspace_fork: fork a process' main map + * + * => create a new vmspace for child process from parent. + * => parent's map must not be locked. 
*/ -struct vm_map_entry* -uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, - vaddr_t min, vaddr_t max, int flags) + +struct vmspace * +uvmspace_fork(struct vmspace *vm1) { - struct uvm_map_free *free; - vaddr_t lmax; + struct vmspace *vm2; + struct vm_map *old_map = &vm1->vm_map; + struct vm_map *new_map; + struct vm_map_entry *old_entry; + struct vm_map_entry *new_entry; + pmap_t new_pmap; + boolean_t protect_child; + UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); + + vm_map_lock(old_map); + + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); + memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, + (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); + new_map = &vm2->vm_map; /* XXX */ + new_pmap = new_map->pmap; + + old_entry = old_map->header.next; + + /* + * go entry-by-entry + */ + + while (old_entry != &old_map->header) { - KDASSERT(min <= max); - KDASSERT((entry != NULL && FREE_END(entry) == min) || - min == map->min_offset); - while (min != max) { /* - * Claim guard page for entry. + * first, some sanity checks on the old entry */ - if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && - FREE_END(entry) == entry->end && - entry->start != entry->end) { - if (max - min == 2 * PAGE_SIZE) { - /* - * If the free-space gap is exactly 2 pages, - * we make the guard 2 pages instead of 1. - * Because in a guarded map, an area needs - * at least 2 pages to allocate from: - * one page for the allocation and one for - * the guard. - */ - entry->guard = 2 * PAGE_SIZE; - min = max; - } else { - entry->guard = PAGE_SIZE; - min += PAGE_SIZE; + if (UVM_ET_ISSUBMAP(old_entry)) + panic("fork: encountered a submap during fork (illegal)"); + + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) + panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); + + + switch (old_entry->inheritance) { + case MAP_INHERIT_NONE: + /* + * drop the mapping + */ + break; + + case MAP_INHERIT_SHARE: + /* + * share the mapping: this means we want the old and + * new entries to share amaps and backing objects. + */ + + /* + * if the old_entry needs a new amap (due to prev fork) + * then we need to allocate it now so that we have + * something we own to share with the new_entry. [in + * other words, we need to clear needs_copy] + */ + + if (UVM_ET_ISNEEDSCOPY(old_entry)) { + /* get our own amap, clears needs_copy */ + amap_copy(old_map, old_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: WAITOK??? */ } - continue; - } - /* - * Handle the case where entry has a 2-page guard, but the - * space after entry is freed. 
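Illustrative sketch, not from this commit: the MAP_INHERIT_SHARE case handled above is selected from userland with minherit(2); a private anonymous page marked shared before fork(2) lets a child's store become visible to the parent.

#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        size_t pg = (size_t)sysconf(_SC_PAGESIZE);
        volatile int *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);

        if (p == MAP_FAILED)
                err(1, "mmap");
        if (minherit((void *)p, pg, MAP_INHERIT_SHARE) == -1)
                err(1, "minherit");
        *p = 1;
        switch (fork()) {
        case -1:
                err(1, "fork");
        case 0:
                *p = 2;                 /* child writes into the shared amap */
                _exit(0);
        default:
                wait(NULL);
                printf("parent sees %d\n", *p);         /* prints 2 */
        }
        return 0;
}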
- */ - if (entry != NULL && entry->fspace == 0 && - entry->guard > PAGE_SIZE) { - entry->guard = PAGE_SIZE; - min = FREE_START(entry); - } + new_entry = uvm_mapent_alloc(new_map, 0); + /* old_entry -> new_entry */ + uvm_mapent_copy(old_entry, new_entry); - lmax = uvm_map_boundary(map, min, max); - free = UVM_FREE(map, min); + /* new pmap has nothing wired in it */ + new_entry->wired_count = 0; - if (entry != NULL && free == UVM_FREE(map, FREE_START(entry))) { - KDASSERT(FREE_END(entry) == min); - if (entry->fspace > 0 && free != NULL) - uvm_mapent_free_remove(map, free, entry); - entry->fspace += lmax - min; - } else { - entry = uvm_mapent_alloc(map, flags); - KDASSERT(entry != NULL); - entry->end = entry->start = min; - entry->guard = 0; - entry->fspace = lmax - min; - entry->object.uvm_obj = NULL; - entry->offset = 0; - entry->etype = 0; - entry->protection = entry->max_protection = 0; - entry->inheritance = 0; - entry->wired_count = 0; - entry->advice = 0; - entry->aref.ar_pageoff = 0; - entry->aref.ar_amap = NULL; - uvm_mapent_addr_insert(map, entry); - } + /* + * gain reference to object backing the map (can't + * be a submap, already checked this case). + */ + if (new_entry->aref.ar_amap) + /* share reference */ + uvm_map_reference_amap(new_entry, AMAP_SHARED); + + if (new_entry->object.uvm_obj && + new_entry->object.uvm_obj->pgops->pgo_reference) + new_entry->object.uvm_obj-> + pgops->pgo_reference( + new_entry->object.uvm_obj); + + /* insert entry at end of new_map's entry list */ + uvm_map_entry_link(new_map, new_map->header.prev, + new_entry); + + /* + * pmap_copy the mappings: this routine is optional + * but if it is there it will reduce the number of + * page faults in the new proc. + */ - if (free) - uvm_mapent_free_insert(map, free, entry); + pmap_copy(new_pmap, old_map->pmap, new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); - min = lmax; - } + break; - return entry; -} + case MAP_INHERIT_COPY: -/* - * MQuery style of allocation. - * - * This allocator searches forward until sufficient space is found to map - * the given size. - * - * XXX: factor in offset (via pmap_prefer) and protection? - */ -int -uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, - int flags) -{ - struct vm_map_entry *entry, *last; - vaddr_t addr; -#ifdef PMAP_PREFER - vaddr_t tmp; -#endif - int error; + /* + * copy-on-write the mapping (using mmap's + * MAP_PRIVATE semantics) + * + * allocate new_entry, adjust reference counts. + * (note that new references are read-only). + */ - addr = *addr_p; - vm_map_lock_read(map); + new_entry = uvm_mapent_alloc(new_map, 0); + /* old_entry -> new_entry */ + uvm_mapent_copy(old_entry, new_entry); -#ifdef PMAP_PREFER - if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) - addr = PMAP_PREFER(offset, addr); -#endif + if (new_entry->aref.ar_amap) + uvm_map_reference_amap(new_entry, 0); - /* - * First, check if the requested range is fully available. - */ - entry = uvm_map_entrybyaddr(&map->addr, addr); - last = NULL; - if (uvm_map_isavail(&map->addr, &entry, &last, addr, sz)) { - error = 0; - goto out; - } - if (flags & UVM_FLAG_FIXED) { - error = EINVAL; - goto out; - } + if (new_entry->object.uvm_obj && + new_entry->object.uvm_obj->pgops->pgo_reference) + new_entry->object.uvm_obj->pgops->pgo_reference + (new_entry->object.uvm_obj); - error = ENOMEM; /* Default error from here. 
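Illustrative sketch, not from this commit: MAP_INHERIT_COPY is the default inheritance, so a plain fork(2) gives copy-on-write entries and a child's write after the fork is not seen by the parent.

#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        size_t pg = (size_t)sysconf(_SC_PAGESIZE);
        volatile int *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);

        if (p == MAP_FAILED)
                err(1, "mmap");
        *p = 1;                         /* both processes start out seeing 1 */
        switch (fork()) {
        case -1:
                err(1, "fork");
        case 0:
                *p = 2;                 /* COW fault gives the child its own page */
                _exit(0);
        default:
                wait(NULL);
                printf("parent still sees %d\n", *p);   /* prints 1 */
        }
        return 0;
}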
*/ + /* new pmap has nothing wired in it */ + new_entry->wired_count = 0; - /* - * At this point, the memory at <addr, sz> is not available. - * The reasons are: - * [1] it's outside the map, - * [2] it starts in used memory (and therefore needs to move - * toward the first free page in entry), - * [3] it starts in free memory but bumps into used memory. - * - * Note that for case [2], the forward moving is handled by the - * for loop below. - */ + new_entry->etype |= + (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); + uvm_map_entry_link(new_map, new_map->header.prev, + new_entry); - if (entry == NULL) { - /* [1] Outside the map. */ - if (addr >= map->max_offset) - goto out; - else - entry = RB_MIN(uvm_map_addr, &map->addr); - } else if (FREE_START(entry) <= addr) { - /* [3] Bumped into used memory. */ - entry = RB_NEXT(uvm_map_addr, &map->addr, entry); - } + /* + * the new entry will need an amap. it will either + * need to be copied from the old entry or created + * from scratch (if the old entry does not have an + * amap). can we defer this process until later + * (by setting "needs_copy") or do we need to copy + * the amap now? + * + * we must copy the amap now if any of the following + * conditions hold: + * 1. the old entry has an amap and that amap is + * being shared. this means that the old (parent) + * process is sharing the amap with another + * process. if we do not clear needs_copy here + * we will end up in a situation where both the + * parent and child process are referring to the + * same amap with "needs_copy" set. if the + * parent write-faults, the fault routine will + * clear "needs_copy" in the parent by allocating + * a new amap. this is wrong because the + * parent is supposed to be sharing the old amap + * and the new amap will break that. + * + * 2. if the old entry has an amap and a non-zero + * wire count then we are going to have to call + * amap_cow_now to avoid page faults in the + * parent process. since amap_cow_now requires + * "needs_copy" to be clear we might as well + * clear it here as well. + * + */ - /* - * Test if the next entry is sufficient for the allocation. - */ - for (; entry != NULL; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { - if (entry->fspace == 0) - continue; - addr = FREE_START(entry); + if (old_entry->aref.ar_amap != NULL) { -restart: /* Restart address checks on address change. */ + if ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + VM_MAPENT_ISWIRED(old_entry)) { -#ifdef PMAP_PREFER - if (offset != UVM_UNKNOWN_OFFSET) { - tmp = (addr & ~(PMAP_PREFER_ALIGN() - 1)) | - PMAP_PREFER_OFFSET(offset); - if (tmp < addr) - tmp += PMAP_PREFER_ALIGN(); - if (addr >= FREE_END(entry)) - continue; - if (addr != tmp) { - addr = tmp; - goto restart; + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } } - } -#endif - /* - * Skip brk() allocation addresses. - */ - if (addr + sz > map->b_start && addr < map->b_end) { - if (FREE_END(entry) > map->b_end) { - addr = map->b_end; - goto restart; - } else - continue; - } - /* - * Skip stack allocation addresses. - */ - if (addr + sz > map->s_start && addr < map->s_end) { - if (FREE_END(entry) > map->s_end) { - addr = map->s_end; - goto restart; - } else - continue; - } + /* + * if the parent's entry is wired down, then the + * parent process does not want page faults on + * access to that memory. this means that we + * cannot do copy-on-write because we can't write + * protect the old entry. 
in this case we + * resolve all copy-on-write faults now, using + * amap_cow_now. note that we have already + * allocated any needed amap (above). + */ - last = NULL; - if (uvm_map_isavail(&map->addr, &entry, &last, addr, sz)) { - error = 0; - goto out; - } + if (VM_MAPENT_ISWIRED(old_entry)) { + + /* + * resolve all copy-on-write faults now + * (note that there is nothing to do if + * the old mapping does not have an amap). + * XXX: is it worthwhile to bother with pmap_copy + * in this case? + */ + if (old_entry->aref.ar_amap) + amap_cow_now(new_map, new_entry); + + } else { + + /* + * setup mappings to trigger copy-on-write faults + * we must write-protect the parent if it has + * an amap and it is not already "needs_copy"... + * if it is already "needs_copy" then the parent + * has already been write-protected by a previous + * fork operation. + * + * if we do not write-protect the parent, then + * we must be sure to write-protect the child + * after the pmap_copy() operation. + * + * XXX: pmap_copy should have some way of telling + * us that it didn't do anything so we can avoid + * calling pmap_protect needlessly. + */ + + if (old_entry->aref.ar_amap) { + + if (!UVM_ET_ISNEEDSCOPY(old_entry)) { + if (old_entry->max_protection & VM_PROT_WRITE) { + pmap_protect(old_map->pmap, + old_entry->start, + old_entry->end, + old_entry->protection & + ~VM_PROT_WRITE); + pmap_update(old_map->pmap); + + } + old_entry->etype |= UVM_ET_NEEDSCOPY; + } + + /* + * parent must now be write-protected + */ + protect_child = FALSE; + } else { + + /* + * we only need to protect the child if the + * parent has write access. + */ + if (old_entry->max_protection & VM_PROT_WRITE) + protect_child = TRUE; + else + protect_child = FALSE; + + } + + /* + * copy the mappings + * XXX: need a way to tell if this does anything + */ + + pmap_copy(new_pmap, old_map->pmap, + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); + + /* + * protect the child's mappings if necessary + */ + if (protect_child) { + pmap_protect(new_pmap, new_entry->start, + new_entry->end, + new_entry->protection & + ~VM_PROT_WRITE); + } + + } + break; + } /* end of switch statement */ + old_entry = old_entry->next; } -out: - vm_map_unlock_read(map); - if (error == 0) - *addr_p = addr; - return error; + new_map->size = old_map->size; + vm_map_unlock(old_map); + +#ifdef SYSVSHM + if (vm1->vm_shm) + shmfork(vm1, vm2); +#endif + +#ifdef PMAP_FORK + pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); +#endif + + UVMHIST_LOG(maphist,"<- done",0,0,0,0); + return(vm2); } +#if defined(DDB) + /* - * Determine allocation bias. - * - * Returns 1 if we should bias to high addresses, -1 for a bias towards low - * addresses, or 0 for no bias. - * The bias mechanism is intended to avoid clashing with brk() and stack - * areas. + * DDB hooks */ -int -uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) -{ - vaddr_t start, end; - start = FREE_START(entry); - end = FREE_END(entry); +/* + * uvm_map_printit: actually prints the map + */ - /* - * Stay at the top of brk() area. - */ - if (end >= map->b_start && start < map->b_end) - return 1; - /* - * Stay at the far end of the stack area. 
- */ - if (end >= map->s_start && start < map->s_end) { -#ifdef MACHINE_STACK_GROWS_UP - return 1; +void +uvm_map_printit(struct vm_map *map, boolean_t full, + int (*pr)(const char *, ...)) +{ + struct vm_map_entry *entry; + + (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); + (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", + map->nentries, map->size, map->ref_count, map->timestamp, + map->flags); +#ifdef pmap_resident_count + (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, + pmap_resident_count(map->pmap)); #else - return -1; + /* XXXCDC: this should be required ... */ + (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); #endif + if (!full) + return; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", + entry, entry->start, entry->end, entry->object.uvm_obj, + (long long)entry->offset, entry->aref.ar_amap, + entry->aref.ar_pageoff); + (*pr)( + "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " + "wc=%d, adv=%d\n", + (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', + entry->protection, entry->max_protection, + entry->inheritance, entry->wired_count, entry->advice); } +} - /* - * No bias, this area is meant for us. - */ - return 0; -} - +/* + * uvm_object_printit: actually prints the object + */ -boolean_t -vm_map_lock_try_ln(struct vm_map *map, char *file, int line) +void +uvm_object_printit(uobj, full, pr) + struct uvm_object *uobj; + boolean_t full; + int (*pr)(const char *, ...); { - boolean_t rv; + struct vm_page *pg; + int cnt = 0; - if (map->flags & VM_MAP_INTRSAFE) { - rv = TRUE; - } else { - if (map->flags & VM_MAP_BUSY) { - return (FALSE); - } - rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); - } + (*pr)("OBJECT %p: pgops=%p, npages=%d, ", + uobj, uobj->pgops, uobj->uo_npages); + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) + (*pr)("refs=<SYSTEM>\n"); + else + (*pr)("refs=%d\n", uobj->uo_refs); - if (rv) { - map->timestamp++; - LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); - uvm_tree_sanity(map, file, line); - uvm_tree_size_chk(map, file, line); + if (!full) { + return; } - - return (rv); -} - -void -vm_map_lock_ln(struct vm_map *map, char *file, int line) -{ - if ((map->flags & VM_MAP_INTRSAFE) == 0) { - do { - while (map->flags & VM_MAP_BUSY) { - map->flags |= VM_MAP_WANTLOCK; - tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); - } - } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); + (*pr)(" PAGES <pg,offset>:\n "); + RB_FOREACH(pg, uvm_objtree, &uobj->memt) { + (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); + if ((cnt % 3) == 2) { + (*pr)("\n "); + } + cnt++; + } + if ((cnt % 3) != 2) { + (*pr)("\n"); } +} - map->timestamp++; - LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); - uvm_tree_sanity(map, file, line); - uvm_tree_size_chk(map, file, line); -} +/* + * uvm_page_printit: actually print the page + */ -void -vm_map_lock_read_ln(struct vm_map *map, char *file, int line) -{ - if ((map->flags & VM_MAP_INTRSAFE) == 0) - rw_enter_read(&map->lock); - LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); - uvm_tree_sanity(map, file, line); - uvm_tree_size_chk(map, file, line); -} +static const char page_flagbits[] = + "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" + "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" + "\31PMAP1\32PMAP2\33PMAP3"; void -vm_map_unlock_ln(struct vm_map 
*map, char *file, int line) +uvm_page_printit(pg, full, pr) + struct vm_page *pg; + boolean_t full; + int (*pr)(const char *, ...); { - uvm_tree_sanity(map, file, line); - uvm_tree_size_chk(map, file, line); - LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); - if ((map->flags & VM_MAP_INTRSAFE) == 0) - rw_exit(&map->lock); -} + struct vm_page *tpg; + struct uvm_object *uobj; + struct pglist *pgl; -void -vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) -{ - /* XXX: RO */ uvm_tree_sanity(map, file, line); - /* XXX: RO */ uvm_tree_size_chk(map, file, line); - LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); - if ((map->flags & VM_MAP_INTRSAFE) == 0) - rw_exit_read(&map->lock); -} + (*pr)("PAGE %p:\n", pg); + (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", + pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, + (long long)pg->phys_addr); + (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", + pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); +#if defined(UVM_PAGE_TRKOWN) + if (pg->pg_flags & PG_BUSY) + (*pr)(" owning process = %d, tag=%s\n", + pg->owner, pg->owner_tag); + else + (*pr)(" page not busy, no owner\n"); +#else + (*pr)(" [page ownership tracking disabled]\n"); +#endif -void -vm_map_downgrade_ln(struct vm_map *map, char *file, int line) -{ - uvm_tree_sanity(map, file, line); - uvm_tree_size_chk(map, file, line); - LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); - LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); - if ((map->flags & VM_MAP_INTRSAFE) == 0) - rw_enter(&map->lock, RW_DOWNGRADE); -} + if (!full) + return; -void -vm_map_upgrade_ln(struct vm_map *map, char *file, int line) -{ - /* XXX: RO */ uvm_tree_sanity(map, file, line); - /* XXX: RO */ uvm_tree_size_chk(map, file, line); - LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); - if ((map->flags & VM_MAP_INTRSAFE) == 0) { - rw_exit_read(&map->lock); - rw_enter_write(&map->lock); + /* cross-verify object/anon */ + if ((pg->pg_flags & PQ_FREE) == 0) { + if (pg->pg_flags & PQ_ANON) { + if (pg->uanon == NULL || pg->uanon->an_page != pg) + (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", + (pg->uanon) ? pg->uanon->an_page : NULL); + else + (*pr)(" anon backpointer is OK\n"); + } else { + uobj = pg->uobject; + if (uobj) { + (*pr)(" checking object list\n"); + RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { + if (tpg == pg) { + break; + } + } + if (tpg) + (*pr)(" page found on object list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); + } + } } - LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); - uvm_tree_sanity(map, file, line); -} - -void -vm_map_busy_ln(struct vm_map *map, char *file, int line) -{ - map->flags |= VM_MAP_BUSY; -} -void -vm_map_unbusy_ln(struct vm_map *map, char *file, int line) -{ - int oflags; + /* cross-verify page queue */ + if (pg->pg_flags & PQ_FREE) { + if (uvm_pmr_isfree(pg)) + printf(" page found in uvm_pmemrange\n"); + else + printf(" >>> page not found in uvm_pmemrange <<<\n"); + pgl = NULL; + } else if (pg->pg_flags & PQ_INACTIVE) { + pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
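Illustrative sketch, not from this commit: page_flagbits above uses the traditional BSD "%b" bit-name encoding (a leading byte giving the print base, then <bit number><NAME> groups, with bits counted from 1). A small userland decoder for strings in that format might look like this.

#include <stdio.h>

static void
print_flags(unsigned int v, const char *bits)
{
        int base = *bits++;     /* \10 = octal, \20 = hex */
        int any = 0, c;

        if (base == 8)
                printf("0%o", v);
        else
                printf("0x%x", v);
        while ((c = *bits++) != '\0') {
                if (v & (1U << (c - 1))) {
                        putchar(any++ ? ',' : '<');
                        while (*bits > 32)      /* a name ends at the next control byte */
                                putchar(*bits++);
                } else {
                        while (*bits > 32)
                                bits++;
                }
        }
        if (any)
                putchar('>');
        putchar('\n');
}

int
main(void)
{
        /* bits 1 and 4 set, using a prefix of the encoding shown above */
        print_flags(0x9, "\20\1BUSY\2WANTED\3TABLED\4CLEAN");
        return 0;
}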
+ &uvm.page_inactive_swp : &uvm.page_inactive_obj; + } else if (pg->pg_flags & PQ_ACTIVE) { + pgl = &uvm.page_active; + } else { + pgl = NULL; + } - oflags = map->flags; - map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); - if (oflags & VM_MAP_WANTLOCK) - wakeup(&map->flags); + if (pgl) { + (*pr)(" checking pageq list\n"); + TAILQ_FOREACH(tpg, pgl, pageq) { + if (tpg == pg) { + break; + } + } + if (tpg) + (*pr)(" page found on pageq list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); + } } - - -RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, - uvm_mapentry_addrcmp); -RB_GENERATE(uvm_map_free_int, vm_map_entry, free_entry, uvm_mapentry_freecmp); +#endif diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index 23b5950ee1c..c416cc51d23 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,22 +1,7 @@ -/* $OpenBSD: uvm_map.h,v 1.45 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.46 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ -/* - * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * +/* * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. * @@ -90,28 +75,14 @@ #ifdef _KERNEL /* - * Internal functions. - * - * Required by clipping macros. - */ -void uvm_map_clip_end(struct vm_map*, struct vm_map_entry*, - vaddr_t); -void uvm_map_clip_start(struct vm_map*, - struct vm_map_entry*, vaddr_t); - -/* * UVM_MAP_CLIP_START: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. 
* * => map must be locked by caller */ -#define UVM_MAP_CLIP_START(_map, _entry, _addr) \ - do { \ - KASSERT((_entry)->end + (_entry)->fspace > (_addr)); \ - if ((_entry)->start < (_addr)) \ - uvm_map_clip_start((_map), (_entry), (_addr)); \ - } while (0) +#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \ + if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); } /* * UVM_MAP_CLIP_END: ensure that the entry ends at or before @@ -120,16 +91,15 @@ void uvm_map_clip_start(struct vm_map*, * => map must be locked by caller */ -#define UVM_MAP_CLIP_END(_map, _entry, _addr) \ - do { \ - KASSERT((_entry)->start < (_addr)); \ - if ((_entry)->end > (_addr)) \ - uvm_map_clip_end((_map), (_entry), (_addr)); \ - } while (0) +#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \ + if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); } /* * extract flags */ +#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */ +#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */ +#define UVM_EXTRACT_QREF 0x4 /* use quick refs */ #define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */ #endif /* _KERNEL */ @@ -163,30 +133,21 @@ union vm_map_object { * Also included is control information for virtual copy operations. */ struct vm_map_entry { - union { - RB_ENTRY(vm_map_entry) addr_entry; /* address tree */ - TAILQ_ENTRY(vm_map_entry) deadq; /* dead entry queue */ - } daddrs; - RB_ENTRY(vm_map_entry) free_entry; /* free-space tree */ - -#define uvm_map_entry_start_copy start + RB_ENTRY(vm_map_entry) rb_entry; /* tree information */ + vaddr_t ownspace; /* free space after */ + vaddr_t space; /* space in subtree */ + struct vm_map_entry *prev; /* previous entry */ + struct vm_map_entry *next; /* next entry */ vaddr_t start; /* start address */ vaddr_t end; /* end address */ - - vsize_t guard; /* bytes in guard */ - vsize_t fspace; /* free space */ - union vm_map_object object; /* object I point to */ voff_t offset; /* offset into object */ - struct vm_aref aref; /* anonymous overlay */ - int etype; /* entry type */ - vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ - int wired_count; /* can be paged if == 0 */ + struct vm_aref aref; /* anonymous overlay */ int advice; /* madvise advice */ #define uvm_map_entry_stop_copy flags u_int8_t flags; /* flags */ @@ -195,29 +156,18 @@ struct vm_map_entry { #define UVM_MAP_KMEM 0x02 /* from kmem entry pool */ }; -#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0) - -TAILQ_HEAD(uvm_map_deadq, vm_map_entry); /* dead entry queue */ -RB_HEAD(uvm_map_addr, vm_map_entry); -RB_HEAD(uvm_map_free_int, vm_map_entry); -RB_PROTOTYPE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, - uvm_mapentry_addrcmp); -RB_PROTOTYPE(uvm_map_free_int, vm_map_entry, free_entry, uvm_mapentry_freecmp); - /* - * Tree with size information. + * Marks the map entry as a guard page, using vm_map_entry.etype. */ -struct uvm_map_free { - struct uvm_map_free_int tree; /* Tree of free items. */ - size_t treesz; /* Size of tree. */ -}; +#define MAP_ET_KVAGUARD 0x10 /* guard entry */ + +#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0) /* - * A Map is a rbtree of map entries, kept sorted by address. - * In addition, free space entries are also kept in a rbtree, - * indexed by free size. - * - * + * Maps are doubly-linked lists of map entries, kept sorted + * by address. A single hint is provided to start + * searches again from the last successful search, + * insertion, or removal. 
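Illustrative sketch, not from this commit (names are invented): a toy model of the hinted lookup described above, where a search starts from the entry found last time instead of from the list head; the real uvm_map_lookup_entry() also consults the red-black tree.

#include <stdio.h>

struct ent {
        unsigned long start, end;       /* [start, end) */
        struct ent *next;
};

static struct ent *hint;                /* plays the role of vm_map.hint */

static struct ent *
lookup(struct ent *head, unsigned long addr)
{
        struct ent *e;

        /* start at the hint when it does not lie past the target address */
        e = (hint != NULL && hint->start <= addr) ? hint : head;
        for (; e != NULL; e = e->next) {
                if (e->start <= addr && addr < e->end)
                        return hint = e;        /* remember for the next search */
                if (e->start > addr)
                        break;                  /* sorted by address: no match */
        }
        return NULL;
}

int
main(void)
{
        struct ent c = { 0x3000, 0x4000, NULL };
        struct ent b = { 0x2000, 0x3000, &c };
        struct ent a = { 0x1000, 0x2000, &b };

        printf("%p\n", (void *)lookup(&a, 0x2800));     /* finds b, caches it */
        printf("%p\n", (void *)lookup(&a, 0x3800));     /* resumes from the hint */
        return 0;
}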
* * LOCKING PROTOCOL NOTES: * ----------------------- @@ -264,59 +214,23 @@ struct uvm_map_free { * is busy, and thread is attempting * to write-lock. must be tested * while `flags_lock' is asserted. - * - * VM_MAP_GUARDPAGES r/o; must be specified at map - * initialization time. - * If set, guards will appear between - * automatic allocations. - * No locking required. - * - * VM_MAP_ISVMSPACE r/o; set by uvmspace_alloc. - * Signifies that this map is a vmspace. - * (The implementation treats all maps - * without this bit as kernel maps.) - * No locking required. - * - * - * All automatic allocations (uvm_map without MAP_FIXED) will allocate - * from vm_map.free. - * If that allocation fails: - * - vmspace maps will spill over into vm_map.bfree, - * - all other maps will call uvm_map_kmem_grow() to increase the arena. - * - * vmspace maps have their data, brk() and stack arenas automatically - * updated when uvm_map() is invoked without MAP_FIXED. - * The spill over arena (vm_map.bfree) will contain the space in the brk() - * and stack ranges. - * Kernel maps never have a bfree arena and this tree will always be empty. - * - * - * read_locks and write_locks are used in lock debugging code. */ struct vm_map { struct pmap * pmap; /* Physical map */ struct rwlock lock; /* Lock for map data */ - - struct uvm_map_addr addr; /* Entry tree, by addr */ - struct uvm_map_free free; /* Free space tree */ - struct uvm_map_free bfree; /* brk() space tree */ - + RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */ + struct vm_map_entry header; /* List of entries */ + int nentries; /* Number of entries */ vsize_t size; /* virtual size */ int ref_count; /* Reference count */ simple_lock_data_t ref_lock; /* Lock for ref_count field */ + vm_map_entry_t hint; /* hint for quick lookups */ + simple_lock_data_t hint_lock; /* lock for hint storage */ + vm_map_entry_t first_free; /* First free space hint */ int flags; /* flags */ unsigned int timestamp; /* Version number */ - - vaddr_t min_offset; /* First address in map. */ - vaddr_t max_offset; /* Last address in map. */ - - /* - * Allocation overflow regions. - */ - vaddr_t b_start; /* Start for brk() alloc. */ - vaddr_t b_end; /* End for brk() alloc. */ - vaddr_t s_start; /* Start for stack alloc. */ - vaddr_t s_end; /* End for stack alloc. */ +#define min_offset header.start +#define max_offset header.end }; /* vm_map flags */ @@ -325,18 +239,11 @@ struct vm_map { #define VM_MAP_WIREFUTURE 0x04 /* rw: wire future mappings */ #define VM_MAP_BUSY 0x08 /* rw: map is busy */ #define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */ -#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */ -#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */ /* XXX: number of kernel maps and entries to statically allocate */ #if !defined(MAX_KMAPENT) -#ifdef KVA_GUARDPAGES -/* Sufficient for UVM_KM_MAXPAGES_HIWAT(8192) + overhead. 
*/ -#define MAX_KMAPENT 8192 + 1024 -#else -#define MAX_KMAPENT 1024 /* XXXCDC: no crash */ -#endif +#define MAX_KMAPENT 1024 /* XXXCDC: no crash */ #endif /* !defined MAX_KMAPENT */ #ifdef _KERNEL @@ -372,27 +279,32 @@ extern vaddr_t uvm_maxkaddr; void uvm_map_deallocate(vm_map_t); int uvm_map_clean(vm_map_t, vaddr_t, vaddr_t, int); +void uvm_map_clip_start(vm_map_t, vm_map_entry_t, vaddr_t); +void uvm_map_clip_end(vm_map_t, vm_map_entry_t, vaddr_t); vm_map_t uvm_map_create(pmap_t, vaddr_t, vaddr_t, int); -int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*, - int); +int uvm_map_extract(vm_map_t, vaddr_t, vsize_t, + vm_map_t, vaddr_t *, int); +vm_map_entry_t uvm_map_findspace(vm_map_t, vaddr_t, vsize_t, vaddr_t *, + struct uvm_object *, voff_t, vsize_t, int); vaddr_t uvm_map_pie(vaddr_t); -vaddr_t uvm_map_hint(struct proc *, vm_prot_t); +#define uvm_map_hint(p, prot) uvm_map_hint1(p, prot, 1) +vaddr_t uvm_map_hint1(struct proc *, vm_prot_t, int); int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t); int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int); void uvm_map_init(void); boolean_t uvm_map_lookup_entry(vm_map_t, vaddr_t, vm_map_entry_t *); +void uvm_map_reference(vm_map_t); int uvm_map_replace(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t, int); int uvm_map_reserve(vm_map_t, vsize_t, vaddr_t, vsize_t, vaddr_t *); void uvm_map_setup(vm_map_t, vaddr_t, vaddr_t, int); int uvm_map_submap(vm_map_t, vaddr_t, vaddr_t, vm_map_t); -void uvm_unmap(vm_map_t, vaddr_t, vaddr_t); -int uvm_map_mquery(struct vm_map*, vaddr_t*, vsize_t, voff_t, int); - -void uvm_unmap_detach(struct uvm_map_deadq*, int); -void uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t, - struct uvm_map_deadq*, boolean_t, boolean_t); +#define uvm_unmap(_m, _s, _e) uvm_unmap_p(_m, _s, _e, 0) +void uvm_unmap_p(vm_map_t, vaddr_t, vaddr_t, struct proc *); +void uvm_unmap_detach(vm_map_entry_t,int); +void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *, + struct proc *, boolean_t); #endif /* _KERNEL */ @@ -425,45 +337,82 @@ void uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t, */ #ifdef _KERNEL -/* - * XXX: clean up later - * Half the kernel seems to depend on them being included here. 
- */ +/* XXX: clean up later */ #include <sys/time.h> -#include <sys/systm.h> /* for panic() */ - -boolean_t vm_map_lock_try_ln(struct vm_map*, char*, int); -void vm_map_lock_ln(struct vm_map*, char*, int); -void vm_map_lock_read_ln(struct vm_map*, char*, int); -void vm_map_unlock_ln(struct vm_map*, char*, int); -void vm_map_unlock_read_ln(struct vm_map*, char*, int); -void vm_map_downgrade_ln(struct vm_map*, char*, int); -void vm_map_upgrade_ln(struct vm_map*, char*, int); -void vm_map_busy_ln(struct vm_map*, char*, int); -void vm_map_unbusy_ln(struct vm_map*, char*, int); - -#ifdef DIAGNOSTIC -#define vm_map_lock_try(map) vm_map_lock_try_ln(map, __FILE__, __LINE__) -#define vm_map_lock(map) vm_map_lock_ln(map, __FILE__, __LINE__) -#define vm_map_lock_read(map) vm_map_lock_read_ln(map, __FILE__, __LINE__) -#define vm_map_unlock(map) vm_map_unlock_ln(map, __FILE__, __LINE__) -#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, __FILE__, __LINE__) -#define vm_map_downgrade(map) vm_map_downgrade_ln(map, __FILE__, __LINE__) -#define vm_map_upgrade(map) vm_map_upgrade_ln(map, __FILE__, __LINE__) -#define vm_map_busy(map) vm_map_busy_ln(map, __FILE__, __LINE__) -#define vm_map_unbusy(map) vm_map_unbusy_ln(map, __FILE__, __LINE__) -#else -#define vm_map_lock_try(map) vm_map_lock_try_ln(map, NULL, 0) -#define vm_map_lock(map) vm_map_lock_ln(map, NULL, 0) -#define vm_map_lock_read(map) vm_map_lock_read_ln(map, NULL, 0) -#define vm_map_unlock(map) vm_map_unlock_ln(map, NULL, 0) -#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, NULL, 0) -#define vm_map_downgrade(map) vm_map_downgrade_ln(map, NULL, 0) -#define vm_map_upgrade(map) vm_map_upgrade_ln(map, NULL, 0) -#define vm_map_busy(map) vm_map_busy_ln(map, NULL, 0) -#define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0) -#endif +#include <sys/systm.h> /* for panic() */ + +static __inline boolean_t vm_map_lock_try(vm_map_t); +static __inline void vm_map_lock(vm_map_t); +extern const char vmmapbsy[]; + +static __inline boolean_t +vm_map_lock_try(struct vm_map *map) +{ + boolean_t rv; + + if (map->flags & VM_MAP_INTRSAFE) { + rv = TRUE; + } else { + if (map->flags & VM_MAP_BUSY) { + return (FALSE); + } + rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); + } + + if (rv) + map->timestamp++; + + return (rv); +} + +static __inline void +vm_map_lock(struct vm_map *map) +{ + if (map->flags & VM_MAP_INTRSAFE) + return; + + do { + while (map->flags & VM_MAP_BUSY) { + map->flags |= VM_MAP_WANTLOCK; + tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); + } + } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); + + map->timestamp++; +} + +#define vm_map_lock_read(map) rw_enter_read(&(map)->lock) + +#define vm_map_unlock(map) \ +do { \ + if (((map)->flags & VM_MAP_INTRSAFE) == 0) \ + rw_exit(&(map)->lock); \ +} while (0) + +#define vm_map_unlock_read(map) rw_exit_read(&(map)->lock) +#define vm_map_downgrade(map) rw_enter(&(map)->lock, RW_DOWNGRADE) + +#define vm_map_upgrade(map) \ +do { \ + rw_exit_read(&(map)->lock); \ + rw_enter_write(&(map)->lock); \ +} while (0) + +#define vm_map_busy(map) \ +do { \ + (map)->flags |= VM_MAP_BUSY; \ +} while (0) + +#define vm_map_unbusy(map) \ +do { \ + int oflags; \ + \ + oflags = (map)->flags; \ + (map)->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); \ + if (oflags & VM_MAP_WANTLOCK) \ + wakeup(&(map)->flags); \ +} while (0) #endif /* _KERNEL */ /* diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index a29d9020313..e87b89dd160 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* 
$OpenBSD: uvm_mmap.c,v 1.83 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.84 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -181,14 +181,43 @@ sys_mquery(struct proc *p, void *v, register_t *retval) } else { fp = NULL; uobj = NULL; - uoff = UVM_UNKNOWN_OFFSET; + uoff = 0; } - error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff, - flags); - if (error == 0) - *retval = (register_t)(vaddr); + if (vaddr == 0) + vaddr = uvm_map_hint(p, prot); + /* prevent a user requested address from falling in heap space */ + if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) && + (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { + if (flags & UVM_FLAG_FIXED) { + error = EINVAL; + goto done; + } + vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); + } + vm_map_lock(&p->p_vmspace->vm_map); + +again: + if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size, + &vaddr, uobj, uoff, 0, flags) == NULL) { + if (flags & UVM_FLAG_FIXED) + error = EINVAL; + else + error = ENOMEM; + } else { + /* prevent a returned address from falling in heap space */ + if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) + && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { + vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + + BRKSIZ); + goto again; + } + error = 0; + *retval = (register_t)(vaddr); + } + vm_map_unlock(&p->p_vmspace->vm_map); +done: if (fp != NULL) FRELE(fp); return (error); @@ -212,7 +241,7 @@ sys_mincore(struct proc *p, void *v, register_t *retval) struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; - vm_map_entry_t entry, next; + vm_map_entry_t entry; vaddr_t start, end, lim; vm_map_t map; vsize_t len, npgs; @@ -261,16 +290,15 @@ sys_mincore(struct proc *p, void *v, register_t *retval) } for (/* nothing */; - entry != NULL && entry->start < end; - entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + entry != &map->header && entry->start < end; + entry = entry->next) { KASSERT(!UVM_ET_ISSUBMAP(entry)); KASSERT(start >= entry->start); /* Make sure there are no holes. */ - next = RB_NEXT(uvm_map_addr, &map->addr, entry); if (entry->end < end && - (next == NULL || - next->start > entry->end)) { + (entry->next == &map->header || + entry->next->start > entry->end)) { error = ENOMEM; goto out; } @@ -423,6 +451,17 @@ sys_mmap(struct proc *p, void *v, register_t *retval) if (vm_min_address > 0 && addr < vm_min_address) return (EINVAL); + } else { + + /* + * not fixed: make sure we skip over the largest possible heap. + * we will refine our guess later (e.g. 
to account for VAC, etc) + */ + if (addr == 0) + addr = uvm_map_hint(p, prot); + else if (!(flags & MAP_TRYFIXED) && + addr < (vaddr_t)p->p_vmspace->vm_daddr) + addr = uvm_map_hint(p, prot); } /* @@ -565,6 +604,13 @@ sys_mmap(struct proc *p, void *v, register_t *retval) error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); + if (error == ENOMEM && !(flags & (MAP_FIXED | MAP_TRYFIXED))) { + /* once more, with feeling */ + addr = uvm_map_hint1(p, prot, 0); + error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, + maxprot, flags, handle, pos, + p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); + } if (error == 0) /* remember to add offset */ @@ -675,7 +721,7 @@ sys_munmap(struct proc *p, void *v, register_t *retval) vsize_t size, pageoff; vm_map_t map; vaddr_t vm_min_address = VM_MIN_ADDRESS; - struct uvm_map_deadq dead_entries; + struct vm_map_entry *dead_entries; /* * get syscall args... @@ -717,12 +763,12 @@ sys_munmap(struct proc *p, void *v, register_t *retval) /* * doit! */ - TAILQ_INIT(&dead_entries); - uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE); + uvm_unmap_remove(map, addr, addr + size, &dead_entries, p, FALSE); vm_map_unlock(map); /* and unlock */ - uvm_unmap_detach(&dead_entries, 0); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); return (0); } @@ -1053,7 +1099,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, if (*addr & PAGE_MASK) return(EINVAL); uvmflag |= UVM_FLAG_FIXED; - uvm_unmap(map, *addr, *addr + size); /* zap! */ + uvm_unmap_p(map, *addr, *addr + size, p); /* zap! */ } /* @@ -1147,7 +1193,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, uvmflag); - error = uvm_map(map, addr, size, uobj, foff, align, uvmflag); + error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p); if (error == 0) { /* diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index 2b8eee79057..06cbf871e41 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_unix.c,v 1.41 2011/05/24 15:27:36 ariane Exp $ */ +/* $OpenBSD: uvm_unix.c,v 1.42 2011/06/06 17:10:23 ariane Exp $ */ /* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */ /* @@ -167,7 +167,9 @@ uvm_coredump(struct proc *p, struct vnode *vp, struct ucred *cred, offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize; - RB_FOREACH(entry, uvm_map_addr, &map->addr) { + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + /* should never happen for a user process */ if (UVM_ET_ISSUBMAP(entry)) { panic("uvm_coredump: user process with submap?"); @@ -259,7 +261,9 @@ uvm_coredump_walkmap(struct proc *p, void *iocookie, vaddr_t top; int error; - RB_FOREACH(entry, uvm_map_addr, &map->addr) { + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + state.cookie = cookie; state.prot = entry->protection; state.flags = 0; |
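
The hunks above restore the list-based unmap interface: uvm_unmap_remove() hands the detached entries back as a plain vm_map_entry chain instead of a uvm_map_deadq TAILQ, and the caller frees that chain with uvm_unmap_detach() after dropping the map lock. The following is an illustrative sketch of that calling pattern only, modeled on the sys_munmap() hunk above and the prototypes in uvm_map.h; the function name and its arguments are placeholders, not part of the commit, and the usual uvm headers are assumed.

/*
 * Illustrative only, not part of the patch: unmap [start, end) from map
 * and free the dead entries outside the map lock, as sys_munmap() does.
 */
void
example_unmap_range(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct proc *p)
{
	struct vm_map_entry *dead_entries = NULL;

	vm_map_lock(map);
	/* Detached entries come back through dead_entries (or NULL). */
	uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE);
	vm_map_unlock(map);

	/* Free the detached chain after the map lock is released. */
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
}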
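
Likewise, the sys_mincore() and uvm_coredump() hunks return to walking the map as a doubly-linked list with &map->header as the sentinel, entries kept sorted by address. A minimal sketch of that traversal under the restored read-lock macros follows; example_map_size() is a made-up helper for illustration and does not appear in the patch.

/*
 * Illustrative only, not part of the patch: sum the mapped bytes by
 * walking the entry list from header.next until it wraps back to the
 * sentinel, under the map's read lock.
 */
vsize_t
example_map_size(struct vm_map *map)
{
	struct vm_map_entry *entry;
	vsize_t sz = 0;

	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next)
		sz += entry->end - entry->start;
	vm_map_unlock_read(map);

	return (sz);
}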