| author | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2011-05-24 15:27:37 +0000 |
|---|---|---|
| committer | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2011-05-24 15:27:37 +0000 |
| commit | 7bba91997a49a1b90aaf8f3cc262edbe355c7a92 (patch) | |
| tree | 7da632cfed9dd94efc9b0d8c66977bd71bfc81a2 /sys/uvm | |
| parent | 7fabffa3a5b326844134b3d5ff1443bc8bc9c2c5 (diff) | |
Reimplement uvm/uvm_map.
vmmap is designed to perform address space randomized allocations,
without letting fragmentation of the address space go through the roof.
Some highlights:
- kernel address space randomization
- proper implementation of guardpages
- roughly 10% system time reduction during kernel build
Tested by a lot of people on tech@ and by developers.
Theo's machines are still happy.
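A minimal userland sketch of the randomization step described above, modeled on the address selection in uvm_map_findspace_tree() from this diff: the offset into a chosen free range is capped by FSPACE_MAXOFF and truncated to page and alignment boundaries before being added to the range start. This is an illustration only, not kernel code; select_addr(), the fixed PAGE_SHIFT of 12, and the sample range in main() are made up for the example.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>	/* arc4random_uniform() on OpenBSD */

#define PAGE_SHIFT	12				/* assumed for the example */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define FSPACE_MAXOFF	((uintptr_t)32 * 1024 * 1024)	/* as in the diff */

/*
 * Pick a randomized, aligned address in [sel_min, sel_max], keeping the
 * offset from sel_min below FSPACE_MAXOFF to limit sparseness.
 * sel_min and sel_max are assumed to be already aligned, as they are after
 * uvm_map_sel_limits() in the diff.
 */
static uintptr_t
select_addr(uintptr_t sel_min, uintptr_t sel_max, uintptr_t align)
{
	uintptr_t off;

	if (align < PAGE_SIZE)
		align = PAGE_SIZE;

	/* Limit the considered range to keep the map from getting sparse. */
	off = sel_max - sel_min + align;
	if (off > FSPACE_MAXOFF)
		off = FSPACE_MAXOFF;

	/* Randomize in page units, then cancel bits violating alignment. */
	off = (uintptr_t)arc4random_uniform(off >> PAGE_SHIFT) << PAGE_SHIFT;
	off &= ~(align - 1);

	return sel_min + off;
}

int
main(void)
{
	/* Hypothetical 256 MB free range. */
	uintptr_t min = 0x10000000, max = min + (256UL << 20);

	printf("selected: 0x%lx\n",
	    (unsigned long)select_addr(min, max, PAGE_SIZE));
	return 0;
}
```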
Diffstat (limited to 'sys/uvm')
-rw-r--r-- | sys/uvm/uvm_extern.h |    8 |
-rw-r--r-- | sys/uvm/uvm_fault.c  |   13 |
-rw-r--r-- | sys/uvm/uvm_io.c     |   15 |
-rw-r--r-- | sys/uvm/uvm_km.c     |   18 |
-rw-r--r-- | sys/uvm/uvm_map.c    | 6701 |
-rw-r--r-- | sys/uvm/uvm_map.h    |  291 |
-rw-r--r-- | sys/uvm/uvm_mmap.c   |   82 |
-rw-r--r-- | sys/uvm/uvm_unix.c   |   10 |
8 files changed, 4007 insertions, 3131 deletions
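The callers touched below (uvm_io.c, uvm_km.c) all move to the same pattern: removed entries are collected on a TAILQ-based dead queue and torn down after the map lock is dropped. Condensed from the hunks that follow, with map, start and end standing in for the caller's arguments; an excerpt for orientation, not standalone code:

```c
struct uvm_map_deadq dead_entries;

vm_map_lock(map);
TAILQ_INIT(&dead_entries);
/* Collect removed entries on the dead queue instead of freeing in place. */
uvm_unmap_remove(map, start, end, &dead_entries, FALSE, TRUE);
vm_map_unlock(map);

/* Drop amap/object references and free the entries outside the map lock. */
uvm_unmap_detach(&dead_entries, 0);
```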
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 77d0522bd14..f0e578c899a 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.95 2011/04/18 19:23:46 art Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.96 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* @@ -185,6 +185,7 @@ typedef int vm_prot_t; #define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ #define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ #define UVM_FLAG_HOLE 0x400000 /* no backend */ +#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) @@ -631,10 +632,9 @@ void km_free(void *, size_t, const struct kmem_va_mode *, const struct kmem_pa_mode *); /* uvm_map.c */ -#define uvm_map(_m, _a, _sz, _u, _f, _al, _fl) uvm_map_p(_m, _a, _sz, _u, _f, _al, _fl, 0) -int uvm_map_p(vm_map_t, vaddr_t *, vsize_t, +int uvm_map(vm_map_t, vaddr_t *, vsize_t, struct uvm_object *, voff_t, vsize_t, - uvm_flag_t, struct proc *); + uvm_flag_t); int uvm_map_pageable(vm_map_t, vaddr_t, vaddr_t, boolean_t, int); int uvm_map_pageable_all(vm_map_t, int, vsize_t); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index e5eb6b8e3b0..59725637315 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.58 2009/07/22 21:05:37 oga Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.59 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -1781,7 +1781,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end) void uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) { - vm_map_entry_t entry; + vm_map_entry_t entry, next; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; @@ -1814,9 +1814,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) */ KASSERT(va >= entry->start); while (va >= entry->end) { - KASSERT(entry->next != &map->header && - entry->next->start <= entry->end); - entry = entry->next; + next = RB_NEXT(uvm_map_addr, &map->addr, entry); + KASSERT(next != NULL && next->start <= entry->end); + entry = next; } /* @@ -1905,7 +1905,6 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) */ while (1) { - /* * lock map */ @@ -1919,7 +1918,7 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) * lookup */ if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, - &ufi->entry)) { + &ufi->entry)) { uvmfault_unlockmaps(ufi, write_lock); return(FALSE); } diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c index 1b9339979e1..5e42d6b85f9 100644 --- a/sys/uvm/uvm_io.c +++ b/sys/uvm/uvm_io.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_io.c,v 1.17 2009/07/25 12:55:40 miod Exp $ */ +/* $OpenBSD: uvm_io.c,v 1.18 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */ /* @@ -64,7 +64,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) { vaddr_t baseva, endva, pageoffset, kva; vsize_t chunksz, togo, sz; - vm_map_entry_t dead_entries; + struct uvm_map_deadq dead_entries; int error, extractflags; /* @@ -93,7 +93,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) chunksz = min(round_page(togo + pageoffset), MAXBSIZE); error = 0; - extractflags = UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG; + extractflags = 0; if (flags & UVM_IO_FIXPROT) extractflags |= UVM_EXTRACT_FIXPROT; @@ -107,7 +107,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) 
* step 2: extract mappings from the map into kernel_map */ - error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, + error = uvm_map_extract(map, baseva, chunksz, &kva, extractflags); if (error) { @@ -139,12 +139,11 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) */ vm_map_lock(kernel_map); + TAILQ_INIT(&dead_entries); uvm_unmap_remove(kernel_map, kva, kva+chunksz, - &dead_entries, NULL, FALSE); + &dead_entries, FALSE, TRUE); vm_map_unlock(kernel_map); - - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, AMAP_REFALL); + uvm_unmap_detach(&dead_entries, AMAP_REFALL); /* * We defer checking the error return from uiomove until diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index 368aaa92f2d..2779c7e6b4f 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_km.c,v 1.101 2011/05/10 21:48:17 oga Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.102 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ /* @@ -184,7 +184,13 @@ uvm_km_init(vaddr_t start, vaddr_t end) * before installing. */ - uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE); + uvm_map_setup(&kernel_map_store, base, end, +#ifdef KVA_GUARDPAGES + VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES +#else + VM_MAP_PAGEABLE +#endif + ); kernel_map_store.pmap = pmap_kernel(); if (base != start && uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, @@ -473,16 +479,16 @@ uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) void uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) { - struct vm_map_entry *dead_entries; + struct uvm_map_deadq dead_entries; vm_map_lock(map); + TAILQ_INIT(&dead_entries); uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), - &dead_entries, NULL, FALSE); + &dead_entries, FALSE, TRUE); wakeup(map); vm_map_unlock(map); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + uvm_unmap_detach(&dead_entries, 0); } /* diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 07bc61fb37e..ed9930f42f0 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,7 +1,22 @@ -/* $OpenBSD: uvm_map.c,v 1.135 2011/04/26 23:50:21 ariane Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.136 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ -/* +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. 
* @@ -71,6 +86,8 @@ * uvm_map.c: uvm map operations */ +/* #define DEBUG */ + #include <sys/param.h> #include <sys/systm.h> #include <sys/mman.h> @@ -86,13 +103,196 @@ #endif #include <uvm/uvm.h> -#undef RB_AUGMENT -#define RB_AUGMENT(x) uvm_rb_augment(x) #ifdef DDB #include <uvm/uvm_ddb.h> #endif + +vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); +struct vm_map_entry *uvm_map_entrybyaddr(struct uvm_map_addr*, vaddr_t); +struct vm_map_entry *uvm_map_findspace_entry(struct uvm_map_free*, vsize_t); +struct vm_map_entry *uvm_map_findspace_tree(struct uvm_map_free*, vsize_t, + voff_t, vsize_t, int, vaddr_t*, struct vm_map*); +int uvm_map_isavail(struct uvm_map_addr*, + struct vm_map_entry**, struct vm_map_entry**, + vaddr_t, vsize_t); +int uvm_mapent_isjoinable(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*); +struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, + struct vm_map_entry*, struct uvm_map_deadq*); +struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, + struct vm_map_entry*, struct uvm_map_deadq*); +struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, + struct vm_map_entry*, vaddr_t, vsize_t, int, + struct uvm_map_deadq*); +struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); +void uvm_mapent_free(struct vm_map_entry*); +void uvm_mapent_mkfree(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry**, + struct uvm_map_deadq*, boolean_t); +void uvm_map_pageable_pgon(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t, vaddr_t); +int uvm_map_pageable_wire(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t, vaddr_t, int); +void uvm_map_setup_entries(struct vm_map*); +void uvm_map_vmspace_update(struct vm_map*, + struct uvm_map_deadq*, int); +void uvm_map_kmem_grow(struct vm_map*, + struct uvm_map_deadq*, vsize_t, int); +void uvm_map_freelist_update_clear(struct vm_map*, + struct uvm_map_deadq*); +void uvm_map_freelist_update_refill(struct vm_map *, int); +void uvm_map_freelist_update(struct vm_map*, + struct uvm_map_deadq*, vaddr_t, vaddr_t, + vaddr_t, vaddr_t, int); +struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, + vaddr_t, vaddr_t, int); +int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, + struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, + int); + +/* + * Tree management functions. + */ + +static __inline void uvm_mapent_copy(struct vm_map_entry*, + struct vm_map_entry*); +static int uvm_mapentry_addrcmp(struct vm_map_entry*, + struct vm_map_entry*); +static int uvm_mapentry_freecmp(struct vm_map_entry*, + struct vm_map_entry*); +void uvm_mapent_free_insert(struct vm_map*, + struct uvm_map_free*, struct vm_map_entry*); +void uvm_mapent_free_remove(struct vm_map*, + struct uvm_map_free*, struct vm_map_entry*); +void uvm_mapent_addr_insert(struct vm_map*, + struct vm_map_entry*); +void uvm_mapent_addr_remove(struct vm_map*, + struct vm_map_entry*); +void uvm_map_splitentry(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t); +vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); +struct uvm_map_free *uvm_free(struct vm_map*, vaddr_t); +int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); + +/* Find freelist for containing addr. */ +#define UVM_FREE(_map, _addr) uvm_free((_map), (_addr)) +/* Size of the free tree. */ +#define uvm_mapfree_size(_free) ((_free)->treesz) + +/* + * uvm_vmspace_fork helper functions. 
+ */ +struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, + vsize_t, struct vm_map_entry*, + struct uvm_map_deadq*, int, int); +void uvm_mapent_forkshared(struct vmspace*, struct vm_map*, + struct vm_map*, struct vm_map_entry*, + struct uvm_map_deadq*); +void uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, + struct vm_map*, struct vm_map_entry*, + struct uvm_map_deadq*); + +/* + * Tree validation. + */ + +#ifdef DEBUG +void uvm_tree_assert(struct vm_map*, int, char*, + char*, int); +#define UVM_ASSERT(map, cond, file, line) \ + uvm_tree_assert((map), (cond), #cond, (file), (line)) +void uvm_tree_sanity_free(struct vm_map*, + struct uvm_map_free*, char*, int); +void uvm_tree_sanity(struct vm_map*, char*, int); +void uvm_tree_size_chk(struct vm_map*, char*, int); +void vmspace_validate(struct vm_map*); +#else +#define uvm_tree_sanity_free(_map, _free, _file, _line) do {} while (0) +#define uvm_tree_sanity(_map, _file, _line) do {} while (0) +#define uvm_tree_size_chk(_map, _file, _line) do {} while (0) +#define vmspace_validate(_map) do {} while (0) +#endif + + +/* + * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. + * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. + * + * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size + * each time. + */ +#define VM_MAP_KSIZE_INIT (512 * PAGE_SIZE) +#define VM_MAP_KSIZE_DELTA (256 * PAGE_SIZE) +#define VM_MAP_KSIZE_ALLOCMUL 4 +/* + * When selecting a random free-space block, look at most FSPACE_DELTA blocks + * ahead. + */ +#define FSPACE_DELTA 8 +/* + * Put allocations adjecent to previous allocations when the free-space tree + * is larger than FSPACE_COMPACT entries. + * + * Alignment and PMAP_PREFER may still cause the entry to not be fully + * adjecent. Note that this strategy reduces memory fragmentation (by leaving + * a large space before or after the allocation). + */ +#define FSPACE_COMPACT 128 +/* + * Make the address selection skip at most this many bytes from the start of + * the free space in which the allocation takes place. + * + * The main idea behind a randomized address space is that an attacker cannot + * know where to target his attack. Therefore, the location of objects must be + * as random as possible. However, the goal is not to create the most sparse + * map that is possible. + * FSPACE_MAXOFF pushes the considered range in bytes down to less insane + * sizes, thereby reducing the sparseness. The biggest randomization comes + * from fragmentation, i.e. FSPACE_COMPACT. + */ +#define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) +/* + * Allow for small gaps in the overflow areas. + * Gap size is in bytes and does not have to be a multiple of page-size. 
+ */ +#define FSPACE_BIASGAP ((vaddr_t)32 * 1024) + + +#define FREE_START(_entry) ((_entry)->end + (_entry)->guard) +#define FREE_END(_entry) ((_entry)->end + (_entry)->guard + \ + (_entry)->fspace) + +#ifdef DEADBEEF0 +#define UVMMAP_DEADBEEF ((void*)DEADBEEF0) +#else +#define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) +#endif + +#ifdef DEBUG +int uvm_map_dprintf = 0; +int uvm_map_printlocks = 0; + +#define DPRINTF(_args) \ + do { \ + if (uvm_map_dprintf) \ + printf _args; \ + } while (0) + +#define LPRINTF(_args) \ + do { \ + if (uvm_map_printlocks) \ + printf _args; \ + } while (0) +#else +#define DPRINTF(_args) do {} while (0) +#define LPRINTF(_args) do {} while (0) +#endif + static struct timeval uvm_kmapent_last_warn_time; static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; @@ -102,295 +302,1269 @@ struct uvm_cnt uvm_mlk_call, uvm_mlk_hint; const char vmmapbsy[] = "vmmapbsy"; /* - * Da history books - */ -UVMHIST_DECL(maphist); -UVMHIST_DECL(pdhist); - -/* * pool for vmspace structures. */ - struct pool uvm_vmspace_pool; /* * pool for dynamically-allocated map entries. */ - struct pool uvm_map_entry_pool; struct pool uvm_map_entry_kmem_pool; -#ifdef PMAP_GROWKERNEL /* * This global represents the end of the kernel virtual address - * space. If we want to exceed this, we must grow the kernel + * space. If we want to exceed this, we must grow the kernel * virtual address space dynamically. * * Note, this variable is locked by kernel_map's lock. */ vaddr_t uvm_maxkaddr; -#endif /* - * macros + * Locking predicate. */ +#define UVM_MAP_REQ_WRITE(_map) \ + do { \ + if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ + rw_assert_wrlock(&(_map)->lock); \ + } while (0) /* - * uvm_map_entry_link: insert entry into a map + * Tree describing entries by address. * - * => map must be locked + * Addresses are unique. + * Entries with start == end may only exist if they are the first entry + * (sorted by address) within a free-memory tree. */ -#define uvm_map_entry_link(map, after_where, entry) do { \ - (map)->nentries++; \ - (entry)->prev = (after_where); \ - (entry)->next = (after_where)->next; \ - (entry)->prev->next = (entry); \ - (entry)->next->prev = (entry); \ - uvm_rb_insert(map, entry); \ -} while (0) -/* - * uvm_map_entry_unlink: remove entry from a map - * - * => map must be locked - */ -#define uvm_map_entry_unlink(map, entry) do { \ - (map)->nentries--; \ - (entry)->next->prev = (entry)->prev; \ - (entry)->prev->next = (entry)->next; \ - uvm_rb_remove(map, entry); \ -} while (0) +static __inline int +uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) +{ + return e1->start < e2->start ? -1 : e1->start > e2->start; +} /* - * SAVE_HINT: saves the specified entry as the hint for future lookups. + * Tree describing free memory. * - * => map need not be locked (protected by hint_lock). - */ -#define SAVE_HINT(map,check,value) do { \ - simple_lock(&(map)->hint_lock); \ - if ((map)->hint == (check)) \ - (map)->hint = (value); \ - simple_unlock(&(map)->hint_lock); \ -} while (0) - -/* - * VM_MAP_RANGE_CHECK: check and correct range + * Free memory is indexed (so we can use array semantics in O(log N). + * Free memory is ordered by size (so we can reduce fragmentation). * - * => map must at least be read locked + * The address range in the tree can be limited, having part of the + * free memory not in the free-memory tree. Only free memory in the + * tree will be considered during 'any address' allocations. 
*/ -#define VM_MAP_RANGE_CHECK(map, start, end) do { \ - if (start < vm_map_min(map)) \ - start = vm_map_min(map); \ - if (end > vm_map_max(map)) \ - end = vm_map_max(map); \ - if (start > end) \ - start = end; \ -} while (0) +static __inline int +uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2) +{ + int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace; + return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2); +} /* - * local prototypes + * Copy mapentry. */ +static __inline void +uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) +{ + caddr_t csrc, cdst; + size_t sz; -void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); -void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); -void uvm_map_reference_amap(struct vm_map_entry *, int); -void uvm_map_unreference_amap(struct vm_map_entry *, int); -int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t, - struct vm_map_entry *, voff_t, vsize_t); + csrc = (caddr_t)src; + cdst = (caddr_t)dst; + csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); + cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); -struct vm_map_entry *uvm_mapent_alloc(struct vm_map *, int); -void uvm_mapent_free(struct vm_map_entry *); + sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - + offsetof(struct vm_map_entry, uvm_map_entry_start_copy); + memcpy(cdst, csrc, sz); +} -#ifdef KVA_GUARDPAGES /* - * Number of kva guardpages in use. + * Handle free-list insertion. */ -int kva_guardpages; +void +uvm_mapent_free_insert(struct vm_map *map, struct uvm_map_free *free, + struct vm_map_entry *entry) +{ + struct vm_map_entry *res; +#ifdef DEBUG + vaddr_t min, max, bound; #endif - -/* - * Tree manipulation. - */ -void uvm_rb_insert(struct vm_map *, struct vm_map_entry *); -void uvm_rb_remove(struct vm_map *, struct vm_map_entry *); -vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *); + if (RB_LEFT(entry, free_entry) != UVMMAP_DEADBEEF || + RB_RIGHT(entry, free_entry) != UVMMAP_DEADBEEF || + RB_PARENT(entry, free_entry) != UVMMAP_DEADBEEF) + panic("uvm_mapent_addr_insert: entry still in free list"); #ifdef DEBUG -int _uvm_tree_sanity(struct vm_map *map, const char *name); + /* + * Boundary check. + * Boundaries are folded if they go on the same free list. + */ + min = FREE_START(entry); + max = FREE_END(entry); + + while (min < max && (bound = uvm_map_boundary(map, min, max)) != max) { + KASSERT(UVM_FREE(map, min) == free); + min = bound; + } #endif -vsize_t uvm_rb_subtree_space(struct vm_map_entry *); -void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *); + KDASSERT(entry->fspace > 0 && (entry->fspace & PAGE_MASK) == 0); -static __inline int -uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b) + UVM_MAP_REQ_WRITE(map); + res = RB_INSERT(uvm_map_free_int, &free->tree, entry); + free->treesz++; + if (res != NULL) + panic("uvm_mapent_free_insert"); +} + +/* + * Handle free-list removal. + */ +void +uvm_mapent_free_remove(struct vm_map *map, struct uvm_map_free *free, + struct vm_map_entry *entry) { - if (a->start < b->start) - return (-1); - else if (a->start > b->start) - return (1); - - return (0); + struct vm_map_entry *res; + + UVM_MAP_REQ_WRITE(map); + res = RB_REMOVE(uvm_map_free_int, &free->tree, entry); + free->treesz--; + if (res != entry) + panic("uvm_mapent_free_remove"); + RB_LEFT(entry, free_entry) = RB_RIGHT(entry, free_entry) = + RB_PARENT(entry, free_entry) = UVMMAP_DEADBEEF; } +/* + * Handle address tree insertion. 
+ */ +void +uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) +{ + struct vm_map_entry *res; + + if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || + RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || + RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) + panic("uvm_mapent_addr_insert: entry still in addr list"); + KDASSERT(entry->start <= entry->end); + KDASSERT((entry->start & PAGE_MASK) == 0 && + (entry->end & PAGE_MASK) == 0); + + UVM_MAP_REQ_WRITE(map); + res = RB_INSERT(uvm_map_addr, &map->addr, entry); + if (res != NULL) + panic("uvm_mapent_addr_insert"); +} -static __inline void -uvm_rb_augment(struct vm_map_entry *entry) +/* + * Handle address tree removal. + */ +void +uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) { - entry->space = uvm_rb_subtree_space(entry); + struct vm_map_entry *res; + + UVM_MAP_REQ_WRITE(map); + res = RB_REMOVE(uvm_map_addr, &map->addr, entry); + if (res != entry) + panic("uvm_mapent_addr_remove"); + RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = + RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; } -RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); +/* + * Clamp start and end to map boundaries. + */ +#define VM_MAP_RANGE_CHECK(_map, _start, _end) \ + do { \ + (_start) = MAX((_start), vm_map_min((_map))); \ + (_end) = MIN((_end), vm_map_max((_map))); \ + (_start) = MIN((_start), (_end)); \ + } while (0) -RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); +/* + * uvm_map_reference: add reference to a map + * + * XXX check map reference counter lock + */ +#define uvm_map_reference(_map) \ + do { \ + simple_lock(&map->ref_lock); \ + map->ref_count++; \ + simple_unlock(&map->ref_lock); \ + } while (0) +/* + * Calculate the dused delta. + */ vsize_t -uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry) +uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) { - struct vm_map_entry *next; - vaddr_t space; - - if ((next = entry->next) == &map->header) - space = map->max_offset - entry->end; - else { - KASSERT(next); - space = next->start - entry->end; + struct vmspace *vm; + vsize_t sz; + vaddr_t lmax; + vaddr_t stack_begin, stack_end; /* Position of stack. */ + + KASSERT(map->flags & VM_MAP_ISVMSPACE); + vm = (struct vmspace *)map; + stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + + sz = 0; + while (min != max) { + lmax = max; + if (min < stack_begin && lmax > stack_begin) + lmax = stack_begin; + else if (min < stack_end && lmax > stack_end) + lmax = stack_end; + + if (min >= stack_begin && min < stack_end) { + /* nothing */ + } else + sz += lmax - min; + min = lmax; } - return (space); + + return sz >> PAGE_SHIFT; } - -vsize_t -uvm_rb_subtree_space(struct vm_map_entry *entry) -{ - vaddr_t space, tmp; - space = entry->ownspace; - if (RB_LEFT(entry, rb_entry)) { - tmp = RB_LEFT(entry, rb_entry)->space; - if (tmp > space) - space = tmp; +/* + * Find the entry describing the given address. 
+ */ +struct vm_map_entry* +uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) +{ + struct vm_map_entry *iter; + + iter = RB_ROOT(atree); + while (iter != NULL) { + if (iter->start > addr) + iter = RB_LEFT(iter, daddrs.addr_entry); + else if (FREE_END(iter) <= addr) + iter = RB_RIGHT(iter, daddrs.addr_entry); + else + return iter; } + return NULL; +} - if (RB_RIGHT(entry, rb_entry)) { - tmp = RB_RIGHT(entry, rb_entry)->space; - if (tmp > space) - space = tmp; +/* + * Find the first entry with at least sz bytes free. + */ +struct vm_map_entry* +uvm_map_findspace_entry(struct uvm_map_free *free, vsize_t sz) +{ + struct vm_map_entry *iter; + struct vm_map_entry *res; + + iter = RB_ROOT(&free->tree); + res = NULL; + + while (iter) { + if (iter->fspace >= sz) { + res = iter; + iter = RB_LEFT(iter, free_entry); + } else + iter = RB_RIGHT(iter, free_entry); } - - return (space); + return res; } -void -uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) +/* + * DEAD_ENTRY_PUSH(struct vm_map_entry**head, struct vm_map_entry *entry) + * + * Push dead entries into a linked list. + * Since the linked list abuses the address tree for storage, the entry + * may not be linked in a map. + * + * *head must be initialized to NULL before the first call to this macro. + * uvm_unmap_detach(*head, 0) will remove dead entries. + */ +static __inline void +dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) { - /* We need to traverse to the very top */ - do { - entry->ownspace = uvm_rb_space(map, entry); - entry->space = uvm_rb_subtree_space(entry); - } while ((entry = RB_PARENT(entry, rb_entry)) != NULL); + TAILQ_INSERT_TAIL(deadq, entry, daddrs.deadq); } +#define DEAD_ENTRY_PUSH(_headptr, _entry) \ + dead_entry_push((_headptr), (_entry)) -void -uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) +/* + * Helper function for uvm_map_findspace_tree. + * + * Given allocation constraints and pmap constraints, finds the + * lowest and highest address in a range that can be used for the + * allocation. + * + * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. + * + * + * Big chunk of math with a seasoning of dragons. + */ +int +uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, + struct vm_map_entry *sel, vaddr_t align, + vaddr_t pmap_align, vaddr_t pmap_off, int bias) { - vaddr_t space = uvm_rb_space(map, entry); - struct vm_map_entry *tmp; - - entry->ownspace = entry->space = space; - tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry); + vaddr_t sel_min, sel_max; +#ifdef PMAP_PREFER + vaddr_t pmap_min, pmap_max; +#endif /* PMAP_PREFER */ #ifdef DIAGNOSTIC - if (tmp != NULL) - panic("uvm_rb_insert: duplicate entry?"); + int bad; +#endif /* DIAGNOSTIC */ + + sel_min = FREE_START(sel); + sel_max = FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); + +#ifdef PMAP_PREFER + + /* + * There are two special cases, in which we can satisfy the align + * requirement and the pmap_prefer requirement. + * - when pmap_off == 0, we always select the largest of the two + * - when pmap_off % align == 0 and pmap_align > align, we simply + * satisfy the pmap_align requirement and automatically + * satisfy the align requirement. + */ + if (align > PAGE_SIZE && + !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { + /* + * Simple case: only use align. + */ + sel_min = roundup(sel_min, align); + sel_max &= ~(align - 1); + + if (sel_min > sel_max) + return ENOMEM; + + /* + * Correct for bias. 
+ */ + if (sel_max - sel_min > FSPACE_BIASGAP) { + if (bias > 0) { + sel_min = sel_max - FSPACE_BIASGAP; + sel_min = roundup(sel_min, align); + } else if (bias < 0) { + sel_max = sel_min + FSPACE_BIASGAP; + sel_max &= ~(align - 1); + } + } + } else if (pmap_align != 0) { + /* + * Special case: satisfy both pmap_prefer and + * align argument. + */ + pmap_max = sel_max & ~(pmap_align - 1); + pmap_min = sel_min; + if (pmap_max < sel_min) + return ENOMEM; + + /* Adjust pmap_min for BIASGAP for top-addr bias. */ + if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) + pmap_min = pmap_max - FSPACE_BIASGAP; + /* Align pmap_min. */ + pmap_min &= ~(pmap_align - 1); + if (pmap_min < sel_min) + pmap_min += pmap_align; + if (pmap_min > pmap_max) + return ENOMEM; + + /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ + if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { + pmap_max = (pmap_min + FSPACE_BIASGAP) & + ~(pmap_align - 1); + } + if (pmap_min > pmap_max) + return ENOMEM; + + /* Apply pmap prefer offset. */ + pmap_max |= pmap_off; + if (pmap_max > sel_max) + pmap_max -= pmap_align; + pmap_min |= pmap_off; + if (pmap_min < sel_min) + pmap_min += pmap_align; + + /* + * Fixup: it's possible that pmap_min and pmap_max + * cross eachother. In this case, try to find one + * address that is allowed. + * (This usually happens in biased case.) + */ + if (pmap_min > pmap_max) { + if (pmap_min < sel_max) + pmap_max = pmap_min; + else if (pmap_max > sel_min) + pmap_min = pmap_max; + else + return ENOMEM; + } + + /* Internal validation. */ + KDASSERT(pmap_min <= pmap_max); + + sel_min = pmap_min; + sel_max = pmap_max; + } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_min = sel_max - FSPACE_BIASGAP; + else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_max = sel_min + FSPACE_BIASGAP; + +#else + + if (align > PAGE_SIZE) { + sel_min = roundup(sel_min, align); + sel_max &= ~(align - 1); + if (sel_min > sel_max) + return ENOMEM; + + if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { + if (bias > 0) { + sel_min = roundup(sel_max - FSPACE_BIASGAP, + align); + } else { + sel_max = (sel_min + FSPACE_BIASGAP) & + ~(align - 1); + } + } + } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_min = sel_max - FSPACE_BIASGAP; + else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_max = sel_min + FSPACE_BIASGAP; + #endif - uvm_rb_fixup(map, entry); - if (entry->prev != &map->header) - uvm_rb_fixup(map, entry->prev); + + if (sel_min > sel_max) + return ENOMEM; + +#ifdef DIAGNOSTIC + bad = 0; + /* Lower boundary check. */ + if (sel_min < FREE_START(sel)) { + printf("sel_min: 0x%lx, but should be at least 0x%lx\n", + sel_min, FREE_START(sel)); + bad++; + } + /* Upper boundary check. */ + if (sel_max > FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { + printf("sel_max: 0x%lx, but should be at most 0x%lx\n", + sel_max, FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); + bad++; + } + /* Lower boundary alignment. */ + if (align != 0 && (sel_min & (align - 1)) != 0) { + printf("sel_min: 0x%lx, not aligned to 0x%lx\n", + sel_min, align); + bad++; + } + /* Upper boundary alignment. */ + if (align != 0 && (sel_max & (align - 1)) != 0) { + printf("sel_max: 0x%lx, not aligned to 0x%lx\n", + sel_max, align); + bad++; + } + /* Lower boundary PMAP_PREFER check. 
*/ + if (pmap_align != 0 && align == 0 && + (sel_min & (pmap_align - 1)) != pmap_off) { + printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", + sel_min, sel_min & (pmap_align - 1), pmap_off); + bad++; + } + /* Upper boundary PMAP_PREFER check. */ + if (pmap_align != 0 && align == 0 && + (sel_max & (pmap_align - 1)) != pmap_off) { + printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", + sel_max, sel_max & (pmap_align - 1), pmap_off); + bad++; + } + + if (bad) { + panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " + "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " + "bias = %d, " + "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", + sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, + bias, FREE_START(sel), FREE_END(sel)); + } +#endif /* DIAGNOSTIC */ + + *min = sel_min; + *max = sel_max; + return 0; } -void -uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) +/* + * Find address and free space for sz bytes. + * + * free: tree of free space + * sz: size in bytes + * align: preferred alignment + * guardpg: if true, keep free space guards on both ends + * addr: fill in found address + * + * align is a hard requirement to align to virtual addresses. + * PMAP_PREFER is a soft requirement that is dropped if + * no memory can be found that will be acceptable. + * + * align overrules PMAP_PREFER, but if both can be satisfied, the code + * will attempt to find a range that does this. + * + * Returns NULL on failure. + */ +struct vm_map_entry* +uvm_map_findspace_tree(struct uvm_map_free *free, vsize_t sz, voff_t uoffset, + vsize_t align, int guardpg, vaddr_t *addr, struct vm_map *map) { - struct vm_map_entry *parent; + struct vm_map_entry *sfe; /* Start free entry. */ + struct vm_map_entry *sel; /* Selected free entry. */ + struct vm_map_entry *search_start, *fail_start; + size_t sel_idx, i; + vaddr_t sel_min, sel_max, sel_addr; + vaddr_t pmap_off, pmap_align; /* pmap_prefer variables */ + int bias; - parent = RB_PARENT(entry, rb_entry); - RB_REMOVE(uvm_tree, &(map)->rbhead, entry); - if (entry->prev != &map->header) - uvm_rb_fixup(map, entry->prev); - if (parent) - uvm_rb_fixup(map, parent); -} +#ifdef PMAP_PREFER + /* Fix pmap prefer parameters. */ + pmap_off = 0; + pmap_align = PMAP_PREFER_ALIGN(); + if (uoffset != UVM_UNKNOWN_OFFSET && pmap_align > PAGE_SIZE) + pmap_off = PMAP_PREFER_OFFSET(uoffset); + else + pmap_align = 0; + KDASSERT(pmap_align == 0 || pmap_off < pmap_align); -#ifdef DEBUG -#define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y) + if (align > PAGE_SIZE || (pmap_off != 0 && pmap_off < align)) { + /* + * We're doomed. + * + * This allocation will never be able to fulfil the pmap_off + * requirement. + */ + pmap_off = 0; + pmap_align = 0; + } #else -#define uvm_tree_sanity(x,y) + pmap_off = pmap_align = 0; #endif -#ifdef DEBUG + /* Set up alignment argument. */ + if (align < PAGE_SIZE) + align = PAGE_SIZE; + + /* + * First entry that meets requirements. + */ + sfe = uvm_map_findspace_entry(free, sz + (guardpg ? PAGE_SIZE : 0)); + if (sfe == NULL) + return NULL; + + /* Select the entry from which we will allocate. */ + sel_idx = arc4random_uniform(FSPACE_DELTA); + sel = sfe; + for (i = 0; i < sel_idx; i++) { + sel = RB_NEXT(uvm_map_free_int, free->tree, sel); + /* + * This has a slight bias at the top of the tree (largest + * segments) towards the smaller elements. + * This may be nice. 
+ */ + if (sel == NULL) { + sel_idx -= i; + i = 0; + sel = sfe; + } + } + search_start = sel; + fail_start = NULL; + +#ifdef PMAP_PREFER +pmap_prefer_retry: +#endif /* PMAP_PREFER */ + while (sel != NULL) { + bias = uvm_mapent_bias(map, sel); + if (bias == 0 && free->treesz >= FSPACE_COMPACT) + bias = (arc4random() & 0x1) ? 1 : -1; + + if (uvm_map_sel_limits(&sel_min, &sel_max, sz, guardpg, sel, + align, pmap_align, pmap_off, bias) == 0) { + if (bias > 0) + sel_addr = sel_max; + else if (bias < 0) + sel_addr = sel_min; + else if (sel_min == sel_max) + sel_addr = sel_min; + else { + /* + * Select a random address. + * + * Use sel_addr to limit the arc4random range. + */ + sel_addr = sel_max - sel_min; + if (align <= PAGE_SIZE && pmap_align != 0) + sel_addr += pmap_align; + else + sel_addr += align; + sel_addr = MIN(sel_addr, FSPACE_MAXOFF); + + /* + * Shift down, so arc4random can deal with + * the number. + * arc4random wants a 32-bit number. Therefore, + * handle 64-bit overflow. + */ + sel_addr >>= PAGE_SHIFT; + if (sel_addr > 0xffffffff) + sel_addr = 0xffffffff; + sel_addr = arc4random_uniform(sel_addr); + /* + * Shift back up. + */ + sel_addr <<= PAGE_SHIFT; + + /* + * Cancel bits that violate our alignment. + * + * This also cancels bits that are in + * PAGE_MASK, because align is at least + * a page. + */ + sel_addr &= ~(align - 1); + sel_addr &= ~(pmap_align - 1); + + KDASSERT(sel_addr <= sel_max - sel_min); + /* + * Change sel_addr from an offset relative + * to sel_min, to the actual selected address. + */ + sel_addr += sel_min; + } + + *addr = sel_addr; + return sel; + } + + /* Next entry. */ + sel_idx++; + sel = RB_NEXT(uvm_map_free_int, &free->tree, sel); + if (sel_idx == FSPACE_DELTA || + (sel == NULL && sel_idx <= FSPACE_DELTA)) { + if (fail_start == NULL) + fail_start = sel; + + sel_idx = 0; + sel = sfe; + } + + /* + * sel == search_start -> we made a full loop through the + * first FSPACE_DELTA items and couldn't find anything. + * + * We now restart the loop, at the first entry after + * FSPACE_DELTA (which we stored in fail_start during + * the first iteration). + * + * In the case that fail_start == NULL, we will stop + * immediately. + */ + if (sel == search_start) { + sel_idx = FSPACE_DELTA; + sel = fail_start; + } + } + +#ifdef PMAP_PREFER + /* + * If we can't satisfy pmap_prefer, we try without. + * + * We retry even in the case align is specified, since + * uvm_map_sel_limits() always attempts to take it into + * account. + */ + if (pmap_align != 0) { + printf("pmap_prefer aligned allocation failed -> " + "going for unaligned mapping\n"); /* DEBUG, for now */ + pmap_align = 0; + pmap_off = 0; + goto pmap_prefer_retry; + } +#endif /* PMAP_PREFER */ + + /* + * Iterated everything, but nothing was good enough. + */ + return NULL; +} + +/* + * Test if memory starting at addr with sz bytes is free. + * + * Fills in *start_ptr and *end_ptr to be the first and last entry describing + * the space. + * If called with prefilled *start_ptr and *end_ptr, they are to be correct. + */ int -_uvm_tree_sanity(struct vm_map *map, const char *name) -{ - struct vm_map_entry *tmp, *trtmp; - int n = 0, i = 1; - - RB_FOREACH(tmp, uvm_tree, &map->rbhead) { - if (tmp->ownspace != uvm_rb_space(map, tmp)) { - printf("%s: %d/%d ownspace %x != %x %s\n", - name, n + 1, map->nentries, - tmp->ownspace, uvm_rb_space(map, tmp), - tmp->next == &map->header ? 
"(last)" : ""); - goto error; +uvm_map_isavail(struct uvm_map_addr *atree, struct vm_map_entry **start_ptr, + struct vm_map_entry **end_ptr, vaddr_t addr, vsize_t sz) +{ + struct vm_map_entry *i, *i_end; + + KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); + if (*start_ptr == NULL) { + *start_ptr = uvm_map_entrybyaddr(atree, addr); + if (*start_ptr == NULL) + return 0; + } else + KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); + if (*end_ptr == NULL) { + if (FREE_END(*start_ptr) >= addr + sz) + *end_ptr = *start_ptr; + else { + *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); + if (*end_ptr == NULL) + return 0; } + } else + KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); + + KDASSERT(*start_ptr != NULL && *end_ptr != NULL); + KDASSERT((*start_ptr)->start <= addr && FREE_END(*start_ptr) > addr && + (*end_ptr)->start < addr + sz && FREE_END(*end_ptr) >= addr + sz); + + i = *start_ptr; + if (i->end > addr) + return 0; + i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); + for (i = RB_NEXT(uvm_map_addr, atree, i); i != i_end; + i = RB_NEXT(uvm_map_addr, atree, i)) { + if (i->start != i->end) + return 0; } - trtmp = NULL; - RB_FOREACH(tmp, uvm_tree, &map->rbhead) { - if (tmp->space != uvm_rb_subtree_space(tmp)) { - printf("%s: space %d != %d\n", - name, tmp->space, uvm_rb_subtree_space(tmp)); - goto error; + + return -1; +} + +/* + * uvm_map: establish a valid mapping in map + * + * => *addr and sz must be a multiple of PAGE_SIZE. + * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. + * => map must be unlocked. + * => <uobj,uoffset> value meanings (4 cases): + * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER + * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER + * [3] <uobj,uoffset> == normal mapping + * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA + * + * case [4] is for kernel mappings where we don't know the offset until + * we've found a virtual address. note that kernel object offsets are + * always relative to vm_map_min(kernel_map). + * + * => align: align vaddr, must be a power-of-2. + * Align is only a hint and will be ignored if the alignemnt fails. + */ +int +uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, + struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) +{ + struct vm_map_entry *first, *last, *entry; + struct uvm_map_deadq dead; + struct uvm_map_free *free; + vm_prot_t prot; + vm_prot_t maxprot; + vm_inherit_t inherit; + int advice; + int error; + + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); + + /* + * Decode parameters. + */ + prot = UVM_PROTECTION(flags); + maxprot = UVM_MAXPROTECTION(flags); + advice = UVM_ADVICE(flags); + inherit = UVM_INHERIT(flags); + error = 0; + TAILQ_INIT(&dead); + KASSERT((sz & PAGE_MASK) == 0); + KASSERT((align & (align - 1)) == 0); + + /* + * Holes are incompatible with other types of mappings. + */ + if (flags & UVM_FLAG_HOLE) { + KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && + (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); + } + + /* + * Check protection. + */ + if ((prot & maxprot) != prot) + return EACCES; + + if (flags & UVM_FLAG_TRYLOCK) { + if (vm_map_lock_try(map) == FALSE) + return EFAULT; + } else + vm_map_lock(map); + + first = last = NULL; + if (flags & UVM_FLAG_FIXED) { + /* + * Fixed location. + * + * Note: we ignore align, pmap_prefer. + * Fill in first, last and *addr. 
+ */ + KASSERT((*addr & PAGE_MASK) == 0); + if (!uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) { + error = ENOMEM; + goto unlock; } - if (trtmp != NULL && trtmp->start >= tmp->start) { - printf("%s: corrupt: 0x%lx >= 0x%lx\n", - name, trtmp->start, tmp->start); - goto error; + + /* + * Grow pmap to include allocated address. + * XXX not possible in kernel? + */ + if ((map->flags & VM_MAP_ISVMSPACE) == 0 && + uvm_maxkaddr < (*addr + sz)) { + uvm_map_kmem_grow(map, &dead, + *addr + sz - uvm_maxkaddr, flags); + + /* + * Reload first, last, since uvm_map_kmem_grow likely + * moved them around. + */ + first = last = NULL; + if (!uvm_map_isavail(&map->addr, &first, &last, + *addr, sz)) + panic("uvm_map: opened box, cat died"); } - n++; + } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && + (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && + (align == 0 || (*addr & (align - 1)) == 0) && + uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) { + /* + * Address used as hint. + * + * Note: we enforce the alignment restriction, + * but ignore the pmap_prefer. + */ + } else { + /* + * Update freelists from vmspace. + */ + if (map->flags & VM_MAP_ISVMSPACE) + uvm_map_vmspace_update(map, &dead, flags); + + /* + * Allocation for sz bytes at any address on the + * freelist. + */ + free = &map->free; + first = uvm_map_findspace_tree(free, sz, uoffset, align, + map->flags & VM_MAP_GUARDPAGES, addr, map); + last = NULL; /* May get set in previous test (by isavail). */ + + /* + * Fall back into brk() space if the initial attempt failed. + */ + if (first == NULL) { + if (map->flags & VM_MAP_ISVMSPACE) + free = &map->bfree; + else + uvm_map_kmem_grow(map, &dead, sz, flags); + + first = uvm_map_findspace_tree(free, sz, uoffset, align, + map->flags & VM_MAP_GUARDPAGES, addr, map); + if (first == NULL) { + error = ENOMEM; + goto unlock; + } + } + + /* + * Fill in last. + */ + if (!uvm_map_isavail(&map->addr, &first, &last, *addr, sz)) + panic("uvm_map: findspace and isavail disagree"); + } + + KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || + uvm_maxkaddr >= *addr + sz); + + /* + * If we only want a query, return now. + */ + if (flags & UVM_FLAG_QUERY) { + error = 0; + goto unlock; + } + + if (uobj == NULL) + uoffset = 0; + else if (uoffset == UVM_UNKNOWN_OFFSET) { + KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); + uoffset = *addr - vm_map_min(kernel_map); + } + + /* + * Create new entry. + * first and last may be invalidated after this call. + */ + entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead); + if (entry == NULL) { + error = ENOMEM; + goto unlock; + } + KDASSERT(entry->start == *addr && entry->end == *addr + sz); + entry->object.uvm_obj = uobj; + entry->offset = uoffset; + entry->protection = prot; + entry->max_protection = maxprot; + entry->inheritance = inherit; + entry->wired_count = 0; + entry->advice = advice; + if (uobj) + entry->etype = UVM_ET_OBJ; + else if (flags & UVM_FLAG_HOLE) + entry->etype = UVM_ET_HOLE; + else + entry->etype = 0; + if (flags & UVM_FLAG_COPYONW) { + entry->etype |= UVM_ET_COPYONWRITE; + if ((flags & UVM_FLAG_OVERLAY) == 0) + entry->etype |= UVM_ET_NEEDSCOPY; + } + if (flags & UVM_FLAG_OVERLAY) { + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = amap_alloc(sz, + ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0), + M_WAITOK); + } - trtmp = tmp; + /* + * Update map and process statistics. 
+ */ + if (!(flags & UVM_FLAG_HOLE)) + map->size += sz; + if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && + !(flags & UVM_FLAG_HOLE)) { + ((struct vmspace *)map)->vm_dused += + uvmspace_dused(map, *addr, *addr + sz); } - if (n != map->nentries) { - printf("%s: nentries: %d vs %d\n", - name, n, map->nentries); - goto error; + /* + * Try to merge entry. + * + * XXX: I can't think of a good reason to only merge kernel map entries, + * but it's what the old code did. I'll look at it later. + */ + if ((flags & UVM_FLAG_NOMERGE) == 0) + entry = uvm_mapent_tryjoin(map, entry, &dead); + +unlock: + vm_map_unlock(map); + + if (error == 0) { + DPRINTF(("uvm_map: 0x%lx-0x%lx (query=%c) map=%p\n", + *addr, *addr + sz, + (flags & UVM_FLAG_QUERY ? 'T' : 'F'), map)); + } + + /* + * Remove dead entries. + * + * Dead entries may be the result of merging. + * uvm_map_mkentry may also create dead entries, when it attempts to + * destroy free-space entries. + */ + uvm_unmap_detach(&dead, 0); + return error; +} + +/* + * True iff e1 and e2 can be joined together. + */ +int +uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, + struct vm_map_entry *e2) +{ + KDASSERT(e1 != NULL && e2 != NULL); + + /* + * Must be the same entry type and not have free memory between. + */ + if (e1->etype != e2->etype || e1->end != e2->start) + return 0; + + /* + * Submaps are never joined. + */ + if (UVM_ET_ISSUBMAP(e1)) + return 0; + + /* + * Never merge wired memory. + */ + if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) + return 0; + + /* + * Protection, inheritance and advice must be equal. + */ + if (e1->protection != e2->protection || + e1->max_protection != e2->max_protection || + e1->inheritance != e2->inheritance || + e1->advice != e2->advice) + return 0; + + /* + * If uvm_object: objects itself and offsets within object must match. + */ + if (UVM_ET_ISOBJ(e1)) { + if (e1->object.uvm_obj != e2->object.uvm_obj) + return 0; + if (e1->offset + (e1->end - e1->start) != e2->offset) + return 0; } - for (tmp = map->header.next; tmp && tmp != &map->header; - tmp = tmp->next, i++) { - trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp); - if (trtmp != tmp) { - printf("%s: lookup: %d: %p - %p: %p\n", - name, i, tmp, trtmp, - RB_PARENT(tmp, rb_entry)); - goto error; + /* + * Cannot join shared amaps. + * Note: no need to lock amap to look at refs, since we don't care + * about its exact value. + * If it is 1 (i.e. we have the only reference) it will stay there. + */ + if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) + return 0; + if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) + return 0; + + /* + * Apprently, e1 and e2 match. + */ + return 1; +} + +/* + * Join support function. + * + * Returns the merged entry on succes. + * Returns NULL if the merge failed. + */ +struct vm_map_entry* +uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, + struct vm_map_entry *e2, struct uvm_map_deadq *dead) +{ + struct uvm_map_free *free; + + /* + * Amap of e1 must be extended to include e2. + * e2 contains no real information in its amap, + * so it can be erased immediately. + */ + if (e1->aref.ar_amap) { + if (amap_extend(e1, e2->end - e2->start)) + return NULL; + } + + /* + * Don't drop obj reference: + * uvm_unmap_detach will do this for us. 
+ */ + + free = UVM_FREE(map, FREE_START(e2)); + if (e2->fspace > 0 && free) + uvm_mapent_free_remove(map, free, e2); + uvm_mapent_addr_remove(map, e2); + e1->end = e2->end; + e1->guard = e2->guard; + e1->fspace = e2->fspace; + if (e1->fspace > 0 && free) + uvm_mapent_free_insert(map, free, e1); + + DEAD_ENTRY_PUSH(dead, e2); + return e1; +} + +/* + * Attempt forward and backward joining of entry. + * + * Returns entry after joins. + * We are guaranteed that the amap of entry is either non-existant or + * has never been used. + */ +struct vm_map_entry* +uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, + struct uvm_map_deadq *dead) +{ + struct vm_map_entry *other; + struct vm_map_entry *merged; + + /* + * Merge with previous entry. + */ + other = RB_PREV(uvm_map_addr, &map->addr, entry); + if (other && uvm_mapent_isjoinable(map, other, entry)) { + merged = uvm_mapent_merge(map, other, entry, dead); + DPRINTF(("prev merge: %p + %p -> %p\n", other, entry, merged)); + if (merged) + entry = merged; + } + + /* + * Merge with next entry. + * + * Because amap can only extend forward and the next entry + * probably contains sensible info, only perform forward merging + * in the absence of an amap. + */ + other = RB_NEXT(uvm_map_addr, &map->addr, entry); + if (other && entry->aref.ar_amap == NULL && + other->aref.ar_amap == NULL && + uvm_mapent_isjoinable(map, entry, other)) { + merged = uvm_mapent_merge(map, entry, other, dead); + DPRINTF(("next merge: %p + %p -> %p\n", entry, other, merged)); + if (merged) + entry = merged; + } + + return entry; +} + +/* + * Kill entries that are no longer in a map. + */ +void +uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) +{ + struct vm_map_entry *entry; + + while ((entry = TAILQ_FIRST(deadq)) != NULL) { + /* + * Drop reference to amap, if we've got one. + */ + if (entry->aref.ar_amap) + amap_unref(entry->aref.ar_amap, + entry->aref.ar_pageoff, + atop(entry->end - entry->start), + flags); + + /* + * Drop reference to our backing object, if we've got one. + */ + if (UVM_ET_ISSUBMAP(entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_deallocate(entry->object.sub_map); + } else if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops->pgo_detach) { + entry->object.uvm_obj->pgops->pgo_detach( + entry->object.uvm_obj); } + + /* + * Step to next. + */ + TAILQ_REMOVE(deadq, entry, daddrs.deadq); + uvm_mapent_free(entry); } +} - return (0); - error: -#ifdef DDB - /* handy breakpoint location for error case */ - __asm(".globl treesanity_label\ntreesanity_label:"); -#endif - return (-1); +/* + * Create and insert new entry. + * + * Returned entry contains new addresses and is inserted properly in the tree. + * first and last are (probably) no longer valid. + */ +struct vm_map_entry* +uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, + struct uvm_map_deadq *dead) +{ + struct vm_map_entry *entry, *prev; + struct uvm_map_free *free; + vaddr_t min, max; /* free space boundaries for new entry */ + + KDASSERT(map != NULL && first != NULL && last != NULL && dead != NULL && + sz > 0 && addr + sz > addr); + KDASSERT(first->end <= addr && FREE_END(first) > addr); + KDASSERT(last->start < addr + sz && FREE_END(last) >= addr + sz); + KDASSERT(uvm_map_isavail(&map->addr, &first, &last, addr, sz)); + uvm_tree_sanity(map, __FILE__, __LINE__); + + min = addr + sz; + max = FREE_END(last); + + /* + * Initialize new entry. 
+ */ + entry = uvm_mapent_alloc(map, flags); + if (entry == NULL) + return NULL; + entry->offset = 0; + entry->etype = 0; + entry->wired_count = 0; + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = NULL; + + entry->start = addr; + entry->end = min; + entry->guard = 0; + entry->fspace = 0; + + /* + * Reset free space in first. + */ + free = UVM_FREE(map, FREE_START(first)); + if (free) + uvm_mapent_free_remove(map, free, first); + first->guard = 0; + first->fspace = 0; + + /* + * Remove all entries that are fully replaced. + * We are iterating using last in reverse order. + */ + for (; first != last; last = prev) { + prev = RB_PREV(uvm_map_addr, &map->addr, last); + + KDASSERT(last->start == last->end); + free = UVM_FREE(map, FREE_START(last)); + if (free && last->fspace > 0) + uvm_mapent_free_remove(map, free, last); + uvm_mapent_addr_remove(map, last); + DEAD_ENTRY_PUSH(dead, last); + } + /* + * Remove first if it is entirely inside <addr, addr+sz>. + */ + if (first->start == addr) { + uvm_mapent_addr_remove(map, first); + DEAD_ENTRY_PUSH(dead, first); + } else + uvm_map_fix_space(map, first, FREE_START(first), addr, flags); + + /* + * Finally, link in entry. + */ + uvm_mapent_addr_insert(map, entry); + uvm_map_fix_space(map, entry, min, max, flags); + + uvm_tree_sanity(map, __FILE__, __LINE__); + return entry; } -#endif /* * uvm_mapent_alloc: allocate a map entry */ - struct vm_map_entry * uvm_mapent_alloc(struct vm_map *map, int flags) { @@ -416,15 +1590,15 @@ uvm_mapent_alloc(struct vm_map *map, int flags) for (i = 0; i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; i++) - ne[i].next = &ne[i + 1]; - ne[i].next = NULL; + RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; + RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; me = ne; if (ratecheck(&uvm_kmapent_last_warn_time, &uvm_kmapent_warn_rate)) printf("uvm_mapent_alloc: out of static " "map entries\n"); } - uvm.kentry_free = me->next; + uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); uvmexp.kmapent++; simple_unlock(&uvm.kentry_lock); splx(s); @@ -443,6 +1617,14 @@ uvm_mapent_alloc(struct vm_map *map, int flags) me->flags = 0; } + if (me != NULL) { + RB_LEFT(me, free_entry) = RB_RIGHT(me, free_entry) = + RB_PARENT(me, free_entry) = UVMMAP_DEADBEEF; + RB_LEFT(me, daddrs.addr_entry) = + RB_RIGHT(me, daddrs.addr_entry) = + RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; + } + out: UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%ld]", me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); @@ -454,7 +1636,6 @@ out: * * => XXX: static pool for kernel map? */ - void uvm_mapent_free(struct vm_map_entry *me) { @@ -463,10 +1644,16 @@ uvm_mapent_free(struct vm_map_entry *me) UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%ld]", me, me->flags, 0, 0); + + if (RB_LEFT(me, free_entry) != UVMMAP_DEADBEEF || + RB_RIGHT(me, free_entry) != UVMMAP_DEADBEEF || + RB_PARENT(me, free_entry) != UVMMAP_DEADBEEF) + panic("uvm_mapent_free: mapent %p still in free list\n", me); + if (me->flags & UVM_MAP_STATIC) { s = splvm(); simple_lock(&uvm.kentry_lock); - me->next = uvm.kentry_free; + RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; uvm.kentry_free = me; uvmexp.kmapent--; simple_unlock(&uvm.kentry_lock); @@ -481,1838 +1668,2000 @@ uvm_mapent_free(struct vm_map_entry *me) } /* - * uvm_mapent_copy: copy a map entry, preserving flags + * uvm_map_lookup_entry: find map entry at or before an address. 
+ * + * => map must at least be read-locked by caller + * => entry is returned in "entry" + * => return value is true if address is in the returned entry + * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is + * returned for those mappings. */ - -void -uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) +boolean_t +uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, + struct vm_map_entry **entry) { - memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - - ((char *)src)); + *entry = uvm_map_entrybyaddr(&map->addr, address); + return *entry != NULL && !UVM_ET_ISHOLE(*entry) && + (*entry)->start <= address && (*entry)->end > address; } /* - * uvm_map_entry_unwire: unwire a map entry - * - * => map should be locked by caller + * uvm_map_pie: return a random load address for a PIE executable + * properly aligned. */ -void -uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) +#ifndef VM_PIE_MAX_ADDR +#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) +#endif + +#ifndef VM_PIE_MIN_ADDR +#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS +#endif + +#ifndef VM_PIE_MIN_ALIGN +#define VM_PIE_MIN_ALIGN PAGE_SIZE +#endif + +vaddr_t +uvm_map_pie(vaddr_t align) { + vaddr_t addr, space, min; - entry->wired_count = 0; - uvm_fault_unwire_locked(map, entry->start, entry->end); -} + align = MAX(align, VM_PIE_MIN_ALIGN); + /* round up to next alignment */ + min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); + + if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) + return (align); + + space = (VM_PIE_MAX_ADDR - min) / align; + space = MIN(space, (u_int32_t)-1); + + addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; + addr += min; + + return (addr); +} -/* - * wrapper for calling amap_ref() - */ void -uvm_map_reference_amap(struct vm_map_entry *entry, int flags) +uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) { - amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, - (entry->end - entry->start) >> PAGE_SHIFT, flags); -} + struct uvm_map_deadq dead; + + KASSERT((start & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0); + TAILQ_INIT(&dead); + vm_map_lock(map); + uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); + vm_map_unlock(map); + uvm_unmap_detach(&dead, 0); +} /* - * wrapper for calling amap_unref() + * Mark entry as free. + * + * entry will be put on the dead list. + * The free space will be merged into the previous or a new entry, + * unless markfree is false. */ void -uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) +uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, + struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, + boolean_t markfree) { - amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, - (entry->end - entry->start) >> PAGE_SHIFT, flags); -} + struct uvm_map_free *free; + struct vm_map_entry *prev; + vaddr_t addr; /* Start of freed range. */ + vaddr_t end; /* End of freed range. */ + + prev = *prev_ptr; + if (prev == entry) + *prev_ptr = prev = NULL; + + if (prev == NULL || + FREE_END(prev) != entry->start) + prev = RB_PREV(uvm_map_addr, &map->addr, entry); + /* + * Entry is describing only free memory and has nothing to drain into. 
+ */ + if (prev == NULL && entry->start == entry->end && markfree) { + *prev_ptr = entry; + return; + } + addr = entry->start; + end = FREE_END(entry); + free = UVM_FREE(map, FREE_START(entry)); + if (entry->fspace > 0 && free) + uvm_mapent_free_remove(map, free, entry); + uvm_mapent_addr_remove(map, entry); + DEAD_ENTRY_PUSH(dead, entry); + + if (markfree) + *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); +} /* - * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of structs vm_map_entry for the kernel here. + * Remove all entries from start to end. + * + * If remove_holes, then remove ET_HOLE entries as well. + * If markfree, entry will be properly marked free, otherwise, no replacement + * entry will be put in the tree (corrupting the tree). */ - void -uvm_map_init(void) +uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, + struct uvm_map_deadq *dead, boolean_t remove_holes, + boolean_t markfree) { - static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; -#if defined(UVMHIST) - static struct uvm_history_ent maphistbuf[100]; - static struct uvm_history_ent pdhistbuf[100]; -#endif - int lcv; + struct vm_map_entry *prev_hint, *next, *entry; - /* - * first, init logging system. - */ + VM_MAP_RANGE_CHECK(map, start, end); + if (start == end) + return; - UVMHIST_FUNC("uvm_map_init"); - UVMHIST_INIT_STATIC(maphist, maphistbuf); - UVMHIST_INIT_STATIC(pdhist, pdhistbuf); - UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); - UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, - "# uvm_map() successful calls", 0); - UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); - UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", - 0); - UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0); - UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); - UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); /* - * now set up static pool of kernel map entries ... + * Find first affected entry. */ + entry = uvm_map_entrybyaddr(&map->addr, start); + KDASSERT(entry != NULL && entry->start <= start); + if (entry->end <= start && markfree) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + else + UVM_MAP_CLIP_START(map, entry, start); - simple_lock_init(&uvm.kentry_lock); - uvm.kentry_free = NULL; - for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { - kernel_map_entry[lcv].next = uvm.kentry_free; - uvm.kentry_free = &kernel_map_entry[lcv]; - } + DPRINTF(("uvm_unmap_p: 0x%lx-0x%lx\n" + "\tfirst 0x%lx-0x%lx\n", + start, end, + entry->start, entry->end)); /* - * initialize the map-related pools. + * Iterate entries until we reach end address. + * prev_hint hints where the freed space can be appended to. */ - pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), - 0, 0, 0, "vmsppl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpepl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpekpl", NULL); - pool_sethiwat(&uvm_map_entry_pool, 8192); -} - -/* - * clippers - */ - -/* - * uvm_map_clip_start: ensure that the entry begins at or after - * the starting address, if it doesn't we split the entry. 
- * - * => caller should use UVM_MAP_CLIP_START macro rather than calling - * this directly - * => map must be locked by caller - */ - -void -uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, - vaddr_t start) -{ - struct vm_map_entry *new_entry; - vaddr_t new_adj; + prev_hint = NULL; + for (; entry != NULL && entry->start < end; entry = next) { + KDASSERT(entry->start >= start); + if (entry->end > end || !markfree) + UVM_MAP_CLIP_END(map, entry, end); + KDASSERT(entry->start >= start && entry->end <= end); + next = RB_NEXT(uvm_map_addr, &map->addr, entry); + DPRINTF(("\tunmap 0x%lx-0x%lx used 0x%lx-0x%lx free\n", + entry->start, entry->end, + FREE_START(entry), FREE_END(entry))); - /* uvm_map_simplify_entry(map, entry); */ /* XXX */ + /* + * Unwire removed map entry. + */ + if (VM_MAPENT_ISWIRED(entry)) { + entry->wired_count = 0; + uvm_fault_unwire_locked(map, entry->start, entry->end); + } - uvm_tree_sanity(map, "clip_start entry"); + /* + * Entry-type specific code. + */ + if (UVM_ET_ISHOLE(entry)) { + /* + * Skip holes unless remove_holes. + */ + if (!remove_holes) { + prev_hint = entry; + continue; + } + } else if (map->flags & VM_MAP_INTRSAFE) { + KASSERT(vm_map_pmap(map) == pmap_kernel()); + uvm_km_pgremove_intrsafe(entry->start, entry->end); + pmap_kremove(entry->start, entry->end - entry->start); + } else if (UVM_ET_ISOBJ(entry) && + UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { + KASSERT(vm_map_pmap(map) == pmap_kernel()); - /* - * Split off the front portion. note that we must insert the new - * entry BEFORE this one, so that this entry has the specified - * starting address. - */ + /* + * Note: kernel object mappings are currently used in + * two ways: + * [1] "normal" mappings of pages in the kernel object + * [2] uvm_km_valloc'd allocations in which we + * pmap_enter in some non-kernel-object page + * (e.g. vmapbuf). + * + * for case [1], we need to remove the mapping from + * the pmap and then remove the page from the kernel + * object (because, once pages in a kernel object are + * unmapped they are no longer needed, unlike, say, + * a vnode where you might want the data to persist + * until flushed out of a queue). + * + * for case [2], we need to remove the mapping from + * the pmap. there shouldn't be any pages at the + * specified offset in the kernel object [but it + * doesn't hurt to call uvm_km_pgremove just to be + * safe?] + * + * uvm_km_pgremove currently does the following: + * for pages in the kernel object range: + * - drops the swap slot + * - uvm_pagefree the page + * + * note there is version of uvm_km_pgremove() that + * is used for "intrsafe" objects. + */ - new_entry = uvm_mapent_alloc(map, 0); - uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ + /* + * remove mappings from pmap and drop the pages + * from the object. offsets are always relative + * to vm_map_min(kernel_map). + */ + pmap_remove(pmap_kernel(), entry->start, entry->end); + uvm_km_pgremove(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); - new_entry->end = start; - new_adj = start - new_entry->start; - if (entry->object.uvm_obj) - entry->offset += new_adj; /* shift start over */ + /* + * null out kernel_object reference, we've just + * dropped it + */ + entry->etype &= ~UVM_ET_OBJ; + entry->object.uvm_obj = NULL; /* to be safe */ + } else { + /* + * remove mappings the standard way. 
+ */ + pmap_remove(map->pmap, entry->start, entry->end); + } - /* Does not change order for the RB tree */ - entry->start = start; + /* + * Update space usage. + */ + if ((map->flags & VM_MAP_ISVMSPACE) && + entry->object.uvm_obj == NULL && + !UVM_ET_ISHOLE(entry)) { + ((struct vmspace *)map)->vm_dused -= + uvmspace_dused(map, entry->start, entry->end); + } + if (!UVM_ET_ISHOLE(entry)) + map->size -= entry->end - entry->start; - if (new_entry->aref.ar_amap) { - amap_splitref(&new_entry->aref, &entry->aref, new_adj); + /* + * Actual removal of entry. + */ + uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); } - uvm_map_entry_link(map, entry->prev, new_entry); + pmap_update(vm_map_pmap(map)); + + DPRINTF(("uvm_unmap_p: 0x%lx-0x%lx map=%p\n", start, end, + map)); - if (UVM_ET_ISSUBMAP(entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_reference(new_entry->object.sub_map); +#ifdef DEBUG + if (markfree) { + for (entry = uvm_map_entrybyaddr(&map->addr, start); + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + KDASSERT(entry->end <= start || + entry->start == entry->end || + UVM_ET_ISHOLE(entry)); + } } else { - if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->pgops && - entry->object.uvm_obj->pgops->pgo_reference) - entry->object.uvm_obj->pgops->pgo_reference( - entry->object.uvm_obj); + vaddr_t a; + for (a = start; a < end; a += PAGE_SIZE) + KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); } - - uvm_tree_sanity(map, "clip_start leave"); +#endif } /* - * uvm_map_clip_end: ensure that the entry ends at or before - * the ending address, if it doesn't we split the reference - * - * => caller should use UVM_MAP_CLIP_END macro rather than calling - * this directly - * => map must be locked by caller + * Mark all entries from first until end (exclusive) as pageable. + * + * Lock must be exclusive on entry and will not be touched. */ - void -uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) +uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) { - struct vm_map_entry *new_entry; - vaddr_t new_adj; /* #bytes we move start forward */ + struct vm_map_entry *iter; - uvm_tree_sanity(map, "clip_end entry"); - /* - * Create a new entry and insert it - * AFTER the specified entry - */ - - new_entry = uvm_mapent_alloc(map, 0); - uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ - - new_entry->start = entry->end = end; - new_adj = end - entry->start; - if (new_entry->object.uvm_obj) - new_entry->offset += new_adj; - - if (entry->aref.ar_amap) - amap_splitref(&entry->aref, &new_entry->aref, new_adj); - - uvm_rb_fixup(map, entry); - - uvm_map_entry_link(map, entry, new_entry); + for (iter = first; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + KDASSERT(iter->start >= start_addr && iter->end <= end_addr); + if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) + continue; - if (UVM_ET_ISSUBMAP(entry)) { - /* ... 
unlikely to happen, but play it safe */ - uvm_map_reference(new_entry->object.sub_map); - } else { - if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->pgops && - entry->object.uvm_obj->pgops->pgo_reference) - entry->object.uvm_obj->pgops->pgo_reference( - entry->object.uvm_obj); + iter->wired_count = 0; + uvm_fault_unwire_locked(map, iter->start, iter->end); } - uvm_tree_sanity(map, "clip_end leave"); } - -/* - * M A P - m a i n e n t r y p o i n t - */ /* - * uvm_map: establish a valid mapping in a map + * Mark all entries from first until end (exclusive) as wired. * - * => assume startp is page aligned. - * => assume size is a multiple of PAGE_SIZE. - * => assume sys_mmap provides enough of a "hint" to have us skip - * over text/data/bss area. - * => map must be unlocked (we will lock it) - * => <uobj,uoffset> value meanings (4 cases): - * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER - * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER - * [3] <uobj,uoffset> == normal mapping - * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA - * - * case [4] is for kernel mappings where we don't know the offset until - * we've found a virtual address. note that kernel object offsets are - * always relative to vm_map_min(kernel_map). - * - * => if `align' is non-zero, we try to align the virtual address to - * the specified alignment. this is only a hint; if we can't - * do it, the address will be unaligned. this is provided as - * a mechanism for large pages. - * - * => XXXCDC: need way to map in external amap? + * Lockflags determines the lock state on return from this function. + * Lock must be exclusive on entry. */ - int -uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size, - struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, - struct proc *p) +uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, + int lockflags) { - struct vm_map_entry *prev_entry, *new_entry; -#ifdef KVA_GUARDPAGES - struct vm_map_entry *guard_entry; + struct vm_map_entry *iter; +#ifdef DIAGNOSTIC + unsigned int timestamp_save; #endif - vm_prot_t prot = UVM_PROTECTION(flags), maxprot = - UVM_MAXPROTECTION(flags); - vm_inherit_t inherit = UVM_INHERIT(flags); - int advice = UVM_ADVICE(flags); int error; - UVMHIST_FUNC("uvm_map"); - UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist, "(map=%p, *startp=0x%lx, size=%ld, flags=0x%lx)", - map, *startp, size, flags); - UVMHIST_LOG(maphist, " uobj/offset %p/%ld", uobj, (u_long)uoffset,0,0); + /* + * Wire pages in two passes: + * + * 1: holding the write lock, we create any anonymous maps that need + * to be created. then we clip each map entry to the region to + * be wired and increment its wiring count. + * + * 2: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the pages for any newly wired area (wired_count == 1). + * + * downgrading to a read lock for uvm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one of + * the pages to be wired (it would mark the page busy, blocking + * us, then in turn block on the map lock that we hold). + * because we keep the read lock on the map, the copy-on-write + * status of the entries we modify here cannot change. 
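The comment above describes the two-pass wire: every entry first gets its wired_count bumped under the write lock, then the pages are faulted in under a read lock, and a failure walks back over the entries already processed to undo the counts. A rough userland sketch of that rollback shape; wire_range() and unwire_range() are placeholders standing in for uvm_fault_wire() and uvm_fault_unwire_locked().

#include <stdio.h>

#define NENT 4

/* Toy stand-in for a map entry. */
struct ent {
	int wired_count;
	int fail;		/* simulate a wire fault failing */
};

static int
wire_range(struct ent *e)
{
	return e->fail ? -1 : 0;
}

static void
unwire_range(struct ent *e)
{
	(void)e;		/* would unwire the pages here */
}

static int
wire_all(struct ent *e, int n)
{
	int i, j, error = 0;

	/* Pass 1: mark everything wired (cheap, under the write lock). */
	for (i = 0; i < n; i++)
		e[i].wired_count++;

	/* Pass 2: fault the pages in (downgraded to a read lock). */
	for (i = 0; i < n && error == 0; i++)
		error = wire_range(&e[i]);

	if (error) {
		/* Undo the part that was attempted... */
		for (j = 0; j < i; j++) {
			if (--e[j].wired_count == 0)
				unwire_range(&e[j]);
		}
		/* ...and only drop the counts on the untouched rest. */
		for (; j < n; j++)
			e[j].wired_count--;
		return error;
	}
	return 0;
}

int
main(void)
{
	struct ent e[NENT] = { { 0, 0 }, { 0, 0 }, { 0, 1 }, { 0, 0 } };

	printf("wire_all: %d\n", wire_all(e, NENT));	/* fails on entry 2 */
	return 0;
}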
+ */ + for (iter = first; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + KDASSERT(iter->start >= start_addr && iter->end <= end_addr); + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; + + /* + * Perform actions of vm_map_lookup that need the write lock. + * - create an anonymous map for copy-on-write + * - anonymous map for zero-fill + * Skip submaps. + */ + if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && + UVM_ET_ISNEEDSCOPY(iter) && + ((iter->protection & VM_PROT_WRITE) || + iter->object.uvm_obj == NULL)) { + amap_copy(map, iter, M_WAITOK, TRUE, + iter->start, iter->end); + } + iter->wired_count++; + } /* - * Holes are incompatible with other types of mappings. + * Pass 2. */ - if (flags & UVM_FLAG_HOLE) { - KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 && - (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); +#ifdef DIAGNOSTIC + timestamp_save = map->timestamp; +#endif + vm_map_busy(map); + vm_map_downgrade(map); + + error = 0; + for (iter = first; error == 0 && iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; + + error = uvm_fault_wire(map, iter->start, iter->end, + iter->protection); } -#ifdef KVA_GUARDPAGES - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { + if (error) { /* - * kva_guardstart is initialized to the start of the kernelmap - * and cycles through the kva space. - * This way we should have a long time between re-use of kva. + * uvm_fault_wire failure + * + * Reacquire lock and undo our work. */ - static vaddr_t kva_guardstart = 0; - if (kva_guardstart == 0) { - kva_guardstart = vm_map_min(map); - printf("uvm_map: kva guard pages enabled: %p\n", - kva_guardstart); - } - size += PAGE_SIZE; /* Add guard page at the end. */ + vm_map_upgrade(map); + vm_map_unbusy(map); +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable_wire: stale map"); +#endif + /* - * Try to fully exhaust kva prior to wrap-around. - * (This may eat your ram!) + * first is no longer needed to restart loops. + * Use it as iterator to unmap successful mappings. */ - if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) { - static int wrap_counter = 0; - printf("uvm_map: kva guard page wrap-around %d\n", - ++wrap_counter); - kva_guardstart = vm_map_min(map); + for (; first != iter; + first = RB_NEXT(uvm_map_addr, &map->addr, first)) { + if (UVM_ET_ISHOLE(first) || first->start == first->end) + continue; + + first->wired_count--; + if (!VM_MAPENT_ISWIRED(first)) { + uvm_fault_unwire_locked(map, + iter->start, iter->end); + } } - *startp = kva_guardstart; + /* - * Prepare for next round. + * decrease counter in the rest of the entries */ - kva_guardstart += size; - } -#endif + for (; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; - uvm_tree_sanity(map, "map entry"); + iter->wired_count--; + } - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + return error; + } /* - * step 0: sanity check of protection code + * We are currently holding a read lock. */ - - if ((prot & maxprot) != prot) { - UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%lx, max=0x%lx", - prot, maxprot,0,0); - return (EACCES); + if ((lockflags & UVM_LK_EXIT) == 0) { + vm_map_unbusy(map); + vm_map_unlock_read(map); + } else { + vm_map_upgrade(map); + vm_map_unbusy(map); +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable_wire: stale map"); +#endif } + return 0; +} - /* - * step 1: figure out where to put new VM range - */ +/* + * uvm_map_pageable: set pageability of a range in a map. + * + * Flags: + * UVM_LK_ENTER: map is already locked by caller + * UVM_LK_EXIT: don't unlock map on exit + * + * The full range must be in use (entries may not have fspace != 0). + * UVM_ET_HOLE counts as unmapped. + */ +int +uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, + boolean_t new_pageable, int lockflags) +{ + struct vm_map_entry *first, *last, *tmp; + int error; - if (vm_map_lock_try(map) == FALSE) { - if (flags & UVM_FLAG_TRYLOCK) - return (EFAULT); - vm_map_lock(map); /* could sleep here */ - } - if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, - uobj, uoffset, align, flags)) == NULL) { - UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); - vm_map_unlock(map); - return (ENOMEM); - } + KASSERT(map->flags & VM_MAP_PAGEABLE); + if ((lockflags & UVM_LK_ENTER) == 0) + vm_map_lock(map); -#ifdef PMAP_GROWKERNEL - { + VM_MAP_RANGE_CHECK(map, start, end); + + /* + * Find first entry. + * + * Initial test on start is different, because of the different + * error returned. Rest is tested further down. + */ + first = uvm_map_entrybyaddr(&map->addr, start); + if (first->end <= start || UVM_ET_ISHOLE(first)) { /* - * If the kernel pmap can't map the requested space, - * then allocate more resources for it. + * XXX if the first address is not mapped, it is EFAULT? */ - if (map == kernel_map && !(flags & UVM_FLAG_FIXED) && - uvm_maxkaddr < (*startp + size)) - uvm_maxkaddr = pmap_growkernel(*startp + size); + error = EFAULT; + goto out; } -#endif - - UVMCNT_INCR(uvm_map_call); /* - * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER - * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in - * either case we want to zero it before storing it in the map entry - * (because it looks strange and confusing when debugging...) - * - * if uobj is not null - * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping - * and we do not need to change uoffset. - * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset - * now (based on the starting address of the map). this case is - * for kernel object mappings where we don't know the offset until - * the virtual address is found (with uvm_map_findspace). the - * offset is the distance we are from the start of the map. + * Check that the range has no holes. */ - - if (uobj == NULL) { - uoffset = 0; - } else { - if (uoffset == UVM_UNKNOWN_OFFSET) { - KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); - uoffset = *startp - vm_map_min(kernel_map); + for (last = first; last != NULL && last->start < end; + last = RB_NEXT(uvm_map_addr, &map->addr, last)) { + if (UVM_ET_ISHOLE(last) || + (last->end < end && FREE_END(last) != last->end)) { + /* + * XXX unmapped memory in range, why is it EINVAL + * instead of EFAULT? + */ + error = EINVAL; + goto out; } } /* - * step 2: try and insert in map by extending previous entry, if - * possible - * XXX: we don't try and pull back the next entry. might be useful - * for a stack, but we are currently allocating our stack in advance. + * Last ended at the first entry after the range. 
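uvm_map_pageable() above insists that [start, end) is fully backed by mappings, rejecting ET_HOLE entries and gaps of free space before it changes any wiring. A small sketch of that kind of coverage check over a sorted array of extents; the extent structure and its is_hole flag are invented for the example.

#include <stdio.h>

/* Toy extent: a mapped range, possibly a hole. */
struct extent {
	unsigned long start, end;
	int is_hole;
};

/* Return 0 if [start, end) is completely covered by non-hole extents. */
static int
range_check(const struct extent *e, int n, unsigned long start,
    unsigned long end)
{
	unsigned long addr = start;
	int i;

	for (i = 0; i < n && addr < end; i++) {
		if (e[i].end <= addr)
			continue;		/* entirely before the range */
		if (e[i].start > addr || e[i].is_hole)
			return -1;		/* gap or hole */
		addr = e[i].end;
	}
	return addr >= end ? 0 : -1;
}

int
main(void)
{
	struct extent map[] = {
		{ 0x1000, 0x4000, 0 },
		{ 0x4000, 0x5000, 1 },		/* a hole */
		{ 0x5000, 0x8000, 0 },
	};

	printf("%d\n", range_check(map, 3, 0x2000, 0x4000));	/* 0: covered */
	printf("%d\n", range_check(map, 3, 0x2000, 0x6000));	/* -1: hole */
	return 0;
}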
+ * Move back one step. + * + * Note that last may be NULL. */ + if (last == NULL) { + last = RB_MAX(uvm_map_addr, &map->addr); + if (last->end < end) { + error = EINVAL; + goto out; + } + } else + last = RB_PREV(uvm_map_addr, &map->addr, last); - if ((flags & UVM_FLAG_NOMERGE) == 0 && - prev_entry->end == *startp && prev_entry != &map->header && - prev_entry->object.uvm_obj == uobj) { - - if (uobj && prev_entry->offset + - (prev_entry->end - prev_entry->start) != uoffset) - goto step3; - - if (UVM_ET_ISSUBMAP(prev_entry)) - goto step3; - - if (prev_entry->protection != prot || - prev_entry->max_protection != maxprot) - goto step3; - - if (prev_entry->inheritance != inherit || - prev_entry->advice != advice) - goto step3; - - /* wiring status must match (new area is unwired) */ - if (VM_MAPENT_ISWIRED(prev_entry)) - goto step3; - + /* + * Wire/unwire pages here. + */ + if (new_pageable) { /* - * can't extend a shared amap. note: no need to lock amap to - * look at refs since we don't care about its exact value. - * if it is one (i.e. we have only reference) it will stay there + * Mark pageable. + * entries that are not wired are untouched. */ - - if (prev_entry->aref.ar_amap && - amap_refs(prev_entry->aref.ar_amap) != 1) { - goto step3; - } - + if (VM_MAPENT_ISWIRED(first)) + UVM_MAP_CLIP_START(map, first, start); /* - * Only merge kernel mappings, but keep track - * of how much we skipped. + * Split last at end. + * Make tmp be the first entry after what is to be touched. + * If last is not wired, don't touch it. */ - if (map != kernel_map && map != kmem_map) { - UVMCNT_INCR(map_nousermerge); - goto step3; - } + if (VM_MAPENT_ISWIRED(last)) { + UVM_MAP_CLIP_END(map, last, end); + tmp = RB_NEXT(uvm_map_addr, &map->addr, last); + } else + tmp = last; - if (prev_entry->aref.ar_amap) { - error = amap_extend(prev_entry, size); - if (error) - goto step3; - } - - UVMCNT_INCR(map_backmerge); - UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); + uvm_map_pageable_pgon(map, first, tmp, start, end); + error = 0; +out: + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + return error; + } else { + /* + * Mark entries wired. + * entries are always touched (because recovery needs this). + */ + if (!VM_MAPENT_ISWIRED(first)) + UVM_MAP_CLIP_START(map, first, start); /* - * drop our reference to uobj since we are extending a reference - * that we already have (the ref count can not drop to zero). + * Split last at end. + * Make tmp be the first entry after what is to be touched. + * If last is not wired, don't touch it. */ + if (!VM_MAPENT_ISWIRED(last)) { + UVM_MAP_CLIP_END(map, last, end); + tmp = RB_NEXT(uvm_map_addr, &map->addr, last); + } else + tmp = last; + + return uvm_map_pageable_wire(map, first, tmp, start, end, + lockflags); + } +} - if (uobj && uobj->pgops->pgo_detach) - uobj->pgops->pgo_detach(uobj); +/* + * uvm_map_pageable_all: special case of uvm_map_pageable - affects + * all mapped regions. + * + * Map must not be locked. + * If no flags are specified, all ragions are unwired. 
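With MCL_CURRENT, uvm_map_pageable_all() sums the pages in the entries that are not yet wired and returns ENOMEM if wiring them would exceed the global wiredmax or the per-process limit. A sketch of that accounting in plain C; atop() and ptoa() are written out as page shifts here, the per-process check is simplified, and every name is a placeholder rather than the kernel's.

#include <stdio.h>

#define PAGE_SHIFT	12
#define atop(x)		((x) >> PAGE_SHIFT)	/* bytes -> pages */
#define ptoa(x)		((x) << PAGE_SHIFT)	/* pages -> bytes */

/* Toy stand-in for a map entry. */
struct region {
	unsigned long start, end;
	int wired;
};

/* Return 0 if wiring all unwired regions stays within both limits. */
static int
check_wire_limits(const struct region *r, int n,
    unsigned long wired_pages, unsigned long wiredmax_pages,
    unsigned long proc_limit_bytes)
{
	unsigned long size = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (r[i].wired)
			continue;
		size += r[i].end - r[i].start;
	}

	if (atop(size) + wired_pages > wiredmax_pages)
		return -1;		/* system-wide wired limit */
	if (proc_limit_bytes != 0 && size > proc_limit_bytes)
		return -1;		/* per-process limit (simplified) */
	return 0;
}

int
main(void)
{
	struct region r[] = {
		{ 0, ptoa(8UL), 0 },		/* 8 unwired pages */
		{ ptoa(8UL), ptoa(12UL), 1 },	/* already wired */
	};

	printf("%d\n", check_wire_limits(r, 2, 100, 1000, ptoa(16UL)));
	return 0;
}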
+ */ +int +uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) +{ + vsize_t size; + struct vm_map_entry *iter; - prev_entry->end += size; - uvm_rb_fixup(map, prev_entry); - map->size += size; - if (p && uobj == NULL) - p->p_vmspace->vm_dused += atop(size); + KASSERT(map->flags & VM_MAP_PAGEABLE); + vm_map_lock(map); - uvm_tree_sanity(map, "map leave 2"); + if (flags == 0) { + uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), + NULL, map->min_offset, map->max_offset); - UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); + atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE); vm_map_unlock(map); - return (0); + return 0; + } + if (flags & MCL_FUTURE) + atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE); + if (!(flags & MCL_CURRENT)) { + vm_map_unlock(map); + return 0; } -step3: - UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); /* - * check for possible forward merge (which we don't do) and count - * the number of times we missed a *possible* chance to merge more + * Count number of pages in all non-wired entries. + * If the number exceeds the limit, abort. */ + size = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) + continue; + + size += iter->end - iter->start; + } - if ((flags & UVM_FLAG_NOMERGE) == 0 && - prev_entry->next != &map->header && - prev_entry->next->start == (*startp + size)) - UVMCNT_INCR(map_forwmerge); + if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { + vm_map_unlock(map); + return ENOMEM; + } + + /* XXX non-pmap_wired_count case must be handled by caller */ +#ifdef pmap_wired_count + if (limit != 0 && + size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { + vm_map_unlock(map); + return ENOMEM; + } +#endif /* - * step 3: allocate new entry and link it in + * uvm_map_pageable_wire will release lcok */ + return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), + NULL, map->min_offset, map->max_offset, 0); +} -#ifdef KVA_GUARDPAGES - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) - size -= PAGE_SIZE; -#endif +/* + * Initialize map. + * + * Allocates sufficient entries to describe the free memory in the map. + */ +void +uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) +{ + KASSERT((min & PAGE_MASK) == 0); + KASSERT((max & PAGE_MASK) == 0 || (max & PAGE_MASK) == PAGE_MASK); - new_entry = uvm_mapent_alloc(map, flags); - if (new_entry == NULL) { - vm_map_unlock(map); - return (ENOMEM); + /* + * Update parameters. + * + * This code handles (vaddr_t)-1 and other page mask ending addresses + * properly. + * We lose the top page if the full virtual address space is used. + */ + if (max & PAGE_MASK) { + max += 1; + if (max == 0) /* overflow */ + max -= PAGE_SIZE; } - new_entry->start = *startp; - new_entry->end = new_entry->start + size; - new_entry->object.uvm_obj = uobj; - new_entry->offset = uoffset; - if (uobj) - new_entry->etype = UVM_ET_OBJ; - else - new_entry->etype = 0; + RB_INIT(&map->addr); + RB_INIT(&map->free.tree); + map->free.treesz = 0; + RB_INIT(&map->bfree.tree); + map->bfree.treesz = 0; - if (flags & UVM_FLAG_COPYONW) { - new_entry->etype |= UVM_ET_COPYONWRITE; - if ((flags & UVM_FLAG_OVERLAY) == 0) - new_entry->etype |= UVM_ET_NEEDSCOPY; - } - if (flags & UVM_FLAG_HOLE) - new_entry->etype |= UVM_ET_HOLE; + map->size = 0; + map->ref_count = 1; + map->min_offset = min; + map->max_offset = max; + map->b_start = map->b_end = 0; /* Empty brk() area by default. 
*/ + map->s_start = map->s_end = 0; /* Empty stack area by default. */ + map->flags = flags; + map->timestamp = 0; + rw_init(&map->lock, "vmmaplk"); + simple_lock_init(&map->ref_lock); - new_entry->protection = prot; - new_entry->max_protection = maxprot; - new_entry->inheritance = inherit; - new_entry->wired_count = 0; - new_entry->advice = advice; - if (flags & UVM_FLAG_OVERLAY) { - /* - * to_add: for BSS we overallocate a little since we - * are likely to extend - */ - vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? - UVM_AMAP_CHUNK << PAGE_SHIFT : 0; - struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); - new_entry->aref.ar_pageoff = 0; - new_entry->aref.ar_amap = amap; - } else { - new_entry->aref.ar_pageoff = 0; - new_entry->aref.ar_amap = NULL; + /* + * Fill map entries. + * This requires a write-locked map (because of diagnostic assertions + * in insert code). + */ + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0) + panic("uvm_map_setup: rw_enter failed on new map"); } + uvm_map_setup_entries(map); + uvm_tree_sanity(map, __FILE__, __LINE__); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&map->lock); +} - uvm_map_entry_link(map, prev_entry, new_entry); +/* + * Populate map with free-memory entries. + * + * Map must be initialized and empty. + */ +void +uvm_map_setup_entries(struct vm_map *map) +{ + KDASSERT(RB_EMPTY(&map->addr)); + KDASSERT(RB_EMPTY(&map->free.tree) && map->free.treesz == 0); + KDASSERT(RB_EMPTY(&map->bfree.tree) && map->bfree.treesz == 0); - map->size += size; - if (p && uobj == NULL) - p->p_vmspace->vm_dused += atop(size); + uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); +} +/* + * Split entry at given address. + * + * orig: entry that is to be split. + * next: a newly allocated map entry that is not linked. + * split: address at which the split is done. + */ +void +uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, + struct vm_map_entry *next, vaddr_t split) +{ + struct uvm_map_free *free; + vsize_t adj; - /* - * Update the free space hint - */ + KDASSERT(map != NULL && orig != NULL && next != NULL); + uvm_tree_sanity(map, __FILE__, __LINE__); + KASSERT(orig->start < split && FREE_END(orig) > split); - if ((map->first_free == prev_entry) && - (prev_entry->end >= new_entry->start)) - map->first_free = new_entry; + adj = split - orig->start; + free = UVM_FREE(map, FREE_START(orig)); + KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); + KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); + KDASSERT(orig->fspace == 0 || free == NULL || + RB_FIND(uvm_map_free_int, &free->tree, orig) == orig); -#ifdef KVA_GUARDPAGES /* - * Create the guard entry. + * Free space will change, unlink from free space tree. 
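uvm_map_splitentry() divides one entry, including the free space hanging off it, in two at the split address, redistributing guard and fspace so the free-space bookkeeping stays consistent. A simplified userland sketch of just that arithmetic, assuming FREE_START(e) is e->end + e->guard and FREE_END(e) is FREE_START(e) + e->fspace (consistent with the assertions elsewhere in this diff); the amap and object offset adjustments are omitted.

#include <assert.h>
#include <stdio.h>

/* Toy entry: a mapped range plus the guard and free space trailing it. */
struct xent {
	unsigned long start, end;	/* mapped part */
	unsigned long guard, fspace;	/* free space after the mapping */
};

#define FREE_START(e)	((e)->end + (e)->guard)
#define FREE_END(e)	(FREE_START(e) + (e)->fspace)

/* Split orig at `split'; next receives the upper half. */
static void
split_entry(struct xent *orig, struct xent *next, unsigned long split)
{
	assert(orig->start < split && FREE_END(orig) > split);
	*next = *orig;

	if (split >= orig->end) {
		/* Split lands in the free space: next is purely free. */
		next->start = next->end = split;
		next->guard = 0;
		next->fspace = FREE_END(orig) - split;
		if (split - orig->end < orig->guard)
			orig->guard = split - orig->end;
		orig->fspace = split - FREE_START(orig);
	} else {
		/* Split lands in the mapped part: orig keeps no free space. */
		orig->end = next->start = split;
		orig->guard = 0;
		orig->fspace = 0;
	}
}

int
main(void)
{
	struct xent a = { 0x1000, 0x3000, 0x1000, 0x4000 }, b;

	split_entry(&a, &b, 0x5000);
	printf("a: 0x%lx-0x%lx free 0x%lx-0x%lx\n",
	    a.start, a.end, FREE_START(&a), FREE_END(&a));
	printf("b: 0x%lx-0x%lx free 0x%lx-0x%lx\n",
	    b.start, b.end, FREE_START(&b), FREE_END(&b));
	return 0;
}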
*/ - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { - guard_entry = uvm_mapent_alloc(map, flags); - if (guard_entry != NULL { - guard_entry->start = new_entry->end; - guard_entry->end = guard_entry->start + PAGE_SIZE; - guard_entry->object.uvm_obj = uobj; - guard_entry->offset = uoffset; - guard_entry->etype = MAP_ET_KVAGUARD; - guard_entry->protection = prot; - guard_entry->max_protection = maxprot; - guard_entry->inheritance = inherit; - guard_entry->wired_count = 0; - guard_entry->advice = advice; - guard_entry->aref.ar_pageoff = 0; - guard_entry->aref.ar_amap = NULL; - uvm_map_entry_link(map, new_entry, guard_entry); - map->size += PAGE_SIZE; - kva_guardpages++; + if (orig->fspace > 0 && free) + uvm_mapent_free_remove(map, free, orig); + + uvm_mapent_copy(orig, next); + if (split >= orig->end) { + next->etype = 0; + next->offset = 0; + next->wired_count = 0; + next->start = next->end = split; + next->guard = 0; + next->fspace = FREE_END(orig) - split; + next->aref.ar_amap = NULL; + next->aref.ar_pageoff = 0; + orig->guard = MIN(orig->guard, split - orig->end); + orig->fspace = split - FREE_START(orig); + } else { + orig->fspace = 0; + orig->guard = 0; + orig->end = next->start = split; + + if (next->aref.ar_amap) + amap_splitref(&orig->aref, &next->aref, adj); + if (UVM_ET_ISSUBMAP(orig)) { + uvm_map_reference(next->object.sub_map); + next->offset += adj; + } else if (UVM_ET_ISOBJ(orig)) { + if (next->object.uvm_obj->pgops && + next->object.uvm_obj->pgops->pgo_reference) { + next->object.uvm_obj->pgops->pgo_reference( + next->object.uvm_obj); + } + next->offset += adj; } } -#endif - uvm_tree_sanity(map, "map leave"); + /* + * Link next into address tree. + * Link orig and next into free-space tree. + */ + uvm_mapent_addr_insert(map, next); + if (orig->fspace > 0 && free) + uvm_mapent_free_insert(map, free, orig); + if (next->fspace > 0 && free) + uvm_mapent_free_insert(map, free, next); - UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); - vm_map_unlock(map); - return (0); + uvm_tree_sanity(map, __FILE__, __LINE__); } -/* - * uvm_map_lookup_entry: find map entry at or before an address - * - * => map must at least be read-locked by caller - * => entry is returned in "entry" - * => return value is true if address is in the returned entry - */ -boolean_t -uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, - struct vm_map_entry **entry) +#ifdef DEBUG + +void +uvm_tree_assert(struct vm_map *map, int test, char *test_str, + char *file, int line) { - struct vm_map_entry *cur; - struct vm_map_entry *last; - int use_tree = 0; - UVMHIST_FUNC("uvm_map_lookup_entry"); - UVMHIST_CALLED(maphist); + char* map_special; - UVMHIST_LOG(maphist,"(map=%p,addr=0x%lx,ent=%p)", - map, address, entry, 0); + if (test) + return; - /* - * start looking either from the head of the - * list, or from the hint. - */ + if (map == kernel_map) + map_special = " (kernel_map)"; + else if (map == kmem_map) + map_special = " (kmem_map)"; + else + map_special = ""; + panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, + line, test_str); +} - simple_lock(&map->hint_lock); - cur = map->hint; - simple_unlock(&map->hint_lock); +/* + * Check that free space tree is sane. 
+ */ +void +uvm_tree_sanity_free(struct vm_map *map, struct uvm_map_free *free, + char *file, int line) +{ + struct vm_map_entry *iter; + vsize_t space, sz; - if (cur == &map->header) - cur = cur->next; + space = PAGE_SIZE; + sz = 0; + RB_FOREACH(iter, uvm_map_free_int, &free->tree) { + sz++; - UVMCNT_INCR(uvm_mlk_call); - if (address >= cur->start) { - /* - * go from hint to end of list. - * - * but first, make a quick check to see if - * we are already looking at the entry we - * want (which is usually the case). - * note also that we don't need to save the hint - * here... it is the same hint (unless we are - * at the header, in which case the hint didn't - * buy us anything anyway). - */ - last = &map->header; - if ((cur != last) && (cur->end > address)) { - UVMCNT_INCR(uvm_mlk_hint); - *entry = cur; - UVMHIST_LOG(maphist,"<- got it via hint (%p)", - cur, 0, 0, 0); - return (TRUE); - } + UVM_ASSERT(map, iter->fspace >= space, file, line); + space = iter->fspace; - if (map->nentries > 30) - use_tree = 1; - } else { - /* - * go from start to hint, *inclusively* - */ - last = cur->next; - cur = map->header.next; - use_tree = 1; + UVM_ASSERT(map, RB_FIND(uvm_map_addr, &map->addr, iter) == iter, + file, line); } + UVM_ASSERT(map, free->treesz == sz, file, line); +} - uvm_tree_sanity(map, __func__); +/* + * Check that map is sane. + */ +void +uvm_tree_sanity(struct vm_map *map, char *file, int line) +{ + struct vm_map_entry *iter; + struct uvm_map_free *free; + vaddr_t addr; + vaddr_t min, max, bound; /* Bounds checker. */ - if (use_tree) { - struct vm_map_entry *prev = &map->header; - cur = RB_ROOT(&map->rbhead); + addr = vm_map_min(map); + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + /* + * Valid start, end. + * Catch overflow for end+fspace. + */ + UVM_ASSERT(map, iter->end >= iter->start, file, line); + UVM_ASSERT(map, FREE_END(iter) >= iter->end, file, line); + /* + * May not be empty. + */ + UVM_ASSERT(map, iter->start < FREE_END(iter), file, line); /* - * Simple lookup in the tree. Happens when the hint is - * invalid, or nentries reach a threshold. + * Addresses for entry must lie within map boundaries. */ - while (cur) { - if (address >= cur->start) { - if (address < cur->end) { - *entry = cur; - SAVE_HINT(map, map->hint, cur); - return (TRUE); - } - prev = cur; - cur = RB_RIGHT(cur, rb_entry); - } else - cur = RB_LEFT(cur, rb_entry); - } - *entry = prev; - UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); - return (FALSE); - } + UVM_ASSERT(map, iter->start >= vm_map_min(map) && + FREE_END(iter) <= vm_map_max(map), file, line); - /* - * search linearly - */ + /* + * Tree may not have gaps. + */ + UVM_ASSERT(map, iter->start == addr, file, line); + addr = FREE_END(iter); - while (cur != last) { - if (cur->end > address) { - if (address >= cur->start) { - /* - * save this lookup for future - * hints, and return - */ + /* + * Free space may not cross boundaries, unless the same + * free list is used on both sides of the border. + */ + min = FREE_START(iter); + max = FREE_END(iter); + + while (min < max && + (bound = uvm_map_boundary(map, min, max)) != max) { + UVM_ASSERT(map, + UVM_FREE(map, min) == UVM_FREE(map, bound), + file, line); + min = bound; + } - *entry = cur; - SAVE_HINT(map, map->hint, cur); - UVMHIST_LOG(maphist,"<- search got it (%p)", - cur, 0, 0, 0); - return (TRUE); - } - break; + /* + * Entries with free space must appear in the free list. 
+ */ + free = UVM_FREE(map, FREE_START(iter)); + if (iter->fspace > 0 && free) { + UVM_ASSERT(map, + RB_FIND(uvm_map_free_int, &free->tree, iter) == + iter, file, line); } - cur = cur->next; } + UVM_ASSERT(map, addr == vm_map_max(map), file, line); - *entry = cur->prev; - SAVE_HINT(map, map->hint, *entry); - UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); - return (FALSE); + uvm_tree_sanity_free(map, &map->free, file, line); + uvm_tree_sanity_free(map, &map->bfree, file, line); } -/* - * Checks if address pointed to by phint fits into the empty - * space before the vm_map_entry after. Takes alignment and - * offset into consideration. - */ - -int -uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length, - struct vm_map_entry *after, voff_t uoffset, vsize_t align) +void +uvm_tree_size_chk(struct vm_map *map, char *file, int line) { - vaddr_t hint = *phint; - vaddr_t end; + struct vm_map_entry *iter; + vsize_t size; -#ifdef PMAP_PREFER - /* - * push hint forward as needed to avoid VAC alias problems. - * we only do this if a valid offset is specified. - */ - if (uoffset != UVM_UNKNOWN_OFFSET) - hint = PMAP_PREFER(uoffset, hint); -#endif - if (align != 0) - if ((hint & (align - 1)) != 0) - hint = roundup(hint, align); - *phint = hint; + size = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + if (!UVM_ET_ISHOLE(iter)) + size += iter->end - iter->start; + } + + if (map->size != size) + printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); + UVM_ASSERT(map, map->size == size, file, line); - end = hint + length; - if (end > map->max_offset || end < hint) - return (FALSE); - if (after != NULL && after != &map->header && after->start < end) - return (FALSE); - - return (TRUE); + vmspace_validate(map); } /* - * uvm_map_pie: return a random load address for a PIE executable - * properly aligned. + * This function validates the statistics on vmspace. */ - -#ifndef VM_PIE_MAX_ADDR -#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) -#endif - -#ifndef VM_PIE_MIN_ADDR -#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS -#endif - -#ifndef VM_PIE_MIN_ALIGN -#define VM_PIE_MIN_ALIGN PAGE_SIZE -#endif - -vaddr_t -uvm_map_pie(vaddr_t align) +void +vmspace_validate(struct vm_map *map) { - vaddr_t addr, space, min; + struct vmspace *vm; + struct vm_map_entry *iter; + vaddr_t imin, imax; + vaddr_t stack_begin, stack_end; /* Position of stack. */ + vsize_t stack, heap; /* Measured sizes. */ - align = MAX(align, VM_PIE_MIN_ALIGN); + if (!(map->flags & VM_MAP_ISVMSPACE)) + return; - /* round up to next alignment */ - min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); + vm = (struct vmspace *)map; + stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); - if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) - return (align); + stack = heap = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + imin = imax = iter->start; - space = (VM_PIE_MAX_ADDR - min) / align; - space = MIN(space, (u_int32_t)-1); + if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) + continue; - addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; - addr += min; + /* + * Update stack, heap. + * Keep in mind that (theoretically) the entries of + * userspace and stack may be joined. + */ + while (imin != iter->end) { + /* + * Set imax to the first boundary crossed between + * imin and stack addresses. 
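vmspace_validate() recomputes the heap charge by walking every anonymous range and splitting it wherever it crosses the stack boundaries, so a single entry straddling stack and heap is charged to each side correctly. A sketch of that interval-splitting walk; stack_begin/stack_end and the range list are stand-ins for the vmspace fields.

#include <stdio.h>

/* Toy anonymous range. */
struct range {
	unsigned long start, end;
};

/*
 * Charge each byte of the ranges to either "stack" or "heap",
 * splitting ranges at the stack boundaries.
 */
static void
account(const struct range *r, int n, unsigned long stack_begin,
    unsigned long stack_end, unsigned long *stackp, unsigned long *heapp)
{
	unsigned long imin, imax;
	int i;

	*stackp = *heapp = 0;
	for (i = 0; i < n; i++) {
		imin = r[i].start;
		while (imin != r[i].end) {
			/* First stack boundary crossed after imin, if any. */
			imax = r[i].end;
			if (imin < stack_begin && imax > stack_begin)
				imax = stack_begin;
			else if (imin < stack_end && imax > stack_end)
				imax = stack_end;

			if (imin >= stack_begin && imin < stack_end)
				*stackp += imax - imin;
			else
				*heapp += imax - imin;
			imin = imax;
		}
	}
}

int
main(void)
{
	struct range r[] = { { 0x1000, 0x6000 } };
	unsigned long stack, heap;

	account(r, 1, 0x4000, 0x8000, &stack, &heap);
	printf("heap=0x%lx stack=0x%lx\n", heap, stack);	/* 0x3000, 0x2000 */
	return 0;
}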
+ */ + imax = iter->end; + if (imin < stack_begin && imax > stack_begin) + imax = stack_begin; + else if (imin < stack_end && imax > stack_end) + imax = stack_end; + + if (imin >= stack_begin && imin < stack_end) + stack += imax - imin; + else + heap += imax - imin; + imin = imax; + } + } - return (addr); + heap >>= PAGE_SHIFT; + if (heap != vm->vm_dused) { + printf("vmspace stack range: 0x%lx-0x%lx\n", + stack_begin, stack_end); + panic("vmspace_validate: vmspace.vm_dused invalid, " + "expected %ld pgs, got %ld pgs in map %p", + heap, vm->vm_dused, + map); + } } +#endif /* DEBUG */ + /* - * uvm_map_hint: return the beginning of the best area suitable for - * creating a new mapping with "prot" protection. + * uvm_map_init: init mapping system at boot time. note that we allocate + * and init the static pool of structs vm_map_entry for the kernel here. */ -vaddr_t -uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap) +void +uvm_map_init(void) { - vaddr_t addr; + static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; +#if defined(UVMHIST) + static struct uvm_history_ent maphistbuf[100]; + static struct uvm_history_ent pdhistbuf[100]; +#endif + int lcv; -#ifdef __i386__ /* - * If executable skip first two pages, otherwise start - * after data + heap region. + * first, init logging system. */ - if ((prot & VM_PROT_EXECUTE) && - ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { - addr = (PAGE_SIZE*2) + - (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); - return (round_page(addr)); + + UVMHIST_FUNC("uvm_map_init"); + UVMHIST_INIT_STATIC(maphist, maphistbuf); + UVMHIST_INIT_STATIC(pdhist, pdhistbuf); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); + UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, + "# uvm_map() successful calls", 0); + UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); + UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", + 0); + UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0); + UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); + UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); + + /* + * now set up static pool of kernel map entries ... + */ + + simple_lock_init(&uvm.kentry_lock); + uvm.kentry_free = NULL; + for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { + RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = + uvm.kentry_free; + uvm.kentry_free = &kernel_map_entry[lcv]; } -#endif - /* start malloc/mmap after the brk */ - addr = (vaddr_t)p->p_vmspace->vm_daddr; - if (skipheap) - addr += BRKSIZ; -#if !defined(__vax__) - addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); -#endif - return (round_page(addr)); + + /* + * initialize the map-related pools. + */ + pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpekpl", NULL); + pool_sethiwat(&uvm_map_entry_pool, 8192); } +#if defined(DDB) + /* - * uvm_map_findspace: find "length" sized space in "map". - * - * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is - * set (in which case we insist on using "hint"). - * => "result" is VA returned - * => uobj/uoffset are to be used to handle VAC alignment, if required - * => if `align' is non-zero, we attempt to align to that value. 
- * => caller must at least have read-locked map - * => returns NULL on failure, or pointer to prev. map entry if success - * => note this is a cross between the old vm_map_findspace and vm_map_find + * DDB hooks */ -struct vm_map_entry * -uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, - vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align, - int flags) +/* + * uvm_map_printit: actually prints the map + */ +void +uvm_map_printit(struct vm_map *map, boolean_t full, + int (*pr)(const char *, ...)) { - struct vm_map_entry *entry, *next, *tmp; - struct vm_map_entry *child, *prev = NULL; - - vaddr_t end, orig_hint; - UVMHIST_FUNC("uvm_map_findspace"); - UVMHIST_CALLED(maphist); - - UVMHIST_LOG(maphist, "(map=%p, hint=0x%lx, len=%ld, flags=0x%lx)", - map, hint, length, flags); - KASSERT((align & (align - 1)) == 0); - KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); + struct vmspace *vm; + struct vm_map_entry *entry; + struct uvm_map_free *free; + int in_free; - uvm_tree_sanity(map, "map_findspace entry"); + (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); + (*pr)("\tbrk() allocate range: 0x%lx-0x%lx %ld segments\n", + map->b_start, map->b_end, uvm_mapfree_size(&map->bfree)); + (*pr)("\tstack allocate range: 0x%lx-0x%lx %ld segments\n", + map->s_start, map->s_end, uvm_mapfree_size(&map->bfree)); + (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", + map->size, map->ref_count, map->timestamp, + map->flags); +#ifdef pmap_resident_count + (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, + pmap_resident_count(map->pmap)); +#else + /* XXXCDC: this should be required ... */ + (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); +#endif /* - * remember the original hint. if we are aligning, then we - * may have to try again with no alignment constraint if - * we fail the first time. + * struct vmspace handling. */ + if (map->flags & VM_MAP_ISVMSPACE) { + vm = (struct vmspace *)map; + + (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", + vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); + (*pr)("\tvm_tsize=%u vm_dsize=%u\n", + vm->vm_tsize, vm->vm_dsize); + (*pr)("\tvm_taddr=%p vm_daddr=%p\n", + vm->vm_taddr, vm->vm_daddr); + (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", + vm->vm_maxsaddr, vm->vm_minsaddr); + } + + if (!full) + return; + RB_FOREACH(entry, uvm_map_addr, &map->addr) { + (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", + entry, entry->start, entry->end, entry->object.uvm_obj, + (long long)entry->offset, entry->aref.ar_amap, + entry->aref.ar_pageoff); + (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " + "wc=%d, adv=%d\n", + (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', + entry->protection, entry->max_protection, + entry->inheritance, entry->wired_count, entry->advice); + + free = UVM_FREE(map, FREE_START(entry)); + in_free = (free != NULL) && + (RB_FIND(uvm_map_free_int, &free->tree, entry) == entry); + (*pr)("\thole=%c, free=%c, guard=0x%lx, " + "free=0x%lx-0x%lx\n", + (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', + in_free ? 
'T' : 'F', + entry->guard, + FREE_START(entry), FREE_END(entry)); + } +} + +/* + * uvm_object_printit: actually prints the object + */ +void +uvm_object_printit(uobj, full, pr) + struct uvm_object *uobj; + boolean_t full; + int (*pr)(const char *, ...); +{ + struct vm_page *pg; + int cnt = 0; + + (*pr)("OBJECT %p: pgops=%p, npages=%d, ", + uobj, uobj->pgops, uobj->uo_npages); + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) + (*pr)("refs=<SYSTEM>\n"); + else + (*pr)("refs=%d\n", uobj->uo_refs); - orig_hint = hint; - if (hint < map->min_offset) { /* check ranges ... */ - if (flags & UVM_FLAG_FIXED) { - UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); - return(NULL); + if (!full) { + return; + } + (*pr)(" PAGES <pg,offset>:\n "); + RB_FOREACH(pg, uvm_objtree, &uobj->memt) { + (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); + if ((cnt % 3) == 2) { + (*pr)("\n "); } - hint = map->min_offset; + cnt++; } - if (hint > map->max_offset) { - UVMHIST_LOG(maphist,"<- VA 0x%lx > range [0x%lx->0x%lx]", - hint, map->min_offset, map->max_offset, 0); - return(NULL); + if ((cnt % 3) != 2) { + (*pr)("\n"); } +} - /* - * Look for the first possible address; if there's already - * something at this address, we have to start after it. - */ +/* + * uvm_page_printit: actually print the page + */ +static const char page_flagbits[] = + "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" + "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" + "\31PMAP1\32PMAP2\33PMAP3"; - if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { - if ((entry = map->first_free) != &map->header) - hint = entry->end; - } else { - if (uvm_map_lookup_entry(map, hint, &tmp)) { - /* "hint" address already in use ... */ - if (flags & UVM_FLAG_FIXED) { - UVMHIST_LOG(maphist,"<- fixed & VA in use", - 0, 0, 0, 0); - return(NULL); +void +uvm_page_printit(pg, full, pr) + struct vm_page *pg; + boolean_t full; + int (*pr)(const char *, ...); +{ + struct vm_page *tpg; + struct uvm_object *uobj; + struct pglist *pgl; + + (*pr)("PAGE %p:\n", pg); + (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", + pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, + (long long)pg->phys_addr); + (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", + pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); +#if defined(UVM_PAGE_TRKOWN) + if (pg->pg_flags & PG_BUSY) + (*pr)(" owning process = %d, tag=%s\n", + pg->owner, pg->owner_tag); + else + (*pr)(" page not busy, no owner\n"); +#else + (*pr)(" [page ownership tracking disabled]\n"); +#endif + + if (!full) + return; + + /* cross-verify object/anon */ + if ((pg->pg_flags & PQ_FREE) == 0) { + if (pg->pg_flags & PQ_ANON) { + if (pg->uanon == NULL || pg->uanon->an_page != pg) + (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", + (pg->uanon) ? pg->uanon->an_page : NULL); + else + (*pr)(" anon backpointer is OK\n"); + } else { + uobj = pg->uobject; + if (uobj) { + (*pr)(" checking object list\n"); + RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { + if (tpg == pg) { + break; + } + } + if (tpg) + (*pr)(" page found on object list\n"); + else + (*pr)(" >>> PAGE NOT FOUND " + "ON OBJECT LIST! 
<<<\n"); } - hint = tmp->end; } - entry = tmp; } - if (flags & UVM_FLAG_FIXED) { - end = hint + length; - if (end > map->max_offset || end < hint) { - UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); - goto error; - } - next = entry->next; - if (next == &map->header || next->start >= end) - goto found; - UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0); - return(NULL); /* only one shot at it ... */ - } - - /* Try to find the space in the red-black tree */ - - /* Check slot before any entry */ - if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align)) - goto found; - - /* If there is not enough space in the whole tree, we fail */ - tmp = RB_ROOT(&map->rbhead); - if (tmp == NULL || tmp->space < length) - goto error; - - /* Find an entry close to hint that has enough space */ - for (; tmp;) { - if (tmp->end >= hint && - (prev == NULL || tmp->end < prev->end)) { - if (tmp->ownspace >= length) - prev = tmp; - else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL && - child->space >= length) - prev = tmp; - } - if (tmp->end < hint) - child = RB_RIGHT(tmp, rb_entry); - else if (tmp->end > hint) - child = RB_LEFT(tmp, rb_entry); - else { - if (tmp->ownspace >= length) + /* cross-verify page queue */ + if (pg->pg_flags & PQ_FREE) { + if (uvm_pmr_isfree(pg)) + (*pr)(" page found in uvm_pmemrange\n"); + else + (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); + pgl = NULL; + } else if (pg->pg_flags & PQ_INACTIVE) { + pgl = (pg->pg_flags & PQ_SWAPBACKED) ? + &uvm.page_inactive_swp : &uvm.page_inactive_obj; + } else if (pg->pg_flags & PQ_ACTIVE) { + pgl = &uvm.page_active; + } else { + pgl = NULL; + } + + if (pgl) { + (*pr)(" checking pageq list\n"); + TAILQ_FOREACH(tpg, pgl, pageq) { + if (tpg == pg) { break; - child = RB_RIGHT(tmp, rb_entry); + } } - if (child == NULL || child->space < length) - break; - tmp = child; + if (tpg) + (*pr)(" page found on pageq list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); } - - if (tmp != NULL && hint < tmp->end + tmp->ownspace) { - /* - * Check if the entry that we found satifies the - * space requirement - */ - if (hint < tmp->end) - hint = tmp->end; - if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, - align)) { - entry = tmp; - goto found; - } else if (tmp->ownspace >= length) - goto listsearch; - } - if (prev == NULL) - goto error; - - hint = prev->end; - if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset, - align)) { - entry = prev; - goto found; - } else if (prev->ownspace >= length) - goto listsearch; - - tmp = RB_RIGHT(prev, rb_entry); - for (;;) { - KASSERT(tmp && tmp->space >= length); - child = RB_LEFT(tmp, rb_entry); - if (child && child->space >= length) { - tmp = child; +} +#endif + +/* + * uvm_map_protect: change map protection + * + * => set_max means set max_protection. + * => map must be unlocked. + */ +int +uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t new_prot, boolean_t set_max) +{ + struct vm_map_entry *first, *iter; + vm_prot_t old_prot; + vm_prot_t mask; + int error; + + VM_MAP_RANGE_CHECK(map, start, end); + if (start == end) + return 0; + + error = 0; + vm_map_lock(map); + + /* + * Set up first and last. + * - first will contain first entry at or after start. + */ + first = uvm_map_entrybyaddr(&map->addr, start); + KDASSERT(first != NULL); + if (first->end < start) + first = RB_NEXT(uvm_map_addr, &map->addr, first); + + /* + * First, check for protection violations. 
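uvm_map_protect() makes two passes over the affected entries: the first pass only validates (no submaps, the new protection must fit within max_protection), the second actually rewrites the protections, so a request that would fail the check is rejected before anything in the map has been changed. A compact sketch of that check-then-apply shape; the protection bits and entry array here are placeholders.

#include <errno.h>
#include <stdio.h>

#define PROT_R	0x1
#define PROT_W	0x2

/* Toy stand-in for a map entry's protection fields. */
struct pent {
	int prot, max_prot;
};

static int
protect_range(struct pent *e, int n, int new_prot)
{
	int i;

	/* Pass 1: check for violations before touching anything. */
	for (i = 0; i < n; i++) {
		if ((new_prot & e[i].max_prot) != new_prot)
			return EACCES;
	}

	/* Pass 2: apply. */
	for (i = 0; i < n; i++)
		e[i].prot = new_prot;
	return 0;
}

int
main(void)
{
	struct pent e[] = {
		{ PROT_R, PROT_R | PROT_W },
		{ PROT_R, PROT_R },
	};

	printf("%d\n", protect_range(e, 2, PROT_R | PROT_W));	/* EACCES */
	printf("%d\n", protect_range(e, 2, PROT_R));		/* 0 */
	return 0;
}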
+ */ + for (iter = first; iter != NULL && iter->start < end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + /* Treat memory holes as free space. */ + if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) continue; + + if (UVM_ET_ISSUBMAP(iter)) { + error = EINVAL; + goto out; + } + if ((new_prot & iter->max_protection) != new_prot) { + error = EACCES; + goto out; } - if (tmp->ownspace >= length) - break; - tmp = RB_RIGHT(tmp, rb_entry); - } - - hint = tmp->end; - if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) { - entry = tmp; - goto found; } - /* - * The tree fails to find an entry because of offset or alignment - * restrictions. Search the list instead. - */ - listsearch: /* - * Look through the rest of the map, trying to fit a new region in - * the gap between existing regions, or after the very last region. - * note: entry->end = base VA of current gap, - * next->start = VA of end of current gap + * Fix protections. */ - for (;; hint = (entry = next)->end) { + for (iter = first; iter != NULL && iter->start < end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + /* Treat memory holes as free space. */ + if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) + continue; + + old_prot = iter->protection; + /* - * Find the end of the proposed new region. Be sure we didn't - * go beyond the end of the map, or wrap around the address; - * if so, we lose. Otherwise, if this is the last entry, or - * if the proposed new region fits before the next entry, we - * win. + * Skip adapting protection iff old and new protection + * are equal. */ + if (set_max) { + if (old_prot == (new_prot & old_prot) && + iter->max_protection == new_prot) + continue; + } else { + if (old_prot == new_prot) + continue; + } + + UVM_MAP_CLIP_START(map, iter, start); + UVM_MAP_CLIP_END(map, iter, end); + + if (set_max) { + iter->max_protection = new_prot; + iter->protection &= new_prot; + } else + iter->protection = new_prot; -#ifdef PMAP_PREFER /* - * push hint forward as needed to avoid VAC alias problems. - * we only do this if a valid offset is specified. + * update physical map if necessary. worry about copy-on-write + * here -- CHECK THIS XXX */ - if (uoffset != UVM_UNKNOWN_OFFSET) - hint = PMAP_PREFER(uoffset, hint); -#endif - if (align != 0) { - if ((hint & (align - 1)) != 0) - hint = roundup(hint, align); - /* - * XXX Should we PMAP_PREFER() here again? - */ + if (iter->protection != old_prot) { + mask = UVM_ET_ISCOPYONWRITE(iter) ? + ~VM_PROT_WRITE : VM_PROT_ALL; + + /* update pmap */ + if ((iter->protection & mask) == PROT_NONE && + VM_MAPENT_ISWIRED(iter)) { + /* + * TODO(ariane) this is stupid. wired_count + * is 0 if not wired, otherwise anything + * larger than 0 (incremented once each time + * wire is called). + * Mostly to be able to undo the damage on + * failure. Not the actually be a wired + * refcounter... + * Originally: iter->wired_count--; + * (don't we have to unwire this in the pmap + * as well?) + */ + iter->wired_count = 0; + } + pmap_protect(map->pmap, iter->start, iter->end, + iter->protection & mask); } - end = hint + length; - if (end > map->max_offset || end < hint) { - UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); - goto error; + + /* + * If the map is configured to lock any future mappings, + * wire this entry now if the old protection was VM_PROT_NONE + * and the new protection is not VM_PROT_NONE. 
+ */ + if ((map->flags & VM_MAP_WIREFUTURE) != 0 && + VM_MAPENT_ISWIRED(iter) == 0 && + old_prot == VM_PROT_NONE && + new_prot != VM_PROT_NONE) { + if (uvm_map_pageable(map, iter->start, iter->end, + FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { + /* + * If locking the entry fails, remember the + * error if it's the first one. Note we + * still continue setting the protection in + * the map, but it will return the resource + * storage condition regardless. + * + * XXX Ignore what the actual error is, + * XXX just call it a resource shortage + * XXX so that it doesn't get confused + * XXX what uvm_map_protect() itself would + * XXX normally return. + */ + error = ENOMEM; + } } - next = entry->next; - if (next == &map->header || next->start >= end) - break; } - found: - SAVE_HINT(map, map->hint, entry); - *result = hint; - UVMHIST_LOG(maphist,"<- got it! (result=0x%lx)", hint, 0,0,0); - return (entry); + pmap_update(map->pmap); - error: - if (align != 0) { - UVMHIST_LOG(maphist, - "calling recursively, no align", - 0,0,0,0); - return (uvm_map_findspace(map, orig_hint, - length, result, uobj, uoffset, 0, flags)); - } - return (NULL); +out: + vm_map_unlock(map); + UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); + return error; } /* - * U N M A P - m a i n e n t r y p o i n t + * uvmspace_alloc: allocate a vmspace structure. + * + * - structure includes vm_map and pmap + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ +struct vmspace * +uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, + boolean_t remove_holes) +{ + struct vmspace *vm; + UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); + + vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); + uvmspace_init(vm, NULL, min, max, pageable, remove_holes); + UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0); + return (vm); +} /* - * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") + * uvmspace_init: initialize a vmspace structure. * - * => caller must check alignment and size - * => map must be unlocked (we will lock it) + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ void -uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p) +uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, + boolean_t pageable, boolean_t remove_holes) { - vm_map_entry_t dead_entries; - UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); + UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); + + if (pmap) + pmap_reference(pmap); + else + pmap = pmap_create(); + vm->vm_map.pmap = pmap; - UVMHIST_LOG(maphist, " (map=%p, start=0x%lx, end=0x%lx)", - map, start, end, 0); + uvm_map_setup(&vm->vm_map, min, max, + (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); - /* - * work now done by helper functions. wipe the pmap's and then - * detach from the dead entries... - */ - vm_map_lock(map); - uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE); - vm_map_unlock(map); + vm->vm_refcnt = 1; - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + if (remove_holes) + pmap_remove_holes(&vm->vm_map); - UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + UVMHIST_LOG(maphist,"<- done",0,0,0,0); } - /* - * U N M A P - m a i n h e l p e r f u n c t i o n s + * uvmspace_share: share a vmspace between two proceses + * + * - XXX: no locking on vmspace + * - used for vfork, threads(?) 
*/ +void +uvmspace_share(p1, p2) + struct proc *p1, *p2; +{ + p2->p_vmspace = p1->p_vmspace; + p1->p_vmspace->vm_refcnt++; +} + /* - * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") + * uvmspace_exec: the process wants to exec a new program * - * => caller must check alignment and size - * => map must be locked by caller - * => we return a list of map entries that we've remove from the map - * in "entry_list" + * - XXX: no locking on vmspace */ void -uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, - struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes) +uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) { - struct vm_map_entry *entry, *first_entry, *next; - vaddr_t len; - UVMHIST_FUNC("uvm_unmap_remove"); - UVMHIST_CALLED(maphist); - - UVMHIST_LOG(maphist,"(map=%p, start=0x%lx, end=0x%lx)", - map, start, end, 0); - - VM_MAP_RANGE_CHECK(map, start, end); - - uvm_tree_sanity(map, "unmap_remove entry"); - - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); - - /* - * find first entry - */ - if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { - /* clip and go... */ - entry = first_entry; - UVM_MAP_CLIP_START(map, entry, start); - /* critical! prevents stale hint */ - SAVE_HINT(map, entry, entry->prev); - - } else { - entry = first_entry->next; - } - - /* - * Save the free space hint - */ + struct vmspace *nvm, *ovm = p->p_vmspace; + struct vm_map *map = &ovm->vm_map; + struct uvm_map_deadq dead_entries; - if (map->first_free->start >= start) - map->first_free = entry->prev; + KASSERT((start & PAGE_MASK) == 0); + KASSERT((end & PAGE_MASK) == 0 || (end & PAGE_MASK) == PAGE_MASK); - /* - * note: we now re-use first_entry for a different task. we remove - * a number of map entries from the map and save them in a linked - * list headed by "first_entry". once we remove them from the map - * the caller should unlock the map and drop the references to the - * backing objects [c.f. uvm_unmap_detach]. the object is to - * separate unmapping from reference dropping. why? - * [1] the map has to be locked for unmapping - * [2] the map need not be locked for reference dropping - * [3] dropping references may trigger pager I/O, and if we hit - * a pager that does synchronous I/O we may have to wait for it. - * [4] we would like all waiting for I/O to occur with maps unlocked - * so that we don't block other threads. - */ - first_entry = NULL; - *entry_list = NULL; /* to be safe */ + pmap_unuse_final(p); /* before stack addresses go away */ + TAILQ_INIT(&dead_entries); /* - * break up the area into map entry sized regions and unmap. note - * that all mappings have to be removed before we can even consider - * dropping references to amaps or VM objects (otherwise we could end - * up with a mapping to a page on the free list which would be very bad) + * see if more than one process is using this vmspace... */ - while ((entry != &map->header) && (entry->start < end)) { - - UVM_MAP_CLIP_END(map, entry, end); - next = entry->next; - len = entry->end - entry->start; - if (p && entry->object.uvm_obj == NULL) - p->p_vmspace->vm_dused -= atop(len); + if (ovm->vm_refcnt == 1) { + /* + * if p is the only process using its vmspace then we can safely + * recycle that vmspace for the program that is being exec'd. + */ +#ifdef SYSVSHM /* - * unwire before removing addresses from the pmap; otherwise - * unwiring will put the entries back into the pmap (XXX). 
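
The removed comment above spells out why entries are unlinked while the map is locked but their amap and object references are dropped only afterwards: dropping a reference can sleep on pager I/O. The new code threads a TAILQ-based struct uvm_map_deadq through uvm_unmap_remove() for exactly that reason. A minimal sketch of the pattern as used for whole-map teardown later in this diff; the two trailing boolean arguments are copied from those calls and their precise meaning is not shown here:

	struct uvm_map_deadq dead;

	TAILQ_INIT(&dead);
	vm_map_lock(map);
	/* Unlink every entry; they are collected on the dead queue. */
	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
	    TRUE, FALSE);
	vm_map_unlock(map);
	/* Only now drop amap/object references; this may sleep. */
	uvm_unmap_detach(&dead, 0);
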
+ * SYSV SHM semantics require us to kill all segments on an exec */ + if (ovm->vm_shm) + shmexit(ovm); +#endif - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); + /* + * POSIX 1003.1b -- "lock future mappings" is revoked + * when a process execs another program image. + */ + vm_map_lock(map); + vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); /* - * special case: handle mappings to anonymous kernel objects. - * we want to free these pages right away... + * now unmap the old program + * + * Instead of attempting to keep the map valid, we simply + * nuke all entries and ask uvm_map_setup to reinitialize + * the map to the new boundaries. + * + * uvm_unmap_remove will actually nuke all entries for us + * (as in, not replace them with free-memory entries). */ -#ifdef KVA_GUARDPAGES - if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) { - entry->etype &= ~MAP_ET_KVAGUARD; - kva_guardpages--; - } else /* (code continues across line-break) */ -#endif - if (UVM_ET_ISHOLE(entry)) { - if (!remove_holes) { - entry = next; - continue; - } - } else if (map->flags & VM_MAP_INTRSAFE) { - uvm_km_pgremove_intrsafe(entry->start, entry->end); - pmap_kremove(entry->start, len); - } else if (UVM_ET_ISOBJ(entry) && - UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { - KASSERT(vm_map_pmap(map) == pmap_kernel()); + uvm_unmap_remove(map, map->min_offset, map->max_offset, + &dead_entries, TRUE, FALSE); - /* - * note: kernel object mappings are currently used in - * two ways: - * [1] "normal" mappings of pages in the kernel object - * [2] uvm_km_valloc'd allocations in which we - * pmap_enter in some non-kernel-object page - * (e.g. vmapbuf). - * - * for case [1], we need to remove the mapping from - * the pmap and then remove the page from the kernel - * object (because, once pages in a kernel object are - * unmapped they are no longer needed, unlike, say, - * a vnode where you might want the data to persist - * until flushed out of a queue). - * - * for case [2], we need to remove the mapping from - * the pmap. there shouldn't be any pages at the - * specified offset in the kernel object [but it - * doesn't hurt to call uvm_km_pgremove just to be - * safe?] - * - * uvm_km_pgremove currently does the following: - * for pages in the kernel object in range: - * - drops the swap slot - * - uvm_pagefree the page - * - * note there is version of uvm_km_pgremove() that - * is used for "intrsafe" objects. - */ + KDASSERT(RB_EMPTY(&map->addr)); - /* - * remove mappings from pmap and drop the pages - * from the object. offsets are always relative - * to vm_map_min(kernel_map). - */ - pmap_remove(pmap_kernel(), entry->start, entry->end); - uvm_km_pgremove(entry->object.uvm_obj, - entry->start - vm_map_min(kernel_map), - entry->end - vm_map_min(kernel_map)); + /* + * Nuke statistics and boundaries. + */ + bzero(&ovm->vm_startcopy, + (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); - /* - * null out kernel_object reference, we've just - * dropped it - */ - entry->etype &= ~UVM_ET_OBJ; - entry->object.uvm_obj = NULL; /* to be safe */ - } else { - /* - * remove mappings the standard way. - */ - pmap_remove(map->pmap, entry->start, entry->end); + if (end & PAGE_MASK) { + end += 1; + if (end == 0) /* overflow */ + end -= PAGE_SIZE; } /* - * remove entry from map and put it on our list of entries - * that we've nuked. then go do next entry. + * Setup new boundaries and populate map with entries. 
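
The end-address fixup just above relies on the earlier KASSERT: end is either already page aligned or points at the last byte of a page, so adding one reaches the next page boundary and the only special case is wrapping past the top of the address space. A small sketch with worked values (PAGE_SIZE of 4096 and a 32-bit address space are assumptions for the example; the helper is hypothetical):

/*
 * Sketch of the rounding performed above.
 *   end = 0xbfbfefff -> 0xbfbff000        (next page boundary)
 *   end = 0xffffffff -> 0x00000000 (wrap) -> 0xfffff000
 */
vaddr_t
round_end_exclusive(vaddr_t end)
{
	if (end & PAGE_MASK) {
		end += 1;
		if (end == 0)		/* wrapped past the top */
			end -= PAGE_SIZE;
	}
	return (end);
}
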
*/ - UVMHIST_LOG(maphist, " removed map entry %p", entry, 0, 0,0); + map->min_offset = start; + map->max_offset = end; + uvm_map_setup_entries(map); + vm_map_unlock(map); - /* critical! prevents stale hint */ - SAVE_HINT(map, entry, entry->prev); + /* + * but keep MMU holes unavailable + */ + pmap_remove_holes(map); + + } else { - uvm_map_entry_unlink(map, entry); - map->size -= len; - entry->next = first_entry; - first_entry = entry; - entry = next; /* next entry, please */ - } -#ifdef KVA_GUARDPAGES - /* - * entry points at the map-entry after the last-removed map-entry. - */ - if (map == kernel_map && entry != &map->header && - entry->etype & MAP_ET_KVAGUARD && entry->start == end) { /* - * Removed range is followed by guard page; - * remove that guard page now (or it will stay forever). + * p's vmspace is being shared, so we can't reuse it for p since + * it is still being used for others. allocate a new vmspace + * for p */ - entry->etype &= ~MAP_ET_KVAGUARD; - kva_guardpages--; + nvm = uvmspace_alloc(start, end, + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); - uvm_map_entry_unlink(map, entry); - map->size -= len; - entry->next = first_entry; - first_entry = entry; - entry = next; /* next entry, please */ - } -#endif - /* if ((map->flags & VM_MAP_DYING) == 0) { */ - pmap_update(vm_map_pmap(map)); - /* } */ + /* + * install new vmspace and drop our ref to the old one. + */ + pmap_deactivate(p); + p->p_vmspace = nvm; + pmap_activate(p); - uvm_tree_sanity(map, "unmap_remove leave"); + uvmspace_free(ovm); + } /* - * now we've cleaned up the map and are ready for the caller to drop - * references to the mapped objects. + * Release dead entries */ - - *entry_list = first_entry; - UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + uvm_unmap_detach(&dead_entries, 0); } /* - * uvm_unmap_detach: drop references in a chain of map entries + * uvmspace_free: free a vmspace data structure * - * => we will free the map entries as we traverse the list. + * - XXX: no locking on vmspace */ void -uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) +uvmspace_free(struct vmspace *vm) { - struct vm_map_entry *next_entry; - UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); - - while (first_entry) { - KASSERT(!VM_MAPENT_ISWIRED(first_entry)); - UVMHIST_LOG(maphist, - " detach 0x%lx: amap=%p, obj=%p, submap?=%ld", - first_entry, first_entry->aref.ar_amap, - first_entry->object.uvm_obj, - UVM_ET_ISSUBMAP(first_entry)); + struct uvm_map_deadq dead_entries; - /* - * drop reference to amap, if we've got one - */ - - if (first_entry->aref.ar_amap) - uvm_map_unreference_amap(first_entry, flags); + UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); + if (--vm->vm_refcnt == 0) { /* - * drop reference to our backing object, if we've got one + * lock the map, to wait out all other references to it. delete + * all of the mappings and pages they hold, then call the pmap + * module to reclaim anything left. */ - - if (UVM_ET_ISSUBMAP(first_entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_deallocate(first_entry->object.sub_map); - } else { - if (UVM_ET_ISOBJ(first_entry) && - first_entry->object.uvm_obj->pgops->pgo_detach) - first_entry->object.uvm_obj->pgops-> - pgo_detach(first_entry->object.uvm_obj); +#ifdef SYSVSHM + /* Get rid of any SYSV shared memory segments. 
*/ + if (vm->vm_shm != NULL) + shmexit(vm); +#endif + if ((vm->vm_map.flags & VM_MAP_INTRSAFE) == 0) { + if (rw_enter(&vm->vm_map.lock, RW_NOSLEEP|RW_WRITE) != + 0) { + panic("uvm_map_setup: " + "rw_enter failed on free map"); + } } - - next_entry = first_entry->next; - uvm_mapent_free(first_entry); - first_entry = next_entry; + uvm_tree_sanity(&vm->vm_map, __FILE__, __LINE__); + TAILQ_INIT(&dead_entries); + uvm_unmap_remove(&vm->vm_map, + vm->vm_map.min_offset, vm->vm_map.max_offset, + &dead_entries, TRUE, FALSE); + if ((vm->vm_map.flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&vm->vm_map.lock); + KDASSERT(RB_EMPTY(&vm->vm_map.addr)); + uvm_unmap_detach(&dead_entries, 0); + pmap_destroy(vm->vm_map.pmap); + vm->vm_map.pmap = NULL; + pool_put(&uvm_vmspace_pool, vm); } - UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + UVMHIST_LOG(maphist,"<- done", 0,0,0,0); } /* - * E X T R A C T I O N F U N C T I O N S - */ - -/* - * uvm_map_reserve: reserve space in a vm_map for future use. + * Clone map entry into other map. * - * => we reserve space in a map by putting a dummy map entry in the - * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) - * => map should be unlocked (we will write lock it) - * => we return true if we were able to reserve space - * => XXXCDC: should be inline? + * Mapping will be placed at dstaddr, for the same length. + * Space must be available. + * Reference counters are incremented. */ - -int -uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset, - vsize_t align, vaddr_t *raddr) +struct vm_map_entry* +uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, + vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, + int mapent_flags, int amap_share_flags) { - UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); + struct vm_map_entry *new_entry, *first, *last; - UVMHIST_LOG(maphist, "(map=%p, size=0x%lx, offset=0x%lx,addr=0x%lx)", - map,size,offset,raddr); + KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); - size = round_page(size); - if (*raddr < vm_map_min(map)) - *raddr = vm_map_min(map); /* hint */ + /* + * Create new entry (linked in on creation). + * Fill in first, last. + */ + first = last = NULL; + if (!uvm_map_isavail(&dstmap->addr, &first, &last, dstaddr, dstlen)) { + panic("uvmspace_fork: no space in map for " + "entry in empty map"); + } + new_entry = uvm_map_mkentry(dstmap, first, last, + dstaddr, dstlen, mapent_flags, dead); + if (new_entry == NULL) + return NULL; + /* old_entry -> new_entry */ + new_entry->object = old_entry->object; + new_entry->offset = old_entry->offset; + new_entry->aref = old_entry->aref; + new_entry->etype = old_entry->etype; + new_entry->protection = old_entry->protection; + new_entry->max_protection = old_entry->max_protection; + new_entry->inheritance = old_entry->inheritance; + new_entry->advice = old_entry->advice; /* - * reserve some virtual space. + * gain reference to object backing the map (can't + * be a submap). 
*/ + if (new_entry->aref.ar_amap) { + new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; + amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, + (new_entry->end - new_entry->start) >> PAGE_SHIFT, + amap_share_flags); + } - if (uvm_map(map, raddr, size, NULL, offset, 0, - UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { - UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); - return (FALSE); - } + if (UVM_ET_ISOBJ(new_entry) && + new_entry->object.uvm_obj->pgops->pgo_reference) { + new_entry->offset += off; + new_entry->object.uvm_obj->pgops->pgo_reference + (new_entry->object.uvm_obj); + } - UVMHIST_LOG(maphist, "<- done (*raddr=0x%lx)", *raddr,0,0,0); - return (TRUE); + return new_entry; } /* - * uvm_map_replace: replace a reserved (blank) area of memory with - * real mappings. - * - * => caller must WRITE-LOCK the map - * => we return TRUE if replacement was a success - * => we expect the newents chain to have nnewents entries on it and - * we expect newents->prev to point to the last entry on the list - * => note newents is allowed to be NULL + * share the mapping: this means we want the old and + * new entries to share amaps and backing objects. */ - -int -uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, - struct vm_map_entry *newents, int nnewents) +void +uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, + struct vm_map *old_map, + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) { - struct vm_map_entry *oldent, *last; - - uvm_tree_sanity(map, "map_replace entry"); - - /* - * first find the blank map entry at the specified address - */ - - if (!uvm_map_lookup_entry(map, start, &oldent)) { - return(FALSE); - } + struct vm_map_entry *new_entry; /* - * check to make sure we have a proper blank entry + * if the old_entry needs a new amap (due to prev fork) + * then we need to allocate it now so that we have + * something we own to share with the new_entry. [in + * other words, we need to clear needs_copy] */ - if (oldent->start != start || oldent->end != end || - oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { - return (FALSE); + if (UVM_ET_ISNEEDSCOPY(old_entry)) { + /* get our own amap, clears needs_copy */ + amap_copy(old_map, old_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: WAITOK??? */ } -#ifdef DIAGNOSTIC - /* - * sanity check the newents chain - */ - { - struct vm_map_entry *tmpent = newents; - int nent = 0; - vaddr_t cur = start; - - while (tmpent) { - nent++; - if (tmpent->start < cur) - panic("uvm_map_replace1"); - if (tmpent->start > tmpent->end || tmpent->end > end) { - printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", - tmpent->start, tmpent->end, end); - panic("uvm_map_replace2"); - } - cur = tmpent->end; - if (tmpent->next) { - if (tmpent->next->prev != tmpent) - panic("uvm_map_replace3"); - } else { - if (newents->prev != tmpent) - panic("uvm_map_replace4"); - } - tmpent = tmpent->next; - } - if (nent != nnewents) - panic("uvm_map_replace5"); - } -#endif + new_entry = uvm_mapent_clone(new_map, old_entry->start, + old_entry->end - old_entry->start, 0, old_entry, + dead, 0, AMAP_SHARED); - /* - * map entry is a valid blank! replace it. (this does all the - * work of map entry link/unlink...). + /* + * pmap_copy the mappings: this routine is optional + * but if it is there it will reduce the number of + * page faults in the new proc. 
*/ - - if (newents) { - last = newents->prev; /* we expect this */ - - /* critical: flush stale hints out of map */ - SAVE_HINT(map, map->hint, newents); - if (map->first_free == oldent) - map->first_free = last; - - last->next = oldent->next; - last->next->prev = last; - - /* Fix RB tree */ - uvm_rb_remove(map, oldent); - - newents->prev = oldent->prev; - newents->prev->next = newents; - map->nentries = map->nentries + (nnewents - 1); - - /* Fixup the RB tree */ - { - int i; - struct vm_map_entry *tmp; - - tmp = newents; - for (i = 0; i < nnewents && tmp; i++) { - uvm_rb_insert(map, tmp); - tmp = tmp->next; - } - } - } else { - - /* critical: flush stale hints out of map */ - SAVE_HINT(map, map->hint, oldent->prev); - if (map->first_free == oldent) - map->first_free = oldent->prev; - - /* NULL list of new entries: just remove the old one */ - uvm_map_entry_unlink(map, oldent); - } - - - uvm_tree_sanity(map, "map_replace leave"); + pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, + (new_entry->end - new_entry->start), new_entry->start); /* - * now we can free the old blank entry, unlock the map and return. + * Update process statistics. */ - - uvm_mapent_free(oldent); - return(TRUE); + if (!UVM_ET_ISHOLE(new_entry)) + new_map->size += new_entry->end - new_entry->start; + if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { + new_vm->vm_dused += + uvmspace_dused(new_map, new_entry->start, new_entry->end); + } } /* - * uvm_map_extract: extract a mapping from a map and put it somewhere - * (maybe removing the old mapping) + * copy-on-write the mapping (using mmap's + * MAP_PRIVATE semantics) * - * => maps should be unlocked (we will write lock them) - * => returns 0 on success, error code otherwise - * => start must be page aligned - * => len must be page sized - * => flags: - * UVM_EXTRACT_REMOVE: remove mappings from srcmap - * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) - * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs - * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go - * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< - * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only - * be used from within the kernel in a kernel level map <<< + * allocate new_entry, adjust reference counts. + * (note that new references are read-only). 
*/ - -int -uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, - struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) +void +uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, + struct vm_map *old_map, + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) { - vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, - oldstart; - struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry; - struct vm_map_entry *deadentry, *oldentry; - vsize_t elen; - int nchain, error, copy_ok; - UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); + struct vm_map_entry *new_entry; + boolean_t protect_child; - UVMHIST_LOG(maphist,"(srcmap=%p,start=0x%lx, len=0x%lx", srcmap, start, - len,0); - UVMHIST_LOG(maphist," ...,dstmap=%p, flags=0x%lx)", dstmap,flags,0,0); + new_entry = uvm_mapent_clone(new_map, old_entry->start, + old_entry->end - old_entry->start, 0, old_entry, + dead, 0, 0); - uvm_tree_sanity(srcmap, "map_extract src enter"); - uvm_tree_sanity(dstmap, "map_extract dst enter"); + new_entry->etype |= + (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); /* - * step 0: sanity check: start must be on a page boundary, length - * must be page sized. can't ask for CONTIG/QREF if you asked for - * REMOVE. - */ - - KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); - KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || - (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); - - /* - * step 1: reserve space in the target map for the extracted area + * the new entry will need an amap. it will either + * need to be copied from the old entry or created + * from scratch (if the old entry does not have an + * amap). can we defer this process until later + * (by setting "needs_copy") or do we need to copy + * the amap now? + * + * we must copy the amap now if any of the following + * conditions hold: + * 1. the old entry has an amap and that amap is + * being shared. this means that the old (parent) + * process is sharing the amap with another + * process. if we do not clear needs_copy here + * we will end up in a situation where both the + * parent and child process are referring to the + * same amap with "needs_copy" set. if the + * parent write-faults, the fault routine will + * clear "needs_copy" in the parent by allocating + * a new amap. this is wrong because the + * parent is supposed to be sharing the old amap + * and the new amap will break that. + * + * 2. if the old entry has an amap and a non-zero + * wire count then we are going to have to call + * amap_cow_now to avoid page faults in the + * parent process. since amap_cow_now requires + * "needs_copy" to be clear we might as well + * clear it here as well. + * */ - dstaddr = vm_map_min(dstmap); - if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE) - return(ENOMEM); - *dstaddrp = dstaddr; /* pass address back to caller */ - UVMHIST_LOG(maphist, " dstaddr=0x%lx", dstaddr,0,0,0); + if (old_entry->aref.ar_amap != NULL && + ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + VM_MAPENT_ISWIRED(old_entry))) { + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } /* - * step 2: setup for the extraction process loop by init'ing the - * map entry chain, locking src map, and looking up the first useful - * entry in the map. + * if the parent's entry is wired down, then the + * parent process does not want page faults on + * access to that memory. this means that we + * cannot do copy-on-write because we can't write + * protect the old entry. 
in this case we + * resolve all copy-on-write faults now, using + * amap_cow_now. note that we have already + * allocated any needed amap (above). */ - end = start + len; - newend = dstaddr + len; - chain = endchain = NULL; - nchain = 0; - vm_map_lock(srcmap); + if (VM_MAPENT_ISWIRED(old_entry)) { - if (uvm_map_lookup_entry(srcmap, start, &entry)) { + /* + * resolve all copy-on-write faults now + * (note that there is nothing to do if + * the old mapping does not have an amap). + * XXX: is it worthwhile to bother with + * pmap_copy in this case? + */ + if (old_entry->aref.ar_amap) + amap_cow_now(new_map, new_entry); - /* "start" is within an entry */ - if (flags & UVM_EXTRACT_QREF) { + } else { + if (old_entry->aref.ar_amap) { /* - * for quick references we don't clip the entry, so - * the entry may map space "before" the starting - * virtual address... this is the "fudge" factor - * (which can be non-zero only the first time - * through the "while" loop in step 3). + * setup mappings to trigger copy-on-write faults + * we must write-protect the parent if it has + * an amap and it is not already "needs_copy"... + * if it is already "needs_copy" then the parent + * has already been write-protected by a previous + * fork operation. + * + * if we do not write-protect the parent, then + * we must be sure to write-protect the child + * after the pmap_copy() operation. + * + * XXX: pmap_copy should have some way of telling + * us that it didn't do anything so we can avoid + * calling pmap_protect needlessly. */ + if (!UVM_ET_ISNEEDSCOPY(old_entry)) { + if (old_entry->max_protection & + VM_PROT_WRITE) { + pmap_protect(old_map->pmap, + old_entry->start, + old_entry->end, + old_entry->protection & + ~VM_PROT_WRITE); + pmap_update(old_map->pmap); + } + old_entry->etype |= UVM_ET_NEEDSCOPY; + } - fudge = start - entry->start; + /* + * parent must now be write-protected + */ + protect_child = FALSE; } else { /* - * normal reference: we clip the map to fit (thus - * fudge is zero) + * we only need to protect the child if the + * parent has write access. */ - - UVM_MAP_CLIP_START(srcmap, entry, start); - SAVE_HINT(srcmap, srcmap->hint, entry->prev); - fudge = 0; - } - } else { - - /* "start" is not within an entry ... skip to next entry */ - if (flags & UVM_EXTRACT_CONTIG) { - error = EINVAL; - goto bad; /* definite hole here ... */ - } - - entry = entry->next; - fudge = 0; - } - - /* save values from srcmap for step 6 */ - orig_entry = entry; - orig_fudge = fudge; - - /* - * step 3: now start looping through the map entries, extracting - * as we go. - */ - - while (entry->start < end && entry != &srcmap->header) { - - /* if we are not doing a quick reference, clip it */ - if ((flags & UVM_EXTRACT_QREF) == 0) - UVM_MAP_CLIP_END(srcmap, entry, end); - - /* clear needs_copy (allow chunking) */ - if (UVM_ET_ISNEEDSCOPY(entry)) { - if (fudge) - oldstart = entry->start; + if (old_entry->max_protection & VM_PROT_WRITE) + protect_child = TRUE; else - oldstart = 0; /* XXX: gcc */ - amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); - if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ - error = ENOMEM; - goto bad; - } + protect_child = FALSE; - /* amap_copy could clip (during chunk)! 
update fudge */ - if (fudge) { - fudge = fudge - (entry->start - oldstart); - orig_fudge = fudge; - } - } - - /* calculate the offset of this from "start" */ - oldoffset = (entry->start + fudge) - start; - - /* allocate a new map entry */ - newentry = uvm_mapent_alloc(dstmap, flags); - if (newentry == NULL) { - error = ENOMEM; - goto bad; } - /* set up new map entry */ - newentry->next = NULL; - newentry->prev = endchain; - newentry->start = dstaddr + oldoffset; - newentry->end = - newentry->start + (entry->end - (entry->start + fudge)); - if (newentry->end > newend || newentry->end < newentry->start) - newentry->end = newend; - newentry->object.uvm_obj = entry->object.uvm_obj; - if (newentry->object.uvm_obj) { - if (newentry->object.uvm_obj->pgops->pgo_reference) - newentry->object.uvm_obj->pgops-> - pgo_reference(newentry->object.uvm_obj); - newentry->offset = entry->offset + fudge; - } else { - newentry->offset = 0; - } - newentry->etype = entry->etype; - newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? - entry->max_protection : entry->protection; - newentry->max_protection = entry->max_protection; - newentry->inheritance = entry->inheritance; - newentry->wired_count = 0; - newentry->aref.ar_amap = entry->aref.ar_amap; - if (newentry->aref.ar_amap) { - newentry->aref.ar_pageoff = - entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); - uvm_map_reference_amap(newentry, AMAP_SHARED | - ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); - } else { - newentry->aref.ar_pageoff = 0; - } - newentry->advice = entry->advice; + /* + * copy the mappings + * XXX: need a way to tell if this does anything + */ - /* now link it on the chain */ - nchain++; - if (endchain == NULL) { - chain = endchain = newentry; - } else { - endchain->next = newentry; - endchain = newentry; - } + pmap_copy(new_map->pmap, old_map->pmap, + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); - /* end of 'while' loop! */ - if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && - (entry->next == &srcmap->header || - entry->next->start != entry->end)) { - error = EINVAL; - goto bad; + /* + * protect the child's mappings if necessary + */ + if (protect_child) { + pmap_protect(new_map->pmap, new_entry->start, + new_entry->end, + new_entry->protection & + ~VM_PROT_WRITE); } - entry = entry->next; - fudge = 0; } /* - * step 4: close off chain (in format expected by uvm_map_replace) + * Update process statistics. */ + if (!UVM_ET_ISHOLE(new_entry)) + new_map->size += new_entry->end - new_entry->start; + if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { + new_vm->vm_dused += + uvmspace_dused(new_map, new_entry->start, new_entry->end); + } +} - if (chain) - chain->prev = endchain; +/* + * uvmspace_fork: fork a process' main map + * + * => create a new vmspace for child process from parent. + * => parent's map must not be locked. + */ +struct vmspace * +uvmspace_fork(struct vmspace *vm1) +{ + struct vmspace *vm2; + struct vm_map *old_map = &vm1->vm_map; + struct vm_map *new_map; + struct vm_map_entry *old_entry; + struct uvm_map_deadq dead; + UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); - /* - * step 5: attempt to lock the dest map so we can pmap_copy. 
- * note usage of copy_ok: - * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) - * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 - */ + vm_map_lock(old_map); - if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { - copy_ok = 1; - if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, - nchain)) { - if (srcmap != dstmap) - vm_map_unlock(dstmap); - error = EIO; - goto bad; - } - } else { - copy_ok = 0; - /* replace defered until step 7 */ - } + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); + memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, + (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); + vm2->vm_dused = 0; /* Statistic managed by us. */ + new_map = &vm2->vm_map; + vm_map_lock(new_map); /* - * step 6: traverse the srcmap a second time to do the following: - * - if we got a lock on the dstmap do pmap_copy - * - if UVM_EXTRACT_REMOVE remove the entries - * we make use of orig_entry and orig_fudge (saved in step 2) + * go entry-by-entry */ - if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { + TAILQ_INIT(&dead); + RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { + if (old_entry->start == old_entry->end) + continue; - /* purge possible stale hints from srcmap */ - if (flags & UVM_EXTRACT_REMOVE) { - SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); - if (srcmap->first_free->start >= start) - srcmap->first_free = orig_entry->prev; + /* + * first, some sanity checks on the old entry + */ + if (UVM_ET_ISSUBMAP(old_entry)) { + panic("fork: encountered a submap during fork " + "(illegal)"); } - entry = orig_entry; - fudge = orig_fudge; - deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ - - while (entry->start < end && entry != &srcmap->header) { - if (copy_ok) { - oldoffset = (entry->start + fudge) - start; - elen = MIN(end, entry->end) - - (entry->start + fudge); - pmap_copy(dstmap->pmap, srcmap->pmap, - dstaddr + oldoffset, elen, - entry->start + fudge); - } - - /* we advance "entry" in the following if statement */ - if (flags & UVM_EXTRACT_REMOVE) { - pmap_remove(srcmap->pmap, entry->start, - entry->end); - oldentry = entry; /* save entry */ - entry = entry->next; /* advance */ - uvm_map_entry_unlink(srcmap, oldentry); - /* add to dead list */ - oldentry->next = deadentry; - deadentry = oldentry; - } else { - entry = entry->next; /* advance */ - } - - /* end of 'while' loop */ - fudge = 0; + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) { + panic("fork: non-copy_on_write map entry marked " + "needs_copy (illegal)"); } - pmap_update(srcmap->pmap); /* - * unlock dstmap. we will dispose of deadentry in - * step 7 if needed + * Apply inheritance. */ - - if (copy_ok && srcmap != dstmap) - vm_map_unlock(dstmap); - + if (old_entry->inheritance == MAP_INHERIT_SHARE) { + uvm_mapent_forkshared(vm2, new_map, + old_map, old_entry, &dead); + } + if (old_entry->inheritance == MAP_INHERIT_COPY) { + uvm_mapent_forkcopy(vm2, new_map, + old_map, old_entry, &dead); + } } - else - deadentry = NULL; /* XXX: gcc */ + + vm_map_unlock(old_map); + vm_map_unlock(new_map); /* - * step 7: we are done with the source map, unlock. if copy_ok - * is 0 then we have not replaced the dummy mapping in dstmap yet - * and we need to do so now. + * This can actually happen, if multiple entries described a + * space in which an entry was inherited. 
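
The long comment in uvm_mapent_forkcopy() above reduces to a two-case test for when the amap must be copied during fork rather than deferred through needs_copy. A restatement as a standalone predicate; the helper is hypothetical, the conditions are taken directly from that comment:

/*
 * Sketch: must the amap be copied at fork time?
 */
int
fork_must_copy_amap_now(struct vm_map_entry *old_entry)
{
	if (old_entry->aref.ar_amap == NULL)
		return (0);	/* nothing to copy; defer via needs_copy */
	/* shared amap: clearing needs_copy later would break the sharing */
	if (amap_flags(old_entry->aref.ar_amap) & AMAP_SHARED)
		return (1);
	/* wired entry: the parent must not take copy-on-write faults */
	return (VM_MAPENT_ISWIRED(old_entry) != 0);
}
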
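
The inheritance codes tested in the fork loop above are the ones userland selects with minherit(2). A small, hedged illustration of the MAP_INHERIT_SHARE case that uvm_mapent_forkshared() serves; it assumes the usual OpenBSD mmap/minherit prototypes from <sys/mman.h>:

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int *p;

	p = mmap(NULL, sizeof(*p), PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	/* Request share inheritance instead of the default copy. */
	if (minherit(p, sizeof(*p), MAP_INHERIT_SHARE) == -1)
		err(1, "minherit");

	*p = 0;
	switch (fork()) {
	case -1:
		err(1, "fork");
	case 0:
		*p = 42;		/* shared with the parent */
		_exit(0);
	default:
		wait(NULL);
		printf("parent sees %d\n", *p);	/* 42, not 0 */
	}
	return (0);
}
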
*/ + uvm_unmap_detach(&dead, 0); - vm_map_unlock(srcmap); - if ((flags & UVM_EXTRACT_REMOVE) && deadentry) - uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ - - /* now do the replacement if we didn't do it in step 5 */ - if (copy_ok == 0) { - vm_map_lock(dstmap); - error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, - nchain); - vm_map_unlock(dstmap); - - if (error == FALSE) { - error = EIO; - goto bad2; - } - } +#ifdef SYSVSHM + if (vm1->vm_shm) + shmfork(vm1, vm2); +#endif + +#ifdef PMAP_FORK + pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); +#endif - uvm_tree_sanity(srcmap, "map_extract src leave"); - uvm_tree_sanity(dstmap, "map_extract dst leave"); + UVMHIST_LOG(maphist,"<- done",0,0,0,0); + return vm2; +} - return(0); +/* + * uvm_map_hint: return the beginning of the best area suitable for + * creating a new mapping with "prot" protection. + */ +vaddr_t +uvm_map_hint(struct proc *p, vm_prot_t prot) +{ + vaddr_t addr; +#ifdef __i386__ /* - * bad: failure recovery + * If executable skip first two pages, otherwise start + * after data + heap region. */ -bad: - vm_map_unlock(srcmap); -bad2: /* src already unlocked */ - if (chain) - uvm_unmap_detach(chain, - (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); - - uvm_tree_sanity(srcmap, "map_extract src err leave"); - uvm_tree_sanity(dstmap, "map_extract dst err leave"); - - uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ - return(error); + if ((prot & VM_PROT_EXECUTE) && + ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { + addr = (PAGE_SIZE*2) + + (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); + return (round_page(addr)); + } +#endif + /* start malloc/mmap after the brk */ + addr = (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ; +#if !defined(__vax__) + addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); +#endif + return (round_page(addr)); } -/* end of extraction functions */ - /* * uvm_map_submap: punch down part of a map into a submap * @@ -2329,7 +3678,6 @@ bad2: /* src already unlocked */ * => submap must have been init'd and have a zero reference count. * [need not be locked as we don't actually reference it] */ - int uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, struct vm_map *submap) @@ -2343,10 +3691,9 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); - UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ - } else { + UVM_MAP_CLIP_END(map, entry, end); + } else entry = NULL; - } if (entry != NULL && entry->start == start && entry->end == end && @@ -2357,133 +3704,101 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, entry->offset = 0; uvm_map_reference(submap); result = 0; - } else { + } else result = EINVAL; - } + vm_map_unlock(map); return(result); } - /* - * uvm_map_protect: change map protection + * uvm_map_checkprot: check protection in map * - * => set_max means set max_protection. - * => map must be unlocked. + * => must allow specific protection in a fully allocated region. + * => map mut be read or write locked by caller. */ - -#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ - ~VM_PROT_WRITE : VM_PROT_ALL) -#define max(a,b) ((a) > (b) ? 
(a) : (b)) - -int -uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t new_prot, boolean_t set_max) +boolean_t +uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t protection) { - struct vm_map_entry *current, *entry; - int error = 0; - UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_prot=0x%lx)", - map, start, end, new_prot); - - vm_map_lock(map); - - VM_MAP_RANGE_CHECK(map, start, end); + struct vm_map_entry *entry; - if (uvm_map_lookup_entry(map, start, &entry)) { - UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + if (start < map->min_offset || end > map->max_offset || start > end) + return FALSE; + if (start == end) + return TRUE; /* - * make a first pass to check for protection violations. + * Iterate entries. */ - - current = entry; - while ((current != &map->header) && (current->start < end)) { - if (UVM_ET_ISSUBMAP(current)) { - error = EINVAL; - goto out; - } - if ((new_prot & current->max_protection) != new_prot) { - error = EACCES; - goto out; - } - current = current->next; - } - - /* go back and fix up protections (no need to clip this time). */ - - current = entry; - - while ((current != &map->header) && (current->start < end)) { - vm_prot_t old_prot; - - UVM_MAP_CLIP_END(map, current, end); - - old_prot = current->protection; - if (set_max) - current->protection = - (current->max_protection = new_prot) & old_prot; - else - current->protection = new_prot; - + for (entry = uvm_map_entrybyaddr(&map->addr, start); + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { /* - * update physical map if necessary. worry about copy-on-write - * here -- CHECK THIS XXX + * Fail if a hole is found. */ - - if (current->protection != old_prot) { - /* update pmap! */ - if ((current->protection & MASK(entry)) == PROT_NONE && - VM_MAPENT_ISWIRED(entry)) - current->wired_count--; - pmap_protect(map->pmap, current->start, current->end, - current->protection & MASK(entry)); - } + if (UVM_ET_ISHOLE(entry) || + (entry->end < end && entry->end != FREE_END(entry))) + return FALSE; /* - * If the map is configured to lock any future mappings, - * wire this entry now if the old protection was VM_PROT_NONE - * and the new protection is not VM_PROT_NONE. + * Check protection. */ + if ((entry->protection & protection) != protection) + return FALSE; + } + return TRUE; +} - if ((map->flags & VM_MAP_WIREFUTURE) != 0 && - VM_MAPENT_ISWIRED(entry) == 0 && - old_prot == VM_PROT_NONE && - new_prot != VM_PROT_NONE) { - if (uvm_map_pageable(map, entry->start, entry->end, - FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { - /* - * If locking the entry fails, remember the - * error if it's the first one. Note we - * still continue setting the protection in - * the map, but will return the resource - * shortage condition regardless. - * - * XXX Ignore what the actual error is, - * XXX just call it a resource shortage - * XXX so that it doesn't get confused - * XXX what uvm_map_protect() itself would - * XXX normally return. 
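
uvm_map_checkprot() above leaves locking to the caller. A minimal sketch of a read-locked check over a user range; the wrapper itself is hypothetical and, once the lock is released, the answer is only advisory since the map can change underneath:

boolean_t
range_is_readable(struct vm_map *map, vaddr_t start, vsize_t len)
{
	boolean_t ok;

	vm_map_lock_read(map);
	ok = uvm_map_checkprot(map, start, start + len, VM_PROT_READ);
	vm_map_unlock_read(map);
	return (ok);
}
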
- */ - error = ENOMEM; - } - } +/* + * uvm_map_create: create map + */ +vm_map_t +uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) +{ + vm_map_t result; + + result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); + result->pmap = pmap; + uvm_map_setup(result, min, max, flags); + return(result); +} - current = current->next; +/* + * uvm_map_deallocate: drop reference to a map + * + * => caller must not lock map + * => we will zap map if ref count goes to zero + */ +void +uvm_map_deallocate(vm_map_t map) +{ + int c; + struct uvm_map_deadq dead; + + simple_lock(&map->ref_lock); + c = --map->ref_count; + simple_unlock(&map->ref_lock); + if (c > 0) { + return; } - pmap_update(map->pmap); - out: - vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); - return (error); -} + /* + * all references gone. unmap and free. + * + * No lock required: we are only one to access this map. + */ + + TAILQ_INIT(&dead); + uvm_tree_sanity(map, __FILE__, __LINE__); + uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, + TRUE, FALSE); + pmap_destroy(map->pmap); + KASSERT(RB_EMPTY(&map->addr)); + free(map, M_VMMAP); -#undef max -#undef MASK + uvm_unmap_detach(&dead, 0); +} /* * uvm_map_inherit: set inheritance code for range of addrs in map. @@ -2492,7 +3807,6 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, * => note that the inherit code is used during a "fork". see fork * code for details. */ - int uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, vm_inherit_t new_inheritance) @@ -2513,19 +3827,19 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, } vm_map_lock(map); - + VM_MAP_RANGE_CHECK(map, start, end); - - if (uvm_map_lookup_entry(map, start, &entry)) { + + entry = uvm_map_entrybyaddr(&map->addr, start); + if (entry->end > start) UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + else + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); - while ((entry != &map->header) && (entry->start < end)) { + while (entry != NULL && entry->start < end) { UVM_MAP_CLIP_END(map, entry, end); entry->inheritance = new_inheritance; - entry = entry->next; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); } vm_map_unlock(map); @@ -2538,7 +3852,6 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, * * => map must be unlocked */ - int uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) { @@ -2551,30 +3864,30 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: - /* nothing special here */ break; - default: UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); return (EINVAL); } + vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); - if (uvm_map_lookup_entry(map, start, &entry)) { + + entry = uvm_map_entrybyaddr(&map->addr, start); + if (entry != NULL && entry->end > start) UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + else if (entry!= NULL) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); /* * XXXJRT: disallow holes? 
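
uvm_map_advice() above is the map-level half of madvise(2); MADV_NORMAL, MADV_RANDOM and MADV_SEQUENTIAL are the values it accepts. A small userland sketch (the helper and its error handling are illustrative only):

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

/*
 * Sketch: tell the kernel a mapping will be read sequentially.
 */
void
hint_sequential(void *buf, size_t len)
{
	if (madvise(buf, len, MADV_SEQUENTIAL) == -1)
		warn("madvise");
}
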
*/ - while ((entry != &map->header) && (entry->start < end)) { + while (entry != NULL && entry->start < end) { UVM_MAP_CLIP_END(map, entry, end); - entry->advice = new_advice; - entry = entry->next; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); } vm_map_unlock(map); @@ -2583,470 +3896,187 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) } /* - * uvm_map_pageable: sets the pageability of a range in a map. + * uvm_map_extract: extract a mapping from a map and put it somewhere + * in the kernel_map, setting protection to max_prot. * - * => wires map entries. should not be used for transient page locking. - * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). - * => regions sepcified as not pageable require lock-down (wired) memory - * and page tables. - * => map must never be read-locked - * => if islocked is TRUE, map is already write-locked - * => we always unlock the map, since we must downgrade to a read-lock - * to call uvm_fault_wire() - * => XXXCDC: check this and try and clean it up. + * => map should be unlocked (we will write lock it and kernel_map) + * => returns 0 on success, error code otherwise + * => start must be page aligned + * => len must be page sized + * => flags: + * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go + * Mappings are QREF's. */ - int -uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, - boolean_t new_pageable, int lockflags) +uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, + vaddr_t *dstaddrp, int flags) { - struct vm_map_entry *entry, *start_entry, *failed_entry; - int rv; -#ifdef DIAGNOSTIC - u_int timestamp_save; -#endif - UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_pageable=0x%lx)", - map, start, end, new_pageable); - KASSERT(map->flags & VM_MAP_PAGEABLE); - - if ((lockflags & UVM_LK_ENTER) == 0) - vm_map_lock(map); + struct uvm_map_deadq dead; + struct vm_map_entry *first, *entry, *newentry; + vaddr_t dstaddr; + vaddr_t end; + vaddr_t cp_start; + vsize_t cp_len, cp_off; + int error; - VM_MAP_RANGE_CHECK(map, start, end); + TAILQ_INIT(&dead); + end = start + len; - /* - * only one pageability change may take place at one time, since - * uvm_fault_wire assumes it will be called only once for each - * wiring/unwiring. therefore, we have to make sure we're actually - * changing the pageability for the entire region. we do so before - * making any changes. + /* + * Sanity check on the parameters. + * Also, since the mapping may not contain gaps, error out if the + * mapped area is not in source map. */ - if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - - UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); - return (EFAULT); - } - entry = start_entry; + if ((start & PAGE_MASK) != 0 || (end & PAGE_MASK) != 0 || end < start) + return EINVAL; + if (start < srcmap->min_offset || end > srcmap->max_offset) + return EINVAL; - /* - * handle wiring and unwiring separately. + /* + * Initialize dead entries. + * Handle len == 0 case. */ - if (new_pageable) { /* unwire */ - UVM_MAP_CLIP_START(map, entry, start); - - /* - * unwiring. first ensure that the range to be unwired is - * really wired down and that there are no holes. 
- */ + if (len == 0) + return 0; - while ((entry != &map->header) && (entry->start < end)) { - if (entry->wired_count == 0 || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - UVMHIST_LOG(maphist, - "<- done (INVALID UNWIRE ARG)",0,0,0,0); - return (EINVAL); - } - entry = entry->next; - } + /* + * Acquire lock on srcmap. + */ + vm_map_lock(srcmap); - /* - * POSIX 1003.1b - a single munlock call unlocks a region, - * regardless of the number of mlock calls made on that - * region. - */ + /* + * Lock srcmap, lookup first and last entry in <start,len>. + */ + first = uvm_map_entrybyaddr(&srcmap->addr, start); - entry = start_entry; - while ((entry != &map->header) && (entry->start < end)) { - UVM_MAP_CLIP_END(map, entry, end); - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); - entry = entry->next; + /* + * Check that the range is contiguous. + */ + for (entry = first; entry != NULL && entry->end < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (FREE_END(entry) != entry->end || UVM_ET_ISHOLE(entry)) { + error = EINVAL; + goto fail; } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return (0); + } + if (entry == NULL || UVM_ET_ISHOLE(entry)) { + error = EINVAL; + goto fail; } /* - * wire case: in two passes [XXXCDC: ugly block of code here] + * Handle need-copy flag. + * This may invalidate last, hence the re-initialization during the + * loop. * - * 1: holding the write lock, we create any anonymous maps that need - * to be created. then we clip each map entry to the region to - * be wired and increment its wiring count. - * - * 2: we downgrade to a read lock, and call uvm_fault_wire to fault - * in the pages for any newly wired area (wired_count == 1). - * - * downgrading to a read lock for uvm_fault_wire avoids a possible - * deadlock with another thread that may have faulted on one of - * the pages to be wired (it would mark the page busy, blocking - * us, then in turn block on the map lock that we hold). because - * of problems in the recursive lock package, we cannot upgrade - * to a write lock in vm_map_lookup. thus, any actions that - * require the write lock must be done beforehand. because we - * keep the read lock on the map, the copy-on-write status of the - * entries we modify here cannot change. + * Also, perform clipping of last if not UVM_EXTRACT_QREF. */ - - while ((entry != &map->header) && (entry->start < end)) { - if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - - /* - * perform actions of vm_map_lookup that need the - * write lock on the map: create an anonymous map - * for a copy-on-write region, or an anonymous map - * for a zero-fill region. (XXXCDC: submap case - * ok?) - */ - - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && - ((entry->protection & VM_PROT_WRITE) || - (entry->object.uvm_obj == NULL))) { - amap_copy(map, entry, M_WAITOK, TRUE, - start, end); - /* XXXCDC: wait OK? 
*/ - } - } - } - UVM_MAP_CLIP_START(map, entry, start); - UVM_MAP_CLIP_END(map, entry, end); - entry->wired_count++; - - /* - * Check for holes - */ - - if (entry->protection == VM_PROT_NONE || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (UVM_ET_ISNEEDSCOPY(entry)) + amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); + if (UVM_ET_ISNEEDSCOPY(entry)) { /* - * found one. amap creation actions do not need to - * be undone, but the wired counts need to be restored. + * amap_copy failure */ - - while (entry != &map->header && entry->end > start) { - entry->wired_count--; - entry = entry->prev; - } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); - return (EINVAL); + error = ENOMEM; + goto fail; } - entry = entry->next; } /* - * Pass 2. + * Lock destination map (kernel_map). */ + vm_map_lock(kernel_map); -#ifdef DIAGNOSTIC - timestamp_save = map->timestamp; -#endif - vm_map_busy(map); - vm_map_downgrade(map); - - rv = 0; - entry = start_entry; - while (entry != &map->header && entry->start < end) { - if (entry->wired_count == 1) { - rv = uvm_fault_wire(map, entry->start, entry->end, - entry->protection); - if (rv) { - /* - * wiring failed. break out of the loop. - * we'll clean up the map below, once we - * have a write lock again. - */ - break; - } - } - entry = entry->next; - } - - if (rv) { /* failed? */ - - /* - * Get back to an exclusive (write) lock. - */ - - vm_map_upgrade(map); - vm_map_unbusy(map); - -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable: stale map"); -#endif - - /* - * first drop the wiring count on all the entries - * which haven't actually been wired yet. - */ - - failed_entry = entry; - while (entry != &map->header && entry->start < end) { - entry->wired_count--; - entry = entry->next; - } - - /* - * now, unwire all the entries that were successfully - * wired above. - */ - - entry = start_entry; - while (entry != failed_entry) { - entry->wired_count--; - if (VM_MAPENT_ISWIRED(entry) == 0) - uvm_map_entry_unwire(map, entry); - entry = entry->next; - } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - UVMHIST_LOG(maphist, "<- done (RV=%ld)", rv,0,0,0); - return(rv); - } - - /* We are holding a read lock here. */ - if ((lockflags & UVM_LK_EXIT) == 0) { - vm_map_unbusy(map); - vm_map_unlock_read(map); - } else { - - /* - * Get back to an exclusive (write) lock. - */ - - vm_map_upgrade(map); - vm_map_unbusy(map); + if (uvm_map_findspace_tree(&kernel_map->free, len, UVM_UNKNOWN_OFFSET, + 0, kernel_map->flags & VM_MAP_GUARDPAGES, &dstaddr, kernel_map) == + NULL) { + error = ENOMEM; + goto fail2; } + *dstaddrp = dstaddr; - UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return (0); -} - -/* - * uvm_map_pageable_all: special case of uvm_map_pageable - affects - * all mapped regions. - * - * => map must not be locked. - * => if no flags are specified, all regions are unwired. - * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 
- */ - -int -uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) -{ - struct vm_map_entry *entry, *failed_entry; - vsize_t size; - int error; -#ifdef DIAGNOSTIC - u_int timestamp_save; -#endif - UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); - UVMHIST_LOG(maphist,"(map=%p,flags=0x%lx)", map, flags, 0, 0); - - KASSERT(map->flags & VM_MAP_PAGEABLE); - - vm_map_lock(map); + /* + * We now have srcmap and kernel_map locked. + * dstaddr contains the destination offset in dstmap. + */ /* - * handle wiring and unwiring separately. + * step 1: start looping through map entries, performing extraction. */ + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); + if (UVM_ET_ISHOLE(entry)) + continue; - if (flags == 0) { /* unwire */ /* - * POSIX 1003.1b -- munlockall unlocks all regions, - * regardless of how many times mlockall has been called. + * Calculate uvm_mapent_clone parameters. */ - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); + cp_start = entry->start; + if (cp_start < start) { + cp_off = start - cp_start; + cp_start = start; + } else + cp_off = 0; + cp_len = MIN(entry->end, end) - cp_start; + + newentry = uvm_mapent_clone(kernel_map, + cp_start - start + dstaddr, cp_len, cp_off, + entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); + if (newentry == NULL) { + error = ENOMEM; + goto fail2_unmap; } - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); - vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); - return (0); - - /* - * end of unwire case! - */ + kernel_map->size += cp_len; + if (flags & UVM_EXTRACT_FIXPROT) + newentry->protection = newentry->max_protection; } - if (flags & MCL_FUTURE) { + /* + * step 2: perform pmap copy. + */ + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { /* - * must wire all future mappings; remember this. + * Calculate uvm_mapent_clone parameters (again). */ - vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); - } + cp_start = entry->start; + if (cp_start < start) + cp_start = start; + cp_len = MIN(entry->end, end) - cp_start; - if ((flags & MCL_CURRENT) == 0) { - /* - * no more work to do! - */ - UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); - vm_map_unlock(map); - return (0); - } - - /* - * wire case: in three passes [XXXCDC: ugly block of code here] - * - * 1: holding the write lock, count all pages mapped by non-wired - * entries. if this would cause us to go over our limit, we fail. - * - * 2: still holding the write lock, we create any anonymous maps that - * need to be created. then we increment its wiring count. - * - * 3: we downgrade to a read lock, and call uvm_fault_wire to fault - * in the pages for any newly wired area (wired_count == 1). - * - * downgrading to a read lock for uvm_fault_wire avoids a possible - * deadlock with another thread that may have faulted on one of - * the pages to be wired (it would mark the page busy, blocking - * us, then in turn block on the map lock that we hold). because - * of problems in the recursive lock package, we cannot upgrade - * to a write lock in vm_map_lookup. thus, any actions that - * require the write lock must be done beforehand. because we - * keep the read lock on the map, the copy-on-write status of the - * entries we modify here cannot change. 
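
The rewritten uvm_map_extract() above always places the extraction in kernel_map and returns the chosen kernel address through dstaddrp. A hedged sketch of the call pattern; the surrounding helper, its error handling and the uvm_unmap() cleanup are assumptions modelled on other code in this file, not a copy of an actual caller:

int
with_user_range(struct vm_map *umap, vaddr_t uva, vsize_t len)
{
	vaddr_t kva;
	int error;

	/* uva and len must be page aligned, as the function checks. */
	error = uvm_map_extract(umap, uva, len, &kva, UVM_EXTRACT_FIXPROT);
	if (error)
		return (error);

	/* ... access the user pages through kva ... */

	uvm_unmap(kernel_map, kva, kva + len);
	return (0);
}
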
- */ - - for (size = 0, entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (entry->protection != VM_PROT_NONE && - VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - size += entry->end - entry->start; - } - } - - if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { - vm_map_unlock(map); - return (ENOMEM); /* XXX overloaded */ + pmap_copy(kernel_map->pmap, srcmap->pmap, + cp_start - start + dstaddr, cp_len, cp_start); } + pmap_update(kernel_map->pmap); - /* XXX non-pmap_wired_count case must be handled by caller */ -#ifdef pmap_wired_count - if (limit != 0 && - (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { - vm_map_unlock(map); - return (ENOMEM); /* XXX overloaded */ - } -#endif + error = 0; /* - * Pass 2. + * Unmap copied entries on failure. */ - - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - /* - * perform actions of vm_map_lookup that need the - * write lock on the map: create an anonymous map - * for a copy-on-write region, or an anonymous map - * for a zero-fill region. (XXXCDC: submap case - * ok?) - */ - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && - ((entry->protection & VM_PROT_WRITE) || - (entry->object.uvm_obj == NULL))) { - amap_copy(map, entry, M_WAITOK, TRUE, - entry->start, entry->end); - /* XXXCDC: wait OK? */ - } - } - } - entry->wired_count++; +fail2_unmap: + if (error) { + uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, + FALSE, TRUE); } /* - * Pass 3. + * Release maps, release dead entries. */ +fail2: + vm_map_unlock(kernel_map); -#ifdef DIAGNOSTIC - timestamp_save = map->timestamp; -#endif - vm_map_busy(map); - vm_map_downgrade(map); - - for (error = 0, entry = map->header.next; - entry != &map->header && error == 0; - entry = entry->next) { - if (entry->wired_count == 1) { - error = uvm_fault_wire(map, entry->start, entry->end, - entry->protection); - } - } - - if (error) { /* failed? */ - /* - * Get back an exclusive (write) lock. - */ - vm_map_upgrade(map); - vm_map_unbusy(map); - -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable_all: stale map"); -#endif - - /* - * first drop the wiring count on all the entries - * which haven't actually been wired yet. - * - * Skip VM_PROT_NONE entries like we did above. - */ - failed_entry = entry; - for (/* nothing */; entry != &map->header; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - entry->wired_count--; - } - - /* - * now, unwire all the entries that were successfully - * wired above. - * - * Skip VM_PROT_NONE entries like we did above. - */ - for (entry = map->header.next; entry != failed_entry; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - entry->wired_count--; - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); - } - vm_map_unlock(map); - UVMHIST_LOG(maphist,"<- done (RV=%ld)", error,0,0,0); - return (error); - } +fail: + vm_map_unlock(srcmap); - /* We are holding a read lock here. */ - vm_map_unbusy(map); - vm_map_unlock_read(map); + uvm_unmap_detach(&dead, 0); - UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); - return (0); + return error; } /* @@ -3070,74 +4100,73 @@ int amap_clean_works = 1; /* XXX for now, just in case... 
*/ int uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) { - struct vm_map_entry *current, *entry; - struct uvm_object *uobj; + struct vm_map_entry *first, *entry; struct vm_amap *amap; struct vm_anon *anon; struct vm_page *pg; - vaddr_t offset; - vsize_t size; - int rv, error, refs; + struct uvm_object *uobj; + vaddr_t cp_start, cp_end; + int refs; + int error; + boolean_t rv; + UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,flags=0x%lx)", - map, start, end, flags); + map, start, end, flags); KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != - (PGO_FREE|PGO_DEACTIVATE)); + (PGO_FREE|PGO_DEACTIVATE)); vm_map_lock_read(map); VM_MAP_RANGE_CHECK(map, start, end); - if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { - vm_map_unlock_read(map); - return (EFAULT); - } + first = uvm_map_entrybyaddr(&map->addr, start); /* * Make a first pass to check for holes. */ - - for (current = entry; current->start < end; current = current->next) { - if (UVM_ET_ISSUBMAP(current)) { + for (entry = first; entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (UVM_ET_ISSUBMAP(entry)) { vm_map_unlock_read(map); - return (EINVAL); + return EINVAL; } - if (end > current->end && (current->next == &map->header || - current->end != current->next->start)) { + if (UVM_ET_ISSUBMAP(entry) || + UVM_ET_ISHOLE(entry) || + (entry->end < end && FREE_END(entry) != entry->end)) { vm_map_unlock_read(map); - return (EFAULT); + return EFAULT; } } error = 0; - - for (current = entry; current->start < end; current = current->next) { - amap = current->aref.ar_amap; /* top layer */ - uobj = current->object.uvm_obj; /* bottom layer */ - KASSERT(start >= current->start); + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + amap = entry->aref.ar_amap; /* top layer */ + if (UVM_ET_ISOBJ(entry)) + uobj = entry->object.uvm_obj; + else + uobj = NULL; /* * No amap cleaning necessary if: - * - * (1) There's no amap. - * - * (2) We're not deactivating or freeing pages. + * - there's no amap + * - we're not deactivating or freeing pages. */ - if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) goto flush_object; - - /* XXX for now, just in case... */ - if (amap_clean_works == 0) + if (!amap_clean_works) goto flush_object; - offset = start - current->start; - size = MIN(end, current->end) - start; - for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { - anon = amap_lookup(¤t->aref, offset); + cp_start = MAX(entry->start, start); + cp_end = MIN(entry->end, end); + + for (; cp_start != cp_end; cp_start += PAGE_SIZE) { + anon = amap_lookup(&entry->aref, + cp_start - entry->start); if (anon == NULL) continue; - simple_lock(&anon->an_lock); + simple_lock(&anon->an_lock); /* XXX */ pg = anon->an_page; if (pg == NULL) { @@ -3146,23 +4175,21 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { - /* * XXX In these first 3 cases, we always just * XXX deactivate the page. We may want to * XXX handle the different cases more * XXX specifically, in the future. 
*/ - case PGO_CLEANIT|PGO_FREE: case PGO_CLEANIT|PGO_DEACTIVATE: case PGO_DEACTIVATE: - deactivate_it: +deactivate_it: /* skip the page if it's loaned or wired */ if (pg->loan_count != 0 || pg->wire_count != 0) { simple_unlock(&anon->an_lock); - continue; + break; } uvm_lock_pageq(); @@ -3172,51 +4199,45 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) * by the anon (may simply be loaned to the * anon). */ - if ((pg->pg_flags & PQ_ANON) == 0) { KASSERT(pg->uobject == NULL); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - continue; + break; } KASSERT(pg->uanon == anon); -#ifdef UBC - /* ...and deactivate the page. */ - pmap_clear_reference(pg); -#else /* zap all mappings for the page. */ pmap_page_protect(pg, VM_PROT_NONE); /* ...and deactivate the page. */ -#endif uvm_pagedeactivate(pg); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - continue; + break; case PGO_FREE: /* - * If there are multiple references to + * If there are mutliple references to * the amap, just deactivate the page. */ - if (amap_refs(amap) > 1) goto deactivate_it; /* XXX skip the page if it's wired */ if (pg->wire_count != 0) { simple_unlock(&anon->an_lock); - continue; + break; } - amap_unadd(¤t->aref, offset); + amap_unadd(&entry->aref, + cp_start - entry->start); refs = --anon->an_ref; simple_unlock(&anon->an_lock); if (refs == 0) uvm_anfree(anon); - continue; + break; default: panic("uvm_map_clean: weird flags"); @@ -3224,827 +4245,677 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } flush_object: + cp_start = MAX(entry->start, start); + cp_end = MIN(entry->end, end); + /* * flush pages if we've got a valid backing object. * * Don't PGO_FREE if we don't have write permission - * and don't flush if this is a copy-on-write object + * and don't flush if this is a copy-on-write object * since we can't know our permissions on it. */ - - offset = current->offset + (start - current->start); - size = MIN(end, current->end) - start; if (uobj != NULL && ((flags & PGO_FREE) == 0 || ((entry->max_protection & VM_PROT_WRITE) != 0 && (entry->etype & UVM_ET_COPYONWRITE) == 0))) { simple_lock(&uobj->vmobjlock); - rv = uobj->pgops->pgo_flush(uobj, offset, - offset + size, flags); + rv = uobj->pgops->pgo_flush(uobj, + cp_start - entry->start + entry->offset, + cp_end - entry->start + entry->offset, flags); simple_unlock(&uobj->vmobjlock); if (rv == FALSE) error = EFAULT; } - start += size; } + vm_map_unlock_read(map); - return (error); + return error; } - /* - * uvm_map_checkprot: check protection in map - * - * => must allow specified protection in a fully allocated region. - * => map must be read or write locked by caller. + * UVM_MAP_CLIP_END implementation */ - -boolean_t -uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t protection) +void +uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) { - struct vm_map_entry *entry; - struct vm_map_entry *tmp_entry; - - if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); - } - entry = tmp_entry; - while (start < end) { - if (entry == &map->header) { - return(FALSE); - } - - /* - * no holes allowed - */ + struct vm_map_entry *tmp; - if (start < entry->start) { - return(FALSE); - } + KASSERT(entry->start < addr && FREE_END(entry) > addr); + tmp = uvm_mapent_alloc(map, 0); - /* - * check protection associated with entry - */ + /* + * Invoke splitentry. 
+ */ + uvm_map_splitentry(map, entry, tmp, addr); +} - if ((entry->protection & protection) != protection) { - return(FALSE); - } +/* + * UVM_MAP_CLIP_START implementation + * + * Clippers are required to not change the pointers to the entry they are + * clipping on. + * Since uvm_map_splitentry turns the original entry into the lowest + * entry (address wise) we do a swap between the new entry and the original + * entry, prior to calling uvm_map_splitentry. + */ +void +uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) +{ + struct vm_map_entry *tmp; + struct uvm_map_free *free; - /* go to next entry */ + /* + * Copy entry. + */ + KASSERT(entry->start < addr && FREE_END(entry) > addr); + tmp = uvm_mapent_alloc(map, 0); + uvm_mapent_copy(entry, tmp); - start = entry->end; - entry = entry->next; + /* + * Put new entry in place of original entry. + */ + free = UVM_FREE(map, FREE_START(entry)); + uvm_mapent_addr_remove(map, entry); + if (entry->fspace > 0 && free) { + uvm_mapent_free_remove(map, free, entry); + uvm_mapent_free_insert(map, free, tmp); } - return(TRUE); + uvm_mapent_addr_insert(map, tmp); + + /* + * Invoke splitentry. + */ + uvm_map_splitentry(map, tmp, entry, addr); } /* - * uvmspace_alloc: allocate a vmspace structure. - * - * - structure includes vm_map and pmap - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * Boundary fixer. */ -struct vmspace * -uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, - boolean_t remove_holes) +static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); +static __inline vaddr_t +uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) { - struct vmspace *vm; - UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); - - vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); - uvmspace_init(vm, NULL, min, max, pageable, remove_holes); - UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0); - return (vm); + return (min < bound && max > bound) ? bound : max; } /* - * uvmspace_init: initialize a vmspace structure. - * - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * Choose free list based on address at start of free space. */ -void -uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, - boolean_t pageable, boolean_t remove_holes) +struct uvm_map_free* +uvm_free(struct vm_map *map, vaddr_t addr) { - UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); - - uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); + /* Special case the first page, to prevent mmap from returning 0. */ + if (addr < PAGE_SIZE) + return NULL; - if (pmap) - pmap_reference(pmap); - else - pmap = pmap_create(); - vm->vm_map.pmap = pmap; - - vm->vm_refcnt = 1; - - if (remove_holes) - pmap_remove_holes(&vm->vm_map); - - UVMHIST_LOG(maphist,"<- done",0,0,0,0); + if ((map->flags & VM_MAP_ISVMSPACE) == 0) { + if (addr >= uvm_maxkaddr) + return NULL; + } else { + /* addr falls within brk() area. */ + if (addr >= map->b_start && addr < map->b_end) + return &map->bfree; + /* addr falls within stack area. */ + if (addr >= map->s_start && addr < map->s_end) + return &map->bfree; + } + return &map->free; } /* - * uvmspace_share: share a vmspace between two proceses - * - * - XXX: no locking on vmspace - * - used for vfork, threads(?) + * Returns the first free-memory boundary that is crossed by [min-max]. 
*/ - -void -uvmspace_share(p1, p2) - struct proc *p1, *p2; +vsize_t +uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) { - p2->p_vmspace = p1->p_vmspace; - p1->p_vmspace->vm_refcnt++; + /* Treat the first page special, mmap returning 0 breaks too much. */ + max = uvm_map_boundfix(min, max, PAGE_SIZE); + + if ((map->flags & VM_MAP_ISVMSPACE) == 0) { + max = uvm_map_boundfix(min, max, uvm_maxkaddr); + } else { + max = uvm_map_boundfix(min, max, map->b_start); + max = uvm_map_boundfix(min, max, map->b_end); + max = uvm_map_boundfix(min, max, map->s_start); + max = uvm_map_boundfix(min, max, map->s_end); + } + return max; } /* - * uvmspace_exec: the process wants to exec a new program - * - * - XXX: no locking on vmspace + * Update map allocation start and end addresses from proc vmspace. */ - void -uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) +uvm_map_vmspace_update(struct vm_map *map, + struct uvm_map_deadq *dead, int flags) { - struct vmspace *nvm, *ovm = p->p_vmspace; - struct vm_map *map = &ovm->vm_map; + struct vmspace *vm; + vaddr_t b_start, b_end, s_start, s_end; - pmap_unuse_final(p); /* before stack addresses go away */ + KASSERT(map->flags & VM_MAP_ISVMSPACE); + KASSERT(offsetof(struct vmspace, vm_map) == 0); /* - * see if more than one process is using this vmspace... + * Derive actual allocation boundaries from vmspace. */ - - if (ovm->vm_refcnt == 1) { - - /* - * if p is the only process using its vmspace then we can safely - * recycle that vmspace for the program that is being exec'd. - */ - -#ifdef SYSVSHM - /* - * SYSV SHM semantics require us to kill all segments on an exec - */ - if (ovm->vm_shm) - shmexit(ovm); + vm = (struct vmspace *)map; + b_start = (vaddr_t)vm->vm_daddr; + b_end = b_start + BRKSIZ; + s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); +#ifdef DIAGNOSTIC + if ((b_start & PAGE_MASK) != 0 || (b_end & PAGE_MASK) != 0 || + (s_start & PAGE_MASK) != 0 || (s_end & PAGE_MASK) != 0) { + panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " + "b=0x%lx-0x%lx s=0x%lx-0x%lx", + vm, b_start, b_end, s_start, s_end); + } #endif - /* - * POSIX 1003.1b -- "lock future mappings" is revoked - * when a process execs another program image. - */ - vm_map_lock(map); - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); - vm_map_unlock(map); - - /* - * now unmap the old program - */ - uvm_unmap(map, map->min_offset, map->max_offset); - - /* - * but keep MMU holes unavailable - */ - pmap_remove_holes(map); - - /* - * resize the map - */ - vm_map_lock(map); - map->min_offset = start; - uvm_tree_sanity(map, "resize enter"); - map->max_offset = end; - if (map->header.prev != &map->header) - uvm_rb_fixup(map, map->header.prev); - uvm_tree_sanity(map, "resize leave"); - vm_map_unlock(map); - - - } else { - - /* - * p's vmspace is being shared, so we can't reuse it for p since - * it is still being used for others. allocate a new vmspace - * for p - */ - nvm = uvmspace_alloc(start, end, - (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); - - /* - * install new vmspace and drop our ref to the old one. 
- */ - - pmap_deactivate(p); - p->p_vmspace = nvm; - pmap_activate(p); + if (__predict_true(map->b_start == b_start && map->b_end == b_end && + map->s_start == s_start && map->s_end == s_end)) + return; - uvmspace_free(ovm); - } + uvm_map_freelist_update(map, dead, b_start, b_end, + s_start, s_end, flags); } /* - * uvmspace_free: free a vmspace data structure + * Grow kernel memory. * - * - XXX: no locking on vmspace + * This function is only called for kernel maps when an allocation fails. + * + * If the map has a gap that is large enough to accomodate alloc_sz, this + * function will make sure map->free will include it. */ - void -uvmspace_free(struct vmspace *vm) +uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, + vsize_t alloc_sz, int flags) { - struct vm_map_entry *dead_entries; - UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); + vsize_t sz; + vaddr_t end; + struct vm_map_entry *entry; - UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); - if (--vm->vm_refcnt == 0) { - /* - * lock the map, to wait out all other references to it. delete - * all of the mappings and pages they hold, then call the pmap - * module to reclaim anything left. - */ -#ifdef SYSVSHM - /* Get rid of any SYSV shared memory segments. */ - if (vm->vm_shm != NULL) - shmexit(vm); + /* Kernel memory only. */ + KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); + /* Destroy free list. */ + uvm_map_freelist_update_clear(map, dead); + + /* + * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. + * + * Don't handle the case where the multiplication overflows: + * if that happens, the allocation is probably too big anyway. + */ + sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); + + /* + * Include the guard page in the hard minimum requirement of alloc_sz. + */ + if (map->flags & VM_MAP_GUARDPAGES) + alloc_sz += PAGE_SIZE; + + /* + * Walk forward until a gap large enough for alloc_sz shows up. + * + * We assume the kernel map has no boundaries. + * uvm_maxkaddr may be zero. + */ + end = MAX(uvm_maxkaddr, map->min_offset); + entry = uvm_map_entrybyaddr(&map->addr, end); + while (entry && entry->fspace < alloc_sz) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + if (entry) { + end = MAX(FREE_START(entry), end); + end += MIN(sz, map->max_offset - end); + } else + end = map->max_offset; + + /* Reserve pmap entries. */ +#ifdef PMAP_GROWKERNEL + uvm_maxkaddr = pmap_growkernel(end); +#else + uvm_maxkaddr = end; #endif - vm_map_lock(&vm->vm_map); - if (vm->vm_map.nentries) { - uvm_unmap_remove(&vm->vm_map, - vm->vm_map.min_offset, vm->vm_map.max_offset, - &dead_entries, NULL, TRUE); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); - } - pmap_destroy(vm->vm_map.pmap); - vm->vm_map.pmap = NULL; - pool_put(&uvm_vmspace_pool, vm); - } - UVMHIST_LOG(maphist,"<- done", 0,0,0,0); + /* Rebuild free list. */ + uvm_map_freelist_update_refill(map, flags); } /* - * uvm_map_create: create map + * Freelist update subfunction: unlink all entries from freelists. 
*/ -vm_map_t -uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) +void +uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) { - vm_map_t result; - - result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); - uvm_map_setup(result, min, max, flags); - result->pmap = pmap; - return(result); + struct uvm_map_free *free; + struct vm_map_entry *entry, *prev, *next; + + prev = NULL; + for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; + entry = next) { + next = RB_NEXT(uvm_map_addr, &map->addr, entry); + + free = UVM_FREE(map, FREE_START(entry)); + if (entry->fspace > 0 && free) + uvm_mapent_free_remove(map, free, entry); + + if (prev != NULL && entry->start == entry->end) { + prev->fspace += FREE_END(entry) - entry->end; + uvm_mapent_addr_remove(map, entry); + DEAD_ENTRY_PUSH(dead, entry); + } else + prev = entry; + } } /* - * uvm_map_setup: init map - * - * => map must not be in service yet. + * Freelist update subfunction: refill the freelists with entries. */ void -uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags) +uvm_map_freelist_update_refill(struct vm_map *map, int flags) { + struct vm_map_entry *entry; + vaddr_t min, max; - RB_INIT(&map->rbhead); - map->header.next = map->header.prev = &map->header; - map->nentries = 0; - map->size = 0; - map->ref_count = 1; - map->min_offset = min; - map->max_offset = max; - map->flags = flags; - map->first_free = &map->header; - map->hint = &map->header; - map->timestamp = 0; - rw_init(&map->lock, "vmmaplk"); - simple_lock_init(&map->ref_lock); - simple_lock_init(&map->hint_lock); -} - + RB_FOREACH(entry, uvm_map_addr, &map->addr) { + min = FREE_START(entry); + max = FREE_END(entry); + entry->fspace = 0; + entry = uvm_map_fix_space(map, entry, min, max, flags); + } -/* - * uvm_map_reference: add reference to a map - * - * => map need not be locked (we use ref_lock). - */ -void -uvm_map_reference(vm_map_t map) -{ - simple_lock(&map->ref_lock); - map->ref_count++; - simple_unlock(&map->ref_lock); + uvm_tree_sanity(map, __FILE__, __LINE__); } /* - * uvm_map_deallocate: drop reference to a map - * - * => caller must not lock map - * => we will zap map if ref count goes to zero + * Change {a,b}_{start,end} allocation ranges and associated free lists. */ void -uvm_map_deallocate(vm_map_t map) +uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, + vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) { - int c; + KDASSERT(b_end >= b_start && s_end >= s_start); - simple_lock(&map->ref_lock); - c = --map->ref_count; - simple_unlock(&map->ref_lock); - if (c > 0) { - return; - } + /* Clear all free lists. */ + uvm_map_freelist_update_clear(map, dead); - /* - * all references gone. unmap and free. - */ + /* Apply new bounds. */ + map->b_start = b_start; + map->b_end = b_end; + map->s_start = s_start; + map->s_end = s_end; - uvm_unmap(map, map->min_offset, map->max_offset); - pmap_destroy(map->pmap); - free(map, M_VMMAP); + /* Refill free lists. */ + uvm_map_freelist_update_refill(map, flags); } /* - * F O R K - m a i n e n t r y p o i n t - */ -/* - * uvmspace_fork: fork a process' main map - * - * => create a new vmspace for child process from parent. - * => parent's map must not be locked. + * Correct space insert. 
*/ - -struct vmspace * -uvmspace_fork(struct vmspace *vm1) +struct vm_map_entry* +uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, + vaddr_t min, vaddr_t max, int flags) { - struct vmspace *vm2; - struct vm_map *old_map = &vm1->vm_map; - struct vm_map *new_map; - struct vm_map_entry *old_entry; - struct vm_map_entry *new_entry; - pmap_t new_pmap; - boolean_t protect_child; - UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); - - vm_map_lock(old_map); - - vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, - (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); - memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, - (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); - new_map = &vm2->vm_map; /* XXX */ - new_pmap = new_map->pmap; - - old_entry = old_map->header.next; - - /* - * go entry-by-entry - */ - - while (old_entry != &old_map->header) { + struct uvm_map_free *free; + vaddr_t lmax; + KDASSERT(min <= max); + KDASSERT((entry != NULL && FREE_END(entry) == min) || + min == map->min_offset); + while (min != max) { /* - * first, some sanity checks on the old entry + * Claim guard page for entry. */ - if (UVM_ET_ISSUBMAP(old_entry)) - panic("fork: encountered a submap during fork (illegal)"); - - if (!UVM_ET_ISCOPYONWRITE(old_entry) && - UVM_ET_ISNEEDSCOPY(old_entry)) - panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); - - - switch (old_entry->inheritance) { - case MAP_INHERIT_NONE: - /* - * drop the mapping - */ - break; - - case MAP_INHERIT_SHARE: - /* - * share the mapping: this means we want the old and - * new entries to share amaps and backing objects. - */ - - /* - * if the old_entry needs a new amap (due to prev fork) - * then we need to allocate it now so that we have - * something we own to share with the new_entry. [in - * other words, we need to clear needs_copy] - */ - - if (UVM_ET_ISNEEDSCOPY(old_entry)) { - /* get our own amap, clears needs_copy */ - amap_copy(old_map, old_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: WAITOK??? */ + if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && + FREE_END(entry) == entry->end && + entry->start != entry->end) { + if (max - min == 2 * PAGE_SIZE) { + /* + * If the free-space gap is exactly 2 pages, + * we make the guard 2 pages instead of 1. + * Because in a guarded map, an area needs + * at least 2 pages to allocate from: + * one page for the allocation and one for + * the guard. + */ + entry->guard = 2 * PAGE_SIZE; + min = max; + } else { + entry->guard = PAGE_SIZE; + min += PAGE_SIZE; } + continue; + } - new_entry = uvm_mapent_alloc(new_map, 0); - /* old_entry -> new_entry */ - uvm_mapent_copy(old_entry, new_entry); + /* + * Handle the case where entry has a 2-page guard, but the + * space after entry is freed. + */ + if (entry != NULL && entry->fspace == 0 && + entry->guard > PAGE_SIZE) { + entry->guard = PAGE_SIZE; + min = FREE_START(entry); + } - /* new pmap has nothing wired in it */ - new_entry->wired_count = 0; + lmax = uvm_map_boundary(map, min, max); + free = UVM_FREE(map, min); - /* - * gain reference to object backing the map (can't - * be a submap, already checked this case). 
- */ - if (new_entry->aref.ar_amap) - /* share reference */ - uvm_map_reference_amap(new_entry, AMAP_SHARED); - - if (new_entry->object.uvm_obj && - new_entry->object.uvm_obj->pgops->pgo_reference) - new_entry->object.uvm_obj-> - pgops->pgo_reference( - new_entry->object.uvm_obj); - - /* insert entry at end of new_map's entry list */ - uvm_map_entry_link(new_map, new_map->header.prev, - new_entry); - - /* - * pmap_copy the mappings: this routine is optional - * but if it is there it will reduce the number of - * page faults in the new proc. - */ + if (entry != NULL && free == UVM_FREE(map, FREE_START(entry))) { + KDASSERT(FREE_END(entry) == min); + if (entry->fspace > 0 && free != NULL) + uvm_mapent_free_remove(map, free, entry); + entry->fspace += lmax - min; + } else { + entry = uvm_mapent_alloc(map, flags); + KDASSERT(entry != NULL); + entry->end = entry->start = min; + entry->guard = 0; + entry->fspace = lmax - min; + entry->object.uvm_obj = NULL; + entry->offset = 0; + entry->etype = 0; + entry->protection = entry->max_protection = 0; + entry->inheritance = 0; + entry->wired_count = 0; + entry->advice = 0; + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = NULL; + uvm_mapent_addr_insert(map, entry); + } - pmap_copy(new_pmap, old_map->pmap, new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); + if (free) + uvm_mapent_free_insert(map, free, entry); - break; + min = lmax; + } - case MAP_INHERIT_COPY: + return entry; +} - /* - * copy-on-write the mapping (using mmap's - * MAP_PRIVATE semantics) - * - * allocate new_entry, adjust reference counts. - * (note that new references are read-only). - */ +/* + * MQuery style of allocation. + * + * This allocator searches forward until sufficient space is found to map + * the given size. + * + * XXX: factor in offset (via pmap_prefer) and protection? + */ +int +uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, + int flags) +{ + struct vm_map_entry *entry, *last; + vaddr_t addr; +#ifdef PMAP_PREFER + vaddr_t tmp; +#endif + int error; - new_entry = uvm_mapent_alloc(new_map, 0); - /* old_entry -> new_entry */ - uvm_mapent_copy(old_entry, new_entry); + addr = *addr_p; + vm_map_lock_read(map); - if (new_entry->aref.ar_amap) - uvm_map_reference_amap(new_entry, 0); +#ifdef PMAP_PREFER + if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) + addr = PMAP_PREFER(offset, addr); +#endif - if (new_entry->object.uvm_obj && - new_entry->object.uvm_obj->pgops->pgo_reference) - new_entry->object.uvm_obj->pgops->pgo_reference - (new_entry->object.uvm_obj); + /* + * First, check if the requested range is fully available. + */ + entry = uvm_map_entrybyaddr(&map->addr, addr); + last = NULL; + if (uvm_map_isavail(&map->addr, &entry, &last, addr, sz)) { + error = 0; + goto out; + } + if (flags & UVM_FLAG_FIXED) { + error = EINVAL; + goto out; + } - /* new pmap has nothing wired in it */ - new_entry->wired_count = 0; + error = ENOMEM; /* Default error from here. */ - new_entry->etype |= - (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); - uvm_map_entry_link(new_map, new_map->header.prev, - new_entry); + /* + * At this point, the memory at <addr, sz> is not available. + * The reasons are: + * [1] it's outside the map, + * [2] it starts in used memory (and therefore needs to move + * toward the first free page in entry), + * [3] it starts in free memory but bumps into used memory. + * + * Note that for case [2], the forward moving is handled by the + * for loop below. + */ - /* - * the new entry will need an amap. 
it will either - * need to be copied from the old entry or created - * from scratch (if the old entry does not have an - * amap). can we defer this process until later - * (by setting "needs_copy") or do we need to copy - * the amap now? - * - * we must copy the amap now if any of the following - * conditions hold: - * 1. the old entry has an amap and that amap is - * being shared. this means that the old (parent) - * process is sharing the amap with another - * process. if we do not clear needs_copy here - * we will end up in a situation where both the - * parent and child process are referring to the - * same amap with "needs_copy" set. if the - * parent write-faults, the fault routine will - * clear "needs_copy" in the parent by allocating - * a new amap. this is wrong because the - * parent is supposed to be sharing the old amap - * and the new amap will break that. - * - * 2. if the old entry has an amap and a non-zero - * wire count then we are going to have to call - * amap_cow_now to avoid page faults in the - * parent process. since amap_cow_now requires - * "needs_copy" to be clear we might as well - * clear it here as well. - * - */ + if (entry == NULL) { + /* [1] Outside the map. */ + if (addr >= map->max_offset) + goto out; + else + entry = RB_MIN(uvm_map_addr, &map->addr); + } else if (FREE_START(entry) <= addr) { + /* [3] Bumped into used memory. */ + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + } - if (old_entry->aref.ar_amap != NULL) { + /* + * Test if the next entry is sufficient for the allocation. + */ + for (; entry != NULL; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (entry->fspace == 0) + continue; + addr = FREE_START(entry); - if ((amap_flags(old_entry->aref.ar_amap) & - AMAP_SHARED) != 0 || - VM_MAPENT_ISWIRED(old_entry)) { +restart: /* Restart address checks on address change. */ - amap_copy(new_map, new_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: M_WAITOK ... ok? */ - } +#ifdef PMAP_PREFER + if (offset != UVM_UNKNOWN_OFFSET) { + tmp = (addr & ~(PMAP_PREFER_ALIGN() - 1)) | + PMAP_PREFER_OFFSET(offset); + if (tmp < addr) + tmp += PMAP_PREFER_ALIGN(); + if (addr >= FREE_END(entry)) + continue; + if (addr != tmp) { + addr = tmp; + goto restart; } + } +#endif - /* - * if the parent's entry is wired down, then the - * parent process does not want page faults on - * access to that memory. this means that we - * cannot do copy-on-write because we can't write - * protect the old entry. in this case we - * resolve all copy-on-write faults now, using - * amap_cow_now. note that we have already - * allocated any needed amap (above). - */ - - if (VM_MAPENT_ISWIRED(old_entry)) { - - /* - * resolve all copy-on-write faults now - * (note that there is nothing to do if - * the old mapping does not have an amap). - * XXX: is it worthwhile to bother with pmap_copy - * in this case? - */ - if (old_entry->aref.ar_amap) - amap_cow_now(new_map, new_entry); - - } else { - - /* - * setup mappings to trigger copy-on-write faults - * we must write-protect the parent if it has - * an amap and it is not already "needs_copy"... - * if it is already "needs_copy" then the parent - * has already been write-protected by a previous - * fork operation. - * - * if we do not write-protect the parent, then - * we must be sure to write-protect the child - * after the pmap_copy() operation. - * - * XXX: pmap_copy should have some way of telling - * us that it didn't do anything so we can avoid - * calling pmap_protect needlessly. 
- */ - - if (old_entry->aref.ar_amap) { - - if (!UVM_ET_ISNEEDSCOPY(old_entry)) { - if (old_entry->max_protection & VM_PROT_WRITE) { - pmap_protect(old_map->pmap, - old_entry->start, - old_entry->end, - old_entry->protection & - ~VM_PROT_WRITE); - pmap_update(old_map->pmap); - - } - old_entry->etype |= UVM_ET_NEEDSCOPY; - } - - /* - * parent must now be write-protected - */ - protect_child = FALSE; - } else { - - /* - * we only need to protect the child if the - * parent has write access. - */ - if (old_entry->max_protection & VM_PROT_WRITE) - protect_child = TRUE; - else - protect_child = FALSE; - - } - - /* - * copy the mappings - * XXX: need a way to tell if this does anything - */ - - pmap_copy(new_pmap, old_map->pmap, - new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); - - /* - * protect the child's mappings if necessary - */ - if (protect_child) { - pmap_protect(new_pmap, new_entry->start, - new_entry->end, - new_entry->protection & - ~VM_PROT_WRITE); - } + /* + * Skip brk() allocation addresses. + */ + if (addr + sz > map->b_start && addr < map->b_end) { + if (FREE_END(entry) > map->b_end) { + addr = map->b_end; + goto restart; + } else + continue; + } + /* + * Skip stack allocation addresses. + */ + if (addr + sz > map->s_start && addr < map->s_end) { + if (FREE_END(entry) > map->s_end) { + addr = map->s_end; + goto restart; + } else + continue; + } - } - break; - } /* end of switch statement */ - old_entry = old_entry->next; + last = NULL; + if (uvm_map_isavail(&map->addr, &entry, &last, addr, sz)) { + error = 0; + goto out; + } } - new_map->size = old_map->size; - vm_map_unlock(old_map); - -#ifdef SYSVSHM - if (vm1->vm_shm) - shmfork(vm1, vm2); -#endif - -#ifdef PMAP_FORK - pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); -#endif - - UVMHIST_LOG(maphist,"<- done",0,0,0,0); - return(vm2); +out: + vm_map_unlock_read(map); + if (error == 0) + *addr_p = addr; + return error; } -#if defined(DDB) - /* - * DDB hooks - */ - -/* - * uvm_map_printit: actually prints the map + * Determine allocation bias. + * + * Returns 1 if we should bias to high addresses, -1 for a bias towards low + * addresses, or 0 for no bias. + * The bias mechanism is intended to avoid clashing with brk() and stack + * areas. */ - -void -uvm_map_printit(struct vm_map *map, boolean_t full, - int (*pr)(const char *, ...)) +int +uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) { - struct vm_map_entry *entry; + vaddr_t start, end; - (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); - (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", - map->nentries, map->size, map->ref_count, map->timestamp, - map->flags); -#ifdef pmap_resident_count - (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, - pmap_resident_count(map->pmap)); + start = FREE_START(entry); + end = FREE_END(entry); + + /* + * Stay at the top of brk() area. + */ + if (end >= map->b_start && start < map->b_end) + return 1; + /* + * Stay at the far end of the stack area. + */ + if (end >= map->s_start && start < map->s_end) { +#ifdef MACHINE_STACK_GROWS_UP + return 1; #else - /* XXXCDC: this should be required ... 
*/ - (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); + return -1; #endif - if (!full) - return; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", - entry, entry->start, entry->end, entry->object.uvm_obj, - (long long)entry->offset, entry->aref.ar_amap, - entry->aref.ar_pageoff); - (*pr)( - "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " - "wc=%d, adv=%d\n", - (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', - (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', - (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', - entry->protection, entry->max_protection, - entry->inheritance, entry->wired_count, entry->advice); } -} -/* - * uvm_object_printit: actually prints the object - */ + /* + * No bias, this area is meant for us. + */ + return 0; +} -void -uvm_object_printit(uobj, full, pr) - struct uvm_object *uobj; - boolean_t full; - int (*pr)(const char *, ...); -{ - struct vm_page *pg; - int cnt = 0; - (*pr)("OBJECT %p: pgops=%p, npages=%d, ", - uobj, uobj->pgops, uobj->uo_npages); - if (UVM_OBJ_IS_KERN_OBJECT(uobj)) - (*pr)("refs=<SYSTEM>\n"); - else - (*pr)("refs=%d\n", uobj->uo_refs); +boolean_t +vm_map_lock_try_ln(struct vm_map *map, char *file, int line) +{ + boolean_t rv; - if (!full) { - return; - } - (*pr)(" PAGES <pg,offset>:\n "); - RB_FOREACH(pg, uvm_objtree, &uobj->memt) { - (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); - if ((cnt % 3) == 2) { - (*pr)("\n "); + if (map->flags & VM_MAP_INTRSAFE) { + rv = TRUE; + } else { + if (map->flags & VM_MAP_BUSY) { + return (FALSE); } - cnt++; + rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); } - if ((cnt % 3) != 2) { - (*pr)("\n"); + + if (rv) { + map->timestamp++; + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); } -} -/* - * uvm_page_printit: actually print the page - */ + return (rv); +} -static const char page_flagbits[] = - "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" - "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" - "\31PMAP1\32PMAP2\33PMAP3"; +void +vm_map_lock_ln(struct vm_map *map, char *file, int line) +{ + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + do { + while (map->flags & VM_MAP_BUSY) { + map->flags |= VM_MAP_WANTLOCK; + tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); + } + } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); + } + + map->timestamp++; + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); +} void -uvm_page_printit(pg, full, pr) - struct vm_page *pg; - boolean_t full; - int (*pr)(const char *, ...); +vm_map_lock_read_ln(struct vm_map *map, char *file, int line) { - struct vm_page *tpg; - struct uvm_object *uobj; - struct pglist *pgl; + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_enter_read(&map->lock); + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); +} - (*pr)("PAGE %p:\n", pg); - (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", - pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, - (long long)pg->phys_addr); - (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", - pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); -#if defined(UVM_PAGE_TRKOWN) - if (pg->pg_flags & PG_BUSY) - (*pr)(" owning process = %d, tag=%s\n", - pg->owner, pg->owner_tag); - else - (*pr)(" page not 
busy, no owner\n"); -#else - (*pr)(" [page ownership tracking disabled]\n"); -#endif +void +vm_map_unlock_ln(struct vm_map *map, char *file, int line) +{ + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&map->lock); +} - if (!full) - return; +void +vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) +{ + /* XXX: RO */ uvm_tree_sanity(map, file, line); + /* XXX: RO */ uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit_read(&map->lock); +} - /* cross-verify object/anon */ - if ((pg->pg_flags & PQ_FREE) == 0) { - if (pg->pg_flags & PQ_ANON) { - if (pg->uanon == NULL || pg->uanon->an_page != pg) - (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", - (pg->uanon) ? pg->uanon->an_page : NULL); - else - (*pr)(" anon backpointer is OK\n"); - } else { - uobj = pg->uobject; - if (uobj) { - (*pr)(" checking object list\n"); - RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { - if (tpg == pg) { - break; - } - } - if (tpg) - (*pr)(" page found on object list\n"); - else - (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); - } - } - } +void +vm_map_downgrade_ln(struct vm_map *map, char *file, int line) +{ + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_enter(&map->lock, RW_DOWNGRADE); +} - /* cross-verify page queue */ - if (pg->pg_flags & PQ_FREE) { - if (uvm_pmr_isfree(pg)) - printf(" page found in uvm_pmemrange\n"); - else - printf(" >>> page not found in uvm_pmemrange <<<\n"); - pgl = NULL; - } else if (pg->pg_flags & PQ_INACTIVE) { - pgl = (pg->pg_flags & PQ_SWAPBACKED) ? - &uvm.page_inactive_swp : &uvm.page_inactive_obj; - } else if (pg->pg_flags & PQ_ACTIVE) { - pgl = &uvm.page_active; - } else { - pgl = NULL; +void +vm_map_upgrade_ln(struct vm_map *map, char *file, int line) +{ + /* XXX: RO */ uvm_tree_sanity(map, file, line); + /* XXX: RO */ uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + rw_exit_read(&map->lock); + rw_enter_write(&map->lock); } + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); +} - if (pgl) { - (*pr)(" checking pageq list\n"); - TAILQ_FOREACH(tpg, pgl, pageq) { - if (tpg == pg) { - break; - } - } - if (tpg) - (*pr)(" page found on pageq list\n"); - else - (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); - } +void +vm_map_busy_ln(struct vm_map *map, char *file, int line) +{ + map->flags |= VM_MAP_BUSY; } -#endif + +void +vm_map_unbusy_ln(struct vm_map *map, char *file, int line) +{ + int oflags; + + oflags = map->flags; + map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); + if (oflags & VM_MAP_WANTLOCK) + wakeup(&map->flags); +} + + +RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, + uvm_mapentry_addrcmp); +RB_GENERATE(uvm_map_free_int, vm_map_entry, free_entry, uvm_mapentry_freecmp); diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index 0c05d491289..23b5950ee1c 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,7 +1,22 @@ -/* $OpenBSD: uvm_map.h,v 1.44 2010/12/24 21:49:04 tedu Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.45 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ -/* +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. * @@ -75,14 +90,28 @@ #ifdef _KERNEL /* + * Internal functions. + * + * Required by clipping macros. + */ +void uvm_map_clip_end(struct vm_map*, struct vm_map_entry*, + vaddr_t); +void uvm_map_clip_start(struct vm_map*, + struct vm_map_entry*, vaddr_t); + +/* * UVM_MAP_CLIP_START: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. * * => map must be locked by caller */ -#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \ - if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); } +#define UVM_MAP_CLIP_START(_map, _entry, _addr) \ + do { \ + KASSERT((_entry)->end + (_entry)->fspace > (_addr)); \ + if ((_entry)->start < (_addr)) \ + uvm_map_clip_start((_map), (_entry), (_addr)); \ + } while (0) /* * UVM_MAP_CLIP_END: ensure that the entry ends at or before @@ -91,15 +120,16 @@ * => map must be locked by caller */ -#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \ - if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); } +#define UVM_MAP_CLIP_END(_map, _entry, _addr) \ + do { \ + KASSERT((_entry)->start < (_addr)); \ + if ((_entry)->end > (_addr)) \ + uvm_map_clip_end((_map), (_entry), (_addr)); \ + } while (0) /* * extract flags */ -#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */ -#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */ -#define UVM_EXTRACT_QREF 0x4 /* use quick refs */ #define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */ #endif /* _KERNEL */ @@ -133,21 +163,30 @@ union vm_map_object { * Also included is control information for virtual copy operations. 
*/ struct vm_map_entry { - RB_ENTRY(vm_map_entry) rb_entry; /* tree information */ - vaddr_t ownspace; /* free space after */ - vaddr_t space; /* space in subtree */ - struct vm_map_entry *prev; /* previous entry */ - struct vm_map_entry *next; /* next entry */ + union { + RB_ENTRY(vm_map_entry) addr_entry; /* address tree */ + TAILQ_ENTRY(vm_map_entry) deadq; /* dead entry queue */ + } daddrs; + RB_ENTRY(vm_map_entry) free_entry; /* free-space tree */ + +#define uvm_map_entry_start_copy start vaddr_t start; /* start address */ vaddr_t end; /* end address */ + + vsize_t guard; /* bytes in guard */ + vsize_t fspace; /* free space */ + union vm_map_object object; /* object I point to */ voff_t offset; /* offset into object */ + struct vm_aref aref; /* anonymous overlay */ + int etype; /* entry type */ + vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ + int wired_count; /* can be paged if == 0 */ - struct vm_aref aref; /* anonymous overlay */ int advice; /* madvise advice */ #define uvm_map_entry_stop_copy flags u_int8_t flags; /* flags */ @@ -156,18 +195,29 @@ struct vm_map_entry { #define UVM_MAP_KMEM 0x02 /* from kmem entry pool */ }; +#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0) + +TAILQ_HEAD(uvm_map_deadq, vm_map_entry); /* dead entry queue */ +RB_HEAD(uvm_map_addr, vm_map_entry); +RB_HEAD(uvm_map_free_int, vm_map_entry); +RB_PROTOTYPE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, + uvm_mapentry_addrcmp); +RB_PROTOTYPE(uvm_map_free_int, vm_map_entry, free_entry, uvm_mapentry_freecmp); + /* - * Marks the map entry as a guard page, using vm_map_entry.etype. + * Tree with size information. */ -#define MAP_ET_KVAGUARD 0x10 /* guard entry */ - -#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0) +struct uvm_map_free { + struct uvm_map_free_int tree; /* Tree of free items. */ + size_t treesz; /* Size of tree. */ +}; /* - * Maps are doubly-linked lists of map entries, kept sorted - * by address. A single hint is provided to start - * searches again from the last successful search, - * insertion, or removal. + * A Map is a rbtree of map entries, kept sorted by address. + * In addition, free space entries are also kept in a rbtree, + * indexed by free size. + * + * * * LOCKING PROTOCOL NOTES: * ----------------------- @@ -214,23 +264,59 @@ struct vm_map_entry { * is busy, and thread is attempting * to write-lock. must be tested * while `flags_lock' is asserted. + * + * VM_MAP_GUARDPAGES r/o; must be specified at map + * initialization time. + * If set, guards will appear between + * automatic allocations. + * No locking required. + * + * VM_MAP_ISVMSPACE r/o; set by uvmspace_alloc. + * Signifies that this map is a vmspace. + * (The implementation treats all maps + * without this bit as kernel maps.) + * No locking required. + * + * + * All automatic allocations (uvm_map without MAP_FIXED) will allocate + * from vm_map.free. + * If that allocation fails: + * - vmspace maps will spill over into vm_map.bfree, + * - all other maps will call uvm_map_kmem_grow() to increase the arena. + * + * vmspace maps have their data, brk() and stack arenas automatically + * updated when uvm_map() is invoked without MAP_FIXED. + * The spill over arena (vm_map.bfree) will contain the space in the brk() + * and stack ranges. + * Kernel maps never have a bfree arena and this tree will always be empty. + * + * + * read_locks and write_locks are used in lock debugging code. 
*/ struct vm_map { struct pmap * pmap; /* Physical map */ struct rwlock lock; /* Lock for map data */ - RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */ - struct vm_map_entry header; /* List of entries */ - int nentries; /* Number of entries */ + + struct uvm_map_addr addr; /* Entry tree, by addr */ + struct uvm_map_free free; /* Free space tree */ + struct uvm_map_free bfree; /* brk() space tree */ + vsize_t size; /* virtual size */ int ref_count; /* Reference count */ simple_lock_data_t ref_lock; /* Lock for ref_count field */ - vm_map_entry_t hint; /* hint for quick lookups */ - simple_lock_data_t hint_lock; /* lock for hint storage */ - vm_map_entry_t first_free; /* First free space hint */ int flags; /* flags */ unsigned int timestamp; /* Version number */ -#define min_offset header.start -#define max_offset header.end + + vaddr_t min_offset; /* First address in map. */ + vaddr_t max_offset; /* Last address in map. */ + + /* + * Allocation overflow regions. + */ + vaddr_t b_start; /* Start for brk() alloc. */ + vaddr_t b_end; /* End for brk() alloc. */ + vaddr_t s_start; /* Start for stack alloc. */ + vaddr_t s_end; /* End for stack alloc. */ }; /* vm_map flags */ @@ -239,11 +325,18 @@ struct vm_map { #define VM_MAP_WIREFUTURE 0x04 /* rw: wire future mappings */ #define VM_MAP_BUSY 0x08 /* rw: map is busy */ #define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */ +#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */ +#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */ /* XXX: number of kernel maps and entries to statically allocate */ #if !defined(MAX_KMAPENT) -#define MAX_KMAPENT 1024 /* XXXCDC: no crash */ +#ifdef KVA_GUARDPAGES +/* Sufficient for UVM_KM_MAXPAGES_HIWAT(8192) + overhead. */ +#define MAX_KMAPENT 8192 + 1024 +#else +#define MAX_KMAPENT 1024 /* XXXCDC: no crash */ +#endif #endif /* !defined MAX_KMAPENT */ #ifdef _KERNEL @@ -279,32 +372,27 @@ extern vaddr_t uvm_maxkaddr; void uvm_map_deallocate(vm_map_t); int uvm_map_clean(vm_map_t, vaddr_t, vaddr_t, int); -void uvm_map_clip_start(vm_map_t, vm_map_entry_t, vaddr_t); -void uvm_map_clip_end(vm_map_t, vm_map_entry_t, vaddr_t); vm_map_t uvm_map_create(pmap_t, vaddr_t, vaddr_t, int); -int uvm_map_extract(vm_map_t, vaddr_t, vsize_t, - vm_map_t, vaddr_t *, int); -vm_map_entry_t uvm_map_findspace(vm_map_t, vaddr_t, vsize_t, vaddr_t *, - struct uvm_object *, voff_t, vsize_t, int); +int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*, + int); vaddr_t uvm_map_pie(vaddr_t); -#define uvm_map_hint(p, prot) uvm_map_hint1(p, prot, 1) -vaddr_t uvm_map_hint1(struct proc *, vm_prot_t, int); +vaddr_t uvm_map_hint(struct proc *, vm_prot_t); int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t); int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int); void uvm_map_init(void); boolean_t uvm_map_lookup_entry(vm_map_t, vaddr_t, vm_map_entry_t *); -void uvm_map_reference(vm_map_t); int uvm_map_replace(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t, int); int uvm_map_reserve(vm_map_t, vsize_t, vaddr_t, vsize_t, vaddr_t *); void uvm_map_setup(vm_map_t, vaddr_t, vaddr_t, int); int uvm_map_submap(vm_map_t, vaddr_t, vaddr_t, vm_map_t); -#define uvm_unmap(_m, _s, _e) uvm_unmap_p(_m, _s, _e, 0) -void uvm_unmap_p(vm_map_t, vaddr_t, vaddr_t, struct proc *); -void uvm_unmap_detach(vm_map_entry_t,int); -void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *, - struct proc *, boolean_t); +void uvm_unmap(vm_map_t, vaddr_t, vaddr_t); +int uvm_map_mquery(struct vm_map*, vaddr_t*, vsize_t, voff_t, int); + 
+void uvm_unmap_detach(struct uvm_map_deadq*, int); +void uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t, + struct uvm_map_deadq*, boolean_t, boolean_t); #endif /* _KERNEL */ @@ -337,82 +425,45 @@ void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *, */ #ifdef _KERNEL -/* XXX: clean up later */ +/* + * XXX: clean up later + * Half the kernel seems to depend on them being included here. + */ #include <sys/time.h> -#include <sys/systm.h> /* for panic() */ - -static __inline boolean_t vm_map_lock_try(vm_map_t); -static __inline void vm_map_lock(vm_map_t); -extern const char vmmapbsy[]; - -static __inline boolean_t -vm_map_lock_try(struct vm_map *map) -{ - boolean_t rv; - - if (map->flags & VM_MAP_INTRSAFE) { - rv = TRUE; - } else { - if (map->flags & VM_MAP_BUSY) { - return (FALSE); - } - rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); - } - - if (rv) - map->timestamp++; - - return (rv); -} - -static __inline void -vm_map_lock(struct vm_map *map) -{ - if (map->flags & VM_MAP_INTRSAFE) - return; - - do { - while (map->flags & VM_MAP_BUSY) { - map->flags |= VM_MAP_WANTLOCK; - tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); - } - } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); - - map->timestamp++; -} - -#define vm_map_lock_read(map) rw_enter_read(&(map)->lock) - -#define vm_map_unlock(map) \ -do { \ - if (((map)->flags & VM_MAP_INTRSAFE) == 0) \ - rw_exit(&(map)->lock); \ -} while (0) - -#define vm_map_unlock_read(map) rw_exit_read(&(map)->lock) - -#define vm_map_downgrade(map) rw_enter(&(map)->lock, RW_DOWNGRADE) - -#define vm_map_upgrade(map) \ -do { \ - rw_exit_read(&(map)->lock); \ - rw_enter_write(&(map)->lock); \ -} while (0) - -#define vm_map_busy(map) \ -do { \ - (map)->flags |= VM_MAP_BUSY; \ -} while (0) +#include <sys/systm.h> /* for panic() */ + +boolean_t vm_map_lock_try_ln(struct vm_map*, char*, int); +void vm_map_lock_ln(struct vm_map*, char*, int); +void vm_map_lock_read_ln(struct vm_map*, char*, int); +void vm_map_unlock_ln(struct vm_map*, char*, int); +void vm_map_unlock_read_ln(struct vm_map*, char*, int); +void vm_map_downgrade_ln(struct vm_map*, char*, int); +void vm_map_upgrade_ln(struct vm_map*, char*, int); +void vm_map_busy_ln(struct vm_map*, char*, int); +void vm_map_unbusy_ln(struct vm_map*, char*, int); + +#ifdef DIAGNOSTIC +#define vm_map_lock_try(map) vm_map_lock_try_ln(map, __FILE__, __LINE__) +#define vm_map_lock(map) vm_map_lock_ln(map, __FILE__, __LINE__) +#define vm_map_lock_read(map) vm_map_lock_read_ln(map, __FILE__, __LINE__) +#define vm_map_unlock(map) vm_map_unlock_ln(map, __FILE__, __LINE__) +#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, __FILE__, __LINE__) +#define vm_map_downgrade(map) vm_map_downgrade_ln(map, __FILE__, __LINE__) +#define vm_map_upgrade(map) vm_map_upgrade_ln(map, __FILE__, __LINE__) +#define vm_map_busy(map) vm_map_busy_ln(map, __FILE__, __LINE__) +#define vm_map_unbusy(map) vm_map_unbusy_ln(map, __FILE__, __LINE__) +#else +#define vm_map_lock_try(map) vm_map_lock_try_ln(map, NULL, 0) +#define vm_map_lock(map) vm_map_lock_ln(map, NULL, 0) +#define vm_map_lock_read(map) vm_map_lock_read_ln(map, NULL, 0) +#define vm_map_unlock(map) vm_map_unlock_ln(map, NULL, 0) +#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, NULL, 0) +#define vm_map_downgrade(map) vm_map_downgrade_ln(map, NULL, 0) +#define vm_map_upgrade(map) vm_map_upgrade_ln(map, NULL, 0) +#define vm_map_busy(map) vm_map_busy_ln(map, NULL, 0) +#define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0) +#endif -#define 
vm_map_unbusy(map) \ -do { \ - int oflags; \ - \ - oflags = (map)->flags; \ - (map)->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); \ - if (oflags & VM_MAP_WANTLOCK) \ - wakeup(&(map)->flags); \ -} while (0) #endif /* _KERNEL */ /* diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 4ae8c3d3797..a29d9020313 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_mmap.c,v 1.82 2010/12/24 21:49:04 tedu Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.83 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -181,43 +181,14 @@ sys_mquery(struct proc *p, void *v, register_t *retval) } else { fp = NULL; uobj = NULL; - uoff = 0; + uoff = UVM_UNKNOWN_OFFSET; } - if (vaddr == 0) - vaddr = uvm_map_hint(p, prot); - - /* prevent a user requested address from falling in heap space */ - if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) && - (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { - if (flags & UVM_FLAG_FIXED) { - error = EINVAL; - goto done; - } - vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); - } - vm_map_lock(&p->p_vmspace->vm_map); - -again: - if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size, - &vaddr, uobj, uoff, 0, flags) == NULL) { - if (flags & UVM_FLAG_FIXED) - error = EINVAL; - else - error = ENOMEM; - } else { - /* prevent a returned address from falling in heap space */ - if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) - && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { - vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + - BRKSIZ); - goto again; - } - error = 0; + error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff, + flags); + if (error == 0) *retval = (register_t)(vaddr); - } - vm_map_unlock(&p->p_vmspace->vm_map); -done: + if (fp != NULL) FRELE(fp); return (error); @@ -241,7 +212,7 @@ sys_mincore(struct proc *p, void *v, register_t *retval) struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; - vm_map_entry_t entry; + vm_map_entry_t entry, next; vaddr_t start, end, lim; vm_map_t map; vsize_t len, npgs; @@ -290,15 +261,16 @@ sys_mincore(struct proc *p, void *v, register_t *retval) } for (/* nothing */; - entry != &map->header && entry->start < end; - entry = entry->next) { + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { KASSERT(!UVM_ET_ISSUBMAP(entry)); KASSERT(start >= entry->start); /* Make sure there are no holes. */ + next = RB_NEXT(uvm_map_addr, &map->addr, entry); if (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end)) { + (next == NULL || + next->start > entry->end)) { error = ENOMEM; goto out; } @@ -451,17 +423,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval) if (vm_min_address > 0 && addr < vm_min_address) return (EINVAL); - } else { - - /* - * not fixed: make sure we skip over the largest possible heap. - * we will refine our guess later (e.g. 
to account for VAC, etc) - */ - if (addr == 0) - addr = uvm_map_hint(p, prot); - else if (!(flags & MAP_TRYFIXED) && - addr < (vaddr_t)p->p_vmspace->vm_daddr) - addr = uvm_map_hint(p, prot); } /* @@ -604,13 +565,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval) error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); - if (error == ENOMEM && !(flags & (MAP_FIXED | MAP_TRYFIXED))) { - /* once more, with feeling */ - addr = uvm_map_hint1(p, prot, 0); - error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, - maxprot, flags, handle, pos, - p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); - } if (error == 0) /* remember to add offset */ @@ -721,7 +675,7 @@ sys_munmap(struct proc *p, void *v, register_t *retval) vsize_t size, pageoff; vm_map_t map; vaddr_t vm_min_address = VM_MIN_ADDRESS; - struct vm_map_entry *dead_entries; + struct uvm_map_deadq dead_entries; /* * get syscall args... @@ -763,12 +717,12 @@ sys_munmap(struct proc *p, void *v, register_t *retval) /* * doit! */ - uvm_unmap_remove(map, addr, addr + size, &dead_entries, p, FALSE); + TAILQ_INIT(&dead_entries); + uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE); vm_map_unlock(map); /* and unlock */ - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + uvm_unmap_detach(&dead_entries, 0); return (0); } @@ -1099,7 +1053,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, if (*addr & PAGE_MASK) return(EINVAL); uvmflag |= UVM_FLAG_FIXED; - uvm_unmap_p(map, *addr, *addr + size, p); /* zap! */ + uvm_unmap(map, *addr, *addr + size); /* zap! */ } /* @@ -1193,7 +1147,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, uvmflag); - error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p); + error = uvm_map(map, addr, size, uobj, foff, align, uvmflag); if (error == 0) { /* diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index ec7922cd9f3..2b8eee79057 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_unix.c,v 1.40 2009/11/24 10:35:56 otto Exp $ */ +/* $OpenBSD: uvm_unix.c,v 1.41 2011/05/24 15:27:36 ariane Exp $ */ /* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */ /* @@ -167,9 +167,7 @@ uvm_coredump(struct proc *p, struct vnode *vp, struct ucred *cred, offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - + RB_FOREACH(entry, uvm_map_addr, &map->addr) { /* should never happen for a user process */ if (UVM_ET_ISSUBMAP(entry)) { panic("uvm_coredump: user process with submap?"); @@ -261,9 +259,7 @@ uvm_coredump_walkmap(struct proc *p, void *iocookie, vaddr_t top; int error; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - + RB_FOREACH(entry, uvm_map_addr, &map->addr) { state.cookie = cookie; state.prot = entry->protection; state.flags = 0; |
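
Editor's illustrative sketch (not part of the committed diff): the new dead-entry queue replaces the old vm_map_entry_t out-parameter of uvm_unmap_remove(). The caller pattern below mirrors the sys_munmap() hunk above -- initialize a struct uvm_map_deadq, collect entries under the map lock with uvm_unmap_remove(), drop the lock, then reap the queue with uvm_unmap_detach(). The helper name example_unmap_range is hypothetical, and the two boolean arguments are copied verbatim from the sys_munmap() call without claiming their exact semantics.

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>	/* assumed: pulls in uvm_map.h and the lock macros */

/*
 * Illustrative sketch only -- not part of the diff above.
 * Unlink the entries covering [start, end) while holding the map lock,
 * then release their amap/object references outside the lock.
 */
void
example_unmap_range(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct uvm_map_deadq dead;	/* TAILQ_HEAD of vm_map_entry */

	TAILQ_INIT(&dead);

	vm_map_lock(map);
	/* Move the entries in [start, end) onto the dead queue. */
	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
	vm_map_unlock(map);

	/* Reap the collected entries once the map lock is released. */
	uvm_unmap_detach(&dead, 0);
}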
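
A second sketch: the clamping rule used by uvm_map_boundfix() and composed by uvm_map_boundary() can be exercised in isolation. The helper body below is copied from the diff; the addresses are made-up values chosen only to show that repeated clamping, as uvm_map_boundary() does for the brk()/stack/kernel bounds, yields the lowest boundary that falls strictly inside (min, max).

#include <assert.h>

typedef unsigned long vaddr_t;	/* stand-in for the kernel type */

/* Same body as uvm_map_boundfix() in the diff. */
static vaddr_t
boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
{
	return (min < bound && max > bound) ? bound : max;
}

int
main(void)
{
	vaddr_t min, max;

	/* Boundary inside (min, max): the range is clipped at the boundary. */
	assert(boundfix(0x1000, 0x9000, 0x4000) == 0x4000);
	/* Boundary at or outside the range: max is returned unchanged. */
	assert(boundfix(0x1000, 0x9000, 0xf000) == 0x9000);
	assert(boundfix(0x5000, 0x9000, 0x1000) == 0x9000);

	/*
	 * Composed as in uvm_map_boundary(): the lowest boundary that
	 * falls strictly inside (min, max) wins.
	 */
	min = 0x1000;
	max = 0x9000;
	max = boundfix(min, max, 0x7000);
	max = boundfix(min, max, 0x3000);
	assert(max == 0x3000);

	return 0;
}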