/* $OpenBSD: vm_map.c,v 1.13 1998/03/01 00:38:11 niklas Exp $ */ /* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_map.c 8.9 (Berkeley) 5/17/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory mapping module. */ #include #include #include #include #include #include #include /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, * this module provides for an efficient virtual copy of * memory from one map to another. * * Synchronization is required prior to most operations. 
*
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a single hint is used to speed up lookups.
 *
 *	In order to properly represent the sharing of virtual
 *	memory regions among maps, the map structure is bi-level.
 *	Top-level ("address") maps refer to regions of sharable
 *	virtual memory.  These regions are implemented as
 *	("sharing") maps, which then refer to the actual virtual
 *	memory objects.  When two address maps "share" memory,
 *	their top-level maps both have references to the same
 *	sharing map.  When memory is virtual-copied from one
 *	address map to another, the references in the sharing
 *	maps are actually copied -- no copying occurs at the
 *	virtual memory object level.
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *	No attempt is currently made to "glue back together" two
 *	abutting entries.
 *
 *	As mentioned above, virtual copy operations are performed
 *	by copying VM object references from one sharing map to
 *	another, and then marking both regions as copy-on-write.
 *	It is important to note that only one writeable reference
 *	to a VM object region exists in any map -- this means that
 *	shadow object creation can be delayed until a write operation
 *	occurs.
 */

/*
 *	vm_map_startup:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from the general
 *	purpose memory pool with some exceptions:
 *
 *	- The kernel map and kmem submap are allocated statically.
 *	- Kernel map entries are allocated out of a static pool.
* * These restrictions are necessary since malloc() uses the * maps and requires map entries. */ #if defined(MACHINE_NEW_NONCONTIG) u_int8_t kentry_data_store[MAX_KMAP*sizeof(struct vm_map) + MAX_KMAPENT*sizeof(struct vm_map_entry)]; vm_offset_t kentry_data = (vm_offset_t) kentry_data_store; vm_size_t kentry_data_size = sizeof(kentry_data_store); #else /* NUKE NUKE NUKE */ vm_offset_t kentry_data; vm_size_t kentry_data_size; #endif vm_map_entry_t kentry_free; vm_map_t kmap_free; static int kentry_count; static vm_offset_t mapvm_start, mapvm, mapvmmax; static int mapvmpgcnt; static struct vm_map_entry *mappool; static int mappoolcnt; #define KENTRY_LOW_WATER 128 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); void vm_map_startup() { register int i; register vm_map_entry_t mep; vm_map_t mp; /* * zero kentry area * XXX necessary? */ bzero((caddr_t)kentry_data, kentry_data_size); /* * Static map structures for allocation before initialization of * kernel map or kmem map. vm_map_create knows how to deal with them. */ kmap_free = mp = (vm_map_t) kentry_data; i = MAX_KMAP; while (--i > 0) { mp->header.next = (vm_map_entry_t) (mp + 1); mp++; } mp++->header.next = NULL; /* * Form a free list of statically allocated kernel map entries * with the rest. */ kentry_free = mep = (vm_map_entry_t) mp; kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; while (--i > 0) { mep->next = mep + 1; mep++; } mep->next = NULL; } /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. * The remaining fields must be initialized by the caller. 
*/ struct vmspace * vmspace_alloc(min, max, pageable) vm_offset_t min, max; int pageable; { register struct vmspace *vm; if (mapvmpgcnt == 0 && mapvm == 0) { #if defined(MACHINE_NEW_NONCONTIG) int vm_page_count = 0; int lcv; for (lcv = 0; lcv < vm_nphysseg; lcv++) vm_page_count += (vm_physmem[lcv].end - vm_physmem[lcv].start); mapvmpgcnt = (vm_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; #elif defined(MACHINE_NONCONTIG) mapvmpgcnt = (vm_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; #else /* must be contig */ mapvmpgcnt = ((last_page-first_page) * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; #endif /* contig */ mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, mapvmpgcnt * PAGE_SIZE); mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; if (!mapvm) mapvmpgcnt = 0; } MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); vm_map_init(&vm->vm_map, min, max, pageable); pmap_pinit(&vm->vm_pmap); vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ vm->vm_refcnt = 1; return (vm); } void vmspace_free(vm) register struct vmspace *vm; { if (--vm->vm_refcnt == 0) { /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, * then call the pmap module to reclaim anything left. */ vm_map_lock(&vm->vm_map); (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); pmap_release(&vm->vm_pmap); FREE(vm, M_VMMAP); } } /* * vm_map_create: * * Creates and returns a new empty VM map with * the given physical map structure, and having * the given lower and upper address bounds. 
*/ vm_map_t vm_map_create(pmap, min, max, pageable) pmap_t pmap; vm_offset_t min, max; boolean_t pageable; { register vm_map_t result; extern vm_map_t kmem_map; if (kmem_map == NULL) { result = kmap_free; if (result == NULL) panic("vm_map_create: out of maps"); kmap_free = (vm_map_t) result->header.next; } else MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK); vm_map_init(result, min, max, pageable); result->pmap = pmap; return(result); } /* * Initialize an existing vm_map structure * such as that in the vmspace structure. * The pmap is set elsewhere. */ void vm_map_init(map, min, max, pageable) register struct vm_map *map; vm_offset_t min, max; boolean_t pageable; { map->header.next = map->header.prev = &map->header; map->nentries = 0; map->size = 0; map->ref_count = 1; map->is_main_map = TRUE; map->min_offset = min; map->max_offset = max; map->entries_pageable = pageable; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; lockinit(&map->lock, PVM, "thrd_sleep", 0, 0); simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); } /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. This routine is */ vm_map_entry_t vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; int i, s; /* * This is a *very* nasty (and sort of incomplete) hack!!!! 
*/ if (kentry_count < KENTRY_LOW_WATER) { s = splimp(); if (mapvmpgcnt && mapvm) { vm_page_t m; m = vm_page_alloc(kernel_object, mapvm - VM_MIN_KERNEL_ADDRESS); if (m) { int newentries; newentries = (PAGE_SIZE / sizeof(struct vm_map_entry)); #ifdef DIAGNOSTIC printf("vm_map_entry_create: allocated %d new entries.\n", newentries); #endif /* XXX */ vm_page_wire(m); PAGE_WAKEUP(m); pmap_enter(pmap_kernel(), mapvm, VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, FALSE); entry = (vm_map_entry_t) mapvm; mapvm += PAGE_SIZE; --mapvmpgcnt; for (i = 0; i < newentries; i++) { vm_map_entry_dispose(kernel_map, entry); entry++; } } } splx(s); } if (map->entries_pageable) { if ((entry = mappool) != NULL) { mappool = mappool->next; --mappoolcnt; } else { MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK); if (entry == NULL) panic("vm_map_entry_create: couldn't alloc pageable map entry"); } } else { s = splimp(); if ((entry = kentry_free) != NULL) { kentry_free = kentry_free->next; --kentry_count; } if (entry == NULL) panic("vm_map_entry_create: out of map entries for kernel"); splx(s); } return(entry); } /* * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. */ void vm_map_entry_dispose(map, entry) vm_map_t map; vm_map_entry_t entry; { int s; if (map->entries_pageable) { entry->next = mappool; mappool = entry; ++mappoolcnt; } else { s = splimp(); entry->next = kentry_free; kentry_free = entry; ++kentry_count; splx(s); } } /* * vm_map_entry_{un,}link: * * Insert/remove entries from maps. 
*/ #define vm_map_entry_link(map, after_where, entry) \ { \ (map)->nentries++; \ (entry)->prev = (after_where); \ (entry)->next = (after_where)->next; \ (entry)->prev->next = (entry); \ (entry)->next->prev = (entry); \ } #define vm_map_entry_unlink(map, entry) \ { \ (map)->nentries--; \ (entry)->next->prev = (entry)->prev; \ (entry)->prev->next = (entry)->next; \ } /* * vm_map_reference: * * Creates another valid reference to the given map. * */ void vm_map_reference(map) register vm_map_t map; { if (map == NULL) return; simple_lock(&map->ref_lock); map->ref_count++; simple_unlock(&map->ref_lock); } /* * vm_map_deallocate: * * Removes a reference from the specified map, * destroying it if no references remain. * The map should not be locked. */ void vm_map_deallocate(map) register vm_map_t map; { register int c; if (map == NULL) return; simple_lock(&map->ref_lock); c = --map->ref_count; simple_unlock(&map->ref_lock); if (c > 0) { return; } /* * Lock the map, to wait out all other references * to it. */ vm_map_lock_drain_interlock(map); (void) vm_map_delete(map, map->min_offset, map->max_offset); pmap_destroy(map->pmap); vm_map_unlock(map); FREE(map, M_VMMAP); } /* * vm_map_insert: * * Inserts the given whole VM object into the target * map at the specified address range. The object's * size should match that of the address range. * * Requires that the map be locked, and leaves it so. */ int vm_map_insert(map, object, offset, start, end) vm_map_t map; vm_object_t object; vm_offset_t offset; vm_offset_t start; vm_offset_t end; { register vm_map_entry_t new_entry; register vm_map_entry_t prev_entry; vm_map_entry_t temp_entry; /* * Check that the start and end points are not bogus. */ if ((start < map->min_offset) || (end > map->max_offset) || (start >= end)) return(KERN_INVALID_ADDRESS); /* * Find the entry prior to the proposed * starting address; if it's part of an * existing entry, this range is bogus. 
*/ if (vm_map_lookup_entry(map, start, &temp_entry)) return(KERN_NO_SPACE); prev_entry = temp_entry; /* * Assert that the next entry doesn't overlap the * end point. */ if ((prev_entry->next != &map->header) && (prev_entry->next->start < end)) return(KERN_NO_SPACE); /* * See if we can avoid creating a new entry by * extending one of our neighbors. */ if (object == NULL) { if ((prev_entry != &map->header) && (prev_entry->end == start) && (map->is_main_map) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == VM_PROT_DEFAULT) && (prev_entry->max_protection == VM_PROT_DEFAULT) && (prev_entry->wired_count == 0)) { if (vm_object_coalesce(prev_entry->object.vm_object, NULL, prev_entry->offset, (vm_offset_t) 0, (vm_size_t)(prev_entry->end - prev_entry->start), (vm_size_t)(end - prev_entry->end))) { /* * Coalesced the two objects - can extend * the previous map entry to include the * new range. */ map->size += (end - prev_entry->end); prev_entry->end = end; return(KERN_SUCCESS); } } } /* * Create a new entry */ new_entry = vm_map_entry_create(map); new_entry->start = start; new_entry->end = end; new_entry->is_a_map = FALSE; new_entry->is_sub_map = FALSE; new_entry->object.vm_object = object; new_entry->offset = offset; new_entry->copy_on_write = FALSE; new_entry->needs_copy = FALSE; if (map->is_main_map) { new_entry->inheritance = VM_INHERIT_DEFAULT; new_entry->protection = VM_PROT_DEFAULT; new_entry->max_protection = VM_PROT_DEFAULT; new_entry->wired_count = 0; } /* * Insert the new entry into the list */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; /* * Update the free space hint */ if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) map->first_free = new_entry; return(KERN_SUCCESS); } /* * SAVE_HINT: * * Saves the specified entry as the hint for * future lookups. Performs necessary interlocks. 
*/ #define SAVE_HINT(map,value) \ simple_lock(&(map)->hint_lock); \ (map)->hint = (value); \ simple_unlock(&(map)->hint_lock); /* * vm_map_lookup_entry: [ internal use only ] * * Finds the map entry containing (or * immediately preceding) the specified address * in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. */ boolean_t vm_map_lookup_entry(map, address, entry) register vm_map_t map; register vm_offset_t address; vm_map_entry_t *entry; /* OUT */ { register vm_map_entry_t cur; register vm_map_entry_t last; /* * Start looking either from the head of the * list, or from the hint. */ simple_lock(&map->hint_lock); cur = map->hint; simple_unlock(&map->hint_lock); if (cur == &map->header) cur = cur->next; if (address >= cur->start) { /* * Go from hint to end of list. * * But first, make a quick check to see if * we are already looking at the entry we * want (which is usually the case). * Note also that we don't need to save the hint * here... it is the same hint (unless we are * at the header, in which case the hint didn't * buy us anything anyway). */ last = &map->header; if ((cur != last) && (cur->end > address)) { *entry = cur; return(TRUE); } } else { /* * Go from start to hint, *inclusively* */ last = cur->next; cur = map->header.next; } /* * Search linearly */ while (cur != last) { if (cur->end > address) { if (address >= cur->start) { /* * Save this lookup for future * hints, and return */ *entry = cur; SAVE_HINT(map, cur); return(TRUE); } break; } cur = cur->next; } *entry = cur->prev; SAVE_HINT(map, *entry); return(FALSE); } /* * Find sufficient space for `length' bytes in the given map, starting at * `start'. The map must be locked. Returns 0 on success, 1 on no space. 
*/ int vm_map_findspace(map, start, length, addr) register vm_map_t map; register vm_offset_t start; vm_size_t length; vm_offset_t *addr; { register vm_map_entry_t entry, next; register vm_offset_t end; if (start < map->min_offset) start = map->min_offset; if (start > map->max_offset) return (1); /* * Look for the first possible address; if there's already * something at this address, we have to start after it. */ if (start == map->min_offset) { if ((entry = map->first_free) != &map->header) start = entry->end; } else { vm_map_entry_t tmp; if (vm_map_lookup_entry(map, start, &tmp)) start = tmp->end; entry = tmp; } /* * Look through the rest of the map, trying to fit a new region in * the gap between existing regions, or after the very last region. */ for (;; start = (entry = next)->end) { /* * Find the end of the proposed new region. Be sure we didn't * go beyond the end of the map, or wrap around the address; * if so, we lose. Otherwise, if this is the last entry, or * if the proposed new region fits before the next entry, we * win. */ end = start + length; if (end > map->max_offset || end < start) return (1); next = entry->next; if (next == &map->header || next->start >= end) break; } SAVE_HINT(map, entry); *addr = start; return (0); } /* * vm_map_find finds an unallocated region in the target address * map with the given length. The search is defined to be * first-fit from the specified address; the region found is * returned in the same parameter. 
* */ int vm_map_find(map, object, offset, addr, length, find_space) vm_map_t map; vm_object_t object; vm_offset_t offset; vm_offset_t *addr; /* IN/OUT */ vm_size_t length; boolean_t find_space; { register vm_offset_t start; int result; start = *addr; vm_map_lock(map); if (find_space) { if (vm_map_findspace(map, start, length, addr)) { vm_map_unlock(map); return (KERN_NO_SPACE); } start = *addr; } result = vm_map_insert(map, object, offset, start, start + length); vm_map_unlock(map); return (result); } /* * vm_map_simplify_entry: [ internal use only ] * * Simplify the given map entry by: * removing extra sharing maps * [XXX maybe later] merging with a neighbor */ void vm_map_simplify_entry(map, entry) vm_map_t map; vm_map_entry_t entry; { #ifdef lint map++; #endif /* * If this entry corresponds to a sharing map, then * see if we can remove the level of indirection. * If it's not a sharing map, then it points to * a VM object, so see if we can merge with either * of our neighbors. */ if (entry->is_sub_map) return; if (entry->is_a_map) { #if 0 vm_map_t my_share_map; int count; my_share_map = entry->object.share_map; simple_lock(&my_share_map->ref_lock); count = my_share_map->ref_count; simple_unlock(&my_share_map->ref_lock); if (count == 1) { /* Can move the region from * entry->start to entry->end (+ entry->offset) * in my_share_map into place of entry. * Later. */ } #endif } else { /* * Try to merge with our neighbors. * * Conditions for merge are: * * 1. entries are adjacent. * 2. both entries point to objects * with null pagers. * * If a merge is possible, we replace the two * entries with a single entry, then merge * the two objects into a single object. * * Now, all that is left to do is write the * code! */ } } /* * vm_map_clip_start: [ internal use only ] * * Asserts that the given entry begins at or after * the specified address; if necessary, * it splits the entry into two. 
*/ #define vm_map_clip_start(map, entry, startaddr) \ { \ if (startaddr > entry->start) \ _vm_map_clip_start(map, entry, startaddr); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_start(map, entry, start) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t start; { register vm_map_entry_t new_entry; /* * See if we can simplify this entry first */ vm_map_simplify_entry(map, entry); /* * Split off the front portion -- * note that we must insert the new * entry BEFORE this one, so that * this entry has the specified starting * address. */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->end = start; entry->offset += (start - entry->start); entry->start = start; vm_map_entry_link(map, entry->prev, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * vm_map_clip_end: [ internal use only ] * * Asserts that the given entry ends at or before * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_end(map, entry, endaddr) \ { \ if (endaddr < entry->end) \ _vm_map_clip_end(map, entry, endaddr); \ } /* * This routine is called only when it is known that * the entry must be split. 
*/ static void _vm_map_clip_end(map, entry, end) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t end; { register vm_map_entry_t new_entry; /* * Create a new entry and insert it * AFTER the specified entry */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->start = entry->end = end; new_entry->offset += (end - entry->start); vm_map_entry_link(map, entry, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * VM_MAP_RANGE_CHECK: [ internal use only ] * * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ #define VM_MAP_RANGE_CHECK(map, start, end) \ { \ if (start < vm_map_min(map)) \ start = vm_map_min(map); \ if (end > vm_map_max(map)) \ end = vm_map_max(map); \ if (start > end) \ start = end; \ } /* * vm_map_submap: [ kernel use only ] * * Mark the given range as handled by a subordinate map. * * This range must have been created with vm_map_find, * and no other operations may have been performed on this * range prior to calling vm_map_submap. * * Only a limited number of operations can be performed * within this rage after calling vm_map_submap: * vm_fault * [Don't try vm_map_copy!] * * To remove a submapping, one must first remove the * range from the superior map, and then destroy the * submap (if desired). [Better yet, don't try it.] 
*/ int vm_map_submap(map, start, end, submap) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; vm_map_t submap; { vm_map_entry_t entry; register int result = KERN_INVALID_ARGUMENT; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; vm_map_clip_end(map, entry, end); if ((entry->start == start) && (entry->end == end) && (!entry->is_a_map) && (entry->object.vm_object == NULL) && (!entry->copy_on_write)) { entry->is_a_map = FALSE; entry->is_sub_map = TRUE; vm_map_reference(entry->object.sub_map = submap); result = KERN_SUCCESS; } vm_map_unlock(map); return(result); } /* * vm_map_protect: * * Sets the protection of the specified address * region in the target map. If "set_max" is * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ int vm_map_protect(map, start, end, new_prot, set_max) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_prot_t new_prot; register boolean_t set_max; { register vm_map_entry_t current; vm_map_entry_t entry; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; /* * Make a first pass to check for protection * violations. */ current = entry; while ((current != &map->header) && (current->start < end)) { if (current->is_sub_map) return(KERN_INVALID_ARGUMENT); if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return(KERN_PROTECTION_FAILURE); } current = current->next; } /* * Go back and fix up protections. * [Note that clipping is not necessary the second time.] 
*/ current = entry; while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; vm_map_clip_end(map, current, end); old_prot = current->protection; if (set_max) current->protection = (current->max_protection = new_prot) & old_prot; else current->protection = new_prot; /* * Update physical map if necessary. * Worry about copy-on-write here -- CHECK THIS XXX */ if (current->protection != old_prot) { #define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ VM_PROT_ALL) #define max(a,b) ((a) > (b) ? (a) : (b)) if (current->is_a_map) { vm_map_entry_t share_entry; vm_offset_t share_end; vm_map_lock(current->object.share_map); (void) vm_map_lookup_entry( current->object.share_map, current->offset, &share_entry); share_end = current->offset + (current->end - current->start); while ((share_entry != ¤t->object.share_map->header) && (share_entry->start < share_end)) { pmap_protect(map->pmap, (max(share_entry->start, current->offset) - current->offset + current->start), min(share_entry->end, share_end) - current->offset + current->start, current->protection & MASK(share_entry)); share_entry = share_entry->next; } vm_map_unlock(current->object.share_map); } else pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(entry)); #undef max #undef MASK } current = current->next; } vm_map_unlock(map); return(KERN_SUCCESS); } /* * vm_map_inherit: * * Sets the inheritance of the specified address * range in the target map. Inheritance * affects how the map will be shared with * child maps at the time of vm_map_fork. 
*/
int
vm_map_inherit(map, start, end, new_inheritance)
	register vm_map_t	map;
	register vm_offset_t	start;
	register vm_offset_t	end;
	register vm_inherit_t	new_inheritance;
{
	register vm_map_entry_t	entry;
	vm_map_entry_t	temp_entry;

	/* Reject unknown inheritance values before taking the lock. */
	switch (new_inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		break;
	default:
		return(KERN_INVALID_ARGUMENT);
	}

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	/*
	 *	Start with the entry containing `start' (clipped so it
	 *	begins there), or with the first entry after it.
	 */
	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
		vm_map_clip_start(map, entry, start);
	}
	else
		entry = temp_entry->next;

	/* Clip each entry to the range and stamp the new inheritance. */
	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_clip_end(map, entry, end);

		entry->inheritance = new_inheritance;

		entry = entry->next;
	}

	vm_map_unlock(map);
	return(KERN_SUCCESS);
}

/*
 *	vm_map_pageable:
 *
 *	Sets the pageability of the specified address
 *	range in the target map.  Regions specified
 *	as not pageable require locked-down physical
 *	memory and physical page maps.
 *
 *	The map must not be locked, but a reference
 *	must remain to the map throughout the call.
 *
 *	Returns KERN_INVALID_ADDRESS if `start' is not inside any
 *	entry, KERN_INVALID_ARGUMENT if the range has a hole (or,
 *	when unwiring, contains an entry that is not wired), the
 *	vm_fault_wire error if faulting pages in fails, and
 *	KERN_SUCCESS otherwise.
 */
int
vm_map_pageable(map, start, end, new_pageable)
	register vm_map_t	map;
	register vm_offset_t	start;
	register vm_offset_t	end;
	register boolean_t	new_pageable;
{
	register vm_map_entry_t	entry;
	vm_map_entry_t		start_entry;
	register vm_offset_t	failed = 0;	/* start of the entry whose wiring failed */
	int			rv;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	/*
	 *	Only one pageability change may take place at one
	 *	time, since vm_fault assumes it will be called
	 *	only once for each wiring/unwiring.  Therefore, we
	 *	have to make sure we're actually changing the pageability
	 *	for the entire region.  We do so before making any changes.
	 */

	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
		vm_map_unlock(map);
		return(KERN_INVALID_ADDRESS);
	}
	entry = start_entry;

	/*
	 *	Actions are rather different for wiring and unwiring,
	 *	so we have two separate cases.
	 */

	if (new_pageable) {

		vm_map_clip_start(map, entry, start);

		/*
		 *	Unwiring.  First ensure that the range to be
		 *	unwired is really wired down and that there
		 *	are no holes.
		 */
		while ((entry != &map->header) && (entry->start < end)) {

			if (entry->wired_count == 0 ||
			    (entry->end < end &&
			     (entry->next == &map->header ||
			      entry->next->start > entry->end))) {
				vm_map_unlock(map);
				return(KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 *	Now decrement the wiring count for each region.
		 *	If a region becomes completely unwired,
		 *	unwire its physical pages and mappings.
		 */
		vm_map_set_recursive(&map->lock);

		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			vm_map_clip_end(map, entry, end);

			entry->wired_count--;
			if (entry->wired_count == 0)
				vm_fault_unwire(map, entry->start, entry->end);

			entry = entry->next;
		}
		vm_map_clear_recursive(&map->lock);
	}

	else {
		/*
		 *	Wiring.  We must do this in two passes:
		 *
		 *	1.  Holding the write lock, we create any shadow
		 *	    or zero-fill objects that need to be created.
		 *	    Then we clip each map entry to the region to be
		 *	    wired and increment its wiring count.  We
		 *	    create objects before clipping the map entries
		 *	    to avoid object proliferation.
		 *
		 *	2.  We downgrade to a read lock, and call
		 *	    vm_fault_wire to fault in the pages for any
		 *	    newly wired area (wired_count is 1).
		 *
		 *	Downgrading to a read lock for vm_fault_wire avoids
		 *	a possible deadlock with another thread that may have
		 *	faulted on one of the pages to be wired (it would mark
		 *	the page busy, blocking us, then in turn block on the
		 *	map lock that we hold).  Because of problems in the
		 *	recursive lock package, we cannot upgrade to a write
		 *	lock in vm_map_lookup.  Thus, any actions that require
		 *	the write lock must be done beforehand.  Because we
		 *	keep the read lock on the map, the copy-on-write status
		 *	of the entries we modify here cannot change.
		 */

		/*
		 *	Pass 1.
		 */
		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->wired_count == 0) {

				/*
				 *	Perform actions of vm_map_lookup that need
				 *	the write lock on the map: create a shadow
				 *	object for a copy-on-write region, or an
				 *	object for a zero-fill region.
				 *
				 *	We don't have to do this for entries that
				 *	point to sharing maps, because we won't hold
				 *	the lock on the sharing map.
				 */
				if (!entry->is_a_map) {
					if (entry->needs_copy &&
					    ((entry->protection & VM_PROT_WRITE) != 0)) {

						vm_object_shadow(&entry->object.vm_object,
								&entry->offset,
								(vm_size_t)(entry->end
									- entry->start));
						entry->needs_copy = FALSE;
					}
					else if (entry->object.vm_object == NULL) {
						entry->object.vm_object =
						    vm_object_allocate((vm_size_t)(entry->end
									- entry->start));
						entry->offset = (vm_offset_t)0;
					}
				}
			}
			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);
			entry->wired_count++;

			/*
			 * Check for holes
			 */
			if (entry->end < end &&
			    (entry->next == &map->header ||
			     entry->next->start > entry->end)) {
				/*
				 *	Found one.  Object creation actions
				 *	do not need to be undone, but the
				 *	wired counts need to be restored.
				 */
				while (entry != &map->header && entry->end > start) {
					entry->wired_count--;
					entry = entry->prev;
				}
				vm_map_unlock(map);
				return(KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 *	Pass 2.
		 */

		/*
		 * HACK HACK HACK HACK
		 *
		 * If we are wiring in the kernel map or a submap of it,
		 * unlock the map to avoid deadlocks.  We trust that the
		 * kernel threads are well-behaved, and therefore will
		 * not do anything destructive to this region of the map
		 * while we have it unlocked.  We cannot trust user threads
		 * to do the same.
		 *
		 * HACK HACK HACK HACK
		 */
		if (vm_map_pmap(map) == pmap_kernel()) {
			vm_map_unlock(map);		/* trust me ... */
		}
		else {
			vm_map_set_recursive(&map->lock);
			lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc);
		}

		rv = 0;
		entry = start_entry;
		while (entry != &map->header && entry->start < end) {
			/*
			 * If vm_fault_wire fails for any page we need to
			 * undo what has been done.  We decrement the wiring
			 * count for those pages which have not yet been
			 * wired (now) and unwire those that have (later).
			 *
			 * XXX this violates the locking protocol on the map,
			 * needs to be fixed.
			 */
			if (rv)
				entry->wired_count--;
			else if (entry->wired_count == 1) {
				rv = vm_fault_wire(map, entry->start, entry->end);
				if (rv) {
					/* Remember where the failure began. */
					failed = entry->start;
					entry->wired_count--;
				}
			}
			entry = entry->next;
		}

		/* Undo the lock handling done before pass 2. */
		if (vm_map_pmap(map) == pmap_kernel()) {
			vm_map_lock(map);
		}
		else {
			vm_map_clear_recursive(&map->lock);
		}
		if (rv) {
			/*
			 * Unwire [start, failed), the part that was wired
			 * before the failure, by calling ourselves with
			 * new_pageable TRUE.
			 */
			vm_map_unlock(map);
			(void) vm_map_pageable(map, start, failed, TRUE);
			return(rv);
		}
	}

	vm_map_unlock(map);

	return(KERN_SUCCESS);
}

/*
 * vm_map_clean
 *
 * Push any dirty cached pages in the address range to their pager.
 * If syncio is TRUE, dirty pages are written synchronously.
 * If invalidate is TRUE, any cached pages are freed as well.
 *
 * Returns an error if any part of the specified range is not mapped.
 *
 * Return values: KERN_INVALID_ADDRESS if `start' is unmapped or the
 * range has a hole, KERN_INVALID_ARGUMENT if it contains a sub-map
 * entry, KERN_PAGES_LOCKED if it contains a wired entry, KERN_FAILURE
 * if vm_object_page_clean fails, and KERN_SUCCESS otherwise.
 *
 * NOTE(review): the first-pass comment below says wired pages are only
 * checked "if invalidating", but the wired_count test is applied
 * regardless of `invalidate' -- confirm which is intended.
 */
int
vm_map_clean(map, start, end, syncio, invalidate)
	vm_map_t	map;
	vm_offset_t	start;
	vm_offset_t	end;
	boolean_t	syncio;
	boolean_t	invalidate;
{
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_size_t size;
	vm_object_t object;
	vm_offset_t offset;

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	/*
	 * Make a first pass to check for holes, and (if invalidating)
	 * wired pages.
	 */
	for (current = entry; current->start < end; current = current->next) {
		if (current->is_sub_map) {
			vm_map_unlock_read(map);
			return(KERN_INVALID_ARGUMENT);
		}
		if (end > current->end &&
		    (current->next == &map->header ||
		     current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return(KERN_INVALID_ADDRESS);
		}
		if (current->wired_count) {
			vm_map_unlock_read(map);
			return(KERN_PAGES_LOCKED);
		}
	}

	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.
	 */
	for (current = entry; current->start < end; current = current->next) {
		/* Portion of this entry that lies at or after `start'. */
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->is_a_map) {
			register vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			/*
			 * Resolve through the sharing map; only the piece
			 * covered by the sharing-map entry found for
			 * `offset' is handled in this iteration.
			 */
			smap = current->object.share_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_object_lock(object);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
			vm_object_lock(object);
		}
		/*
		 * Flush pages if writing is allowed.
		 * XXX should we continue on an error?
		 */
		if ((current->protection & VM_PROT_WRITE) &&
		    !vm_object_page_clean(object, offset, offset+size,
					  syncio, FALSE)) {
			vm_object_unlock(object);
			vm_map_unlock_read(map);
			return(KERN_FAILURE);
		}
		if (invalidate)
			vm_object_page_remove(object, offset, offset+size);
		vm_object_unlock(object);
		/* Advance past the piece just processed. */
		start += size;
	}

	vm_map_unlock_read(map);
	return(KERN_SUCCESS);
}

/*
 *	vm_map_entry_unwire:	[ internal use only ]
 *
 *	Make the region specified by this entry pageable.
 *
 *	The map in question should be locked.
 *	[This is the reason for this routine's existence.]
 */
void
vm_map_entry_unwire(map, entry)
	vm_map_t		map;
	register vm_map_entry_t	entry;
{
	/* Unwire the entry's whole range, then reset its wiring count. */
	vm_fault_unwire(map, entry->start, entry->end);
	entry->wired_count = 0;
}

/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
*/ void vm_map_entry_delete(map, entry) register vm_map_t map; register vm_map_entry_t entry; { if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); vm_map_entry_unlink(map, entry); map->size -= entry->end - entry->start; if (entry->is_a_map || entry->is_sub_map) vm_map_deallocate(entry->object.share_map); else vm_object_deallocate(entry->object.vm_object); vm_map_entry_dispose(map, entry); } /* * vm_map_delete: [ internal use only ] * * Deallocates the given address range from the target * map. * * When called with a sharing map, removes pages from * that region from all physical maps. */ int vm_map_delete(map, start, end) register vm_map_t map; vm_offset_t start; register vm_offset_t end; { register vm_map_entry_t entry; vm_map_entry_t first_entry; /* * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; vm_map_clip_start(map, entry, start); /* * Fix the lookup hint now, rather than each * time though the loop. */ SAVE_HINT(map, entry->prev); } /* * Save the free space hint */ if (map->first_free->start >= start) map->first_free = entry->prev; /* * Step through all entries in this region */ while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; register vm_offset_t s, e; register vm_object_t object; vm_map_clip_end(map, entry, end); next = entry->next; s = entry->start; e = entry->end; /* * Unwire before removing addresses from the pmap; * otherwise, unwiring will put the entries back in * the pmap. */ object = entry->object.vm_object; if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); /* * If this is a sharing map, we must remove * *all* references to this data, since we can't * find all of the physical maps which are sharing * it. 
*/ if (object == kernel_object || object == kmem_object) vm_object_page_remove(object, entry->offset, entry->offset + (e - s)); else if (!map->is_main_map) vm_object_pmap_remove(object, entry->offset, entry->offset + (e - s)); else pmap_remove(map->pmap, s, e); /* * Delete the entry (which may delete the object) * only after removing all pmap entries pointing * to its pages. (Otherwise, its page frames may * be reallocated, and any modify bits will be * set in the wrong object!) */ vm_map_entry_delete(map, entry); entry = next; } return(KERN_SUCCESS); } /* * vm_map_remove: * * Remove the given address range from the target map. * This is the exported form of vm_map_delete. */ int vm_map_remove(map, start, end) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; { register int result; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); result = vm_map_delete(map, start, end); vm_map_unlock(map); return(result); } /* * vm_map_check_protection: * * Assert that the target map allows the specified * privilege on the entire address region given. * The entire region must be allocated. */ boolean_t vm_map_check_protection(map, start, end, protection) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_prot_t protection; { register vm_map_entry_t entry; vm_map_entry_t tmp_entry; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { return(FALSE); } entry = tmp_entry; while (start < end) { if (entry == &map->header) { return(FALSE); } /* * No holes allowed! */ if (start < entry->start) { return(FALSE); } /* * Check protection associated with entry. */ if ((entry->protection & protection) != protection) { return(FALSE); } /* go to next entry */ start = entry->end; entry = entry->next; } return(TRUE); } /* * vm_map_copy_entry: * * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. 
*/
void
vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
	vm_map_t		src_map, dst_map;
	register vm_map_entry_t	src_entry, dst_entry;
{
	vm_object_t	temp_object;

	/* Submap entries are not handled here; return without copying. */
	if (src_entry->is_sub_map || dst_entry->is_sub_map)
		return;

	/*
	 * Diagnostic only: the copy still proceeds when the destination
	 * object does not have OBJ_INTERNAL set.
	 */
	if (dst_entry->object.vm_object != NULL &&
	    (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
		printf("vm_map_copy_entry: copying over permanent data!\n");

	/*
	 *	If our destination map was wired down,
	 *	unwire it now.
	 */

	if (dst_entry->wired_count != 0)
		vm_map_entry_unwire(dst_map, dst_entry);

	/*
	 *	If we're dealing with a sharing map, we
	 *	must remove the destination pages from
	 *	all maps (since we cannot know which maps
	 *	this sharing map belongs in).
	 */

	if (dst_map->is_main_map)
		pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
	else
		vm_object_pmap_remove(dst_entry->object.vm_object,
			dst_entry->offset,
			dst_entry->offset +
				(dst_entry->end - dst_entry->start));

	if (src_entry->wired_count == 0) {

		/* Set by vm_object_copy() below. */
		boolean_t	src_needs_copy;

		/*
		 *	If the source entry is marked needs_copy,
		 *	it is already write-protected.
		 */
		if (!src_entry->needs_copy) {

			boolean_t	su;

			/*
			 *	If the source entry has only one mapping,
			 *	we can just protect the virtual address
			 *	range.
			 */
			/*
			 * "su" (single use) is TRUE for a main map, or for
			 * a sharing map whose reference count is 1.
			 */
			if (!(su = src_map->is_main_map)) {
				simple_lock(&src_map->ref_lock);
				su = (src_map->ref_count == 1);
				simple_unlock(&src_map->ref_lock);
			}

			if (su) {
				pmap_protect(src_map->pmap,
					src_entry->start,
					src_entry->end,
					src_entry->protection & ~VM_PROT_WRITE);
			}
			else {
				vm_object_pmap_copy(src_entry->object.vm_object,
					src_entry->offset,
					src_entry->offset + (src_entry->end
							    -src_entry->start));
			}
		}

		/*
		 *	Make a copy of the object.
		 */
		/* The old destination object is released further down. */
		temp_object = dst_entry->object.vm_object;
		vm_object_copy(src_entry->object.vm_object,
				src_entry->offset,
				(vm_size_t)(src_entry->end -
					    src_entry->start),
				&dst_entry->object.vm_object,
				&dst_entry->offset,
				&src_needs_copy);
		/*
		 *	If we didn't get a copy-object now, mark the
		 *	source map entry so that a shadow will be created
		 *	to hold its changed pages.
		 */
		if (src_needs_copy)
			src_entry->needs_copy = TRUE;

		/*
		 *	The destination always needs to have a shadow
		 *	created, unless it's a zero-fill entry.
		 */
		if (dst_entry->object.vm_object != NULL)
			dst_entry->needs_copy = TRUE;
		else
			dst_entry->needs_copy = FALSE;

		/*
		 *	Mark the entries copy-on-write, so that write-enabling
		 *	the entry won't make copy-on-write pages writable.
		 */
		src_entry->copy_on_write = TRUE;
		dst_entry->copy_on_write = TRUE;
		/*
		 *	Get rid of the old object.
		 */
		vm_object_deallocate(temp_object);

		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
			dst_entry->end - dst_entry->start, src_entry->start);
	}
	else {
		/*
		 *	Of course, wired down pages can't be set copy-on-write.
		 *	Cause wired pages to be copied into the new
		 *	map by simulating faults (the new pages are
		 *	pageable)
		 */
		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
	}
}

/*
 *	vm_map_copy:
 *
 *	Perform a virtual memory copy from the source
 *	address map/range to the destination map/range.
 *
 *	If src_destroy or dst_alloc is requested,
 *	the source and destination regions should be
 *	disjoint, not only in the top-level map, but
 *	in the sharing maps as well.  [The best way
 *	to guarantee this is to use a new intermediate
 *	map to make copies.  This also reduces map
 *	fragmentation.]
 *
 *	Both maps are locked for the duration of the copy (once, if
 *	they are the same map) and unlocked before returning.
 *
 *	Returns:
 *	  KERN_NO_SPACE		  src_addr + len or dst_addr + len wraps
 *				  (checked before any lock is taken)
 *	  KERN_PROTECTION_FAILURE both maps are main maps and the source
 *				  is not entirely readable, or (without
 *				  dst_alloc) the destination is not
 *				  entirely writable
 *	  <vm_map_insert result>  dst_alloc was set and the insert failed
 *	  KERN_SUCCESS		  otherwise
 *
 *	The caller's src_destroy request is carried out by a
 *	vm_map_delete() of the source range at the "Return" label, which
 *	is also reached by the failure paths that "goto Return".
 */
int
vm_map_copy(dst_map, src_map,
			  dst_addr, len, src_addr,
			  dst_alloc, src_destroy)
	vm_map_t	dst_map;
	vm_map_t	src_map;
	vm_offset_t	dst_addr;
	vm_size_t	len;
	vm_offset_t	src_addr;
	boolean_t	dst_alloc;
	boolean_t	src_destroy;
{
	register
	vm_map_entry_t	src_entry;
	register
	vm_map_entry_t	dst_entry;
	vm_map_entry_t	tmp_entry;
	vm_offset_t	src_start;
	vm_offset_t	src_end;
	vm_offset_t	dst_start;
	vm_offset_t	dst_end;
	vm_offset_t	src_clip;
	vm_offset_t	dst_clip;
	int		result;
	boolean_t	old_src_destroy;

	/*
	 *	XXX While we figure out why src_destroy screws up,
	 *	we'll do it by explicitly vm_map_delete'ing at the end.
	 */
	/*
	 * With src_destroy forced to FALSE here, the "if (src_destroy ...)"
	 * tests further down never fire.
	 */
	old_src_destroy = src_destroy;
	src_destroy = FALSE;

	/*
	 *	Compute start and end of region in both maps
	 */

	src_start = src_addr;
	src_end = src_start + len;
	dst_start = dst_addr;
	dst_end = dst_start + len;

	/*
	 *	Check that the region can exist in both source
	 *	and destination.
	 */

	if ((dst_end < dst_start) || (src_end < src_start))
		return(KERN_NO_SPACE);

	/*
	 *	Lock the maps in question -- we avoid deadlock
	 *	by ordering lock acquisition by map value
	 */

	if (src_map == dst_map) {
		vm_map_lock(src_map);
	}
	else if ((long) src_map < (long) dst_map) {
		vm_map_lock(src_map);
		vm_map_lock(dst_map);
	} else {
		vm_map_lock(dst_map);
		vm_map_lock(src_map);
	}

	result = KERN_SUCCESS;

	/*
	 *	Check protections... source must be completely readable and
	 *	destination must be completely writable.  [Note that if we're
	 *	allocating the destination region, we don't have to worry
	 *	about protection, but instead about whether the region
	 *	exists.]
	 */

	if (src_map->is_main_map && dst_map->is_main_map) {
		if (!vm_map_check_protection(src_map, src_start, src_end,
					VM_PROT_READ)) {
			result = KERN_PROTECTION_FAILURE;
			goto Return;
		}

		if (dst_alloc) {
			/* XXX Consider making this a vm_map_find instead */
			if ((result = vm_map_insert(dst_map, NULL,
					(vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
				goto Return;
		}
		else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
					VM_PROT_WRITE)) {
			result = KERN_PROTECTION_FAILURE;
			goto Return;
		}
	}

	/*
	 *	Find the start entries and clip.
	 *
	 *	Note that checking protection asserts that the
	 *	lookup cannot fail.
	 *
	 *	Also note that we wait to do the second lookup
	 *	until we have done the first clip, as the clip
	 *	may affect which entry we get!
	 */
	/*
	 * NOTE(review): the protection checks above only run when both
	 * maps are main maps; confirm the lookups cannot fail otherwise.
	 */

	(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
	src_entry = tmp_entry;
	vm_map_clip_start(src_map, src_entry, src_start);

	(void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
	dst_entry = tmp_entry;
	vm_map_clip_start(dst_map, dst_entry, dst_start);

	/*
	 *	If both source and destination entries are the same,
	 *	retry the first lookup, as it may have changed.
	 */

	if (src_entry == dst_entry) {
		(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
		src_entry = tmp_entry;
	}

	/*
	 *	If source and destination entries are still the same,
	 *	a null copy is being performed.
	 */

	if (src_entry == dst_entry)
		goto Return;

	/*
	 *	Go through entries until we get to the end of the
	 *	region.
	 */

	while (src_start < src_end) {
		/*
		 *	Clip the entries to the endpoint of the entire region.
		 */

		vm_map_clip_end(src_map, src_entry, src_end);
		vm_map_clip_end(dst_map, dst_entry, dst_end);

		/*
		 *	Clip each entry to the endpoint of the other entry.
		 */

		src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
		vm_map_clip_end(src_map, src_entry, src_clip);

		dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
		vm_map_clip_end(dst_map, dst_entry, dst_clip);

		/*
		 *	Both entries now match in size and relative endpoints.
		 *
		 *	If both entries refer to a VM object, we can
		 *	deal with them now.
		 */

		if (!src_entry->is_a_map && !dst_entry->is_a_map) {
			vm_map_copy_entry(src_map, dst_map, src_entry,
						dst_entry);
		}
		else {
			register vm_map_t	new_dst_map;
			vm_offset_t		new_dst_start;
			vm_size_t		new_size;
			vm_map_t		new_src_map;
			vm_offset_t		new_src_start;

			/*
			 *	We have to follow at least one sharing map.
			 */

			new_size = (dst_entry->end - dst_entry->start);

			/*
			 * When a side is not a sharing map, the recursive
			 * call below is handed the map we already hold
			 * locked, so its lock is switched to recursive mode
			 * first and restored after the call.
			 */
			if (src_entry->is_a_map) {
				new_src_map = src_entry->object.share_map;
				new_src_start = src_entry->offset;
			}
			else {
				new_src_map = src_map;
				new_src_start = src_entry->start;
				vm_map_set_recursive(&src_map->lock);
			}

			if (dst_entry->is_a_map) {
				vm_offset_t	new_dst_end;

				new_dst_map = dst_entry->object.share_map;
				new_dst_start = dst_entry->offset;

				/*
				 *	Since the destination sharing entries
				 *	will be merely deallocated, we can
				 *	do that now, and replace the region
				 *	with a null object.  [This prevents
				 *	splitting the source map to match
				 *	the form of the destination map.]
				 *	Note that we can only do so if the
				 *	source and destination do not overlap.
				 */

				new_dst_end = new_dst_start + new_size;

				if (new_dst_map != new_src_map) {
					vm_map_lock(new_dst_map);
					(void) vm_map_delete(new_dst_map,
							new_dst_start,
							new_dst_end);
					(void) vm_map_insert(new_dst_map,
							NULL,
							(vm_offset_t) 0,
							new_dst_start,
							new_dst_end);
					vm_map_unlock(new_dst_map);
				}
			}
			else {
				new_dst_map = dst_map;
				new_dst_start = dst_entry->start;
				vm_map_set_recursive(&dst_map->lock);
			}

			/*
			 *	Recursively copy the sharing map.
			 */
			/* The result of the recursive copy is discarded. */

			(void) vm_map_copy(new_dst_map, new_src_map,
				new_dst_start, new_size, new_src_start,
				FALSE, FALSE);

			if (dst_map == new_dst_map)
				vm_map_clear_recursive(&dst_map->lock);
			if (src_map == new_src_map)
				vm_map_clear_recursive(&src_map->lock);
		}

		/*
		 *	Update variables for next pass through the loop.
		 */

		src_start = src_entry->end;
		src_entry = src_entry->next;
		dst_start = dst_entry->end;
		dst_entry = dst_entry->next;

		/*
		 *	If the source is to be destroyed, here is the
		 *	place to do it.
		 */

		if (src_destroy && src_map->is_main_map &&
						dst_map->is_main_map)
			vm_map_entry_delete(src_map, src_entry->prev);
	}

	/*
	 *	Update the physical maps as appropriate
	 */

	if (src_map->is_main_map && dst_map->is_main_map) {
		if (src_destroy)
			pmap_remove(src_map->pmap, src_addr, src_addr + len);
	}

	/*
	 *	Unlock the maps
	 */

	Return: ;

	/* Deferred src_destroy handling; runs on failure paths as well. */
	if (old_src_destroy)
		vm_map_delete(src_map, src_addr, src_addr + len);

	vm_map_unlock(src_map);
	if (src_map != dst_map)
		vm_map_unlock(dst_map);

	return(result);
}

/*
 * vmspace_fork:
 * Create a new process vmspace structure and vm_map
 * based on those of an existing process.  The new map
 * is based on the old map, according to the inheritance
 * values on the regions in that map.
 *
 * The source map must not be locked.
 *
 * The old map is locked while its entries are walked:
 *   VM_INHERIT_NONE	the entry is skipped;
 *   VM_INHERIT_SHARE	the entry is converted to reference a sharing
 *			map (created here if it has none yet) and the
 *			new map gets an entry referencing the same
 *			sharing map;
 *   VM_INHERIT_COPY	a new entry is created and filled in with
 *			vm_map_copy() or vm_map_copy_entry().
 * Panics if a submap entry is encountered.
 * Returns the new vmspace.
 */
struct vmspace *
vmspace_fork(vm1)
	register struct vmspace *vm1;
{
	register struct vmspace *vm2;
	vm_map_t	old_map = &vm1->vm_map;
	vm_map_t	new_map;
	vm_map_entry_t	old_entry;
	vm_map_entry_t	new_entry;
	pmap_t		new_pmap;

	vm_map_lock(old_map);

	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
	    old_map->entries_pageable);
	/*
	 * Copy the tail of the vmspace structure, from vm_startcopy to
	 * the end of *vm1, into the new vmspace.
	 */
	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
	/* new_pmap is assigned here but not referenced again below. */
	new_pmap = &vm2->vm_pmap;		/* XXX */
	new_map = &vm2->vm_map;			/* XXX */

	old_entry = old_map->header.next;

	while (old_entry != &old_map->header) {
		if (old_entry->is_sub_map)
			panic("vm_map_fork: encountered a submap");

		switch (old_entry->inheritance) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			/*
			 *	If we don't already have a sharing map:
			 */

			if (!old_entry->is_a_map) {
			 	vm_map_t	new_share_map;
				vm_map_entry_t	new_share_entry;

				/*
				 *	Create a new sharing map
				 */

				new_share_map = vm_map_create(NULL,
							old_entry->start,
							old_entry->end,
							TRUE);
				new_share_map->is_main_map = FALSE;

				/*
				 *	Create the only sharing entry from the
				 *	old task map entry.
				 */

				new_share_entry =
					vm_map_entry_create(new_share_map);
				*new_share_entry = *old_entry;
				new_share_entry->wired_count = 0;

				/*
				 *	Insert the entry into the new sharing
				 *	map
				 */

				vm_map_entry_link(new_share_map,
						new_share_map->header.prev,
						new_share_entry);

				/*
				 *	Fix up the task map entry to refer
				 *	to the sharing map now.
				 */

				old_entry->is_a_map = TRUE;
				old_entry->object.share_map = new_share_map;
				old_entry->offset = old_entry->start;
			}

			/*
			 *	Clone the entry, referencing the sharing map.
			 */

			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->wired_count = 0;
			vm_map_reference(new_entry->object.share_map);

			/*
			 *	Insert the entry into the new map -- we
			 *	know we're inserting at the end of the new
			 *	map.
			 */

			vm_map_entry_link(new_map, new_map->header.prev,
						new_entry);

			/*
			 *	Update the physical map
			 */

			pmap_copy(new_map->pmap, old_map->pmap,
				new_entry->start,
				(old_entry->end - old_entry->start),
				old_entry->start);
			break;

		case VM_INHERIT_COPY:
			/*
			 *	Clone the entry and link into the map.
			 */

			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->wired_count = 0;
			/*
			 * Start the clone with no object and not as a
			 * sharing-map reference; the copy below fills it in.
			 */
			new_entry->object.vm_object = NULL;
			new_entry->is_a_map = FALSE;
			vm_map_entry_link(new_map, new_map->header.prev,
							new_entry);
			if (old_entry->is_a_map) {
				int	check;

				check = vm_map_copy(new_map,
						old_entry->object.share_map,
						new_entry->start,
						(vm_size_t)(new_entry->end -
							new_entry->start),
						old_entry->offset,
						FALSE, FALSE);
				/* A failed copy is only reported, not undone. */
				if (check != KERN_SUCCESS)
					printf("vm_map_fork: copy in share_map region failed\n");
			}
			else {
				vm_map_copy_entry(old_map, new_map, old_entry,
						new_entry);
			}
			break;
		}
		old_entry = old_entry->next;
	}

	/*
	 * NOTE(review): the size is copied wholesale even though
	 * VM_INHERIT_NONE entries were skipped above -- confirm intended.
	 */
	new_map->size = old_map->size;
	vm_map_unlock(old_map);

	return(vm2);
}

/*
 *	vm_map_lookup:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Leaves the map in question locked for read; return
 *	values are guaranteed until a vm_map_lookup_done
 *	call is performed.  Note that the map argument
 *	is in/out; the returned map must be used in
 *	the call to vm_map_lookup_done.
 *
 *	A handle (out_entry) is returned for use in
 *	vm_map_lookup_done, to make that fast.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 *
 *	Returns:
 *	  KERN_INVALID_ADDRESS	  vaddr is not mapped (or the matching
 *				  offset is not mapped in the sharing map)
 *	  KERN_PROTECTION_FAILURE the entry's protection does not allow
 *				  every bit of fault_type
 *	  KERN_SUCCESS		  the OUT arguments have been filled in
 *	On the failure returns the read locks taken here have been
 *	released again.
 */
int
vm_map_lookup(var_map, vaddr, fault_type, out_entry,
				object, offset, out_prot, wired, single_use)
	vm_map_t		*var_map;	/* IN/OUT */
	register vm_offset_t	vaddr;
	register vm_prot_t	fault_type;

	vm_map_entry_t		*out_entry;	/* OUT */
	vm_object_t		*object;	/* OUT */
	vm_offset_t		*offset;	/* OUT */
	vm_prot_t		*out_prot;	/* OUT */
	boolean_t		*wired;		/* OUT */
	boolean_t		*single_use;	/* OUT */
{
	vm_map_t			share_map;
	vm_offset_t			share_offset;
	register vm_map_entry_t		entry;
	register vm_map_t		map = *var_map;
	register vm_prot_t		prot;
	register boolean_t		su;

	RetryLookup: ;

	/*
	 *	Lookup the faulting address.
	 */

	vm_map_lock_read(map);

/* Drop the read lock on the current top-level map and return "why". */
#define	RETURN(why) \
		{ \
		vm_map_unlock_read(map); \
		return(why); \
		}

	/*
	 *	If the map has an interesting hint, try it before calling
	 *	full blown lookup routine.
	 */

	simple_lock(&map->hint_lock);
	entry = map->hint;
	simple_unlock(&map->hint_lock);

	*out_entry = entry;

	if ((entry == &map->header) ||
	    (vaddr < entry->start) || (vaddr >= entry->end)) {
		vm_map_entry_t	tmp_entry;

		/*
		 *	Entry was either not a valid hint, or the vaddr
		 *	was not contained in the entry, so do a full lookup.
		 */
		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
			RETURN(KERN_INVALID_ADDRESS);

		entry = tmp_entry;
		*out_entry = entry;
	}

	/*
	 *	Handle submaps.
	 */
	/*
	 * The lookup restarts in the submap; *var_map is updated so the
	 * caller sees the map that ends up locked.
	 */

	if (entry->is_sub_map) {
		vm_map_t	old_map = map;

		*var_map = map = entry->object.sub_map;
		vm_map_unlock_read(old_map);
		goto RetryLookup;
	}

	/*
	 *	Check whether this task is allowed to have
	 *	this page.
	 */

	prot = entry->protection;
	if ((fault_type & (prot)) != fault_type)
		RETURN(KERN_PROTECTION_FAILURE);

	/*
	 *	If this page is not pageable, we have to get
	 *	it for all possible accesses.
	 */

	if ((*wired = (entry->wired_count != 0)) != 0)
		prot = fault_type = entry->protection;

	/*
	 *	If we don't already have a VM object, track
	 *	it down.
	 */
	/*
	 * "su" starts out TRUE when the entry maps an object directly;
	 * for a sharing map it is recomputed from the reference count
	 * just before returning.
	 */

	if ((su = !entry->is_a_map) != 0) {
	 	share_map = map;
		share_offset = vaddr;
	}
	else {
		vm_map_entry_t	share_entry;

		/*
		 *	Compute the sharing map, and offset into it.
		 */

		share_map = entry->object.share_map;
		share_offset = (vaddr - entry->start) + entry->offset;

		/*
		 *	Look for the backing store object and offset
		 */

		vm_map_lock_read(share_map);

		if (!vm_map_lookup_entry(share_map, share_offset,
					&share_entry)) {
			vm_map_unlock_read(share_map);
			RETURN(KERN_INVALID_ADDRESS);
		}
		/* From here on "entry" is the sharing map's entry. */
		entry = share_entry;
	}

	/*
	 *	If the entry was copy-on-write, we either ...
	 */

	if (entry->needs_copy) {
	    	/*
		 *	If we want to write the page, we may as well
		 *	handle that now since we've got the sharing
		 *	map locked.
		 *
		 *	If we don't need to write the page, we just
		 *	demote the permissions allowed.
		 */

		if (fault_type & VM_PROT_WRITE) {
			/*
			 *	Make a new object, and place it in the
			 *	object chain.  Note that no new references
			 *	have appeared -- one just moved from the
			 *	share map to the new object.
			 */

			/*
			 * A non-zero lockmgr() result means the upgrade did
			 * not happen: drop the top-level read lock when it
			 * is a different map and start over.
			 * NOTE(review): presumably a failed LK_EXCLUPGRADE
			 * has already released share_map's shared lock --
			 * confirm against lockmgr(9).
			 */
			if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
				    (void *)0, curproc)) {
				if (share_map != map)
					vm_map_unlock_read(map);
				goto RetryLookup;
			}

			vm_object_shadow(
				&entry->object.vm_object,
				&entry->offset,
				(vm_size_t) (entry->end - entry->start));

			entry->needs_copy = FALSE;

			lockmgr(&share_map->lock, LK_DOWNGRADE,
				(void *)0, curproc);
		}
		else {
			/*
			 *	We're attempting to read a copy-on-write
			 *	page -- don't allow writes.
			 */

			prot &= (~VM_PROT_WRITE);
		}
	}

	/*
	 *	Create an object if necessary.
	 */
	if (entry->object.vm_object == NULL) {

		/* Same upgrade / retry / downgrade sequence as above. */
		if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
			    (void *)0, curproc)) {
			if (share_map != map)
				vm_map_unlock_read(map);
			goto RetryLookup;
		}

		entry->object.vm_object = vm_object_allocate(
					(vm_size_t)(entry->end - entry->start));
		entry->offset = 0;
		lockmgr(&share_map->lock, LK_DOWNGRADE, (void *)0, curproc);
	}

	/*
	 *	Return the object/offset from this entry.  If the entry
	 *	was copy-on-write or empty, it has been fixed up.
	 */

	*offset = (share_offset - entry->start) + entry->offset;
	*object = entry->object.vm_object;

	/*
	 *	Return whether this is the only map sharing this data.
	 */

	if (!su) {
		simple_lock(&share_map->ref_lock);
		su = (share_map->ref_count == 1);
		simple_unlock(&share_map->ref_lock);
	}

	*out_prot = prot;
	*single_use = su;

	/* Success: the read lock(s) taken above are still held. */
	return(KERN_SUCCESS);

#undef	RETURN
}

/*
 *	vm_map_lookup_done:
 *
 *	Releases locks acquired by a vm_map_lookup
 *	(according to the handle returned by that lookup).
 *
 *	"map" is the map handed back through vm_map_lookup's in/out
 *	argument and "entry" is the out_entry handle it returned.
 */

void
vm_map_lookup_done(map, entry)
	register vm_map_t	map;
	vm_map_entry_t		entry;
{
	/*
	 *	If this entry references a map, unlock it first.
	 */

	if (entry->is_a_map)
		vm_map_unlock_read(entry->object.share_map);

	/*
	 *	Unlock the main-level map
	 */

	vm_map_unlock_read(map);
}

/*
 *	Routine:	vm_map_simplify
 *	Purpose:
 *		Attempt to simplify the map representation in
 *		the vicinity of the given starting address.
 *	Note:
 *		This routine is intended primarily to keep the
 *		kernel maps more compact -- they generally don't
 *		benefit from the "expand a map entry" technology
 *		at allocation time because the adjacent entry
 *		is often wired down.
*/ void vm_map_simplify(map, start) vm_map_t map; vm_offset_t start; { vm_map_entry_t this_entry; vm_map_entry_t prev_entry; vm_map_lock(map); if ( (vm_map_lookup_entry(map, start, &this_entry)) && ((prev_entry = this_entry->prev) != &map->header) && (prev_entry->end == start) && (map->is_main_map) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (this_entry->is_a_map == FALSE) && (this_entry->is_sub_map == FALSE) && (prev_entry->inheritance == this_entry->inheritance) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->copy_on_write == this_entry->copy_on_write) && (prev_entry->needs_copy == this_entry->needs_copy) && (prev_entry->object.vm_object == this_entry->object.vm_object) && ((prev_entry->offset + (prev_entry->end - prev_entry->start)) == this_entry->offset) ) { if (map->first_free == this_entry) map->first_free = prev_entry; SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } vm_map_unlock(map); } /* * vm_map_print: [ debug ] */ void vm_map_print(map, full) register vm_map_t map; boolean_t full; { _vm_map_print(map, full, printf); } void _vm_map_print(map, full, pr) register vm_map_t map; boolean_t full; int (*pr) __P((const char *, ...)); { register vm_map_entry_t entry; extern int indent; iprintf(pr, "%s map %p: pmap=%p, ref=%d, nentries=%d, version=%d\n", (map->is_main_map ? 
"Task" : "Share"), map, (map->pmap), map->ref_count, map->nentries, map->timestamp); if (!full && indent) return; indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { iprintf(pr, "map entry %p: start=%p, end=%p, ", entry, entry->start, entry->end); if (map->is_main_map) { static char *inheritance_name[4] = { "share", "copy", "none", "donate_copy"}; (*pr)("prot=%x/%x/%s, ", entry->protection, entry->max_protection, inheritance_name[entry->inheritance]); if (entry->wired_count != 0) (*pr)("wired, "); } if (entry->is_a_map || entry->is_sub_map) { (*pr)("share=%p, offset=%p\n", entry->object.share_map, entry->offset); if ((entry->prev == &map->header) || (!entry->prev->is_a_map) || (entry->prev->object.share_map != entry->object.share_map)) { indent += 2; vm_map_print(entry->object.share_map, full); indent -= 2; } } else { (*pr)("object=%p, offset=%p", entry->object.vm_object, entry->offset); if (entry->copy_on_write) (*pr)(", copy (%s)", entry->needs_copy ? "needed" : "done"); (*pr)("\n"); if ((entry->prev == &map->header) || (entry->prev->is_a_map) || (entry->prev->object.vm_object != entry->object.vm_object)) { indent += 2; _vm_object_print(entry->object.vm_object, full, pr); indent -= 2; } } } indent -= 2; }