author    Ariane van der Steldt <ariane@cvs.openbsd.org>    2012-03-09 13:01:30 +0000
committer Ariane van der Steldt <ariane@cvs.openbsd.org>    2012-03-09 13:01:30 +0000
commit    193e3efb70083a72f3d299ea5f129cf83d547115 (patch)
tree      e7c9b8d210fe2b29062f1cf3a40c093bdf14800d
parent    dbaaf4ad89f61a154abf6b48600210ec50ecb62c (diff)
New vmmap implementation.
no oks (it is really a pain to review properly)
extensively tested, I'm confident it'll be stable
'now is the time' from several icb inhabitants

Diff provides:
- ability to specify different allocators for different regions/maps
- a simpler implementation of the current allocator
- currently in compatibility mode: it will generate similar addresses as
  the old allocator
-rw-r--r--  sys/arch/i386/i386/pmap.c  |    25
-rw-r--r--  sys/conf/files             |     3
-rw-r--r--  sys/dev/pci/drm/drm_bufs.c |     4
-rw-r--r--  sys/dev/pci/drm/i915_drv.c |     8
-rw-r--r--  sys/kern/exec_elf.c        |    26
-rw-r--r--  sys/kern/kern_exec.c       |     6
-rw-r--r--  sys/kern/kern_malloc.c     |    11
-rw-r--r--  sys/kern/sysv_shm.c        |     8
-rw-r--r--  sys/uvm/uvm.h              |    20
-rw-r--r--  sys/uvm/uvm_addr.c         |  1556
-rw-r--r--  sys/uvm/uvm_addr.h         |   116
-rw-r--r--  sys/uvm/uvm_extern.h       |     8
-rw-r--r--  sys/uvm/uvm_fault.c        |    15
-rw-r--r--  sys/uvm/uvm_init.c         |    14
-rw-r--r--  sys/uvm/uvm_io.c           |    15
-rw-r--r--  sys/uvm/uvm_km.c           |    78
-rw-r--r--  sys/uvm/uvm_map.c          |  6656
-rw-r--r--  sys/uvm/uvm_map.h          |   307
-rw-r--r--  sys/uvm/uvm_mmap.c         |    81
-rw-r--r--  sys/uvm/uvm_unix.c         |    10
20 files changed, 5935 insertions, 3032 deletions
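
The "different allocators for different regions/maps" item above rests on the selector vtable (struct uvm_addr_functions) that the new sys/uvm/uvm_addr.c and uvm_addr.h introduce further down: each region carries a state object whose function pointers do the actual address selection. The following userland sketch only mirrors that dispatch shape; the toy_* names and the bump selector are invented for illustration and do not exist in the kernel.

/* Userland sketch of the selector-vtable pattern; toy types, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct toy_addr_state;

struct toy_addr_functions {
	int	(*select)(struct toy_addr_state *, size_t, unsigned long *);
	void	(*destroy)(struct toy_addr_state *);
	const char *name;
};

/* Base state: every selector carries its managed range plus its vtable. */
struct toy_addr_state {
	unsigned long			 minaddr, maxaddr, next;
	const struct toy_addr_functions	*fn;
};

/* A trivial bump selector standing in for a real first-fit implementation. */
static int
bump_select(struct toy_addr_state *st, size_t sz, unsigned long *addr_out)
{
	if (st->next + sz > st->maxaddr)
		return -1;		/* the kernel selectors return ENOMEM here */
	*addr_out = st->next;
	st->next += sz;
	return 0;
}

static void
bump_destroy(struct toy_addr_state *st)
{
	free(st);
}

static const struct toy_addr_functions bump_functions = {
	.select = bump_select,
	.destroy = bump_destroy,
	.name = "toy_bump",
};

static struct toy_addr_state *
bump_create(unsigned long minaddr, unsigned long maxaddr)
{
	struct toy_addr_state *st = malloc(sizeof(*st));

	if (st == NULL)
		abort();
	st->minaddr = st->next = minaddr;
	st->maxaddr = maxaddr;
	st->fn = &bump_functions;
	return st;
}

int
main(void)
{
	/* A map could hold several of these, one per region (exec, heap, ...). */
	struct toy_addr_state *region = bump_create(0x1000UL, 0x10000UL);
	unsigned long addr;

	if (region->fn->select(region, 0x2000, &addr) == 0)
		printf("%s selected 0x%lx\n", region->fn->name, addr);
	region->fn->destroy(region);
	return 0;
}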
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 2cdfba314d7..f8f05cb8b88 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.156 2012/02/19 17:14:28 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.157 2012/03/09 13:01:28 ariane Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -602,14 +602,16 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
vaddr_t va = 0;
vm_map_lock(map);
- for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
- /*
- * This entry has greater va than the entries before.
- * We need to make it point to the last page, not past it.
- */
+ RB_FOREACH_REVERSE(ent, uvm_map_addr, &map->addr) {
if (ent->protection & VM_PROT_EXECUTE)
- va = trunc_page(ent->end - 1);
+ break;
}
+ /*
+ * This entry has greater va than the entries before.
+ * We need to make it point to the last page, not past it.
+ */
+ if (ent)
+ va = trunc_page(ent->end - 1);
vm_map_unlock(map);
if (va <= pm->pm_hiexec) {
@@ -1244,7 +1246,7 @@ pmap_free_pvpage(void)
{
int s;
struct vm_map *map;
- struct vm_map_entry *dead_entries;
+ struct uvm_map_deadq dead_entries;
struct pv_page *pvp;
s = splvm(); /* protect kmem_map */
@@ -1265,13 +1267,12 @@ pmap_free_pvpage(void)
TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
/* unmap the page */
- dead_entries = NULL;
+ TAILQ_INIT(&dead_entries);
uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE,
- &dead_entries, NULL, FALSE);
+ &dead_entries, FALSE, TRUE);
vm_map_unlock(map);
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
+ uvm_unmap_detach(&dead_entries, 0);
pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
}
diff --git a/sys/conf/files b/sys/conf/files
index 5f12fdfdc26..379b1e2cdb8 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.533 2011/12/31 17:06:10 jsing Exp $
+# $OpenBSD: files,v 1.534 2012/03/09 13:01:28 ariane Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -987,6 +987,7 @@ file nnpfs/nnpfs_vfsops-openbsd.c nnpfs
file nnpfs/nnpfs_vnodeops-bsd.c nnpfs
file nnpfs/nnpfs_vnodeops-common.c nnpfs
file nnpfs/nnpfs_syscalls-dummy.c !nnpfs
+file uvm/uvm_addr.c
file uvm/uvm_amap.c
file uvm/uvm_anon.c
file uvm/uvm_aobj.c
diff --git a/sys/dev/pci/drm/drm_bufs.c b/sys/dev/pci/drm/drm_bufs.c
index 08df8480a55..2f64aa271fa 100644
--- a/sys/dev/pci/drm/drm_bufs.c
+++ b/sys/dev/pci/drm/drm_bufs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: drm_bufs.c,v 1.48 2011/06/02 18:22:00 weerd Exp $ */
+/* $OpenBSD: drm_bufs.c,v 1.49 2012/03/09 13:01:28 ariane Exp $ */
/*-
* Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
@@ -958,7 +958,7 @@ drm_mapbufs(struct drm_device *dev, void *data, struct drm_file *file_priv)
foff = 0;
}
- vaddr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE);
+ vaddr = 0;
retcode = uvm_mmap(&curproc->p_vmspace->vm_map, &vaddr, size,
UVM_PROT_READ | UVM_PROT_WRITE, UVM_PROT_ALL, MAP_SHARED,
(caddr_t)vn, foff, curproc->p_rlimit[RLIMIT_MEMLOCK].rlim_cur,
diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c
index 76ad35fe01a..602bac7a77e 100644
--- a/sys/dev/pci/drm/i915_drv.c
+++ b/sys/dev/pci/drm/i915_drv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_drv.c,v 1.118 2011/09/20 14:29:34 kettenis Exp $ */
+/* $OpenBSD: i915_drv.c,v 1.119 2012/03/09 13:01:28 ariane Exp $ */
/*
* Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
*
@@ -1438,10 +1438,10 @@ i915_gem_gtt_map_ioctl(struct drm_device *dev, void *data,
* We give our reference from object_lookup to the mmap, so only
* must free it in the case that the map fails.
*/
- addr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE);
- ret = uvm_map_p(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj,
+ addr = 0;
+ ret = uvm_map(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj,
offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
- UVM_INH_SHARE, UVM_ADV_RANDOM, 0), curproc);
+ UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
done:
if (ret == 0)
diff --git a/sys/kern/exec_elf.c b/sys/kern/exec_elf.c
index 2e615de374d..4e9f314965f 100644
--- a/sys/kern/exec_elf.c
+++ b/sys/kern/exec_elf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: exec_elf.c,v 1.85 2011/07/05 04:48:02 guenther Exp $ */
+/* $OpenBSD: exec_elf.c,v 1.86 2012/03/09 13:01:28 ariane Exp $ */
/*
* Copyright (c) 1996 Per Fogelstrom
@@ -326,6 +326,7 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp,
int nload, idx = 0;
Elf_Addr pos = *last;
int file_align;
+ int loop;
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
if ((error = namei(&nd)) != 0) {
@@ -377,11 +378,12 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp,
* would (i.e. something safely out of the way).
*/
if (pos == ELFDEFNNAME(NO_ADDR)) {
- pos = uvm_map_hint(p, VM_PROT_EXECUTE);
+ pos = uvm_map_hint(p->p_vmspace, VM_PROT_EXECUTE);
}
pos = ELF_ROUND(pos, file_align);
*last = epp->ep_interp_pos = pos;
+ loop = 0;
for (i = 0; i < nload;/**/) {
vaddr_t addr;
struct uvm_object *uobj;
@@ -409,17 +411,17 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp,
addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
BRKSIZ);
- vm_map_lock(&p->p_vmspace->vm_map);
- if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size,
- &addr, uobj, uoff, 0, UVM_FLAG_FIXED) == NULL) {
- if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size,
- &addr, uobj, uoff, 0, 0) == NULL) {
- error = ENOMEM; /* XXX */
- vm_map_unlock(&p->p_vmspace->vm_map);
- goto bad1;
+ if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
+ (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
+ if (loop == 0) {
+ loop = 1;
+ i = 0;
+ *last = epp->ep_interp_pos = pos = 0;
+ continue;
}
- }
- vm_map_unlock(&p->p_vmspace->vm_map);
+ error = ENOMEM;
+ goto bad1;
+ }
if (addr != pos + loadmap[i].vaddr) {
/* base changed. */
pos = addr - trunc_page(loadmap[i].vaddr);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index c8c2f1c1378..4fa96597e89 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exec.c,v 1.124 2012/02/20 22:23:39 guenther Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.125 2012/03/09 13:01:28 ariane Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
@@ -821,7 +821,6 @@ exec_sigcode_map(struct proc *p, struct emul *e)
e->e_sigobject = uao_create(sz, 0);
uao_reference(e->e_sigobject); /* permanent reference */
- va = vm_map_min(kernel_map); /* hint */
if ((r = uvm_map(kernel_map, &va, round_page(sz), e->e_sigobject,
0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
@@ -832,8 +831,7 @@ exec_sigcode_map(struct proc *p, struct emul *e)
uvm_unmap(kernel_map, va, va + round_page(sz));
}
- /* Just a hint to uvm_mmap where to put it. */
- p->p_sigcode = uvm_map_hint(p, VM_PROT_READ|VM_PROT_EXECUTE);
+ p->p_sigcode = 0; /* no hint */
uao_reference(e->e_sigobject);
if (uvm_map(&p->p_vmspace->vm_map, &p->p_sigcode, round_page(sz),
e->e_sigobject, 0, 0, UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX,
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index d6fc8d3ac20..6c5f9e8bfdd 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_malloc.c,v 1.90 2011/09/22 21:52:36 jsing Exp $ */
+/* $OpenBSD: kern_malloc.c,v 1.91 2012/03/09 13:01:28 ariane Exp $ */
/* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */
/*
@@ -576,8 +576,13 @@ kmeminit(void)
kmeminit_nkmempages();
base = vm_map_min(kernel_map);
kmem_map = uvm_km_suballoc(kernel_map, &base, &limit,
- (vsize_t)(nkmempages * PAGE_SIZE), VM_MAP_INTRSAFE, FALSE,
- &kmem_map_store);
+ (vsize_t)nkmempages << PAGE_SHIFT,
+#ifdef KVA_GUARDPAGES
+ VM_MAP_INTRSAFE | VM_MAP_GUARDPAGES,
+#else
+ VM_MAP_INTRSAFE,
+#endif
+ FALSE, &kmem_map_store);
kmembase = (char *)base;
kmemlimit = (char *)limit;
kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map,
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 6b4b0ed18ce..6b6736c153d 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sysv_shm.c,v 1.54 2011/10/27 07:56:28 robert Exp $ */
+/* $OpenBSD: sysv_shm.c,v 1.55 2012/03/09 13:01:28 ariane Exp $ */
/* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */
/*
@@ -261,10 +261,8 @@ sys_shmat(struct proc *p, void *v, register_t *retval)
attach_va = (vaddr_t)SCARG(uap, shmaddr);
else
return (EINVAL);
- } else {
- /* This is just a hint to uvm_map() about where to put it. */
- attach_va = uvm_map_hint(p, prot);
- }
+ } else
+ attach_va = 0;
shm_handle = shmseg->shm_internal;
uao_reference(shm_handle->shm_object);
error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size,
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index 939738f47aa..c236fb421a9 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.46 2011/07/06 19:50:38 beck Exp $ */
+/* $OpenBSD: uvm.h,v 1.47 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -120,6 +120,7 @@ struct uvm {
#define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */
#define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */
#define UVM_ET_HOLE 0x10 /* no backend */
+#define UVM_ET_FREEMAPPED 0x80 /* map entry is on free list (DEBUG) */
#define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0)
#define UVM_ET_ISSUBMAP(E) (((E)->etype & UVM_ET_SUBMAP) != 0)
@@ -154,6 +155,23 @@ do { \
#define UVM_PAGE_OWN(PG, TAG) /* nothing */
#endif /* UVM_PAGE_TRKOWN */
+/*
+ * uvm_map internal functions.
+ * Used by uvm_map address selectors.
+ */
+
+struct vm_map_entry *uvm_map_entrybyaddr(struct uvm_map_addr*, vaddr_t);
+int uvm_map_isavail(struct vm_map*,
+ struct uvm_addr_state*,
+ struct vm_map_entry**, struct vm_map_entry**,
+ vaddr_t, vsize_t);
+struct uvm_addr_state *uvm_map_uaddr(struct vm_map*, vaddr_t);
+struct uvm_addr_state *uvm_map_uaddr_e(struct vm_map*, struct vm_map_entry*);
+
+#define VMMAP_FREE_START(_entry) ((_entry)->end + (_entry)->guard)
+#define VMMAP_FREE_END(_entry) ((_entry)->end + (_entry)->guard + \
+ (_entry)->fspace)
+
#endif /* _KERNEL */
#endif /* _UVM_UVM_H_ */
diff --git a/sys/uvm/uvm_addr.c b/sys/uvm/uvm_addr.c
new file mode 100644
index 00000000000..486198e3891
--- /dev/null
+++ b/sys/uvm/uvm_addr.c
@@ -0,0 +1,1556 @@
+/* $OpenBSD: uvm_addr.c,v 1.1 2012/03/09 13:01:29 ariane Exp $ */
+
+/*
+ * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* #define DEBUG */
+
+#include <sys/param.h>
+#include <uvm/uvm.h>
+#include <uvm/uvm_addr.h>
+#include <sys/pool.h>
+#include <dev/rndvar.h>
+
+/* Max gap between hint allocations. */
+#define UADDR_HINT_MAXGAP (4 * PAGE_SIZE)
+/* Number of pivots in pivot allocator. */
+#define NUM_PIVOTS 16
+/*
+ * Max number (inclusive) of pages the pivot allocator
+ * will place between allocations.
+ *
+ * The uaddr_pivot_random() function attempts to bias towards
+ * small space between allocations, so putting a large number here is fine.
+ */
+#define PIVOT_RND 8
+/*
+ * Number of allocations that a pivot can supply before expiring.
+ * When a pivot expires, a new pivot has to be found.
+ *
+ * Must be at least 1.
+ */
+#define PIVOT_EXPIRE 1024
+
+
+/* Pool with uvm_addr_state structures. */
+struct pool uaddr_pool;
+struct pool uaddr_hint_pool;
+struct pool uaddr_bestfit_pool;
+struct pool uaddr_pivot_pool;
+struct pool uaddr_rnd_pool;
+
+/* uvm_addr state for hint based selector. */
+struct uaddr_hint_state {
+ struct uvm_addr_state uaddr;
+ vsize_t max_dist;
+};
+
+/* uvm_addr state for bestfit selector. */
+struct uaddr_bestfit_state {
+ struct uvm_addr_state ubf_uaddr;
+ struct uaddr_free_rbtree ubf_free;
+};
+
+/* uvm_addr state for rnd selector. */
+struct uaddr_rnd_state {
+ struct uvm_addr_state ur_uaddr;
+ TAILQ_HEAD(, vm_map_entry) ur_free;
+};
+
+/*
+ * Definition of a pivot in pivot selector.
+ */
+struct uaddr_pivot {
+ vaddr_t addr; /* End of prev. allocation. */
+ int expire;/* Best before date. */
+ int dir; /* Direction. */
+ struct vm_map_entry *entry; /* Will contain next alloc. */
+};
+/* uvm_addr state for pivot selector. */
+struct uaddr_pivot_state {
+ struct uvm_addr_state up_uaddr;
+
+ /* Free space tree, for fast pivot selection. */
+ struct uaddr_free_rbtree up_free;
+
+ /* List of pivots. The pointers point to after the last allocation. */
+ struct uaddr_pivot up_pivots[NUM_PIVOTS];
+};
+
+/*
+ * Free space comparison.
+ * Compares smaller free-space before larger free-space.
+ */
+static __inline int
+uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
+{
+ if (e1->fspace != e2->fspace)
+ return (e1->fspace < e2->fspace ? -1 : 1);
+ return (e1->start < e2->start ? -1 : e1->start > e2->start);
+}
+
+/* Forward declaration (see below). */
+extern const struct uvm_addr_functions uaddr_kernel_functions;
+struct uvm_addr_state uaddr_kbootstrap;
+
+
+/*
+ * Support functions.
+ */
+
+struct vm_map_entry *uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
+ vsize_t);
+void uaddr_kinsert(struct vm_map*, struct uvm_addr_state*,
+ struct vm_map_entry*);
+void uaddr_kremove(struct vm_map*, struct uvm_addr_state*,
+ struct vm_map_entry*);
+void uaddr_kbootstrapdestroy(struct uvm_addr_state*);
+
+void uaddr_destroy(struct uvm_addr_state*);
+void uaddr_hint_destroy(struct uvm_addr_state*);
+void uaddr_kbootstrap_destroy(struct uvm_addr_state*);
+void uaddr_rnd_destroy(struct uvm_addr_state*);
+void uaddr_bestfit_destroy(struct uvm_addr_state*);
+void uaddr_pivot_destroy(struct uvm_addr_state*);
+
+int uaddr_lin_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_kbootstrap_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_rnd_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_hint_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_bestfit_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_pivot_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+int uaddr_stack_brk_select(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+
+void uaddr_rnd_insert(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uaddr_rnd_remove(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uaddr_bestfit_insert(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uaddr_bestfit_remove(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uaddr_pivot_insert(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uaddr_pivot_remove(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+
+vsize_t uaddr_pivot_random(void);
+int uaddr_pivot_newpivot(struct vm_map*,
+ struct uaddr_pivot_state*, struct uaddr_pivot*,
+ struct vm_map_entry**, vaddr_t*,
+ vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
+
+#if defined(DEBUG) || defined(DDB)
+void uaddr_pivot_print(struct uvm_addr_state*, boolean_t,
+ int (*)(const char*, ...));
+void uaddr_rnd_print(struct uvm_addr_state*, boolean_t,
+ int (*)(const char*, ...));
+#endif /* DEBUG || DDB */
+
+
+/*
+ * Find smallest entry in tree that will fit sz bytes.
+ */
+struct vm_map_entry*
+uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
+{
+ struct vm_map_entry *tmp, *res;
+
+ tmp = RB_ROOT(free);
+ res = NULL;
+ while (tmp) {
+ if (tmp->fspace >= sz) {
+ res = tmp;
+ tmp = RB_LEFT(tmp, dfree.rbtree);
+ } else if (tmp->fspace < sz)
+ tmp = RB_RIGHT(tmp, dfree.rbtree);
+ }
+ return res;
+}
+
+static __inline vaddr_t
+uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
+{
+ vaddr_t adjusted;
+
+ KASSERT(offset < align || (align == 0 && offset == 0));
+ KASSERT((align & (align - 1)) == 0);
+ KASSERT((offset & PAGE_MASK) == 0);
+
+ align = MAX(align, PAGE_SIZE);
+ adjusted = addr & ~(align - 1);
+ adjusted += offset;
+ return (adjusted < addr ? adjusted + align : adjusted);
+}
+
+static __inline vaddr_t
+uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
+{
+ vaddr_t adjusted;
+
+ KASSERT(offset < align || (align == 0 && offset == 0));
+ KASSERT((align & (align - 1)) == 0);
+ KASSERT((offset & PAGE_MASK) == 0);
+
+ align = MAX(align, PAGE_SIZE);
+ adjusted = addr & ~(align - 1);
+ adjusted += offset;
+ return (adjusted > addr ? adjusted - align : adjusted);
+}
+
+/*
+ * Try to fit the requested space into the entry.
+ */
+int
+uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
+ vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
+ vaddr_t align, vaddr_t offset,
+ vsize_t before_gap, vsize_t after_gap)
+{
+ vaddr_t tmp;
+ vsize_t fspace;
+
+ if (low_addr > high_addr)
+ return ENOMEM;
+ fspace = high_addr - low_addr;
+ if (fspace < sz + before_gap + after_gap)
+ return ENOMEM;
+
+ /*
+ * Calculate lowest address.
+ */
+ low_addr += before_gap;
+ low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
+ if (low_addr < tmp) /* Overflow during alignment. */
+ return ENOMEM;
+ if (high_addr - after_gap - sz < low_addr)
+ return ENOMEM;
+
+ /*
+ * Calculate highest address.
+ */
+ high_addr -= after_gap + sz;
+ high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
+ if (high_addr > tmp) /* Overflow during alignment. */
+ return ENOMEM;
+ if (low_addr > high_addr)
+ return ENOMEM;
+
+ *min_result = low_addr;
+ *max_result = high_addr;
+ return 0;
+}
+
+
+/*
+ * Initialize uvm_addr.
+ */
+void
+uvm_addr_init()
+{
+ pool_init(&uaddr_pool, sizeof(struct uvm_addr_state),
+ 0, 0, 0, "uaddr", &pool_allocator_nointr);
+ pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state),
+ 0, 0, 0, "uaddrhint", &pool_allocator_nointr);
+ pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state),
+ 0, 0, 0, "uaddrbestfit", &pool_allocator_nointr);
+ pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state),
+ 0, 0, 0, "uaddrpivot", &pool_allocator_nointr);
+ pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state),
+ 0, 0, 0, "uaddrrnd", &pool_allocator_nointr);
+
+ uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
+ uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
+ uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
+}
+
+/*
+ * Invoke destructor function of uaddr.
+ */
+void
+uvm_addr_destroy(struct uvm_addr_state *uaddr)
+{
+ if (uaddr)
+ (*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
+}
+
+/*
+ * Move address forward to satisfy align, offset.
+ */
+vaddr_t
+uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
+{
+ vaddr_t result = (addr & ~(align - 1)) + offset;
+ if (result < addr)
+ result += align;
+ return result;
+}
+
+/*
+ * Move address backwards to satisfy align, offset.
+ */
+vaddr_t
+uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
+{
+ vaddr_t result = (addr & ~(align - 1)) + offset;
+ if (result > addr)
+ result -= align;
+ return result;
+}
+
+/*
+ * Directional first fit.
+ *
+ * Do a linear search for free space, starting at addr in entry.
+ * direction == 1: search forward
+ * direction == -1: search backward
+ *
+ * Output: low <= addr <= high and entry will contain addr.
+ * 0 will be returned if no space is available.
+ *
+ * gap describes the space that must appear between the allocation and the preceding entry.
+ */
+int
+uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
+ int direction, vaddr_t low, vaddr_t high,
+ vsize_t before_gap, vsize_t after_gap)
+{
+ struct vm_map_entry *entry;
+ vaddr_t low_addr, high_addr;
+
+ KASSERT(entry_out != NULL && addr_out != NULL);
+ KASSERT(direction == -1 || direction == 1);
+ KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
+ (low & PAGE_MASK) == 0 &&
+ (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
+ KASSERT(high + sz > high); /* Check for overflow. */
+
+ /*
+ * Hint magic.
+ */
+ if (hint == 0)
+ hint = (direction == 1 ? low : high);
+ else if (hint > high) {
+ if (direction != -1)
+ return ENOMEM;
+ hint = high;
+ } else if (hint < low) {
+ if (direction != 1)
+ return ENOMEM;
+ hint = low;
+ }
+
+ for (entry = uvm_map_entrybyaddr(&map->addr,
+ hint - (direction == -1 ? 1 : 0)); entry != NULL;
+ entry = (direction == 1 ?
+ RB_NEXT(uvm_map_addr, &map->addr, entry) :
+ RB_PREV(uvm_map_addr, &map->addr, entry))) {
+ if (VMMAP_FREE_START(entry) > high ||
+ VMMAP_FREE_END(entry) < low) {
+ break;
+ }
+
+ if (uvm_addr_fitspace(&low_addr, &high_addr,
+ MAX(low, VMMAP_FREE_START(entry)),
+ MIN(high, VMMAP_FREE_END(entry)),
+ sz, align, offset, before_gap, after_gap) == 0) {
+ *entry_out = entry;
+ if (hint >= low_addr && hint <= high_addr) {
+ *addr_out = hint;
+ } else {
+ *addr_out = (direction == 1 ?
+ low_addr : high_addr);
+ }
+ return 0;
+ }
+ }
+
+ return ENOMEM;
+}
+
+/*
+ * Invoke address selector of uaddr.
+ * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
+ *
+ * Will invoke uvm_addr_isavail to fill in last_out.
+ */
+int
+uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, struct vm_map_entry**last_out,
+ vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
+{
+ int error;
+
+ if (uaddr == NULL)
+ return ENOMEM;
+
+ hint &= ~((vaddr_t)PAGE_MASK);
+ if (hint != 0 &&
+ !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
+ return ENOMEM;
+
+ error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
+ entry_out, addr_out, sz, align, offset, prot, hint);
+
+ if (error == 0) {
+ KASSERT(*entry_out != NULL);
+ *last_out = NULL;
+ if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
+ *addr_out, sz)) {
+ panic("uvm_addr_invoke: address selector %p "
+ "(%s 0x%lx-0x%lx) "
+ "returned unavailable address 0x%lx",
+ uaddr, uaddr->uaddr_functions->uaddr_name,
+ uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
+ *addr_out);
+ }
+ }
+
+ return error;
+}
+
+#if defined(DEBUG) || defined(DDB)
+void
+uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
+ int (*pr)(const char*, ...))
+{
+ if (uaddr == NULL) {
+ (*pr)("- uvm_addr %s: NULL\n", slot);
+ return;
+ }
+
+ (*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
+ uaddr->uaddr_functions->uaddr_name,
+ uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
+ if (uaddr->uaddr_functions->uaddr_print == NULL)
+ return;
+
+ (*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
+}
+#endif /* DEBUG || DDB */
+
+/*
+ * Destroy a uvm_addr_state structure.
+ * The uaddr must have been previously allocated from uaddr_state_pool.
+ */
+void
+uaddr_destroy(struct uvm_addr_state *uaddr)
+{
+ pool_put(&uaddr_pool, uaddr);
+}
+
+
+/*
+ * Linear allocator.
+ * This allocator uses a first-fit algorithm.
+ *
+ * If hint is set, search will start at the hint position.
+ * Only searches forward.
+ */
+
+const struct uvm_addr_functions uaddr_lin_functions = {
+ .uaddr_select = &uaddr_lin_select,
+ .uaddr_destroy = &uaddr_destroy,
+ .uaddr_name = "uaddr_lin"
+};
+
+struct uvm_addr_state*
+uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
+{
+ struct uvm_addr_state* uaddr;
+
+ uaddr = pool_get(&uaddr_pool, PR_WAITOK);
+ uaddr->uaddr_minaddr = minaddr;
+ uaddr->uaddr_maxaddr = maxaddr;
+ uaddr->uaddr_functions = &uaddr_lin_functions;
+ return uaddr;
+}
+
+int
+uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ vaddr_t guard_sz;
+
+ /*
+ * Deal with guardpages: search for space with one extra page.
+ */
+ guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
+
+ if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
+ return ENOMEM;
+ return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
+ align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
+ 0, guard_sz);
+}
+
+
+/*
+ * Randomized allocator.
+ * This allocator uses uvm_map_hint to acquire a random address and searches
+ * from there.
+ */
+
+const struct uvm_addr_functions uaddr_rnd_functions = {
+ .uaddr_select = &uaddr_rnd_select,
+ .uaddr_free_insert = &uaddr_rnd_insert,
+ .uaddr_free_remove = &uaddr_rnd_remove,
+ .uaddr_destroy = &uaddr_rnd_destroy,
+#if defined(DEBUG) || defined(DDB)
+ .uaddr_print = &uaddr_rnd_print,
+#endif /* DEBUG || DDB */
+ .uaddr_name = "uaddr_rnd"
+};
+
+struct uvm_addr_state*
+uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
+{
+ struct uaddr_rnd_state* uaddr;
+
+ uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
+ uaddr->ur_uaddr.uaddr_minaddr = minaddr;
+ uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
+ uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
+ TAILQ_INIT(&uaddr->ur_free);
+ return &uaddr->ur_uaddr;
+}
+
+int
+uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ struct vmspace *vm;
+ vaddr_t guard_sz;
+ vaddr_t low_addr, high_addr;
+ struct vm_map_entry *entry;
+ vsize_t before_gap, after_gap;
+ vaddr_t tmp;
+
+ KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
+ vm = (struct vmspace*)map;
+
+ /* Deal with guardpages: search for space with one extra page. */
+ guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
+
+ /* Quick fail if the allocation won't fit. */
+ if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
+ return ENOMEM;
+
+ /* Select a hint. */
+ if (hint == 0)
+ hint = uvm_map_hint(vm, prot);
+ /* Clamp hint to uaddr range. */
+ hint = MIN(MAX(hint, uaddr->uaddr_minaddr),
+ uaddr->uaddr_maxaddr - sz - guard_sz);
+
+ /* Align hint to align,offset parameters. */
+ tmp = hint;
+ hint = uvm_addr_align_forward(tmp, align, offset);
+ /* Check for overflow during alignment. */
+ if (hint < tmp || hint > uaddr->uaddr_maxaddr - sz - guard_sz)
+ return ENOMEM; /* Compatibility mode: never look backwards. */
+
+ before_gap = 0;
+ after_gap = guard_sz;
+
+ /*
+ * Find the first entry at or after hint with free space.
+ *
+ * Since we need an entry that is on the free-list, search until
+ * we hit an entry that is owned by our uaddr.
+ */
+ for (entry = uvm_map_entrybyaddr(&map->addr, hint);
+ entry != NULL &&
+ uvm_map_uaddr_e(map, entry) != uaddr;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ /* Fail if we search past uaddr_maxaddr. */
+ if (VMMAP_FREE_START(entry) >= uaddr->uaddr_maxaddr) {
+ entry = NULL;
+ break;
+ }
+ }
+
+ for ( /* initial entry filled in above */ ;
+ entry != NULL && VMMAP_FREE_START(entry) < uaddr->uaddr_maxaddr;
+ entry = TAILQ_NEXT(entry, dfree.tailq)) {
+ if (uvm_addr_fitspace(&low_addr, &high_addr,
+ MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
+ MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
+ sz, align, offset, before_gap, after_gap) == 0) {
+ *entry_out = entry;
+ if (hint >= low_addr && hint <= high_addr)
+ *addr_out = hint;
+ else
+ *addr_out = low_addr;
+ return 0;
+ }
+ }
+
+ return ENOMEM;
+}
+
+/*
+ * Destroy a uaddr_rnd_state structure.
+ */
+void
+uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
+{
+ pool_put(&uaddr_rnd_pool, uaddr);
+}
+
+/*
+ * Add entry to tailq.
+ */
+void
+uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_rnd_state *uaddr;
+ struct vm_map_entry *prev;
+
+ uaddr = (struct uaddr_rnd_state*)uaddr_p;
+ KASSERT(entry == RB_FIND(uvm_map_addr, &map->addr, entry));
+
+ /*
+ * Make prev the first vm_map_entry before entry.
+ */
+ for (prev = RB_PREV(uvm_map_addr, &map->addr, entry);
+ prev != NULL;
+ prev = RB_PREV(uvm_map_addr, &map->addr, prev)) {
+ /* Stop and fail when reaching uaddr minaddr. */
+ if (VMMAP_FREE_START(prev) < uaddr_p->uaddr_minaddr) {
+ prev = NULL;
+ break;
+ }
+
+ KASSERT(prev->etype & UVM_ET_FREEMAPPED);
+ if (uvm_map_uaddr_e(map, prev) == uaddr_p)
+ break;
+ }
+
+ /* Perform insertion. */
+ if (prev == NULL)
+ TAILQ_INSERT_HEAD(&uaddr->ur_free, entry, dfree.tailq);
+ else
+ TAILQ_INSERT_AFTER(&uaddr->ur_free, prev, entry, dfree.tailq);
+}
+
+/*
+ * Remove entry from tailq.
+ */
+void
+uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_rnd_state *uaddr;
+
+ uaddr = (struct uaddr_rnd_state*)uaddr_p;
+ TAILQ_REMOVE(&uaddr->ur_free, entry, dfree.tailq);
+}
+
+#if defined(DEBUG) || defined(DDB)
+void
+uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
+ int (*pr)(const char*, ...))
+{
+ struct vm_map_entry *entry;
+ struct uaddr_rnd_state *uaddr;
+ vaddr_t addr;
+ size_t count;
+ vsize_t space;
+
+ uaddr = (struct uaddr_rnd_state*)uaddr_p;
+ addr = 0;
+ count = 0;
+ space = 0;
+ TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
+ count++;
+ space += entry->fspace;
+
+ if (full) {
+ (*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
+ entry, entry->start, entry->end,
+ entry->guard, entry->fspace);
+ (*pr)("\t\tfree: 0x%lx-0x%lx\n",
+ VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
+ }
+ if (entry->start < addr) {
+ if (!full)
+ (*pr)("\tentry %p: 0x%lx-0x%lx "
+ "G=0x%lx F=0x%lx\n",
+ entry, entry->start, entry->end,
+ entry->guard, entry->fspace);
+ (*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
+ entry->start, addr);
+ }
+
+ addr = VMMAP_FREE_END(entry);
+ }
+ (*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
+}
+#endif /* DEBUG || DDB */
+
+
+/*
+ * An allocator that selects an address within distance of the hint.
+ *
+ * If no hint is given, the allocator refuses to allocate.
+ */
+
+const struct uvm_addr_functions uaddr_hint_functions = {
+ .uaddr_select = &uaddr_hint_select,
+ .uaddr_destroy = &uaddr_hint_destroy,
+ .uaddr_name = "uaddr_hint"
+};
+
+/*
+ * Create uaddr_hint state.
+ */
+struct uvm_addr_state*
+uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist)
+{
+ struct uaddr_hint_state* ua_hint;
+
+ KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint));
+
+ ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK);
+ ua_hint->uaddr.uaddr_minaddr = minaddr;
+ ua_hint->uaddr.uaddr_maxaddr = maxaddr;
+ ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions;
+ ua_hint->max_dist = max_dist;
+ return &ua_hint->uaddr;
+}
+
+/*
+ * Destroy uaddr_hint state.
+ */
+void
+uaddr_hint_destroy(struct uvm_addr_state *uaddr)
+{
+ pool_put(&uaddr_hint_pool, uaddr);
+}
+
+/*
+ * Hint selector.
+ *
+ * Attempts to find an address that is within max_dist of the hint.
+ */
+int
+uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ struct uaddr_hint_state *uaddr = (struct uaddr_hint_state*)uaddr_param;
+ vsize_t before_gap, after_gap;
+ vaddr_t low, high;
+ int dir;
+
+ if (hint == 0)
+ return ENOMEM;
+
+ /*
+ * Calculate upper and lower bound for selected address.
+ */
+ high = hint + uaddr->max_dist;
+ if (high < hint) /* overflow */
+ high = map->max_offset;
+ high = MIN(high, uaddr->uaddr.uaddr_maxaddr);
+ if (high < sz)
+ return ENOMEM; /* Protect against underflow. */
+ high -= sz;
+
+ /* Calculate lower bound for selected address. */
+ low = hint - uaddr->max_dist;
+ if (low > hint) /* underflow */
+ low = map->min_offset;
+ low = MAX(low, uaddr->uaddr.uaddr_minaddr);
+
+ /* Search strategy setup. */
+ before_gap = PAGE_SIZE +
+ (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
+ after_gap = PAGE_SIZE +
+ (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
+ dir = (arc4random() & 0x01) ? 1 : -1;
+
+ /*
+ * Try to search:
+ * - forward, with gap
+ * - backward, with gap
+ * - forward, without gap
+ * - backward, without gap
+ * (Where forward is in the direction specified by dir and
+ * backward is in the direction specified by -dir).
+ */
+ if (uvm_addr_linsearch(map, uaddr_param,
+ entry_out, addr_out, hint, sz, align, offset,
+ dir, low, high, before_gap, after_gap) == 0)
+ return 0;
+ if (uvm_addr_linsearch(map, uaddr_param,
+ entry_out, addr_out, hint, sz, align, offset,
+ -dir, low, high, before_gap, after_gap) == 0)
+ return 0;
+
+ if (uvm_addr_linsearch(map, uaddr_param,
+ entry_out, addr_out, hint, sz, align, offset,
+ dir, low, high, 0, 0) == 0)
+ return 0;
+ if (uvm_addr_linsearch(map, uaddr_param,
+ entry_out, addr_out, hint, sz, align, offset,
+ -dir, low, high, 0, 0) == 0)
+ return 0;
+
+ return ENOMEM;
+}
+
+/*
+ * Kernel allocation bootstrap logic.
+ */
+
+const struct uvm_addr_functions uaddr_kernel_functions = {
+ .uaddr_select = &uaddr_kbootstrap_select,
+ .uaddr_destroy = &uaddr_kbootstrap_destroy,
+ .uaddr_name = "uaddr_kbootstrap"
+};
+
+/*
+ * Select an address from the map.
+ *
+ * This function ignores the uaddr spec and instead uses the map directly.
+ * Because of that property, the uaddr algorithm can be shared across all
+ * kernel maps.
+ */
+int
+uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry **entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
+{
+ vaddr_t tmp;
+
+ RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
+ if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
+ uvm_addr_fitspace(addr_out, &tmp,
+ VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
+ sz, align, offset, 0, 0) == 0)
+ return 0;
+ }
+
+ return ENOMEM;
+}
+
+/*
+ * Don't destroy the kernel bootstrap allocator.
+ */
+void
+uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
+{
+ KASSERT(uaddr == (struct uvm_addr_state*)&uaddr_kbootstrap);
+}
+
+/*
+ * Best fit algorithm.
+ */
+
+const struct uvm_addr_functions uaddr_bestfit_functions = {
+ .uaddr_select = &uaddr_bestfit_select,
+ .uaddr_free_insert = &uaddr_bestfit_insert,
+ .uaddr_free_remove = &uaddr_bestfit_remove,
+ .uaddr_destroy = &uaddr_bestfit_destroy,
+ .uaddr_name = "uaddr_bestfit"
+};
+
+struct uvm_addr_state*
+uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
+{
+ struct uaddr_bestfit_state *uaddr;
+
+ uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
+ uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
+ uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
+ uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
+ RB_INIT(&uaddr->ubf_free);
+ return &uaddr->ubf_uaddr;
+}
+
+void
+uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
+{
+ pool_put(&uaddr_bestfit_pool, uaddr);
+}
+
+void
+uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_bestfit_state *uaddr;
+ struct vm_map_entry *rb_rv;
+
+ uaddr = (struct uaddr_bestfit_state*)uaddr_p;
+ if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
+ NULL) {
+ panic("%s: duplicate insertion: state %p "
+ "interting %p, colliding with %p", __func__,
+ uaddr, entry, rb_rv);
+ }
+}
+
+void
+uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_bestfit_state *uaddr;
+
+ uaddr = (struct uaddr_bestfit_state*)uaddr_p;
+ if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
+ panic("%s: entry was not in tree", __func__);
+}
+
+int
+uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ vaddr_t min, max;
+ struct uaddr_bestfit_state *uaddr;
+ struct vm_map_entry *entry;
+ vsize_t guardsz;
+
+ uaddr = (struct uaddr_bestfit_state*)uaddr_p;
+ guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
+
+ /*
+ * Find smallest item on freelist capable of holding item.
+ * Deal with guardpages: search for space with one extra page.
+ */
+ entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
+ if (entry == NULL)
+ return ENOMEM;
+
+ /*
+ * Walk the tree until we find an entry that fits.
+ */
+ while (uvm_addr_fitspace(&min, &max,
+ VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
+ sz, align, offset, 0, guardsz) != 0) {
+ entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry);
+ if (entry == NULL)
+ return ENOMEM;
+ }
+
+ /*
+ * Return the address that generates the least fragmentation.
+ */
+ *entry_out = entry;
+ *addr_out = (min - VMMAP_FREE_START(entry) <=
+ VMMAP_FREE_END(entry) - guardsz - sz - max ?
+ min : max);
+ return 0;
+}
+
+
+/*
+ * A userspace allocator based on pivots.
+ */
+
+const struct uvm_addr_functions uaddr_pivot_functions = {
+ .uaddr_select = &uaddr_pivot_select,
+ .uaddr_free_insert = &uaddr_pivot_insert,
+ .uaddr_free_remove = &uaddr_pivot_remove,
+ .uaddr_destroy = &uaddr_pivot_destroy,
+#if defined(DEBUG) || defined(DDB)
+ .uaddr_print = &uaddr_pivot_print,
+#endif /* DEBUG || DDB */
+ .uaddr_name = "uaddr_pivot"
+};
+
+/*
+ * A special random function for pivots.
+ *
+ * This function will return:
+ * - a random number
+ * - a multiple of PAGE_SIZE
+ * - at least PAGE_SIZE
+ *
+ * The random function has a slightly higher chance to return a small number.
+ */
+vsize_t
+uaddr_pivot_random()
+{
+ int r;
+
+ /*
+ * The sum of two independent dice rolls has a triangular distribution.
+ * We map the highest probable number to 1, by folding the curve
+ * (think of a graph on a piece of paper, that you fold).
+ *
+ * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1
+ * have the same and highest probability of happening.
+ */
+ r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
+ (PIVOT_RND - 1);
+ if (r < 0)
+ r = -r;
+
+ /*
+ * Make the returned value at least PAGE_SIZE and a multiple of
+ * PAGE_SIZE.
+ */
+ return (vaddr_t)(1 + r) << PAGE_SHIFT;
+}
+
+/*
+ * Select a new pivot.
+ *
+ * A pivot must:
+ * - be chosen randomly
+ * - have a randomly chosen gap before it, where the uaddr_state starts
+ * - have a randomly chosen gap after it, before the uaddr_state ends
+ *
+ * Furthermore, the pivot must provide sufficient space for the allocation.
+ * The addr will be set to the selected address.
+ *
+ * Returns ENOMEM on failure.
+ */
+int
+uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
+ struct uaddr_pivot *pivot,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vsize_t before_gap, vsize_t after_gap)
+{
+ struct vm_map_entry *entry, *found;
+ vaddr_t minaddr, maxaddr;
+ vsize_t dist;
+ vaddr_t found_minaddr, found_maxaddr;
+ vaddr_t min, max;
+ vsize_t arc4_arg;
+ int fit_error;
+ u_int32_t path;
+
+ minaddr = uaddr->up_uaddr.uaddr_minaddr;
+ maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
+ KASSERT(minaddr < maxaddr);
+#ifdef DIAGNOSTIC
+ if (minaddr + 2 * PAGE_SIZE > maxaddr) {
+ panic("uaddr_pivot_newpivot: cannot grant random pivot "
+ "in area less than 2 pages (size = 0x%lx)",
+ maxaddr - minaddr);
+ }
+#endif /* DIAGNOSTIC */
+
+ /*
+ * Gap calculation: 1/32 of the size of the managed area.
+ *
+ * At most: sufficient to not get truncated at arc4random.
+ * At least: 2 PAGE_SIZE
+ *
+ * minaddr and maxaddr will be changed according to arc4random.
+ */
+ dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
+ if (dist >> PAGE_SHIFT > 0xffffffff) {
+ minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
+ maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
+ } else {
+ minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
+ PAGE_SHIFT;
+ maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
+ PAGE_SHIFT;
+ }
+
+ /*
+ * A very fast way to find an entry that will be large enough
+ * to hold the allocation, but still is found more or less
+ * randomly: the tree path selector has a 50% chance to go for
+ * a bigger or smaller entry.
+ *
+ * Note that the memory may actually be available,
+ * but the fragmentation may be so bad and the gaps chosen
+ * so unfortunately, that the allocation will not succeed.
+ * Or the alignment can only be satisfied by an entry that
+ * is not visited in the randomly selected path.
+ *
+ * This code finds an entry with sufficient space in O(log n) time.
+ */
+ path = arc4random();
+ found = NULL;
+ entry = RB_ROOT(&uaddr->up_free);
+ while (entry != NULL) {
+ fit_error = uvm_addr_fitspace(&min, &max,
+ MAX(VMMAP_FREE_START(entry), minaddr),
+ MIN(VMMAP_FREE_END(entry), maxaddr),
+ sz, align, offset, before_gap, after_gap);
+
+ /* It fits, save this entry. */
+ if (fit_error == 0) {
+ found = entry;
+ found_minaddr = min;
+ found_maxaddr = max;
+ }
+
+ /* Next. */
+ if (fit_error != 0)
+ entry = RB_RIGHT(entry, dfree.rbtree);
+ else if ((path & 0x1) == 0) {
+ path >>= 1;
+ entry = RB_RIGHT(entry, dfree.rbtree);
+ } else {
+ path >>= 1;
+ entry = RB_LEFT(entry, dfree.rbtree);
+ }
+ }
+ if (found == NULL)
+ return ENOMEM; /* No sufficiently large region found. */
+
+ /*
+ * Calculate a random address within found.
+ *
+ * found_minaddr and found_maxaddr are already aligned, so be sure
+ * to select a multiple of align as the offset in the entry.
+ * Preferably, arc4random_uniform is used to provide no bias within
+ * the entry.
+ * However, if the size of the entry exceeds arc4random_uniform's
+ * argument limit, we simply use arc4random (thus limiting ourselves
+ * to 4G * PAGE_SIZE bytes offset).
+ */
+ if (found_maxaddr == found_minaddr)
+ *addr_out = found_minaddr;
+ else {
+ KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
+ arc4_arg = found_maxaddr - found_minaddr;
+ if (arc4_arg > 0xffffffff) {
+ *addr_out = found_minaddr +
+ (arc4random() & (align - 1));
+ } else {
+ *addr_out = found_minaddr +
+ (arc4random_uniform(arc4_arg) & (align - 1));
+ }
+ }
+ /* Address was found in this entry. */
+ *entry_out = found;
+
+ /*
+ * Set up new pivot and return selected address.
+ *
+ * Depending on the direction of the pivot, the pivot must be placed
+ * at the bottom or the top of the allocation:
+ * - if the pivot moves upwards, place the pivot at the top of the
+ * allocation,
+ * - if the pivot moves downwards, place the pivot at the bottom
+ * of the allocation.
+ */
+ pivot->entry = found;
+ pivot->dir = (arc4random() & 0x1 ? 1 : -1);
+ if (pivot->dir > 0)
+ pivot->addr = *addr_out + sz;
+ else
+ pivot->addr = *addr_out;
+ pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
+ return 0;
+}
+
+/*
+ * Pivot selector.
+ *
+ * Each time the selector is invoked, it will select a random pivot, which
+ * it will use to select memory with. The memory will be placed at the pivot,
+ * with a randomly sized gap between the allocation and the pivot.
+ * The pivot will then move so it will never revisit this address.
+ *
+ * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
+ * expired, it will be replaced with a newly created pivot. Pivots also
+ * automatically expire if they fail to provide memory for an allocation.
+ *
+ * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
+ * which will ensure the pivot points at memory in such a way that the
+ * allocation will succeed.
+ * As an added bonus, the uaddr_pivot_newpivot() function will perform the
+ * allocation immediately and move the pivot as appropriate.
+ *
+ * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
+ * allocation to succeed, it will not create a new pivot and the allocation
+ * will fail.
+ *
+ * A pivot running into used memory will automatically expire (because it will
+ * fail to allocate).
+ *
+ * Characteristics of the allocator:
+ * - best case, an allocation is O(log N)
+ * (it would be O(1), if it weren't for the need to check if the memory is
+ * free; although that can be avoided...)
+ * - worst case, an allocation is O(log N)
+ * (the uaddr_pivot_newpivot() function has that complexity)
+ * - failed allocations always take O(log N)
+ * (the uaddr_pivot_newpivot() function will walk that deep into the tree).
+ */
+int
+uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ struct uaddr_pivot_state *uaddr;
+ struct vm_map_entry *entry;
+ struct uaddr_pivot *pivot;
+ vaddr_t min, max;
+ vsize_t before_gap, after_gap;
+ int err;
+
+ /* Hint must be handled by dedicated hint allocator. */
+ if (hint != 0)
+ return EINVAL;
+
+ /*
+ * Select a random pivot and a random gap sizes around the allocation.
+ */
+ uaddr = (struct uaddr_pivot_state*)uaddr_p;
+ pivot = &uaddr->up_pivots[
+ arc4random_uniform(nitems(uaddr->up_pivots))];
+ before_gap = uaddr_pivot_random();
+ after_gap = uaddr_pivot_random();
+ if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
+ goto expired; /* Pivot is invalid (null or expired). */
+
+ /*
+ * Attempt to use the pivot to map the entry.
+ */
+ entry = pivot->entry;
+ if (pivot->dir > 0) {
+ if (uvm_addr_fitspace(&min, &max,
+ MAX(VMMAP_FREE_START(entry), pivot->addr),
+ VMMAP_FREE_END(entry), sz, align, offset,
+ before_gap, after_gap) == 0) {
+ *addr_out = min;
+ *entry_out = entry;
+ pivot->addr = min + sz;
+ pivot->expire--;
+ return 0;
+ }
+ } else {
+ if (uvm_addr_fitspace(&min, &max,
+ VMMAP_FREE_START(entry),
+ MIN(VMMAP_FREE_END(entry), pivot->addr),
+ sz, align, offset, before_gap, after_gap) == 0) {
+ *addr_out = max;
+ *entry_out = entry;
+ pivot->addr = max;
+ pivot->expire--;
+ return 0;
+ }
+ }
+
+expired:
+ /*
+ * Pivot expired or allocation failed.
+ * Use pivot selector to do the allocation and find a new pivot.
+ */
+ err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
+ sz, align, offset, before_gap, after_gap);
+ return err;
+}
+
+/*
+ * Free the pivot.
+ */
+void
+uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
+{
+ pool_put(&uaddr_pivot_pool, uaddr);
+}
+
+/*
+ * Insert an entry with free space in the space tree.
+ */
+void
+uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_pivot_state *uaddr;
+ struct vm_map_entry *rb_rv;
+ struct uaddr_pivot *p;
+ vaddr_t check_addr;
+ vaddr_t start, end;
+
+ uaddr = (struct uaddr_pivot_state*)uaddr_p;
+ if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
+ NULL) {
+ panic("%s: duplicate insertion: state %p "
+ "inserting entry %p which collides with %p", __func__,
+ uaddr, entry, rb_rv);
+ }
+
+ start = VMMAP_FREE_START(entry);
+ end = VMMAP_FREE_END(entry);
+
+ /*
+ * Update all pivots that are contained in this entry.
+ */
+ for (p = &uaddr->up_pivots[0];
+ p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
+ check_addr = p->addr;
+ if (check_addr == 0)
+ continue;
+ if (p->dir < 0)
+ check_addr--;
+
+ if (start <= check_addr &&
+ check_addr < end) {
+ KASSERT(p->entry == NULL);
+ p->entry = entry;
+ }
+ }
+}
+
+/*
+ * Remove an entry with free space from the space tree.
+ */
+void
+uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
+ struct vm_map_entry *entry)
+{
+ struct uaddr_pivot_state *uaddr;
+ struct uaddr_pivot *p;
+
+ uaddr = (struct uaddr_pivot_state*)uaddr_p;
+ if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
+ panic("%s: entry was not in tree", __func__);
+
+ /*
+ * Inform any pivot with this entry that the entry is gone.
+ * Note that this does not automatically invalidate the pivot.
+ */
+ for (p = &uaddr->up_pivots[0];
+ p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
+ if (p->entry == entry)
+ p->entry = NULL;
+ }
+}
+
+/*
+ * Create a new pivot selector.
+ *
+ * Initially, all pivots are in the expired state.
+ * Two reasons for this:
+ * - it means this allocator will not take a huge amount of time
+ * - pivots select better on demand, because the pivot selection will be
+ * affected by preceding allocations:
+ * the next pivots will likely end up in different segments of free memory,
+ * that was segmented by an earlier allocation; better spread.
+ */
+struct uvm_addr_state*
+uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
+{
+ struct uaddr_pivot_state *uaddr;
+
+ uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
+ uaddr->up_uaddr.uaddr_minaddr = minaddr;
+ uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
+ uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
+ RB_INIT(&uaddr->up_free);
+ bzero(uaddr->up_pivots, sizeof(uaddr->up_pivots));
+
+ return &uaddr->up_uaddr;
+}
+
+#if defined(DEBUG) || defined(DDB)
+/*
+ * Print the uaddr_pivot_state.
+ *
+ * If full, a listing of all entries in the state will be provided.
+ */
+void
+uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
+ int (*pr)(const char*, ...))
+{
+ struct uaddr_pivot_state *uaddr;
+ struct uaddr_pivot *pivot;
+ struct vm_map_entry *entry;
+ int i;
+ vaddr_t check_addr;
+
+ uaddr = (struct uaddr_pivot_state*)uaddr_p;
+
+ for (i = 0; i < NUM_PIVOTS; i++) {
+ pivot = &uaddr->up_pivots[i];
+
+ (*pr)("\tpivot 0x%lx, epires in %d, direction %d\n",
+ pivot->addr, pivot->expire, pivot->dir);
+ }
+ if (!full)
+ return;
+
+ if (RB_EMPTY(&uaddr->up_free))
+ (*pr)("\tempty\n");
+ /* Print list of free space. */
+ RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
+ (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
+ VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
+ VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
+
+ for (i = 0; i < NUM_PIVOTS; i++) {
+ pivot = &uaddr->up_pivots[i];
+ check_addr = pivot->addr;
+ if (check_addr == 0)
+ continue;
+ if (pivot->dir < 0)
+ check_addr--;
+
+ if (VMMAP_FREE_START(entry) <= check_addr &&
+ check_addr < VMMAP_FREE_END(entry)) {
+ (*pr)("\t\tcontains pivot %d (0x%lx)\n",
+ i, pivot->addr);
+ }
+ }
+ }
+}
+#endif /* DEBUG || DDB */
+
+/*
+ * Strategy for uaddr_stack_brk_select.
+ */
+struct uaddr_bs_strat {
+ vaddr_t start; /* Start of area. */
+ vaddr_t end; /* End of area. */
+ int dir; /* Search direction. */
+};
+
+/*
+ * Stack/break allocator.
+ *
+ * Stack area is grown into in the opposite direction of the stack growth,
+ * brk area is grown downward (because sbrk() grows upward).
+ *
+ * Both areas are grown into proportionally: a weighted chance is used to
+ * select which one (stack or brk area) to try. If the allocation fails,
+ * the other one is tested.
+ */
+
+const struct uvm_addr_functions uaddr_stack_brk_functions = {
+ .uaddr_select = &uaddr_stack_brk_select,
+ .uaddr_destroy = &uaddr_destroy,
+ .uaddr_name = "uaddr_stckbrk"
+};
+
+/*
+ * Stack/brk address selector.
+ */
+int
+uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint)
+{
+ vsize_t before_gap, after_gap;
+ int stack_idx, brk_idx;
+ struct uaddr_bs_strat strat[2], *s;
+ vsize_t sb_size;
+
+ /*
+ * Choose gap size and if the stack is searched before or after the
+ * brk area.
+ */
+ before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
+ after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
+
+ sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start);
+ sb_size >>= PAGE_SHIFT;
+ if (arc4random_uniform(MAX(sb_size, 0xffffffff)) >
+ map->b_end - map->b_start) {
+ brk_idx = 1;
+ stack_idx = 0;
+ } else {
+ brk_idx = 0;
+ stack_idx = 1;
+ }
+
+ /*
+ * Set up stack search strategy.
+ */
+ s = &strat[stack_idx];
+ s->start = MAX(map->s_start, uaddr->uaddr_minaddr);
+ s->end = MIN(map->s_end, uaddr->uaddr_maxaddr);
+#ifdef MACHINE_STACK_GROWS_UP
+ s->dir = -1;
+#else
+ s->dir = 1;
+#endif
+
+ /*
+ * Set up brk search strategy.
+ */
+ s = &strat[brk_idx];
+ s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
+ s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
+ s->dir = -1; /* Opposite of brk() growth. */
+
+ /*
+ * Linear search for space.
+ */
+ for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
+ if (s->end - s->start < sz)
+ continue;
+ if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
+ 0, sz, align, offset, s->dir, s->start, s->end - sz,
+ before_gap, after_gap) == 0)
+ return 0;
+ }
+
+ return ENOMEM;
+}
+
+struct uvm_addr_state*
+uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
+{
+ struct uvm_addr_state* uaddr;
+
+ uaddr = pool_get(&uaddr_pool, PR_WAITOK);
+ uaddr->uaddr_minaddr = minaddr;
+ uaddr->uaddr_maxaddr = maxaddr;
+ uaddr->uaddr_functions = &uaddr_stack_brk_functions;
+ return uaddr;
+}
+
+
+RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
+ uvm_mapent_fspace_cmp);
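
uaddr_pivot_random() above builds its gap sizes by adding two uniform draws and folding the result, so that small gaps dominate while large gaps stay possible. The standalone program below reproduces that distribution in userland and prints a histogram; PIVOT_RND is copied from the file above, while rand() is only a stand-in for arc4random_uniform() and the toy_ names are invented for the sketch.

/* Userland histogram of the folded two-dice gap distribution used by the pivot selector. */
#include <stdio.h>
#include <stdlib.h>

#define PIVOT_RND	8		/* same constant as in uvm_addr.c */
#define SAMPLES		1000000

/* rand() % n is a stand-in for arc4random_uniform(n); good enough for a histogram. */
static int
toy_pivot_gap_pages(void)
{
	int r;

	r = rand() % PIVOT_RND + rand() % PIVOT_RND - (PIVOT_RND - 1);
	if (r < 0)
		r = -r;
	return 1 + r;			/* at least one page, like the kernel version */
}

int
main(void)
{
	int hist[PIVOT_RND + 1] = { 0 };
	int i, gap;

	for (i = 0; i < SAMPLES; i++)
		hist[toy_pivot_gap_pages()]++;

	/* Gaps of a few pages dominate; the largest gap (PIVOT_RND pages) stays rare. */
	for (gap = 1; gap <= PIVOT_RND; gap++)
		printf("%2d pages: %6.2f%%\n", gap, 100.0 * hist[gap] / SAMPLES);
	return 0;
}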
diff --git a/sys/uvm/uvm_addr.h b/sys/uvm/uvm_addr.h
new file mode 100644
index 00000000000..5d94947d5a3
--- /dev/null
+++ b/sys/uvm/uvm_addr.h
@@ -0,0 +1,116 @@
+/* $OpenBSD: uvm_addr.h,v 1.1 2012/03/09 13:01:29 ariane Exp $ */
+
+/*
+ * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _UVM_UVM_ADDR_H_
+#define _UVM_UVM_ADDR_H_
+
+#include <uvm/uvm_extern.h>
+
+/*
+ * Address selection logic.
+ *
+ * Address selection is just that: selection. These functions may make no
+ * changes to the map, except for their own state (which is passed as a
+ * uaddr_state pointer).
+ */
+
+
+/*
+ * UVM address selection base state.
+ *
+ * Each uvm address algorithm requires these parameters:
+ * - lower bound address (page aligned)
+ * - upper bound address (page aligned)
+ * - function address pointers
+ */
+struct uvm_addr_state {
+ vaddr_t uaddr_minaddr;
+ vaddr_t uaddr_maxaddr;
+ const struct uvm_addr_functions *uaddr_functions;
+};
+
+/*
+ * This structure describes one algorithm implementation.
+ *
+ * Each algorithm is described in terms of:
+ * - uaddr_select: an address selection algorithm
+ * - uaddr_free_insert: a freelist insertion function (optional)
+ * - uaddr_free_remove: a freelist deletion function (optional)
+ * - uaddr_destroy: a destructor for the algorithm state
+ */
+struct uvm_addr_functions {
+ int (*uaddr_select)(struct vm_map *map,
+ struct uvm_addr_state *uaddr,
+ struct vm_map_entry**entry_out, vaddr_t *addr_out,
+ vsize_t sz, vaddr_t align, vaddr_t offset,
+ vm_prot_t prot, vaddr_t hint);
+ void (*uaddr_free_insert)(struct vm_map *map,
+ struct uvm_addr_state *uaddr_state,
+ struct vm_map_entry *entry);
+ void (*uaddr_free_remove)(struct vm_map *map,
+ struct uvm_addr_state *uaddr_state,
+ struct vm_map_entry *entry);
+ void (*uaddr_destroy)(struct uvm_addr_state *uaddr_state);
+ void (*uaddr_print)(struct uvm_addr_state *uaddr_state, boolean_t full,
+ int (*pr)(const char*, ...));
+
+ const char* uaddr_name; /* Name of the allocator. */
+};
+
+
+#ifdef _KERNEL
+
+void uvm_addr_init(void);
+void uvm_addr_destroy(struct uvm_addr_state*);
+vaddr_t uvm_addr_align(vaddr_t, vaddr_t, vaddr_t);
+vaddr_t uvm_addr_align_back(vaddr_t, vaddr_t, vaddr_t);
+int uvm_addr_linsearch(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ vaddr_t *addr_out, vaddr_t, vsize_t,
+ vaddr_t, vaddr_t, int, vaddr_t, vaddr_t,
+ vsize_t, vsize_t);
+int uvm_addr_invoke(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry**,
+ struct vm_map_entry**, vaddr_t*,
+ vsize_t, vaddr_t, vaddr_t, vm_prot_t, vaddr_t);
+struct uvm_addr_state *uaddr_lin_create(vaddr_t, vaddr_t);
+struct uvm_addr_state *uaddr_rnd_create(vaddr_t, vaddr_t);
+struct uvm_addr_state *uaddr_hint_create(vaddr_t, vaddr_t, vsize_t);
+struct uvm_addr_state *uaddr_bestfit_create(vaddr_t, vaddr_t);
+struct uvm_addr_state *uaddr_pivot_create(vaddr_t, vaddr_t);
+struct uvm_addr_state *uaddr_stack_brk_create(vaddr_t, vaddr_t);
+int uvm_addr_fitspace(vaddr_t*, vaddr_t*,
+ vaddr_t, vaddr_t, vsize_t, vaddr_t, vaddr_t,
+ vsize_t, vsize_t);
+
+#if defined(DEBUG) || defined(DDB)
+void uvm_addr_print(struct uvm_addr_state*, const char*,
+ boolean_t, int (*pr)(const char*, ...));
+#endif /* DEBUG || DDB */
+
+/*
+ * Kernel bootstrap allocator.
+ */
+RB_HEAD(uaddr_free_rbtree, vm_map_entry);
+RB_PROTOTYPE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
+ uvm_mapent_fspace_cmp);
+
+extern struct uvm_addr_state uaddr_kbootstrap;
+
+#endif /* _KERNEL */
+#endif /* _UVM_UVM_ADDR_H_ */
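The uvm_addr_functions table above is a plain function-pointer vtable: each allocator exports a select routine plus optional freelist hooks. A stand-alone sketch of that pattern, with simplified stand-in types rather than the kernel's:

#include <stdio.h>

struct demo_state { unsigned long minaddr, maxaddr; };

struct demo_functions {
	int (*select)(struct demo_state *, unsigned long *, unsigned long);
	const char *name;
};

static int
demo_linear_select(struct demo_state *s, unsigned long *out, unsigned long sz)
{
	if (s->maxaddr - s->minaddr < sz)
		return -1;		/* would be ENOMEM in the kernel */
	*out = s->minaddr;		/* first fit at the bottom */
	return 0;
}

static const struct demo_functions demo_linear = {
	demo_linear_select, "demo linear"
};

int
main(void)
{
	struct demo_state s = { 0x1000, 0x100000 };
	unsigned long addr;

	if (demo_linear.select(&s, &addr, 0x4000) == 0)
		printf("%s picked 0x%lx\n", demo_linear.name, addr);
	return 0;
}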
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 201abdb923a..991a44d4776 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.103 2011/07/08 00:10:59 tedu Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.104 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -185,6 +185,7 @@ typedef int vm_prot_t;
#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
#define UVM_FLAG_HOLE 0x400000 /* no backend */
+#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution */
/* macros to extract info */
#define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK)
@@ -644,10 +645,9 @@ void km_free(void *, size_t, const struct kmem_va_mode *,
const struct kmem_pa_mode *);
/* uvm_map.c */
-#define uvm_map(_m, _a, _sz, _u, _f, _al, _fl) uvm_map_p(_m, _a, _sz, _u, _f, _al, _fl, 0)
-int uvm_map_p(vm_map_t, vaddr_t *, vsize_t,
+int uvm_map(vm_map_t, vaddr_t *, vsize_t,
struct uvm_object *, voff_t, vsize_t,
- uvm_flag_t, struct proc *);
+ uvm_flag_t);
int uvm_map_pageable(vm_map_t, vaddr_t,
vaddr_t, boolean_t, int);
int uvm_map_pageable_all(vm_map_t, int, vsize_t);
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index b699bba34c5..03a4418dac6 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_fault.c,v 1.62 2011/07/03 18:34:14 oga Exp $ */
+/* $OpenBSD: uvm_fault.c,v 1.63 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */
/*
@@ -1701,7 +1701,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
void
uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
{
- vm_map_entry_t entry;
+ vm_map_entry_t entry, next;
pmap_t pmap = vm_map_pmap(map);
vaddr_t va;
paddr_t pa;
@@ -1734,9 +1734,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
*/
KASSERT(va >= entry->start);
while (va >= entry->end) {
- KASSERT(entry->next != &map->header &&
- entry->next->start <= entry->end);
- entry = entry->next;
+ next = RB_NEXT(uvm_map_addr, &map->addr, entry);
+ KASSERT(next != NULL && next->start <= entry->end);
+ entry = next;
}
/*
@@ -1825,6 +1825,9 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
*/
while (1) {
+ if (ufi->orig_rvaddr < ufi->map->min_offset ||
+ ufi->orig_rvaddr >= ufi->map->max_offset)
+ return(FALSE);
/*
* lock map
@@ -1839,7 +1842,7 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
* lookup
*/
if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
- &ufi->entry)) {
+ &ufi->entry)) {
uvmfault_unlockmaps(ufi, write_lock);
return(FALSE);
}
diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c
index fce559d83e5..81110d054e8 100644
--- a/sys/uvm/uvm_init.c
+++ b/sys/uvm/uvm_init.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_init.c,v 1.28 2010/08/07 03:50:02 krw Exp $ */
+/* $OpenBSD: uvm_init.c,v 1.29 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_init.c,v 1.14 2000/06/27 17:29:23 mrg Exp $ */
/*
@@ -51,6 +51,7 @@
#include <sys/pool.h>
#include <uvm/uvm.h>
+#include <uvm/uvm_addr.h>
/*
* struct uvm: we store all global vars in this structure to make them
@@ -177,4 +178,15 @@ uvm_init(void)
* init anonymous memory systems
*/
uvm_anon_init();
+
+ /*
+ * Switch kernel and kmem_map over to a best-fit allocator,
+ * instead of walking the tree.
+ */
+ uvm_map_set_uaddr(kernel_map, &kernel_map->uaddr_any[3],
+ uaddr_bestfit_create(vm_map_min(kernel_map),
+ vm_map_max(kernel_map)));
+ uvm_map_set_uaddr(kmem_map, &kmem_map->uaddr_any[3],
+ uaddr_bestfit_create(vm_map_min(kmem_map),
+ vm_map_max(kmem_map)));
}
diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c
index 876b5420b6f..bfeea500ace 100644
--- a/sys/uvm/uvm_io.c
+++ b/sys/uvm/uvm_io.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_io.c,v 1.19 2011/06/06 17:10:23 ariane Exp $ */
+/* $OpenBSD: uvm_io.c,v 1.20 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */
/*
@@ -64,7 +64,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags)
{
vaddr_t baseva, endva, pageoffset, kva;
vsize_t chunksz, togo, sz;
- vm_map_entry_t dead_entries;
+ struct uvm_map_deadq dead_entries;
int error, extractflags;
/*
@@ -93,7 +93,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags)
chunksz = min(round_page(togo + pageoffset), MAXBSIZE);
error = 0;
- extractflags = UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG;
+ extractflags = 0;
if (flags & UVM_IO_FIXPROT)
extractflags |= UVM_EXTRACT_FIXPROT;
@@ -107,7 +107,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags)
* step 2: extract mappings from the map into kernel_map
*/
- error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva,
+ error = uvm_map_extract(map, baseva, chunksz, &kva,
extractflags);
if (error) {
@@ -139,12 +139,11 @@ uvm_io(vm_map_t map, struct uio *uio, int flags)
*/
vm_map_lock(kernel_map);
+ TAILQ_INIT(&dead_entries);
uvm_unmap_remove(kernel_map, kva, kva+chunksz,
- &dead_entries, NULL, FALSE);
+ &dead_entries, FALSE, TRUE);
vm_map_unlock(kernel_map);
-
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, AMAP_REFALL);
+ uvm_unmap_detach(&dead_entries, AMAP_REFALL);
/*
* We defer checking the error return from uiomove until
diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c
index da5686d0881..aa97110d6bf 100644
--- a/sys/uvm/uvm_km.c
+++ b/sys/uvm/uvm_km.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_km.c,v 1.106 2011/07/03 18:34:14 oga Exp $ */
+/* $OpenBSD: uvm_km.c,v 1.107 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */
/*
@@ -138,7 +138,6 @@
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>
-
#include <uvm/uvm.h>
/*
@@ -184,7 +183,13 @@ uvm_km_init(vaddr_t start, vaddr_t end)
* before installing.
*/
- uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
+ uvm_map_setup(&kernel_map_store, base, end,
+#ifdef KVA_GUARDPAGES
+ VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES
+#else
+ VM_MAP_PAGEABLE
+#endif
+ );
kernel_map_store.pmap = pmap_kernel();
if (base != start && uvm_map(&kernel_map_store, &base, start - base,
NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
@@ -464,16 +469,16 @@ uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
void
uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size)
{
- struct vm_map_entry *dead_entries;
+ struct uvm_map_deadq dead_entries;
vm_map_lock(map);
+ TAILQ_INIT(&dead_entries);
uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size),
- &dead_entries, NULL, FALSE);
+ &dead_entries, FALSE, TRUE);
wakeup(map);
vm_map_unlock(map);
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
+ uvm_unmap_detach(&dead_entries, 0);
}
/*
@@ -692,8 +697,10 @@ struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *);
void
uvm_km_page_init(void)
{
- int lowat_min;
- int i;
+ int lowat_min;
+ int i;
+ int len, bulk;
+ vaddr_t addr;
mtx_init(&uvm_km_pages.mtx, IPL_VM);
if (!uvm_km_pages.lowat) {
@@ -709,14 +716,27 @@ uvm_km_page_init(void)
if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX)
uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX;
- for (i = 0; i < uvm_km_pages.hiwat; i++) {
- uvm_km_pages.page[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map,
- NULL, PAGE_SIZE, UVM_KMF_NOWAIT|UVM_KMF_VALLOC);
- if (uvm_km_pages.page[i] == 0)
- break;
+ /* Allocate all pages in as few allocations as possible. */
+ len = 0;
+ bulk = uvm_km_pages.hiwat;
+ while (len < uvm_km_pages.hiwat && bulk > 0) {
+ bulk = MIN(bulk, uvm_km_pages.hiwat - len);
+ addr = vm_map_min(kernel_map);
+ if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT,
+ NULL, UVM_UNKNOWN_OFFSET, 0,
+ UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
+ UVM_ADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) {
+ bulk /= 2;
+ continue;
+ }
+
+ for (i = len; i < len + bulk; i++, addr += PAGE_SIZE)
+ uvm_km_pages.page[i] = addr;
+ len += bulk;
}
- uvm_km_pages.free = i;
- for ( ; i < UVM_KM_PAGES_HIWAT_MAX; i++)
+
+ uvm_km_pages.free = len;
+ for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++)
uvm_km_pages.page[i] = 0;
/* tone down if really high */
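The loop above replaces per-page kmemalloc calls with one large uvm_map() request, halving the request whenever the map cannot satisfy it. The same back-off strategy in stand-alone form (demo_reserve() is a made-up allocator used only for illustration):

#include <stdio.h>

static int
demo_reserve(size_t n)			/* pretend requests above 100 fail */
{
	return n <= 100 ? 0 : -1;
}

int
main(void)
{
	size_t want = 512, got = 0, bulk = want;

	while (got < want && bulk > 0) {
		if (bulk > want - got)
			bulk = want - got;
		if (demo_reserve(bulk) != 0) {
			bulk /= 2;	/* back off, retry a smaller chunk */
			continue;
		}
		got += bulk;
	}
	printf("reserved %zu of %zu\n", got, want);
	return 0;
}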
@@ -760,17 +780,25 @@ uvm_km_thread(void *arg)
mtx_leave(&uvm_km_pages.mtx);
if (allocmore) {
+ bzero(pg, sizeof(pg));
for (i = 0; i < nitems(pg); i++) {
- pg[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map,
- NULL, PAGE_SIZE, UVM_KMF_VALLOC);
+ pg[i] = vm_map_min(kernel_map);
+ if (uvm_map(kernel_map, &pg[i], PAGE_SIZE,
+ NULL, UVM_UNKNOWN_OFFSET, 0,
+ UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
+ UVM_INH_NONE, UVM_ADV_RANDOM,
+ UVM_KMF_TRYLOCK)) != 0) {
+ pg[i] = 0;
+ break;
+ }
}
-
+
mtx_enter(&uvm_km_pages.mtx);
for (i = 0; i < nitems(pg); i++) {
if (uvm_km_pages.free ==
nitems(uvm_km_pages.page))
break;
- else
+ else if (pg[i] != 0)
uvm_km_pages.page[uvm_km_pages.free++]
= pg[i];
}
@@ -778,8 +806,12 @@ uvm_km_thread(void *arg)
mtx_leave(&uvm_km_pages.mtx);
/* Cleanup left-over pages (if any). */
- for (; i < nitems(pg); i++)
- uvm_km_free(kernel_map, pg[i], PAGE_SIZE);
+ for (; i < nitems(pg); i++) {
+ if (pg[i] != 0) {
+ uvm_unmap(kernel_map,
+ pg[i], pg[i] + PAGE_SIZE);
+ }
+ }
}
while (fp) {
fp = uvm_km_doputpage(fp);
@@ -808,7 +840,7 @@ uvm_km_doputpage(struct uvm_km_free_page *fp)
mtx_leave(&uvm_km_pages.mtx);
if (freeva)
- uvm_km_free(kernel_map, va, PAGE_SIZE);
+ uvm_unmap(kernel_map, va, va + PAGE_SIZE);
uvm_pagefree(pg);
return (nextfp);
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index e097952a130..bc6b9df0281 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,7 +1,22 @@
-/* $OpenBSD: uvm_map.c,v 1.147 2011/11/24 18:47:34 guenther Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.148 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
-/*
+/*
+ * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ *
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* Copyright (c) 1991, 1993, The Regents of the University of California.
*
@@ -71,6 +86,9 @@
* uvm_map.c: uvm map operations
*/
+/* #define DEBUG */
+/* #define VMMAP_DEBUG */
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
@@ -86,13 +104,189 @@
#endif
#include <uvm/uvm.h>
-#undef RB_AUGMENT
-#define RB_AUGMENT(x) uvm_rb_augment(x)
#ifdef DDB
#include <uvm/uvm_ddb.h>
#endif
+#include <uvm/uvm_addr.h>
+
+
+vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
+int uvm_mapent_isjoinable(struct vm_map*,
+ struct vm_map_entry*, struct vm_map_entry*);
+struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
+ struct vm_map_entry*, struct uvm_map_deadq*);
+struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*,
+ struct vm_map_entry*, struct uvm_map_deadq*);
+struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
+ struct vm_map_entry*, vaddr_t, vsize_t, int,
+ struct uvm_map_deadq*);
+struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int);
+void uvm_mapent_free(struct vm_map_entry*);
+void uvm_unmap_kill_entry(struct vm_map*,
+ struct vm_map_entry*);
+void uvm_mapent_mkfree(struct vm_map*,
+ struct vm_map_entry*, struct vm_map_entry**,
+ struct uvm_map_deadq*, boolean_t);
+void uvm_map_pageable_pgon(struct vm_map*,
+ struct vm_map_entry*, struct vm_map_entry*,
+ vaddr_t, vaddr_t);
+int uvm_map_pageable_wire(struct vm_map*,
+ struct vm_map_entry*, struct vm_map_entry*,
+ vaddr_t, vaddr_t, int);
+void uvm_map_setup_entries(struct vm_map*);
+void uvm_map_teardown(struct vm_map*);
+void uvm_map_vmspace_update(struct vm_map*,
+ struct uvm_map_deadq*, int);
+void uvm_map_kmem_grow(struct vm_map*,
+ struct uvm_map_deadq*, vsize_t, int);
+void uvm_map_freelist_update_clear(struct vm_map*,
+ struct uvm_map_deadq*);
+void uvm_map_freelist_update_refill(struct vm_map *, int);
+void uvm_map_freelist_update(struct vm_map*,
+ struct uvm_map_deadq*, vaddr_t, vaddr_t,
+ vaddr_t, vaddr_t, int);
+struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
+ vaddr_t, vaddr_t, int);
+int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
+ struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
+ int);
+int uvm_map_findspace(struct vm_map*,
+ struct vm_map_entry**, struct vm_map_entry**,
+ vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
+ vaddr_t);
+
+/*
+ * Tree management functions.
+ */
+
+static __inline void uvm_mapent_copy(struct vm_map_entry*,
+ struct vm_map_entry*);
+static int uvm_mapentry_addrcmp(struct vm_map_entry*,
+ struct vm_map_entry*);
+static int uvm_mapentry_freecmp(struct vm_map_entry*,
+ struct vm_map_entry*);
+void uvm_mapent_free_insert(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uvm_mapent_free_remove(struct vm_map*,
+ struct uvm_addr_state*, struct vm_map_entry*);
+void uvm_mapent_addr_insert(struct vm_map*,
+ struct vm_map_entry*);
+void uvm_mapent_addr_remove(struct vm_map*,
+ struct vm_map_entry*);
+void uvm_map_splitentry(struct vm_map*,
+ struct vm_map_entry*, struct vm_map_entry*,
+ vaddr_t);
+vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
+int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
+
+/*
+ * uvm_vmspace_fork helper functions.
+ */
+struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
+ vsize_t, struct vm_map_entry*,
+ struct uvm_map_deadq*, int, int);
+void uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
+ struct vm_map*, struct vm_map_entry*,
+ struct uvm_map_deadq*);
+void uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
+ struct vm_map*, struct vm_map_entry*,
+ struct uvm_map_deadq*);
+
+/*
+ * Tree validation.
+ */
+
+#ifdef VMMAP_DEBUG
+void uvm_tree_assert(struct vm_map*, int, char*,
+ char*, int);
+#define UVM_ASSERT(map, cond, file, line) \
+ uvm_tree_assert((map), (cond), #cond, (file), (line))
+void uvm_tree_sanity(struct vm_map*, char*, int);
+void uvm_tree_size_chk(struct vm_map*, char*, int);
+void vmspace_validate(struct vm_map*);
+#else
+#define uvm_tree_sanity(_map, _file, _line) do {} while (0)
+#define uvm_tree_size_chk(_map, _file, _line) do {} while (0)
+#define vmspace_validate(_map) do {} while (0)
+#endif
+
+/*
+ * All architectures will have pmap_prefer.
+ */
+#ifndef PMAP_PREFER
+#define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE
+#define PMAP_PREFER_OFFSET(off) 0
+#define PMAP_PREFER(addr, off) (addr)
+#endif
+
+
+/*
+ * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
+ * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
+ *
+ * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
+ * each time.
+ */
+#define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE)
+#define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE)
+#define VM_MAP_KSIZE_ALLOCMUL 4
+/*
+ * When selecting a random free-space block, look at most FSPACE_DELTA blocks
+ * ahead.
+ */
+#define FSPACE_DELTA 8
+/*
+ * Put allocations adjacent to previous allocations when the free-space tree
+ * is larger than FSPACE_COMPACT entries.
+ *
+ * Alignment and PMAP_PREFER may still cause the entry to not be fully
+ * adjacent. Note that this strategy reduces memory fragmentation (by leaving
+ * a large space before or after the allocation).
+ */
+#define FSPACE_COMPACT 128
+/*
+ * Make the address selection skip at most this many bytes from the start of
+ * the free space in which the allocation takes place.
+ *
+ * The main idea behind a randomized address space is that an attacker cannot
+ * know where to target his attack. Therefore, the location of objects must be
+ * as random as possible. However, the goal is not to create the sparsest
+ * map possible.
+ * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
+ * sizes, thereby reducing the sparseness. The biggest randomization comes
+ * from fragmentation, i.e. FSPACE_COMPACT.
+ */
+#define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024)
+/*
+ * Allow for small gaps in the overflow areas.
+ * Gap size is in bytes and does not have to be a multiple of page-size.
+ */
+#define FSPACE_BIASGAP ((vaddr_t)32 * 1024)
+
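FSPACE_MAXOFF caps how far into a free-space block the randomized selection may land. A rough user-space sketch of that bound (the constants and demo_pick_offset() here are illustrative stand-ins, not the kernel's selector):

#include <stdio.h>
#include <stdlib.h>

#define DEMO_PAGE_SIZE	4096UL
#define DEMO_MAXOFF	(32UL * 1024 * 1024)	/* mirrors FSPACE_MAXOFF */

static unsigned long
demo_pick_offset(unsigned long fspace, unsigned long sz)
{
	unsigned long room = fspace - sz;

	if (room > DEMO_MAXOFF)		/* keep the map from going sparse */
		room = DEMO_MAXOFF;
	/* random page-aligned offset in [0, room] */
	return arc4random_uniform(room / DEMO_PAGE_SIZE + 1) * DEMO_PAGE_SIZE;
}

int
main(void)
{
	printf("offset 0x%lx\n", demo_pick_offset(1UL << 30, 1UL << 20));
	return 0;
}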
+/* auto-allocate address lower bound */
+#define VMMAP_MIN_ADDR PAGE_SIZE
+
+
+#ifdef DEADBEEF0
+#define UVMMAP_DEADBEEF ((void*)DEADBEEF0)
+#else
+#define UVMMAP_DEADBEEF ((void*)0xdeadd0d0)
+#endif
+
+#ifdef DEBUG
+int uvm_map_printlocks = 0;
+
+#define LPRINTF(_args) \
+ do { \
+ if (uvm_map_printlocks) \
+ printf _args; \
+ } while (0)
+#else
+#define LPRINTF(_args) do {} while (0)
+#endif
+
static struct timeval uvm_kmapent_last_warn_time;
static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
@@ -101,287 +295,1155 @@ const char vmmapbsy[] = "vmmapbsy";
/*
* pool for vmspace structures.
*/
-
struct pool uvm_vmspace_pool;
/*
* pool for dynamically-allocated map entries.
*/
-
struct pool uvm_map_entry_pool;
struct pool uvm_map_entry_kmem_pool;
-#ifdef PMAP_GROWKERNEL
/*
* This global represents the end of the kernel virtual address
- * space. If we want to exceed this, we must grow the kernel
+ * space. If we want to exceed this, we must grow the kernel
* virtual address space dynamically.
*
* Note, this variable is locked by kernel_map's lock.
*/
vaddr_t uvm_maxkaddr;
-#endif
/*
- * macros
+ * Locking predicate.
*/
+#define UVM_MAP_REQ_WRITE(_map) \
+ do { \
+ if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \
+ rw_assert_wrlock(&(_map)->lock); \
+ } while (0)
/*
- * uvm_map_entry_link: insert entry into a map
+ * Tree describing entries by address.
*
- * => map must be locked
+ * Addresses are unique.
+ * Entries with start == end may only exist if they are the first entry
+ * (sorted by address) within a free-memory tree.
*/
-#define uvm_map_entry_link(map, after_where, entry) do { \
- (map)->nentries++; \
- (entry)->prev = (after_where); \
- (entry)->next = (after_where)->next; \
- (entry)->prev->next = (entry); \
- (entry)->next->prev = (entry); \
- uvm_rb_insert(map, entry); \
-} while (0)
+
+static __inline int
+uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
+{
+ return e1->start < e2->start ? -1 : e1->start > e2->start;
+}
/*
- * uvm_map_entry_unlink: remove entry from a map
+ * Tree describing free memory.
*
- * => map must be locked
+ * Free memory is indexed (so we can use array semantics in O(log N)).
+ * Free memory is ordered by size (so we can reduce fragmentation).
+ *
+ * The address range in the tree can be limited, so that part of the
+ * free memory is not in the free-memory tree. Only free memory in the
+ * tree will be considered during 'any address' allocations.
*/
-#define uvm_map_entry_unlink(map, entry) do { \
- (map)->nentries--; \
- (entry)->next->prev = (entry)->prev; \
- (entry)->prev->next = (entry)->next; \
- uvm_rb_remove(map, entry); \
-} while (0)
+
+static __inline int
+uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
+{
+ int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace;
+ return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2);
+}
/*
- * SAVE_HINT: saves the specified entry as the hint for future lookups.
- *
- * => map need not be locked (protected by hint_lock).
+ * Copy mapentry.
*/
-#define SAVE_HINT(map,check,value) do { \
- simple_lock(&(map)->hint_lock); \
- if ((map)->hint == (check)) \
- (map)->hint = (value); \
- simple_unlock(&(map)->hint_lock); \
-} while (0)
+static __inline void
+uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
+{
+ caddr_t csrc, cdst;
+ size_t sz;
+
+ csrc = (caddr_t)src;
+ cdst = (caddr_t)dst;
+ csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
+ cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
+
+ sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
+ offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
+ memcpy(cdst, csrc, sz);
+}
/*
- * VM_MAP_RANGE_CHECK: check and correct range
- *
- * => map must at least be read locked
+ * Handle free-list insertion.
*/
+void
+uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry *entry)
+{
+ const struct uvm_addr_functions *fun;
+#ifdef VMMAP_DEBUG
+ vaddr_t min, max, bound;
+#endif
+
+#ifdef VMMAP_DEBUG
+ /*
+ * Boundary check.
+ * Boundaries are folded if they go on the same free list.
+ */
+ min = VMMAP_FREE_START(entry);
+ max = VMMAP_FREE_END(entry);
-#define VM_MAP_RANGE_CHECK(map, start, end) do { \
- if (start < vm_map_min(map)) \
- start = vm_map_min(map); \
- if (end > vm_map_max(map)) \
- end = vm_map_max(map); \
- if (start > end) \
- start = end; \
-} while (0)
+ while (min < max) {
+ bound = uvm_map_boundary(map, min, max);
+ KASSERT(uvm_map_uaddr(map, min) == uaddr);
+ min = bound;
+ }
+#endif
+ KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
+ KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
+
+ UVM_MAP_REQ_WRITE(map);
+
+ /* Actual insert: forward to uaddr pointer. */
+ fun = uaddr->uaddr_functions;
+ KDASSERT(fun != NULL);
+ if (fun->uaddr_free_insert != NULL)
+ (*fun->uaddr_free_insert)(map, uaddr, entry);
+ entry->etype |= UVM_ET_FREEMAPPED;
+}
/*
- * local prototypes
+ * Handle free-list removal.
*/
+void
+uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry *entry)
+{
+ const struct uvm_addr_functions *fun;
-void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
-void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
-void uvm_map_reference_amap(struct vm_map_entry *, int);
-void uvm_map_unreference_amap(struct vm_map_entry *, int);
-int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t,
- struct vm_map_entry *, voff_t, vsize_t);
+ KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0);
+ KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
+ UVM_MAP_REQ_WRITE(map);
-struct vm_map_entry *uvm_mapent_alloc(struct vm_map *, int);
-void uvm_mapent_free(struct vm_map_entry *);
+ fun = uaddr->uaddr_functions;
+ if (fun->uaddr_free_remove != NULL)
+ (*fun->uaddr_free_remove)(map, uaddr, entry);
+ entry->etype &= ~UVM_ET_FREEMAPPED;
+}
-#ifdef KVA_GUARDPAGES
/*
- * Number of kva guardpages in use.
+ * Handle address tree insertion.
*/
-int kva_guardpages;
-#endif
-
+void
+uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
+{
+ struct vm_map_entry *res;
+
+ if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
+ RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
+ RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF)
+ panic("uvm_mapent_addr_insert: entry still in addr list");
+ KDASSERT(entry->start <= entry->end);
+ KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
+ (entry->end & (vaddr_t)PAGE_MASK) == 0);
+
+ UVM_MAP_REQ_WRITE(map);
+ res = RB_INSERT(uvm_map_addr, &map->addr, entry);
+ if (res != NULL) {
+ panic("uvm_mapent_addr_insert: map %p entry %p "
+ "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
+ "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
+ map, entry,
+ entry->start, entry->end, entry->guard, entry->fspace,
+ res, res->start, res->end, res->guard, res->fspace);
+ }
+}
/*
- * Tree manipulation.
+ * Handle address tree removal.
*/
-void uvm_rb_insert(struct vm_map *, struct vm_map_entry *);
-void uvm_rb_remove(struct vm_map *, struct vm_map_entry *);
-vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *);
+void
+uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
+{
+ struct vm_map_entry *res;
+
+ UVM_MAP_REQ_WRITE(map);
+ res = RB_REMOVE(uvm_map_addr, &map->addr, entry);
+ if (res != entry)
+ panic("uvm_mapent_addr_remove");
+ RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) =
+ RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF;
+}
-#ifdef DEBUG
-int _uvm_tree_sanity(struct vm_map *map, const char *name);
-#endif
-vsize_t uvm_rb_subtree_space(struct vm_map_entry *);
-void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *);
+/*
+ * uvm_map_reference: add reference to a map
+ *
+ * XXX check map reference counter lock
+ */
+#define uvm_map_reference(_map) \
+ do { \
+ simple_lock(&map->ref_lock); \
+ map->ref_count++; \
+ simple_unlock(&map->ref_lock); \
+ } while (0)
-static __inline int
-uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b)
+/*
+ * Calculate the dused delta.
+ */
+vsize_t
+uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
{
- if (a->start < b->start)
- return (-1);
- else if (a->start > b->start)
- return (1);
-
- return (0);
+ struct vmspace *vm;
+ vsize_t sz;
+ vaddr_t lmax;
+ vaddr_t stack_begin, stack_end; /* Position of stack. */
+
+ KASSERT(map->flags & VM_MAP_ISVMSPACE);
+ vm = (struct vmspace *)map;
+ stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+ stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+
+ sz = 0;
+ while (min != max) {
+ lmax = max;
+ if (min < stack_begin && lmax > stack_begin)
+ lmax = stack_begin;
+ else if (min < stack_end && lmax > stack_end)
+ lmax = stack_end;
+
+ if (min >= stack_begin && min < stack_end) {
+ /* nothing */
+ } else
+ sz += lmax - min;
+ min = lmax;
+ }
+
+ return sz >> PAGE_SHIFT;
}
+/*
+ * Find the entry describing the given address.
+ */
+struct vm_map_entry*
+uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
+{
+ struct vm_map_entry *iter;
+
+ iter = RB_ROOT(atree);
+ while (iter != NULL) {
+ if (iter->start > addr)
+ iter = RB_LEFT(iter, daddrs.addr_entry);
+ else if (VMMAP_FREE_END(iter) <= addr)
+ iter = RB_RIGHT(iter, daddrs.addr_entry);
+ else
+ return iter;
+ }
+ return NULL;
+}
+/*
+ * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
+ *
+ * Push dead entries into a linked list.
+ * Since the linked list abuses the address tree for storage, the entry
+ * may not be linked in a map.
+ *
+ * The deadq must be initialized with TAILQ_INIT() before the first call
+ * to this macro. uvm_unmap_detach(deadq, 0) will remove the dead entries.
+ */
static __inline void
-uvm_rb_augment(struct vm_map_entry *entry)
+dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
{
- entry->space = uvm_rb_subtree_space(entry);
+ TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
}
+#define DEAD_ENTRY_PUSH(_headptr, _entry) \
+ dead_entry_push((_headptr), (_entry))
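In practice the dead queue follows a simple pattern: initialize a local TAILQ, let the (locked) unmap path push entries onto it, then drain it once the map lock is dropped. A stand-alone sketch using <sys/queue.h>, with simplified stand-in types:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_entry {
	unsigned long start, end;
	TAILQ_ENTRY(demo_entry) deadq;
};
TAILQ_HEAD(demo_deadq, demo_entry);

int
main(void)
{
	struct demo_deadq dead;
	struct demo_entry *e;

	TAILQ_INIT(&dead);			/* like TAILQ_INIT(&dead_entries) */

	if ((e = malloc(sizeof(*e))) == NULL)
		return 1;
	e->start = 0x1000;
	e->end = 0x2000;
	TAILQ_INSERT_TAIL(&dead, e, deadq);	/* what DEAD_ENTRY_PUSH() does */

	/* drain, as uvm_unmap_detach() does */
	while ((e = TAILQ_FIRST(&dead)) != NULL) {
		TAILQ_REMOVE(&dead, e, deadq);
		printf("freeing 0x%lx-0x%lx\n", e->start, e->end);
		free(e);
	}
	return 0;
}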
-RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
+/*
+ * Helper function for uvm_map_findspace_tree.
+ *
+ * Given allocation constraints and pmap constraints, finds the
+ * lowest and highest address in a range that can be used for the
+ * allocation.
+ *
+ * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
+ *
+ *
+ * Big chunk of math with a seasoning of dragons.
+ */
+int
+uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
+ struct vm_map_entry *sel, vaddr_t align,
+ vaddr_t pmap_align, vaddr_t pmap_off, int bias)
+{
+ vaddr_t sel_min, sel_max;
+#ifdef PMAP_PREFER
+ vaddr_t pmap_min, pmap_max;
+#endif /* PMAP_PREFER */
+#ifdef DIAGNOSTIC
+ int bad;
+#endif /* DIAGNOSTIC */
-RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
+ sel_min = VMMAP_FREE_START(sel);
+ sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
-vsize_t
-uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry)
-{
- struct vm_map_entry *next;
- vaddr_t space;
+#ifdef PMAP_PREFER
+
+ /*
+ * There are two special cases, in which we can satisfy the align
+ * requirement and the pmap_prefer requirement.
+ * - when pmap_off == 0, we always select the largest of the two
+ * - when pmap_off % align == 0 and pmap_align > align, we simply
+ * satisfy the pmap_align requirement and automatically
+ * satisfy the align requirement.
+ */
+ if (align > PAGE_SIZE &&
+ !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
+ /*
+ * Simple case: only use align.
+ */
+ sel_min = roundup(sel_min, align);
+ sel_max &= ~(align - 1);
+
+ if (sel_min > sel_max)
+ return ENOMEM;
+
+ /*
+ * Correct for bias.
+ */
+ if (sel_max - sel_min > FSPACE_BIASGAP) {
+ if (bias > 0) {
+ sel_min = sel_max - FSPACE_BIASGAP;
+ sel_min = roundup(sel_min, align);
+ } else if (bias < 0) {
+ sel_max = sel_min + FSPACE_BIASGAP;
+ sel_max &= ~(align - 1);
+ }
+ }
+ } else if (pmap_align != 0) {
+ /*
+ * Special case: satisfy both pmap_prefer and
+ * align argument.
+ */
+ pmap_max = sel_max & ~(pmap_align - 1);
+ pmap_min = sel_min;
+ if (pmap_max < sel_min)
+ return ENOMEM;
+
+ /* Adjust pmap_min for BIASGAP for top-addr bias. */
+ if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
+ pmap_min = pmap_max - FSPACE_BIASGAP;
+ /* Align pmap_min. */
+ pmap_min &= ~(pmap_align - 1);
+ if (pmap_min < sel_min)
+ pmap_min += pmap_align;
+ if (pmap_min > pmap_max)
+ return ENOMEM;
+
+ /* Adjust pmap_max for BIASGAP for bottom-addr bias. */
+ if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
+ pmap_max = (pmap_min + FSPACE_BIASGAP) &
+ ~(pmap_align - 1);
+ }
+ if (pmap_min > pmap_max)
+ return ENOMEM;
+
+ /* Apply pmap prefer offset. */
+ pmap_max |= pmap_off;
+ if (pmap_max > sel_max)
+ pmap_max -= pmap_align;
+ pmap_min |= pmap_off;
+ if (pmap_min < sel_min)
+ pmap_min += pmap_align;
+
+ /*
+ * Fixup: it's possible that pmap_min and pmap_max
+ * cross each other. In this case, try to find one
+ * address that is allowed.
+ * (This usually happens in the biased case.)
+ */
+ if (pmap_min > pmap_max) {
+ if (pmap_min < sel_max)
+ pmap_max = pmap_min;
+ else if (pmap_max > sel_min)
+ pmap_min = pmap_max;
+ else
+ return ENOMEM;
+ }
+
+ /* Internal validation. */
+ KDASSERT(pmap_min <= pmap_max);
+
+ sel_min = pmap_min;
+ sel_max = pmap_max;
+ } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
+ sel_min = sel_max - FSPACE_BIASGAP;
+ else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
+ sel_max = sel_min + FSPACE_BIASGAP;
+
+#else
+
+ if (align > PAGE_SIZE) {
+ sel_min = roundup(sel_min, align);
+ sel_max &= ~(align - 1);
+ if (sel_min > sel_max)
+ return ENOMEM;
+
+ if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
+ if (bias > 0) {
+ sel_min = roundup(sel_max - FSPACE_BIASGAP,
+ align);
+ } else {
+ sel_max = (sel_min + FSPACE_BIASGAP) &
+ ~(align - 1);
+ }
+ }
+ } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
+ sel_min = sel_max - FSPACE_BIASGAP;
+ else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
+ sel_max = sel_min + FSPACE_BIASGAP;
+
+#endif
+
+ if (sel_min > sel_max)
+ return ENOMEM;
+
+#ifdef DIAGNOSTIC
+ bad = 0;
+ /* Lower boundary check. */
+ if (sel_min < VMMAP_FREE_START(sel)) {
+ printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
+ sel_min, VMMAP_FREE_START(sel));
+ bad++;
+ }
+ /* Upper boundary check. */
+ if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
+ printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
+ sel_max,
+ VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
+ bad++;
+ }
+ /* Lower boundary alignment. */
+ if (align != 0 && (sel_min & (align - 1)) != 0) {
+ printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
+ sel_min, align);
+ bad++;
+ }
+ /* Upper boundary alignment. */
+ if (align != 0 && (sel_max & (align - 1)) != 0) {
+ printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
+ sel_max, align);
+ bad++;
+ }
+ /* Lower boundary PMAP_PREFER check. */
+ if (pmap_align != 0 && align == 0 &&
+ (sel_min & (pmap_align - 1)) != pmap_off) {
+ printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
+ sel_min, sel_min & (pmap_align - 1), pmap_off);
+ bad++;
+ }
+ /* Upper boundary PMAP_PREFER check. */
+ if (pmap_align != 0 && align == 0 &&
+ (sel_max & (pmap_align - 1)) != pmap_off) {
+ printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
+ sel_max, sel_max & (pmap_align - 1), pmap_off);
+ bad++;
+ }
- if ((next = entry->next) == &map->header)
- space = map->max_offset - entry->end;
- else {
- KASSERT(next);
- space = next->start - entry->end;
+ if (bad) {
+ panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
+ "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
+ "bias = %d, "
+ "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
+ sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
+ bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
}
- return (space);
+#endif /* DIAGNOSTIC */
+
+ *min = sel_min;
+ *max = sel_max;
+ return 0;
}
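Stripped of the PMAP_PREFER and bias handling, the core of the selection window math above is: round the low bound up to the alignment, mask the high bound down, and fail if the window inverts. A stand-alone sketch (power-of-two alignment assumed, as in the caller):

#include <stdio.h>

static int
demo_sel_limits(unsigned long *min, unsigned long *max, unsigned long align)
{
	*min = (*min + align - 1) & ~(align - 1);	/* roundup(min, align) */
	*max &= ~(align - 1);
	return (*min > *max) ? -1 : 0;			/* -1 ~ ENOMEM */
}

int
main(void)
{
	unsigned long lo = 0x12345, hi = 0x40000;

	if (demo_sel_limits(&lo, &hi, 0x10000) == 0)
		printf("usable window 0x%lx-0x%lx\n", lo, hi);
	return 0;
}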
-
-vsize_t
-uvm_rb_subtree_space(struct vm_map_entry *entry)
+
+/*
+ * Test if memory starting at addr with sz bytes is free.
+ *
+ * Fills in *start_ptr and *end_ptr to be the first and last entry describing
+ * the space.
+ * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
+ */
+int
+uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
+ struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
+ vaddr_t addr, vsize_t sz)
{
- vaddr_t space, tmp;
+ struct uvm_addr_state *free;
+ struct uvm_map_addr *atree;
+ struct vm_map_entry *i, *i_end;
- space = entry->ownspace;
- if (RB_LEFT(entry, rb_entry)) {
- tmp = RB_LEFT(entry, rb_entry)->space;
- if (tmp > space)
- space = tmp;
+ /*
+ * Kernel memory above uvm_maxkaddr is considered unavailable.
+ */
+ if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
+ if (addr + sz > uvm_maxkaddr)
+ return 0;
}
- if (RB_RIGHT(entry, rb_entry)) {
- tmp = RB_RIGHT(entry, rb_entry)->space;
- if (tmp > space)
- space = tmp;
+ atree = &map->addr;
+
+ /*
+ * Fill in first, last, so they point at the entries containing the
+ * first and last address of the range.
+ * Note that if they are not NULL, we don't perform the lookup.
+ */
+ KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
+ if (*start_ptr == NULL) {
+ *start_ptr = uvm_map_entrybyaddr(atree, addr);
+ if (*start_ptr == NULL)
+ return 0;
+ } else
+ KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
+ if (*end_ptr == NULL) {
+ if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
+ *end_ptr = *start_ptr;
+ else {
+ *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
+ if (*end_ptr == NULL)
+ return 0;
+ }
+ } else
+ KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
+
+ /*
+ * Validation.
+ */
+ KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
+ KDASSERT((*start_ptr)->start <= addr &&
+ VMMAP_FREE_END(*start_ptr) > addr &&
+ (*end_ptr)->start < addr + sz &&
+ VMMAP_FREE_END(*end_ptr) >= addr + sz);
+
+ /*
+ * Check that none of the entries intersects with <addr, addr+sz>.
+ * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
+ * considered unavailable unless called by those allocators.
+ */
+ i = *start_ptr;
+ i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr);
+ for (; i != i_end;
+ i = RB_NEXT(uvm_map_addr, atree, i)) {
+ if (i->start != i->end && i->end > addr)
+ return 0;
+
+ /*
+ * uaddr_exe and uaddr_brk_stack may only be used
+ * by these allocators and the NULL uaddr (i.e. no
+ * uaddr).
+ * Reject if this requirement is not met.
+ */
+ if (uaddr != NULL) {
+ free = uvm_map_uaddr_e(map, i);
+
+ if (uaddr != free && free != NULL &&
+ (free == map->uaddr_exe ||
+ free == map->uaddr_brk_stack))
+ return 0;
+ }
}
- return (space);
+ return -1;
}
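A simplified model of the availability test: walk the entries overlapping [addr, addr+sz) in address order and reject the range as soon as one of them maps real memory. The array below stands in for the RB tree, and the free-space bookkeeping is omitted:

#include <stdio.h>

struct demo_range { unsigned long start, end; };	/* start == end: free stub */

static int
demo_isavail(const struct demo_range *r, int n, unsigned long addr,
    unsigned long sz)
{
	int i;

	for (i = 0; i < n; i++) {
		if (r[i].start >= addr + sz || r[i].end <= addr)
			continue;			/* no overlap */
		if (r[i].start != r[i].end)
			return 0;			/* real mapping in the way */
	}
	return 1;
}

int
main(void)
{
	struct demo_range used[] = { { 0x1000, 0x3000 }, { 0x8000, 0x8000 } };

	printf("%d\n", demo_isavail(used, 2, 0x4000, 0x2000));	/* 1: free */
	printf("%d\n", demo_isavail(used, 2, 0x2000, 0x2000));	/* 0: taken */
	return 0;
}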
-void
-uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
+/*
+ * Invoke each address selector until an address is found.
+ * Will not invoke uaddr_exe.
+ */
+int
+uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
+ struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
+ vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
{
- /* We need to traverse to the very top */
- do {
- entry->ownspace = uvm_rb_space(map, entry);
- entry->space = uvm_rb_subtree_space(entry);
- } while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
-}
+ struct uvm_addr_state *uaddr;
+ int i;
-void
-uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
-{
- vaddr_t space = uvm_rb_space(map, entry);
- struct vm_map_entry *tmp;
+ /*
+ * Allocation for sz bytes at any address,
+ * using the addr selectors in order.
+ */
+ for (i = 0; i < nitems(map->uaddr_any); i++) {
+ uaddr = map->uaddr_any[i];
- entry->ownspace = entry->space = space;
- tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
-#ifdef DIAGNOSTIC
- if (tmp != NULL)
- panic("uvm_rb_insert: duplicate entry?");
-#endif
- uvm_rb_fixup(map, entry);
- if (entry->prev != &map->header)
- uvm_rb_fixup(map, entry->prev);
+ if (uvm_addr_invoke(map, uaddr, first, last,
+ addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
+ return 0;
+ }
+
+ /*
+ * Fall back to brk() and stack() address selectors.
+ */
+ uaddr = map->uaddr_brk_stack;
+ if (uvm_addr_invoke(map, uaddr, first, last,
+ addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
+ return 0;
+
+ return ENOMEM;
}
-void
-uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
+/*
+ * uvm_map: establish a valid mapping in map
+ *
+ * => *addr and sz must be a multiple of PAGE_SIZE.
+ * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
+ * => map must be unlocked.
+ * => <uobj,uoffset> value meanings (4 cases):
+ * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
+ * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
+ * [3] <uobj,uoffset> == normal mapping
+ * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
+ *
+ * case [4] is for kernel mappings where we don't know the offset until
+ * we've found a virtual address. note that kernel object offsets are
+ * always relative to vm_map_min(kernel_map).
+ *
+ * => align: align vaddr, must be a power-of-2.
+ * Align is only a hint and will be ignored if the alignment fails.
+ */
+int
+uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
+ struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
{
- struct vm_map_entry *parent;
+ struct vm_map_entry *first, *last, *entry;
+ struct uvm_map_deadq dead;
+ vm_prot_t prot;
+ vm_prot_t maxprot;
+ vm_inherit_t inherit;
+ int advice;
+ int error;
+ vaddr_t pmap_align, pmap_offset;
+ vaddr_t hint;
- parent = RB_PARENT(entry, rb_entry);
- RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
- if (entry->prev != &map->header)
- uvm_rb_fixup(map, entry->prev);
- if (parent)
- uvm_rb_fixup(map, parent);
-}
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ splassert(IPL_NONE);
+ else
+ splassert(IPL_VM);
-#ifdef DEBUG
-#define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y)
-#else
-#define uvm_tree_sanity(x,y)
-#endif
+ /*
+ * We use pmap_align and pmap_offset as alignment and offset variables.
+ *
+ * Because the align parameter takes precedence over pmap prefer,
+ * the pmap_align will need to be set to align, with pmap_offset = 0,
+ * if the pmap_prefer constraints cannot satisfy align.
+ */
+ if (uoffset == UVM_UNKNOWN_OFFSET) {
+ pmap_align = MAX(align, PAGE_SIZE);
+ pmap_offset = 0;
+ } else {
+ pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
+ pmap_offset = PMAP_PREFER_OFFSET(uoffset);
-#ifdef DEBUG
-int
-_uvm_tree_sanity(struct vm_map *map, const char *name)
-{
- struct vm_map_entry *tmp, *trtmp;
- int n = 0, i = 1;
-
- RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
- if (tmp->ownspace != uvm_rb_space(map, tmp)) {
- printf("%s: %d/%d ownspace %x != %x %s\n",
- name, n + 1, map->nentries,
- tmp->ownspace, uvm_rb_space(map, tmp),
- tmp->next == &map->header ? "(last)" : "");
- goto error;
+ if (align == 0 ||
+ (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
+ /*
+ * pmap_offset satisfies align, no change.
+ */
+ } else {
+ /*
+ * Align takes precedence over pmap prefer.
+ */
+ pmap_align = align;
+ pmap_offset = 0;
}
}
- trtmp = NULL;
- RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
- if (tmp->space != uvm_rb_subtree_space(tmp)) {
- printf("%s: space %d != %d\n",
- name, tmp->space, uvm_rb_subtree_space(tmp));
- goto error;
+
+ /*
+ * Decode parameters.
+ */
+ prot = UVM_PROTECTION(flags);
+ maxprot = UVM_MAXPROTECTION(flags);
+ advice = UVM_ADVICE(flags);
+ inherit = UVM_INHERIT(flags);
+ error = 0;
+ hint = trunc_page(*addr);
+ TAILQ_INIT(&dead);
+ KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
+ KASSERT((align & (align - 1)) == 0);
+
+ /*
+ * Holes are incompatible with other types of mappings.
+ */
+ if (flags & UVM_FLAG_HOLE) {
+ KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
+ (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
+ }
+
+ /*
+ * Unset hint for kernel_map non-fixed allocations.
+ */
+ if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
+ hint = 0;
+
+ /*
+ * Check protection.
+ */
+ if ((prot & maxprot) != prot)
+ return EACCES;
+
+ if (flags & UVM_FLAG_TRYLOCK) {
+ if (vm_map_lock_try(map) == FALSE)
+ return EFAULT;
+ } else
+ vm_map_lock(map);
+
+ first = last = NULL;
+ if (flags & UVM_FLAG_FIXED) {
+ /*
+ * Fixed location.
+ *
+ * Note: we ignore align, pmap_prefer.
+ * Fill in first, last and *addr.
+ */
+ KASSERT((*addr & PAGE_MASK) == 0);
+
+ /*
+ * Grow pmap to include allocated address.
+ * If the growth fails, the allocation will fail too.
+ */
+ if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
+ uvm_maxkaddr < (*addr + sz)) {
+ uvm_map_kmem_grow(map, &dead,
+ *addr + sz - uvm_maxkaddr, flags);
}
- if (trtmp != NULL && trtmp->start >= tmp->start) {
- printf("%s: corrupt: 0x%lx >= 0x%lx\n",
- name, trtmp->start, tmp->start);
- goto error;
+
+ /*
+ * Check that the space is available.
+ */
+ if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
+ error = ENOMEM;
+ goto unlock;
+ }
+ } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
+ (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
+ (align == 0 || (*addr & (align - 1)) == 0) &&
+ uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
+ /*
+ * Address used as hint.
+ *
+ * Note: we enforce the alignment restriction,
+ * but ignore pmap_prefer.
+ */
+ } else if ((maxprot & VM_PROT_EXECUTE) != 0 &&
+ map->uaddr_exe != NULL) {
+ /*
+ * Run selection algorithm for executables.
+ */
+ error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
+ addr, sz, pmap_align, pmap_offset, prot, hint);
+
+ /*
+ * Grow kernel memory and try again.
+ */
+ if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
+ uvm_map_kmem_grow(map, &dead, sz, flags);
+
+ error = uvm_addr_invoke(map, map->uaddr_exe,
+ &first, &last, addr, sz,
+ pmap_align, pmap_offset, prot, hint);
+ }
+
+ if (error != 0)
+ goto unlock;
+ } else {
+ /*
+ * Update freelists from vmspace.
+ */
+ if (map->flags & VM_MAP_ISVMSPACE)
+ uvm_map_vmspace_update(map, &dead, flags);
+
+ error = uvm_map_findspace(map, &first, &last, addr, sz,
+ pmap_align, pmap_offset, prot, hint);
+
+ /*
+ * Grow kernel memory and try again.
+ */
+ if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
+ uvm_map_kmem_grow(map, &dead, sz, flags);
+
+ error = uvm_map_findspace(map, &first, &last, addr, sz,
+ pmap_align, pmap_offset, prot, hint);
+ }
+
+ if (error != 0)
+ goto unlock;
+ }
+
+ KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
+ uvm_maxkaddr >= *addr + sz);
+
+ /*
+ * If we only want a query, return now.
+ */
+ if (flags & UVM_FLAG_QUERY) {
+ error = 0;
+ goto unlock;
+ }
+
+ if (uobj == NULL)
+ uoffset = 0;
+ else if (uoffset == UVM_UNKNOWN_OFFSET) {
+ KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
+ uoffset = *addr - vm_map_min(kernel_map);
+ }
+
+ /*
+ * Create new entry.
+ * first and last may be invalidated after this call.
+ */
+ entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead);
+ if (entry == NULL) {
+ error = ENOMEM;
+ goto unlock;
+ }
+ KDASSERT(entry->start == *addr && entry->end == *addr + sz);
+ entry->object.uvm_obj = uobj;
+ entry->offset = uoffset;
+ entry->protection = prot;
+ entry->max_protection = maxprot;
+ entry->inheritance = inherit;
+ entry->wired_count = 0;
+ entry->advice = advice;
+ if (uobj)
+ entry->etype |= UVM_ET_OBJ;
+ else if (flags & UVM_FLAG_HOLE)
+ entry->etype |= UVM_ET_HOLE;
+ if (flags & UVM_FLAG_COPYONW) {
+ entry->etype |= UVM_ET_COPYONWRITE;
+ if ((flags & UVM_FLAG_OVERLAY) == 0)
+ entry->etype |= UVM_ET_NEEDSCOPY;
+ }
+ if (flags & UVM_FLAG_OVERLAY) {
+ entry->aref.ar_pageoff = 0;
+ entry->aref.ar_amap = amap_alloc(sz,
+ ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0),
+ M_WAITOK);
+ }
+
+ /*
+ * Update map and process statistics.
+ */
+ if (!(flags & UVM_FLAG_HOLE)) {
+ map->size += sz;
+ if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
+ ((struct vmspace *)map)->vm_dused +=
+ uvmspace_dused(map, *addr, *addr + sz);
}
- n++;
+ }
- trtmp = tmp;
+ /*
+ * Try to merge entry.
+ *
+ * Userland allocations are kept separated most of the time.
+ * Forego the effort of merging what most of the time can't be merged
+ * and only try the merge if it concerns a kernel entry.
+ */
+ if ((flags & UVM_FLAG_NOMERGE) == 0 &&
+ (map->flags & VM_MAP_ISVMSPACE) == 0)
+ uvm_mapent_tryjoin(map, entry, &dead);
+
+unlock:
+ vm_map_unlock(map);
+
+ /*
+ * Remove dead entries.
+ *
+ * Dead entries may be the result of merging.
+ * uvm_map_mkentry may also create dead entries, when it attempts to
+ * destroy free-space entries.
+ */
+ uvm_unmap_detach(&dead, 0);
+ return error;
+}
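The pmap_align/pmap_offset setup at the top of uvm_map() boils down to a precedence rule: an explicit align argument overrides the PMAP_PREFER-derived alignment unless the preferred offset already satisfies it. A sketch of that decision with made-up demo values (demo_pick_alignment() is not a kernel function):

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096UL

static void
demo_pick_alignment(unsigned long align, unsigned long pref_align,
    unsigned long pref_off, unsigned long *out_align, unsigned long *out_off)
{
	*out_align = pref_align > DEMO_PAGE_SIZE ? pref_align : DEMO_PAGE_SIZE;
	*out_off = pref_off;
	if (align != 0 &&
	    !(align <= *out_align && (*out_off & (align - 1)) == 0)) {
		/* align takes precedence; drop the preferred offset */
		*out_align = align;
		*out_off = 0;
	}
}

int
main(void)
{
	unsigned long a, o;

	demo_pick_alignment(0x10000, 0x2000, 0x1000, &a, &o);
	printf("align 0x%lx, offset 0x%lx\n", a, o);
	return 0;
}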
+
+/*
+ * True iff e1 and e2 can be joined together.
+ */
+int
+uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
+ struct vm_map_entry *e2)
+{
+ KDASSERT(e1 != NULL && e2 != NULL);
+
+ /*
+ * Must be the same entry type and not have free memory between.
+ */
+ if (e1->etype != e2->etype || e1->end != e2->start)
+ return 0;
+
+ /*
+ * Submaps are never joined.
+ */
+ if (UVM_ET_ISSUBMAP(e1))
+ return 0;
+
+ /*
+ * Never merge wired memory.
+ */
+ if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
+ return 0;
+
+ /*
+ * Protection, inheritance and advice must be equal.
+ */
+ if (e1->protection != e2->protection ||
+ e1->max_protection != e2->max_protection ||
+ e1->inheritance != e2->inheritance ||
+ e1->advice != e2->advice)
+ return 0;
+
+ /*
+ * If uvm_object: the objects themselves and the offsets within them must match.
+ */
+ if (UVM_ET_ISOBJ(e1)) {
+ if (e1->object.uvm_obj != e2->object.uvm_obj)
+ return 0;
+ if (e1->offset + (e1->end - e1->start) != e2->offset)
+ return 0;
+ }
+
+ /*
+ * Cannot join shared amaps.
+ * Note: no need to lock amap to look at refs, since we don't care
+ * about its exact value.
+ * If it is 1 (i.e. we have the only reference) it will stay there.
+ */
+ if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
+ return 0;
+ if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
+ return 0;
+
+ /*
+ * Apparently, e1 and e2 match.
+ */
+ return 1;
+}
+
+/*
+ * Join support function.
+ *
+ * Returns the merged entry on success.
+ * Returns NULL if the merge failed.
+ */
+struct vm_map_entry*
+uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
+ struct vm_map_entry *e2, struct uvm_map_deadq *dead)
+{
+ struct uvm_addr_state *free;
+
+ /*
+ * Amap of e1 must be extended to include e2.
+ * e2 contains no real information in its amap,
+ * so it can be erased immediately.
+ */
+ if (e1->aref.ar_amap) {
+ if (amap_extend(e1, e2->end - e2->start))
+ return NULL;
+ }
+
+ /*
+ * Don't drop obj reference:
+ * uvm_unmap_detach will do this for us.
+ */
+
+ free = uvm_map_uaddr_e(map, e1);
+ if (free)
+ uvm_mapent_free_remove(map, free, e1);
+
+ free = uvm_map_uaddr_e(map, e2);
+ if (free)
+ uvm_mapent_free_remove(map, free, e2);
+ uvm_mapent_addr_remove(map, e2);
+ e1->end = e2->end;
+ e1->guard = e2->guard;
+ e1->fspace = e2->fspace;
+ if (free)
+ uvm_mapent_free_insert(map, free, e1);
+
+ DEAD_ENTRY_PUSH(dead, e2);
+ return e1;
+}
+
+/*
+ * Attempt forward and backward joining of entry.
+ *
+ * Returns entry after joins.
+ * We are guaranteed that the amap of entry is either non-existent or
+ * has never been used.
+ */
+struct vm_map_entry*
+uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
+ struct uvm_map_deadq *dead)
+{
+ struct vm_map_entry *other;
+ struct vm_map_entry *merged;
+
+ /*
+ * Merge with previous entry.
+ */
+ other = RB_PREV(uvm_map_addr, &map->addr, entry);
+ if (other && uvm_mapent_isjoinable(map, other, entry)) {
+ merged = uvm_mapent_merge(map, other, entry, dead);
+ if (merged)
+ entry = merged;
}
- if (n != map->nentries) {
- printf("%s: nentries: %d vs %d\n",
- name, n, map->nentries);
- goto error;
+ /*
+ * Merge with next entry.
+ *
+ * Because amap can only extend forward and the next entry
+ * probably contains sensible info, only perform forward merging
+ * in the absence of an amap.
+ */
+ other = RB_NEXT(uvm_map_addr, &map->addr, entry);
+ if (other && entry->aref.ar_amap == NULL &&
+ other->aref.ar_amap == NULL &&
+ uvm_mapent_isjoinable(map, entry, other)) {
+ merged = uvm_mapent_merge(map, entry, other, dead);
+ if (merged)
+ entry = merged;
}
- for (tmp = map->header.next; tmp && tmp != &map->header;
- tmp = tmp->next, i++) {
- trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
- if (trtmp != tmp) {
- printf("%s: lookup: %d: %p - %p: %p\n",
- name, i, tmp, trtmp,
- RB_PARENT(tmp, rb_entry));
- goto error;
+ return entry;
+}
+
+/*
+ * Kill entries that are no longer in a map.
+ */
+void
+uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
+{
+ struct vm_map_entry *entry;
+
+ while ((entry = TAILQ_FIRST(deadq)) != NULL) {
+ /*
+ * Drop reference to amap, if we've got one.
+ */
+ if (entry->aref.ar_amap)
+ amap_unref(entry->aref.ar_amap,
+ entry->aref.ar_pageoff,
+ atop(entry->end - entry->start),
+ flags);
+
+ /*
+ * Drop reference to our backing object, if we've got one.
+ */
+ if (UVM_ET_ISSUBMAP(entry)) {
+ /* ... unlikely to happen, but play it safe */
+ uvm_map_deallocate(entry->object.sub_map);
+ } else if (UVM_ET_ISOBJ(entry) &&
+ entry->object.uvm_obj->pgops->pgo_detach) {
+ entry->object.uvm_obj->pgops->pgo_detach(
+ entry->object.uvm_obj);
}
+
+ /*
+ * Step to next.
+ */
+ TAILQ_REMOVE(deadq, entry, dfree.deadq);
+ uvm_mapent_free(entry);
}
+}
- return (0);
- error:
-#ifdef DDB
- /* handy breakpoint location for error case */
- __asm(".globl treesanity_label\ntreesanity_label:");
-#endif
- return (-1);
+/*
+ * Create and insert new entry.
+ *
+ * Returned entry contains new addresses and is inserted properly in the tree.
+ * first and last are (probably) no longer valid.
+ */
+struct vm_map_entry*
+uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
+ struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
+ struct uvm_map_deadq *dead)
+{
+ struct vm_map_entry *entry, *prev;
+ struct uvm_addr_state *free;
+ vaddr_t min, max; /* free space boundaries for new entry */
+
+ KDASSERT(map != NULL);
+ KDASSERT(first != NULL);
+ KDASSERT(last != NULL);
+ KDASSERT(dead != NULL);
+ KDASSERT(sz > 0);
+ KDASSERT(addr + sz > addr);
+ KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
+ KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
+ KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+
+ min = addr + sz;
+ max = VMMAP_FREE_END(last);
+
+ /*
+ * Initialize new entry.
+ */
+ entry = uvm_mapent_alloc(map, flags);
+ if (entry == NULL)
+ return NULL;
+ entry->offset = 0;
+ entry->etype = 0;
+ entry->wired_count = 0;
+ entry->aref.ar_pageoff = 0;
+ entry->aref.ar_amap = NULL;
+
+ entry->start = addr;
+ entry->end = min;
+ entry->guard = 0;
+ entry->fspace = 0;
+
+ /*
+ * Reset free space in first.
+ */
+ free = uvm_map_uaddr_e(map, first);
+ if (free)
+ uvm_mapent_free_remove(map, free, first);
+ first->guard = 0;
+ first->fspace = 0;
+
+ /*
+ * Remove all entries that are fully replaced.
+ * We are iterating using last in reverse order.
+ */
+ for (; first != last; last = prev) {
+ prev = RB_PREV(uvm_map_addr, &map->addr, last);
+
+ KDASSERT(last->start == last->end);
+ free = uvm_map_uaddr_e(map, last);
+ if (free)
+ uvm_mapent_free_remove(map, free, last);
+ uvm_mapent_addr_remove(map, last);
+ DEAD_ENTRY_PUSH(dead, last);
+ }
+ /*
+ * Remove first if it is entirely inside <addr, addr+sz>.
+ */
+ if (first->start == addr) {
+ uvm_mapent_addr_remove(map, first);
+ DEAD_ENTRY_PUSH(dead, first);
+ } else {
+ uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
+ addr, flags);
+ }
+
+ /*
+ * Finally, link in entry.
+ */
+ uvm_mapent_addr_insert(map, entry);
+ uvm_map_fix_space(map, entry, min, max, flags);
+
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+ return entry;
}
-#endif
/*
* uvm_mapent_alloc: allocate a map entry
*/
-
struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map, int flags)
{
@@ -406,15 +1468,15 @@ uvm_mapent_alloc(struct vm_map *map, int flags)
for (i = 0;
i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
i++)
- ne[i].next = &ne[i + 1];
- ne[i].next = NULL;
+ RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
+ RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
me = ne;
if (ratecheck(&uvm_kmapent_last_warn_time,
&uvm_kmapent_warn_rate))
printf("uvm_mapent_alloc: out of static "
"map entries\n");
}
- uvm.kentry_free = me->next;
+ uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
uvmexp.kmapent++;
simple_unlock(&uvm.kentry_lock);
splx(s);
@@ -433,6 +1495,12 @@ uvm_mapent_alloc(struct vm_map *map, int flags)
me->flags = 0;
}
+ if (me != NULL) {
+ RB_LEFT(me, daddrs.addr_entry) =
+ RB_RIGHT(me, daddrs.addr_entry) =
+ RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF;
+ }
+
out:
return(me);
}
@@ -442,7 +1510,6 @@ out:
*
* => XXX: static pool for kernel map?
*/
-
void
uvm_mapent_free(struct vm_map_entry *me)
{
@@ -451,7 +1518,7 @@ uvm_mapent_free(struct vm_map_entry *me)
if (me->flags & UVM_MAP_STATIC) {
s = splvm();
simple_lock(&uvm.kentry_lock);
- me->next = uvm.kentry_free;
+ RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
uvm.kentry_free = me;
uvmexp.kmapent--;
simple_unlock(&uvm.kentry_lock);
@@ -466,1726 +1533,2108 @@ uvm_mapent_free(struct vm_map_entry *me)
}
/*
- * uvm_mapent_copy: copy a map entry, preserving flags
+ * uvm_map_lookup_entry: find map entry at or before an address.
+ *
+ * => map must at least be read-locked by caller
+ * => entry is returned in "entry"
+ * => return value is true if address is in the returned entry
+ * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
+ * returned for those mappings.
*/
-
-void
-uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
+boolean_t
+uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
+ struct vm_map_entry **entry)
{
- memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
- ((char *)src));
+ *entry = uvm_map_entrybyaddr(&map->addr, address);
+ return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
+ (*entry)->start <= address && (*entry)->end > address;
}
/*
- * uvm_map_entry_unwire: unwire a map entry
- *
- * => map should be locked by caller
+ * uvm_map_pie: return a random load address for a PIE executable
+ * properly aligned.
*/
-void
-uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
+#ifndef VM_PIE_MAX_ADDR
+#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
+#endif
+
+#ifndef VM_PIE_MIN_ADDR
+#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
+#endif
+
+#ifndef VM_PIE_MIN_ALIGN
+#define VM_PIE_MIN_ALIGN PAGE_SIZE
+#endif
+
+vaddr_t
+uvm_map_pie(vaddr_t align)
{
+ vaddr_t addr, space, min;
- entry->wired_count = 0;
- uvm_fault_unwire_locked(map, entry->start, entry->end);
-}
+ align = MAX(align, VM_PIE_MIN_ALIGN);
+ /* round up to next alignment */
+ min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
+
+ if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
+ return (align);
+
+ space = (VM_PIE_MAX_ADDR - min) / align;
+ space = MIN(space, (u_int32_t)-1);
+
+ addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
+ addr += min;
+
+ return (addr);
+}
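
The address picker above rounds the minimum up to the requested alignment, counts how many aligned slots fit below VM_PIE_MAX_ADDR, and chooses one uniformly at random. A minimal stand-alone sketch of the same arithmetic (editor's illustration, not part of the patch; the constants and the rand() stand-in for arc4random_uniform() are assumptions):

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define PIE_MIN_ADDR	0x1000UL	/* assumed, not the kernel's values */
	#define PIE_MAX_ADDR	(1UL << 30)

	static uintptr_t
	pie_pick(uintptr_t align)
	{
		uintptr_t min, space;

		/* round the minimum address up to the alignment */
		min = (PIE_MIN_ADDR + align - 1) & ~(align - 1);
		if (align >= PIE_MAX_ADDR || min >= PIE_MAX_ADDR)
			return align;			/* degenerate range */
		/* number of aligned slots in [min, PIE_MAX_ADDR) */
		space = (PIE_MAX_ADDR - min) / align;
		return min + ((uintptr_t)rand() % space) * align;
	}

	int
	main(void)
	{
		printf("0x%lx\n", (unsigned long)pie_pick(0x10000));
		return 0;
	}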
-/*
- * wrapper for calling amap_ref()
- */
void
-uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
+uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
{
- amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
- (entry->end - entry->start) >> PAGE_SHIFT, flags);
-}
+ struct uvm_map_deadq dead;
+ KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
+ (end & (vaddr_t)PAGE_MASK) == 0);
+ TAILQ_INIT(&dead);
+ vm_map_lock(map);
+ uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
+ vm_map_unlock(map);
+
+ uvm_unmap_detach(&dead, 0);
+}
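
uvm_unmap() shows the pattern used throughout this file: entries are unlinked onto a local dead queue while the map is locked and only destroyed once the lock has been dropped. A stand-alone sketch of that "collect under lock, reclaim after unlock" pattern (editor's illustration; the node type and the pthread lock are assumptions, not kernel interfaces):

	#include <sys/queue.h>
	#include <pthread.h>
	#include <stdlib.h>

	struct node {
		TAILQ_ENTRY(node) deadq;
		/* ... payload ... */
	};
	TAILQ_HEAD(deadq_head, node);

	static pthread_mutex_t maplock = PTHREAD_MUTEX_INITIALIZER;

	void
	remove_all(struct deadq_head *live)	/* hypothetical live list */
	{
		struct deadq_head dead = TAILQ_HEAD_INITIALIZER(dead);
		struct node *n;

		pthread_mutex_lock(&maplock);
		while ((n = TAILQ_FIRST(live)) != NULL) {
			/* unlink under the lock, defer the free */
			TAILQ_REMOVE(live, n, deadq);
			TAILQ_INSERT_TAIL(&dead, n, deadq);
		}
		pthread_mutex_unlock(&maplock);

		while ((n = TAILQ_FIRST(&dead)) != NULL) {
			/* reclaim with the lock released */
			TAILQ_REMOVE(&dead, n, deadq);
			free(n);
		}
	}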
/*
- * wrapper for calling amap_unref()
+ * Mark entry as free.
+ *
+ * entry will be put on the dead list.
+ * The free space will be merged into the previous or a new entry,
+ * unless markfree is false.
*/
void
-uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
+uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
+ struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
+ boolean_t markfree)
{
- amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
- (entry->end - entry->start) >> PAGE_SHIFT, flags);
-}
+ struct uvm_addr_state *free;
+ struct vm_map_entry *prev;
+ vaddr_t addr; /* Start of freed range. */
+ vaddr_t end; /* End of freed range. */
+
+ prev = *prev_ptr;
+ if (prev == entry)
+ *prev_ptr = prev = NULL;
+
+ if (prev == NULL ||
+ VMMAP_FREE_END(prev) != entry->start)
+ prev = RB_PREV(uvm_map_addr, &map->addr, entry);
+ /*
+ * Entry is describing only free memory and has nothing to drain into.
+ */
+ if (prev == NULL && entry->start == entry->end && markfree) {
+ *prev_ptr = entry;
+ return;
+ }
+ addr = entry->start;
+ end = VMMAP_FREE_END(entry);
+ free = uvm_map_uaddr_e(map, entry);
+ if (free)
+ uvm_mapent_free_remove(map, free, entry);
+ uvm_mapent_addr_remove(map, entry);
+ DEAD_ENTRY_PUSH(dead, entry);
+
+ if (markfree) {
+ if (prev) {
+ free = uvm_map_uaddr_e(map, prev);
+ if (free)
+ uvm_mapent_free_remove(map, free, prev);
+ }
+ *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
+ }
+}
/*
- * uvm_map_init: init mapping system at boot time. note that we allocate
- * and init the static pool of structs vm_map_entry for the kernel here.
+ * Unwire and release referenced amap and object from map entry.
*/
-
void
-uvm_map_init(void)
+uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
{
- static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
- int lcv;
-
/*
- * set up static pool of kernel map entries ...
+ * Unwire removed map entry.
*/
-
- simple_lock_init(&uvm.kentry_lock);
- uvm.kentry_free = NULL;
- for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
- kernel_map_entry[lcv].next = uvm.kentry_free;
- uvm.kentry_free = &kernel_map_entry[lcv];
+ if (VM_MAPENT_ISWIRED(entry)) {
+ entry->wired_count = 0;
+ uvm_fault_unwire_locked(map, entry->start, entry->end);
}
/*
- * initialize the map-related pools.
+ * Entry-type specific code.
*/
- pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
- 0, 0, 0, "vmsppl", &pool_allocator_nointr);
- pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
- 0, 0, 0, "vmmpepl", &pool_allocator_nointr);
- pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
- 0, 0, 0, "vmmpekpl", NULL);
- pool_sethiwat(&uvm_map_entry_pool, 8192);
-}
+ if (UVM_ET_ISHOLE(entry)) {
+ /*
+ * Nothing to be done for holes.
+ */
+ } else if (map->flags & VM_MAP_INTRSAFE) {
+ KASSERT(vm_map_pmap(map) == pmap_kernel());
+ uvm_km_pgremove_intrsafe(entry->start, entry->end);
+ pmap_kremove(entry->start, entry->end - entry->start);
+ } else if (UVM_ET_ISOBJ(entry) &&
+ UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
+ KASSERT(vm_map_pmap(map) == pmap_kernel());
-/*
- * clippers
- */
+ /*
+ * Note: kernel object mappings are currently used in
+ * two ways:
+ * [1] "normal" mappings of pages in the kernel object
+ * [2] uvm_km_valloc'd allocations in which we
+ * pmap_enter in some non-kernel-object page
+ * (e.g. vmapbuf).
+ *
+ * for case [1], we need to remove the mapping from
+ * the pmap and then remove the page from the kernel
+ * object (because, once pages in a kernel object are
+ * unmapped they are no longer needed, unlike, say,
+ * a vnode where you might want the data to persist
+ * until flushed out of a queue).
+ *
+ * for case [2], we need to remove the mapping from
+ * the pmap. there shouldn't be any pages at the
+ * specified offset in the kernel object [but it
+ * doesn't hurt to call uvm_km_pgremove just to be
+ * safe?]
+ *
+ * uvm_km_pgremove currently does the following:
+ * for pages in the kernel object range:
+ * - drops the swap slot
+ * - uvm_pagefree the page
+ *
+ * note there is a version of uvm_km_pgremove() that
+ * is used for "intrsafe" objects.
+ */
+
+ /*
+ * remove mappings from pmap and drop the pages
+ * from the object. offsets are always relative
+ * to vm_map_min(kernel_map).
+ */
+ pmap_remove(pmap_kernel(), entry->start, entry->end);
+ uvm_km_pgremove(entry->object.uvm_obj,
+ entry->start - vm_map_min(kernel_map),
+ entry->end - vm_map_min(kernel_map));
+
+ /*
+ * null out kernel_object reference, we've just
+ * dropped it
+ */
+ entry->etype &= ~UVM_ET_OBJ;
+ entry->object.uvm_obj = NULL; /* to be safe */
+ } else {
+ /*
+ * remove mappings the standard way.
+ */
+ pmap_remove(map->pmap, entry->start, entry->end);
+ }
+}
/*
- * uvm_map_clip_start: ensure that the entry begins at or after
- * the starting address, if it doesn't we split the entry.
- *
- * => caller should use UVM_MAP_CLIP_START macro rather than calling
- * this directly
- * => map must be locked by caller
+ * Remove all entries from start to end.
+ *
+ * If remove_holes, then remove ET_HOLE entries as well.
+ * If markfree, entry will be properly marked free; otherwise, no replacement
+ * entry will be put in the tree (corrupting the tree).
*/
-
void
-uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
- vaddr_t start)
+uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
+ struct uvm_map_deadq *dead, boolean_t remove_holes,
+ boolean_t markfree)
{
- struct vm_map_entry *new_entry;
- vaddr_t new_adj;
+ struct vm_map_entry *prev_hint, *next, *entry;
- /* uvm_map_simplify_entry(map, entry); */ /* XXX */
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return;
- uvm_tree_sanity(map, "clip_start entry");
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ splassert(IPL_NONE);
+ else
+ splassert(IPL_VM);
/*
- * Split off the front portion. note that we must insert the new
- * entry BEFORE this one, so that this entry has the specified
- * starting address.
+ * Find first affected entry.
*/
+ entry = uvm_map_entrybyaddr(&map->addr, start);
+ KDASSERT(entry != NULL && entry->start <= start);
+ if (entry->end <= start && markfree)
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
+ else
+ UVM_MAP_CLIP_START(map, entry, start);
- new_entry = uvm_mapent_alloc(map, 0);
- uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
+ /*
+ * Iterate entries until we reach end address.
+ * prev_hint hints where the freed space can be appended to.
+ */
+ prev_hint = NULL;
+ for (; entry != NULL && entry->start < end; entry = next) {
+ KDASSERT(entry->start >= start);
+ if (entry->end > end || !markfree)
+ UVM_MAP_CLIP_END(map, entry, end);
+ KDASSERT(entry->start >= start && entry->end <= end);
+ next = RB_NEXT(uvm_map_addr, &map->addr, entry);
- new_entry->end = start;
- new_adj = start - new_entry->start;
- if (entry->object.uvm_obj)
- entry->offset += new_adj; /* shift start over */
+ /* Don't remove holes unless asked to do so. */
+ if (UVM_ET_ISHOLE(entry)) {
+ if (!remove_holes) {
+ prev_hint = entry;
+ continue;
+ }
+ }
- /* Does not change order for the RB tree */
- entry->start = start;
+ /* Kill entry. */
+ uvm_unmap_kill_entry(map, entry);
- if (new_entry->aref.ar_amap) {
- amap_splitref(&new_entry->aref, &entry->aref, new_adj);
+ /*
+ * Update space usage.
+ */
+ if ((map->flags & VM_MAP_ISVMSPACE) &&
+ entry->object.uvm_obj == NULL &&
+ !UVM_ET_ISHOLE(entry)) {
+ ((struct vmspace *)map)->vm_dused -=
+ uvmspace_dused(map, entry->start, entry->end);
+ }
+ if (!UVM_ET_ISHOLE(entry))
+ map->size -= entry->end - entry->start;
+
+ /*
+ * Actual removal of entry.
+ */
+ uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
}
- uvm_map_entry_link(map, entry->prev, new_entry);
+ pmap_update(vm_map_pmap(map));
- if (UVM_ET_ISSUBMAP(entry)) {
- /* ... unlikely to happen, but play it safe */
- uvm_map_reference(new_entry->object.sub_map);
+#ifdef VMMAP_DEBUG
+ if (markfree) {
+ for (entry = uvm_map_entrybyaddr(&map->addr, start);
+ entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ KDASSERT(entry->end <= start ||
+ entry->start == entry->end ||
+ UVM_ET_ISHOLE(entry));
+ }
} else {
- if (UVM_ET_ISOBJ(entry) &&
- entry->object.uvm_obj->pgops &&
- entry->object.uvm_obj->pgops->pgo_reference)
- entry->object.uvm_obj->pgops->pgo_reference(
- entry->object.uvm_obj);
+ vaddr_t a;
+ for (a = start; a < end; a += PAGE_SIZE)
+ KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
}
-
- uvm_tree_sanity(map, "clip_start leave");
+#endif
}
/*
- * uvm_map_clip_end: ensure that the entry ends at or before
- * the ending address, if it doesn't we split the reference
- *
- * => caller should use UVM_MAP_CLIP_END macro rather than calling
- * this directly
- * => map must be locked by caller
+ * Mark all entries from first until end (exclusive) as pageable.
+ *
+ * Lock must be exclusive on entry and will not be touched.
*/
-
void
-uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
+uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
+ struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
{
- struct vm_map_entry *new_entry;
- vaddr_t new_adj; /* #bytes we move start forward */
-
- uvm_tree_sanity(map, "clip_end entry");
- /*
- * Create a new entry and insert it
- * AFTER the specified entry
- */
+ struct vm_map_entry *iter;
- new_entry = uvm_mapent_alloc(map, 0);
- uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
-
- new_entry->start = entry->end = end;
- new_adj = end - entry->start;
- if (new_entry->object.uvm_obj)
- new_entry->offset += new_adj;
-
- if (entry->aref.ar_amap)
- amap_splitref(&entry->aref, &new_entry->aref, new_adj);
-
- uvm_rb_fixup(map, entry);
-
- uvm_map_entry_link(map, entry, new_entry);
+ for (iter = first; iter != end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
+ if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
+ continue;
- if (UVM_ET_ISSUBMAP(entry)) {
- /* ... unlikely to happen, but play it safe */
- uvm_map_reference(new_entry->object.sub_map);
- } else {
- if (UVM_ET_ISOBJ(entry) &&
- entry->object.uvm_obj->pgops &&
- entry->object.uvm_obj->pgops->pgo_reference)
- entry->object.uvm_obj->pgops->pgo_reference(
- entry->object.uvm_obj);
+ iter->wired_count = 0;
+ uvm_fault_unwire_locked(map, iter->start, iter->end);
}
- uvm_tree_sanity(map, "clip_end leave");
}
-
-/*
- * M A P - m a i n e n t r y p o i n t
- */
/*
- * uvm_map: establish a valid mapping in a map
+ * Mark all entries from first until end (exclusive) as wired.
*
- * => assume startp is page aligned.
- * => assume size is a multiple of PAGE_SIZE.
- * => assume sys_mmap provides enough of a "hint" to have us skip
- * over text/data/bss area.
- * => map must be unlocked (we will lock it)
- * => <uobj,uoffset> value meanings (4 cases):
- * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
- * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
- * [3] <uobj,uoffset> == normal mapping
- * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
- *
- * case [4] is for kernel mappings where we don't know the offset until
- * we've found a virtual address. note that kernel object offsets are
- * always relative to vm_map_min(kernel_map).
- *
- * => if `align' is non-zero, we try to align the virtual address to
- * the specified alignment. this is only a hint; if we can't
- * do it, the address will be unaligned. this is provided as
- * a mechanism for large pages.
- *
- * => XXXCDC: need way to map in external amap?
+ * Lockflags determines the lock state on return from this function.
+ * Lock must be exclusive on entry.
*/
-
int
-uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size,
- struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
- struct proc *p)
+uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
+ struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
+ int lockflags)
{
- struct vm_map_entry *prev_entry, *new_entry;
-#ifdef KVA_GUARDPAGES
- struct vm_map_entry *guard_entry;
+ struct vm_map_entry *iter;
+#ifdef DIAGNOSTIC
+ unsigned int timestamp_save;
#endif
- vm_prot_t prot = UVM_PROTECTION(flags), maxprot =
- UVM_MAXPROTECTION(flags);
- vm_inherit_t inherit = UVM_INHERIT(flags);
- int advice = UVM_ADVICE(flags);
int error;
/*
- * Holes are incompatible with other types of mappings.
+ * Wire pages in two passes:
+ *
+ * 1: holding the write lock, we create any anonymous maps that need
+ * to be created. then we clip each map entry to the region to
+ * be wired and increment its wiring count.
+ *
+ * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
+ * in the pages for any newly wired area (wired_count == 1).
+ *
+ * downgrading to a read lock for uvm_fault_wire avoids a possible
+ * deadlock with another thread that may have faulted on one of
+ * the pages to be wired (it would mark the page busy, blocking
+ * us, then in turn block on the map lock that we hold).
+ * because we keep the read lock on the map, the copy-on-write
+ * status of the entries we modify here cannot change.
*/
- if (flags & UVM_FLAG_HOLE) {
- KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 &&
- (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
- }
+ for (iter = first; iter != end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
+ if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
+ continue;
-#ifdef KVA_GUARDPAGES
- if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) {
/*
- * kva_guardstart is initialized to the start of the kernelmap
- * and cycles through the kva space.
- * This way we should have a long time between re-use of kva.
+ * Perform actions of vm_map_lookup that need the write lock.
+ * - create an anonymous map for copy-on-write
+ * - anonymous map for zero-fill
+ * Skip submaps.
*/
- static vaddr_t kva_guardstart = 0;
- if (kva_guardstart == 0) {
- kva_guardstart = vm_map_min(map);
- printf("uvm_map: kva guard pages enabled: %p\n",
- kva_guardstart);
+ if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
+ UVM_ET_ISNEEDSCOPY(iter) &&
+ ((iter->protection & VM_PROT_WRITE) ||
+ iter->object.uvm_obj == NULL)) {
+ amap_copy(map, iter, M_WAITOK, TRUE,
+ iter->start, iter->end);
}
- size += PAGE_SIZE; /* Add guard page at the end. */
- /*
- * Try to fully exhaust kva prior to wrap-around.
- * (This may eat your ram!)
- */
- if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) {
- static int wrap_counter = 0;
- printf("uvm_map: kva guard page wrap-around %d\n",
- ++wrap_counter);
- kva_guardstart = vm_map_min(map);
- }
- *startp = kva_guardstart;
- /*
- * Prepare for next round.
- */
- kva_guardstart += size;
+ iter->wired_count++;
}
-#endif
-
- uvm_tree_sanity(map, "map entry");
-
- if ((map->flags & VM_MAP_INTRSAFE) == 0)
- splassert(IPL_NONE);
- else
- splassert(IPL_VM);
/*
- * step 0: sanity check of protection code
+ * Pass 2.
*/
+#ifdef DIAGNOSTIC
+ timestamp_save = map->timestamp;
+#endif
+ vm_map_busy(map);
+ vm_map_downgrade(map);
- if ((prot & maxprot) != prot) {
- return (EACCES);
- }
-
- /*
- * step 1: figure out where to put new VM range
- */
+ error = 0;
+ for (iter = first; error == 0 && iter != end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
+ continue;
- if (vm_map_lock_try(map) == FALSE) {
- if (flags & UVM_FLAG_TRYLOCK)
- return (EFAULT);
- vm_map_lock(map); /* could sleep here */
- }
- if ((prev_entry = uvm_map_findspace(map, *startp, size, startp,
- uobj, uoffset, align, flags)) == NULL) {
- vm_map_unlock(map);
- return (ENOMEM);
+ error = uvm_fault_wire(map, iter->start, iter->end,
+ iter->protection);
}
-#ifdef PMAP_GROWKERNEL
- {
+ if (error) {
/*
- * If the kernel pmap can't map the requested space,
- * then allocate more resources for it.
+ * uvm_fault_wire failure
+ *
+ * Reacquire lock and undo our work.
*/
- if (map == kernel_map && !(flags & UVM_FLAG_FIXED) &&
- uvm_maxkaddr < (*startp + size))
- uvm_maxkaddr = pmap_growkernel(*startp + size);
- }
+ vm_map_upgrade(map);
+ vm_map_unbusy(map);
+#ifdef DIAGNOSTIC
+ if (timestamp_save != map->timestamp)
+ panic("uvm_map_pageable_wire: stale map");
#endif
- /*
- * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
- * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in
- * either case we want to zero it before storing it in the map entry
- * (because it looks strange and confusing when debugging...)
- *
- * if uobj is not null
- * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
- * and we do not need to change uoffset.
- * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
- * now (based on the starting address of the map). this case is
- * for kernel object mappings where we don't know the offset until
- * the virtual address is found (with uvm_map_findspace). the
- * offset is the distance we are from the start of the map.
- */
+ /*
+ * first is no longer needed to restart loops.
+ * Use it as iterator to unmap successful mappings.
+ */
+ for (; first != iter;
+ first = RB_NEXT(uvm_map_addr, &map->addr, first)) {
+ if (UVM_ET_ISHOLE(first) || first->start == first->end)
+ continue;
- if (uobj == NULL) {
- uoffset = 0;
- } else {
- if (uoffset == UVM_UNKNOWN_OFFSET) {
- KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
- uoffset = *startp - vm_map_min(kernel_map);
+ first->wired_count--;
+ if (!VM_MAPENT_ISWIRED(first)) {
+ uvm_fault_unwire_locked(map,
+ first->start, first->end);
+ }
}
+
+ /*
+ * decrease counter in the rest of the entries
+ */
+ for (; iter != end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
+ continue;
+
+ iter->wired_count--;
+ }
+
+ if ((lockflags & UVM_LK_EXIT) == 0)
+ vm_map_unlock(map);
+ return error;
}
/*
- * step 2: try and insert in map by extending previous entry, if
- * possible
- * XXX: we don't try and pull back the next entry. might be useful
- * for a stack, but we are currently allocating our stack in advance.
+ * We are currently holding a read lock.
*/
+ if ((lockflags & UVM_LK_EXIT) == 0) {
+ vm_map_unbusy(map);
+ vm_map_unlock_read(map);
+ } else {
+ vm_map_upgrade(map);
+ vm_map_unbusy(map);
+#ifdef DIAGNOSTIC
+ if (timestamp_save != map->timestamp)
+ panic("uvm_map_pageable_wire: stale map");
+#endif
+ }
+ return 0;
+}
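
The comment at the top of uvm_map_pageable_wire() describes the shape of the operation: raise wiring counts in a first pass, fault the pages in under a downgraded lock in a second pass, and roll the counts back if any fault fails. A toy sketch of that increment/fault/rollback shape on a plain array (editor's illustration; all names are made up):

	#include <stdio.h>

	static int wired[8];

	static int
	fault_wire(int i)
	{
		return (i == 5) ? -1 : 0;	/* pretend entry 5 fails to fault in */
	}

	static int
	wire_range(int lo, int hi)
	{
		int i, j;

		for (i = lo; i < hi; i++)	/* pass 1: raise wiring counts */
			wired[i]++;
		for (i = lo; i < hi; i++) {	/* pass 2: fault pages in */
			if (fault_wire(i) != 0) {
				for (j = lo; j < hi; j++)	/* undo pass 1 */
					wired[j]--;
				return -1;
			}
		}
		return 0;
	}

	int
	main(void)
	{
		printf("wire 0-8 -> %d (expected -1)\n", wire_range(0, 8));
		return 0;
	}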
- if ((flags & UVM_FLAG_NOMERGE) == 0 &&
- prev_entry->end == *startp && prev_entry != &map->header &&
- prev_entry->object.uvm_obj == uobj) {
-
- if (uobj && prev_entry->offset +
- (prev_entry->end - prev_entry->start) != uoffset)
- goto step3;
-
- if (UVM_ET_ISSUBMAP(prev_entry))
- goto step3;
+/*
+ * uvm_map_pageable: set pageability of a range in a map.
+ *
+ * Flags:
+ * UVM_LK_ENTER: map is already locked by caller
+ * UVM_LK_EXIT: don't unlock map on exit
+ *
+ * The full range must be in use (entries may not have fspace != 0).
+ * UVM_ET_HOLE counts as unmapped.
+ */
+int
+uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
+ boolean_t new_pageable, int lockflags)
+{
+ struct vm_map_entry *first, *last, *tmp;
+ int error;
- if (prev_entry->protection != prot ||
- prev_entry->max_protection != maxprot)
- goto step3;
+ start = trunc_page(start);
+ end = round_page(end);
- if (prev_entry->inheritance != inherit ||
- prev_entry->advice != advice)
- goto step3;
+ if (start > end)
+ return EINVAL;
+ if (start < map->min_offset)
+ return EFAULT; /* why? see first XXX below */
+ if (end > map->max_offset)
+ return EINVAL; /* why? see second XXX below */
- /* wiring status must match (new area is unwired) */
- if (VM_MAPENT_ISWIRED(prev_entry))
- goto step3;
+ KASSERT(map->flags & VM_MAP_PAGEABLE);
+ if ((lockflags & UVM_LK_ENTER) == 0)
+ vm_map_lock(map);
+ /*
+ * Find first entry.
+ *
+ * Initial test on start is different, because of the different
+ * error returned. Rest is tested further down.
+ */
+ first = uvm_map_entrybyaddr(&map->addr, start);
+ if (first->end <= start || UVM_ET_ISHOLE(first)) {
/*
- * can't extend a shared amap. note: no need to lock amap to
- * look at refs since we don't care about its exact value.
- * if it is one (i.e. we have only reference) it will stay there
+ * XXX if the first address is not mapped, it is EFAULT?
*/
+ error = EFAULT;
+ goto out;
+ }
- if (prev_entry->aref.ar_amap &&
- amap_refs(prev_entry->aref.ar_amap) != 1) {
- goto step3;
+ /*
+ * Check that the range has no holes.
+ */
+ for (last = first; last != NULL && last->start < end;
+ last = RB_NEXT(uvm_map_addr, &map->addr, last)) {
+ if (UVM_ET_ISHOLE(last) ||
+ (last->end < end && VMMAP_FREE_END(last) != last->end)) {
+ /*
+ * XXX unmapped memory in range, why is it EINVAL
+ * instead of EFAULT?
+ */
+ error = EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * Last ended at the first entry after the range.
+ * Move back one step.
+ *
+ * Note that last may be NULL.
+ */
+ if (last == NULL) {
+ last = RB_MAX(uvm_map_addr, &map->addr);
+ if (last->end < end) {
+ error = EINVAL;
+ goto out;
}
+ } else
+ last = RB_PREV(uvm_map_addr, &map->addr, last);
+ /*
+ * Wire/unwire pages here.
+ */
+ if (new_pageable) {
/*
- * Only merge kernel mappings, but keep track
- * of how much we skipped.
+ * Mark pageable.
+ * entries that are not wired are untouched.
*/
- if (map != kernel_map && map != kmem_map) {
- goto step3;
- }
+ if (VM_MAPENT_ISWIRED(first))
+ UVM_MAP_CLIP_START(map, first, start);
+ /*
+ * Split last at end.
+ * Make tmp be the first entry after what is to be touched.
+ * If last is not wired, don't touch it.
+ */
+ if (VM_MAPENT_ISWIRED(last)) {
+ UVM_MAP_CLIP_END(map, last, end);
+ tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
+ } else
+ tmp = last;
- if (prev_entry->aref.ar_amap) {
- error = amap_extend(prev_entry, size);
- if (error)
- goto step3;
- }
+ uvm_map_pageable_pgon(map, first, tmp, start, end);
+ error = 0;
+out:
+ if ((lockflags & UVM_LK_EXIT) == 0)
+ vm_map_unlock(map);
+ return error;
+ } else {
+ /*
+ * Mark entries wired.
+ * entries are always touched (because recovery needs this).
+ */
+ if (!VM_MAPENT_ISWIRED(first))
+ UVM_MAP_CLIP_START(map, first, start);
/*
- * drop our reference to uobj since we are extending a reference
- * that we already have (the ref count can not drop to zero).
+ * Split last at end.
+ * Make tmp be the first entry after what is to be touched.
+ * If last is not wired, don't touch it.
*/
+ if (!VM_MAPENT_ISWIRED(last)) {
+ UVM_MAP_CLIP_END(map, last, end);
+ tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
+ } else
+ tmp = last;
+
+ return uvm_map_pageable_wire(map, first, tmp, start, end,
+ lockflags);
+ }
+}
- if (uobj && uobj->pgops->pgo_detach)
- uobj->pgops->pgo_detach(uobj);
+/*
+ * uvm_map_pageable_all: special case of uvm_map_pageable - affects
+ * all mapped regions.
+ *
+ * Map must not be locked.
+ * If no flags are specified, all regions are unwired.
+ */
+int
+uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
+{
+ vsize_t size;
+ struct vm_map_entry *iter;
- prev_entry->end += size;
- uvm_rb_fixup(map, prev_entry);
- map->size += size;
- if (p && uobj == NULL)
- p->p_vmspace->vm_dused += atop(size);
+ KASSERT(map->flags & VM_MAP_PAGEABLE);
+ vm_map_lock(map);
- uvm_tree_sanity(map, "map leave 2");
+ if (flags == 0) {
+ uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr),
+ NULL, map->min_offset, map->max_offset);
+ atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE);
vm_map_unlock(map);
- return (0);
+ return 0;
+ }
+ if (flags & MCL_FUTURE)
+ atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE);
+ if (!(flags & MCL_CURRENT)) {
+ vm_map_unlock(map);
+ return 0;
}
-step3:
/*
- * step 3: allocate new entry and link it in
+ * Count number of pages in all non-wired entries.
+ * If the number exceeds the limit, abort.
*/
+ size = 0;
+ RB_FOREACH(iter, uvm_map_addr, &map->addr) {
+ if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
+ continue;
-#ifdef KVA_GUARDPAGES
- if (map == kernel_map && !(flags & UVM_FLAG_FIXED))
- size -= PAGE_SIZE;
-#endif
-
- new_entry = uvm_mapent_alloc(map, flags);
- if (new_entry == NULL) {
- vm_map_unlock(map);
- return (ENOMEM);
+ size += iter->end - iter->start;
}
- new_entry->start = *startp;
- new_entry->end = new_entry->start + size;
- new_entry->object.uvm_obj = uobj;
- new_entry->offset = uoffset;
- if (uobj)
- new_entry->etype = UVM_ET_OBJ;
- else
- new_entry->etype = 0;
-
- if (flags & UVM_FLAG_COPYONW) {
- new_entry->etype |= UVM_ET_COPYONWRITE;
- if ((flags & UVM_FLAG_OVERLAY) == 0)
- new_entry->etype |= UVM_ET_NEEDSCOPY;
+ if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
+ vm_map_unlock(map);
+ return ENOMEM;
}
- if (flags & UVM_FLAG_HOLE)
- new_entry->etype |= UVM_ET_HOLE;
- new_entry->protection = prot;
- new_entry->max_protection = maxprot;
- new_entry->inheritance = inherit;
- new_entry->wired_count = 0;
- new_entry->advice = advice;
- if (flags & UVM_FLAG_OVERLAY) {
- /*
- * to_add: for BSS we overallocate a little since we
- * are likely to extend
- */
- vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
- UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
- struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
- new_entry->aref.ar_pageoff = 0;
- new_entry->aref.ar_amap = amap;
- } else {
- new_entry->aref.ar_pageoff = 0;
- new_entry->aref.ar_amap = NULL;
+ /* XXX non-pmap_wired_count case must be handled by caller */
+#ifdef pmap_wired_count
+ if (limit != 0 &&
+ size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
+ vm_map_unlock(map);
+ return ENOMEM;
}
+#endif
- uvm_map_entry_link(map, prev_entry, new_entry);
+ /*
+ * uvm_map_pageable_wire will release the lock.
+ */
+ return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr),
+ NULL, map->min_offset, map->max_offset, 0);
+}
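
For context (editor's note): this is the function mlockall(2) ends up in, with MCL_CURRENT wiring what is mapped now and MCL_FUTURE setting VM_MAP_WIREFUTURE so later mappings are wired as they are created. A minimal userland usage sketch:

	#include <sys/mman.h>
	#include <err.h>

	int
	main(void)
	{
		/* Wire everything mapped now and everything mapped later. */
		if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
			err(1, "mlockall");
		/* ... latency-sensitive work ... */
		munlockall();
		return 0;
	}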
- map->size += size;
- if (p && uobj == NULL)
- p->p_vmspace->vm_dused += atop(size);
+/*
+ * Initialize map.
+ *
+ * Allocates sufficient entries to describe the free memory in the map.
+ */
+void
+uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
+{
+ int i;
+ KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
+ KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
+ (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
/*
- * Update the free space hint
+ * Update parameters.
+ *
+ * This code handles (vaddr_t)-1 and other page mask ending addresses
+ * properly.
+ * We lose the top page if the full virtual address space is used.
*/
+ if (max & (vaddr_t)PAGE_MASK) {
+ max += 1;
+ if (max == 0) /* overflow */
+ max -= PAGE_SIZE;
+ }
+
+ RB_INIT(&map->addr);
+ map->uaddr_exe = NULL;
+ for (i = 0; i < nitems(map->uaddr_any); ++i)
+ map->uaddr_any[i] = NULL;
+ map->uaddr_brk_stack = NULL;
- if ((map->first_free == prev_entry) &&
- (prev_entry->end >= new_entry->start))
- map->first_free = new_entry;
+ map->size = 0;
+ map->ref_count = 1;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->b_start = map->b_end = 0; /* Empty brk() area by default. */
+ map->s_start = map->s_end = 0; /* Empty stack area by default. */
+ map->flags = flags;
+ map->timestamp = 0;
+ rw_init(&map->lock, "vmmaplk");
+ simple_lock_init(&map->ref_lock);
-#ifdef KVA_GUARDPAGES
/*
- * Create the guard entry.
+ * Ensure the selectors will not try to manage page 0;
+ * it's too special.
*/
- if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) {
- guard_entry = uvm_mapent_alloc(map, flags);
- if (guard_entry != NULL) {
- guard_entry->start = new_entry->end;
- guard_entry->end = guard_entry->start + PAGE_SIZE;
- guard_entry->object.uvm_obj = uobj;
- guard_entry->offset = uoffset;
- guard_entry->etype = MAP_ET_KVAGUARD;
- guard_entry->protection = prot;
- guard_entry->max_protection = maxprot;
- guard_entry->inheritance = inherit;
- guard_entry->wired_count = 0;
- guard_entry->advice = advice;
- guard_entry->aref.ar_pageoff = 0;
- guard_entry->aref.ar_amap = NULL;
- uvm_map_entry_link(map, new_entry, guard_entry);
- map->size += PAGE_SIZE;
- kva_guardpages++;
- }
- }
+ if (min < VMMAP_MIN_ADDR)
+ min = VMMAP_MIN_ADDR;
+
+ /*
+ * Configure the allocators.
+ */
+ if (flags & VM_MAP_ISVMSPACE) {
+ /*
+ * Setup hint areas.
+ */
+#if 0 /* Don't use the cool stuff yet. */
+#ifdef __LP64__
+ /* Hinted allocations above 4GB */
+ map->uaddr_any[0] =
+ uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024);
+ /* Hinted allocations below 4GB */
+ map->uaddr_any[1] =
+ uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL,
+ 1024 * 1024 * 1024);
+#else
+ map->uaddr_any[1] =
+ uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max,
+ 1024 * 1024 * 1024);
#endif
- uvm_tree_sanity(map, "map leave");
+#ifdef __i386__
+ map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
+ map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR,
+ max);
+#elif defined(__LP64__)
+ map->uaddr_any[3] =
+ uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
+#else
+ map->uaddr_any[3] = uaddr_pivot_create(min, max);
+#endif
+#else /* Don't use the cool stuff yet. */
+ /*
+ * Use the really crappy stuff at first commit.
+ * Browsers like crappy stuff.
+ */
+ map->uaddr_any[0] = uaddr_rnd_create(min, max);
+#endif
+ map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
+ } else
+ map->uaddr_any[3] = &uaddr_kbootstrap;
- vm_map_unlock(map);
- return (0);
+ /*
+ * Fill map entries.
+ * This requires a write-locked map (because of diagnostic assertions
+ * in insert code).
+ */
+ if ((map->flags & VM_MAP_INTRSAFE) == 0) {
+ if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0)
+ panic("uvm_map_setup: rw_enter failed on new map");
+ }
+ uvm_map_setup_entries(map);
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_exit(&map->lock);
}
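
The end-address normalization at the top of uvm_map_setup() is the subtle part: an inclusive maximum such as (vaddr_t)-1 becomes an exclusive bound, and the wrap to 0 is backed off by one page, losing the top page of a full-address-space map. A small stand-alone check of that arithmetic (editor's illustration, assuming a 4K page size):

	#include <assert.h>
	#include <stdint.h>

	#define PAGE_SIZE	4096UL
	#define PAGE_MASK	(PAGE_SIZE - 1)

	static uintptr_t
	normalize_max(uintptr_t max)
	{
		if (max & PAGE_MASK) {
			max += 1;
			if (max == 0)		/* overflowed: full address space */
				max -= PAGE_SIZE;
		}
		return max;
	}

	int
	main(void)
	{
		assert(normalize_max((uintptr_t)-1) == (uintptr_t)-PAGE_SIZE);
		assert(normalize_max(0x7fffffff) == 0x80000000UL);
		assert(normalize_max(0x80000000UL) == 0x80000000UL);
		return 0;
	}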
/*
- * uvm_map_lookup_entry: find map entry at or before an address
+ * Destroy the map.
*
- * => map must at least be read-locked by caller
- * => entry is returned in "entry"
- * => return value is true if address is in the returned entry
+ * This is the inverse operation to uvm_map_setup.
*/
-
-boolean_t
-uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
- struct vm_map_entry **entry)
+void
+uvm_map_teardown(struct vm_map *map)
{
- struct vm_map_entry *cur;
- struct vm_map_entry *last;
- int use_tree = 0;
+ struct uvm_map_deadq dead_entries;
+ int i;
+ struct vm_map_entry *entry, *tmp;
+#ifdef VMMAP_DEBUG
+ size_t numq, numt;
+#endif
+
+ if ((map->flags & VM_MAP_INTRSAFE) == 0) {
+ if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0)
+ panic("uvm_map_teardown: rw_enter failed on free map");
+ }
/*
- * start looking either from the head of the
- * list, or from the hint.
+ * Remove address selectors.
*/
+ uvm_addr_destroy(map->uaddr_exe);
+ map->uaddr_exe = NULL;
+ for (i = 0; i < nitems(map->uaddr_any); i++) {
+ uvm_addr_destroy(map->uaddr_any[i]);
+ map->uaddr_any[i] = NULL;
+ }
+ uvm_addr_destroy(map->uaddr_brk_stack);
+ map->uaddr_brk_stack = NULL;
- simple_lock(&map->hint_lock);
- cur = map->hint;
- simple_unlock(&map->hint_lock);
-
- if (cur == &map->header)
- cur = cur->next;
+ /*
+ * Remove entries.
+ *
+ * The following is based on graph breadth-first search.
+ *
+ * In color terms:
+ * - the dead_entries set contains all nodes that are reachable
+ * (i.e. both the black and the grey nodes)
+ * - any entry not in dead_entries is white
+ * - any entry that appears in dead_entries before entry,
+ * is black, the rest is grey.
+ * The set [entry, end] is also referred to as the wavefront.
+ *
+ * Since the tree is always a fully connected graph, the breadth-first
+ * search guarantees that each vmmap_entry is visited exactly once.
+ * The vm_map is broken down in linear time.
+ */
+ TAILQ_INIT(&dead_entries);
+ if ((entry = RB_ROOT(&map->addr)) != NULL)
+ DEAD_ENTRY_PUSH(&dead_entries, entry);
+ while (entry != NULL) {
+ uvm_unmap_kill_entry(map, entry);
+ if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL)
+ DEAD_ENTRY_PUSH(&dead_entries, tmp);
+ if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL)
+ DEAD_ENTRY_PUSH(&dead_entries, tmp);
+ /* Update wave-front. */
+ entry = TAILQ_NEXT(entry, dfree.deadq);
+ }
- if (address >= cur->start) {
- /*
- * go from hint to end of list.
- *
- * but first, make a quick check to see if
- * we are already looking at the entry we
- * want (which is usually the case).
- * note also that we don't need to save the hint
- * here... it is the same hint (unless we are
- * at the header, in which case the hint didn't
- * buy us anything anyway).
- */
- last = &map->header;
- if ((cur != last) && (cur->end > address)) {
- *entry = cur;
- return (TRUE);
- }
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_exit(&map->lock);
+
+#ifdef VMMAP_DEBUG
+ numt = numq = 0;
+ RB_FOREACH(entry, uvm_map_addr, &map->addr)
+ numt++;
+ TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
+ numq++;
+ KASSERT(numt == numq);
+#endif
+ uvm_unmap_detach(&dead_entries, 0);
+ pmap_destroy(map->pmap);
+ map->pmap = NULL;
+}
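
The teardown walk relies on the dead queue doubling as the breadth-first work queue: every tree node is pushed exactly once and the wavefront pointer simply advances along the queue. A stand-alone sketch of the same walk over a plain binary tree (editor's illustration; the tnode type is hypothetical, not the kernel's vm_map_entry):

	#include <sys/queue.h>
	#include <stddef.h>

	struct tnode {
		struct tnode *left, *right;
		TAILQ_ENTRY(tnode) deadq;
	};
	TAILQ_HEAD(deadq_head, tnode);

	void
	teardown(struct tnode *root, void (*kill)(struct tnode *))
	{
		struct deadq_head dead = TAILQ_HEAD_INITIALIZER(dead);
		struct tnode *n;

		if (root != NULL)
			TAILQ_INSERT_TAIL(&dead, root, deadq);
		for (n = TAILQ_FIRST(&dead); n != NULL; n = TAILQ_NEXT(n, deadq)) {
			kill(n);			/* per-node teardown */
			if (n->left != NULL)		/* extend the wavefront */
				TAILQ_INSERT_TAIL(&dead, n->left, deadq);
			if (n->right != NULL)
				TAILQ_INSERT_TAIL(&dead, n->right, deadq);
		}
		/* every node is now on "dead"; free them in one more linear pass */
	}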
- if (map->nentries > 30)
- use_tree = 1;
- } else {
- /*
- * go from start to hint, *inclusively*
- */
- last = cur->next;
- cur = map->header.next;
- use_tree = 1;
- }
+/*
+ * Populate map with free-memory entries.
+ *
+ * Map must be initialized and empty.
+ */
+void
+uvm_map_setup_entries(struct vm_map *map)
+{
+ KDASSERT(RB_EMPTY(&map->addr));
- uvm_tree_sanity(map, __func__);
+ uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
+}
- if (use_tree) {
- struct vm_map_entry *prev = &map->header;
- cur = RB_ROOT(&map->rbhead);
+/*
+ * Split entry at given address.
+ *
+ * orig: entry that is to be split.
+ * next: a newly allocated map entry that is not linked.
+ * split: address at which the split is done.
+ */
+void
+uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
+ struct vm_map_entry *next, vaddr_t split)
+{
+ struct uvm_addr_state *free, *free_before;
+ vsize_t adj;
- /*
- * Simple lookup in the tree. Happens when the hint is
- * invalid, or nentries reach a threshold.
- */
- while (cur) {
- if (address >= cur->start) {
- if (address < cur->end) {
- *entry = cur;
- SAVE_HINT(map, map->hint, cur);
- return (TRUE);
- }
- prev = cur;
- cur = RB_RIGHT(cur, rb_entry);
- } else
- cur = RB_LEFT(cur, rb_entry);
- }
- *entry = prev;
- return (FALSE);
+ if ((split & PAGE_MASK) != 0) {
+ panic("uvm_map_splitentry: split address 0x%lx "
+ "not on page boundary!", split);
}
+ KDASSERT(map != NULL && orig != NULL && next != NULL);
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+ KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
+
+#ifdef VMMAP_DEBUG
+ KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig);
+ KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next);
+#endif /* VMMAP_DEBUG */
/*
- * search linearly
+ * Free space will change, unlink from free space tree.
*/
-
- while (cur != last) {
- if (cur->end > address) {
- if (address >= cur->start) {
- /*
- * save this lookup for future
- * hints, and return
- */
-
- *entry = cur;
- SAVE_HINT(map, map->hint, cur);
- return (TRUE);
+ free = uvm_map_uaddr_e(map, orig);
+ if (free)
+ uvm_mapent_free_remove(map, free, orig);
+
+ adj = split - orig->start;
+
+ uvm_mapent_copy(orig, next);
+ if (split >= orig->end) {
+ next->etype = 0;
+ next->offset = 0;
+ next->wired_count = 0;
+ next->start = next->end = split;
+ next->guard = 0;
+ next->fspace = VMMAP_FREE_END(orig) - split;
+ next->aref.ar_amap = NULL;
+ next->aref.ar_pageoff = 0;
+ orig->guard = MIN(orig->guard, split - orig->end);
+ orig->fspace = split - VMMAP_FREE_START(orig);
+ } else {
+ orig->fspace = 0;
+ orig->guard = 0;
+ orig->end = next->start = split;
+
+ if (next->aref.ar_amap)
+ amap_splitref(&orig->aref, &next->aref, adj);
+ if (UVM_ET_ISSUBMAP(orig)) {
+ uvm_map_reference(next->object.sub_map);
+ next->offset += adj;
+ } else if (UVM_ET_ISOBJ(orig)) {
+ if (next->object.uvm_obj->pgops &&
+ next->object.uvm_obj->pgops->pgo_reference) {
+ next->object.uvm_obj->pgops->pgo_reference(
+ next->object.uvm_obj);
}
- break;
+ next->offset += adj;
}
- cur = cur->next;
}
- *entry = cur->prev;
- SAVE_HINT(map, map->hint, *entry);
- return (FALSE);
+ /*
+ * Link next into address tree.
+ * Link orig and next into free-space tree.
+ *
+ * Don't insert 'next' into the addr tree until orig has been linked,
+ * in case the free-list looks at adjacent entries in the addr tree
+ * for its decisions.
+ */
+ if (orig->fspace > 0)
+ free_before = free;
+ else
+ free_before = uvm_map_uaddr_e(map, orig);
+ if (free_before)
+ uvm_mapent_free_insert(map, free_before, orig);
+ uvm_mapent_addr_insert(map, next);
+ if (free)
+ uvm_mapent_free_insert(map, free, next);
+
+ uvm_tree_sanity(map, __FILE__, __LINE__);
}
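
Two split cases are handled above: the split address can land in the trailing free space (orig keeps the whole mapped range and only the free space is divided) or inside the mapped range itself (next inherits the tail of the mapping). A reduced sketch of just that case split, ignoring the guard page and the amap/object bookkeeping (editor's illustration with a hypothetical struct):

	#include <assert.h>
	#include <stdint.h>

	struct ent {
		uintptr_t start, end;	/* mapped part */
		uintptr_t fspace;	/* free bytes after "end" */
	};

	static void
	split_at(struct ent *orig, struct ent *next, uintptr_t split)
	{
		uintptr_t free_end = orig->end + orig->fspace;

		assert(orig->start < split && split < free_end);
		if (split >= orig->end) {
			/* Split falls in the free tail: next is pure free space. */
			next->start = next->end = split;
			next->fspace = free_end - split;
			orig->fspace = split - orig->end;
		} else {
			/* Split falls in the mapped part: next inherits the tail. */
			next->start = split;
			next->end = orig->end;
			next->fspace = orig->fspace;
			orig->end = split;
			orig->fspace = 0;
		}
	}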
-/*
- * Checks if address pointed to by phint fits into the empty
- * space before the vm_map_entry after. Takes alignment and
- * offset into consideration.
- */
-int
-uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length,
- struct vm_map_entry *after, voff_t uoffset, vsize_t align)
+#ifdef VMMAP_DEBUG
+
+void
+uvm_tree_assert(struct vm_map *map, int test, char *test_str,
+ char *file, int line)
{
- vaddr_t hint = *phint;
- vaddr_t end;
+ char* map_special;
-#ifdef PMAP_PREFER
- /*
- * push hint forward as needed to avoid VAC alias problems.
- * we only do this if a valid offset is specified.
- */
- if (uoffset != UVM_UNKNOWN_OFFSET)
- hint = PMAP_PREFER(uoffset, hint);
-#endif
- if (align != 0)
- if ((hint & (align - 1)) != 0)
- hint = roundup(hint, align);
- *phint = hint;
+ if (test)
+ return;
- end = hint + length;
- if (end > map->max_offset || end < hint)
- return (FALSE);
- if (after != NULL && after != &map->header && after->start < end)
- return (FALSE);
-
- return (TRUE);
+ if (map == kernel_map)
+ map_special = " (kernel_map)";
+ else if (map == kmem_map)
+ map_special = " (kmem_map)";
+ else
+ map_special = "";
+ panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
+ line, test_str);
}
/*
- * uvm_map_pie: return a random load address for a PIE executable
- * properly aligned.
+ * Check that map is sane.
*/
+void
+uvm_tree_sanity(struct vm_map *map, char *file, int line)
+{
+ struct vm_map_entry *iter;
+ vaddr_t addr;
+ vaddr_t min, max, bound; /* Bounds checker. */
+ struct uvm_addr_state *free;
-#ifndef VM_PIE_MAX_ADDR
-#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
-#endif
+ addr = vm_map_min(map);
+ RB_FOREACH(iter, uvm_map_addr, &map->addr) {
+ /*
+ * Valid start, end.
+ * Catch overflow for end+fspace.
+ */
+ UVM_ASSERT(map, iter->end >= iter->start, file, line);
+ UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
+ /*
+ * May not be empty.
+ */
+ UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
+ file, line);
-#ifndef VM_PIE_MIN_ADDR
-#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
-#endif
+ /*
+ * Addresses for entry must lie within map boundaries.
+ */
+ UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
+ VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
-#ifndef VM_PIE_MIN_ALIGN
-#define VM_PIE_MIN_ALIGN PAGE_SIZE
-#endif
+ /*
+ * Tree may not have gaps.
+ */
+ UVM_ASSERT(map, iter->start == addr, file, line);
+ addr = VMMAP_FREE_END(iter);
-vaddr_t
-uvm_map_pie(vaddr_t align)
+ /*
+ * Free space may not cross boundaries, unless the same
+ * free list is used on both sides of the border.
+ */
+ min = VMMAP_FREE_START(iter);
+ max = VMMAP_FREE_END(iter);
+
+ while (min < max &&
+ (bound = uvm_map_boundary(map, min, max)) != max) {
+ UVM_ASSERT(map,
+ uvm_map_uaddr(map, bound - 1) ==
+ uvm_map_uaddr(map, bound),
+ file, line);
+ min = bound;
+ }
+
+ free = uvm_map_uaddr_e(map, iter);
+ if (free) {
+ UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
+ file, line);
+ } else {
+ UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
+ file, line);
+ }
+ }
+ UVM_ASSERT(map, addr == vm_map_max(map), file, line);
+}
+
+void
+uvm_tree_size_chk(struct vm_map *map, char *file, int line)
{
- vaddr_t addr, space, min;
+ struct vm_map_entry *iter;
+ vsize_t size;
- align = MAX(align, VM_PIE_MIN_ALIGN);
+ size = 0;
+ RB_FOREACH(iter, uvm_map_addr, &map->addr) {
+ if (!UVM_ET_ISHOLE(iter))
+ size += iter->end - iter->start;
+ }
- /* round up to next alignment */
- min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
+ if (map->size != size)
+ printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
+ UVM_ASSERT(map, map->size == size, file, line);
- if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
- return (align);
+ vmspace_validate(map);
+}
- space = (VM_PIE_MAX_ADDR - min) / align;
- space = MIN(space, (u_int32_t)-1);
+/*
+ * This function validates the statistics on vmspace.
+ */
+void
+vmspace_validate(struct vm_map *map)
+{
+ struct vmspace *vm;
+ struct vm_map_entry *iter;
+ vaddr_t imin, imax;
+ vaddr_t stack_begin, stack_end; /* Position of stack. */
+ vsize_t stack, heap; /* Measured sizes. */
- addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
- addr += min;
+ if (!(map->flags & VM_MAP_ISVMSPACE))
+ return;
- return (addr);
+ vm = (struct vmspace *)map;
+ stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+ stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+
+ stack = heap = 0;
+ RB_FOREACH(iter, uvm_map_addr, &map->addr) {
+ imin = imax = iter->start;
+
+ if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
+ continue;
+
+ /*
+ * Update stack, heap.
+ * Keep in mind that (theoretically) the entries of
+ * userspace and stack may be joined.
+ */
+ while (imin != iter->end) {
+ /*
+ * Set imax to the first boundary crossed between
+ * imin and stack addresses.
+ */
+ imax = iter->end;
+ if (imin < stack_begin && imax > stack_begin)
+ imax = stack_begin;
+ else if (imin < stack_end && imax > stack_end)
+ imax = stack_end;
+
+ if (imin >= stack_begin && imin < stack_end)
+ stack += imax - imin;
+ else
+ heap += imax - imin;
+ imin = imax;
+ }
+ }
+
+ heap >>= PAGE_SHIFT;
+ if (heap != vm->vm_dused) {
+ printf("vmspace stack range: 0x%lx-0x%lx\n",
+ stack_begin, stack_end);
+ panic("vmspace_validate: vmspace.vm_dused invalid, "
+ "expected %ld pgs, got %ld pgs in map %p",
+ heap, vm->vm_dused,
+ map);
+ }
}
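
The inner loop above charges each entry to the stack or the heap by cutting its range at the stack boundaries and classifying every piece. The same accounting in stand-alone form (editor's illustration; the boundary values in main() are assumptions):

	#include <stdint.h>
	#include <stdio.h>

	static void
	account(uintptr_t lo, uintptr_t hi, uintptr_t stk_lo, uintptr_t stk_hi,
	    uintptr_t *stack, uintptr_t *heap)
	{
		uintptr_t imin = lo, imax;

		while (imin != hi) {
			imax = hi;
			if (imin < stk_lo && imax > stk_lo)		/* cut at stack start */
				imax = stk_lo;
			else if (imin < stk_hi && imax > stk_hi)	/* cut at stack end */
				imax = stk_hi;
			if (imin >= stk_lo && imin < stk_hi)
				*stack += imax - imin;
			else
				*heap += imax - imin;
			imin = imax;
		}
	}

	int
	main(void)
	{
		uintptr_t stack = 0, heap = 0;

		/* Range 0x1000-0x9000 with the stack occupying 0x4000-0x6000. */
		account(0x1000, 0x9000, 0x4000, 0x6000, &stack, &heap);
		printf("stack=0x%lx heap=0x%lx\n",	/* 0x2000 / 0x6000 */
		    (unsigned long)stack, (unsigned long)heap);
		return 0;
	}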
+#endif /* VMMAP_DEBUG */
+
/*
- * uvm_map_hint: return the beginning of the best area suitable for
- * creating a new mapping with "prot" protection.
+ * uvm_map_init: init mapping system at boot time. note that we allocate
+ * and init the static pool of structs vm_map_entry for the kernel here.
*/
-vaddr_t
-uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap)
+void
+uvm_map_init(void)
{
- vaddr_t addr;
+ static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
+ int lcv;
-#ifdef __i386__
/*
- * If executable skip first two pages, otherwise start
- * after data + heap region.
+ * now set up static pool of kernel map entries ...
*/
- if ((prot & VM_PROT_EXECUTE) &&
- ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) {
- addr = (PAGE_SIZE*2) +
- (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
- return (round_page(addr));
+
+ simple_lock_init(&uvm.kentry_lock);
+ uvm.kentry_free = NULL;
+ for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
+ RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
+ uvm.kentry_free;
+ uvm.kentry_free = &kernel_map_entry[lcv];
}
-#endif
- /* start malloc/mmap after the brk */
- addr = (vaddr_t)p->p_vmspace->vm_daddr;
- if (skipheap)
- addr += BRKSIZ;
-#if !defined(__vax__)
- addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1);
-#endif
- return (round_page(addr));
+
+ /*
+ * initialize the map-related pools.
+ */
+ pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
+ 0, 0, 0, "vmsppl", &pool_allocator_nointr);
+ pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
+ 0, 0, 0, "vmmpepl", &pool_allocator_nointr);
+ pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
+ 0, 0, 0, "vmmpekpl", NULL);
+ pool_sethiwat(&uvm_map_entry_pool, 8192);
+
+ uvm_addr_init();
}
+#if defined(DDB)
+
/*
- * uvm_map_findspace: find "length" sized space in "map".
- *
- * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is
- * set (in which case we insist on using "hint").
- * => "result" is VA returned
- * => uobj/uoffset are to be used to handle VAC alignment, if required
- * => if `align' is non-zero, we attempt to align to that value.
- * => caller must at least have read-locked map
- * => returns NULL on failure, or pointer to prev. map entry if success
- * => note this is a cross between the old vm_map_findspace and vm_map_find
+ * DDB hooks
*/
-struct vm_map_entry *
-uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
- vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align,
- int flags)
+/*
+ * uvm_map_printit: actually prints the map
+ */
+void
+uvm_map_printit(struct vm_map *map, boolean_t full,
+ int (*pr)(const char *, ...))
{
- struct vm_map_entry *entry, *next, *tmp;
- struct vm_map_entry *child, *prev = NULL;
- vaddr_t end, orig_hint;
-
- KASSERT((align & (align - 1)) == 0);
- KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
+ struct vmspace *vm;
+ struct vm_map_entry *entry;
+ struct uvm_addr_state *free;
+ int in_free, i;
+ char buf[8];
- uvm_tree_sanity(map, "map_findspace entry");
+ (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
+ (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
+ map->b_start, map->b_end);
+ (*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
+ map->s_start, map->s_end);
+ (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
+ map->size, map->ref_count, map->timestamp,
+ map->flags);
+#ifdef pmap_resident_count
+ (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
+ pmap_resident_count(map->pmap));
+#else
+ /* XXXCDC: this should be required ... */
+ (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
+#endif
/*
- * remember the original hint. if we are aligning, then we
- * may have to try again with no alignment constraint if
- * we fail the first time.
+ * struct vmspace handling.
*/
+ if (map->flags & VM_MAP_ISVMSPACE) {
+ vm = (struct vmspace *)map;
+
+ (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
+ vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
+ (*pr)("\tvm_tsize=%u vm_dsize=%u\n",
+ vm->vm_tsize, vm->vm_dsize);
+ (*pr)("\tvm_taddr=%p vm_daddr=%p\n",
+ vm->vm_taddr, vm->vm_daddr);
+ (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
+ vm->vm_maxsaddr, vm->vm_minsaddr);
+ }
+
+ if (!full)
+ goto print_uaddr;
+ RB_FOREACH(entry, uvm_map_addr, &map->addr) {
+ (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
+ entry, entry->start, entry->end, entry->object.uvm_obj,
+ (long long)entry->offset, entry->aref.ar_amap,
+ entry->aref.ar_pageoff);
+ (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
+ "wc=%d, adv=%d\n",
+ (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
+ (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
+ (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
+ entry->protection, entry->max_protection,
+ entry->inheritance, entry->wired_count, entry->advice);
- orig_hint = hint;
- if (hint < map->min_offset) { /* check ranges ... */
- if (flags & UVM_FLAG_FIXED) {
- return(NULL);
+ free = uvm_map_uaddr_e(map, entry);
+ in_free = (free != NULL);
+ (*pr)("\thole=%c, free=%c, guard=0x%lx, "
+ "free=0x%lx-0x%lx\n",
+ (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
+ in_free ? 'T' : 'F',
+ entry->guard,
+ VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
+ (*pr)("\tfreemapped=%c, uaddr=%p\n",
+ (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
+ if (free) {
+ (*pr)("\t\t(0x%lx-0x%lx %s)\n",
+ free->uaddr_minaddr, free->uaddr_maxaddr,
+ free->uaddr_functions->uaddr_name);
}
- hint = map->min_offset;
}
- if (hint > map->max_offset) {
- return(NULL);
+
+print_uaddr:
+ uvm_addr_print(map->uaddr_exe, "exe", full, pr);
+ for (i = 0; i < nitems(map->uaddr_any); i++) {
+ snprintf(&buf[0], sizeof(buf), "any[%d]", i);
+ uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
}
+ uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
+}
- /*
- * Look for the first possible address; if there's already
- * something at this address, we have to start after it.
- */
+/*
+ * uvm_object_printit: actually prints the object
+ */
+void
+uvm_object_printit(uobj, full, pr)
+ struct uvm_object *uobj;
+ boolean_t full;
+ int (*pr)(const char *, ...);
+{
+ struct vm_page *pg;
+ int cnt = 0;
- if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) {
- if ((entry = map->first_free) != &map->header)
- hint = entry->end;
- } else {
- if (uvm_map_lookup_entry(map, hint, &tmp)) {
- /* "hint" address already in use ... */
- if (flags & UVM_FLAG_FIXED) {
- return(NULL);
- }
- hint = tmp->end;
+ (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
+ uobj, uobj->pgops, uobj->uo_npages);
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj))
+ (*pr)("refs=<SYSTEM>\n");
+ else
+ (*pr)("refs=%d\n", uobj->uo_refs);
+
+ if (!full) {
+ return;
+ }
+ (*pr)(" PAGES <pg,offset>:\n ");
+ RB_FOREACH(pg, uvm_objtree, &uobj->memt) {
+ (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
+ if ((cnt % 3) == 2) {
+ (*pr)("\n ");
}
- entry = tmp;
+ cnt++;
+ }
+ if ((cnt % 3) != 2) {
+ (*pr)("\n");
}
+}
- if (flags & UVM_FLAG_FIXED) {
- end = hint + length;
- if (end > map->max_offset || end < hint) {
- goto error;
- }
- next = entry->next;
- if (next == &map->header || next->start >= end)
- goto found;
- return(NULL); /* only one shot at it ... */
- }
-
- /* Try to find the space in the red-black tree */
-
- /* Check slot before any entry */
- if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align))
- goto found;
-
- /* If there is not enough space in the whole tree, we fail */
- tmp = RB_ROOT(&map->rbhead);
- if (tmp == NULL || tmp->space < length)
- goto error;
-
- /* Find an entry close to hint that has enough space */
- for (; tmp;) {
- if (tmp->end >= hint &&
- (prev == NULL || tmp->end < prev->end)) {
- if (tmp->ownspace >= length)
- prev = tmp;
- else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL &&
- child->space >= length)
- prev = tmp;
+/*
+ * uvm_page_printit: actually print the page
+ */
+static const char page_flagbits[] =
+ "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
+ "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0"
+ "\31PMAP1\32PMAP2\33PMAP3";
+
+void
+uvm_page_printit(pg, full, pr)
+ struct vm_page *pg;
+ boolean_t full;
+ int (*pr)(const char *, ...);
+{
+ struct vm_page *tpg;
+ struct uvm_object *uobj;
+ struct pglist *pgl;
+
+ (*pr)("PAGE %p:\n", pg);
+ (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
+ pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
+ (long long)pg->phys_addr);
+ (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
+ pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
+#if defined(UVM_PAGE_TRKOWN)
+ if (pg->pg_flags & PG_BUSY)
+ (*pr)(" owning process = %d, tag=%s",
+ pg->owner, pg->owner_tag);
+ else
+ (*pr)(" page not busy, no owner");
+#else
+ (*pr)(" [page ownership tracking disabled]");
+#endif
+#ifdef __HAVE_VM_PAGE_MD
+ (*pr)("\tvm_page_md %p\n", &pg->mdpage);
+#else
+ (*pr)("\n");
+#endif
+
+ if (!full)
+ return;
+
+ /* cross-verify object/anon */
+ if ((pg->pg_flags & PQ_FREE) == 0) {
+ if (pg->pg_flags & PQ_ANON) {
+ if (pg->uanon == NULL || pg->uanon->an_page != pg)
+ (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
+ (pg->uanon) ? pg->uanon->an_page : NULL);
+ else
+ (*pr)(" anon backpointer is OK\n");
+ } else {
+ uobj = pg->uobject;
+ if (uobj) {
+ (*pr)(" checking object list\n");
+ RB_FOREACH(tpg, uvm_objtree, &uobj->memt) {
+ if (tpg == pg) {
+ break;
+ }
+ }
+ if (tpg)
+ (*pr)(" page found on object list\n");
+ else
+ (*pr)(" >>> PAGE NOT FOUND "
+ "ON OBJECT LIST! <<<\n");
+ }
}
- if (tmp->end < hint)
- child = RB_RIGHT(tmp, rb_entry);
- else if (tmp->end > hint)
- child = RB_LEFT(tmp, rb_entry);
- else {
- if (tmp->ownspace >= length)
+ }
+
+ /* cross-verify page queue */
+ if (pg->pg_flags & PQ_FREE) {
+ if (uvm_pmr_isfree(pg))
+ (*pr)(" page found in uvm_pmemrange\n");
+ else
+ (*pr)(" >>> page not found in uvm_pmemrange <<<\n");
+ pgl = NULL;
+ } else if (pg->pg_flags & PQ_INACTIVE) {
+ pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
+ &uvm.page_inactive_swp : &uvm.page_inactive_obj;
+ } else if (pg->pg_flags & PQ_ACTIVE) {
+ pgl = &uvm.page_active;
+ } else {
+ pgl = NULL;
+ }
+
+ if (pgl) {
+ (*pr)(" checking pageq list\n");
+ TAILQ_FOREACH(tpg, pgl, pageq) {
+ if (tpg == pg) {
break;
- child = RB_RIGHT(tmp, rb_entry);
+ }
}
- if (child == NULL || child->space < length)
- break;
- tmp = child;
+ if (tpg)
+ (*pr)(" page found on pageq list\n");
+ else
+ (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
}
-
- if (tmp != NULL && hint < tmp->end + tmp->ownspace) {
- /*
- * Check if the entry that we found satifies the
- * space requirement
- */
- if (hint < tmp->end)
- hint = tmp->end;
- if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset,
- align)) {
- entry = tmp;
- goto found;
- } else if (tmp->ownspace >= length)
- goto listsearch;
- }
- if (prev == NULL)
- goto error;
-
- hint = prev->end;
- if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset,
- align)) {
- entry = prev;
- goto found;
- } else if (prev->ownspace >= length)
- goto listsearch;
-
- tmp = RB_RIGHT(prev, rb_entry);
- for (;;) {
- KASSERT(tmp && tmp->space >= length);
- child = RB_LEFT(tmp, rb_entry);
- if (child && child->space >= length) {
- tmp = child;
+}
+#endif
+
+/*
+ * uvm_map_protect: change map protection
+ *
+ * => set_max means set max_protection.
+ * => map must be unlocked.
+ */
+int
+uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
+ vm_prot_t new_prot, boolean_t set_max)
+{
+ struct vm_map_entry *first, *iter;
+ vm_prot_t old_prot;
+ vm_prot_t mask;
+ int error;
+
+ if (start > end)
+ return EINVAL;
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return 0;
+
+ error = 0;
+ vm_map_lock(map);
+
+ /*
+ * Set up first and last.
+ * - first will contain first entry at or after start.
+ */
+ first = uvm_map_entrybyaddr(&map->addr, start);
+ KDASSERT(first != NULL);
+ if (first->end < start)
+ first = RB_NEXT(uvm_map_addr, &map->addr, first);
+
+ /*
+ * First, check for protection violations.
+ */
+ for (iter = first; iter != NULL && iter->start < end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ /* Treat memory holes as free space. */
+ if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
continue;
+
+ if (UVM_ET_ISSUBMAP(iter)) {
+ error = EINVAL;
+ goto out;
+ }
+ if ((new_prot & iter->max_protection) != new_prot) {
+ error = EACCES;
+ goto out;
}
- if (tmp->ownspace >= length)
- break;
- tmp = RB_RIGHT(tmp, rb_entry);
- }
-
- hint = tmp->end;
- if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) {
- entry = tmp;
- goto found;
}
- /*
- * The tree fails to find an entry because of offset or alignment
- * restrictions. Search the list instead.
- */
- listsearch:
/*
- * Look through the rest of the map, trying to fit a new region in
- * the gap between existing regions, or after the very last region.
- * note: entry->end = base VA of current gap,
- * next->start = VA of end of current gap
+ * Fix protections.
*/
- for (;; hint = (entry = next)->end) {
+ for (iter = first; iter != NULL && iter->start < end;
+ iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
+ /* Treat memory holes as free space. */
+ if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
+ continue;
+
+ old_prot = iter->protection;
+
/*
- * Find the end of the proposed new region. Be sure we didn't
- * go beyond the end of the map, or wrap around the address;
- * if so, we lose. Otherwise, if this is the last entry, or
- * if the proposed new region fits before the next entry, we
- * win.
+ * Skip adapting protection iff old and new protection
+ * are equal.
*/
+ if (set_max) {
+ if (old_prot == (new_prot & old_prot) &&
+ iter->max_protection == new_prot)
+ continue;
+ } else {
+ if (old_prot == new_prot)
+ continue;
+ }
+
+ UVM_MAP_CLIP_START(map, iter, start);
+ UVM_MAP_CLIP_END(map, iter, end);
+
+ if (set_max) {
+ iter->max_protection = new_prot;
+ iter->protection &= new_prot;
+ } else
+ iter->protection = new_prot;
-#ifdef PMAP_PREFER
/*
- * push hint forward as needed to avoid VAC alias problems.
- * we only do this if a valid offset is specified.
+ * update physical map if necessary. worry about copy-on-write
+ * here -- CHECK THIS XXX
*/
- if (uoffset != UVM_UNKNOWN_OFFSET)
- hint = PMAP_PREFER(uoffset, hint);
-#endif
- if (align != 0) {
- if ((hint & (align - 1)) != 0)
- hint = roundup(hint, align);
- /*
- * XXX Should we PMAP_PREFER() here again?
- */
+ if (iter->protection != old_prot) {
+ mask = UVM_ET_ISCOPYONWRITE(iter) ?
+ ~VM_PROT_WRITE : VM_PROT_ALL;
+
+ /* update pmap */
+ if ((iter->protection & mask) == PROT_NONE &&
+ VM_MAPENT_ISWIRED(iter)) {
+ /*
+ * TODO(ariane) this is stupid. wired_count
+ * is 0 if not wired, otherwise anything
+ * larger than 0 (incremented once each time
+ * wire is called).
+ * Mostly to be able to undo the damage on
+ * failure. Not to actually be a wired
+ * refcounter...
+ * Originally: iter->wired_count--;
+ * (don't we have to unwire this in the pmap
+ * as well?)
+ */
+ iter->wired_count = 0;
+ }
+ pmap_protect(map->pmap, iter->start, iter->end,
+ iter->protection & mask);
}
- end = hint + length;
- if (end > map->max_offset || end < hint) {
- goto error;
+
+ /*
+ * If the map is configured to lock any future mappings,
+ * wire this entry now if the old protection was VM_PROT_NONE
+ * and the new protection is not VM_PROT_NONE.
+ */
+ if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
+ VM_MAPENT_ISWIRED(iter) == 0 &&
+ old_prot == VM_PROT_NONE &&
+ new_prot != VM_PROT_NONE) {
+ if (uvm_map_pageable(map, iter->start, iter->end,
+ FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
+ /*
+ * If locking the entry fails, remember the
+ * error if it's the first one. Note we
+ * still continue setting the protection in
+ * the map, but it will return the resource
+ * shortage condition regardless.
+ *
+ * XXX Ignore what the actual error is,
+ * XXX just call it a resource shortage
+ * XXX so that it doesn't get confused
+ * XXX what uvm_map_protect() itself would
+ * XXX normally return.
+ */
+ error = ENOMEM;
+ }
}
- next = entry->next;
- if (next == &map->header || next->start >= end)
- break;
}
- found:
- SAVE_HINT(map, map->hint, entry);
- *result = hint;
- return (entry);
+ pmap_update(map->pmap);
- error:
- if (align != 0) {
- return (uvm_map_findspace(map, orig_hint,
- length, result, uobj, uoffset, 0, flags));
- }
- return (NULL);
+out:
+ vm_map_unlock(map);
+ return error;
}
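[Annotation, not part of the patch] A minimal caller sketch of the reworked uvm_map_protect(); only the contract visible above is assumed (map unlocked on entry, errno-style return, page-granular clipping), and the helper name is hypothetical.

	/* illustrative sketch -- hypothetical helper, not in this diff */
	int
	example_make_readonly(struct vm_map *map, vaddr_t va, vsize_t len)
	{
		/* uvm_map_protect() takes and releases the map lock itself */
		return uvm_map_protect(map, trunc_page(va),
		    round_page(va + len), VM_PROT_READ, FALSE);
	}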
/*
- * U N M A P - m a i n e n t r y p o i n t
+ * uvmspace_alloc: allocate a vmspace structure.
+ *
+ * - structure includes vm_map and pmap
+ * - XXX: no locking on this structure
+ * - refcnt set to 1, rest must be init'd by caller
*/
+struct vmspace *
+uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
+ boolean_t remove_holes)
+{
+ struct vmspace *vm;
+
+ vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
+ uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
+ return (vm);
+}
/*
- * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop")
+ * uvmspace_init: initialize a vmspace structure.
*
- * => caller must check alignment and size
- * => map must be unlocked (we will lock it)
+ * - XXX: no locking on this structure
+ * - refcnt set to 1, rest must be init'd by caller
*/
void
-uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p)
+uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
+ boolean_t pageable, boolean_t remove_holes)
{
- vm_map_entry_t dead_entries;
+ if (pmap)
+ pmap_reference(pmap);
+ else
+ pmap = pmap_create();
+ vm->vm_map.pmap = pmap;
- /*
- * work now done by helper functions. wipe the pmap's and then
- * detach from the dead entries...
- */
- vm_map_lock(map);
- uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE);
- vm_map_unlock(map);
+ uvm_map_setup(&vm->vm_map, min, max,
+ (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
+ vm->vm_refcnt = 1;
+ if (remove_holes)
+ pmap_remove_holes(&vm->vm_map);
}
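[Annotation, not part of the patch] Sketch of the allocation contract above; VM_MIN_ADDRESS/VM_MAXUSER_ADDRESS are placeholder bounds, not taken from this diff.

	/* illustrative sketch -- bounds are placeholders */
	struct vmspace *vm;

	vm = uvmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS,
	    TRUE /* pageable */, TRUE /* remove MMU holes */);
	/* vm->vm_refcnt == 1; drop it later with uvmspace_free(vm) */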
-
/*
- * U N M A P - m a i n h e l p e r f u n c t i o n s
+ * uvmspace_share: share a vmspace between two processes
+ *
+ * - XXX: no locking on vmspace
+ * - used for vfork and threads
*/
+void
+uvmspace_share(p1, p2)
+ struct proc *p1, *p2;
+{
+ p2->p_vmspace = p1->p_vmspace;
+ p1->p_vmspace->vm_refcnt++;
+}
+
/*
- * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
+ * uvmspace_exec: the process wants to exec a new program
*
- * => caller must check alignment and size
- * => map must be locked by caller
- * => we return a list of map entries that we've remove from the map
- * in "entry_list"
+ * - XXX: no locking on vmspace
*/
void
-uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
- struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes)
+uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
{
- struct vm_map_entry *entry, *first_entry, *next;
- vaddr_t len;
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- uvm_tree_sanity(map, "unmap_remove entry");
-
- if ((map->flags & VM_MAP_INTRSAFE) == 0)
- splassert(IPL_NONE);
- else
- splassert(IPL_VM);
-
- /*
- * find first entry
- */
- if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
- /* clip and go... */
- entry = first_entry;
- UVM_MAP_CLIP_START(map, entry, start);
- /* critical! prevents stale hint */
- SAVE_HINT(map, entry, entry->prev);
-
- } else {
- entry = first_entry->next;
- }
-
- /*
- * Save the free space hint
- */
+ struct vmspace *nvm, *ovm = p->p_vmspace;
+ struct vm_map *map = &ovm->vm_map;
+ struct uvm_map_deadq dead_entries;
- if (map->first_free->start >= start)
- map->first_free = entry->prev;
+ KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
+ KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
+ (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
- /*
- * note: we now re-use first_entry for a different task. we remove
- * a number of map entries from the map and save them in a linked
- * list headed by "first_entry". once we remove them from the map
- * the caller should unlock the map and drop the references to the
- * backing objects [c.f. uvm_unmap_detach]. the object is to
- * separate unmapping from reference dropping. why?
- * [1] the map has to be locked for unmapping
- * [2] the map need not be locked for reference dropping
- * [3] dropping references may trigger pager I/O, and if we hit
- * a pager that does synchronous I/O we may have to wait for it.
- * [4] we would like all waiting for I/O to occur with maps unlocked
- * so that we don't block other threads.
- */
- first_entry = NULL;
- *entry_list = NULL; /* to be safe */
+ pmap_unuse_final(p); /* before stack addresses go away */
+ TAILQ_INIT(&dead_entries);
/*
- * break up the area into map entry sized regions and unmap. note
- * that all mappings have to be removed before we can even consider
- * dropping references to amaps or VM objects (otherwise we could end
- * up with a mapping to a page on the free list which would be very bad)
+ * see if more than one process is using this vmspace...
*/
- while ((entry != &map->header) && (entry->start < end)) {
-
- UVM_MAP_CLIP_END(map, entry, end);
- next = entry->next;
- len = entry->end - entry->start;
- if (p && entry->object.uvm_obj == NULL)
- p->p_vmspace->vm_dused -= atop(len);
+ if (ovm->vm_refcnt == 1) {
+ /*
+ * if p is the only process using its vmspace then we can safely
+ * recycle that vmspace for the program that is being exec'd.
+ */
+#ifdef SYSVSHM
/*
- * unwire before removing addresses from the pmap; otherwise
- * unwiring will put the entries back into the pmap (XXX).
+ * SYSV SHM semantics require us to kill all segments on an exec
*/
+ if (ovm->vm_shm)
+ shmexit(ovm);
+#endif
- if (VM_MAPENT_ISWIRED(entry))
- uvm_map_entry_unwire(map, entry);
+ /*
+ * POSIX 1003.1b -- "lock future mappings" is revoked
+ * when a process execs another program image.
+ */
+ vm_map_lock(map);
+ vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
/*
- * special case: handle mappings to anonymous kernel objects.
- * we want to free these pages right away...
+ * now unmap the old program
+ *
+ * Instead of attempting to keep the map valid, we simply
+ * nuke all entries and ask uvm_map_setup to reinitialize
+ * the map to the new boundaries.
+ *
+ * uvm_unmap_remove will actually nuke all entries for us
+ * (as in, not replace them with free-memory entries).
*/
-#ifdef KVA_GUARDPAGES
- if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) {
- entry->etype &= ~MAP_ET_KVAGUARD;
- kva_guardpages--;
- } else /* (code continues across line-break) */
-#endif
- if (UVM_ET_ISHOLE(entry)) {
- if (!remove_holes) {
- entry = next;
- continue;
- }
- } else if (map->flags & VM_MAP_INTRSAFE) {
- uvm_km_pgremove_intrsafe(entry->start, entry->end);
- pmap_kremove(entry->start, len);
- } else if (UVM_ET_ISOBJ(entry) &&
- UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
- KASSERT(vm_map_pmap(map) == pmap_kernel());
+ uvm_unmap_remove(map, map->min_offset, map->max_offset,
+ &dead_entries, TRUE, FALSE);
- /*
- * note: kernel object mappings are currently used in
- * two ways:
- * [1] "normal" mappings of pages in the kernel object
- * [2] uvm_km_valloc'd allocations in which we
- * pmap_enter in some non-kernel-object page
- * (e.g. vmapbuf).
- *
- * for case [1], we need to remove the mapping from
- * the pmap and then remove the page from the kernel
- * object (because, once pages in a kernel object are
- * unmapped they are no longer needed, unlike, say,
- * a vnode where you might want the data to persist
- * until flushed out of a queue).
- *
- * for case [2], we need to remove the mapping from
- * the pmap. there shouldn't be any pages at the
- * specified offset in the kernel object [but it
- * doesn't hurt to call uvm_km_pgremove just to be
- * safe?]
- *
- * uvm_km_pgremove currently does the following:
- * for pages in the kernel object in range:
- * - drops the swap slot
- * - uvm_pagefree the page
- *
- * note there is version of uvm_km_pgremove() that
- * is used for "intrsafe" objects.
- */
+ KDASSERT(RB_EMPTY(&map->addr));
- /*
- * remove mappings from pmap and drop the pages
- * from the object. offsets are always relative
- * to vm_map_min(kernel_map).
- */
- pmap_remove(pmap_kernel(), entry->start, entry->end);
- uvm_km_pgremove(entry->object.uvm_obj,
- entry->start - vm_map_min(kernel_map),
- entry->end - vm_map_min(kernel_map));
+ /*
+ * Nuke statistics and boundaries.
+ */
+ bzero(&ovm->vm_startcopy,
+ (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
- /*
- * null out kernel_object reference, we've just
- * dropped it
- */
- entry->etype &= ~UVM_ET_OBJ;
- entry->object.uvm_obj = NULL; /* to be safe */
- } else {
- /*
- * remove mappings the standard way.
- */
- pmap_remove(map->pmap, entry->start, entry->end);
+ if (end & (vaddr_t)PAGE_MASK) {
+ end += 1;
+ if (end == 0) /* overflow */
+ end -= PAGE_SIZE;
}
/*
- * remove entry from map and put it on our list of entries
- * that we've nuked. then go do next entry.
+ * Setup new boundaries and populate map with entries.
*/
- /* critical! prevents stale hint */
- SAVE_HINT(map, entry, entry->prev);
+ map->min_offset = start;
+ map->max_offset = end;
+ uvm_map_setup_entries(map);
+ vm_map_unlock(map);
- uvm_map_entry_unlink(map, entry);
- map->size -= len;
- entry->next = first_entry;
- first_entry = entry;
- entry = next; /* next entry, please */
- }
-#ifdef KVA_GUARDPAGES
- /*
- * entry points at the map-entry after the last-removed map-entry.
- */
- if (map == kernel_map && entry != &map->header &&
- entry->etype & MAP_ET_KVAGUARD && entry->start == end) {
/*
- * Removed range is followed by guard page;
- * remove that guard page now (or it will stay forever).
+ * but keep MMU holes unavailable
*/
- entry->etype &= ~MAP_ET_KVAGUARD;
- kva_guardpages--;
+ pmap_remove_holes(map);
- uvm_map_entry_unlink(map, entry);
- map->size -= len;
- entry->next = first_entry;
- first_entry = entry;
- entry = next; /* next entry, please */
- }
-#endif
- /* if ((map->flags & VM_MAP_DYING) == 0) { */
- pmap_update(vm_map_pmap(map));
- /* } */
+ } else {
+ /*
+ * p's vmspace is being shared, so we can't reuse it for p since
+ * it is still being used for others. allocate a new vmspace
+ * for p
+ */
+ nvm = uvmspace_alloc(start, end,
+ (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
+
+ /*
+ * install new vmspace and drop our ref to the old one.
+ */
- uvm_tree_sanity(map, "unmap_remove leave");
+ pmap_deactivate(p);
+ p->p_vmspace = nvm;
+ pmap_activate(p);
+
+ uvmspace_free(ovm);
+ }
/*
- * now we've cleaned up the map and are ready for the caller to drop
- * references to the mapped objects.
+ * Release dead entries
*/
-
- *entry_list = first_entry;
+ uvm_unmap_detach(&dead_entries, 0);
}
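[Annotation, not part of the patch] The dead-entry queue pattern used above, shown in isolation; a sketch, with the two trailing booleans mirroring the uvm_map_extract() failure path later in this diff.

	/* illustrative sketch of the deadq unmap pattern */
	struct uvm_map_deadq dead;

	TAILQ_INIT(&dead);
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
	vm_map_unlock(map);
	/* object/amap references are only dropped after the map is unlocked */
	uvm_unmap_detach(&dead, 0);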
/*
- * uvm_unmap_detach: drop references in a chain of map entries
+ * uvmspace_free: free a vmspace data structure
*
- * => we will free the map entries as we traverse the list.
+ * - XXX: no locking on vmspace
*/
void
-uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
+uvmspace_free(struct vmspace *vm)
{
- struct vm_map_entry *next_entry;
-
- while (first_entry) {
- KASSERT(!VM_MAPENT_ISWIRED(first_entry));
-
- /*
- * drop reference to amap, if we've got one
- */
-
- if (first_entry->aref.ar_amap)
- uvm_map_unreference_amap(first_entry, flags);
-
+ if (--vm->vm_refcnt == 0) {
/*
- * drop reference to our backing object, if we've got one
+ * lock the map, to wait out all other references to it. delete
+ * all of the mappings and pages they hold, then call the pmap
+ * module to reclaim anything left.
*/
+#ifdef SYSVSHM
+ /* Get rid of any SYSV shared memory segments. */
+ if (vm->vm_shm != NULL)
+ shmexit(vm);
+#endif
- if (UVM_ET_ISSUBMAP(first_entry)) {
- /* ... unlikely to happen, but play it safe */
- uvm_map_deallocate(first_entry->object.sub_map);
- } else {
- if (UVM_ET_ISOBJ(first_entry) &&
- first_entry->object.uvm_obj->pgops->pgo_detach)
- first_entry->object.uvm_obj->pgops->
- pgo_detach(first_entry->object.uvm_obj);
- }
-
- next_entry = first_entry->next;
- uvm_mapent_free(first_entry);
- first_entry = next_entry;
+ uvm_map_teardown(&vm->vm_map);
+ pool_put(&uvm_vmspace_pool, vm);
}
}
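[Annotation, not part of the patch] Reference lifecycle implied by uvmspace_share()/uvmspace_free(); parent/child are hypothetical procs and no locking is shown, since the structure itself is documented as unlocked.

	/* illustrative sketch -- parent/child are hypothetical procs */
	uvmspace_share(parent, child);		/* child uses parent's vmspace; vm_refcnt++ */
	/* ... child runs ... */
	uvmspace_free(child->p_vmspace);	/* vm_refcnt--; teardown only when it hits 0 */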
/*
- * E X T R A C T I O N F U N C T I O N S
- */
-
-/*
- * uvm_map_reserve: reserve space in a vm_map for future use.
+ * Clone map entry into other map.
*
- * => we reserve space in a map by putting a dummy map entry in the
- * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
- * => map should be unlocked (we will write lock it)
- * => we return true if we were able to reserve space
- * => XXXCDC: should be inline?
+ * Mapping will be placed at dstaddr, for the same length.
+ * Space must be available.
+ * Reference counters are incremented.
*/
-
-int
-uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset,
- vsize_t align, vaddr_t *raddr)
+struct vm_map_entry*
+uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
+ vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
+ int mapent_flags, int amap_share_flags)
{
+ struct vm_map_entry *new_entry, *first, *last;
- size = round_page(size);
- if (*raddr < vm_map_min(map))
- *raddr = vm_map_min(map); /* hint */
+ KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
/*
- * reserve some virtual space.
+ * Create new entry (linked in on creation).
+ * Fill in first, last.
*/
+ first = last = NULL;
+ if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
+ panic("uvmspace_fork: no space in map for "
+ "entry in empty map");
+ }
+ new_entry = uvm_map_mkentry(dstmap, first, last,
+ dstaddr, dstlen, mapent_flags, dead);
+ if (new_entry == NULL)
+ return NULL;
+ /* old_entry -> new_entry */
+ new_entry->object = old_entry->object;
+ new_entry->offset = old_entry->offset;
+ new_entry->aref = old_entry->aref;
+ new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
+ new_entry->protection = old_entry->protection;
+ new_entry->max_protection = old_entry->max_protection;
+ new_entry->inheritance = old_entry->inheritance;
+ new_entry->advice = old_entry->advice;
- if (uvm_map(map, raddr, size, NULL, offset, 0,
- UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
- UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
- return (FALSE);
- }
+ /*
+ * gain reference to object backing the map (can't
+ * be a submap).
+ */
+ if (new_entry->aref.ar_amap) {
+ new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
+ amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
+ (new_entry->end - new_entry->start) >> PAGE_SHIFT,
+ amap_share_flags);
+ }
+
+ if (UVM_ET_ISOBJ(new_entry) &&
+ new_entry->object.uvm_obj->pgops->pgo_reference) {
+ new_entry->offset += off;
+ new_entry->object.uvm_obj->pgops->pgo_reference
+ (new_entry->object.uvm_obj);
+ }
- return (TRUE);
+ return new_entry;
}
/*
- * uvm_map_replace: replace a reserved (blank) area of memory with
- * real mappings.
- *
- * => caller must WRITE-LOCK the map
- * => we return TRUE if replacement was a success
- * => we expect the newents chain to have nnewents entries on it and
- * we expect newents->prev to point to the last entry on the list
- * => note newents is allowed to be NULL
+ * share the mapping: this means we want the old and
+ * new entries to share amaps and backing objects.
*/
-
-int
-uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
- struct vm_map_entry *newents, int nnewents)
+void
+uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
+ struct vm_map *old_map,
+ struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
{
- struct vm_map_entry *oldent, *last;
-
- uvm_tree_sanity(map, "map_replace entry");
-
- /*
- * first find the blank map entry at the specified address
- */
-
- if (!uvm_map_lookup_entry(map, start, &oldent)) {
- return(FALSE);
- }
+ struct vm_map_entry *new_entry;
/*
- * check to make sure we have a proper blank entry
+ * if the old_entry needs a new amap (due to prev fork)
+ * then we need to allocate it now so that we have
+ * something we own to share with the new_entry. [in
+ * other words, we need to clear needs_copy]
*/
- if (oldent->start != start || oldent->end != end ||
- oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
- return (FALSE);
+ if (UVM_ET_ISNEEDSCOPY(old_entry)) {
+ /* get our own amap, clears needs_copy */
+ amap_copy(old_map, old_entry, M_WAITOK, FALSE,
+ 0, 0);
+ /* XXXCDC: WAITOK??? */
}
-#ifdef DIAGNOSTIC
- /*
- * sanity check the newents chain
- */
- {
- struct vm_map_entry *tmpent = newents;
- int nent = 0;
- vaddr_t cur = start;
-
- while (tmpent) {
- nent++;
- if (tmpent->start < cur)
- panic("uvm_map_replace1");
- if (tmpent->start > tmpent->end || tmpent->end > end) {
- printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
- tmpent->start, tmpent->end, end);
- panic("uvm_map_replace2");
- }
- cur = tmpent->end;
- if (tmpent->next) {
- if (tmpent->next->prev != tmpent)
- panic("uvm_map_replace3");
- } else {
- if (newents->prev != tmpent)
- panic("uvm_map_replace4");
- }
- tmpent = tmpent->next;
- }
- if (nent != nnewents)
- panic("uvm_map_replace5");
- }
-#endif
+ new_entry = uvm_mapent_clone(new_map, old_entry->start,
+ old_entry->end - old_entry->start, 0, old_entry,
+ dead, 0, AMAP_SHARED);
- /*
- * map entry is a valid blank! replace it. (this does all the
- * work of map entry link/unlink...).
+ /*
+ * pmap_copy the mappings: this routine is optional
+ * but if it is there it will reduce the number of
+ * page faults in the new proc.
*/
-
- if (newents) {
- last = newents->prev; /* we expect this */
-
- /* critical: flush stale hints out of map */
- SAVE_HINT(map, map->hint, newents);
- if (map->first_free == oldent)
- map->first_free = last;
-
- last->next = oldent->next;
- last->next->prev = last;
-
- /* Fix RB tree */
- uvm_rb_remove(map, oldent);
-
- newents->prev = oldent->prev;
- newents->prev->next = newents;
- map->nentries = map->nentries + (nnewents - 1);
-
- /* Fixup the RB tree */
- {
- int i;
- struct vm_map_entry *tmp;
-
- tmp = newents;
- for (i = 0; i < nnewents && tmp; i++) {
- uvm_rb_insert(map, tmp);
- tmp = tmp->next;
- }
- }
- } else {
-
- /* critical: flush stale hints out of map */
- SAVE_HINT(map, map->hint, oldent->prev);
- if (map->first_free == oldent)
- map->first_free = oldent->prev;
-
- /* NULL list of new entries: just remove the old one */
- uvm_map_entry_unlink(map, oldent);
- }
-
-
- uvm_tree_sanity(map, "map_replace leave");
+ pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
+ (new_entry->end - new_entry->start), new_entry->start);
/*
- * now we can free the old blank entry, unlock the map and return.
+ * Update process statistics.
*/
-
- uvm_mapent_free(oldent);
- return(TRUE);
+ if (!UVM_ET_ISHOLE(new_entry))
+ new_map->size += new_entry->end - new_entry->start;
+ if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
+ new_vm->vm_dused +=
+ uvmspace_dused(new_map, new_entry->start, new_entry->end);
+ }
}
/*
- * uvm_map_extract: extract a mapping from a map and put it somewhere
- * (maybe removing the old mapping)
+ * copy-on-write the mapping (using mmap's
+ * MAP_PRIVATE semantics)
*
- * => maps should be unlocked (we will write lock them)
- * => returns 0 on success, error code otherwise
- * => start must be page aligned
- * => len must be page sized
- * => flags:
- * UVM_EXTRACT_REMOVE: remove mappings from srcmap
- * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
- * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
- * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
- * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
- * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
- * be used from within the kernel in a kernel level map <<<
+ * allocate new_entry, adjust reference counts.
+ * (note that new references are read-only).
*/
-
-int
-uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
- struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
+void
+uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
+ struct vm_map *old_map,
+ struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
{
- vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
- oldstart;
- struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry;
- struct vm_map_entry *deadentry, *oldentry;
- vsize_t elen;
- int nchain, error, copy_ok;
+ struct vm_map_entry *new_entry;
+ boolean_t protect_child;
- uvm_tree_sanity(srcmap, "map_extract src enter");
- uvm_tree_sanity(dstmap, "map_extract dst enter");
-
- /*
- * step 0: sanity check: start must be on a page boundary, length
- * must be page sized. can't ask for CONTIG/QREF if you asked for
- * REMOVE.
- */
+ new_entry = uvm_mapent_clone(new_map, old_entry->start,
+ old_entry->end - old_entry->start, 0, old_entry,
+ dead, 0, 0);
- KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
- KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
- (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
+ new_entry->etype |=
+ (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
/*
- * step 1: reserve space in the target map for the extracted area
+ * the new entry will need an amap. it will either
+ * need to be copied from the old entry or created
+ * from scratch (if the old entry does not have an
+ * amap). can we defer this process until later
+ * (by setting "needs_copy") or do we need to copy
+ * the amap now?
+ *
+ * we must copy the amap now if any of the following
+ * conditions hold:
+ * 1. the old entry has an amap and that amap is
+ * being shared. this means that the old (parent)
+ * process is sharing the amap with another
+ * process. if we do not clear needs_copy here
+ * we will end up in a situation where both the
+ * parent and child process are referring to the
+ * same amap with "needs_copy" set. if the
+ * parent write-faults, the fault routine will
+ * clear "needs_copy" in the parent by allocating
+ * a new amap. this is wrong because the
+ * parent is supposed to be sharing the old amap
+ * and the new amap will break that.
+ *
+ * 2. if the old entry has an amap and a non-zero
+ * wire count then we are going to have to call
+ * amap_cow_now to avoid page faults in the
+ * parent process. since amap_cow_now requires
+ * "needs_copy" to be clear we might as well
+ * clear it here as well.
+ *
*/
- dstaddr = vm_map_min(dstmap);
- if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
- return(ENOMEM);
- *dstaddrp = dstaddr; /* pass address back to caller */
+ if (old_entry->aref.ar_amap != NULL &&
+ ((amap_flags(old_entry->aref.ar_amap) &
+ AMAP_SHARED) != 0 ||
+ VM_MAPENT_ISWIRED(old_entry))) {
+ amap_copy(new_map, new_entry, M_WAITOK, FALSE,
+ 0, 0);
+ /* XXXCDC: M_WAITOK ... ok? */
+ }
/*
- * step 2: setup for the extraction process loop by init'ing the
- * map entry chain, locking src map, and looking up the first useful
- * entry in the map.
+ * if the parent's entry is wired down, then the
+ * parent process does not want page faults on
+ * access to that memory. this means that we
+ * cannot do copy-on-write because we can't write
+ * protect the old entry. in this case we
+ * resolve all copy-on-write faults now, using
+ * amap_cow_now. note that we have already
+ * allocated any needed amap (above).
*/
- end = start + len;
- newend = dstaddr + len;
- chain = endchain = NULL;
- nchain = 0;
- vm_map_lock(srcmap);
+ if (VM_MAPENT_ISWIRED(old_entry)) {
- if (uvm_map_lookup_entry(srcmap, start, &entry)) {
+ /*
+ * resolve all copy-on-write faults now
+ * (note that there is nothing to do if
+ * the old mapping does not have an amap).
+ * XXX: is it worthwhile to bother with
+ * pmap_copy in this case?
+ */
+ if (old_entry->aref.ar_amap)
+ amap_cow_now(new_map, new_entry);
- /* "start" is within an entry */
- if (flags & UVM_EXTRACT_QREF) {
+ } else {
+ if (old_entry->aref.ar_amap) {
/*
- * for quick references we don't clip the entry, so
- * the entry may map space "before" the starting
- * virtual address... this is the "fudge" factor
- * (which can be non-zero only the first time
- * through the "while" loop in step 3).
+ * setup mappings to trigger copy-on-write faults
+ * we must write-protect the parent if it has
+ * an amap and it is not already "needs_copy"...
+ * if it is already "needs_copy" then the parent
+ * has already been write-protected by a previous
+ * fork operation.
+ *
+ * if we do not write-protect the parent, then
+ * we must be sure to write-protect the child
+ * after the pmap_copy() operation.
+ *
+ * XXX: pmap_copy should have some way of telling
+ * us that it didn't do anything so we can avoid
+ * calling pmap_protect needlessly.
*/
+ if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
+ if (old_entry->max_protection &
+ VM_PROT_WRITE) {
+ pmap_protect(old_map->pmap,
+ old_entry->start,
+ old_entry->end,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+ pmap_update(old_map->pmap);
+ }
+ old_entry->etype |= UVM_ET_NEEDSCOPY;
+ }
- fudge = start - entry->start;
+ /*
+ * parent must now be write-protected
+ */
+ protect_child = FALSE;
} else {
/*
- * normal reference: we clip the map to fit (thus
- * fudge is zero)
+ * we only need to protect the child if the
+ * parent has write access.
*/
-
- UVM_MAP_CLIP_START(srcmap, entry, start);
- SAVE_HINT(srcmap, srcmap->hint, entry->prev);
- fudge = 0;
- }
- } else {
-
- /* "start" is not within an entry ... skip to next entry */
- if (flags & UVM_EXTRACT_CONTIG) {
- error = EINVAL;
- goto bad; /* definite hole here ... */
- }
-
- entry = entry->next;
- fudge = 0;
- }
-
- /* save values from srcmap for step 6 */
- orig_entry = entry;
- orig_fudge = fudge;
-
- /*
- * step 3: now start looping through the map entries, extracting
- * as we go.
- */
-
- while (entry->start < end && entry != &srcmap->header) {
-
- /* if we are not doing a quick reference, clip it */
- if ((flags & UVM_EXTRACT_QREF) == 0)
- UVM_MAP_CLIP_END(srcmap, entry, end);
-
- /* clear needs_copy (allow chunking) */
- if (UVM_ET_ISNEEDSCOPY(entry)) {
- if (fudge)
- oldstart = entry->start;
+ if (old_entry->max_protection & VM_PROT_WRITE)
+ protect_child = TRUE;
else
- oldstart = 0; /* XXX: gcc */
- amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
- if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */
- error = ENOMEM;
- goto bad;
- }
+ protect_child = FALSE;
- /* amap_copy could clip (during chunk)! update fudge */
- if (fudge) {
- fudge = fudge - (entry->start - oldstart);
- orig_fudge = fudge;
- }
- }
-
- /* calculate the offset of this from "start" */
- oldoffset = (entry->start + fudge) - start;
-
- /* allocate a new map entry */
- newentry = uvm_mapent_alloc(dstmap, flags);
- if (newentry == NULL) {
- error = ENOMEM;
- goto bad;
}
- /* set up new map entry */
- newentry->next = NULL;
- newentry->prev = endchain;
- newentry->start = dstaddr + oldoffset;
- newentry->end =
- newentry->start + (entry->end - (entry->start + fudge));
- if (newentry->end > newend || newentry->end < newentry->start)
- newentry->end = newend;
- newentry->object.uvm_obj = entry->object.uvm_obj;
- if (newentry->object.uvm_obj) {
- if (newentry->object.uvm_obj->pgops->pgo_reference)
- newentry->object.uvm_obj->pgops->
- pgo_reference(newentry->object.uvm_obj);
- newentry->offset = entry->offset + fudge;
- } else {
- newentry->offset = 0;
- }
- newentry->etype = entry->etype;
- newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
- entry->max_protection : entry->protection;
- newentry->max_protection = entry->max_protection;
- newentry->inheritance = entry->inheritance;
- newentry->wired_count = 0;
- newentry->aref.ar_amap = entry->aref.ar_amap;
- if (newentry->aref.ar_amap) {
- newentry->aref.ar_pageoff =
- entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
- uvm_map_reference_amap(newentry, AMAP_SHARED |
- ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
- } else {
- newentry->aref.ar_pageoff = 0;
- }
- newentry->advice = entry->advice;
+ /*
+ * copy the mappings
+ * XXX: need a way to tell if this does anything
+ */
- /* now link it on the chain */
- nchain++;
- if (endchain == NULL) {
- chain = endchain = newentry;
- } else {
- endchain->next = newentry;
- endchain = newentry;
- }
+ pmap_copy(new_map->pmap, old_map->pmap,
+ new_entry->start,
+ (old_entry->end - old_entry->start),
+ old_entry->start);
- /* end of 'while' loop! */
- if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
- (entry->next == &srcmap->header ||
- entry->next->start != entry->end)) {
- error = EINVAL;
- goto bad;
+ /*
+ * protect the child's mappings if necessary
+ */
+ if (protect_child) {
+ pmap_protect(new_map->pmap, new_entry->start,
+ new_entry->end,
+ new_entry->protection &
+ ~VM_PROT_WRITE);
}
- entry = entry->next;
- fudge = 0;
}
/*
- * step 4: close off chain (in format expected by uvm_map_replace)
+ * Update process statistics.
*/
+ if (!UVM_ET_ISHOLE(new_entry))
+ new_map->size += new_entry->end - new_entry->start;
+ if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
+ new_vm->vm_dused +=
+ uvmspace_dused(new_map, new_entry->start, new_entry->end);
+ }
+}
- if (chain)
- chain->prev = endchain;
+/*
+ * uvmspace_fork: fork a process' main map
+ *
+ * => create a new vmspace for child process from parent.
+ * => parent's map must not be locked.
+ */
+struct vmspace *
+uvmspace_fork(struct vmspace *vm1)
+{
+ struct vmspace *vm2;
+ struct vm_map *old_map = &vm1->vm_map;
+ struct vm_map *new_map;
+ struct vm_map_entry *old_entry;
+ struct uvm_map_deadq dead;
- /*
- * step 5: attempt to lock the dest map so we can pmap_copy.
- * note usage of copy_ok:
- * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
- * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
- */
+ vm_map_lock(old_map);
- if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
- copy_ok = 1;
- if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
- nchain)) {
- if (srcmap != dstmap)
- vm_map_unlock(dstmap);
- error = EIO;
- goto bad;
- }
- } else {
- copy_ok = 0;
- /* replace defered until step 7 */
- }
+ vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
+ (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
+ memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
+ (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
+ vm2->vm_dused = 0; /* Statistic managed by us. */
+ new_map = &vm2->vm_map;
+ vm_map_lock(new_map);
/*
- * step 6: traverse the srcmap a second time to do the following:
- * - if we got a lock on the dstmap do pmap_copy
- * - if UVM_EXTRACT_REMOVE remove the entries
- * we make use of orig_entry and orig_fudge (saved in step 2)
+ * go entry-by-entry
*/
- if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
+ TAILQ_INIT(&dead);
+ RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
+ if (old_entry->start == old_entry->end)
+ continue;
- /* purge possible stale hints from srcmap */
- if (flags & UVM_EXTRACT_REMOVE) {
- SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
- if (srcmap->first_free->start >= start)
- srcmap->first_free = orig_entry->prev;
+ /*
+ * first, some sanity checks on the old entry
+ */
+ if (UVM_ET_ISSUBMAP(old_entry)) {
+ panic("fork: encountered a submap during fork "
+ "(illegal)");
}
- entry = orig_entry;
- fudge = orig_fudge;
- deadentry = NULL; /* for UVM_EXTRACT_REMOVE */
-
- while (entry->start < end && entry != &srcmap->header) {
- if (copy_ok) {
- oldoffset = (entry->start + fudge) - start;
- elen = MIN(end, entry->end) -
- (entry->start + fudge);
- pmap_copy(dstmap->pmap, srcmap->pmap,
- dstaddr + oldoffset, elen,
- entry->start + fudge);
- }
-
- /* we advance "entry" in the following if statement */
- if (flags & UVM_EXTRACT_REMOVE) {
- pmap_remove(srcmap->pmap, entry->start,
- entry->end);
- oldentry = entry; /* save entry */
- entry = entry->next; /* advance */
- uvm_map_entry_unlink(srcmap, oldentry);
- /* add to dead list */
- oldentry->next = deadentry;
- deadentry = oldentry;
- } else {
- entry = entry->next; /* advance */
- }
-
- /* end of 'while' loop */
- fudge = 0;
+ if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
+ UVM_ET_ISNEEDSCOPY(old_entry)) {
+ panic("fork: non-copy_on_write map entry marked "
+ "needs_copy (illegal)");
}
- pmap_update(srcmap->pmap);
/*
- * unlock dstmap. we will dispose of deadentry in
- * step 7 if needed
+ * Apply inheritance.
*/
-
- if (copy_ok && srcmap != dstmap)
- vm_map_unlock(dstmap);
-
+ if (old_entry->inheritance == MAP_INHERIT_SHARE) {
+ uvm_mapent_forkshared(vm2, new_map,
+ old_map, old_entry, &dead);
+ }
+ if (old_entry->inheritance == MAP_INHERIT_COPY) {
+ uvm_mapent_forkcopy(vm2, new_map,
+ old_map, old_entry, &dead);
+ }
}
- else
- deadentry = NULL; /* XXX: gcc */
+
+ vm_map_unlock(old_map);
+ vm_map_unlock(new_map);
/*
- * step 7: we are done with the source map, unlock. if copy_ok
- * is 0 then we have not replaced the dummy mapping in dstmap yet
- * and we need to do so now.
+ * This can actually happen if multiple entries described a
+ * space in which an entry was inherited.
*/
+ uvm_unmap_detach(&dead, 0);
- vm_map_unlock(srcmap);
- if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
- uvm_unmap_detach(deadentry, 0); /* dispose of old entries */
-
- /* now do the replacement if we didn't do it in step 5 */
- if (copy_ok == 0) {
- vm_map_lock(dstmap);
- error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
- nchain);
- vm_map_unlock(dstmap);
-
- if (error == FALSE) {
- error = EIO;
- goto bad2;
- }
- }
+#ifdef SYSVSHM
+ if (vm1->vm_shm)
+ shmfork(vm1, vm2);
+#endif
- uvm_tree_sanity(srcmap, "map_extract src leave");
- uvm_tree_sanity(dstmap, "map_extract dst leave");
+#ifdef PMAP_FORK
+ pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
+#endif
- return(0);
+ return vm2;
+}
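[Annotation, not part of the patch] Hedged sketch of the fork-path caller; the proc wiring (p1/p2) is an assumption, only uvmspace_fork() itself comes from this diff.

	/* illustrative sketch -- p1/p2 are hypothetical parent/child procs */
	struct vmspace *vm2;

	vm2 = uvmspace_fork(p1->p_vmspace);	/* parent map must be unlocked */
	p2->p_vmspace = vm2;			/* new vmspace arrives with vm_refcnt == 1 */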
+
+/*
+ * uvm_map_hint: return the beginning of the best area suitable for
+ * creating a new mapping with "prot" protection.
+ */
+vaddr_t
+uvm_map_hint(struct vmspace *vm, vm_prot_t prot)
+{
+ vaddr_t addr;
+#ifdef __i386__
/*
- * bad: failure recovery
+ * If executable skip first two pages, otherwise start
+ * after data + heap region.
*/
-bad:
- vm_map_unlock(srcmap);
-bad2: /* src already unlocked */
- if (chain)
- uvm_unmap_detach(chain,
- (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
-
- uvm_tree_sanity(srcmap, "map_extract src err leave");
- uvm_tree_sanity(dstmap, "map_extract dst err leave");
-
- uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */
- return(error);
+ if ((prot & VM_PROT_EXECUTE) != 0 &&
+ (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
+ addr = (PAGE_SIZE*2) +
+ (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
+ return (round_page(addr));
+ }
+#endif
+ /* start malloc/mmap after the brk */
+ addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
+#if !defined(__vax__)
+ addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1);
+#endif
+ return (round_page(addr));
}
-/* end of extraction functions */
-
/*
* uvm_map_submap: punch down part of a map into a submap
*
@@ -2202,7 +3651,6 @@ bad2: /* src already unlocked */
* => submap must have been init'd and have a zero reference count.
* [need not be locked as we don't actually reference it]
*/
-
int
uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
struct vm_map *submap)
@@ -2210,16 +3658,17 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
struct vm_map_entry *entry;
int result;
- vm_map_lock(map);
+ if (start > map->max_offset || end > map->max_offset ||
+ start < map->min_offset || end < map->min_offset)
+ return EINVAL;
- VM_MAP_RANGE_CHECK(map, start, end);
+ vm_map_lock(map);
if (uvm_map_lookup_entry(map, start, &entry)) {
UVM_MAP_CLIP_START(map, entry, start);
- UVM_MAP_CLIP_END(map, entry, end); /* to be safe */
- } else {
+ UVM_MAP_CLIP_END(map, entry, end);
+ } else
entry = NULL;
- }
if (entry != NULL &&
entry->start == start && entry->end == end &&
@@ -2230,129 +3679,101 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
entry->offset = 0;
uvm_map_reference(submap);
result = 0;
- } else {
+ } else
result = EINVAL;
- }
+
vm_map_unlock(map);
return(result);
}
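[Annotation, not part of the patch] Caller sketch for uvm_map_submap(); the reserved range (sub_start/sub_end) and the submap are placeholders, assumed to have been set up per the constraints noted above.

	/* illustrative sketch -- 'submap' must be init'd with refcount 0 */
	if (uvm_map_submap(kernel_map, sub_start, sub_end, submap) != 0)
		panic("submap install failed");	/* placeholder error handling */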
-
/*
- * uvm_map_protect: change map protection
+ * uvm_map_checkprot: check protection in map
*
- * => set_max means set max_protection.
- * => map must be unlocked.
+ * => must allow specific protection in a fully allocated region.
+ * => map must be read or write locked by caller.
*/
-
-#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
- ~VM_PROT_WRITE : VM_PROT_ALL)
-#define max(a,b) ((a) > (b) ? (a) : (b))
-
-int
-uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
- vm_prot_t new_prot, boolean_t set_max)
+boolean_t
+uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
+ vm_prot_t protection)
{
- struct vm_map_entry *current, *entry;
- int error = 0;
-
- vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
+ struct vm_map_entry *entry;
- if (uvm_map_lookup_entry(map, start, &entry)) {
- UVM_MAP_CLIP_START(map, entry, start);
- } else {
- entry = entry->next;
- }
+ if (start < map->min_offset || end > map->max_offset || start > end)
+ return FALSE;
+ if (start == end)
+ return TRUE;
/*
- * make a first pass to check for protection violations.
+ * Iterate entries.
*/
-
- current = entry;
- while ((current != &map->header) && (current->start < end)) {
- if (UVM_ET_ISSUBMAP(current)) {
- error = EINVAL;
- goto out;
- }
- if ((new_prot & current->max_protection) != new_prot) {
- error = EACCES;
- goto out;
- }
- current = current->next;
- }
-
- /* go back and fix up protections (no need to clip this time). */
-
- current = entry;
-
- while ((current != &map->header) && (current->start < end)) {
- vm_prot_t old_prot;
-
- UVM_MAP_CLIP_END(map, current, end);
-
- old_prot = current->protection;
- if (set_max)
- current->protection =
- (current->max_protection = new_prot) & old_prot;
- else
- current->protection = new_prot;
-
+ for (entry = uvm_map_entrybyaddr(&map->addr, start);
+ entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
/*
- * update physical map if necessary. worry about copy-on-write
- * here -- CHECK THIS XXX
+ * Fail if a hole is found.
*/
-
- if (current->protection != old_prot) {
- /* update pmap! */
- if ((current->protection & MASK(entry)) == PROT_NONE &&
- VM_MAPENT_ISWIRED(entry))
- current->wired_count--;
- pmap_protect(map->pmap, current->start, current->end,
- current->protection & MASK(entry));
- }
+ if (UVM_ET_ISHOLE(entry) ||
+ (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
+ return FALSE;
/*
- * If the map is configured to lock any future mappings,
- * wire this entry now if the old protection was VM_PROT_NONE
- * and the new protection is not VM_PROT_NONE.
+ * Check protection.
*/
+ if ((entry->protection & protection) != protection)
+ return FALSE;
+ }
+ return TRUE;
+}
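[Annotation, not part of the patch] Minimal caller sketch for uvm_map_checkprot(); assumes the usual vm_map_lock_read()/vm_map_unlock_read() wrappers, per the locking note above, and the EFAULT handling is a placeholder.

	/* illustrative sketch -- error handling is a placeholder */
	vm_map_lock_read(map);
	if (!uvm_map_checkprot(map, start, end, VM_PROT_READ | VM_PROT_WRITE))
		error = EFAULT;	/* hole or insufficient protection in range */
	vm_map_unlock_read(map);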
- if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
- VM_MAPENT_ISWIRED(entry) == 0 &&
- old_prot == VM_PROT_NONE &&
- new_prot != VM_PROT_NONE) {
- if (uvm_map_pageable(map, entry->start, entry->end,
- FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
- /*
- * If locking the entry fails, remember the
- * error if it's the first one. Note we
- * still continue setting the protection in
- * the map, but will return the resource
- * shortage condition regardless.
- *
- * XXX Ignore what the actual error is,
- * XXX just call it a resource shortage
- * XXX so that it doesn't get confused
- * XXX what uvm_map_protect() itself would
- * XXX normally return.
- */
- error = ENOMEM;
- }
- }
+/*
+ * uvm_map_create: create map
+ */
+vm_map_t
+uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
+{
+ vm_map_t result;
+
+ result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
+ result->pmap = pmap;
+ uvm_map_setup(result, min, max, flags);
+ return(result);
+}
+
+/*
+ * uvm_map_deallocate: drop reference to a map
+ *
+ * => caller must not lock map
+ * => we will zap map if ref count goes to zero
+ */
+void
+uvm_map_deallocate(vm_map_t map)
+{
+ int c;
+ struct uvm_map_deadq dead;
- current = current->next;
+ simple_lock(&map->ref_lock);
+ c = --map->ref_count;
+ simple_unlock(&map->ref_lock);
+ if (c > 0) {
+ return;
}
- pmap_update(map->pmap);
- out:
- vm_map_unlock(map);
- return (error);
-}
+ /*
+ * all references gone. unmap and free.
+ *
+ * No lock required: we are only one to access this map.
+ */
-#undef max
-#undef MASK
+ TAILQ_INIT(&dead);
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+ uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
+ TRUE, FALSE);
+ pmap_destroy(map->pmap);
+ KASSERT(RB_EMPTY(&map->addr));
+ free(map, M_VMMAP);
+
+ uvm_unmap_detach(&dead, 0);
+}
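[Annotation, not part of the patch] Sketch pairing uvm_map_create() with uvm_map_deallocate(); sm_min/sm_max are placeholder bounds and the initial reference is assumed to be taken in uvm_map_setup().

	/* illustrative sketch -- sm_min/sm_max are placeholder bounds */
	struct vm_map *map;

	map = uvm_map_create(pmap_create(), sm_min, sm_max, VM_MAP_PAGEABLE);
	/* additional holders call uvm_map_reference(map) */
	uvm_map_deallocate(map);	/* unmaps and frees once ref_count reaches 0 */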
/*
* uvm_map_inherit: set inheritance code for range of addrs in map.
@@ -2361,7 +3782,6 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
* => note that the inherit code is used during a "fork". see fork
* code for details.
*/
-
int
uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
vm_inherit_t new_inheritance)
@@ -2377,20 +3797,25 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
return (EINVAL);
}
+ if (start > end)
+ return EINVAL;
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return 0;
+
vm_map_lock(map);
-
- VM_MAP_RANGE_CHECK(map, start, end);
-
- if (uvm_map_lookup_entry(map, start, &entry)) {
+
+ entry = uvm_map_entrybyaddr(&map->addr, start);
+ if (entry->end > start)
UVM_MAP_CLIP_START(map, entry, start);
- } else {
- entry = entry->next;
- }
+ else
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
- while ((entry != &map->header) && (entry->start < end)) {
+ while (entry != NULL && entry->start < end) {
UVM_MAP_CLIP_END(map, entry, end);
entry->inheritance = new_inheritance;
- entry = entry->next;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
}
vm_map_unlock(map);
@@ -2402,7 +3827,6 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
*
* => map must be unlocked
*/
-
int
uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
{
@@ -2412,29 +3836,34 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
case MADV_NORMAL:
case MADV_RANDOM:
case MADV_SEQUENTIAL:
- /* nothing special here */
break;
-
default:
return (EINVAL);
}
+
+ if (start > end)
+ return EINVAL;
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return 0;
+
vm_map_lock(map);
- VM_MAP_RANGE_CHECK(map, start, end);
- if (uvm_map_lookup_entry(map, start, &entry)) {
+
+ entry = uvm_map_entrybyaddr(&map->addr, start);
+ if (entry != NULL && entry->end > start)
UVM_MAP_CLIP_START(map, entry, start);
- } else {
- entry = entry->next;
- }
+ else if (entry != NULL)
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
/*
* XXXJRT: disallow holes?
*/
- while ((entry != &map->header) && (entry->start < end)) {
+ while (entry != NULL && entry->start < end) {
UVM_MAP_CLIP_END(map, entry, end);
-
entry->advice = new_advice;
- entry = entry->next;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
}
vm_map_unlock(map);
@@ -2442,454 +3871,178 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
}
/*
- * uvm_map_pageable: sets the pageability of a range in a map.
+ * uvm_map_extract: extract a mapping from a map and put it somewhere
+ * in the kernel_map, setting protection to max_prot.
*
- * => wires map entries. should not be used for transient page locking.
- * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
- * => regions sepcified as not pageable require lock-down (wired) memory
- * and page tables.
- * => map must never be read-locked
- * => if islocked is TRUE, map is already write-locked
- * => we always unlock the map, since we must downgrade to a read-lock
- * to call uvm_fault_wire()
- * => XXXCDC: check this and try and clean it up.
+ * => map should be unlocked (we will write lock it and kernel_map)
+ * => returns 0 on success, error code otherwise
+ * => start must be page aligned
+ * => len must be page sized
+ * => flags:
+ * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
+ * Mappings are QREF's.
*/
-
int
-uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
- boolean_t new_pageable, int lockflags)
+uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
+ vaddr_t *dstaddrp, int flags)
{
- struct vm_map_entry *entry, *start_entry, *failed_entry;
- int rv;
-#ifdef DIAGNOSTIC
- u_int timestamp_save;
-#endif
- KASSERT(map->flags & VM_MAP_PAGEABLE);
-
- if ((lockflags & UVM_LK_ENTER) == 0)
- vm_map_lock(map);
+ struct uvm_map_deadq dead;
+ struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
+ vaddr_t dstaddr;
+ vaddr_t end;
+ vaddr_t cp_start;
+ vsize_t cp_len, cp_off;
+ int error;
- VM_MAP_RANGE_CHECK(map, start, end);
+ TAILQ_INIT(&dead);
+ end = start + len;
- /*
- * only one pageability change may take place at one time, since
- * uvm_fault_wire assumes it will be called only once for each
- * wiring/unwiring. therefore, we have to make sure we're actually
- * changing the pageability for the entire region. we do so before
- * making any changes.
+ /*
+ * Sanity check on the parameters.
+ * Also, since the mapping may not contain gaps, error out if the
+ * mapped area is not in source map.
*/
- if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
- if ((lockflags & UVM_LK_EXIT) == 0)
- vm_map_unlock(map);
+ if ((start & (vaddr_t)PAGE_MASK) != 0 ||
+ (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
+ return EINVAL;
+ if (start < srcmap->min_offset || end > srcmap->max_offset)
+ return EINVAL;
- return (EFAULT);
- }
- entry = start_entry;
-
- /*
- * handle wiring and unwiring separately.
+ /*
+ * Initialize dead entries.
+ * Handle len == 0 case.
*/
- if (new_pageable) { /* unwire */
- UVM_MAP_CLIP_START(map, entry, start);
-
- /*
- * unwiring. first ensure that the range to be unwired is
- * really wired down and that there are no holes.
- */
+ if (len == 0)
+ return 0;
- while ((entry != &map->header) && (entry->start < end)) {
- if (entry->wired_count == 0 ||
- (entry->end < end &&
- (entry->next == &map->header ||
- entry->next->start > entry->end))) {
- if ((lockflags & UVM_LK_EXIT) == 0)
- vm_map_unlock(map);
- return (EINVAL);
- }
- entry = entry->next;
- }
+ /*
+ * Acquire lock on srcmap.
+ */
+ vm_map_lock(srcmap);
- /*
- * POSIX 1003.1b - a single munlock call unlocks a region,
- * regardless of the number of mlock calls made on that
- * region.
- */
+ /*
+ * Lock srcmap, lookup first and last entry in <start,len>.
+ */
+ first = uvm_map_entrybyaddr(&srcmap->addr, start);
- entry = start_entry;
- while ((entry != &map->header) && (entry->start < end)) {
- UVM_MAP_CLIP_END(map, entry, end);
- if (VM_MAPENT_ISWIRED(entry))
- uvm_map_entry_unwire(map, entry);
- entry = entry->next;
+ /*
+ * Check that the range is contiguous.
+ */
+ for (entry = first; entry != NULL && entry->end < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ if (VMMAP_FREE_END(entry) != entry->end ||
+ UVM_ET_ISHOLE(entry)) {
+ error = EINVAL;
+ goto fail;
}
- if ((lockflags & UVM_LK_EXIT) == 0)
- vm_map_unlock(map);
- return (0);
+ }
+ if (entry == NULL || UVM_ET_ISHOLE(entry)) {
+ error = EINVAL;
+ goto fail;
}
/*
- * wire case: in two passes [XXXCDC: ugly block of code here]
- *
- * 1: holding the write lock, we create any anonymous maps that need
- * to be created. then we clip each map entry to the region to
- * be wired and increment its wiring count.
+ * Handle need-copy flag.
+ * This may invalidate last, hence the re-initialization during the
+ * loop.
*
- * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
- * in the pages for any newly wired area (wired_count == 1).
- *
- * downgrading to a read lock for uvm_fault_wire avoids a possible
- * deadlock with another thread that may have faulted on one of
- * the pages to be wired (it would mark the page busy, blocking
- * us, then in turn block on the map lock that we hold). because
- * of problems in the recursive lock package, we cannot upgrade
- * to a write lock in vm_map_lookup. thus, any actions that
- * require the write lock must be done beforehand. because we
- * keep the read lock on the map, the copy-on-write status of the
- * entries we modify here cannot change.
+ * Also, perform clipping of last if not UVM_EXTRACT_QREF.
*/
-
- while ((entry != &map->header) && (entry->start < end)) {
- if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
-
- /*
- * perform actions of vm_map_lookup that need the
- * write lock on the map: create an anonymous map
- * for a copy-on-write region, or an anonymous map
- * for a zero-fill region. (XXXCDC: submap case
- * ok?)
- */
-
- if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
- if (UVM_ET_ISNEEDSCOPY(entry) &&
- ((entry->protection & VM_PROT_WRITE) ||
- (entry->object.uvm_obj == NULL))) {
- amap_copy(map, entry, M_WAITOK, TRUE,
- start, end);
- /* XXXCDC: wait OK? */
- }
- }
- }
- UVM_MAP_CLIP_START(map, entry, start);
- UVM_MAP_CLIP_END(map, entry, end);
- entry->wired_count++;
-
- /*
- * Check for holes
- */
-
- if (entry->protection == VM_PROT_NONE ||
- (entry->end < end &&
- (entry->next == &map->header ||
- entry->next->start > entry->end))) {
-
+ for (entry = first; entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ if (UVM_ET_ISNEEDSCOPY(entry))
+ amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
+ if (UVM_ET_ISNEEDSCOPY(entry)) {
/*
- * found one. amap creation actions do not need to
- * be undone, but the wired counts need to be restored.
+ * amap_copy failure
*/
-
- while (entry != &map->header && entry->end > start) {
- entry->wired_count--;
- entry = entry->prev;
- }
- if ((lockflags & UVM_LK_EXIT) == 0)
- vm_map_unlock(map);
- return (EINVAL);
+ error = ENOMEM;
+ goto fail;
}
- entry = entry->next;
}
/*
- * Pass 2.
+ * Lock destination map (kernel_map).
*/
+ vm_map_lock(kernel_map);
-#ifdef DIAGNOSTIC
- timestamp_save = map->timestamp;
-#endif
- vm_map_busy(map);
- vm_map_downgrade(map);
-
- rv = 0;
- entry = start_entry;
- while (entry != &map->header && entry->start < end) {
- if (entry->wired_count == 1) {
- rv = uvm_fault_wire(map, entry->start, entry->end,
- entry->protection);
- if (rv) {
- /*
- * wiring failed. break out of the loop.
- * we'll clean up the map below, once we
- * have a write lock again.
- */
- break;
- }
- }
- entry = entry->next;
- }
-
- if (rv) { /* failed? */
-
- /*
- * Get back to an exclusive (write) lock.
- */
-
- vm_map_upgrade(map);
- vm_map_unbusy(map);
-
-#ifdef DIAGNOSTIC
- if (timestamp_save != map->timestamp)
- panic("uvm_map_pageable: stale map");
-#endif
-
- /*
- * first drop the wiring count on all the entries
- * which haven't actually been wired yet.
- */
-
- failed_entry = entry;
- while (entry != &map->header && entry->start < end) {
- entry->wired_count--;
- entry = entry->next;
- }
-
- /*
- * now, unwire all the entries that were successfully
- * wired above.
- */
-
- entry = start_entry;
- while (entry != failed_entry) {
- entry->wired_count--;
- if (VM_MAPENT_ISWIRED(entry) == 0)
- uvm_map_entry_unwire(map, entry);
- entry = entry->next;
- }
- if ((lockflags & UVM_LK_EXIT) == 0)
- vm_map_unlock(map);
- return(rv);
- }
-
- /* We are holding a read lock here. */
- if ((lockflags & UVM_LK_EXIT) == 0) {
- vm_map_unbusy(map);
- vm_map_unlock_read(map);
- } else {
-
- /*
- * Get back to an exclusive (write) lock.
- */
-
- vm_map_upgrade(map);
- vm_map_unbusy(map);
+ if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
+ PAGE_SIZE, 0, VM_PROT_NONE, 0) != 0) {
+ error = ENOMEM;
+ goto fail2;
}
+ *dstaddrp = dstaddr;
- return (0);
-}
-
-/*
- * uvm_map_pageable_all: special case of uvm_map_pageable - affects
- * all mapped regions.
- *
- * => map must not be locked.
- * => if no flags are specified, all regions are unwired.
- * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
- */
-
-int
-uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
-{
- struct vm_map_entry *entry, *failed_entry;
- vsize_t size;
- int error;
-#ifdef DIAGNOSTIC
- u_int timestamp_save;
-#endif
-
- KASSERT(map->flags & VM_MAP_PAGEABLE);
-
- vm_map_lock(map);
+ /*
+ * We now have srcmap and kernel_map locked.
+ * dstaddr contains the destination offset in dstmap.
+ */
/*
- * handle wiring and unwiring separately.
+ * step 1: start looping through map entries, performing extraction.
*/
+ for (entry = first; entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
+ if (UVM_ET_ISHOLE(entry))
+ continue;
- if (flags == 0) { /* unwire */
/*
- * POSIX 1003.1b -- munlockall unlocks all regions,
- * regardless of how many times mlockall has been called.
+ * Calculate uvm_mapent_clone parameters.
*/
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
- if (VM_MAPENT_ISWIRED(entry))
- uvm_map_entry_unwire(map, entry);
+ cp_start = entry->start;
+ if (cp_start < start) {
+ cp_off = start - cp_start;
+ cp_start = start;
+ } else
+ cp_off = 0;
+ cp_len = MIN(entry->end, end) - cp_start;
+
+ newentry = uvm_mapent_clone(kernel_map,
+ cp_start - start + dstaddr, cp_len, cp_off,
+ entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
+ if (newentry == NULL) {
+ error = ENOMEM;
+ goto fail2_unmap;
}
- vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
- vm_map_unlock(map);
- return (0);
-
- /*
- * end of unwire case!
- */
- }
+ kernel_map->size += cp_len;
+ if (flags & UVM_EXTRACT_FIXPROT)
+ newentry->protection = newentry->max_protection;
- if (flags & MCL_FUTURE) {
/*
- * must wire all future mappings; remember this.
+ * Step 2: perform pmap copy.
+ * (Doing this in the loop saves one RB traversal.)
*/
- vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
- }
-
- if ((flags & MCL_CURRENT) == 0) {
- /*
- * no more work to do!
- */
- vm_map_unlock(map);
- return (0);
- }
-
- /*
- * wire case: in three passes [XXXCDC: ugly block of code here]
- *
- * 1: holding the write lock, count all pages mapped by non-wired
- * entries. if this would cause us to go over our limit, we fail.
- *
- * 2: still holding the write lock, we create any anonymous maps that
- * need to be created. then we increment its wiring count.
- *
- * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
- * in the pages for any newly wired area (wired_count == 1).
- *
- * downgrading to a read lock for uvm_fault_wire avoids a possible
- * deadlock with another thread that may have faulted on one of
- * the pages to be wired (it would mark the page busy, blocking
- * us, then in turn block on the map lock that we hold). because
- * of problems in the recursive lock package, we cannot upgrade
- * to a write lock in vm_map_lookup. thus, any actions that
- * require the write lock must be done beforehand. because we
- * keep the read lock on the map, the copy-on-write status of the
- * entries we modify here cannot change.
- */
-
- for (size = 0, entry = map->header.next; entry != &map->header;
- entry = entry->next) {
- if (entry->protection != VM_PROT_NONE &&
- VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
- size += entry->end - entry->start;
- }
+ pmap_copy(kernel_map->pmap, srcmap->pmap,
+ cp_start - start + dstaddr, cp_len, cp_start);
}
+ pmap_update(kernel_map->pmap);
- if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
- vm_map_unlock(map);
- return (ENOMEM); /* XXX overloaded */
- }
-
- /* XXX non-pmap_wired_count case must be handled by caller */
-#ifdef pmap_wired_count
- if (limit != 0 &&
- (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
- vm_map_unlock(map);
- return (ENOMEM); /* XXX overloaded */
- }
-#endif
+ error = 0;
/*
- * Pass 2.
+ * Unmap copied entries on failure.
*/
-
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
- if (entry->protection == VM_PROT_NONE)
- continue;
- if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
- /*
- * perform actions of vm_map_lookup that need the
- * write lock on the map: create an anonymous map
- * for a copy-on-write region, or an anonymous map
- * for a zero-fill region. (XXXCDC: submap case
- * ok?)
- */
- if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
- if (UVM_ET_ISNEEDSCOPY(entry) &&
- ((entry->protection & VM_PROT_WRITE) ||
- (entry->object.uvm_obj == NULL))) {
- amap_copy(map, entry, M_WAITOK, TRUE,
- entry->start, entry->end);
- /* XXXCDC: wait OK? */
- }
- }
- }
- entry->wired_count++;
+fail2_unmap:
+ if (error) {
+ uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
+ FALSE, TRUE);
}
/*
- * Pass 3.
+ * Release maps, release dead entries.
*/
+fail2:
+ vm_map_unlock(kernel_map);
-#ifdef DIAGNOSTIC
- timestamp_save = map->timestamp;
-#endif
- vm_map_busy(map);
- vm_map_downgrade(map);
-
- for (error = 0, entry = map->header.next;
- entry != &map->header && error == 0;
- entry = entry->next) {
- if (entry->wired_count == 1) {
- error = uvm_fault_wire(map, entry->start, entry->end,
- entry->protection);
- }
- }
-
- if (error) { /* failed? */
- /*
- * Get back an exclusive (write) lock.
- */
- vm_map_upgrade(map);
- vm_map_unbusy(map);
-
-#ifdef DIAGNOSTIC
- if (timestamp_save != map->timestamp)
- panic("uvm_map_pageable_all: stale map");
-#endif
-
- /*
- * first drop the wiring count on all the entries
- * which haven't actually been wired yet.
- *
- * Skip VM_PROT_NONE entries like we did above.
- */
- failed_entry = entry;
- for (/* nothing */; entry != &map->header;
- entry = entry->next) {
- if (entry->protection == VM_PROT_NONE)
- continue;
- entry->wired_count--;
- }
-
- /*
- * now, unwire all the entries that were successfully
- * wired above.
- *
- * Skip VM_PROT_NONE entries like we did above.
- */
- for (entry = map->header.next; entry != failed_entry;
- entry = entry->next) {
- if (entry->protection == VM_PROT_NONE)
- continue;
- entry->wired_count--;
- if (VM_MAPENT_ISWIRED(entry))
- uvm_map_entry_unwire(map, entry);
- }
- vm_map_unlock(map);
- return (error);
- }
+fail:
+ vm_map_unlock(srcmap);
- /* We are holding a read lock here. */
- vm_map_unbusy(map);
- vm_map_unlock_read(map);
+ uvm_unmap_detach(&dead, 0);
- return (0);
+ return error;
}
/*
@@ -2913,71 +4066,72 @@ int amap_clean_works = 1; /* XXX for now, just in case... */
int
uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{
- struct vm_map_entry *current, *entry;
- struct uvm_object *uobj;
+ struct vm_map_entry *first, *entry;
struct vm_amap *amap;
struct vm_anon *anon;
struct vm_page *pg;
- vaddr_t offset;
- vsize_t size;
- int rv, error, refs;
+ struct uvm_object *uobj;
+ vaddr_t cp_start, cp_end;
+ int refs;
+ int error;
+ boolean_t rv;
KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
- (PGO_FREE|PGO_DEACTIVATE));
+ (PGO_FREE|PGO_DEACTIVATE));
+
+ if (start > end || start < map->min_offset || end > map->max_offset)
+ return EINVAL;
vm_map_lock_read(map);
- VM_MAP_RANGE_CHECK(map, start, end);
- if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
- vm_map_unlock_read(map);
- return (EFAULT);
- }
+ first = uvm_map_entrybyaddr(&map->addr, start);
/*
* Make a first pass to check for holes.
*/
-
- for (current = entry; current->start < end; current = current->next) {
- if (UVM_ET_ISSUBMAP(current)) {
+ for (entry = first; entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ if (UVM_ET_ISSUBMAP(entry)) {
vm_map_unlock_read(map);
- return (EINVAL);
+ return EINVAL;
}
- if (end > current->end && (current->next == &map->header ||
- current->end != current->next->start)) {
+ if (UVM_ET_ISSUBMAP(entry) ||
+ UVM_ET_ISHOLE(entry) ||
+ (entry->end < end &&
+ VMMAP_FREE_END(entry) != entry->end)) {
vm_map_unlock_read(map);
- return (EFAULT);
+ return EFAULT;
}
}
error = 0;
-
- for (current = entry; current->start < end; current = current->next) {
- amap = current->aref.ar_amap; /* top layer */
- uobj = current->object.uvm_obj; /* bottom layer */
- KASSERT(start >= current->start);
+ for (entry = first; entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ amap = entry->aref.ar_amap; /* top layer */
+ if (UVM_ET_ISOBJ(entry))
+ uobj = entry->object.uvm_obj;
+ else
+ uobj = NULL;
/*
* No amap cleaning necessary if:
- *
- * (1) There's no amap.
- *
- * (2) We're not deactivating or freeing pages.
+ * - there's no amap
+ * - we're not deactivating or freeing pages.
*/
-
if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
goto flush_object;
-
- /* XXX for now, just in case... */
- if (amap_clean_works == 0)
+ if (!amap_clean_works)
goto flush_object;
- offset = start - current->start;
- size = MIN(end, current->end) - start;
- for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
- anon = amap_lookup(&current->aref, offset);
+ cp_start = MAX(entry->start, start);
+ cp_end = MIN(entry->end, end);
+
+ for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
+ anon = amap_lookup(&entry->aref,
+ cp_start - entry->start);
if (anon == NULL)
continue;
- simple_lock(&anon->an_lock);
+ simple_lock(&anon->an_lock); /* XXX */
pg = anon->an_page;
if (pg == NULL) {
@@ -2986,23 +4140,21 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
}
switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
-
/*
* XXX In these first 3 cases, we always just
* XXX deactivate the page. We may want to
* XXX handle the different cases more
* XXX specifically, in the future.
*/
-
case PGO_CLEANIT|PGO_FREE:
case PGO_CLEANIT|PGO_DEACTIVATE:
case PGO_DEACTIVATE:
- deactivate_it:
+deactivate_it:
/* skip the page if it's loaned or wired */
if (pg->loan_count != 0 ||
pg->wire_count != 0) {
simple_unlock(&anon->an_lock);
- continue;
+ break;
}
uvm_lock_pageq();
@@ -3012,51 +4164,45 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
* by the anon (may simply be loaned to the
* anon).
*/
-
if ((pg->pg_flags & PQ_ANON) == 0) {
KASSERT(pg->uobject == NULL);
uvm_unlock_pageq();
simple_unlock(&anon->an_lock);
- continue;
+ break;
}
KASSERT(pg->uanon == anon);
-#ifdef UBC
- /* ...and deactivate the page. */
- pmap_clear_reference(pg);
-#else
/* zap all mappings for the page. */
pmap_page_protect(pg, VM_PROT_NONE);
/* ...and deactivate the page. */
-#endif
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
simple_unlock(&anon->an_lock);
- continue;
+ break;
case PGO_FREE:
/*
- * If there are multiple references to
+			 * If there are multiple references to
* the amap, just deactivate the page.
*/
-
if (amap_refs(amap) > 1)
goto deactivate_it;
/* XXX skip the page if it's wired */
if (pg->wire_count != 0) {
simple_unlock(&anon->an_lock);
- continue;
+ break;
}
- amap_unadd(&current->aref, offset);
+ amap_unadd(&entry->aref,
+ cp_start - entry->start);
refs = --anon->an_ref;
simple_unlock(&anon->an_lock);
if (refs == 0)
uvm_anfree(anon);
- continue;
+ break;
default:
panic("uvm_map_clean: weird flags");
@@ -3064,817 +4210,819 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
}
flush_object:
+ cp_start = MAX(entry->start, start);
+ cp_end = MIN(entry->end, end);
+
/*
* flush pages if we've got a valid backing object.
*
* Don't PGO_FREE if we don't have write permission
- * and don't flush if this is a copy-on-write object
+ * and don't flush if this is a copy-on-write object
* since we can't know our permissions on it.
*/
-
- offset = current->offset + (start - current->start);
- size = MIN(end, current->end) - start;
if (uobj != NULL &&
((flags & PGO_FREE) == 0 ||
((entry->max_protection & VM_PROT_WRITE) != 0 &&
(entry->etype & UVM_ET_COPYONWRITE) == 0))) {
simple_lock(&uobj->vmobjlock);
- rv = uobj->pgops->pgo_flush(uobj, offset,
- offset + size, flags);
+ rv = uobj->pgops->pgo_flush(uobj,
+ cp_start - entry->start + entry->offset,
+ cp_end - entry->start + entry->offset, flags);
simple_unlock(&uobj->vmobjlock);
if (rv == FALSE)
error = EFAULT;
}
- start += size;
}
+
vm_map_unlock_read(map);
- return (error);
+ return error;
}
-
/*
- * uvm_map_checkprot: check protection in map
- *
- * => must allow specified protection in a fully allocated region.
- * => map must be read or write locked by caller.
+ * UVM_MAP_CLIP_END implementation
*/
-
-boolean_t
-uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
- vm_prot_t protection)
+void
+uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
{
- struct vm_map_entry *entry;
- struct vm_map_entry *tmp_entry;
-
- if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
- return(FALSE);
- }
- entry = tmp_entry;
- while (start < end) {
- if (entry == &map->header) {
- return(FALSE);
- }
-
- /*
- * no holes allowed
- */
-
- if (start < entry->start) {
- return(FALSE);
- }
-
- /*
- * check protection associated with entry
- */
-
- if ((entry->protection & protection) != protection) {
- return(FALSE);
- }
+ struct vm_map_entry *tmp;
- /* go to next entry */
+ KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
+ tmp = uvm_mapent_alloc(map, 0);
- start = entry->end;
- entry = entry->next;
- }
- return(TRUE);
+ /*
+ * Invoke splitentry.
+ */
+ uvm_map_splitentry(map, entry, tmp, addr);
}
/*
- * uvmspace_alloc: allocate a vmspace structure.
+ * UVM_MAP_CLIP_START implementation
*
- * - structure includes vm_map and pmap
- * - XXX: no locking on this structure
- * - refcnt set to 1, rest must be init'd by caller
+ * Clippers must not change the pointers to the entry they are clipping on.
+ * Since uvm_map_splitentry turns the original entry into the lowest
+ * entry (address-wise), we swap the new entry and the original entry
+ * before calling uvm_map_splitentry.
*/
-struct vmspace *
-uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
- boolean_t remove_holes)
+void
+uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
{
- struct vmspace *vm;
+ struct vm_map_entry *tmp;
+ struct uvm_addr_state *free;
+
+ /* Unlink original. */
+ free = uvm_map_uaddr_e(map, entry);
+ if (free)
+ uvm_mapent_free_remove(map, free, entry);
+ uvm_mapent_addr_remove(map, entry);
+
+ /* Copy entry. */
+ KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
+ tmp = uvm_mapent_alloc(map, 0);
+ uvm_mapent_copy(entry, tmp);
+
+ /* Put new entry in place of original entry. */
+ uvm_mapent_addr_insert(map, tmp);
+ if (free)
+ uvm_mapent_free_insert(map, free, tmp);
+
+ /* Invoke splitentry. */
+ uvm_map_splitentry(map, tmp, entry, addr);
+}
- vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
- uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
- return (vm);
+/*
+ * Boundary fixer.
+ */
+static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
+static __inline vaddr_t
+uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
+{
+ return (min < bound && max > bound) ? bound : max;
}
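
uvm_map_boundary() below applies this clamping rule once per known boundary, so the value it ends up returning is the lowest boundary that lies strictly inside the requested range, or max when no boundary is crossed. The following standalone user-space sketch reproduces only that rule; the constants and the main() driver are invented for illustration and are not part of this commit.

/* Standalone sketch of the boundary-clamping rule; not kernel code. */
#include <stdio.h>

typedef unsigned long vaddr_t;		/* stand-in for the kernel type */

static vaddr_t
boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
{
	/* Only clamp when the boundary lies strictly inside (min, max). */
	return (min < bound && max > bound) ? bound : max;
}

int
main(void)
{
	vaddr_t min = 0x1000, max = 0x9000;

	/* 0x4000 lies inside (min, max): max is clamped to it. */
	printf("0x%lx\n", boundfix(min, max, 0x4000));	/* 0x4000 */
	/* 0xf000 lies outside: max is returned unchanged. */
	printf("0x%lx\n", boundfix(min, max, 0xf000));	/* 0x9000 */
	/* Applying several boundaries in turn keeps the lowest crossed one. */
	max = boundfix(min, max, 0x6000);
	max = boundfix(min, max, 0x3000);
	printf("0x%lx\n", max);				/* 0x3000 */
	return 0;
}
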
/*
- * uvmspace_init: initialize a vmspace structure.
+ * Choose free list based on address at start of free space.
*
- * - XXX: no locking on this structure
- * - refcnt set to 1, rest must be init'd by caller
+ * The uvm_addr_state returned contains addr and is the first of:
+ * - uaddr_exe
+ * - uaddr_brk_stack
+ * - uaddr_any
*/
-void
-uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
- boolean_t pageable, boolean_t remove_holes)
+struct uvm_addr_state*
+uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
{
+ struct uvm_addr_state *uaddr;
+ int i;
- uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
+ /* Special case the first page, to prevent mmap from returning 0. */
+ if (addr < VMMAP_MIN_ADDR)
+ return NULL;
- if (pmap)
- pmap_reference(pmap);
- else
- pmap = pmap_create();
- vm->vm_map.pmap = pmap;
+ /* Upper bound for kernel maps at uvm_maxkaddr. */
+ if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
+ if (addr >= uvm_maxkaddr)
+ return NULL;
+ }
- vm->vm_refcnt = 1;
+ /* Is the address inside the exe-only map? */
+ if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
+ addr < map->uaddr_exe->uaddr_maxaddr)
+ return map->uaddr_exe;
+
+ /* Check if the space falls inside brk/stack area. */
+ if ((addr >= map->b_start && addr < map->b_end) ||
+ (addr >= map->s_start && addr < map->s_end)) {
+ if (map->uaddr_brk_stack != NULL &&
+ addr >= map->uaddr_brk_stack->uaddr_minaddr &&
+ addr < map->uaddr_brk_stack->uaddr_maxaddr) {
+ return map->uaddr_brk_stack;
+ } else
+ return NULL;
+ }
- if (remove_holes)
- pmap_remove_holes(&vm->vm_map);
+ /*
+ * Check the other selectors.
+ *
+	 * These selectors are only marked as the owner if they have insert
+ * functions.
+ */
+ for (i = 0; i < nitems(map->uaddr_any); i++) {
+ uaddr = map->uaddr_any[i];
+ if (uaddr == NULL)
+ continue;
+ if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
+ continue;
+
+ if (addr >= uaddr->uaddr_minaddr &&
+ addr < uaddr->uaddr_maxaddr)
+ return uaddr;
+ }
+
+ return NULL;
}
/*
- * uvmspace_share: share a vmspace between two processes
+ * Choose free list based on address at start of free space.
*
- * - XXX: no locking on vmspace
- * - used for vfork and threads
+ * The uvm_addr_state returned contains addr and is the first of:
+ * - uaddr_exe
+ * - uaddr_brk_stack
+ * - uaddr_any
*/
-
-void
-uvmspace_share(struct proc *p1, struct proc *p2)
+struct uvm_addr_state*
+uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
{
- p2->p_vmspace = p1->p_vmspace;
- p1->p_vmspace->vm_refcnt++;
+ return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
}
/*
- * uvmspace_exec: the process wants to exec a new program
- *
- * - XXX: no locking on vmspace
+ * Returns the first free-memory boundary that is crossed by [min-max].
*/
-
-void
-uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
+vsize_t
+uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
{
- struct vmspace *nvm, *ovm = p->p_vmspace;
- struct vm_map *map = &ovm->vm_map;
-
- pmap_unuse_final(p); /* before stack addresses go away */
-
- /*
- * see if more than one process is using this vmspace...
- */
+ struct uvm_addr_state *uaddr;
+ int i;
- if (ovm->vm_refcnt == 1) {
+ /* Never return first page. */
+ max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
- /*
- * if p is the only process using its vmspace then we can safely
- * recycle that vmspace for the program that is being exec'd.
- */
+ /* Treat the maxkaddr special, if the map is a kernel_map. */
+ if ((map->flags & VM_MAP_ISVMSPACE) == 0)
+ max = uvm_map_boundfix(min, max, uvm_maxkaddr);
-#ifdef SYSVSHM
- /*
- * SYSV SHM semantics require us to kill all segments on an exec
- */
- if (ovm->vm_shm)
- shmexit(ovm);
-#endif
+ /* Check for exe-only boundaries. */
+ if (map->uaddr_exe != NULL) {
+ max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
+ max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
+ }
- /*
- * POSIX 1003.1b -- "lock future mappings" is revoked
- * when a process execs another program image.
- */
- vm_map_lock(map);
- vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
- vm_map_unlock(map);
+	/* Check for brk/stack boundaries. */
+ if (map->uaddr_brk_stack != NULL) {
+ max = uvm_map_boundfix(min, max,
+ map->uaddr_brk_stack->uaddr_minaddr);
+ max = uvm_map_boundfix(min, max,
+ map->uaddr_brk_stack->uaddr_maxaddr);
+ }
- /*
- * now unmap the old program
- */
- uvm_unmap(map, map->min_offset, map->max_offset);
+ /* Check other boundaries. */
+ for (i = 0; i < nitems(map->uaddr_any); i++) {
+ uaddr = map->uaddr_any[i];
+ if (uaddr != NULL) {
+ max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
+ max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
+ }
+ }
- /*
- * but keep MMU holes unavailable
- */
- pmap_remove_holes(map);
+ /* Boundaries at stack and brk() area. */
+ max = uvm_map_boundfix(min, max, map->s_start);
+ max = uvm_map_boundfix(min, max, map->s_end);
+ max = uvm_map_boundfix(min, max, map->b_start);
+ max = uvm_map_boundfix(min, max, map->b_end);
- /*
- * resize the map
- */
- vm_map_lock(map);
- map->min_offset = start;
- uvm_tree_sanity(map, "resize enter");
- map->max_offset = end;
- if (map->header.prev != &map->header)
- uvm_rb_fixup(map, map->header.prev);
- uvm_tree_sanity(map, "resize leave");
- vm_map_unlock(map);
-
+ return max;
+}
- } else {
+/*
+ * Update map allocation start and end addresses from proc vmspace.
+ */
+void
+uvm_map_vmspace_update(struct vm_map *map,
+ struct uvm_map_deadq *dead, int flags)
+{
+ struct vmspace *vm;
+ vaddr_t b_start, b_end, s_start, s_end;
- /*
- * p's vmspace is being shared, so we can't reuse it for p since
- * it is still being used for others. allocate a new vmspace
- * for p
- */
- nvm = uvmspace_alloc(start, end,
- (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
+ KASSERT(map->flags & VM_MAP_ISVMSPACE);
+ KASSERT(offsetof(struct vmspace, vm_map) == 0);
- /*
- * install new vmspace and drop our ref to the old one.
- */
+ /*
+ * Derive actual allocation boundaries from vmspace.
+ */
+ vm = (struct vmspace *)map;
+ b_start = (vaddr_t)vm->vm_daddr;
+ b_end = b_start + BRKSIZ;
+ s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+ s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
+#ifdef DIAGNOSTIC
+ if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
+ (b_end & (vaddr_t)PAGE_MASK) != 0 ||
+ (s_start & (vaddr_t)PAGE_MASK) != 0 ||
+ (s_end & (vaddr_t)PAGE_MASK) != 0) {
+ panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
+ "b=0x%lx-0x%lx s=0x%lx-0x%lx",
+ vm, b_start, b_end, s_start, s_end);
+ }
+#endif
- pmap_deactivate(p);
- p->p_vmspace = nvm;
- pmap_activate(p);
+ if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
+ map->s_start == s_start && map->s_end == s_end))
+ return;
- uvmspace_free(ovm);
- }
+ uvm_map_freelist_update(map, dead, b_start, b_end,
+ s_start, s_end, flags);
}
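
The brk() range is simply [vm_daddr, vm_daddr + BRKSIZ) and the stack range is the min/max pair of vm_maxsaddr and vm_minsaddr, which keeps the computation independent of the stack growth direction. Below is a minimal standalone sketch of that derivation; BRKSIZ and the sample addresses are placeholders for illustration, not the real kernel values.

/* Standalone sketch of the brk/stack range derivation; not kernel code. */
#include <stdio.h>

typedef unsigned long vaddr_t;

#define BRKSIZ		(1UL << 30)		/* assumed brk arena size */
#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	/* Stand-ins for vm_daddr, vm_maxsaddr and vm_minsaddr. */
	vaddr_t daddr = 0x10000000, maxsaddr = 0x7f000000, minsaddr = 0x80000000;
	vaddr_t b_start, b_end, s_start, s_end;

	b_start = daddr;
	b_end = b_start + BRKSIZ;
	/* MIN/MAX makes the range valid whichever way the stack grows. */
	s_start = MIN(maxsaddr, minsaddr);
	s_end = MAX(maxsaddr, minsaddr);

	printf("brk:   0x%lx-0x%lx\n", b_start, b_end);
	printf("stack: 0x%lx-0x%lx\n", s_start, s_end);
	return 0;
}
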
/*
- * uvmspace_free: free a vmspace data structure
+ * Grow kernel memory.
*
- * - XXX: no locking on vmspace
+ * This function is only called for kernel maps when an allocation fails.
+ *
+ * If the map has a gap that is large enough to accommodate alloc_sz, this
+ * function will make sure map->free will include it.
*/
-
void
-uvmspace_free(struct vmspace *vm)
+uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
+ vsize_t alloc_sz, int flags)
{
- struct vm_map_entry *dead_entries;
+ vsize_t sz;
+ vaddr_t end;
+ struct vm_map_entry *entry;
- if (--vm->vm_refcnt == 0) {
- /*
- * lock the map, to wait out all other references to it. delete
- * all of the mappings and pages they hold, then call the pmap
- * module to reclaim anything left.
- */
-#ifdef SYSVSHM
- /* Get rid of any SYSV shared memory segments. */
- if (vm->vm_shm != NULL)
- shmexit(vm);
+ /* Kernel memory only. */
+ KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
+ /* Destroy free list. */
+ uvm_map_freelist_update_clear(map, dead);
+
+ /*
+ * Include the guard page in the hard minimum requirement of alloc_sz.
+ */
+ if (map->flags & VM_MAP_GUARDPAGES)
+ alloc_sz += PAGE_SIZE;
+
+ /*
+ * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
+ *
+ * Don't handle the case where the multiplication overflows:
+ * if that happens, the allocation is probably too big anyway.
+ */
+ sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
+
+ /*
+ * Walk forward until a gap large enough for alloc_sz shows up.
+ *
+ * We assume the kernel map has no boundaries.
+ * uvm_maxkaddr may be zero.
+ */
+ end = MAX(uvm_maxkaddr, map->min_offset);
+ entry = uvm_map_entrybyaddr(&map->addr, end);
+ while (entry && entry->fspace < alloc_sz)
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
+ if (entry) {
+ end = MAX(VMMAP_FREE_START(entry), end);
+ end += MIN(sz, map->max_offset - end);
+ } else
+ end = map->max_offset;
+
+ /* Reserve pmap entries. */
+#ifdef PMAP_GROWKERNEL
+ uvm_maxkaddr = pmap_growkernel(end);
+#else
+ uvm_maxkaddr = end;
#endif
- vm_map_lock(&vm->vm_map);
- if (vm->vm_map.nentries) {
- uvm_unmap_remove(&vm->vm_map,
- vm->vm_map.min_offset, vm->vm_map.max_offset,
- &dead_entries, NULL, TRUE);
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
- }
- pmap_destroy(vm->vm_map.pmap);
- vm->vm_map.pmap = NULL;
- pool_put(&uvm_vmspace_pool, vm);
- }
+
+	printf("uvm_map_kmem_grow: grown to 0x%lx\n", uvm_maxkaddr);
+
+ /* Rebuild free list. */
+ uvm_map_freelist_update_refill(map, flags);
}
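
The growth step above boils down to sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA), with the guard page folded into alloc_sz first. The sketch below reproduces only that sizing policy; the multiplier and delta values are placeholders chosen for the example, not the kernel's actual constants.

/* Standalone sketch of the kernel-map growth sizing; not kernel code. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define KSIZE_ALLOCMUL	4UL			/* assumed multiplier */
#define KSIZE_DELTA	(1024UL * PAGE_SIZE)	/* assumed minimum step */
#define MAX(a, b)	((a) > (b) ? (a) : (b))

static unsigned long
grow_size(unsigned long alloc_sz, int guardpages)
{
	if (guardpages)
		alloc_sz += PAGE_SIZE;	/* the guard is part of the minimum */
	/* Grow by a multiple of the request, but never by less than the delta. */
	return MAX(KSIZE_ALLOCMUL * alloc_sz, KSIZE_DELTA);
}

int
main(void)
{
	/* A small request is dominated by the fixed delta... */
	printf("%lu\n", grow_size(8 * PAGE_SIZE, 0));
	/* ...a large one by the multiplier. */
	printf("%lu\n", grow_size(2048 * PAGE_SIZE, 1));
	return 0;
}
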
/*
- * uvm_map_create: create map
+ * Freelist update subfunction: unlink all entries from freelists.
*/
-vm_map_t
-uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
+void
+uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
{
- vm_map_t result;
-
- result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
- uvm_map_setup(result, min, max, flags);
- result->pmap = pmap;
- return(result);
+ struct uvm_addr_state *free;
+ struct vm_map_entry *entry, *prev, *next;
+
+ prev = NULL;
+ for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL;
+ entry = next) {
+ next = RB_NEXT(uvm_map_addr, &map->addr, entry);
+
+ free = uvm_map_uaddr_e(map, entry);
+ if (free)
+ uvm_mapent_free_remove(map, free, entry);
+
+ if (prev != NULL && entry->start == entry->end) {
+ prev->fspace += VMMAP_FREE_END(entry) - entry->end;
+ uvm_mapent_addr_remove(map, entry);
+ DEAD_ENTRY_PUSH(dead, entry);
+ } else
+ prev = entry;
+ }
}
/*
- * uvm_map_setup: init map
- *
- * => map must not be in service yet.
+ * Freelist update subfunction: refill the freelists with entries.
*/
void
-uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags)
+uvm_map_freelist_update_refill(struct vm_map *map, int flags)
{
+ struct vm_map_entry *entry;
+ vaddr_t min, max;
- RB_INIT(&map->rbhead);
- map->header.next = map->header.prev = &map->header;
- map->nentries = 0;
- map->size = 0;
- map->ref_count = 1;
- map->min_offset = min;
- map->max_offset = max;
- map->flags = flags;
- map->first_free = &map->header;
- map->hint = &map->header;
- map->timestamp = 0;
- rw_init(&map->lock, "vmmaplk");
- simple_lock_init(&map->ref_lock);
- simple_lock_init(&map->hint_lock);
-}
+ RB_FOREACH(entry, uvm_map_addr, &map->addr) {
+ min = VMMAP_FREE_START(entry);
+ max = VMMAP_FREE_END(entry);
+ entry->fspace = 0;
+ entry = uvm_map_fix_space(map, entry, min, max, flags);
+ }
+ uvm_tree_sanity(map, __FILE__, __LINE__);
+}
/*
- * uvm_map_reference: add reference to a map
- *
- * => map need not be locked (we use ref_lock).
+ * Change {a,b}_{start,end} allocation ranges and associated free lists.
*/
void
-uvm_map_reference(vm_map_t map)
+uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
+ vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
{
- simple_lock(&map->ref_lock);
- map->ref_count++;
- simple_unlock(&map->ref_lock);
+ KDASSERT(b_end >= b_start && s_end >= s_start);
+
+ /* Clear all free lists. */
+ uvm_map_freelist_update_clear(map, dead);
+
+ /* Apply new bounds. */
+ map->b_start = b_start;
+ map->b_end = b_end;
+ map->s_start = s_start;
+ map->s_end = s_end;
+
+ /* Refill free lists. */
+ uvm_map_freelist_update_refill(map, flags);
}
/*
- * uvm_map_deallocate: drop reference to a map
+ * Assign a uvm_addr_state to the specified pointer in vm_map.
*
- * => caller must not lock map
- * => we will zap map if ref count goes to zero
+ * May sleep.
*/
void
-uvm_map_deallocate(vm_map_t map)
+uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
+ struct uvm_addr_state *newval)
{
- int c;
+ struct uvm_map_deadq dead;
- simple_lock(&map->ref_lock);
- c = --map->ref_count;
- simple_unlock(&map->ref_lock);
- if (c > 0) {
- return;
- }
+ /* Pointer which must be in this map. */
+ KASSERT(which != NULL);
+ KASSERT((void*)map <= (void*)(which) &&
+ (void*)(which) < (void*)(map + 1));
- /*
- * all references gone. unmap and free.
- */
+ vm_map_lock(map);
+ TAILQ_INIT(&dead);
+ uvm_map_freelist_update_clear(map, &dead);
- uvm_unmap(map, map->min_offset, map->max_offset);
- pmap_destroy(map->pmap);
- free(map, M_VMMAP);
+ uvm_addr_destroy(*which);
+ *which = newval;
+
+ uvm_map_freelist_update_refill(map, 0);
+ vm_map_unlock(map);
+ uvm_unmap_detach(&dead, 0);
}
/*
- * F O R K - m a i n e n t r y p o i n t
- */
-/*
- * uvmspace_fork: fork a process' main map
+ * Correct space insert.
*
- * => create a new vmspace for child process from parent.
- * => parent's map must not be locked.
+ * Entry must not be on any freelist.
*/
-
-struct vmspace *
-uvmspace_fork(struct vmspace *vm1)
+struct vm_map_entry*
+uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
+ vaddr_t min, vaddr_t max, int flags)
{
- struct vmspace *vm2;
- struct vm_map *old_map = &vm1->vm_map;
- struct vm_map *new_map;
- struct vm_map_entry *old_entry;
- struct vm_map_entry *new_entry;
- pmap_t new_pmap;
- boolean_t protect_child;
-
- vm_map_lock(old_map);
-
- vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
- (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
- memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
- (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
- new_map = &vm2->vm_map; /* XXX */
- new_pmap = new_map->pmap;
+ struct uvm_addr_state *free, *entfree;
+ vaddr_t lmax;
- old_entry = old_map->header.next;
+ KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
+ KDASSERT(min <= max);
+ KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
+ min == map->min_offset);
/*
- * go entry-by-entry
+ * During the function, entfree will always point at the uaddr state
+ * for entry.
*/
+ entfree = (entry == NULL ? NULL :
+ uvm_map_uaddr_e(map, entry));
- while (old_entry != &old_map->header) {
-
+ while (min != max) {
/*
- * first, some sanity checks on the old entry
+ * Claim guard page for entry.
*/
- if (UVM_ET_ISSUBMAP(old_entry))
- panic("fork: encountered a submap during fork (illegal)");
-
- if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
- UVM_ET_ISNEEDSCOPY(old_entry))
- panic("fork: non-copy_on_write map entry marked needs_copy (illegal)");
-
-
- switch (old_entry->inheritance) {
- case MAP_INHERIT_NONE:
- /*
- * drop the mapping
- */
- break;
-
- case MAP_INHERIT_SHARE:
- /*
- * share the mapping: this means we want the old and
- * new entries to share amaps and backing objects.
- */
-
- /*
- * if the old_entry needs a new amap (due to prev fork)
- * then we need to allocate it now so that we have
- * something we own to share with the new_entry. [in
- * other words, we need to clear needs_copy]
- */
-
- if (UVM_ET_ISNEEDSCOPY(old_entry)) {
- /* get our own amap, clears needs_copy */
- amap_copy(old_map, old_entry, M_WAITOK, FALSE,
- 0, 0);
- /* XXXCDC: WAITOK??? */
+ if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
+ VMMAP_FREE_END(entry) == entry->end &&
+ entry->start != entry->end) {
+ if (max - min == 2 * PAGE_SIZE) {
+ /*
+ * If the free-space gap is exactly 2 pages,
+ * we make the guard 2 pages instead of 1.
+ * Because in a guarded map, an area needs
+ * at least 2 pages to allocate from:
+ * one page for the allocation and one for
+ * the guard.
+ */
+ entry->guard = 2 * PAGE_SIZE;
+ min = max;
+ } else {
+ entry->guard = PAGE_SIZE;
+ min += PAGE_SIZE;
}
+ continue;
+ }
- new_entry = uvm_mapent_alloc(new_map, 0);
- /* old_entry -> new_entry */
- uvm_mapent_copy(old_entry, new_entry);
+ /*
+ * Handle the case where entry has a 2-page guard, but the
+ * space after entry is freed.
+ */
+ if (entry != NULL && entry->fspace == 0 &&
+ entry->guard > PAGE_SIZE) {
+ entry->guard = PAGE_SIZE;
+ min = VMMAP_FREE_START(entry);
+ }
- /* new pmap has nothing wired in it */
- new_entry->wired_count = 0;
+ lmax = uvm_map_boundary(map, min, max);
+ free = uvm_map_uaddr(map, min);
+ /*
+ * Entries are merged if they point at the same uvm_free().
+ * Exception to that rule: if min == uvm_maxkaddr, a new
+ * entry is started regardless (otherwise the allocators
+ * will get confused).
+ */
+ if (entry != NULL && free == entfree &&
+ !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
+ min == uvm_maxkaddr)) {
+ KDASSERT(VMMAP_FREE_END(entry) == min);
+ entry->fspace += lmax - min;
+ } else {
/*
- * gain reference to object backing the map (can't
- * be a submap, already checked this case).
+			 * Commit entry to the free list: nothing more
+			 * will be added to it.
+			 * We'll start a new entry and add to that
+			 * entry instead.
*/
- if (new_entry->aref.ar_amap)
- /* share reference */
- uvm_map_reference_amap(new_entry, AMAP_SHARED);
-
- if (new_entry->object.uvm_obj &&
- new_entry->object.uvm_obj->pgops->pgo_reference)
- new_entry->object.uvm_obj->
- pgops->pgo_reference(
- new_entry->object.uvm_obj);
-
- /* insert entry at end of new_map's entry list */
- uvm_map_entry_link(new_map, new_map->header.prev,
- new_entry);
-
- /*
- * pmap_copy the mappings: this routine is optional
- * but if it is there it will reduce the number of
- * page faults in the new proc.
- */
-
- pmap_copy(new_pmap, old_map->pmap, new_entry->start,
- (old_entry->end - old_entry->start),
- old_entry->start);
+ if (entry != NULL && entfree != NULL)
+ uvm_mapent_free_insert(map, entfree, entry);
+
+ /* New entry for new uaddr. */
+ entry = uvm_mapent_alloc(map, flags);
+ KDASSERT(entry != NULL);
+ entry->end = entry->start = min;
+ entry->guard = 0;
+ entry->fspace = lmax - min;
+ entry->object.uvm_obj = NULL;
+ entry->offset = 0;
+ entry->etype = 0;
+ entry->protection = entry->max_protection = 0;
+ entry->inheritance = 0;
+ entry->wired_count = 0;
+ entry->advice = 0;
+ entry->aref.ar_pageoff = 0;
+ entry->aref.ar_amap = NULL;
+ uvm_mapent_addr_insert(map, entry);
+
+ entfree = free;
+ }
- break;
+ min = lmax;
+ }
+ /* Finally put entry on the uaddr state. */
+ if (entry != NULL && entfree != NULL)
+ uvm_mapent_free_insert(map, entfree, entry);
- case MAP_INHERIT_COPY:
+ return entry;
+}
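
The guard handling inside the loop above is worth spelling out: a free gap of exactly two pages is consumed entirely as guard, because a guarded allocation needs at least one page for itself plus one for its guard; any larger gap gives up a single guard page and leaves the rest allocatable (subject to the boundary splitting the real loop also performs, which this sketch ignores). A standalone rendition of that decision, with an invented helper name:

/* Standalone sketch of the guard-page decision; not kernel code. */
#include <stdio.h>

#define PAGE_SIZE	4096UL

/*
 * Given a free gap [min, max) directly after a mapped entry in a
 * guarded map, decide how many bytes become guard and how many stay
 * allocatable.
 */
static void
claim_guard(unsigned long min, unsigned long max,
    unsigned long *guard, unsigned long *usable)
{
	if (max - min == 2 * PAGE_SIZE) {
		/* A 2-page gap is all guard: nothing useful fits in it. */
		*guard = 2 * PAGE_SIZE;
		*usable = 0;
	} else {
		*guard = PAGE_SIZE;
		*usable = (max - min) - PAGE_SIZE;
	}
}

int
main(void)
{
	unsigned long guard, usable;

	claim_guard(0x10000, 0x10000 + 2 * PAGE_SIZE, &guard, &usable);
	printf("guard=%lu usable=%lu\n", guard, usable);	/* 8192, 0 */
	claim_guard(0x10000, 0x10000 + 8 * PAGE_SIZE, &guard, &usable);
	printf("guard=%lu usable=%lu\n", guard, usable);	/* 4096, 28672 */
	return 0;
}
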
- /*
- * copy-on-write the mapping (using mmap's
- * MAP_PRIVATE semantics)
- *
- * allocate new_entry, adjust reference counts.
- * (note that new references are read-only).
- */
+/*
+ * MQuery style of allocation.
+ *
+ * This allocator searches forward until sufficient space is found to map
+ * the given size.
+ *
+ * XXX: factor in offset (via pmap_prefer) and protection?
+ */
+int
+uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
+ int flags)
+{
+ struct vm_map_entry *entry, *last;
+ vaddr_t addr;
+ vaddr_t tmp, pmap_align, pmap_offset;
+ int error;
- new_entry = uvm_mapent_alloc(new_map, 0);
- /* old_entry -> new_entry */
- uvm_mapent_copy(old_entry, new_entry);
+ addr = *addr_p;
+ vm_map_lock_read(map);
- if (new_entry->aref.ar_amap)
- uvm_map_reference_amap(new_entry, 0);
+ /*
+ * Configure pmap prefer.
+ */
+ if (offset != UVM_UNKNOWN_OFFSET) {
+ pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
+ pmap_offset = PMAP_PREFER_OFFSET(offset);
+ } else {
+ pmap_align = PAGE_SIZE;
+ pmap_offset = 0;
+ }
- if (new_entry->object.uvm_obj &&
- new_entry->object.uvm_obj->pgops->pgo_reference)
- new_entry->object.uvm_obj->pgops->pgo_reference
- (new_entry->object.uvm_obj);
+ /*
+ * Align address to pmap_prefer unless FLAG_FIXED is set.
+ */
+ if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
+ tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
+ if (tmp < addr)
+ tmp += pmap_align;
+ addr = tmp;
+ }
- /* new pmap has nothing wired in it */
- new_entry->wired_count = 0;
+ /*
+ * First, check if the requested range is fully available.
+ */
+ entry = uvm_map_entrybyaddr(&map->addr, addr);
+ last = NULL;
+ if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
+ error = 0;
+ goto out;
+ }
+ if (flags & UVM_FLAG_FIXED) {
+ error = EINVAL;
+ goto out;
+ }
- new_entry->etype |=
- (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
- uvm_map_entry_link(new_map, new_map->header.prev,
- new_entry);
+ error = ENOMEM; /* Default error from here. */
- /*
- * the new entry will need an amap. it will either
- * need to be copied from the old entry or created
- * from scratch (if the old entry does not have an
- * amap). can we defer this process until later
- * (by setting "needs_copy") or do we need to copy
- * the amap now?
- *
- * we must copy the amap now if any of the following
- * conditions hold:
- * 1. the old entry has an amap and that amap is
- * being shared. this means that the old (parent)
- * process is sharing the amap with another
- * process. if we do not clear needs_copy here
- * we will end up in a situation where both the
- * parent and child process are referring to the
- * same amap with "needs_copy" set. if the
- * parent write-faults, the fault routine will
- * clear "needs_copy" in the parent by allocating
- * a new amap. this is wrong because the
- * parent is supposed to be sharing the old amap
- * and the new amap will break that.
- *
- * 2. if the old entry has an amap and a non-zero
- * wire count then we are going to have to call
- * amap_cow_now to avoid page faults in the
- * parent process. since amap_cow_now requires
- * "needs_copy" to be clear we might as well
- * clear it here as well.
- *
- */
+ /*
+ * At this point, the memory at <addr, sz> is not available.
+ * The reasons are:
+ * [1] it's outside the map,
+ * [2] it starts in used memory (and therefore needs to move
+ * toward the first free page in entry),
+ * [3] it starts in free memory but bumps into used memory.
+ *
+ * Note that for case [2], the forward moving is handled by the
+ * for loop below.
+ */
- if (old_entry->aref.ar_amap != NULL) {
+ if (entry == NULL) {
+ /* [1] Outside the map. */
+ if (addr >= map->max_offset)
+ goto out;
+ else
+ entry = RB_MIN(uvm_map_addr, &map->addr);
+ } else if (VMMAP_FREE_START(entry) <= addr) {
+ /* [3] Bumped into used memory. */
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
+ }
- if ((amap_flags(old_entry->aref.ar_amap) &
- AMAP_SHARED) != 0 ||
- VM_MAPENT_ISWIRED(old_entry)) {
+ /*
+ * Test if the next entry is sufficient for the allocation.
+ */
+ for (; entry != NULL;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
+ if (entry->fspace == 0)
+ continue;
+ addr = VMMAP_FREE_START(entry);
- amap_copy(new_map, new_entry, M_WAITOK, FALSE,
- 0, 0);
- /* XXXCDC: M_WAITOK ... ok? */
- }
- }
+restart: /* Restart address checks on address change. */
- /*
- * if the parent's entry is wired down, then the
- * parent process does not want page faults on
- * access to that memory. this means that we
- * cannot do copy-on-write because we can't write
- * protect the old entry. in this case we
- * resolve all copy-on-write faults now, using
- * amap_cow_now. note that we have already
- * allocated any needed amap (above).
- */
+ tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
+ if (tmp < addr)
+ tmp += pmap_align;
+ addr = tmp;
+ if (addr >= VMMAP_FREE_END(entry))
+ continue;
- if (VM_MAPENT_ISWIRED(old_entry)) {
-
- /*
- * resolve all copy-on-write faults now
- * (note that there is nothing to do if
- * the old mapping does not have an amap).
- * XXX: is it worthwhile to bother with pmap_copy
- * in this case?
- */
- if (old_entry->aref.ar_amap)
- amap_cow_now(new_map, new_entry);
-
- } else {
-
- /*
- * setup mappings to trigger copy-on-write faults
- * we must write-protect the parent if it has
- * an amap and it is not already "needs_copy"...
- * if it is already "needs_copy" then the parent
- * has already been write-protected by a previous
- * fork operation.
- *
- * if we do not write-protect the parent, then
- * we must be sure to write-protect the child
- * after the pmap_copy() operation.
- *
- * XXX: pmap_copy should have some way of telling
- * us that it didn't do anything so we can avoid
- * calling pmap_protect needlessly.
- */
-
- if (old_entry->aref.ar_amap) {
-
- if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
- if (old_entry->max_protection & VM_PROT_WRITE) {
- pmap_protect(old_map->pmap,
- old_entry->start,
- old_entry->end,
- old_entry->protection &
- ~VM_PROT_WRITE);
- pmap_update(old_map->pmap);
-
- }
- old_entry->etype |= UVM_ET_NEEDSCOPY;
- }
-
- /*
- * parent must now be write-protected
- */
- protect_child = FALSE;
- } else {
-
- /*
- * we only need to protect the child if the
- * parent has write access.
- */
- if (old_entry->max_protection & VM_PROT_WRITE)
- protect_child = TRUE;
- else
- protect_child = FALSE;
-
- }
-
- /*
- * copy the mappings
- * XXX: need a way to tell if this does anything
- */
-
- pmap_copy(new_pmap, old_map->pmap,
- new_entry->start,
- (old_entry->end - old_entry->start),
- old_entry->start);
-
- /*
- * protect the child's mappings if necessary
- */
- if (protect_child) {
- pmap_protect(new_pmap, new_entry->start,
- new_entry->end,
- new_entry->protection &
- ~VM_PROT_WRITE);
- }
+ /*
+ * Skip brk() allocation addresses.
+ */
+ if (addr + sz > map->b_start && addr < map->b_end) {
+ if (VMMAP_FREE_END(entry) > map->b_end) {
+ addr = map->b_end;
+ goto restart;
+ } else
+ continue;
+ }
+ /*
+ * Skip stack allocation addresses.
+ */
+ if (addr + sz > map->s_start && addr < map->s_end) {
+ if (VMMAP_FREE_END(entry) > map->s_end) {
+ addr = map->s_end;
+ goto restart;
+ } else
+ continue;
+ }
- }
- break;
- } /* end of switch statement */
- old_entry = old_entry->next;
+ last = NULL;
+ if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
+ error = 0;
+ goto out;
+ }
}
- new_map->size = old_map->size;
- vm_map_unlock(old_map);
-
-#ifdef SYSVSHM
- if (vm1->vm_shm)
- shmfork(vm1, vm2);
-#endif
-
-#ifdef PMAP_FORK
- pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
-#endif
-
- return(vm2);
+out:
+ vm_map_unlock_read(map);
+ if (error == 0)
+ *addr_p = addr;
+ return error;
}
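
The address stepping above depends on rounding a candidate address up to the next address that is congruent to pmap_offset modulo pmap_align. A standalone sketch of that rounding step, using made-up alignment and offset values:

/* Standalone sketch of the pmap_prefer-style rounding; not kernel code. */
#include <stdio.h>

typedef unsigned long vaddr_t;

/* Round addr up to the next address == offset (mod align); align is a power of two. */
static vaddr_t
align_up(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t tmp;

	tmp = (addr & ~(align - 1)) | offset;
	if (tmp < addr)
		tmp += align;
	return tmp;
}

int
main(void)
{
	/* With a 64KB alignment and a 0x3000 preferred offset: */
	printf("0x%lx\n", align_up(0x12345, 0x10000, 0x3000));	/* 0x13000 */
	printf("0x%lx\n", align_up(0x13000, 0x10000, 0x3000));	/* 0x13000 */
	printf("0x%lx\n", align_up(0x14000, 0x10000, 0x3000));	/* 0x23000 */
	return 0;
}
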
-#if defined(DDB)
-
/*
- * DDB hooks
- */
-
-/*
- * uvm_map_printit: actually prints the map
+ * Determine allocation bias.
+ *
+ * Returns 1 if we should bias to high addresses, -1 for a bias towards low
+ * addresses, or 0 for no bias.
+ * The bias mechanism is intended to avoid clashing with brk() and stack
+ * areas.
*/
-
-void
-uvm_map_printit(struct vm_map *map, boolean_t full,
- int (*pr)(const char *, ...))
+int
+uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
{
- struct vm_map_entry *entry;
+ vaddr_t start, end;
- (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
- (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n",
- map->nentries, map->size, map->ref_count, map->timestamp,
- map->flags);
-#ifdef pmap_resident_count
- (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
- pmap_resident_count(map->pmap));
+ start = VMMAP_FREE_START(entry);
+ end = VMMAP_FREE_END(entry);
+
+ /*
+ * Stay at the top of brk() area.
+ */
+ if (end >= map->b_start && start < map->b_end)
+ return 1;
+ /*
+ * Stay at the far end of the stack area.
+ */
+ if (end >= map->s_start && start < map->s_end) {
+#ifdef MACHINE_STACK_GROWS_UP
+ return 1;
#else
- /* XXXCDC: this should be required ... */
- (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
+ return -1;
#endif
- if (!full)
- return;
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
- (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
- entry, entry->start, entry->end, entry->object.uvm_obj,
- (long long)entry->offset, entry->aref.ar_amap,
- entry->aref.ar_pageoff);
- (*pr)(
- "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
- "wc=%d, adv=%d\n",
- (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
- (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
- (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
- entry->protection, entry->max_protection,
- entry->inheritance, entry->wired_count, entry->advice);
}
-}
-/*
- * uvm_object_printit: actually prints the object
- */
+ /*
+ * No bias, this area is meant for us.
+ */
+ return 0;
+}
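
In other words: a free range overlapping the brk() area biases allocations upward, one overlapping the stack area biases them away from the stack (downward unless MACHINE_STACK_GROWS_UP), and anything else is unbiased. A standalone sketch of that rule follows; the ranges and the simplified struct are invented for the example.

/* Standalone sketch of the allocation-bias rule; not kernel code. */
#include <stdio.h>

typedef unsigned long vaddr_t;

struct ranges {
	vaddr_t b_start, b_end;		/* brk() area */
	vaddr_t s_start, s_end;		/* stack area */
};

/* 1: prefer high addresses, -1: prefer low addresses, 0: no preference. */
static int
bias(const struct ranges *r, vaddr_t free_start, vaddr_t free_end)
{
	if (free_end >= r->b_start && free_start < r->b_end)
		return 1;	/* stay at the top of the brk() area */
	if (free_end >= r->s_start && free_start < r->s_end)
		return -1;	/* assumes a downward-growing stack */
	return 0;
}

int
main(void)
{
	struct ranges r = {
		0x10000000, 0x20000000,		/* brk */
		0x70000000, 0x80000000,		/* stack */
	};

	printf("%d\n", bias(&r, 0x18000000, 0x19000000));	/*  1 */
	printf("%d\n", bias(&r, 0x78000000, 0x79000000));	/* -1 */
	printf("%d\n", bias(&r, 0x40000000, 0x41000000));	/*  0 */
	return 0;
}
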
-void
-uvm_object_printit(struct uvm_object *uobj, boolean_t full,
- int (*pr)(const char *, ...))
-{
- struct vm_page *pg;
- int cnt = 0;
- (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
- uobj, uobj->pgops, uobj->uo_npages);
- if (UVM_OBJ_IS_KERN_OBJECT(uobj))
- (*pr)("refs=<SYSTEM>\n");
- else
- (*pr)("refs=%d\n", uobj->uo_refs);
+boolean_t
+vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
+{
+ boolean_t rv;
- if (!full) {
- return;
- }
- (*pr)(" PAGES <pg,offset>:\n ");
- RB_FOREACH(pg, uvm_objtree, &uobj->memt) {
- (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
- if ((cnt % 3) == 2) {
- (*pr)("\n ");
+ if (map->flags & VM_MAP_INTRSAFE) {
+ rv = TRUE;
+ } else {
+ if (map->flags & VM_MAP_BUSY) {
+ return (FALSE);
}
- cnt++;
+ rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
}
- if ((cnt % 3) != 2) {
- (*pr)("\n");
+
+ if (rv) {
+ map->timestamp++;
+ LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
+ uvm_tree_sanity(map, file, line);
+ uvm_tree_size_chk(map, file, line);
}
-}
-/*
- * uvm_page_printit: actually print the page
- */
+ return (rv);
+}
-static const char page_flagbits[] =
- "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
- "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0"
- "\31PMAP1\32PMAP2\33PMAP3";
+void
+vm_map_lock_ln(struct vm_map *map, char *file, int line)
+{
+ if ((map->flags & VM_MAP_INTRSAFE) == 0) {
+ do {
+ while (map->flags & VM_MAP_BUSY) {
+ map->flags |= VM_MAP_WANTLOCK;
+ tsleep(&map->flags, PVM, (char *)vmmapbsy, 0);
+ }
+ } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
+ }
+
+ map->timestamp++;
+ LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
+ uvm_tree_sanity(map, file, line);
+ uvm_tree_size_chk(map, file, line);
+}
void
-uvm_page_printit(struct vm_page *pg, boolean_t full,
- int (*pr)(const char *, ...))
+vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
{
- struct vm_page *tpg;
- struct uvm_object *uobj;
- struct pglist *pgl;
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_enter_read(&map->lock);
+ LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
+ uvm_tree_sanity(map, file, line);
+ uvm_tree_size_chk(map, file, line);
+}
- (*pr)("PAGE %p:\n", pg);
- (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
- pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
- (long long)pg->phys_addr);
- (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
- pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
-#if defined(UVM_PAGE_TRKOWN)
- if (pg->pg_flags & PG_BUSY)
- (*pr)(" owning process = %d, tag=%s",
- pg->owner, pg->owner_tag);
- else
- (*pr)(" page not busy, no owner");
-#else
- (*pr)(" [page ownership tracking disabled]");
-#endif
-#ifdef __HAVE_VM_PAGE_MD
- (*pr)("\tvm_page_md %p\n", &pg->mdpage);
-#else
- (*pr)("\n");
-#endif
+void
+vm_map_unlock_ln(struct vm_map *map, char *file, int line)
+{
+ uvm_tree_sanity(map, file, line);
+ uvm_tree_size_chk(map, file, line);
+ LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_exit(&map->lock);
+}
- if (!full)
- return;
+void
+vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
+{
+ /* XXX: RO */ uvm_tree_sanity(map, file, line);
+ /* XXX: RO */ uvm_tree_size_chk(map, file, line);
+ LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_exit_read(&map->lock);
+}
- /* cross-verify object/anon */
- if ((pg->pg_flags & PQ_FREE) == 0) {
- if (pg->pg_flags & PQ_ANON) {
- if (pg->uanon == NULL || pg->uanon->an_page != pg)
- (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
- (pg->uanon) ? pg->uanon->an_page : NULL);
- else
- (*pr)(" anon backpointer is OK\n");
- } else {
- uobj = pg->uobject;
- if (uobj) {
- (*pr)(" checking object list\n");
- RB_FOREACH(tpg, uvm_objtree, &uobj->memt) {
- if (tpg == pg) {
- break;
- }
- }
- if (tpg)
- (*pr)(" page found on object list\n");
- else
- (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
- }
- }
- }
+void
+vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
+{
+ uvm_tree_sanity(map, file, line);
+ uvm_tree_size_chk(map, file, line);
+ LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
+ LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
+ if ((map->flags & VM_MAP_INTRSAFE) == 0)
+ rw_enter(&map->lock, RW_DOWNGRADE);
+}
- /* cross-verify page queue */
- if (pg->pg_flags & PQ_FREE) {
- if (uvm_pmr_isfree(pg))
- printf(" page found in uvm_pmemrange\n");
- else
- printf(" >>> page not found in uvm_pmemrange <<<\n");
- pgl = NULL;
- } else if (pg->pg_flags & PQ_INACTIVE) {
- pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
- &uvm.page_inactive_swp : &uvm.page_inactive_obj;
- } else if (pg->pg_flags & PQ_ACTIVE) {
- pgl = &uvm.page_active;
- } else {
- pgl = NULL;
+void
+vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
+{
+ /* XXX: RO */ uvm_tree_sanity(map, file, line);
+ /* XXX: RO */ uvm_tree_size_chk(map, file, line);
+ LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
+ if ((map->flags & VM_MAP_INTRSAFE) == 0) {
+ rw_exit_read(&map->lock);
+ rw_enter_write(&map->lock);
}
+ LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
+ uvm_tree_sanity(map, file, line);
+}
- if (pgl) {
- (*pr)(" checking pageq list\n");
- TAILQ_FOREACH(tpg, pgl, pageq) {
- if (tpg == pg) {
- break;
- }
- }
- if (tpg)
- (*pr)(" page found on pageq list\n");
- else
- (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
- }
+void
+vm_map_busy_ln(struct vm_map *map, char *file, int line)
+{
+ map->flags |= VM_MAP_BUSY;
}
-#endif
+
+void
+vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
+{
+ int oflags;
+
+ oflags = map->flags;
+ map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
+ if (oflags & VM_MAP_WANTLOCK)
+ wakeup(&map->flags);
+}
+
+
+RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
+ uvm_mapentry_addrcmp);
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index c416cc51d23..e0e21267e31 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,7 +1,22 @@
-/* $OpenBSD: uvm_map.h,v 1.46 2011/06/06 17:10:23 ariane Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.47 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
-/*
+/*
+ * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ *
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* Copyright (c) 1991, 1993, The Regents of the University of California.
*
@@ -75,14 +90,28 @@
#ifdef _KERNEL
/*
+ * Internal functions.
+ *
+ * Required by clipping macros.
+ */
+void uvm_map_clip_end(struct vm_map*, struct vm_map_entry*,
+ vaddr_t);
+void uvm_map_clip_start(struct vm_map*,
+ struct vm_map_entry*, vaddr_t);
+
+/*
* UVM_MAP_CLIP_START: ensure that the entry begins at or after
* the starting address, if it doesn't we split the entry.
*
* => map must be locked by caller
*/
-#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \
- if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); }
+#define UVM_MAP_CLIP_START(_map, _entry, _addr) \
+ do { \
+ KASSERT((_entry)->end + (_entry)->fspace > (_addr)); \
+ if ((_entry)->start < (_addr)) \
+ uvm_map_clip_start((_map), (_entry), (_addr)); \
+ } while (0)
/*
* UVM_MAP_CLIP_END: ensure that the entry ends at or before
@@ -91,15 +120,16 @@
* => map must be locked by caller
*/
-#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \
- if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); }
+#define UVM_MAP_CLIP_END(_map, _entry, _addr) \
+ do { \
+ KASSERT((_entry)->start < (_addr)); \
+ if ((_entry)->end > (_addr)) \
+ uvm_map_clip_end((_map), (_entry), (_addr)); \
+ } while (0)
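
After UVM_MAP_CLIP_START the caller's entry covers [addr, end); after UVM_MAP_CLIP_END it covers [start, addr). The swap performed in uvm_map_clip_start() exists precisely so the caller's pointer keeps referring to the expected half. The sketch below shows only the split condition and the resulting halves; the struct and helper are simplified stand-ins for illustration, not the kernel types.

/* Standalone sketch of the clip/split condition; not kernel code. */
#include <assert.h>
#include <stdio.h>

typedef unsigned long vaddr_t;

struct ent {
	vaddr_t start, end;
};

/* Split [start, end) at addr into a lower [start, addr) and an upper [addr, end). */
static void
split(struct ent *lower, struct ent *upper, vaddr_t addr)
{
	assert(lower->start < addr && addr < lower->end);
	upper->start = addr;
	upper->end = lower->end;
	lower->end = addr;
}

int
main(void)
{
	struct ent e = { 0x1000, 0x5000 }, hi = { 0, 0 };
	vaddr_t addr = 0x3000;

	/* CLIP_START semantics: only split when the entry begins below addr. */
	if (e.start < addr)
		split(&e, &hi, addr);
	printf("lower: 0x%lx-0x%lx upper: 0x%lx-0x%lx\n",
	    e.start, e.end, hi.start, hi.end);
	return 0;
}
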
/*
* extract flags
*/
-#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */
-#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */
-#define UVM_EXTRACT_QREF 0x4 /* use quick refs */
#define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */
#endif /* _KERNEL */
@@ -133,21 +163,34 @@ union vm_map_object {
* Also included is control information for virtual copy operations.
*/
struct vm_map_entry {
- RB_ENTRY(vm_map_entry) rb_entry; /* tree information */
- vaddr_t ownspace; /* free space after */
- vaddr_t space; /* space in subtree */
- struct vm_map_entry *prev; /* previous entry */
- struct vm_map_entry *next; /* next entry */
+ union {
+ RB_ENTRY(vm_map_entry) addr_entry; /* address tree */
+ } daddrs;
+
+ union {
+ RB_ENTRY(vm_map_entry) rbtree; /* Link freespace tree. */
+ TAILQ_ENTRY(vm_map_entry) tailq;/* Link freespace queue. */
+ TAILQ_ENTRY(vm_map_entry) deadq;/* dead entry queue */
+ } dfree;
+
+#define uvm_map_entry_start_copy start
vaddr_t start; /* start address */
vaddr_t end; /* end address */
+
+ vsize_t guard; /* bytes in guard */
+ vsize_t fspace; /* free space */
+
union vm_map_object object; /* object I point to */
voff_t offset; /* offset into object */
+ struct vm_aref aref; /* anonymous overlay */
+
int etype; /* entry type */
+
vm_prot_t protection; /* protection code */
vm_prot_t max_protection; /* maximum protection */
vm_inherit_t inheritance; /* inheritance */
+
int wired_count; /* can be paged if == 0 */
- struct vm_aref aref; /* anonymous overlay */
int advice; /* madvise advice */
#define uvm_map_entry_stop_copy flags
u_int8_t flags; /* flags */
@@ -156,18 +199,19 @@ struct vm_map_entry {
#define UVM_MAP_KMEM 0x02 /* from kmem entry pool */
};
-/*
- * Marks the map entry as a guard page, using vm_map_entry.etype.
- */
-#define MAP_ET_KVAGUARD 0x10 /* guard entry */
-
#define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0)
+TAILQ_HEAD(uvm_map_deadq, vm_map_entry); /* dead entry queue */
+RB_HEAD(uvm_map_addr, vm_map_entry);
+RB_PROTOTYPE(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
+ uvm_mapentry_addrcmp);
+
/*
- * Maps are doubly-linked lists of map entries, kept sorted
- * by address. A single hint is provided to start
- * searches again from the last successful search,
- * insertion, or removal.
+ * A map is an rbtree of map entries, kept sorted by address.
+ * In addition, free space entries are also kept in an rbtree,
+ * indexed by free size.
+ *
+ *
*
* LOCKING PROTOCOL NOTES:
* -----------------------
@@ -214,23 +258,80 @@ struct vm_map_entry {
* is busy, and thread is attempting
* to write-lock. must be tested
* while `flags_lock' is asserted.
+ *
+ * VM_MAP_GUARDPAGES r/o; must be specified at map
+ * initialization time.
+ * If set, guards will appear between
+ * automatic allocations.
+ * No locking required.
+ *
+ * VM_MAP_ISVMSPACE r/o; set by uvmspace_alloc.
+ * Signifies that this map is a vmspace.
+ * (The implementation treats all maps
+ * without this bit as kernel maps.)
+ * No locking required.
+ *
+ *
+ * All automatic allocations (uvm_map without MAP_FIXED) will allocate
+ * from vm_map.free.
+ * If that allocation fails:
+ * - vmspace maps will spill over into vm_map.bfree,
+ * - all other maps will call uvm_map_kmem_grow() to increase the arena.
+ *
+ * vmspace maps have their data, brk() and stack arenas automatically
+ * updated when uvm_map() is invoked without MAP_FIXED.
+ * The spill over arena (vm_map.bfree) will contain the space in the brk()
+ * and stack ranges.
+ * Kernel maps never have a bfree arena and this tree will always be empty.
+ *
+ *
+ * read_locks and write_locks are used in lock debugging code.
*/
struct vm_map {
struct pmap * pmap; /* Physical map */
struct rwlock lock; /* Lock for map data */
- RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */
- struct vm_map_entry header; /* List of entries */
- int nentries; /* Number of entries */
+
+ struct uvm_map_addr addr; /* Entry tree, by addr */
+
vsize_t size; /* virtual size */
int ref_count; /* Reference count */
simple_lock_data_t ref_lock; /* Lock for ref_count field */
- vm_map_entry_t hint; /* hint for quick lookups */
- simple_lock_data_t hint_lock; /* lock for hint storage */
- vm_map_entry_t first_free; /* First free space hint */
int flags; /* flags */
unsigned int timestamp; /* Version number */
-#define min_offset header.start
-#define max_offset header.end
+
+ vaddr_t min_offset; /* First address in map. */
+ vaddr_t max_offset; /* Last address in map. */
+
+ /*
+ * Allocation overflow regions.
+ */
+ vaddr_t b_start; /* Start for brk() alloc. */
+ vaddr_t b_end; /* End for brk() alloc. */
+ vaddr_t s_start; /* Start for stack alloc. */
+ vaddr_t s_end; /* End for stack alloc. */
+
+ /*
+ * Special address selectors.
+ *
+ * The uaddr_exe mapping is used if:
+ * - protX is selected
+ * - the pointer is not NULL
+ *
+ * If uaddr_exe is not used, the other mappings are checked in
+ * order of appearance.
+ * If a hint is given, the selection will only be used if the hint
+ * falls in the range described by the mapping.
+ *
+ * The states are pointers because:
+ * - they may not all be in use
+ * - the struct size for different schemes is variable
+ *
+ * The uaddr_brk_stack selector will select addresses that are in
+ * the brk/stack area of the map.
+ */
+ struct uvm_addr_state *uaddr_exe; /* Executable selector. */
+ struct uvm_addr_state *uaddr_any[4]; /* More selectors. */
+ struct uvm_addr_state *uaddr_brk_stack; /* Brk/stack selector. */
};
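
The selector fields follow the precedence the comment above describes: an executable request that has a uaddr_exe selector uses it, otherwise the uaddr_any slots are consulted in order of appearance. The real code additionally honours hints and per-selector address ranges, which the following minimal sketch leaves out; the names and values here are invented for illustration.

/* Standalone sketch of the selector precedence; not kernel code. */
#include <stdio.h>

struct selector {
	const char *name;
};

struct toy_map {
	struct selector *exe;		/* used only for executable requests */
	struct selector *any[4];	/* tried in order of appearance */
};

static struct selector *
pick(struct toy_map *m, int want_exec)
{
	int i;

	if (want_exec && m->exe != NULL)
		return m->exe;
	for (i = 0; i < 4; i++) {
		if (m->any[i] != NULL)
			return m->any[i];
	}
	return NULL;
}

int
main(void)
{
	struct selector exe = { "exe" };
	struct selector rnd = { "rnd" };
	struct toy_map m = { &exe, { NULL, &rnd, NULL, NULL } };

	printf("%s\n", pick(&m, 1)->name);	/* exe */
	printf("%s\n", pick(&m, 0)->name);	/* rnd */
	return 0;
}
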
/* vm_map flags */
@@ -239,11 +340,13 @@ struct vm_map {
#define VM_MAP_WIREFUTURE 0x04 /* rw: wire future mappings */
#define VM_MAP_BUSY 0x08 /* rw: map is busy */
#define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */
+#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */
+#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */
/* XXX: number of kernel maps and entries to statically allocate */
#if !defined(MAX_KMAPENT)
-#define MAX_KMAPENT 1024 /* XXXCDC: no crash */
+#define MAX_KMAPENT 1024 /* Sufficient to make it to the scheduler. */
#endif /* !defined MAX_KMAPENT */
#ifdef _KERNEL
@@ -268,9 +371,7 @@ struct vm_map_intrsafe {
#ifdef _KERNEL
-#ifdef PMAP_GROWKERNEL
extern vaddr_t uvm_maxkaddr;
-#endif
/*
* protos: the following prototypes define the interface to vm_map
@@ -279,32 +380,29 @@ extern vaddr_t uvm_maxkaddr;
void uvm_map_deallocate(vm_map_t);
int uvm_map_clean(vm_map_t, vaddr_t, vaddr_t, int);
-void uvm_map_clip_start(vm_map_t, vm_map_entry_t, vaddr_t);
-void uvm_map_clip_end(vm_map_t, vm_map_entry_t, vaddr_t);
vm_map_t uvm_map_create(pmap_t, vaddr_t, vaddr_t, int);
-int uvm_map_extract(vm_map_t, vaddr_t, vsize_t,
- vm_map_t, vaddr_t *, int);
-vm_map_entry_t uvm_map_findspace(vm_map_t, vaddr_t, vsize_t, vaddr_t *,
- struct uvm_object *, voff_t, vsize_t, int);
+int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*,
+ int);
vaddr_t uvm_map_pie(vaddr_t);
-#define uvm_map_hint(p, prot) uvm_map_hint1(p, prot, 1)
-vaddr_t uvm_map_hint1(struct proc *, vm_prot_t, int);
+vaddr_t uvm_map_hint(struct vmspace *, vm_prot_t);
int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t);
int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int);
void uvm_map_init(void);
boolean_t uvm_map_lookup_entry(vm_map_t, vaddr_t, vm_map_entry_t *);
-void uvm_map_reference(vm_map_t);
int uvm_map_replace(vm_map_t, vaddr_t, vaddr_t,
vm_map_entry_t, int);
int uvm_map_reserve(vm_map_t, vsize_t, vaddr_t, vsize_t,
vaddr_t *);
void uvm_map_setup(vm_map_t, vaddr_t, vaddr_t, int);
int uvm_map_submap(vm_map_t, vaddr_t, vaddr_t, vm_map_t);
-#define uvm_unmap(_m, _s, _e) uvm_unmap_p(_m, _s, _e, 0)
-void uvm_unmap_p(vm_map_t, vaddr_t, vaddr_t, struct proc *);
-void uvm_unmap_detach(vm_map_entry_t,int);
-void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *,
- struct proc *, boolean_t);
+void uvm_unmap(vm_map_t, vaddr_t, vaddr_t);
+void uvm_map_set_uaddr(struct vm_map*, struct uvm_addr_state**,
+ struct uvm_addr_state*);
+int uvm_map_mquery(struct vm_map*, vaddr_t*, vsize_t, voff_t, int);
+
+void uvm_unmap_detach(struct uvm_map_deadq*, int);
+void uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t,
+ struct uvm_map_deadq*, boolean_t, boolean_t);
#endif /* _KERNEL */
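
With the prototype change above, uvm_unmap_remove() no longer hands back a chain of dead entries through a vm_map_entry_t pointer; the caller supplies a TAILQ-backed struct uvm_map_deadq and drains it afterwards with uvm_unmap_detach(). A minimal caller sketch, mirroring the sys_munmap hunk later in this diff; the two boolean arguments are passed as FALSE, TRUE there, and their exact semantics are defined in uvm_map.c:

void
unmap_range_sketch(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct uvm_map_deadq dead_entries;

	TAILQ_INIT(&dead_entries);

	vm_map_lock(map);
	/* FALSE, TRUE: same flag values the sys_munmap hunk passes. */
	uvm_unmap_remove(map, start, end, &dead_entries, FALSE, TRUE);
	vm_map_unlock(map);

	/* Tear the detached entries down outside the map lock. */
	uvm_unmap_detach(&dead_entries, 0);
}
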
@@ -337,82 +435,45 @@ void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *,
*/
#ifdef _KERNEL
-/* XXX: clean up later */
+/*
+ * XXX: clean up later
+ * Half the kernel seems to depend on them being included here.
+ */
#include <sys/time.h>
-#include <sys/systm.h> /* for panic() */
-
-static __inline boolean_t vm_map_lock_try(vm_map_t);
-static __inline void vm_map_lock(vm_map_t);
-extern const char vmmapbsy[];
-
-static __inline boolean_t
-vm_map_lock_try(struct vm_map *map)
-{
- boolean_t rv;
-
- if (map->flags & VM_MAP_INTRSAFE) {
- rv = TRUE;
- } else {
- if (map->flags & VM_MAP_BUSY) {
- return (FALSE);
- }
- rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
- }
-
- if (rv)
- map->timestamp++;
-
- return (rv);
-}
-
-static __inline void
-vm_map_lock(struct vm_map *map)
-{
- if (map->flags & VM_MAP_INTRSAFE)
- return;
-
- do {
- while (map->flags & VM_MAP_BUSY) {
- map->flags |= VM_MAP_WANTLOCK;
- tsleep(&map->flags, PVM, (char *)vmmapbsy, 0);
- }
- } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
-
- map->timestamp++;
-}
-
-#define vm_map_lock_read(map) rw_enter_read(&(map)->lock)
-
-#define vm_map_unlock(map) \
-do { \
- if (((map)->flags & VM_MAP_INTRSAFE) == 0) \
- rw_exit(&(map)->lock); \
-} while (0)
-
-#define vm_map_unlock_read(map) rw_exit_read(&(map)->lock)
-
-#define vm_map_downgrade(map) rw_enter(&(map)->lock, RW_DOWNGRADE)
-
-#define vm_map_upgrade(map) \
-do { \
- rw_exit_read(&(map)->lock); \
- rw_enter_write(&(map)->lock); \
-} while (0)
-
-#define vm_map_busy(map) \
-do { \
- (map)->flags |= VM_MAP_BUSY; \
-} while (0)
+#include <sys/systm.h> /* for panic() */
+
+boolean_t vm_map_lock_try_ln(struct vm_map*, char*, int);
+void vm_map_lock_ln(struct vm_map*, char*, int);
+void vm_map_lock_read_ln(struct vm_map*, char*, int);
+void vm_map_unlock_ln(struct vm_map*, char*, int);
+void vm_map_unlock_read_ln(struct vm_map*, char*, int);
+void vm_map_downgrade_ln(struct vm_map*, char*, int);
+void vm_map_upgrade_ln(struct vm_map*, char*, int);
+void vm_map_busy_ln(struct vm_map*, char*, int);
+void vm_map_unbusy_ln(struct vm_map*, char*, int);
+
+#ifdef DIAGNOSTIC
+#define vm_map_lock_try(map) vm_map_lock_try_ln(map, __FILE__, __LINE__)
+#define vm_map_lock(map) vm_map_lock_ln(map, __FILE__, __LINE__)
+#define vm_map_lock_read(map) vm_map_lock_read_ln(map, __FILE__, __LINE__)
+#define vm_map_unlock(map) vm_map_unlock_ln(map, __FILE__, __LINE__)
+#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, __FILE__, __LINE__)
+#define vm_map_downgrade(map) vm_map_downgrade_ln(map, __FILE__, __LINE__)
+#define vm_map_upgrade(map) vm_map_upgrade_ln(map, __FILE__, __LINE__)
+#define vm_map_busy(map) vm_map_busy_ln(map, __FILE__, __LINE__)
+#define vm_map_unbusy(map) vm_map_unbusy_ln(map, __FILE__, __LINE__)
+#else
+#define vm_map_lock_try(map) vm_map_lock_try_ln(map, NULL, 0)
+#define vm_map_lock(map) vm_map_lock_ln(map, NULL, 0)
+#define vm_map_lock_read(map) vm_map_lock_read_ln(map, NULL, 0)
+#define vm_map_unlock(map) vm_map_unlock_ln(map, NULL, 0)
+#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, NULL, 0)
+#define vm_map_downgrade(map) vm_map_downgrade_ln(map, NULL, 0)
+#define vm_map_upgrade(map) vm_map_upgrade_ln(map, NULL, 0)
+#define vm_map_busy(map) vm_map_busy_ln(map, NULL, 0)
+#define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0)
+#endif
-#define vm_map_unbusy(map) \
-do { \
- int oflags; \
- \
- oflags = (map)->flags; \
- (map)->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); \
- if (oflags & VM_MAP_WANTLOCK) \
- wakeup(&(map)->flags); \
-} while (0)
#endif /* _KERNEL */
/*
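
The inline lockers removed above are replaced by out-of-line *_ln variants; under DIAGNOSTIC the macros pass __FILE__/__LINE__ so a debugging kernel can tell which call site took or released the map lock (per the lock-debugging note in the struct vm_map comment). The following is only a sketch of the write-lock variant, mirroring the removed inline vm_map_lock(); the file/line bookkeeping is an assumption, and the committed body lives in uvm_map.c:

void
vm_map_lock_ln(struct vm_map *map, char *file, int line)
{
	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
		do {
			while (map->flags & VM_MAP_BUSY) {
				map->flags |= VM_MAP_WANTLOCK;
				tsleep(&map->flags, PVM, (char *)vmmapbsy, 0);
			}
		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
		map->timestamp++;
	}

	/*
	 * file/line are NULL/0 in non-DIAGNOSTIC kernels; otherwise they
	 * identify the caller for lock debugging (assumed bookkeeping).
	 */
	(void)file;
	(void)line;
}
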
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index a96deed4052..6817224e3ce 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.87 2011/07/09 05:31:26 matthew Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.88 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -142,43 +142,17 @@ sys_mquery(struct proc *p, void *v, register_t *retval)
} else {
fp = NULL;
uobj = NULL;
- uoff = 0;
+ uoff = UVM_UNKNOWN_OFFSET;
}
if (vaddr == 0)
- vaddr = uvm_map_hint(p, prot);
+ vaddr = uvm_map_hint(p->p_vmspace, prot);
- /* prevent a user requested address from falling in heap space */
- if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
- (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) {
- if (flags & UVM_FLAG_FIXED) {
- error = EINVAL;
- goto done;
- }
- vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ);
- }
- vm_map_lock(&p->p_vmspace->vm_map);
-
-again:
- if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
- &vaddr, uobj, uoff, 0, flags) == NULL) {
- if (flags & UVM_FLAG_FIXED)
- error = EINVAL;
- else
- error = ENOMEM;
- } else {
- /* prevent a returned address from falling in heap space */
- if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr)
- && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) {
- vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
- BRKSIZ);
- goto again;
- }
- error = 0;
+ error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
+ flags);
+ if (error == 0)
*retval = (register_t)(vaddr);
- }
- vm_map_unlock(&p->p_vmspace->vm_map);
-done:
+
if (fp != NULL)
FRELE(fp);
return (error);
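
For context, the interface this hunk serves is mquery(2), which asks the kernel where a mapping of the requested size could be placed; with MAP_FIXED it instead checks whether the given address itself is available. A minimal userland call, with arguments following mmap(2) conventions:

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	/* Ask where a 1 MB anonymous mapping could go; NULL means no hint. */
	void *where = mquery(NULL, 1024 * 1024, PROT_READ | PROT_WRITE,
	    MAP_ANON, -1, 0);

	if (where == MAP_FAILED)
		err(1, "mquery");
	printf("proposed address: %p\n", where);
	return 0;
}
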
@@ -202,7 +176,7 @@ sys_mincore(struct proc *p, void *v, register_t *retval)
struct uvm_object *uobj;
struct vm_amap *amap;
struct vm_anon *anon;
- vm_map_entry_t entry;
+ vm_map_entry_t entry, next;
vaddr_t start, end, lim;
vm_map_t map;
vsize_t len, npgs;
@@ -251,15 +225,16 @@ sys_mincore(struct proc *p, void *v, register_t *retval)
}
for (/* nothing */;
- entry != &map->header && entry->start < end;
- entry = entry->next) {
+ entry != NULL && entry->start < end;
+ entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
KASSERT(!UVM_ET_ISSUBMAP(entry));
KASSERT(start >= entry->start);
/* Make sure there are no holes. */
+ next = RB_NEXT(uvm_map_addr, &map->addr, entry);
if (entry->end < end &&
- (entry->next == &map->header ||
- entry->next->start > entry->end)) {
+ (next == NULL ||
+ next->start > entry->end)) {
error = ENOMEM;
goto out;
}
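
The loop above now walks the per-map address tree with RB_NEXT and reports ENOMEM when the queried range contains a hole. The same check, extracted into a standalone helper for clarity (a sketch only; it assumes the caller holds the map lock and passes the first entry covering the start of the range):

static int
range_has_hole(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
{
	struct vm_map_entry *next;

	for (; entry != NULL && entry->start < end;
	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
		/* A gap before `end' means the range is not fully mapped. */
		if (entry->end < end &&
		    (next == NULL || next->start > entry->end))
			return 1;
	}
	return 0;
}
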
@@ -412,17 +387,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval)
if (vm_min_address > 0 && addr < vm_min_address)
return (EINVAL);
- } else {
-
- /*
- * not fixed: make sure we skip over the largest possible heap.
- * we will refine our guess later (e.g. to account for VAC, etc)
- */
- if (addr == 0)
- addr = uvm_map_hint(p, prot);
- else if (!(flags & MAP_TRYFIXED) &&
- addr < (vaddr_t)p->p_vmspace->vm_daddr)
- addr = uvm_map_hint(p, prot);
}
/*
@@ -565,13 +529,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval)
error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
- if (error == ENOMEM && !(flags & (MAP_FIXED | MAP_TRYFIXED))) {
- /* once more, with feeling */
- addr = uvm_map_hint1(p, prot, 0);
- error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot,
- maxprot, flags, handle, pos,
- p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
- }
if (error == 0)
/* remember to add offset */
@@ -658,7 +615,7 @@ sys_munmap(struct proc *p, void *v, register_t *retval)
vsize_t size, pageoff;
vm_map_t map;
vaddr_t vm_min_address = VM_MIN_ADDRESS;
- struct vm_map_entry *dead_entries;
+ struct uvm_map_deadq dead_entries;
/*
* get syscall args...
@@ -700,12 +657,12 @@ sys_munmap(struct proc *p, void *v, register_t *retval)
/*
* doit!
*/
- uvm_unmap_remove(map, addr, addr + size, &dead_entries, p, FALSE);
+ TAILQ_INIT(&dead_entries);
+ uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE);
vm_map_unlock(map); /* and unlock */
- if (dead_entries != NULL)
- uvm_unmap_detach(dead_entries, 0);
+ uvm_unmap_detach(&dead_entries, 0);
return (0);
}
@@ -1036,7 +993,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
if (*addr & PAGE_MASK)
return(EINVAL);
uvmflag |= UVM_FLAG_FIXED;
- uvm_unmap_p(map, *addr, *addr + size, p); /* zap! */
+ uvm_unmap(map, *addr, *addr + size); /* zap! */
}
/*
@@ -1130,7 +1087,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
advice, uvmflag);
- error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p);
+ error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
if (error == 0) {
/*
diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c
index 06cbf871e41..f4d4490b853 100644
--- a/sys/uvm/uvm_unix.c
+++ b/sys/uvm/uvm_unix.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_unix.c,v 1.42 2011/06/06 17:10:23 ariane Exp $ */
+/* $OpenBSD: uvm_unix.c,v 1.43 2012/03/09 13:01:29 ariane Exp $ */
/* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */
/*
@@ -167,9 +167,7 @@ uvm_coredump(struct proc *p, struct vnode *vp, struct ucred *cred,
offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize;
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
-
+ RB_FOREACH(entry, uvm_map_addr, &map->addr) {
/* should never happen for a user process */
if (UVM_ET_ISSUBMAP(entry)) {
panic("uvm_coredump: user process with submap?");
@@ -261,9 +259,7 @@ uvm_coredump_walkmap(struct proc *p, void *iocookie,
vaddr_t top;
int error;
- for (entry = map->header.next; entry != &map->header;
- entry = entry->next) {
-
+ RB_FOREACH(entry, uvm_map_addr, &map->addr) {
state.cookie = cookie;
state.prot = entry->protection;
state.flags = 0;
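
Both coredump walkers above switch to the same traversal: the header-list walk is replaced by an in-order walk of the address-sorted RB tree. The general pattern, sketched for any per-entry visitor (the visit step is a placeholder):

void
walk_entries_sketch(struct vm_map *map)
{
	struct vm_map_entry *entry;

	/*
	 * Old:  for (entry = map->header.next; entry != &map->header;
	 *           entry = entry->next)
	 * New:  in-order traversal of the per-map address tree.
	 */
	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
		/* visit [entry->start, entry->end) */
	}
}
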