| author | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2012-03-09 13:01:30 +0000 |
|---|---|---|
| committer | Ariane van der Steldt <ariane@cvs.openbsd.org> | 2012-03-09 13:01:30 +0000 |
| commit | 193e3efb70083a72f3d299ea5f129cf83d547115 (patch) | |
| tree | e7c9b8d210fe2b29062f1cf3a40c093bdf14800d | |
| parent | dbaaf4ad89f61a154abf6b48600210ec50ecb62c (diff) | |
New vmmap implementation.
no oks (it is really a pain to review properly)
extensively tested, I'm confident it'll be stable
'now is the time' from several icb inhabitants
Diff provides:
- ability to specify different allocators for different regions/maps
- a simpler implementation of the current allocator
- currently in compatibility mode: it will generate similar addresses
as the old allocator
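
The first bullet above is backed by the `struct uvm_addr_state` / `struct uvm_addr_functions` pair added in `uvm_addr.h` (see that hunk further down). Below is a rough, non-authoritative sketch of how that interface is shaped, not code from the diff: the constructors and the `uvm_addr_invoke()` signature are taken from the header, but the address ranges, the `printf`, and the standalone usage are invented. In the real tree the selectors are installed on a map and kept in sync via `uaddr_free_insert`/`uaddr_free_remove`, so a detached state like this one would simply report ENOMEM.

```c
/*
 * Illustrative only: creating two of the new address selectors and
 * querying one of them.  Assumes `map` is a locked process vmspace
 * map (the rnd selector asserts VM_MAP_ISVMSPACE); the ranges below
 * are made-up placeholders.
 */
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>

void
example_addr_selectors(struct vm_map *map)
{
	struct uvm_addr_state *rnd, *bestfit;
	struct vm_map_entry *entry, *last;
	vaddr_t addr;

	/* Different algorithms for different address ranges. */
	rnd = uaddr_rnd_create(PAGE_SIZE, 0x40000000UL);
	bestfit = uaddr_bestfit_create(0x40000000UL, 0x80000000UL);

	/*
	 * Ask the compat/random selector for one page, no alignment
	 * constraint, no hint; on success addr and entry describe the
	 * proposed placement.
	 */
	if (uvm_addr_invoke(map, rnd, &entry, &last, &addr,
	    PAGE_SIZE, 0, 0, VM_PROT_READ | VM_PROT_WRITE, 0) == 0)
		printf("%s proposes 0x%lx\n",
		    rnd->uaddr_functions->uaddr_name, addr);

	uvm_addr_destroy(bestfit);
	uvm_addr_destroy(rnd);
}
```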
| -rw-r--r-- | sys/arch/i386/i386/pmap.c | 25 |
| -rw-r--r-- | sys/conf/files | 3 |
| -rw-r--r-- | sys/dev/pci/drm/drm_bufs.c | 4 |
| -rw-r--r-- | sys/dev/pci/drm/i915_drv.c | 8 |
| -rw-r--r-- | sys/kern/exec_elf.c | 26 |
| -rw-r--r-- | sys/kern/kern_exec.c | 6 |
| -rw-r--r-- | sys/kern/kern_malloc.c | 11 |
| -rw-r--r-- | sys/kern/sysv_shm.c | 8 |
| -rw-r--r-- | sys/uvm/uvm.h | 20 |
| -rw-r--r-- | sys/uvm/uvm_addr.c | 1556 |
| -rw-r--r-- | sys/uvm/uvm_addr.h | 116 |
| -rw-r--r-- | sys/uvm/uvm_extern.h | 8 |
| -rw-r--r-- | sys/uvm/uvm_fault.c | 15 |
| -rw-r--r-- | sys/uvm/uvm_init.c | 14 |
| -rw-r--r-- | sys/uvm/uvm_io.c | 15 |
| -rw-r--r-- | sys/uvm/uvm_km.c | 78 |
| -rw-r--r-- | sys/uvm/uvm_map.c | 6656 |
| -rw-r--r-- | sys/uvm/uvm_map.h | 307 |
| -rw-r--r-- | sys/uvm/uvm_mmap.c | 81 |
| -rw-r--r-- | sys/uvm/uvm_unix.c | 10 |
20 files changed, 5935 insertions, 3032 deletions
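
One piece of context before the diff proper: each map entry now carries `guard` and `fspace` fields describing the gap that trails it, and the `VMMAP_FREE_START`/`VMMAP_FREE_END` macros added to `uvm.h` (first large hunk below) derive the usable free window from them. The following stand-alone sketch of that arithmetic is illustrative only; `toy_entry` and its numbers are invented stand-ins for the real `vm_map_entry`, while the two macros mirror the ones in the diff.

```c
/*
 * Toy model of the per-entry free-space bookkeeping: an entry maps
 * [start, end), is followed by `guard` bytes of guard space, and then
 * by `fspace` bytes of free virtual address space.
 */
#include <stdio.h>

typedef unsigned long vaddr_t;
typedef unsigned long vsize_t;
#define PAGE_SIZE 4096UL

struct toy_entry {
	vaddr_t start, end;	/* mapped region [start, end) */
	vsize_t guard;		/* guard space after the mapping */
	vsize_t fspace;		/* free space after the guard */
};

/* Same definitions as the uvm.h hunk, applied to the toy struct. */
#define VMMAP_FREE_START(_e)	((_e)->end + (_e)->guard)
#define VMMAP_FREE_END(_e)	((_e)->end + (_e)->guard + (_e)->fspace)

int
main(void)
{
	struct toy_entry e = {
		.start = 0x1000, .end = 0x3000,
		.guard = PAGE_SIZE, .fspace = 4 * PAGE_SIZE,
	};
	vsize_t want = 2 * PAGE_SIZE;

	printf("free window behind entry: 0x%lx-0x%lx\n",
	    VMMAP_FREE_START(&e), VMMAP_FREE_END(&e));
	if (VMMAP_FREE_END(&e) - VMMAP_FREE_START(&e) >= want)
		printf("a 0x%lx byte allocation fits here\n", want);
	return 0;
}
```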
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 2cdfba314d7..f8f05cb8b88 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.156 2012/02/19 17:14:28 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.157 2012/03/09 13:01:28 ariane Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -602,14 +602,16 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) vaddr_t va = 0; vm_map_lock(map); - for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { - /* - * This entry has greater va than the entries before. - * We need to make it point to the last page, not past it. - */ + RB_FOREACH_REVERSE(ent, uvm_map_addr, &map->addr) { if (ent->protection & VM_PROT_EXECUTE) - va = trunc_page(ent->end - 1); + break; } + /* + * This entry has greater va than the entries before. + * We need to make it point to the last page, not past it. + */ + if (ent) + va = trunc_page(ent->end - 1); vm_map_unlock(map); if (va <= pm->pm_hiexec) { @@ -1244,7 +1246,7 @@ pmap_free_pvpage(void) { int s; struct vm_map *map; - struct vm_map_entry *dead_entries; + struct uvm_map_deadq dead_entries; struct pv_page *pvp; s = splvm(); /* protect kmem_map */ @@ -1265,13 +1267,12 @@ pmap_free_pvpage(void) TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); /* unmap the page */ - dead_entries = NULL; + TAILQ_INIT(&dead_entries); uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE, - &dead_entries, NULL, FALSE); + &dead_entries, FALSE, TRUE); vm_map_unlock(map); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + uvm_unmap_detach(&dead_entries, 0); pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ } diff --git a/sys/conf/files b/sys/conf/files index 5f12fdfdc26..379b1e2cdb8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.533 2011/12/31 17:06:10 jsing Exp $ +# $OpenBSD: files,v 1.534 2012/03/09 13:01:28 ariane Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -987,6 +987,7 @@ file nnpfs/nnpfs_vfsops-openbsd.c nnpfs file nnpfs/nnpfs_vnodeops-bsd.c nnpfs file nnpfs/nnpfs_vnodeops-common.c nnpfs file nnpfs/nnpfs_syscalls-dummy.c !nnpfs +file uvm/uvm_addr.c file uvm/uvm_amap.c file uvm/uvm_anon.c file uvm/uvm_aobj.c diff --git a/sys/dev/pci/drm/drm_bufs.c b/sys/dev/pci/drm/drm_bufs.c index 08df8480a55..2f64aa271fa 100644 --- a/sys/dev/pci/drm/drm_bufs.c +++ b/sys/dev/pci/drm/drm_bufs.c @@ -1,4 +1,4 @@ -/* $OpenBSD: drm_bufs.c,v 1.48 2011/06/02 18:22:00 weerd Exp $ */ +/* $OpenBSD: drm_bufs.c,v 1.49 2012/03/09 13:01:28 ariane Exp $ */ /*- * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. 
@@ -958,7 +958,7 @@ drm_mapbufs(struct drm_device *dev, void *data, struct drm_file *file_priv) foff = 0; } - vaddr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE); + vaddr = 0; retcode = uvm_mmap(&curproc->p_vmspace->vm_map, &vaddr, size, UVM_PROT_READ | UVM_PROT_WRITE, UVM_PROT_ALL, MAP_SHARED, (caddr_t)vn, foff, curproc->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c index 76ad35fe01a..602bac7a77e 100644 --- a/sys/dev/pci/drm/i915_drv.c +++ b/sys/dev/pci/drm/i915_drv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: i915_drv.c,v 1.118 2011/09/20 14:29:34 kettenis Exp $ */ +/* $OpenBSD: i915_drv.c,v 1.119 2012/03/09 13:01:28 ariane Exp $ */ /* * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org> * @@ -1438,10 +1438,10 @@ i915_gem_gtt_map_ioctl(struct drm_device *dev, void *data, * We give our reference from object_lookup to the mmap, so only * must free it in the case that the map fails. */ - addr = uvm_map_hint(curproc, VM_PROT_READ | VM_PROT_WRITE); - ret = uvm_map_p(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj, + addr = 0; + ret = uvm_map(&curproc->p_vmspace->vm_map, &addr, nsize, &obj->uobj, offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, - UVM_INH_SHARE, UVM_ADV_RANDOM, 0), curproc); + UVM_INH_SHARE, UVM_ADV_RANDOM, 0)); done: if (ret == 0) diff --git a/sys/kern/exec_elf.c b/sys/kern/exec_elf.c index 2e615de374d..4e9f314965f 100644 --- a/sys/kern/exec_elf.c +++ b/sys/kern/exec_elf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_elf.c,v 1.85 2011/07/05 04:48:02 guenther Exp $ */ +/* $OpenBSD: exec_elf.c,v 1.86 2012/03/09 13:01:28 ariane Exp $ */ /* * Copyright (c) 1996 Per Fogelstrom @@ -326,6 +326,7 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, int nload, idx = 0; Elf_Addr pos = *last; int file_align; + int loop; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p); if ((error = namei(&nd)) != 0) { @@ -377,11 +378,12 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, * would (i.e. something safely out of the way). */ if (pos == ELFDEFNNAME(NO_ADDR)) { - pos = uvm_map_hint(p, VM_PROT_EXECUTE); + pos = uvm_map_hint(p->p_vmspace, VM_PROT_EXECUTE); } pos = ELF_ROUND(pos, file_align); *last = epp->ep_interp_pos = pos; + loop = 0; for (i = 0; i < nload;/**/) { vaddr_t addr; struct uvm_object *uobj; @@ -409,17 +411,17 @@ ELFNAME(load_file)(struct proc *p, char *path, struct exec_package *epp, addr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); - vm_map_lock(&p->p_vmspace->vm_map); - if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size, - &addr, uobj, uoff, 0, UVM_FLAG_FIXED) == NULL) { - if (uvm_map_findspace(&p->p_vmspace->vm_map, addr, size, - &addr, uobj, uoff, 0, 0) == NULL) { - error = ENOMEM; /* XXX */ - vm_map_unlock(&p->p_vmspace->vm_map); - goto bad1; + if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size, + (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) { + if (loop == 0) { + loop = 1; + i = 0; + *last = epp->ep_interp_pos = pos = 0; + continue; } - } - vm_map_unlock(&p->p_vmspace->vm_map); + error = ENOMEM; + goto bad1; + } if (addr != pos + loadmap[i].vaddr) { /* base changed. 
*/ pos = addr - trunc_page(loadmap[i].vaddr); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index c8c2f1c1378..4fa96597e89 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.124 2012/02/20 22:23:39 guenther Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.125 2012/03/09 13:01:28 ariane Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -821,7 +821,6 @@ exec_sigcode_map(struct proc *p, struct emul *e) e->e_sigobject = uao_create(sz, 0); uao_reference(e->e_sigobject); /* permanent reference */ - va = vm_map_min(kernel_map); /* hint */ if ((r = uvm_map(kernel_map, &va, round_page(sz), e->e_sigobject, 0, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) { @@ -832,8 +831,7 @@ exec_sigcode_map(struct proc *p, struct emul *e) uvm_unmap(kernel_map, va, va + round_page(sz)); } - /* Just a hint to uvm_mmap where to put it. */ - p->p_sigcode = uvm_map_hint(p, VM_PROT_READ|VM_PROT_EXECUTE); + p->p_sigcode = 0; /* no hint */ uao_reference(e->e_sigobject); if (uvm_map(&p->p_vmspace->vm_map, &p->p_sigcode, round_page(sz), e->e_sigobject, 0, 0, UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index d6fc8d3ac20..6c5f9e8bfdd 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.90 2011/09/22 21:52:36 jsing Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.91 2012/03/09 13:01:28 ariane Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -576,8 +576,13 @@ kmeminit(void) kmeminit_nkmempages(); base = vm_map_min(kernel_map); kmem_map = uvm_km_suballoc(kernel_map, &base, &limit, - (vsize_t)(nkmempages * PAGE_SIZE), VM_MAP_INTRSAFE, FALSE, - &kmem_map_store); + (vsize_t)nkmempages << PAGE_SHIFT, +#ifdef KVA_GUARDPAGES + VM_MAP_INTRSAFE | VM_MAP_GUARDPAGES, +#else + VM_MAP_INTRSAFE, +#endif + FALSE, &kmem_map_store); kmembase = (char *)base; kmemlimit = (char *)limit; kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 6b4b0ed18ce..6b6736c153d 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sysv_shm.c,v 1.54 2011/10/27 07:56:28 robert Exp $ */ +/* $OpenBSD: sysv_shm.c,v 1.55 2012/03/09 13:01:28 ariane Exp $ */ /* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */ /* @@ -261,10 +261,8 @@ sys_shmat(struct proc *p, void *v, register_t *retval) attach_va = (vaddr_t)SCARG(uap, shmaddr); else return (EINVAL); - } else { - /* This is just a hint to uvm_map() about where to put it. 
*/ - attach_va = uvm_map_hint(p, prot); - } + } else + attach_va = 0; shm_handle = shmseg->shm_internal; uao_reference(shm_handle->shm_object); error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 939738f47aa..c236fb421a9 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm.h,v 1.46 2011/07/06 19:50:38 beck Exp $ */ +/* $OpenBSD: uvm.h,v 1.47 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* @@ -120,6 +120,7 @@ struct uvm { #define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */ #define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */ #define UVM_ET_HOLE 0x10 /* no backend */ +#define UVM_ET_FREEMAPPED 0x80 /* map entry is on free list (DEBUG) */ #define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) #define UVM_ET_ISSUBMAP(E) (((E)->etype & UVM_ET_SUBMAP) != 0) @@ -154,6 +155,23 @@ do { \ #define UVM_PAGE_OWN(PG, TAG) /* nothing */ #endif /* UVM_PAGE_TRKOWN */ +/* + * uvm_map internal functions. + * Used by uvm_map address selectors. + */ + +struct vm_map_entry *uvm_map_entrybyaddr(struct uvm_map_addr*, vaddr_t); +int uvm_map_isavail(struct vm_map*, + struct uvm_addr_state*, + struct vm_map_entry**, struct vm_map_entry**, + vaddr_t, vsize_t); +struct uvm_addr_state *uvm_map_uaddr(struct vm_map*, vaddr_t); +struct uvm_addr_state *uvm_map_uaddr_e(struct vm_map*, struct vm_map_entry*); + +#define VMMAP_FREE_START(_entry) ((_entry)->end + (_entry)->guard) +#define VMMAP_FREE_END(_entry) ((_entry)->end + (_entry)->guard + \ + (_entry)->fspace) + #endif /* _KERNEL */ #endif /* _UVM_UVM_H_ */ diff --git a/sys/uvm/uvm_addr.c b/sys/uvm/uvm_addr.c new file mode 100644 index 00000000000..486198e3891 --- /dev/null +++ b/sys/uvm/uvm_addr.c @@ -0,0 +1,1556 @@ +/* $OpenBSD: uvm_addr.c,v 1.1 2012/03/09 13:01:29 ariane Exp $ */ + +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* #define DEBUG */ + +#include <sys/param.h> +#include <uvm/uvm.h> +#include <uvm/uvm_addr.h> +#include <sys/pool.h> +#include <dev/rndvar.h> + +/* Max gap between hint allocations. */ +#define UADDR_HINT_MAXGAP (4 * PAGE_SIZE) +/* Number of pivots in pivot allocator. */ +#define NUM_PIVOTS 16 +/* + * Max number (inclusive) of pages the pivot allocator + * will place between allocations. + * + * The uaddr_pivot_random() function attempts to bias towards + * small space between allocations, so putting a large number here is fine. + */ +#define PIVOT_RND 8 +/* + * Number of allocations that a pivot can supply before expiring. + * When a pivot expires, a new pivot has to be found. + * + * Must be at least 1. + */ +#define PIVOT_EXPIRE 1024 + + +/* Pool with uvm_addr_state structures. 
*/ +struct pool uaddr_pool; +struct pool uaddr_hint_pool; +struct pool uaddr_bestfit_pool; +struct pool uaddr_pivot_pool; +struct pool uaddr_rnd_pool; + +/* uvm_addr state for hint based selector. */ +struct uaddr_hint_state { + struct uvm_addr_state uaddr; + vsize_t max_dist; +}; + +/* uvm_addr state for bestfit selector. */ +struct uaddr_bestfit_state { + struct uvm_addr_state ubf_uaddr; + struct uaddr_free_rbtree ubf_free; +}; + +/* uvm_addr state for rnd selector. */ +struct uaddr_rnd_state { + struct uvm_addr_state ur_uaddr; + TAILQ_HEAD(, vm_map_entry) ur_free; +}; + +/* + * Definition of a pivot in pivot selector. + */ +struct uaddr_pivot { + vaddr_t addr; /* End of prev. allocation. */ + int expire;/* Best before date. */ + int dir; /* Direction. */ + struct vm_map_entry *entry; /* Will contain next alloc. */ +}; +/* uvm_addr state for pivot selector. */ +struct uaddr_pivot_state { + struct uvm_addr_state up_uaddr; + + /* Free space tree, for fast pivot selection. */ + struct uaddr_free_rbtree up_free; + + /* List of pivots. The pointers point to after the last allocation. */ + struct uaddr_pivot up_pivots[NUM_PIVOTS]; +}; + +/* + * Free space comparison. + * Compares smaller free-space before larger free-space. + */ +static __inline int +uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2) +{ + if (e1->fspace != e2->fspace) + return (e1->fspace < e2->fspace ? -1 : 1); + return (e1->start < e2->start ? -1 : e1->start > e2->start); +} + +/* Forward declaration (see below). */ +extern const struct uvm_addr_functions uaddr_kernel_functions; +struct uvm_addr_state uaddr_kbootstrap; + + +/* + * Support functions. + */ + +struct vm_map_entry *uvm_addr_entrybyspace(struct uaddr_free_rbtree*, + vsize_t); +void uaddr_kinsert(struct vm_map*, struct uvm_addr_state*, + struct vm_map_entry*); +void uaddr_kremove(struct vm_map*, struct uvm_addr_state*, + struct vm_map_entry*); +void uaddr_kbootstrapdestroy(struct uvm_addr_state*); + +void uaddr_destroy(struct uvm_addr_state*); +void uaddr_hint_destroy(struct uvm_addr_state*); +void uaddr_kbootstrap_destroy(struct uvm_addr_state*); +void uaddr_rnd_destroy(struct uvm_addr_state*); +void uaddr_bestfit_destroy(struct uvm_addr_state*); +void uaddr_pivot_destroy(struct uvm_addr_state*); + +int uaddr_lin_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_kbootstrap_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_rnd_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_hint_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_bestfit_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_pivot_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); +int uaddr_stack_brk_select(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); + +void uaddr_rnd_insert(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uaddr_rnd_remove(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void 
uaddr_bestfit_insert(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uaddr_bestfit_remove(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uaddr_pivot_insert(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uaddr_pivot_remove(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); + +vsize_t uaddr_pivot_random(void); +int uaddr_pivot_newpivot(struct vm_map*, + struct uaddr_pivot_state*, struct uaddr_pivot*, + struct vm_map_entry**, vaddr_t*, + vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t); + +#if defined(DEBUG) || defined(DDB) +void uaddr_pivot_print(struct uvm_addr_state*, boolean_t, + int (*)(const char*, ...)); +void uaddr_rnd_print(struct uvm_addr_state*, boolean_t, + int (*)(const char*, ...)); +#endif /* DEBUG || DDB */ + + +/* + * Find smallest entry in tree that will fit sz bytes. + */ +struct vm_map_entry* +uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz) +{ + struct vm_map_entry *tmp, *res; + + tmp = RB_ROOT(free); + res = NULL; + while (tmp) { + if (tmp->fspace >= sz) { + res = tmp; + tmp = RB_LEFT(tmp, dfree.rbtree); + } else if (tmp->fspace < sz) + tmp = RB_RIGHT(tmp, dfree.rbtree); + } + return res; +} + +static __inline vaddr_t +uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset) +{ + vaddr_t adjusted; + + KASSERT(offset < align || (align == 0 && offset == 0)); + KASSERT((align & (align - 1)) == 0); + KASSERT((offset & PAGE_MASK) == 0); + + align = MAX(align, PAGE_SIZE); + adjusted = addr & ~(align - 1); + adjusted += offset; + return (adjusted < addr ? adjusted + align : adjusted); +} + +static __inline vaddr_t +uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset) +{ + vaddr_t adjusted; + + KASSERT(offset < align || (align == 0 && offset == 0)); + KASSERT((align & (align - 1)) == 0); + KASSERT((offset & PAGE_MASK) == 0); + + align = MAX(align, PAGE_SIZE); + adjusted = addr & ~(align - 1); + adjusted += offset; + return (adjusted > addr ? adjusted - align : adjusted); +} + +/* + * Try to fit the requested space into the entry. + */ +int +uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result, + vaddr_t low_addr, vaddr_t high_addr, vsize_t sz, + vaddr_t align, vaddr_t offset, + vsize_t before_gap, vsize_t after_gap) +{ + vaddr_t tmp; + vsize_t fspace; + + if (low_addr > high_addr) + return ENOMEM; + fspace = high_addr - low_addr; + if (fspace < sz + before_gap + after_gap) + return ENOMEM; + + /* + * Calculate lowest address. + */ + low_addr += before_gap; + low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset); + if (low_addr < tmp) /* Overflow during alignment. */ + return ENOMEM; + if (high_addr - after_gap - sz < low_addr) + return ENOMEM; + + /* + * Calculate highest address. + */ + high_addr -= after_gap + sz; + high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset); + if (high_addr > tmp) /* Overflow during alignment. */ + return ENOMEM; + if (low_addr > high_addr) + return ENOMEM; + + *min_result = low_addr; + *max_result = high_addr; + return 0; +} + + +/* + * Initialize uvm_addr. 
+ */ +void +uvm_addr_init() +{ + pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), + 0, 0, 0, "uaddr", &pool_allocator_nointr); + pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state), + 0, 0, 0, "uaddrhint", &pool_allocator_nointr); + pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), + 0, 0, 0, "uaddrbestfit", &pool_allocator_nointr); + pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), + 0, 0, 0, "uaddrpivot", &pool_allocator_nointr); + pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), + 0, 0, 0, "uaddrrnd", &pool_allocator_nointr); + + uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE; + uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE; + uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions; +} + +/* + * Invoke destructor function of uaddr. + */ +void +uvm_addr_destroy(struct uvm_addr_state *uaddr) +{ + if (uaddr) + (*uaddr->uaddr_functions->uaddr_destroy)(uaddr); +} + +/* + * Move address forward to satisfy align, offset. + */ +vaddr_t +uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset) +{ + vaddr_t result = (addr & ~(align - 1)) + offset; + if (result < addr) + result += align; + return result; +} + +/* + * Move address backwards to satisfy align, offset. + */ +vaddr_t +uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset) +{ + vaddr_t result = (addr & ~(align - 1)) + offset; + if (result > addr) + result -= align; + return result; +} + +/* + * Directional first fit. + * + * Do a lineair search for free space, starting at addr in entry. + * direction == 1: search forward + * direction == -1: search backward + * + * Output: low <= addr <= high and entry will contain addr. + * 0 will be returned if no space is available. + * + * gap describes the space that must appear between the preceding entry. + */ +int +uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset, + int direction, vaddr_t low, vaddr_t high, + vsize_t before_gap, vsize_t after_gap) +{ + struct vm_map_entry *entry; + vaddr_t low_addr, high_addr; + + KASSERT(entry_out != NULL && addr_out != NULL); + KASSERT(direction == -1 || direction == 1); + KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 && + (low & PAGE_MASK) == 0 && + (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0); + KASSERT(high + sz > high); /* Check for overflow. */ + + /* + * Hint magic. + */ + if (hint == 0) + hint = (direction == 1 ? low : high); + else if (hint > high) { + if (direction != -1) + return ENOMEM; + hint = high; + } else if (hint < low) { + if (direction != 1) + return ENOMEM; + hint = low; + } + + for (entry = uvm_map_entrybyaddr(&map->addr, + hint - (direction == -1 ? 1 : 0)); entry != NULL; + entry = (direction == 1 ? + RB_NEXT(uvm_map_addr, &map->addr, entry) : + RB_PREV(uvm_map_addr, &map->addr, entry))) { + if (VMMAP_FREE_START(entry) > high || + VMMAP_FREE_END(entry) < low) { + break; + } + + if (uvm_addr_fitspace(&low_addr, &high_addr, + MAX(low, VMMAP_FREE_START(entry)), + MIN(high, VMMAP_FREE_END(entry)), + sz, align, offset, before_gap, after_gap) == 0) { + *entry_out = entry; + if (hint >= low_addr && hint <= high_addr) { + *addr_out = hint; + } else { + *addr_out = (direction == 1 ? + low_addr : high_addr); + } + return 0; + } + } + + return ENOMEM; +} + +/* + * Invoke address selector of uaddr. + * uaddr may be NULL, in which case the algorithm will fail with ENOMEM. 
+ * + * Will invoke uvm_addr_isavail to fill in last_out. + */ +int +uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, struct vm_map_entry**last_out, + vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint) +{ + int error; + + if (uaddr == NULL) + return ENOMEM; + + hint &= ~((vaddr_t)PAGE_MASK); + if (hint != 0 && + !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr)) + return ENOMEM; + + error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr, + entry_out, addr_out, sz, align, offset, prot, hint); + + if (error == 0) { + KASSERT(*entry_out != NULL); + *last_out = NULL; + if (!uvm_map_isavail(map, uaddr, entry_out, last_out, + *addr_out, sz)) { + panic("uvm_addr_invoke: address selector %p " + "(%s 0x%lx-0x%lx) " + "returned unavailable address 0x%lx", + uaddr, uaddr->uaddr_functions->uaddr_name, + uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr, + *addr_out); + } + } + + return error; +} + +#if defined(DEBUG) || defined(DDB) +void +uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full, + int (*pr)(const char*, ...)) +{ + if (uaddr == NULL) { + (*pr)("- uvm_addr %s: NULL\n", slot); + return; + } + + (*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr, + uaddr->uaddr_functions->uaddr_name, + uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr); + if (uaddr->uaddr_functions->uaddr_print == NULL) + return; + + (*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr); +} +#endif /* DEBUG || DDB */ + +/* + * Destroy a uvm_addr_state structure. + * The uaddr must have been previously allocated from uaddr_state_pool. + */ +void +uaddr_destroy(struct uvm_addr_state *uaddr) +{ + pool_put(&uaddr_pool, uaddr); +} + + +/* + * Lineair allocator. + * This allocator uses a first-fit algorithm. + * + * If hint is set, search will start at the hint position. + * Only searches forward. + */ + +const struct uvm_addr_functions uaddr_lin_functions = { + .uaddr_select = &uaddr_lin_select, + .uaddr_destroy = &uaddr_destroy, + .uaddr_name = "uaddr_lin" +}; + +struct uvm_addr_state* +uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr) +{ + struct uvm_addr_state* uaddr; + + uaddr = pool_get(&uaddr_pool, PR_WAITOK); + uaddr->uaddr_minaddr = minaddr; + uaddr->uaddr_maxaddr = maxaddr; + uaddr->uaddr_functions = &uaddr_lin_functions; + return uaddr; +} + +int +uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + vaddr_t guard_sz; + + /* + * Deal with guardpages: search for space with one extra page. + */ + guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE); + + if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz) + return ENOMEM; + return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz, + align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz, + 0, guard_sz); +} + + +/* + * Randomized allocator. + * This allocator use uvm_map_hint to acquire a random address and searches + * from there. 
+ */ + +const struct uvm_addr_functions uaddr_rnd_functions = { + .uaddr_select = &uaddr_rnd_select, + .uaddr_free_insert = &uaddr_rnd_insert, + .uaddr_free_remove = &uaddr_rnd_remove, + .uaddr_destroy = &uaddr_rnd_destroy, +#if defined(DEBUG) || defined(DDB) + .uaddr_print = &uaddr_rnd_print, +#endif /* DEBUG || DDB */ + .uaddr_name = "uaddr_rnd" +}; + +struct uvm_addr_state* +uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr) +{ + struct uaddr_rnd_state* uaddr; + + uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK); + uaddr->ur_uaddr.uaddr_minaddr = minaddr; + uaddr->ur_uaddr.uaddr_maxaddr = maxaddr; + uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions; + TAILQ_INIT(&uaddr->ur_free); + return &uaddr->ur_uaddr; +} + +int +uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + struct vmspace *vm; + vaddr_t guard_sz; + vaddr_t low_addr, high_addr; + struct vm_map_entry *entry; + vsize_t before_gap, after_gap; + vaddr_t tmp; + + KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0); + vm = (struct vmspace*)map; + + /* Deal with guardpages: search for space with one extra page. */ + guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE); + + /* Quick fail if the allocation won't fit. */ + if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz) + return ENOMEM; + + /* Select a hint. */ + if (hint == 0) + hint = uvm_map_hint(vm, prot); + /* Clamp hint to uaddr range. */ + hint = MIN(MAX(hint, uaddr->uaddr_minaddr), + uaddr->uaddr_maxaddr - sz - guard_sz); + + /* Align hint to align,offset parameters. */ + tmp = hint; + hint = uvm_addr_align_forward(tmp, align, offset); + /* Check for overflow during alignment. */ + if (hint < tmp || hint > uaddr->uaddr_maxaddr - sz - guard_sz) + return ENOMEM; /* Compatibility mode: never look backwards. */ + + before_gap = 0; + after_gap = guard_sz; + + /* + * Find the first entry at or after hint with free space. + * + * Since we need an entry that is on the free-list, search until + * we hit an entry that is owned by our uaddr. + */ + for (entry = uvm_map_entrybyaddr(&map->addr, hint); + entry != NULL && + uvm_map_uaddr_e(map, entry) != uaddr; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + /* Fail if we search past uaddr_maxaddr. */ + if (VMMAP_FREE_START(entry) >= uaddr->uaddr_maxaddr) { + entry = NULL; + break; + } + } + + for ( /* initial entry filled in above */ ; + entry != NULL && VMMAP_FREE_START(entry) < uaddr->uaddr_maxaddr; + entry = TAILQ_NEXT(entry, dfree.tailq)) { + if (uvm_addr_fitspace(&low_addr, &high_addr, + MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)), + MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)), + sz, align, offset, before_gap, after_gap) == 0) { + *entry_out = entry; + if (hint >= low_addr && hint <= high_addr) + *addr_out = hint; + else + *addr_out = low_addr; + return 0; + } + } + + return ENOMEM; +} + +/* + * Destroy a uaddr_rnd_state structure. + */ +void +uaddr_rnd_destroy(struct uvm_addr_state *uaddr) +{ + pool_put(&uaddr_rnd_pool, uaddr); +} + +/* + * Add entry to tailq. + */ +void +uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_rnd_state *uaddr; + struct vm_map_entry *prev; + + uaddr = (struct uaddr_rnd_state*)uaddr_p; + KASSERT(entry == RB_FIND(uvm_map_addr, &map->addr, entry)); + + /* + * Make prev the first vm_map_entry before entry. 
+ */ + for (prev = RB_PREV(uvm_map_addr, &map->addr, entry); + prev != NULL; + prev = RB_PREV(uvm_map_addr, &map->addr, prev)) { + /* Stop and fail when reaching uaddr minaddr. */ + if (VMMAP_FREE_START(prev) < uaddr_p->uaddr_minaddr) { + prev = NULL; + break; + } + + KASSERT(prev->etype & UVM_ET_FREEMAPPED); + if (uvm_map_uaddr_e(map, prev) == uaddr_p) + break; + } + + /* Perform insertion. */ + if (prev == NULL) + TAILQ_INSERT_HEAD(&uaddr->ur_free, entry, dfree.tailq); + else + TAILQ_INSERT_AFTER(&uaddr->ur_free, prev, entry, dfree.tailq); +} + +/* + * Remove entry from tailq. + */ +void +uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_rnd_state *uaddr; + + uaddr = (struct uaddr_rnd_state*)uaddr_p; + TAILQ_REMOVE(&uaddr->ur_free, entry, dfree.tailq); +} + +#if defined(DEBUG) || defined(DDB) +void +uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full, + int (*pr)(const char*, ...)) +{ + struct vm_map_entry *entry; + struct uaddr_rnd_state *uaddr; + vaddr_t addr; + size_t count; + vsize_t space; + + uaddr = (struct uaddr_rnd_state*)uaddr_p; + addr = 0; + count = 0; + space = 0; + TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) { + count++; + space += entry->fspace; + + if (full) { + (*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n", + entry, entry->start, entry->end, + entry->guard, entry->fspace); + (*pr)("\t\tfree: 0x%lx-0x%lx\n", + VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); + } + if (entry->start < addr) { + if (!full) + (*pr)("\tentry %p: 0x%lx-0x%lx " + "G=0x%lx F=0x%lx\n", + entry, entry->start, entry->end, + entry->guard, entry->fspace); + (*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n", + entry->start, addr); + } + + addr = VMMAP_FREE_END(entry); + } + (*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space); +} +#endif /* DEBUG || DDB */ + + +/* + * An allocator that selects an address within distance of the hint. + * + * If no hint is given, the allocator refuses to allocate. + */ + +const struct uvm_addr_functions uaddr_hint_functions = { + .uaddr_select = &uaddr_hint_select, + .uaddr_destroy = &uaddr_hint_destroy, + .uaddr_name = "uaddr_hint" +}; + +/* + * Create uaddr_hint state. + */ +struct uvm_addr_state* +uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist) +{ + struct uaddr_hint_state* ua_hint; + + KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint)); + + ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK); + ua_hint->uaddr.uaddr_minaddr = minaddr; + ua_hint->uaddr.uaddr_maxaddr = maxaddr; + ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions; + ua_hint->max_dist = max_dist; + return &ua_hint->uaddr; +} + +/* + * Destroy uaddr_hint state. + */ +void +uaddr_hint_destroy(struct uvm_addr_state *uaddr) +{ + pool_put(&uaddr_hint_pool, uaddr); +} + +/* + * Hint selector. + * + * Attempts to find an address that is within max_dist of the hint. + */ +int +uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + struct uaddr_hint_state *uaddr = (struct uaddr_hint_state*)uaddr_param; + vsize_t before_gap, after_gap; + vaddr_t low, high; + int dir; + + if (hint == 0) + return ENOMEM; + + /* + * Calculate upper and lower bound for selected address. 
+ */ + high = hint + uaddr->max_dist; + if (high < hint) /* overflow */ + high = map->max_offset; + high = MIN(high, uaddr->uaddr.uaddr_maxaddr); + if (high < sz) + return ENOMEM; /* Protect against underflow. */ + high -= sz; + + /* Calculate lower bound for selected address. */ + low = hint - uaddr->max_dist; + if (low > hint) /* underflow */ + low = map->min_offset; + low = MAX(low, uaddr->uaddr.uaddr_minaddr); + + /* Search strategy setup. */ + before_gap = PAGE_SIZE + + (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); + after_gap = PAGE_SIZE + + (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); + dir = (arc4random() & 0x01) ? 1 : -1; + + /* + * Try to search: + * - forward, with gap + * - backward, with gap + * - forward, without gap + * - backward, without gap + * (Where forward is in the direction specified by dir and + * backward is in the direction specified by -dir). + */ + if (uvm_addr_linsearch(map, uaddr_param, + entry_out, addr_out, hint, sz, align, offset, + dir, low, high, before_gap, after_gap) == 0) + return 0; + if (uvm_addr_linsearch(map, uaddr_param, + entry_out, addr_out, hint, sz, align, offset, + -dir, low, high, before_gap, after_gap) == 0) + return 0; + + if (uvm_addr_linsearch(map, uaddr_param, + entry_out, addr_out, hint, sz, align, offset, + dir, low, high, 0, 0) == 0) + return 0; + if (uvm_addr_linsearch(map, uaddr_param, + entry_out, addr_out, hint, sz, align, offset, + -dir, low, high, 0, 0) == 0) + return 0; + + return ENOMEM; +} + +/* + * Kernel allocation bootstrap logic. + */ + +const struct uvm_addr_functions uaddr_kernel_functions = { + .uaddr_select = &uaddr_kbootstrap_select, + .uaddr_destroy = &uaddr_kbootstrap_destroy, + .uaddr_name = "uaddr_kbootstrap" +}; + +/* + * Select an address from the map. + * + * This function ignores the uaddr spec and instead uses the map directly. + * Because of that property, the uaddr algorithm can be shared across all + * kernel maps. + */ +int +uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry **entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint) +{ + vaddr_t tmp; + + RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) { + if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr && + uvm_addr_fitspace(addr_out, &tmp, + VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out), + sz, align, offset, 0, 0) == 0) + return 0; + } + + return ENOMEM; +} + +/* + * Don't destroy the kernel bootstrap allocator. + */ +void +uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr) +{ + KASSERT(uaddr == (struct uvm_addr_state*)&uaddr_kbootstrap); +} + +/* + * Best fit algorithm. 
+ */ + +const struct uvm_addr_functions uaddr_bestfit_functions = { + .uaddr_select = &uaddr_bestfit_select, + .uaddr_free_insert = &uaddr_bestfit_insert, + .uaddr_free_remove = &uaddr_bestfit_remove, + .uaddr_destroy = &uaddr_bestfit_destroy, + .uaddr_name = "uaddr_bestfit" +}; + +struct uvm_addr_state* +uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr) +{ + struct uaddr_bestfit_state *uaddr; + + uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK); + uaddr->ubf_uaddr.uaddr_minaddr = minaddr; + uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr; + uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions; + RB_INIT(&uaddr->ubf_free); + return &uaddr->ubf_uaddr; +} + +void +uaddr_bestfit_destroy(struct uvm_addr_state *uaddr) +{ + pool_put(&uaddr_bestfit_pool, uaddr); +} + +void +uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_bestfit_state *uaddr; + struct vm_map_entry *rb_rv; + + uaddr = (struct uaddr_bestfit_state*)uaddr_p; + if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) != + NULL) { + panic("%s: duplicate insertion: state %p " + "interting %p, colliding with %p", __func__, + uaddr, entry, rb_rv); + } +} + +void +uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_bestfit_state *uaddr; + + uaddr = (struct uaddr_bestfit_state*)uaddr_p; + if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry) + panic("%s: entry was not in tree", __func__); +} + +int +uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + vaddr_t min, max; + struct uaddr_bestfit_state *uaddr; + struct vm_map_entry *entry; + vsize_t guardsz; + + uaddr = (struct uaddr_bestfit_state*)uaddr_p; + guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0); + + /* + * Find smallest item on freelist capable of holding item. + * Deal with guardpages: search for space with one extra page. + */ + entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz); + if (entry == NULL) + return ENOMEM; + + /* + * Walk the tree until we find an entry that fits. + */ + while (uvm_addr_fitspace(&min, &max, + VMMAP_FREE_START(entry), VMMAP_FREE_END(entry), + sz, align, offset, 0, guardsz) != 0) { + entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry); + if (entry == NULL) + return ENOMEM; + } + + /* + * Return the address that generates the least fragmentation. + */ + *entry_out = entry; + *addr_out = (min - VMMAP_FREE_START(entry) <= + VMMAP_FREE_END(entry) - guardsz - sz - max ? + min : max); + return 0; +} + + +/* + * A userspace allocator based on pivots. + */ + +const struct uvm_addr_functions uaddr_pivot_functions = { + .uaddr_select = &uaddr_pivot_select, + .uaddr_free_insert = &uaddr_pivot_insert, + .uaddr_free_remove = &uaddr_pivot_remove, + .uaddr_destroy = &uaddr_pivot_destroy, +#if defined(DEBUG) || defined(DDB) + .uaddr_print = &uaddr_pivot_print, +#endif /* DEBUG || DDB */ + .uaddr_name = "uaddr_pivot" +}; + +/* + * A special random function for pivots. + * + * This function will return: + * - a random number + * - a multiple of PAGE_SIZE + * - at least PAGE_SIZE + * + * The random function has a slightly higher change to return a small number. + */ +vsize_t +uaddr_pivot_random() +{ + int r; + + /* + * The sum of two six-sided dice will have a normal distribution. 
+ * We map the highest probable number to 1, by folding the curve + * (think of a graph on a piece of paper, that you fold). + * + * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1 + * have the same and highest probability of happening. + */ + r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) - + (PIVOT_RND - 1); + if (r < 0) + r = -r; + + /* + * Make the returned value at least PAGE_SIZE and a multiple of + * PAGE_SIZE. + */ + return (vaddr_t)(1 + r) << PAGE_SHIFT; +} + +/* + * Select a new pivot. + * + * A pivot must: + * - be chosen random + * - have a randomly chosen gap before it, where the uaddr_state starts + * - have a randomly chosen gap after it, before the uaddr_state ends + * + * Furthermore, the pivot must provide sufficient space for the allocation. + * The addr will be set to the selected address. + * + * Returns ENOMEM on failure. + */ +int +uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr, + struct uaddr_pivot *pivot, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vsize_t before_gap, vsize_t after_gap) +{ + struct vm_map_entry *entry, *found; + vaddr_t minaddr, maxaddr; + vsize_t dist; + vaddr_t found_minaddr, found_maxaddr; + vaddr_t min, max; + vsize_t arc4_arg; + int fit_error; + u_int32_t path; + + minaddr = uaddr->up_uaddr.uaddr_minaddr; + maxaddr = uaddr->up_uaddr.uaddr_maxaddr; + KASSERT(minaddr < maxaddr); +#ifdef DIAGNOSTIC + if (minaddr + 2 * PAGE_SIZE > maxaddr) { + panic("uaddr_pivot_newpivot: cannot grant random pivot " + "in area less than 2 pages (size = 0x%lx)", + maxaddr - minaddr); + } +#endif /* DIAGNOSTIC */ + + /* + * Gap calculation: 1/32 of the size of the managed area. + * + * At most: sufficient to not get truncated at arc4random. + * At least: 2 PAGE_SIZE + * + * minaddr and maxaddr will be changed according to arc4random. + */ + dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE); + if (dist >> PAGE_SHIFT > 0xffffffff) { + minaddr += (vsize_t)arc4random() << PAGE_SHIFT; + maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT; + } else { + minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) << + PAGE_SHIFT; + maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) << + PAGE_SHIFT; + } + + /* + * A very fast way to find an entry that will be large enough + * to hold the allocation, but still is found more or less + * randomly: the tree path selector has a 50% chance to go for + * a bigger or smaller entry. + * + * Note that the memory may actually be available, + * but the fragmentation may be so bad and the gaps chosen + * so unfortunately, that the allocation will not succeed. + * Or the alignment can only be satisfied by an entry that + * is not visited in the randomly selected path. + * + * This code finds an entry with sufficient space in O(log n) time. + */ + path = arc4random(); + found = NULL; + entry = RB_ROOT(&uaddr->up_free); + while (entry != NULL) { + fit_error = uvm_addr_fitspace(&min, &max, + MAX(VMMAP_FREE_START(entry), minaddr), + MIN(VMMAP_FREE_END(entry), maxaddr), + sz, align, offset, before_gap, after_gap); + + /* It fits, save this entry. */ + if (fit_error == 0) { + found = entry; + found_minaddr = min; + found_maxaddr = max; + } + + /* Next. 
*/ + if (fit_error != 0) + entry = RB_RIGHT(entry, dfree.rbtree); + else if ((path & 0x1) == 0) { + path >>= 1; + entry = RB_RIGHT(entry, dfree.rbtree); + } else { + path >>= 1; + entry = RB_LEFT(entry, dfree.rbtree); + } + } + if (found == NULL) + return ENOMEM; /* Not found a large enough region. */ + + /* + * Calculate a random address within found. + * + * found_minaddr and found_maxaddr are already aligned, so be sure + * to select a multiple of align as the offset in the entry. + * Preferably, arc4random_uniform is used to provide no bias within + * the entry. + * However if the size of the entry exceeds arc4random_uniforms + * argument limit, we simply use arc4random (thus limiting ourselves + * to 4G * PAGE_SIZE bytes offset). + */ + if (found_maxaddr == found_minaddr) + *addr_out = found_minaddr; + else { + KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0); + arc4_arg = found_maxaddr - found_minaddr; + if (arc4_arg > 0xffffffff) { + *addr_out = found_minaddr + + (arc4random() & (align - 1)); + } else { + *addr_out = found_minaddr + + (arc4random_uniform(arc4_arg) & (align - 1)); + } + } + /* Address was found in this entry. */ + *entry_out = found; + + /* + * Set up new pivot and return selected address. + * + * Depending on the direction of the pivot, the pivot must be placed + * at the bottom or the top of the allocation: + * - if the pivot moves upwards, place the pivot at the top of the + * allocation, + * - if the pivot moves downwards, place the pivot at the bottom + * of the allocation. + */ + pivot->entry = found; + pivot->dir = (arc4random() & 0x1 ? 1 : -1); + if (pivot->dir > 0) + pivot->addr = *addr_out + sz; + else + pivot->addr = *addr_out; + pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */ + return 0; +} + +/* + * Pivot selector. + * + * Each time the selector is invoked, it will select a random pivot, which + * it will use to select memory with. The memory will be placed at the pivot, + * with a randomly sized gap between the allocation and the pivot. + * The pivot will then move so it will never revisit this address. + * + * Each allocation, the pivot expiry timer ticks. Once the pivot becomes + * expired, it will be replaced with a newly created pivot. Pivots also + * automatically expire if they fail to provide memory for an allocation. + * + * Expired pivots are replaced using the uaddr_pivot_newpivot() function, + * which will ensure the pivot points at memory in such a way that the + * allocation will succeed. + * As an added bonus, the uaddr_pivot_newpivot() function will perform the + * allocation immediately and move the pivot as appropriate. + * + * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the + * allocation to succeed, it will not create a new pivot and the allocation + * will fail. + * + * A pivot running into used memory will automatically expire (because it will + * fail to allocate). + * + * Characteristics of the allocator: + * - best case, an allocation is O(log N) + * (it would be O(1), if it werent for the need to check if the memory is + * free; although that can be avoided...) + * - worst case, an allocation is O(log N) + * (the uaddr_pivot_newpivot() function has that complexity) + * - failed allocations always take O(log N) + * (the uaddr_pivot_newpivot() function will walk that deep into the tree). 
+ */ +int +uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + struct uaddr_pivot_state *uaddr; + struct vm_map_entry *entry; + struct uaddr_pivot *pivot; + vaddr_t min, max; + vsize_t before_gap, after_gap; + int err; + + /* Hint must be handled by dedicated hint allocator. */ + if (hint != 0) + return EINVAL; + + /* + * Select a random pivot and a random gap sizes around the allocation. + */ + uaddr = (struct uaddr_pivot_state*)uaddr_p; + pivot = &uaddr->up_pivots[ + arc4random_uniform(nitems(uaddr->up_pivots))]; + before_gap = uaddr_pivot_random(); + after_gap = uaddr_pivot_random(); + if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0) + goto expired; /* Pivot is invalid (null or expired). */ + + /* + * Attempt to use the pivot to map the entry. + */ + entry = pivot->entry; + if (pivot->dir > 0) { + if (uvm_addr_fitspace(&min, &max, + MAX(VMMAP_FREE_START(entry), pivot->addr), + VMMAP_FREE_END(entry), sz, align, offset, + before_gap, after_gap) == 0) { + *addr_out = min; + *entry_out = entry; + pivot->addr = min + sz; + pivot->expire--; + return 0; + } + } else { + if (uvm_addr_fitspace(&min, &max, + VMMAP_FREE_START(entry), + MIN(VMMAP_FREE_END(entry), pivot->addr), + sz, align, offset, before_gap, after_gap) == 0) { + *addr_out = max; + *entry_out = entry; + pivot->addr = max; + pivot->expire--; + return 0; + } + } + +expired: + /* + * Pivot expired or allocation failed. + * Use pivot selector to do the allocation and find a new pivot. + */ + err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out, + sz, align, offset, before_gap, after_gap); + return err; +} + +/* + * Free the pivot. + */ +void +uaddr_pivot_destroy(struct uvm_addr_state *uaddr) +{ + pool_put(&uaddr_pivot_pool, uaddr); +} + +/* + * Insert an entry with free space in the space tree. + */ +void +uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_pivot_state *uaddr; + struct vm_map_entry *rb_rv; + struct uaddr_pivot *p; + vaddr_t check_addr; + vaddr_t start, end; + + uaddr = (struct uaddr_pivot_state*)uaddr_p; + if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) != + NULL) { + panic("%s: duplicate insertion: state %p " + "inserting entry %p which collides with %p", __func__, + uaddr, entry, rb_rv); + } + + start = VMMAP_FREE_START(entry); + end = VMMAP_FREE_END(entry); + + /* + * Update all pivots that are contained in this entry. + */ + for (p = &uaddr->up_pivots[0]; + p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { + check_addr = p->addr; + if (check_addr == 0) + continue; + if (p->dir < 0) + check_addr--; + + if (start <= check_addr && + check_addr < end) { + KASSERT(p->entry == NULL); + p->entry = entry; + } + } +} + +/* + * Remove an entry with free space from the space tree. + */ +void +uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, + struct vm_map_entry *entry) +{ + struct uaddr_pivot_state *uaddr; + struct uaddr_pivot *p; + + uaddr = (struct uaddr_pivot_state*)uaddr_p; + if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry) + panic("%s: entry was not in tree", __func__); + + /* + * Inform any pivot with this entry that the entry is gone. + * Note that this does not automatically invalidate the pivot. 
+ */ + for (p = &uaddr->up_pivots[0]; + p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { + if (p->entry == entry) + p->entry = NULL; + } +} + +/* + * Create a new pivot selector. + * + * Initially, all pivots are in the expired state. + * Two reasons for this: + * - it means this allocator will not take a huge amount of time + * - pivots select better on demand, because the pivot selection will be + * affected by preceding allocations: + * the next pivots will likely end up in different segments of free memory, + * that was segmented by an earlier allocation; better spread. + */ +struct uvm_addr_state* +uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr) +{ + struct uaddr_pivot_state *uaddr; + + uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK); + uaddr->up_uaddr.uaddr_minaddr = minaddr; + uaddr->up_uaddr.uaddr_maxaddr = maxaddr; + uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions; + RB_INIT(&uaddr->up_free); + bzero(uaddr->up_pivots, sizeof(uaddr->up_pivots)); + + return &uaddr->up_uaddr; +} + +#if defined(DEBUG) || defined(DDB) +/* + * Print the uaddr_pivot_state. + * + * If full, a listing of all entries in the state will be provided. + */ +void +uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full, + int (*pr)(const char*, ...)) +{ + struct uaddr_pivot_state *uaddr; + struct uaddr_pivot *pivot; + struct vm_map_entry *entry; + int i; + vaddr_t check_addr; + + uaddr = (struct uaddr_pivot_state*)uaddr_p; + + for (i = 0; i < NUM_PIVOTS; i++) { + pivot = &uaddr->up_pivots[i]; + + (*pr)("\tpivot 0x%lx, epires in %d, direction %d\n", + pivot->addr, pivot->expire, pivot->dir); + } + if (!full) + return; + + if (RB_EMPTY(&uaddr->up_free)) + (*pr)("\tempty\n"); + /* Print list of free space. */ + RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) { + (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n", + VMMAP_FREE_START(entry), VMMAP_FREE_END(entry), + VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry)); + + for (i = 0; i < NUM_PIVOTS; i++) { + pivot = &uaddr->up_pivots[i]; + check_addr = pivot->addr; + if (check_addr == 0) + continue; + if (pivot->dir < 0) + check_addr--; + + if (VMMAP_FREE_START(entry) <= check_addr && + check_addr < VMMAP_FREE_END(entry)) { + (*pr)("\t\tcontains pivot %d (0x%lx)\n", + i, pivot->addr); + } + } + } +} +#endif /* DEBUG || DDB */ + +/* + * Strategy for uaddr_stack_brk_select. + */ +struct uaddr_bs_strat { + vaddr_t start; /* Start of area. */ + vaddr_t end; /* End of area. */ + int dir; /* Search direction. */ +}; + +/* + * Stack/break allocator. + * + * Stack area is grown into in the opposite direction of the stack growth, + * brk area is grown downward (because sbrk() grows upward). + * + * Both areas are grown into proportially: a weighted chance is used to + * select which one (stack or brk area) to try. If the allocation fails, + * the other one is tested. + */ + +const struct uvm_addr_functions uaddr_stack_brk_functions = { + .uaddr_select = &uaddr_stack_brk_select, + .uaddr_destroy = &uaddr_destroy, + .uaddr_name = "uaddr_stckbrk" +}; + +/* + * Stack/brk address selector. + */ +int +uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint) +{ + vsize_t before_gap, after_gap; + int stack_idx, brk_idx; + struct uaddr_bs_strat strat[2], *s; + vsize_t sb_size; + + /* + * Choose gap size and if the stack is searched before or after the + * brk area. 
+ */ + before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; + after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; + + sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start); + sb_size >>= PAGE_SHIFT; + if (arc4random_uniform(MAX(sb_size, 0xffffffff)) > + map->b_end - map->b_start) { + brk_idx = 1; + stack_idx = 0; + } else { + brk_idx = 0; + stack_idx = 1; + } + + /* + * Set up stack search strategy. + */ + s = &strat[stack_idx]; + s->start = MAX(map->s_start, uaddr->uaddr_minaddr); + s->end = MIN(map->s_end, uaddr->uaddr_maxaddr); +#ifdef MACHINE_STACK_GROWS_UP + s->dir = -1; +#else + s->dir = 1; +#endif + + /* + * Set up brk search strategy. + */ + s = &strat[brk_idx]; + s->start = MAX(map->b_start, uaddr->uaddr_minaddr); + s->end = MIN(map->b_end, uaddr->uaddr_maxaddr); + s->dir = -1; /* Opposite of brk() growth. */ + + /* + * Linear search for space. + */ + for (s = &strat[0]; s < &strat[nitems(strat)]; s++) { + if (s->end - s->start < sz) + continue; + if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out, + 0, sz, align, offset, s->dir, s->start, s->end - sz, + before_gap, after_gap) == 0) + return 0; + } + + return ENOMEM; +} + +struct uvm_addr_state* +uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr) +{ + struct uvm_addr_state* uaddr; + + uaddr = pool_get(&uaddr_pool, PR_WAITOK); + uaddr->uaddr_minaddr = minaddr; + uaddr->uaddr_maxaddr = maxaddr; + uaddr->uaddr_functions = &uaddr_stack_brk_functions; + return uaddr; +} + + +RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree, + uvm_mapent_fspace_cmp); diff --git a/sys/uvm/uvm_addr.h b/sys/uvm/uvm_addr.h new file mode 100644 index 00000000000..5d94947d5a3 --- /dev/null +++ b/sys/uvm/uvm_addr.h @@ -0,0 +1,116 @@ +/* $OpenBSD: uvm_addr.h,v 1.1 2012/03/09 13:01:29 ariane Exp $ */ + +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _UVM_UVM_ADDR_H_ +#define _UVM_UVM_ADDR_H_ + +#include <uvm/uvm_extern.h> + +/* + * Address selection logic. + * + * Address selection is just that: selection. These functions may make no + * changes to the map, except for their own state (which is passed as a + * uaddr_state pointer). + */ + + +/* + * UVM address selection base state. + * + * Each uvm address algorithm requires these parameters: + * - lower bound address (page aligned) + * - upper bound address (page aligned) + * - function address pointers + */ +struct uvm_addr_state { + vaddr_t uaddr_minaddr; + vaddr_t uaddr_maxaddr; + const struct uvm_addr_functions *uaddr_functions; +}; + +/* + * This structure describes one algorithm implementation. 
+ * + * Each algorithm is described in terms of: + * - uaddr_select: an address selection algorithm + * - uaddr_free_insert: a freelist insertion function (optional) + * - uaddr_free_remove: a freelist deletion function (optional) + * - uaddr_destroy: a destructor for the algorithm state + */ +struct uvm_addr_functions { + int (*uaddr_select)(struct vm_map *map, + struct uvm_addr_state *uaddr, + struct vm_map_entry**entry_out, vaddr_t *addr_out, + vsize_t sz, vaddr_t align, vaddr_t offset, + vm_prot_t prot, vaddr_t hint); + void (*uaddr_free_insert)(struct vm_map *map, + struct uvm_addr_state *uaddr_state, + struct vm_map_entry *entry); + void (*uaddr_free_remove)(struct vm_map *map, + struct uvm_addr_state *uaddr_state, + struct vm_map_entry *entry); + void (*uaddr_destroy)(struct uvm_addr_state *uaddr_state); + void (*uaddr_print)(struct uvm_addr_state *uaddr_state, boolean_t full, + int (*pr)(const char*, ...)); + + const char* uaddr_name; /* Name of the allocator. */ +}; + + +#ifdef _KERNEL + +void uvm_addr_init(void); +void uvm_addr_destroy(struct uvm_addr_state*); +vaddr_t uvm_addr_align(vaddr_t, vaddr_t, vaddr_t); +vaddr_t uvm_addr_align_back(vaddr_t, vaddr_t, vaddr_t); +int uvm_addr_linsearch(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + vaddr_t *addr_out, vaddr_t, vsize_t, + vaddr_t, vaddr_t, int, vaddr_t, vaddr_t, + vsize_t, vsize_t); +int uvm_addr_invoke(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry**, + struct vm_map_entry**, vaddr_t*, + vsize_t, vaddr_t, vaddr_t, vm_prot_t, vaddr_t); +struct uvm_addr_state *uaddr_lin_create(vaddr_t, vaddr_t); +struct uvm_addr_state *uaddr_rnd_create(vaddr_t, vaddr_t); +struct uvm_addr_state *uaddr_hint_create(vaddr_t, vaddr_t, vsize_t); +struct uvm_addr_state *uaddr_bestfit_create(vaddr_t, vaddr_t); +struct uvm_addr_state *uaddr_pivot_create(vaddr_t, vaddr_t); +struct uvm_addr_state *uaddr_stack_brk_create(vaddr_t, vaddr_t); +int uvm_addr_fitspace(vaddr_t*, vaddr_t*, + vaddr_t, vaddr_t, vsize_t, vaddr_t, vaddr_t, + vsize_t, vsize_t); + +#if defined(DEBUG) || defined(DDB) +void uvm_addr_print(struct uvm_addr_state*, const char*, + boolean_t, int (*pr)(const char*, ...)); +#endif /* DEBUG || DDB */ + +/* + * Kernel bootstrap allocator. 
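/*
 * Editor's note: a hedged sketch of how an allocator plugs into the
 * function table declared above.  The uaddr_example_* names are
 * hypothetical, not part of this commit; the shape mirrors
 * uaddr_stack_brk_functions earlier in the diff.  The free-list hooks
 * are documented as optional and stay NULL for allocators that walk
 * the address tree directly.
 */
int	uaddr_example_select(struct vm_map *, struct uvm_addr_state *,
	    struct vm_map_entry **, vaddr_t *, vsize_t, vaddr_t, vaddr_t,
	    vm_prot_t, vaddr_t);
void	uaddr_example_destroy(struct uvm_addr_state *);

const struct uvm_addr_functions uaddr_example_functions = {
	.uaddr_select = &uaddr_example_select,
	.uaddr_free_insert = NULL,		/* optional */
	.uaddr_free_remove = NULL,		/* optional */
	.uaddr_destroy = &uaddr_example_destroy,
	.uaddr_name = "uaddr_example"
};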
+ */ +RB_HEAD(uaddr_free_rbtree, vm_map_entry); +RB_PROTOTYPE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree, + uvm_mapent_fspace_cmp); + +extern struct uvm_addr_state uaddr_kbootstrap; + +#endif /* _KERNEL */ +#endif /* _UVM_UVM_ADDR_H_ */ diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 201abdb923a..991a44d4776 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.103 2011/07/08 00:10:59 tedu Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.104 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* @@ -185,6 +185,7 @@ typedef int vm_prot_t; #define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ #define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ #define UVM_FLAG_HOLE 0x400000 /* no backend */ +#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) @@ -644,10 +645,9 @@ void km_free(void *, size_t, const struct kmem_va_mode *, const struct kmem_pa_mode *); /* uvm_map.c */ -#define uvm_map(_m, _a, _sz, _u, _f, _al, _fl) uvm_map_p(_m, _a, _sz, _u, _f, _al, _fl, 0) -int uvm_map_p(vm_map_t, vaddr_t *, vsize_t, +int uvm_map(vm_map_t, vaddr_t *, vsize_t, struct uvm_object *, voff_t, vsize_t, - uvm_flag_t, struct proc *); + uvm_flag_t); int uvm_map_pageable(vm_map_t, vaddr_t, vaddr_t, boolean_t, int); int uvm_map_pageable_all(vm_map_t, int, vsize_t); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index b699bba34c5..03a4418dac6 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.62 2011/07/03 18:34:14 oga Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.63 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -1701,7 +1701,7 @@ uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end) void uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) { - vm_map_entry_t entry; + vm_map_entry_t entry, next; pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; @@ -1734,9 +1734,9 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) */ KASSERT(va >= entry->start); while (va >= entry->end) { - KASSERT(entry->next != &map->header && - entry->next->start <= entry->end); - entry = entry->next; + next = RB_NEXT(uvm_map_addr, &map->addr, entry); + KASSERT(next != NULL && next->start <= entry->end); + entry = next; } /* @@ -1825,6 +1825,9 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) */ while (1) { + if (ufi->orig_rvaddr < ufi->map->min_offset || + ufi->orig_rvaddr >= ufi->map->max_offset) + return(FALSE); /* * lock map @@ -1839,7 +1842,7 @@ uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock) * lookup */ if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, - &ufi->entry)) { + &ufi->entry)) { uvmfault_unlockmaps(ufi, write_lock); return(FALSE); } diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c index fce559d83e5..81110d054e8 100644 --- a/sys/uvm/uvm_init.c +++ b/sys/uvm/uvm_init.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_init.c,v 1.28 2010/08/07 03:50:02 krw Exp $ */ +/* $OpenBSD: uvm_init.c,v 1.29 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_init.c,v 1.14 2000/06/27 17:29:23 mrg Exp $ */ /* @@ -51,6 +51,7 @@ #include <sys/pool.h> #include <uvm/uvm.h> +#include <uvm/uvm_addr.h> /* * struct uvm: we store all global vars in this structure to make them @@ -177,4 +178,15 @@ uvm_init(void) * init anonymous 
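/*
 * Editor's note: a small sketch of the iteration idiom this commit
 * switches to.  Map entries used to be chained through entry->next off
 * map->header; with the address tree they are visited with RB_FOREACH,
 * or stepped explicitly with RB_NEXT as in the uvm_fault.c hunk above.
 * Assumes the caller holds the map locked.
 */
static void
walk_map_entries_sketch(struct vm_map *map)
{
	struct vm_map_entry *entry;

	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
		/* entry->start .. entry->end, in ascending address order */
	}
}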
memory systems */ uvm_anon_init(); + + /* + * Switch kernel and kmem_map over to a best-fit allocator, + * instead of walking the tree. + */ + uvm_map_set_uaddr(kernel_map, &kernel_map->uaddr_any[3], + uaddr_bestfit_create(vm_map_min(kernel_map), + vm_map_max(kernel_map))); + uvm_map_set_uaddr(kmem_map, &kmem_map->uaddr_any[3], + uaddr_bestfit_create(vm_map_min(kmem_map), + vm_map_max(kmem_map))); } diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c index 876b5420b6f..bfeea500ace 100644 --- a/sys/uvm/uvm_io.c +++ b/sys/uvm/uvm_io.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_io.c,v 1.19 2011/06/06 17:10:23 ariane Exp $ */ +/* $OpenBSD: uvm_io.c,v 1.20 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_io.c,v 1.12 2000/06/27 17:29:23 mrg Exp $ */ /* @@ -64,7 +64,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) { vaddr_t baseva, endva, pageoffset, kva; vsize_t chunksz, togo, sz; - vm_map_entry_t dead_entries; + struct uvm_map_deadq dead_entries; int error, extractflags; /* @@ -93,7 +93,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) chunksz = min(round_page(togo + pageoffset), MAXBSIZE); error = 0; - extractflags = UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG; + extractflags = 0; if (flags & UVM_IO_FIXPROT) extractflags |= UVM_EXTRACT_FIXPROT; @@ -107,7 +107,7 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) * step 2: extract mappings from the map into kernel_map */ - error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, + error = uvm_map_extract(map, baseva, chunksz, &kva, extractflags); if (error) { @@ -139,12 +139,11 @@ uvm_io(vm_map_t map, struct uio *uio, int flags) */ vm_map_lock(kernel_map); + TAILQ_INIT(&dead_entries); uvm_unmap_remove(kernel_map, kva, kva+chunksz, - &dead_entries, NULL, FALSE); + &dead_entries, FALSE, TRUE); vm_map_unlock(kernel_map); - - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, AMAP_REFALL); + uvm_unmap_detach(&dead_entries, AMAP_REFALL); /* * We defer checking the error return from uiomove until diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index da5686d0881..aa97110d6bf 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_km.c,v 1.106 2011/07/03 18:34:14 oga Exp $ */ +/* $OpenBSD: uvm_km.c,v 1.107 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ /* @@ -138,7 +138,6 @@ #include <sys/systm.h> #include <sys/proc.h> #include <sys/kthread.h> - #include <uvm/uvm.h> /* @@ -184,7 +183,13 @@ uvm_km_init(vaddr_t start, vaddr_t end) * before installing. 
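/*
 * Editor's note: the dead-entry queue pattern that replaces the old
 * dead_entries pointer, shown here as a hedged sketch.  It matches the
 * converted callers in the uvm_io.c and uvm_km.c hunks and is
 * essentially what the new uvm_unmap() helper in uvm_map.c does:
 * entries are unlinked under the map lock, while the expensive amap
 * and object teardown in uvm_unmap_detach() runs after the lock is
 * dropped.
 */
static void
unmap_range_sketch(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct uvm_map_deadq dead;

	TAILQ_INIT(&dead);
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
	vm_map_unlock(map);

	uvm_unmap_detach(&dead, 0);
}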
*/ - uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE); + uvm_map_setup(&kernel_map_store, base, end, +#ifdef KVA_GUARDPAGES + VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES +#else + VM_MAP_PAGEABLE +#endif + ); kernel_map_store.pmap = pmap_kernel(); if (base != start && uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, @@ -464,16 +469,16 @@ uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) void uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) { - struct vm_map_entry *dead_entries; + struct uvm_map_deadq dead_entries; vm_map_lock(map); + TAILQ_INIT(&dead_entries); uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), - &dead_entries, NULL, FALSE); + &dead_entries, FALSE, TRUE); wakeup(map); vm_map_unlock(map); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + uvm_unmap_detach(&dead_entries, 0); } /* @@ -692,8 +697,10 @@ struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *); void uvm_km_page_init(void) { - int lowat_min; - int i; + int lowat_min; + int i; + int len, bulk; + vaddr_t addr; mtx_init(&uvm_km_pages.mtx, IPL_VM); if (!uvm_km_pages.lowat) { @@ -709,14 +716,27 @@ uvm_km_page_init(void) if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX) uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX; - for (i = 0; i < uvm_km_pages.hiwat; i++) { - uvm_km_pages.page[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map, - NULL, PAGE_SIZE, UVM_KMF_NOWAIT|UVM_KMF_VALLOC); - if (uvm_km_pages.page[i] == 0) - break; + /* Allocate all pages in as few allocations as possible. */ + len = 0; + bulk = uvm_km_pages.hiwat; + while (len < uvm_km_pages.hiwat && bulk > 0) { + bulk = MIN(bulk, uvm_km_pages.hiwat - len); + addr = vm_map_min(kernel_map); + if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT, + NULL, UVM_UNKNOWN_OFFSET, 0, + UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, + UVM_ADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) { + bulk /= 2; + continue; + } + + for (i = len; i < len + bulk; i++, addr += PAGE_SIZE) + uvm_km_pages.page[i] = addr; + len += bulk; } - uvm_km_pages.free = i; - for ( ; i < UVM_KM_PAGES_HIWAT_MAX; i++) + + uvm_km_pages.free = len; + for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++) uvm_km_pages.page[i] = 0; /* tone down if really high */ @@ -760,17 +780,25 @@ uvm_km_thread(void *arg) mtx_leave(&uvm_km_pages.mtx); if (allocmore) { + bzero(pg, sizeof(pg)); for (i = 0; i < nitems(pg); i++) { - pg[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map, - NULL, PAGE_SIZE, UVM_KMF_VALLOC); + pg[i] = vm_map_min(kernel_map); + if (uvm_map(kernel_map, &pg[i], PAGE_SIZE, + NULL, UVM_UNKNOWN_OFFSET, 0, + UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, + UVM_INH_NONE, UVM_ADV_RANDOM, + UVM_KMF_TRYLOCK)) != 0) { + pg[i] = 0; + break; + } } - + mtx_enter(&uvm_km_pages.mtx); for (i = 0; i < nitems(pg); i++) { if (uvm_km_pages.free == nitems(uvm_km_pages.page)) break; - else + else if (pg[i] != 0) uvm_km_pages.page[uvm_km_pages.free++] = pg[i]; } @@ -778,8 +806,12 @@ uvm_km_thread(void *arg) mtx_leave(&uvm_km_pages.mtx); /* Cleanup left-over pages (if any). 
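/*
 * Editor's note: a standalone illustration (userland, not kernel code)
 * of the bulk-allocation strategy in the new uvm_km_page_init above:
 * request the whole remaining amount and halve the request on failure
 * until it fits or reaches zero.  try_alloc() is a stand-in for the
 * uvm_map() call in the real code.
 */
#include <stdio.h>

static int
try_alloc(int pages)
{
	/* Pretend only requests of at most 37 pages succeed. */
	return pages <= 37;
}

int
main(void)
{
	int want = 128, got = 0, bulk = 128;

	while (got < want && bulk > 0) {
		if (bulk > want - got)
			bulk = want - got;
		if (!try_alloc(bulk)) {
			bulk /= 2;	/* back off and retry */
			continue;
		}
		got += bulk;
	}
	printf("allocated %d of %d pages\n", got, want);
	return 0;
}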
*/ - for (; i < nitems(pg); i++) - uvm_km_free(kernel_map, pg[i], PAGE_SIZE); + for (; i < nitems(pg); i++) { + if (pg[i] != 0) { + uvm_unmap(kernel_map, + pg[i], pg[i] + PAGE_SIZE); + } + } } while (fp) { fp = uvm_km_doputpage(fp); @@ -808,7 +840,7 @@ uvm_km_doputpage(struct uvm_km_free_page *fp) mtx_leave(&uvm_km_pages.mtx); if (freeva) - uvm_km_free(kernel_map, va, PAGE_SIZE); + uvm_unmap(kernel_map, va, va + PAGE_SIZE); uvm_pagefree(pg); return (nextfp); diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index e097952a130..bc6b9df0281 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,7 +1,22 @@ -/* $OpenBSD: uvm_map.c,v 1.147 2011/11/24 18:47:34 guenther Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.148 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ -/* +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. 
* @@ -71,6 +86,9 @@ * uvm_map.c: uvm map operations */ +/* #define DEBUG */ +/* #define VMMAP_DEBUG */ + #include <sys/param.h> #include <sys/systm.h> #include <sys/mman.h> @@ -86,13 +104,189 @@ #endif #include <uvm/uvm.h> -#undef RB_AUGMENT -#define RB_AUGMENT(x) uvm_rb_augment(x) #ifdef DDB #include <uvm/uvm_ddb.h> #endif +#include <uvm/uvm_addr.h> + + +vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); +int uvm_mapent_isjoinable(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*); +struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, + struct vm_map_entry*, struct uvm_map_deadq*); +struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, + struct vm_map_entry*, struct uvm_map_deadq*); +struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, + struct vm_map_entry*, vaddr_t, vsize_t, int, + struct uvm_map_deadq*); +struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); +void uvm_mapent_free(struct vm_map_entry*); +void uvm_unmap_kill_entry(struct vm_map*, + struct vm_map_entry*); +void uvm_mapent_mkfree(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry**, + struct uvm_map_deadq*, boolean_t); +void uvm_map_pageable_pgon(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t, vaddr_t); +int uvm_map_pageable_wire(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t, vaddr_t, int); +void uvm_map_setup_entries(struct vm_map*); +void uvm_map_teardown(struct vm_map*); +void uvm_map_vmspace_update(struct vm_map*, + struct uvm_map_deadq*, int); +void uvm_map_kmem_grow(struct vm_map*, + struct uvm_map_deadq*, vsize_t, int); +void uvm_map_freelist_update_clear(struct vm_map*, + struct uvm_map_deadq*); +void uvm_map_freelist_update_refill(struct vm_map *, int); +void uvm_map_freelist_update(struct vm_map*, + struct uvm_map_deadq*, vaddr_t, vaddr_t, + vaddr_t, vaddr_t, int); +struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, + vaddr_t, vaddr_t, int); +int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, + struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, + int); +int uvm_map_findspace(struct vm_map*, + struct vm_map_entry**, struct vm_map_entry**, + vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, + vaddr_t); + +/* + * Tree management functions. + */ + +static __inline void uvm_mapent_copy(struct vm_map_entry*, + struct vm_map_entry*); +static int uvm_mapentry_addrcmp(struct vm_map_entry*, + struct vm_map_entry*); +static int uvm_mapentry_freecmp(struct vm_map_entry*, + struct vm_map_entry*); +void uvm_mapent_free_insert(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uvm_mapent_free_remove(struct vm_map*, + struct uvm_addr_state*, struct vm_map_entry*); +void uvm_mapent_addr_insert(struct vm_map*, + struct vm_map_entry*); +void uvm_mapent_addr_remove(struct vm_map*, + struct vm_map_entry*); +void uvm_map_splitentry(struct vm_map*, + struct vm_map_entry*, struct vm_map_entry*, + vaddr_t); +vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); +int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); + +/* + * uvm_vmspace_fork helper functions. 
+ */ +struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, + vsize_t, struct vm_map_entry*, + struct uvm_map_deadq*, int, int); +void uvm_mapent_forkshared(struct vmspace*, struct vm_map*, + struct vm_map*, struct vm_map_entry*, + struct uvm_map_deadq*); +void uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, + struct vm_map*, struct vm_map_entry*, + struct uvm_map_deadq*); + +/* + * Tree validation. + */ + +#ifdef VMMAP_DEBUG +void uvm_tree_assert(struct vm_map*, int, char*, + char*, int); +#define UVM_ASSERT(map, cond, file, line) \ + uvm_tree_assert((map), (cond), #cond, (file), (line)) +void uvm_tree_sanity(struct vm_map*, char*, int); +void uvm_tree_size_chk(struct vm_map*, char*, int); +void vmspace_validate(struct vm_map*); +#else +#define uvm_tree_sanity(_map, _file, _line) do {} while (0) +#define uvm_tree_size_chk(_map, _file, _line) do {} while (0) +#define vmspace_validate(_map) do {} while (0) +#endif + +/* + * All architectures will have pmap_prefer. + */ +#ifndef PMAP_PREFER +#define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE +#define PMAP_PREFER_OFFSET(off) 0 +#define PMAP_PREFER(addr, off) (addr) +#endif + + +/* + * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. + * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. + * + * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size + * each time. + */ +#define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) +#define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) +#define VM_MAP_KSIZE_ALLOCMUL 4 +/* + * When selecting a random free-space block, look at most FSPACE_DELTA blocks + * ahead. + */ +#define FSPACE_DELTA 8 +/* + * Put allocations adjecent to previous allocations when the free-space tree + * is larger than FSPACE_COMPACT entries. + * + * Alignment and PMAP_PREFER may still cause the entry to not be fully + * adjecent. Note that this strategy reduces memory fragmentation (by leaving + * a large space before or after the allocation). + */ +#define FSPACE_COMPACT 128 +/* + * Make the address selection skip at most this many bytes from the start of + * the free space in which the allocation takes place. + * + * The main idea behind a randomized address space is that an attacker cannot + * know where to target his attack. Therefore, the location of objects must be + * as random as possible. However, the goal is not to create the most sparse + * map that is possible. + * FSPACE_MAXOFF pushes the considered range in bytes down to less insane + * sizes, thereby reducing the sparseness. The biggest randomization comes + * from fragmentation, i.e. FSPACE_COMPACT. + */ +#define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) +/* + * Allow for small gaps in the overflow areas. + * Gap size is in bytes and does not have to be a multiple of page-size. + */ +#define FSPACE_BIASGAP ((vaddr_t)32 * 1024) + +/* auto-allocate address lower bound */ +#define VMMAP_MIN_ADDR PAGE_SIZE + + +#ifdef DEADBEEF0 +#define UVMMAP_DEADBEEF ((void*)DEADBEEF0) +#else +#define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) +#endif + +#ifdef DEBUG +int uvm_map_printlocks = 0; + +#define LPRINTF(_args) \ + do { \ + if (uvm_map_printlocks) \ + printf _args; \ + } while (0) +#else +#define LPRINTF(_args) do {} while (0) +#endif + static struct timeval uvm_kmapent_last_warn_time; static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; @@ -101,287 +295,1155 @@ const char vmmapbsy[] = "vmmapbsy"; /* * pool for vmspace structures. 
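/*
 * Editor's note: a standalone sketch (userland) of what capping the
 * random offset buys, per the FSPACE_MAXOFF comment above.  The exact
 * use of the constant is in selection code outside this excerpt, so
 * treat this as an illustration of the idea rather than the
 * implementation: the offset into a free range is drawn from
 * min(range, FSPACE_MAXOFF) instead of the whole range, so huge free
 * areas do not make the map arbitrarily sparse.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define EX_FSPACE_MAXOFF	((uint64_t)32 * 1024 * 1024)

int
main(void)
{
	uint64_t range = (uint64_t)8 * 1024 * 1024 * 1024;	/* 8 GB free */
	uint64_t limit = range < EX_FSPACE_MAXOFF ? range : EX_FSPACE_MAXOFF;
	uint64_t off = arc4random_uniform((uint32_t)limit);

	printf("offset 0x%llx into a 0x%llx-byte free range\n",
	    (unsigned long long)off, (unsigned long long)range);
	return 0;
}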
*/ - struct pool uvm_vmspace_pool; /* * pool for dynamically-allocated map entries. */ - struct pool uvm_map_entry_pool; struct pool uvm_map_entry_kmem_pool; -#ifdef PMAP_GROWKERNEL /* * This global represents the end of the kernel virtual address - * space. If we want to exceed this, we must grow the kernel + * space. If we want to exceed this, we must grow the kernel * virtual address space dynamically. * * Note, this variable is locked by kernel_map's lock. */ vaddr_t uvm_maxkaddr; -#endif /* - * macros + * Locking predicate. */ +#define UVM_MAP_REQ_WRITE(_map) \ + do { \ + if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ + rw_assert_wrlock(&(_map)->lock); \ + } while (0) /* - * uvm_map_entry_link: insert entry into a map + * Tree describing entries by address. * - * => map must be locked + * Addresses are unique. + * Entries with start == end may only exist if they are the first entry + * (sorted by address) within a free-memory tree. */ -#define uvm_map_entry_link(map, after_where, entry) do { \ - (map)->nentries++; \ - (entry)->prev = (after_where); \ - (entry)->next = (after_where)->next; \ - (entry)->prev->next = (entry); \ - (entry)->next->prev = (entry); \ - uvm_rb_insert(map, entry); \ -} while (0) + +static __inline int +uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) +{ + return e1->start < e2->start ? -1 : e1->start > e2->start; +} /* - * uvm_map_entry_unlink: remove entry from a map + * Tree describing free memory. * - * => map must be locked + * Free memory is indexed (so we can use array semantics in O(log N). + * Free memory is ordered by size (so we can reduce fragmentation). + * + * The address range in the tree can be limited, having part of the + * free memory not in the free-memory tree. Only free memory in the + * tree will be considered during 'any address' allocations. */ -#define uvm_map_entry_unlink(map, entry) do { \ - (map)->nentries--; \ - (entry)->next->prev = (entry)->prev; \ - (entry)->prev->next = (entry)->next; \ - uvm_rb_remove(map, entry); \ -} while (0) + +static __inline int +uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2) +{ + int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace; + return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2); +} /* - * SAVE_HINT: saves the specified entry as the hint for future lookups. - * - * => map need not be locked (protected by hint_lock). + * Copy mapentry. */ -#define SAVE_HINT(map,check,value) do { \ - simple_lock(&(map)->hint_lock); \ - if ((map)->hint == (check)) \ - (map)->hint = (value); \ - simple_unlock(&(map)->hint_lock); \ -} while (0) +static __inline void +uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) +{ + caddr_t csrc, cdst; + size_t sz; + + csrc = (caddr_t)src; + cdst = (caddr_t)dst; + csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); + cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); + + sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - + offsetof(struct vm_map_entry, uvm_map_entry_start_copy); + memcpy(cdst, csrc, sz); +} /* - * VM_MAP_RANGE_CHECK: check and correct range - * - * => map must at least be read locked + * Handle free-list insertion. */ +void +uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry *entry) +{ + const struct uvm_addr_functions *fun; +#ifdef VMMAP_DEBUG + vaddr_t min, max, bound; +#endif + +#ifdef VMMAP_DEBUG + /* + * Boundary check. + * Boundaries are folded if they go on the same free list. 
+ */ + min = VMMAP_FREE_START(entry); + max = VMMAP_FREE_END(entry); -#define VM_MAP_RANGE_CHECK(map, start, end) do { \ - if (start < vm_map_min(map)) \ - start = vm_map_min(map); \ - if (end > vm_map_max(map)) \ - end = vm_map_max(map); \ - if (start > end) \ - start = end; \ -} while (0) + while (min < max) { + bound = uvm_map_boundary(map, min, max); + KASSERT(uvm_map_uaddr(map, min) == uaddr); + min = bound; + } +#endif + KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); + KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); + + UVM_MAP_REQ_WRITE(map); + + /* Actual insert: forward to uaddr pointer. */ + fun = uaddr->uaddr_functions; + KDASSERT(fun != NULL); + if (fun->uaddr_free_insert != NULL) + (*fun->uaddr_free_insert)(map, uaddr, entry); + entry->etype |= UVM_ET_FREEMAPPED; +} /* - * local prototypes + * Handle free-list removal. */ +void +uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry *entry) +{ + const struct uvm_addr_functions *fun; -void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); -void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); -void uvm_map_reference_amap(struct vm_map_entry *, int); -void uvm_map_unreference_amap(struct vm_map_entry *, int); -int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t, - struct vm_map_entry *, voff_t, vsize_t); + KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0); + KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); + UVM_MAP_REQ_WRITE(map); -struct vm_map_entry *uvm_mapent_alloc(struct vm_map *, int); -void uvm_mapent_free(struct vm_map_entry *); + fun = uaddr->uaddr_functions; + if (fun->uaddr_free_remove != NULL) + (*fun->uaddr_free_remove)(map, uaddr, entry); + entry->etype &= ~UVM_ET_FREEMAPPED; +} -#ifdef KVA_GUARDPAGES /* - * Number of kva guardpages in use. + * Handle address tree insertion. */ -int kva_guardpages; -#endif - +void +uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) +{ + struct vm_map_entry *res; + + if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || + RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || + RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) + panic("uvm_mapent_addr_insert: entry still in addr list"); + KDASSERT(entry->start <= entry->end); + KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && + (entry->end & (vaddr_t)PAGE_MASK) == 0); + + UVM_MAP_REQ_WRITE(map); + res = RB_INSERT(uvm_map_addr, &map->addr, entry); + if (res != NULL) { + panic("uvm_mapent_addr_insert: map %p entry %p " + "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " + "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", + map, entry, + entry->start, entry->end, entry->guard, entry->fspace, + res, res->start, res->end, res->guard, res->fspace); + } +} /* - * Tree manipulation. + * Handle address tree removal. 
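/*
 * Editor's note: a minimal sketch of the pointer-poisoning convention
 * enforced by uvm_mapent_addr_insert() and uvm_mapent_addr_remove() in
 * this hunk.  Detached entries have their red-black linkage set to
 * UVMMAP_DEADBEEF, so inserting an entry that is still linked (or was
 * never detached) panics instead of silently corrupting the tree.  The
 * *_sketch helper names are made up.
 */
static void
mapent_poison_sketch(struct vm_map_entry *entry)
{
	RB_LEFT(entry, daddrs.addr_entry) =
	    RB_RIGHT(entry, daddrs.addr_entry) =
	    RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF;
}

static int
mapent_is_detached_sketch(struct vm_map_entry *entry)
{
	return RB_LEFT(entry, daddrs.addr_entry) == UVMMAP_DEADBEEF &&
	    RB_RIGHT(entry, daddrs.addr_entry) == UVMMAP_DEADBEEF &&
	    RB_PARENT(entry, daddrs.addr_entry) == UVMMAP_DEADBEEF;
}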
*/ -void uvm_rb_insert(struct vm_map *, struct vm_map_entry *); -void uvm_rb_remove(struct vm_map *, struct vm_map_entry *); -vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *); +void +uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) +{ + struct vm_map_entry *res; + + UVM_MAP_REQ_WRITE(map); + res = RB_REMOVE(uvm_map_addr, &map->addr, entry); + if (res != entry) + panic("uvm_mapent_addr_remove"); + RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = + RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; +} -#ifdef DEBUG -int _uvm_tree_sanity(struct vm_map *map, const char *name); -#endif -vsize_t uvm_rb_subtree_space(struct vm_map_entry *); -void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *); +/* + * uvm_map_reference: add reference to a map + * + * XXX check map reference counter lock + */ +#define uvm_map_reference(_map) \ + do { \ + simple_lock(&map->ref_lock); \ + map->ref_count++; \ + simple_unlock(&map->ref_lock); \ + } while (0) -static __inline int -uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b) +/* + * Calculate the dused delta. + */ +vsize_t +uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) { - if (a->start < b->start) - return (-1); - else if (a->start > b->start) - return (1); - - return (0); + struct vmspace *vm; + vsize_t sz; + vaddr_t lmax; + vaddr_t stack_begin, stack_end; /* Position of stack. */ + + KASSERT(map->flags & VM_MAP_ISVMSPACE); + vm = (struct vmspace *)map; + stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + + sz = 0; + while (min != max) { + lmax = max; + if (min < stack_begin && lmax > stack_begin) + lmax = stack_begin; + else if (min < stack_end && lmax > stack_end) + lmax = stack_end; + + if (min >= stack_begin && min < stack_end) { + /* nothing */ + } else + sz += lmax - min; + min = lmax; + } + + return sz >> PAGE_SHIFT; } +/* + * Find the entry describing the given address. + */ +struct vm_map_entry* +uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) +{ + struct vm_map_entry *iter; + + iter = RB_ROOT(atree); + while (iter != NULL) { + if (iter->start > addr) + iter = RB_LEFT(iter, daddrs.addr_entry); + else if (VMMAP_FREE_END(iter) <= addr) + iter = RB_RIGHT(iter, daddrs.addr_entry); + else + return iter; + } + return NULL; +} +/* + * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) + * + * Push dead entries into a linked list. + * Since the linked list abuses the address tree for storage, the entry + * may not be linked in a map. + * + * *head must be initialized to NULL before the first call to this macro. + * uvm_unmap_detach(*head, 0) will remove dead entries. + */ static __inline void -uvm_rb_augment(struct vm_map_entry *entry) +dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) { - entry->space = uvm_rb_subtree_space(entry); + TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); } +#define DEAD_ENTRY_PUSH(_headptr, _entry) \ + dead_entry_push((_headptr), (_entry)) -RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); +/* + * Helper function for uvm_map_findspace_tree. + * + * Given allocation constraints and pmap constraints, finds the + * lowest and highest address in a range that can be used for the + * allocation. + * + * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. + * + * + * Big chunk of math with a seasoning of dragons. 
+ */ +int +uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, + struct vm_map_entry *sel, vaddr_t align, + vaddr_t pmap_align, vaddr_t pmap_off, int bias) +{ + vaddr_t sel_min, sel_max; +#ifdef PMAP_PREFER + vaddr_t pmap_min, pmap_max; +#endif /* PMAP_PREFER */ +#ifdef DIAGNOSTIC + int bad; +#endif /* DIAGNOSTIC */ -RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); + sel_min = VMMAP_FREE_START(sel); + sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); -vsize_t -uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry) -{ - struct vm_map_entry *next; - vaddr_t space; +#ifdef PMAP_PREFER + + /* + * There are two special cases, in which we can satisfy the align + * requirement and the pmap_prefer requirement. + * - when pmap_off == 0, we always select the largest of the two + * - when pmap_off % align == 0 and pmap_align > align, we simply + * satisfy the pmap_align requirement and automatically + * satisfy the align requirement. + */ + if (align > PAGE_SIZE && + !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { + /* + * Simple case: only use align. + */ + sel_min = roundup(sel_min, align); + sel_max &= ~(align - 1); + + if (sel_min > sel_max) + return ENOMEM; + + /* + * Correct for bias. + */ + if (sel_max - sel_min > FSPACE_BIASGAP) { + if (bias > 0) { + sel_min = sel_max - FSPACE_BIASGAP; + sel_min = roundup(sel_min, align); + } else if (bias < 0) { + sel_max = sel_min + FSPACE_BIASGAP; + sel_max &= ~(align - 1); + } + } + } else if (pmap_align != 0) { + /* + * Special case: satisfy both pmap_prefer and + * align argument. + */ + pmap_max = sel_max & ~(pmap_align - 1); + pmap_min = sel_min; + if (pmap_max < sel_min) + return ENOMEM; + + /* Adjust pmap_min for BIASGAP for top-addr bias. */ + if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) + pmap_min = pmap_max - FSPACE_BIASGAP; + /* Align pmap_min. */ + pmap_min &= ~(pmap_align - 1); + if (pmap_min < sel_min) + pmap_min += pmap_align; + if (pmap_min > pmap_max) + return ENOMEM; + + /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ + if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { + pmap_max = (pmap_min + FSPACE_BIASGAP) & + ~(pmap_align - 1); + } + if (pmap_min > pmap_max) + return ENOMEM; + + /* Apply pmap prefer offset. */ + pmap_max |= pmap_off; + if (pmap_max > sel_max) + pmap_max -= pmap_align; + pmap_min |= pmap_off; + if (pmap_min < sel_min) + pmap_min += pmap_align; + + /* + * Fixup: it's possible that pmap_min and pmap_max + * cross eachother. In this case, try to find one + * address that is allowed. + * (This usually happens in biased case.) + */ + if (pmap_min > pmap_max) { + if (pmap_min < sel_max) + pmap_max = pmap_min; + else if (pmap_max > sel_min) + pmap_min = pmap_max; + else + return ENOMEM; + } + + /* Internal validation. 
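/*
 * Editor's note: a standalone numeric walk-through (userland) of the
 * alignment clamp used by uvm_map_sel_limits above in the plain,
 * non-PMAP_PREFER path: the lowest candidate address is rounded up to
 * the alignment, the highest is rounded down, and if they cross the
 * free range cannot hold an aligned allocation of this size.  All
 * addresses below are made-up example values.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t align = 0x10000;		/* 64 KB */
	uint64_t sel_min = 0x1234000;		/* start of free range */
	uint64_t sel_max = 0x1280000;		/* last usable start address */

	sel_min = (sel_min + align - 1) & ~(align - 1);	/* round up */
	sel_max &= ~(align - 1);			/* round down */

	if (sel_min > sel_max)
		printf("no aligned fit\n");
	else
		printf("candidates 0x%llx .. 0x%llx, step 0x%llx\n",
		    (unsigned long long)sel_min,
		    (unsigned long long)sel_max,
		    (unsigned long long)align);
	return 0;
}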
*/ + KDASSERT(pmap_min <= pmap_max); + + sel_min = pmap_min; + sel_max = pmap_max; + } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_min = sel_max - FSPACE_BIASGAP; + else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_max = sel_min + FSPACE_BIASGAP; + +#else + + if (align > PAGE_SIZE) { + sel_min = roundup(sel_min, align); + sel_max &= ~(align - 1); + if (sel_min > sel_max) + return ENOMEM; + + if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { + if (bias > 0) { + sel_min = roundup(sel_max - FSPACE_BIASGAP, + align); + } else { + sel_max = (sel_min + FSPACE_BIASGAP) & + ~(align - 1); + } + } + } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_min = sel_max - FSPACE_BIASGAP; + else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) + sel_max = sel_min + FSPACE_BIASGAP; + +#endif + + if (sel_min > sel_max) + return ENOMEM; + +#ifdef DIAGNOSTIC + bad = 0; + /* Lower boundary check. */ + if (sel_min < VMMAP_FREE_START(sel)) { + printf("sel_min: 0x%lx, but should be at least 0x%lx\n", + sel_min, VMMAP_FREE_START(sel)); + bad++; + } + /* Upper boundary check. */ + if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { + printf("sel_max: 0x%lx, but should be at most 0x%lx\n", + sel_max, + VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); + bad++; + } + /* Lower boundary alignment. */ + if (align != 0 && (sel_min & (align - 1)) != 0) { + printf("sel_min: 0x%lx, not aligned to 0x%lx\n", + sel_min, align); + bad++; + } + /* Upper boundary alignment. */ + if (align != 0 && (sel_max & (align - 1)) != 0) { + printf("sel_max: 0x%lx, not aligned to 0x%lx\n", + sel_max, align); + bad++; + } + /* Lower boundary PMAP_PREFER check. */ + if (pmap_align != 0 && align == 0 && + (sel_min & (pmap_align - 1)) != pmap_off) { + printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", + sel_min, sel_min & (pmap_align - 1), pmap_off); + bad++; + } + /* Upper boundary PMAP_PREFER check. */ + if (pmap_align != 0 && align == 0 && + (sel_max & (pmap_align - 1)) != pmap_off) { + printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", + sel_max, sel_max & (pmap_align - 1), pmap_off); + bad++; + } - if ((next = entry->next) == &map->header) - space = map->max_offset - entry->end; - else { - KASSERT(next); - space = next->start - entry->end; + if (bad) { + panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " + "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " + "bias = %d, " + "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", + sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, + bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); } - return (space); +#endif /* DIAGNOSTIC */ + + *min = sel_min; + *max = sel_max; + return 0; } - -vsize_t -uvm_rb_subtree_space(struct vm_map_entry *entry) + +/* + * Test if memory starting at addr with sz bytes is free. + * + * Fills in *start_ptr and *end_ptr to be the first and last entry describing + * the space. + * If called with prefilled *start_ptr and *end_ptr, they are to be correct. + */ +int +uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, + struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, + vaddr_t addr, vsize_t sz) { - vaddr_t space, tmp; + struct uvm_addr_state *free; + struct uvm_map_addr *atree; + struct vm_map_entry *i, *i_end; - space = entry->ownspace; - if (RB_LEFT(entry, rb_entry)) { - tmp = RB_LEFT(entry, rb_entry)->space; - if (tmp > space) - space = tmp; + /* + * Kernel memory above uvm_maxkaddr is considered unavailable. 
+ */ + if ((map->flags & VM_MAP_ISVMSPACE) == 0) { + if (addr + sz > uvm_maxkaddr) + return 0; } - if (RB_RIGHT(entry, rb_entry)) { - tmp = RB_RIGHT(entry, rb_entry)->space; - if (tmp > space) - space = tmp; + atree = &map->addr; + + /* + * Fill in first, last, so they point at the entries containing the + * first and last address of the range. + * Note that if they are not NULL, we don't perform the lookup. + */ + KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); + if (*start_ptr == NULL) { + *start_ptr = uvm_map_entrybyaddr(atree, addr); + if (*start_ptr == NULL) + return 0; + } else + KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); + if (*end_ptr == NULL) { + if (VMMAP_FREE_END(*start_ptr) >= addr + sz) + *end_ptr = *start_ptr; + else { + *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); + if (*end_ptr == NULL) + return 0; + } + } else + KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); + + /* + * Validation. + */ + KDASSERT(*start_ptr != NULL && *end_ptr != NULL); + KDASSERT((*start_ptr)->start <= addr && + VMMAP_FREE_END(*start_ptr) > addr && + (*end_ptr)->start < addr + sz && + VMMAP_FREE_END(*end_ptr) >= addr + sz); + + /* + * Check the none of the entries intersects with <addr, addr+sz>. + * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is + * considered unavailable unless called by those allocators. + */ + i = *start_ptr; + i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); + for (; i != i_end; + i = RB_NEXT(uvm_map_addr, atree, i)) { + if (i->start != i->end && i->end > addr) + return 0; + + /* + * uaddr_exe and uaddr_brk_stack may only be used + * by these allocators and the NULL uaddr (i.e. no + * uaddr). + * Reject if this requirement is not met. + */ + if (uaddr != NULL) { + free = uvm_map_uaddr_e(map, i); + + if (uaddr != free && free != NULL && + (free == map->uaddr_exe || + free == map->uaddr_brk_stack)) + return 0; + } } - return (space); + return -1; } -void -uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) +/* + * Invoke each address selector until an address is found. + * Will not invoke uaddr_exe. + */ +int +uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, + struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, + vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) { - /* We need to traverse to the very top */ - do { - entry->ownspace = uvm_rb_space(map, entry); - entry->space = uvm_rb_subtree_space(entry); - } while ((entry = RB_PARENT(entry, rb_entry)) != NULL); -} + struct uvm_addr_state *uaddr; + int i; -void -uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) -{ - vaddr_t space = uvm_rb_space(map, entry); - struct vm_map_entry *tmp; + /* + * Allocation for sz bytes at any address, + * using the addr selectors in order. + */ + for (i = 0; i < nitems(map->uaddr_any); i++) { + uaddr = map->uaddr_any[i]; - entry->ownspace = entry->space = space; - tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry); -#ifdef DIAGNOSTIC - if (tmp != NULL) - panic("uvm_rb_insert: duplicate entry?"); -#endif - uvm_rb_fixup(map, entry); - if (entry->prev != &map->header) - uvm_rb_fixup(map, entry->prev); + if (uvm_addr_invoke(map, uaddr, first, last, + addr, sz, pmap_align, pmap_offset, prot, hint) == 0) + return 0; + } + + /* + * Fall back to brk() and stack() address selectors. 
+ */ + uaddr = map->uaddr_brk_stack; + if (uvm_addr_invoke(map, uaddr, first, last, + addr, sz, pmap_align, pmap_offset, prot, hint) == 0) + return 0; + + return ENOMEM; } -void -uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) +/* + * uvm_map: establish a valid mapping in map + * + * => *addr and sz must be a multiple of PAGE_SIZE. + * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. + * => map must be unlocked. + * => <uobj,uoffset> value meanings (4 cases): + * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER + * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER + * [3] <uobj,uoffset> == normal mapping + * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA + * + * case [4] is for kernel mappings where we don't know the offset until + * we've found a virtual address. note that kernel object offsets are + * always relative to vm_map_min(kernel_map). + * + * => align: align vaddr, must be a power-of-2. + * Align is only a hint and will be ignored if the alignemnt fails. + */ +int +uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, + struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) { - struct vm_map_entry *parent; + struct vm_map_entry *first, *last, *entry; + struct uvm_map_deadq dead; + vm_prot_t prot; + vm_prot_t maxprot; + vm_inherit_t inherit; + int advice; + int error; + vaddr_t pmap_align, pmap_offset; + vaddr_t hint; - parent = RB_PARENT(entry, rb_entry); - RB_REMOVE(uvm_tree, &(map)->rbhead, entry); - if (entry->prev != &map->header) - uvm_rb_fixup(map, entry->prev); - if (parent) - uvm_rb_fixup(map, parent); -} + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); -#ifdef DEBUG -#define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y) -#else -#define uvm_tree_sanity(x,y) -#endif + /* + * We use pmap_align and pmap_offset as alignment and offset variables. + * + * Because the align parameter takes precedence over pmap prefer, + * the pmap_align will need to be set to align, with pmap_offset = 0, + * if pmap_prefer will not align. + */ + if (uoffset == UVM_UNKNOWN_OFFSET) { + pmap_align = MAX(align, PAGE_SIZE); + pmap_offset = 0; + } else { + pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); + pmap_offset = PMAP_PREFER_OFFSET(uoffset); -#ifdef DEBUG -int -_uvm_tree_sanity(struct vm_map *map, const char *name) -{ - struct vm_map_entry *tmp, *trtmp; - int n = 0, i = 1; - - RB_FOREACH(tmp, uvm_tree, &map->rbhead) { - if (tmp->ownspace != uvm_rb_space(map, tmp)) { - printf("%s: %d/%d ownspace %x != %x %s\n", - name, n + 1, map->nentries, - tmp->ownspace, uvm_rb_space(map, tmp), - tmp->next == &map->header ? "(last)" : ""); - goto error; + if (align == 0 || + (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { + /* + * pmap_offset satisfies align, no change. + */ + } else { + /* + * Align takes precedence over pmap prefer. + */ + pmap_align = align; + pmap_offset = 0; } } - trtmp = NULL; - RB_FOREACH(tmp, uvm_tree, &map->rbhead) { - if (tmp->space != uvm_rb_subtree_space(tmp)) { - printf("%s: space %d != %d\n", - name, tmp->space, uvm_rb_subtree_space(tmp)); - goto error; + + /* + * Decode parameters. + */ + prot = UVM_PROTECTION(flags); + maxprot = UVM_MAXPROTECTION(flags); + advice = UVM_ADVICE(flags); + inherit = UVM_INHERIT(flags); + error = 0; + hint = trunc_page(*addr); + TAILQ_INIT(&dead); + KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); + KASSERT((align & (align - 1)) == 0); + + /* + * Holes are incompatible with other types of mappings. 
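/*
 * Editor's note: a hedged usage sketch of the new uvm_map() signature
 * documented above (the struct proc argument of uvm_map_p is gone).
 * It mirrors the kernel_map callers converted elsewhere in this commit,
 * e.g. uvm_km_page_init; kva_alloc_sketch is a made-up wrapper name.
 * sz must be a multiple of PAGE_SIZE, and *va is only an input when
 * UVM_FLAG_FIXED is given, otherwise it acts as a hint and output.
 */
static int
kva_alloc_sketch(vsize_t sz, vaddr_t *va)
{
	*va = vm_map_min(kernel_map);
	return uvm_map(kernel_map, va, sz, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0));
}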
+ */ + if (flags & UVM_FLAG_HOLE) { + KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && + (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); + } + + /* + * Unset hint for kernel_map non-fixed allocations. + */ + if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) + hint = 0; + + /* + * Check protection. + */ + if ((prot & maxprot) != prot) + return EACCES; + + if (flags & UVM_FLAG_TRYLOCK) { + if (vm_map_lock_try(map) == FALSE) + return EFAULT; + } else + vm_map_lock(map); + + first = last = NULL; + if (flags & UVM_FLAG_FIXED) { + /* + * Fixed location. + * + * Note: we ignore align, pmap_prefer. + * Fill in first, last and *addr. + */ + KASSERT((*addr & PAGE_MASK) == 0); + + /* + * Grow pmap to include allocated address. + * If the growth fails, the allocation will fail too. + */ + if ((map->flags & VM_MAP_ISVMSPACE) == 0 && + uvm_maxkaddr < (*addr + sz)) { + uvm_map_kmem_grow(map, &dead, + *addr + sz - uvm_maxkaddr, flags); } - if (trtmp != NULL && trtmp->start >= tmp->start) { - printf("%s: corrupt: 0x%lx >= 0x%lx\n", - name, trtmp->start, tmp->start); - goto error; + + /* + * Check that the space is available. + */ + if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { + error = ENOMEM; + goto unlock; + } + } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && + (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && + (align == 0 || (*addr & (align - 1)) == 0) && + uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { + /* + * Address used as hint. + * + * Note: we enforce the alignment restriction, + * but ignore pmap_prefer. + */ + } else if ((maxprot & VM_PROT_EXECUTE) != 0 && + map->uaddr_exe != NULL) { + /* + * Run selection algorithm for executables. + */ + error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, + addr, sz, pmap_align, pmap_offset, prot, hint); + + /* + * Grow kernel memory and try again. + */ + if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { + uvm_map_kmem_grow(map, &dead, sz, flags); + + error = uvm_addr_invoke(map, map->uaddr_exe, + &first, &last, addr, sz, + pmap_align, pmap_offset, prot, hint); + } + + if (error != 0) + goto unlock; + } else { + /* + * Update freelists from vmspace. + */ + if (map->flags & VM_MAP_ISVMSPACE) + uvm_map_vmspace_update(map, &dead, flags); + + error = uvm_map_findspace(map, &first, &last, addr, sz, + pmap_align, pmap_offset, prot, hint); + + /* + * Grow kernel memory and try again. + */ + if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { + uvm_map_kmem_grow(map, &dead, sz, flags); + + error = uvm_map_findspace(map, &first, &last, addr, sz, + pmap_align, pmap_offset, prot, hint); + } + + if (error != 0) + goto unlock; + } + + KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || + uvm_maxkaddr >= *addr + sz); + + /* + * If we only want a query, return now. + */ + if (flags & UVM_FLAG_QUERY) { + error = 0; + goto unlock; + } + + if (uobj == NULL) + uoffset = 0; + else if (uoffset == UVM_UNKNOWN_OFFSET) { + KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); + uoffset = *addr - vm_map_min(kernel_map); + } + + /* + * Create new entry. + * first and last may be invalidated after this call. 
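/*
 * Editor's note: a hedged sketch of the new UVM_FLAG_QUERY flag, whose
 * early return appears earlier in this function: combined with
 * UVM_FLAG_FIXED it asks whether a page-aligned mapping at a specific
 * address could be established, without creating an entry.  The
 * wrapper name and the protection bits are illustrative only.
 */
static int
would_fit_sketch(struct vm_map *map, vaddr_t va, vsize_t sz)
{
	/* va and sz must be page-aligned, as for any uvm_map() call. */
	return uvm_map(map, &va, sz, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED | UVM_FLAG_QUERY)) == 0;
}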
+ */ + entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead); + if (entry == NULL) { + error = ENOMEM; + goto unlock; + } + KDASSERT(entry->start == *addr && entry->end == *addr + sz); + entry->object.uvm_obj = uobj; + entry->offset = uoffset; + entry->protection = prot; + entry->max_protection = maxprot; + entry->inheritance = inherit; + entry->wired_count = 0; + entry->advice = advice; + if (uobj) + entry->etype |= UVM_ET_OBJ; + else if (flags & UVM_FLAG_HOLE) + entry->etype |= UVM_ET_HOLE; + if (flags & UVM_FLAG_COPYONW) { + entry->etype |= UVM_ET_COPYONWRITE; + if ((flags & UVM_FLAG_OVERLAY) == 0) + entry->etype |= UVM_ET_NEEDSCOPY; + } + if (flags & UVM_FLAG_OVERLAY) { + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = amap_alloc(sz, + ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0), + M_WAITOK); + } + + /* + * Update map and process statistics. + */ + if (!(flags & UVM_FLAG_HOLE)) { + map->size += sz; + if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { + ((struct vmspace *)map)->vm_dused += + uvmspace_dused(map, *addr, *addr + sz); } - n++; + } - trtmp = tmp; + /* + * Try to merge entry. + * + * Userland allocations are kept separated most of the time. + * Forego the effort of merging what most of the time can't be merged + * and only try the merge if it concerns a kernel entry. + */ + if ((flags & UVM_FLAG_NOMERGE) == 0 && + (map->flags & VM_MAP_ISVMSPACE) == 0) + uvm_mapent_tryjoin(map, entry, &dead); + +unlock: + vm_map_unlock(map); + + /* + * Remove dead entries. + * + * Dead entries may be the result of merging. + * uvm_map_mkentry may also create dead entries, when it attempts to + * destroy free-space entries. + */ + uvm_unmap_detach(&dead, 0); + return error; +} + +/* + * True iff e1 and e2 can be joined together. + */ +int +uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, + struct vm_map_entry *e2) +{ + KDASSERT(e1 != NULL && e2 != NULL); + + /* + * Must be the same entry type and not have free memory between. + */ + if (e1->etype != e2->etype || e1->end != e2->start) + return 0; + + /* + * Submaps are never joined. + */ + if (UVM_ET_ISSUBMAP(e1)) + return 0; + + /* + * Never merge wired memory. + */ + if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) + return 0; + + /* + * Protection, inheritance and advice must be equal. + */ + if (e1->protection != e2->protection || + e1->max_protection != e2->max_protection || + e1->inheritance != e2->inheritance || + e1->advice != e2->advice) + return 0; + + /* + * If uvm_object: objects itself and offsets within object must match. + */ + if (UVM_ET_ISOBJ(e1)) { + if (e1->object.uvm_obj != e2->object.uvm_obj) + return 0; + if (e1->offset + (e1->end - e1->start) != e2->offset) + return 0; + } + + /* + * Cannot join shared amaps. + * Note: no need to lock amap to look at refs, since we don't care + * about its exact value. + * If it is 1 (i.e. we have the only reference) it will stay there. + */ + if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) + return 0; + if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) + return 0; + + /* + * Apprently, e1 and e2 match. + */ + return 1; +} + +/* + * Join support function. + * + * Returns the merged entry on succes. + * Returns NULL if the merge failed. + */ +struct vm_map_entry* +uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, + struct vm_map_entry *e2, struct uvm_map_deadq *dead) +{ + struct uvm_addr_state *free; + + /* + * Amap of e1 must be extended to include e2. 
+ * e2 contains no real information in its amap, + * so it can be erased immediately. + */ + if (e1->aref.ar_amap) { + if (amap_extend(e1, e2->end - e2->start)) + return NULL; + } + + /* + * Don't drop obj reference: + * uvm_unmap_detach will do this for us. + */ + + free = uvm_map_uaddr_e(map, e1); + if (free) + uvm_mapent_free_remove(map, free, e1); + + free = uvm_map_uaddr_e(map, e2); + if (free) + uvm_mapent_free_remove(map, free, e2); + uvm_mapent_addr_remove(map, e2); + e1->end = e2->end; + e1->guard = e2->guard; + e1->fspace = e2->fspace; + if (free) + uvm_mapent_free_insert(map, free, e1); + + DEAD_ENTRY_PUSH(dead, e2); + return e1; +} + +/* + * Attempt forward and backward joining of entry. + * + * Returns entry after joins. + * We are guaranteed that the amap of entry is either non-existant or + * has never been used. + */ +struct vm_map_entry* +uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, + struct uvm_map_deadq *dead) +{ + struct vm_map_entry *other; + struct vm_map_entry *merged; + + /* + * Merge with previous entry. + */ + other = RB_PREV(uvm_map_addr, &map->addr, entry); + if (other && uvm_mapent_isjoinable(map, other, entry)) { + merged = uvm_mapent_merge(map, other, entry, dead); + if (merged) + entry = merged; } - if (n != map->nentries) { - printf("%s: nentries: %d vs %d\n", - name, n, map->nentries); - goto error; + /* + * Merge with next entry. + * + * Because amap can only extend forward and the next entry + * probably contains sensible info, only perform forward merging + * in the absence of an amap. + */ + other = RB_NEXT(uvm_map_addr, &map->addr, entry); + if (other && entry->aref.ar_amap == NULL && + other->aref.ar_amap == NULL && + uvm_mapent_isjoinable(map, entry, other)) { + merged = uvm_mapent_merge(map, entry, other, dead); + if (merged) + entry = merged; } - for (tmp = map->header.next; tmp && tmp != &map->header; - tmp = tmp->next, i++) { - trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp); - if (trtmp != tmp) { - printf("%s: lookup: %d: %p - %p: %p\n", - name, i, tmp, trtmp, - RB_PARENT(tmp, rb_entry)); - goto error; + return entry; +} + +/* + * Kill entries that are no longer in a map. + */ +void +uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) +{ + struct vm_map_entry *entry; + + while ((entry = TAILQ_FIRST(deadq)) != NULL) { + /* + * Drop reference to amap, if we've got one. + */ + if (entry->aref.ar_amap) + amap_unref(entry->aref.ar_amap, + entry->aref.ar_pageoff, + atop(entry->end - entry->start), + flags); + + /* + * Drop reference to our backing object, if we've got one. + */ + if (UVM_ET_ISSUBMAP(entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_deallocate(entry->object.sub_map); + } else if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops->pgo_detach) { + entry->object.uvm_obj->pgops->pgo_detach( + entry->object.uvm_obj); } + + /* + * Step to next. + */ + TAILQ_REMOVE(deadq, entry, dfree.deadq); + uvm_mapent_free(entry); } +} - return (0); - error: -#ifdef DDB - /* handy breakpoint location for error case */ - __asm(".globl treesanity_label\ntreesanity_label:"); -#endif - return (-1); +/* + * Create and insert new entry. + * + * Returned entry contains new addresses and is inserted properly in the tree. + * first and last are (probably) no longer valid. 
+ */ +struct vm_map_entry* +uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, + struct uvm_map_deadq *dead) +{ + struct vm_map_entry *entry, *prev; + struct uvm_addr_state *free; + vaddr_t min, max; /* free space boundaries for new entry */ + + KDASSERT(map != NULL); + KDASSERT(first != NULL); + KDASSERT(last != NULL); + KDASSERT(dead != NULL); + KDASSERT(sz > 0); + KDASSERT(addr + sz > addr); + KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); + KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); + KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); + uvm_tree_sanity(map, __FILE__, __LINE__); + + min = addr + sz; + max = VMMAP_FREE_END(last); + + /* + * Initialize new entry. + */ + entry = uvm_mapent_alloc(map, flags); + if (entry == NULL) + return NULL; + entry->offset = 0; + entry->etype = 0; + entry->wired_count = 0; + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = NULL; + + entry->start = addr; + entry->end = min; + entry->guard = 0; + entry->fspace = 0; + + /* + * Reset free space in first. + */ + free = uvm_map_uaddr_e(map, first); + if (free) + uvm_mapent_free_remove(map, free, first); + first->guard = 0; + first->fspace = 0; + + /* + * Remove all entries that are fully replaced. + * We are iterating using last in reverse order. + */ + for (; first != last; last = prev) { + prev = RB_PREV(uvm_map_addr, &map->addr, last); + + KDASSERT(last->start == last->end); + free = uvm_map_uaddr_e(map, last); + if (free) + uvm_mapent_free_remove(map, free, last); + uvm_mapent_addr_remove(map, last); + DEAD_ENTRY_PUSH(dead, last); + } + /* + * Remove first if it is entirely inside <addr, addr+sz>. + */ + if (first->start == addr) { + uvm_mapent_addr_remove(map, first); + DEAD_ENTRY_PUSH(dead, first); + } else { + uvm_map_fix_space(map, first, VMMAP_FREE_START(first), + addr, flags); + } + + /* + * Finally, link in entry. + */ + uvm_mapent_addr_insert(map, entry); + uvm_map_fix_space(map, entry, min, max, flags); + + uvm_tree_sanity(map, __FILE__, __LINE__); + return entry; } -#endif /* * uvm_mapent_alloc: allocate a map entry */ - struct vm_map_entry * uvm_mapent_alloc(struct vm_map *map, int flags) { @@ -406,15 +1468,15 @@ uvm_mapent_alloc(struct vm_map *map, int flags) for (i = 0; i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; i++) - ne[i].next = &ne[i + 1]; - ne[i].next = NULL; + RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; + RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; me = ne; if (ratecheck(&uvm_kmapent_last_warn_time, &uvm_kmapent_warn_rate)) printf("uvm_mapent_alloc: out of static " "map entries\n"); } - uvm.kentry_free = me->next; + uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); uvmexp.kmapent++; simple_unlock(&uvm.kentry_lock); splx(s); @@ -433,6 +1495,12 @@ uvm_mapent_alloc(struct vm_map *map, int flags) me->flags = 0; } + if (me != NULL) { + RB_LEFT(me, daddrs.addr_entry) = + RB_RIGHT(me, daddrs.addr_entry) = + RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; + } + out: return(me); } @@ -442,7 +1510,6 @@ out: * * => XXX: static pool for kernel map? 
*/ - void uvm_mapent_free(struct vm_map_entry *me) { @@ -451,7 +1518,7 @@ uvm_mapent_free(struct vm_map_entry *me) if (me->flags & UVM_MAP_STATIC) { s = splvm(); simple_lock(&uvm.kentry_lock); - me->next = uvm.kentry_free; + RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; uvm.kentry_free = me; uvmexp.kmapent--; simple_unlock(&uvm.kentry_lock); @@ -466,1726 +1533,2108 @@ uvm_mapent_free(struct vm_map_entry *me) } /* - * uvm_mapent_copy: copy a map entry, preserving flags + * uvm_map_lookup_entry: find map entry at or before an address. + * + * => map must at least be read-locked by caller + * => entry is returned in "entry" + * => return value is true if address is in the returned entry + * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is + * returned for those mappings. */ - -void -uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) +boolean_t +uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, + struct vm_map_entry **entry) { - memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - - ((char *)src)); + *entry = uvm_map_entrybyaddr(&map->addr, address); + return *entry != NULL && !UVM_ET_ISHOLE(*entry) && + (*entry)->start <= address && (*entry)->end > address; } /* - * uvm_map_entry_unwire: unwire a map entry - * - * => map should be locked by caller + * uvm_map_pie: return a random load address for a PIE executable + * properly aligned. */ -void -uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) +#ifndef VM_PIE_MAX_ADDR +#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) +#endif + +#ifndef VM_PIE_MIN_ADDR +#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS +#endif + +#ifndef VM_PIE_MIN_ALIGN +#define VM_PIE_MIN_ALIGN PAGE_SIZE +#endif + +vaddr_t +uvm_map_pie(vaddr_t align) { + vaddr_t addr, space, min; - entry->wired_count = 0; - uvm_fault_unwire_locked(map, entry->start, entry->end); -} + align = MAX(align, VM_PIE_MIN_ALIGN); + /* round up to next alignment */ + min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); + + if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) + return (align); + + space = (VM_PIE_MAX_ADDR - min) / align; + space = MIN(space, (u_int32_t)-1); + + addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; + addr += min; + + return (addr); +} -/* - * wrapper for calling amap_ref() - */ void -uvm_map_reference_amap(struct vm_map_entry *entry, int flags) +uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) { - amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, - (entry->end - entry->start) >> PAGE_SHIFT, flags); -} + struct uvm_map_deadq dead; + KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && + (end & (vaddr_t)PAGE_MASK) == 0); + TAILQ_INIT(&dead); + vm_map_lock(map); + uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); + vm_map_unlock(map); + + uvm_unmap_detach(&dead, 0); +} /* - * wrapper for calling amap_unref() + * Mark entry as free. + * + * entry will be put on the dead list. + * The free space will be merged into the previous or a new entry, + * unless markfree is false. */ void -uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) +uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, + struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, + boolean_t markfree) { - amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, - (entry->end - entry->start) >> PAGE_SHIFT, flags); -} + struct uvm_addr_state *free; + struct vm_map_entry *prev; + vaddr_t addr; /* Start of freed range. */ + vaddr_t end; /* End of freed range. 
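/*
 * Editor's note: a standalone numeric walk-through (userland) of the
 * uvm_map_pie() calculation above, using made-up bounds: round the
 * minimum address up to the requested alignment, count how many
 * aligned slots fit below the PIE ceiling, and pick one uniformly at
 * random.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	uint64_t pie_min = 0x1000;		/* example lower bound */
	uint64_t pie_max = 0x40000000;		/* example upper bound */
	uint64_t align = 0x10000;
	uint64_t min, space, addr;

	min = (pie_min + align - 1) & ~(align - 1);	/* round up */
	space = (pie_max - min) / align;		/* number of slots */
	addr = min + (uint64_t)arc4random_uniform((uint32_t)space) * align;

	printf("PIE load address 0x%llx\n", (unsigned long long)addr);
	return 0;
}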
*/ + + prev = *prev_ptr; + if (prev == entry) + *prev_ptr = prev = NULL; + + if (prev == NULL || + VMMAP_FREE_END(prev) != entry->start) + prev = RB_PREV(uvm_map_addr, &map->addr, entry); + /* + * Entry is describing only free memory and has nothing to drain into. + */ + if (prev == NULL && entry->start == entry->end && markfree) { + *prev_ptr = entry; + return; + } + addr = entry->start; + end = VMMAP_FREE_END(entry); + free = uvm_map_uaddr_e(map, entry); + if (free) + uvm_mapent_free_remove(map, free, entry); + uvm_mapent_addr_remove(map, entry); + DEAD_ENTRY_PUSH(dead, entry); + + if (markfree) { + if (prev) { + free = uvm_map_uaddr_e(map, prev); + if (free) + uvm_mapent_free_remove(map, free, prev); + } + *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); + } +} /* - * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of structs vm_map_entry for the kernel here. + * Unwire and release referenced amap and object from map entry. */ - void -uvm_map_init(void) +uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) { - static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; - int lcv; - /* - * set up static pool of kernel map entries ... + * Unwire removed map entry. */ - - simple_lock_init(&uvm.kentry_lock); - uvm.kentry_free = NULL; - for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { - kernel_map_entry[lcv].next = uvm.kentry_free; - uvm.kentry_free = &kernel_map_entry[lcv]; + if (VM_MAPENT_ISWIRED(entry)) { + entry->wired_count = 0; + uvm_fault_unwire_locked(map, entry->start, entry->end); } /* - * initialize the map-related pools. + * Entry-type specific code. */ - pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), - 0, 0, 0, "vmsppl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpepl", &pool_allocator_nointr); - pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), - 0, 0, 0, "vmmpekpl", NULL); - pool_sethiwat(&uvm_map_entry_pool, 8192); -} + if (UVM_ET_ISHOLE(entry)) { + /* + * Nothing to be done for holes. + */ + } else if (map->flags & VM_MAP_INTRSAFE) { + KASSERT(vm_map_pmap(map) == pmap_kernel()); + uvm_km_pgremove_intrsafe(entry->start, entry->end); + pmap_kremove(entry->start, entry->end - entry->start); + } else if (UVM_ET_ISOBJ(entry) && + UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { + KASSERT(vm_map_pmap(map) == pmap_kernel()); -/* - * clippers - */ + /* + * Note: kernel object mappings are currently used in + * two ways: + * [1] "normal" mappings of pages in the kernel object + * [2] uvm_km_valloc'd allocations in which we + * pmap_enter in some non-kernel-object page + * (e.g. vmapbuf). + * + * for case [1], we need to remove the mapping from + * the pmap and then remove the page from the kernel + * object (because, once pages in a kernel object are + * unmapped they are no longer needed, unlike, say, + * a vnode where you might want the data to persist + * until flushed out of a queue). + * + * for case [2], we need to remove the mapping from + * the pmap. there shouldn't be any pages at the + * specified offset in the kernel object [but it + * doesn't hurt to call uvm_km_pgremove just to be + * safe?] + * + * uvm_km_pgremove currently does the following: + * for pages in the kernel object range: + * - drops the swap slot + * - uvm_pagefree the page + * + * note there is version of uvm_km_pgremove() that + * is used for "intrsafe" objects. + */ + + /* + * remove mappings from pmap and drop the pages + * from the object. 
offsets are always relative + * to vm_map_min(kernel_map). + */ + pmap_remove(pmap_kernel(), entry->start, entry->end); + uvm_km_pgremove(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); + + /* + * null out kernel_object reference, we've just + * dropped it + */ + entry->etype &= ~UVM_ET_OBJ; + entry->object.uvm_obj = NULL; /* to be safe */ + } else { + /* + * remove mappings the standard way. + */ + pmap_remove(map->pmap, entry->start, entry->end); + } +} /* - * uvm_map_clip_start: ensure that the entry begins at or after - * the starting address, if it doesn't we split the entry. - * - * => caller should use UVM_MAP_CLIP_START macro rather than calling - * this directly - * => map must be locked by caller + * Remove all entries from start to end. + * + * If remove_holes, then remove ET_HOLE entries as well. + * If markfree, entry will be properly marked free, otherwise, no replacement + * entry will be put in the tree (corrupting the tree). */ - void -uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, - vaddr_t start) +uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, + struct uvm_map_deadq *dead, boolean_t remove_holes, + boolean_t markfree) { - struct vm_map_entry *new_entry; - vaddr_t new_adj; + struct vm_map_entry *prev_hint, *next, *entry; - /* uvm_map_simplify_entry(map, entry); */ /* XXX */ + start = MAX(start, map->min_offset); + end = MIN(end, map->max_offset); + if (start >= end) + return; - uvm_tree_sanity(map, "clip_start entry"); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + splassert(IPL_NONE); + else + splassert(IPL_VM); /* - * Split off the front portion. note that we must insert the new - * entry BEFORE this one, so that this entry has the specified - * starting address. + * Find first affected entry. */ + entry = uvm_map_entrybyaddr(&map->addr, start); + KDASSERT(entry != NULL && entry->start <= start); + if (entry->end <= start && markfree) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + else + UVM_MAP_CLIP_START(map, entry, start); - new_entry = uvm_mapent_alloc(map, 0); - uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ + /* + * Iterate entries until we reach end address. + * prev_hint hints where the freed space can be appended to. + */ + prev_hint = NULL; + for (; entry != NULL && entry->start < end; entry = next) { + KDASSERT(entry->start >= start); + if (entry->end > end || !markfree) + UVM_MAP_CLIP_END(map, entry, end); + KDASSERT(entry->start >= start && entry->end <= end); + next = RB_NEXT(uvm_map_addr, &map->addr, entry); - new_entry->end = start; - new_adj = start - new_entry->start; - if (entry->object.uvm_obj) - entry->offset += new_adj; /* shift start over */ + /* Don't remove holes unless asked to do so. */ + if (UVM_ET_ISHOLE(entry)) { + if (!remove_holes) { + prev_hint = entry; + continue; + } + } - /* Does not change order for the RB tree */ - entry->start = start; + /* Kill entry. */ + uvm_unmap_kill_entry(map, entry); - if (new_entry->aref.ar_amap) { - amap_splitref(&new_entry->aref, &entry->aref, new_adj); + /* + * Update space usage. + */ + if ((map->flags & VM_MAP_ISVMSPACE) && + entry->object.uvm_obj == NULL && + !UVM_ET_ISHOLE(entry)) { + ((struct vmspace *)map)->vm_dused -= + uvmspace_dused(map, entry->start, entry->end); + } + if (!UVM_ET_ISHOLE(entry)) + map->size -= entry->end - entry->start; + + /* + * Actual removal of entry. 
+ */ + uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); } - uvm_map_entry_link(map, entry->prev, new_entry); + pmap_update(vm_map_pmap(map)); - if (UVM_ET_ISSUBMAP(entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_reference(new_entry->object.sub_map); +#ifdef VMMAP_DEBUG + if (markfree) { + for (entry = uvm_map_entrybyaddr(&map->addr, start); + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + KDASSERT(entry->end <= start || + entry->start == entry->end || + UVM_ET_ISHOLE(entry)); + } } else { - if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->pgops && - entry->object.uvm_obj->pgops->pgo_reference) - entry->object.uvm_obj->pgops->pgo_reference( - entry->object.uvm_obj); + vaddr_t a; + for (a = start; a < end; a += PAGE_SIZE) + KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); } - - uvm_tree_sanity(map, "clip_start leave"); +#endif } /* - * uvm_map_clip_end: ensure that the entry ends at or before - * the ending address, if it doesn't we split the reference - * - * => caller should use UVM_MAP_CLIP_END macro rather than calling - * this directly - * => map must be locked by caller + * Mark all entries from first until end (exclusive) as pageable. + * + * Lock must be exclusive on entry and will not be touched. */ - void -uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) +uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) { - struct vm_map_entry *new_entry; - vaddr_t new_adj; /* #bytes we move start forward */ - - uvm_tree_sanity(map, "clip_end entry"); - /* - * Create a new entry and insert it - * AFTER the specified entry - */ + struct vm_map_entry *iter; - new_entry = uvm_mapent_alloc(map, 0); - uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ - - new_entry->start = entry->end = end; - new_adj = end - entry->start; - if (new_entry->object.uvm_obj) - new_entry->offset += new_adj; - - if (entry->aref.ar_amap) - amap_splitref(&entry->aref, &new_entry->aref, new_adj); - - uvm_rb_fixup(map, entry); - - uvm_map_entry_link(map, entry, new_entry); + for (iter = first; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + KDASSERT(iter->start >= start_addr && iter->end <= end_addr); + if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) + continue; - if (UVM_ET_ISSUBMAP(entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_reference(new_entry->object.sub_map); - } else { - if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->pgops && - entry->object.uvm_obj->pgops->pgo_reference) - entry->object.uvm_obj->pgops->pgo_reference( - entry->object.uvm_obj); + iter->wired_count = 0; + uvm_fault_unwire_locked(map, iter->start, iter->end); } - uvm_tree_sanity(map, "clip_end leave"); } - -/* - * M A P - m a i n e n t r y p o i n t - */ /* - * uvm_map: establish a valid mapping in a map + * Mark all entries from first until end (exclusive) as wired. * - * => assume startp is page aligned. - * => assume size is a multiple of PAGE_SIZE. - * => assume sys_mmap provides enough of a "hint" to have us skip - * over text/data/bss area. 
- * => map must be unlocked (we will lock it) - * => <uobj,uoffset> value meanings (4 cases): - * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER - * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER - * [3] <uobj,uoffset> == normal mapping - * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA - * - * case [4] is for kernel mappings where we don't know the offset until - * we've found a virtual address. note that kernel object offsets are - * always relative to vm_map_min(kernel_map). - * - * => if `align' is non-zero, we try to align the virtual address to - * the specified alignment. this is only a hint; if we can't - * do it, the address will be unaligned. this is provided as - * a mechanism for large pages. - * - * => XXXCDC: need way to map in external amap? + * Lockflags determines the lock state on return from this function. + * Lock must be exclusive on entry. */ - int -uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size, - struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, - struct proc *p) +uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, + struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, + int lockflags) { - struct vm_map_entry *prev_entry, *new_entry; -#ifdef KVA_GUARDPAGES - struct vm_map_entry *guard_entry; + struct vm_map_entry *iter; +#ifdef DIAGNOSTIC + unsigned int timestamp_save; #endif - vm_prot_t prot = UVM_PROTECTION(flags), maxprot = - UVM_MAXPROTECTION(flags); - vm_inherit_t inherit = UVM_INHERIT(flags); - int advice = UVM_ADVICE(flags); int error; /* - * Holes are incompatible with other types of mappings. + * Wire pages in two passes: + * + * 1: holding the write lock, we create any anonymous maps that need + * to be created. then we clip each map entry to the region to + * be wired and increment its wiring count. + * + * 2: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the pages for any newly wired area (wired_count == 1). + * + * downgrading to a read lock for uvm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one of + * the pages to be wired (it would mark the page busy, blocking + * us, then in turn block on the map lock that we hold). + * because we keep the read lock on the map, the copy-on-write + * status of the entries we modify here cannot change. */ - if (flags & UVM_FLAG_HOLE) { - KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 && - (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); - } + for (iter = first; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + KDASSERT(iter->start >= start_addr && iter->end <= end_addr); + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; -#ifdef KVA_GUARDPAGES - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { /* - * kva_guardstart is initialized to the start of the kernelmap - * and cycles through the kva space. - * This way we should have a long time between re-use of kva. + * Perform actions of vm_map_lookup that need the write lock. + * - create an anonymous map for copy-on-write + * - anonymous map for zero-fill + * Skip submaps. 
*/ - static vaddr_t kva_guardstart = 0; - if (kva_guardstart == 0) { - kva_guardstart = vm_map_min(map); - printf("uvm_map: kva guard pages enabled: %p\n", - kva_guardstart); + if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && + UVM_ET_ISNEEDSCOPY(iter) && + ((iter->protection & VM_PROT_WRITE) || + iter->object.uvm_obj == NULL)) { + amap_copy(map, iter, M_WAITOK, TRUE, + iter->start, iter->end); } - size += PAGE_SIZE; /* Add guard page at the end. */ - /* - * Try to fully exhaust kva prior to wrap-around. - * (This may eat your ram!) - */ - if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) { - static int wrap_counter = 0; - printf("uvm_map: kva guard page wrap-around %d\n", - ++wrap_counter); - kva_guardstart = vm_map_min(map); - } - *startp = kva_guardstart; - /* - * Prepare for next round. - */ - kva_guardstart += size; + iter->wired_count++; } -#endif - - uvm_tree_sanity(map, "map entry"); - - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); /* - * step 0: sanity check of protection code + * Pass 2. */ +#ifdef DIAGNOSTIC + timestamp_save = map->timestamp; +#endif + vm_map_busy(map); + vm_map_downgrade(map); - if ((prot & maxprot) != prot) { - return (EACCES); - } - - /* - * step 1: figure out where to put new VM range - */ + error = 0; + for (iter = first; error == 0 && iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; - if (vm_map_lock_try(map) == FALSE) { - if (flags & UVM_FLAG_TRYLOCK) - return (EFAULT); - vm_map_lock(map); /* could sleep here */ - } - if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, - uobj, uoffset, align, flags)) == NULL) { - vm_map_unlock(map); - return (ENOMEM); + error = uvm_fault_wire(map, iter->start, iter->end, + iter->protection); } -#ifdef PMAP_GROWKERNEL - { + if (error) { /* - * If the kernel pmap can't map the requested space, - * then allocate more resources for it. + * uvm_fault_wire failure + * + * Reacquire lock and undo our work. */ - if (map == kernel_map && !(flags & UVM_FLAG_FIXED) && - uvm_maxkaddr < (*startp + size)) - uvm_maxkaddr = pmap_growkernel(*startp + size); - } + vm_map_upgrade(map); + vm_map_unbusy(map); +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable_wire: stale map"); #endif - /* - * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER - * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in - * either case we want to zero it before storing it in the map entry - * (because it looks strange and confusing when debugging...) - * - * if uobj is not null - * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping - * and we do not need to change uoffset. - * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset - * now (based on the starting address of the map). this case is - * for kernel object mappings where we don't know the offset until - * the virtual address is found (with uvm_map_findspace). the - * offset is the distance we are from the start of the map. - */ + /* + * first is no longer needed to restart loops. + * Use it as iterator to unmap successful mappings. 
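The wiring above runs in two passes: the first raises wired_count on every entry while the map is write-locked, the second faults the pages in under the downgraded lock, and a failure walks the range again to unwind the counts (unwiring the entries that had already been faulted). A compact standalone sketch of that increment/fault/rollback shape; struct region and fault_in() are illustrative stand-ins for map entries and uvm_fault_wire(), and no locking is shown:

#include <stdio.h>

struct region {
	int	wired_count;
	int	faultable;	/* 0 simulates a failing uvm_fault_wire() */
};

static int
fault_in(struct region *r)
{
	return r->faultable ? 0 : -1;
}

static int
wire_all(struct region *r, int n)
{
	int i, j;

	/* Pass 1: bump the wire counts up front. */
	for (i = 0; i < n; i++)
		r[i].wired_count++;

	/* Pass 2: fault pages in; on failure undo all of pass 1. */
	for (i = 0; i < n; i++) {
		if (fault_in(&r[i]) != 0) {
			/*
			 * Regions before i were faulted and would also be
			 * unwired here; the rest only lose their count.
			 */
			for (j = 0; j < n; j++)
				r[j].wired_count--;
			return -1;
		}
	}
	return 0;
}

int
main(void)
{
	struct region r[3] = { { 0, 1 }, { 0, 1 }, { 0, 0 } };

	printf("wire_all: %d, counts: %d %d %d\n", wire_all(r, 3),
	    r[0].wired_count, r[1].wired_count, r[2].wired_count);
	return 0;
}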
+ */ + for (; first != iter; + first = RB_NEXT(uvm_map_addr, &map->addr, first)) { + if (UVM_ET_ISHOLE(first) || first->start == first->end) + continue; - if (uobj == NULL) { - uoffset = 0; - } else { - if (uoffset == UVM_UNKNOWN_OFFSET) { - KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); - uoffset = *startp - vm_map_min(kernel_map); + first->wired_count--; + if (!VM_MAPENT_ISWIRED(first)) { + uvm_fault_unwire_locked(map, + iter->start, iter->end); + } } + + /* + * decrease counter in the rest of the entries + */ + for (; iter != end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) + continue; + + iter->wired_count--; + } + + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + return error; } /* - * step 2: try and insert in map by extending previous entry, if - * possible - * XXX: we don't try and pull back the next entry. might be useful - * for a stack, but we are currently allocating our stack in advance. + * We are currently holding a read lock. */ + if ((lockflags & UVM_LK_EXIT) == 0) { + vm_map_unbusy(map); + vm_map_unlock_read(map); + } else { + vm_map_upgrade(map); + vm_map_unbusy(map); +#ifdef DIAGNOSTIC + if (timestamp_save != map->timestamp) + panic("uvm_map_pageable_wire: stale map"); +#endif + } + return 0; +} - if ((flags & UVM_FLAG_NOMERGE) == 0 && - prev_entry->end == *startp && prev_entry != &map->header && - prev_entry->object.uvm_obj == uobj) { - - if (uobj && prev_entry->offset + - (prev_entry->end - prev_entry->start) != uoffset) - goto step3; - - if (UVM_ET_ISSUBMAP(prev_entry)) - goto step3; +/* + * uvm_map_pageable: set pageability of a range in a map. + * + * Flags: + * UVM_LK_ENTER: map is already locked by caller + * UVM_LK_EXIT: don't unlock map on exit + * + * The full range must be in use (entries may not have fspace != 0). + * UVM_ET_HOLE counts as unmapped. + */ +int +uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, + boolean_t new_pageable, int lockflags) +{ + struct vm_map_entry *first, *last, *tmp; + int error; - if (prev_entry->protection != prot || - prev_entry->max_protection != maxprot) - goto step3; + start = trunc_page(start); + end = round_page(end); - if (prev_entry->inheritance != inherit || - prev_entry->advice != advice) - goto step3; + if (start > end) + return EINVAL; + if (start < map->min_offset) + return EFAULT; /* why? see first XXX below */ + if (end > map->max_offset) + return EINVAL; /* why? see second XXX below */ - /* wiring status must match (new area is unwired) */ - if (VM_MAPENT_ISWIRED(prev_entry)) - goto step3; + KASSERT(map->flags & VM_MAP_PAGEABLE); + if ((lockflags & UVM_LK_ENTER) == 0) + vm_map_lock(map); + /* + * Find first entry. + * + * Initial test on start is different, because of the different + * error returned. Rest is tested further down. + */ + first = uvm_map_entrybyaddr(&map->addr, start); + if (first->end <= start || UVM_ET_ISHOLE(first)) { /* - * can't extend a shared amap. note: no need to lock amap to - * look at refs since we don't care about its exact value. - * if it is one (i.e. we have only reference) it will stay there + * XXX if the first address is not mapped, it is EFAULT? */ + error = EFAULT; + goto out; + } - if (prev_entry->aref.ar_amap && - amap_refs(prev_entry->aref.ar_amap) != 1) { - goto step3; + /* + * Check that the range has no holes. 
+ */ + for (last = first; last != NULL && last->start < end; + last = RB_NEXT(uvm_map_addr, &map->addr, last)) { + if (UVM_ET_ISHOLE(last) || + (last->end < end && VMMAP_FREE_END(last) != last->end)) { + /* + * XXX unmapped memory in range, why is it EINVAL + * instead of EFAULT? + */ + error = EINVAL; + goto out; + } + } + + /* + * Last ended at the first entry after the range. + * Move back one step. + * + * Note that last may be NULL. + */ + if (last == NULL) { + last = RB_MAX(uvm_map_addr, &map->addr); + if (last->end < end) { + error = EINVAL; + goto out; } + } else + last = RB_PREV(uvm_map_addr, &map->addr, last); + /* + * Wire/unwire pages here. + */ + if (new_pageable) { /* - * Only merge kernel mappings, but keep track - * of how much we skipped. + * Mark pageable. + * entries that are not wired are untouched. */ - if (map != kernel_map && map != kmem_map) { - goto step3; - } + if (VM_MAPENT_ISWIRED(first)) + UVM_MAP_CLIP_START(map, first, start); + /* + * Split last at end. + * Make tmp be the first entry after what is to be touched. + * If last is not wired, don't touch it. + */ + if (VM_MAPENT_ISWIRED(last)) { + UVM_MAP_CLIP_END(map, last, end); + tmp = RB_NEXT(uvm_map_addr, &map->addr, last); + } else + tmp = last; - if (prev_entry->aref.ar_amap) { - error = amap_extend(prev_entry, size); - if (error) - goto step3; - } + uvm_map_pageable_pgon(map, first, tmp, start, end); + error = 0; +out: + if ((lockflags & UVM_LK_EXIT) == 0) + vm_map_unlock(map); + return error; + } else { + /* + * Mark entries wired. + * entries are always touched (because recovery needs this). + */ + if (!VM_MAPENT_ISWIRED(first)) + UVM_MAP_CLIP_START(map, first, start); /* - * drop our reference to uobj since we are extending a reference - * that we already have (the ref count can not drop to zero). + * Split last at end. + * Make tmp be the first entry after what is to be touched. + * If last is not wired, don't touch it. */ + if (!VM_MAPENT_ISWIRED(last)) { + UVM_MAP_CLIP_END(map, last, end); + tmp = RB_NEXT(uvm_map_addr, &map->addr, last); + } else + tmp = last; + + return uvm_map_pageable_wire(map, first, tmp, start, end, + lockflags); + } +} - if (uobj && uobj->pgops->pgo_detach) - uobj->pgops->pgo_detach(uobj); +/* + * uvm_map_pageable_all: special case of uvm_map_pageable - affects + * all mapped regions. + * + * Map must not be locked. + * If no flags are specified, all ragions are unwired. + */ +int +uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) +{ + vsize_t size; + struct vm_map_entry *iter; - prev_entry->end += size; - uvm_rb_fixup(map, prev_entry); - map->size += size; - if (p && uobj == NULL) - p->p_vmspace->vm_dused += atop(size); + KASSERT(map->flags & VM_MAP_PAGEABLE); + vm_map_lock(map); - uvm_tree_sanity(map, "map leave 2"); + if (flags == 0) { + uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), + NULL, map->min_offset, map->max_offset); + atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE); vm_map_unlock(map); - return (0); + return 0; + } + if (flags & MCL_FUTURE) + atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE); + if (!(flags & MCL_CURRENT)) { + vm_map_unlock(map); + return 0; } -step3: /* - * step 3: allocate new entry and link it in + * Count number of pages in all non-wired entries. + * If the number exceeds the limit, abort. 
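The MCL_CURRENT and MCL_FUTURE flags tested above are the ones userland passes to mlockall(2): MCL_FUTURE only sets VM_MAP_WIREFUTURE, while MCL_CURRENT wires what is already mapped, failing with ENOMEM when the wired-page limit would be exceeded. A small userland example that exercises this path (error handling kept minimal; the call may also fail for lack of privilege or resource limits):

#include <sys/mman.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	/* Wire everything mapped now and everything mapped later. */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		err(1, "mlockall");	/* e.g. ENOMEM over the wired limit */

	printf("address space wired\n");

	if (munlockall() == -1)
		err(1, "munlockall");
	return 0;
}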
*/ + size = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) + continue; -#ifdef KVA_GUARDPAGES - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) - size -= PAGE_SIZE; -#endif - - new_entry = uvm_mapent_alloc(map, flags); - if (new_entry == NULL) { - vm_map_unlock(map); - return (ENOMEM); + size += iter->end - iter->start; } - new_entry->start = *startp; - new_entry->end = new_entry->start + size; - new_entry->object.uvm_obj = uobj; - new_entry->offset = uoffset; - if (uobj) - new_entry->etype = UVM_ET_OBJ; - else - new_entry->etype = 0; - - if (flags & UVM_FLAG_COPYONW) { - new_entry->etype |= UVM_ET_COPYONWRITE; - if ((flags & UVM_FLAG_OVERLAY) == 0) - new_entry->etype |= UVM_ET_NEEDSCOPY; + if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { + vm_map_unlock(map); + return ENOMEM; } - if (flags & UVM_FLAG_HOLE) - new_entry->etype |= UVM_ET_HOLE; - new_entry->protection = prot; - new_entry->max_protection = maxprot; - new_entry->inheritance = inherit; - new_entry->wired_count = 0; - new_entry->advice = advice; - if (flags & UVM_FLAG_OVERLAY) { - /* - * to_add: for BSS we overallocate a little since we - * are likely to extend - */ - vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? - UVM_AMAP_CHUNK << PAGE_SHIFT : 0; - struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); - new_entry->aref.ar_pageoff = 0; - new_entry->aref.ar_amap = amap; - } else { - new_entry->aref.ar_pageoff = 0; - new_entry->aref.ar_amap = NULL; + /* XXX non-pmap_wired_count case must be handled by caller */ +#ifdef pmap_wired_count + if (limit != 0 && + size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { + vm_map_unlock(map); + return ENOMEM; } +#endif - uvm_map_entry_link(map, prev_entry, new_entry); + /* + * uvm_map_pageable_wire will release lcok + */ + return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), + NULL, map->min_offset, map->max_offset, 0); +} - map->size += size; - if (p && uobj == NULL) - p->p_vmspace->vm_dused += atop(size); +/* + * Initialize map. + * + * Allocates sufficient entries to describe the free memory in the map. + */ +void +uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) +{ + int i; + KASSERT((min & (vaddr_t)PAGE_MASK) == 0); + KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || + (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); /* - * Update the free space hint + * Update parameters. + * + * This code handles (vaddr_t)-1 and other page mask ending addresses + * properly. + * We lose the top page if the full virtual address space is used. */ + if (max & (vaddr_t)PAGE_MASK) { + max += 1; + if (max == 0) /* overflow */ + max -= PAGE_SIZE; + } + + RB_INIT(&map->addr); + map->uaddr_exe = NULL; + for (i = 0; i < nitems(map->uaddr_any); ++i) + map->uaddr_any[i] = NULL; + map->uaddr_brk_stack = NULL; - if ((map->first_free == prev_entry) && - (prev_entry->end >= new_entry->start)) - map->first_free = new_entry; + map->size = 0; + map->ref_count = 1; + map->min_offset = min; + map->max_offset = max; + map->b_start = map->b_end = 0; /* Empty brk() area by default. */ + map->s_start = map->s_end = 0; /* Empty stack area by default. */ + map->flags = flags; + map->timestamp = 0; + rw_init(&map->lock, "vmmaplk"); + simple_lock_init(&map->ref_lock); -#ifdef KVA_GUARDPAGES /* - * Create the guard entry. + * Ensure the selectors will not try to manage page 0; + * it's too special. 
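uvm_map_setup() above accepts a max that ends on the page mask, such as (vaddr_t)-1 for the whole address space, and turns it into an exclusive page-aligned bound, deliberately giving up the top page when the +1 would wrap to 0. A tiny standalone demonstration of that arithmetic; the 4 KB page size and normalize_max() are illustrative:

#include <stdint.h>
#include <stdio.h>

#define PGSZ	0x1000ULL		/* illustrative 4 KB page */
#define PGMASK	(PGSZ - 1)

/* Turn an inclusive, mask-ending max into an exclusive bound. */
static uint64_t
normalize_max(uint64_t max)
{
	if (max & PGMASK) {
		max += 1;
		if (max == 0)		/* wrapped: drop the top page */
			max -= PGSZ;
	}
	return max;
}

int
main(void)
{
	printf("0x%llx -> 0x%llx\n", (unsigned long long)0x7fffffffULL,
	    (unsigned long long)normalize_max(0x7fffffffULL));
	printf("0x%llx -> 0x%llx\n", (unsigned long long)UINT64_MAX,
	    (unsigned long long)normalize_max(UINT64_MAX));
	return 0;
}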
*/ - if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { - guard_entry = uvm_mapent_alloc(map, flags); - if (guard_entry != NULL) { - guard_entry->start = new_entry->end; - guard_entry->end = guard_entry->start + PAGE_SIZE; - guard_entry->object.uvm_obj = uobj; - guard_entry->offset = uoffset; - guard_entry->etype = MAP_ET_KVAGUARD; - guard_entry->protection = prot; - guard_entry->max_protection = maxprot; - guard_entry->inheritance = inherit; - guard_entry->wired_count = 0; - guard_entry->advice = advice; - guard_entry->aref.ar_pageoff = 0; - guard_entry->aref.ar_amap = NULL; - uvm_map_entry_link(map, new_entry, guard_entry); - map->size += PAGE_SIZE; - kva_guardpages++; - } - } + if (min < VMMAP_MIN_ADDR) + min = VMMAP_MIN_ADDR; + + /* + * Configure the allocators. + */ + if (flags & VM_MAP_ISVMSPACE) { + /* + * Setup hint areas. + */ +#if 0 /* Don't use the cool stuff yet. */ +#ifdef __LP64__ + /* Hinted allocations above 4GB */ + map->uaddr_any[0] = + uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024); + /* Hinted allocations below 4GB */ + map->uaddr_any[1] = + uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL, + 1024 * 1024 * 1024); +#else + map->uaddr_any[1] = + uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, + 1024 * 1024 * 1024); #endif - uvm_tree_sanity(map, "map leave"); +#ifdef __i386__ + map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); + map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, + max); +#elif defined(__LP64__) + map->uaddr_any[3] = + uaddr_pivot_create(MAX(min, 0x100000000ULL), max); +#else + map->uaddr_any[3] = uaddr_pivot_create(min, max); +#endif +#else /* Don't use the cool stuff yet. */ + /* + * Use the really crappy stuff at first commit. + * Browsers like crappy stuff. + */ + map->uaddr_any[0] = uaddr_rnd_create(min, max); +#endif + map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); + } else + map->uaddr_any[3] = &uaddr_kbootstrap; - vm_map_unlock(map); - return (0); + /* + * Fill map entries. + * This requires a write-locked map (because of diagnostic assertions + * in insert code). + */ + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0) + panic("uvm_map_setup: rw_enter failed on new map"); + } + uvm_map_setup_entries(map); + uvm_tree_sanity(map, __FILE__, __LINE__); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&map->lock); } /* - * uvm_map_lookup_entry: find map entry at or before an address + * Destroy the map. * - * => map must at least be read-locked by caller - * => entry is returned in "entry" - * => return value is true if address is in the returned entry + * This is the inverse operation to uvm_map_setup. */ - -boolean_t -uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, - struct vm_map_entry **entry) +void +uvm_map_teardown(struct vm_map *map) { - struct vm_map_entry *cur; - struct vm_map_entry *last; - int use_tree = 0; + struct uvm_map_deadq dead_entries; + int i; + struct vm_map_entry *entry, *tmp; +#ifdef VMMAP_DEBUG + size_t numq, numt; +#endif + + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0) + panic("uvm_map_teardown: rw_enter failed on free map"); + } /* - * start looking either from the head of the - * list, or from the hint. + * Remove address selectors. 
*/ + uvm_addr_destroy(map->uaddr_exe); + map->uaddr_exe = NULL; + for (i = 0; i < nitems(map->uaddr_any); i++) { + uvm_addr_destroy(map->uaddr_any[i]); + map->uaddr_any[i] = NULL; + } + uvm_addr_destroy(map->uaddr_brk_stack); + map->uaddr_brk_stack = NULL; - simple_lock(&map->hint_lock); - cur = map->hint; - simple_unlock(&map->hint_lock); - - if (cur == &map->header) - cur = cur->next; + /* + * Remove entries. + * + * The following is based on graph breadth-first search. + * + * In color terms: + * - the dead_entries set contains all nodes that are reachable + * (i.e. both the black and the grey nodes) + * - any entry not in dead_entries is white + * - any entry that appears in dead_entries before entry, + * is black, the rest is grey. + * The set [entry, end] is also referred to as the wavefront. + * + * Since the tree is always a fully connected graph, the breadth-first + * search guarantees that each vmmap_entry is visited exactly once. + * The vm_map is broken down in linear time. + */ + TAILQ_INIT(&dead_entries); + if ((entry = RB_ROOT(&map->addr)) != NULL) + DEAD_ENTRY_PUSH(&dead_entries, entry); + while (entry != NULL) { + uvm_unmap_kill_entry(map, entry); + if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL) + DEAD_ENTRY_PUSH(&dead_entries, tmp); + if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) + DEAD_ENTRY_PUSH(&dead_entries, tmp); + /* Update wave-front. */ + entry = TAILQ_NEXT(entry, dfree.deadq); + } - if (address >= cur->start) { - /* - * go from hint to end of list. - * - * but first, make a quick check to see if - * we are already looking at the entry we - * want (which is usually the case). - * note also that we don't need to save the hint - * here... it is the same hint (unless we are - * at the header, in which case the hint didn't - * buy us anything anyway). - */ - last = &map->header; - if ((cur != last) && (cur->end > address)) { - *entry = cur; - return (TRUE); - } + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&map->lock); + +#ifdef VMMAP_DEBUG + numt = numq = 0; + RB_FOREACH(entry, uvm_map_addr, &map->addr) + numt++; + TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) + numq++; + KASSERT(numt == numq); +#endif + uvm_unmap_detach(&dead_entries, 0); + pmap_destroy(map->pmap); + map->pmap = NULL; +} - if (map->nentries > 30) - use_tree = 1; - } else { - /* - * go from start to hint, *inclusively* - */ - last = cur->next; - cur = map->header.next; - use_tree = 1; - } +/* + * Populate map with free-memory entries. + * + * Map must be initialized and empty. + */ +void +uvm_map_setup_entries(struct vm_map *map) +{ + KDASSERT(RB_EMPTY(&map->addr)); - uvm_tree_sanity(map, __func__); + uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); +} - if (use_tree) { - struct vm_map_entry *prev = &map->header; - cur = RB_ROOT(&map->rbhead); +/* + * Split entry at given address. + * + * orig: entry that is to be split. + * next: a newly allocated map entry that is not linked. + * split: address at which the split is done. + */ +void +uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, + struct vm_map_entry *next, vaddr_t split) +{ + struct uvm_addr_state *free, *free_before; + vsize_t adj; - /* - * Simple lookup in the tree. Happens when the hint is - * invalid, or nentries reach a threshold. 
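The teardown above walks the address tree breadth-first: the root goes on the dead queue, and as the wavefront advances every visited entry appends its two children, so each entry is queued exactly once and the tree is dismantled in linear time without any rebalancing. A self-contained sketch of the same traversal over a toy binary tree, using the <sys/queue.h> TAILQ links the way the dead queue is used above; struct node, nodeq and teardown() are illustrative names:

#include <sys/queue.h>

#include <stdio.h>

struct node {
	struct node		*left, *right;	/* tree links */
	TAILQ_ENTRY(node)	 deadq;		/* wavefront/dead-list link */
	int			 key;
};
TAILQ_HEAD(nodeq, node);

static void
teardown(struct node *root)
{
	struct nodeq dead = TAILQ_HEAD_INITIALIZER(dead);
	struct node *n;

	if (root != NULL)
		TAILQ_INSERT_TAIL(&dead, root, deadq);
	for (n = TAILQ_FIRST(&dead); n != NULL; n = TAILQ_NEXT(n, deadq)) {
		/* "kill" n here, then extend the wavefront. */
		if (n->left != NULL)
			TAILQ_INSERT_TAIL(&dead, n->left, deadq);
		if (n->right != NULL)
			TAILQ_INSERT_TAIL(&dead, n->right, deadq);
	}

	/* Every node is now on the dead list, in visit order. */
	TAILQ_FOREACH(n, &dead, deadq)
		printf("%d ", n->key);
	printf("\n");
}

int
main(void)
{
	struct node a, b, c;

	a.key = 1; b.key = 2; c.key = 3;
	a.left = &b; a.right = &c;
	b.left = b.right = c.left = c.right = NULL;
	teardown(&a);		/* prints: 1 2 3 */
	return 0;
}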
- */ - while (cur) { - if (address >= cur->start) { - if (address < cur->end) { - *entry = cur; - SAVE_HINT(map, map->hint, cur); - return (TRUE); - } - prev = cur; - cur = RB_RIGHT(cur, rb_entry); - } else - cur = RB_LEFT(cur, rb_entry); - } - *entry = prev; - return (FALSE); + if ((split & PAGE_MASK) != 0) { + panic("uvm_map_splitentry: split address 0x%lx " + "not on page boundary!", split); } + KDASSERT(map != NULL && orig != NULL && next != NULL); + uvm_tree_sanity(map, __FILE__, __LINE__); + KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); + +#ifdef VMMAP_DEBUG + KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); + KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); +#endif /* VMMAP_DEBUG */ /* - * search linearly + * Free space will change, unlink from free space tree. */ - - while (cur != last) { - if (cur->end > address) { - if (address >= cur->start) { - /* - * save this lookup for future - * hints, and return - */ - - *entry = cur; - SAVE_HINT(map, map->hint, cur); - return (TRUE); + free = uvm_map_uaddr_e(map, orig); + if (free) + uvm_mapent_free_remove(map, free, orig); + + adj = split - orig->start; + + uvm_mapent_copy(orig, next); + if (split >= orig->end) { + next->etype = 0; + next->offset = 0; + next->wired_count = 0; + next->start = next->end = split; + next->guard = 0; + next->fspace = VMMAP_FREE_END(orig) - split; + next->aref.ar_amap = NULL; + next->aref.ar_pageoff = 0; + orig->guard = MIN(orig->guard, split - orig->end); + orig->fspace = split - VMMAP_FREE_START(orig); + } else { + orig->fspace = 0; + orig->guard = 0; + orig->end = next->start = split; + + if (next->aref.ar_amap) + amap_splitref(&orig->aref, &next->aref, adj); + if (UVM_ET_ISSUBMAP(orig)) { + uvm_map_reference(next->object.sub_map); + next->offset += adj; + } else if (UVM_ET_ISOBJ(orig)) { + if (next->object.uvm_obj->pgops && + next->object.uvm_obj->pgops->pgo_reference) { + next->object.uvm_obj->pgops->pgo_reference( + next->object.uvm_obj); } - break; + next->offset += adj; } - cur = cur->next; } - *entry = cur->prev; - SAVE_HINT(map, map->hint, *entry); - return (FALSE); + /* + * Link next into address tree. + * Link orig and next into free-space tree. + * + * Don't insert 'next' into the addr tree until orig has been linked, + * in case the free-list looks at adjecent entries in the addr tree + * for its decisions. + */ + if (orig->fspace > 0) + free_before = free; + else + free_before = uvm_map_uaddr_e(map, orig); + if (free_before) + uvm_mapent_free_insert(map, free_before, orig); + uvm_mapent_addr_insert(map, next); + if (free) + uvm_mapent_free_insert(map, free, next); + + uvm_tree_sanity(map, __FILE__, __LINE__); } -/* - * Checks if address pointed to by phint fits into the empty - * space before the vm_map_entry after. Takes alignment and - * offset into consideration. - */ -int -uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length, - struct vm_map_entry *after, voff_t uoffset, vsize_t align) +#ifdef VMMAP_DEBUG + +void +uvm_tree_assert(struct vm_map *map, int test, char *test_str, + char *file, int line) { - vaddr_t hint = *phint; - vaddr_t end; + char* map_special; -#ifdef PMAP_PREFER - /* - * push hint forward as needed to avoid VAC alias problems. - * we only do this if a valid offset is specified. 
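uvm_map_splitentry() above cuts one entry in two at split: the original keeps [start, split), the new entry takes over from split onward, and the backing-object offset of the tail is advanced by split - start so both halves still reference the right data. A minimal sketch of that bookkeeping on a plain mapping record; struct mapping and range_split() are illustrative, not the UVM structures:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct mapping {
	uint64_t	start, end;	/* [start, end) in the address space */
	uint64_t	offset;		/* offset into the backing object */
};

/* Split m at addr; the tail half is written to *tail. */
static void
range_split(struct mapping *m, uint64_t addr, struct mapping *tail)
{
	uint64_t adj = addr - m->start;

	assert(addr > m->start && addr < m->end);
	tail->start = addr;
	tail->end = m->end;
	tail->offset = m->offset + adj;	/* keep the data lined up */
	m->end = addr;
}

int
main(void)
{
	struct mapping m = { 0x1000, 0x5000, 0 }, tail;

	range_split(&m, 0x3000, &tail);
	printf("head 0x%llx-0x%llx off 0x%llx\n", (unsigned long long)m.start,
	    (unsigned long long)m.end, (unsigned long long)m.offset);
	printf("tail 0x%llx-0x%llx off 0x%llx\n", (unsigned long long)tail.start,
	    (unsigned long long)tail.end, (unsigned long long)tail.offset);
	return 0;
}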
- */ - if (uoffset != UVM_UNKNOWN_OFFSET) - hint = PMAP_PREFER(uoffset, hint); -#endif - if (align != 0) - if ((hint & (align - 1)) != 0) - hint = roundup(hint, align); - *phint = hint; + if (test) + return; - end = hint + length; - if (end > map->max_offset || end < hint) - return (FALSE); - if (after != NULL && after != &map->header && after->start < end) - return (FALSE); - - return (TRUE); + if (map == kernel_map) + map_special = " (kernel_map)"; + else if (map == kmem_map) + map_special = " (kmem_map)"; + else + map_special = ""; + panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, + line, test_str); } /* - * uvm_map_pie: return a random load address for a PIE executable - * properly aligned. + * Check that map is sane. */ +void +uvm_tree_sanity(struct vm_map *map, char *file, int line) +{ + struct vm_map_entry *iter; + vaddr_t addr; + vaddr_t min, max, bound; /* Bounds checker. */ + struct uvm_addr_state *free; -#ifndef VM_PIE_MAX_ADDR -#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) -#endif + addr = vm_map_min(map); + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + /* + * Valid start, end. + * Catch overflow for end+fspace. + */ + UVM_ASSERT(map, iter->end >= iter->start, file, line); + UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); + /* + * May not be empty. + */ + UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), + file, line); -#ifndef VM_PIE_MIN_ADDR -#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS -#endif + /* + * Addresses for entry must lie within map boundaries. + */ + UVM_ASSERT(map, iter->start >= vm_map_min(map) && + VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); -#ifndef VM_PIE_MIN_ALIGN -#define VM_PIE_MIN_ALIGN PAGE_SIZE -#endif + /* + * Tree may not have gaps. + */ + UVM_ASSERT(map, iter->start == addr, file, line); + addr = VMMAP_FREE_END(iter); -vaddr_t -uvm_map_pie(vaddr_t align) + /* + * Free space may not cross boundaries, unless the same + * free list is used on both sides of the border. + */ + min = VMMAP_FREE_START(iter); + max = VMMAP_FREE_END(iter); + + while (min < max && + (bound = uvm_map_boundary(map, min, max)) != max) { + UVM_ASSERT(map, + uvm_map_uaddr(map, bound - 1) == + uvm_map_uaddr(map, bound), + file, line); + min = bound; + } + + free = uvm_map_uaddr_e(map, iter); + if (free) { + UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, + file, line); + } else { + UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, + file, line); + } + } + UVM_ASSERT(map, addr == vm_map_max(map), file, line); +} + +void +uvm_tree_size_chk(struct vm_map *map, char *file, int line) { - vaddr_t addr, space, min; + struct vm_map_entry *iter; + vsize_t size; - align = MAX(align, VM_PIE_MIN_ALIGN); + size = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + if (!UVM_ET_ISHOLE(iter)) + size += iter->end - iter->start; + } - /* round up to next alignment */ - min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); + if (map->size != size) + printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); + UVM_ASSERT(map, map->size == size, file, line); - if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) - return (align); + vmspace_validate(map); +} - space = (VM_PIE_MAX_ADDR - min) / align; - space = MIN(space, (u_int32_t)-1); +/* + * This function validates the statistics on vmspace. + */ +void +vmspace_validate(struct vm_map *map) +{ + struct vmspace *vm; + struct vm_map_entry *iter; + vaddr_t imin, imax; + vaddr_t stack_begin, stack_end; /* Position of stack. */ + vsize_t stack, heap; /* Measured sizes. 
*/ - addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; - addr += min; + if (!(map->flags & VM_MAP_ISVMSPACE)) + return; - return (addr); + vm = (struct vmspace *)map; + stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + + stack = heap = 0; + RB_FOREACH(iter, uvm_map_addr, &map->addr) { + imin = imax = iter->start; + + if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) + continue; + + /* + * Update stack, heap. + * Keep in mind that (theoretically) the entries of + * userspace and stack may be joined. + */ + while (imin != iter->end) { + /* + * Set imax to the first boundary crossed between + * imin and stack addresses. + */ + imax = iter->end; + if (imin < stack_begin && imax > stack_begin) + imax = stack_begin; + else if (imin < stack_end && imax > stack_end) + imax = stack_end; + + if (imin >= stack_begin && imin < stack_end) + stack += imax - imin; + else + heap += imax - imin; + imin = imax; + } + } + + heap >>= PAGE_SHIFT; + if (heap != vm->vm_dused) { + printf("vmspace stack range: 0x%lx-0x%lx\n", + stack_begin, stack_end); + panic("vmspace_validate: vmspace.vm_dused invalid, " + "expected %ld pgs, got %ld pgs in map %p", + heap, vm->vm_dused, + map); + } } +#endif /* VMMAP_DEBUG */ + /* - * uvm_map_hint: return the beginning of the best area suitable for - * creating a new mapping with "prot" protection. + * uvm_map_init: init mapping system at boot time. note that we allocate + * and init the static pool of structs vm_map_entry for the kernel here. */ -vaddr_t -uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap) +void +uvm_map_init(void) { - vaddr_t addr; + static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; + int lcv; -#ifdef __i386__ /* - * If executable skip first two pages, otherwise start - * after data + heap region. + * now set up static pool of kernel map entries ... */ - if ((prot & VM_PROT_EXECUTE) && - ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { - addr = (PAGE_SIZE*2) + - (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); - return (round_page(addr)); + + simple_lock_init(&uvm.kentry_lock); + uvm.kentry_free = NULL; + for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { + RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = + uvm.kentry_free; + uvm.kentry_free = &kernel_map_entry[lcv]; } -#endif - /* start malloc/mmap after the brk */ - addr = (vaddr_t)p->p_vmspace->vm_daddr; - if (skipheap) - addr += BRKSIZ; -#if !defined(__vax__) - addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); -#endif - return (round_page(addr)); + + /* + * initialize the map-related pools. + */ + pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", &pool_allocator_nointr); + pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpekpl", NULL); + pool_sethiwat(&uvm_map_entry_pool, 8192); + + uvm_addr_init(); } +#if defined(DDB) + /* - * uvm_map_findspace: find "length" sized space in "map". - * - * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is - * set (in which case we insist on using "hint"). - * => "result" is VA returned - * => uobj/uoffset are to be used to handle VAC alignment, if required - * => if `align' is non-zero, we attempt to align to that value. - * => caller must at least have read-locked map - * => returns NULL on failure, or pointer to prev. 
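The vmspace_validate() loop above re-derives vm_dused by splitting every anonymous entry at the stack boundaries, counting bytes inside [stack_begin, stack_end) as stack and everything else as heap. A standalone sketch of that range-splitting accounting; account() and the addresses in main are illustrative:

#include <stdint.h>
#include <stdio.h>

/*
 * Attribute the bytes of [start, end) to stack or heap, where the
 * stack occupies [sb, se). Mirrors the imin/imax walk above.
 */
static void
account(uint64_t start, uint64_t end, uint64_t sb, uint64_t se,
    uint64_t *stack, uint64_t *heap)
{
	uint64_t imin = start, imax;

	while (imin != end) {
		imax = end;
		if (imin < sb && imax > sb)
			imax = sb;		/* stop at the stack start */
		else if (imin < se && imax > se)
			imax = se;		/* stop at the stack end */

		if (imin >= sb && imin < se)
			*stack += imax - imin;
		else
			*heap += imax - imin;
		imin = imax;
	}
}

int
main(void)
{
	uint64_t stack = 0, heap = 0;

	/* An entry straddling the start of a stack at [0x7000, 0x9000). */
	account(0x5000, 0x8000, 0x7000, 0x9000, &stack, &heap);
	printf("stack 0x%llx, heap 0x%llx\n",
	    (unsigned long long)stack, (unsigned long long)heap);
	return 0;
}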
map entry if success - * => note this is a cross between the old vm_map_findspace and vm_map_find + * DDB hooks */ -struct vm_map_entry * -uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, - vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align, - int flags) +/* + * uvm_map_printit: actually prints the map + */ +void +uvm_map_printit(struct vm_map *map, boolean_t full, + int (*pr)(const char *, ...)) { - struct vm_map_entry *entry, *next, *tmp; - struct vm_map_entry *child, *prev = NULL; - vaddr_t end, orig_hint; - - KASSERT((align & (align - 1)) == 0); - KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); + struct vmspace *vm; + struct vm_map_entry *entry; + struct uvm_addr_state *free; + int in_free, i; + char buf[8]; - uvm_tree_sanity(map, "map_findspace entry"); + (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); + (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", + map->b_start, map->b_end); + (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", + map->s_start, map->s_end); + (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", + map->size, map->ref_count, map->timestamp, + map->flags); +#ifdef pmap_resident_count + (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, + pmap_resident_count(map->pmap)); +#else + /* XXXCDC: this should be required ... */ + (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); +#endif /* - * remember the original hint. if we are aligning, then we - * may have to try again with no alignment constraint if - * we fail the first time. + * struct vmspace handling. */ + if (map->flags & VM_MAP_ISVMSPACE) { + vm = (struct vmspace *)map; + + (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", + vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); + (*pr)("\tvm_tsize=%u vm_dsize=%u\n", + vm->vm_tsize, vm->vm_dsize); + (*pr)("\tvm_taddr=%p vm_daddr=%p\n", + vm->vm_taddr, vm->vm_daddr); + (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", + vm->vm_maxsaddr, vm->vm_minsaddr); + } + + if (!full) + goto print_uaddr; + RB_FOREACH(entry, uvm_map_addr, &map->addr) { + (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", + entry, entry->start, entry->end, entry->object.uvm_obj, + (long long)entry->offset, entry->aref.ar_amap, + entry->aref.ar_pageoff); + (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " + "wc=%d, adv=%d\n", + (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', + entry->protection, entry->max_protection, + entry->inheritance, entry->wired_count, entry->advice); - orig_hint = hint; - if (hint < map->min_offset) { /* check ranges ... */ - if (flags & UVM_FLAG_FIXED) { - return(NULL); + free = uvm_map_uaddr_e(map, entry); + in_free = (free != NULL); + (*pr)("\thole=%c, free=%c, guard=0x%lx, " + "free=0x%lx-0x%lx\n", + (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', + in_free ? 'T' : 'F', + entry->guard, + VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); + (*pr)("\tfreemapped=%c, uaddr=%p\n", + (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); + if (free) { + (*pr)("\t\t(0x%lx-0x%lx %s)\n", + free->uaddr_minaddr, free->uaddr_maxaddr, + free->uaddr_functions->uaddr_name); } - hint = map->min_offset; } - if (hint > map->max_offset) { - return(NULL); + +print_uaddr: + uvm_addr_print(map->uaddr_exe, "exe", full, pr); + for (i = 0; i < nitems(map->uaddr_any); i++) { + snprintf(&buf[0], sizeof(buf), "any[%d]", i); + uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); } + uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); +} - /* - * Look for the first possible address; if there's already - * something at this address, we have to start after it. - */ +/* + * uvm_object_printit: actually prints the object + */ +void +uvm_object_printit(uobj, full, pr) + struct uvm_object *uobj; + boolean_t full; + int (*pr)(const char *, ...); +{ + struct vm_page *pg; + int cnt = 0; - if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { - if ((entry = map->first_free) != &map->header) - hint = entry->end; - } else { - if (uvm_map_lookup_entry(map, hint, &tmp)) { - /* "hint" address already in use ... */ - if (flags & UVM_FLAG_FIXED) { - return(NULL); - } - hint = tmp->end; + (*pr)("OBJECT %p: pgops=%p, npages=%d, ", + uobj, uobj->pgops, uobj->uo_npages); + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) + (*pr)("refs=<SYSTEM>\n"); + else + (*pr)("refs=%d\n", uobj->uo_refs); + + if (!full) { + return; + } + (*pr)(" PAGES <pg,offset>:\n "); + RB_FOREACH(pg, uvm_objtree, &uobj->memt) { + (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); + if ((cnt % 3) == 2) { + (*pr)("\n "); } - entry = tmp; + cnt++; + } + if ((cnt % 3) != 2) { + (*pr)("\n"); } +} - if (flags & UVM_FLAG_FIXED) { - end = hint + length; - if (end > map->max_offset || end < hint) { - goto error; - } - next = entry->next; - if (next == &map->header || next->start >= end) - goto found; - return(NULL); /* only one shot at it ... 
*/ - } - - /* Try to find the space in the red-black tree */ - - /* Check slot before any entry */ - if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align)) - goto found; - - /* If there is not enough space in the whole tree, we fail */ - tmp = RB_ROOT(&map->rbhead); - if (tmp == NULL || tmp->space < length) - goto error; - - /* Find an entry close to hint that has enough space */ - for (; tmp;) { - if (tmp->end >= hint && - (prev == NULL || tmp->end < prev->end)) { - if (tmp->ownspace >= length) - prev = tmp; - else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL && - child->space >= length) - prev = tmp; +/* + * uvm_page_printit: actually print the page + */ +static const char page_flagbits[] = + "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" + "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" + "\31PMAP1\32PMAP2\33PMAP3"; + +void +uvm_page_printit(pg, full, pr) + struct vm_page *pg; + boolean_t full; + int (*pr)(const char *, ...); +{ + struct vm_page *tpg; + struct uvm_object *uobj; + struct pglist *pgl; + + (*pr)("PAGE %p:\n", pg); + (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", + pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, + (long long)pg->phys_addr); + (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", + pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); +#if defined(UVM_PAGE_TRKOWN) + if (pg->pg_flags & PG_BUSY) + (*pr)(" owning process = %d, tag=%s", + pg->owner, pg->owner_tag); + else + (*pr)(" page not busy, no owner"); +#else + (*pr)(" [page ownership tracking disabled]"); +#endif +#ifdef __HAVE_VM_PAGE_MD + (*pr)("\tvm_page_md %p\n", &pg->mdpage); +#else + (*pr)("\n"); +#endif + + if (!full) + return; + + /* cross-verify object/anon */ + if ((pg->pg_flags & PQ_FREE) == 0) { + if (pg->pg_flags & PQ_ANON) { + if (pg->uanon == NULL || pg->uanon->an_page != pg) + (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", + (pg->uanon) ? pg->uanon->an_page : NULL); + else + (*pr)(" anon backpointer is OK\n"); + } else { + uobj = pg->uobject; + if (uobj) { + (*pr)(" checking object list\n"); + RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { + if (tpg == pg) { + break; + } + } + if (tpg) + (*pr)(" page found on object list\n"); + else + (*pr)(" >>> PAGE NOT FOUND " + "ON OBJECT LIST! <<<\n"); + } } - if (tmp->end < hint) - child = RB_RIGHT(tmp, rb_entry); - else if (tmp->end > hint) - child = RB_LEFT(tmp, rb_entry); - else { - if (tmp->ownspace >= length) + } + + /* cross-verify page queue */ + if (pg->pg_flags & PQ_FREE) { + if (uvm_pmr_isfree(pg)) + (*pr)(" page found in uvm_pmemrange\n"); + else + (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); + pgl = NULL; + } else if (pg->pg_flags & PQ_INACTIVE) { + pgl = (pg->pg_flags & PQ_SWAPBACKED) ? + &uvm.page_inactive_swp : &uvm.page_inactive_obj; + } else if (pg->pg_flags & PQ_ACTIVE) { + pgl = &uvm.page_active; + } else { + pgl = NULL; + } + + if (pgl) { + (*pr)(" checking pageq list\n"); + TAILQ_FOREACH(tpg, pgl, pageq) { + if (tpg == pg) { break; - child = RB_RIGHT(tmp, rb_entry); + } } - if (child == NULL || child->space < length) - break; - tmp = child; + if (tpg) + (*pr)(" page found on pageq list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); } - - if (tmp != NULL && hint < tmp->end + tmp->ownspace) { - /* - * Check if the entry that we found satifies the - * space requirement - */ - if (hint < tmp->end) - hint = tmp->end; - if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, - align)) { - entry = tmp; - goto found; - } else if (tmp->ownspace >= length) - goto listsearch; - } - if (prev == NULL) - goto error; - - hint = prev->end; - if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset, - align)) { - entry = prev; - goto found; - } else if (prev->ownspace >= length) - goto listsearch; - - tmp = RB_RIGHT(prev, rb_entry); - for (;;) { - KASSERT(tmp && tmp->space >= length); - child = RB_LEFT(tmp, rb_entry); - if (child && child->space >= length) { - tmp = child; +} +#endif + +/* + * uvm_map_protect: change map protection + * + * => set_max means set max_protection. + * => map must be unlocked. + */ +int +uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t new_prot, boolean_t set_max) +{ + struct vm_map_entry *first, *iter; + vm_prot_t old_prot; + vm_prot_t mask; + int error; + + if (start > end) + return EINVAL; + start = MAX(start, map->min_offset); + end = MIN(end, map->max_offset); + if (start >= end) + return 0; + + error = 0; + vm_map_lock(map); + + /* + * Set up first and last. + * - first will contain first entry at or after start. + */ + first = uvm_map_entrybyaddr(&map->addr, start); + KDASSERT(first != NULL); + if (first->end < start) + first = RB_NEXT(uvm_map_addr, &map->addr, first); + + /* + * First, check for protection violations. + */ + for (iter = first; iter != NULL && iter->start < end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + /* Treat memory holes as free space. */ + if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) continue; + + if (UVM_ET_ISSUBMAP(iter)) { + error = EINVAL; + goto out; + } + if ((new_prot & iter->max_protection) != new_prot) { + error = EACCES; + goto out; } - if (tmp->ownspace >= length) - break; - tmp = RB_RIGHT(tmp, rb_entry); - } - - hint = tmp->end; - if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) { - entry = tmp; - goto found; } - /* - * The tree fails to find an entry because of offset or alignment - * restrictions. Search the list instead. - */ - listsearch: /* - * Look through the rest of the map, trying to fit a new region in - * the gap between existing regions, or after the very last region. - * note: entry->end = base VA of current gap, - * next->start = VA of end of current gap + * Fix protections. */ - for (;; hint = (entry = next)->end) { + for (iter = first; iter != NULL && iter->start < end; + iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { + /* Treat memory holes as free space. */ + if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) + continue; + + old_prot = iter->protection; + /* - * Find the end of the proposed new region. Be sure we didn't - * go beyond the end of the map, or wrap around the address; - * if so, we lose. Otherwise, if this is the last entry, or - * if the proposed new region fits before the next entry, we - * win. + * Skip adapting protection iff old and new protection + * are equal. 
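uvm_map_protect() is the map-level operation behind mprotect(2)-style protection changes: it clips the affected entries, rejects requests that exceed max_protection with EACCES, and then pushes the new protection into the pmap. A small userland example of such a change on an anonymous mapping (minimal error handling):

#include <sys/mman.h>

#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	memset(p, 'x', pgsz);

	/* Drop write permission; a later store to p would now fault. */
	if (mprotect(p, pgsz, PROT_READ) == -1)
		err(1, "mprotect");

	if (munmap(p, pgsz) == -1)
		err(1, "munmap");
	return 0;
}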
*/ + if (set_max) { + if (old_prot == (new_prot & old_prot) && + iter->max_protection == new_prot) + continue; + } else { + if (old_prot == new_prot) + continue; + } + + UVM_MAP_CLIP_START(map, iter, start); + UVM_MAP_CLIP_END(map, iter, end); + + if (set_max) { + iter->max_protection = new_prot; + iter->protection &= new_prot; + } else + iter->protection = new_prot; -#ifdef PMAP_PREFER /* - * push hint forward as needed to avoid VAC alias problems. - * we only do this if a valid offset is specified. + * update physical map if necessary. worry about copy-on-write + * here -- CHECK THIS XXX */ - if (uoffset != UVM_UNKNOWN_OFFSET) - hint = PMAP_PREFER(uoffset, hint); -#endif - if (align != 0) { - if ((hint & (align - 1)) != 0) - hint = roundup(hint, align); - /* - * XXX Should we PMAP_PREFER() here again? - */ + if (iter->protection != old_prot) { + mask = UVM_ET_ISCOPYONWRITE(iter) ? + ~VM_PROT_WRITE : VM_PROT_ALL; + + /* update pmap */ + if ((iter->protection & mask) == PROT_NONE && + VM_MAPENT_ISWIRED(iter)) { + /* + * TODO(ariane) this is stupid. wired_count + * is 0 if not wired, otherwise anything + * larger than 0 (incremented once each time + * wire is called). + * Mostly to be able to undo the damage on + * failure. Not the actually be a wired + * refcounter... + * Originally: iter->wired_count--; + * (don't we have to unwire this in the pmap + * as well?) + */ + iter->wired_count = 0; + } + pmap_protect(map->pmap, iter->start, iter->end, + iter->protection & mask); } - end = hint + length; - if (end > map->max_offset || end < hint) { - goto error; + + /* + * If the map is configured to lock any future mappings, + * wire this entry now if the old protection was VM_PROT_NONE + * and the new protection is not VM_PROT_NONE. + */ + if ((map->flags & VM_MAP_WIREFUTURE) != 0 && + VM_MAPENT_ISWIRED(iter) == 0 && + old_prot == VM_PROT_NONE && + new_prot != VM_PROT_NONE) { + if (uvm_map_pageable(map, iter->start, iter->end, + FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { + /* + * If locking the entry fails, remember the + * error if it's the first one. Note we + * still continue setting the protection in + * the map, but it will return the resource + * storage condition regardless. + * + * XXX Ignore what the actual error is, + * XXX just call it a resource shortage + * XXX so that it doesn't get confused + * XXX what uvm_map_protect() itself would + * XXX normally return. + */ + error = ENOMEM; + } } - next = entry->next; - if (next == &map->header || next->start >= end) - break; } - found: - SAVE_HINT(map, map->hint, entry); - *result = hint; - return (entry); + pmap_update(map->pmap); - error: - if (align != 0) { - return (uvm_map_findspace(map, orig_hint, - length, result, uobj, uoffset, 0, flags)); - } - return (NULL); +out: + vm_map_unlock(map); + return error; } /* - * U N M A P - m a i n e n t r y p o i n t + * uvmspace_alloc: allocate a vmspace structure. + * + * - structure includes vm_map and pmap + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ +struct vmspace * +uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, + boolean_t remove_holes) +{ + struct vmspace *vm; + + vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); + uvmspace_init(vm, NULL, min, max, pageable, remove_holes); + return (vm); +} /* - * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") + * uvmspace_init: initialize a vmspace structure. 
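When the new protection is pushed into the pmap above, copy-on-write entries have VM_PROT_WRITE masked out, so the first store still faults and the page can be copied before hardware write access appears. A tiny sketch of that masking; the PROT_* constants here are illustrative stand-ins for the VM_PROT_* values, and pmap_prot() is not a UVM function:

#include <stdio.h>

#define PROT_R		0x1
#define PROT_W		0x2
#define PROT_X		0x4
#define PROT_ALL	(PROT_R | PROT_W | PROT_X)

/* Protection that actually reaches the page tables. */
static int
pmap_prot(int entry_prot, int copy_on_write)
{
	int mask = copy_on_write ? ~PROT_W : PROT_ALL;

	return entry_prot & mask;
}

int
main(void)
{
	printf("rw, COW    -> %#x\n", pmap_prot(PROT_R | PROT_W, 1));	/* 0x1 */
	printf("rw, shared -> %#x\n", pmap_prot(PROT_R | PROT_W, 0));	/* 0x3 */
	return 0;
}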
* - * => caller must check alignment and size - * => map must be unlocked (we will lock it) + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller */ void -uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p) +uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, + boolean_t pageable, boolean_t remove_holes) { - vm_map_entry_t dead_entries; + if (pmap) + pmap_reference(pmap); + else + pmap = pmap_create(); + vm->vm_map.pmap = pmap; - /* - * work now done by helper functions. wipe the pmap's and then - * detach from the dead entries... - */ - vm_map_lock(map); - uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE); - vm_map_unlock(map); + uvm_map_setup(&vm->vm_map, min, max, + (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + vm->vm_refcnt = 1; + if (remove_holes) + pmap_remove_holes(&vm->vm_map); } - /* - * U N M A P - m a i n h e l p e r f u n c t i o n s + * uvmspace_share: share a vmspace between two processes + * + * - XXX: no locking on vmspace + * - used for vfork and threads */ +void +uvmspace_share(p1, p2) + struct proc *p1, *p2; +{ + p2->p_vmspace = p1->p_vmspace; + p1->p_vmspace->vm_refcnt++; +} + /* - * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") + * uvmspace_exec: the process wants to exec a new program * - * => caller must check alignment and size - * => map must be locked by caller - * => we return a list of map entries that we've remove from the map - * in "entry_list" + * - XXX: no locking on vmspace */ void -uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, - struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes) +uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) { - struct vm_map_entry *entry, *first_entry, *next; - vaddr_t len; - - VM_MAP_RANGE_CHECK(map, start, end); - - uvm_tree_sanity(map, "unmap_remove entry"); - - if ((map->flags & VM_MAP_INTRSAFE) == 0) - splassert(IPL_NONE); - else - splassert(IPL_VM); - - /* - * find first entry - */ - if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { - /* clip and go... */ - entry = first_entry; - UVM_MAP_CLIP_START(map, entry, start); - /* critical! prevents stale hint */ - SAVE_HINT(map, entry, entry->prev); - - } else { - entry = first_entry->next; - } - - /* - * Save the free space hint - */ + struct vmspace *nvm, *ovm = p->p_vmspace; + struct vm_map *map = &ovm->vm_map; + struct uvm_map_deadq dead_entries; - if (map->first_free->start >= start) - map->first_free = entry->prev; + KASSERT((start & (vaddr_t)PAGE_MASK) == 0); + KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || + (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); - /* - * note: we now re-use first_entry for a different task. we remove - * a number of map entries from the map and save them in a linked - * list headed by "first_entry". once we remove them from the map - * the caller should unlock the map and drop the references to the - * backing objects [c.f. uvm_unmap_detach]. the object is to - * separate unmapping from reference dropping. why? - * [1] the map has to be locked for unmapping - * [2] the map need not be locked for reference dropping - * [3] dropping references may trigger pager I/O, and if we hit - * a pager that does synchronous I/O we may have to wait for it. - * [4] we would like all waiting for I/O to occur with maps unlocked - * so that we don't block other threads. 
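/*
 * The removed comment above explains why entries are unlinked with the map
 * locked while their amap/object references are dropped unlocked; the new
 * code carries that split through a TAILQ (struct uvm_map_deadq) that
 * uvm_unmap_remove() fills and uvm_unmap_detach() later drains.  A minimal
 * standalone sketch of the same collect-then-release pattern follows; the
 * types and names are invented and only a BSD-style <sys/queue.h> is
 * assumed.
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct dead_entry {
	int			 id;
	TAILQ_ENTRY(dead_entry)	 dq_link;
};
TAILQ_HEAD(toy_deadq, dead_entry);

/* Phase 1: runs with the map write-locked; only unlink and queue. */
static void
collect(struct toy_deadq *dq, int n)
{
	struct dead_entry *de;
	int i;

	for (i = 0; i < n; i++) {
		if ((de = malloc(sizeof(*de))) == NULL)
			break;
		de->id = i;
		TAILQ_INSERT_TAIL(dq, de, dq_link);
	}
}

/* Phase 2: runs unlocked; dropping references here may sleep safely. */
static void
release(struct toy_deadq *dq)
{
	struct dead_entry *de;

	while ((de = TAILQ_FIRST(dq)) != NULL) {
		TAILQ_REMOVE(dq, de, dq_link);
		printf("releasing entry %d\n", de->id);
		free(de);
	}
}

int
main(void)
{
	struct toy_deadq dq;

	TAILQ_INIT(&dq);
	collect(&dq, 3);	/* under the map lock */
	release(&dq);		/* after the map is unlocked */
	return 0;
}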
- */ - first_entry = NULL; - *entry_list = NULL; /* to be safe */ + pmap_unuse_final(p); /* before stack addresses go away */ + TAILQ_INIT(&dead_entries); /* - * break up the area into map entry sized regions and unmap. note - * that all mappings have to be removed before we can even consider - * dropping references to amaps or VM objects (otherwise we could end - * up with a mapping to a page on the free list which would be very bad) + * see if more than one process is using this vmspace... */ - while ((entry != &map->header) && (entry->start < end)) { - - UVM_MAP_CLIP_END(map, entry, end); - next = entry->next; - len = entry->end - entry->start; - if (p && entry->object.uvm_obj == NULL) - p->p_vmspace->vm_dused -= atop(len); + if (ovm->vm_refcnt == 1) { + /* + * if p is the only process using its vmspace then we can safely + * recycle that vmspace for the program that is being exec'd. + */ +#ifdef SYSVSHM /* - * unwire before removing addresses from the pmap; otherwise - * unwiring will put the entries back into the pmap (XXX). + * SYSV SHM semantics require us to kill all segments on an exec */ + if (ovm->vm_shm) + shmexit(ovm); +#endif - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); + /* + * POSIX 1003.1b -- "lock future mappings" is revoked + * when a process execs another program image. + */ + vm_map_lock(map); + vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); /* - * special case: handle mappings to anonymous kernel objects. - * we want to free these pages right away... + * now unmap the old program + * + * Instead of attempting to keep the map valid, we simply + * nuke all entries and ask uvm_map_setup to reinitialize + * the map to the new boundaries. + * + * uvm_unmap_remove will actually nuke all entries for us + * (as in, not replace them with free-memory entries). */ -#ifdef KVA_GUARDPAGES - if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) { - entry->etype &= ~MAP_ET_KVAGUARD; - kva_guardpages--; - } else /* (code continues across line-break) */ -#endif - if (UVM_ET_ISHOLE(entry)) { - if (!remove_holes) { - entry = next; - continue; - } - } else if (map->flags & VM_MAP_INTRSAFE) { - uvm_km_pgremove_intrsafe(entry->start, entry->end); - pmap_kremove(entry->start, len); - } else if (UVM_ET_ISOBJ(entry) && - UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { - KASSERT(vm_map_pmap(map) == pmap_kernel()); + uvm_unmap_remove(map, map->min_offset, map->max_offset, + &dead_entries, TRUE, FALSE); - /* - * note: kernel object mappings are currently used in - * two ways: - * [1] "normal" mappings of pages in the kernel object - * [2] uvm_km_valloc'd allocations in which we - * pmap_enter in some non-kernel-object page - * (e.g. vmapbuf). - * - * for case [1], we need to remove the mapping from - * the pmap and then remove the page from the kernel - * object (because, once pages in a kernel object are - * unmapped they are no longer needed, unlike, say, - * a vnode where you might want the data to persist - * until flushed out of a queue). - * - * for case [2], we need to remove the mapping from - * the pmap. there shouldn't be any pages at the - * specified offset in the kernel object [but it - * doesn't hurt to call uvm_km_pgremove just to be - * safe?] - * - * uvm_km_pgremove currently does the following: - * for pages in the kernel object in range: - * - drops the swap slot - * - uvm_pagefree the page - * - * note there is version of uvm_km_pgremove() that - * is used for "intrsafe" objects. 
- */ + KDASSERT(RB_EMPTY(&map->addr)); - /* - * remove mappings from pmap and drop the pages - * from the object. offsets are always relative - * to vm_map_min(kernel_map). - */ - pmap_remove(pmap_kernel(), entry->start, entry->end); - uvm_km_pgremove(entry->object.uvm_obj, - entry->start - vm_map_min(kernel_map), - entry->end - vm_map_min(kernel_map)); + /* + * Nuke statistics and boundaries. + */ + bzero(&ovm->vm_startcopy, + (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); - /* - * null out kernel_object reference, we've just - * dropped it - */ - entry->etype &= ~UVM_ET_OBJ; - entry->object.uvm_obj = NULL; /* to be safe */ - } else { - /* - * remove mappings the standard way. - */ - pmap_remove(map->pmap, entry->start, entry->end); + if (end & (vaddr_t)PAGE_MASK) { + end += 1; + if (end == 0) /* overflow */ + end -= PAGE_SIZE; } /* - * remove entry from map and put it on our list of entries - * that we've nuked. then go do next entry. + * Setup new boundaries and populate map with entries. */ - /* critical! prevents stale hint */ - SAVE_HINT(map, entry, entry->prev); + map->min_offset = start; + map->max_offset = end; + uvm_map_setup_entries(map); + vm_map_unlock(map); - uvm_map_entry_unlink(map, entry); - map->size -= len; - entry->next = first_entry; - first_entry = entry; - entry = next; /* next entry, please */ - } -#ifdef KVA_GUARDPAGES - /* - * entry points at the map-entry after the last-removed map-entry. - */ - if (map == kernel_map && entry != &map->header && - entry->etype & MAP_ET_KVAGUARD && entry->start == end) { /* - * Removed range is followed by guard page; - * remove that guard page now (or it will stay forever). + * but keep MMU holes unavailable */ - entry->etype &= ~MAP_ET_KVAGUARD; - kva_guardpages--; + pmap_remove_holes(map); - uvm_map_entry_unlink(map, entry); - map->size -= len; - entry->next = first_entry; - first_entry = entry; - entry = next; /* next entry, please */ - } -#endif - /* if ((map->flags & VM_MAP_DYING) == 0) { */ - pmap_update(vm_map_pmap(map)); - /* } */ + } else { + /* + * p's vmspace is being shared, so we can't reuse it for p since + * it is still being used for others. allocate a new vmspace + * for p + */ + nvm = uvmspace_alloc(start, end, + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); + + /* + * install new vmspace and drop our ref to the old one. + */ - uvm_tree_sanity(map, "unmap_remove leave"); + pmap_deactivate(p); + p->p_vmspace = nvm; + pmap_activate(p); + + uvmspace_free(ovm); + } /* - * now we've cleaned up the map and are ready for the caller to drop - * references to the mapped objects. + * Release dead entries */ - - *entry_list = first_entry; + uvm_unmap_detach(&dead_entries, 0); } /* - * uvm_unmap_detach: drop references in a chain of map entries + * uvmspace_free: free a vmspace data structure * - * => we will free the map entries as we traverse the list. + * - XXX: no locking on vmspace */ void -uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) +uvmspace_free(struct vmspace *vm) { - struct vm_map_entry *next_entry; - - while (first_entry) { - KASSERT(!VM_MAPENT_ISWIRED(first_entry)); - - /* - * drop reference to amap, if we've got one - */ - - if (first_entry->aref.ar_amap) - uvm_map_unreference_amap(first_entry, flags); - + if (--vm->vm_refcnt == 0) { /* - * drop reference to our backing object, if we've got one + * lock the map, to wait out all other references to it. delete + * all of the mappings and pages they hold, then call the pmap + * module to reclaim anything left. 
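/*
 * uvmspace_exec() above recycles the vmspace when the execing process is
 * its only user and replaces it otherwise, and uvmspace_free() only tears
 * down once the last reference is gone.  The sketch below models just that
 * reference-count policy with invented types; it is not UVM code.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_space {
	int refcnt;
};

static struct toy_space *
space_alloc(void)
{
	struct toy_space *s = calloc(1, sizeof(*s));

	s->refcnt = 1;
	return s;
}

static void
space_free(struct toy_space *s)
{
	if (--s->refcnt == 0) {
		printf("last reference: tearing down\n");
		free(s);
	}
}

/* exec: keep the space if we own it exclusively, otherwise replace it */
static struct toy_space *
space_exec(struct toy_space *old)
{
	struct toy_space *new;

	if (old->refcnt == 1) {
		printf("sole user: recycling in place\n");
		return old;
	}
	new = space_alloc();
	space_free(old);	/* drop our reference to the shared one */
	return new;
}

int
main(void)
{
	struct toy_space *a, *b;

	a = space_alloc();
	a->refcnt++;		/* simulate a vfork-style share */
	b = space_exec(a);	/* not the sole user: a new space comes back */
	space_free(b);
	space_free(a);		/* the other sharer exits */
	return 0;
}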
*/ +#ifdef SYSVSHM + /* Get rid of any SYSV shared memory segments. */ + if (vm->vm_shm != NULL) + shmexit(vm); +#endif - if (UVM_ET_ISSUBMAP(first_entry)) { - /* ... unlikely to happen, but play it safe */ - uvm_map_deallocate(first_entry->object.sub_map); - } else { - if (UVM_ET_ISOBJ(first_entry) && - first_entry->object.uvm_obj->pgops->pgo_detach) - first_entry->object.uvm_obj->pgops-> - pgo_detach(first_entry->object.uvm_obj); - } - - next_entry = first_entry->next; - uvm_mapent_free(first_entry); - first_entry = next_entry; + uvm_map_teardown(&vm->vm_map); + pool_put(&uvm_vmspace_pool, vm); } } /* - * E X T R A C T I O N F U N C T I O N S - */ - -/* - * uvm_map_reserve: reserve space in a vm_map for future use. + * Clone map entry into other map. * - * => we reserve space in a map by putting a dummy map entry in the - * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) - * => map should be unlocked (we will write lock it) - * => we return true if we were able to reserve space - * => XXXCDC: should be inline? + * Mapping will be placed at dstaddr, for the same length. + * Space must be available. + * Reference counters are incremented. */ - -int -uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset, - vsize_t align, vaddr_t *raddr) +struct vm_map_entry* +uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, + vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, + int mapent_flags, int amap_share_flags) { + struct vm_map_entry *new_entry, *first, *last; - size = round_page(size); - if (*raddr < vm_map_min(map)) - *raddr = vm_map_min(map); /* hint */ + KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); /* - * reserve some virtual space. + * Create new entry (linked in on creation). + * Fill in first, last. */ + first = last = NULL; + if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { + panic("uvmspace_fork: no space in map for " + "entry in empty map"); + } + new_entry = uvm_map_mkentry(dstmap, first, last, + dstaddr, dstlen, mapent_flags, dead); + if (new_entry == NULL) + return NULL; + /* old_entry -> new_entry */ + new_entry->object = old_entry->object; + new_entry->offset = old_entry->offset; + new_entry->aref = old_entry->aref; + new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; + new_entry->protection = old_entry->protection; + new_entry->max_protection = old_entry->max_protection; + new_entry->inheritance = old_entry->inheritance; + new_entry->advice = old_entry->advice; - if (uvm_map(map, raddr, size, NULL, offset, 0, - UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, - UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { - return (FALSE); - } + /* + * gain reference to object backing the map (can't + * be a submap). + */ + if (new_entry->aref.ar_amap) { + new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; + amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, + (new_entry->end - new_entry->start) >> PAGE_SHIFT, + amap_share_flags); + } + + if (UVM_ET_ISOBJ(new_entry) && + new_entry->object.uvm_obj->pgops->pgo_reference) { + new_entry->offset += off; + new_entry->object.uvm_obj->pgops->pgo_reference + (new_entry->object.uvm_obj); + } - return (TRUE); + return new_entry; } /* - * uvm_map_replace: replace a reserved (blank) area of memory with - * real mappings. 
- * - * => caller must WRITE-LOCK the map - * => we return TRUE if replacement was a success - * => we expect the newents chain to have nnewents entries on it and - * we expect newents->prev to point to the last entry on the list - * => note newents is allowed to be NULL + * share the mapping: this means we want the old and + * new entries to share amaps and backing objects. */ - -int -uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, - struct vm_map_entry *newents, int nnewents) +void +uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, + struct vm_map *old_map, + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) { - struct vm_map_entry *oldent, *last; - - uvm_tree_sanity(map, "map_replace entry"); - - /* - * first find the blank map entry at the specified address - */ - - if (!uvm_map_lookup_entry(map, start, &oldent)) { - return(FALSE); - } + struct vm_map_entry *new_entry; /* - * check to make sure we have a proper blank entry + * if the old_entry needs a new amap (due to prev fork) + * then we need to allocate it now so that we have + * something we own to share with the new_entry. [in + * other words, we need to clear needs_copy] */ - if (oldent->start != start || oldent->end != end || - oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { - return (FALSE); + if (UVM_ET_ISNEEDSCOPY(old_entry)) { + /* get our own amap, clears needs_copy */ + amap_copy(old_map, old_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: WAITOK??? */ } -#ifdef DIAGNOSTIC - /* - * sanity check the newents chain - */ - { - struct vm_map_entry *tmpent = newents; - int nent = 0; - vaddr_t cur = start; - - while (tmpent) { - nent++; - if (tmpent->start < cur) - panic("uvm_map_replace1"); - if (tmpent->start > tmpent->end || tmpent->end > end) { - printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", - tmpent->start, tmpent->end, end); - panic("uvm_map_replace2"); - } - cur = tmpent->end; - if (tmpent->next) { - if (tmpent->next->prev != tmpent) - panic("uvm_map_replace3"); - } else { - if (newents->prev != tmpent) - panic("uvm_map_replace4"); - } - tmpent = tmpent->next; - } - if (nent != nnewents) - panic("uvm_map_replace5"); - } -#endif + new_entry = uvm_mapent_clone(new_map, old_entry->start, + old_entry->end - old_entry->start, 0, old_entry, + dead, 0, AMAP_SHARED); - /* - * map entry is a valid blank! replace it. (this does all the - * work of map entry link/unlink...). + /* + * pmap_copy the mappings: this routine is optional + * but if it is there it will reduce the number of + * page faults in the new proc. 
*/ - - if (newents) { - last = newents->prev; /* we expect this */ - - /* critical: flush stale hints out of map */ - SAVE_HINT(map, map->hint, newents); - if (map->first_free == oldent) - map->first_free = last; - - last->next = oldent->next; - last->next->prev = last; - - /* Fix RB tree */ - uvm_rb_remove(map, oldent); - - newents->prev = oldent->prev; - newents->prev->next = newents; - map->nentries = map->nentries + (nnewents - 1); - - /* Fixup the RB tree */ - { - int i; - struct vm_map_entry *tmp; - - tmp = newents; - for (i = 0; i < nnewents && tmp; i++) { - uvm_rb_insert(map, tmp); - tmp = tmp->next; - } - } - } else { - - /* critical: flush stale hints out of map */ - SAVE_HINT(map, map->hint, oldent->prev); - if (map->first_free == oldent) - map->first_free = oldent->prev; - - /* NULL list of new entries: just remove the old one */ - uvm_map_entry_unlink(map, oldent); - } - - - uvm_tree_sanity(map, "map_replace leave"); + pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, + (new_entry->end - new_entry->start), new_entry->start); /* - * now we can free the old blank entry, unlock the map and return. + * Update process statistics. */ - - uvm_mapent_free(oldent); - return(TRUE); + if (!UVM_ET_ISHOLE(new_entry)) + new_map->size += new_entry->end - new_entry->start; + if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { + new_vm->vm_dused += + uvmspace_dused(new_map, new_entry->start, new_entry->end); + } } /* - * uvm_map_extract: extract a mapping from a map and put it somewhere - * (maybe removing the old mapping) + * copy-on-write the mapping (using mmap's + * MAP_PRIVATE semantics) * - * => maps should be unlocked (we will write lock them) - * => returns 0 on success, error code otherwise - * => start must be page aligned - * => len must be page sized - * => flags: - * UVM_EXTRACT_REMOVE: remove mappings from srcmap - * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) - * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs - * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go - * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< - * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only - * be used from within the kernel in a kernel level map <<< + * allocate new_entry, adjust reference counts. + * (note that new references are read-only). */ - -int -uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, - struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) +void +uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, + struct vm_map *old_map, + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) { - vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, - oldstart; - struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry; - struct vm_map_entry *deadentry, *oldentry; - vsize_t elen; - int nchain, error, copy_ok; + struct vm_map_entry *new_entry; + boolean_t protect_child; - uvm_tree_sanity(srcmap, "map_extract src enter"); - uvm_tree_sanity(dstmap, "map_extract dst enter"); - - /* - * step 0: sanity check: start must be on a page boundary, length - * must be page sized. can't ask for CONTIG/QREF if you asked for - * REMOVE. 
- */ + new_entry = uvm_mapent_clone(new_map, old_entry->start, + old_entry->end - old_entry->start, 0, old_entry, + dead, 0, 0); - KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); - KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || - (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); + new_entry->etype |= + (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); /* - * step 1: reserve space in the target map for the extracted area + * the new entry will need an amap. it will either + * need to be copied from the old entry or created + * from scratch (if the old entry does not have an + * amap). can we defer this process until later + * (by setting "needs_copy") or do we need to copy + * the amap now? + * + * we must copy the amap now if any of the following + * conditions hold: + * 1. the old entry has an amap and that amap is + * being shared. this means that the old (parent) + * process is sharing the amap with another + * process. if we do not clear needs_copy here + * we will end up in a situation where both the + * parent and child process are referring to the + * same amap with "needs_copy" set. if the + * parent write-faults, the fault routine will + * clear "needs_copy" in the parent by allocating + * a new amap. this is wrong because the + * parent is supposed to be sharing the old amap + * and the new amap will break that. + * + * 2. if the old entry has an amap and a non-zero + * wire count then we are going to have to call + * amap_cow_now to avoid page faults in the + * parent process. since amap_cow_now requires + * "needs_copy" to be clear we might as well + * clear it here as well. + * */ - dstaddr = vm_map_min(dstmap); - if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE) - return(ENOMEM); - *dstaddrp = dstaddr; /* pass address back to caller */ + if (old_entry->aref.ar_amap != NULL && + ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + VM_MAPENT_ISWIRED(old_entry))) { + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } /* - * step 2: setup for the extraction process loop by init'ing the - * map entry chain, locking src map, and looking up the first useful - * entry in the map. + * if the parent's entry is wired down, then the + * parent process does not want page faults on + * access to that memory. this means that we + * cannot do copy-on-write because we can't write + * protect the old entry. in this case we + * resolve all copy-on-write faults now, using + * amap_cow_now. note that we have already + * allocated any needed amap (above). */ - end = start + len; - newend = dstaddr + len; - chain = endchain = NULL; - nchain = 0; - vm_map_lock(srcmap); + if (VM_MAPENT_ISWIRED(old_entry)) { - if (uvm_map_lookup_entry(srcmap, start, &entry)) { + /* + * resolve all copy-on-write faults now + * (note that there is nothing to do if + * the old mapping does not have an amap). + * XXX: is it worthwhile to bother with + * pmap_copy in this case? + */ + if (old_entry->aref.ar_amap) + amap_cow_now(new_map, new_entry); - /* "start" is within an entry */ - if (flags & UVM_EXTRACT_QREF) { + } else { + if (old_entry->aref.ar_amap) { /* - * for quick references we don't clip the entry, so - * the entry may map space "before" the starting - * virtual address... this is the "fudge" factor - * (which can be non-zero only the first time - * through the "while" loop in step 3). + * setup mappings to trigger copy-on-write faults + * we must write-protect the parent if it has + * an amap and it is not already "needs_copy"... 
+ * if it is already "needs_copy" then the parent + * has already been write-protected by a previous + * fork operation. + * + * if we do not write-protect the parent, then + * we must be sure to write-protect the child + * after the pmap_copy() operation. + * + * XXX: pmap_copy should have some way of telling + * us that it didn't do anything so we can avoid + * calling pmap_protect needlessly. */ + if (!UVM_ET_ISNEEDSCOPY(old_entry)) { + if (old_entry->max_protection & + VM_PROT_WRITE) { + pmap_protect(old_map->pmap, + old_entry->start, + old_entry->end, + old_entry->protection & + ~VM_PROT_WRITE); + pmap_update(old_map->pmap); + } + old_entry->etype |= UVM_ET_NEEDSCOPY; + } - fudge = start - entry->start; + /* + * parent must now be write-protected + */ + protect_child = FALSE; } else { /* - * normal reference: we clip the map to fit (thus - * fudge is zero) + * we only need to protect the child if the + * parent has write access. */ - - UVM_MAP_CLIP_START(srcmap, entry, start); - SAVE_HINT(srcmap, srcmap->hint, entry->prev); - fudge = 0; - } - } else { - - /* "start" is not within an entry ... skip to next entry */ - if (flags & UVM_EXTRACT_CONTIG) { - error = EINVAL; - goto bad; /* definite hole here ... */ - } - - entry = entry->next; - fudge = 0; - } - - /* save values from srcmap for step 6 */ - orig_entry = entry; - orig_fudge = fudge; - - /* - * step 3: now start looping through the map entries, extracting - * as we go. - */ - - while (entry->start < end && entry != &srcmap->header) { - - /* if we are not doing a quick reference, clip it */ - if ((flags & UVM_EXTRACT_QREF) == 0) - UVM_MAP_CLIP_END(srcmap, entry, end); - - /* clear needs_copy (allow chunking) */ - if (UVM_ET_ISNEEDSCOPY(entry)) { - if (fudge) - oldstart = entry->start; + if (old_entry->max_protection & VM_PROT_WRITE) + protect_child = TRUE; else - oldstart = 0; /* XXX: gcc */ - amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); - if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ - error = ENOMEM; - goto bad; - } + protect_child = FALSE; - /* amap_copy could clip (during chunk)! update fudge */ - if (fudge) { - fudge = fudge - (entry->start - oldstart); - orig_fudge = fudge; - } - } - - /* calculate the offset of this from "start" */ - oldoffset = (entry->start + fudge) - start; - - /* allocate a new map entry */ - newentry = uvm_mapent_alloc(dstmap, flags); - if (newentry == NULL) { - error = ENOMEM; - goto bad; } - /* set up new map entry */ - newentry->next = NULL; - newentry->prev = endchain; - newentry->start = dstaddr + oldoffset; - newentry->end = - newentry->start + (entry->end - (entry->start + fudge)); - if (newentry->end > newend || newentry->end < newentry->start) - newentry->end = newend; - newentry->object.uvm_obj = entry->object.uvm_obj; - if (newentry->object.uvm_obj) { - if (newentry->object.uvm_obj->pgops->pgo_reference) - newentry->object.uvm_obj->pgops-> - pgo_reference(newentry->object.uvm_obj); - newentry->offset = entry->offset + fudge; - } else { - newentry->offset = 0; - } - newentry->etype = entry->etype; - newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? - entry->max_protection : entry->protection; - newentry->max_protection = entry->max_protection; - newentry->inheritance = entry->inheritance; - newentry->wired_count = 0; - newentry->aref.ar_amap = entry->aref.ar_amap; - if (newentry->aref.ar_amap) { - newentry->aref.ar_pageoff = - entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); - uvm_map_reference_amap(newentry, AMAP_SHARED | - ((flags & UVM_EXTRACT_QREF) ? 
AMAP_REFALL : 0)); - } else { - newentry->aref.ar_pageoff = 0; - } - newentry->advice = entry->advice; + /* + * copy the mappings + * XXX: need a way to tell if this does anything + */ - /* now link it on the chain */ - nchain++; - if (endchain == NULL) { - chain = endchain = newentry; - } else { - endchain->next = newentry; - endchain = newentry; - } + pmap_copy(new_map->pmap, old_map->pmap, + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); - /* end of 'while' loop! */ - if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && - (entry->next == &srcmap->header || - entry->next->start != entry->end)) { - error = EINVAL; - goto bad; + /* + * protect the child's mappings if necessary + */ + if (protect_child) { + pmap_protect(new_map->pmap, new_entry->start, + new_entry->end, + new_entry->protection & + ~VM_PROT_WRITE); } - entry = entry->next; - fudge = 0; } /* - * step 4: close off chain (in format expected by uvm_map_replace) + * Update process statistics. */ + if (!UVM_ET_ISHOLE(new_entry)) + new_map->size += new_entry->end - new_entry->start; + if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { + new_vm->vm_dused += + uvmspace_dused(new_map, new_entry->start, new_entry->end); + } +} - if (chain) - chain->prev = endchain; +/* + * uvmspace_fork: fork a process' main map + * + * => create a new vmspace for child process from parent. + * => parent's map must not be locked. + */ +struct vmspace * +uvmspace_fork(struct vmspace *vm1) +{ + struct vmspace *vm2; + struct vm_map *old_map = &vm1->vm_map; + struct vm_map *new_map; + struct vm_map_entry *old_entry; + struct uvm_map_deadq dead; - /* - * step 5: attempt to lock the dest map so we can pmap_copy. - * note usage of copy_ok: - * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) - * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 - */ + vm_map_lock(old_map); - if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { - copy_ok = 1; - if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, - nchain)) { - if (srcmap != dstmap) - vm_map_unlock(dstmap); - error = EIO; - goto bad; - } - } else { - copy_ok = 0; - /* replace defered until step 7 */ - } + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); + memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, + (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); + vm2->vm_dused = 0; /* Statistic managed by us. 
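/*
 * uvm_mapent_forkcopy() above defers the actual copy to the first fault
 * (needs_copy) unless the amap is shared or the entry is wired, in which
 * case amap_cow_now() resolves it during fork.  The sketch below restates
 * only that decision; the struct and function names are invented and the
 * write-protection bookkeeping is reduced to a comment.
 */
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the fields the fork-copy path looks at. */
struct toy_entry {
	bool has_amap;		/* entry has anonymous memory */
	bool amap_shared;	/* that amap is shared with someone else */
	bool wired;		/* entry is wired (no faults allowed) */
};

/*
 * True when copy-on-write cannot be deferred to the first fault: a shared
 * amap must not gain a second needs_copy reference, and a wired parent
 * cannot be write-protected.  Otherwise the fork marks the new entry
 * needs_copy and write-protects whichever side still has write access.
 */
static bool
must_copy_now(const struct toy_entry *e)
{
	return e->has_amap && (e->amap_shared || e->wired);
}

int
main(void)
{
	struct toy_entry wired_entry = { true, false, true };
	struct toy_entry plain_entry = { true, false, false };

	printf("wired: copy at fork time? %d\n", must_copy_now(&wired_entry));
	printf("plain: copy at fork time? %d\n", must_copy_now(&plain_entry));
	return 0;
}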
*/ + new_map = &vm2->vm_map; + vm_map_lock(new_map); /* - * step 6: traverse the srcmap a second time to do the following: - * - if we got a lock on the dstmap do pmap_copy - * - if UVM_EXTRACT_REMOVE remove the entries - * we make use of orig_entry and orig_fudge (saved in step 2) + * go entry-by-entry */ - if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { + TAILQ_INIT(&dead); + RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { + if (old_entry->start == old_entry->end) + continue; - /* purge possible stale hints from srcmap */ - if (flags & UVM_EXTRACT_REMOVE) { - SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); - if (srcmap->first_free->start >= start) - srcmap->first_free = orig_entry->prev; + /* + * first, some sanity checks on the old entry + */ + if (UVM_ET_ISSUBMAP(old_entry)) { + panic("fork: encountered a submap during fork " + "(illegal)"); } - entry = orig_entry; - fudge = orig_fudge; - deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ - - while (entry->start < end && entry != &srcmap->header) { - if (copy_ok) { - oldoffset = (entry->start + fudge) - start; - elen = MIN(end, entry->end) - - (entry->start + fudge); - pmap_copy(dstmap->pmap, srcmap->pmap, - dstaddr + oldoffset, elen, - entry->start + fudge); - } - - /* we advance "entry" in the following if statement */ - if (flags & UVM_EXTRACT_REMOVE) { - pmap_remove(srcmap->pmap, entry->start, - entry->end); - oldentry = entry; /* save entry */ - entry = entry->next; /* advance */ - uvm_map_entry_unlink(srcmap, oldentry); - /* add to dead list */ - oldentry->next = deadentry; - deadentry = oldentry; - } else { - entry = entry->next; /* advance */ - } - - /* end of 'while' loop */ - fudge = 0; + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) { + panic("fork: non-copy_on_write map entry marked " + "needs_copy (illegal)"); } - pmap_update(srcmap->pmap); /* - * unlock dstmap. we will dispose of deadentry in - * step 7 if needed + * Apply inheritance. */ - - if (copy_ok && srcmap != dstmap) - vm_map_unlock(dstmap); - + if (old_entry->inheritance == MAP_INHERIT_SHARE) { + uvm_mapent_forkshared(vm2, new_map, + old_map, old_entry, &dead); + } + if (old_entry->inheritance == MAP_INHERIT_COPY) { + uvm_mapent_forkcopy(vm2, new_map, + old_map, old_entry, &dead); + } } - else - deadentry = NULL; /* XXX: gcc */ + + vm_map_unlock(old_map); + vm_map_unlock(new_map); /* - * step 7: we are done with the source map, unlock. if copy_ok - * is 0 then we have not replaced the dummy mapping in dstmap yet - * and we need to do so now. + * This can actually happen, if multiple entries described a + * space in which an entry was inherited. */ + uvm_unmap_detach(&dead, 0); - vm_map_unlock(srcmap); - if ((flags & UVM_EXTRACT_REMOVE) && deadentry) - uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ - - /* now do the replacement if we didn't do it in step 5 */ - if (copy_ok == 0) { - vm_map_lock(dstmap); - error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, - nchain); - vm_map_unlock(dstmap); - - if (error == FALSE) { - error = EIO; - goto bad2; - } - } +#ifdef SYSVSHM + if (vm1->vm_shm) + shmfork(vm1, vm2); +#endif - uvm_tree_sanity(srcmap, "map_extract src leave"); - uvm_tree_sanity(dstmap, "map_extract dst leave"); +#ifdef PMAP_FORK + pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); +#endif - return(0); + return vm2; +} + +/* + * uvm_map_hint: return the beginning of the best area suitable for + * creating a new mapping with "prot" protection. 
+ */ +vaddr_t +uvm_map_hint(struct vmspace *vm, vm_prot_t prot) +{ + vaddr_t addr; +#ifdef __i386__ /* - * bad: failure recovery + * If executable skip first two pages, otherwise start + * after data + heap region. */ -bad: - vm_map_unlock(srcmap); -bad2: /* src already unlocked */ - if (chain) - uvm_unmap_detach(chain, - (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); - - uvm_tree_sanity(srcmap, "map_extract src err leave"); - uvm_tree_sanity(dstmap, "map_extract dst err leave"); - - uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ - return(error); + if ((prot & VM_PROT_EXECUTE) != 0 && + (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { + addr = (PAGE_SIZE*2) + + (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); + return (round_page(addr)); + } +#endif + /* start malloc/mmap after the brk */ + addr = (vaddr_t)vm->vm_daddr + BRKSIZ; +#if !defined(__vax__) + addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); +#endif + return (round_page(addr)); } -/* end of extraction functions */ - /* * uvm_map_submap: punch down part of a map into a submap * @@ -2202,7 +3651,6 @@ bad2: /* src already unlocked */ * => submap must have been init'd and have a zero reference count. * [need not be locked as we don't actually reference it] */ - int uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, struct vm_map *submap) @@ -2210,16 +3658,17 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, struct vm_map_entry *entry; int result; - vm_map_lock(map); + if (start > map->max_offset || end > map->max_offset || + start < map->min_offset || end < map->min_offset) + return EINVAL; - VM_MAP_RANGE_CHECK(map, start, end); + vm_map_lock(map); if (uvm_map_lookup_entry(map, start, &entry)) { UVM_MAP_CLIP_START(map, entry, start); - UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ - } else { + UVM_MAP_CLIP_END(map, entry, end); + } else entry = NULL; - } if (entry != NULL && entry->start == start && entry->end == end && @@ -2230,129 +3679,101 @@ uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, entry->offset = 0; uvm_map_reference(submap); result = 0; - } else { + } else result = EINVAL; - } + vm_map_unlock(map); return(result); } - /* - * uvm_map_protect: change map protection + * uvm_map_checkprot: check protection in map * - * => set_max means set max_protection. - * => map must be unlocked. + * => must allow specific protection in a fully allocated region. + * => map mut be read or write locked by caller. */ - -#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ - ~VM_PROT_WRITE : VM_PROT_ALL) -#define max(a,b) ((a) > (b) ? (a) : (b)) - -int -uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t new_prot, boolean_t set_max) +boolean_t +uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, + vm_prot_t protection) { - struct vm_map_entry *current, *entry; - int error = 0; - - vm_map_lock(map); - - VM_MAP_RANGE_CHECK(map, start, end); + struct vm_map_entry *entry; - if (uvm_map_lookup_entry(map, start, &entry)) { - UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + if (start < map->min_offset || end > map->max_offset || start > end) + return FALSE; + if (start == end) + return TRUE; /* - * make a first pass to check for protection violations. + * Iterate entries. 
*/ - - current = entry; - while ((current != &map->header) && (current->start < end)) { - if (UVM_ET_ISSUBMAP(current)) { - error = EINVAL; - goto out; - } - if ((new_prot & current->max_protection) != new_prot) { - error = EACCES; - goto out; - } - current = current->next; - } - - /* go back and fix up protections (no need to clip this time). */ - - current = entry; - - while ((current != &map->header) && (current->start < end)) { - vm_prot_t old_prot; - - UVM_MAP_CLIP_END(map, current, end); - - old_prot = current->protection; - if (set_max) - current->protection = - (current->max_protection = new_prot) & old_prot; - else - current->protection = new_prot; - + for (entry = uvm_map_entrybyaddr(&map->addr, start); + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { /* - * update physical map if necessary. worry about copy-on-write - * here -- CHECK THIS XXX + * Fail if a hole is found. */ - - if (current->protection != old_prot) { - /* update pmap! */ - if ((current->protection & MASK(entry)) == PROT_NONE && - VM_MAPENT_ISWIRED(entry)) - current->wired_count--; - pmap_protect(map->pmap, current->start, current->end, - current->protection & MASK(entry)); - } + if (UVM_ET_ISHOLE(entry) || + (entry->end < end && entry->end != VMMAP_FREE_END(entry))) + return FALSE; /* - * If the map is configured to lock any future mappings, - * wire this entry now if the old protection was VM_PROT_NONE - * and the new protection is not VM_PROT_NONE. + * Check protection. */ + if ((entry->protection & protection) != protection) + return FALSE; + } + return TRUE; +} - if ((map->flags & VM_MAP_WIREFUTURE) != 0 && - VM_MAPENT_ISWIRED(entry) == 0 && - old_prot == VM_PROT_NONE && - new_prot != VM_PROT_NONE) { - if (uvm_map_pageable(map, entry->start, entry->end, - FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { - /* - * If locking the entry fails, remember the - * error if it's the first one. Note we - * still continue setting the protection in - * the map, but will return the resource - * shortage condition regardless. - * - * XXX Ignore what the actual error is, - * XXX just call it a resource shortage - * XXX so that it doesn't get confused - * XXX what uvm_map_protect() itself would - * XXX normally return. - */ - error = ENOMEM; - } - } +/* + * uvm_map_create: create map + */ +vm_map_t +uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) +{ + vm_map_t result; + + result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); + result->pmap = pmap; + uvm_map_setup(result, min, max, flags); + return(result); +} + +/* + * uvm_map_deallocate: drop reference to a map + * + * => caller must not lock map + * => we will zap map if ref count goes to zero + */ +void +uvm_map_deallocate(vm_map_t map) +{ + int c; + struct uvm_map_deadq dead; - current = current->next; + simple_lock(&map->ref_lock); + c = --map->ref_count; + simple_unlock(&map->ref_lock); + if (c > 0) { + return; } - pmap_update(map->pmap); - out: - vm_map_unlock(map); - return (error); -} + /* + * all references gone. unmap and free. + * + * No lock required: we are only one to access this map. + */ -#undef max -#undef MASK + TAILQ_INIT(&dead); + uvm_tree_sanity(map, __FILE__, __LINE__); + uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, + TRUE, FALSE); + pmap_destroy(map->pmap); + KASSERT(RB_EMPTY(&map->addr)); + free(map, M_VMMAP); + + uvm_unmap_detach(&dead, 0); +} /* * uvm_map_inherit: set inheritance code for range of addrs in map. 
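/*
 * The new uvm_map_checkprot() above walks entries in address order and
 * fails on the first hole or on an entry whose protection is too weak.
 * Below is a self-contained version of that walk over a sorted array
 * (no RB tree); all names are made up for illustration.  As in the code
 * above, an empty range trivially succeeds.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_range { unsigned long start, end; int prot; };

static bool
toy_checkprot(const struct toy_range *r, int n, unsigned long start,
    unsigned long end, int want)
{
	unsigned long cur = start;
	int i;

	for (i = 0; i < n && cur < end; i++) {
		if (r[i].end <= cur)
			continue;
		if (r[i].start > cur)		/* hole in the range */
			return false;
		if ((r[i].prot & want) != want)	/* protection too weak */
			return false;
		cur = r[i].end;
	}
	return cur >= end;
}

int
main(void)
{
	struct toy_range map[] = {
		{ 0x1000, 0x3000, 0x3 },	/* rw */
		{ 0x4000, 0x5000, 0x1 },	/* r, with a hole before it */
	};

	printf("%d\n", toy_checkprot(map, 2, 0x1000, 0x2000, 0x3));	/* 1 */
	printf("%d\n", toy_checkprot(map, 2, 0x2000, 0x4800, 0x1));	/* 0 */
	return 0;
}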
@@ -2361,7 +3782,6 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, * => note that the inherit code is used during a "fork". see fork * code for details. */ - int uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, vm_inherit_t new_inheritance) @@ -2377,20 +3797,25 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, return (EINVAL); } + if (start > end) + return EINVAL; + start = MAX(start, map->min_offset); + end = MIN(end, map->max_offset); + if (start >= end) + return 0; + vm_map_lock(map); - - VM_MAP_RANGE_CHECK(map, start, end); - - if (uvm_map_lookup_entry(map, start, &entry)) { + + entry = uvm_map_entrybyaddr(&map->addr, start); + if (entry->end > start) UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + else + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); - while ((entry != &map->header) && (entry->start < end)) { + while (entry != NULL && entry->start < end) { UVM_MAP_CLIP_END(map, entry, end); entry->inheritance = new_inheritance; - entry = entry->next; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); } vm_map_unlock(map); @@ -2402,7 +3827,6 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, * * => map must be unlocked */ - int uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) { @@ -2412,29 +3836,34 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: - /* nothing special here */ break; - default: return (EINVAL); } + + if (start > end) + return EINVAL; + start = MAX(start, map->min_offset); + end = MIN(end, map->max_offset); + if (start >= end) + return 0; + vm_map_lock(map); - VM_MAP_RANGE_CHECK(map, start, end); - if (uvm_map_lookup_entry(map, start, &entry)) { + + entry = uvm_map_entrybyaddr(&map->addr, start); + if (entry != NULL && entry->end > start) UVM_MAP_CLIP_START(map, entry, start); - } else { - entry = entry->next; - } + else if (entry!= NULL) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); /* * XXXJRT: disallow holes? */ - while ((entry != &map->header) && (entry->start < end)) { + while (entry != NULL && entry->start < end) { UVM_MAP_CLIP_END(map, entry, end); - entry->advice = new_advice; - entry = entry->next; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); } vm_map_unlock(map); @@ -2442,454 +3871,178 @@ uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) } /* - * uvm_map_pageable: sets the pageability of a range in a map. + * uvm_map_extract: extract a mapping from a map and put it somewhere + * in the kernel_map, setting protection to max_prot. * - * => wires map entries. should not be used for transient page locking. - * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). - * => regions sepcified as not pageable require lock-down (wired) memory - * and page tables. - * => map must never be read-locked - * => if islocked is TRUE, map is already write-locked - * => we always unlock the map, since we must downgrade to a read-lock - * to call uvm_fault_wire() - * => XXXCDC: check this and try and clean it up. + * => map should be unlocked (we will write lock it and kernel_map) + * => returns 0 on success, error code otherwise + * => start must be page aligned + * => len must be page sized + * => flags: + * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go + * Mappings are QREF's. 
*/ - int -uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, - boolean_t new_pageable, int lockflags) +uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, + vaddr_t *dstaddrp, int flags) { - struct vm_map_entry *entry, *start_entry, *failed_entry; - int rv; -#ifdef DIAGNOSTIC - u_int timestamp_save; -#endif - KASSERT(map->flags & VM_MAP_PAGEABLE); - - if ((lockflags & UVM_LK_ENTER) == 0) - vm_map_lock(map); + struct uvm_map_deadq dead; + struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; + vaddr_t dstaddr; + vaddr_t end; + vaddr_t cp_start; + vsize_t cp_len, cp_off; + int error; - VM_MAP_RANGE_CHECK(map, start, end); + TAILQ_INIT(&dead); + end = start + len; - /* - * only one pageability change may take place at one time, since - * uvm_fault_wire assumes it will be called only once for each - * wiring/unwiring. therefore, we have to make sure we're actually - * changing the pageability for the entire region. we do so before - * making any changes. + /* + * Sanity check on the parameters. + * Also, since the mapping may not contain gaps, error out if the + * mapped area is not in source map. */ - if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); + if ((start & (vaddr_t)PAGE_MASK) != 0 || + (end & (vaddr_t)PAGE_MASK) != 0 || end < start) + return EINVAL; + if (start < srcmap->min_offset || end > srcmap->max_offset) + return EINVAL; - return (EFAULT); - } - entry = start_entry; - - /* - * handle wiring and unwiring separately. + /* + * Initialize dead entries. + * Handle len == 0 case. */ - if (new_pageable) { /* unwire */ - UVM_MAP_CLIP_START(map, entry, start); - - /* - * unwiring. first ensure that the range to be unwired is - * really wired down and that there are no holes. - */ + if (len == 0) + return 0; - while ((entry != &map->header) && (entry->start < end)) { - if (entry->wired_count == 0 || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return (EINVAL); - } - entry = entry->next; - } + /* + * Acquire lock on srcmap. + */ + vm_map_lock(srcmap); - /* - * POSIX 1003.1b - a single munlock call unlocks a region, - * regardless of the number of mlock calls made on that - * region. - */ + /* + * Lock srcmap, lookup first and last entry in <start,len>. + */ + first = uvm_map_entrybyaddr(&srcmap->addr, start); - entry = start_entry; - while ((entry != &map->header) && (entry->start < end)) { - UVM_MAP_CLIP_END(map, entry, end); - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); - entry = entry->next; + /* + * Check that the range is contiguous. + */ + for (entry = first; entry != NULL && entry->end < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (VMMAP_FREE_END(entry) != entry->end || + UVM_ET_ISHOLE(entry)) { + error = EINVAL; + goto fail; } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return (0); + } + if (entry == NULL || UVM_ET_ISHOLE(entry)) { + error = EINVAL; + goto fail; } /* - * wire case: in two passes [XXXCDC: ugly block of code here] - * - * 1: holding the write lock, we create any anonymous maps that need - * to be created. then we clip each map entry to the region to - * be wired and increment its wiring count. + * Handle need-copy flag. + * This may invalidate last, hence the re-initialization during the + * loop. 
* - * 2: we downgrade to a read lock, and call uvm_fault_wire to fault - * in the pages for any newly wired area (wired_count == 1). - * - * downgrading to a read lock for uvm_fault_wire avoids a possible - * deadlock with another thread that may have faulted on one of - * the pages to be wired (it would mark the page busy, blocking - * us, then in turn block on the map lock that we hold). because - * of problems in the recursive lock package, we cannot upgrade - * to a write lock in vm_map_lookup. thus, any actions that - * require the write lock must be done beforehand. because we - * keep the read lock on the map, the copy-on-write status of the - * entries we modify here cannot change. + * Also, perform clipping of last if not UVM_EXTRACT_QREF. */ - - while ((entry != &map->header) && (entry->start < end)) { - if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - - /* - * perform actions of vm_map_lookup that need the - * write lock on the map: create an anonymous map - * for a copy-on-write region, or an anonymous map - * for a zero-fill region. (XXXCDC: submap case - * ok?) - */ - - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && - ((entry->protection & VM_PROT_WRITE) || - (entry->object.uvm_obj == NULL))) { - amap_copy(map, entry, M_WAITOK, TRUE, - start, end); - /* XXXCDC: wait OK? */ - } - } - } - UVM_MAP_CLIP_START(map, entry, start); - UVM_MAP_CLIP_END(map, entry, end); - entry->wired_count++; - - /* - * Check for holes - */ - - if (entry->protection == VM_PROT_NONE || - (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end))) { - + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (UVM_ET_ISNEEDSCOPY(entry)) + amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); + if (UVM_ET_ISNEEDSCOPY(entry)) { /* - * found one. amap creation actions do not need to - * be undone, but the wired counts need to be restored. + * amap_copy failure */ - - while (entry != &map->header && entry->end > start) { - entry->wired_count--; - entry = entry->prev; - } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return (EINVAL); + error = ENOMEM; + goto fail; } - entry = entry->next; } /* - * Pass 2. + * Lock destination map (kernel_map). */ + vm_map_lock(kernel_map); -#ifdef DIAGNOSTIC - timestamp_save = map->timestamp; -#endif - vm_map_busy(map); - vm_map_downgrade(map); - - rv = 0; - entry = start_entry; - while (entry != &map->header && entry->start < end) { - if (entry->wired_count == 1) { - rv = uvm_fault_wire(map, entry->start, entry->end, - entry->protection); - if (rv) { - /* - * wiring failed. break out of the loop. - * we'll clean up the map below, once we - * have a write lock again. - */ - break; - } - } - entry = entry->next; - } - - if (rv) { /* failed? */ - - /* - * Get back to an exclusive (write) lock. - */ - - vm_map_upgrade(map); - vm_map_unbusy(map); - -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable: stale map"); -#endif - - /* - * first drop the wiring count on all the entries - * which haven't actually been wired yet. - */ - - failed_entry = entry; - while (entry != &map->header && entry->start < end) { - entry->wired_count--; - entry = entry->next; - } - - /* - * now, unwire all the entries that were successfully - * wired above. 
- */ - - entry = start_entry; - while (entry != failed_entry) { - entry->wired_count--; - if (VM_MAPENT_ISWIRED(entry) == 0) - uvm_map_entry_unwire(map, entry); - entry = entry->next; - } - if ((lockflags & UVM_LK_EXIT) == 0) - vm_map_unlock(map); - return(rv); - } - - /* We are holding a read lock here. */ - if ((lockflags & UVM_LK_EXIT) == 0) { - vm_map_unbusy(map); - vm_map_unlock_read(map); - } else { - - /* - * Get back to an exclusive (write) lock. - */ - - vm_map_upgrade(map); - vm_map_unbusy(map); + if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, + PAGE_SIZE, 0, VM_PROT_NONE, 0) != 0) { + error = ENOMEM; + goto fail2; } + *dstaddrp = dstaddr; - return (0); -} - -/* - * uvm_map_pageable_all: special case of uvm_map_pageable - affects - * all mapped regions. - * - * => map must not be locked. - * => if no flags are specified, all regions are unwired. - * => XXXJRT: has some of the same problems as uvm_map_pageable() above. - */ - -int -uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) -{ - struct vm_map_entry *entry, *failed_entry; - vsize_t size; - int error; -#ifdef DIAGNOSTIC - u_int timestamp_save; -#endif - - KASSERT(map->flags & VM_MAP_PAGEABLE); - - vm_map_lock(map); + /* + * We now have srcmap and kernel_map locked. + * dstaddr contains the destination offset in dstmap. + */ /* - * handle wiring and unwiring separately. + * step 1: start looping through map entries, performing extraction. */ + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); + if (UVM_ET_ISHOLE(entry)) + continue; - if (flags == 0) { /* unwire */ /* - * POSIX 1003.1b -- munlockall unlocks all regions, - * regardless of how many times mlockall has been called. + * Calculate uvm_mapent_clone parameters. */ - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); + cp_start = entry->start; + if (cp_start < start) { + cp_off = start - cp_start; + cp_start = start; + } else + cp_off = 0; + cp_len = MIN(entry->end, end) - cp_start; + + newentry = uvm_mapent_clone(kernel_map, + cp_start - start + dstaddr, cp_len, cp_off, + entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); + if (newentry == NULL) { + error = ENOMEM; + goto fail2_unmap; } - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); - vm_map_unlock(map); - return (0); - - /* - * end of unwire case! - */ - } + kernel_map->size += cp_len; + if (flags & UVM_EXTRACT_FIXPROT) + newentry->protection = newentry->max_protection; - if (flags & MCL_FUTURE) { /* - * must wire all future mappings; remember this. + * Step 2: perform pmap copy. + * (Doing this in the loop saves one RB traversal.) */ - vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); - } - - if ((flags & MCL_CURRENT) == 0) { - /* - * no more work to do! - */ - vm_map_unlock(map); - return (0); - } - - /* - * wire case: in three passes [XXXCDC: ugly block of code here] - * - * 1: holding the write lock, count all pages mapped by non-wired - * entries. if this would cause us to go over our limit, we fail. - * - * 2: still holding the write lock, we create any anonymous maps that - * need to be created. then we increment its wiring count. - * - * 3: we downgrade to a read lock, and call uvm_fault_wire to fault - * in the pages for any newly wired area (wired_count == 1). 
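/*
 * The extraction loop above clamps each entry to the requested range and
 * derives the offset (cp_off), start (cp_start) and length (cp_len) that
 * are handed to uvm_mapent_clone(), plus the destination address in
 * kernel_map.  The same arithmetic in isolation, with invented names:
 */
#include <stdio.h>

struct toy_win { unsigned long cp_start, cp_off, cp_len, dst; };

/* Clamp one entry [e_start, e_end) against the request [start, end). */
static struct toy_win
clone_window(unsigned long e_start, unsigned long e_end,
    unsigned long start, unsigned long end, unsigned long dstaddr)
{
	struct toy_win w;

	w.cp_start = e_start;
	w.cp_off = 0;
	if (w.cp_start < start) {
		w.cp_off = start - w.cp_start;
		w.cp_start = start;
	}
	w.cp_len = (e_end < end ? e_end : end) - w.cp_start;
	w.dst = w.cp_start - start + dstaddr;
	return w;
}

int
main(void)
{
	/* entry 0x2000-0x6000, request 0x3000-0x5000, dest base 0xd0000000 */
	struct toy_win w = clone_window(0x2000, 0x6000, 0x3000, 0x5000,
	    0xd0000000UL);

	printf("copy 0x%lx bytes from 0x%lx (offset 0x%lx) to 0x%lx\n",
	    w.cp_len, w.cp_start, w.cp_off, w.dst);
	return 0;
}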
- * - * downgrading to a read lock for uvm_fault_wire avoids a possible - * deadlock with another thread that may have faulted on one of - * the pages to be wired (it would mark the page busy, blocking - * us, then in turn block on the map lock that we hold). because - * of problems in the recursive lock package, we cannot upgrade - * to a write lock in vm_map_lookup. thus, any actions that - * require the write lock must be done beforehand. because we - * keep the read lock on the map, the copy-on-write status of the - * entries we modify here cannot change. - */ - - for (size = 0, entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (entry->protection != VM_PROT_NONE && - VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - size += entry->end - entry->start; - } + pmap_copy(kernel_map->pmap, srcmap->pmap, + cp_start - start + dstaddr, cp_len, cp_start); } + pmap_update(kernel_map->pmap); - if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { - vm_map_unlock(map); - return (ENOMEM); /* XXX overloaded */ - } - - /* XXX non-pmap_wired_count case must be handled by caller */ -#ifdef pmap_wired_count - if (limit != 0 && - (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { - vm_map_unlock(map); - return (ENOMEM); /* XXX overloaded */ - } -#endif + error = 0; /* - * Pass 2. + * Unmap copied entries on failure. */ - - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ - /* - * perform actions of vm_map_lookup that need the - * write lock on the map: create an anonymous map - * for a copy-on-write region, or an anonymous map - * for a zero-fill region. (XXXCDC: submap case - * ok?) - */ - if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ - if (UVM_ET_ISNEEDSCOPY(entry) && - ((entry->protection & VM_PROT_WRITE) || - (entry->object.uvm_obj == NULL))) { - amap_copy(map, entry, M_WAITOK, TRUE, - entry->start, entry->end); - /* XXXCDC: wait OK? */ - } - } - } - entry->wired_count++; +fail2_unmap: + if (error) { + uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, + FALSE, TRUE); } /* - * Pass 3. + * Release maps, release dead entries. */ +fail2: + vm_map_unlock(kernel_map); -#ifdef DIAGNOSTIC - timestamp_save = map->timestamp; -#endif - vm_map_busy(map); - vm_map_downgrade(map); - - for (error = 0, entry = map->header.next; - entry != &map->header && error == 0; - entry = entry->next) { - if (entry->wired_count == 1) { - error = uvm_fault_wire(map, entry->start, entry->end, - entry->protection); - } - } - - if (error) { /* failed? */ - /* - * Get back an exclusive (write) lock. - */ - vm_map_upgrade(map); - vm_map_unbusy(map); - -#ifdef DIAGNOSTIC - if (timestamp_save != map->timestamp) - panic("uvm_map_pageable_all: stale map"); -#endif - - /* - * first drop the wiring count on all the entries - * which haven't actually been wired yet. - * - * Skip VM_PROT_NONE entries like we did above. - */ - failed_entry = entry; - for (/* nothing */; entry != &map->header; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - entry->wired_count--; - } - - /* - * now, unwire all the entries that were successfully - * wired above. - * - * Skip VM_PROT_NONE entries like we did above. 
- */ - for (entry = map->header.next; entry != failed_entry; - entry = entry->next) { - if (entry->protection == VM_PROT_NONE) - continue; - entry->wired_count--; - if (VM_MAPENT_ISWIRED(entry)) - uvm_map_entry_unwire(map, entry); - } - vm_map_unlock(map); - return (error); - } +fail: + vm_map_unlock(srcmap); - /* We are holding a read lock here. */ - vm_map_unbusy(map); - vm_map_unlock_read(map); + uvm_unmap_detach(&dead, 0); - return (0); + return error; } /* @@ -2913,71 +4066,72 @@ int amap_clean_works = 1; /* XXX for now, just in case... */ int uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) { - struct vm_map_entry *current, *entry; - struct uvm_object *uobj; + struct vm_map_entry *first, *entry; struct vm_amap *amap; struct vm_anon *anon; struct vm_page *pg; - vaddr_t offset; - vsize_t size; - int rv, error, refs; + struct uvm_object *uobj; + vaddr_t cp_start, cp_end; + int refs; + int error; + boolean_t rv; KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != - (PGO_FREE|PGO_DEACTIVATE)); + (PGO_FREE|PGO_DEACTIVATE)); + + if (start > end || start < map->min_offset || end > map->max_offset) + return EINVAL; vm_map_lock_read(map); - VM_MAP_RANGE_CHECK(map, start, end); - if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { - vm_map_unlock_read(map); - return (EFAULT); - } + first = uvm_map_entrybyaddr(&map->addr, start); /* * Make a first pass to check for holes. */ - - for (current = entry; current->start < end; current = current->next) { - if (UVM_ET_ISSUBMAP(current)) { + for (entry = first; entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (UVM_ET_ISSUBMAP(entry)) { vm_map_unlock_read(map); - return (EINVAL); + return EINVAL; } - if (end > current->end && (current->next == &map->header || - current->end != current->next->start)) { + if (UVM_ET_ISSUBMAP(entry) || + UVM_ET_ISHOLE(entry) || + (entry->end < end && + VMMAP_FREE_END(entry) != entry->end)) { vm_map_unlock_read(map); - return (EFAULT); + return EFAULT; } } error = 0; - - for (current = entry; current->start < end; current = current->next) { - amap = current->aref.ar_amap; /* top layer */ - uobj = current->object.uvm_obj; /* bottom layer */ - KASSERT(start >= current->start); + for (entry = first; entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + amap = entry->aref.ar_amap; /* top layer */ + if (UVM_ET_ISOBJ(entry)) + uobj = entry->object.uvm_obj; + else + uobj = NULL; /* * No amap cleaning necessary if: - * - * (1) There's no amap. - * - * (2) We're not deactivating or freeing pages. + * - there's no amap + * - we're not deactivating or freeing pages. */ - if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) goto flush_object; - - /* XXX for now, just in case... 
*/ - if (amap_clean_works == 0) + if (!amap_clean_works) goto flush_object; - offset = start - current->start; - size = MIN(end, current->end) - start; - for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { - anon = amap_lookup(¤t->aref, offset); + cp_start = MAX(entry->start, start); + cp_end = MIN(entry->end, end); + + for (; cp_start != cp_end; cp_start += PAGE_SIZE) { + anon = amap_lookup(&entry->aref, + cp_start - entry->start); if (anon == NULL) continue; - simple_lock(&anon->an_lock); + simple_lock(&anon->an_lock); /* XXX */ pg = anon->an_page; if (pg == NULL) { @@ -2986,23 +4140,21 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { - /* * XXX In these first 3 cases, we always just * XXX deactivate the page. We may want to * XXX handle the different cases more * XXX specifically, in the future. */ - case PGO_CLEANIT|PGO_FREE: case PGO_CLEANIT|PGO_DEACTIVATE: case PGO_DEACTIVATE: - deactivate_it: +deactivate_it: /* skip the page if it's loaned or wired */ if (pg->loan_count != 0 || pg->wire_count != 0) { simple_unlock(&anon->an_lock); - continue; + break; } uvm_lock_pageq(); @@ -3012,51 +4164,45 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) * by the anon (may simply be loaned to the * anon). */ - if ((pg->pg_flags & PQ_ANON) == 0) { KASSERT(pg->uobject == NULL); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - continue; + break; } KASSERT(pg->uanon == anon); -#ifdef UBC - /* ...and deactivate the page. */ - pmap_clear_reference(pg); -#else /* zap all mappings for the page. */ pmap_page_protect(pg, VM_PROT_NONE); /* ...and deactivate the page. */ -#endif uvm_pagedeactivate(pg); uvm_unlock_pageq(); simple_unlock(&anon->an_lock); - continue; + break; case PGO_FREE: /* - * If there are multiple references to + * If there are mutliple references to * the amap, just deactivate the page. */ - if (amap_refs(amap) > 1) goto deactivate_it; /* XXX skip the page if it's wired */ if (pg->wire_count != 0) { simple_unlock(&anon->an_lock); - continue; + break; } - amap_unadd(¤t->aref, offset); + amap_unadd(&entry->aref, + cp_start - entry->start); refs = --anon->an_ref; simple_unlock(&anon->an_lock); if (refs == 0) uvm_anfree(anon); - continue; + break; default: panic("uvm_map_clean: weird flags"); @@ -3064,817 +4210,819 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) } flush_object: + cp_start = MAX(entry->start, start); + cp_end = MIN(entry->end, end); + /* * flush pages if we've got a valid backing object. * * Don't PGO_FREE if we don't have write permission - * and don't flush if this is a copy-on-write object + * and don't flush if this is a copy-on-write object * since we can't know our permissions on it. */ - - offset = current->offset + (start - current->start); - size = MIN(end, current->end) - start; if (uobj != NULL && ((flags & PGO_FREE) == 0 || ((entry->max_protection & VM_PROT_WRITE) != 0 && (entry->etype & UVM_ET_COPYONWRITE) == 0))) { simple_lock(&uobj->vmobjlock); - rv = uobj->pgops->pgo_flush(uobj, offset, - offset + size, flags); + rv = uobj->pgops->pgo_flush(uobj, + cp_start - entry->start + entry->offset, + cp_end - entry->start + entry->offset, flags); simple_unlock(&uobj->vmobjlock); if (rv == FALSE) error = EFAULT; } - start += size; } + vm_map_unlock_read(map); - return (error); + return error; } - /* - * uvm_map_checkprot: check protection in map - * - * => must allow specified protection in a fully allocated region. 
- * => map must be read or write locked by caller. + * UVM_MAP_CLIP_END implementation */ - -boolean_t -uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, - vm_prot_t protection) +void +uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) { - struct vm_map_entry *entry; - struct vm_map_entry *tmp_entry; - - if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { - return(FALSE); - } - entry = tmp_entry; - while (start < end) { - if (entry == &map->header) { - return(FALSE); - } - - /* - * no holes allowed - */ - - if (start < entry->start) { - return(FALSE); - } - - /* - * check protection associated with entry - */ - - if ((entry->protection & protection) != protection) { - return(FALSE); - } + struct vm_map_entry *tmp; - /* go to next entry */ + KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); + tmp = uvm_mapent_alloc(map, 0); - start = entry->end; - entry = entry->next; - } - return(TRUE); + /* + * Invoke splitentry. + */ + uvm_map_splitentry(map, entry, tmp, addr); } /* - * uvmspace_alloc: allocate a vmspace structure. + * UVM_MAP_CLIP_START implementation * - * - structure includes vm_map and pmap - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * Clippers are required to not change the pointers to the entry they are + * clipping on. + * Since uvm_map_splitentry turns the original entry into the lowest + * entry (address wise) we do a swap between the new entry and the original + * entry, prior to calling uvm_map_splitentry. */ -struct vmspace * -uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, - boolean_t remove_holes) +void +uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) { - struct vmspace *vm; + struct vm_map_entry *tmp; + struct uvm_addr_state *free; + + /* Unlink original. */ + free = uvm_map_uaddr_e(map, entry); + if (free) + uvm_mapent_free_remove(map, free, entry); + uvm_mapent_addr_remove(map, entry); + + /* Copy entry. */ + KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); + tmp = uvm_mapent_alloc(map, 0); + uvm_mapent_copy(entry, tmp); + + /* Put new entry in place of original entry. */ + uvm_mapent_addr_insert(map, tmp); + if (free) + uvm_mapent_free_insert(map, free, tmp); + + /* Invoke splitentry. */ + uvm_map_splitentry(map, tmp, entry, addr); +} - vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); - uvmspace_init(vm, NULL, min, max, pageable, remove_holes); - return (vm); +/* + * Boundary fixer. + */ +static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); +static __inline vaddr_t +uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) +{ + return (min < bound && max > bound) ? bound : max; } /* - * uvmspace_init: initialize a vmspace structure. + * Choose free list based on address at start of free space. * - * - XXX: no locking on this structure - * - refcnt set to 1, rest must be init'd by caller + * The uvm_addr_state returned contains addr and is the first of: + * - uaddr_exe + * - uaddr_brk_stack + * - uaddr_any */ -void -uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, - boolean_t pageable, boolean_t remove_holes) +struct uvm_addr_state* +uvm_map_uaddr(struct vm_map *map, vaddr_t addr) { + struct uvm_addr_state *uaddr; + int i; - uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); + /* Special case the first page, to prevent mmap from returning 0. 
*/ + if (addr < VMMAP_MIN_ADDR) + return NULL; - if (pmap) - pmap_reference(pmap); - else - pmap = pmap_create(); - vm->vm_map.pmap = pmap; + /* Upper bound for kernel maps at uvm_maxkaddr. */ + if ((map->flags & VM_MAP_ISVMSPACE) == 0) { + if (addr >= uvm_maxkaddr) + return NULL; + } - vm->vm_refcnt = 1; + /* Is the address inside the exe-only map? */ + if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && + addr < map->uaddr_exe->uaddr_maxaddr) + return map->uaddr_exe; + + /* Check if the space falls inside brk/stack area. */ + if ((addr >= map->b_start && addr < map->b_end) || + (addr >= map->s_start && addr < map->s_end)) { + if (map->uaddr_brk_stack != NULL && + addr >= map->uaddr_brk_stack->uaddr_minaddr && + addr < map->uaddr_brk_stack->uaddr_maxaddr) { + return map->uaddr_brk_stack; + } else + return NULL; + } - if (remove_holes) - pmap_remove_holes(&vm->vm_map); + /* + * Check the other selectors. + * + * These selectors are only marked as the owner, if they have insert + * functions. + */ + for (i = 0; i < nitems(map->uaddr_any); i++) { + uaddr = map->uaddr_any[i]; + if (uaddr == NULL) + continue; + if (uaddr->uaddr_functions->uaddr_free_insert == NULL) + continue; + + if (addr >= uaddr->uaddr_minaddr && + addr < uaddr->uaddr_maxaddr) + return uaddr; + } + + return NULL; } /* - * uvmspace_share: share a vmspace between two processes + * Choose free list based on address at start of free space. * - * - XXX: no locking on vmspace - * - used for vfork and threads + * The uvm_addr_state returned contains addr and is the first of: + * - uaddr_exe + * - uaddr_brk_stack + * - uaddr_any */ - -void -uvmspace_share(struct proc *p1, struct proc *p2) +struct uvm_addr_state* +uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) { - p2->p_vmspace = p1->p_vmspace; - p1->p_vmspace->vm_refcnt++; + return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); } /* - * uvmspace_exec: the process wants to exec a new program - * - * - XXX: no locking on vmspace + * Returns the first free-memory boundary that is crossed by [min-max]. */ - -void -uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) +vsize_t +uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) { - struct vmspace *nvm, *ovm = p->p_vmspace; - struct vm_map *map = &ovm->vm_map; - - pmap_unuse_final(p); /* before stack addresses go away */ - - /* - * see if more than one process is using this vmspace... - */ + struct uvm_addr_state *uaddr; + int i; - if (ovm->vm_refcnt == 1) { + /* Never return first page. */ + max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); - /* - * if p is the only process using its vmspace then we can safely - * recycle that vmspace for the program that is being exec'd. - */ + /* Treat the maxkaddr special, if the map is a kernel_map. */ + if ((map->flags & VM_MAP_ISVMSPACE) == 0) + max = uvm_map_boundfix(min, max, uvm_maxkaddr); -#ifdef SYSVSHM - /* - * SYSV SHM semantics require us to kill all segments on an exec - */ - if (ovm->vm_shm) - shmexit(ovm); -#endif + /* Check for exe-only boundaries. */ + if (map->uaddr_exe != NULL) { + max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); + max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); + } - /* - * POSIX 1003.1b -- "lock future mappings" is revoked - * when a process execs another program image. - */ - vm_map_lock(map); - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); - vm_map_unlock(map); + /* Check for exe-only boundaries. 
*/ + if (map->uaddr_brk_stack != NULL) { + max = uvm_map_boundfix(min, max, + map->uaddr_brk_stack->uaddr_minaddr); + max = uvm_map_boundfix(min, max, + map->uaddr_brk_stack->uaddr_maxaddr); + } - /* - * now unmap the old program - */ - uvm_unmap(map, map->min_offset, map->max_offset); + /* Check other boundaries. */ + for (i = 0; i < nitems(map->uaddr_any); i++) { + uaddr = map->uaddr_any[i]; + if (uaddr != NULL) { + max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); + max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); + } + } - /* - * but keep MMU holes unavailable - */ - pmap_remove_holes(map); + /* Boundaries at stack and brk() area. */ + max = uvm_map_boundfix(min, max, map->s_start); + max = uvm_map_boundfix(min, max, map->s_end); + max = uvm_map_boundfix(min, max, map->b_start); + max = uvm_map_boundfix(min, max, map->b_end); - /* - * resize the map - */ - vm_map_lock(map); - map->min_offset = start; - uvm_tree_sanity(map, "resize enter"); - map->max_offset = end; - if (map->header.prev != &map->header) - uvm_rb_fixup(map, map->header.prev); - uvm_tree_sanity(map, "resize leave"); - vm_map_unlock(map); - + return max; +} - } else { +/* + * Update map allocation start and end addresses from proc vmspace. + */ +void +uvm_map_vmspace_update(struct vm_map *map, + struct uvm_map_deadq *dead, int flags) +{ + struct vmspace *vm; + vaddr_t b_start, b_end, s_start, s_end; - /* - * p's vmspace is being shared, so we can't reuse it for p since - * it is still being used for others. allocate a new vmspace - * for p - */ - nvm = uvmspace_alloc(start, end, - (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); + KASSERT(map->flags & VM_MAP_ISVMSPACE); + KASSERT(offsetof(struct vmspace, vm_map) == 0); - /* - * install new vmspace and drop our ref to the old one. - */ + /* + * Derive actual allocation boundaries from vmspace. + */ + vm = (struct vmspace *)map; + b_start = (vaddr_t)vm->vm_daddr; + b_end = b_start + BRKSIZ; + s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); + s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); +#ifdef DIAGNOSTIC + if ((b_start & (vaddr_t)PAGE_MASK) != 0 || + (b_end & (vaddr_t)PAGE_MASK) != 0 || + (s_start & (vaddr_t)PAGE_MASK) != 0 || + (s_end & (vaddr_t)PAGE_MASK) != 0) { + panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " + "b=0x%lx-0x%lx s=0x%lx-0x%lx", + vm, b_start, b_end, s_start, s_end); + } +#endif - pmap_deactivate(p); - p->p_vmspace = nvm; - pmap_activate(p); + if (__predict_true(map->b_start == b_start && map->b_end == b_end && + map->s_start == s_start && map->s_end == s_end)) + return; - uvmspace_free(ovm); - } + uvm_map_freelist_update(map, dead, b_start, b_end, + s_start, s_end, flags); } /* - * uvmspace_free: free a vmspace data structure + * Grow kernel memory. * - * - XXX: no locking on vmspace + * This function is only called for kernel maps when an allocation fails. + * + * If the map has a gap that is large enough to accomodate alloc_sz, this + * function will make sure map->free will include it. */ - void -uvmspace_free(struct vmspace *vm) +uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, + vsize_t alloc_sz, int flags) { - struct vm_map_entry *dead_entries; + vsize_t sz; + vaddr_t end; + struct vm_map_entry *entry; - if (--vm->vm_refcnt == 0) { - /* - * lock the map, to wait out all other references to it. delete - * all of the mappings and pages they hold, then call the pmap - * module to reclaim anything left. 
- */ -#ifdef SYSVSHM - /* Get rid of any SYSV shared memory segments. */ - if (vm->vm_shm != NULL) - shmexit(vm); + /* Kernel memory only. */ + KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); + /* Destroy free list. */ + uvm_map_freelist_update_clear(map, dead); + + /* + * Include the guard page in the hard minimum requirement of alloc_sz. + */ + if (map->flags & VM_MAP_GUARDPAGES) + alloc_sz += PAGE_SIZE; + + /* + * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. + * + * Don't handle the case where the multiplication overflows: + * if that happens, the allocation is probably too big anyway. + */ + sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); + + /* + * Walk forward until a gap large enough for alloc_sz shows up. + * + * We assume the kernel map has no boundaries. + * uvm_maxkaddr may be zero. + */ + end = MAX(uvm_maxkaddr, map->min_offset); + entry = uvm_map_entrybyaddr(&map->addr, end); + while (entry && entry->fspace < alloc_sz) + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + if (entry) { + end = MAX(VMMAP_FREE_START(entry), end); + end += MIN(sz, map->max_offset - end); + } else + end = map->max_offset; + + /* Reserve pmap entries. */ +#ifdef PMAP_GROWKERNEL + uvm_maxkaddr = pmap_growkernel(end); +#else + uvm_maxkaddr = end; #endif - vm_map_lock(&vm->vm_map); - if (vm->vm_map.nentries) { - uvm_unmap_remove(&vm->vm_map, - vm->vm_map.min_offset, vm->vm_map.max_offset, - &dead_entries, NULL, TRUE); - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); - } - pmap_destroy(vm->vm_map.pmap); - vm->vm_map.pmap = NULL; - pool_put(&uvm_vmspace_pool, vm); - } + + printf("uvm_km_kmem_grow: grown to 0x%lx\n", uvm_maxkaddr); + + /* Rebuild free list. */ + uvm_map_freelist_update_refill(map, flags); } /* - * uvm_map_create: create map + * Freelist update subfunction: unlink all entries from freelists. */ -vm_map_t -uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) +void +uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) { - vm_map_t result; - - result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); - uvm_map_setup(result, min, max, flags); - result->pmap = pmap; - return(result); + struct uvm_addr_state *free; + struct vm_map_entry *entry, *prev, *next; + + prev = NULL; + for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; + entry = next) { + next = RB_NEXT(uvm_map_addr, &map->addr, entry); + + free = uvm_map_uaddr_e(map, entry); + if (free) + uvm_mapent_free_remove(map, free, entry); + + if (prev != NULL && entry->start == entry->end) { + prev->fspace += VMMAP_FREE_END(entry) - entry->end; + uvm_mapent_addr_remove(map, entry); + DEAD_ENTRY_PUSH(dead, entry); + } else + prev = entry; + } } /* - * uvm_map_setup: init map - * - * => map must not be in service yet. + * Freelist update subfunction: refill the freelists with entries. 
*/ void -uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags) +uvm_map_freelist_update_refill(struct vm_map *map, int flags) { + struct vm_map_entry *entry; + vaddr_t min, max; - RB_INIT(&map->rbhead); - map->header.next = map->header.prev = &map->header; - map->nentries = 0; - map->size = 0; - map->ref_count = 1; - map->min_offset = min; - map->max_offset = max; - map->flags = flags; - map->first_free = &map->header; - map->hint = &map->header; - map->timestamp = 0; - rw_init(&map->lock, "vmmaplk"); - simple_lock_init(&map->ref_lock); - simple_lock_init(&map->hint_lock); -} + RB_FOREACH(entry, uvm_map_addr, &map->addr) { + min = VMMAP_FREE_START(entry); + max = VMMAP_FREE_END(entry); + entry->fspace = 0; + entry = uvm_map_fix_space(map, entry, min, max, flags); + } + uvm_tree_sanity(map, __FILE__, __LINE__); +} /* - * uvm_map_reference: add reference to a map - * - * => map need not be locked (we use ref_lock). + * Change {a,b}_{start,end} allocation ranges and associated free lists. */ void -uvm_map_reference(vm_map_t map) +uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, + vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) { - simple_lock(&map->ref_lock); - map->ref_count++; - simple_unlock(&map->ref_lock); + KDASSERT(b_end >= b_start && s_end >= s_start); + + /* Clear all free lists. */ + uvm_map_freelist_update_clear(map, dead); + + /* Apply new bounds. */ + map->b_start = b_start; + map->b_end = b_end; + map->s_start = s_start; + map->s_end = s_end; + + /* Refill free lists. */ + uvm_map_freelist_update_refill(map, flags); } /* - * uvm_map_deallocate: drop reference to a map + * Assign a uvm_addr_state to the specified pointer in vm_map. * - * => caller must not lock map - * => we will zap map if ref count goes to zero + * May sleep. */ void -uvm_map_deallocate(vm_map_t map) +uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, + struct uvm_addr_state *newval) { - int c; + struct uvm_map_deadq dead; - simple_lock(&map->ref_lock); - c = --map->ref_count; - simple_unlock(&map->ref_lock); - if (c > 0) { - return; - } + /* Pointer which must be in this map. */ + KASSERT(which != NULL); + KASSERT((void*)map <= (void*)(which) && + (void*)(which) < (void*)(map + 1)); - /* - * all references gone. unmap and free. - */ + vm_map_lock(map); + TAILQ_INIT(&dead); + uvm_map_freelist_update_clear(map, &dead); - uvm_unmap(map, map->min_offset, map->max_offset); - pmap_destroy(map->pmap); - free(map, M_VMMAP); + uvm_addr_destroy(*which); + *which = newval; + + uvm_map_freelist_update_refill(map, 0); + vm_map_unlock(map); + uvm_unmap_detach(&dead, 0); } /* - * F O R K - m a i n e n t r y p o i n t - */ -/* - * uvmspace_fork: fork a process' main map + * Correct space insert. * - * => create a new vmspace for child process from parent. - * => parent's map must not be locked. + * Entry must not be on any freelist. */ - -struct vmspace * -uvmspace_fork(struct vmspace *vm1) +struct vm_map_entry* +uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, + vaddr_t min, vaddr_t max, int flags) { - struct vmspace *vm2; - struct vm_map *old_map = &vm1->vm_map; - struct vm_map *new_map; - struct vm_map_entry *old_entry; - struct vm_map_entry *new_entry; - pmap_t new_pmap; - boolean_t protect_child; - - vm_map_lock(old_map); - - vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, - (old_map->flags & VM_MAP_PAGEABLE) ? 
TRUE : FALSE, FALSE); - memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, - (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); - new_map = &vm2->vm_map; /* XXX */ - new_pmap = new_map->pmap; + struct uvm_addr_state *free, *entfree; + vaddr_t lmax; - old_entry = old_map->header.next; + KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); + KDASSERT(min <= max); + KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || + min == map->min_offset); /* - * go entry-by-entry + * During the function, entfree will always point at the uaddr state + * for entry. */ + entfree = (entry == NULL ? NULL : + uvm_map_uaddr_e(map, entry)); - while (old_entry != &old_map->header) { - + while (min != max) { /* - * first, some sanity checks on the old entry + * Claim guard page for entry. */ - if (UVM_ET_ISSUBMAP(old_entry)) - panic("fork: encountered a submap during fork (illegal)"); - - if (!UVM_ET_ISCOPYONWRITE(old_entry) && - UVM_ET_ISNEEDSCOPY(old_entry)) - panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); - - - switch (old_entry->inheritance) { - case MAP_INHERIT_NONE: - /* - * drop the mapping - */ - break; - - case MAP_INHERIT_SHARE: - /* - * share the mapping: this means we want the old and - * new entries to share amaps and backing objects. - */ - - /* - * if the old_entry needs a new amap (due to prev fork) - * then we need to allocate it now so that we have - * something we own to share with the new_entry. [in - * other words, we need to clear needs_copy] - */ - - if (UVM_ET_ISNEEDSCOPY(old_entry)) { - /* get our own amap, clears needs_copy */ - amap_copy(old_map, old_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: WAITOK??? */ + if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && + VMMAP_FREE_END(entry) == entry->end && + entry->start != entry->end) { + if (max - min == 2 * PAGE_SIZE) { + /* + * If the free-space gap is exactly 2 pages, + * we make the guard 2 pages instead of 1. + * Because in a guarded map, an area needs + * at least 2 pages to allocate from: + * one page for the allocation and one for + * the guard. + */ + entry->guard = 2 * PAGE_SIZE; + min = max; + } else { + entry->guard = PAGE_SIZE; + min += PAGE_SIZE; } + continue; + } - new_entry = uvm_mapent_alloc(new_map, 0); - /* old_entry -> new_entry */ - uvm_mapent_copy(old_entry, new_entry); + /* + * Handle the case where entry has a 2-page guard, but the + * space after entry is freed. + */ + if (entry != NULL && entry->fspace == 0 && + entry->guard > PAGE_SIZE) { + entry->guard = PAGE_SIZE; + min = VMMAP_FREE_START(entry); + } - /* new pmap has nothing wired in it */ - new_entry->wired_count = 0; + lmax = uvm_map_boundary(map, min, max); + free = uvm_map_uaddr(map, min); + /* + * Entries are merged if they point at the same uvm_free(). + * Exception to that rule: if min == uvm_maxkaddr, a new + * entry is started regardless (otherwise the allocators + * will get confused). + */ + if (entry != NULL && free == entfree && + !((map->flags & VM_MAP_ISVMSPACE) == 0 && + min == uvm_maxkaddr)) { + KDASSERT(VMMAP_FREE_END(entry) == min); + entry->fspace += lmax - min; + } else { /* - * gain reference to object backing the map (can't - * be a submap, already checked this case). + * Commit entry to free list: it'll not be added to + * anymore. + * We'll start a new entry and add to that entry + * instead. 
*/ - if (new_entry->aref.ar_amap) - /* share reference */ - uvm_map_reference_amap(new_entry, AMAP_SHARED); - - if (new_entry->object.uvm_obj && - new_entry->object.uvm_obj->pgops->pgo_reference) - new_entry->object.uvm_obj-> - pgops->pgo_reference( - new_entry->object.uvm_obj); - - /* insert entry at end of new_map's entry list */ - uvm_map_entry_link(new_map, new_map->header.prev, - new_entry); - - /* - * pmap_copy the mappings: this routine is optional - * but if it is there it will reduce the number of - * page faults in the new proc. - */ - - pmap_copy(new_pmap, old_map->pmap, new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); + if (entry != NULL && entfree != NULL) + uvm_mapent_free_insert(map, entfree, entry); + + /* New entry for new uaddr. */ + entry = uvm_mapent_alloc(map, flags); + KDASSERT(entry != NULL); + entry->end = entry->start = min; + entry->guard = 0; + entry->fspace = lmax - min; + entry->object.uvm_obj = NULL; + entry->offset = 0; + entry->etype = 0; + entry->protection = entry->max_protection = 0; + entry->inheritance = 0; + entry->wired_count = 0; + entry->advice = 0; + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = NULL; + uvm_mapent_addr_insert(map, entry); + + entfree = free; + } - break; + min = lmax; + } + /* Finally put entry on the uaddr state. */ + if (entry != NULL && entfree != NULL) + uvm_mapent_free_insert(map, entfree, entry); - case MAP_INHERIT_COPY: + return entry; +} - /* - * copy-on-write the mapping (using mmap's - * MAP_PRIVATE semantics) - * - * allocate new_entry, adjust reference counts. - * (note that new references are read-only). - */ +/* + * MQuery style of allocation. + * + * This allocator searches forward until sufficient space is found to map + * the given size. + * + * XXX: factor in offset (via pmap_prefer) and protection? + */ +int +uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, + int flags) +{ + struct vm_map_entry *entry, *last; + vaddr_t addr; + vaddr_t tmp, pmap_align, pmap_offset; + int error; - new_entry = uvm_mapent_alloc(new_map, 0); - /* old_entry -> new_entry */ - uvm_mapent_copy(old_entry, new_entry); + addr = *addr_p; + vm_map_lock_read(map); - if (new_entry->aref.ar_amap) - uvm_map_reference_amap(new_entry, 0); + /* + * Configure pmap prefer. + */ + if (offset != UVM_UNKNOWN_OFFSET) { + pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); + pmap_offset = PMAP_PREFER_OFFSET(offset); + } else { + pmap_align = PAGE_SIZE; + pmap_offset = 0; + } - if (new_entry->object.uvm_obj && - new_entry->object.uvm_obj->pgops->pgo_reference) - new_entry->object.uvm_obj->pgops->pgo_reference - (new_entry->object.uvm_obj); + /* + * Align address to pmap_prefer unless FLAG_FIXED is set. + */ + if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { + tmp = (addr & ~(pmap_align - 1)) | pmap_offset; + if (tmp < addr) + tmp += pmap_align; + addr = tmp; + } - /* new pmap has nothing wired in it */ - new_entry->wired_count = 0; + /* + * First, check if the requested range is fully available. + */ + entry = uvm_map_entrybyaddr(&map->addr, addr); + last = NULL; + if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { + error = 0; + goto out; + } + if (flags & UVM_FLAG_FIXED) { + error = EINVAL; + goto out; + } - new_entry->etype |= - (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); - uvm_map_entry_link(new_map, new_map->header.prev, - new_entry); + error = ENOMEM; /* Default error from here. */ - /* - * the new entry will need an amap. 
it will either - * need to be copied from the old entry or created - * from scratch (if the old entry does not have an - * amap). can we defer this process until later - * (by setting "needs_copy") or do we need to copy - * the amap now? - * - * we must copy the amap now if any of the following - * conditions hold: - * 1. the old entry has an amap and that amap is - * being shared. this means that the old (parent) - * process is sharing the amap with another - * process. if we do not clear needs_copy here - * we will end up in a situation where both the - * parent and child process are referring to the - * same amap with "needs_copy" set. if the - * parent write-faults, the fault routine will - * clear "needs_copy" in the parent by allocating - * a new amap. this is wrong because the - * parent is supposed to be sharing the old amap - * and the new amap will break that. - * - * 2. if the old entry has an amap and a non-zero - * wire count then we are going to have to call - * amap_cow_now to avoid page faults in the - * parent process. since amap_cow_now requires - * "needs_copy" to be clear we might as well - * clear it here as well. - * - */ + /* + * At this point, the memory at <addr, sz> is not available. + * The reasons are: + * [1] it's outside the map, + * [2] it starts in used memory (and therefore needs to move + * toward the first free page in entry), + * [3] it starts in free memory but bumps into used memory. + * + * Note that for case [2], the forward moving is handled by the + * for loop below. + */ - if (old_entry->aref.ar_amap != NULL) { + if (entry == NULL) { + /* [1] Outside the map. */ + if (addr >= map->max_offset) + goto out; + else + entry = RB_MIN(uvm_map_addr, &map->addr); + } else if (VMMAP_FREE_START(entry) <= addr) { + /* [3] Bumped into used memory. */ + entry = RB_NEXT(uvm_map_addr, &map->addr, entry); + } - if ((amap_flags(old_entry->aref.ar_amap) & - AMAP_SHARED) != 0 || - VM_MAPENT_ISWIRED(old_entry)) { + /* + * Test if the next entry is sufficient for the allocation. + */ + for (; entry != NULL; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { + if (entry->fspace == 0) + continue; + addr = VMMAP_FREE_START(entry); - amap_copy(new_map, new_entry, M_WAITOK, FALSE, - 0, 0); - /* XXXCDC: M_WAITOK ... ok? */ - } - } +restart: /* Restart address checks on address change. */ - /* - * if the parent's entry is wired down, then the - * parent process does not want page faults on - * access to that memory. this means that we - * cannot do copy-on-write because we can't write - * protect the old entry. in this case we - * resolve all copy-on-write faults now, using - * amap_cow_now. note that we have already - * allocated any needed amap (above). - */ + tmp = (addr & ~(pmap_align - 1)) | pmap_offset; + if (tmp < addr) + tmp += pmap_align; + addr = tmp; + if (addr >= VMMAP_FREE_END(entry)) + continue; - if (VM_MAPENT_ISWIRED(old_entry)) { - - /* - * resolve all copy-on-write faults now - * (note that there is nothing to do if - * the old mapping does not have an amap). - * XXX: is it worthwhile to bother with pmap_copy - * in this case? - */ - if (old_entry->aref.ar_amap) - amap_cow_now(new_map, new_entry); - - } else { - - /* - * setup mappings to trigger copy-on-write faults - * we must write-protect the parent if it has - * an amap and it is not already "needs_copy"... - * if it is already "needs_copy" then the parent - * has already been write-protected by a previous - * fork operation. 
- * - * if we do not write-protect the parent, then - * we must be sure to write-protect the child - * after the pmap_copy() operation. - * - * XXX: pmap_copy should have some way of telling - * us that it didn't do anything so we can avoid - * calling pmap_protect needlessly. - */ - - if (old_entry->aref.ar_amap) { - - if (!UVM_ET_ISNEEDSCOPY(old_entry)) { - if (old_entry->max_protection & VM_PROT_WRITE) { - pmap_protect(old_map->pmap, - old_entry->start, - old_entry->end, - old_entry->protection & - ~VM_PROT_WRITE); - pmap_update(old_map->pmap); - - } - old_entry->etype |= UVM_ET_NEEDSCOPY; - } - - /* - * parent must now be write-protected - */ - protect_child = FALSE; - } else { - - /* - * we only need to protect the child if the - * parent has write access. - */ - if (old_entry->max_protection & VM_PROT_WRITE) - protect_child = TRUE; - else - protect_child = FALSE; - - } - - /* - * copy the mappings - * XXX: need a way to tell if this does anything - */ - - pmap_copy(new_pmap, old_map->pmap, - new_entry->start, - (old_entry->end - old_entry->start), - old_entry->start); - - /* - * protect the child's mappings if necessary - */ - if (protect_child) { - pmap_protect(new_pmap, new_entry->start, - new_entry->end, - new_entry->protection & - ~VM_PROT_WRITE); - } + /* + * Skip brk() allocation addresses. + */ + if (addr + sz > map->b_start && addr < map->b_end) { + if (VMMAP_FREE_END(entry) > map->b_end) { + addr = map->b_end; + goto restart; + } else + continue; + } + /* + * Skip stack allocation addresses. + */ + if (addr + sz > map->s_start && addr < map->s_end) { + if (VMMAP_FREE_END(entry) > map->s_end) { + addr = map->s_end; + goto restart; + } else + continue; + } - } - break; - } /* end of switch statement */ - old_entry = old_entry->next; + last = NULL; + if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { + error = 0; + goto out; + } } - new_map->size = old_map->size; - vm_map_unlock(old_map); - -#ifdef SYSVSHM - if (vm1->vm_shm) - shmfork(vm1, vm2); -#endif - -#ifdef PMAP_FORK - pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); -#endif - - return(vm2); +out: + vm_map_unlock_read(map); + if (error == 0) + *addr_p = addr; + return error; } -#if defined(DDB) - /* - * DDB hooks - */ - -/* - * uvm_map_printit: actually prints the map + * Determine allocation bias. + * + * Returns 1 if we should bias to high addresses, -1 for a bias towards low + * addresses, or 0 for no bias. + * The bias mechanism is intended to avoid clashing with brk() and stack + * areas. */ - -void -uvm_map_printit(struct vm_map *map, boolean_t full, - int (*pr)(const char *, ...)) +int +uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) { - struct vm_map_entry *entry; + vaddr_t start, end; - (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); - (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", - map->nentries, map->size, map->ref_count, map->timestamp, - map->flags); -#ifdef pmap_resident_count - (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, - pmap_resident_count(map->pmap)); + start = VMMAP_FREE_START(entry); + end = VMMAP_FREE_END(entry); + + /* + * Stay at the top of brk() area. + */ + if (end >= map->b_start && start < map->b_end) + return 1; + /* + * Stay at the far end of the stack area. + */ + if (end >= map->s_start && start < map->s_end) { +#ifdef MACHINE_STACK_GROWS_UP + return 1; #else - /* XXXCDC: this should be required ... 
*/ - (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); + return -1; #endif - if (!full) - return; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", - entry, entry->start, entry->end, entry->object.uvm_obj, - (long long)entry->offset, entry->aref.ar_amap, - entry->aref.ar_pageoff); - (*pr)( - "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " - "wc=%d, adv=%d\n", - (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', - (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', - (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', - entry->protection, entry->max_protection, - entry->inheritance, entry->wired_count, entry->advice); } -} -/* - * uvm_object_printit: actually prints the object - */ + /* + * No bias, this area is meant for us. + */ + return 0; +} -void -uvm_object_printit(struct uvm_object *uobj, boolean_t full, - int (*pr)(const char *, ...)) -{ - struct vm_page *pg; - int cnt = 0; - (*pr)("OBJECT %p: pgops=%p, npages=%d, ", - uobj, uobj->pgops, uobj->uo_npages); - if (UVM_OBJ_IS_KERN_OBJECT(uobj)) - (*pr)("refs=<SYSTEM>\n"); - else - (*pr)("refs=%d\n", uobj->uo_refs); +boolean_t +vm_map_lock_try_ln(struct vm_map *map, char *file, int line) +{ + boolean_t rv; - if (!full) { - return; - } - (*pr)(" PAGES <pg,offset>:\n "); - RB_FOREACH(pg, uvm_objtree, &uobj->memt) { - (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); - if ((cnt % 3) == 2) { - (*pr)("\n "); + if (map->flags & VM_MAP_INTRSAFE) { + rv = TRUE; + } else { + if (map->flags & VM_MAP_BUSY) { + return (FALSE); } - cnt++; + rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); } - if ((cnt % 3) != 2) { - (*pr)("\n"); + + if (rv) { + map->timestamp++; + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); } -} -/* - * uvm_page_printit: actually print the page - */ + return (rv); +} -static const char page_flagbits[] = - "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" - "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" - "\31PMAP1\32PMAP2\33PMAP3"; +void +vm_map_lock_ln(struct vm_map *map, char *file, int line) +{ + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + do { + while (map->flags & VM_MAP_BUSY) { + map->flags |= VM_MAP_WANTLOCK; + tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); + } + } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); + } + + map->timestamp++; + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); +} void -uvm_page_printit(struct vm_page *pg, boolean_t full, - int (*pr)(const char *, ...)) +vm_map_lock_read_ln(struct vm_map *map, char *file, int line) { - struct vm_page *tpg; - struct uvm_object *uobj; - struct pglist *pgl; + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_enter_read(&map->lock); + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); +} - (*pr)("PAGE %p:\n", pg); - (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", - pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, - (long long)pg->phys_addr); - (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", - pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); -#if defined(UVM_PAGE_TRKOWN) - if (pg->pg_flags & PG_BUSY) - (*pr)(" owning process = %d, tag=%s", - pg->owner, pg->owner_tag); - else - (*pr)(" page not busy, no owner"); -#else - (*pr)(" [page 
ownership tracking disabled]"); -#endif -#ifdef __HAVE_VM_PAGE_MD - (*pr)("\tvm_page_md %p\n", &pg->mdpage); -#else - (*pr)("\n"); -#endif +void +vm_map_unlock_ln(struct vm_map *map, char *file, int line) +{ + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit(&map->lock); +} - if (!full) - return; +void +vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) +{ + /* XXX: RO */ uvm_tree_sanity(map, file, line); + /* XXX: RO */ uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_exit_read(&map->lock); +} - /* cross-verify object/anon */ - if ((pg->pg_flags & PQ_FREE) == 0) { - if (pg->pg_flags & PQ_ANON) { - if (pg->uanon == NULL || pg->uanon->an_page != pg) - (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", - (pg->uanon) ? pg->uanon->an_page : NULL); - else - (*pr)(" anon backpointer is OK\n"); - } else { - uobj = pg->uobject; - if (uobj) { - (*pr)(" checking object list\n"); - RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { - if (tpg == pg) { - break; - } - } - if (tpg) - (*pr)(" page found on object list\n"); - else - (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); - } - } - } +void +vm_map_downgrade_ln(struct vm_map *map, char *file, int line) +{ + uvm_tree_sanity(map, file, line); + uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) + rw_enter(&map->lock, RW_DOWNGRADE); +} - /* cross-verify page queue */ - if (pg->pg_flags & PQ_FREE) { - if (uvm_pmr_isfree(pg)) - printf(" page found in uvm_pmemrange\n"); - else - printf(" >>> page not found in uvm_pmemrange <<<\n"); - pgl = NULL; - } else if (pg->pg_flags & PQ_INACTIVE) { - pgl = (pg->pg_flags & PQ_SWAPBACKED) ? - &uvm.page_inactive_swp : &uvm.page_inactive_obj; - } else if (pg->pg_flags & PQ_ACTIVE) { - pgl = &uvm.page_active; - } else { - pgl = NULL; +void +vm_map_upgrade_ln(struct vm_map *map, char *file, int line) +{ + /* XXX: RO */ uvm_tree_sanity(map, file, line); + /* XXX: RO */ uvm_tree_size_chk(map, file, line); + LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); + if ((map->flags & VM_MAP_INTRSAFE) == 0) { + rw_exit_read(&map->lock); + rw_enter_write(&map->lock); } + LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); + uvm_tree_sanity(map, file, line); +} - if (pgl) { - (*pr)(" checking pageq list\n"); - TAILQ_FOREACH(tpg, pgl, pageq) { - if (tpg == pg) { - break; - } - } - if (tpg) - (*pr)(" page found on pageq list\n"); - else - (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); - } +void +vm_map_busy_ln(struct vm_map *map, char *file, int line) +{ + map->flags |= VM_MAP_BUSY; } -#endif + +void +vm_map_unbusy_ln(struct vm_map *map, char *file, int line) +{ + int oflags; + + oflags = map->flags; + map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); + if (oflags & VM_MAP_WANTLOCK) + wakeup(&map->flags); +} + + +RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, + uvm_mapentry_addrcmp); diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index c416cc51d23..e0e21267e31 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,7 +1,22 @@ -/* $OpenBSD: uvm_map.h,v 1.46 2011/06/06 17:10:23 ariane Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.47 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ -/* +/* + * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * * Copyright (c) 1997 Charles D. Cranor and Washington University. * Copyright (c) 1991, 1993, The Regents of the University of California. * @@ -75,14 +90,28 @@ #ifdef _KERNEL /* + * Internal functions. + * + * Required by clipping macros. + */ +void uvm_map_clip_end(struct vm_map*, struct vm_map_entry*, + vaddr_t); +void uvm_map_clip_start(struct vm_map*, + struct vm_map_entry*, vaddr_t); + +/* * UVM_MAP_CLIP_START: ensure that the entry begins at or after * the starting address, if it doesn't we split the entry. * * => map must be locked by caller */ -#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \ - if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); } +#define UVM_MAP_CLIP_START(_map, _entry, _addr) \ + do { \ + KASSERT((_entry)->end + (_entry)->fspace > (_addr)); \ + if ((_entry)->start < (_addr)) \ + uvm_map_clip_start((_map), (_entry), (_addr)); \ + } while (0) /* * UVM_MAP_CLIP_END: ensure that the entry ends at or before @@ -91,15 +120,16 @@ * => map must be locked by caller */ -#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \ - if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); } +#define UVM_MAP_CLIP_END(_map, _entry, _addr) \ + do { \ + KASSERT((_entry)->start < (_addr)); \ + if ((_entry)->end > (_addr)) \ + uvm_map_clip_end((_map), (_entry), (_addr)); \ + } while (0) /* * extract flags */ -#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */ -#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */ -#define UVM_EXTRACT_QREF 0x4 /* use quick refs */ #define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */ #endif /* _KERNEL */ @@ -133,21 +163,34 @@ union vm_map_object { * Also included is control information for virtual copy operations. 
*/ struct vm_map_entry { - RB_ENTRY(vm_map_entry) rb_entry; /* tree information */ - vaddr_t ownspace; /* free space after */ - vaddr_t space; /* space in subtree */ - struct vm_map_entry *prev; /* previous entry */ - struct vm_map_entry *next; /* next entry */ + union { + RB_ENTRY(vm_map_entry) addr_entry; /* address tree */ + } daddrs; + + union { + RB_ENTRY(vm_map_entry) rbtree; /* Link freespace tree. */ + TAILQ_ENTRY(vm_map_entry) tailq;/* Link freespace queue. */ + TAILQ_ENTRY(vm_map_entry) deadq;/* dead entry queue */ + } dfree; + +#define uvm_map_entry_start_copy start vaddr_t start; /* start address */ vaddr_t end; /* end address */ + + vsize_t guard; /* bytes in guard */ + vsize_t fspace; /* free space */ + union vm_map_object object; /* object I point to */ voff_t offset; /* offset into object */ + struct vm_aref aref; /* anonymous overlay */ + int etype; /* entry type */ + vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ + int wired_count; /* can be paged if == 0 */ - struct vm_aref aref; /* anonymous overlay */ int advice; /* madvise advice */ #define uvm_map_entry_stop_copy flags u_int8_t flags; /* flags */ @@ -156,18 +199,19 @@ struct vm_map_entry { #define UVM_MAP_KMEM 0x02 /* from kmem entry pool */ }; -/* - * Marks the map entry as a guard page, using vm_map_entry.etype. - */ -#define MAP_ET_KVAGUARD 0x10 /* guard entry */ - #define VM_MAPENT_ISWIRED(entry) ((entry)->wired_count != 0) +TAILQ_HEAD(uvm_map_deadq, vm_map_entry); /* dead entry queue */ +RB_HEAD(uvm_map_addr, vm_map_entry); +RB_PROTOTYPE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, + uvm_mapentry_addrcmp); + /* - * Maps are doubly-linked lists of map entries, kept sorted - * by address. A single hint is provided to start - * searches again from the last successful search, - * insertion, or removal. + * A Map is a rbtree of map entries, kept sorted by address. + * In addition, free space entries are also kept in a rbtree, + * indexed by free size. + * + * * * LOCKING PROTOCOL NOTES: * ----------------------- @@ -214,23 +258,80 @@ struct vm_map_entry { * is busy, and thread is attempting * to write-lock. must be tested * while `flags_lock' is asserted. + * + * VM_MAP_GUARDPAGES r/o; must be specified at map + * initialization time. + * If set, guards will appear between + * automatic allocations. + * No locking required. + * + * VM_MAP_ISVMSPACE r/o; set by uvmspace_alloc. + * Signifies that this map is a vmspace. + * (The implementation treats all maps + * without this bit as kernel maps.) + * No locking required. + * + * + * All automatic allocations (uvm_map without MAP_FIXED) will allocate + * from vm_map.free. + * If that allocation fails: + * - vmspace maps will spill over into vm_map.bfree, + * - all other maps will call uvm_map_kmem_grow() to increase the arena. + * + * vmspace maps have their data, brk() and stack arenas automatically + * updated when uvm_map() is invoked without MAP_FIXED. + * The spill over arena (vm_map.bfree) will contain the space in the brk() + * and stack ranges. + * Kernel maps never have a bfree arena and this tree will always be empty. + * + * + * read_locks and write_locks are used in lock debugging code. 
*/ struct vm_map { struct pmap * pmap; /* Physical map */ struct rwlock lock; /* Lock for map data */ - RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */ - struct vm_map_entry header; /* List of entries */ - int nentries; /* Number of entries */ + + struct uvm_map_addr addr; /* Entry tree, by addr */ + vsize_t size; /* virtual size */ int ref_count; /* Reference count */ simple_lock_data_t ref_lock; /* Lock for ref_count field */ - vm_map_entry_t hint; /* hint for quick lookups */ - simple_lock_data_t hint_lock; /* lock for hint storage */ - vm_map_entry_t first_free; /* First free space hint */ int flags; /* flags */ unsigned int timestamp; /* Version number */ -#define min_offset header.start -#define max_offset header.end + + vaddr_t min_offset; /* First address in map. */ + vaddr_t max_offset; /* Last address in map. */ + + /* + * Allocation overflow regions. + */ + vaddr_t b_start; /* Start for brk() alloc. */ + vaddr_t b_end; /* End for brk() alloc. */ + vaddr_t s_start; /* Start for stack alloc. */ + vaddr_t s_end; /* End for stack alloc. */ + + /* + * Special address selectors. + * + * The uaddr_exe mapping is used if: + * - protX is selected + * - the pointer is not NULL + * + * If uaddr_exe is not used, the other mappings are checked in + * order of appearance. + * If a hint is given, the selection will only be used if the hint + * falls in the range described by the mapping. + * + * The states are pointers because: + * - they may not all be in use + * - the struct size for different schemes is variable + * + * The uaddr_brk_stack selector will select addresses that are in + * the brk/stack area of the map. + */ + struct uvm_addr_state *uaddr_exe; /* Executable selector. */ + struct uvm_addr_state *uaddr_any[4]; /* More selectors. */ + struct uvm_addr_state *uaddr_brk_stack; /* Brk/stack selector. */ }; /* vm_map flags */ @@ -239,11 +340,13 @@ struct vm_map { #define VM_MAP_WIREFUTURE 0x04 /* rw: wire future mappings */ #define VM_MAP_BUSY 0x08 /* rw: map is busy */ #define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */ +#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */ +#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */ /* XXX: number of kernel maps and entries to statically allocate */ #if !defined(MAX_KMAPENT) -#define MAX_KMAPENT 1024 /* XXXCDC: no crash */ +#define MAX_KMAPENT 1024 /* Sufficient to make it to the scheduler. 
*/ #endif /* !defined MAX_KMAPENT */ #ifdef _KERNEL @@ -268,9 +371,7 @@ struct vm_map_intrsafe { #ifdef _KERNEL -#ifdef PMAP_GROWKERNEL extern vaddr_t uvm_maxkaddr; -#endif /* * protos: the following prototypes define the interface to vm_map @@ -279,32 +380,29 @@ extern vaddr_t uvm_maxkaddr; void uvm_map_deallocate(vm_map_t); int uvm_map_clean(vm_map_t, vaddr_t, vaddr_t, int); -void uvm_map_clip_start(vm_map_t, vm_map_entry_t, vaddr_t); -void uvm_map_clip_end(vm_map_t, vm_map_entry_t, vaddr_t); vm_map_t uvm_map_create(pmap_t, vaddr_t, vaddr_t, int); -int uvm_map_extract(vm_map_t, vaddr_t, vsize_t, - vm_map_t, vaddr_t *, int); -vm_map_entry_t uvm_map_findspace(vm_map_t, vaddr_t, vsize_t, vaddr_t *, - struct uvm_object *, voff_t, vsize_t, int); +int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*, + int); vaddr_t uvm_map_pie(vaddr_t); -#define uvm_map_hint(p, prot) uvm_map_hint1(p, prot, 1) -vaddr_t uvm_map_hint1(struct proc *, vm_prot_t, int); +vaddr_t uvm_map_hint(struct vmspace *, vm_prot_t); int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t); int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int); void uvm_map_init(void); boolean_t uvm_map_lookup_entry(vm_map_t, vaddr_t, vm_map_entry_t *); -void uvm_map_reference(vm_map_t); int uvm_map_replace(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t, int); int uvm_map_reserve(vm_map_t, vsize_t, vaddr_t, vsize_t, vaddr_t *); void uvm_map_setup(vm_map_t, vaddr_t, vaddr_t, int); int uvm_map_submap(vm_map_t, vaddr_t, vaddr_t, vm_map_t); -#define uvm_unmap(_m, _s, _e) uvm_unmap_p(_m, _s, _e, 0) -void uvm_unmap_p(vm_map_t, vaddr_t, vaddr_t, struct proc *); -void uvm_unmap_detach(vm_map_entry_t,int); -void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *, - struct proc *, boolean_t); +void uvm_unmap(vm_map_t, vaddr_t, vaddr_t); +void uvm_map_set_uaddr(struct vm_map*, struct uvm_addr_state**, + struct uvm_addr_state*); +int uvm_map_mquery(struct vm_map*, vaddr_t*, vsize_t, voff_t, int); + +void uvm_unmap_detach(struct uvm_map_deadq*, int); +void uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t, + struct uvm_map_deadq*, boolean_t, boolean_t); #endif /* _KERNEL */ @@ -337,82 +435,45 @@ void uvm_unmap_remove(vm_map_t, vaddr_t, vaddr_t, vm_map_entry_t *, */ #ifdef _KERNEL -/* XXX: clean up later */ +/* + * XXX: clean up later + * Half the kernel seems to depend on them being included here. 
+ */ #include <sys/time.h> -#include <sys/systm.h> /* for panic() */ - -static __inline boolean_t vm_map_lock_try(vm_map_t); -static __inline void vm_map_lock(vm_map_t); -extern const char vmmapbsy[]; - -static __inline boolean_t -vm_map_lock_try(struct vm_map *map) -{ - boolean_t rv; - - if (map->flags & VM_MAP_INTRSAFE) { - rv = TRUE; - } else { - if (map->flags & VM_MAP_BUSY) { - return (FALSE); - } - rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); - } - - if (rv) - map->timestamp++; - - return (rv); -} - -static __inline void -vm_map_lock(struct vm_map *map) -{ - if (map->flags & VM_MAP_INTRSAFE) - return; - - do { - while (map->flags & VM_MAP_BUSY) { - map->flags |= VM_MAP_WANTLOCK; - tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); - } - } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); - - map->timestamp++; -} - -#define vm_map_lock_read(map) rw_enter_read(&(map)->lock) - -#define vm_map_unlock(map) \ -do { \ - if (((map)->flags & VM_MAP_INTRSAFE) == 0) \ - rw_exit(&(map)->lock); \ -} while (0) - -#define vm_map_unlock_read(map) rw_exit_read(&(map)->lock) - -#define vm_map_downgrade(map) rw_enter(&(map)->lock, RW_DOWNGRADE) - -#define vm_map_upgrade(map) \ -do { \ - rw_exit_read(&(map)->lock); \ - rw_enter_write(&(map)->lock); \ -} while (0) - -#define vm_map_busy(map) \ -do { \ - (map)->flags |= VM_MAP_BUSY; \ -} while (0) +#include <sys/systm.h> /* for panic() */ + +boolean_t vm_map_lock_try_ln(struct vm_map*, char*, int); +void vm_map_lock_ln(struct vm_map*, char*, int); +void vm_map_lock_read_ln(struct vm_map*, char*, int); +void vm_map_unlock_ln(struct vm_map*, char*, int); +void vm_map_unlock_read_ln(struct vm_map*, char*, int); +void vm_map_downgrade_ln(struct vm_map*, char*, int); +void vm_map_upgrade_ln(struct vm_map*, char*, int); +void vm_map_busy_ln(struct vm_map*, char*, int); +void vm_map_unbusy_ln(struct vm_map*, char*, int); + +#ifdef DIAGNOSTIC +#define vm_map_lock_try(map) vm_map_lock_try_ln(map, __FILE__, __LINE__) +#define vm_map_lock(map) vm_map_lock_ln(map, __FILE__, __LINE__) +#define vm_map_lock_read(map) vm_map_lock_read_ln(map, __FILE__, __LINE__) +#define vm_map_unlock(map) vm_map_unlock_ln(map, __FILE__, __LINE__) +#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, __FILE__, __LINE__) +#define vm_map_downgrade(map) vm_map_downgrade_ln(map, __FILE__, __LINE__) +#define vm_map_upgrade(map) vm_map_upgrade_ln(map, __FILE__, __LINE__) +#define vm_map_busy(map) vm_map_busy_ln(map, __FILE__, __LINE__) +#define vm_map_unbusy(map) vm_map_unbusy_ln(map, __FILE__, __LINE__) +#else +#define vm_map_lock_try(map) vm_map_lock_try_ln(map, NULL, 0) +#define vm_map_lock(map) vm_map_lock_ln(map, NULL, 0) +#define vm_map_lock_read(map) vm_map_lock_read_ln(map, NULL, 0) +#define vm_map_unlock(map) vm_map_unlock_ln(map, NULL, 0) +#define vm_map_unlock_read(map) vm_map_unlock_read_ln(map, NULL, 0) +#define vm_map_downgrade(map) vm_map_downgrade_ln(map, NULL, 0) +#define vm_map_upgrade(map) vm_map_upgrade_ln(map, NULL, 0) +#define vm_map_busy(map) vm_map_busy_ln(map, NULL, 0) +#define vm_map_unbusy(map) vm_map_unbusy_ln(map, NULL, 0) +#endif -#define vm_map_unbusy(map) \ -do { \ - int oflags; \ - \ - oflags = (map)->flags; \ - (map)->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); \ - if (oflags & VM_MAP_WANTLOCK) \ - wakeup(&(map)->flags); \ -} while (0) #endif /* _KERNEL */ /* diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index a96deed4052..6817224e3ce 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_mmap.c,v 
1.87 2011/07/09 05:31:26 matthew Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.88 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -142,43 +142,17 @@ sys_mquery(struct proc *p, void *v, register_t *retval) } else { fp = NULL; uobj = NULL; - uoff = 0; + uoff = UVM_UNKNOWN_OFFSET; } if (vaddr == 0) - vaddr = uvm_map_hint(p, prot); + vaddr = uvm_map_hint(p->p_vmspace, prot); - /* prevent a user requested address from falling in heap space */ - if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) && - (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { - if (flags & UVM_FLAG_FIXED) { - error = EINVAL; - goto done; - } - vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ); - } - vm_map_lock(&p->p_vmspace->vm_map); - -again: - if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size, - &vaddr, uobj, uoff, 0, flags) == NULL) { - if (flags & UVM_FLAG_FIXED) - error = EINVAL; - else - error = ENOMEM; - } else { - /* prevent a returned address from falling in heap space */ - if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) - && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) { - vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + - BRKSIZ); - goto again; - } - error = 0; + error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff, + flags); + if (error == 0) *retval = (register_t)(vaddr); - } - vm_map_unlock(&p->p_vmspace->vm_map); -done: + if (fp != NULL) FRELE(fp); return (error); @@ -202,7 +176,7 @@ sys_mincore(struct proc *p, void *v, register_t *retval) struct uvm_object *uobj; struct vm_amap *amap; struct vm_anon *anon; - vm_map_entry_t entry; + vm_map_entry_t entry, next; vaddr_t start, end, lim; vm_map_t map; vsize_t len, npgs; @@ -251,15 +225,16 @@ sys_mincore(struct proc *p, void *v, register_t *retval) } for (/* nothing */; - entry != &map->header && entry->start < end; - entry = entry->next) { + entry != NULL && entry->start < end; + entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { KASSERT(!UVM_ET_ISSUBMAP(entry)); KASSERT(start >= entry->start); /* Make sure there are no holes. */ + next = RB_NEXT(uvm_map_addr, &map->addr, entry); if (entry->end < end && - (entry->next == &map->header || - entry->next->start > entry->end)) { + (next == NULL || + next->start > entry->end)) { error = ENOMEM; goto out; } @@ -412,17 +387,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval) if (vm_min_address > 0 && addr < vm_min_address) return (EINVAL); - } else { - - /* - * not fixed: make sure we skip over the largest possible heap. - * we will refine our guess later (e.g. 
to account for VAC, etc) - */ - if (addr == 0) - addr = uvm_map_hint(p, prot); - else if (!(flags & MAP_TRYFIXED) && - addr < (vaddr_t)p->p_vmspace->vm_daddr) - addr = uvm_map_hint(p, prot); } /* @@ -565,13 +529,6 @@ sys_mmap(struct proc *p, void *v, register_t *retval) error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); - if (error == ENOMEM && !(flags & (MAP_FIXED | MAP_TRYFIXED))) { - /* once more, with feeling */ - addr = uvm_map_hint1(p, prot, 0); - error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, - maxprot, flags, handle, pos, - p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p); - } if (error == 0) /* remember to add offset */ @@ -658,7 +615,7 @@ sys_munmap(struct proc *p, void *v, register_t *retval) vsize_t size, pageoff; vm_map_t map; vaddr_t vm_min_address = VM_MIN_ADDRESS; - struct vm_map_entry *dead_entries; + struct uvm_map_deadq dead_entries; /* * get syscall args... @@ -700,12 +657,12 @@ sys_munmap(struct proc *p, void *v, register_t *retval) /* * doit! */ - uvm_unmap_remove(map, addr, addr + size, &dead_entries, p, FALSE); + TAILQ_INIT(&dead_entries); + uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE); vm_map_unlock(map); /* and unlock */ - if (dead_entries != NULL) - uvm_unmap_detach(dead_entries, 0); + uvm_unmap_detach(&dead_entries, 0); return (0); } @@ -1036,7 +993,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, if (*addr & PAGE_MASK) return(EINVAL); uvmflag |= UVM_FLAG_FIXED; - uvm_unmap_p(map, *addr, *addr + size, p); /* zap! */ + uvm_unmap(map, *addr, *addr + size); /* zap! */ } /* @@ -1130,7 +1087,7 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot, (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice, uvmflag); - error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p); + error = uvm_map(map, addr, size, uobj, foff, align, uvmflag); if (error == 0) { /* diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index 06cbf871e41..f4d4490b853 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_unix.c,v 1.42 2011/06/06 17:10:23 ariane Exp $ */ +/* $OpenBSD: uvm_unix.c,v 1.43 2012/03/09 13:01:29 ariane Exp $ */ /* $NetBSD: uvm_unix.c,v 1.18 2000/09/13 15:00:25 thorpej Exp $ */ /* @@ -167,9 +167,7 @@ uvm_coredump(struct proc *p, struct vnode *vp, struct ucred *cred, offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - + RB_FOREACH(entry, uvm_map_addr, &map->addr) { /* should never happen for a user process */ if (UVM_ET_ISSUBMAP(entry)) { panic("uvm_coredump: user process with submap?"); @@ -261,9 +259,7 @@ uvm_coredump_walkmap(struct proc *p, void *iocookie, vaddr_t top; int error; - for (entry = map->header.next; entry != &map->header; - entry = entry->next) { - + RB_FOREACH(entry, uvm_map_addr, &map->addr) { state.cookie = cookie; state.prot = entry->protection; state.flags = 0; |
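
The new allocator code above repeatedly clamps a candidate free-space range against the boundaries of the special address selectors (exe, brk/stack, the uaddr_any selectors, and uvm_maxkaddr for kernel maps) using the one-line uvm_map_boundfix() helper that uvm_map_boundary() calls for each boundary in turn. Below is a minimal, self-contained userland sketch of that clamping rule, not the kernel code itself: vaddr_t is approximated by unsigned long and the boundary values are invented purely for illustration.

	/*
	 * Userland sketch of the uvm_map_boundfix() clamping rule from
	 * uvm_map.c.  Types and values are simplified for illustration.
	 */
	#include <stdio.h>

	typedef unsigned long vaddr_t;

	/* Clamp max down to bound iff bound lies strictly inside (min, max). */
	static vaddr_t
	boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
	{
		return (min < bound && max > bound) ? bound : max;
	}

	int
	main(void)
	{
		/* Hypothetical free range and boundaries to respect. */
		vaddr_t min = 0x1000, max = 0x20000;
		vaddr_t bounds[] = { 0x0, 0x8000, 0x10000, 0x40000 };
		size_t i;

		/*
		 * Applying the clamp once per boundary, the way
		 * uvm_map_boundary() does, leaves max at the lowest
		 * boundary strictly above min (0x8000 here); boundaries
		 * at or below min, or above the current max, are ignored.
		 */
		for (i = 0; i < sizeof(bounds) / sizeof(bounds[0]); i++)
			max = boundfix(min, max, bounds[i]);

		printf("free range clamped to 0x%lx-0x%lx\n", min, max);
		return 0;
	}

Because the clamp only ever lowers max to a boundary strictly above min, the result does not depend on the order in which boundaries are tested, which is why uvm_map_boundary() can simply try every known boundary in sequence.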