author     Artur Grabowski <art@cvs.openbsd.org>  2000-03-16 22:11:06 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>  2000-03-16 22:11:06 +0000
commit     6270ed032c8d513c60cd21bb0356766633014a3b (patch)
tree       3566a5fad92c120888db8b8da8da3c9b1006d48c /sys/uvm
parent     20caed58266ef751b064092b7925dd0ae32fee9d (diff)
Bring in some new UVM code from NetBSD (not current).
- Introduce a new type of map that is interrupt safe and never allows
  faults in it. mb_map and kmem_map are made intrsafe.
- Add "access protection" to uvm_vslock (to be passed down to uvm_fault
  and later to pmap_enter).
- madvise(2) now works.
- Various cleanups.
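For context, a minimal sketch of what the first item means for callers: a kernel
submap is now created with a flags argument instead of a pageable boolean, and
passing VM_MAP_INTRSAFE marks it interrupt safe, so uvm_fault() refuses to handle
faults on its addresses. The size expression and variable names below are
illustrative only; the real callers live outside sys/uvm.

/* hedged sketch -- illustrative names, not the actual caller */
vaddr_t minaddr, maxaddr;
vm_map_t mb_map;

mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    (vsize_t)(nmbclusters * MCLBYTES), VM_MAP_INTRSAFE,
    FALSE, NULL);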
Diffstat (limited to 'sys/uvm')
-rw-r--r--  sys/uvm/uvm_aobj.c        6
-rw-r--r--  sys/uvm/uvm_extern.h      7
-rw-r--r--  sys/uvm/uvm_fault.c      45
-rw-r--r--  sys/uvm/uvm_fault.h       7
-rw-r--r--  sys/uvm/uvm_fault_i.h    35
-rw-r--r--  sys/uvm/uvm_glue.c       20
-rw-r--r--  sys/uvm/uvm_km.c        184
-rw-r--r--  sys/uvm/uvm_km.h          3
-rw-r--r--  sys/uvm/uvm_loan.c       24
-rw-r--r--  sys/uvm/uvm_map.c       303
-rw-r--r--  sys/uvm/uvm_map.h         7
-rw-r--r--  sys/uvm/uvm_map_i.h      32
-rw-r--r--  sys/uvm/uvm_mmap.c       69
-rw-r--r--  sys/uvm/uvm_object.h     19
-rw-r--r--  sys/uvm/uvm_page.c       20
-rw-r--r--  sys/uvm/uvm_page.h        7
-rw-r--r--  sys/uvm/uvm_page_i.h     36
-rw-r--r--  sys/uvm/uvm_pager.c      14
-rw-r--r--  sys/uvm/uvm_pdaemon.c    14
-rw-r--r--  sys/uvm/uvm_pdaemon.h     2
-rw-r--r--  sys/uvm/uvm_pglist.c     14
-rw-r--r--  sys/uvm/uvm_uio.c       266
-rw-r--r--  sys/uvm/uvm_uio.h        66
23 files changed, 963 insertions, 237 deletions
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index 250662bece8..727b72006d7 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_aobj.c,v 1.18 1999/03/26 17:34:15 chs Exp $ */
+/* $NetBSD: uvm_aobj.c,v 1.20 1999/05/25 00:09:00 thorpej Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -619,7 +619,7 @@ uao_reference(uobj)
* kernel_object already has plenty of references, leave it alone.
*/
- if (uobj->uo_refs == UVM_OBJ_KERN)
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj))
return;
simple_lock(&uobj->vmobjlock);
@@ -646,7 +646,7 @@ uao_detach(uobj)
/*
* detaching from kernel_object is a noop.
*/
- if (uobj->uo_refs == UVM_OBJ_KERN)
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj))
return;
simple_lock(&uobj->vmobjlock);
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 9782f0e0560..f3c2a65d15f 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_extern.h,v 1.24 1999/04/11 04:04:11 chs Exp $ */
+/* $NetBSD: uvm_extern.h,v 1.27 1999/05/26 19:16:36 thorpej Exp $ */
/*
*
@@ -282,7 +282,8 @@ boolean_t uvm_kernacc __P((caddr_t, size_t, int));
__dead void uvm_scheduler __P((void)) __attribute__((noreturn));
void uvm_swapin __P((struct proc *));
boolean_t uvm_useracc __P((caddr_t, size_t, int));
-void uvm_vslock __P((struct proc *, caddr_t, size_t));
+void uvm_vslock __P((struct proc *, caddr_t, size_t,
+ vm_prot_t));
void uvm_vsunlock __P((struct proc *, caddr_t, size_t));
@@ -301,7 +302,7 @@ void uvm_km_free_wakeup __P((vm_map_t, vaddr_t,
vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *,
vsize_t, int));
struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *,
- vaddr_t *, vsize_t, boolean_t,
+ vaddr_t *, vsize_t, int,
boolean_t, vm_map_t));
vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t));
vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t));
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index d05c5c30282..b4002aceb87 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_fault.c,v 1.28 1999/04/11 04:04:11 chs Exp $ */
+/* $NetBSD: uvm_fault.c,v 1.33 1999/06/04 23:38:41 thorpej Exp $ */
/*
*
@@ -587,6 +587,19 @@ uvm_fault(orig_map, vaddr, fault_type, access_type)
narrow = FALSE; /* normal fault */
/*
+ * before we do anything else, if this is a fault on a kernel
+ * address, check to see if the address is managed by an
+ * interrupt-safe map. If it is, we fail immediately. Intrsafe
+ * maps are never pageable, and this approach avoids an evil
+ * locking mess.
+ */
+ if (orig_map == kernel_map && uvmfault_check_intrsafe(&ufi)) {
+ UVMHIST_LOG(maphist, "<- VA 0x%lx in intrsafe map %p",
+ ufi.orig_rvaddr, ufi.map, 0, 0);
+ return (KERN_FAILURE);
+ }
+
+ /*
* "goto ReFault" means restart the page fault from ground zero.
*/
ReFault:
@@ -614,6 +627,17 @@ ReFault:
}
/*
+ * if the map is not a pageable map, a page fault always fails.
+ */
+
+ if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) {
+ UVMHIST_LOG(maphist,
+ "<- map %p not pageable", ufi.map, 0, 0, 0);
+ uvmfault_unlockmaps(&ufi, FALSE);
+ return (KERN_FAILURE);
+ }
+
+ /*
* "enter_prot" is the protection we want to enter the page in at.
* for certain pages (e.g. copy-on-write pages) this protection can
* be more strict than ufi.entry->protection. "wired" means either
@@ -1689,9 +1713,10 @@ Case2:
*/
int
-uvm_fault_wire(map, start, end)
+uvm_fault_wire(map, start, end, access_type)
vm_map_t map;
vaddr_t start, end;
+ vm_prot_t access_type;
{
vaddr_t va;
pmap_t pmap;
@@ -1713,10 +1738,10 @@ uvm_fault_wire(map, start, end)
*/
for (va = start ; va < end ; va += PAGE_SIZE) {
- rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE);
+ rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
if (rv) {
if (va != start) {
- uvm_fault_unwire(map->pmap, start, va);
+ uvm_fault_unwire(map, start, va);
}
return (rv);
}
@@ -1727,19 +1752,23 @@ uvm_fault_wire(map, start, end)
/*
* uvm_fault_unwire(): unwire range of virtual space.
- *
- * => caller holds reference to pmap (via its map)
*/
void
-uvm_fault_unwire(pmap, start, end)
- struct pmap *pmap;
+uvm_fault_unwire(map, start, end)
+ vm_map_t map;
vaddr_t start, end;
{
+ pmap_t pmap = vm_map_pmap(map);
vaddr_t va;
paddr_t pa;
struct vm_page *pg;
+#ifdef DIAGNOSTIC
+ if (map->flags & VM_MAP_INTRSAFE)
+ panic("uvm_fault_unwire: intrsafe map");
+#endif
+
/*
* we assume that the area we are unwiring has actually been wired
* in the first place. this means that we should be able to extract
diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h
index fd8958f96e5..6849b5713c6 100644
--- a/sys/uvm/uvm_fault.h
+++ b/sys/uvm/uvm_fault.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_fault.h,v 1.7 1998/10/11 23:07:42 chuck Exp $ */
+/* $NetBSD: uvm_fault.h,v 1.11 1999/06/04 23:38:41 thorpej Exp $ */
/*
*
@@ -72,13 +72,14 @@ struct uvm_faultinfo {
int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *,
struct vm_anon *));
+static boolean_t uvmfault_check_intrsafe __P((struct uvm_faultinfo *));
static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t));
static boolean_t uvmfault_relock __P((struct uvm_faultinfo *));
static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *,
struct uvm_object *, struct vm_anon *));
static void uvmfault_unlockmaps __P((struct uvm_faultinfo *, boolean_t));
-int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t));
-void uvm_fault_unwire __P((struct pmap *, vaddr_t, vaddr_t));
+int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t));
+void uvm_fault_unwire __P((vm_map_t, vaddr_t, vaddr_t));
#endif /* _UVM_UVM_FAULT_H_ */
diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h
index 8a2c3ea0fb4..38e64476a68 100644
--- a/sys/uvm/uvm_fault_i.h
+++ b/sys/uvm/uvm_fault_i.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_fault_i.h,v 1.7 1999/01/24 23:53:15 chuck Exp $ */
+/* $NetBSD: uvm_fault_i.h,v 1.9 1999/06/04 23:38:41 thorpej Exp $ */
/*
*
@@ -82,6 +82,39 @@ uvmfault_unlockall(ufi, amap, uobj, anon)
}
/*
+ * uvmfault_check_intrsafe: check for a virtual address managed by
+ * an interrupt-safe map.
+ *
+ * => caller must provide a uvm_faultinfo structure with the IN
+ * params properly filled in
+ * => if we find an intersafe VA, we fill in ufi->map, and return TRUE
+ */
+
+static __inline boolean_t
+uvmfault_check_intrsafe(ufi)
+ struct uvm_faultinfo *ufi;
+{
+ struct vm_map_intrsafe *vmi;
+ int s;
+
+ s = vmi_list_lock();
+ for (vmi = LIST_FIRST(&vmi_list); vmi != NULL;
+ vmi = LIST_NEXT(vmi, vmi_list)) {
+ if (ufi->orig_rvaddr >= vm_map_min(&vmi->vmi_map) &&
+ ufi->orig_rvaddr < vm_map_max(&vmi->vmi_map))
+ break;
+ }
+ vmi_list_unlock(s);
+
+ if (vmi != NULL) {
+ ufi->map = &vmi->vmi_map;
+ return (TRUE);
+ }
+
+ return (FALSE);
+}
+
+/*
* uvmfault_lookup: lookup a virtual address in a map
*
* => caller must provide a uvm_faultinfo structure with the IN
diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c
index fe866fdc017..837c158a240 100644
--- a/sys/uvm/uvm_glue.c
+++ b/sys/uvm/uvm_glue.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_glue.c,v 1.19 1999/04/30 21:23:50 thorpej Exp $ */
+/* $NetBSD: uvm_glue.c,v 1.23 1999/05/28 20:49:51 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -221,13 +221,15 @@ uvm_chgkprot(addr, len, rw)
*/
void
-uvm_vslock(p, addr, len)
+uvm_vslock(p, addr, len, access_type)
struct proc *p;
caddr_t addr;
size_t len;
+ vm_prot_t access_type;
{
+
uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr),
- round_page(addr+len));
+ round_page(addr+len), access_type);
}
/*
@@ -243,7 +245,7 @@ uvm_vsunlock(p, addr, len)
caddr_t addr;
size_t len;
{
- uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr),
+ uvm_fault_unwire(&p->p_vmspace->vm_map, trunc_page(addr),
round_page(addr+len));
}
@@ -282,9 +284,12 @@ uvm_fork(p1, p2, shared, stack, stacksize)
* and the kernel stack. Wired state is stored in p->p_flag's
* P_INMEM bit rather than in the vm_map_entry's wired count
* to prevent kernel_map fragmentation.
+ *
+ * Note the kernel stack gets read/write accesses right off
+ * the bat.
*/
rv = uvm_fault_wire(kernel_map, (vaddr_t)up,
- (vaddr_t)up + USPACE);
+ (vaddr_t)up + USPACE, VM_PROT_READ | VM_PROT_WRITE);
if (rv != KERN_SUCCESS)
panic("uvm_fork: uvm_fault_wire failed: %d", rv);
@@ -373,7 +378,8 @@ uvm_swapin(p)
addr = (vaddr_t)p->p_addr;
/* make P_INMEM true */
- uvm_fault_wire(kernel_map, addr, addr + USPACE);
+ uvm_fault_wire(kernel_map, addr, addr + USPACE,
+ VM_PROT_READ | VM_PROT_WRITE);
/*
* Some architectures need to be notified when the user area has
@@ -586,7 +592,7 @@ uvm_swapout(p)
* Unwire the to-be-swapped process's user struct and kernel stack.
*/
addr = (vaddr_t)p->p_addr;
- uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */
+ uvm_fault_unwire(kernel_map, addr, addr + USPACE); /* !P_INMEM */
pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
/*
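The uvm_glue.c change above gives uvm_vslock() an access_type argument. A hedged
sketch of how a caller (e.g. physio-style code, which is outside this diff) would
use it; the uio handling here is illustrative only:

/* wire the user buffer before the transfer; ask for write access if the
 * device will be storing into user memory (uio_rw == UIO_READ) */
uvm_vslock(p, iov->iov_base, iov->iov_len,
    (uio->uio_rw == UIO_READ) ? VM_PROT_WRITE : VM_PROT_READ);

/* ... perform the I/O ... */

uvm_vsunlock(p, iov->iov_base, iov->iov_len);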
diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c
index a908f8f53aa..a5bb21b2a7b 100644
--- a/sys/uvm/uvm_km.c
+++ b/sys/uvm/uvm_km.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_km.c,v 1.22 1999/03/26 21:58:39 mycroft Exp $ */
+/* $NetBSD: uvm_km.c,v 1.27 1999/06/04 23:38:41 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -156,12 +156,16 @@
vm_map_t kernel_map = NULL;
+struct vmi_list vmi_list;
+simple_lock_data_t vmi_list_slock;
+
/*
* local functions
*/
static int uvm_km_get __P((struct uvm_object *, vaddr_t,
- vm_page_t *, int *, int, vm_prot_t, int, int));
+ vm_page_t *, int *, int, vm_prot_t, int, int));
+
/*
* local data structues
*/
@@ -414,29 +418,43 @@ uvm_km_init(start, end)
vaddr_t base = VM_MIN_KERNEL_ADDRESS;
/*
- * first, init kernel memory objects.
+ * first, initialize the interrupt-safe map list.
+ */
+ LIST_INIT(&vmi_list);
+ simple_lock_init(&vmi_list_slock);
+
+ /*
+ * next, init kernel memory objects.
*/
/* kernel_object: for pageable anonymous kernel memory */
uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);
- /* kmem_object: for malloc'd memory (wired, protected by splimp) */
+ /*
+ * kmem_object: for use by the kernel malloc(). Memory is always
+ * wired, and this object (and the kmem_map) can be accessed at
+ * interrupt time.
+ */
simple_lock_init(&kmem_object_store.vmobjlock);
kmem_object_store.pgops = &km_pager;
TAILQ_INIT(&kmem_object_store.memq);
kmem_object_store.uo_npages = 0;
/* we are special. we never die */
- kmem_object_store.uo_refs = UVM_OBJ_KERN;
+ kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE;
uvmexp.kmem_object = &kmem_object_store;
- /* mb_object: for mbuf memory (always wired, protected by splimp) */
+ /*
+ * mb_object: for mbuf cluster pages on platforms which use the
+ * mb_map. Memory is always wired, and this object (and the mb_map)
+ * can be accessed at interrupt time.
+ */
simple_lock_init(&mb_object_store.vmobjlock);
mb_object_store.pgops = &km_pager;
TAILQ_INIT(&mb_object_store.memq);
mb_object_store.uo_npages = 0;
/* we are special. we never die */
- mb_object_store.uo_refs = UVM_OBJ_KERN;
+ mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE;
uvmexp.mb_object = &mb_object_store;
/*
@@ -444,7 +462,7 @@ uvm_km_init(start, end)
* before installing.
*/
- uvm_map_setup(&kernel_map_store, base, end, FALSE);
+ uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
kernel_map_store.pmap = pmap_kernel();
if (uvm_map(&kernel_map_store, &base, start - base, NULL,
UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
@@ -469,11 +487,11 @@ uvm_km_init(start, end)
* alloc a new map
*/
struct vm_map *
-uvm_km_suballoc(map, min, max, size, pageable, fixed, submap)
+uvm_km_suballoc(map, min, max, size, flags, fixed, submap)
struct vm_map *map;
vaddr_t *min, *max; /* OUT, OUT */
vsize_t size;
- boolean_t pageable;
+ int flags;
boolean_t fixed;
struct vm_map *submap;
{
@@ -503,11 +521,11 @@ uvm_km_suballoc(map, min, max, size, pageable, fixed, submap)
pmap_reference(vm_map_pmap(map));
if (submap == NULL) {
- submap = uvm_map_create(vm_map_pmap(map), *min, *max, pageable);
+ submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
if (submap == NULL)
panic("uvm_km_suballoc: unable to create submap");
} else {
- uvm_map_setup(submap, *min, *max, pageable);
+ uvm_map_setup(submap, *min, *max, flags);
submap->pmap = vm_map_pmap(map);
}
@@ -535,15 +553,17 @@ uvm_km_pgremove(uobj, start, end)
struct uvm_object *uobj;
vaddr_t start, end;
{
- boolean_t by_list, is_aobj;
+ boolean_t by_list;
struct vm_page *pp, *ppnext;
vaddr_t curoff;
UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);
simple_lock(&uobj->vmobjlock); /* lock object */
- /* is uobj an aobj? */
- is_aobj = uobj->pgops == &aobj_pager;
+#ifdef DIAGNOSTIC
+ if (uobj->pgops != &aobj_pager)
+ panic("uvm_km_pgremove: object %p not an aobj", uobj);
+#endif
/* choose cheapest traversal */
by_list = (uobj->uo_npages <=
@@ -561,26 +581,24 @@ uvm_km_pgremove(uobj, start, end)
UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
pp->flags & PG_BUSY, 0, 0);
+
/* now do the actual work */
- if (pp->flags & PG_BUSY)
+ if (pp->flags & PG_BUSY) {
/* owner must check for this when done */
pp->flags |= PG_RELEASED;
- else {
- pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+ } else {
+ /* free the swap slot... */
+ uao_dropswap(uobj, curoff >> PAGE_SHIFT);
/*
- * if this kernel object is an aobj, free the swap slot.
+ * ...and free the page; note it may be on the
+ * active or inactive queues.
*/
- if (is_aobj) {
- uao_dropswap(uobj, curoff >> PAGE_SHIFT);
- }
-
uvm_lock_pageq();
uvm_pagefree(pp);
uvm_unlock_pageq();
}
/* done */
-
}
simple_unlock(&uobj->vmobjlock);
return;
@@ -588,7 +606,6 @@ uvm_km_pgremove(uobj, start, end)
loop_by_list:
for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) {
-
ppnext = pp->listq.tqe_next;
if (pp->offset < start || pp->offset >= end) {
continue;
@@ -596,26 +613,111 @@ loop_by_list:
UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
pp->flags & PG_BUSY, 0, 0);
+
/* now do the actual work */
- if (pp->flags & PG_BUSY)
+ if (pp->flags & PG_BUSY) {
/* owner must check for this when done */
pp->flags |= PG_RELEASED;
- else {
- pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+ } else {
+ /* free the swap slot... */
+ uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
/*
- * if this kernel object is an aobj, free the swap slot.
+ * ...and free the page; note it may be on the
+ * active or inactive queues.
*/
- if (is_aobj) {
- uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
- }
-
uvm_lock_pageq();
uvm_pagefree(pp);
uvm_unlock_pageq();
}
/* done */
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return;
+}
+
+
+/*
+ * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
+ * objects
+ *
+ * => when you unmap a part of anonymous kernel memory you want to toss
+ * the pages right away. (this gets called from uvm_unmap_...).
+ * => none of the pages will ever be busy, and none of them will ever
+ * be on the active or inactive queues (because these objects are
+ * never allowed to "page").
+ */
+void
+uvm_km_pgremove_intrsafe(uobj, start, end)
+ struct uvm_object *uobj;
+ vaddr_t start, end;
+{
+ boolean_t by_list;
+ struct vm_page *pp, *ppnext;
+ vaddr_t curoff;
+ UVMHIST_FUNC("uvm_km_pgremove_intrsafe"); UVMHIST_CALLED(maphist);
+
+ simple_lock(&uobj->vmobjlock); /* lock object */
+
+#ifdef DIAGNOSTIC
+ if (UVM_OBJ_IS_INTRSAFE_OBJECT(uobj) == 0)
+ panic("uvm_km_pgremove_intrsafe: object %p not intrsafe", uobj);
+#endif
+
+ /* choose cheapest traversal */
+ by_list = (uobj->uo_npages <=
+ ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY);
+
+ if (by_list)
+ goto loop_by_list;
+
+ /* by hash */
+
+ for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
+ pp = uvm_pagelookup(uobj, curoff);
+ if (pp == NULL)
+ continue;
+
+ UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
+ pp->flags & PG_BUSY, 0, 0);
+#ifdef DIAGNOSTIC
+ if (pp->flags & PG_BUSY)
+ panic("uvm_km_pgremove_intrsafe: busy page");
+ if (pp->pqflags & PQ_ACTIVE)
+ panic("uvm_km_pgremove_intrsafe: active page");
+ if (pp->pqflags & PQ_INACTIVE)
+ panic("uvm_km_pgremove_intrsafe: inactive page");
+#endif
+
+ /* free the page */
+ uvm_pagefree(pp);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return;
+
+loop_by_list:
+
+ for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) {
+ ppnext = pp->listq.tqe_next;
+ if (pp->offset < start || pp->offset >= end) {
+ continue;
+ }
+
+ UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
+ pp->flags & PG_BUSY, 0, 0);
+
+#ifdef DIAGNOSTIC
+ if (pp->flags & PG_BUSY)
+ panic("uvm_km_pgremove_intrsafe: busy page");
+ if (pp->pqflags & PQ_ACTIVE)
+ panic("uvm_km_pgremove_intrsafe: active page");
+ if (pp->pqflags & PQ_INACTIVE)
+ panic("uvm_km_pgremove_intrsafe: inactive page");
+#endif
+
+ /* free the page */
+ uvm_pagefree(pp);
}
simple_unlock(&uobj->vmobjlock);
return;
@@ -725,12 +827,18 @@ uvm_km_kmemalloc(map, obj, size, flags)
* (because if pmap_enter wants to allocate out of kmem_object
* it will need to lock it itself!)
*/
+ if (UVM_OBJ_IS_INTRSAFE_OBJECT(obj)) {
#if defined(PMAP_NEW)
- pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL);
+ pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
+ VM_PROT_ALL);
#else
- pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
- UVM_PROT_ALL, TRUE, VM_PROT_READ | VM_PROT_WRITE);
+ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+ UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
#endif
+ } else {
+ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+ UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
+ }
loopva += PAGE_SIZE;
offset += PAGE_SIZE;
size -= PAGE_SIZE;
@@ -861,8 +969,8 @@ uvm_km_alloc1(map, size, zeroit)
* map it in; note we're never called with an intrsafe
* object, so we always use regular old pmap_enter().
*/
- pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
- UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
+ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+ UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
loopva += PAGE_SIZE;
offset += PAGE_SIZE;
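For context, a hedged sketch of the allocation path this enables. The actual
caller is the kernel malloc() in kern_malloc.c, and the non-sleeping flag
spelling (UVM_KMF_NOWAIT) is assumed from the uvm_km_kmemalloc() interface of
this era:

/* kmem_map is now intrsafe, so this can run at interrupt time as long as
 * the caller asked for a non-sleeping allocation */
vaddr_t va;

va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, round_page(size),
    (flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0);
if (va == 0)
        return (NULL);          /* malloc() failure */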
diff --git a/sys/uvm/uvm_km.h b/sys/uvm/uvm_km.h
index 7b07b0d74d7..fb143cbd39c 100644
--- a/sys/uvm/uvm_km.h
+++ b/sys/uvm/uvm_km.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_km.h,v 1.6 1998/08/13 02:11:01 eeh Exp $ */
+/* $NetBSD: uvm_km.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $ */
/*
*
@@ -47,5 +47,6 @@
void uvm_km_init __P((vaddr_t, vaddr_t));
void uvm_km_pgremove __P((struct uvm_object *, vaddr_t, vaddr_t));
+void uvm_km_pgremove_intrsafe __P((struct uvm_object *, vaddr_t, vaddr_t));
#endif /* _UVM_UVM_KM_H_ */
diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c
index 66d81f15eb0..f82d8576e0a 100644
--- a/sys/uvm/uvm_loan.c
+++ b/sys/uvm/uvm_loan.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_loan.c,v 1.14 1999/03/25 18:48:52 mrg Exp $ */
+/* $NetBSD: uvm_loan.c,v 1.17 1999/06/03 00:05:45 thorpej Exp $ */
/*
*
@@ -59,7 +59,7 @@
*
* there are 3 types of loans possible:
* O->K uvm_object page to wired kernel page (e.g. mbuf data area)
- * A->K anon page to kernel wired kernel page (e.g. mbuf data area)
+ * A->K anon page to wired kernel page (e.g. mbuf data area)
* O->A uvm_object to anon loan (e.g. vnode page to an anon)
* note that it possible to have an O page loaned to both an A and K
* at the same time.
@@ -68,10 +68,15 @@
* a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort
* of page is considered "owned" by the uvm_object (not the anon).
*
- * each loan of a page to a wired kernel page bumps the pg->wire_count.
- * wired kernel mappings should be entered with pmap_kenter functions
- * so that pmap_page_protect() will not affect the kernel mappings.
- * (this requires the PMAP_NEW interface...).
+ * each loan of a page to the kernel bumps the pg->wire_count. the
+ * kernel mappings for these pages will be read-only and wired. since
+ * the page will also be wired, it will not be a candidate for pageout,
+ * and thus will never be pmap_page_protect()'d with VM_PROT_NONE. a
+ * write fault in the kernel to one of these pages will not cause
+ * copy-on-write. instead, the page fault is considered fatal. this
+ * is because the kernel mapping will have no way to look up the
+ * object/anon which the page is owned by. this is a good side-effect,
+ * since a kernel write to a loaned page is an error.
*
* owners that want to free their pages and discover that they are
* loaned out simply "disown" them (the page becomes an orphan). these
@@ -96,7 +101,7 @@
*
* note that loaning a page causes all mappings of the page to become
* read-only (via pmap_page_protect). this could have an unexpected
- * effect on normal "wired" pages if one is not careful.
+ * effect on normal "wired" pages if one is not careful (XXX).
*/
/*
@@ -220,6 +225,11 @@ uvm_loan(map, start, len, result, flags)
void **output;
int rv;
+#ifdef DIAGNOSTIC
+ if (map->flags & VM_MAP_INTRSAFE)
+ panic("uvm_loan: intrsafe map");
+#endif
+
/*
* ensure that one and only one of the flags is set
*/
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index 68487e398e3..c7c34a8f9bd 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_map.c,v 1.39 1999/05/12 19:11:23 thorpej Exp $ */
+/* $NetBSD: uvm_map.c,v 1.53 1999/06/07 16:31:42 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -186,6 +186,23 @@ static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
* local inlines
*/
+/* XXX Should not exist! */
+#define vm_map_downgrade(map) \
+ (void) lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc)
+
+/* XXX Should not exist! */
+#ifdef DIAGNOSTIC
+#define vm_map_upgrade(map) \
+do { \
+ if (lockmgr(&(map)->lock, LK_UPGRADE, NULL, curproc) != 0) \
+ panic("vm_map_upgrade: failed to upgrade lock"); \
+} while (0)
+#else
+#define vm_map_upgrade(map) \
+ (void) lockmgr(&(map)->lock, LK_UPGRADE, NULL)
+#endif /* DIAGNOSTIC */
+
+
/*
* uvm_mapent_alloc: allocate a map entry
*
@@ -201,11 +218,11 @@ uvm_mapent_alloc(map)
UVMHIST_FUNC("uvm_mapent_alloc");
UVMHIST_CALLED(maphist);
- if (map->entries_pageable) {
+ if ((map->flags & VM_MAP_INTRSAFE) == 0 &&
+ map != kernel_map && kernel_map != NULL /* XXX */) {
me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
me->flags = 0;
/* me can't be null, wait ok */
-
} else {
s = splimp(); /* protect kentry_free list with splimp */
simple_lock(&uvm.kentry_lock);
@@ -214,14 +231,14 @@ uvm_mapent_alloc(map)
simple_unlock(&uvm.kentry_lock);
splx(s);
if (!me)
- panic("mapent_alloc: out of kernel map entries, check MAX_KMAPENT");
+ panic("mapent_alloc: out of static map entries, check MAX_KMAPENT");
me->flags = UVM_MAP_STATIC;
}
- UVMHIST_LOG(maphist, "<- new entry=0x%x [pageable=%d]",
- me, map->entries_pageable, 0, 0);
+ UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]",
+ me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map)
+ ? TRUE : FALSE, 0, 0);
return(me);
-
}
/*
@@ -276,7 +293,7 @@ uvm_map_entry_unwire(map, entry)
vm_map_entry_t entry;
{
- uvm_fault_unwire(map->pmap, entry->start, entry->end);
+ uvm_fault_unwire(map, entry->start, entry->end);
entry->wired_count = 0;
}
@@ -543,8 +560,9 @@ uvm_map(map, startp, size, uobj, uoffset, flags)
} else {
if (uoffset == UVM_UNKNOWN_OFFSET) {
#ifdef DIAGNOSTIC
- if (uobj->uo_refs != UVM_OBJ_KERN)
- panic("uvm_map: unknown offset with non-kernel object");
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj) == 0)
+ panic("uvm_map: unknown offset with "
+ "non-kernel object");
#endif
uoffset = *startp - vm_map_min(kernel_map);
}
@@ -976,11 +994,11 @@ uvm_unmap_remove(map, start, end, entry_list)
* we want to free these pages right away...
*/
if (UVM_ET_ISOBJ(entry) &&
- entry->object.uvm_obj->uo_refs == UVM_OBJ_KERN) {
-
+ UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
#ifdef DIAGNOSTIC
if (vm_map_pmap(map) != pmap_kernel())
- panic("uvm_unmap_remove: kernel object mapped by non-kernel map");
+ panic("uvm_unmap_remove: kernel object "
+ "mapped by non-kernel map");
#endif
/*
@@ -1006,40 +1024,35 @@ uvm_unmap_remove(map, start, end, entry_list)
*
* uvm_km_pgremove currently does the following:
* for pages in the kernel object in range:
- * - pmap_page_protect them out of all pmaps
+ * - drops the swap slot
* - uvm_pagefree the page
*
- * note that in case [1] the pmap_page_protect call
- * in uvm_km_pgremove may very well be redundant
- * because we have already removed the mappings
- * beforehand with pmap_remove (or pmap_kremove).
- * in the PMAP_NEW case, the pmap_page_protect call
- * may not do anything, since PMAP_NEW allows the
- * kernel to enter/remove kernel mappings without
- * bothing to keep track of the mappings (e.g. via
- * pv_entry lists). XXX: because of this, in the
- * future we should consider removing the
- * pmap_page_protect from uvm_km_pgremove some time
- * in the future.
+ * note there is version of uvm_km_pgremove() that
+ * is used for "intrsafe" objects.
*/
/*
- * remove mappings from pmap
+ * remove mappings from pmap and drop the pages
+ * from the object. offsets are always relative
+ * to vm_map_min(kernel_map).
*/
+ if (UVM_OBJ_IS_INTRSAFE_OBJECT(entry->object.uvm_obj)) {
#if defined(PMAP_NEW)
- pmap_kremove(entry->start, len);
+ pmap_kremove(entry->start, len);
#else
- pmap_remove(pmap_kernel(), entry->start,
- entry->start+len);
+ pmap_remove(pmap_kernel(), entry->start,
+ entry->start + len);
#endif
-
- /*
- * remove pages from a kernel object (offsets are
- * always relative to vm_map_min(kernel_map)).
- */
- uvm_km_pgremove(entry->object.uvm_obj,
- entry->start - vm_map_min(kernel_map),
- entry->end - vm_map_min(kernel_map));
+ uvm_km_pgremove_intrsafe(entry->object.uvm_obj,
+ entry->start - vm_map_min(kernel_map),
+ entry->end - vm_map_min(kernel_map));
+ } else {
+ pmap_remove(pmap_kernel(), entry->start,
+ entry->start + len);
+ uvm_km_pgremove(entry->object.uvm_obj,
+ entry->start - vm_map_min(kernel_map),
+ entry->end - vm_map_min(kernel_map));
+ }
/*
* null out kernel_object reference, we've just
@@ -1842,6 +1855,121 @@ uvm_map_inherit(map, start, end, new_inheritance)
return(KERN_SUCCESS);
}
+/*
+ * uvm_map_advice: set advice code for range of addrs in map.
+ *
+ * => map must be unlocked
+ */
+
+int
+uvm_map_advice(map, start, end, new_advice)
+ vm_map_t map;
+ vaddr_t start;
+ vaddr_t end;
+ int new_advice;
+{
+ vm_map_entry_t entry, temp_entry;
+ UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)",
+ map, start, end, new_advice);
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (uvm_map_lookup_entry(map, start, &temp_entry)) {
+ entry = temp_entry;
+ UVM_MAP_CLIP_START(map, entry, start);
+ } else {
+ entry = temp_entry->next;
+ }
+
+ while ((entry != &map->header) && (entry->start < end)) {
+ UVM_MAP_CLIP_END(map, entry, end);
+
+ switch (new_advice) {
+ case MADV_NORMAL:
+ case MADV_RANDOM:
+ case MADV_SEQUENTIAL:
+ /* nothing special here */
+ break;
+
+#if 0
+ case MADV_WILLNEED:
+ /* activate all these pages */
+ /* XXX */
+ /*
+ * should invent a "weak" mode for uvm_fault()
+ * which would only do the PGO_LOCKED pgo_get().
+ */
+ break;
+
+ case MADV_DONTNEED:
+ /* deactivate this page */
+ /* XXX */
+ /*
+ * vm_page_t p;
+ * uvm_lock_pageq();
+ * for (p in each page)
+ * if (not_wired)
+ * uvm_pagedeactivate(p);
+ * uvm_unlock_pageq();
+ */
+ break;
+
+ case MADV_SPACEAVAIL:
+ /*
+ * XXXMRG
+ * what is this? i think: "ensure that we have
+ * allocated backing-store for these pages". this
+ * is going to require changes in the page daemon,
+ * as it will free swap space allocated to pages in
+ * core. there's also what to do for
+ * device/file/anonymous memory..
+ */
+ break;
+
+ case MADV_GARBAGE:
+ /* pages are `empty' and can be garbage collected */
+ /* XXX */
+ /*
+ * (perhaps MADV_FREE? check freebsd's MADV_FREE).
+ *
+ * need to do this:
+ * - clear all the referenced and modified bits on
+ * the pages,
+ * - delete any backing store,
+ * - mark the page as `recycable'.
+ *
+ * So, if you start paging, the pages would be thrown out
+ * and then zero-filled the next time they're used.
+ * Otherwise you'd just reuse them directly. Once the
+ * page has been modified again, it would no longer be
+ * recyclable. That way, malloc() can just tell the
+ * system when pages are `empty'; if memory is needed,
+ * they'll be tossed; if memory is not needed, there
+ * will be no additional overhead.
+ */
+ break;
+#endif
+
+ default:
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
+ return (KERN_INVALID_ARGUMENT);
+ }
+
+
+ entry->advice = new_advice;
+
+ entry = entry->next;
+ }
+
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
+ return (KERN_SUCCESS);
+}
+
/*
* uvm_map_pageable: sets the pageability of a range in a map.
*
@@ -1864,6 +1992,11 @@ uvm_map_pageable(map, start, end, new_pageable)
UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
map, start, end, new_pageable);
+#ifdef DIAGNOSTIC
+ if ((map->flags & VM_MAP_PAGEABLE) == 0)
+ panic("uvm_map_pageable: map %p not pageable", map);
+#endif
+
vm_map_lock(map);
VM_MAP_RANGE_CHECK(map, start, end);
@@ -1913,10 +2046,11 @@ uvm_map_pageable(map, start, end, new_pageable)
* now decrement the wiring count for each region. if a region
* becomes completely unwired, unwire its physical pages and
* mappings.
+ *
+ * Note, uvm_fault_unwire() (called via uvm_map_entry_unwire())
+ * does not lock the map, so we don't have to do anything
+ * special regarding locking here.
*/
-#if 0 /* not necessary: uvm_fault_unwire does not lock */
- lock_set_recursive(&map->lock);
-#endif /* XXXCDC */
entry = start_entry;
while ((entry != &map->header) && (entry->start < end)) {
@@ -1928,9 +2062,6 @@ uvm_map_pageable(map, start, end, new_pageable)
entry = entry->next;
}
-#if 0 /* XXXCDC: not necessary, see above */
- lock_clear_recursive(&map->lock);
-#endif
vm_map_unlock(map);
UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
return(KERN_SUCCESS);
@@ -2015,62 +2146,53 @@ uvm_map_pageable(map, start, end, new_pageable)
/*
* Pass 2.
*/
- /*
- * HACK HACK HACK HACK
- *
- * if we are wiring in the kernel map or a submap of it, unlock the
- * map to avoid deadlocks. we trust that the kernel threads are
- * well-behaved, and therefore will not do anything destructive to
- * this region of the map while we have it unlocked. we cannot
- * trust user threads to do the same.
- *
- * HACK HACK HACK HACK
- */
- if (vm_map_pmap(map) == pmap_kernel()) {
- vm_map_unlock(map); /* trust me ... */
- } else {
- vm_map_set_recursive(&map->lock);
- lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc /*XXX*/);
- }
+
+ vm_map_downgrade(map);
rv = 0;
entry = start_entry;
while (entry != &map->header && entry->start < end) {
- /*
- * if uvm_fault_wire fails for any page we need to undo what has
- * been done. we decrement the wiring count for those pages
- * which have not yet been wired (now) and unwire those that
- * have * (later).
- *
- * XXX this violates the locking protocol on the map, needs to
- * be fixed. [because we only have a read lock on map we
- * shouldn't be changing wired_count?]
- */
- if (rv) {
- entry->wired_count--;
- } else if (entry->wired_count == 1) {
- rv = uvm_fault_wire(map, entry->start, entry->end);
+ if (entry->wired_count == 1) {
+ rv = uvm_fault_wire(map, entry->start, entry->end,
+ entry->protection);
if (rv) {
- failed = entry->start;
- entry->wired_count--;
+ /*
+ * wiring failed. break out of the loop.
+ * we'll clean up the map below, once we
+ * have a write lock again.
+ */
+ break;
}
}
entry = entry->next;
}
- if (vm_map_pmap(map) == pmap_kernel()) {
- vm_map_lock(map); /* relock */
- } else {
- vm_map_clear_recursive(&map->lock);
- }
-
if (rv) { /* failed? */
+ /*
+ * Get back to an exclusive (write) lock.
+ */
+ vm_map_upgrade(map);
+
+ /*
+ * first drop the wiring count on all the entries
+ * which haven't actually been wired yet.
+ */
+ failed = entry->start;
+ while (entry != &map->header && entry->start < end)
+ entry->wired_count--;
+
+ /*
+ * now, unlock the map, and unwire all the pages that
+ * were successfully wired above.
+ */
vm_map_unlock(map);
(void) uvm_map_pageable(map, start, failed, TRUE);
UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
return(rv);
}
- vm_map_unlock(map);
+
+ /* We are holding a read lock here. */
+ vm_map_unlock_read(map);
UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
return(KERN_SUCCESS);
@@ -2255,7 +2377,7 @@ uvmspace_init(vm, pmap, min, max, pageable)
bzero(vm, sizeof(*vm));
- uvm_map_setup(&vm->vm_map, min, max, pageable);
+ uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
if (pmap)
pmap_reference(pmap);
@@ -2366,7 +2488,7 @@ uvmspace_exec(p)
* for p
*/
nvm = uvmspace_alloc(map->min_offset, map->max_offset,
- map->entries_pageable);
+ (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW)
/*
@@ -2472,7 +2594,7 @@ uvmspace_fork(vm1)
vm_map_lock(old_map);
vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
- old_map->entries_pageable);
+ (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
(caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
new_map = &vm2->vm_map; /* XXX */
@@ -2801,8 +2923,9 @@ uvm_map_printit(map, full, pr)
vm_map_entry_t entry;
(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
- (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d\n",
- map->nentries, map->size, map->ref_count, map->timestamp);
+ (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
+ map->nentries, map->size, map->ref_count, map->timestamp,
+ map->flags);
#ifdef pmap_resident_count
(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
pmap_resident_count(map->pmap));
@@ -2855,7 +2978,7 @@ uvm_object_printit(uobj, full, pr)
(*pr)("OBJECT %p: pgops=%p, npages=%d, ", uobj, uobj->pgops,
uobj->uo_npages);
- if (uobj->uo_refs == UVM_OBJ_KERN)
+ if (UVM_OBJ_IS_KERN_OBJECT(uobj))
(*pr)("refs=<SYSTEM>\n");
else
(*pr)("refs=%d\n", uobj->uo_refs);
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index c4ee5711acb..b58b21ac459 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_map.h,v 1.11 1999/03/25 18:48:52 mrg Exp $ */
+/* $NetBSD: uvm_map.h,v 1.14 1999/05/26 19:16:36 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -138,7 +138,7 @@ void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t,
vaddr_t));
MAP_INLINE
vm_map_t uvm_map_create __P((pmap_t, vaddr_t,
- vaddr_t, boolean_t));
+ vaddr_t, int));
int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t,
vm_map_t, vaddr_t *, int));
vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t,
@@ -146,6 +146,7 @@ vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t,
boolean_t));
int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t,
vm_inherit_t));
+int uvm_map_advice __P((vm_map_t, vaddr_t, vaddr_t, int));
void uvm_map_init __P((void));
boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t,
vm_map_entry_t *));
@@ -156,7 +157,7 @@ int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t,
int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t,
vaddr_t *));
void uvm_map_setup __P((vm_map_t, vaddr_t,
- vaddr_t, boolean_t));
+ vaddr_t, int));
int uvm_map_submap __P((vm_map_t, vaddr_t,
vaddr_t, vm_map_t));
MAP_INLINE
diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h
index e56ba28e5e9..85ca2a72a43 100644
--- a/sys/uvm/uvm_map_i.h
+++ b/sys/uvm/uvm_map_i.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_map_i.h,v 1.11 1999/03/25 18:48:53 mrg Exp $ */
+/* $NetBSD: uvm_map_i.h,v 1.14 1999/06/04 23:38:42 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -84,15 +84,18 @@
*/
MAP_INLINE vm_map_t
-uvm_map_create(pmap, min, max, pageable)
+uvm_map_create(pmap, min, max, flags)
pmap_t pmap;
vaddr_t min, max;
- boolean_t pageable;
+ int flags;
{
vm_map_t result;
- MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK);
- uvm_map_setup(result, min, max, pageable);
+ MALLOC(result, vm_map_t,
+ (flags & VM_MAP_INTRSAFE) ? sizeof(struct vm_map_intrsafe) :
+ sizeof(struct vm_map),
+ M_VMMAP, M_WAITOK);
+ uvm_map_setup(result, min, max, flags);
result->pmap = pmap;
return(result);
}
@@ -104,10 +107,10 @@ uvm_map_create(pmap, min, max, pageable)
*/
MAP_INLINE void
-uvm_map_setup(map, min, max, pageable)
+uvm_map_setup(map, min, max, flags)
vm_map_t map;
vaddr_t min, max;
- boolean_t pageable;
+ int flags;
{
map->header.next = map->header.prev = &map->header;
@@ -116,13 +119,26 @@ uvm_map_setup(map, min, max, pageable)
map->ref_count = 1;
map->min_offset = min;
map->max_offset = max;
- map->entries_pageable = pageable;
+ map->flags = flags;
map->first_free = &map->header;
map->hint = &map->header;
map->timestamp = 0;
lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
simple_lock_init(&map->ref_lock);
simple_lock_init(&map->hint_lock);
+
+ /*
+ * If the map is interrupt safe, place it on the list
+ * of interrupt safe maps, for uvm_fault().
+ */
+ if (flags & VM_MAP_INTRSAFE) {
+ struct vm_map_intrsafe *vmi = (struct vm_map_intrsafe *)map;
+ int s;
+
+ s = vmi_list_lock();
+ LIST_INSERT_HEAD(&vmi_list, vmi, vmi_list);
+ vmi_list_unlock(s);
+ }
}
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 4d78b3a3993..75b1d162648 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_mmap.c,v 1.19 1999/03/25 18:48:53 mrg Exp $ */
+/* $NetBSD: uvm_mmap.c,v 1.21 1999/05/23 06:27:13 mrg Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -120,28 +120,6 @@ sys_sstk(p, v, retval)
}
/*
- * sys_madvise: give advice about memory usage.
- */
-
-/* ARGSUSED */
-int
-sys_madvise(p, v, retval)
- struct proc *p;
- void *v;
- register_t *retval;
-{
-#if 0
- struct sys_madvise_args /* {
- syscallarg(caddr_t) addr;
- syscallarg(size_t) len;
- syscallarg(int) behav;
- } */ *uap = v;
-#endif
-
- return (ENOSYS);
-}
-
-/*
* sys_mincore: determine if pages are in core or not.
*/
@@ -696,6 +674,51 @@ sys_minherit(p, v, retval)
}
/*
+ * sys_madvise: give advice about memory usage.
+ */
+
+/* ARGSUSED */
+int
+sys_madvise(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_madvise_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ syscallarg(int) behav;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ int advice;
+
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+ advice = SCARG(uap, behav);
+
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ if ((int)size < 0)
+ return (EINVAL);
+
+ switch (uvm_map_advice(&p->p_vmspace->vm_map, addr, addr+size,
+ advice)) {
+ case KERN_SUCCESS:
+ return (0);
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ }
+ return (EINVAL);
+}
+
+/*
* sys_mlock: memory lock
*/
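With sys_madvise() filled in above, the standard madvise(2) call now reaches
uvm_map_advice(). A minimal userland example (standard interface, nothing
specific to this diff):

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

/* tell the VM system the mapping will be read sequentially */
if (madvise(addr, len, MADV_SEQUENTIAL) == -1)
        err(1, "madvise");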
diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h
index c45dd262a6b..294e3624e9e 100644
--- a/sys/uvm/uvm_object.h
+++ b/sys/uvm/uvm_object.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_object.h,v 1.5 1998/03/09 00:58:58 mrg Exp $ */
+/* $NetBSD: uvm_object.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $ */
/*
*
@@ -64,7 +64,22 @@ struct uvm_object {
* for kernel objects... when a kernel object is unmapped we always want
* to free the resources associated with the mapping. UVM_OBJ_KERN
* allows us to decide which type of unmapping we want to do.
+ *
+ * in addition, we have kernel objects which may be used in an
+ * interrupt context. these objects get their mappings entered
+ * with pmap_kenter*() and removed with pmap_kremove(), which
+ * are safe to call in interrupt context, and must be used ONLY
+ * for wired kernel mappings in these objects and their associated
+ * maps.
*/
-#define UVM_OBJ_KERN (-2)
+#define UVM_OBJ_KERN (-2)
+#define UVM_OBJ_KERN_INTRSAFE (-3)
+
+#define UVM_OBJ_IS_KERN_OBJECT(uobj) \
+ ((uobj)->uo_refs == UVM_OBJ_KERN || \
+ (uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE)
+
+#define UVM_OBJ_IS_INTRSAFE_OBJECT(uobj) \
+ ((uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE)
#endif /* _UVM_UVM_OBJECT_H_ */
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index fa85122f307..c60017de35b 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page.c,v 1.19 1999/05/20 20:07:55 thorpej Exp $ */
+/* $NetBSD: uvm_page.c,v 1.23 1999/05/25 01:34:13 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -847,9 +847,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
panic("uvm_pagealloc: obj and anon != NULL");
#endif
- s = splimp();
-
- uvm_lock_fpageq(); /* lock free page queue */
+ s = uvm_lock_fpageq(); /* lock free page queue */
/*
* check to see if we need to generate some free pages waking
@@ -870,7 +868,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
*/
use_reserve = (flags & UVM_PGA_USERESERVE) ||
- (obj && obj->uo_refs == UVM_OBJ_KERN);
+ (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
(uvmexp.free <= uvmexp.reserve_pagedaemon &&
!(use_reserve && curproc == uvm.pagedaemon_proc)))
@@ -919,8 +917,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
TAILQ_REMOVE(freeq, pg, pageq);
uvmexp.free--;
- uvm_unlock_fpageq(); /* unlock free page queue */
- splx(s);
+ uvm_unlock_fpageq(s); /* unlock free page queue */
pg->offset = off;
pg->uobject = obj;
@@ -945,8 +942,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
return(pg);
fail:
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
return (NULL);
}
@@ -1137,8 +1133,7 @@ struct vm_page *pg;
* and put on free queue
*/
- s = splimp();
- uvm_lock_fpageq();
+ s = uvm_lock_fpageq();
TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)],
pg, pageq);
pg->pqflags = PQ_FREE;
@@ -1148,8 +1143,7 @@ struct vm_page *pg;
pg->uanon = (void *)0xdeadbeef;
#endif
uvmexp.free++;
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
}
#if defined(UVM_PAGE_TRKOWN)
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
index 621bb01d9a1..09b4c635a65 100644
--- a/sys/uvm/uvm_page.h
+++ b/sys/uvm/uvm_page.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page.h,v 1.10 1998/08/13 02:11:02 eeh Exp $ */
+/* $NetBSD: uvm_page.h,v 1.12 1999/05/24 19:10:57 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -79,8 +79,6 @@
#define uvm_lock_pageq() simple_lock(&uvm.pageqlock)
#define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock)
-#define uvm_lock_fpageq() simple_lock(&uvm.fpageqlock)
-#define uvm_unlock_fpageq() simple_unlock(&uvm.fpageqlock)
#define uvm_pagehash(obj,off) \
(((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask)
@@ -108,6 +106,9 @@ boolean_t uvm_page_physget __P((paddr_t *));
#endif
void uvm_page_rehash __P((void));
+PAGE_INLINE int uvm_lock_fpageq __P((void));
+PAGE_INLINE void uvm_unlock_fpageq __P((int));
+
PAGE_INLINE void uvm_pageactivate __P((struct vm_page *));
vaddr_t uvm_pageboot_alloc __P((vsize_t));
PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *));
diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h
index 4691e0806f8..9fc1d2da91b 100644
--- a/sys/uvm/uvm_page_i.h
+++ b/sys/uvm/uvm_page_i.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page_i.h,v 1.8 1998/08/13 02:11:02 eeh Exp $ */
+/* $NetBSD: uvm_page_i.h,v 1.10 1999/05/24 19:10:57 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -80,6 +80,40 @@
#if defined(UVM_PAGE_INLINE) || defined(UVM_PAGE)
/*
+ * uvm_lock_fpageq: lock the free page queue
+ *
+ * => free page queue can be accessed in interrupt context, so this
+ * blocks all interrupts that can cause memory allocation, and
+ * returns the previous interrupt level.
+ */
+
+PAGE_INLINE int
+uvm_lock_fpageq()
+{
+ int s;
+
+ s = splimp();
+ simple_lock(&uvm.fpageqlock);
+ return (s);
+}
+
+/*
+ * uvm_unlock_fpageq: unlock the free page queue
+ *
+ * => caller must supply interrupt level returned by uvm_lock_fpageq()
+ * so that it may be restored.
+ */
+
+PAGE_INLINE void
+uvm_unlock_fpageq(s)
+ int s;
+{
+
+ simple_unlock(&uvm.fpageqlock);
+ splx(s);
+}
+
+/*
* uvm_pagelookup: look up a page
*
* => caller should lock object to keep someone from pulling the page
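The new free-page-queue interface bundles the spl dance with the simple lock;
the pattern used by the callers converted in this diff (uvm_page.c,
uvm_pdaemon.c, uvm_pglist.c) looks like this:

int s;

s = uvm_lock_fpageq();          /* splimp() + simple_lock(&uvm.fpageqlock) */
/* ... examine or modify uvm.page_free / uvmexp.free ... */
uvm_unlock_fpageq(s);           /* unlock, then restore the saved spl */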
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index c8050983dec..8c057a8bfdf 100644
--- a/sys/uvm/uvm_pager.c
+++ b/sys/uvm/uvm_pager.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pager.c,v 1.15 1999/03/25 18:48:55 mrg Exp $ */
+/* $NetBSD: uvm_pager.c,v 1.20 1999/05/26 19:16:36 thorpej Exp $ */
/*
*
@@ -88,7 +88,7 @@ uvm_pager_init()
*/
pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
- PAGER_MAP_SIZE, FALSE, FALSE, NULL);
+ PAGER_MAP_SIZE, 0, FALSE, NULL);
simple_lock_init(&pager_map_wanted_lock);
pager_map_wanted = FALSE;
@@ -113,6 +113,9 @@ uvm_pager_init()
*
* we basically just map in a blank map entry to reserve the space in the
* map and then use pmap_enter() to put the mappings in by hand.
+ *
+ * XXX It would be nice to know the direction of the I/O, so that we can
+ * XXX map only what is necessary.
*/
vaddr_t
@@ -169,6 +172,11 @@ ReStart:
panic("uvm_pagermapin: page not busy");
#endif
+ /*
+ * XXX VM_PROT_DEFAULT includes VM_PROT_EXEC; is that
+ * XXX really necessary? It could lead to unnecessary
+ * XXX instruction cache flushes.
+ */
pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp),
VM_PROT_DEFAULT, TRUE,
VM_PROT_READ | VM_PROT_WRITE);
@@ -698,8 +706,6 @@ int swblk; /* valid if (uobj == NULL && PGO_REALLOCSWAP) */
* had a successful pageout update the page!
*/
if (flags & PGO_PDFREECLUST) {
- /* XXX: with PMAP_NEW ref should already be clear,
- * but don't trust! */
pmap_clear_reference(PMAP_PGARG(ppsp[lcv]));
pmap_clear_modify(PMAP_PGARG(ppsp[lcv]));
ppsp[lcv]->flags |= PG_CLEAN;
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index c68355c7b7b..b3788da4d09 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdaemon.c,v 1.14 1999/03/26 17:33:30 chs Exp $ */
+/* $NetBSD: uvm_pdaemon.c,v 1.16 1999/05/24 19:10:57 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -365,11 +365,9 @@ uvmpd_scan_inactive(pglst)
* update our copy of "free" and see if we've met
* our target
*/
- s = splimp();
- uvm_lock_fpageq();
+ s = uvm_lock_fpageq();
free = uvmexp.free;
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
dirtyreacts == UVMPD_NUMDIRTYREACTS) {
@@ -952,11 +950,9 @@ uvmpd_scan()
/*
* get current "free" page count
*/
- s = splimp();
- uvm_lock_fpageq();
+ s = uvm_lock_fpageq();
free = uvmexp.free;
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
#ifndef __SWAP_BROKEN
/*
diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h
index 4590f1cef10..56ea153296e 100644
--- a/sys/uvm/uvm_pdaemon.h
+++ b/sys/uvm/uvm_pdaemon.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdaemon.h,v 1.5 1998/02/10 14:12:28 mrg Exp $ */
+/* $NetBSD: uvm_pdaemon.h,v 1.6 1999/03/25 18:48:56 mrg Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c
index 042ab2b8749..c24125e18ec 100644
--- a/sys/uvm/uvm_pglist.c
+++ b/sys/uvm/uvm_pglist.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pglist.c,v 1.5.2.1 1998/07/30 14:04:15 eeh Exp $ */
+/* $NetBSD: uvm_pglist.c,v 1.7 1999/05/24 19:10:58 thorpej Exp $ */
#define VM_PAGE_ALLOC_MEMORY_STATS
@@ -136,8 +136,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
/*
* Block all memory allocation and lock the free list.
*/
- s = splimp();
- uvm_lock_fpageq(); /* lock free page queue */
+ s = uvm_lock_fpageq(); /* lock free page queue */
/* Are there even any free pages? */
for (idx = 0; idx < VM_NFREELIST; idx++)
@@ -238,8 +237,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
error = 0;
out:
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
/*
* check to see if we need to generate some free pages waking
@@ -271,8 +269,7 @@ uvm_pglistfree(list)
/*
* Block all memory allocation and lock the free list.
*/
- s = splimp();
- uvm_lock_fpageq();
+ s = uvm_lock_fpageq();
while ((m = list->tqh_first) != NULL) {
#ifdef DIAGNOSTIC
@@ -287,6 +284,5 @@ uvm_pglistfree(list)
STAT_DECR(uvm_pglistalloc_npages);
}
- uvm_unlock_fpageq();
- splx(s);
+ uvm_unlock_fpageq(s);
}
diff --git a/sys/uvm/uvm_uio.c b/sys/uvm/uvm_uio.c
new file mode 100644
index 00000000000..84ef108b2b5
--- /dev/null
+++ b/sys/uvm/uvm_uio.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 1999 Artur Grabowski <art@openbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of his contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_uio.h>
+
+int uvm_uio_enable = 1;
+int uvm_uio_num_try = 0;
+int uvm_uio_num_success = 0;
+
+/*
+ * m_ext functions.
+ */
+void uvm_mbuf_free __P((struct mbuf *));
+void uvm_mbuf_ref __P((struct mbuf *));
+
+/*
+ * returns the length of I/O, 0 on failure.
+ *
+ * Should not be called if UVM_UIO_TRY(uio) has been checked first.
+ */
+size_t
+uvm_uio_to_mbuf(uio, mbuf)
+ struct uio *uio;
+ struct mbuf *mbuf;
+{
+ struct vm_map *map;
+ vaddr_t realbase, base, kva;
+ vsize_t reallen, len, offset;
+ struct vm_page **pages;
+ int npages;
+ struct iovec *iov;
+ struct uvm_mbuf *um;
+ struct mbuf *m;
+#ifndef PMAP_NEW
+ int i;
+#endif
+
+ uvm_uio_num_try++;
+
+ if ((mbuf->m_flags & M_EXT)) {
+ printf("uvm_uio_to_mbuf: fail 1\n");
+ return 0;
+ }
+
+ map = &uio->uio_procp->p_vmspace->vm_map;
+ iov = uio->uio_iov;
+
+ /*
+ * XXX - check if iov_len is bigger than max vsize_t
+ */
+
+ reallen = (vsize_t)iov->iov_len;
+ realbase = (vaddr_t)iov->iov_base;
+
+ /*
+ * Check alignment.
+ *
+ * What we really want is to somehow tell the caller how much the
+ * uios should be adjusted and try again.
+ */
+ if ((realbase & (sizeof(long) - 1)) != 0) {
+ printf("uvm_uio_to_mbuf: not aligned\n");
+ return 0;
+ }
+
+ base = trunc_page(realbase);
+ offset = realbase - base;
+
+ /*
+ * truncate reallen here so that we won't do a huge malloc.
+ * Subtract offset so that the next round will be page aligned.
+ */
+ if (reallen > UVM_UIO_LIMIT)
+ reallen = UVM_UIO_LIMIT - offset;
+
+ len = reallen + offset;
+ len = round_page(len);
+ npages = atop(len);
+
+ if ((mbuf->m_flags & M_PKTHDR)) {
+
+ MGET(m, M_WAIT, MT_DATA);
+ mbuf->m_len = 0;
+ mbuf->m_next = m;
+ } else {
+ m = mbuf;
+ m->m_next = NULL;
+ }
+
+ MALLOC(um, struct uvm_mbuf *, sizeof(struct uvm_mbuf), M_TEMP,
+ M_WAITOK);
+
+ /*
+ * If the pages we have less than UVM_UIO_SMALL_PAGES, we can fit
+ * them into the pages struct in uvm_uio.
+ */
+ if (npages > UVM_UIO_SMALL_PAGES)
+ MALLOC(pages, struct vm_page **,
+ npages * sizeof(struct vm_page *), M_TEMP, M_WAITOK);
+ else
+ pages = um->um_pages_small;
+
+ /*
+ * Loan the pages we want.
+ */
+ if (uvm_loan(map, base, len, (void **)pages, UVM_LOAN_TOPAGE) !=
+ KERN_SUCCESS) {
+ /*
+ * XXX - This is really ENOMEM or EFAULT.
+ */
+ printf("uvm_uio_to_mbuf: loan failed\n");
+
+ goto fail;
+ }
+
+ /*
+ * Allocate space to map pages.
+ */
+ kva = vm_map_min(kernel_map);
+ if (uvm_map(kernel_map, &kva, len, NULL, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_READ, UVM_PROT_READ, UVM_INH_NONE,
+ UVM_ADV_SEQUENTIAL, 0)) != KERN_SUCCESS) {
+ uvm_unloanpage(pages, npages);
+ goto fail;
+ }
+
+ /*
+ * Initialize um.
+ */
+ um->um_pages = pages;
+ um->um_npages = npages;
+ um->um_usecount = 1;
+ um->um_kva = kva;
+
+ printf("mapping: 0x%x -> 0x%x\n", kva, kva + len);
+ /*
+ * Map pages.
+ */
+#ifdef PMAP_NEW
+ pmap_kenter_pgs(kva, pages, npages);
+#else
+ for (i = 0; i < npages; i++, kva += PAGE_SIZE)
+ pmap_enter(pmap_kernel(), kva, VM_PAGE_TO_PHYS(pages[i]),
+ VM_PROT_READ, TRUE, VM_PROT_READ);
+#endif
+
+ /*
+ * Update mbuf.
+ */
+ m->m_flags |= M_EXT | M_RONLY;
+ m->m_data = (caddr_t)(um->um_kva + offset);
+ m->m_len = reallen;
+ m->m_ext.ext_free = uvm_mbuf_free;
+ m->m_ext.ext_ref = uvm_mbuf_ref;
+ /*
+ * We lie about those two to avoid problems with someone trying
+ * to prepend data.
+ */
+ m->m_ext.ext_buf = (caddr_t)(um->um_kva + offset);
+ m->m_ext.ext_size = reallen;
+ m->m_ext.ext_handle = um;
+
+ /*
+ * Update uio.
+ */
+ if ((iov->iov_len -= reallen) == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ }
+ uio->uio_resid -= reallen;
+
+ uvm_uio_num_success++;
+
+ return reallen;
+fail:
+ if (npages > UVM_UIO_SMALL_PAGES)
+ FREE(pages, M_TEMP);
+
+ if (m != mbuf)
+ m_freem(m);
+
+ FREE(um, M_TEMP);
+
+ return 0;
+}
+
+void
+uvm_mbuf_free(mb)
+ struct mbuf *mb;
+{
+ struct uvm_mbuf *um = (struct uvm_mbuf *)mb->m_ext.ext_handle;
+ vsize_t len;
+
+ if (--um->um_usecount)
+ return;
+
+ len = ptoa(um->um_npages);
+
+ printf("unmapping: 0x%x -> 0x%x\n", um->um_kva, um->um_kva + len);
+#ifdef PMAP_NEW
+ pmap_kremove(um->um_kva, len);
+#else
+ pmap_remove(pmap_kernel(), um->um_kva, um->um_kva + len);
+#endif
+
+ uvm_unloanpage(um->um_pages, um->um_npages);
+ uvm_unmap(kernel_map, um->um_kva, um->um_kva + len);
+ uvm_km_free_wakeup(kernel_map, um->um_kva, len);
+ if (um->um_npages > UVM_UIO_SMALL_PAGES)
+ FREE(um->um_pages, M_TEMP);
+
+ FREE(um, M_TEMP);
+#ifdef DIAGNOSTIC
+ mb->m_data = NULL;
+ mb->m_ext.ext_handle = NULL;
+ mb->m_flags &= ~M_EXT;
+#endif
+}
+
+void
+uvm_mbuf_ref(mb)
+ struct mbuf *mb;
+{
+ ((struct uvm_mbuf *)mb->m_ext.ext_handle)->um_usecount++;
+}
diff --git a/sys/uvm/uvm_uio.h b/sys/uvm/uvm_uio.h
new file mode 100644
index 00000000000..4a8ce974664
--- /dev/null
+++ b/sys/uvm/uvm_uio.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1999 Artur Grabowski <art@openbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of his contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * If the number of pages we're about to transfer is smaller than this number
+ * we use the pre-allocated array.
+ */
+#define UVM_UIO_SMALL_PAGES 8
+
+/*
+ * Limit transfers to this number to avoid running out of memory.
+ */
+#define UVM_UIO_LIMIT (256 * PAGE_SIZE)
+
+/*
+ * m_ext structure.
+ */
+struct uvm_mbuf {
+ struct vm_page **um_pages; /* The pages */
+ int um_npages; /* number of pages */
+ int um_usecount; /* ref cnt */
+ vaddr_t um_kva; /* where the pages are mapped */
+ struct vm_page *um_pages_small[UVM_UIO_SMALL_PAGES];
+};
+
+extern int uvm_uio_enable;
+
+#define UVM_UIO_MINIO PAGE_SIZE /* XXX - tweak */
+#define UVM_UIO_TRY(uio) (uvm_uio_enable && \
+ ((uio)->uio_iov->iov_len >= UVM_UIO_MINIO) && \
+ ((uio)->uio_procp != NULL) && \
+ ((uio)->uio_rw == UIO_WRITE) && \
+ ((uio)->uio_segflg == UIO_USERSPACE))
+
+size_t uvm_uio_to_mbuf __P((struct uio *, struct mbuf *));
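A hedged sketch of how a write path might hook into the new zero-copy interface.
The real consumer would be the socket send code, which is not part of this diff;
the fallback branch and the variable names are illustrative only:

size_t n = 0;

if (UVM_UIO_TRY(uio))
        n = uvm_uio_to_mbuf(uio, m);    /* loans and maps the user pages */
if (n == 0)
        error = uiomove(mtod(m, caddr_t), (int)len, uio);   /* ordinary copy */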