Diffstat (limited to 'sys')
-rw-r--r--  sys/arch/i386/conf/GENERIC_NEW    |    7
-rw-r--r--  sys/arch/i386/conf/files.i386     |    5
-rw-r--r--  sys/arch/i386/i386/db_memrw.c     |   12
-rw-r--r--  sys/arch/i386/i386/gdt.c          |   29
-rw-r--r--  sys/arch/i386/i386/genassym.cf    |    9
-rw-r--r--  sys/arch/i386/i386/locore.s       |   93
-rw-r--r--  sys/arch/i386/i386/machdep.c      |   25
-rw-r--r--  sys/arch/i386/i386/pmap.c         | 4725
-rw-r--r--  sys/arch/i386/i386/sys_machdep.c  |   72
-rw-r--r--  sys/arch/i386/i386/trap.c         |    9
-rw-r--r--  sys/arch/i386/i386/vm_machdep.c   |   61
-rw-r--r--  sys/arch/i386/include/gdt.h       |    7
-rw-r--r--  sys/arch/i386/include/pcb.h       |    5
-rw-r--r--  sys/arch/i386/include/pmap.h      |  213
-rw-r--r--  sys/arch/i386/include/pmap.new.h  |  509
-rw-r--r--  sys/arch/i386/include/vmparam.h   |    8
-rw-r--r--  sys/uvm/uvm_fault.c               |   13
-rw-r--r--  sys/vm/pmap.h                     |   31
18 files changed, 4153 insertions(+), 1680 deletions(-)
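The hunks that follow repeatedly replace pmap_pte(pmap_kernel(), va) with direct arithmetic on the recursive page-table mapping, e.g. ptep0 = PTE_BASE + i386_btop(addr) in db_memrw.c and pte = kvtopte(va) in machdep.c. The sketch below illustrates only that address calculation, not the kernel code itself: PGSHIFT and PDSHIFT are the usual i386 values, while PDSLOT_PTE is a placeholder for whatever slot the new machine/pmap.h reserves for the recursive page-directory entry.

/*
 * Illustrative sketch of the linear (recursive) PTE mapping arithmetic:
 * the page directory is installed as one of its own entries (PDSLOT_PTE),
 * so all page tables appear as a flat array of PTEs starting at PTE_BASE,
 * and the PTE for any virtual address can be found by indexing that array.
 * PDSLOT_PTE below is a placeholder value, not taken from pmap.new.h.
 */
#include <stdio.h>
#include <stdint.h>

#define PGSHIFT     12                          /* 4 KB pages */
#define PDSHIFT     22                          /* 4 MB per PD slot */
#define PDSLOT_PTE  0x33FU                      /* placeholder recursive slot */

/* base of the linear PTE array created by the recursive PDE */
#define PTE_BASE    ((uint32_t)PDSLOT_PTE << PDSHIFT)

/* i386_btop(): byte address -> page index (also the PTE index) */
static uint32_t
i386_btop(uint32_t va)
{
        return va >> PGSHIFT;
}

/* vtopte(): virtual address of the PTE that maps va (4-byte PTEs) */
static uint32_t
vtopte(uint32_t va)
{
        return PTE_BASE + i386_btop(va) * (uint32_t)sizeof(uint32_t);
}

int
main(void)
{
        uint32_t va = 0xd0201234;               /* arbitrary kernel VA */

        printf("PTE_BASE   = 0x%08x\n", (unsigned)PTE_BASE);
        printf("vtopte(va) = 0x%08x for va 0x%08x\n",
            (unsigned)vtopte(va), (unsigned)va);
        return 0;
}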
diff --git a/sys/arch/i386/conf/GENERIC_NEW b/sys/arch/i386/conf/GENERIC_NEW new file mode 100644 index 00000000000..f00ee814461 --- /dev/null +++ b/sys/arch/i386/conf/GENERIC_NEW @@ -0,0 +1,7 @@ +# $OpenBSD: GENERIC_NEW,v 1.1 2001/03/22 23:36:50 niklas Exp $ +# +# GENERIC_NEW -- everything that's currently supported + PMAP_NEW +# + +include "arch/i386/conf/GENERIC" +option PMAP_NEW # use new pmap diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index b7f1b5c20ba..836403bf5e5 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.77 2001/03/05 15:13:43 aaron Exp $ +# $OpenBSD: files.i386,v 1.78 2001/03/22 23:36:51 niklas Exp $ # $NetBSD: files.i386,v 1.73 1996/05/07 00:58:36 thorpej Exp $ # # new style config file for i386 architecture @@ -30,7 +30,8 @@ file arch/i386/i386/i686_mem.c mtrr file arch/i386/i386/k6_mem.c mtrr file arch/i386/i386/microtime.s file arch/i386/i386/ns_cksum.c ns -file arch/i386/i386/pmap.c +file arch/i386/i386/pmap.c pmap_new +file arch/i386/i386/pmap.old.c !pmap_new file arch/i386/i386/process_machdep.c file arch/i386/i386/random.s file arch/i386/i386/sys_machdep.c diff --git a/sys/arch/i386/i386/db_memrw.c b/sys/arch/i386/i386/db_memrw.c index cfa980a2ef2..18335b985e5 100644 --- a/sys/arch/i386/i386/db_memrw.c +++ b/sys/arch/i386/i386/db_memrw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_memrw.c,v 1.1 2000/07/05 14:26:34 hugh Exp $ */ +/* $OpenBSD: db_memrw.c,v 1.2 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: db_memrw.c,v 1.6 1999/04/12 20:38:19 pk Exp $ */ /* @@ -60,7 +60,9 @@ db_read_bytes(addr, size, data) *data++ = *src++; } +#ifndef PMAP_NEW pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif /* * Write bytes to kernel address space for debugger. 
@@ -82,14 +84,22 @@ db_write_bytes(addr, size, data) if (addr >= VM_MIN_KERNEL_ADDRESS && addr < (vm_offset_t)&etext) { +#ifdef PMAP_NEW + ptep0 = PTE_BASE + i386_btop(addr); +#else ptep0 = pmap_pte(pmap_kernel(), addr); +#endif oldmap0 = *ptep0; *(int *)ptep0 |= /* INTEL_PTE_WRITE */ PG_RW; addr1 = i386_trunc_page(addr + size - 1); if (i386_trunc_page(addr) != addr1) { /* data crosses a page boundary */ +#ifdef PMAP_NEW + ptep1 = PTE_BASE + i386_btop(addr1); +#else ptep1 = pmap_pte(pmap_kernel(), addr1); +#endif oldmap1 = *ptep1; *(int *)ptep1 |= /* INTEL_PTE_WRITE */ PG_RW; } diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c index 0ff2b7c30a1..4df49b135e5 100644 --- a/sys/arch/i386/i386/gdt.c +++ b/sys/arch/i386/i386/gdt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.c,v 1.11 1999/02/26 04:32:36 art Exp $ */ +/* $OpenBSD: gdt.c,v 1.12 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: gdt.c,v 1.8 1996/05/03 19:42:06 christos Exp $ */ /*- @@ -307,8 +307,13 @@ tss_free(pcb) } void +#ifdef PMAP_NEW +ldt_alloc(pmap, ldt, len) + struct pmap *pmap; +#else ldt_alloc(pcb, ldt, len) struct pcb *pcb; +#endif union descriptor *ldt; size_t len; { @@ -317,13 +322,33 @@ ldt_alloc(pcb, ldt, len) slot = gdt_get_slot(); setsegment(&dynamic_gdt[slot].sd, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0); +#ifdef PMAP_NEW + simple_lock(&pmap->pm_lock); + pmap->pm_ldt_sel = GSEL(slot, SEL_KPL); + simple_unlock(&pmap->pm_lock); +#else pcb->pcb_ldt_sel = GSEL(slot, SEL_KPL); +#endif } void +#ifdef PMAP_NEW +ldt_free(pmap) + struct pmap *pmap; +#else ldt_free(pcb) struct pcb *pcb; +#endif { + int slot; + +#ifdef PMAP_NEW + simple_lock(&pmap->pm_lock); + slot = IDXSEL(pmap->pm_ldt_sel); + simple_unlock(&pmap->pm_lock); +#else + slot = IDXSEL(pcb->pcb_ldt_sel); +#endif - gdt_put_slot(IDXSEL(pcb->pcb_ldt_sel)); + gdt_put_slot(slot); } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index a5f2962be43..d6c8d82511c 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.7 1999/02/26 04:28:50 art Exp $ +# $OpenBSD: genassym.cf,v 1.8 2001/03/22 23:36:51 niklas Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -73,9 +73,16 @@ endif define SRUN SRUN # values for page tables +ifdef PMAP_NEW +define PDSLOT_KERN PDSLOT_KERN +define PDSLOT_PTE PDSLOT_PTE +define NKPTP_MIN NKPTP_MIN +define NKPTP_MAX NKPTP_MAX +else define PTDPTDI PTDPTDI define KPTDI KPTDI define NKPDE NKPDE +endif define APTDPTDI APTDPTDI # values for virtual memory diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index 08a593267b2..f05505d6b26 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.53 2001/01/24 09:37:58 hugh Exp $ */ +/* $OpenBSD: locore.s,v 1.54 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -120,11 +120,18 @@ * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). 
*/ - .globl _PTmap,_PTD,_PTDpde,_Sysmap + .globl _PTmap,_PTD,_PTDpde +#ifdef PMAP_NEW + .set _PTmap,(PDSLOT_PTE << PDSHIFT) + .set _PTD,(_PTmap + PDSLOT_PTE * NBPG) + .set _PTDpde,(_PTD + PDSLOT_PTE * 4) # XXX 4 == sizeof pde +#else .set _PTmap,(PTDPTDI << PDSHIFT) .set _PTD,(_PTmap + PTDPTDI * NBPG) .set _PTDpde,(_PTD + PTDPTDI * 4) # XXX 4 == sizeof pde + .globl _Sysmap .set _Sysmap,(_PTmap + KPTDI * NBPG) +#endif /* * APTmap, APTD is the alternate recursive pagemap. @@ -421,7 +428,11 @@ try586: /* Use the `cpuid' instruction. */ #define PROC0PDIR ((0) * NBPG) #define PROC0STACK ((1) * NBPG) #define SYSMAP ((1+UPAGES) * NBPG) +#ifdef PMAP_NEW +#define TABLESIZE ((1+UPAGES) * NBPG) /* + nkpde * NBPG */ +#else #define TABLESIZE ((1+UPAGES+NKPDE) * NBPG) +#endif /* Clear the BSS. */ movl $RELOC(_edata),%edi @@ -435,24 +446,48 @@ try586: /* Use the `cpuid' instruction. */ stosl /* Find end of kernel image. */ - movl $RELOC(_end),%esi + movl $RELOC(_end),%edi #if (defined(DDB) || NKSYMS > 0) && !defined(SYMTAB_SPACE) /* Save the symbols (if loaded). */ movl RELOC(_esym),%eax testl %eax,%eax jz 1f subl $KERNBASE,%eax - movl %eax,%esi + movl %eax,%edi 1: #endif /* Calculate where to start the bootstrap tables. */ + movl %edi,%esi # edi = esym ? esym : end addl $PGOFSET, %esi # page align up andl $~PGOFSET, %esi +#ifdef PMAP_NEW + /* + * Calculate the size of the kernel page table directory, and + * how many entries it will have. + */ + movl RELOC(_nkpde),%ecx # get nkpde + cmpl $NKPTP_MIN,%ecx # larger than min? + jge 1f + movl $NKPTP_MIN,%ecx # set at min + jmp 2f +1: cmpl $NKPTP_MAX,%ecx # larger than max? + jle 2f + movl $NKPTP_MAX,%ecx +2: + + /* Clear memory for bootstrap tables. */ + shll $PGSHIFT,%ecx + addl $TABLESIZE,%ecx + addl %esi,%ecx # end of tables + subl %edi,%ecx # size of tables + shrl $2,%ecx +#else /* Clear memory for bootstrap tables. */ movl %esi, %edi movl $((TABLESIZE + 3) >> 2), %ecx # size of tables +#endif xorl %eax, %eax cld rep @@ -496,7 +531,14 @@ try586: /* Use the `cpuid' instruction. */ /* Map the data, BSS, and bootstrap tables read-write. */ leal (PG_V|PG_KW)(%edx),%eax +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx + shll $PGSHIFT,%ecx + addl $TABLESIZE,%ecx + addl %esi,%ecx # end of tables +#else leal (TABLESIZE)(%esi),%ecx # end of tables +#endif subl %edx,%ecx # subtract end of text shrl $PGSHIFT,%ecx fillkpt @@ -508,7 +550,14 @@ try586: /* Use the `cpuid' instruction. */ /* * Construct a page table directory. - * +*/ +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx # count of pde s, + leal (PROC0PDIR+0*4)(%esi),%ebx # where temp maps! + leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 + fillkpt +#else +/* * Install a PDE for temporary double map of kernel text. * Maps two pages, in case the kernel is larger than 4M. * XXX: should the number of pages to map be decided at run-time? @@ -519,18 +568,29 @@ try586: /* Use the `cpuid' instruction. */ movl %eax,(PROC0PDIR+1*4)(%esi) # map it too /* code below assumes %eax == sysmap physaddr, so we adjust it back */ subl $NBPG, %eax +#endif /* * Map kernel PDEs: this is the real mapping used * after the temp mapping outlives its usefulness. */ +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx # count of pde s, + leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # map them high + leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 +#else movl $NKPDE,%ecx # count of pde's leal (PROC0PDIR+KPTDI*4)(%esi),%ebx # map them high +#endif fillkpt /* Install a PDE recursively mapping page directory as a page table! 
*/ leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd +#ifdef PMAP_NEW + movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot +#else movl %eax,(PROC0PDIR+PTDPTDI*4)(%esi) # phys addr from above +#endif /* Save phys. addr of PTD, for libkvm. */ movl %esi,RELOC(_PTDpaddr) @@ -548,11 +608,27 @@ try586: /* Use the `cpuid' instruction. */ begin: /* Now running relocated at KERNBASE. Remove double mapping. */ +#ifdef PMAP_NEW + movl _nkpde,%ecx # for this many pde s, + leal (PROC0PDIR+0*4)(%esi),%ebx # which is where temp maps! + addl $(KERNBASE), %ebx # now use relocated address +1: movl $0,(%ebx) + addl $4,%ebx # next pde + loop 1b +#else movl $0,(PROC0PDIR+0*4)(%esi) movl $0,(PROC0PDIR+1*4)(%esi) +#endif /* Relocate atdevbase. */ +#ifdef PMAP_NEW + movl _nkpde,%edx + shll $PGSHIFT,%edx + addl $(TABLESIZE+KERNBASE),%edx + addl %esi,%edx +#else leal (TABLESIZE+KERNBASE)(%esi),%edx +#endif movl %edx,_atdevbase /* Set up bootstrap stack. */ @@ -562,7 +638,14 @@ begin: movl %esi,PCB_CR3(%eax) # pcb->pcb_cr3 xorl %ebp,%ebp # mark end of frames +#ifdef PMAP_NEW + movl _nkpde,%eax + shll $PGSHIFT,%eax + addl $TABLESIZE,%eax + addl %esi,%eax # skip past stack and page tables +#else leal (TABLESIZE)(%esi),%eax # skip past stack and page tables +#endif pushl %eax call _init386 # wire 386 chip for unix operation addl $4,%esp diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 77ca81809b5..059ff37241d 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.151 2001/03/16 00:24:00 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.152 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -2003,7 +2003,9 @@ extern int IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), #if defined(I586_CPU) extern int IDTVEC(f00f_redirect); +#ifndef PMAP_NEW pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif int cpu_f00f_bug = 0; @@ -2032,7 +2034,11 @@ fix_f00f() SEL_KPL, GCODE_SEL); /* Map first page RO */ +#ifdef PMAP_NEW + pte = PTE_BASE + i386_btop(va); +#else pte = pmap_pte(pmap_kernel(), va); +#endif *pte &= ~PG_RW; /* Reload idtr */ @@ -2053,6 +2059,7 @@ init386(first_avail) bios_memmap_t *im; proc0.p_addr = proc0paddr; + curpcb = &proc0.p_addr->u_pcb; /* * Initialize the I/O port and I/O mem extent maps. @@ -2422,12 +2429,14 @@ cpu_reset() lidt(®ion); __asm __volatile("divl %0,%1" : : "q" (0), "a" (0)); +#if 1 /* * Try to cause a triple fault and watchdog reset by unmapping the * entire address space. */ bzero((caddr_t)PTD, NBPG); pmap_update(); +#endif for (;;); } @@ -2685,6 +2694,9 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) { u_long pa, endpa; vm_offset_t va; +#ifdef PMAP_NEW + pt_entry_t *pte; +#endif pa = i386_trunc_page(bpa); endpa = i386_round_page(bpa + size); @@ -2715,10 +2727,19 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) * on those machines. 
*/ if (cpu_class != CPUCLASS_386) { +#ifdef PMAP_NEW + pte = kvtopte(va); + if (cacheable) + *pte &= ~PG_N; + else + *pte |= PG_N; + pmap_update_pg(va); +#else if (!cacheable) pmap_changebit(pa, PG_N, ~0); else pmap_changebit(pa, 0, ~PG_N); +#endif } } @@ -2881,7 +2902,7 @@ _bus_dmamap_load(t, map, buf, buflen, p, flags) { bus_size_t sgsize; bus_addr_t curaddr, lastaddr, baddr, bmask; - caddr_t vaddr = buf; + vaddr_t vaddr = (vaddr_t)buf; int first, seg; pmap_t pmap; diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index b16f45a6d90..28ec9a8ab5e 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,14 +1,10 @@ -/* $OpenBSD: pmap.c,v 1.37 2001/03/22 20:44:59 niklas Exp $ */ -/* $NetBSD: pmap.c,v 1.36 1996/05/03 19:42:22 christos Exp $ */ +/* $OpenBSD: pmap.c,v 1.38 2001/03/22 23:36:51 niklas Exp $ */ +/* $NetBSD: pmap.c,v 1.84 2000/02/21 02:01:24 chs Exp $ */ /* - * Copyright (c) 1993, 1994, 1995 Charles M. Hannum. All rights reserved. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -20,1884 +16,3747 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)pmap.c 7.7 (Berkeley) 5/12/91 - */ - -/* - * Derived originally from an old hp300 version by Mike Hibler. The version - * by William Jolitz has been heavily modified to allow non-contiguous - * mapping of physical memory by Wolfgang Solfrank, and to fix several bugs - * and greatly speedup it up by Charles Hannum. - * - * A recursive map [a pde which points to the page directory] is used to map - * the page tables using the pagetables themselves. This is done to reduce - * the impact on kernel virtual memory for lots of sparse address space, and - * to reduce the cost of memory to each process. - */ - -/* - * Manages physical address maps. 
- * - * In addition to hardware address maps, this - * module is called upon to provide software-use-only - * maps which may or may not be stored in the same - * form as hardware maps. These pseudo-maps are - * used to store intermediate results from copy - * operations to and from address spaces. - * - * Since the information managed by this module is - * also stored by the logical address mapping module, - * this module may throw away valid virtual-to-physical - * mappings at almost any time. However, invalidations - * of virtual-to-physical mappings must be done as - * requested. - * - * In order to cope with hardware architectures which - * make virtual-to-physical map invalidates expensive, - * this module may delay invalidate or reduced protection - * operations until such time as they are actually - * necessary. This module is given full information as - * to which processors are currently using which maps, - * and to when physical maps must be made correct. + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * pmap.c: i386 pmap module rewrite + * Chuck Cranor <chuck@ccrc.wustl.edu> + * 11-Aug-97 + * + * history of this pmap module: in addition to my own input, i used + * the following references for this rewrite of the i386 pmap: + * + * [1] the NetBSD i386 pmap. this pmap appears to be based on the + * BSD hp300 pmap done by Mike Hibler at University of Utah. + * it was then ported to the i386 by William Jolitz of UUNET + * Technologies, Inc. Then Charles M. Hannum of the NetBSD + * project fixed some bugs and provided some speed ups. + * + * [2] the FreeBSD i386 pmap. this pmap seems to be the + * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson + * and David Greenman. + * + * [3] the Mach pmap. this pmap, from CMU, seems to have migrated + * between several processors. the VAX version was done by + * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 + * version was done by Lance Berc, Mike Kupfer, Bob Baron, + * David Golub, and Richard Draves. the alpha version was + * done by Alessandro Forin (CMU/Mach) and Chris Demetriou + * (NetBSD/alpha). 
+ */ + +#ifdef __NetBSD__ +#include "opt_cputype.h" +#include "opt_user_ldt.h" +#include "opt_lockdebug.h" +#include "opt_multiprocessor.h" +#endif + #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/malloc.h> +#include <sys/pool.h> #include <sys/user.h> #include <vm/vm.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> -#if defined(UVM) #include <uvm/uvm.h> -#endif #include <machine/cpu.h> +#include <machine/specialreg.h> +#include <machine/gdt.h> #include <dev/isa/isareg.h> +#ifdef __NetBSD__ +#include <machine/isa_machdep.h> +#endif +#ifdef __OpenBSD__ +#include <sys/msgbuf.h> #include <stand/boot/bootarg.h> -#include <i386/isa/isa_machdep.h> +#endif -#include "isa.h" -#include "isadma.h" +/* + * general info: + * + * - for an explanation of how the i386 MMU hardware works see + * the comments in <machine/pte.h>. + * + * - for an explanation of the general memory structure used by + * this pmap (including the recursive mapping), see the comments + * in <machine/pmap.h>. + * + * this file contains the code for the "pmap module." the module's + * job is to manage the hardware's virtual to physical address mappings. + * note that there are two levels of mapping in the VM system: + * + * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's + * to map ranges of virtual address space to objects/files. for + * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only + * to the file /bin/ls starting at offset zero." note that + * the upper layer mapping is not concerned with how individual + * vm_pages are mapped. + * + * [2] the lower layer of the VM system (the pmap) maintains the mappings + * from virtual addresses. it is concerned with which vm_page is + * mapped where. for example, when you run /bin/ls and start + * at page 0x1000 the fault routine may lookup the correct page + * of the /bin/ls file and then ask the pmap layer to establish + * a mapping for it. + * + * note that information in the lower layer of the VM system can be + * thrown away since it can easily be reconstructed from the info + * in the upper layer. + * + * data structures we use include: + * + * - struct pmap: describes the address space of one thread + * - struct pv_entry: describes one <PMAP,VA> mapping of a PA + * - struct pv_head: there is one pv_head per managed page of + * physical memory. the pv_head points to a list of pv_entry + * structures which describe all the <PMAP,VA> pairs that this + * page is mapped in. this is critical for page based operations + * such as pmap_page_protect() [change protection on _all_ mappings + * of a page] + * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. + * if we run out of pv_entry's we allocate a new pv_page and free + * its pv_entrys. + * - pmap_remove_record: a list of virtual addresses whose mappings + * have been changed. used for TLB flushing. + */ /* - * Allocate various and sundry SYSMAPs used in the days of old VM - * and not yet converted. XXX. + * memory allocation + * + * - there are three data structures that we must dynamically allocate: + * + * [A] new process' page directory page (PDP) + * - plan 1: done at pmap_pinit() we use + * uvm_km_alloc(kernel_map, NBPG) [fka kmem_alloc] to do this + * allocation. + * + * if we are low in free physical memory then we sleep in + * uvm_km_alloc -- in this case this is ok since we are creating + * a new pmap and should not be holding any locks. + * + * if the kernel is totally out of virtual space + * (i.e. 
uvm_km_alloc returns NULL), then we panic. + * + * XXX: the fork code currently has no way to return an "out of + * memory, try again" error code since uvm_fork [fka vm_fork] + * is a void function. + * + * [B] new page tables pages (PTP) + * - plan 1: call uvm_pagealloc() + * => success: zero page, add to pm_pdir + * => failure: we are out of free vm_pages + * - plan 2: using a linked LIST of active pmaps we attempt + * to "steal" a PTP from another process. we lock + * the target pmap with simple_lock_try so that if it is + * busy we do not block. + * => success: remove old mappings, zero, add to pm_pdir + * => failure: highly unlikely + * - plan 3: panic + * + * note: for kernel PTPs, we start with NKPTP of them. as we map + * kernel memory (at uvm_map time) we check to see if we've grown + * the kernel pmap. if so, we call the optional function + * pmap_growkernel() to grow the kernel PTPs in advance. + * + * [C] pv_entry structures + * - plan 1: try to allocate one off the free list + * => success: done! + * => failure: no more free pv_entrys on the list + * - plan 2: try to allocate a new pv_page to add a chunk of + * pv_entrys to the free list + * [a] obtain a free, unmapped, VA in kmem_map. either + * we have one saved from a previous call, or we allocate + * one now using a "vm_map_lock_try" in uvm_map + * => success: we have an unmapped VA, continue to [b] + * => failure: unable to lock kmem_map or out of VA in it. + * move on to plan 3. + * [b] allocate a page in kmem_object for the VA + * => success: map it in, free the pv_entry's, DONE! + * => failure: kmem_object locked, no free vm_pages, etc. + * save VA for later call to [a], go to plan 3. + * - plan 3: using the pv_entry/pv_head lists find a pv_entry + * structure that is part of a non-kernel lockable pmap + * and "steal" that pv_entry by removing the mapping + * and reusing that pv_entry. + * => success: done + * => failure: highly unlikely: unable to lock and steal + * pv_entry + * - plan 4: we panic. */ -#define BSDVM_COMPAT 1 -#ifdef DEBUG -struct { - int kernel; /* entering kernel mapping */ - int user; /* entering user mapping */ - int ptpneeded; /* needed to allocate a PT page */ - int pwchange; /* no mapping change, just wiring or protection */ - int wchange; /* no mapping change, just wiring */ - int mchange; /* was mapped but mapping to different page */ - int managed; /* a managed page */ - int firstpv; /* first mapping for this PA */ - int secondpv; /* second mapping for this PA */ - int ci; /* cache inhibited */ - int unmanaged; /* not a managed page */ - int flushes; /* cache flushes */ -} enter_stats; -struct { - int calls; - int removes; - int pvfirst; - int pvsearch; - int ptinvalid; - int uflushes; - int sflushes; -} remove_stats; - -int pmapdebug = 0 /* 0xffff */; -#define PDB_FOLLOW 0x0001 -#define PDB_INIT 0x0002 -#define PDB_ENTER 0x0004 -#define PDB_REMOVE 0x0008 -#define PDB_CREATE 0x0010 -#define PDB_PTPAGE 0x0020 -#define PDB_CACHE 0x0040 -#define PDB_BITS 0x0080 -#define PDB_COLLECT 0x0100 -#define PDB_PROTECT 0x0200 -#define PDB_PDRTAB 0x0400 -#define PDB_PARANOIA 0x2000 -#define PDB_WIRING 0x4000 -#define PDB_PVDUMP 0x8000 -#endif - -/* - * Get PDEs and PTEs for user/kernel address space - */ -#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PDSHIFT)&1023])) - -/* - * Empty PTEs and PDEs are always 0, but checking only the valid bit allows - * the compiler to generate `testb' rather than `testl'. 
- */ -#define pmap_pde_v(pde) (*(pde) & PG_V) -#define pmap_pte_pa(pte) (*(pte) & PG_FRAME) -#define pmap_pte_w(pte) (*(pte) & PG_W) -#define pmap_pte_m(pte) (*(pte) & PG_M) -#define pmap_pte_u(pte) (*(pte) & PG_U) -#define pmap_pte_v(pte) (*(pte) & PG_V) -#define pmap_pte_set_w(pte, v) ((v) ? (*(pte) |= PG_W) : (*(pte) &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) ((*(pte) &= ~PG_PROT), (*(pte) |= (v))) - -/* - * Given a map and a machine independent protection code, - * convert to a vax protection code. - */ -pt_entry_t protection_codes[8]; - -struct pmap kernel_pmap_store; - -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss)*/ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ -int npages; - -boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ -TAILQ_HEAD(pv_page_list, pv_page) pv_page_freelist; -int pv_nfree; - -pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); -struct pv_entry * pmap_alloc_pv __P((void)); -void pmap_free_pv __P((struct pv_entry *)); -void i386_protection_init __P((void)); -void pmap_collect_pv __P((void)); -__inline void pmap_remove_pv __P((pmap_t, vm_offset_t, struct pv_entry *)); -__inline void pmap_enter_pv __P((pmap_t, vm_offset_t, struct pv_entry *)); -void pmap_remove_all __P((vm_offset_t)); -void pads __P((pmap_t pm)); -void pmap_dump_pvlist __P((vm_offset_t phys, char *m)); -void pmap_pvdump __P((vm_offset_t pa)); - -#if BSDVM_COMPAT -#include <sys/msgbuf.h> +/* + * locking + * + * we have the following locks that we must contend with: + * + * "normal" locks: + * + * - pmap_main_lock + * this lock is used to prevent deadlock and/or provide mutex + * access to the pmap system. most operations lock the pmap + * structure first, then they lock the pv_lists (if needed). + * however, some operations such as pmap_page_protect lock + * the pv_lists and then lock pmaps. in order to prevent a + * cycle, we require a mutex lock when locking the pv_lists + * first. thus, the "pmap = >pv_list" lockers must gain a + * read-lock on pmap_main_lock before locking the pmap. and + * the "pv_list => pmap" lockers must gain a write-lock on + * pmap_main_lock before locking. since only one thread + * can write-lock a lock at a time, this provides mutex. + * + * "simple" locks: + * + * - pmap lock (per pmap, part of uvm_object) + * this lock protects the fields in the pmap structure including + * the non-kernel PDEs in the PDP, and the PTEs. it also locks + * in the alternate PTE space (since that is determined by the + * entry in the PDP). + * + * - pvh_lock (per pv_head) + * this lock protects the pv_entry list which is chained off the + * pv_head structure for a specific managed PA. it is locked + * when traversing the list (e.g. adding/removing mappings, + * syncing R/M bits, etc.) + * + * - pvalloc_lock + * this lock protects the data structures which are used to manage + * the free list of pv_entry structures. + * + * - pmaps_lock + * this lock protects the list of active pmaps (headed by "pmaps"). + * we lock it when adding or removing pmaps from this list. 
+ * + * - pmap_copy_page_lock + * locks the tmp kernel PTE mappings we used to copy data + * + * - pmap_zero_page_lock + * locks the tmp kernel PTE mapping we use to zero a page + * + * - pmap_tmpptp_lock + * locks the tmp kernel PTE mapping we use to look at a PTP + * in another process + * + * XXX: would be nice to have per-CPU VAs for the above 4 + */ + +/* + * locking data structures + */ + +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) +struct lock pmap_main_lock; +simple_lock_data_t pvalloc_lock; +simple_lock_data_t pmaps_lock; +simple_lock_data_t pmap_copy_page_lock; +simple_lock_data_t pmap_zero_page_lock; +simple_lock_data_t pmap_tmpptp_lock; + +#define PMAP_MAP_TO_HEAD_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0) +#define PMAP_MAP_TO_HEAD_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#define PMAP_HEAD_TO_MAP_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0) +#define PMAP_HEAD_TO_MAP_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +/* + * global data structures + */ + +struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ + +/* + * nkpde is the number of kernel PTPs allocated for the kernel at + * boot time (NKPTP is a compile time override). this number can + * grow dynamically as needed (but once allocated, we never free + * kernel PTPs). + */ + +int nkpde = NKPTP; +#ifdef NKPDE +#error "obsolete NKPDE: use NKPTP" +#endif + +/* + * pmap_pg_g: if our processor supports PG_G in the PTE then we + * set pmap_pg_g to PG_G (otherwise it is zero). + */ + +int pmap_pg_g = 0; + +/* + * i386 physical memory comes in a big contig chunk with a small + * hole toward the front of it... the following 4 paddr_t's + * (shared with machdep.c) describe the physical address space + * of this machine. + */ +paddr_t avail_start; /* PA of first available physical page */ +paddr_t avail_end; /* PA of last available physical page */ +paddr_t hole_start; /* PA of start of "hole" */ +paddr_t hole_end; /* PA of end of "hole" */ + +/* + * other data structures + */ + +static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ +static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ + +/* + * the following two vaddr_t's are used during system startup + * to keep track of how much of the kernel's VM space we have used. + * once the system is started, the management of the remaining kernel + * VM space is turned over to the kernel_map vm_map. 
+ */ + +static vaddr_t virtual_avail; /* VA of first free KVA */ +static vaddr_t virtual_end; /* VA of last free KVA */ + + +/* + * pv_page management structures: locked by pvalloc_lock + */ + +TAILQ_HEAD(pv_pagelist, pv_page); +static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */ +static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */ +static int pv_nfpvents; /* # of free pv entries */ +static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */ +static vaddr_t pv_cachedva; /* cached VA for later use */ + +#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ +#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) + /* high water mark */ + +/* + * linked list of all non-kernel pmaps + */ + +static struct pmap_head pmaps; +static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ + +/* + * pool that pmap structures are allocated from + */ + +struct pool pmap_pmap_pool; + +/* + * special VAs and the PTEs that map them + */ + +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; +static caddr_t csrcp, cdstp, zerop, ptpp; +caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ + +#ifdef __NetBSD__ +extern vaddr_t msgbuf_vaddr; +extern paddr_t msgbuf_paddr; + +extern vaddr_t idt_vaddr; /* we allocate IDT early */ +extern paddr_t idt_paddr; +#endif + +#if defined(I586_CPU) +/* stuff to fix the pentium f00f bug */ +extern vaddr_t pentium_idt_vaddr; +#endif + + +/* + * local prototypes + */ + +static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t)); +static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t)); +static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */ +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +static struct pv_entry *pmap_alloc_pvpage __P((struct pmap *, int)); +static void pmap_enter_pv __P((struct pv_head *, + struct pv_entry *, struct pmap *, + vaddr_t, struct vm_page *)); +static void pmap_free_pv __P((struct pmap *, struct pv_entry *)); +static void pmap_free_pvs __P((struct pmap *, struct pv_entry *)); +static void pmap_free_pv_doit __P((struct pv_entry *)); +static void pmap_free_pvpage __P((void)); +static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t)); +static boolean_t pmap_is_curpmap __P((struct pmap *)); +static pt_entry_t *pmap_map_ptes __P((struct pmap *)); +static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *, + vaddr_t)); +static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t)); +static void pmap_remove_ptes __P((struct pmap *, + struct pmap_remove_record *, + struct vm_page *, vaddr_t, + vaddr_t, vaddr_t)); +static struct vm_page *pmap_steal_ptp __P((struct uvm_object *, + vaddr_t)); +static vaddr_t pmap_tmpmap_pa __P((paddr_t)); +static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *)); +static void pmap_tmpunmap_pa __P((void)); +static void pmap_tmpunmap_pvepte __P((struct pv_entry *)); +static boolean_t pmap_transfer_ptes __P((struct pmap *, + struct pmap_transfer_location *, + struct pmap *, + struct pmap_transfer_location *, + int, boolean_t)); +static boolean_t pmap_try_steal_pv __P((struct pv_head *, + struct pv_entry *, + struct pv_entry *)); +static void pmap_unmap_ptes __P((struct pmap *)); + +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); + +/* + 
* p m a p i n l i n e h e l p e r f u n c t i o n s + */ + +/* + * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? + * of course the kernel is always loaded + */ + +__inline static boolean_t +pmap_is_curpmap(pmap) + struct pmap *pmap; +{ + return((pmap == pmap_kernel()) || + (pmap->pm_pdirpa == (paddr_t) rcr3())); +} + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + * + * => returns with pmap_tmpptp_lock held + */ + +__inline static vaddr_t +pmap_tmpmap_pa(pa) + paddr_t pa; +{ + simple_lock(&pmap_tmpptp_lock); +#if defined(DIAGNOSTIC) + if (*ptp_pte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpp); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + * + * => we release pmap_tmpptp_lock + */ + +__inline static void +pmap_tmpunmap_pa() +{ +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptp_pte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + *ptp_pte = 0; /* zap! */ + pmap_update_pg((vaddr_t)ptpp); + simple_unlock(&pmap_tmpptp_lock); +} + +/* + * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry + * + * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] + * => we may grab pmap_tmpptp_lock and return with it held + */ + +__inline static pt_entry_t * +pmap_tmpmap_pvepte(pve) + struct pv_entry *pve; +{ +#ifdef DIAGNOSTIC + if (pve->pv_pmap == pmap_kernel()) + panic("pmap_tmpmap_pvepte: attempt to map kernel"); +#endif + + /* is it current pmap? use direct mapping... */ + if (pmap_is_curpmap(pve->pv_pmap)) + return(vtopte(pve->pv_va)); + + return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) + + ptei((unsigned)pve->pv_va)); +} + +/* + * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte + * + * => we will release pmap_tmpptp_lock if we hold it + */ + +__inline static void +pmap_tmpunmap_pvepte(pve) + struct pv_entry *pve; +{ + /* was it current pmap? if so, return */ + if (pmap_is_curpmap(pve->pv_pmap)) + return; + + pmap_tmpunmap_pa(); +} + +/* + * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in + * + * => we lock enough pmaps to keep things locked in + * => must be undone with pmap_unmap_ptes before returning + */ + +__inline static pt_entry_t * +pmap_map_ptes(pmap) + struct pmap *pmap; +{ + pd_entry_t opde; + + /* the kernel's pmap is always accessible */ + if (pmap == pmap_kernel()) { + return(PTE_BASE); + } + + /* if curpmap then we are always mapped */ + if (pmap_is_curpmap(pmap)) { + simple_lock(&pmap->pm_obj.vmobjlock); + return(PTE_BASE); + } + + /* need to lock both curpmap and pmap: use ordered locking */ + if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + simple_lock(&pmap->pm_obj.vmobjlock); + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } else { + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&pmap->pm_obj.vmobjlock); + } + + /* need to load a new alternate pt space into curpmap? 
*/ + opde = *APDP_PDE; + if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { + *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); + if (pmap_valid_entry(opde)) + pmap_update(); + } + return(APTE_BASE); +} + +/* + * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + */ + +__inline static void +pmap_unmap_ptes(pmap) + struct pmap *pmap; +{ + if (pmap == pmap_kernel()) { + return; + } + if (pmap_is_curpmap(pmap)) { + simple_unlock(&pmap->pm_obj.vmobjlock); + } else { + simple_unlock(&pmap->pm_obj.vmobjlock); + simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } +} /* - * All those kernel PT submaps that BSD is so fond of + * p m a p k e n t e r f u n c t i o n s + * + * functions to quickly enter/remove pages from the kernel address + * space. pmap_kremove/pmap_kenter_pgs are exported to MI kernel. + * we make use of the recursive PTE mappings. */ -pt_entry_t *CMAP1, *CMAP2, *XXX_mmap; -caddr_t CADDR1, CADDR2, vmmap; -pt_entry_t *msgbufmap, *bootargmap; -#endif /* BSDVM_COMPAT */ /* - * Bootstrap the system enough to run with virtual memory. - * Map the kernel's code and data, and allocate the system page table. + * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking + * + * => no need to lock anything, assume va is already allocated + * => should be faster than normal pmap enter function + */ + +void +pmap_kenter_pa(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; +{ + pt_entry_t *pte, opte; + + pte = vtopte(va); + opte = *pte; + *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | + PG_V | pmap_pg_g; /* zap! */ + if (pmap_valid_entry(opte)) + pmap_update_pg(va); +} + +/* + * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking * - * On the I386 this is called after mapping has already been enabled - * and just syncs the pmap module with what has already been done. - * [We can't call it easily with mapping off since the kernel is not - * mapped with PA == VA, hence we would have to relocate every address - * from the linked base (virtual) address to the actual (physical) - * address starting relative to 0] + * => no need to lock anything + * => caller must dispose of any vm_page mapped in the va range + * => note: not an inline function + * => we assume the va is page aligned and the len is a multiple of NBPG + * => we assume kernel only unmaps valid addresses and thus don't bother + * checking the valid bit before doing TLB flushing */ void -pmap_bootstrap(virtual_start) - vm_offset_t virtual_start; +pmap_kremove(va, len) + vaddr_t va; + vsize_t len; { -#if BSDVM_COMPAT - vm_offset_t va; pt_entry_t *pte; + + len >>= PAGE_SHIFT; + for ( /* null */ ; len ; len--, va += NBPG) { + pte = vtopte(va); +#ifdef DIAGNOSTIC + if (*pte & PG_PVLIST) + panic("pmap_kremove: PG_PVLIST mapping for 0x%lx\n", + va); +#endif + *pte = 0; /* zap! 
*/ +#if defined(I386_CPU) + if (cpu_class != CPUCLASS_386) #endif + pmap_update_pg(va); + } +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); +#endif +} + +/* + * pmap_kenter_pgs: enter in a number of vm_pages + */ + +void +pmap_kenter_pgs(va, pgs, npgs) + vaddr_t va; + struct vm_page **pgs; + int npgs; +{ + pt_entry_t *pte, opte; + int lcv; + vaddr_t tva; +#if defined(I386_CPU) + boolean_t need_update = FALSE; +#endif + + for (lcv = 0 ; lcv < npgs ; lcv++) { + tva = va + lcv * NBPG; + pte = vtopte(tva); + opte = *pte; + *pte = VM_PAGE_TO_PHYS(pgs[lcv]) | PG_RW | PG_V | pmap_pg_g; +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + if (pmap_valid_entry(opte)) + need_update = TRUE; + continue; + } +#endif + if (pmap_valid_entry(opte)) + pmap_update_pg(tva); + } +#if defined(I386_CPU) + if (need_update && cpu_class == CPUCLASS_386) + pmap_update(); +#endif +} + +/* + * p m a p i n i t f u n c t i o n s + * + * pmap_bootstrap and pmap_init are called during system startup + * to init the pmap module. pmap_bootstrap() does a low level + * init just to get things rolling. pmap_init() finishes the job. + */ + +/* + * pmap_bootstrap: get the system in a state where it can run with VM + * properly enabled (called before main()). the VM system is + * fully init'd later... + * + * => on i386, locore.s has already enabled the MMU by allocating + * a PDP for the kernel, and nkpde PTP's for the kernel. + * => kva_start is the first free virtual address in kernel space + * => we make use of the global vars from machdep.c: + * avail_start, avail_end, hole_start, hole_end + */ + +void +pmap_bootstrap(kva_start) + vaddr_t kva_start; +{ + struct pmap *kpm; + vaddr_t kva; + pt_entry_t *pte; +#ifdef __NetBSD__ + int first16q; +#endif + + /* + * set the page size (default value is 4K which is ok) + */ - /* Register the page size with the vm system */ -#if defined(UVM) uvm_setpagesize(); -#else - vm_set_page_size(); + + /* + * a quick sanity check + */ + + if (PAGE_SIZE != NBPG) + panic("pmap_bootstrap: PAGE_SIZE != NBPG"); + + /* + * use the very last page of physical memory for the message buffer + */ + + avail_end -= i386_round_page(MSGBUFSIZE); +#ifdef __NetBSD__ + msgbuf_paddr = avail_end; #endif - virtual_avail = virtual_start; - virtual_end = VM_MAX_KERNEL_ADDRESS; +#ifdef __OpenBSD__ + /* + * The arguments passed in from /boot needs space too. + */ + avail_end -= i386_round_page(bootargc); +#endif + + /* + * set up our local static global vars that keep track of the + * usage of KVM before kernel_map is set up + */ + + virtual_avail = kva_start; /* first free KVA */ + virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ + + /* + * set up protection_codes: we need to be able to convert from + * a MI protection code (some combo of VM_PROT...) to something + * we can jam into a i386 PTE. + */ + + protection_codes[VM_PROT_NONE] = 0; /* --- */ + protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */ + protection_codes[VM_PROT_READ] = PG_RO; /* -r- */ + protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */ + protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */ + protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */ + protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */ + protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */ + + /* + * now we init the kernel's pmap + * + * the kernel pmap's pm_obj is not used for much. however, in + * user pmaps the pm_obj contains the list of active PTPs. + * the pm_obj currently does not have a pager. 
it might be possible + * to add a pager that would allow a process to read-only mmap its + * own page tables (fast user level vtophys?). this may or may not + * be useful. + */ + + kpm = pmap_kernel(); + simple_lock_init(&kpm->pm_obj.vmobjlock); + kpm->pm_obj.pgops = NULL; + TAILQ_INIT(&kpm->pm_obj.memq); + kpm->pm_obj.uo_npages = 0; + kpm->pm_obj.uo_refs = 1; + bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ + kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = + i386_btop(kva_start - VM_MIN_KERNEL_ADDRESS); /* - * Initialize protection array. + * the above is just a rough estimate and not critical to the proper + * operation of the system. */ - i386_protection_init(); -#ifdef notdef + curpcb->pcb_pmap = kpm; /* proc0's pcb */ + /* - * Create Kernel page directory table and page maps. - * [ currently done in locore. i have wild and crazy ideas -wfj ] + * enable global TLB entries if they are supported */ - bzero(firstaddr, (1+NKPDE)*NBPG); - pmap_kernel()->pm_pdir = firstaddr + VM_MIN_KERNEL_ADDRESS; - pmap_kernel()->pm_ptab = firstaddr + VM_MIN_KERNEL_ADDRESS + NBPG; - - firstaddr += NBPG; - for (x = i386_btod(VM_MIN_KERNEL_ADDRESS); - x < i386_btod(VM_MIN_KERNEL_ADDRESS) + NKPDE; x++) { - pd_entry_t *pde; - pde = pmap_kernel()->pm_pdir + x; - *pde = (firstaddr + x*NBPG) | PG_V | PG_KW; + + if (cpu_feature & CPUID_PGE) { + lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */ + pmap_pg_g = PG_G; /* enable software */ + + /* add PG_G attribute to already mapped kernel pages */ + for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; + kva += NBPG) + if (pmap_valid_entry(PTE_BASE[i386_btop(kva)])) + PTE_BASE[i386_btop(kva)] |= PG_G; } -#else - pmap_kernel()->pm_pdir = - (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + + /* + * now we allocate the "special" VAs which are used for tmp mappings + * by the pmap (and other modules). we allocate the VAs by advancing + * virtual_avail (note that there are no pages mapped at these VAs). + * we find the PTE that maps the allocated VA via the linear PTE + * mapping. + */ + + pte = PTE_BASE + i386_btop(virtual_avail); + + csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + virtual_avail += NBPG; pte++; /* advance */ + + cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + virtual_avail += NBPG; pte++; + + zerop = (caddr_t) virtual_avail; zero_pte = pte; + virtual_avail += NBPG; pte++; + + ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + virtual_avail += NBPG; pte++; + + /* XXX: vmmap used by mem.c... 
should be uvm_map_reserve */ + vmmap = (char *)virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + +#ifdef __NetBSD + msgbuf_vaddr = virtual_avail; /* don't need pte */ #endif +#ifdef __OpenBSD__ + msgbufp = (struct msgbuf *)virtual_avail; /* don't need pte */ +#endif + virtual_avail += round_page(MSGBUFSIZE); pte++; - simple_lock_init(&pmap_kernel()->pm_lock); - pmap_kernel()->pm_count = 1; +#ifdef __NetBSD__ + idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + avail_end -= NBPG; + idt_paddr = avail_end; + +#if defined(I586_CPU) + /* pentium f00f bug stuff */ + pentium_idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; +#endif +#endif + +#ifdef __OpenBSD__ + bootargp = (bootarg_t *)virtual_avail; + virtual_avail += round_page(bootargc); pte++; +#endif -#if BSDVM_COMPAT /* - * Allocate all the submaps we need + * now we reserve some VM for mapping pages when doing a crash dump */ -#define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); - va = virtual_avail; - pte = pmap_pte(pmap_kernel(), va); + virtual_avail = reserve_dumppages(virtual_avail); + + /* + * init the static-global locks and global lists. + */ - SYSMAP(caddr_t ,CMAP1 ,CADDR1 ,1 ) - SYSMAP(caddr_t ,CMAP2 ,CADDR2 ,1 ) - SYSMAP(caddr_t ,XXX_mmap ,vmmap ,1 ) - SYSMAP(struct msgbuf * ,msgbufmap ,msgbufp ,btoc(MSGBUFSIZE)) - SYSMAP(bootarg_t * ,bootargmap ,bootargp ,btoc(bootargc)) - virtual_avail = va; +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + spinlockinit(&pmap_main_lock, "pmaplk", 0); + simple_lock_init(&pvalloc_lock); + simple_lock_init(&pmaps_lock); + simple_lock_init(&pmap_copy_page_lock); + simple_lock_init(&pmap_zero_page_lock); + simple_lock_init(&pmap_tmpptp_lock); #endif + LIST_INIT(&pmaps); + TAILQ_INIT(&pv_freepages); + TAILQ_INIT(&pv_unusedpgs); /* - * Reserve pmap space for mapping physical pages during dump. + * initialize the pmap pool. */ - virtual_avail = reserve_dumppages(virtual_avail); - /* flawed, no mappings?? */ - if (ctob(physmem) > 31*1024*1024 && MAXKPDE != NKPDE) { - vm_offset_t p; - int i; - - p = virtual_avail; - virtual_avail += (MAXKPDE-NKPDE+1) * NBPG; - bzero((void *)p, (MAXKPDE-NKPDE+1) * NBPG); - p = round_page(p); - for (i = NKPDE; i < MAXKPDE; i++, p += NBPG) - PTD[KPTDI+i] = (pd_entry_t)p | - PG_V | PG_KW; + pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", + 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); + +#ifdef __NetBSD__ + /* + * we must call uvm_page_physload() after we are done playing with + * virtual_avail but before we call pmap_steal_memory. [i.e. here] + * this call tells the VM system how much physical memory it + * controls. If we have 16M of RAM or less, just put it all on + * the default free list. Otherwise, put the first 16M of RAM + * on a lower priority free list (so that all of the ISA DMA'able + * memory won't be eaten up first-off). + */ + + if (avail_end <= (16 * 1024 * 1024)) + first16q = VM_FREELIST_DEFAULT; + else + first16q = VM_FREELIST_FIRST16; + + if (avail_start < hole_start) /* any free memory before the hole? 
*/ + uvm_page_physload(atop(avail_start), atop(hole_start), + atop(avail_start), atop(hole_start), + first16q); + + if (first16q != VM_FREELIST_DEFAULT && + hole_end < 16 * 1024 * 1024) { + uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024), + atop(hole_end), atop(16 * 1024 * 1024), + first16q); + uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end), + atop(16 * 1024 * 1024), atop(avail_end), + VM_FREELIST_DEFAULT); + } else { + uvm_page_physload(atop(hole_end), atop(avail_end), + atop(hole_end), atop(avail_end), + VM_FREELIST_DEFAULT); } -} +#endif -void -pmap_virtual_space(startp, endp) - vm_offset_t *startp; - vm_offset_t *endp; -{ - *startp = virtual_avail; - *endp = virtual_end; + /* + * ensure the TLB is sync'd with reality by flushing it... + */ + + pmap_update(); } /* - * Initialize the pmap module. - * Called by vm_init, to initialize any structures that the pmap - * system needs to map virtual memory. + * pmap_init: called from uvm_init, our job is to get the pmap + * system ready to manage mappings... this mainly means initing + * the pv_entry stuff. */ + void pmap_init() { - vm_offset_t addr; - vm_size_t s; - int lcv; + int npages, lcv; + vaddr_t addr; + vsize_t s; - if (PAGE_SIZE != NBPG) - panic("pmap_init: CLSIZE != 1"); + /* + * compute the number of pages we have and then allocate RAM + * for each pages' pv_head and saved attributes. + */ npages = 0; - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) npages += (vm_physmem[lcv].end - vm_physmem[lcv].start); - s = (vm_size_t) (sizeof(struct pv_entry) * npages + npages); - s = round_page(s); -#if defined(UVM) - addr = (vm_offset_t) uvm_km_zalloc(kernel_map, s); + s = (vsize_t) (sizeof(struct pv_head) * npages + + sizeof(char) * npages); + s = round_page(s); /* round up */ + addr = (vaddr_t) uvm_km_zalloc(kernel_map, s); if (addr == NULL) - panic("pmap_init"); -#else - addr = (vm_offset_t) kmem_alloc(kernel_map, s); -#endif + panic("pmap_init: unable to allocate pv_heads"); - /* allocate pv_entry stuff first */ + /* + * init all pv_head's and attrs in one bzero + */ + + /* allocate pv_head stuff first */ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { - vm_physmem[lcv].pmseg.pvent = (struct pv_entry *) addr; - addr = (vm_offset_t)(vm_physmem[lcv].pmseg.pvent + - (vm_physmem[lcv].end - vm_physmem[lcv].start)); + vm_physmem[lcv].pmseg.pvhead = (struct pv_head *) addr; + addr = (vaddr_t)(vm_physmem[lcv].pmseg.pvhead + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); } - /* allocate attrs next */ + + /* now allocate attrs */ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { vm_physmem[lcv].pmseg.attrs = (char *) addr; - addr = (vm_offset_t)(vm_physmem[lcv].pmseg.attrs + - (vm_physmem[lcv].end - vm_physmem[lcv].start)); + addr = (vaddr_t)(vm_physmem[lcv].pmseg.attrs + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); } - TAILQ_INIT(&pv_page_freelist); -#ifdef DEBUG - if (pmapdebug & PDB_INIT) - printf("pmap_init: %lx bytes (%x pgs)\n", - s, npages); -#endif + /* + * now we need to free enough pv_entry structures to allow us to get + * the kmem_map/kmem_object allocated and inited (done after this + * function is finished). to do this we allocate one bootstrap page out + * of kernel_map and use it to provide an initial pool of pv_entry + * structures. we never free this page. 
+ */ + + pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, NBPG); + if (pv_initpage == NULL) + panic("pmap_init: pv_initpage"); + pv_cachedva = NULL; /* a VA we have allocated but not used yet */ + pv_nfpvents = 0; + (void) pmap_add_pvpage(pv_initpage, FALSE); /* - * Now it is safe to enable pv_entry recording. + * done: pmap module is up (and ready for business) */ + pmap_initialized = TRUE; } -struct pv_entry * -pmap_alloc_pv() +/* + * p v _ e n t r y f u n c t i o n s + */ + +/* + * pv_entry allocation functions: + * the main pv_entry allocation functions are: + * pmap_alloc_pv: allocate a pv_entry structure + * pmap_free_pv: free one pv_entry + * pmap_free_pvs: free a list of pv_entrys + * + * the rest are helper functions + */ + +/* + * pmap_alloc_pv: inline function to allocate a pv_entry structure + * => we lock pvalloc_lock + * => if we fail, we call out to pmap_alloc_pvpage + * => 3 modes: + * ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it + * ALLOCPV_TRY = we want a pv_entry, but not enough to steal + * ALLOCPV_NONEED = we are trying to grow our free list, don't really need + * one now + * + * "try" is for optional functions like pmap_copy(). + */ + +__inline static struct pv_entry * +pmap_alloc_pv(pmap, mode) + struct pmap *pmap; + int mode; { - struct pv_page *pvp; + struct pv_page *pvpage; struct pv_entry *pv; - int i; - if (pv_nfree == 0) { -#if defined(UVM) - /* NOTE: can't lock kernel_map here */ - MALLOC(pvp, struct pv_page *, NBPG, M_VMPVENT, M_WAITOK); -#else - pvp = (struct pv_page *)kmem_alloc(kernel_map, NBPG); -#endif - if (pvp == 0) - panic("pmap_alloc_pv: kmem_alloc() failed"); - pvp->pvp_pgi.pgi_freelist = pv = &pvp->pvp_pv[1]; - for (i = NPVPPG - 2; i; i--, pv++) - pv->pv_next = pv + 1; - pv->pv_next = 0; - pv_nfree += pvp->pvp_pgi.pgi_nfree = NPVPPG - 1; - TAILQ_INSERT_HEAD(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - pv = &pvp->pvp_pv[0]; - } else { - --pv_nfree; - pvp = pv_page_freelist.tqh_first; - if (--pvp->pvp_pgi.pgi_nfree == 0) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); + simple_lock(&pvalloc_lock); + + if (pv_freepages.tqh_first != NULL) { + pvpage = pv_freepages.tqh_first; + pvpage->pvinfo.pvpi_nfree--; + if (pvpage->pvinfo.pvpi_nfree == 0) { + /* nothing left in this one? */ + TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list); } - pv = pvp->pvp_pgi.pgi_freelist; + pv = pvpage->pvinfo.pvpi_pvfree; #ifdef DIAGNOSTIC - if (pv == 0) - panic("pmap_alloc_pv: pgi_nfree inconsistent"); + if (pv == NULL) + panic("pmap_alloc_pv: pvpi_nfree off"); #endif - pvp->pvp_pgi.pgi_freelist = pv->pv_next; + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + pv_nfpvents--; /* took one from pool */ + } else { + pv = NULL; /* need more of them */ } - return pv; -} -void -pmap_free_pv(pv) - struct pv_entry *pv; -{ - register struct pv_page *pvp; - - pvp = (struct pv_page *) trunc_page(pv); - switch (++pvp->pvp_pgi.pgi_nfree) { - case 1: - TAILQ_INSERT_TAIL(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - default: - pv->pv_next = pvp->pvp_pgi.pgi_freelist; - pvp->pvp_pgi.pgi_freelist = pv; - ++pv_nfree; - break; - case NPVPPG: - pv_nfree -= NPVPPG - 1; - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); -#if defined(UVM) - FREE((vaddr_t) pvp, M_VMPVENT); -#else - kmem_free(kernel_map, (vm_offset_t)pvp, NBPG); -#endif - break; + /* + * if below low water mark or we didn't get a pv_entry we try and + * create more pv_entrys ... 
+ */ + + if (pv_nfpvents < PVE_LOWAT || pv == NULL) { + if (pv == NULL) + pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ? + mode : ALLOCPV_NEED); + else + (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED); } + + simple_unlock(&pvalloc_lock); + return(pv); } -void -pmap_collect_pv() +/* + * pmap_alloc_pvpage: maybe allocate a new pvpage + * + * if need_entry is false: try and allocate a new pv_page + * if need_entry is true: try and allocate a new pv_page and return a + * new pv_entry from it. if we are unable to allocate a pv_page + * we make a last ditch effort to steal a pv_page from some other + * mapping. if that fails, we panic... + * + * => we assume that the caller holds pvalloc_lock + */ + +static struct pv_entry * +pmap_alloc_pvpage(pmap, mode) + struct pmap *pmap; + int mode; { - struct pv_page_list pv_page_collectlist; - struct pv_page *pvp, *npvp; - struct pv_entry *ph, *ppv, *pv, *npv; - int s; - int bank, off; + struct vm_page *pg; + struct pv_page *pvpage; + int lcv, idx, npg, s; + struct pv_entry *pv, *cpv, *prevpv; + + /* + * if we need_entry and we've got unused pv_pages, allocate from there + */ - TAILQ_INIT(&pv_page_collectlist); + if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) { - for (pvp = pv_page_freelist.tqh_first; pvp; pvp = npvp) { - if (pv_nfree < NPVPPG) - break; - npvp = pvp->pvp_pgi.pgi_list.tqe_next; - if (pvp->pvp_pgi.pgi_nfree > NPVPPG / 3) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - TAILQ_INSERT_TAIL(&pv_page_collectlist, pvp, pvp_pgi.pgi_list); - pv_nfree -= pvp->pvp_pgi.pgi_nfree; - pvp->pvp_pgi.pgi_nfree = -1; + /* move it to pv_freepages list */ + pvpage = pv_unusedpgs.tqh_first; + TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); + + /* allocate a pv_entry */ + pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */ + pv = pvpage->pvinfo.pvpi_pvfree; +#ifdef DIAGNOSTIC + if (pv == NULL) + panic("pmap_alloc_pvpage: pvpi_nfree off"); +#endif + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + + pv_nfpvents--; /* took one from pool */ + return(pv); + } + + /* + * see if we've got a cached unmapped VA that we can map a page in. + * if not, try to allocate one. + */ + + s = splimp(); /* must protect kmem_map/kmem_object with splimp! 
*/ + if (pv_cachedva == NULL) { + pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, + NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); + if (pv_cachedva == NULL) { + splx(s); + goto steal_one; } } - if (pv_page_collectlist.tqh_first == 0) - return; + /* + * we have a VA, now let's try and allocate a page in the object + * note: we are still holding splimp to protect kmem_object + */ - if ((bank = vm_physseg_find(atop(0), &off)) == -1) { - printf("INVALID PA!"); - return; + if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) { + splx(s); + goto steal_one; } - for (ph = &vm_physmem[bank].pmseg.pvent[off]; ph; ph = ph->pv_next) { - if (ph->pv_pmap == 0) - continue; - s = splimp(); - for (ppv = ph; (pv = ppv->pv_next) != 0; ) { - pvp = (struct pv_page *) trunc_page(pv); - if (pvp->pvp_pgi.pgi_nfree == -1) { - pvp = pv_page_freelist.tqh_first; - if (--pvp->pvp_pgi.pgi_nfree == 0) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - } - npv = pvp->pvp_pgi.pgi_freelist; -#ifdef DIAGNOSTIC - if (npv == 0) - panic("pmap_collect_pv: pgi_nfree inconsistent"); -#endif - pvp->pvp_pgi.pgi_freelist = npv->pv_next; - *npv = *pv; - ppv->pv_next = npv; - ppv = npv; - } else - ppv = pv; + pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva - + vm_map_min(kernel_map), + NULL, UVM_PGA_USERESERVE); + if (pg) + pg->flags &= ~PG_BUSY; /* never busy */ + + simple_unlock(&uvmexp.kmem_object->vmobjlock); + splx(s); + /* splimp now dropped */ + + if (pg == NULL) + goto steal_one; + + /* + * add a mapping for our new pv_page and free its entrys (save one!) + * + * NOTE: If we are allocating a PV page for the kernel pmap, the + * pmap is already locked! (...but entering the mapping is safe...) + */ + + pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pvpage = (struct pv_page *) pv_cachedva; + pv_cachedva = NULL; + return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + +steal_one: + /* + * if we don't really need a pv_entry right now, we can just return. + */ + + if (mode != ALLOCPV_NEED) + return(NULL); + + /* + * last ditch effort! we couldn't allocate a free page to make + * more pv_entrys so we try and steal one from someone else. + */ + + pv = NULL; + for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) { + npg = vm_physmem[lcv].end - vm_physmem[lcv].start; + for (idx = 0 ; idx < npg ; idx++) { + struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead; + + if (pvhead->pvh_list == NULL) + continue; /* spot check */ + if (!simple_lock_try(&pvhead->pvh_lock)) + continue; + cpv = prevpv = pvhead->pvh_list; + while (cpv) { + if (pmap_try_steal_pv(pvhead, cpv, prevpv)) + break; + prevpv = cpv; + cpv = cpv->pv_next; + } + simple_unlock(&pvhead->pvh_lock); + /* got one? break out of the loop! */ + if (cpv) { + pv = cpv; + break; + } } - splx(s); } - for (pvp = pv_page_collectlist.tqh_first; pvp; pvp = npvp) { - npvp = pvp->pvp_pgi.pgi_list.tqe_next; -#if defined(UVM) - FREE((vaddr_t) pvp, M_VMPVENT); -#else - kmem_free(kernel_map, (vm_offset_t)pvp, NBPG); -#endif + return(pv); +} + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! + */ + +static boolean_t +pmap_try_steal_pv(pvh, cpv, prevpv) + struct pv_head *pvh; + struct pv_entry *cpv, *prevpv; +{ + pt_entry_t *ptep; /* pointer to a PTE */ + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. 
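Note that the steal path above never blocks: each pv_head is probed with simple_lock_try() and anything that cannot be locked is simply skipped, since sleeping or violating lock order is not an option this deep in the allocator. A minimal illustration of that opportunistic try-lock scan; the lock type and scan targets below are stand-ins, not kernel primitives:

struct stublock { volatile int held; };

/* not atomic; a stand-in for simple_lock_try(), for illustration only */
static int
stublock_try(struct stublock *l)
{
	if (l->held)
		return (0);
	l->held = 1;
	return (1);
}

struct scanee {
	struct stublock	lock;
	int		usable;
};

/* return the index of the first entry we could lock and use, or -1 */
static int
trylock_scan(struct scanee *v, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!stublock_try(&v[i].lock))
			continue;		/* contended: skip, never wait */
		if (v[i].usable)
			return (i);		/* caller unlocks when done */
		v[i].lock.held = 0;		/* not usable: unlock, keep going */
	}
	return (-1);
}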
first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + *ptep = 0; /* zap! */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); + pmap_tmpunmap_pvepte(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); +} + +/* + * pmap_add_pvpage: add a pv_page's pv_entrys to the free list + * + * => caller must hold pvalloc_lock + * => if need_entry is true, we allocate and return one pv_entry + */ + +static struct pv_entry * +pmap_add_pvpage(pvp, need_entry) + struct pv_page *pvp; + boolean_t need_entry; +{ + int tofree, lcv; + + /* do we need to return one? */ + tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE; + + pvp->pvinfo.pvpi_pvfree = NULL; + pvp->pvinfo.pvpi_nfree = tofree; + for (lcv = 0 ; lcv < tofree ; lcv++) { + pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv]; } + if (need_entry) + TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list); + else + TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + pv_nfpvents += tofree; + return((need_entry) ? &pvp->pvents[lcv] : NULL); } -__inline void -pmap_enter_pv(pmap, va, pv) - register pmap_t pmap; - vm_offset_t va; +/* + * pmap_free_pv_doit: actually free a pv_entry + * + * => do not call this directly! instead use either + * 1. pmap_free_pv ==> free a single pv_entry + * 2. pmap_free_pvs => free a list of pv_entrys + * => we must be holding pvalloc_lock + */ + +__inline static void +pmap_free_pv_doit(pv) struct pv_entry *pv; -{ - register struct pv_entry *npv; - int s; +{ + struct pv_page *pvp; - if (!pmap_initialized) - return; + pvp = (struct pv_page *) i386_trunc_page(pv); + pv_nfpvents++; + pvp->pvinfo.pvpi_nfree++; -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("pmap_enter_pv: pv %x: %x/%x/%x\n", - pv, pv->pv_va, pv->pv_pmap, pv->pv_next); -#endif - s = splimp(); + /* nfree == 1 => fully allocated page just became partly allocated */ + if (pvp->pvinfo.pvpi_nfree == 1) { + TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list); + } - if (pv->pv_pmap == NULL) { - /* - * No entries yet, use header as the first entry - */ -#ifdef DEBUG - enter_stats.firstpv++; -#endif - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - } else { - /* - * There is at least one other VA mapping this page. - * Place this entry after the header. 
- */ -#ifdef DEBUG - for (npv = pv; npv; npv = npv->pv_next) - if (pmap == npv->pv_pmap && va == npv->pv_va) - panic("pmap_enter_pv: already in pv_tab"); -#endif - npv = pmap_alloc_pv(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; -#ifdef DEBUG - if (!npv->pv_next) - enter_stats.secondpv++; -#endif + /* free it */ + pv->pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = pv; + + /* + * are all pv_page's pv_entry's free? move it to unused queue. + */ + + if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) { + TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list); } - splx(s); } -__inline void -pmap_remove_pv(pmap, va, pv) - register pmap_t pmap; - vm_offset_t va; +/* + * pmap_free_pv: free a single pv_entry + * + * => we gain the pvalloc_lock + */ + +__inline static void +pmap_free_pv(pmap, pv) + struct pmap *pmap; struct pv_entry *pv; -{ - register struct pv_entry *npv; - int s; +{ + simple_lock(&pvalloc_lock); + pmap_free_pv_doit(pv); /* - * Remove from the PV table (raise IPL since we - * may be called at interrupt time). + * Can't free the PV page if the PV entries were associated with + * the kernel pmap; the pmap is already locked. */ - s = splimp(); + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL && + pmap != pmap_kernel()) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + +/* + * pmap_free_pvs: free a list of pv_entrys + * + * => we gain the pvalloc_lock + */ + +__inline static void +pmap_free_pvs(pmap, pvs) + struct pmap *pmap; + struct pv_entry *pvs; +{ + struct pv_entry *nextpv; + + simple_lock(&pvalloc_lock); + + for ( /* null */ ; pvs != NULL ; pvs = nextpv) { + nextpv = pvs->pv_next; + pmap_free_pv_doit(pvs); + } /* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. + * Can't free the PV page if the PV entries were associated with + * the kernel pmap; the pmap is already locked. */ - if (pmap == pv->pv_pmap && va == pv->pv_va) { - npv = pv->pv_next; - if (npv) { - *pv = *npv; - pmap_free_pv(npv); - } else - pv->pv_pmap = NULL; - } else { - for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) { - if (pmap == npv->pv_pmap && va == npv->pv_va) - break; - } - if (npv) { - pv->pv_next = npv->pv_next; - pmap_free_pv(npv); - } + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL && + pmap != pmap_kernel()) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + + +/* + * pmap_free_pvpage: try and free an unused pv_page structure + * + * => assume caller is holding the pvalloc_lock and that + * there is a page on the pv_unusedpgs list + * => if we can't get a lock on the kmem_map we try again later + * => note: analysis of MI kmem_map usage [i.e. malloc/free] shows + * that if we can lock the kmem_map then we are not already + * holding kmem_object's lock. + */ + +static void +pmap_free_pvpage() +{ + int s; + struct vm_map *map; + vm_map_entry_t dead_entries; + struct pv_page *pvp; + + s = splimp(); /* protect kmem_map */ + + pvp = pv_unusedpgs.tqh_first; + + /* + * note: watch out for pv_initpage which is allocated out of + * kernel_map rather than kmem_map. 
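The pool management above keeps pv_nfpvents between a low and a high watermark: dropping below PVE_LOWAT triggers allocation of more pv_pages, and climbing above PVE_HIWAT while a fully unused page is around lets one be given back. A small sketch of that hysteresis test; the watermark values here are illustrative, the patch defines the real PVE_LOWAT/PVE_HIWAT elsewhere:

#define XPVE_LOWAT	32			/* illustrative values only */
#define XPVE_HIWAT	(XPVE_LOWAT + 96)

/* +1: should grow the pool, -1: should release a spare page, 0: leave it */
static int
pool_pressure(int nfree, int have_unused_page)
{
	if (nfree < XPVE_LOWAT)
		return (1);
	if (nfree > XPVE_HIWAT && have_unused_page)
		return (-1);
	return (0);
}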
+ */ + if (pvp == pv_initpage) + map = kernel_map; + else + map = kmem_map; + + if (vm_map_lock_try(map)) { + + /* remove pvp from pv_unusedpgs */ + TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + + /* unmap the page */ + dead_entries = NULL; + (void)uvm_unmap_remove(map, (vaddr_t) pvp, + ((vaddr_t) pvp) + NBPG, &dead_entries); + vm_map_unlock(map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + + pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ } + + if (pvp == pv_initpage) + /* no more initpage, we've freed it */ + pv_initpage = NULL; + splx(s); } /* - * Used to map a range of physical addresses into kernel - * virtual address space. + * main pv_entry manipulation functions: + * pmap_enter_pv: enter a mapping onto a pv_head list + * pmap_remove_pv: remove a mappiing from a pv_head list * - * For now, VM is already on, we only need to map the - * specified memory. + * NOTE: pmap_enter_pv expects to lock the pvh itself + * pmap_remove_pv expects te caller to lock the pvh before calling */ -vm_offset_t -pmap_map(va, spa, epa, prot) - vm_offset_t va, spa, epa; - int prot; + +/* + * pmap_enter_pv: enter a mapping onto a pv_head lst + * + * => caller should hold the proper lock on pmap_main_lock + * => caller should have pmap locked + * => we will gain the lock on the pv_head and allocate the new pv_entry + * => caller should adjust ptp's wire_count before calling + */ + +__inline static void +pmap_enter_pv(pvh, pve, pmap, va, ptp) + struct pv_head *pvh; + struct pv_entry *pve; /* preallocated pve for us to use */ + struct pmap *pmap; + vaddr_t va; + struct vm_page *ptp; /* PTP in pmap that maps this VA */ { + pve->pv_pmap = pmap; + pve->pv_va = va; + pve->pv_ptp = ptp; /* NULL for kernel pmap */ + simple_lock(&pvh->pvh_lock); /* lock pv_head */ + pve->pv_next = pvh->pvh_list; /* add to ... */ + pvh->pvh_list = pve; /* ... locked list */ + simple_unlock(&pvh->pvh_lock); /* unlock, done! */ +} -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_map(%x, %x, %x, %x)\n", va, spa, epa, prot); -#endif +/* + * pmap_remove_pv: try to remove a mapping from a pv_list + * + * => caller should hold proper lock on pmap_main_lock + * => pmap should be locked + * => caller should hold lock on pv_head [so that attrs can be adjusted] + * => caller should adjust ptp's wire_count and free PTP if needed + * => we return the removed pve + */ - while (spa < epa) { - pmap_enter(pmap_kernel(), va, spa, prot, FALSE, 0); - va += NBPG; - spa += NBPG; +__inline static struct pv_entry * +pmap_remove_pv(pvh, pmap, va) + struct pv_head *pvh; + struct pmap *pmap; + vaddr_t va; +{ + struct pv_entry *pve, **prevptr; + + prevptr = &pvh->pvh_list; /* previous pv_entry pointer */ + pve = *prevptr; + while (pve) { + if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ + *prevptr = pve->pv_next; /* remove it! */ + break; + } + prevptr = &pve->pv_next; /* previous pointer */ + pve = pve->pv_next; /* advance */ } - return va; + return(pve); /* return removed pve */ } /* - * Create and return a physical map. + * p t p f u n c t i o n s + */ + +/* + * pmap_alloc_ptp: allocate a PTP for a PMAP * - * If the size specified for the map - * is zero, the map is an actual physical - * map, and may be referenced by the - * hardware. 
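pmap_remove_pv above walks the list with a pointer to the previous link field, so unlinking the head and unlinking an interior entry are the same assignment. A stand-alone sketch of that pointer-to-pointer idiom, with a stub node type and an illustrative match test:

struct node_stub {
	struct node_stub *next;
	int key;
};

/* unlink and return the first node with the given key, or NULL */
static struct node_stub *
unlink_by_key(struct node_stub **headp, int key)
{
	struct node_stub **prevptr = headp, *n;

	for (n = *prevptr; n != NULL; prevptr = &n->next, n = n->next) {
		if (n->key == key) {
			*prevptr = n->next;	/* works for head and interior */
			return (n);
		}
	}
	return (NULL);
}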
+ * => pmap should already be locked by caller + * => we use the ptp's wire_count to count the number of active mappings + * in the PTP (we start it at one to prevent any chance this PTP + * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) + */ + +__inline static struct vm_page * +pmap_alloc_ptp(pmap, pde_index, just_try) + struct pmap *pmap; + int pde_index; + boolean_t just_try; +{ + struct vm_page *ptp; + + ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, + UVM_PGA_USERESERVE); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + } + + /* got one! */ + ptp->flags &= ~PG_BUSY; /* never busy */ + ptp->wire_count = 1; /* no mappings yet */ + pmap_zero_page(VM_PAGE_TO_PHYS(ptp)); + pmap->pm_pdir[pde_index] = + (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + pmap->pm_stats.resident_count++; /* count PTP as resident */ + pmap->pm_ptphint = ptp; + return(ptp); +} + +/* + * pmap_steal_ptp: steal a PTP from any pmap that we can access * - * If the size specified is non-zero, - * the map will be used in software only, and - * is bounded by that size. + * => obj is locked by caller. + * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +static struct vm_page * +pmap_steal_ptp(obj, offset) + struct uvm_object *obj; + vaddr_t offset; +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + ptp = curobj->memq.tqh_first; + for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). + */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes(pmaps_hand, NULL, ptp, + (vaddr_t)ptes, + ptp_i2v(idx), + ptp_i2v(idx+1)); + pmap_tmpunmap_pa(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; + if (pmap_is_curpmap(pmaps_hand)) + pmap_update(); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdirpa) { + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); + } + + /* put it in our pmap! 
*/ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + return(ptp); +} + +/* + * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) * - * [ just allocate a ptd and mark it uninitialize -- should we track - * with a table which process has which ptd? -wfj ] + * => pmap should NOT be pmap_kernel() + * => pmap should be locked */ -pmap_t -pmap_create(size) - vm_size_t size; + +static struct vm_page * +pmap_get_ptp(pmap, pde_index, just_try) + struct pmap *pmap; + int pde_index; + boolean_t just_try; { - register pmap_t pmap; + struct vm_page *ptp; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) - printf("pmap_create(%x)\n", size); + if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { + + /* valid... check hint (saves us a PA->PG lookup) */ + if (pmap->pm_ptphint && + (pmap->pm_pdir[pde_index] & PG_FRAME) == + VM_PAGE_TO_PHYS(pmap->pm_ptphint)) + return(pmap->pm_ptphint); + + ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_get_ptp: unmanaged user PTP"); #endif + pmap->pm_ptphint = ptp; + return(ptp); + } - /* - * Software use map does not need a pmap - */ - if (size) - return NULL; + /* allocate a new PTP (updates ptphint) */ + return(pmap_alloc_ptp(pmap, pde_index, just_try)); +} + +/* + * p m a p l i f e c y c l e f u n c t i o n s + */ + +/* + * pmap_create: create a pmap + * + * => note: old pmap interface took a "size" args which allowed for + * the creation of "software only" pmaps (not in bsd). + */ + +struct pmap * +pmap_create() +{ + struct pmap *pmap; - pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); - bzero(pmap, sizeof(*pmap)); + pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); pmap_pinit(pmap); - return pmap; + return(pmap); } /* - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. + * pmap_pinit: given a zero'd pmap structure, init it. */ + void pmap_pinit(pmap) - register struct pmap *pmap; + struct pmap *pmap; { + /* init uvm_object */ + simple_lock_init(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ + TAILQ_INIT(&pmap->pm_obj.memq); + pmap->pm_obj.uo_npages = 0; + pmap->pm_obj.uo_refs = 1; + pmap->pm_stats.wired_count = 0; + pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ + pmap->pm_ptphint = NULL; + pmap->pm_flags = 0; + + /* allocate PDP */ + pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + (void) _pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, + (paddr_t *)&pmap->pm_pdirpa); -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) - printf("pmap_pinit(%x)\n", pmap); -#endif + /* init PDP */ + /* zero init area */ + bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; + + /* init the LDT */ + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); /* - * No need to allocate page table space yet but we do need a - * valid page directory table. + * we need to lock pmaps_lock to prevent nkpde from changing on + * us. 
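The recursive PDE installed above is what makes a pmap's page tables addressable as one flat array: a directory slot that points back at the page directory itself turns the 4MB window starting at (slot << 22) into a linear view of every PTE. A sketch of the arithmetic, assuming the usual i386 two-level 10/10/12 split; the concrete slot number is whatever this patch defines PDSLOT_PTE to be:

#define XPGSHIFT	12
#define XPDSHIFT	22

/* directory and table indexes of a VA under the 10/10/12 split */
#define xpdei(va)	(((unsigned long)(va) >> XPDSHIFT) & 0x3ff)
#define xptei(va)	(((unsigned long)(va) >> XPGSHIFT) & 0x3ff)

/*
 * with the self-map in directory slot "pteslot", the PTE that maps "va"
 * is itself visible at this virtual address:
 */
static unsigned long
pte_va(unsigned long pteslot, unsigned long va)
{
	unsigned long pte_base = pteslot << XPDSHIFT;

	return (pte_base + (va >> XPGSHIFT) * 4);	/* 4-byte PTEs */
}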
note that there is no need to splimp to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... */ -#if defined(UVM) - pmap->pm_pdir = (pd_entry_t *) uvm_km_zalloc(kernel_map, NBPG); -#else - pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, NBPG); -#endif - -#ifdef DIAGNOSTIC - if (pmap->pm_pdir == NULL) - panic("pmap_pinit: alloc failed"); -#endif - /* wire in kernel global address entries */ - bcopy(&PTD[KPTDI], &pmap->pm_pdir[KPTDI], MAXKPDE * - sizeof(pd_entry_t)); - - /* install self-referential address mapping entry */ - pmap->pm_pdir[PTDPTDI] = pmap_extract(pmap_kernel(), - (vm_offset_t)pmap->pm_pdir) | PG_V | PG_KW; - - pmap->pm_count = 1; - simple_lock_init(&pmap->pm_lock); + simple_lock(&pmaps_lock); + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + simple_unlock(&pmaps_lock); } /* - * Retire the given physical map from service. - * Should only be called if the map contains - * no valid mappings. + * pmap_destroy: drop reference count on pmap. free pmap if + * reference count goes to zero. */ + void pmap_destroy(pmap) - register pmap_t pmap; + struct pmap *pmap; { - int count; + int refs; - if (pmap == NULL) + /* + * drop reference count + */ + + simple_lock(&pmap->pm_obj.vmobjlock); + refs = --pmap->pm_obj.uo_refs; + simple_unlock(&pmap->pm_obj.vmobjlock); + if (refs > 0) { return; + } -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_destroy(%x)\n", pmap); -#endif + /* + * reference count is zero, free pmap resources and then free pmap. + */ - simple_lock(&pmap->pm_lock); - count = --pmap->pm_count; - simple_unlock(&pmap->pm_lock); - if (count == 0) { - pmap_release(pmap); - free((caddr_t)pmap, M_VMPMAP); - } + pmap_release(pmap); + pool_put(&pmap_pmap_pool, pmap); } /* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. + * pmap_release: release all resources held by a pmap + * + * => if pmap is still referenced it should be locked + * => XXX: we currently don't expect any busy PTPs because we don't + * allow anything to map them (except for the kernel's private + * recursive mapping) or make them busy. */ + void pmap_release(pmap) - register struct pmap *pmap; + struct pmap *pmap; { + struct vm_page *pg; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_release(%x)\n", pmap); -#endif + /* + * remove it from global list of pmaps + */ -#ifdef DIAGNOSTICx - /* sometimes 1, sometimes 0; could rearrange pmap_destroy */ - if (pmap->pm_count != 1) - panic("pmap_release count"); + simple_lock(&pmaps_lock); + if (pmap == pmaps_hand) + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + LIST_REMOVE(pmap, pm_list); + simple_unlock(&pmaps_lock); + + /* + * free any remaining PTPs + */ + + while (pmap->pm_obj.memq.tqh_first != NULL) { + pg = pmap->pm_obj.memq.tqh_first; +#ifdef DIAGNOSTIC + if (pg->flags & PG_BUSY) + panic("pmap_release: busy page table page"); #endif + /* pmap_page_protect? currently no need for it. */ -#if defined(UVM) + pg->wire_count = 0; + uvm_pagefree(pg); + } + + /* XXX: need to flush it out of other processor's APTE space? 
*/ uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); -#else - kmem_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG); + +#ifdef USER_LDT + if (pmap->pm_flags & PMF_USER_LDT) { + /* + * no need to switch the LDT; this address space is gone, + * nothing is using it. + */ + ldt_free(pmap); + uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt, + pmap->pm_ldt_len * sizeof(union descriptor)); + } #endif } /* * Add a reference to the specified pmap. */ + void pmap_reference(pmap) - pmap_t pmap; + struct pmap *pmap; { + simple_lock(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.uo_refs++; + simple_unlock(&pmap->pm_obj.vmobjlock); +} - if (pmap == NULL) - return; +#if defined(PMAP_FORK) +/* + * pmap_fork: perform any necessary data structure manipulation when + * a VM space is forked. + */ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_reference(%x)", pmap); -#endif +void +pmap_fork(pmap1, pmap2) + struct pmap *pmap1, *pmap2; +{ + simple_lock(&pmap1->pm_obj.vmobjlock); + simple_lock(&pmap2->pm_obj.vmobjlock); + +#ifdef USER_LDT + /* Copy the LDT, if necessary. */ + if (pmap1->pm_flags & PMF_USER_LDT) { + union descriptor *new_ldt; + size_t len; + + len = pmap1->pm_ldt_len * sizeof(union descriptor); + new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len); + bcopy(pmap1->pm_ldt, new_ldt, len); + pmap2->pm_ldt = new_ldt; + pmap2->pm_ldt_len = pmap1->pm_ldt_len; + pmap2->pm_flags |= PMF_USER_LDT; + ldt_alloc(pmap2, new_ldt, len); + } +#endif /* USER_LDT */ - simple_lock(&pmap->pm_lock); - pmap->pm_count++; - simple_unlock(&pmap->pm_lock); + simple_unlock(&pmap2->pm_obj.vmobjlock); + simple_unlock(&pmap1->pm_obj.vmobjlock); } +#endif /* PMAP_FORK */ + +#ifdef USER_LDT +/* + * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and + * restore the default. + */ void -pmap_activate(p) +pmap_ldt_cleanup(p) struct proc *p; { struct pcb *pcb = &p->p_addr->u_pcb; pmap_t pmap = p->p_vmspace->vm_map.pmap; + union descriptor *old_ldt = NULL; + size_t len = 0; + + simple_lock(&pmap->pm_obj.vmobjlock); + + if (pmap->pm_flags & PMF_USER_LDT) { + ldt_free(pmap); + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; + if (pcb == curpcb) + lldt(pcb->pcb_ldt_sel); + old_ldt = pmap->pm_ldt; + len = pmap->pm_ldt_len * sizeof(union descriptor); + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_flags &= ~PMF_USER_LDT; + } - pcb->pcb_cr3 = pmap_extract(pmap_kernel(), (vm_offset_t)pmap->pm_pdir); - if (p == curproc) - lcr3(pcb->pcb_cr3); + simple_unlock(&pmap->pm_obj.vmobjlock); + + if (old_ldt != NULL) + uvm_km_free(kernel_map, (vaddr_t)old_ldt, len); } +#endif /* USER_LDT */ + +/* + * pmap_activate: activate a process' pmap (fill in %cr3 info) + * + * => called from cpu_fork() + * => if proc is the curproc, then load it into the MMU + */ void -pmap_deactivate(p) +pmap_activate(p) struct proc *p; { + struct pcb *pcb = &p->p_addr->u_pcb; + struct pmap *pmap = p->p_vmspace->vm_map.pmap; + + pcb->pcb_pmap = pmap; + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; + pcb->pcb_cr3 = pmap->pm_pdirpa; + if (p == curproc) + lcr3(pcb->pcb_cr3); + if (pcb == curpcb) + lldt(pcb->pcb_ldt_sel); } /* - * Remove the given range of addresses from the specified map. + * pmap_deactivate: deactivate a process' pmap * - * It is assumed that the start and end are properly - * rounded to the page size. + * => XXX: what should this do, if anything? 
*/ + void -pmap_remove(pmap, sva, eva) - struct pmap *pmap; - register vm_offset_t sva, eva; +pmap_deactivate(p) + struct proc *p; { - register pt_entry_t *pte; - vm_offset_t pa; - int bank, off; - int flush = 0; - - sva &= PG_FRAME; - eva &= PG_FRAME; +} - /* - * We need to acquire a pointer to a page table page before entering - * the following loop. - */ - while (sva < eva) { - pte = pmap_pte(pmap, sva); - if (pte) - break; - sva = (sva & PD_MASK) + NBPD; - } +/* + * end of lifecycle functions + */ - while (sva < eva) { - /* only check once in a while */ - if ((sva & PT_MASK) == 0) { - if (!pmap_pde_v(pmap_pde(pmap, sva))) { - /* We can race ahead here, to the next pde. */ - sva += NBPD; - pte += i386_btop(NBPD); - continue; - } - } +/* + * some misc. functions + */ - pte = pmap_pte(pmap, sva); - if (pte == NULL) { - /* We can race ahead here, to the next pde. */ - sva = (sva & PD_MASK) + NBPD; - continue; - } +/* + * pmap_extract: extract a PA for the given VA + */ - if (!pmap_pte_v(pte)) { -#ifdef __GNUC__ - /* - * Scan ahead in a tight loop for the next used PTE in - * this page. We don't scan the whole region here - * because we don't want to zero-fill unused page table - * pages. - */ - int n, m; - - n = min(eva - sva, NBPD - (sva & PT_MASK)) >> PGSHIFT; - __asm __volatile( - "cld\n\trepe\n\tscasl\n\tje 1f\n\tincl %1\n\t1:" - : "=D" (pte), "=c" (m) - : "0" (pte), "1" (n), "a" (0)); - sva += (n - m) << PGSHIFT; - if (!m) - continue; - /* Overshot. */ - --pte; -#else - goto next; -#endif - } +boolean_t +_pmap_extract(pmap, va, pap) + struct pmap *pmap; + vaddr_t va; + paddr_t *pap; +{ + paddr_t retval; + pt_entry_t *ptes; + + if (pmap->pm_pdir[pdei(va)]) { + ptes = pmap_map_ptes(pmap); + retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME); + pmap_unmap_ptes(pmap); + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); + } + return (FALSE); +} - flush = 1; +paddr_t +pmap_extract(pmap, va) + pmap_t pmap; + vaddr_t va; +{ + paddr_t pa; - /* - * Update statistics - */ - if (pmap_pte_w(pte)) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; + if (_pmap_extract(pmap, va, &pa)) + return (pa); + return (NULL); +} - pa = pmap_pte_pa(pte); +/* + * pmap_virtual_space: used during bootup [pmap_steal_memory] to + * determine the bounds of the kernel virtual addess space. + */ - /* - * Invalidate the PTEs. - * XXX: should cluster them up and invalidate as many - * as possible at once. - */ -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("remove: inv pte at %x(%x) ", pte, *pte); -#endif +void +pmap_virtual_space(startp, endp) + vaddr_t *startp; + vaddr_t *endp; +{ + *startp = virtual_avail; + *endp = virtual_end; +} -#ifdef needednotdone -reduce wiring count on page table pages as references drop -#endif +/* + * pmap_map: map a range of PAs into kvm + * + * => used during crash dump + * => XXX: pmap_map() should be phased out? 
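_pmap_extract above rebuilds a physical address from two pieces: the frame bits of the PTE and the page-offset bits of the VA. A short sketch of that split and recombine, assuming 4KB pages and a PG_FRAME-style mask (mask values illustrative):

#define XPAGE_MASK	0xfffUL			/* low 12 bits: offset in page */
#define XFRAME_MASK	(~XPAGE_MASK)		/* stands in for PG_FRAME */

/* given the PTE that maps "va", recover the physical address */
static unsigned long
extract_pa(unsigned long pte, unsigned long va)
{
	/* e.g. pte 0x00123067, va 0xdeadbeef -> pa 0x00123eef */
	return ((pte & XFRAME_MASK) | (va & XPAGE_MASK));
}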
+ */ - if ((bank = vm_physseg_find(atop(pa), &off)) != -1) { - vm_physmem[bank].pmseg.attrs[off] |= - *pte & (PG_M | PG_U); - pmap_remove_pv(pmap, sva, - &vm_physmem[bank].pmseg.pvent[off]); - } +vaddr_t +pmap_map(va, spa, epa, prot) + vaddr_t va; + paddr_t spa, epa; + vm_prot_t prot; +{ + while (spa < epa) { + _pmap_enter(pmap_kernel(), va, spa, prot, 0); + va += NBPG; + spa += NBPG; + } + return va; +} - *pte = 0; +/* + * pmap_zero_page: zero a page + */ -#ifndef __GNUC__ - next: +void +pmap_zero_page(pa) + paddr_t pa; +{ + simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page: lock botch"); #endif - sva += NBPG; - pte++; - } - if (flush) - pmap_update(); + *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + bzero(zerop, NBPG); /* zero */ + *zero_pte = 0; /* zap! */ + pmap_update_pg((vaddr_t)zerop); /* flush TLB */ + simple_unlock(&pmap_zero_page_lock); } /* - * Routine: pmap_remove_all - * Function: - * Removes this physical page from - * all physical maps in which it resides. - * Reflects back modify bits to the pager. + * pmap_copy_page: copy a page */ + void -pmap_remove_all(pa) - vm_offset_t pa; +pmap_copy_page(srcpa, dstpa) + paddr_t srcpa, dstpa; { - struct pv_entry *ph, *pv, *npv; - register pmap_t pmap; - register pt_entry_t *pte; - int bank, off; - int s; - -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) - printf("pmap_remove_all(%x)", pa); - /*pmap_pvdump(pa);*/ + simple_lock(&pmap_copy_page_lock); +#ifdef DIAGNOSTIC + if (*csrc_pte || *cdst_pte) + panic("pmap_copy_page: lock botch"); #endif - bank = vm_physseg_find(atop(pa), &off); - if (bank == -1) - return; + *csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW; + bcopy(csrcp, cdstp, NBPG); + *csrc_pte = *cdst_pte = 0; /* zap! */ + pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp); + simple_unlock(&pmap_copy_page_lock); +} - pv = ph = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); +/* + * p m a p r e m o v e f u n c t i o n s + * + * functions that remove mappings + */ - if (ph->pv_pmap == NULL) { - splx(s); - return; - } +/* + * pmap_remove_ptes: remove PTEs from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + */ - while (pv) { - pmap = pv->pv_pmap; - pte = pmap_pte(pmap, pv->pv_va); +static void +pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) + struct pmap *pmap; + struct pmap_remove_record *pmap_rr; + struct vm_page *ptp; + vaddr_t ptpva; + vaddr_t startva, endva; +{ + struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ + struct pv_entry *pve; + pt_entry_t *pte = (pt_entry_t *) ptpva; + pt_entry_t opte; + int bank, off; -#ifdef DEBUG - if (!pte || !pmap_pte_v(pte) || pmap_pte_pa(pte) != pa) - panic("pmap_remove_all: bad mapping"); -#endif + /* + * note that ptpva points to the PTE that maps startva. this may + * or may not be the first PTE in the PTP. + * + * we loop through the PTP while there are still PTEs to look at + * and the wire_count is greater than 1 (because we use the wire_count + * to keep track of the number of real PTEs in the PTP). + */ - /* - * Update statistics - */ - if (pmap_pte_w(pte)) + for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) + ; pte++, startva += NBPG) { + if (!pmap_valid_entry(*pte)) + continue; /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! 
*/ + if (opte & PG_W) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; + if (pmap_rr) { /* worried about tlb flushing? */ + if (opte & PG_G) { + /* PG_G requires this */ + pmap_update_pg(startva); + } else { + if (pmap_rr->prr_npages < PMAP_RR_MAX) { + pmap_rr->prr_vas[pmap_rr->prr_npages++] + = startva; + } else { + if (pmap_rr->prr_npages == PMAP_RR_MAX) + /* signal an overflow */ + pmap_rr->prr_npages++; + } + } + } + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + /* - * Invalidate the PTEs. - * XXX: should cluster them up and invalidate as many - * as possible at once. + * if we are not on a pv_head list we are done. */ -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("remove: inv pte at %x(%x) ", pte, *pte); + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) + != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", startva); #endif + continue; + } -#ifdef needednotdone -reduce wiring count on page table pages as references drop + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_ptes: unmanaged page marked " + "PG_PVLIST"); #endif - /* - * Update saved attributes for managed page - */ - vm_physmem[bank].pmseg.attrs[off] |= *pte & (PG_M | PG_U); - *pte = 0; + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, + startva); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); - npv = pv->pv_next; - if (pv == ph) - ph->pv_pmap = NULL; - else - pmap_free_pv(pv); - pv = npv; - } - splx(s); + if (pve) { + pve->pv_next = pv_tofree; + pv_tofree = pve; + } - pmap_update(); + /* end of "for" loop: time for next pte */ + } + if (pv_tofree) + pmap_free_pvs(pmap, pv_tofree); } + /* - * Set the physical protection on the - * specified range of this map as requested. + * pmap_remove_pte: remove a single PTE from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + * => returns true if we removed a mapping */ -void -pmap_protect(pmap, sva, eva, prot) - register pmap_t pmap; - vm_offset_t sva, eva; - vm_prot_t prot; + +static boolean_t +pmap_remove_pte(pmap, ptp, pte, va) + struct pmap *pmap; + struct vm_page *ptp; + pt_entry_t *pte; + vaddr_t va; { - register pt_entry_t *pte; - register int i386prot; - int flush = 0; + pt_entry_t opte; + int bank, off; + struct pv_entry *pve; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) - printf("pmap_protect(%x, %x, %x, %x)", pmap, sva, eva, prot); -#endif + if (!pmap_valid_entry(*pte)) + return(FALSE); /* VA not mapped */ - if ((prot & VM_PROT_READ) == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ - if (prot & VM_PROT_WRITE) - return; + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; - sva &= PG_FRAME; - eva &= PG_FRAME; + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); /* flush TLB */ /* - * We need to acquire a pointer to a page table page before entering - * the following loop. + * if we are not on a pv_head list we are done. 
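The pmap_remove_record handling above batches TLB invalidations: each removed VA is remembered up to PMAP_RR_MAX, and anything beyond that is recorded as an overflow so the caller falls back to one full flush instead. A stand-alone sketch of that accumulate-or-overflow scheme; the array size below is illustrative, not the patch's PMAP_RR_MAX:

#define XRR_MAX	16			/* illustrative, not the patch's value */

struct rr_stub {
	int		npages;
	unsigned long	vas[XRR_MAX];
};

/* remember one VA to flush later; going past XRR_MAX just marks overflow */
static void
rr_record(struct rr_stub *rr, unsigned long va)
{
	if (rr->npages < XRR_MAX)
		rr->vas[rr->npages++] = va;
	else if (rr->npages == XRR_MAX)
		rr->npages++;		/* overflow: caller flushes whole TLB */
}

/*
 * caller, once all removals are done:
 *	if (rr.npages > XRR_MAX)	-> flush the whole TLB
 *	else				-> flush each rr.vas[0..npages-1]
 */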
*/ - while (sva < eva) { - pte = pmap_pte(pmap, sva); - if (pte) - break; - sva = (sva & PD_MASK) + NBPD; - } - while (sva < eva) { - /* only check once in a while */ - if ((sva & PT_MASK) == 0) { - if (!pmap_pde_v(pmap_pde(pmap, sva))) { - /* We can race ahead here, to the next pde. */ - sva += NBPD; - pte += i386_btop(NBPD); - continue; - } - } - - if (!pmap_pte_v(pte)) { -#ifdef __GNUC__ - /* - * Scan ahead in a tight loop for the next used PTE in - * this page. We don't scan the whole region here - * because we don't want to zero-fill unused page table - * pages. - */ - int n, m; - - n = min(eva - sva, NBPD - (sva & PT_MASK)) >> PGSHIFT; - __asm __volatile( - "cld\n\trepe\n\tscasl\n\tje 1f\n\tincl %1\n\t1:" - : "=D" (pte), "=c" (m) - : "0" (pte), "1" (n), "a" (0)); - sva += (n - m) << PGSHIFT; - if (!m) - continue; - /* Overshot. */ - --pte; -#else - goto next; + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", va); #endif - } - - flush = 1; - - i386prot = protection_codes[prot]; - if (sva < VM_MAXUSER_ADDRESS) /* see also pmap_enter() */ - i386prot |= PG_u; - else if (sva < VM_MAX_ADDRESS) - i386prot |= PG_u | PG_RW; - pmap_pte_set_prot(pte, i386prot); + return(TRUE); + } -#ifndef __GNUC__ - next: + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_pte: unmanaged page marked PG_PVLIST"); #endif - sva += NBPG; - pte++; - } - if (flush) - pmap_update(); + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) + pmap_free_pv(pmap, pve); + return(TRUE); } /* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte can not be reclaimed. + * pmap_remove: top level mapping removal function * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. + * => caller should not be holding any pmap locks */ + void -pmap_enter(pmap, va, pa, prot, wired, access_type) - register pmap_t pmap; - vm_offset_t va; - register vm_offset_t pa; - vm_prot_t prot; - boolean_t wired; - vm_prot_t access_type; +pmap_remove(pmap, sva, eva) + struct pmap *pmap; + vaddr_t sva, eva; { - register pt_entry_t *pte; - register pt_entry_t npte; - int bank, off; - int flush = 0; - boolean_t cacheable; + pt_entry_t *ptes; + boolean_t result; + paddr_t ptppa; + vaddr_t blkendva; + struct vm_page *ptp; + struct pmap_remove_record pmap_rr, *prr; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) - printf("pmap_enter(%x, %x, %x, %x, %x)", pmap, va, pa, prot, - wired); -#endif + /* + * we lock in the pmap => pv_head direction + */ - if (pmap == NULL) - return; + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ - if (va >= VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: too big"); - /* also, should not muck with PTD va! */ + /* + * removing one page? take shortcut function. 
+ */ -#ifdef DEBUG - if (pmap == pmap_kernel()) - enter_stats.kernel++; - else - enter_stats.user++; -#endif + if (sva + NBPG == eva) { - pte = pmap_pte(pmap, va); - if (!pte) { - /* - * Page Directory table entry not valid, we need a new PT page - * - * we want to vm_fault in a new zero-filled PT page for our - * use. in order to do this, we want to call vm_fault() - * with the VA of where we want to put the PTE. but in - * order to call vm_fault() we need to know which vm_map - * we are faulting in. in the m68k pmap's this is easy - * since all PT pages live in one global vm_map ("pt_map") - * and we have a lot of virtual space we can use for the - * pt_map (since the kernel doesn't have to share its 4GB - * address space with processes). but in the i386 port - * the kernel must live in the top part of the virtual - * address space and PT pages live in their process' vm_map - * rather than a global one. the problem is that we have - * no way of knowing which vm_map is the correct one to - * fault on. - * - * XXX: see NetBSD PR#1834 and Mycroft's posting to - * tech-kern on 7 Jan 1996. - * - * rather than always calling panic, we try and make an - * educated guess as to which vm_map to use by using curproc. - * this is a workaround and may not fully solve the problem? - */ - struct vm_map *vmap; - int rv; - vm_offset_t v; - - if (curproc == NULL || curproc->p_vmspace == NULL || - pmap != curproc->p_vmspace->vm_map.pmap) - panic("ptdi %x", pmap->pm_pdir[PTDPTDI]); - - /* our guess about the vm_map was good! fault it in. */ - - vmap = &curproc->p_vmspace->vm_map; - v = trunc_page(vtopte(va)); -#ifdef DEBUG - printf("faulting in a pt page map %x va %x\n", vmap, v); -#endif -#if defined(UVM) - rv = uvm_fault(vmap, v, 0, VM_PROT_READ|VM_PROT_WRITE); -#else - rv = vm_fault(vmap, v, VM_PROT_READ|VM_PROT_WRITE, FALSE); -#endif - if (rv != KERN_SUCCESS) - panic("ptdi2 %x", pmap->pm_pdir[PTDPTDI]); -#if defined(UVM) - /* - * XXX It is possible to get here from uvm_fault with vmap - * locked. uvm_map_pageable requires it to be unlocked, so - * try to record the state of the lock, unlock it, and then - * after the call, reacquire the original lock. - * THIS IS A GROSS HACK! - */ - { - int ls = lockstatus(&vmap->lock); - - if (ls) - lockmgr(&vmap->lock, LK_RELEASE, (void *)0, - curproc); - uvm_map_pageable(vmap, v, round_page(v+1), FALSE); - if (ls) - lockmgr(&vmap->lock, ls, (void *)0, curproc); - } -#else - vm_map_pageable(vmap, v, round_page(v+1), FALSE); -#endif - pte = pmap_pte(pmap, va); - if (!pte) - panic("ptdi3 %x", pmap->pm_pdir[PTDPTDI]); - } -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: pte %x, *pte %x ", pte, *pte); + if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { + + /* PA of the PTP */ + ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME; + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == + ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged " + "PTP detected"); #endif + } + } - if (pmap_pte_v(pte)) { - register vm_offset_t opa; + /* do it! */ + result = pmap_remove_pte(pmap, ptp, + &ptes[i386_btop(sva)], sva); - /* - * Check for wiring change and adjust statistics. 
- */ - if ((wired && !pmap_pte_w(pte)) || - (!wired && pmap_pte_w(pte))) { /* - * We don't worry about wiring PT pages as they remain - * resident as long as there are valid mappings in them. - * Hence, if a user page is wired, the PT page will be also. + * if mapping removed and the PTP is no longer + * being used, free it! */ -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: wiring change -> %x ", wired); -#endif - if (wired) - pmap->pm_stats.wired_count++; - else - pmap->pm_stats.wired_count--; -#ifdef DEBUG - enter_stats.wchange++; -#endif - } - - flush = 1; - opa = pmap_pte_pa(pte); - /* - * Mapping has not changed, must be protection or wiring change. - */ - if (opa == pa) { -#ifdef DEBUG - enter_stats.pwchange++; + if (result && ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ +#if defined(I386_CPU) + /* already dumped whole TLB on i386 */ + if (cpu_class != CPUCLASS_386) #endif - goto validate; - } - - /* - * Mapping has changed, invalidate old range and fall through to - * handle validating new mapping. - */ -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: removing old mapping %x pa %x ", va, opa); -#endif - if ((bank = vm_physseg_find(atop(opa), &off)) != -1) { - vm_physmem[bank].pmseg.attrs[off] |= - *pte & (PG_M | PG_U); - pmap_remove_pv(pmap, va, - &vm_physmem[bank].pmseg.pvent[off]); + { + pmap_update_pg(((vaddr_t) ptes) + + ptp->offset); + } + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) + pmap->pm_ptphint = + pmap->pm_obj.memq.tqh_first; + ptp->wire_count = 0; + uvm_pagefree(ptp); + } } -#ifdef DEBUG - enter_stats.mchange++; -#endif - } else { - /* - * Increment counters - */ - pmap->pm_stats.resident_count++; - if (wired) - pmap->pm_stats.wired_count++; + + pmap_unmap_ptes(pmap); /* unlock pmap */ + PMAP_MAP_TO_HEAD_UNLOCK(); + return; } /* - * Enter on the PV list if part of our managed memory + * removing a range of pages: we unmap in PTP sized blocks (4MB) + * + * if we are the currently loaded pmap, we use prr to keep track + * of the VAs we unload so that we can flush them out of the tlb. */ - if ((bank = vm_physseg_find(atop(pa), &off)) != -1) { -#ifdef DEBUG - enter_stats.managed++; -#endif - pmap_enter_pv(pmap, va, &vm_physmem[bank].pmseg.pvent[off]); - cacheable = TRUE; - } else if (pmap_initialized) { -#ifdef DEBUG - enter_stats.unmanaged++; -#endif - /* - * Assumption: if it is not part of our managed memory - * then it must be device memory which may be volatile. - */ - cacheable = FALSE; + + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; } -validate: - /* - * Now validate mapping with desired protection/wiring. - * Assume uniform modified and referenced status for all - * I386 pages in a MACH page. - */ - npte = (pa & PG_FRAME) | protection_codes[prot] | PG_V; - if (wired) - npte |= PG_W; + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; - if (va < VM_MAXUSER_ADDRESS) /* i.e. below USRSTACK */ - npte |= PG_u; - else if (va < VM_MAX_ADDRESS) /* - * Page tables need to be user RW, for some reason, and the - * user area must be writable too. Anything above - * VM_MAXUSER_ADDRESS is protected from user access by - * the user data and code segment descriptors, so this is OK. + * XXXCDC: our PTE mappings should never be removed + * with pmap_remove! if we allow this (and why would + * we?) 
then we end up freeing the pmap's page + * directory page (PDP) before we are finished using + * it when we hit in in the recursive mapping. this + * is BAD. + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. */ - npte |= PG_u | PG_RW; -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: new pte value %x ", npte); + if (pdei(sva) == PDSLOT_PTE) + /* XXXCDC: ugly hack to avoid freeing PDP here */ + continue; + + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + /* valid block? */ + continue; + + /* PA of the PTP */ + ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME); + + /* get PTP if non-kernel mapping */ + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP " + "detected"); #endif + } + } + pmap_remove_ptes(pmap, prr, ptp, + (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva); + + /* if PTP is no longer being used, free it! */ + if (ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ + pmap_update_pg( ((vaddr_t) ptes) + ptp->offset); +#if defined(I386_CPU) + /* cancel possible pending pmap update on i386 */ + if (cpu_class == CPUCLASS_386 && prr) + prr->prr_npages = 0; +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first; + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } - *pte = npte; - if (flush) - pmap_update(); + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else +#endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg( + prr->prr_vas[--prr->prr_npages]); + } + } + } /* not I386 */ + } + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); } /* - * pmap_page_protect: + * pmap_page_remove: remove a managed vm_page from all pmaps that map it * - * Lower the permission for all mappings to a given page. + * => we set pv_head => pmap locking + * => R/M bits are sync'd back to attrs */ + void -pmap_page_protect(phys, prot) - vm_offset_t phys; - vm_prot_t prot; +pmap_page_remove(pg) + struct vm_page *pg; { + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, opte; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif - switch (prot) { - case VM_PROT_READ: - case VM_PROT_READ|VM_PROT_EXECUTE: - pmap_copy_on_write(phys); - break; - case VM_PROT_ALL: - break; - default: - pmap_remove_all(phys); - break; + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_page_remove: unmanaged page?\n"); + return; } + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return; + } + + /* set pv_head => pmap locking */ + PMAP_HEAD_TO_MAP_LOCK(); + + /* XXX: needed if we hold head->map lock? 
*/ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + PG_FRAME) + != VM_PAGE_TO_PHYS(pve->pv_ptp)) { + printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", + pg, pve->pv_va, pve->pv_ptp); + printf("pmap_page_remove: PTP's phys addr: " + "actual=%x, recorded=%lx\n", + (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); + panic("pmap_page_remove: mapped managed page has " + "invalid pv_ptp field"); + } +#endif + + opte = ptes[i386_btop(pve->pv_va)]; + ptes[i386_btop(pve->pv_va)] = 0; /* zap! */ + + if (opte & PG_W) + pve->pv_pmap->pm_stats.wired_count--; + pve->pv_pmap->pm_stats.resident_count--; + + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* update the PTP reference count. free if last reference. */ + if (pve->pv_ptp) { + pve->pv_ptp->wire_count--; + if (pve->pv_ptp->wire_count <= 1) { + /* zap! */ + pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0; + pmap_update_pg(((vaddr_t)ptes) + + pve->pv_ptp->offset); +#if defined(I386_CPU) + needs_update = FALSE; +#endif + pve->pv_pmap->pm_stats.resident_count--; + /* update hint? */ + if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) + pve->pv_pmap->pm_ptphint = + pve->pv_pmap->pm_obj.memq.tqh_first; + pve->pv_ptp->wire_count = 0; + uvm_pagefree(pve->pv_ptp); + } + } + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + } + pmap_free_pvs(NULL, pvh->pvh_list); + pvh->pvh_list = NULL; + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * p m a p a t t r i b u t e f u n c t i o n s + * functions that test/change managed page's attributes + * since a page can be mapped multiple times we must check each PTE that + * maps it by going down the pv lists. */ -void -pmap_change_wiring(pmap, va, wired) - register pmap_t pmap; - vm_offset_t va; - boolean_t wired; -{ - register pt_entry_t *pte; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_change_wiring(%x, %x, %x)", pmap, va, wired); -#endif +/* + * pmap_test_attrs: test a page's attributes + * + * => we set pv_head => pmap locking + */ - pte = pmap_pte(pmap, va); - if (!pte) - return; +boolean_t +pmap_test_attrs(pg, testbits) + struct vm_page *pg; + int testbits; +{ + int bank, off; + char *myattrs; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, pte; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_test_attrs: unmanaged page?\n"); + return(FALSE); + } -#ifdef DEBUG /* - * Page not valid. Should this ever happen? - * Just continue and change wiring anyway. + * before locking: see if attributes are already set and if so, + * return! 
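pmap_test_attrs consults the cached attribute byte before taking any locks and only walks the page's mappings on a miss, OR-ing each PTE's referenced/modified bits into the cache as it goes. A compact sketch of that early-out accumulation, with illustrative bit values standing in for PG_U and PG_M:

#define XPG_U	0x20		/* "referenced" bit, illustrative value */
#define XPG_M	0x40		/* "modified" bit, illustrative value */

/* return nonzero as soon as one of testbits is known to be set */
static int
test_attrs(unsigned char *cached, const unsigned long *ptes, int nptes,
    int testbits)
{
	int i;

	if (*cached & testbits)
		return (1);			/* cheap hit, no walk needed */
	for (i = 0; i < nptes && (*cached & testbits) == 0; i++)
		*cached |= (ptes[i] & (XPG_U | XPG_M));
	return ((*cached & testbits) != 0);
}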
*/ - if (!pmap_pte_v(pte)) { - if (pmapdebug & PDB_PARANOIA) - printf("pmap_change_wiring: invalid PTE for %x ", va); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + if (*myattrs & testbits) + return(TRUE); + + /* test to see if there is a list before bothering to lock */ + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return(FALSE); } -#endif - if ((wired && !pmap_pte_w(pte)) || (!wired && pmap_pte_w(pte))) { - if (wired) - pmap->pm_stats.wired_count++; - else - pmap->pm_stats.wired_count--; - pmap_pte_set_w(pte, wired); + /* nope, gonna have to do it the hard way */ + PMAP_HEAD_TO_MAP_LOCK(); + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; + pve = pve->pv_next) { + ptes = pmap_map_ptes(pve->pv_pmap); + pte = ptes[i386_btop(pve->pv_va)]; + pmap_unmap_ptes(pve->pv_pmap); + *myattrs |= pte; } + + /* + * note that we will exit the for loop with a non-null pve if + * we have found the bits we are testing for. + */ + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + return((*myattrs & testbits) != 0); } /* - * Routine: pmap_pte - * Function: - * Extract the page table entry associated - * with the given map/virtual_address pair. + * pmap_change_attrs: change a page's attributes + * + * => we set pv_head => pmap locking + * => we return TRUE if we cleared one of the bits we were asked to */ -pt_entry_t * -pmap_pte(pmap, va) - register pmap_t pmap; - vm_offset_t va; + +boolean_t +pmap_change_attrs(pg, setbits, clearbits) + struct vm_page *pg; + int setbits, clearbits; { - pt_entry_t *ptp; + u_int32_t result; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, npte; + char *myattrs; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_change_attrs: unmanaged page?\n"); + return(FALSE); + } -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_pte(%x, %x) ->\n", pmap, va); + PMAP_HEAD_TO_MAP_LOCK(); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + result = *myattrs & clearbits; + *myattrs = (*myattrs | setbits) & ~clearbits; + + for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { +#ifdef DIAGNOSTIC + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) { + printf("pmap_change_attrs: found pager VA on pv_list"); + } + if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) + panic("pmap_change_attrs: mapping without PTP " + "detected"); #endif - if (!pmap || !pmap_pde_v(pmap_pde(pmap, va))) - return NULL; + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + npte = ptes[i386_btop(pve->pv_va)]; + result |= (npte & clearbits); + npte = (npte | setbits) & ~clearbits; + if (ptes[i386_btop(pve->pv_va)] != npte) { + ptes[i386_btop(pve->pv_va)] = npte; /* zap! 
*/ - if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde & PG_FRAME) || - pmap == pmap_kernel()) - /* current address space or kernel */ - ptp = PTmap; - else { - /* alternate address space */ - if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) != (APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } } - ptp = APTmap; + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ } - return ptp + i386_btop(va); + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif + return(result != 0); } /* - * Routine: pmap_extract - * Function: - * Extract the physical page address associated - * with the given map/virtual_address pair. + * p m a p p r o t e c t i o n f u n c t i o n s */ -vm_offset_t -pmap_extract(pmap, va) - register pmap_t pmap; - vm_offset_t va; -{ - register pt_entry_t *pte; - register vm_offset_t pa; - -#ifdef DEBUGx - if (pmapdebug & PDB_FOLLOW) - printf("pmap_extract(%x, %x) -> ", pmap, va); -#endif - pte = pmap_pte(pmap, va); - if (!pte) - return NULL; - if (!pmap_pte_v(pte)) - return NULL; +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => NOTE: this is an inline function in pmap.h + */ - pa = pmap_pte_pa(pte); -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("%x\n", pa); -#endif - return pa | (va & ~PG_FRAME); -} +/* see pmap.h */ /* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. + * pmap_protect: set the protection in of the pages in a pmap * - * This routine is only advisory and need not do anything. + * => NOTE: this is an inline function in pmap.h */ -void -pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) - pmap_t dst_pmap, src_pmap; - vm_offset_t dst_addr, src_addr; - vm_size_t len; -{ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_copy(%x, %x, %x, %x, %x)", - dst_pmap, src_pmap, dst_addr, len, src_addr); -#endif -} +/* see pmap.h */ /* - * Routine: pmap_collect - * Function: - * Garbage collects the physical map system for - * pages which are no longer used. - * Success need not be guaranteed -- that is, there - * may well be pages which are not referenced, but - * others may be collected. - * Usage: - * Called by the pageout daemon when pages are scarce. - * [ needs to be written -wfj ] XXXX + * pmap_write_protect: write-protect pages in a pmap */ + void -pmap_collect(pmap) - pmap_t pmap; +pmap_write_protect(pmap, sva, eva, prot) + struct pmap *pmap; + vaddr_t sva, eva; + vm_prot_t prot; { -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_collect(%x) ", pmap); -#endif + pt_entry_t *ptes, *spte, *epte, npte; + struct pmap_remove_record pmap_rr, *prr; + vaddr_t blockend, va; + u_int32_t md_prot; - if (pmap != pmap_kernel()) - return; + ptes = pmap_map_ptes(pmap); /* locks pmap */ -} + /* need to worry about TLB? [TLB stores protection bits] */ + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; + } -#if DEBUG -void -pmap_dump_pvlist(phys, m) - vm_offset_t phys; - char *m; -{ - register struct pv_entry *pv; - int bank, off; + /* should be ok, but just in case ... 
*/ + sva &= PG_FRAME; + eva &= PG_FRAME; - if (!(pmapdebug & PDB_PARANOIA)) - return; + for (/* null */ ; sva < eva ; sva = blockend) { - if (!pmap_initialized) - return; - printf("%s %08x:", m, phys); - bank = vm_physseg_find(atop(phys), &off); - pv = &vm_physmem[bank].pmseg.pvent[off]; - if (pv->pv_pmap == NULL) { - printf(" no mappings\n"); - return; + blockend = (sva & PD_MASK) + NBPD; + if (blockend > eva) + blockend = eva; + + /* + * XXXCDC: our PTE mappings should never be write-protected! + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + /* XXXCDC: ugly hack to avoid freeing PDP here */ + if (pdei(sva) == PDSLOT_PTE) + continue; + + /* empty block? */ + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + continue; + + md_prot = protection_codes[prot]; + if (sva < VM_MAXUSER_ADDRESS) + md_prot |= PG_u; + else if (sva < VM_MAX_ADDRESS) + /* XXX: write-prot our PTES? never! */ + md_prot |= (PG_u | PG_RW); + + spte = &ptes[i386_btop(sva)]; + epte = &ptes[i386_btop(blockend)]; + + for (/*null */; spte < epte ; spte++) { + + if (!pmap_valid_entry(*spte)) /* no mapping? */ + continue; + + npte = (*spte & ~PG_PROT) | md_prot; + + if (npte != *spte) { + *spte = npte; /* zap! */ + + if (prr) { /* worried about tlb flushing? */ + va = i386_ptob(spte - ptes); + if (npte & PG_G) { + /* PG_G requires this */ + pmap_update_pg(va); + } else { + if (prr->prr_npages < + PMAP_RR_MAX) { + prr->prr_vas[ + prr->prr_npages++] = + va; + } else { + if (prr->prr_npages == + PMAP_RR_MAX) + /* signal an overflow */ + prr->prr_npages++; + } + } + } /* if (prr) */ + } /* npte != *spte */ + } /* for loop */ } - for (; pv; pv = pv->pv_next) - printf(" pmap %08x va %08x", pv->pv_pmap, pv->pv_va); - printf("\n"); -} -#else -#define pmap_dump_pvlist(a,b) + + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else #endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg(prr->prr_vas[ + --prr->prr_npages]); + } + } + } /* not I386 */ + } + pmap_unmap_ptes(pmap); /* unlocks pmap */ +} /* - * pmap_zero_page zeros the specified by mapping it into - * virtual memory and using bzero to clear its contents. 
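/*
 * A standalone sketch of the removal-record batching used by
 * pmap_write_protect above: up to PMAP_RR_MAX changed VAs are
 * remembered and flushed one page at a time; counting one past the
 * limit marks an overflow and forces a full TLB flush.  flush_all()
 * and flush_page() are stand-ins for pmap_update()/pmap_update_pg().
 */
#define PMAP_RR_MAX	16			/* same limit as above */

struct prr {
	int		prr_npages;
	unsigned long	prr_vas[PMAP_RR_MAX];
};

static void
prr_record(struct prr *prr, unsigned long va)
{
	if (prr->prr_npages < PMAP_RR_MAX)
		prr->prr_vas[prr->prr_npages++] = va;
	else if (prr->prr_npages == PMAP_RR_MAX)
		prr->prr_npages++;		/* signal an overflow */
}

static void
prr_flush(struct prr *prr, void (*flush_all)(void),
    void (*flush_page)(unsigned long))
{
	if (prr->prr_npages > PMAP_RR_MAX) {
		flush_all();			/* overflowed: flush everything */
		return;
	}
	while (prr->prr_npages)
		flush_page(prr->prr_vas[--prr->prr_npages]);
}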
+ * end of protection functions */ + +/* + * pmap_unwire: clear the wired bit in the PTE + * + * => mapping should already be in map + */ + void -pmap_zero_page(phys) - register vm_offset_t phys; +pmap_change_wiring(pmap, va, wired) + struct pmap *pmap; + vaddr_t va; + boolean_t wired; { + pt_entry_t *ptes; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_zero_page(%x)", phys); -#endif + if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { + ptes = pmap_map_ptes(pmap); /* locks pmap */ - pmap_dump_pvlist(phys, "pmap_zero_page: phys"); - *CMAP2 = (phys & PG_FRAME) | PG_V | PG_KW /*| PG_N*/; - pmap_update(); - bzero(CADDR2, NBPG); +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(ptes[i386_btop(va)])) + panic("pmap_unwire: invalid (unmapped) va"); +#endif + if ((ptes[i386_btop(va)] & PG_W) != 0) { + ptes[i386_btop(va)] &= ~PG_W; + pmap->pm_stats.wired_count--; + } +#if 0 +#ifdef DIAGNOSITC + else { + printf("pmap_unwire: wiring for pmap %p va 0x%lx " + "didn't change!\n", pmap, va); + } +#endif +#endif + pmap_unmap_ptes(pmap); /* unlocks map */ + } +#ifdef DIAGNOSTIC + else { + panic("pmap_unwire: invalid PDE"); + } +#endif } /* - * pmap_copy_page copies the specified page by mapping - * it into virtual memory and using bcopy to copy its - * contents. + * pmap_collect: free resources held by a pmap + * + * => optional function. + * => called when a process is swapped out to free memory. */ + void -pmap_copy_page(src, dst) - register vm_offset_t src, dst; +pmap_collect(pmap) + struct pmap *pmap; { + /* + * free all of the pt pages by removing the physical mappings + * for its entire address space. + */ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_copy_page(%x, %x)", src, dst); -#endif - - pmap_dump_pvlist(src, "pmap_copy_page: src"); - pmap_dump_pvlist(dst, "pmap_copy_page: dst"); - *CMAP1 = (src & PG_FRAME) | PG_V | PG_KR; - *CMAP2 = (dst & PG_FRAME) | PG_V | PG_KW /*| PG_N*/; - pmap_update(); - bcopy(CADDR1, CADDR2, NBPG); + pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS); } /* - * Routine: pmap_pageable - * Function: - * Make the specified pages (by pmap, offset) - * pageable (or not) as requested. + * pmap_transfer: transfer (move or copy) mapping from one pmap + * to another. * - * A page which is not pageable may not take - * a fault; therefore, its page table entry - * must remain valid for the duration. - * - * This routine is merely advisory; pmap_enter - * will specify that these pages are to be wired - * down (or not) as appropriate. + * => this function is optional, it doesn't have to do anything + * => we assume that the mapping in the src pmap is valid (i.e. that + * it doesn't run off the end of the map's virtual space). + * => we assume saddr, daddr, and len are page aligned/lengthed */ void -pmap_pageable(pmap, sva, eva, pageable) - pmap_t pmap; - vm_offset_t sva, eva; - boolean_t pageable; +pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move) + struct pmap *dstpmap, *srcpmap; + vaddr_t daddr, saddr; + vsize_t len; + boolean_t move; { + /* base address of PTEs, dst could be NULL */ + pt_entry_t *srcptes, *dstptes; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_pageable(%x, %x, %x, %x)", - pmap, sva, eva, pageable); + struct pmap_transfer_location srcl, dstl; + int dstvalid; /* # of PTEs left in dst's current PTP */ + struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */ + vsize_t blklen; + int blkpgs, toxfer; + boolean_t ok; + +#ifdef DIAGNOSTIC + /* + * sanity check: let's make sure our len doesn't overflow our dst + * space. 
+ */ + + if (daddr < VM_MAXUSER_ADDRESS) { + if (VM_MAXUSER_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in user pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } else if (daddr < VM_MIN_KERNEL_ADDRESS || + daddr >= VM_MAX_KERNEL_ADDRESS) { + printf("pmap_transfer: invalid transfer address 0x%lx\n", + daddr); + } else { + if (VM_MAX_KERNEL_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in kernel pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } #endif /* - * If we are making a PT page pageable then all valid - * mappings must be gone from that page. Hence it should - * be all zeros and there is no need to clean it. - * Assumption: - * - PT pages have only one pv_table entry - * - PT pages are the only single-page allocations - * between the user stack and kernel va's - * See also pmap_enter & pmap_protect for rehashes of this... + * ideally we would like to have either src or dst pmap's be the + * current pmap so that we can map the other one in APTE space + * (if needed... one of the maps could be the kernel's pmap). + * + * however, if we can't get this, then we have to use the tmpmap + * (alternately we could punt). */ - if (pageable && - pmap == pmap_kernel() && - sva >= VM_MAXUSER_ADDRESS && eva <= VM_MAX_ADDRESS && - eva - sva == NBPG) { - register vm_offset_t pa; - register pt_entry_t *pte; + if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) { + dstptes = NULL; /* dstptes NOT mapped */ + srcptes = pmap_map_ptes(srcpmap); /* let's map the source */ + mapped_pmap = srcpmap; + } else { + if (!pmap_is_curpmap(srcpmap)) { + srcptes = pmap_map_ptes(srcpmap); /* possible APTE */ + dstptes = PTE_BASE; + mapped_pmap = srcpmap; + } else { + dstptes = pmap_map_ptes(dstpmap); /* possible APTE */ + srcptes = PTE_BASE; + mapped_pmap = dstpmap; + } + } + + /* + * at this point we know that the srcptes are mapped. the dstptes + * are mapped if (dstptes != NULL). if (dstptes == NULL) then we + * will have to map the dst PTPs page at a time using the tmpmap. + * [XXX: is it worth the effort, or should we just punt?] + */ + + srcl.addr = saddr; + srcl.pte = &srcptes[i386_btop(srcl.addr)]; + srcl.ptp = NULL; + dstl.addr = daddr; + if (dstptes) + dstl.pte = &dstptes[i386_btop(dstl.addr)]; + else + dstl.pte = NULL; /* we map page at a time */ + dstl.ptp = NULL; + dstvalid = 0; /* force us to load a new dst PTP to start */ + + while (len) { + + /* + * compute the size of this block. + */ + + /* length in bytes */ + blklen = i386_round_pdr(srcl.addr+1) - srcl.addr; + if (blklen > len) + blklen = len; + blkpgs = i386_btop(blklen); + + /* + * if the block is not valid in the src pmap, + * then we can skip it! + */ + + if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) { + len = len - blklen; + srcl.pte = srcl.pte + blkpgs; + srcl.addr += blklen; + dstl.addr += blklen; + if (blkpgs > dstvalid) { + dstvalid = 0; + dstl.ptp = NULL; + } else { + dstvalid = dstvalid - blkpgs; + } + if (dstptes == NULL && (len == 0 || dstvalid == 0)) { + if (dstl.pte) { + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + } else { + dstl.pte += blkpgs; + } + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * if we don't have any dst PTEs ready, then get some. 
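/*
 * A small sketch of the block-size arithmetic pmap_transfer uses above:
 * each iteration handles at most the remainder of the current 4MB PTP,
 * so a source PTP whose PDE is invalid can be skipped in one step.
 * round_pdr() mirrors i386_round_pdr(); transfer_blklen is illustrative.
 */
#define NBPD		(1UL << 22)		/* bytes mapped by one PDE (4MB) */
#define round_pdr(x)	(((x) + NBPD - 1) & ~(NBPD - 1))

static unsigned long
transfer_blklen(unsigned long addr, unsigned long len)
{
	/* from addr to the end of its PTP, clamped to what is left */
	unsigned long blklen = round_pdr(addr + 1) - addr;

	return ((blklen > len) ? len : blklen);
}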
+ */ + + if (dstvalid == 0) { + if (!pmap_valid_entry(dstpmap-> + pm_pdir[pdei(dstl.addr)])) { #ifdef DIAGNOSTIC - int bank, off; - register struct pv_entry *pv; -#endif + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel " + "PTP at 0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, + pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) - + dstl.addr); + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap-> + pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid); + } + } -#ifdef DEBUG - if ((pmapdebug & (PDB_FOLLOW|PDB_PTPAGE)) == PDB_PTPAGE) - printf("pmap_pageable(%x, %x, %x, %x)", - pmap, sva, eva, pageable); -#endif + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * we have a valid dst block of "dstvalid" PTEs ready. + * thus we can transfer min(blkpgs, dstvalid) PTEs now. + */ - pte = pmap_pte(pmap, sva); - if (!pte) - return; - if (!pmap_pte_v(pte)) - return; + srcl.ptp = NULL; /* don't know source PTP yet */ + if (dstvalid < blkpgs) + toxfer = dstvalid; + else + toxfer = blkpgs; - pa = pmap_pte_pa(pte); + if (toxfer > 0) { + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, + toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + if (blkpgs == 0) /* out of src PTEs? restart */ + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we have just used up our "dstvalid" + * PTEs, and thus must obtain more dst PTEs to finish + * off the src block. since we are now going to + * obtain a brand new dst PTP, we know we can finish + * the src block in one more transfer. + */ #ifdef DIAGNOSTIC - if ((*pte & (PG_u | PG_RW)) != (PG_u | PG_RW)) - printf("pmap_pageable: unexpected pte=%x va %x\n", - *pte, sva); - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return; - pv = &vm_physmem[bank].pmseg.pvent[off]; - if (pv->pv_va != sva || pv->pv_next) { - printf("pmap_pageable: bad PT page va %x next %x\n", - pv->pv_va, pv->pv_next); - return; + if (dstvalid) + panic("pmap_transfer: dstvalid non-zero after drain"); + if ((dstl.addr & (NBPD-1)) != 0) + panic("pmap_transfer: dstaddr not on PD boundary " + "(0x%lx)\n", dstl.addr); +#endif + + if (dstptes == NULL && dstl.pte != NULL) { + /* dispose of old PT mapping */ + pmap_tmpunmap_pa(); + dstl.pte = NULL; } + + /* + * get new dst PTP + */ + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at " + "0x%lx", dstl.addr); #endif + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + + dstvalid = PTES_PER_PTP; /* new PTP */ + + /* + * if the dstptes are un-mapped, then we need to tmpmap in the + * dstl.ptp. + */ + + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + } /* - * Mark it unmodified to avoid pageout + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we just got a brand new dst PTP to + * receive these PTEs. 
*/ - pmap_clear_modify(pa); -#ifdef needsomethinglikethis - if (pmapdebug & PDB_PTPAGE) - printf("pmap_pageable: PT page %x(%x) unmodified\n", - sva, *pmap_pte(pmap, sva)); - if (pmapdebug & PDB_WIRING) - pmap_check_wiring("pageable", sva); +#ifdef DIAGNOSTIC + if (dstvalid < blkpgs) + panic("pmap_transfer: too many blkpgs?"); #endif + toxfer = blkpgs; + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, + move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + + /* + * done src pte block + */ } + if (dstptes == NULL && dstl.pte != NULL) + pmap_tmpunmap_pa(); /* dst PTP still mapped? */ + pmap_unmap_ptes(mapped_pmap); } /* - * Miscellaneous support routines follow + * pmap_transfer_ptes: transfer PTEs from one pmap to another + * + * => we assume that the needed PTPs are mapped and that we will + * not cross a block boundary. + * => we return TRUE if we transfered all PTEs, FALSE if we were + * unable to allocate a pv_entry */ -void -i386_protection_init() -{ - - protection_codes[VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE] = 0; - protection_codes[VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE] = - protection_codes[VM_PROT_NONE | VM_PROT_READ | VM_PROT_NONE] = - protection_codes[VM_PROT_NONE | VM_PROT_READ | VM_PROT_EXECUTE] = PG_RO; - protection_codes[VM_PROT_WRITE | VM_PROT_NONE | VM_PROT_NONE] = - protection_codes[VM_PROT_WRITE | VM_PROT_NONE | VM_PROT_EXECUTE] = - protection_codes[VM_PROT_WRITE | VM_PROT_READ | VM_PROT_NONE] = - protection_codes[VM_PROT_WRITE | VM_PROT_READ | VM_PROT_EXECUTE] = PG_RW; -} -boolean_t -pmap_testbit(pa, setbits) - register vm_offset_t pa; - int setbits; +static boolean_t +pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move) + struct pmap *srcpmap, *dstpmap; + struct pmap_transfer_location *srcl, *dstl; + int toxfer; + boolean_t move; { - register struct pv_entry *pv; - register pt_entry_t *pte; - int s; + pt_entry_t dstproto, opte; int bank, off; + struct pv_head *pvh; + struct pv_entry *pve, *lpve; - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return FALSE; - pv = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); + /* + * generate "prototype" dst PTE + */ + + if (dstl->addr < VM_MAX_ADDRESS) + dstproto = PG_u; /* "user" page */ + else + dstproto = pmap_pg_g; /* kernel page */ /* - * Check saved info first + * ensure we have dst PTP for user addresses. */ - if (vm_physmem[bank].pmseg.attrs[off] & setbits) { - splx(s); - return TRUE; - } + + if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS) + dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] & + PG_FRAME); /* - * Not found, check current mappings returning - * immediately if found. + * main loop over range */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if (*pte & setbits) { - splx(s); - return TRUE; - } + + for (/*null*/; toxfer > 0 ; toxfer--, + srcl->addr += NBPG, dstl->addr += NBPG, + srcl->pte++, dstl->pte++) { + + if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entrys */ + continue; + +#ifdef DIAGNOSTIC + if (pmap_valid_entry(*dstl->pte)) + panic("pmap_transfer_ptes: attempt to overwrite " + "active entry"); +#endif + + /* + * let's not worry about non-pvlist mappings (typically device + * pager mappings). + */ + + opte = *srcl->pte; + + if ((opte & PG_PVLIST) == 0) + continue; + + /* + * if we are moving the mapping, then we can just adjust the + * current pv_entry. 
if we are copying the mapping, then we + * need to allocate a new pv_entry to account for it. + */ + + if (move == FALSE) { + pve = pmap_alloc_pv(dstpmap, ALLOCPV_TRY); + if (pve == NULL) + return(FALSE); /* punt! */ + } else { + pve = NULL; /* XXX: quiet gcc warning */ } + + /* + * find the pv_head for this mapping. since our mapping is + * on the pvlist (PG_PVLIST), there must be a pv_head. + */ + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_transfer_ptes: PG_PVLIST PTE and " + "no pv_head!"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + + /* + * now lock down the pvhead and find the current entry (there + * must be one). + */ + + simple_lock(&pvh->pvh_lock); + for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next) + if (lpve->pv_pmap == srcpmap && + lpve->pv_va == srcl->addr) + break; +#ifdef DIAGNOSTIC + if (lpve == NULL) + panic("pmap_transfer_ptes: PG_PVLIST PTE, but " + "entry not found"); +#endif + + /* + * update src ptp. if the ptp is null in the pventry, then + * we are not counting valid entrys for this ptp (this is only + * true for kernel PTPs). + */ + + if (srcl->ptp == NULL) + srcl->ptp = lpve->pv_ptp; +#ifdef DIAGNOSTIC + if (srcl->ptp && + (srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) != + VM_PAGE_TO_PHYS(srcl->ptp)) + panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!"); +#endif + + /* + * for move, update the pve we just found (lpve) to + * point to its new mapping. for copy, init the new + * pve and put it in the list. + */ + + if (move == TRUE) { + pve = lpve; + } + pve->pv_pmap = dstpmap; + pve->pv_va = dstl->addr; + pve->pv_ptp = dstl->ptp; + if (move == FALSE) { /* link in copy */ + pve->pv_next = lpve->pv_next; + lpve->pv_next = pve; + } + + /* + * sync the R/M bits while we are here. + */ + + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* + * now actually update the ptes and unlock the pvlist. + */ + + if (move) { + *srcl->pte = 0; /* zap! */ + if (pmap_is_curpmap(srcpmap)) + pmap_update_pg(srcl->addr); + if (srcl->ptp) + /* don't bother trying to free PTP */ + srcl->ptp->wire_count--; + srcpmap->pm_stats.resident_count--; + if (opte & PG_W) + srcpmap->pm_stats.wired_count--; + } + *dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto; + dstpmap->pm_stats.resident_count++; + if (dstl->ptp) + dstl->ptp->wire_count++; + simple_unlock(&pvh->pvh_lock); } - splx(s); - return FALSE; + return(TRUE); } /* - * Modify pte bits for all ptes corresponding to the given physical address. - * We use `maskbits' rather than `clearbits' because we're always passing - * constants and the latter would require an extra inversion at run-time. + * pmap_copy: copy mappings from one pmap to another + * + * => optional function + * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h */ -void -pmap_changebit(pa, setbits, maskbits) - register vm_offset_t pa; - int setbits, maskbits; + +/* + * pmap_move: move mappings from one pmap to another + * + * => optional function + * void pmap_move(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h + */ + +/* + * pmap_enter: enter a mapping into a pmap + * + * => must be done "now" ... 
no lazy-evaluation + * => we set pmap => pv_head locking + */ + +int +_pmap_enter(pmap, va, pa, prot, flags) + struct pmap *pmap; + vaddr_t va; + paddr_t pa; + vm_prot_t prot; + int flags; { - register struct pv_entry *pv; - register pt_entry_t *pte; - vm_offset_t va; - int s; - int bank, off; + pt_entry_t *ptes, opte, npte; + struct vm_page *ptp; + struct pv_head *pvh; + struct pv_entry *pve; + int bank, off, error; + boolean_t wired = (flags & PMAP_WIRED) != 0; -#ifdef DEBUG - if (pmapdebug & PDB_BITS) - printf("pmap_changebit(%x, %x, %x)", - pa, setbits, ~maskbits); +#ifdef DIAGNOSTIC + /* sanity check: totally out of range? */ + if (va >= VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: too big"); + + if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) + panic("pmap_enter: trying to map over PDP/APDP!"); + + /* sanity check: kernel PTPs should already have been pre-allocated */ + if (va >= VM_MIN_KERNEL_ADDRESS && + !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + panic("pmap_enter: missing kernel PTP!"); #endif - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return; - pv = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); + /* get lock */ + PMAP_MAP_TO_HEAD_LOCK(); /* - * Clear saved attributes (modify, reference) + * map in ptes and get a pointer to our PTP (unless we are the kernel) */ - if (~maskbits) - vm_physmem[bank].pmseg.attrs[off] &= maskbits; + + ptes = pmap_map_ptes(pmap); /* locks pmap */ + if (pmap == pmap_kernel()) { + ptp = NULL; + } else { + ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + if (ptp == NULL) { + if (flags & PMAP_CANFAIL) { + return (KERN_RESOURCE_SHORTAGE); + } + panic("pmap_enter: get ptp failed"); + } + } + opte = ptes[i386_btop(va)]; /* old PTE */ /* - * Loop over all current mappings setting/clearing as appropos - * If setting RO do we need to clear the VAC? + * is there currently a valid mapping at our VA? */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; - /* - * XXX don't write protect pager mappings - */ - if ((PG_RO && setbits == PG_RO) || - (PG_RW && maskbits == ~PG_RW)) { -#if defined(UVM) - if (va >= uvm.pager_sva && va < uvm.pager_eva) - continue; -#else - extern vm_offset_t pager_sva, pager_eva; + if (pmap_valid_entry(opte)) { - if (va >= pager_sva && va < pager_eva) - continue; -#endif + /* + * first, update pm_stats. resident count will not + * change since we are replacing/changing a valid + * mapping. wired count might change... + */ + + if (wired && (opte & PG_W) == 0) + pmap->pm_stats.wired_count++; + else if (!wired && (opte & PG_W) != 0) + pmap->pm_stats.wired_count--; + + /* + * is the currently mapped PA the same as the one we + * want to map? 
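/*
 * A sketch of the wired-count bookkeeping above for the "replacing a
 * valid mapping" case: only a transition between wired and unwired
 * moves the counter; the resident count stays the same.  PG_W mirrors
 * the PG_AVAIL1 software bit; the helper name is illustrative.
 */
#define PG_W	0x200			/* software "wired" bit (PG_AVAIL1) */

struct wire_stats { long wired_count; };

static void
adjust_wiring(struct wire_stats *st, unsigned int old_pte, int new_wired)
{
	if (new_wired && (old_pte & PG_W) == 0)
		st->wired_count++;
	else if (!new_wired && (old_pte & PG_W) != 0)
		st->wired_count--;
}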
+ */ + + if ((opte & PG_FRAME) == pa) { + + /* if this is on the PVLIST, sync R/M bit */ + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(pa), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping " + "with unmanaged page"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pvh = NULL; /* ensure !PG_PVLIST */ } + goto enter_now; + } - pte = pmap_pte(pv->pv_pmap, va); - *pte = (*pte & maskbits) | setbits; + /* + * changing PAs: we must remove the old one first + */ + + /* + * if current mapping is on a pvlist, + * remove it (sync R/M bits) + */ + + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with " + "unmanaged page"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + pve = pmap_remove_pv(pvh, pmap, va); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pve = NULL; } - pmap_update(); + } else { /* opte not valid */ + pve = NULL; + pmap->pm_stats.resident_count++; + if (wired) + pmap->pm_stats.wired_count++; + if (ptp) + ptp->wire_count++; /* count # of valid entrys */ } - splx(s); -} -void -pmap_prefault(map, v, l) - vm_map_t map; - vm_offset_t v; - vm_size_t l; -{ - vm_offset_t pv, pv2; + /* + * at this point pm_stats has been updated. pve is either NULL + * or points to a now-free pv_entry structure (the latter case is + * if we called pmap_remove_pv above). + * + * if this entry is to be on a pvlist, enter it now. + */ - for (pv = v; pv < v + l ; pv += ~PD_MASK + 1) { - if (!pmap_pde_v(pmap_pde(map->pmap, pv))) { - pv2 = trunc_page(vtopte(pv)); -#if defined(UVM) - uvm_fault(map, pv2, 0, VM_PROT_READ); -#else - vm_fault(map, pv2, VM_PROT_READ, FALSE); -#endif + bank = vm_physseg_find(atop(pa), &off); + if (pmap_initialized && bank != -1) { + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pve == NULL) { + pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: no pv entries available"); + } } - pv &= PD_MASK; + /* lock pvh when adding */ + pmap_enter_pv(pvh, pve, pmap, va, ptp); + } else { + + /* new mapping is not PG_PVLIST. free pve if we've got one */ + pvh = NULL; /* ensure !PG_PVLIST */ + if (pve) + pmap_free_pv(pmap, pve); } + +enter_now: + /* + * at this point pvh is !NULL if we want the PG_PVLIST bit set + */ + + npte = pa | protection_codes[prot] | PG_V; + if (pvh) + npte |= PG_PVLIST; + if (wired) + npte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + npte |= PG_u; + else if (va < VM_MAX_ADDRESS) + npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ + if (pmap == pmap_kernel()) + npte |= pmap_pg_g; + + ptes[i386_btop(va)] = npte; /* zap! */ + + if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap)) + pmap_update_pg(va); + + error = KERN_SUCCESS; + +out: + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + + return error; } -#ifdef DEBUG -void -pmap_pvdump(pa) - vm_offset_t pa; +/* + * pmap_growkernel: increase usage of KVM space + * + * => we allocate new PTPs for the kernel and install them in all + * the pmaps on the system. 
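/*
 * A sketch of how the new PTE is assembled at the enter_now label
 * above: physical frame, protection bits and PG_V always; PG_PVLIST
 * for managed pages, PG_W for wired mappings, PG_u below the user
 * limit, and the global bit only for kernel-pmap mappings when the CPU
 * supports it.  Bit values mirror pte.h; build_npte is an illustrative
 * name, not part of this change.
 */
#include <stdint.h>

#define PG_V		0x001
#define PG_u		0x004		/* user-accessible */
#define PG_G		0x100		/* global (ignored before the 586) */
#define PG_W		0x200		/* software: wired */
#define PG_PVLIST	0x400		/* software: mapping is on a pv list */
#define PG_FRAME	0xfffff000U

static uint32_t
build_npte(uint32_t pa, uint32_t prot_bits, int managed, int wired,
    int user_va, int kernel_pmap, uint32_t pmap_pg_g)
{
	uint32_t npte = (pa & PG_FRAME) | prot_bits | PG_V;

	if (managed)
		npte |= PG_PVLIST;
	if (wired)
		npte |= PG_W;
	if (user_va)
		npte |= PG_u;
	if (kernel_pmap)
		npte |= pmap_pg_g;	/* 0 or PG_G, as probed at boot */
	return (npte);
}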
+ */ + +vaddr_t +pmap_growkernel(maxkvaddr) + vaddr_t maxkvaddr; { - register struct pv_entry *pv; - int bank, off; + struct pmap *kpm = pmap_kernel(), *pm; + int needed_kpde; /* needed number of kernel PTPs */ + int s; + paddr_t ptaddr; - printf("pa %x", pa); - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) { - printf("INVALID PA!"); - } else { - for (pv = &vm_physmem[bank].pmseg.pvent[off] ; pv ; - pv = pv->pv_next) { - printf(" -> pmap %p, va %lx", pv->pv_pmap, pv->pv_va); - pads(pv->pv_pmap); + needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) + / NBPD; + if (needed_kpde <= nkpde) + goto out; /* we are OK */ + + /* + * whoops! we need to add kernel PTPs + */ + + s = splhigh(); /* to be safe */ + simple_lock(&kpm->pm_obj.vmobjlock); + + for (/*null*/ ; nkpde < needed_kpde ; nkpde++) { + + if (pmap_initialized == FALSE) { + + /* + * we're growing the kernel pmap early (from + * uvm_pageboot_alloc()). this case must be + * handled a little differently. + */ + + if (uvm_page_physget(&ptaddr) == FALSE) + panic("pmap_growkernel: out of memory"); + + kpm->pm_pdir[PDSLOT_KERN + nkpde] = + ptaddr | PG_RW | PG_V; + + /* count PTP as resident */ + kpm->pm_stats.resident_count++; + continue; } - } - printf(" "); -} -#ifdef notyet -void -pmap_check_wiring(str, va) - char *str; - vm_offset_t va; -{ - vm_map_entry_t entry; - register int count, *pte; + if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { + panic("pmap_growkernel: alloc ptp failed"); + } - va = trunc_page(va); - if (!pmap_pde_v(pmap_pde(pmap_kernel(), va)) || - !pmap_pte_v(pmap_pte(pmap_kernel(), va))) - return; + /* PG_u not for kernel */ + kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u; - if (!vm_map_lookup_entry(pt_map, va, &entry)) { - printf("wired_check: entry for %x not found\n", va); - return; + /* distribute new kernel PTP to all active pmaps */ + simple_lock(&pmaps_lock); + for (pm = pmaps.lh_first; pm != NULL; + pm = pm->pm_list.le_next) { + pm->pm_pdir[PDSLOT_KERN + nkpde] = + kpm->pm_pdir[PDSLOT_KERN + nkpde]; + } + simple_unlock(&pmaps_lock); } - count = 0; - for (pte = (int *)va; pte < (int *)(va + NBPG); pte++) - if (*pte) - count++; - if (entry->wired_count != count) - printf("*%s*: %x: w%d/a%d\n", - str, va, entry->wired_count, count); + + simple_unlock(&kpm->pm_obj.vmobjlock); + splx(s); + +out: + return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)); } -#endif -/* print address space of pmap*/ +#ifdef DEBUG +void pmap_dump __P((struct pmap *, vaddr_t, vaddr_t)); + +/* + * pmap_dump: dump all the mappings from a pmap + * + * => caller should not be holding any pmap locks + */ + void -pads(pm) - pmap_t pm; +pmap_dump(pmap, sva, eva) + struct pmap *pmap; + vaddr_t sva, eva; { - unsigned va, i, j; - register pt_entry_t *pte; + pt_entry_t *ptes, *pte; + vaddr_t blkendva; - if (pm == pmap_kernel()) - return; - for (i = 0; i < 1024; i++) - if (pmap_pde_v(&pm->pm_pdir[i])) - for (j = 0; j < 1024 ; j++) { - va = (i << PDSHIFT) | (j << PGSHIFT); - if (pm == pmap_kernel() && - va < VM_MIN_KERNEL_ADDRESS) - continue; - if (pm != pmap_kernel() && - va > VM_MAX_ADDRESS) - continue; - pte = pmap_pte(pm, va); - if (pmap_pte_v(pte)) - printf("%x:%x ", va, *pte); - } + /* + * if end is out of range truncate. + * if (end == start) update to max. 
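/*
 * The round-up arithmetic behind pmap_growkernel above, as a
 * standalone sketch: how many kernel PDEs (PTPs) are needed to cover
 * the requested maximum KVA.  The kernel base value is an assumption
 * matching the layout described in pmap.new.h below.
 */
#define NBPD		(1UL << 22)		/* 4MB per PDE */
#define KERN_BASE	0xc0000000UL		/* assumed VM_MIN_KERNEL_ADDRESS */

static int
kpdes_needed(unsigned long maxkvaddr)
{
	return ((int)((maxkvaddr - KERN_BASE + (NBPD - 1)) / NBPD));
}

/*
 * if kpdes_needed(maxkvaddr) <= nkpde there is nothing to do; otherwise
 * each new PDE is installed in the kernel PD and copied into every pmap
 * on the pmaps list so the kernel stays mapped in every address space.
 */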
+ */ + + if (eva > VM_MAXUSER_ADDRESS || eva <= sva) + eva = VM_MAXUSER_ADDRESS; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + /* + * dumping a range of pages: we dump in PTP sized blocks (4MB) + */ + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* valid block? */ + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + continue; + + pte = &ptes[i386_btop(sva)]; + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { + if (!pmap_valid_entry(*pte)) + continue; + printf("va %#lx -> pa %#x (pte=%#x)\n", + sva, *pte, *pte & PG_FRAME); + } + } + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); } #endif diff --git a/sys/arch/i386/i386/sys_machdep.c b/sys/arch/i386/i386/sys_machdep.c index b1fb6877cfc..5d72a697af6 100644 --- a/sys/arch/i386/i386/sys_machdep.c +++ b/sys/arch/i386/i386/sys_machdep.c @@ -94,7 +94,11 @@ i386_user_cleanup(pcb) struct pcb *pcb; { +#ifdef PMAP_NEW + ldt_free(pcb->pcb_pmap); +#else ldt_free(pcb); +#endif pcb->pcb_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); if (pcb == curpcb) lldt(pcb->pcb_ldt_sel); @@ -123,8 +127,8 @@ i386_get_ldt(p, args, retval) if ((error = copyin(args, &ua, sizeof(ua))) != 0) return (error); -#ifdef DEBUG - printf("i386_get_ldt: start=%d num=%d descs=%x\n", ua.start, +#ifdef LDTDEBUG + printf("i386_get_ldt: start=%d num=%d descs=%p\n", ua.start, ua.num, ua.desc); #endif @@ -161,16 +165,21 @@ i386_set_ldt(p, args, retval) { int error, i, n; struct pcb *pcb = &p->p_addr->u_pcb; +#ifdef PMAP_NEW + pmap_t pmap = p->p_vmspace->vm_map.pmap; +#endif int fsslot, gsslot; +#ifndef PMAP_NEW int s; +#endif struct i386_set_ldt_args ua; union descriptor desc; if ((error = copyin(args, &ua, sizeof(ua))) != 0) return (error); -#ifdef DEBUG - printf("i386_set_ldt: start=%d num=%d descs=%x\n", ua.start, +#ifdef LDT_DEBUG + printf("i386_set_ldt: start=%d num=%d descs=%p\n", ua.start, ua.num, ua.desc); #endif @@ -179,22 +188,46 @@ i386_set_ldt(p, args, retval) if (ua.start > 8192 || (ua.start + ua.num) > 8192) return (EINVAL); + /* + * XXX LOCKING + */ + /* allocate user ldt */ +#ifdef PMAP_NEW + if (pmap->pm_ldt == 0 || (ua.start + ua.num) > pmap->pm_ldt_len) { +#else if (pcb->pcb_ldt == 0 || (ua.start + ua.num) > pcb->pcb_ldt_len) { +#endif size_t old_len, new_len; union descriptor *old_ldt, *new_ldt; +#ifdef PMAP_NEW + if (pmap->pm_flags & PMF_USER_LDT) { + old_len = pmap->pm_ldt_len * sizeof(union descriptor); + old_ldt = pmap->pm_ldt; +#else if (pcb->pcb_flags & PCB_USER_LDT) { old_len = pcb->pcb_ldt_len * sizeof(union descriptor); old_ldt = pcb->pcb_ldt; +#endif } else { old_len = NLDT * sizeof(union descriptor); old_ldt = ldt; +#ifdef PMAP_NEW + pmap->pm_ldt_len = 512; +#else pcb->pcb_ldt_len = 512; +#endif } +#ifdef PMAP_NEW + while ((ua.start + ua.num) > pmap->pm_ldt_len) + pmap->pm_ldt_len *= 2; + new_len = pmap->pm_ldt_len * sizeof(union descriptor); +#else while ((ua.start + ua.num) > pcb->pcb_ldt_len) pcb->pcb_ldt_len *= 2; new_len = pcb->pcb_ldt_len * sizeof(union descriptor); +#endif #if defined(UVM) new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, new_len); #else @@ -202,6 +235,16 @@ i386_set_ldt(p, args, retval) #endif bcopy(old_ldt, new_ldt, old_len); bzero((caddr_t)new_ldt + old_len, new_len - old_len); +#ifdef PMAP_NEW + pmap->pm_ldt = new_ldt; + + if (pmap->pm_flags & PCB_USER_LDT) + ldt_free(pmap); + else + pmap->pm_flags |= PCB_USER_LDT; + 
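/*
 * A sketch of the LDT resize policy used just above: the table starts
 * at 512 descriptors and doubles until the requested range fits, and
 * the byte length handed to the allocator follows from that count.
 * DESC_SIZE assumes the usual 8-byte i386 segment descriptor; ldt_grow
 * is an illustrative name.
 */
#include <stddef.h>

#define LDT_INITIAL	512		/* initial pm_ldt_len, as above */
#define DESC_SIZE	8		/* sizeof(union descriptor) assumed */

static size_t
ldt_grow(size_t nslots, int start, int num)
{
	if (nslots == 0)
		nslots = LDT_INITIAL;
	while ((size_t)(start + num) > nslots)
		nslots *= 2;
	return (nslots * DESC_SIZE);	/* new_len in bytes */
}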
ldt_alloc(pmap, new_ldt, new_len); + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; +#else pcb->pcb_ldt = new_ldt; if (pcb->pcb_flags & PCB_USER_LDT) @@ -209,17 +252,24 @@ i386_set_ldt(p, args, retval) else pcb->pcb_flags |= PCB_USER_LDT; ldt_alloc(pcb, new_ldt, new_len); +#endif if (pcb == curpcb) lldt(pcb->pcb_ldt_sel); + /* + * XXX Need to notify other processors which may be + * XXX currently using this pmap that they need to + * XXX re-load the LDT. + */ + if (old_ldt != ldt) #if defined(UVM) uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len); #else - kmem_free(kernel_map, (vm_offset_t)old_ldt, old_len); + kmem_free(kernel_map, (vaddr_t)old_ldt, old_len); #endif -#ifdef DEBUG - printf("i386_set_ldt(%d): new_ldt=%x\n", p->p_pid, new_ldt); +#ifdef LDT_DEBUG + printf("i386_set_ldt(%d): new_ldt=%p\n", p->p_pid, new_ldt); #endif } @@ -292,20 +342,28 @@ i386_set_ldt(p, args, retval) } } +#ifndef PMAP_NEW s = splhigh(); +#endif /* Now actually replace the descriptors. */ for (i = 0, n = ua.start; i < ua.num; i++, n++) { if ((error = copyin(&ua.desc[i], &desc, sizeof(desc))) != 0) goto out; +#ifdef PMAP_NEW + pmap->pm_ldt[n] = desc; +#else pcb->pcb_ldt[n] = desc; +#endif } *retval = ua.start; out: +#ifndef PMAP_NEW splx(s); +#endif return (error); } #endif /* USER_LDT */ diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index c2ae880fa02..c89e50ca3a3 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.35 2001/01/24 09:37:59 hugh Exp $ */ +/* $OpenBSD: trap.c,v 1.36 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */ #undef DEBUG @@ -445,7 +445,10 @@ trap(frame) register vm_map_t map; int rv; extern vm_map_t kernel_map; - unsigned nss, v; + unsigned nss; +#ifndef PMAP_NEW + unsigned v; +#endif if (vm == NULL) goto we_re_toast; @@ -481,6 +484,7 @@ trap(frame) } } +#ifndef PMAP_NEW /* check if page table is mapped, if not, fault it first */ if ((PTD[pdei(va)] & PG_V) == 0) { v = trunc_page(vtopte(va)); @@ -499,6 +503,7 @@ trap(frame) #endif } else v = 0; +#endif #if defined(UVM) rv = uvm_fault(map, va, 0, ftype); diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c index 38c1141a9d7..36afc7340c4 100644 --- a/sys/arch/i386/i386/vm_machdep.c +++ b/sys/arch/i386/i386/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.20 2001/02/08 00:46:35 mickey Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.21 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */ /*- @@ -114,21 +114,29 @@ cpu_fork(p1, p2, stack, stacksize) /* Sync curpcb (which is presumably p1's PCB) and copy it to p2. */ savectx(curpcb); *pcb = p1->p_addr->u_pcb; +#ifndef PMAP_NEW pmap_activate(p2); - +#endif /* * Preset these so that gdt_compact() doesn't get confused if called * during the allocations below. */ pcb->pcb_tss_sel = GSEL(GNULL_SEL, SEL_KPL); +#ifndef PMAP_NEW pcb->pcb_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); +#else + /* + * Activate the addres space. Note this will refresh pcb_ldt_sel. + */ + pmap_activate(p2); +#endif /* Fix up the TSS. */ pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); pcb->pcb_tss.tss_esp0 = (int)p2->p_addr + USPACE - 16; tss_alloc(pcb); -#ifdef USER_LDT +#if defined(USER_LDT) && !defined(PMAP_NEW) /* Copy the LDT, if necessary. 
*/ if (pcb->pcb_flags & PCB_USER_LDT) { size_t len; @@ -228,10 +236,17 @@ cpu_wait(p) struct pcb *pcb; pcb = &p->p_addr->u_pcb; +#ifndef PMAP_NEW #ifdef USER_LDT if (pcb->pcb_flags & PCB_USER_LDT) i386_user_cleanup(pcb); #endif +#else + /* + * No need to do user LDT cleanup here; it's handled in + * pmap_destroy(). + */ +#endif tss_free(pcb); } @@ -375,8 +390,12 @@ vmapbuf(bp, len) vm_size_t len; { vm_offset_t faddr, taddr, off; +#ifdef PMAP_NEW + paddr_t fpa; +#else pt_entry_t *fpte, *tpte; pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif if ((bp->b_flags & B_PHYS) == 0) panic("vmapbuf"); @@ -389,16 +408,42 @@ vmapbuf(bp, len) taddr = kmem_alloc_wait(phys_map, len); #endif bp->b_data = (caddr_t)(taddr + off); +#ifdef PMAP_NEW /* * The region is locked, so we expect that pmap_pte() will return * non-NULL. + * XXX: unwise to expect this in a multithreaded environment. + * anything can happen to a pmap between the time we lock a + * region, release the pmap lock, and then relock it for + * the pmap_extract(). + * + * no need to flush TLB since we expect nothing to be mapped + * where we we just allocated (TLB will be flushed when our + * mapping is removed). */ - fpte = pmap_pte(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), faddr); - tpte = pmap_pte(vm_map_pmap(phys_map), taddr); - do { - *tpte++ = *fpte++; + while (len) { + fpa = pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), + faddr); + pmap_enter(vm_map_pmap(phys_map), taddr, fpa, + VM_PROT_READ | VM_PROT_WRITE, TRUE, + VM_PROT_READ | VM_PROT_WRITE); + faddr += PAGE_SIZE; + taddr += PAGE_SIZE; len -= PAGE_SIZE; - } while (len); + } +#else + /* + * The region is locked, so we expect that pmap_pte() will return + * non-NULL. + */ + fpte = pmap_pte(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), faddr); + tpte = pmap_pte(vm_map_pmap(phys_map), taddr); + do { + *tpte++ = *fpte++; + len -= PAGE_SIZE; + } while (len); +#endif + } /* diff --git a/sys/arch/i386/include/gdt.h b/sys/arch/i386/include/gdt.h index 2ec88669840..46d52f88b32 100644 --- a/sys/arch/i386/include/gdt.h +++ b/sys/arch/i386/include/gdt.h @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.h,v 1.5 1997/11/11 22:53:40 deraadt Exp $ */ +/* $OpenBSD: gdt.h,v 1.6 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: gdt.h,v 1.3 1996/02/27 22:32:11 jtc Exp $ */ /*- @@ -40,6 +40,11 @@ #ifdef _KERNEL void tss_alloc __P((struct pcb *)); void tss_free __P((struct pcb *)); +#ifdef PMAP_NEW +void ldt_alloc __P((struct pmap *, union descriptor *, size_t)); +void ldt_free __P((struct pmap *)); +#else void ldt_alloc __P((struct pcb *, union descriptor *, size_t)); void ldt_free __P((struct pcb *)); #endif +#endif diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 6342b19d615..7ab2d4acb26 100644 --- a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.5 2000/08/05 22:07:32 niklas Exp $ */ +/* $OpenBSD: pcb.h,v 1.6 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: pcb.h,v 1.21 1996/01/08 13:51:42 mycroft Exp $ */ /*- @@ -79,6 +79,9 @@ struct pcb { int vm86_eflags; /* virtual eflags for vm86 mode */ int vm86_flagmask; /* flag mask for vm86 mode */ void *vm86_userp; /* XXX performance hack */ +#ifdef PMAP_NEW + struct pmap *pcb_pmap; /* back pointer to our pmap */ +#endif u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */ u_char pcb_iomap_pad; /* required; must be 0xff, says intel */ }; diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index fdc7cbf9698..de770cb59a6 100644 --- a/sys/arch/i386/include/pmap.h 
+++ b/sys/arch/i386/include/pmap.h @@ -1,210 +1,5 @@ -/* $OpenBSD: pmap.h,v 1.12 1999/09/20 02:47:43 deraadt Exp $ */ -/* $NetBSD: pmap.h,v 1.23 1996/05/03 19:26:30 christos Exp $ */ - -/* - * Copyright (c) 1995 Charles M. Hannum. All rights reserved. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)pmap.h 7.4 (Berkeley) 5/12/91 - */ - -/* - * Derived from hp300 version by Mike Hibler, this version by William - * Jolitz uses a recursive map [a pde points to the page directory] to - * map the page tables using the pagetables themselves. This is done to - * reduce the impact on kernel virtual memory for lots of sparse address - * space, and to reduce the cost of memory to each process. - * - * from hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 - */ - -#ifndef _I386_PMAP_H_ -#define _I386_PMAP_H_ - -#include <machine/cpufunc.h> -#include <machine/pte.h> - -/* - * 386 page table entry and page table directory - * W.Jolitz, 8/89 - */ - -/* - * One page directory, shared between - * kernel and user modes. - */ -#define KPTDI (KERNBASE>>22) /* start of kernel virtual pde's */ -#define PTDPTDI (KPTDI-1) /* ptd entry that points to ptd! */ -#define APTDPTDI 0x3ff /* start of alternate page directory */ -#define MAXKPDE (APTDPTDI-KPTDI) -#ifndef NKPDE /* permit config file override */ -#define NKPDE 127 /* # to static alloc */ +#ifdef PMAP_NEW +#include <machine/pmap.new.h> +#else +#include <machine/pmap.old.h> #endif - -/* - * Address of current and alternate address space page table maps - * and directories. 
- */ -#ifdef _KERNEL -extern pt_entry_t PTmap[], APTmap[], Upte; -extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; -extern pt_entry_t *Sysmap; - -extern int PTDpaddr; /* physical address of kernel PTD */ - -void pmap_bootstrap __P((vm_offset_t start)); -boolean_t pmap_testbit __P((vm_offset_t, int)); -void pmap_changebit __P((vm_offset_t, int, int)); -void pmap_prefault __P((vm_map_t, vm_offset_t, vm_size_t)); -#endif - -/* - * virtual address to page table entry and - * to physical address. Likewise for alternate address space. - * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ -#define vtopte(va) (PTmap + i386_btop(va)) -#define kvtopte(va) vtopte(va) -#define ptetov(pt) (i386_ptob(pt - PTmap)) -#define vtophys(va) \ - ((*vtopte(va) & PG_FRAME) | ((unsigned)(va) & ~PG_FRAME)) - -#define avtopte(va) (APTmap + i386_btop(va)) -#define ptetoav(pt) (i386_ptob(pt - APTmap)) -#define avtophys(va) \ - ((*avtopte(va) & PG_FRAME) | ((unsigned)(va) & ~PG_FRAME)) - -/* - * macros to generate page directory/table indicies - */ -#define pdei(va) (((va) & PD_MASK) >> PDSHIFT) -#define ptei(va) (((va) & PT_MASK) >> PGSHIFT) - -/* - * Pmap stuff - */ -typedef struct pmap { - pd_entry_t *pm_pdir; /* KVA of page directory */ - boolean_t pm_pdchanged; /* pdir changed */ - short pm_dref; /* page directory ref count */ - short pm_count; /* pmap reference count */ - simple_lock_data_t pm_lock; /* lock on pmap */ - struct pmap_statistics pm_stats; /* pmap statistics */ - long pm_ptpages; /* more stats: PT pages */ -} *pmap_t; - -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry, the list is pv_table. - */ -struct pv_entry { - struct pv_entry *pv_next; /* next pv_entry */ - pmap_t pv_pmap; /* pmap where mapping lies */ - vm_offset_t pv_va; /* virtual address for mapping */ -}; - -struct pv_page; - -struct pv_page_info { - TAILQ_ENTRY(pv_page) pgi_list; - struct pv_entry *pgi_freelist; - int pgi_nfree; -}; - -/* - * This is basically: - * ((NBPG - sizeof(struct pv_page_info)) / sizeof(struct pv_entry)) - */ -#define NPVPPG 340 - -struct pv_page { - struct pv_page_info pvp_pgi; - struct pv_entry pvp_pv[NPVPPG]; -}; - -#ifdef _KERNEL -extern struct pmap kernel_pmap_store; - -#define pmap_kernel() (&kernel_pmap_store) -#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) -#define pmap_update() tlbflush() - -vm_offset_t reserve_dumppages __P((vm_offset_t)); - -static __inline void -pmap_clear_modify(vm_offset_t pa) -{ - pmap_changebit(pa, 0, ~PG_M); -} - -static __inline void -pmap_clear_reference(vm_offset_t pa) -{ - pmap_changebit(pa, 0, ~PG_U); -} - -static __inline void -pmap_copy_on_write(vm_offset_t pa) -{ - pmap_changebit(pa, PG_RO, ~PG_RW); -} - -static __inline boolean_t -pmap_is_modified(vm_offset_t pa) -{ - return pmap_testbit(pa, PG_M); -} - -static __inline boolean_t -pmap_is_referenced(vm_offset_t pa) -{ - return pmap_testbit(pa, PG_U); -} - -static __inline vm_offset_t -pmap_phys_address(int ppn) -{ - return i386_ptob(ppn); -} - -void pmap_activate __P((struct proc *)); -void pmap_deactivate __P((struct proc *)); -vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); - -#endif /* _KERNEL */ - -#endif /* _I386_PMAP_H_ */ diff --git a/sys/arch/i386/include/pmap.new.h b/sys/arch/i386/include/pmap.new.h new file mode 100644 index 00000000000..712d418dd33 --- /dev/null +++ b/sys/arch/i386/include/pmap.new.h @@ -0,0 +1,509 @@ +/* 
$OpenBSD: pmap.new.h,v 1.1 2001/03/22 23:36:52 niklas Exp $ */ +/* $NetBSD: pmap.h,v 1.43 2000/02/11 07:00:13 thorpej Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * pmap.h: see pmap.c for the history of this pmap module. + */ + +#ifndef _I386_PMAP_H_ +#define _I386_PMAP_H_ + +#if defined(_KERNEL) && !defined(_LKM) && defined(__NetBSD__) +#include "opt_user_ldt.h" +#endif + +#include <machine/cpufunc.h> +#include <machine/pte.h> +#include <machine/segments.h> +#include <vm/pglist.h> +#include <uvm/uvm_object.h> + +/* + * see pte.h for a description of i386 MMU terminology and hardware + * interface. + * + * a pmap describes a processes' 4GB virtual address space. this + * virtual address space can be broken up into 1024 4MB regions which + * are described by PDEs in the PDP. the PDEs are defined as follows: + * + * (ranges are inclusive -> exclusive, just like vm_map_entry start/end) + * (the following assumes that KERNBASE is 0xc0000000) + * + * PDE#s VA range usage + * 0->767 0x0 -> 0xbfc00000 user address space, note that the + * max user address is 0xbfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used + * 768 0xbfc00000-> recursive mapping of PDP (used for + * 0xc0000000 linear mapping of PTPs) + * 768->1023 0xc0000000-> kernel address space (constant + * 0xffc00000 across all pmap's/processes) + * 1023 0xffc00000-> "alternate" recursive PDP mapping + * <end> (for other pmaps) + * + * + * note: a recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. in other + * words, the PTE for page 0 is the first int mapped into the 4MB recursive + * area. the PTE for page 1 is the second int. 
the very last int in the + * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). + * + * all pmap's PD's must have the same values in slots 768->1023 so that + * the kernel is always mapped in every process. these values are loaded + * into the PD at pmap creation time. + * + * at any one time only one pmap can be active on a processor. this is + * the pmap whose PDP is pointed to by processor register %cr3. this pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slot #767 as show above). when the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * address of PTE = (767 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xbfc00000 + (VA / 4096) * 4 + * + * what happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? in that + * case we take the PA of the PDP of non-active pmap and put it in + * slot 1023 of the active pmap. this causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). + * + * the following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 + * | | + * | | + * | 767| -> points back to PDP (%cr3) mapping VA 0xbfc00000 -> 0xc0000000 + * | 768| -> first kernel PTP (maps 0xc0000000 -> 0xf0400000) + * | | + * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) + * +----+ + * + * note that the PDE#767 VA (0xbfc00000) is defined as "PTE_BASE" + * note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE" + * + * starting at VA 0xbfc00000 the current active PDP (%cr3) acts as a + * PTP: + * + * PTP#767 == PDP(%cr3) => maps VA 0xbfc00000 -> 0xc0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xbfc00000->0xbfc01000 + * | | + * | | + * | 767| -> maps contents of PTP#767 (the PDP) at VA 0xbffbf000 + * | 768| -> maps contents of first kernel PTP + * | | + * |1023| + * +----+ + * + * note that mapping of the PDP at PTP#959's VA (0xeffbf000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xeffbfefc) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xeffbfffc) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * to set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xfffff000). + */ + +/* + * the following defines identify the slots used as described above. 
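/*
 * The formula above, worked as a standalone sketch: with the recursive
 * PDP entry in slot 767 the PTEs appear as one linear array starting at
 * 0xbfc00000, and the PTE for a VA lives at base + (VA / 4096) * 4.
 * pte_va() is an illustrative helper, not part of this header.
 */
#include <stdint.h>

#define PDSLOT_PTE	767UL			/* (KERNBASE/NBPD) - 1 */
#define NBPD		(1UL << 22)		/* 4MB per PDE */
#define PTE_BASE_VA	(PDSLOT_PTE * NBPD)	/* 0xbfc00000 */

static uintptr_t
pte_va(uintptr_t va)
{
	return (PTE_BASE_VA + (va >> 12) * sizeof(uint32_t));
}

/*
 * pte_va(0x0)    == 0xbfc00000  (PTE for page 0 is the first int)
 * pte_va(0x1000) == 0xbfc00004  (PTE for page 1 is the second int)
 */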
+ */ + +#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 767: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 768: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ + +/* + * the following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ + +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +/* + * XXXCDC: tmp xlate from old names: + * PTDPTDI -> PDSLOT_PTE + * KPTDI -> PDSLOT_KERN + * APTDPTDI -> PDSLOT_APTE + */ + +/* + * the follow define determines how many PTPs should be set up for the + * kernel by locore.s at boot time. this should be large enough to + * get the VM system running. once the VM system is running, the + * pmap module can add more PTPs to the kernel area on demand. + */ + +#ifndef NKPTP +#define NKPTP 4 /* 16MB to start */ +#endif +#define NKPTP_MIN 4 /* smallest value we allow */ +#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) + /* largest value (-1 for APTP space) */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap) + * ptetov: given a pointer to a PTE, return the VA that it maps + * vtophys: translate a VA to the PA mapped to it + * + * plus alternative versions of the above + */ + +#define vtopte(VA) (PTE_BASE + i386_btop(VA)) +#define kvtopte(VA) vtopte(VA) +#define ptetov(PT) (i386_ptob(PT - PTE_BASE)) +#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ + ((unsigned)(VA) & ~PG_FRAME)) +#define avtopte(VA) (APTE_BASE + i386_btop(VA)) +#define ptetoav(PT) (i386_ptob(PT - APTE_BASE)) +#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ + ((unsigned)(VA) & ~PG_FRAME)) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * PTP macros: + * a PTP's index is the PD index of the PDE that points to it + * a PTP's offset is the byte-offset in the PTE space that this PTP is at + * a PTP's VA is the first VA mapped by that PTP + * + * note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * PG_AVAIL usage: we make use of the ignored bits of the PTE + */ + +#define PG_W PG_AVAIL1 /* "wired" mapping */ +#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ +/* PG_AVAIL3 not used */ + +#ifdef _KERNEL +/* + * pmap data structures: see pmap.c for details of locking. 
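/*
 * A worked example of the index macros defined above, for one sample
 * VA; the mask and shift values mirror machine/pte.h (4KB pages, 4MB
 * PDEs).
 */
#define PDSHIFT		22
#define PGSHIFT		12
#define PD_MASK		0xffc00000U
#define PT_MASK		0x003ff000U

#define pdei(VA)	(((VA) & PD_MASK) >> PDSHIFT)
#define ptei(VA)	(((VA) & PT_MASK) >> PGSHIFT)

/*
 * for VA 0xc0123456:
 *   pdei(VA) == 768    -- PDSLOT_KERN, the first kernel PDE
 *   ptei(VA) == 0x123  -- entry 291 within that PTP
 */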
+ */ + +struct pmap; +typedef struct pmap *pmap_t; + +/* + * we maintain a list of all non-kernel pmaps + */ + +LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ + +/* + * the pmap structure + * + * note that the pm_obj contains the simple_lock, the reference count, + * page list, and number of PTPs within the pmap. + */ + +struct pmap { + struct uvm_object pm_obj; /* object (lck by object lock) */ +#define pm_lock pm_obj.vmobjlock + LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ + pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ + u_int32_t pm_pdirpa; /* PA of PD (read-only after create) */ + struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */ + struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ + + int pm_flags; /* see below */ + + union descriptor *pm_ldt; /* user-set LDT */ + int pm_ldt_len; /* number of LDT entries */ + int pm_ldt_sel; /* LDT selector */ +}; + +/* pm_flags */ +#define PMF_USER_LDT 0x01 /* pmap has user-set LDT */ + +/* + * for each managed physical page we maintain a list of <PMAP,VA>'s + * which it is mapped at. the list is headed by a pv_head structure. + * there is one pv_head per managed phys page (allocated at boot time). + * the pv_head structure points to a list of pv_entry structures (each + * describes one mapping). + */ + +struct pv_entry; + +struct pv_head { + simple_lock_data_t pvh_lock; /* locks every pv on this list */ + struct pv_entry *pvh_list; /* head of list (locked by pvh_lock) */ +}; + +struct pv_entry { /* locked by its list's pvh_lock */ + struct pv_entry *pv_next; /* next entry */ + struct pmap *pv_pmap; /* the pmap */ + vaddr_t pv_va; /* the virtual address */ + struct vm_page *pv_ptp; /* the vm_page of the PTP */ +}; + +/* + * pv_entrys are dynamically allocated in chunks from a single page. + * we keep track of how many pv_entrys are in use for each page and + * we can free pv_entry pages if needed. there is one lock for the + * entire allocation system. + */ + +struct pv_page_info { + TAILQ_ENTRY(pv_page) pvpi_list; + struct pv_entry *pvpi_pvfree; + int pvpi_nfree; +}; + +/* + * number of pv_entry's in a pv_page + * (note: won't work on systems where NPBG isn't a constant) + */ + +#define PVE_PER_PVPAGE ((NBPG - sizeof(struct pv_page_info)) / \ + sizeof(struct pv_entry)) + +/* + * a pv_page: where pv_entrys are allocated from + */ + +struct pv_page { + struct pv_page_info pvinfo; + struct pv_entry pvents[PVE_PER_PVPAGE]; +}; + +/* + * pmap_remove_record: a record of VAs that have been unmapped, used to + * flush TLB. if we have more than PMAP_RR_MAX then we stop recording. + */ + +#define PMAP_RR_MAX 16 /* max of 16 pages (64K) */ + +struct pmap_remove_record { + int prr_npages; + vaddr_t prr_vas[PMAP_RR_MAX]; +}; + +/* + * pmap_transfer_location: used to pass the current location in the + * pmap between pmap_transfer and pmap_transfer_ptes [e.g. during + * a pmap_copy]. + */ + +struct pmap_transfer_location { + vaddr_t addr; /* the address (page-aligned) */ + pt_entry_t *pte; /* the PTE that maps address */ + struct vm_page *ptp; /* the PTP that the PTE lives in */ +}; + +/* + * global kernel variables + */ + +extern pd_entry_t PTD[]; + +/* PTDpaddr: is the physical address of the kernel's PDP */ +extern u_long PTDpaddr; + +extern struct pmap kernel_pmap_store; /* kernel pmap */ +extern int nkpde; /* current # of PDEs for kernel */ +extern int pmap_pg_g; /* do we support PG_G? 
*/ + +/* + * macros + */ + +#define pmap_kernel() (&kernel_pmap_store) +#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) +#define pmap_update() tlbflush() + +#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) +#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) +#define pmap_copy(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, FALSE) +#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) +#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) +#define pmap_move(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, TRUE) +#define pmap_phys_address(ppn) i386_ptob(ppn) +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + + +/* + * prototypes + */ + +void pmap_activate __P((struct proc *)); +void pmap_bootstrap __P((vaddr_t)); +boolean_t pmap_change_attrs __P((struct vm_page *, int, int)); +void pmap_deactivate __P((struct proc *)); +static void pmap_page_protect __P((struct vm_page *, vm_prot_t)); +void pmap_page_remove __P((struct vm_page *)); +static void pmap_protect __P((struct pmap *, vaddr_t, + vaddr_t, vm_prot_t)); +void pmap_remove __P((struct pmap *, vaddr_t, vaddr_t)); +boolean_t pmap_test_attrs __P((struct vm_page *, int)); +void pmap_transfer __P((struct pmap *, struct pmap *, vaddr_t, + vsize_t, vaddr_t, boolean_t)); +static void pmap_update_pg __P((vaddr_t)); +static void pmap_update_2pg __P((vaddr_t,vaddr_t)); +void pmap_write_protect __P((struct pmap *, vaddr_t, + vaddr_t, vm_prot_t)); + +vaddr_t reserve_dumppages __P((vaddr_t)); /* XXX: not a pmap fn */ + +#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ + +/* + * inline functions + */ + +/* + * pmap_update_pg: flush one page from the TLB (or flush the whole thing + * if hardware doesn't support one-page flushing) + */ + +__inline static void +pmap_update_pg(va) + vaddr_t va; +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + invlpg((u_int) va); +} + +/* + * pmap_update_2pg: flush two pages from the TLB + */ + +__inline static void +pmap_update_2pg(va, vb) + vaddr_t va, vb; +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + { + invlpg((u_int) va); + invlpg((u_int) vb); + } +} + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => this function is a frontend for pmap_page_remove/pmap_change_attrs + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. + */ + +__inline static void +pmap_page_protect(pg, prot) + struct vm_page *pg; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + (void) pmap_change_attrs(pg, PG_RO, PG_RW); + } else { + pmap_page_remove(pg); + } + } +} + +/* + * pmap_protect: change the protection of pages in a pmap + * + * => this function is a frontend for pmap_remove/pmap_write_protect + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. 
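+ *
+ * for example, pmap_protect(pmap, va, va + NBPG, VM_PROT_READ) ends up
+ * in pmap_write_protect(), a VM_PROT_NONE request for the same range
+ * ends up in pmap_remove(), and any request that still includes
+ * VM_PROT_WRITE is a no-op here.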
+ */ + +__inline static void +pmap_protect(pmap, sva, eva, prot) + struct pmap *pmap; + vaddr_t sva, eva; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + pmap_write_protect(pmap, sva, eva, prot); + } else { + pmap_remove(pmap, sva, eva); + } + } +} + +vaddr_t pmap_map __P((vaddr_t, paddr_t, paddr_t, vm_prot_t)); + +#if defined(USER_LDT) +void pmap_ldt_cleanup __P((struct proc *)); +#define PMAP_FORK +#endif /* USER_LDT */ + +#endif /* _KERNEL */ +#endif /* _I386_PMAP_H_ */ diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h index 4f3dc049f8c..a8dd350ef77 100644 --- a/sys/arch/i386/include/vmparam.h +++ b/sys/arch/i386/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.15 2000/04/25 23:10:31 niklas Exp $ */ +/* $OpenBSD: vmparam.h,v 1.16 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */ /*- @@ -130,6 +130,12 @@ * Mach derived constants */ +/* XXX Compatibility */ +#ifdef PMAP_NEW +#define APTDPTDI PDSLOT_APTE +#define PTDPTDI PDSLOT_PTE +#endif + /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vm_offset_t)0) #define VM_MAXUSER_ADDRESS ((vm_offset_t)((PTDPTDI<<PDSHIFT) - USPACE)) diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index cf8a9345b9e..3d3b75b82bc 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.9 2001/03/22 03:05:55 smart Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.10 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: uvm_fault.c,v 1.35 1999/06/16 18:43:28 thorpej Exp $ */ /* @@ -844,8 +844,8 @@ ReFault: uvmexp.fltnamap++; pmap_enter(ufi.orig_map->pmap, currva, VM_PAGE_TO_PHYS(anon->u.an_page), - (anon->an_ref > 1) ? (enter_prot & ~VM_PROT_WRITE) : - enter_prot, + (anon->an_ref > 1) ? + (enter_prot & ~VM_PROT_WRITE) : enter_prot, VM_MAPENT_ISWIRED(ufi.entry), 0); } simple_unlock(&anon->an_lock); @@ -1725,12 +1725,14 @@ uvm_fault_wire(map, start, end, access_type) pmap = vm_map_pmap(map); +#ifndef PMAP_NEW /* * call pmap pageable: this tells the pmap layer to lock down these * page tables. */ pmap_pageable(pmap, start, end, FALSE); +#endif /* * now fault it in page at a time. if the fault fails then we have @@ -1785,7 +1787,9 @@ uvm_fault_unwire(map, start, end) if (pa == (paddr_t) 0) { panic("uvm_fault_unwire: unwiring non-wired memory"); } + pmap_change_wiring(pmap, va, FALSE); /* tell the pmap */ + pg = PHYS_TO_VM_PAGE(pa); if (pg) uvm_pageunwire(pg); @@ -1793,11 +1797,12 @@ uvm_fault_unwire(map, start, end) uvm_unlock_pageq(); +#ifndef PMAP_NEW /* * now we call pmap_pageable to let the pmap know that the page tables * in this space no longer need to be wired. */ pmap_pageable(pmap, start, end, TRUE); - +#endif } diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index f98c1f2281f..8881998d2f0 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.13 2000/03/13 16:05:24 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.14 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: pmap.h,v 1.16 1996/03/31 22:15:32 pk Exp $ */ /* @@ -107,6 +107,16 @@ typedef struct pmap_statistics *pmap_statistics_t; #define PMAP_PGARG(PG) (VM_PAGE_TO_PHYS(PG)) #endif +#ifdef PMAP_NEW +/* + * Flags passed to pmap_enter(). Note the bottom 3 bits are VM_PROT_* + * bits, used to indicate the access type that was made (to seed modified + * and referenced information). 
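+ *
+ * For example, a wired mapping entered in response to a write access
+ * would pass (PMAP_WIRED | VM_PROT_WRITE), i.e. 0x12, in the flags
+ * argument.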
+ */ +#define PMAP_WIRED 0x00000010 /* wired mapping */ +#define PMAP_CANFAIL 0x00000020 /* can fail if resource shortage */ +#endif + #ifndef PMAP_EXCLUDE_DECLS /* Used in Sparc port to virtualize pmap mod */ #ifdef _KERNEL __BEGIN_DECLS @@ -114,6 +124,9 @@ void *pmap_bootstrap_alloc __P((int)); void pmap_change_wiring __P((pmap_t, vaddr_t, boolean_t)); #if defined(PMAP_NEW) +#if 0 +void pmap_unwire __P((pmap_t, vaddr_t)); +#endif #if !defined(pmap_clear_modify) boolean_t pmap_clear_modify __P((struct vm_page *)); #endif @@ -134,8 +147,20 @@ struct pmap *pmap_create __P((void)); pmap_t pmap_create __P((vsize_t)); #endif void pmap_destroy __P((pmap_t)); +#ifdef PMAP_NEW +#ifdef notyet +int pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +boolean_t pmap_extract __P((pmap_t, vaddr_t, paddr_t *)); +#else +int _pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +#define pmap_enter(pmap, va, pa, prot, wired, access_type) \ + (_pmap_enter((pmap), (va), (pa), (prot), ((wired) ? PMAP_WIRED : 0))) +boolean_t _pmap_extract __P((pmap_t, vaddr_t, paddr_t *)); +#endif +#else void pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, boolean_t, vm_prot_t)); +#endif paddr_t pmap_extract __P((pmap_t, vaddr_t)); #if defined(PMAP_NEW) && defined(PMAP_GROWKERNEL) vaddr_t pmap_growkernel __P((vaddr_t)); @@ -197,6 +222,10 @@ vaddr_t pmap_steal_memory __P((vsize_t, paddr_t *, paddr_t *)); #else void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); #endif + +#if defined(PMAP_FORK) +void pmap_fork __P((pmap_t, pmap_t)); +#endif __END_DECLS #endif /* kernel*/ #endif /* PMAP_EXCLUDE_DECLS */ |