Diffstat (limited to 'sys')
-rw-r--r--  sys/arch/i386/conf/GENERIC_NEW    |    7
-rw-r--r--  sys/arch/i386/conf/files.i386     |    5
-rw-r--r--  sys/arch/i386/i386/db_memrw.c     |   12
-rw-r--r--  sys/arch/i386/i386/gdt.c          |   29
-rw-r--r--  sys/arch/i386/i386/genassym.cf    |    9
-rw-r--r--  sys/arch/i386/i386/locore.s       |   93
-rw-r--r--  sys/arch/i386/i386/machdep.c      |   25
-rw-r--r--  sys/arch/i386/i386/pmap.c         | 4725
-rw-r--r--  sys/arch/i386/i386/sys_machdep.c  |   72
-rw-r--r--  sys/arch/i386/i386/trap.c         |    9
-rw-r--r--  sys/arch/i386/i386/vm_machdep.c   |   61
-rw-r--r--  sys/arch/i386/include/gdt.h       |    7
-rw-r--r--  sys/arch/i386/include/pcb.h       |    5
-rw-r--r--  sys/arch/i386/include/pmap.h      |  213
-rw-r--r--  sys/arch/i386/include/pmap.new.h  |  509
-rw-r--r--  sys/arch/i386/include/vmparam.h   |    8
-rw-r--r--  sys/uvm/uvm_fault.c               |   13
-rw-r--r--  sys/vm/pmap.h                     |   31
18 files changed, 4153 insertions(+), 1680 deletions(-)
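The hunks that follow repeatedly replace pmap_pte(pmap_kernel(), va) with direct arithmetic on the recursive page-table mapping, e.g. ptep0 = PTE_BASE + i386_btop(addr) in db_memrw.c and pte = kvtopte(va) in machdep.c. The sketch below illustrates only that address calculation, not the kernel code itself: PGSHIFT and PDSHIFT are the usual i386 values, while PDSLOT_PTE is a placeholder for whatever slot the new machine/pmap.h reserves for the recursive page-directory entry.

/*
 * Illustrative sketch of the linear (recursive) PTE mapping arithmetic:
 * the page directory is installed as one of its own entries (PDSLOT_PTE),
 * so all page tables appear as a flat array of PTEs starting at PTE_BASE,
 * and the PTE for any virtual address can be found by indexing that array.
 * PDSLOT_PTE below is a placeholder value, not taken from pmap.new.h.
 */
#include <stdio.h>
#include <stdint.h>

#define PGSHIFT     12                          /* 4 KB pages */
#define PDSHIFT     22                          /* 4 MB per PD slot */
#define PDSLOT_PTE  0x33FU                      /* placeholder recursive slot */

/* base of the linear PTE array created by the recursive PDE */
#define PTE_BASE    ((uint32_t)PDSLOT_PTE << PDSHIFT)

/* i386_btop(): byte address -> page index (also the PTE index) */
static uint32_t
i386_btop(uint32_t va)
{
        return va >> PGSHIFT;
}

/* vtopte(): virtual address of the PTE that maps va (4-byte PTEs) */
static uint32_t
vtopte(uint32_t va)
{
        return PTE_BASE + i386_btop(va) * (uint32_t)sizeof(uint32_t);
}

int
main(void)
{
        uint32_t va = 0xd0201234;               /* arbitrary kernel VA */

        printf("PTE_BASE   = 0x%08x\n", (unsigned)PTE_BASE);
        printf("vtopte(va) = 0x%08x for va 0x%08x\n",
            (unsigned)vtopte(va), (unsigned)va);
        return 0;
}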
diff --git a/sys/arch/i386/conf/GENERIC_NEW b/sys/arch/i386/conf/GENERIC_NEW new file mode 100644 index 00000000000..f00ee814461 --- /dev/null +++ b/sys/arch/i386/conf/GENERIC_NEW @@ -0,0 +1,7 @@ +# $OpenBSD: GENERIC_NEW,v 1.1 2001/03/22 23:36:50 niklas Exp $ +# +# GENERIC_NEW -- everything that's currently supported + PMAP_NEW +# + +include "arch/i386/conf/GENERIC" +option PMAP_NEW # use new pmap diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index b7f1b5c20ba..836403bf5e5 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.77 2001/03/05 15:13:43 aaron Exp $ +# $OpenBSD: files.i386,v 1.78 2001/03/22 23:36:51 niklas Exp $ # $NetBSD: files.i386,v 1.73 1996/05/07 00:58:36 thorpej Exp $ # # new style config file for i386 architecture @@ -30,7 +30,8 @@ file arch/i386/i386/i686_mem.c mtrr file arch/i386/i386/k6_mem.c mtrr file arch/i386/i386/microtime.s file arch/i386/i386/ns_cksum.c ns -file arch/i386/i386/pmap.c +file arch/i386/i386/pmap.c pmap_new +file arch/i386/i386/pmap.old.c !pmap_new file arch/i386/i386/process_machdep.c file arch/i386/i386/random.s file arch/i386/i386/sys_machdep.c diff --git a/sys/arch/i386/i386/db_memrw.c b/sys/arch/i386/i386/db_memrw.c index cfa980a2ef2..18335b985e5 100644 --- a/sys/arch/i386/i386/db_memrw.c +++ b/sys/arch/i386/i386/db_memrw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_memrw.c,v 1.1 2000/07/05 14:26:34 hugh Exp $ */ +/* $OpenBSD: db_memrw.c,v 1.2 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: db_memrw.c,v 1.6 1999/04/12 20:38:19 pk Exp $ */ /* @@ -60,7 +60,9 @@ db_read_bytes(addr, size, data) *data++ = *src++; } +#ifndef PMAP_NEW pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif /* * Write bytes to kernel address space for debugger. 
@@ -82,14 +84,22 @@ db_write_bytes(addr, size, data) if (addr >= VM_MIN_KERNEL_ADDRESS && addr < (vm_offset_t)&etext) { +#ifdef PMAP_NEW + ptep0 = PTE_BASE + i386_btop(addr); +#else ptep0 = pmap_pte(pmap_kernel(), addr); +#endif oldmap0 = *ptep0; *(int *)ptep0 |= /* INTEL_PTE_WRITE */ PG_RW; addr1 = i386_trunc_page(addr + size - 1); if (i386_trunc_page(addr) != addr1) { /* data crosses a page boundary */ +#ifdef PMAP_NEW + ptep1 = PTE_BASE + i386_btop(addr1); +#else ptep1 = pmap_pte(pmap_kernel(), addr1); +#endif oldmap1 = *ptep1; *(int *)ptep1 |= /* INTEL_PTE_WRITE */ PG_RW; } diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c index 0ff2b7c30a1..4df49b135e5 100644 --- a/sys/arch/i386/i386/gdt.c +++ b/sys/arch/i386/i386/gdt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.c,v 1.11 1999/02/26 04:32:36 art Exp $ */ +/* $OpenBSD: gdt.c,v 1.12 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: gdt.c,v 1.8 1996/05/03 19:42:06 christos Exp $ */ /*- @@ -307,8 +307,13 @@ tss_free(pcb) } void +#ifdef PMAP_NEW +ldt_alloc(pmap, ldt, len) + struct pmap *pmap; +#else ldt_alloc(pcb, ldt, len) struct pcb *pcb; +#endif union descriptor *ldt; size_t len; { @@ -317,13 +322,33 @@ ldt_alloc(pcb, ldt, len) slot = gdt_get_slot(); setsegment(&dynamic_gdt[slot].sd, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0); +#ifdef PMAP_NEW + simple_lock(&pmap->pm_lock); + pmap->pm_ldt_sel = GSEL(slot, SEL_KPL); + simple_unlock(&pmap->pm_lock); +#else pcb->pcb_ldt_sel = GSEL(slot, SEL_KPL); +#endif } void +#ifdef PMAP_NEW +ldt_free(pmap) + struct pmap *pmap; +#else ldt_free(pcb) struct pcb *pcb; +#endif { + int slot; + +#ifdef PMAP_NEW + simple_lock(&pmap->pm_lock); + slot = IDXSEL(pmap->pm_ldt_sel); + simple_unlock(&pmap->pm_lock); +#else + slot = IDXSEL(pcb->pcb_ldt_sel); +#endif - gdt_put_slot(IDXSEL(pcb->pcb_ldt_sel)); + gdt_put_slot(slot); } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index a5f2962be43..d6c8d82511c 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.7 1999/02/26 04:28:50 art Exp $ +# $OpenBSD: genassym.cf,v 1.8 2001/03/22 23:36:51 niklas Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -73,9 +73,16 @@ endif define SRUN SRUN # values for page tables +ifdef PMAP_NEW +define PDSLOT_KERN PDSLOT_KERN +define PDSLOT_PTE PDSLOT_PTE +define NKPTP_MIN NKPTP_MIN +define NKPTP_MAX NKPTP_MAX +else define PTDPTDI PTDPTDI define KPTDI KPTDI define NKPDE NKPDE +endif define APTDPTDI APTDPTDI # values for virtual memory diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index 08a593267b2..f05505d6b26 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.53 2001/01/24 09:37:58 hugh Exp $ */ +/* $OpenBSD: locore.s,v 1.54 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -120,11 +120,18 @@ * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). 
*/ - .globl _PTmap,_PTD,_PTDpde,_Sysmap + .globl _PTmap,_PTD,_PTDpde +#ifdef PMAP_NEW + .set _PTmap,(PDSLOT_PTE << PDSHIFT) + .set _PTD,(_PTmap + PDSLOT_PTE * NBPG) + .set _PTDpde,(_PTD + PDSLOT_PTE * 4) # XXX 4 == sizeof pde +#else .set _PTmap,(PTDPTDI << PDSHIFT) .set _PTD,(_PTmap + PTDPTDI * NBPG) .set _PTDpde,(_PTD + PTDPTDI * 4) # XXX 4 == sizeof pde + .globl _Sysmap .set _Sysmap,(_PTmap + KPTDI * NBPG) +#endif /* * APTmap, APTD is the alternate recursive pagemap. @@ -421,7 +428,11 @@ try586: /* Use the `cpuid' instruction. */ #define PROC0PDIR ((0) * NBPG) #define PROC0STACK ((1) * NBPG) #define SYSMAP ((1+UPAGES) * NBPG) +#ifdef PMAP_NEW +#define TABLESIZE ((1+UPAGES) * NBPG) /* + nkpde * NBPG */ +#else #define TABLESIZE ((1+UPAGES+NKPDE) * NBPG) +#endif /* Clear the BSS. */ movl $RELOC(_edata),%edi @@ -435,24 +446,48 @@ try586: /* Use the `cpuid' instruction. */ stosl /* Find end of kernel image. */ - movl $RELOC(_end),%esi + movl $RELOC(_end),%edi #if (defined(DDB) || NKSYMS > 0) && !defined(SYMTAB_SPACE) /* Save the symbols (if loaded). */ movl RELOC(_esym),%eax testl %eax,%eax jz 1f subl $KERNBASE,%eax - movl %eax,%esi + movl %eax,%edi 1: #endif /* Calculate where to start the bootstrap tables. */ + movl %edi,%esi # edi = esym ? esym : end addl $PGOFSET, %esi # page align up andl $~PGOFSET, %esi +#ifdef PMAP_NEW + /* + * Calculate the size of the kernel page table directory, and + * how many entries it will have. + */ + movl RELOC(_nkpde),%ecx # get nkpde + cmpl $NKPTP_MIN,%ecx # larger than min? + jge 1f + movl $NKPTP_MIN,%ecx # set at min + jmp 2f +1: cmpl $NKPTP_MAX,%ecx # larger than max? + jle 2f + movl $NKPTP_MAX,%ecx +2: + + /* Clear memory for bootstrap tables. */ + shll $PGSHIFT,%ecx + addl $TABLESIZE,%ecx + addl %esi,%ecx # end of tables + subl %edi,%ecx # size of tables + shrl $2,%ecx +#else /* Clear memory for bootstrap tables. */ movl %esi, %edi movl $((TABLESIZE + 3) >> 2), %ecx # size of tables +#endif xorl %eax, %eax cld rep @@ -496,7 +531,14 @@ try586: /* Use the `cpuid' instruction. */ /* Map the data, BSS, and bootstrap tables read-write. */ leal (PG_V|PG_KW)(%edx),%eax +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx + shll $PGSHIFT,%ecx + addl $TABLESIZE,%ecx + addl %esi,%ecx # end of tables +#else leal (TABLESIZE)(%esi),%ecx # end of tables +#endif subl %edx,%ecx # subtract end of text shrl $PGSHIFT,%ecx fillkpt @@ -508,7 +550,14 @@ try586: /* Use the `cpuid' instruction. */ /* * Construct a page table directory. - * +*/ +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx # count of pde s, + leal (PROC0PDIR+0*4)(%esi),%ebx # where temp maps! + leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 + fillkpt +#else +/* * Install a PDE for temporary double map of kernel text. * Maps two pages, in case the kernel is larger than 4M. * XXX: should the number of pages to map be decided at run-time? @@ -519,18 +568,29 @@ try586: /* Use the `cpuid' instruction. */ movl %eax,(PROC0PDIR+1*4)(%esi) # map it too /* code below assumes %eax == sysmap physaddr, so we adjust it back */ subl $NBPG, %eax +#endif /* * Map kernel PDEs: this is the real mapping used * after the temp mapping outlives its usefulness. */ +#ifdef PMAP_NEW + movl RELOC(_nkpde),%ecx # count of pde s, + leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # map them high + leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 +#else movl $NKPDE,%ecx # count of pde's leal (PROC0PDIR+KPTDI*4)(%esi),%ebx # map them high +#endif fillkpt /* Install a PDE recursively mapping page directory as a page table! 
*/ leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd +#ifdef PMAP_NEW + movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot +#else movl %eax,(PROC0PDIR+PTDPTDI*4)(%esi) # phys addr from above +#endif /* Save phys. addr of PTD, for libkvm. */ movl %esi,RELOC(_PTDpaddr) @@ -548,11 +608,27 @@ try586: /* Use the `cpuid' instruction. */ begin: /* Now running relocated at KERNBASE. Remove double mapping. */ +#ifdef PMAP_NEW + movl _nkpde,%ecx # for this many pde s, + leal (PROC0PDIR+0*4)(%esi),%ebx # which is where temp maps! + addl $(KERNBASE), %ebx # now use relocated address +1: movl $0,(%ebx) + addl $4,%ebx # next pde + loop 1b +#else movl $0,(PROC0PDIR+0*4)(%esi) movl $0,(PROC0PDIR+1*4)(%esi) +#endif /* Relocate atdevbase. */ +#ifdef PMAP_NEW + movl _nkpde,%edx + shll $PGSHIFT,%edx + addl $(TABLESIZE+KERNBASE),%edx + addl %esi,%edx +#else leal (TABLESIZE+KERNBASE)(%esi),%edx +#endif movl %edx,_atdevbase /* Set up bootstrap stack. */ @@ -562,7 +638,14 @@ begin: movl %esi,PCB_CR3(%eax) # pcb->pcb_cr3 xorl %ebp,%ebp # mark end of frames +#ifdef PMAP_NEW + movl _nkpde,%eax + shll $PGSHIFT,%eax + addl $TABLESIZE,%eax + addl %esi,%eax # skip past stack and page tables +#else leal (TABLESIZE)(%esi),%eax # skip past stack and page tables +#endif pushl %eax call _init386 # wire 386 chip for unix operation addl $4,%esp diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 77ca81809b5..059ff37241d 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.151 2001/03/16 00:24:00 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.152 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -2003,7 +2003,9 @@ extern int IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), #if defined(I586_CPU) extern int IDTVEC(f00f_redirect); +#ifndef PMAP_NEW pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif int cpu_f00f_bug = 0; @@ -2032,7 +2034,11 @@ fix_f00f() SEL_KPL, GCODE_SEL); /* Map first page RO */ +#ifdef PMAP_NEW + pte = PTE_BASE + i386_btop(va); +#else pte = pmap_pte(pmap_kernel(), va); +#endif *pte &= ~PG_RW; /* Reload idtr */ @@ -2053,6 +2059,7 @@ init386(first_avail) bios_memmap_t *im; proc0.p_addr = proc0paddr; + curpcb = &proc0.p_addr->u_pcb; /* * Initialize the I/O port and I/O mem extent maps. @@ -2422,12 +2429,14 @@ cpu_reset() lidt(®ion); __asm __volatile("divl %0,%1" : : "q" (0), "a" (0)); +#if 1 /* * Try to cause a triple fault and watchdog reset by unmapping the * entire address space. */ bzero((caddr_t)PTD, NBPG); pmap_update(); +#endif for (;;); } @@ -2685,6 +2694,9 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) { u_long pa, endpa; vm_offset_t va; +#ifdef PMAP_NEW + pt_entry_t *pte; +#endif pa = i386_trunc_page(bpa); endpa = i386_round_page(bpa + size); @@ -2715,10 +2727,19 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) * on those machines. 
*/ if (cpu_class != CPUCLASS_386) { +#ifdef PMAP_NEW + pte = kvtopte(va); + if (cacheable) + *pte &= ~PG_N; + else + *pte |= PG_N; + pmap_update_pg(va); +#else if (!cacheable) pmap_changebit(pa, PG_N, ~0); else pmap_changebit(pa, 0, ~PG_N); +#endif } } @@ -2881,7 +2902,7 @@ _bus_dmamap_load(t, map, buf, buflen, p, flags) { bus_size_t sgsize; bus_addr_t curaddr, lastaddr, baddr, bmask; - caddr_t vaddr = buf; + vaddr_t vaddr = (vaddr_t)buf; int first, seg; pmap_t pmap; diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index b16f45a6d90..28ec9a8ab5e 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,14 +1,10 @@ -/* $OpenBSD: pmap.c,v 1.37 2001/03/22 20:44:59 niklas Exp $ */ -/* $NetBSD: pmap.c,v 1.36 1996/05/03 19:42:22 christos Exp $ */ +/* $OpenBSD: pmap.c,v 1.38 2001/03/22 23:36:51 niklas Exp $ */ +/* $NetBSD: pmap.c,v 1.84 2000/02/21 02:01:24 chs Exp $ */ /* - * Copyright (c) 1993, 1994, 1995 Charles M. Hannum. All rights reserved. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -20,1884 +16,3747 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)pmap.c 7.7 (Berkeley) 5/12/91 - */ - -/* - * Derived originally from an old hp300 version by Mike Hibler. The version - * by William Jolitz has been heavily modified to allow non-contiguous - * mapping of physical memory by Wolfgang Solfrank, and to fix several bugs - * and greatly speedup it up by Charles Hannum. - * - * A recursive map [a pde which points to the page directory] is used to map - * the page tables using the pagetables themselves. This is done to reduce - * the impact on kernel virtual memory for lots of sparse address space, and - * to reduce the cost of memory to each process. - */ - -/* - * Manages physical address maps. 
- * - * In addition to hardware address maps, this - * module is called upon to provide software-use-only - * maps which may or may not be stored in the same - * form as hardware maps. These pseudo-maps are - * used to store intermediate results from copy - * operations to and from address spaces. - * - * Since the information managed by this module is - * also stored by the logical address mapping module, - * this module may throw away valid virtual-to-physical - * mappings at almost any time. However, invalidations - * of virtual-to-physical mappings must be done as - * requested. - * - * In order to cope with hardware architectures which - * make virtual-to-physical map invalidates expensive, - * this module may delay invalidate or reduced protection - * operations until such time as they are actually - * necessary. This module is given full information as - * to which processors are currently using which maps, - * and to when physical maps must be made correct. + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * pmap.c: i386 pmap module rewrite + * Chuck Cranor <chuck@ccrc.wustl.edu> + * 11-Aug-97 + * + * history of this pmap module: in addition to my own input, i used + * the following references for this rewrite of the i386 pmap: + * + * [1] the NetBSD i386 pmap. this pmap appears to be based on the + * BSD hp300 pmap done by Mike Hibler at University of Utah. + * it was then ported to the i386 by William Jolitz of UUNET + * Technologies, Inc. Then Charles M. Hannum of the NetBSD + * project fixed some bugs and provided some speed ups. + * + * [2] the FreeBSD i386 pmap. this pmap seems to be the + * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson + * and David Greenman. + * + * [3] the Mach pmap. this pmap, from CMU, seems to have migrated + * between several processors. the VAX version was done by + * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 + * version was done by Lance Berc, Mike Kupfer, Bob Baron, + * David Golub, and Richard Draves. the alpha version was + * done by Alessandro Forin (CMU/Mach) and Chris Demetriou + * (NetBSD/alpha). 
+ */ + +#ifdef __NetBSD__ +#include "opt_cputype.h" +#include "opt_user_ldt.h" +#include "opt_lockdebug.h" +#include "opt_multiprocessor.h" +#endif + #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/malloc.h> +#include <sys/pool.h> #include <sys/user.h> #include <vm/vm.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> -#if defined(UVM) #include <uvm/uvm.h> -#endif #include <machine/cpu.h> +#include <machine/specialreg.h> +#include <machine/gdt.h> #include <dev/isa/isareg.h> +#ifdef __NetBSD__ +#include <machine/isa_machdep.h> +#endif +#ifdef __OpenBSD__ +#include <sys/msgbuf.h> #include <stand/boot/bootarg.h> -#include <i386/isa/isa_machdep.h> +#endif -#include "isa.h" -#include "isadma.h" +/* + * general info: + * + * - for an explanation of how the i386 MMU hardware works see + * the comments in <machine/pte.h>. + * + * - for an explanation of the general memory structure used by + * this pmap (including the recursive mapping), see the comments + * in <machine/pmap.h>. + * + * this file contains the code for the "pmap module." the module's + * job is to manage the hardware's virtual to physical address mappings. + * note that there are two levels of mapping in the VM system: + * + * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's + * to map ranges of virtual address space to objects/files. for + * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only + * to the file /bin/ls starting at offset zero." note that + * the upper layer mapping is not concerned with how individual + * vm_pages are mapped. + * + * [2] the lower layer of the VM system (the pmap) maintains the mappings + * from virtual addresses. it is concerned with which vm_page is + * mapped where. for example, when you run /bin/ls and start + * at page 0x1000 the fault routine may lookup the correct page + * of the /bin/ls file and then ask the pmap layer to establish + * a mapping for it. + * + * note that information in the lower layer of the VM system can be + * thrown away since it can easily be reconstructed from the info + * in the upper layer. + * + * data structures we use include: + * + * - struct pmap: describes the address space of one thread + * - struct pv_entry: describes one <PMAP,VA> mapping of a PA + * - struct pv_head: there is one pv_head per managed page of + * physical memory. the pv_head points to a list of pv_entry + * structures which describe all the <PMAP,VA> pairs that this + * page is mapped in. this is critical for page based operations + * such as pmap_page_protect() [change protection on _all_ mappings + * of a page] + * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. + * if we run out of pv_entry's we allocate a new pv_page and free + * its pv_entrys. + * - pmap_remove_record: a list of virtual addresses whose mappings + * have been changed. used for TLB flushing. + */ /* - * Allocate various and sundry SYSMAPs used in the days of old VM - * and not yet converted. XXX. + * memory allocation + * + * - there are three data structures that we must dynamically allocate: + * + * [A] new process' page directory page (PDP) + * - plan 1: done at pmap_pinit() we use + * uvm_km_alloc(kernel_map, NBPG) [fka kmem_alloc] to do this + * allocation. + * + * if we are low in free physical memory then we sleep in + * uvm_km_alloc -- in this case this is ok since we are creating + * a new pmap and should not be holding any locks. + * + * if the kernel is totally out of virtual space + * (i.e. 
uvm_km_alloc returns NULL), then we panic. + * + * XXX: the fork code currently has no way to return an "out of + * memory, try again" error code since uvm_fork [fka vm_fork] + * is a void function. + * + * [B] new page tables pages (PTP) + * - plan 1: call uvm_pagealloc() + * => success: zero page, add to pm_pdir + * => failure: we are out of free vm_pages + * - plan 2: using a linked LIST of active pmaps we attempt + * to "steal" a PTP from another process. we lock + * the target pmap with simple_lock_try so that if it is + * busy we do not block. + * => success: remove old mappings, zero, add to pm_pdir + * => failure: highly unlikely + * - plan 3: panic + * + * note: for kernel PTPs, we start with NKPTP of them. as we map + * kernel memory (at uvm_map time) we check to see if we've grown + * the kernel pmap. if so, we call the optional function + * pmap_growkernel() to grow the kernel PTPs in advance. + * + * [C] pv_entry structures + * - plan 1: try to allocate one off the free list + * => success: done! + * => failure: no more free pv_entrys on the list + * - plan 2: try to allocate a new pv_page to add a chunk of + * pv_entrys to the free list + * [a] obtain a free, unmapped, VA in kmem_map. either + * we have one saved from a previous call, or we allocate + * one now using a "vm_map_lock_try" in uvm_map + * => success: we have an unmapped VA, continue to [b] + * => failure: unable to lock kmem_map or out of VA in it. + * move on to plan 3. + * [b] allocate a page in kmem_object for the VA + * => success: map it in, free the pv_entry's, DONE! + * => failure: kmem_object locked, no free vm_pages, etc. + * save VA for later call to [a], go to plan 3. + * - plan 3: using the pv_entry/pv_head lists find a pv_entry + * structure that is part of a non-kernel lockable pmap + * and "steal" that pv_entry by removing the mapping + * and reusing that pv_entry. + * => success: done + * => failure: highly unlikely: unable to lock and steal + * pv_entry + * - plan 4: we panic. */ -#define BSDVM_COMPAT 1 -#ifdef DEBUG -struct { - int kernel; /* entering kernel mapping */ - int user; /* entering user mapping */ - int ptpneeded; /* needed to allocate a PT page */ - int pwchange; /* no mapping change, just wiring or protection */ - int wchange; /* no mapping change, just wiring */ - int mchange; /* was mapped but mapping to different page */ - int managed; /* a managed page */ - int firstpv; /* first mapping for this PA */ - int secondpv; /* second mapping for this PA */ - int ci; /* cache inhibited */ - int unmanaged; /* not a managed page */ - int flushes; /* cache flushes */ -} enter_stats; -struct { - int calls; - int removes; - int pvfirst; - int pvsearch; - int ptinvalid; - int uflushes; - int sflushes; -} remove_stats; - -int pmapdebug = 0 /* 0xffff */; -#define PDB_FOLLOW 0x0001 -#define PDB_INIT 0x0002 -#define PDB_ENTER 0x0004 -#define PDB_REMOVE 0x0008 -#define PDB_CREATE 0x0010 -#define PDB_PTPAGE 0x0020 -#define PDB_CACHE 0x0040 -#define PDB_BITS 0x0080 -#define PDB_COLLECT 0x0100 -#define PDB_PROTECT 0x0200 -#define PDB_PDRTAB 0x0400 -#define PDB_PARANOIA 0x2000 -#define PDB_WIRING 0x4000 -#define PDB_PVDUMP 0x8000 -#endif - -/* - * Get PDEs and PTEs for user/kernel address space - */ -#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PDSHIFT)&1023])) - -/* - * Empty PTEs and PDEs are always 0, but checking only the valid bit allows - * the compiler to generate `testb' rather than `testl'. 
- */ -#define pmap_pde_v(pde) (*(pde) & PG_V) -#define pmap_pte_pa(pte) (*(pte) & PG_FRAME) -#define pmap_pte_w(pte) (*(pte) & PG_W) -#define pmap_pte_m(pte) (*(pte) & PG_M) -#define pmap_pte_u(pte) (*(pte) & PG_U) -#define pmap_pte_v(pte) (*(pte) & PG_V) -#define pmap_pte_set_w(pte, v) ((v) ? (*(pte) |= PG_W) : (*(pte) &= ~PG_W)) -#define pmap_pte_set_prot(pte, v) ((*(pte) &= ~PG_PROT), (*(pte) |= (v))) - -/* - * Given a map and a machine independent protection code, - * convert to a vax protection code. - */ -pt_entry_t protection_codes[8]; - -struct pmap kernel_pmap_store; - -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss)*/ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ -int npages; - -boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ -TAILQ_HEAD(pv_page_list, pv_page) pv_page_freelist; -int pv_nfree; - -pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); -struct pv_entry * pmap_alloc_pv __P((void)); -void pmap_free_pv __P((struct pv_entry *)); -void i386_protection_init __P((void)); -void pmap_collect_pv __P((void)); -__inline void pmap_remove_pv __P((pmap_t, vm_offset_t, struct pv_entry *)); -__inline void pmap_enter_pv __P((pmap_t, vm_offset_t, struct pv_entry *)); -void pmap_remove_all __P((vm_offset_t)); -void pads __P((pmap_t pm)); -void pmap_dump_pvlist __P((vm_offset_t phys, char *m)); -void pmap_pvdump __P((vm_offset_t pa)); - -#if BSDVM_COMPAT -#include <sys/msgbuf.h> +/* + * locking + * + * we have the following locks that we must contend with: + * + * "normal" locks: + * + * - pmap_main_lock + * this lock is used to prevent deadlock and/or provide mutex + * access to the pmap system. most operations lock the pmap + * structure first, then they lock the pv_lists (if needed). + * however, some operations such as pmap_page_protect lock + * the pv_lists and then lock pmaps. in order to prevent a + * cycle, we require a mutex lock when locking the pv_lists + * first. thus, the "pmap = >pv_list" lockers must gain a + * read-lock on pmap_main_lock before locking the pmap. and + * the "pv_list => pmap" lockers must gain a write-lock on + * pmap_main_lock before locking. since only one thread + * can write-lock a lock at a time, this provides mutex. + * + * "simple" locks: + * + * - pmap lock (per pmap, part of uvm_object) + * this lock protects the fields in the pmap structure including + * the non-kernel PDEs in the PDP, and the PTEs. it also locks + * in the alternate PTE space (since that is determined by the + * entry in the PDP). + * + * - pvh_lock (per pv_head) + * this lock protects the pv_entry list which is chained off the + * pv_head structure for a specific managed PA. it is locked + * when traversing the list (e.g. adding/removing mappings, + * syncing R/M bits, etc.) + * + * - pvalloc_lock + * this lock protects the data structures which are used to manage + * the free list of pv_entry structures. + * + * - pmaps_lock + * this lock protects the list of active pmaps (headed by "pmaps"). + * we lock it when adding or removing pmaps from this list. 
+ * + * - pmap_copy_page_lock + * locks the tmp kernel PTE mappings we used to copy data + * + * - pmap_zero_page_lock + * locks the tmp kernel PTE mapping we use to zero a page + * + * - pmap_tmpptp_lock + * locks the tmp kernel PTE mapping we use to look at a PTP + * in another process + * + * XXX: would be nice to have per-CPU VAs for the above 4 + */ + +/* + * locking data structures + */ + +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) +struct lock pmap_main_lock; +simple_lock_data_t pvalloc_lock; +simple_lock_data_t pmaps_lock; +simple_lock_data_t pmap_copy_page_lock; +simple_lock_data_t pmap_zero_page_lock; +simple_lock_data_t pmap_tmpptp_lock; + +#define PMAP_MAP_TO_HEAD_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0) +#define PMAP_MAP_TO_HEAD_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#define PMAP_HEAD_TO_MAP_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0) +#define PMAP_HEAD_TO_MAP_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +/* + * global data structures + */ + +struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ + +/* + * nkpde is the number of kernel PTPs allocated for the kernel at + * boot time (NKPTP is a compile time override). this number can + * grow dynamically as needed (but once allocated, we never free + * kernel PTPs). + */ + +int nkpde = NKPTP; +#ifdef NKPDE +#error "obsolete NKPDE: use NKPTP" +#endif + +/* + * pmap_pg_g: if our processor supports PG_G in the PTE then we + * set pmap_pg_g to PG_G (otherwise it is zero). + */ + +int pmap_pg_g = 0; + +/* + * i386 physical memory comes in a big contig chunk with a small + * hole toward the front of it... the following 4 paddr_t's + * (shared with machdep.c) describe the physical address space + * of this machine. + */ +paddr_t avail_start; /* PA of first available physical page */ +paddr_t avail_end; /* PA of last available physical page */ +paddr_t hole_start; /* PA of start of "hole" */ +paddr_t hole_end; /* PA of end of "hole" */ + +/* + * other data structures + */ + +static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ +static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ + +/* + * the following two vaddr_t's are used during system startup + * to keep track of how much of the kernel's VM space we have used. + * once the system is started, the management of the remaining kernel + * VM space is turned over to the kernel_map vm_map. 
+ */ + +static vaddr_t virtual_avail; /* VA of first free KVA */ +static vaddr_t virtual_end; /* VA of last free KVA */ + + +/* + * pv_page management structures: locked by pvalloc_lock + */ + +TAILQ_HEAD(pv_pagelist, pv_page); +static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */ +static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */ +static int pv_nfpvents; /* # of free pv entries */ +static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */ +static vaddr_t pv_cachedva; /* cached VA for later use */ + +#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ +#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) + /* high water mark */ + +/* + * linked list of all non-kernel pmaps + */ + +static struct pmap_head pmaps; +static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ + +/* + * pool that pmap structures are allocated from + */ + +struct pool pmap_pmap_pool; + +/* + * special VAs and the PTEs that map them + */ + +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; +static caddr_t csrcp, cdstp, zerop, ptpp; +caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ + +#ifdef __NetBSD__ +extern vaddr_t msgbuf_vaddr; +extern paddr_t msgbuf_paddr; + +extern vaddr_t idt_vaddr; /* we allocate IDT early */ +extern paddr_t idt_paddr; +#endif + +#if defined(I586_CPU) +/* stuff to fix the pentium f00f bug */ +extern vaddr_t pentium_idt_vaddr; +#endif + + +/* + * local prototypes + */ + +static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t)); +static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t)); +static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */ +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +static struct pv_entry *pmap_alloc_pvpage __P((struct pmap *, int)); +static void pmap_enter_pv __P((struct pv_head *, + struct pv_entry *, struct pmap *, + vaddr_t, struct vm_page *)); +static void pmap_free_pv __P((struct pmap *, struct pv_entry *)); +static void pmap_free_pvs __P((struct pmap *, struct pv_entry *)); +static void pmap_free_pv_doit __P((struct pv_entry *)); +static void pmap_free_pvpage __P((void)); +static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t)); +static boolean_t pmap_is_curpmap __P((struct pmap *)); +static pt_entry_t *pmap_map_ptes __P((struct pmap *)); +static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *, + vaddr_t)); +static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t)); +static void pmap_remove_ptes __P((struct pmap *, + struct pmap_remove_record *, + struct vm_page *, vaddr_t, + vaddr_t, vaddr_t)); +static struct vm_page *pmap_steal_ptp __P((struct uvm_object *, + vaddr_t)); +static vaddr_t pmap_tmpmap_pa __P((paddr_t)); +static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *)); +static void pmap_tmpunmap_pa __P((void)); +static void pmap_tmpunmap_pvepte __P((struct pv_entry *)); +static boolean_t pmap_transfer_ptes __P((struct pmap *, + struct pmap_transfer_location *, + struct pmap *, + struct pmap_transfer_location *, + int, boolean_t)); +static boolean_t pmap_try_steal_pv __P((struct pv_head *, + struct pv_entry *, + struct pv_entry *)); +static void pmap_unmap_ptes __P((struct pmap *)); + +void pmap_pinit __P((pmap_t)); +void pmap_release __P((pmap_t)); + +/* + 
* p m a p i n l i n e h e l p e r f u n c t i o n s + */ + +/* + * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? + * of course the kernel is always loaded + */ + +__inline static boolean_t +pmap_is_curpmap(pmap) + struct pmap *pmap; +{ + return((pmap == pmap_kernel()) || + (pmap->pm_pdirpa == (paddr_t) rcr3())); +} + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + * + * => returns with pmap_tmpptp_lock held + */ + +__inline static vaddr_t +pmap_tmpmap_pa(pa) + paddr_t pa; +{ + simple_lock(&pmap_tmpptp_lock); +#if defined(DIAGNOSTIC) + if (*ptp_pte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpp); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + * + * => we release pmap_tmpptp_lock + */ + +__inline static void +pmap_tmpunmap_pa() +{ +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptp_pte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + *ptp_pte = 0; /* zap! */ + pmap_update_pg((vaddr_t)ptpp); + simple_unlock(&pmap_tmpptp_lock); +} + +/* + * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry + * + * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] + * => we may grab pmap_tmpptp_lock and return with it held + */ + +__inline static pt_entry_t * +pmap_tmpmap_pvepte(pve) + struct pv_entry *pve; +{ +#ifdef DIAGNOSTIC + if (pve->pv_pmap == pmap_kernel()) + panic("pmap_tmpmap_pvepte: attempt to map kernel"); +#endif + + /* is it current pmap? use direct mapping... */ + if (pmap_is_curpmap(pve->pv_pmap)) + return(vtopte(pve->pv_va)); + + return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) + + ptei((unsigned)pve->pv_va)); +} + +/* + * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte + * + * => we will release pmap_tmpptp_lock if we hold it + */ + +__inline static void +pmap_tmpunmap_pvepte(pve) + struct pv_entry *pve; +{ + /* was it current pmap? if so, return */ + if (pmap_is_curpmap(pve->pv_pmap)) + return; + + pmap_tmpunmap_pa(); +} + +/* + * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in + * + * => we lock enough pmaps to keep things locked in + * => must be undone with pmap_unmap_ptes before returning + */ + +__inline static pt_entry_t * +pmap_map_ptes(pmap) + struct pmap *pmap; +{ + pd_entry_t opde; + + /* the kernel's pmap is always accessible */ + if (pmap == pmap_kernel()) { + return(PTE_BASE); + } + + /* if curpmap then we are always mapped */ + if (pmap_is_curpmap(pmap)) { + simple_lock(&pmap->pm_obj.vmobjlock); + return(PTE_BASE); + } + + /* need to lock both curpmap and pmap: use ordered locking */ + if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + simple_lock(&pmap->pm_obj.vmobjlock); + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } else { + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&pmap->pm_obj.vmobjlock); + } + + /* need to load a new alternate pt space into curpmap? 
*/ + opde = *APDP_PDE; + if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { + *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); + if (pmap_valid_entry(opde)) + pmap_update(); + } + return(APTE_BASE); +} + +/* + * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + */ + +__inline static void +pmap_unmap_ptes(pmap) + struct pmap *pmap; +{ + if (pmap == pmap_kernel()) { + return; + } + if (pmap_is_curpmap(pmap)) { + simple_unlock(&pmap->pm_obj.vmobjlock); + } else { + simple_unlock(&pmap->pm_obj.vmobjlock); + simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } +} /* - * All those kernel PT submaps that BSD is so fond of + * p m a p k e n t e r f u n c t i o n s + * + * functions to quickly enter/remove pages from the kernel address + * space. pmap_kremove/pmap_kenter_pgs are exported to MI kernel. + * we make use of the recursive PTE mappings. */ -pt_entry_t *CMAP1, *CMAP2, *XXX_mmap; -caddr_t CADDR1, CADDR2, vmmap; -pt_entry_t *msgbufmap, *bootargmap; -#endif /* BSDVM_COMPAT */ /* - * Bootstrap the system enough to run with virtual memory. - * Map the kernel's code and data, and allocate the system page table. + * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking + * + * => no need to lock anything, assume va is already allocated + * => should be faster than normal pmap enter function + */ + +void +pmap_kenter_pa(va, pa, prot) + vaddr_t va; + paddr_t pa; + vm_prot_t prot; +{ + pt_entry_t *pte, opte; + + pte = vtopte(va); + opte = *pte; + *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | + PG_V | pmap_pg_g; /* zap! */ + if (pmap_valid_entry(opte)) + pmap_update_pg(va); +} + +/* + * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking * - * On the I386 this is called after mapping has already been enabled - * and just syncs the pmap module with what has already been done. - * [We can't call it easily with mapping off since the kernel is not - * mapped with PA == VA, hence we would have to relocate every address - * from the linked base (virtual) address to the actual (physical) - * address starting relative to 0] + * => no need to lock anything + * => caller must dispose of any vm_page mapped in the va range + * => note: not an inline function + * => we assume the va is page aligned and the len is a multiple of NBPG + * => we assume kernel only unmaps valid addresses and thus don't bother + * checking the valid bit before doing TLB flushing */ void -pmap_bootstrap(virtual_start) - vm_offset_t virtual_start; +pmap_kremove(va, len) + vaddr_t va; + vsize_t len; { -#if BSDVM_COMPAT - vm_offset_t va; pt_entry_t *pte; + + len >>= PAGE_SHIFT; + for ( /* null */ ; len ; len--, va += NBPG) { + pte = vtopte(va); +#ifdef DIAGNOSTIC + if (*pte & PG_PVLIST) + panic("pmap_kremove: PG_PVLIST mapping for 0x%lx\n", + va); +#endif + *pte = 0; /* zap! 
*/ +#if defined(I386_CPU) + if (cpu_class != CPUCLASS_386) #endif + pmap_update_pg(va); + } +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); +#endif +} + +/* + * pmap_kenter_pgs: enter in a number of vm_pages + */ + +void +pmap_kenter_pgs(va, pgs, npgs) + vaddr_t va; + struct vm_page **pgs; + int npgs; +{ + pt_entry_t *pte, opte; + int lcv; + vaddr_t tva; +#if defined(I386_CPU) + boolean_t need_update = FALSE; +#endif + + for (lcv = 0 ; lcv < npgs ; lcv++) { + tva = va + lcv * NBPG; + pte = vtopte(tva); + opte = *pte; + *pte = VM_PAGE_TO_PHYS(pgs[lcv]) | PG_RW | PG_V | pmap_pg_g; +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + if (pmap_valid_entry(opte)) + need_update = TRUE; + continue; + } +#endif + if (pmap_valid_entry(opte)) + pmap_update_pg(tva); + } +#if defined(I386_CPU) + if (need_update && cpu_class == CPUCLASS_386) + pmap_update(); +#endif +} + +/* + * p m a p i n i t f u n c t i o n s + * + * pmap_bootstrap and pmap_init are called during system startup + * to init the pmap module. pmap_bootstrap() does a low level + * init just to get things rolling. pmap_init() finishes the job. + */ + +/* + * pmap_bootstrap: get the system in a state where it can run with VM + * properly enabled (called before main()). the VM system is + * fully init'd later... + * + * => on i386, locore.s has already enabled the MMU by allocating + * a PDP for the kernel, and nkpde PTP's for the kernel. + * => kva_start is the first free virtual address in kernel space + * => we make use of the global vars from machdep.c: + * avail_start, avail_end, hole_start, hole_end + */ + +void +pmap_bootstrap(kva_start) + vaddr_t kva_start; +{ + struct pmap *kpm; + vaddr_t kva; + pt_entry_t *pte; +#ifdef __NetBSD__ + int first16q; +#endif + + /* + * set the page size (default value is 4K which is ok) + */ - /* Register the page size with the vm system */ -#if defined(UVM) uvm_setpagesize(); -#else - vm_set_page_size(); + + /* + * a quick sanity check + */ + + if (PAGE_SIZE != NBPG) + panic("pmap_bootstrap: PAGE_SIZE != NBPG"); + + /* + * use the very last page of physical memory for the message buffer + */ + + avail_end -= i386_round_page(MSGBUFSIZE); +#ifdef __NetBSD__ + msgbuf_paddr = avail_end; #endif - virtual_avail = virtual_start; - virtual_end = VM_MAX_KERNEL_ADDRESS; +#ifdef __OpenBSD__ + /* + * The arguments passed in from /boot needs space too. + */ + avail_end -= i386_round_page(bootargc); +#endif + + /* + * set up our local static global vars that keep track of the + * usage of KVM before kernel_map is set up + */ + + virtual_avail = kva_start; /* first free KVA */ + virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ + + /* + * set up protection_codes: we need to be able to convert from + * a MI protection code (some combo of VM_PROT...) to something + * we can jam into a i386 PTE. + */ + + protection_codes[VM_PROT_NONE] = 0; /* --- */ + protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */ + protection_codes[VM_PROT_READ] = PG_RO; /* -r- */ + protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */ + protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */ + protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */ + protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */ + protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */ + + /* + * now we init the kernel's pmap + * + * the kernel pmap's pm_obj is not used for much. however, in + * user pmaps the pm_obj contains the list of active PTPs. + * the pm_obj currently does not have a pager. 
it might be possible + * to add a pager that would allow a process to read-only mmap its + * own page tables (fast user level vtophys?). this may or may not + * be useful. + */ + + kpm = pmap_kernel(); + simple_lock_init(&kpm->pm_obj.vmobjlock); + kpm->pm_obj.pgops = NULL; + TAILQ_INIT(&kpm->pm_obj.memq); + kpm->pm_obj.uo_npages = 0; + kpm->pm_obj.uo_refs = 1; + bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ + kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = + i386_btop(kva_start - VM_MIN_KERNEL_ADDRESS); /* - * Initialize protection array. + * the above is just a rough estimate and not critical to the proper + * operation of the system. */ - i386_protection_init(); -#ifdef notdef + curpcb->pcb_pmap = kpm; /* proc0's pcb */ + /* - * Create Kernel page directory table and page maps. - * [ currently done in locore. i have wild and crazy ideas -wfj ] + * enable global TLB entries if they are supported */ - bzero(firstaddr, (1+NKPDE)*NBPG); - pmap_kernel()->pm_pdir = firstaddr + VM_MIN_KERNEL_ADDRESS; - pmap_kernel()->pm_ptab = firstaddr + VM_MIN_KERNEL_ADDRESS + NBPG; - - firstaddr += NBPG; - for (x = i386_btod(VM_MIN_KERNEL_ADDRESS); - x < i386_btod(VM_MIN_KERNEL_ADDRESS) + NKPDE; x++) { - pd_entry_t *pde; - pde = pmap_kernel()->pm_pdir + x; - *pde = (firstaddr + x*NBPG) | PG_V | PG_KW; + + if (cpu_feature & CPUID_PGE) { + lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */ + pmap_pg_g = PG_G; /* enable software */ + + /* add PG_G attribute to already mapped kernel pages */ + for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; + kva += NBPG) + if (pmap_valid_entry(PTE_BASE[i386_btop(kva)])) + PTE_BASE[i386_btop(kva)] |= PG_G; } -#else - pmap_kernel()->pm_pdir = - (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + + /* + * now we allocate the "special" VAs which are used for tmp mappings + * by the pmap (and other modules). we allocate the VAs by advancing + * virtual_avail (note that there are no pages mapped at these VAs). + * we find the PTE that maps the allocated VA via the linear PTE + * mapping. + */ + + pte = PTE_BASE + i386_btop(virtual_avail); + + csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + virtual_avail += NBPG; pte++; /* advance */ + + cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + virtual_avail += NBPG; pte++; + + zerop = (caddr_t) virtual_avail; zero_pte = pte; + virtual_avail += NBPG; pte++; + + ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + virtual_avail += NBPG; pte++; + + /* XXX: vmmap used by mem.c... 
should be uvm_map_reserve */ + vmmap = (char *)virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + +#ifdef __NetBSD + msgbuf_vaddr = virtual_avail; /* don't need pte */ #endif +#ifdef __OpenBSD__ + msgbufp = (struct msgbuf *)virtual_avail; /* don't need pte */ +#endif + virtual_avail += round_page(MSGBUFSIZE); pte++; - simple_lock_init(&pmap_kernel()->pm_lock); - pmap_kernel()->pm_count = 1; +#ifdef __NetBSD__ + idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + avail_end -= NBPG; + idt_paddr = avail_end; + +#if defined(I586_CPU) + /* pentium f00f bug stuff */ + pentium_idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; +#endif +#endif + +#ifdef __OpenBSD__ + bootargp = (bootarg_t *)virtual_avail; + virtual_avail += round_page(bootargc); pte++; +#endif -#if BSDVM_COMPAT /* - * Allocate all the submaps we need + * now we reserve some VM for mapping pages when doing a crash dump */ -#define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); - va = virtual_avail; - pte = pmap_pte(pmap_kernel(), va); + virtual_avail = reserve_dumppages(virtual_avail); + + /* + * init the static-global locks and global lists. + */ - SYSMAP(caddr_t ,CMAP1 ,CADDR1 ,1 ) - SYSMAP(caddr_t ,CMAP2 ,CADDR2 ,1 ) - SYSMAP(caddr_t ,XXX_mmap ,vmmap ,1 ) - SYSMAP(struct msgbuf * ,msgbufmap ,msgbufp ,btoc(MSGBUFSIZE)) - SYSMAP(bootarg_t * ,bootargmap ,bootargp ,btoc(bootargc)) - virtual_avail = va; +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + spinlockinit(&pmap_main_lock, "pmaplk", 0); + simple_lock_init(&pvalloc_lock); + simple_lock_init(&pmaps_lock); + simple_lock_init(&pmap_copy_page_lock); + simple_lock_init(&pmap_zero_page_lock); + simple_lock_init(&pmap_tmpptp_lock); #endif + LIST_INIT(&pmaps); + TAILQ_INIT(&pv_freepages); + TAILQ_INIT(&pv_unusedpgs); /* - * Reserve pmap space for mapping physical pages during dump. + * initialize the pmap pool. */ - virtual_avail = reserve_dumppages(virtual_avail); - /* flawed, no mappings?? */ - if (ctob(physmem) > 31*1024*1024 && MAXKPDE != NKPDE) { - vm_offset_t p; - int i; - - p = virtual_avail; - virtual_avail += (MAXKPDE-NKPDE+1) * NBPG; - bzero((void *)p, (MAXKPDE-NKPDE+1) * NBPG); - p = round_page(p); - for (i = NKPDE; i < MAXKPDE; i++, p += NBPG) - PTD[KPTDI+i] = (pd_entry_t)p | - PG_V | PG_KW; + pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", + 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP); + +#ifdef __NetBSD__ + /* + * we must call uvm_page_physload() after we are done playing with + * virtual_avail but before we call pmap_steal_memory. [i.e. here] + * this call tells the VM system how much physical memory it + * controls. If we have 16M of RAM or less, just put it all on + * the default free list. Otherwise, put the first 16M of RAM + * on a lower priority free list (so that all of the ISA DMA'able + * memory won't be eaten up first-off). + */ + + if (avail_end <= (16 * 1024 * 1024)) + first16q = VM_FREELIST_DEFAULT; + else + first16q = VM_FREELIST_FIRST16; + + if (avail_start < hole_start) /* any free memory before the hole? 
*/ + uvm_page_physload(atop(avail_start), atop(hole_start), + atop(avail_start), atop(hole_start), + first16q); + + if (first16q != VM_FREELIST_DEFAULT && + hole_end < 16 * 1024 * 1024) { + uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024), + atop(hole_end), atop(16 * 1024 * 1024), + first16q); + uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end), + atop(16 * 1024 * 1024), atop(avail_end), + VM_FREELIST_DEFAULT); + } else { + uvm_page_physload(atop(hole_end), atop(avail_end), + atop(hole_end), atop(avail_end), + VM_FREELIST_DEFAULT); } -} +#endif -void -pmap_virtual_space(startp, endp) - vm_offset_t *startp; - vm_offset_t *endp; -{ - *startp = virtual_avail; - *endp = virtual_end; + /* + * ensure the TLB is sync'd with reality by flushing it... + */ + + pmap_update(); } /* - * Initialize the pmap module. - * Called by vm_init, to initialize any structures that the pmap - * system needs to map virtual memory. + * pmap_init: called from uvm_init, our job is to get the pmap + * system ready to manage mappings... this mainly means initing + * the pv_entry stuff. */ + void pmap_init() { - vm_offset_t addr; - vm_size_t s; - int lcv; + int npages, lcv; + vaddr_t addr; + vsize_t s; - if (PAGE_SIZE != NBPG) - panic("pmap_init: CLSIZE != 1"); + /* + * compute the number of pages we have and then allocate RAM + * for each pages' pv_head and saved attributes. + */ npages = 0; - for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) npages += (vm_physmem[lcv].end - vm_physmem[lcv].start); - s = (vm_size_t) (sizeof(struct pv_entry) * npages + npages); - s = round_page(s); -#if defined(UVM) - addr = (vm_offset_t) uvm_km_zalloc(kernel_map, s); + s = (vsize_t) (sizeof(struct pv_head) * npages + + sizeof(char) * npages); + s = round_page(s); /* round up */ + addr = (vaddr_t) uvm_km_zalloc(kernel_map, s); if (addr == NULL) - panic("pmap_init"); -#else - addr = (vm_offset_t) kmem_alloc(kernel_map, s); -#endif + panic("pmap_init: unable to allocate pv_heads"); - /* allocate pv_entry stuff first */ + /* + * init all pv_head's and attrs in one bzero + */ + + /* allocate pv_head stuff first */ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { - vm_physmem[lcv].pmseg.pvent = (struct pv_entry *) addr; - addr = (vm_offset_t)(vm_physmem[lcv].pmseg.pvent + - (vm_physmem[lcv].end - vm_physmem[lcv].start)); + vm_physmem[lcv].pmseg.pvhead = (struct pv_head *) addr; + addr = (vaddr_t)(vm_physmem[lcv].pmseg.pvhead + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); } - /* allocate attrs next */ + + /* now allocate attrs */ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { vm_physmem[lcv].pmseg.attrs = (char *) addr; - addr = (vm_offset_t)(vm_physmem[lcv].pmseg.attrs + - (vm_physmem[lcv].end - vm_physmem[lcv].start)); + addr = (vaddr_t)(vm_physmem[lcv].pmseg.attrs + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); } - TAILQ_INIT(&pv_page_freelist); -#ifdef DEBUG - if (pmapdebug & PDB_INIT) - printf("pmap_init: %lx bytes (%x pgs)\n", - s, npages); -#endif + /* + * now we need to free enough pv_entry structures to allow us to get + * the kmem_map/kmem_object allocated and inited (done after this + * function is finished). to do this we allocate one bootstrap page out + * of kernel_map and use it to provide an initial pool of pv_entry + * structures. we never free this page. 
+ */ + + pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, NBPG); + if (pv_initpage == NULL) + panic("pmap_init: pv_initpage"); + pv_cachedva = NULL; /* a VA we have allocated but not used yet */ + pv_nfpvents = 0; + (void) pmap_add_pvpage(pv_initpage, FALSE); /* - * Now it is safe to enable pv_entry recording. + * done: pmap module is up (and ready for business) */ + pmap_initialized = TRUE; } -struct pv_entry * -pmap_alloc_pv() +/* + * p v _ e n t r y f u n c t i o n s + */ + +/* + * pv_entry allocation functions: + * the main pv_entry allocation functions are: + * pmap_alloc_pv: allocate a pv_entry structure + * pmap_free_pv: free one pv_entry + * pmap_free_pvs: free a list of pv_entrys + * + * the rest are helper functions + */ + +/* + * pmap_alloc_pv: inline function to allocate a pv_entry structure + * => we lock pvalloc_lock + * => if we fail, we call out to pmap_alloc_pvpage + * => 3 modes: + * ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it + * ALLOCPV_TRY = we want a pv_entry, but not enough to steal + * ALLOCPV_NONEED = we are trying to grow our free list, don't really need + * one now + * + * "try" is for optional functions like pmap_copy(). + */ + +__inline static struct pv_entry * +pmap_alloc_pv(pmap, mode) + struct pmap *pmap; + int mode; { - struct pv_page *pvp; + struct pv_page *pvpage; struct pv_entry *pv; - int i; - if (pv_nfree == 0) { -#if defined(UVM) - /* NOTE: can't lock kernel_map here */ - MALLOC(pvp, struct pv_page *, NBPG, M_VMPVENT, M_WAITOK); -#else - pvp = (struct pv_page *)kmem_alloc(kernel_map, NBPG); -#endif - if (pvp == 0) - panic("pmap_alloc_pv: kmem_alloc() failed"); - pvp->pvp_pgi.pgi_freelist = pv = &pvp->pvp_pv[1]; - for (i = NPVPPG - 2; i; i--, pv++) - pv->pv_next = pv + 1; - pv->pv_next = 0; - pv_nfree += pvp->pvp_pgi.pgi_nfree = NPVPPG - 1; - TAILQ_INSERT_HEAD(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - pv = &pvp->pvp_pv[0]; - } else { - --pv_nfree; - pvp = pv_page_freelist.tqh_first; - if (--pvp->pvp_pgi.pgi_nfree == 0) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); + simple_lock(&pvalloc_lock); + + if (pv_freepages.tqh_first != NULL) { + pvpage = pv_freepages.tqh_first; + pvpage->pvinfo.pvpi_nfree--; + if (pvpage->pvinfo.pvpi_nfree == 0) { + /* nothing left in this one? */ + TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list); } - pv = pvp->pvp_pgi.pgi_freelist; + pv = pvpage->pvinfo.pvpi_pvfree; #ifdef DIAGNOSTIC - if (pv == 0) - panic("pmap_alloc_pv: pgi_nfree inconsistent"); + if (pv == NULL) + panic("pmap_alloc_pv: pvpi_nfree off"); #endif - pvp->pvp_pgi.pgi_freelist = pv->pv_next; + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + pv_nfpvents--; /* took one from pool */ + } else { + pv = NULL; /* need more of them */ } - return pv; -} -void -pmap_free_pv(pv) - struct pv_entry *pv; -{ - register struct pv_page *pvp; - - pvp = (struct pv_page *) trunc_page(pv); - switch (++pvp->pvp_pgi.pgi_nfree) { - case 1: - TAILQ_INSERT_TAIL(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - default: - pv->pv_next = pvp->pvp_pgi.pgi_freelist; - pvp->pvp_pgi.pgi_freelist = pv; - ++pv_nfree; - break; - case NPVPPG: - pv_nfree -= NPVPPG - 1; - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); -#if defined(UVM) - FREE((vaddr_t) pvp, M_VMPVENT); -#else - kmem_free(kernel_map, (vm_offset_t)pvp, NBPG); -#endif - break; + /* + * if below low water mark or we didn't get a pv_entry we try and + * create more pv_entrys ... 
+ */ + + if (pv_nfpvents < PVE_LOWAT || pv == NULL) { + if (pv == NULL) + pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ? + mode : ALLOCPV_NEED); + else + (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED); } + + simple_unlock(&pvalloc_lock); + return(pv); } -void -pmap_collect_pv() +/* + * pmap_alloc_pvpage: maybe allocate a new pvpage + * + * if need_entry is false: try and allocate a new pv_page + * if need_entry is true: try and allocate a new pv_page and return a + * new pv_entry from it. if we are unable to allocate a pv_page + * we make a last ditch effort to steal a pv_page from some other + * mapping. if that fails, we panic... + * + * => we assume that the caller holds pvalloc_lock + */ + +static struct pv_entry * +pmap_alloc_pvpage(pmap, mode) + struct pmap *pmap; + int mode; { - struct pv_page_list pv_page_collectlist; - struct pv_page *pvp, *npvp; - struct pv_entry *ph, *ppv, *pv, *npv; - int s; - int bank, off; + struct vm_page *pg; + struct pv_page *pvpage; + int lcv, idx, npg, s; + struct pv_entry *pv, *cpv, *prevpv; + + /* + * if we need_entry and we've got unused pv_pages, allocate from there + */ - TAILQ_INIT(&pv_page_collectlist); + if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) { - for (pvp = pv_page_freelist.tqh_first; pvp; pvp = npvp) { - if (pv_nfree < NPVPPG) - break; - npvp = pvp->pvp_pgi.pgi_list.tqe_next; - if (pvp->pvp_pgi.pgi_nfree > NPVPPG / 3) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - TAILQ_INSERT_TAIL(&pv_page_collectlist, pvp, pvp_pgi.pgi_list); - pv_nfree -= pvp->pvp_pgi.pgi_nfree; - pvp->pvp_pgi.pgi_nfree = -1; + /* move it to pv_freepages list */ + pvpage = pv_unusedpgs.tqh_first; + TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); + + /* allocate a pv_entry */ + pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */ + pv = pvpage->pvinfo.pvpi_pvfree; +#ifdef DIAGNOSTIC + if (pv == NULL) + panic("pmap_alloc_pvpage: pvpi_nfree off"); +#endif + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + + pv_nfpvents--; /* took one from pool */ + return(pv); + } + + /* + * see if we've got a cached unmapped VA that we can map a page in. + * if not, try to allocate one. + */ + + s = splimp(); /* must protect kmem_map/kmem_object with splimp! 
*/ + if (pv_cachedva == NULL) { + pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, + NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); + if (pv_cachedva == NULL) { + splx(s); + goto steal_one; } } - if (pv_page_collectlist.tqh_first == 0) - return; + /* + * we have a VA, now let's try and allocate a page in the object + * note: we are still holding splimp to protect kmem_object + */ - if ((bank = vm_physseg_find(atop(0), &off)) == -1) { - printf("INVALID PA!"); - return; + if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) { + splx(s); + goto steal_one; } - for (ph = &vm_physmem[bank].pmseg.pvent[off]; ph; ph = ph->pv_next) { - if (ph->pv_pmap == 0) - continue; - s = splimp(); - for (ppv = ph; (pv = ppv->pv_next) != 0; ) { - pvp = (struct pv_page *) trunc_page(pv); - if (pvp->pvp_pgi.pgi_nfree == -1) { - pvp = pv_page_freelist.tqh_first; - if (--pvp->pvp_pgi.pgi_nfree == 0) { - TAILQ_REMOVE(&pv_page_freelist, pvp, pvp_pgi.pgi_list); - } - npv = pvp->pvp_pgi.pgi_freelist; -#ifdef DIAGNOSTIC - if (npv == 0) - panic("pmap_collect_pv: pgi_nfree inconsistent"); -#endif - pvp->pvp_pgi.pgi_freelist = npv->pv_next; - *npv = *pv; - ppv->pv_next = npv; - ppv = npv; - } else - ppv = pv; + pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva - + vm_map_min(kernel_map), + NULL, UVM_PGA_USERESERVE); + if (pg) + pg->flags &= ~PG_BUSY; /* never busy */ + + simple_unlock(&uvmexp.kmem_object->vmobjlock); + splx(s); + /* splimp now dropped */ + + if (pg == NULL) + goto steal_one; + + /* + * add a mapping for our new pv_page and free its entrys (save one!) + * + * NOTE: If we are allocating a PV page for the kernel pmap, the + * pmap is already locked! (...but entering the mapping is safe...) + */ + + pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pvpage = (struct pv_page *) pv_cachedva; + pv_cachedva = NULL; + return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + +steal_one: + /* + * if we don't really need a pv_entry right now, we can just return. + */ + + if (mode != ALLOCPV_NEED) + return(NULL); + + /* + * last ditch effort! we couldn't allocate a free page to make + * more pv_entrys so we try and steal one from someone else. + */ + + pv = NULL; + for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) { + npg = vm_physmem[lcv].end - vm_physmem[lcv].start; + for (idx = 0 ; idx < npg ; idx++) { + struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead; + + if (pvhead->pvh_list == NULL) + continue; /* spot check */ + if (!simple_lock_try(&pvhead->pvh_lock)) + continue; + cpv = prevpv = pvhead->pvh_list; + while (cpv) { + if (pmap_try_steal_pv(pvhead, cpv, prevpv)) + break; + prevpv = cpv; + cpv = cpv->pv_next; + } + simple_unlock(&pvhead->pvh_lock); + /* got one? break out of the loop! */ + if (cpv) { + pv = cpv; + break; + } } - splx(s); } - for (pvp = pv_page_collectlist.tqh_first; pvp; pvp = npvp) { - npvp = pvp->pvp_pgi.pgi_list.tqe_next; -#if defined(UVM) - FREE((vaddr_t) pvp, M_VMPVENT); -#else - kmem_free(kernel_map, (vm_offset_t)pvp, NBPG); -#endif + return(pv); +} + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! + */ + +static boolean_t +pmap_try_steal_pv(pvh, cpv, prevpv) + struct pv_head *pvh; + struct pv_entry *cpv, *prevpv; +{ + pt_entry_t *ptep; /* pointer to a PTE */ + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. 
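Note that the steal path above never blocks: each pv_head is probed with simple_lock_try() and anything that cannot be locked is simply skipped, since sleeping or violating lock order is not an option this deep in the allocator. A minimal illustration of that opportunistic try-lock scan; the lock type and scan targets below are stand-ins, not kernel primitives:

struct stublock { volatile int held; };

/* not atomic; a stand-in for simple_lock_try(), for illustration only */
static int
stublock_try(struct stublock *l)
{
	if (l->held)
		return (0);
	l->held = 1;
	return (1);
}

struct scanee {
	struct stublock	lock;
	int		usable;
};

/* return the index of the first entry we could lock and use, or -1 */
static int
trylock_scan(struct scanee *v, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!stublock_try(&v[i].lock))
			continue;		/* contended: skip, never wait */
		if (v[i].usable)
			return (i);		/* caller unlocks when done */
		v[i].lock.held = 0;		/* not usable: unlock, keep going */
	}
	return (-1);
}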
first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + *ptep = 0; /* zap! */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); + pmap_tmpunmap_pvepte(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); +} + +/* + * pmap_add_pvpage: add a pv_page's pv_entrys to the free list + * + * => caller must hold pvalloc_lock + * => if need_entry is true, we allocate and return one pv_entry + */ + +static struct pv_entry * +pmap_add_pvpage(pvp, need_entry) + struct pv_page *pvp; + boolean_t need_entry; +{ + int tofree, lcv; + + /* do we need to return one? */ + tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE; + + pvp->pvinfo.pvpi_pvfree = NULL; + pvp->pvinfo.pvpi_nfree = tofree; + for (lcv = 0 ; lcv < tofree ; lcv++) { + pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv]; } + if (need_entry) + TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list); + else + TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + pv_nfpvents += tofree; + return((need_entry) ? &pvp->pvents[lcv] : NULL); } -__inline void -pmap_enter_pv(pmap, va, pv) - register pmap_t pmap; - vm_offset_t va; +/* + * pmap_free_pv_doit: actually free a pv_entry + * + * => do not call this directly! instead use either + * 1. pmap_free_pv ==> free a single pv_entry + * 2. pmap_free_pvs => free a list of pv_entrys + * => we must be holding pvalloc_lock + */ + +__inline static void +pmap_free_pv_doit(pv) struct pv_entry *pv; -{ - register struct pv_entry *npv; - int s; +{ + struct pv_page *pvp; - if (!pmap_initialized) - return; + pvp = (struct pv_page *) i386_trunc_page(pv); + pv_nfpvents++; + pvp->pvinfo.pvpi_nfree++; -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("pmap_enter_pv: pv %x: %x/%x/%x\n", - pv, pv->pv_va, pv->pv_pmap, pv->pv_next); -#endif - s = splimp(); + /* nfree == 1 => fully allocated page just became partly allocated */ + if (pvp->pvinfo.pvpi_nfree == 1) { + TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list); + } - if (pv->pv_pmap == NULL) { - /* - * No entries yet, use header as the first entry - */ -#ifdef DEBUG - enter_stats.firstpv++; -#endif - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - } else { - /* - * There is at least one other VA mapping this page. - * Place this entry after the header. 
- */ -#ifdef DEBUG - for (npv = pv; npv; npv = npv->pv_next) - if (pmap == npv->pv_pmap && va == npv->pv_va) - panic("pmap_enter_pv: already in pv_tab"); -#endif - npv = pmap_alloc_pv(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; -#ifdef DEBUG - if (!npv->pv_next) - enter_stats.secondpv++; -#endif + /* free it */ + pv->pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = pv; + + /* + * are all pv_page's pv_entry's free? move it to unused queue. + */ + + if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) { + TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list); } - splx(s); } -__inline void -pmap_remove_pv(pmap, va, pv) - register pmap_t pmap; - vm_offset_t va; +/* + * pmap_free_pv: free a single pv_entry + * + * => we gain the pvalloc_lock + */ + +__inline static void +pmap_free_pv(pmap, pv) + struct pmap *pmap; struct pv_entry *pv; -{ - register struct pv_entry *npv; - int s; +{ + simple_lock(&pvalloc_lock); + pmap_free_pv_doit(pv); /* - * Remove from the PV table (raise IPL since we - * may be called at interrupt time). + * Can't free the PV page if the PV entries were associated with + * the kernel pmap; the pmap is already locked. */ - s = splimp(); + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL && + pmap != pmap_kernel()) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + +/* + * pmap_free_pvs: free a list of pv_entrys + * + * => we gain the pvalloc_lock + */ + +__inline static void +pmap_free_pvs(pmap, pvs) + struct pmap *pmap; + struct pv_entry *pvs; +{ + struct pv_entry *nextpv; + + simple_lock(&pvalloc_lock); + + for ( /* null */ ; pvs != NULL ; pvs = nextpv) { + nextpv = pvs->pv_next; + pmap_free_pv_doit(pvs); + } /* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. + * Can't free the PV page if the PV entries were associated with + * the kernel pmap; the pmap is already locked. */ - if (pmap == pv->pv_pmap && va == pv->pv_va) { - npv = pv->pv_next; - if (npv) { - *pv = *npv; - pmap_free_pv(npv); - } else - pv->pv_pmap = NULL; - } else { - for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) { - if (pmap == npv->pv_pmap && va == npv->pv_va) - break; - } - if (npv) { - pv->pv_next = npv->pv_next; - pmap_free_pv(npv); - } + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL && + pmap != pmap_kernel()) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + + +/* + * pmap_free_pvpage: try and free an unused pv_page structure + * + * => assume caller is holding the pvalloc_lock and that + * there is a page on the pv_unusedpgs list + * => if we can't get a lock on the kmem_map we try again later + * => note: analysis of MI kmem_map usage [i.e. malloc/free] shows + * that if we can lock the kmem_map then we are not already + * holding kmem_object's lock. + */ + +static void +pmap_free_pvpage() +{ + int s; + struct vm_map *map; + vm_map_entry_t dead_entries; + struct pv_page *pvp; + + s = splimp(); /* protect kmem_map */ + + pvp = pv_unusedpgs.tqh_first; + + /* + * note: watch out for pv_initpage which is allocated out of + * kernel_map rather than kmem_map. 
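The pool management above keeps pv_nfpvents between a low and a high watermark: dropping below PVE_LOWAT triggers allocation of more pv_pages, and climbing above PVE_HIWAT while a fully unused page is around lets one be given back. A small sketch of that hysteresis test; the watermark values here are illustrative, the patch defines the real PVE_LOWAT/PVE_HIWAT elsewhere:

#define XPVE_LOWAT	32			/* illustrative values only */
#define XPVE_HIWAT	(XPVE_LOWAT + 96)

/* +1: should grow the pool, -1: should release a spare page, 0: leave it */
static int
pool_pressure(int nfree, int have_unused_page)
{
	if (nfree < XPVE_LOWAT)
		return (1);
	if (nfree > XPVE_HIWAT && have_unused_page)
		return (-1);
	return (0);
}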
+ */ + if (pvp == pv_initpage) + map = kernel_map; + else + map = kmem_map; + + if (vm_map_lock_try(map)) { + + /* remove pvp from pv_unusedpgs */ + TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + + /* unmap the page */ + dead_entries = NULL; + (void)uvm_unmap_remove(map, (vaddr_t) pvp, + ((vaddr_t) pvp) + NBPG, &dead_entries); + vm_map_unlock(map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + + pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ } + + if (pvp == pv_initpage) + /* no more initpage, we've freed it */ + pv_initpage = NULL; + splx(s); } /* - * Used to map a range of physical addresses into kernel - * virtual address space. + * main pv_entry manipulation functions: + * pmap_enter_pv: enter a mapping onto a pv_head list + * pmap_remove_pv: remove a mappiing from a pv_head list * - * For now, VM is already on, we only need to map the - * specified memory. + * NOTE: pmap_enter_pv expects to lock the pvh itself + * pmap_remove_pv expects te caller to lock the pvh before calling */ -vm_offset_t -pmap_map(va, spa, epa, prot) - vm_offset_t va, spa, epa; - int prot; + +/* + * pmap_enter_pv: enter a mapping onto a pv_head lst + * + * => caller should hold the proper lock on pmap_main_lock + * => caller should have pmap locked + * => we will gain the lock on the pv_head and allocate the new pv_entry + * => caller should adjust ptp's wire_count before calling + */ + +__inline static void +pmap_enter_pv(pvh, pve, pmap, va, ptp) + struct pv_head *pvh; + struct pv_entry *pve; /* preallocated pve for us to use */ + struct pmap *pmap; + vaddr_t va; + struct vm_page *ptp; /* PTP in pmap that maps this VA */ { + pve->pv_pmap = pmap; + pve->pv_va = va; + pve->pv_ptp = ptp; /* NULL for kernel pmap */ + simple_lock(&pvh->pvh_lock); /* lock pv_head */ + pve->pv_next = pvh->pvh_list; /* add to ... */ + pvh->pvh_list = pve; /* ... locked list */ + simple_unlock(&pvh->pvh_lock); /* unlock, done! */ +} -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_map(%x, %x, %x, %x)\n", va, spa, epa, prot); -#endif +/* + * pmap_remove_pv: try to remove a mapping from a pv_list + * + * => caller should hold proper lock on pmap_main_lock + * => pmap should be locked + * => caller should hold lock on pv_head [so that attrs can be adjusted] + * => caller should adjust ptp's wire_count and free PTP if needed + * => we return the removed pve + */ - while (spa < epa) { - pmap_enter(pmap_kernel(), va, spa, prot, FALSE, 0); - va += NBPG; - spa += NBPG; +__inline static struct pv_entry * +pmap_remove_pv(pvh, pmap, va) + struct pv_head *pvh; + struct pmap *pmap; + vaddr_t va; +{ + struct pv_entry *pve, **prevptr; + + prevptr = &pvh->pvh_list; /* previous pv_entry pointer */ + pve = *prevptr; + while (pve) { + if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ + *prevptr = pve->pv_next; /* remove it! */ + break; + } + prevptr = &pve->pv_next; /* previous pointer */ + pve = pve->pv_next; /* advance */ } - return va; + return(pve); /* return removed pve */ } /* - * Create and return a physical map. + * p t p f u n c t i o n s + */ + +/* + * pmap_alloc_ptp: allocate a PTP for a PMAP * - * If the size specified for the map - * is zero, the map is an actual physical - * map, and may be referenced by the - * hardware. 
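pmap_remove_pv above walks the list with a pointer to the previous link field, so unlinking the head and unlinking an interior entry are the same assignment. A stand-alone sketch of that pointer-to-pointer idiom, with a stub node type and an illustrative match test:

struct node_stub {
	struct node_stub *next;
	int key;
};

/* unlink and return the first node with the given key, or NULL */
static struct node_stub *
unlink_by_key(struct node_stub **headp, int key)
{
	struct node_stub **prevptr = headp, *n;

	for (n = *prevptr; n != NULL; prevptr = &n->next, n = n->next) {
		if (n->key == key) {
			*prevptr = n->next;	/* works for head and interior */
			return (n);
		}
	}
	return (NULL);
}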
+ * => pmap should already be locked by caller + * => we use the ptp's wire_count to count the number of active mappings + * in the PTP (we start it at one to prevent any chance this PTP + * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) + */ + +__inline static struct vm_page * +pmap_alloc_ptp(pmap, pde_index, just_try) + struct pmap *pmap; + int pde_index; + boolean_t just_try; +{ + struct vm_page *ptp; + + ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, + UVM_PGA_USERESERVE); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + } + + /* got one! */ + ptp->flags &= ~PG_BUSY; /* never busy */ + ptp->wire_count = 1; /* no mappings yet */ + pmap_zero_page(VM_PAGE_TO_PHYS(ptp)); + pmap->pm_pdir[pde_index] = + (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + pmap->pm_stats.resident_count++; /* count PTP as resident */ + pmap->pm_ptphint = ptp; + return(ptp); +} + +/* + * pmap_steal_ptp: steal a PTP from any pmap that we can access * - * If the size specified is non-zero, - * the map will be used in software only, and - * is bounded by that size. + * => obj is locked by caller. + * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +static struct vm_page * +pmap_steal_ptp(obj, offset) + struct uvm_object *obj; + vaddr_t offset; +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + ptp = curobj->memq.tqh_first; + for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). + */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes(pmaps_hand, NULL, ptp, + (vaddr_t)ptes, + ptp_i2v(idx), + ptp_i2v(idx+1)); + pmap_tmpunmap_pa(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; + if (pmap_is_curpmap(pmaps_hand)) + pmap_update(); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdirpa) { + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); + } + + /* put it in our pmap! 
*/ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + return(ptp); +} + +/* + * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) * - * [ just allocate a ptd and mark it uninitialize -- should we track - * with a table which process has which ptd? -wfj ] + * => pmap should NOT be pmap_kernel() + * => pmap should be locked */ -pmap_t -pmap_create(size) - vm_size_t size; + +static struct vm_page * +pmap_get_ptp(pmap, pde_index, just_try) + struct pmap *pmap; + int pde_index; + boolean_t just_try; { - register pmap_t pmap; + struct vm_page *ptp; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) - printf("pmap_create(%x)\n", size); + if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { + + /* valid... check hint (saves us a PA->PG lookup) */ + if (pmap->pm_ptphint && + (pmap->pm_pdir[pde_index] & PG_FRAME) == + VM_PAGE_TO_PHYS(pmap->pm_ptphint)) + return(pmap->pm_ptphint); + + ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_get_ptp: unmanaged user PTP"); #endif + pmap->pm_ptphint = ptp; + return(ptp); + } - /* - * Software use map does not need a pmap - */ - if (size) - return NULL; + /* allocate a new PTP (updates ptphint) */ + return(pmap_alloc_ptp(pmap, pde_index, just_try)); +} + +/* + * p m a p l i f e c y c l e f u n c t i o n s + */ + +/* + * pmap_create: create a pmap + * + * => note: old pmap interface took a "size" args which allowed for + * the creation of "software only" pmaps (not in bsd). + */ + +struct pmap * +pmap_create() +{ + struct pmap *pmap; - pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); - bzero(pmap, sizeof(*pmap)); + pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); pmap_pinit(pmap); - return pmap; + return(pmap); } /* - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. + * pmap_pinit: given a zero'd pmap structure, init it. */ + void pmap_pinit(pmap) - register struct pmap *pmap; + struct pmap *pmap; { + /* init uvm_object */ + simple_lock_init(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ + TAILQ_INIT(&pmap->pm_obj.memq); + pmap->pm_obj.uo_npages = 0; + pmap->pm_obj.uo_refs = 1; + pmap->pm_stats.wired_count = 0; + pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ + pmap->pm_ptphint = NULL; + pmap->pm_flags = 0; + + /* allocate PDP */ + pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + (void) _pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, + (paddr_t *)&pmap->pm_pdirpa); -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) - printf("pmap_pinit(%x)\n", pmap); -#endif + /* init PDP */ + /* zero init area */ + bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; + + /* init the LDT */ + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); /* - * No need to allocate page table space yet but we do need a - * valid page directory table. + * we need to lock pmaps_lock to prevent nkpde from changing on + * us. 
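The recursive PDE installed above is what makes a pmap's page tables addressable as one flat array: a directory slot that points back at the page directory itself turns the 4MB window starting at (slot << 22) into a linear view of every PTE. A sketch of the arithmetic, assuming the usual i386 two-level 10/10/12 split; the concrete slot number is whatever this patch defines PDSLOT_PTE to be:

#define XPGSHIFT	12
#define XPDSHIFT	22

/* directory and table indexes of a VA under the 10/10/12 split */
#define xpdei(va)	(((unsigned long)(va) >> XPDSHIFT) & 0x3ff)
#define xptei(va)	(((unsigned long)(va) >> XPGSHIFT) & 0x3ff)

/*
 * with the self-map in directory slot "pteslot", the PTE that maps "va"
 * is itself visible at this virtual address:
 */
static unsigned long
pte_va(unsigned long pteslot, unsigned long va)
{
	unsigned long pte_base = pteslot << XPDSHIFT;

	return (pte_base + (va >> XPGSHIFT) * 4);	/* 4-byte PTEs */
}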
note that there is no need to splimp to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... */ -#if defined(UVM) - pmap->pm_pdir = (pd_entry_t *) uvm_km_zalloc(kernel_map, NBPG); -#else - pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, NBPG); -#endif - -#ifdef DIAGNOSTIC - if (pmap->pm_pdir == NULL) - panic("pmap_pinit: alloc failed"); -#endif - /* wire in kernel global address entries */ - bcopy(&PTD[KPTDI], &pmap->pm_pdir[KPTDI], MAXKPDE * - sizeof(pd_entry_t)); - - /* install self-referential address mapping entry */ - pmap->pm_pdir[PTDPTDI] = pmap_extract(pmap_kernel(), - (vm_offset_t)pmap->pm_pdir) | PG_V | PG_KW; - - pmap->pm_count = 1; - simple_lock_init(&pmap->pm_lock); + simple_lock(&pmaps_lock); + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + simple_unlock(&pmaps_lock); } /* - * Retire the given physical map from service. - * Should only be called if the map contains - * no valid mappings. + * pmap_destroy: drop reference count on pmap. free pmap if + * reference count goes to zero. */ + void pmap_destroy(pmap) - register pmap_t pmap; + struct pmap *pmap; { - int count; + int refs; - if (pmap == NULL) + /* + * drop reference count + */ + + simple_lock(&pmap->pm_obj.vmobjlock); + refs = --pmap->pm_obj.uo_refs; + simple_unlock(&pmap->pm_obj.vmobjlock); + if (refs > 0) { return; + } -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_destroy(%x)\n", pmap); -#endif + /* + * reference count is zero, free pmap resources and then free pmap. + */ - simple_lock(&pmap->pm_lock); - count = --pmap->pm_count; - simple_unlock(&pmap->pm_lock); - if (count == 0) { - pmap_release(pmap); - free((caddr_t)pmap, M_VMPMAP); - } + pmap_release(pmap); + pool_put(&pmap_pmap_pool, pmap); } /* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. + * pmap_release: release all resources held by a pmap + * + * => if pmap is still referenced it should be locked + * => XXX: we currently don't expect any busy PTPs because we don't + * allow anything to map them (except for the kernel's private + * recursive mapping) or make them busy. */ + void pmap_release(pmap) - register struct pmap *pmap; + struct pmap *pmap; { + struct vm_page *pg; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_release(%x)\n", pmap); -#endif + /* + * remove it from global list of pmaps + */ -#ifdef DIAGNOSTICx - /* sometimes 1, sometimes 0; could rearrange pmap_destroy */ - if (pmap->pm_count != 1) - panic("pmap_release count"); + simple_lock(&pmaps_lock); + if (pmap == pmaps_hand) + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + LIST_REMOVE(pmap, pm_list); + simple_unlock(&pmaps_lock); + + /* + * free any remaining PTPs + */ + + while (pmap->pm_obj.memq.tqh_first != NULL) { + pg = pmap->pm_obj.memq.tqh_first; +#ifdef DIAGNOSTIC + if (pg->flags & PG_BUSY) + panic("pmap_release: busy page table page"); #endif + /* pmap_page_protect? currently no need for it. */ -#if defined(UVM) + pg->wire_count = 0; + uvm_pagefree(pg); + } + + /* XXX: need to flush it out of other processor's APTE space? 
*/ uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); -#else - kmem_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG); + +#ifdef USER_LDT + if (pmap->pm_flags & PMF_USER_LDT) { + /* + * no need to switch the LDT; this address space is gone, + * nothing is using it. + */ + ldt_free(pmap); + uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt, + pmap->pm_ldt_len * sizeof(union descriptor)); + } #endif } /* * Add a reference to the specified pmap. */ + void pmap_reference(pmap) - pmap_t pmap; + struct pmap *pmap; { + simple_lock(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.uo_refs++; + simple_unlock(&pmap->pm_obj.vmobjlock); +} - if (pmap == NULL) - return; +#if defined(PMAP_FORK) +/* + * pmap_fork: perform any necessary data structure manipulation when + * a VM space is forked. + */ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_reference(%x)", pmap); -#endif +void +pmap_fork(pmap1, pmap2) + struct pmap *pmap1, *pmap2; +{ + simple_lock(&pmap1->pm_obj.vmobjlock); + simple_lock(&pmap2->pm_obj.vmobjlock); + +#ifdef USER_LDT + /* Copy the LDT, if necessary. */ + if (pmap1->pm_flags & PMF_USER_LDT) { + union descriptor *new_ldt; + size_t len; + + len = pmap1->pm_ldt_len * sizeof(union descriptor); + new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len); + bcopy(pmap1->pm_ldt, new_ldt, len); + pmap2->pm_ldt = new_ldt; + pmap2->pm_ldt_len = pmap1->pm_ldt_len; + pmap2->pm_flags |= PMF_USER_LDT; + ldt_alloc(pmap2, new_ldt, len); + } +#endif /* USER_LDT */ - simple_lock(&pmap->pm_lock); - pmap->pm_count++; - simple_unlock(&pmap->pm_lock); + simple_unlock(&pmap2->pm_obj.vmobjlock); + simple_unlock(&pmap1->pm_obj.vmobjlock); } +#endif /* PMAP_FORK */ + +#ifdef USER_LDT +/* + * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and + * restore the default. + */ void -pmap_activate(p) +pmap_ldt_cleanup(p) struct proc *p; { struct pcb *pcb = &p->p_addr->u_pcb; pmap_t pmap = p->p_vmspace->vm_map.pmap; + union descriptor *old_ldt = NULL; + size_t len = 0; + + simple_lock(&pmap->pm_obj.vmobjlock); + + if (pmap->pm_flags & PMF_USER_LDT) { + ldt_free(pmap); + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; + if (pcb == curpcb) + lldt(pcb->pcb_ldt_sel); + old_ldt = pmap->pm_ldt; + len = pmap->pm_ldt_len * sizeof(union descriptor); + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_flags &= ~PMF_USER_LDT; + } - pcb->pcb_cr3 = pmap_extract(pmap_kernel(), (vm_offset_t)pmap->pm_pdir); - if (p == curproc) - lcr3(pcb->pcb_cr3); + simple_unlock(&pmap->pm_obj.vmobjlock); + + if (old_ldt != NULL) + uvm_km_free(kernel_map, (vaddr_t)old_ldt, len); } +#endif /* USER_LDT */ + +/* + * pmap_activate: activate a process' pmap (fill in %cr3 info) + * + * => called from cpu_fork() + * => if proc is the curproc, then load it into the MMU + */ void -pmap_deactivate(p) +pmap_activate(p) struct proc *p; { + struct pcb *pcb = &p->p_addr->u_pcb; + struct pmap *pmap = p->p_vmspace->vm_map.pmap; + + pcb->pcb_pmap = pmap; + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; + pcb->pcb_cr3 = pmap->pm_pdirpa; + if (p == curproc) + lcr3(pcb->pcb_cr3); + if (pcb == curpcb) + lldt(pcb->pcb_ldt_sel); } /* - * Remove the given range of addresses from the specified map. + * pmap_deactivate: deactivate a process' pmap * - * It is assumed that the start and end are properly - * rounded to the page size. + * => XXX: what should this do, if anything? 
*/ + void -pmap_remove(pmap, sva, eva) - struct pmap *pmap; - register vm_offset_t sva, eva; +pmap_deactivate(p) + struct proc *p; { - register pt_entry_t *pte; - vm_offset_t pa; - int bank, off; - int flush = 0; - - sva &= PG_FRAME; - eva &= PG_FRAME; +} - /* - * We need to acquire a pointer to a page table page before entering - * the following loop. - */ - while (sva < eva) { - pte = pmap_pte(pmap, sva); - if (pte) - break; - sva = (sva & PD_MASK) + NBPD; - } +/* + * end of lifecycle functions + */ - while (sva < eva) { - /* only check once in a while */ - if ((sva & PT_MASK) == 0) { - if (!pmap_pde_v(pmap_pde(pmap, sva))) { - /* We can race ahead here, to the next pde. */ - sva += NBPD; - pte += i386_btop(NBPD); - continue; - } - } +/* + * some misc. functions + */ - pte = pmap_pte(pmap, sva); - if (pte == NULL) { - /* We can race ahead here, to the next pde. */ - sva = (sva & PD_MASK) + NBPD; - continue; - } +/* + * pmap_extract: extract a PA for the given VA + */ - if (!pmap_pte_v(pte)) { -#ifdef __GNUC__ - /* - * Scan ahead in a tight loop for the next used PTE in - * this page. We don't scan the whole region here - * because we don't want to zero-fill unused page table - * pages. - */ - int n, m; - - n = min(eva - sva, NBPD - (sva & PT_MASK)) >> PGSHIFT; - __asm __volatile( - "cld\n\trepe\n\tscasl\n\tje 1f\n\tincl %1\n\t1:" - : "=D" (pte), "=c" (m) - : "0" (pte), "1" (n), "a" (0)); - sva += (n - m) << PGSHIFT; - if (!m) - continue; - /* Overshot. */ - --pte; -#else - goto next; -#endif - } +boolean_t +_pmap_extract(pmap, va, pap) + struct pmap *pmap; + vaddr_t va; + paddr_t *pap; +{ + paddr_t retval; + pt_entry_t *ptes; + + if (pmap->pm_pdir[pdei(va)]) { + ptes = pmap_map_ptes(pmap); + retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME); + pmap_unmap_ptes(pmap); + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); + } + return (FALSE); +} - flush = 1; +paddr_t +pmap_extract(pmap, va) + pmap_t pmap; + vaddr_t va; +{ + paddr_t pa; - /* - * Update statistics - */ - if (pmap_pte_w(pte)) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; + if (_pmap_extract(pmap, va, &pa)) + return (pa); + return (NULL); +} - pa = pmap_pte_pa(pte); +/* + * pmap_virtual_space: used during bootup [pmap_steal_memory] to + * determine the bounds of the kernel virtual addess space. + */ - /* - * Invalidate the PTEs. - * XXX: should cluster them up and invalidate as many - * as possible at once. - */ -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("remove: inv pte at %x(%x) ", pte, *pte); -#endif +void +pmap_virtual_space(startp, endp) + vaddr_t *startp; + vaddr_t *endp; +{ + *startp = virtual_avail; + *endp = virtual_end; +} -#ifdef needednotdone -reduce wiring count on page table pages as references drop -#endif +/* + * pmap_map: map a range of PAs into kvm + * + * => used during crash dump + * => XXX: pmap_map() should be phased out? 
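_pmap_extract above rebuilds a physical address from two pieces: the frame bits of the PTE and the page-offset bits of the VA. A short sketch of that split and recombine, assuming 4KB pages and a PG_FRAME-style mask (mask values illustrative):

#define XPAGE_MASK	0xfffUL			/* low 12 bits: offset in page */
#define XFRAME_MASK	(~XPAGE_MASK)		/* stands in for PG_FRAME */

/* given the PTE that maps "va", recover the physical address */
static unsigned long
extract_pa(unsigned long pte, unsigned long va)
{
	/* e.g. pte 0x00123067, va 0xdeadbeef -> pa 0x00123eef */
	return ((pte & XFRAME_MASK) | (va & XPAGE_MASK));
}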
+ */ - if ((bank = vm_physseg_find(atop(pa), &off)) != -1) { - vm_physmem[bank].pmseg.attrs[off] |= - *pte & (PG_M | PG_U); - pmap_remove_pv(pmap, sva, - &vm_physmem[bank].pmseg.pvent[off]); - } +vaddr_t +pmap_map(va, spa, epa, prot) + vaddr_t va; + paddr_t spa, epa; + vm_prot_t prot; +{ + while (spa < epa) { + _pmap_enter(pmap_kernel(), va, spa, prot, 0); + va += NBPG; + spa += NBPG; + } + return va; +} - *pte = 0; +/* + * pmap_zero_page: zero a page + */ -#ifndef __GNUC__ - next: +void +pmap_zero_page(pa) + paddr_t pa; +{ + simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page: lock botch"); #endif - sva += NBPG; - pte++; - } - if (flush) - pmap_update(); + *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + bzero(zerop, NBPG); /* zero */ + *zero_pte = 0; /* zap! */ + pmap_update_pg((vaddr_t)zerop); /* flush TLB */ + simple_unlock(&pmap_zero_page_lock); } /* - * Routine: pmap_remove_all - * Function: - * Removes this physical page from - * all physical maps in which it resides. - * Reflects back modify bits to the pager. + * pmap_copy_page: copy a page */ + void -pmap_remove_all(pa) - vm_offset_t pa; +pmap_copy_page(srcpa, dstpa) + paddr_t srcpa, dstpa; { - struct pv_entry *ph, *pv, *npv; - register pmap_t pmap; - register pt_entry_t *pte; - int bank, off; - int s; - -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) - printf("pmap_remove_all(%x)", pa); - /*pmap_pvdump(pa);*/ + simple_lock(&pmap_copy_page_lock); +#ifdef DIAGNOSTIC + if (*csrc_pte || *cdst_pte) + panic("pmap_copy_page: lock botch"); #endif - bank = vm_physseg_find(atop(pa), &off); - if (bank == -1) - return; + *csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW; + bcopy(csrcp, cdstp, NBPG); + *csrc_pte = *cdst_pte = 0; /* zap! */ + pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp); + simple_unlock(&pmap_copy_page_lock); +} - pv = ph = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); +/* + * p m a p r e m o v e f u n c t i o n s + * + * functions that remove mappings + */ - if (ph->pv_pmap == NULL) { - splx(s); - return; - } +/* + * pmap_remove_ptes: remove PTEs from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + */ - while (pv) { - pmap = pv->pv_pmap; - pte = pmap_pte(pmap, pv->pv_va); +static void +pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) + struct pmap *pmap; + struct pmap_remove_record *pmap_rr; + struct vm_page *ptp; + vaddr_t ptpva; + vaddr_t startva, endva; +{ + struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ + struct pv_entry *pve; + pt_entry_t *pte = (pt_entry_t *) ptpva; + pt_entry_t opte; + int bank, off; -#ifdef DEBUG - if (!pte || !pmap_pte_v(pte) || pmap_pte_pa(pte) != pa) - panic("pmap_remove_all: bad mapping"); -#endif + /* + * note that ptpva points to the PTE that maps startva. this may + * or may not be the first PTE in the PTP. + * + * we loop through the PTP while there are still PTEs to look at + * and the wire_count is greater than 1 (because we use the wire_count + * to keep track of the number of real PTEs in the PTP). + */ - /* - * Update statistics - */ - if (pmap_pte_w(pte)) + for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) + ; pte++, startva += NBPG) { + if (!pmap_valid_entry(*pte)) + continue; /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! 
*/ + if (opte & PG_W) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; + if (pmap_rr) { /* worried about tlb flushing? */ + if (opte & PG_G) { + /* PG_G requires this */ + pmap_update_pg(startva); + } else { + if (pmap_rr->prr_npages < PMAP_RR_MAX) { + pmap_rr->prr_vas[pmap_rr->prr_npages++] + = startva; + } else { + if (pmap_rr->prr_npages == PMAP_RR_MAX) + /* signal an overflow */ + pmap_rr->prr_npages++; + } + } + } + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + /* - * Invalidate the PTEs. - * XXX: should cluster them up and invalidate as many - * as possible at once. + * if we are not on a pv_head list we are done. */ -#ifdef DEBUG - if (pmapdebug & PDB_REMOVE) - printf("remove: inv pte at %x(%x) ", pte, *pte); + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) + != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", startva); #endif + continue; + } -#ifdef needednotdone -reduce wiring count on page table pages as references drop + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_ptes: unmanaged page marked " + "PG_PVLIST"); #endif - /* - * Update saved attributes for managed page - */ - vm_physmem[bank].pmseg.attrs[off] |= *pte & (PG_M | PG_U); - *pte = 0; + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, + startva); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); - npv = pv->pv_next; - if (pv == ph) - ph->pv_pmap = NULL; - else - pmap_free_pv(pv); - pv = npv; - } - splx(s); + if (pve) { + pve->pv_next = pv_tofree; + pv_tofree = pve; + } - pmap_update(); + /* end of "for" loop: time for next pte */ + } + if (pv_tofree) + pmap_free_pvs(pmap, pv_tofree); } + /* - * Set the physical protection on the - * specified range of this map as requested. + * pmap_remove_pte: remove a single PTE from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + * => returns true if we removed a mapping */ -void -pmap_protect(pmap, sva, eva, prot) - register pmap_t pmap; - vm_offset_t sva, eva; - vm_prot_t prot; + +static boolean_t +pmap_remove_pte(pmap, ptp, pte, va) + struct pmap *pmap; + struct vm_page *ptp; + pt_entry_t *pte; + vaddr_t va; { - register pt_entry_t *pte; - register int i386prot; - int flush = 0; + pt_entry_t opte; + int bank, off; + struct pv_entry *pve; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) - printf("pmap_protect(%x, %x, %x, %x)", pmap, sva, eva, prot); -#endif + if (!pmap_valid_entry(*pte)) + return(FALSE); /* VA not mapped */ - if ((prot & VM_PROT_READ) == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ - if (prot & VM_PROT_WRITE) - return; + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; - sva &= PG_FRAME; - eva &= PG_FRAME; + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); /* flush TLB */ /* - * We need to acquire a pointer to a page table page before entering - * the following loop. + * if we are not on a pv_head list we are done. 
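The pmap_remove_record handling above batches TLB invalidations: each removed VA is remembered up to PMAP_RR_MAX, and anything beyond that is recorded as an overflow so the caller falls back to one full flush instead. A stand-alone sketch of that accumulate-or-overflow scheme; the array size below is illustrative, not the patch's PMAP_RR_MAX:

#define XRR_MAX	16			/* illustrative, not the patch's value */

struct rr_stub {
	int		npages;
	unsigned long	vas[XRR_MAX];
};

/* remember one VA to flush later; going past XRR_MAX just marks overflow */
static void
rr_record(struct rr_stub *rr, unsigned long va)
{
	if (rr->npages < XRR_MAX)
		rr->vas[rr->npages++] = va;
	else if (rr->npages == XRR_MAX)
		rr->npages++;		/* overflow: caller flushes whole TLB */
}

/*
 * caller, once all removals are done:
 *	if (rr.npages > XRR_MAX)	-> flush the whole TLB
 *	else				-> flush each rr.vas[0..npages-1]
 */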
*/ - while (sva < eva) { - pte = pmap_pte(pmap, sva); - if (pte) - break; - sva = (sva & PD_MASK) + NBPD; - } - while (sva < eva) { - /* only check once in a while */ - if ((sva & PT_MASK) == 0) { - if (!pmap_pde_v(pmap_pde(pmap, sva))) { - /* We can race ahead here, to the next pde. */ - sva += NBPD; - pte += i386_btop(NBPD); - continue; - } - } - - if (!pmap_pte_v(pte)) { -#ifdef __GNUC__ - /* - * Scan ahead in a tight loop for the next used PTE in - * this page. We don't scan the whole region here - * because we don't want to zero-fill unused page table - * pages. - */ - int n, m; - - n = min(eva - sva, NBPD - (sva & PT_MASK)) >> PGSHIFT; - __asm __volatile( - "cld\n\trepe\n\tscasl\n\tje 1f\n\tincl %1\n\t1:" - : "=D" (pte), "=c" (m) - : "0" (pte), "1" (n), "a" (0)); - sva += (n - m) << PGSHIFT; - if (!m) - continue; - /* Overshot. */ - --pte; -#else - goto next; + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", va); #endif - } - - flush = 1; - - i386prot = protection_codes[prot]; - if (sva < VM_MAXUSER_ADDRESS) /* see also pmap_enter() */ - i386prot |= PG_u; - else if (sva < VM_MAX_ADDRESS) - i386prot |= PG_u | PG_RW; - pmap_pte_set_prot(pte, i386prot); + return(TRUE); + } -#ifndef __GNUC__ - next: + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_pte: unmanaged page marked PG_PVLIST"); #endif - sva += NBPG; - pte++; - } - if (flush) - pmap_update(); + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) + pmap_free_pv(pmap, pve); + return(TRUE); } /* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte can not be reclaimed. + * pmap_remove: top level mapping removal function * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. + * => caller should not be holding any pmap locks */ + void -pmap_enter(pmap, va, pa, prot, wired, access_type) - register pmap_t pmap; - vm_offset_t va; - register vm_offset_t pa; - vm_prot_t prot; - boolean_t wired; - vm_prot_t access_type; +pmap_remove(pmap, sva, eva) + struct pmap *pmap; + vaddr_t sva, eva; { - register pt_entry_t *pte; - register pt_entry_t npte; - int bank, off; - int flush = 0; - boolean_t cacheable; + pt_entry_t *ptes; + boolean_t result; + paddr_t ptppa; + vaddr_t blkendva; + struct vm_page *ptp; + struct pmap_remove_record pmap_rr, *prr; -#ifdef DEBUG - if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) - printf("pmap_enter(%x, %x, %x, %x, %x)", pmap, va, pa, prot, - wired); -#endif + /* + * we lock in the pmap => pv_head direction + */ - if (pmap == NULL) - return; + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ - if (va >= VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: too big"); - /* also, should not muck with PTD va! */ + /* + * removing one page? take shortcut function. 
+ */ -#ifdef DEBUG - if (pmap == pmap_kernel()) - enter_stats.kernel++; - else - enter_stats.user++; -#endif + if (sva + NBPG == eva) { - pte = pmap_pte(pmap, va); - if (!pte) { - /* - * Page Directory table entry not valid, we need a new PT page - * - * we want to vm_fault in a new zero-filled PT page for our - * use. in order to do this, we want to call vm_fault() - * with the VA of where we want to put the PTE. but in - * order to call vm_fault() we need to know which vm_map - * we are faulting in. in the m68k pmap's this is easy - * since all PT pages live in one global vm_map ("pt_map") - * and we have a lot of virtual space we can use for the - * pt_map (since the kernel doesn't have to share its 4GB - * address space with processes). but in the i386 port - * the kernel must live in the top part of the virtual - * address space and PT pages live in their process' vm_map - * rather than a global one. the problem is that we have - * no way of knowing which vm_map is the correct one to - * fault on. - * - * XXX: see NetBSD PR#1834 and Mycroft's posting to - * tech-kern on 7 Jan 1996. - * - * rather than always calling panic, we try and make an - * educated guess as to which vm_map to use by using curproc. - * this is a workaround and may not fully solve the problem? - */ - struct vm_map *vmap; - int rv; - vm_offset_t v; - - if (curproc == NULL || curproc->p_vmspace == NULL || - pmap != curproc->p_vmspace->vm_map.pmap) - panic("ptdi %x", pmap->pm_pdir[PTDPTDI]); - - /* our guess about the vm_map was good! fault it in. */ - - vmap = &curproc->p_vmspace->vm_map; - v = trunc_page(vtopte(va)); -#ifdef DEBUG - printf("faulting in a pt page map %x va %x\n", vmap, v); -#endif -#if defined(UVM) - rv = uvm_fault(vmap, v, 0, VM_PROT_READ|VM_PROT_WRITE); -#else - rv = vm_fault(vmap, v, VM_PROT_READ|VM_PROT_WRITE, FALSE); -#endif - if (rv != KERN_SUCCESS) - panic("ptdi2 %x", pmap->pm_pdir[PTDPTDI]); -#if defined(UVM) - /* - * XXX It is possible to get here from uvm_fault with vmap - * locked. uvm_map_pageable requires it to be unlocked, so - * try to record the state of the lock, unlock it, and then - * after the call, reacquire the original lock. - * THIS IS A GROSS HACK! - */ - { - int ls = lockstatus(&vmap->lock); - - if (ls) - lockmgr(&vmap->lock, LK_RELEASE, (void *)0, - curproc); - uvm_map_pageable(vmap, v, round_page(v+1), FALSE); - if (ls) - lockmgr(&vmap->lock, ls, (void *)0, curproc); - } -#else - vm_map_pageable(vmap, v, round_page(v+1), FALSE); -#endif - pte = pmap_pte(pmap, va); - if (!pte) - panic("ptdi3 %x", pmap->pm_pdir[PTDPTDI]); - } -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: pte %x, *pte %x ", pte, *pte); + if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { + + /* PA of the PTP */ + ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME; + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == + ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged " + "PTP detected"); #endif + } + } - if (pmap_pte_v(pte)) { - register vm_offset_t opa; + /* do it! */ + result = pmap_remove_pte(pmap, ptp, + &ptes[i386_btop(sva)], sva); - /* - * Check for wiring change and adjust statistics. 
- */ - if ((wired && !pmap_pte_w(pte)) || - (!wired && pmap_pte_w(pte))) { /* - * We don't worry about wiring PT pages as they remain - * resident as long as there are valid mappings in them. - * Hence, if a user page is wired, the PT page will be also. + * if mapping removed and the PTP is no longer + * being used, free it! */ -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: wiring change -> %x ", wired); -#endif - if (wired) - pmap->pm_stats.wired_count++; - else - pmap->pm_stats.wired_count--; -#ifdef DEBUG - enter_stats.wchange++; -#endif - } - - flush = 1; - opa = pmap_pte_pa(pte); - /* - * Mapping has not changed, must be protection or wiring change. - */ - if (opa == pa) { -#ifdef DEBUG - enter_stats.pwchange++; + if (result && ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ +#if defined(I386_CPU) + /* already dumped whole TLB on i386 */ + if (cpu_class != CPUCLASS_386) #endif - goto validate; - } - - /* - * Mapping has changed, invalidate old range and fall through to - * handle validating new mapping. - */ -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: removing old mapping %x pa %x ", va, opa); -#endif - if ((bank = vm_physseg_find(atop(opa), &off)) != -1) { - vm_physmem[bank].pmseg.attrs[off] |= - *pte & (PG_M | PG_U); - pmap_remove_pv(pmap, va, - &vm_physmem[bank].pmseg.pvent[off]); + { + pmap_update_pg(((vaddr_t) ptes) + + ptp->offset); + } + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) + pmap->pm_ptphint = + pmap->pm_obj.memq.tqh_first; + ptp->wire_count = 0; + uvm_pagefree(ptp); + } } -#ifdef DEBUG - enter_stats.mchange++; -#endif - } else { - /* - * Increment counters - */ - pmap->pm_stats.resident_count++; - if (wired) - pmap->pm_stats.wired_count++; + + pmap_unmap_ptes(pmap); /* unlock pmap */ + PMAP_MAP_TO_HEAD_UNLOCK(); + return; } /* - * Enter on the PV list if part of our managed memory + * removing a range of pages: we unmap in PTP sized blocks (4MB) + * + * if we are the currently loaded pmap, we use prr to keep track + * of the VAs we unload so that we can flush them out of the tlb. */ - if ((bank = vm_physseg_find(atop(pa), &off)) != -1) { -#ifdef DEBUG - enter_stats.managed++; -#endif - pmap_enter_pv(pmap, va, &vm_physmem[bank].pmseg.pvent[off]); - cacheable = TRUE; - } else if (pmap_initialized) { -#ifdef DEBUG - enter_stats.unmanaged++; -#endif - /* - * Assumption: if it is not part of our managed memory - * then it must be device memory which may be volatile. - */ - cacheable = FALSE; + + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; } -validate: - /* - * Now validate mapping with desired protection/wiring. - * Assume uniform modified and referenced status for all - * I386 pages in a MACH page. - */ - npte = (pa & PG_FRAME) | protection_codes[prot] | PG_V; - if (wired) - npte |= PG_W; + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; - if (va < VM_MAXUSER_ADDRESS) /* i.e. below USRSTACK */ - npte |= PG_u; - else if (va < VM_MAX_ADDRESS) /* - * Page tables need to be user RW, for some reason, and the - * user area must be writable too. Anything above - * VM_MAXUSER_ADDRESS is protected from user access by - * the user data and code segment descriptors, so this is OK. + * XXXCDC: our PTE mappings should never be removed + * with pmap_remove! if we allow this (and why would + * we?) 
then we end up freeing the pmap's page + * directory page (PDP) before we are finished using + * it when we hit in in the recursive mapping. this + * is BAD. + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. */ - npte |= PG_u | PG_RW; -#ifdef DEBUG - if (pmapdebug & PDB_ENTER) - printf("enter: new pte value %x ", npte); + if (pdei(sva) == PDSLOT_PTE) + /* XXXCDC: ugly hack to avoid freeing PDP here */ + continue; + + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + /* valid block? */ + continue; + + /* PA of the PTP */ + ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME); + + /* get PTP if non-kernel mapping */ + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP " + "detected"); #endif + } + } + pmap_remove_ptes(pmap, prr, ptp, + (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva); + + /* if PTP is no longer being used, free it! */ + if (ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ + pmap_update_pg( ((vaddr_t) ptes) + ptp->offset); +#if defined(I386_CPU) + /* cancel possible pending pmap update on i386 */ + if (cpu_class == CPUCLASS_386 && prr) + prr->prr_npages = 0; +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first; + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } - *pte = npte; - if (flush) - pmap_update(); + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else +#endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg( + prr->prr_vas[--prr->prr_npages]); + } + } + } /* not I386 */ + } + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); } /* - * pmap_page_protect: + * pmap_page_remove: remove a managed vm_page from all pmaps that map it * - * Lower the permission for all mappings to a given page. + * => we set pv_head => pmap locking + * => R/M bits are sync'd back to attrs */ + void -pmap_page_protect(phys, prot) - vm_offset_t phys; - vm_prot_t prot; +pmap_page_remove(pg) + struct vm_page *pg; { + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, opte; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif - switch (prot) { - case VM_PROT_READ: - case VM_PROT_READ|VM_PROT_EXECUTE: - pmap_copy_on_write(phys); - break; - case VM_PROT_ALL: - break; - default: - pmap_remove_all(phys); - break; + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_page_remove: unmanaged page?\n"); + return; } + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return; + } + + /* set pv_head => pmap locking */ + PMAP_HEAD_TO_MAP_LOCK(); + + /* XXX: needed if we hold head->map lock? 
*/ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + PG_FRAME) + != VM_PAGE_TO_PHYS(pve->pv_ptp)) { + printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", + pg, pve->pv_va, pve->pv_ptp); + printf("pmap_page_remove: PTP's phys addr: " + "actual=%x, recorded=%lx\n", + (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); + panic("pmap_page_remove: mapped managed page has " + "invalid pv_ptp field"); + } +#endif + + opte = ptes[i386_btop(pve->pv_va)]; + ptes[i386_btop(pve->pv_va)] = 0; /* zap! */ + + if (opte & PG_W) + pve->pv_pmap->pm_stats.wired_count--; + pve->pv_pmap->pm_stats.resident_count--; + + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* update the PTP reference count. free if last reference. */ + if (pve->pv_ptp) { + pve->pv_ptp->wire_count--; + if (pve->pv_ptp->wire_count <= 1) { + /* zap! */ + pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0; + pmap_update_pg(((vaddr_t)ptes) + + pve->pv_ptp->offset); +#if defined(I386_CPU) + needs_update = FALSE; +#endif + pve->pv_pmap->pm_stats.resident_count--; + /* update hint? */ + if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) + pve->pv_pmap->pm_ptphint = + pve->pv_pmap->pm_obj.memq.tqh_first; + pve->pv_ptp->wire_count = 0; + uvm_pagefree(pve->pv_ptp); + } + } + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + } + pmap_free_pvs(NULL, pvh->pvh_list); + pvh->pvh_list = NULL; + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * p m a p a t t r i b u t e f u n c t i o n s + * functions that test/change managed page's attributes + * since a page can be mapped multiple times we must check each PTE that + * maps it by going down the pv lists. */ -void -pmap_change_wiring(pmap, va, wired) - register pmap_t pmap; - vm_offset_t va; - boolean_t wired; -{ - register pt_entry_t *pte; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_change_wiring(%x, %x, %x)", pmap, va, wired); -#endif +/* + * pmap_test_attrs: test a page's attributes + * + * => we set pv_head => pmap locking + */ - pte = pmap_pte(pmap, va); - if (!pte) - return; +boolean_t +pmap_test_attrs(pg, testbits) + struct vm_page *pg; + int testbits; +{ + int bank, off; + char *myattrs; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, pte; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_test_attrs: unmanaged page?\n"); + return(FALSE); + } -#ifdef DEBUG /* - * Page not valid. Should this ever happen? - * Just continue and change wiring anyway. + * before locking: see if attributes are already set and if so, + * return! 
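pmap_test_attrs consults the cached attribute byte before taking any locks and only walks the page's mappings on a miss, OR-ing each PTE's referenced/modified bits into the cache as it goes. A compact sketch of that early-out accumulation, with illustrative bit values standing in for PG_U and PG_M:

#define XPG_U	0x20		/* "referenced" bit, illustrative value */
#define XPG_M	0x40		/* "modified" bit, illustrative value */

/* return nonzero as soon as one of testbits is known to be set */
static int
test_attrs(unsigned char *cached, const unsigned long *ptes, int nptes,
    int testbits)
{
	int i;

	if (*cached & testbits)
		return (1);			/* cheap hit, no walk needed */
	for (i = 0; i < nptes && (*cached & testbits) == 0; i++)
		*cached |= (ptes[i] & (XPG_U | XPG_M));
	return ((*cached & testbits) != 0);
}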
*/ - if (!pmap_pte_v(pte)) { - if (pmapdebug & PDB_PARANOIA) - printf("pmap_change_wiring: invalid PTE for %x ", va); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + if (*myattrs & testbits) + return(TRUE); + + /* test to see if there is a list before bothering to lock */ + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return(FALSE); } -#endif - if ((wired && !pmap_pte_w(pte)) || (!wired && pmap_pte_w(pte))) { - if (wired) - pmap->pm_stats.wired_count++; - else - pmap->pm_stats.wired_count--; - pmap_pte_set_w(pte, wired); + /* nope, gonna have to do it the hard way */ + PMAP_HEAD_TO_MAP_LOCK(); + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; + pve = pve->pv_next) { + ptes = pmap_map_ptes(pve->pv_pmap); + pte = ptes[i386_btop(pve->pv_va)]; + pmap_unmap_ptes(pve->pv_pmap); + *myattrs |= pte; } + + /* + * note that we will exit the for loop with a non-null pve if + * we have found the bits we are testing for. + */ + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + return((*myattrs & testbits) != 0); } /* - * Routine: pmap_pte - * Function: - * Extract the page table entry associated - * with the given map/virtual_address pair. + * pmap_change_attrs: change a page's attributes + * + * => we set pv_head => pmap locking + * => we return TRUE if we cleared one of the bits we were asked to */ -pt_entry_t * -pmap_pte(pmap, va) - register pmap_t pmap; - vm_offset_t va; + +boolean_t +pmap_change_attrs(pg, setbits, clearbits) + struct vm_page *pg; + int setbits, clearbits; { - pt_entry_t *ptp; + u_int32_t result; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, npte; + char *myattrs; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_change_attrs: unmanaged page?\n"); + return(FALSE); + } -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_pte(%x, %x) ->\n", pmap, va); + PMAP_HEAD_TO_MAP_LOCK(); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + result = *myattrs & clearbits; + *myattrs = (*myattrs | setbits) & ~clearbits; + + for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { +#ifdef DIAGNOSTIC + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) { + printf("pmap_change_attrs: found pager VA on pv_list"); + } + if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) + panic("pmap_change_attrs: mapping without PTP " + "detected"); #endif - if (!pmap || !pmap_pde_v(pmap_pde(pmap, va))) - return NULL; + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + npte = ptes[i386_btop(pve->pv_va)]; + result |= (npte & clearbits); + npte = (npte | setbits) & ~clearbits; + if (ptes[i386_btop(pve->pv_va)] != npte) { + ptes[i386_btop(pve->pv_va)] = npte; /* zap! 
*/ - if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde & PG_FRAME) || - pmap == pmap_kernel()) - /* current address space or kernel */ - ptp = PTmap; - else { - /* alternate address space */ - if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) != (APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } } - ptp = APTmap; + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ } - return ptp + i386_btop(va); + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif + return(result != 0); } /* - * Routine: pmap_extract - * Function: - * Extract the physical page address associated - * with the given map/virtual_address pair. + * p m a p p r o t e c t i o n f u n c t i o n s */ -vm_offset_t -pmap_extract(pmap, va) - register pmap_t pmap; - vm_offset_t va; -{ - register pt_entry_t *pte; - register vm_offset_t pa; - -#ifdef DEBUGx - if (pmapdebug & PDB_FOLLOW) - printf("pmap_extract(%x, %x) -> ", pmap, va); -#endif - pte = pmap_pte(pmap, va); - if (!pte) - return NULL; - if (!pmap_pte_v(pte)) - return NULL; +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => NOTE: this is an inline function in pmap.h + */ - pa = pmap_pte_pa(pte); -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("%x\n", pa); -#endif - return pa | (va & ~PG_FRAME); -} +/* see pmap.h */ /* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. + * pmap_protect: set the protection in of the pages in a pmap * - * This routine is only advisory and need not do anything. + * => NOTE: this is an inline function in pmap.h */ -void -pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) - pmap_t dst_pmap, src_pmap; - vm_offset_t dst_addr, src_addr; - vm_size_t len; -{ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_copy(%x, %x, %x, %x, %x)", - dst_pmap, src_pmap, dst_addr, len, src_addr); -#endif -} +/* see pmap.h */ /* - * Routine: pmap_collect - * Function: - * Garbage collects the physical map system for - * pages which are no longer used. - * Success need not be guaranteed -- that is, there - * may well be pages which are not referenced, but - * others may be collected. - * Usage: - * Called by the pageout daemon when pages are scarce. - * [ needs to be written -wfj ] XXXX + * pmap_write_protect: write-protect pages in a pmap */ + void -pmap_collect(pmap) - pmap_t pmap; +pmap_write_protect(pmap, sva, eva, prot) + struct pmap *pmap; + vaddr_t sva, eva; + vm_prot_t prot; { -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_collect(%x) ", pmap); -#endif + pt_entry_t *ptes, *spte, *epte, npte; + struct pmap_remove_record pmap_rr, *prr; + vaddr_t blockend, va; + u_int32_t md_prot; - if (pmap != pmap_kernel()) - return; + ptes = pmap_map_ptes(pmap); /* locks pmap */ -} + /* need to worry about TLB? [TLB stores protection bits] */ + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; + } -#if DEBUG -void -pmap_dump_pvlist(phys, m) - vm_offset_t phys; - char *m; -{ - register struct pv_entry *pv; - int bank, off; + /* should be ok, but just in case ... 
*/ + sva &= PG_FRAME; + eva &= PG_FRAME; - if (!(pmapdebug & PDB_PARANOIA)) - return; + for (/* null */ ; sva < eva ; sva = blockend) { - if (!pmap_initialized) - return; - printf("%s %08x:", m, phys); - bank = vm_physseg_find(atop(phys), &off); - pv = &vm_physmem[bank].pmseg.pvent[off]; - if (pv->pv_pmap == NULL) { - printf(" no mappings\n"); - return; + blockend = (sva & PD_MASK) + NBPD; + if (blockend > eva) + blockend = eva; + + /* + * XXXCDC: our PTE mappings should never be write-protected! + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + /* XXXCDC: ugly hack to avoid freeing PDP here */ + if (pdei(sva) == PDSLOT_PTE) + continue; + + /* empty block? */ + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + continue; + + md_prot = protection_codes[prot]; + if (sva < VM_MAXUSER_ADDRESS) + md_prot |= PG_u; + else if (sva < VM_MAX_ADDRESS) + /* XXX: write-prot our PTES? never! */ + md_prot |= (PG_u | PG_RW); + + spte = &ptes[i386_btop(sva)]; + epte = &ptes[i386_btop(blockend)]; + + for (/*null */; spte < epte ; spte++) { + + if (!pmap_valid_entry(*spte)) /* no mapping? */ + continue; + + npte = (*spte & ~PG_PROT) | md_prot; + + if (npte != *spte) { + *spte = npte; /* zap! */ + + if (prr) { /* worried about tlb flushing? */ + va = i386_ptob(spte - ptes); + if (npte & PG_G) { + /* PG_G requires this */ + pmap_update_pg(va); + } else { + if (prr->prr_npages < + PMAP_RR_MAX) { + prr->prr_vas[ + prr->prr_npages++] = + va; + } else { + if (prr->prr_npages == + PMAP_RR_MAX) + /* signal an overflow */ + prr->prr_npages++; + } + } + } /* if (prr) */ + } /* npte != *spte */ + } /* for loop */ } - for (; pv; pv = pv->pv_next) - printf(" pmap %08x va %08x", pv->pv_pmap, pv->pv_va); - printf("\n"); -} -#else -#define pmap_dump_pvlist(a,b) + + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else #endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg(prr->prr_vas[ + --prr->prr_npages]); + } + } + } /* not I386 */ + } + pmap_unmap_ptes(pmap); /* unlocks pmap */ +} /* - * pmap_zero_page zeros the specified by mapping it into - * virtual memory and using bzero to clear its contents. 
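/*
 * A standalone sketch of the removal-record batching used by
 * pmap_write_protect above: up to PMAP_RR_MAX changed VAs are
 * remembered and flushed one page at a time; counting one past the
 * limit marks an overflow and forces a full TLB flush.  flush_all()
 * and flush_page() are stand-ins for pmap_update()/pmap_update_pg().
 */
#define PMAP_RR_MAX	16			/* same limit as above */

struct prr {
	int		prr_npages;
	unsigned long	prr_vas[PMAP_RR_MAX];
};

static void
prr_record(struct prr *prr, unsigned long va)
{
	if (prr->prr_npages < PMAP_RR_MAX)
		prr->prr_vas[prr->prr_npages++] = va;
	else if (prr->prr_npages == PMAP_RR_MAX)
		prr->prr_npages++;		/* signal an overflow */
}

static void
prr_flush(struct prr *prr, void (*flush_all)(void),
    void (*flush_page)(unsigned long))
{
	if (prr->prr_npages > PMAP_RR_MAX) {
		flush_all();			/* overflowed: flush everything */
		return;
	}
	while (prr->prr_npages)
		flush_page(prr->prr_vas[--prr->prr_npages]);
}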
+ * end of protection functions */ + +/* + * pmap_unwire: clear the wired bit in the PTE + * + * => mapping should already be in map + */ + void -pmap_zero_page(phys) - register vm_offset_t phys; +pmap_change_wiring(pmap, va, wired) + struct pmap *pmap; + vaddr_t va; + boolean_t wired; { + pt_entry_t *ptes; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_zero_page(%x)", phys); -#endif + if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { + ptes = pmap_map_ptes(pmap); /* locks pmap */ - pmap_dump_pvlist(phys, "pmap_zero_page: phys"); - *CMAP2 = (phys & PG_FRAME) | PG_V | PG_KW /*| PG_N*/; - pmap_update(); - bzero(CADDR2, NBPG); +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(ptes[i386_btop(va)])) + panic("pmap_unwire: invalid (unmapped) va"); +#endif + if ((ptes[i386_btop(va)] & PG_W) != 0) { + ptes[i386_btop(va)] &= ~PG_W; + pmap->pm_stats.wired_count--; + } +#if 0 +#ifdef DIAGNOSITC + else { + printf("pmap_unwire: wiring for pmap %p va 0x%lx " + "didn't change!\n", pmap, va); + } +#endif +#endif + pmap_unmap_ptes(pmap); /* unlocks map */ + } +#ifdef DIAGNOSTIC + else { + panic("pmap_unwire: invalid PDE"); + } +#endif } /* - * pmap_copy_page copies the specified page by mapping - * it into virtual memory and using bcopy to copy its - * contents. + * pmap_collect: free resources held by a pmap + * + * => optional function. + * => called when a process is swapped out to free memory. */ + void -pmap_copy_page(src, dst) - register vm_offset_t src, dst; +pmap_collect(pmap) + struct pmap *pmap; { + /* + * free all of the pt pages by removing the physical mappings + * for its entire address space. + */ -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_copy_page(%x, %x)", src, dst); -#endif - - pmap_dump_pvlist(src, "pmap_copy_page: src"); - pmap_dump_pvlist(dst, "pmap_copy_page: dst"); - *CMAP1 = (src & PG_FRAME) | PG_V | PG_KR; - *CMAP2 = (dst & PG_FRAME) | PG_V | PG_KW /*| PG_N*/; - pmap_update(); - bcopy(CADDR1, CADDR2, NBPG); + pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS); } /* - * Routine: pmap_pageable - * Function: - * Make the specified pages (by pmap, offset) - * pageable (or not) as requested. + * pmap_transfer: transfer (move or copy) mapping from one pmap + * to another. * - * A page which is not pageable may not take - * a fault; therefore, its page table entry - * must remain valid for the duration. - * - * This routine is merely advisory; pmap_enter - * will specify that these pages are to be wired - * down (or not) as appropriate. + * => this function is optional, it doesn't have to do anything + * => we assume that the mapping in the src pmap is valid (i.e. that + * it doesn't run off the end of the map's virtual space). + * => we assume saddr, daddr, and len are page aligned/lengthed */ void -pmap_pageable(pmap, sva, eva, pageable) - pmap_t pmap; - vm_offset_t sva, eva; - boolean_t pageable; +pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move) + struct pmap *dstpmap, *srcpmap; + vaddr_t daddr, saddr; + vsize_t len; + boolean_t move; { + /* base address of PTEs, dst could be NULL */ + pt_entry_t *srcptes, *dstptes; -#ifdef DEBUG - if (pmapdebug & PDB_FOLLOW) - printf("pmap_pageable(%x, %x, %x, %x)", - pmap, sva, eva, pageable); + struct pmap_transfer_location srcl, dstl; + int dstvalid; /* # of PTEs left in dst's current PTP */ + struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */ + vsize_t blklen; + int blkpgs, toxfer; + boolean_t ok; + +#ifdef DIAGNOSTIC + /* + * sanity check: let's make sure our len doesn't overflow our dst + * space. 
+ */ + + if (daddr < VM_MAXUSER_ADDRESS) { + if (VM_MAXUSER_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in user pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } else if (daddr < VM_MIN_KERNEL_ADDRESS || + daddr >= VM_MAX_KERNEL_ADDRESS) { + printf("pmap_transfer: invalid transfer address 0x%lx\n", + daddr); + } else { + if (VM_MAX_KERNEL_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in kernel pmap " + "(addr=0x%lx, len=0x%lx)\n", daddr, len); + return; + } + } #endif /* - * If we are making a PT page pageable then all valid - * mappings must be gone from that page. Hence it should - * be all zeros and there is no need to clean it. - * Assumption: - * - PT pages have only one pv_table entry - * - PT pages are the only single-page allocations - * between the user stack and kernel va's - * See also pmap_enter & pmap_protect for rehashes of this... + * ideally we would like to have either src or dst pmap's be the + * current pmap so that we can map the other one in APTE space + * (if needed... one of the maps could be the kernel's pmap). + * + * however, if we can't get this, then we have to use the tmpmap + * (alternately we could punt). */ - if (pageable && - pmap == pmap_kernel() && - sva >= VM_MAXUSER_ADDRESS && eva <= VM_MAX_ADDRESS && - eva - sva == NBPG) { - register vm_offset_t pa; - register pt_entry_t *pte; + if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) { + dstptes = NULL; /* dstptes NOT mapped */ + srcptes = pmap_map_ptes(srcpmap); /* let's map the source */ + mapped_pmap = srcpmap; + } else { + if (!pmap_is_curpmap(srcpmap)) { + srcptes = pmap_map_ptes(srcpmap); /* possible APTE */ + dstptes = PTE_BASE; + mapped_pmap = srcpmap; + } else { + dstptes = pmap_map_ptes(dstpmap); /* possible APTE */ + srcptes = PTE_BASE; + mapped_pmap = dstpmap; + } + } + + /* + * at this point we know that the srcptes are mapped. the dstptes + * are mapped if (dstptes != NULL). if (dstptes == NULL) then we + * will have to map the dst PTPs page at a time using the tmpmap. + * [XXX: is it worth the effort, or should we just punt?] + */ + + srcl.addr = saddr; + srcl.pte = &srcptes[i386_btop(srcl.addr)]; + srcl.ptp = NULL; + dstl.addr = daddr; + if (dstptes) + dstl.pte = &dstptes[i386_btop(dstl.addr)]; + else + dstl.pte = NULL; /* we map page at a time */ + dstl.ptp = NULL; + dstvalid = 0; /* force us to load a new dst PTP to start */ + + while (len) { + + /* + * compute the size of this block. + */ + + /* length in bytes */ + blklen = i386_round_pdr(srcl.addr+1) - srcl.addr; + if (blklen > len) + blklen = len; + blkpgs = i386_btop(blklen); + + /* + * if the block is not valid in the src pmap, + * then we can skip it! + */ + + if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) { + len = len - blklen; + srcl.pte = srcl.pte + blkpgs; + srcl.addr += blklen; + dstl.addr += blklen; + if (blkpgs > dstvalid) { + dstvalid = 0; + dstl.ptp = NULL; + } else { + dstvalid = dstvalid - blkpgs; + } + if (dstptes == NULL && (len == 0 || dstvalid == 0)) { + if (dstl.pte) { + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + } else { + dstl.pte += blkpgs; + } + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * if we don't have any dst PTEs ready, then get some. 
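/*
 * A small sketch of the block-size arithmetic pmap_transfer uses above:
 * each iteration handles at most the remainder of the current 4MB PTP,
 * so a source PTP whose PDE is invalid can be skipped in one step.
 * round_pdr() mirrors i386_round_pdr(); transfer_blklen is illustrative.
 */
#define NBPD		(1UL << 22)		/* bytes mapped by one PDE (4MB) */
#define round_pdr(x)	(((x) + NBPD - 1) & ~(NBPD - 1))

static unsigned long
transfer_blklen(unsigned long addr, unsigned long len)
{
	/* from addr to the end of its PTP, clamped to what is left */
	unsigned long blklen = round_pdr(addr + 1) - addr;

	return ((blklen > len) ? len : blklen);
}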
+ */ + + if (dstvalid == 0) { + if (!pmap_valid_entry(dstpmap-> + pm_pdir[pdei(dstl.addr)])) { #ifdef DIAGNOSTIC - int bank, off; - register struct pv_entry *pv; -#endif + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel " + "PTP at 0x%lx", dstl.addr); +#endif + dstl.ptp = pmap_get_ptp(dstpmap, + pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) - + dstl.addr); + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap-> + pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid); + } + } -#ifdef DEBUG - if ((pmapdebug & (PDB_FOLLOW|PDB_PTPAGE)) == PDB_PTPAGE) - printf("pmap_pageable(%x, %x, %x, %x)", - pmap, sva, eva, pageable); -#endif + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * we have a valid dst block of "dstvalid" PTEs ready. + * thus we can transfer min(blkpgs, dstvalid) PTEs now. + */ - pte = pmap_pte(pmap, sva); - if (!pte) - return; - if (!pmap_pte_v(pte)) - return; + srcl.ptp = NULL; /* don't know source PTP yet */ + if (dstvalid < blkpgs) + toxfer = dstvalid; + else + toxfer = blkpgs; - pa = pmap_pte_pa(pte); + if (toxfer > 0) { + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, + toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + if (blkpgs == 0) /* out of src PTEs? restart */ + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we have just used up our "dstvalid" + * PTEs, and thus must obtain more dst PTEs to finish + * off the src block. since we are now going to + * obtain a brand new dst PTP, we know we can finish + * the src block in one more transfer. + */ #ifdef DIAGNOSTIC - if ((*pte & (PG_u | PG_RW)) != (PG_u | PG_RW)) - printf("pmap_pageable: unexpected pte=%x va %x\n", - *pte, sva); - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return; - pv = &vm_physmem[bank].pmseg.pvent[off]; - if (pv->pv_va != sva || pv->pv_next) { - printf("pmap_pageable: bad PT page va %x next %x\n", - pv->pv_va, pv->pv_next); - return; + if (dstvalid) + panic("pmap_transfer: dstvalid non-zero after drain"); + if ((dstl.addr & (NBPD-1)) != 0) + panic("pmap_transfer: dstaddr not on PD boundary " + "(0x%lx)\n", dstl.addr); +#endif + + if (dstptes == NULL && dstl.pte != NULL) { + /* dispose of old PT mapping */ + pmap_tmpunmap_pa(); + dstl.pte = NULL; } + + /* + * get new dst PTP + */ + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { +#ifdef DIAGNOSTIC + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at " + "0x%lx", dstl.addr); #endif + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + + dstvalid = PTES_PER_PTP; /* new PTP */ + + /* + * if the dstptes are un-mapped, then we need to tmpmap in the + * dstl.ptp. + */ + + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] + & PG_FRAME); + } /* - * Mark it unmodified to avoid pageout + * we have a valid source block of "blkpgs" PTEs left + * to transfer. we just got a brand new dst PTP to + * receive these PTEs. 
*/ - pmap_clear_modify(pa); -#ifdef needsomethinglikethis - if (pmapdebug & PDB_PTPAGE) - printf("pmap_pageable: PT page %x(%x) unmodified\n", - sva, *pmap_pte(pmap, sva)); - if (pmapdebug & PDB_WIRING) - pmap_check_wiring("pageable", sva); +#ifdef DIAGNOSTIC + if (dstvalid < blkpgs) + panic("pmap_transfer: too many blkpgs?"); #endif + toxfer = blkpgs; + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, + move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + + /* + * done src pte block + */ } + if (dstptes == NULL && dstl.pte != NULL) + pmap_tmpunmap_pa(); /* dst PTP still mapped? */ + pmap_unmap_ptes(mapped_pmap); } /* - * Miscellaneous support routines follow + * pmap_transfer_ptes: transfer PTEs from one pmap to another + * + * => we assume that the needed PTPs are mapped and that we will + * not cross a block boundary. + * => we return TRUE if we transfered all PTEs, FALSE if we were + * unable to allocate a pv_entry */ -void -i386_protection_init() -{ - - protection_codes[VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE] = 0; - protection_codes[VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE] = - protection_codes[VM_PROT_NONE | VM_PROT_READ | VM_PROT_NONE] = - protection_codes[VM_PROT_NONE | VM_PROT_READ | VM_PROT_EXECUTE] = PG_RO; - protection_codes[VM_PROT_WRITE | VM_PROT_NONE | VM_PROT_NONE] = - protection_codes[VM_PROT_WRITE | VM_PROT_NONE | VM_PROT_EXECUTE] = - protection_codes[VM_PROT_WRITE | VM_PROT_READ | VM_PROT_NONE] = - protection_codes[VM_PROT_WRITE | VM_PROT_READ | VM_PROT_EXECUTE] = PG_RW; -} -boolean_t -pmap_testbit(pa, setbits) - register vm_offset_t pa; - int setbits; +static boolean_t +pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move) + struct pmap *srcpmap, *dstpmap; + struct pmap_transfer_location *srcl, *dstl; + int toxfer; + boolean_t move; { - register struct pv_entry *pv; - register pt_entry_t *pte; - int s; + pt_entry_t dstproto, opte; int bank, off; + struct pv_head *pvh; + struct pv_entry *pve, *lpve; - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return FALSE; - pv = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); + /* + * generate "prototype" dst PTE + */ + + if (dstl->addr < VM_MAX_ADDRESS) + dstproto = PG_u; /* "user" page */ + else + dstproto = pmap_pg_g; /* kernel page */ /* - * Check saved info first + * ensure we have dst PTP for user addresses. */ - if (vm_physmem[bank].pmseg.attrs[off] & setbits) { - splx(s); - return TRUE; - } + + if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS) + dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] & + PG_FRAME); /* - * Not found, check current mappings returning - * immediately if found. + * main loop over range */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if (*pte & setbits) { - splx(s); - return TRUE; - } + + for (/*null*/; toxfer > 0 ; toxfer--, + srcl->addr += NBPG, dstl->addr += NBPG, + srcl->pte++, dstl->pte++) { + + if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entrys */ + continue; + +#ifdef DIAGNOSTIC + if (pmap_valid_entry(*dstl->pte)) + panic("pmap_transfer_ptes: attempt to overwrite " + "active entry"); +#endif + + /* + * let's not worry about non-pvlist mappings (typically device + * pager mappings). + */ + + opte = *srcl->pte; + + if ((opte & PG_PVLIST) == 0) + continue; + + /* + * if we are moving the mapping, then we can just adjust the + * current pv_entry. 
if we are copying the mapping, then we + * need to allocate a new pv_entry to account for it. + */ + + if (move == FALSE) { + pve = pmap_alloc_pv(dstpmap, ALLOCPV_TRY); + if (pve == NULL) + return(FALSE); /* punt! */ + } else { + pve = NULL; /* XXX: quiet gcc warning */ } + + /* + * find the pv_head for this mapping. since our mapping is + * on the pvlist (PG_PVLIST), there must be a pv_head. + */ + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_transfer_ptes: PG_PVLIST PTE and " + "no pv_head!"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + + /* + * now lock down the pvhead and find the current entry (there + * must be one). + */ + + simple_lock(&pvh->pvh_lock); + for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next) + if (lpve->pv_pmap == srcpmap && + lpve->pv_va == srcl->addr) + break; +#ifdef DIAGNOSTIC + if (lpve == NULL) + panic("pmap_transfer_ptes: PG_PVLIST PTE, but " + "entry not found"); +#endif + + /* + * update src ptp. if the ptp is null in the pventry, then + * we are not counting valid entrys for this ptp (this is only + * true for kernel PTPs). + */ + + if (srcl->ptp == NULL) + srcl->ptp = lpve->pv_ptp; +#ifdef DIAGNOSTIC + if (srcl->ptp && + (srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) != + VM_PAGE_TO_PHYS(srcl->ptp)) + panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!"); +#endif + + /* + * for move, update the pve we just found (lpve) to + * point to its new mapping. for copy, init the new + * pve and put it in the list. + */ + + if (move == TRUE) { + pve = lpve; + } + pve->pv_pmap = dstpmap; + pve->pv_va = dstl->addr; + pve->pv_ptp = dstl->ptp; + if (move == FALSE) { /* link in copy */ + pve->pv_next = lpve->pv_next; + lpve->pv_next = pve; + } + + /* + * sync the R/M bits while we are here. + */ + + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* + * now actually update the ptes and unlock the pvlist. + */ + + if (move) { + *srcl->pte = 0; /* zap! */ + if (pmap_is_curpmap(srcpmap)) + pmap_update_pg(srcl->addr); + if (srcl->ptp) + /* don't bother trying to free PTP */ + srcl->ptp->wire_count--; + srcpmap->pm_stats.resident_count--; + if (opte & PG_W) + srcpmap->pm_stats.wired_count--; + } + *dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto; + dstpmap->pm_stats.resident_count++; + if (dstl->ptp) + dstl->ptp->wire_count++; + simple_unlock(&pvh->pvh_lock); } - splx(s); - return FALSE; + return(TRUE); } /* - * Modify pte bits for all ptes corresponding to the given physical address. - * We use `maskbits' rather than `clearbits' because we're always passing - * constants and the latter would require an extra inversion at run-time. + * pmap_copy: copy mappings from one pmap to another + * + * => optional function + * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h */ -void -pmap_changebit(pa, setbits, maskbits) - register vm_offset_t pa; - int setbits, maskbits; + +/* + * pmap_move: move mappings from one pmap to another + * + * => optional function + * void pmap_move(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h + */ + +/* + * pmap_enter: enter a mapping into a pmap + * + * => must be done "now" ... 
no lazy-evaluation + * => we set pmap => pv_head locking + */ + +int +_pmap_enter(pmap, va, pa, prot, flags) + struct pmap *pmap; + vaddr_t va; + paddr_t pa; + vm_prot_t prot; + int flags; { - register struct pv_entry *pv; - register pt_entry_t *pte; - vm_offset_t va; - int s; - int bank, off; + pt_entry_t *ptes, opte, npte; + struct vm_page *ptp; + struct pv_head *pvh; + struct pv_entry *pve; + int bank, off, error; + boolean_t wired = (flags & PMAP_WIRED) != 0; -#ifdef DEBUG - if (pmapdebug & PDB_BITS) - printf("pmap_changebit(%x, %x, %x)", - pa, setbits, ~maskbits); +#ifdef DIAGNOSTIC + /* sanity check: totally out of range? */ + if (va >= VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: too big"); + + if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) + panic("pmap_enter: trying to map over PDP/APDP!"); + + /* sanity check: kernel PTPs should already have been pre-allocated */ + if (va >= VM_MIN_KERNEL_ADDRESS && + !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + panic("pmap_enter: missing kernel PTP!"); #endif - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) - return; - pv = &vm_physmem[bank].pmseg.pvent[off]; - s = splimp(); + /* get lock */ + PMAP_MAP_TO_HEAD_LOCK(); /* - * Clear saved attributes (modify, reference) + * map in ptes and get a pointer to our PTP (unless we are the kernel) */ - if (~maskbits) - vm_physmem[bank].pmseg.attrs[off] &= maskbits; + + ptes = pmap_map_ptes(pmap); /* locks pmap */ + if (pmap == pmap_kernel()) { + ptp = NULL; + } else { + ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + if (ptp == NULL) { + if (flags & PMAP_CANFAIL) { + return (KERN_RESOURCE_SHORTAGE); + } + panic("pmap_enter: get ptp failed"); + } + } + opte = ptes[i386_btop(va)]; /* old PTE */ /* - * Loop over all current mappings setting/clearing as appropos - * If setting RO do we need to clear the VAC? + * is there currently a valid mapping at our VA? */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; - /* - * XXX don't write protect pager mappings - */ - if ((PG_RO && setbits == PG_RO) || - (PG_RW && maskbits == ~PG_RW)) { -#if defined(UVM) - if (va >= uvm.pager_sva && va < uvm.pager_eva) - continue; -#else - extern vm_offset_t pager_sva, pager_eva; + if (pmap_valid_entry(opte)) { - if (va >= pager_sva && va < pager_eva) - continue; -#endif + /* + * first, update pm_stats. resident count will not + * change since we are replacing/changing a valid + * mapping. wired count might change... + */ + + if (wired && (opte & PG_W) == 0) + pmap->pm_stats.wired_count++; + else if (!wired && (opte & PG_W) != 0) + pmap->pm_stats.wired_count--; + + /* + * is the currently mapped PA the same as the one we + * want to map? 
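/*
 * A sketch of the wired-count bookkeeping above for the "replacing a
 * valid mapping" case: only a transition between wired and unwired
 * moves the counter; the resident count stays the same.  PG_W mirrors
 * the PG_AVAIL1 software bit; the helper name is illustrative.
 */
#define PG_W	0x200			/* software "wired" bit (PG_AVAIL1) */

struct wire_stats { long wired_count; };

static void
adjust_wiring(struct wire_stats *st, unsigned int old_pte, int new_wired)
{
	if (new_wired && (old_pte & PG_W) == 0)
		st->wired_count++;
	else if (!new_wired && (old_pte & PG_W) != 0)
		st->wired_count--;
}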
+ */ + + if ((opte & PG_FRAME) == pa) { + + /* if this is on the PVLIST, sync R/M bit */ + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(pa), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping " + "with unmanaged page"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pvh = NULL; /* ensure !PG_PVLIST */ } + goto enter_now; + } - pte = pmap_pte(pv->pv_pmap, va); - *pte = (*pte & maskbits) | setbits; + /* + * changing PAs: we must remove the old one first + */ + + /* + * if current mapping is on a pvlist, + * remove it (sync R/M bits) + */ + + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with " + "unmanaged page"); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + pve = pmap_remove_pv(pvh, pmap, va); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pve = NULL; } - pmap_update(); + } else { /* opte not valid */ + pve = NULL; + pmap->pm_stats.resident_count++; + if (wired) + pmap->pm_stats.wired_count++; + if (ptp) + ptp->wire_count++; /* count # of valid entrys */ } - splx(s); -} -void -pmap_prefault(map, v, l) - vm_map_t map; - vm_offset_t v; - vm_size_t l; -{ - vm_offset_t pv, pv2; + /* + * at this point pm_stats has been updated. pve is either NULL + * or points to a now-free pv_entry structure (the latter case is + * if we called pmap_remove_pv above). + * + * if this entry is to be on a pvlist, enter it now. + */ - for (pv = v; pv < v + l ; pv += ~PD_MASK + 1) { - if (!pmap_pde_v(pmap_pde(map->pmap, pv))) { - pv2 = trunc_page(vtopte(pv)); -#if defined(UVM) - uvm_fault(map, pv2, 0, VM_PROT_READ); -#else - vm_fault(map, pv2, VM_PROT_READ, FALSE); -#endif + bank = vm_physseg_find(atop(pa), &off); + if (pmap_initialized && bank != -1) { + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pve == NULL) { + pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: no pv entries available"); + } } - pv &= PD_MASK; + /* lock pvh when adding */ + pmap_enter_pv(pvh, pve, pmap, va, ptp); + } else { + + /* new mapping is not PG_PVLIST. free pve if we've got one */ + pvh = NULL; /* ensure !PG_PVLIST */ + if (pve) + pmap_free_pv(pmap, pve); } + +enter_now: + /* + * at this point pvh is !NULL if we want the PG_PVLIST bit set + */ + + npte = pa | protection_codes[prot] | PG_V; + if (pvh) + npte |= PG_PVLIST; + if (wired) + npte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + npte |= PG_u; + else if (va < VM_MAX_ADDRESS) + npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ + if (pmap == pmap_kernel()) + npte |= pmap_pg_g; + + ptes[i386_btop(va)] = npte; /* zap! */ + + if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap)) + pmap_update_pg(va); + + error = KERN_SUCCESS; + +out: + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + + return error; } -#ifdef DEBUG -void -pmap_pvdump(pa) - vm_offset_t pa; +/* + * pmap_growkernel: increase usage of KVM space + * + * => we allocate new PTPs for the kernel and install them in all + * the pmaps on the system. 
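/*
 * A sketch of how the new PTE is assembled at the enter_now label
 * above: physical frame, protection bits and PG_V always; PG_PVLIST
 * for managed pages, PG_W for wired mappings, PG_u below the user
 * limit, and the global bit only for kernel-pmap mappings when the CPU
 * supports it.  Bit values mirror pte.h; build_npte is an illustrative
 * name, not part of this change.
 */
#include <stdint.h>

#define PG_V		0x001
#define PG_u		0x004		/* user-accessible */
#define PG_G		0x100		/* global (ignored before the 586) */
#define PG_W		0x200		/* software: wired */
#define PG_PVLIST	0x400		/* software: mapping is on a pv list */
#define PG_FRAME	0xfffff000U

static uint32_t
build_npte(uint32_t pa, uint32_t prot_bits, int managed, int wired,
    int user_va, int kernel_pmap, uint32_t pmap_pg_g)
{
	uint32_t npte = (pa & PG_FRAME) | prot_bits | PG_V;

	if (managed)
		npte |= PG_PVLIST;
	if (wired)
		npte |= PG_W;
	if (user_va)
		npte |= PG_u;
	if (kernel_pmap)
		npte |= pmap_pg_g;	/* 0 or PG_G, as probed at boot */
	return (npte);
}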
+ */ + +vaddr_t +pmap_growkernel(maxkvaddr) + vaddr_t maxkvaddr; { - register struct pv_entry *pv; - int bank, off; + struct pmap *kpm = pmap_kernel(), *pm; + int needed_kpde; /* needed number of kernel PTPs */ + int s; + paddr_t ptaddr; - printf("pa %x", pa); - if ((bank = vm_physseg_find(atop(pa), &off)) == -1) { - printf("INVALID PA!"); - } else { - for (pv = &vm_physmem[bank].pmseg.pvent[off] ; pv ; - pv = pv->pv_next) { - printf(" -> pmap %p, va %lx", pv->pv_pmap, pv->pv_va); - pads(pv->pv_pmap); + needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) + / NBPD; + if (needed_kpde <= nkpde) + goto out; /* we are OK */ + + /* + * whoops! we need to add kernel PTPs + */ + + s = splhigh(); /* to be safe */ + simple_lock(&kpm->pm_obj.vmobjlock); + + for (/*null*/ ; nkpde < needed_kpde ; nkpde++) { + + if (pmap_initialized == FALSE) { + + /* + * we're growing the kernel pmap early (from + * uvm_pageboot_alloc()). this case must be + * handled a little differently. + */ + + if (uvm_page_physget(&ptaddr) == FALSE) + panic("pmap_growkernel: out of memory"); + + kpm->pm_pdir[PDSLOT_KERN + nkpde] = + ptaddr | PG_RW | PG_V; + + /* count PTP as resident */ + kpm->pm_stats.resident_count++; + continue; } - } - printf(" "); -} -#ifdef notyet -void -pmap_check_wiring(str, va) - char *str; - vm_offset_t va; -{ - vm_map_entry_t entry; - register int count, *pte; + if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { + panic("pmap_growkernel: alloc ptp failed"); + } - va = trunc_page(va); - if (!pmap_pde_v(pmap_pde(pmap_kernel(), va)) || - !pmap_pte_v(pmap_pte(pmap_kernel(), va))) - return; + /* PG_u not for kernel */ + kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u; - if (!vm_map_lookup_entry(pt_map, va, &entry)) { - printf("wired_check: entry for %x not found\n", va); - return; + /* distribute new kernel PTP to all active pmaps */ + simple_lock(&pmaps_lock); + for (pm = pmaps.lh_first; pm != NULL; + pm = pm->pm_list.le_next) { + pm->pm_pdir[PDSLOT_KERN + nkpde] = + kpm->pm_pdir[PDSLOT_KERN + nkpde]; + } + simple_unlock(&pmaps_lock); } - count = 0; - for (pte = (int *)va; pte < (int *)(va + NBPG); pte++) - if (*pte) - count++; - if (entry->wired_count != count) - printf("*%s*: %x: w%d/a%d\n", - str, va, entry->wired_count, count); + + simple_unlock(&kpm->pm_obj.vmobjlock); + splx(s); + +out: + return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)); } -#endif -/* print address space of pmap*/ +#ifdef DEBUG +void pmap_dump __P((struct pmap *, vaddr_t, vaddr_t)); + +/* + * pmap_dump: dump all the mappings from a pmap + * + * => caller should not be holding any pmap locks + */ + void -pads(pm) - pmap_t pm; +pmap_dump(pmap, sva, eva) + struct pmap *pmap; + vaddr_t sva, eva; { - unsigned va, i, j; - register pt_entry_t *pte; + pt_entry_t *ptes, *pte; + vaddr_t blkendva; - if (pm == pmap_kernel()) - return; - for (i = 0; i < 1024; i++) - if (pmap_pde_v(&pm->pm_pdir[i])) - for (j = 0; j < 1024 ; j++) { - va = (i << PDSHIFT) | (j << PGSHIFT); - if (pm == pmap_kernel() && - va < VM_MIN_KERNEL_ADDRESS) - continue; - if (pm != pmap_kernel() && - va > VM_MAX_ADDRESS) - continue; - pte = pmap_pte(pm, va); - if (pmap_pte_v(pte)) - printf("%x:%x ", va, *pte); - } + /* + * if end is out of range truncate. + * if (end == start) update to max. 
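/*
 * The round-up arithmetic behind pmap_growkernel above, as a
 * standalone sketch: how many kernel PDEs (PTPs) are needed to cover
 * the requested maximum KVA.  The kernel base value is an assumption
 * matching the layout described in pmap.new.h below.
 */
#define NBPD		(1UL << 22)		/* 4MB per PDE */
#define KERN_BASE	0xc0000000UL		/* assumed VM_MIN_KERNEL_ADDRESS */

static int
kpdes_needed(unsigned long maxkvaddr)
{
	return ((int)((maxkvaddr - KERN_BASE + (NBPD - 1)) / NBPD));
}

/*
 * if kpdes_needed(maxkvaddr) <= nkpde there is nothing to do; otherwise
 * each new PDE is installed in the kernel PD and copied into every pmap
 * on the pmaps list so the kernel stays mapped in every address space.
 */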
+ */ + + if (eva > VM_MAXUSER_ADDRESS || eva <= sva) + eva = VM_MAXUSER_ADDRESS; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + /* + * dumping a range of pages: we dump in PTP sized blocks (4MB) + */ + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* valid block? */ + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + continue; + + pte = &ptes[i386_btop(sva)]; + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { + if (!pmap_valid_entry(*pte)) + continue; + printf("va %#lx -> pa %#x (pte=%#x)\n", + sva, *pte, *pte & PG_FRAME); + } + } + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); } #endif diff --git a/sys/arch/i386/i386/sys_machdep.c b/sys/arch/i386/i386/sys_machdep.c index b1fb6877cfc..5d72a697af6 100644 --- a/sys/arch/i386/i386/sys_machdep.c +++ b/sys/arch/i386/i386/sys_machdep.c @@ -94,7 +94,11 @@ i386_user_cleanup(pcb) struct pcb *pcb; { +#ifdef PMAP_NEW + ldt_free(pcb->pcb_pmap); +#else ldt_free(pcb); +#endif pcb->pcb_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); if (pcb == curpcb) lldt(pcb->pcb_ldt_sel); @@ -123,8 +127,8 @@ i386_get_ldt(p, args, retval) if ((error = copyin(args, &ua, sizeof(ua))) != 0) return (error); -#ifdef DEBUG - printf("i386_get_ldt: start=%d num=%d descs=%x\n", ua.start, +#ifdef LDTDEBUG + printf("i386_get_ldt: start=%d num=%d descs=%p\n", ua.start, ua.num, ua.desc); #endif @@ -161,16 +165,21 @@ i386_set_ldt(p, args, retval) { int error, i, n; struct pcb *pcb = &p->p_addr->u_pcb; +#ifdef PMAP_NEW + pmap_t pmap = p->p_vmspace->vm_map.pmap; +#endif int fsslot, gsslot; +#ifndef PMAP_NEW int s; +#endif struct i386_set_ldt_args ua; union descriptor desc; if ((error = copyin(args, &ua, sizeof(ua))) != 0) return (error); -#ifdef DEBUG - printf("i386_set_ldt: start=%d num=%d descs=%x\n", ua.start, +#ifdef LDT_DEBUG + printf("i386_set_ldt: start=%d num=%d descs=%p\n", ua.start, ua.num, ua.desc); #endif @@ -179,22 +188,46 @@ i386_set_ldt(p, args, retval) if (ua.start > 8192 || (ua.start + ua.num) > 8192) return (EINVAL); + /* + * XXX LOCKING + */ + /* allocate user ldt */ +#ifdef PMAP_NEW + if (pmap->pm_ldt == 0 || (ua.start + ua.num) > pmap->pm_ldt_len) { +#else if (pcb->pcb_ldt == 0 || (ua.start + ua.num) > pcb->pcb_ldt_len) { +#endif size_t old_len, new_len; union descriptor *old_ldt, *new_ldt; +#ifdef PMAP_NEW + if (pmap->pm_flags & PMF_USER_LDT) { + old_len = pmap->pm_ldt_len * sizeof(union descriptor); + old_ldt = pmap->pm_ldt; +#else if (pcb->pcb_flags & PCB_USER_LDT) { old_len = pcb->pcb_ldt_len * sizeof(union descriptor); old_ldt = pcb->pcb_ldt; +#endif } else { old_len = NLDT * sizeof(union descriptor); old_ldt = ldt; +#ifdef PMAP_NEW + pmap->pm_ldt_len = 512; +#else pcb->pcb_ldt_len = 512; +#endif } +#ifdef PMAP_NEW + while ((ua.start + ua.num) > pmap->pm_ldt_len) + pmap->pm_ldt_len *= 2; + new_len = pmap->pm_ldt_len * sizeof(union descriptor); +#else while ((ua.start + ua.num) > pcb->pcb_ldt_len) pcb->pcb_ldt_len *= 2; new_len = pcb->pcb_ldt_len * sizeof(union descriptor); +#endif #if defined(UVM) new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, new_len); #else @@ -202,6 +235,16 @@ i386_set_ldt(p, args, retval) #endif bcopy(old_ldt, new_ldt, old_len); bzero((caddr_t)new_ldt + old_len, new_len - old_len); +#ifdef PMAP_NEW + pmap->pm_ldt = new_ldt; + + if (pmap->pm_flags & PCB_USER_LDT) + ldt_free(pmap); + else + pmap->pm_flags |= PCB_USER_LDT; + 
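/*
 * A sketch of the LDT resize policy used just above: the table starts
 * at 512 descriptors and doubles until the requested range fits, and
 * the byte length handed to the allocator follows from that count.
 * DESC_SIZE assumes the usual 8-byte i386 segment descriptor; ldt_grow
 * is an illustrative name.
 */
#include <stddef.h>

#define LDT_INITIAL	512		/* initial pm_ldt_len, as above */
#define DESC_SIZE	8		/* sizeof(union descriptor) assumed */

static size_t
ldt_grow(size_t nslots, int start, int num)
{
	if (nslots == 0)
		nslots = LDT_INITIAL;
	while ((size_t)(start + num) > nslots)
		nslots *= 2;
	return (nslots * DESC_SIZE);	/* new_len in bytes */
}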
ldt_alloc(pmap, new_ldt, new_len); + pcb->pcb_ldt_sel = pmap->pm_ldt_sel; +#else pcb->pcb_ldt = new_ldt; if (pcb->pcb_flags & PCB_USER_LDT) @@ -209,17 +252,24 @@ i386_set_ldt(p, args, retval) else pcb->pcb_flags |= PCB_USER_LDT; ldt_alloc(pcb, new_ldt, new_len); +#endif if (pcb == curpcb) lldt(pcb->pcb_ldt_sel); + /* + * XXX Need to notify other processors which may be + * XXX currently using this pmap that they need to + * XXX re-load the LDT. + */ + if (old_ldt != ldt) #if defined(UVM) uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len); #else - kmem_free(kernel_map, (vm_offset_t)old_ldt, old_len); + kmem_free(kernel_map, (vaddr_t)old_ldt, old_len); #endif -#ifdef DEBUG - printf("i386_set_ldt(%d): new_ldt=%x\n", p->p_pid, new_ldt); +#ifdef LDT_DEBUG + printf("i386_set_ldt(%d): new_ldt=%p\n", p->p_pid, new_ldt); #endif } @@ -292,20 +342,28 @@ i386_set_ldt(p, args, retval) } } +#ifndef PMAP_NEW s = splhigh(); +#endif /* Now actually replace the descriptors. */ for (i = 0, n = ua.start; i < ua.num; i++, n++) { if ((error = copyin(&ua.desc[i], &desc, sizeof(desc))) != 0) goto out; +#ifdef PMAP_NEW + pmap->pm_ldt[n] = desc; +#else pcb->pcb_ldt[n] = desc; +#endif } *retval = ua.start; out: +#ifndef PMAP_NEW splx(s); +#endif return (error); } #endif /* USER_LDT */ diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index c2ae880fa02..c89e50ca3a3 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.35 2001/01/24 09:37:59 hugh Exp $ */ +/* $OpenBSD: trap.c,v 1.36 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */ #undef DEBUG @@ -445,7 +445,10 @@ trap(frame) register vm_map_t map; int rv; extern vm_map_t kernel_map; - unsigned nss, v; + unsigned nss; +#ifndef PMAP_NEW + unsigned v; +#endif if (vm == NULL) goto we_re_toast; @@ -481,6 +484,7 @@ trap(frame) } } +#ifndef PMAP_NEW /* check if page table is mapped, if not, fault it first */ if ((PTD[pdei(va)] & PG_V) == 0) { v = trunc_page(vtopte(va)); @@ -499,6 +503,7 @@ trap(frame) #endif } else v = 0; +#endif #if defined(UVM) rv = uvm_fault(map, va, 0, ftype); diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c index 38c1141a9d7..36afc7340c4 100644 --- a/sys/arch/i386/i386/vm_machdep.c +++ b/sys/arch/i386/i386/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.20 2001/02/08 00:46:35 mickey Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.21 2001/03/22 23:36:51 niklas Exp $ */ /* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */ /*- @@ -114,21 +114,29 @@ cpu_fork(p1, p2, stack, stacksize) /* Sync curpcb (which is presumably p1's PCB) and copy it to p2. */ savectx(curpcb); *pcb = p1->p_addr->u_pcb; +#ifndef PMAP_NEW pmap_activate(p2); - +#endif /* * Preset these so that gdt_compact() doesn't get confused if called * during the allocations below. */ pcb->pcb_tss_sel = GSEL(GNULL_SEL, SEL_KPL); +#ifndef PMAP_NEW pcb->pcb_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); +#else + /* + * Activate the addres space. Note this will refresh pcb_ldt_sel. + */ + pmap_activate(p2); +#endif /* Fix up the TSS. */ pcb->pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); pcb->pcb_tss.tss_esp0 = (int)p2->p_addr + USPACE - 16; tss_alloc(pcb); -#ifdef USER_LDT +#if defined(USER_LDT) && !defined(PMAP_NEW) /* Copy the LDT, if necessary. 
*/ if (pcb->pcb_flags & PCB_USER_LDT) { size_t len; @@ -228,10 +236,17 @@ cpu_wait(p) struct pcb *pcb; pcb = &p->p_addr->u_pcb; +#ifndef PMAP_NEW #ifdef USER_LDT if (pcb->pcb_flags & PCB_USER_LDT) i386_user_cleanup(pcb); #endif +#else + /* + * No need to do user LDT cleanup here; it's handled in + * pmap_destroy(). + */ +#endif tss_free(pcb); } @@ -375,8 +390,12 @@ vmapbuf(bp, len) vm_size_t len; { vm_offset_t faddr, taddr, off; +#ifdef PMAP_NEW + paddr_t fpa; +#else pt_entry_t *fpte, *tpte; pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)); +#endif if ((bp->b_flags & B_PHYS) == 0) panic("vmapbuf"); @@ -389,16 +408,42 @@ vmapbuf(bp, len) taddr = kmem_alloc_wait(phys_map, len); #endif bp->b_data = (caddr_t)(taddr + off); +#ifdef PMAP_NEW /* * The region is locked, so we expect that pmap_pte() will return * non-NULL. + * XXX: unwise to expect this in a multithreaded environment. + * anything can happen to a pmap between the time we lock a + * region, release the pmap lock, and then relock it for + * the pmap_extract(). + * + * no need to flush TLB since we expect nothing to be mapped + * where we we just allocated (TLB will be flushed when our + * mapping is removed). */ - fpte = pmap_pte(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), faddr); - tpte = pmap_pte(vm_map_pmap(phys_map), taddr); - do { - *tpte++ = *fpte++; + while (len) { + fpa = pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), + faddr); + pmap_enter(vm_map_pmap(phys_map), taddr, fpa, + VM_PROT_READ | VM_PROT_WRITE, TRUE, + VM_PROT_READ | VM_PROT_WRITE); + faddr += PAGE_SIZE; + taddr += PAGE_SIZE; len -= PAGE_SIZE; - } while (len); + } +#else + /* + * The region is locked, so we expect that pmap_pte() will return + * non-NULL. + */ + fpte = pmap_pte(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map), faddr); + tpte = pmap_pte(vm_map_pmap(phys_map), taddr); + do { + *tpte++ = *fpte++; + len -= PAGE_SIZE; + } while (len); +#endif + } /* diff --git a/sys/arch/i386/include/gdt.h b/sys/arch/i386/include/gdt.h index 2ec88669840..46d52f88b32 100644 --- a/sys/arch/i386/include/gdt.h +++ b/sys/arch/i386/include/gdt.h @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.h,v 1.5 1997/11/11 22:53:40 deraadt Exp $ */ +/* $OpenBSD: gdt.h,v 1.6 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: gdt.h,v 1.3 1996/02/27 22:32:11 jtc Exp $ */ /*- @@ -40,6 +40,11 @@ #ifdef _KERNEL void tss_alloc __P((struct pcb *)); void tss_free __P((struct pcb *)); +#ifdef PMAP_NEW +void ldt_alloc __P((struct pmap *, union descriptor *, size_t)); +void ldt_free __P((struct pmap *)); +#else void ldt_alloc __P((struct pcb *, union descriptor *, size_t)); void ldt_free __P((struct pcb *)); #endif +#endif diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 6342b19d615..7ab2d4acb26 100644 --- a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.5 2000/08/05 22:07:32 niklas Exp $ */ +/* $OpenBSD: pcb.h,v 1.6 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: pcb.h,v 1.21 1996/01/08 13:51:42 mycroft Exp $ */ /*- @@ -79,6 +79,9 @@ struct pcb { int vm86_eflags; /* virtual eflags for vm86 mode */ int vm86_flagmask; /* flag mask for vm86 mode */ void *vm86_userp; /* XXX performance hack */ +#ifdef PMAP_NEW + struct pmap *pcb_pmap; /* back pointer to our pmap */ +#endif u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */ u_char pcb_iomap_pad; /* required; must be 0xff, says intel */ }; diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index fdc7cbf9698..de770cb59a6 100644 --- a/sys/arch/i386/include/pmap.h 
+++ b/sys/arch/i386/include/pmap.h @@ -1,210 +1,5 @@ -/* $OpenBSD: pmap.h,v 1.12 1999/09/20 02:47:43 deraadt Exp $ */ -/* $NetBSD: pmap.h,v 1.23 1996/05/03 19:26:30 christos Exp $ */ - -/* - * Copyright (c) 1995 Charles M. Hannum. All rights reserved. - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)pmap.h 7.4 (Berkeley) 5/12/91 - */ - -/* - * Derived from hp300 version by Mike Hibler, this version by William - * Jolitz uses a recursive map [a pde points to the page directory] to - * map the page tables using the pagetables themselves. This is done to - * reduce the impact on kernel virtual memory for lots of sparse address - * space, and to reduce the cost of memory to each process. - * - * from hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 - */ - -#ifndef _I386_PMAP_H_ -#define _I386_PMAP_H_ - -#include <machine/cpufunc.h> -#include <machine/pte.h> - -/* - * 386 page table entry and page table directory - * W.Jolitz, 8/89 - */ - -/* - * One page directory, shared between - * kernel and user modes. - */ -#define KPTDI (KERNBASE>>22) /* start of kernel virtual pde's */ -#define PTDPTDI (KPTDI-1) /* ptd entry that points to ptd! */ -#define APTDPTDI 0x3ff /* start of alternate page directory */ -#define MAXKPDE (APTDPTDI-KPTDI) -#ifndef NKPDE /* permit config file override */ -#define NKPDE 127 /* # to static alloc */ +#ifdef PMAP_NEW +#include <machine/pmap.new.h> +#else +#include <machine/pmap.old.h> #endif - -/* - * Address of current and alternate address space page table maps - * and directories. 
- */ -#ifdef _KERNEL -extern pt_entry_t PTmap[], APTmap[], Upte; -extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; -extern pt_entry_t *Sysmap; - -extern int PTDpaddr; /* physical address of kernel PTD */ - -void pmap_bootstrap __P((vm_offset_t start)); -boolean_t pmap_testbit __P((vm_offset_t, int)); -void pmap_changebit __P((vm_offset_t, int, int)); -void pmap_prefault __P((vm_map_t, vm_offset_t, vm_size_t)); -#endif - -/* - * virtual address to page table entry and - * to physical address. Likewise for alternate address space. - * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ -#define vtopte(va) (PTmap + i386_btop(va)) -#define kvtopte(va) vtopte(va) -#define ptetov(pt) (i386_ptob(pt - PTmap)) -#define vtophys(va) \ - ((*vtopte(va) & PG_FRAME) | ((unsigned)(va) & ~PG_FRAME)) - -#define avtopte(va) (APTmap + i386_btop(va)) -#define ptetoav(pt) (i386_ptob(pt - APTmap)) -#define avtophys(va) \ - ((*avtopte(va) & PG_FRAME) | ((unsigned)(va) & ~PG_FRAME)) - -/* - * macros to generate page directory/table indicies - */ -#define pdei(va) (((va) & PD_MASK) >> PDSHIFT) -#define ptei(va) (((va) & PT_MASK) >> PGSHIFT) - -/* - * Pmap stuff - */ -typedef struct pmap { - pd_entry_t *pm_pdir; /* KVA of page directory */ - boolean_t pm_pdchanged; /* pdir changed */ - short pm_dref; /* page directory ref count */ - short pm_count; /* pmap reference count */ - simple_lock_data_t pm_lock; /* lock on pmap */ - struct pmap_statistics pm_stats; /* pmap statistics */ - long pm_ptpages; /* more stats: PT pages */ -} *pmap_t; - -/* - * For each vm_page_t, there is a list of all currently valid virtual - * mappings of that page. An entry is a pv_entry, the list is pv_table. - */ -struct pv_entry { - struct pv_entry *pv_next; /* next pv_entry */ - pmap_t pv_pmap; /* pmap where mapping lies */ - vm_offset_t pv_va; /* virtual address for mapping */ -}; - -struct pv_page; - -struct pv_page_info { - TAILQ_ENTRY(pv_page) pgi_list; - struct pv_entry *pgi_freelist; - int pgi_nfree; -}; - -/* - * This is basically: - * ((NBPG - sizeof(struct pv_page_info)) / sizeof(struct pv_entry)) - */ -#define NPVPPG 340 - -struct pv_page { - struct pv_page_info pvp_pgi; - struct pv_entry pvp_pv[NPVPPG]; -}; - -#ifdef _KERNEL -extern struct pmap kernel_pmap_store; - -#define pmap_kernel() (&kernel_pmap_store) -#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) -#define pmap_update() tlbflush() - -vm_offset_t reserve_dumppages __P((vm_offset_t)); - -static __inline void -pmap_clear_modify(vm_offset_t pa) -{ - pmap_changebit(pa, 0, ~PG_M); -} - -static __inline void -pmap_clear_reference(vm_offset_t pa) -{ - pmap_changebit(pa, 0, ~PG_U); -} - -static __inline void -pmap_copy_on_write(vm_offset_t pa) -{ - pmap_changebit(pa, PG_RO, ~PG_RW); -} - -static __inline boolean_t -pmap_is_modified(vm_offset_t pa) -{ - return pmap_testbit(pa, PG_M); -} - -static __inline boolean_t -pmap_is_referenced(vm_offset_t pa) -{ - return pmap_testbit(pa, PG_U); -} - -static __inline vm_offset_t -pmap_phys_address(int ppn) -{ - return i386_ptob(ppn); -} - -void pmap_activate __P((struct proc *)); -void pmap_deactivate __P((struct proc *)); -vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); - -#endif /* _KERNEL */ - -#endif /* _I386_PMAP_H_ */ diff --git a/sys/arch/i386/include/pmap.new.h b/sys/arch/i386/include/pmap.new.h new file mode 100644 index 00000000000..712d418dd33 --- /dev/null +++ b/sys/arch/i386/include/pmap.new.h @@ -0,0 +1,509 @@ +/* 
$OpenBSD: pmap.new.h,v 1.1 2001/03/22 23:36:52 niklas Exp $ */ +/* $NetBSD: pmap.h,v 1.43 2000/02/11 07:00:13 thorpej Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * pmap.h: see pmap.c for the history of this pmap module. + */ + +#ifndef _I386_PMAP_H_ +#define _I386_PMAP_H_ + +#if defined(_KERNEL) && !defined(_LKM) && defined(__NetBSD__) +#include "opt_user_ldt.h" +#endif + +#include <machine/cpufunc.h> +#include <machine/pte.h> +#include <machine/segments.h> +#include <vm/pglist.h> +#include <uvm/uvm_object.h> + +/* + * see pte.h for a description of i386 MMU terminology and hardware + * interface. + * + * a pmap describes a processes' 4GB virtual address space. this + * virtual address space can be broken up into 1024 4MB regions which + * are described by PDEs in the PDP. the PDEs are defined as follows: + * + * (ranges are inclusive -> exclusive, just like vm_map_entry start/end) + * (the following assumes that KERNBASE is 0xc0000000) + * + * PDE#s VA range usage + * 0->767 0x0 -> 0xbfc00000 user address space, note that the + * max user address is 0xbfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used + * 768 0xbfc00000-> recursive mapping of PDP (used for + * 0xc0000000 linear mapping of PTPs) + * 768->1023 0xc0000000-> kernel address space (constant + * 0xffc00000 across all pmap's/processes) + * 1023 0xffc00000-> "alternate" recursive PDP mapping + * <end> (for other pmaps) + * + * + * note: a recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. in other + * words, the PTE for page 0 is the first int mapped into the 4MB recursive + * area. the PTE for page 1 is the second int. 
the very last int in the + * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). + * + * all pmap's PD's must have the same values in slots 768->1023 so that + * the kernel is always mapped in every process. these values are loaded + * into the PD at pmap creation time. + * + * at any one time only one pmap can be active on a processor. this is + * the pmap whose PDP is pointed to by processor register %cr3. this pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slot #767 as show above). when the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * address of PTE = (767 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xbfc00000 + (VA / 4096) * 4 + * + * what happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? in that + * case we take the PA of the PDP of non-active pmap and put it in + * slot 1023 of the active pmap. this causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). + * + * the following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 + * | | + * | | + * | 767| -> points back to PDP (%cr3) mapping VA 0xbfc00000 -> 0xc0000000 + * | 768| -> first kernel PTP (maps 0xc0000000 -> 0xf0400000) + * | | + * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) + * +----+ + * + * note that the PDE#767 VA (0xbfc00000) is defined as "PTE_BASE" + * note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE" + * + * starting at VA 0xbfc00000 the current active PDP (%cr3) acts as a + * PTP: + * + * PTP#767 == PDP(%cr3) => maps VA 0xbfc00000 -> 0xc0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xbfc00000->0xbfc01000 + * | | + * | | + * | 767| -> maps contents of PTP#767 (the PDP) at VA 0xbffbf000 + * | 768| -> maps contents of first kernel PTP + * | | + * |1023| + * +----+ + * + * note that mapping of the PDP at PTP#959's VA (0xeffbf000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xeffbfefc) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xeffbfffc) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * to set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xfffff000). + */ + +/* + * the following defines identify the slots used as described above. 
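/*
 * The formula above, worked as a standalone sketch: with the recursive
 * PDP entry in slot 767 the PTEs appear as one linear array starting at
 * 0xbfc00000, and the PTE for a VA lives at base + (VA / 4096) * 4.
 * pte_va() is an illustrative helper, not part of this header.
 */
#include <stdint.h>

#define PDSLOT_PTE	767UL			/* (KERNBASE/NBPD) - 1 */
#define NBPD		(1UL << 22)		/* 4MB per PDE */
#define PTE_BASE_VA	(PDSLOT_PTE * NBPD)	/* 0xbfc00000 */

static uintptr_t
pte_va(uintptr_t va)
{
	return (PTE_BASE_VA + (va >> 12) * sizeof(uint32_t));
}

/*
 * pte_va(0x0)    == 0xbfc00000  (PTE for page 0 is the first int)
 * pte_va(0x1000) == 0xbfc00004  (PTE for page 1 is the second int)
 */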
+ */ + +#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 767: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 768: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ + +/* + * the following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ + +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +/* + * XXXCDC: tmp xlate from old names: + * PTDPTDI -> PDSLOT_PTE + * KPTDI -> PDSLOT_KERN + * APTDPTDI -> PDSLOT_APTE + */ + +/* + * the follow define determines how many PTPs should be set up for the + * kernel by locore.s at boot time. this should be large enough to + * get the VM system running. once the VM system is running, the + * pmap module can add more PTPs to the kernel area on demand. + */ + +#ifndef NKPTP +#define NKPTP 4 /* 16MB to start */ +#endif +#define NKPTP_MIN 4 /* smallest value we allow */ +#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) + /* largest value (-1 for APTP space) */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap) + * ptetov: given a pointer to a PTE, return the VA that it maps + * vtophys: translate a VA to the PA mapped to it + * + * plus alternative versions of the above + */ + +#define vtopte(VA) (PTE_BASE + i386_btop(VA)) +#define kvtopte(VA) vtopte(VA) +#define ptetov(PT) (i386_ptob(PT - PTE_BASE)) +#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ + ((unsigned)(VA) & ~PG_FRAME)) +#define avtopte(VA) (APTE_BASE + i386_btop(VA)) +#define ptetoav(PT) (i386_ptob(PT - APTE_BASE)) +#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ + ((unsigned)(VA) & ~PG_FRAME)) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * PTP macros: + * a PTP's index is the PD index of the PDE that points to it + * a PTP's offset is the byte-offset in the PTE space that this PTP is at + * a PTP's VA is the first VA mapped by that PTP + * + * note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * PG_AVAIL usage: we make use of the ignored bits of the PTE + */ + +#define PG_W PG_AVAIL1 /* "wired" mapping */ +#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ +/* PG_AVAIL3 not used */ + +#ifdef _KERNEL +/* + * pmap data structures: see pmap.c for details of locking. 
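/*
 * A worked example of the index macros defined above, for one sample
 * VA; the mask and shift values mirror machine/pte.h (4KB pages, 4MB
 * PDEs).
 */
#define PDSHIFT		22
#define PGSHIFT		12
#define PD_MASK		0xffc00000U
#define PT_MASK		0x003ff000U

#define pdei(VA)	(((VA) & PD_MASK) >> PDSHIFT)
#define ptei(VA)	(((VA) & PT_MASK) >> PGSHIFT)

/*
 * for VA 0xc0123456:
 *   pdei(VA) == 768    -- PDSLOT_KERN, the first kernel PDE
 *   ptei(VA) == 0x123  -- entry 291 within that PTP
 */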
+ */ + +struct pmap; +typedef struct pmap *pmap_t; + +/* + * we maintain a list of all non-kernel pmaps + */ + +LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ + +/* + * the pmap structure + * + * note that the pm_obj contains the simple_lock, the reference count, + * page list, and number of PTPs within the pmap. + */ + +struct pmap { + struct uvm_object pm_obj; /* object (lck by object lock) */ +#define pm_lock pm_obj.vmobjlock + LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ + pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ + u_int32_t pm_pdirpa; /* PA of PD (read-only after create) */ + struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */ + struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ + + int pm_flags; /* see below */ + + union descriptor *pm_ldt; /* user-set LDT */ + int pm_ldt_len; /* number of LDT entries */ + int pm_ldt_sel; /* LDT selector */ +}; + +/* pm_flags */ +#define PMF_USER_LDT 0x01 /* pmap has user-set LDT */ + +/* + * for each managed physical page we maintain a list of <PMAP,VA>'s + * which it is mapped at. the list is headed by a pv_head structure. + * there is one pv_head per managed phys page (allocated at boot time). + * the pv_head structure points to a list of pv_entry structures (each + * describes one mapping). + */ + +struct pv_entry; + +struct pv_head { + simple_lock_data_t pvh_lock; /* locks every pv on this list */ + struct pv_entry *pvh_list; /* head of list (locked by pvh_lock) */ +}; + +struct pv_entry { /* locked by its list's pvh_lock */ + struct pv_entry *pv_next; /* next entry */ + struct pmap *pv_pmap; /* the pmap */ + vaddr_t pv_va; /* the virtual address */ + struct vm_page *pv_ptp; /* the vm_page of the PTP */ +}; + +/* + * pv_entrys are dynamically allocated in chunks from a single page. + * we keep track of how many pv_entrys are in use for each page and + * we can free pv_entry pages if needed. there is one lock for the + * entire allocation system. + */ + +struct pv_page_info { + TAILQ_ENTRY(pv_page) pvpi_list; + struct pv_entry *pvpi_pvfree; + int pvpi_nfree; +}; + +/* + * number of pv_entry's in a pv_page + * (note: won't work on systems where NPBG isn't a constant) + */ + +#define PVE_PER_PVPAGE ((NBPG - sizeof(struct pv_page_info)) / \ + sizeof(struct pv_entry)) + +/* + * a pv_page: where pv_entrys are allocated from + */ + +struct pv_page { + struct pv_page_info pvinfo; + struct pv_entry pvents[PVE_PER_PVPAGE]; +}; + +/* + * pmap_remove_record: a record of VAs that have been unmapped, used to + * flush TLB. if we have more than PMAP_RR_MAX then we stop recording. + */ + +#define PMAP_RR_MAX 16 /* max of 16 pages (64K) */ + +struct pmap_remove_record { + int prr_npages; + vaddr_t prr_vas[PMAP_RR_MAX]; +}; + +/* + * pmap_transfer_location: used to pass the current location in the + * pmap between pmap_transfer and pmap_transfer_ptes [e.g. during + * a pmap_copy]. + */ + +struct pmap_transfer_location { + vaddr_t addr; /* the address (page-aligned) */ + pt_entry_t *pte; /* the PTE that maps address */ + struct vm_page *ptp; /* the PTP that the PTE lives in */ +}; + +/* + * global kernel variables + */ + +extern pd_entry_t PTD[]; + +/* PTDpaddr: is the physical address of the kernel's PDP */ +extern u_long PTDpaddr; + +extern struct pmap kernel_pmap_store; /* kernel pmap */ +extern int nkpde; /* current # of PDEs for kernel */ +extern int pmap_pg_g; /* do we support PG_G? 
*/ + +/* + * macros + */ + +#define pmap_kernel() (&kernel_pmap_store) +#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) +#define pmap_update() tlbflush() + +#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) +#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) +#define pmap_copy(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, FALSE) +#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) +#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) +#define pmap_move(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, TRUE) +#define pmap_phys_address(ppn) i386_ptob(ppn) +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + + +/* + * prototypes + */ + +void pmap_activate __P((struct proc *)); +void pmap_bootstrap __P((vaddr_t)); +boolean_t pmap_change_attrs __P((struct vm_page *, int, int)); +void pmap_deactivate __P((struct proc *)); +static void pmap_page_protect __P((struct vm_page *, vm_prot_t)); +void pmap_page_remove __P((struct vm_page *)); +static void pmap_protect __P((struct pmap *, vaddr_t, + vaddr_t, vm_prot_t)); +void pmap_remove __P((struct pmap *, vaddr_t, vaddr_t)); +boolean_t pmap_test_attrs __P((struct vm_page *, int)); +void pmap_transfer __P((struct pmap *, struct pmap *, vaddr_t, + vsize_t, vaddr_t, boolean_t)); +static void pmap_update_pg __P((vaddr_t)); +static void pmap_update_2pg __P((vaddr_t,vaddr_t)); +void pmap_write_protect __P((struct pmap *, vaddr_t, + vaddr_t, vm_prot_t)); + +vaddr_t reserve_dumppages __P((vaddr_t)); /* XXX: not a pmap fn */ + +#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ + +/* + * inline functions + */ + +/* + * pmap_update_pg: flush one page from the TLB (or flush the whole thing + * if hardware doesn't support one-page flushing) + */ + +__inline static void +pmap_update_pg(va) + vaddr_t va; +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + invlpg((u_int) va); +} + +/* + * pmap_update_2pg: flush two pages from the TLB + */ + +__inline static void +pmap_update_2pg(va, vb) + vaddr_t va, vb; +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + { + invlpg((u_int) va); + invlpg((u_int) vb); + } +} + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => this function is a frontend for pmap_page_remove/pmap_change_attrs + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. + */ + +__inline static void +pmap_page_protect(pg, prot) + struct vm_page *pg; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + (void) pmap_change_attrs(pg, PG_RO, PG_RW); + } else { + pmap_page_remove(pg); + } + } +} + +/* + * pmap_protect: change the protection of pages in a pmap + * + * => this function is a frontend for pmap_remove/pmap_write_protect + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. 
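+ *
+ * for example, pmap_protect(pmap, va, va + NBPG, VM_PROT_READ) ends up
+ * in pmap_write_protect(), a VM_PROT_NONE request for the same range
+ * ends up in pmap_remove(), and any request that still includes
+ * VM_PROT_WRITE is a no-op here.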
+ */ + +__inline static void +pmap_protect(pmap, sva, eva, prot) + struct pmap *pmap; + vaddr_t sva, eva; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + pmap_write_protect(pmap, sva, eva, prot); + } else { + pmap_remove(pmap, sva, eva); + } + } +} + +vaddr_t pmap_map __P((vaddr_t, paddr_t, paddr_t, vm_prot_t)); + +#if defined(USER_LDT) +void pmap_ldt_cleanup __P((struct proc *)); +#define PMAP_FORK +#endif /* USER_LDT */ + +#endif /* _KERNEL */ +#endif /* _I386_PMAP_H_ */ diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h index 4f3dc049f8c..a8dd350ef77 100644 --- a/sys/arch/i386/include/vmparam.h +++ b/sys/arch/i386/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.15 2000/04/25 23:10:31 niklas Exp $ */ +/* $OpenBSD: vmparam.h,v 1.16 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */ /*- @@ -130,6 +130,12 @@ * Mach derived constants */ +/* XXX Compatibility */ +#ifdef PMAP_NEW +#define APTDPTDI PDSLOT_APTE +#define PTDPTDI PDSLOT_PTE +#endif + /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vm_offset_t)0) #define VM_MAXUSER_ADDRESS ((vm_offset_t)((PTDPTDI<<PDSHIFT) - USPACE)) diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index cf8a9345b9e..3d3b75b82bc 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.9 2001/03/22 03:05:55 smart Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.10 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: uvm_fault.c,v 1.35 1999/06/16 18:43:28 thorpej Exp $ */ /* @@ -844,8 +844,8 @@ ReFault: uvmexp.fltnamap++; pmap_enter(ufi.orig_map->pmap, currva, VM_PAGE_TO_PHYS(anon->u.an_page), - (anon->an_ref > 1) ? (enter_prot & ~VM_PROT_WRITE) : - enter_prot, + (anon->an_ref > 1) ? + (enter_prot & ~VM_PROT_WRITE) : enter_prot, VM_MAPENT_ISWIRED(ufi.entry), 0); } simple_unlock(&anon->an_lock); @@ -1725,12 +1725,14 @@ uvm_fault_wire(map, start, end, access_type) pmap = vm_map_pmap(map); +#ifndef PMAP_NEW /* * call pmap pageable: this tells the pmap layer to lock down these * page tables. */ pmap_pageable(pmap, start, end, FALSE); +#endif /* * now fault it in page at a time. if the fault fails then we have @@ -1785,7 +1787,9 @@ uvm_fault_unwire(map, start, end) if (pa == (paddr_t) 0) { panic("uvm_fault_unwire: unwiring non-wired memory"); } + pmap_change_wiring(pmap, va, FALSE); /* tell the pmap */ + pg = PHYS_TO_VM_PAGE(pa); if (pg) uvm_pageunwire(pg); @@ -1793,11 +1797,12 @@ uvm_fault_unwire(map, start, end) uvm_unlock_pageq(); +#ifndef PMAP_NEW /* * now we call pmap_pageable to let the pmap know that the page tables * in this space no longer need to be wired. */ pmap_pageable(pmap, start, end, TRUE); - +#endif } diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index f98c1f2281f..8881998d2f0 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.13 2000/03/13 16:05:24 art Exp $ */ +/* $OpenBSD: pmap.h,v 1.14 2001/03/22 23:36:52 niklas Exp $ */ /* $NetBSD: pmap.h,v 1.16 1996/03/31 22:15:32 pk Exp $ */ /* @@ -107,6 +107,16 @@ typedef struct pmap_statistics *pmap_statistics_t; #define PMAP_PGARG(PG) (VM_PAGE_TO_PHYS(PG)) #endif +#ifdef PMAP_NEW +/* + * Flags passed to pmap_enter(). Note the bottom 3 bits are VM_PROT_* + * bits, used to indicate the access type that was made (to seed modified + * and referenced information). 
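+ *
+ * For example, a wired mapping entered in response to a write access
+ * would pass (PMAP_WIRED | VM_PROT_WRITE), i.e. 0x12, in the flags
+ * argument.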
+ */ +#define PMAP_WIRED 0x00000010 /* wired mapping */ +#define PMAP_CANFAIL 0x00000020 /* can fail if resource shortage */ +#endif + #ifndef PMAP_EXCLUDE_DECLS /* Used in Sparc port to virtualize pmap mod */ #ifdef _KERNEL __BEGIN_DECLS @@ -114,6 +124,9 @@ void *pmap_bootstrap_alloc __P((int)); void pmap_change_wiring __P((pmap_t, vaddr_t, boolean_t)); #if defined(PMAP_NEW) +#if 0 +void pmap_unwire __P((pmap_t, vaddr_t)); +#endif #if !defined(pmap_clear_modify) boolean_t pmap_clear_modify __P((struct vm_page *)); #endif @@ -134,8 +147,20 @@ struct pmap *pmap_create __P((void)); pmap_t pmap_create __P((vsize_t)); #endif void pmap_destroy __P((pmap_t)); +#ifdef PMAP_NEW +#ifdef notyet +int pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +boolean_t pmap_extract __P((pmap_t, vaddr_t, paddr_t *)); +#else +int _pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, int)); +#define pmap_enter(pmap, va, pa, prot, wired, access_type) \ + (_pmap_enter((pmap), (va), (pa), (prot), ((wired) ? PMAP_WIRED : 0))) +boolean_t _pmap_extract __P((pmap_t, vaddr_t, paddr_t *)); +#endif +#else void pmap_enter __P((pmap_t, vaddr_t, paddr_t, vm_prot_t, boolean_t, vm_prot_t)); +#endif paddr_t pmap_extract __P((pmap_t, vaddr_t)); #if defined(PMAP_NEW) && defined(PMAP_GROWKERNEL) vaddr_t pmap_growkernel __P((vaddr_t)); @@ -197,6 +222,10 @@ vaddr_t pmap_steal_memory __P((vsize_t, paddr_t *, paddr_t *)); #else void pmap_virtual_space __P((vaddr_t *, vaddr_t *)); #endif + +#if defined(PMAP_FORK) +void pmap_fork __P((pmap_t, pmap_t)); +#endif __END_DECLS #endif /* kernel*/ #endif /* PMAP_EXCLUDE_DECLS */ |