diff options
author | Michael Shalayeff <mickey@cvs.openbsd.org> | 2006-04-27 15:37:56 +0000 |
---|---|---|
committer | Michael Shalayeff <mickey@cvs.openbsd.org> | 2006-04-27 15:37:56 +0000 |
commit | 35b8d554a33affaf6b27876352c24652d73dab77 (patch) | |
tree | 96c478ed97d3ba2b010c6e8672e65eadd0de8127 /sys | |
parent | 11dad837ff643b7f59fbcfae411104a49ab48410 (diff) |
implement separate PAE pmap that allows access to 64g of physmem
if supported by the cpu(s). currently not enabled by default and
not compiled into ramdisks. this grows paddr_t to 64bit but yet
leaves bus_addr_t at 32bits. measures are taken to favour dmaable
memory allocation from below the 4g line: the buffer cache is
already allocated from below, the pool backend allocator prefers lower
memory, and finally bounce buffers are used as a last resort.
PAE is engaged only if global variable cpu_pae is manually set
to non-zero and there is physical memory present above 4g.
simplify pcibios address math to use u_long, as we will always
be in the 32bit space.
Diffstat (limited to 'sys')
27 files changed, 3421 insertions, 727 deletions
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index d3130003116..72a268d8bb6 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.144 2006/03/04 16:27:03 grange Exp $ +# $OpenBSD: files.i386,v 1.145 2006/04/27 15:37:48 mickey Exp $ # # new style config file for i386 architecture # @@ -32,6 +32,7 @@ file arch/i386/i386/k6_mem.c mtrr file arch/i386/i386/microtime.s file arch/i386/i386/p4tcc.c !small_kernel & i686_cpu file arch/i386/i386/pmap.c +file arch/i386/i386/pmapae.c !small_kernel file arch/i386/i386/powernow.c !small_kernel & i586_cpu file arch/i386/i386/powernow-k7.c !small_kernel & i686_cpu file arch/i386/i386/powernow-k8.c !small_kernel & i686_cpu diff --git a/sys/arch/i386/i386/autoconf.c b/sys/arch/i386/i386/autoconf.c index 117d866677e..6d872952748 100644 --- a/sys/arch/i386/i386/autoconf.c +++ b/sys/arch/i386/i386/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: autoconf.c,v 1.57 2006/03/15 20:04:36 miod Exp $ */ +/* $OpenBSD: autoconf.c,v 1.58 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: autoconf.c,v 1.20 1996/05/03 19:41:56 christos Exp $ */ /*- @@ -45,6 +45,7 @@ */ #include <sys/param.h> #include <sys/systm.h> +#include <sys/user.h> #include <sys/buf.h> #include <sys/dkstat.h> #include <sys/disklabel.h> @@ -108,6 +109,9 @@ cpu_configure() gdt_init(); /* XXX - pcibios uses gdt stuff */ +#ifndef SMALL_KERNEL + pmap_bootstrap_pae(); +#endif if (config_rootfound("mainbus", NULL) == NULL) panic("cpu_configure: mainbus not configured"); diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index 95d82a9d06b..4d86a3af627 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.17 2006/01/12 22:39:20 weingart Exp $ */ +/* $OpenBSD: cpu.c,v 1.18 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ /*- @@ -253,8 +253,7 @@ cpu_attach(parent, self, aux) 
pcb->pcb_tss.tss_esp = kstack + USPACE - 16 - sizeof (struct trapframe); pcb->pcb_pmap = pmap_kernel(); - pcb->pcb_cr3 = vtophys((vaddr_t)pcb->pcb_pmap->pm_pdir); - /* pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdir - KERNBASE; XXX ??? */ + pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; cpu_default_ldt(ci); /* Use the `global' ldt until one alloc'd */ #endif @@ -417,7 +416,7 @@ cpu_boot_secondary (ci) printf("%s: starting", ci->ci_dev.dv_xname); /* XXX move elsewhere, not per CPU. */ - mp_pdirpa = vtophys((vaddr_t)kpm->pm_pdir); + mp_pdirpa = kpm->pm_pdirpa; pcb = ci->ci_idle_pcb; diff --git a/sys/arch/i386/i386/db_memrw.c b/sys/arch/i386/i386/db_memrw.c index 979c327178b..a282d61c9a7 100644 --- a/sys/arch/i386/i386/db_memrw.c +++ b/sys/arch/i386/i386/db_memrw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_memrw.c,v 1.8 2005/11/22 12:52:55 mickey Exp $ */ +/* $OpenBSD: db_memrw.c,v 1.9 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: db_memrw.c,v 1.6 1999/04/12 20:38:19 pk Exp $ */ /* @@ -63,28 +63,19 @@ db_read_bytes(vaddr_t addr, size_t size, char *data) void db_write_bytes(vaddr_t addr, size_t size, char *data) { - char *dst; - - pt_entry_t *ptep0 = 0; - pt_entry_t oldmap0 = { 0 }; - vaddr_t addr1; - pt_entry_t *ptep1 = 0; - pt_entry_t oldmap1 = { 0 }; extern char etext; + u_int32_t bits, bits1; + vaddr_t addr1 = 0; + char *dst; if (addr >= VM_MIN_KERNEL_ADDRESS && addr < (vaddr_t)&etext) { - ptep0 = kvtopte(addr); - oldmap0 = *ptep0; - *(int *)ptep0 |= /* INTEL_PTE_WRITE */ PG_RW; + bits = pmap_pte_setbits(addr, PG_RW, 0) & PG_RW; addr1 = trunc_page(addr + size - 1); - if (trunc_page(addr) != addr1) { + if (trunc_page(addr) != addr1) /* data crosses a page boundary */ - ptep1 = kvtopte(addr1); - oldmap1 = *ptep1; - *(int *)ptep1 |= /* INTEL_PTE_WRITE */ PG_RW; - } + bits1 = pmap_pte_setbits(addr1, PG_RW, 0) & PG_RW; tlbflush(); } @@ -93,10 +84,10 @@ db_write_bytes(vaddr_t addr, size_t size, char *data) while (size-- > 0) *dst++ = *data++; - if (ptep0) { - *ptep0 = oldmap0; - if (ptep1) - 
*ptep1 = oldmap1; + if (addr1) { + pmap_pte_setbits(addr, 0, bits); + if (bits1) + pmap_pte_setbits(addr1, 0, bits1); tlbflush(); } } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index 17ea5424f2d..cb328569a85 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.23 2006/01/12 22:39:20 weingart Exp $ +# $OpenBSD: genassym.cf,v 1.24 2006/04/27 15:37:50 mickey Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -80,7 +80,6 @@ export PDSLOT_KERN export PDSLOT_PTE export PDSLOT_APTE export NKPTP_MIN -export NKPTP_MAX # values for virtual memory export VM_MAXUSER_ADDRESS diff --git a/sys/arch/i386/i386/kgdb_machdep.c b/sys/arch/i386/i386/kgdb_machdep.c index a7cf21cacad..2520d07fd93 100644 --- a/sys/arch/i386/i386/kgdb_machdep.c +++ b/sys/arch/i386/i386/kgdb_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kgdb_machdep.c,v 1.4 2005/11/13 17:50:44 fgsch Exp $ */ +/* $OpenBSD: kgdb_machdep.c,v 1.5 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: kgdb_machdep.c,v 1.6 1998/08/13 21:36:03 thorpej Exp $ */ /*- @@ -97,15 +97,13 @@ kgdb_acc(va, len) size_t len; { vaddr_t last_va; - pt_entry_t *pte; last_va = va + len; va &= ~PGOFSET; last_va &= ~PGOFSET; do { - pte = kvtopte(va); - if ((*pte & PG_V) == 0) + if ((pmap_pte_bits(va) & PG_V) == 0) return (0); va += NBPG; } while (va < last_va); diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c index 77bb270c8b6..8c455833c69 100644 --- a/sys/arch/i386/i386/lapic.c +++ b/sys/arch/i386/i386/lapic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lapic.c,v 1.7 2006/03/13 18:42:16 mickey Exp $ */ +/* $OpenBSD: lapic.c,v 1.8 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */ /*- @@ -77,9 +77,8 @@ void lapic_map(lapic_base) paddr_t lapic_base; { - int s; - pt_entry_t *pte; vaddr_t va = (vaddr_t)&local_apic; + int s; disable_intr(); s = 
lapic_tpr; @@ -93,8 +92,7 @@ lapic_map(lapic_base) * might have changed the value of cpu_number().. */ - pte = kvtopte(va); - *pte = lapic_base | PG_RW | PG_V | PG_N; + pmap_pte_set(va, lapic_base, PG_RW | PG_V | PG_N); invlpg(va); #ifdef MULTIPROCESSOR diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index 4e0a0eb3e92..91010290ac4 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.99 2006/04/19 14:19:30 mickey Exp $ */ +/* $OpenBSD: locore.s,v 1.100 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -200,6 +200,7 @@ .globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx) .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) .globl _C_LABEL(esym) + .globl _C_LABEL(nkptp_max) .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr) .globl _C_LABEL(gdt) @@ -531,9 +532,9 @@ try586: /* Use the `cpuid' instruction. */ * 0 1 2 3 */ #define PROC0PDIR ((0) * NBPG) -#define PROC0STACK ((1) * NBPG) -#define SYSMAP ((1+UPAGES) * NBPG) -#define TABLESIZE ((1+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ +#define PROC0STACK ((4) * NBPG) +#define SYSMAP ((4+UPAGES) * NBPG) +#define TABLESIZE ((4+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ /* Clear the BSS. */ movl $RELOC(_C_LABEL(edata)),%edi @@ -572,9 +573,9 @@ try586: /* Use the `cpuid' instruction. */ jge 1f movl $NKPTP_MIN,%ecx # set at min jmp 2f -1: cmpl $NKPTP_MAX,%ecx # larger than max? +1: cmpl RELOC(_C_LABEL(nkptp_max)),%ecx # larger than max? jle 2f - movl $NKPTP_MAX,%ecx + movl RELOC(_C_LABEL(nkptp_max)),%ecx 2: movl %ecx,RELOC(_C_LABEL(nkpde)) # and store it back /* Clear memory for bootstrap tables. */ @@ -659,6 +660,8 @@ try586: /* Use the `cpuid' instruction. */ /* Install a PDE recursively mapping page directory as a page table! 
*/ leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot + addl $NBPG, %eax # pte for ptd[1] + movl %eax,(PROC0PDIR+(PDSLOT_PTE+1)*4)(%esi) # recursive PD slot /* Save phys. addr of PTD, for libkvm. */ movl %esi,RELOC(_C_LABEL(PTDpaddr)) @@ -2310,6 +2313,40 @@ ENTRY(i686_pagezero) ret #endif +#ifndef SMALL_KERNEL +/* + * int cpu_paenable(void *); + */ +ENTRY(cpu_paenable) + movl $-1, %eax + testl $CPUID_PAE, _C_LABEL(cpu_feature) + jz 1f + + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl %cr3, %edi + orl $0xfe0, %edi /* PDPT will be in the last four slots! */ + movl %edi, %cr3 + addl $KERNBASE, %edi /* and make it back virtual again */ + movl $8, %ecx + cld + rep + movsl + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 /* BANG!!! */ + movl 12(%esp), %eax + subl $KERNBASE, %eax + movl %eax, %cr3 /* reload real PDPT */ + + xorl %eax, %eax + popl %edi + popl %esi +1: + ret +#endif /* !SMALL_KERNEL */ + #if NLAPIC > 0 #include <i386/i386/apicvec.s> #endif diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 61479795f5e..5291fb73dda 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.352 2006/04/18 17:39:15 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.353 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -218,6 +218,9 @@ int bufcachepercent = BUFCACHEPERCENT; extern int boothowto; int physmem; +#ifndef SMALL_KERNEL +int pae_copy; +#endif struct dumpmem { paddr_t start; @@ -243,7 +246,7 @@ int i386_has_sse2; int i386_has_xcrypt; bootarg_t *bootargp; -paddr_t avail_end; +paddr_t avail_end, avail_end2; struct vm_map *exec_map = NULL; struct vm_map *phys_map = NULL; @@ -323,6 +326,12 @@ int allowaperture = 0; #endif #endif +#ifdef I686_PAE +int cpu_pae = 1; +#else +int cpu_pae = 0; +#endif + void winchip_cpu_setup(struct cpu_info *); void 
amd_family5_setup(struct cpu_info *); void amd_family6_setup(struct cpu_info *); @@ -416,7 +425,8 @@ cpu_startup() curcpu()->ci_feature_flags = cpu_feature; identifycpu(curcpu()); - printf("real mem = %u (%uK)\n", ctob(physmem), ctob(physmem)/1024U); + printf("real mem = %llu (%uK)\n", ctob((paddr_t)physmem), + ctob((paddr_t)physmem)/1024U); /* * Find out how much space we need, allocate it, @@ -447,8 +457,8 @@ cpu_startup() phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE, 0, FALSE, NULL); - printf("avail mem = %lu (%uK)\n", ptoa(uvmexp.free), - ptoa(uvmexp.free)/1024U); + printf("avail mem = %llu (%uK)\n", ptoa((paddr_t)uvmexp.free), + ptoa((paddr_t)uvmexp.free) / 1024U); printf("using %d buffers containing %u bytes (%uK) of memory\n", nbuf, bufpages * PAGE_SIZE, bufpages * PAGE_SIZE / 1024); @@ -2708,7 +2718,6 @@ fix_f00f(void) { struct region_descriptor region; vaddr_t va; - pt_entry_t *pte; void *p; /* Allocate two new pages */ @@ -2724,8 +2733,7 @@ fix_f00f(void) GCODE_SEL); /* Map first page RO */ - pte = PTE_BASE + atop(va); - *pte &= ~PG_RW; + pmap_pte_setbits(va, 0, PG_RW); /* Reload idtr */ setregion(&region, idt, sizeof(idt_region) - 1); @@ -2880,11 +2888,11 @@ init386(paddr_t first_avail) if (bootargc > NBPG) panic("too many boot args"); - if (extent_alloc_region(iomem_ex, (paddr_t)bootargv, bootargc, + if (extent_alloc_region(iomem_ex, (u_long)bootargv, bootargc, EX_NOWAIT)) panic("cannot reserve /boot args memory"); - pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (paddr_t)bootargv, + pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (u_long)bootargv, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); @@ -2898,15 +2906,6 @@ init386(paddr_t first_avail) panic("no BIOS memory map supplied"); #endif -#if defined(MULTIPROCESSOR) - /* install the page after boot args as PT page for first 4M */ - pmap_enter(pmap_kernel(), (u_long)vtopte(0), - round_page((vaddr_t)(bootargv + bootargc)), - VM_PROT_READ|VM_PROT_WRITE, - 
VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); - memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */ -#endif - /* * account all the memory passed in the map from /boot * calculate avail_end and count the physmem. @@ -2919,27 +2918,12 @@ init386(paddr_t first_avail) for(i = 0, im = bios_memmap; im->type != BIOS_MAP_END; im++) if (im->type == BIOS_MAP_FREE) { register paddr_t a, e; -#ifdef DEBUG - printf(" %llx-%llx", im->addr, im->addr + im->size); -#endif - - if (im->addr >= 0x100000000ULL) { -#ifdef DEBUG - printf("-H"); -#endif - continue; - } a = round_page(im->addr); - if (im->addr + im->size <= 0xfffff000ULL) - e = trunc_page(im->addr + im->size); - else { + e = trunc_page(im->addr + im->size); #ifdef DEBUG - printf("-T"); + printf(" %llx-%llx", a, e); #endif - e = 0xfffff000; - } - /* skip first eight pages */ if (a < 8 * NBPG) a = 8 * NBPG; @@ -2959,7 +2943,16 @@ init386(paddr_t first_avail) continue; } - if (extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT)) + if (a >= 0x100000000ULL) { +#ifdef DEBUG + printf("-H"); +#endif + if (!cpu_pae) + continue; + } + + if (e <= 0x100000000ULL && + extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT)) /* XXX What should we do? 
*/ printf("\nWARNING: CAN'T ALLOCATE RAM (%x-%x)" " FROM IOMEM EXTENT MAP!\n", a, e); @@ -2968,11 +2961,15 @@ init386(paddr_t first_avail) dumpmem[i].start = atop(a); dumpmem[i].end = atop(e); i++; - avail_end = max(avail_end, e); + avail_end2 = MAX(avail_end2, e); + if (avail_end2 < 0x100000000ULL) + avail_end = avail_end2; } ndumpmem = i; avail_end -= round_page(MSGBUFSIZE); + if (avail_end2 < 0x100000000ULL) + avail_end2 = avail_end; #ifdef DEBUG printf(": %lx\n", avail_end); @@ -3003,30 +3000,34 @@ init386(paddr_t first_avail) e = dumpmem[i].end; if (a < atop(first_avail) && e > atop(first_avail)) a = atop(first_avail); - if (e > atop(avail_end)) + if (a < atop(avail_end) && e > atop(avail_end)) e = atop(avail_end); if (a < e) { if (a < atop(16 * 1024 * 1024)) { lim = MIN(atop(16 * 1024 * 1024), e); #ifdef DEBUG - printf(" %x-%x (<16M)", a, lim); + printf(" %llx-%llx (<16M)", a, lim); #endif uvm_page_physload(a, lim, a, lim, VM_FREELIST_FIRST16); if (e > lim) { #ifdef DEBUG - printf(" %x-%x", lim, e); + printf(" %llx-%llx", lim, e); #endif uvm_page_physload(lim, e, lim, e, VM_FREELIST_DEFAULT); } } else { #ifdef DEBUG - printf(" %x-%x", a, e); + printf(" %llx-%llx", a, e); #endif - uvm_page_physload(a, e, a, e, - VM_FREELIST_DEFAULT); + if (a >= atop(0x100000000ULL)) + uvm_page_physload(a, e, a, a - 1, + VM_FREELIST_ABOVE4G); + else + uvm_page_physload(a, e, a, e, + VM_FREELIST_DEFAULT); } } } @@ -3464,8 +3465,8 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) bus_space_handle_t *bshp; { u_long pa, endpa; + u_int32_t bits; vaddr_t va; - pt_entry_t *pte; bus_size_t map_size; #ifdef MULTIPROCESSOR u_int32_t cpumask = 0; @@ -3497,13 +3498,12 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) * on those machines. 
*/ if (cpu_class != CPUCLASS_386) { - pte = kvtopte(va); if (cacheable) - *pte &= ~PG_N; + bits = pmap_pte_setbits(va, 0, PG_N); else - *pte |= PG_N; + bits = pmap_pte_setbits(va, PG_N, 0); #ifdef MULTIPROCESSOR - pmap_tlb_shootdown(pmap_kernel(), va, *pte, + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); #else pmap_update_pg(va); @@ -3526,7 +3526,7 @@ bus_space_unmap(t, bsh, size) { struct extent *ex; u_long va, endva; - bus_addr_t bpa; + paddr_t bpa; /* * Find the correct extent and bus physical address. @@ -3536,7 +3536,7 @@ bus_space_unmap(t, bsh, size) bpa = bsh; } else if (t == I386_BUS_SPACE_MEM) { ex = iomem_ex; - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); + bpa = (u_long)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; @@ -3572,7 +3572,7 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, bus_addr_t *adrp) { u_long va, endva; - bus_addr_t bpa; + paddr_t bpa; /* * Find the correct bus physical address. @@ -3580,7 +3580,7 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, if (t == I386_BUS_SPACE_IO) { bpa = bsh; } else if (t == I386_BUS_SPACE_MEM) { - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); + bpa = (u_long)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; @@ -3603,9 +3603,8 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, panic("bus_space_unmap: bad bus space tag"); ok: - if (adrp != NULL) { + if (adrp != NULL) *adrp = bpa; - } } void @@ -3647,6 +3646,7 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) struct i386_bus_dmamap *map; void *mapstore; size_t mapsize; + int npages; /* * Allocate and initialize the DMA map. 
The end of the map @@ -3662,6 +3662,17 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) */ mapsize = sizeof(struct i386_bus_dmamap) + (sizeof(bus_dma_segment_t) * (nsegments - 1)); + npages = 0; +#ifndef SMALL_KERNEL + if (avail_end2 > avail_end && + (flags & (BUS_DMA_64BIT|BUS_DMA_24BIT)) == 0) { + /* this many pages plus one in case we get split */ + npages = round_page(size) / PAGE_SIZE + 1; + if (npages < nsegments) /* looks stupid, but possible */ + npages = nsegments; + mapsize += sizeof(struct vm_page *) * npages; + } +#endif /* !SMALL_KERNEL */ if ((mapstore = malloc(mapsize, M_DEVBUF, (flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK)) == NULL) return (ENOMEM); @@ -3672,10 +3683,55 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) map->_dm_segcnt = nsegments; map->_dm_maxsegsz = maxsegsz; map->_dm_boundary = boundary; + map->_dm_pages = npages? (void *)&map->dm_segs[nsegments] : NULL; + map->_dm_npages = npages; map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT); map->dm_mapsize = 0; /* no valid mappings */ map->dm_nsegs = 0; +#ifndef SMALL_KERNEL + if (npages) { + struct pglist mlist; + vaddr_t va; + int error; + + size = npages << PGSHIFT; + va = uvm_km_valloc(kernel_map, size); + if (va == 0) { + map->_dm_npages = 0; + free(map, M_DEVBUF); + return (ENOMEM); + } + + TAILQ_INIT(&mlist); + /* if not a 64bit map -- allocate some bouncy-bouncy */ + error = uvm_pglistalloc(size, + round_page(ISA_DMA_BOUNCE_THRESHOLD), 0xfffff000, + PAGE_SIZE, boundary, &mlist, nsegments, + (flags & BUS_DMA_NOWAIT) == 0); + if (error) { + map->_dm_npages = 0; + uvm_km_free(kernel_map, (vaddr_t)va, size); + free(map, M_DEVBUF); + return (ENOMEM); + } else { + struct vm_page **pg = map->_dm_pages; + + npages--; + *pg = TAILQ_FIRST(&mlist); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(*pg), + VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED); + for (pg++, va += PAGE_SIZE; npages--; + pg++, va += PAGE_SIZE) { + *pg = TAILQ_NEXT(pg[-1], 
pageq); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(*pg), + VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED); + } + } + map->_dm_pgva = va; + } +#endif /* !SMALL_KERNEL */ + *dmamp = map; return (0); } @@ -3706,7 +3762,7 @@ _bus_dmamap_load(t, map, buf, buflen, p, flags) struct proc *p; int flags; { - bus_addr_t lastaddr; + paddr_t lastaddr; int seg, error; /* @@ -3887,6 +3943,7 @@ _bus_dmamap_unload(t, map) */ map->dm_mapsize = 0; map->dm_nsegs = 0; + map->_dm_nused = 0; } /* @@ -3894,15 +3951,47 @@ _bus_dmamap_unload(t, map) * by bus-specific DMA map synchronization functions. */ void -_bus_dmamap_sync(t, map, addr, size, op) +_bus_dmamap_sync(t, map, offset, size, op) bus_dma_tag_t t; bus_dmamap_t map; - bus_addr_t addr; + bus_addr_t offset; bus_size_t size; int op; { +#ifndef SMALL_KERNEL + bus_dma_segment_t *sg; + int i, off = offset; + bus_size_t l; + + /* scan the segment list performing necessary copies */ + if (!(map->_dm_flags & BUS_DMA_64BIT) && map->_dm_nused) { + for (i = map->_dm_segcnt, sg = map->dm_segs; + size && i--; sg++) { + if (off >= sg->ds_len) { + off -= sg->ds_len; + continue; + } - /* Nothing to do here. */ + l = sg->ds_len - off; + if (l > size) + l = size; + size -= l; + if (sg->ds_addr2) { + if (op & BUS_DMASYNC_POSTREAD) { + bcopy((void *)(sg->ds_va2 + off), + (void *)(sg->ds_va + off), l); + pae_copy++; + } + if (op & BUS_DMASYNC_PREWRITE) { + bcopy((void *)(sg->ds_va + off), + (void *)(sg->ds_va2 + off), l); + pae_copy++; + } + } + off = 0; + } + } +#endif /* !SMALL_KERNEL */ } /* @@ -4072,8 +4161,8 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) int first; { bus_size_t sgsize; - bus_addr_t curaddr, lastaddr, baddr, bmask; - vaddr_t vaddr = (vaddr_t)buf; + paddr_t curaddr, lastaddr, oaddr, baddr, bmask; + vaddr_t pgva, vaddr = (vaddr_t)buf; int seg; pmap_t pmap; @@ -4089,7 +4178,24 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) /* * Get the physical address for this segment. 
*/ - pmap_extract(pmap, vaddr, (paddr_t *)&curaddr); + pmap_extract(pmap, vaddr, &curaddr); + oaddr = 0; + pgva = 0; +#ifndef SMALL_KERNEL + if (!(map->_dm_flags & BUS_DMA_64BIT) && + curaddr >= 0x100000000ULL) { + struct vm_page *pg; + int page, off; + + if (map->_dm_nused + 1 >= map->_dm_npages) + return (ENOMEM); + off = vaddr & PAGE_MASK; + pg = map->_dm_pages[page = map->_dm_nused++]; + oaddr = curaddr; + curaddr = VM_PAGE_TO_PHYS(pg) + off; + pgva = map->_dm_pgva + (page << PGSHIFT) + off; + } +#endif /* !SMALL_KERNEL */ /* * Compute the segment size, and adjust counts. @@ -4113,7 +4219,10 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) */ if (first) { map->dm_segs[seg].ds_addr = curaddr; + map->dm_segs[seg].ds_addr2 = oaddr; map->dm_segs[seg].ds_len = sgsize; + map->dm_segs[seg].ds_va = vaddr; + map->dm_segs[seg].ds_va2 = pgva; first = 0; } else { if (curaddr == lastaddr && @@ -4127,7 +4236,10 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) if (++seg >= map->_dm_segcnt) break; map->dm_segs[seg].ds_addr = curaddr; + map->dm_segs[seg].ds_addr2 = oaddr; map->dm_segs[seg].ds_len = sgsize; + map->dm_segs[seg].ds_va = vaddr; + map->dm_segs[seg].ds_va2 = pgva; } } @@ -4170,6 +4282,19 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs, /* Always round the size. 
*/ size = round_page(size); + if (flags & BUS_DMA_64BIT) { + if (high > 0x100000000ULL && low < 0x100000000ULL) + low = 0x100000000ULL; + } else if (high > 0x100000000ULL) { + if (low >= 0x100000000ULL) { +#ifdef DIAGNOSTIC + printf("_bus_dmamem_alloc_range: " + "32bit request in above 4GB space\n"); +#endif + return (EINVAL); + } else + high = 0x100000000ULL; + } TAILQ_INIT(&mlist); /* @@ -4215,7 +4340,6 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs, } lastaddr = curaddr; } - *rsegs = curseg + 1; return (0); diff --git a/sys/arch/i386/i386/mpbios.c b/sys/arch/i386/i386/mpbios.c index ad9e667364c..5a8ba2f5829 100644 --- a/sys/arch/i386/i386/mpbios.c +++ b/sys/arch/i386/i386/mpbios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mpbios.c,v 1.8 2006/04/18 17:42:24 kettenis Exp $ */ +/* $OpenBSD: mpbios.c,v 1.9 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: mpbios.c,v 1.2 2002/10/01 12:56:57 fvdl Exp $ */ /*- @@ -1043,7 +1043,7 @@ mpbios_ioapic(ent, self) aaa.aaa_name = "ioapic"; aaa.apic_id = entry->apic_id; aaa.apic_version = entry->apic_version; - aaa.apic_address = (paddr_t)entry->apic_address; + aaa.apic_address = (u_long)entry->apic_address; aaa.flags = (mp_fps->mpfb2 & 0x80) ? IOAPIC_PICMODE : IOAPIC_VWIRE; config_found_sm(self, &aaa, mp_print, mp_match); diff --git a/sys/arch/i386/i386/mptramp.s b/sys/arch/i386/i386/mptramp.s index 75621628849..38e83191ff2 100644 --- a/sys/arch/i386/i386/mptramp.s +++ b/sys/arch/i386/i386/mptramp.s @@ -1,4 +1,4 @@ -/* $OpenBSD: mptramp.s,v 1.5 2006/03/14 14:44:37 mickey Exp $ */ +/* $OpenBSD: mptramp.s,v 1.6 2006/04/27 15:37:51 mickey Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -165,10 +165,20 @@ _TRMP_LABEL(mp_startup) /* Load base of page directory and enable mapping. */ movl %ecx,%cr3 # load ptd addr into mmu - movl %cr0,%eax # get control word - # enable paging & NPX emulation - orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax - movl %eax,%cr0 # and let's page NOW! 
+#ifndef SMALL_KERNEL + movl $_C_LABEL(pmap_pte_set_pae),%eax + cmpl RELOC(_C_LABEL(pmap_pte_set_p)),%eax + jne nopae + + movl %cr4,%eax + orl $CR4_PAE,%eax + movl %eax, %cr4 +nopae: +#endif + movl %cr0,%eax # get control word + # enable paging & NPX emulation + orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax + movl %eax,%cr0 # and let's page NOW! #ifdef MPDEBUG leal _C_LABEL(cpu_trace),%edi diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 765b2b9d233..ec49253f621 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.91 2006/03/13 18:42:16 mickey Exp $ */ +/* $OpenBSD: pmap.c,v 1.92 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -86,15 +86,6 @@ #endif /* - * general info: - * - * - for an explanation of how the i386 MMU hardware works see - * the comments in <machine/pte.h>. - * - * - for an explanation of the general memory structure used by - * this pmap (including the recursive mapping), see the comments - * in <machine/pmap.h>. - * * this file contains the code for the "pmap module." the module's * job is to manage the hardware's virtual to physical address mappings. * note that there are two levels of mapping in the VM system: @@ -131,7 +122,181 @@ * if we run out of pv_entry's we allocate a new pv_page and free * its pv_entrys. */ - +/* + * i386 MMU hardware structure: + * + * the i386 MMU is a two-level MMU which maps 4GB of virtual memory. + * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium + * processors can support a 4MB pagesize as well. + * + * the first level table (segment table?) is called a "page directory" + * and it contains 1024 page directory entries (PDEs). each PDE is + * 4 bytes (an int), so a PD fits in a single 4K page. this page is + * the page directory page (PDP). each PDE in a PDP maps 4MB of space + * (1024 * 4MB = 4GB). 
a PDE contains the physical address of the + * second level table: the page table. or, if 4MB pages are being used, + * then the PDE contains the PA of the 4MB page being mapped. + * + * a page table consists of 1024 page table entries (PTEs). each PTE is + * 4 bytes (an int), so a page table also fits in a single 4K page. a + * 4K page being used as a page table is called a page table page (PTP). + * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains + * the physical address of the page it maps and some flag bits (described + * below). + * + * the processor has a special register, "cr3", which points to the + * the PDP which is currently controlling the mappings of the virtual + * address space. + * + * the following picture shows the translation process for a 4K page: + * + * %cr3 register [PA of PDP] + * | + * | + * | bits <31-22> of VA bits <21-12> of VA bits <11-0> + * | index the PDP (0 - 1023) index the PTP are the page offset + * | | | | + * | v | | + * +--->+----------+ | | + * | PD Page | PA of v | + * | |---PTP-------->+------------+ | + * | 1024 PDE | | page table |--PTE--+ | + * | entries | | (aka PTP) | | | + * +----------+ | 1024 PTE | | | + * | entries | | | + * +------------+ | | + * | | + * bits <31-12> bits <11-0> + * p h y s i c a l a d d r + * + * the i386 caches PTEs in a TLB. it is important to flush out old + * TLB mappings when making a change to a mappings. writing to the + * %cr3 will flush the entire TLB. newer processors also have an + * instruction that will invalidate the mapping of a single page (which + * is useful if you are changing a single mappings because it preserves + * all the cached TLB entries). + * + * as shows, bits 31-12 of the PTE contain PA of the page being mapped. 
+ * the rest of the PTE is defined as follows: + * bit# name use + * 11 n/a available for OS use, hardware ignores it + * 10 n/a available for OS use, hardware ignores it + * 9 n/a available for OS use, hardware ignores it + * 8 G global bit (see discussion below) + * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>) + * 6 D dirty (modified) page + * 5 A accessed (referenced) page + * 4 PCD cache disable + * 3 PWT prevent write through (cache) + * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) + * 1 R/W read/write bit (0=read only, 1=read-write) + * 0 P present (valid) + * + * notes: + * - on the i386 the R/W bit is ignored if processor is in supervisor + * state (bug!) + * - PS is only supported on newer processors + * - PTEs with the G bit are global in the sense that they are not + * flushed from the TLB when %cr3 is written (to flush, use the + * "flush single page" instruction). this is only supported on + * newer processors. this bit can be used to keep the kernel's + * TLB entries around while context switching. since the kernel + * is mapped into all processes at the same place it does not make + * sense to flush these entries when switching from one process' + * pmap to another. + */ +/* + * A pmap describes a process' 4GB virtual address space. This + * virtual address space can be broken up into 1024 4MB regions which + * are described by PDEs in the PDP. The PDEs are defined as follows: + * + * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. + * The following assumes that KERNBASE is 0xd0000000. + * + * PDE#s VA range Usage + * 0->831 0x0 -> 0xcfc00000 user address space, note that the + * max user address is 0xcfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used. + * 831 0xcfc00000-> recursive mapping of PDP (used for + * 0xd0000000 linear mapping of PTPs). 
+ * 832->1023 0xd0000000-> kernel address space (constant + * 0xffc00000 across all pmaps/processes). + * 1023 0xffc00000-> "alternate" recursive PDP mapping + * <end> (for other pmaps). + * + * + * Note: A recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. In other + * words, the PTE for page 0 is the first int mapped into the 4MB recursive + * area. The PTE for page 1 is the second int. The very last int in the + * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). + * + * All pmaps' PDs must have the same values in slots 832->1023 so that + * the kernel is always mapped in every process. These values are loaded + * into the PD at pmap creation time. + * + * At any one time only one pmap can be active on a processor. This is + * the pmap whose PDP is pointed to by processor register %cr3. This pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slot #831 as show above). When the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * Address of PTE = (831 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xcfc00000 + (VA / 4096) * 4 + * + * What happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? In that + * case we take the PA of the PDP of non-active pmap and put it in + * slot 1023 of the active pmap. This causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). 
+ * + * The following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 + * | | + * | | + * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000 + * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xd0400000) + * | | + * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) + * +----+ + * + * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE". + * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE". + * + * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a + * PTP: + * + * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000 + * | | + * | | + * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000 + * | 832| -> maps the contents of first kernel PTP + * | | + * |1023| + * +----+ + * + * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * To set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * Note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xfffff000). + */ /* * memory allocation * @@ -182,7 +347,6 @@ * save VA for later call to [a], go to plan 3. * If we fail, we simply let pmap_enter() tell UVM about it. 
*/ - /* * locking * @@ -258,6 +422,79 @@ struct lock pmap_main_lock; #endif +#define PG_FRAME 0xfffff000 /* page frame mask */ +#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ + +/* + * The following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +#define PDOFSET (NBPD-1) /* mask for non-PD part of VA */ +#define PTES_PER_PTP (NBPD / NBPG) /* # of PTEs in a PTP */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + +/* + * Mach derived conversion macros + */ +#define i386_round_pdr(x) ((((unsigned)(x)) + PDOFSET) & ~PDOFSET) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define PD_MASK 0xffc00000 /* page directory address bits */ +#define PT_MASK 0x003ff000 /* page table address bits */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * PTP macros: + * A PTP's index is the PD index of the PDE that points to it. + * A PTP's offset is the byte-offset in the PTE space that this PTP is at. + * A PTP's VA is the first VA mapped by that PTP. 
+ * + * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * Access PD and PT + */ +#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)]) + +/* + * here we define the data types for PDEs and PTEs + */ +typedef u_int32_t pd_entry_t; /* PDE */ +typedef u_int32_t pt_entry_t; /* PTE */ + +/* + * Number of PTE's per cache line. 4 byte pte, 32-byte cache line + * Used to avoid false sharing of cache lines. + */ +#define NPTECL 8 + /* * TLB Shootdown: * @@ -277,13 +514,13 @@ struct pmap_tlb_shootdown_job { TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; vaddr_t pj_va; /* virtual address */ pmap_t pj_pmap; /* the pmap which maps the address */ - pt_entry_t pj_pte; /* the PTE bits */ + u_int32_t pj_pte; /* the PTE bits */ struct pmap_tlb_shootdown_job *pj_nextfree; }; struct pmap_tlb_shootdown_q { TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; - int pq_pte; /* aggregate PTE bits */ + u_int32_t pq_pte; /* aggregate PTE bits */ int pq_count; /* number of pending requests */ struct mutex pq_mutex; /* mutex on queue */ int pq_flushg; /* pending flush global */ @@ -305,7 +542,8 @@ struct pmap_tlb_shootdown_job *pj_page, *pj_free; * global data structures */ -struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ +struct pmap kernel_pmap_store /* the kernel's pmap (proc0) */ + __attribute__((aligned(32))); /* * nkpde is the number of kernel PTPs allocated for the kernel at @@ -315,6 +553,7 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ */ int nkpde = NKPTP; +int nkptp_max = 1024 - (KERNBASE/NBPD) - 1; /* largest value (-1 for APTP space) */ #ifdef NKPDE #error "obsolete NKPDE: use NKPTP" #endif @@ -341,8 +580,8 @@ paddr_t hole_end; /* PA of end of 
"hole" */ * other data structures */ -static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ -static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ +u_int32_t protection_codes[8]; /* maps MI prot to i386 prot code */ +boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ /* * the following two vaddr_t's are used during system startup @@ -351,9 +590,10 @@ static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ * VM space is turned over to the kernel_map vm_map. */ -static vaddr_t virtual_avail; /* VA of first free KVA */ -static vaddr_t virtual_end; /* VA of last free KVA */ +vaddr_t virtual_avail; /* VA of first free KVA */ +vaddr_t virtual_end; /* VA of last free KVA */ +vaddr_t vm_max_address = (PDSLOT_PTE << PDSHIFT) + (PDSLOT_PTE << PGSHIFT); /* * pv_page management structures: locked by pvalloc_lock @@ -374,8 +614,8 @@ static vaddr_t pv_cachedva; /* cached VA for later use */ * linked list of all non-kernel pmaps */ -static struct pmap_head pmaps; -static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ +struct pmap_head pmaps; +struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ /* * pool that pmap structures are allocated from @@ -402,7 +642,7 @@ struct pool pmap_pmap_pool; */ static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; -static caddr_t csrcp, cdstp, zerop, ptpp; +caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp; caddr_t vmmap; /* XXX: used by mem.c... 
it should really uvm_map_reserve it */ #ifdef __NetBSD__ @@ -424,46 +664,27 @@ extern vaddr_t pentium_idt_vaddr; */ struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); -struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t); -struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ #define ALLOCPV_NEED 0 /* need PV now */ #define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ #define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); -void pmap_enter_pv(struct pv_head *, - struct pv_entry *, struct pmap *, - vaddr_t, struct vm_page *); -void pmap_free_pv(struct pmap *, struct pv_entry *); -void pmap_free_pvs(struct pmap *, struct pv_entry *); -void pmap_free_pv_doit(struct pv_entry *); -void pmap_free_pvpage(void); -struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); -boolean_t pmap_is_curpmap(struct pmap *); -boolean_t pmap_is_active(struct pmap *, int); -pt_entry_t *pmap_map_ptes(struct pmap *); -struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, - vaddr_t); -boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, - vaddr_t, int32_t *); -void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, - vaddr_t, vaddr_t, int32_t *); -struct vm_page *pmap_steal_ptp(struct uvm_object *, vaddr_t); -vaddr_t pmap_tmpmap_pa(paddr_t); -pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); -void pmap_tmpunmap_pa(void); -void pmap_tmpunmap_pvepte(struct pv_entry *); -void pmap_apte_flush(struct pmap *); -boolean_t pmap_try_steal_pv(struct pv_head *, - struct pv_entry *, - struct pv_entry *); -void pmap_unmap_ptes(struct pmap *); -void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, - pt_entry_t); - -void pmap_pinit(pmap_t); -void pmap_release(pmap_t); - -void pmap_zero_phys(paddr_t); +struct vm_page *pmap_alloc_ptp_86(struct pmap *, int, boolean_t); +struct vm_page *pmap_get_ptp_86(struct pmap *, int, boolean_t); +struct vm_page 
*pmap_steal_ptp_86(struct uvm_object *, vaddr_t); +pt_entry_t *pmap_map_ptes_86(struct pmap *); +void pmap_unmap_ptes_86(struct pmap *); +boolean_t pmap_remove_pte_86(struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t, int32_t *); +void pmap_remove_ptes_86(struct pmap *, struct vm_page *, vaddr_t, + vaddr_t, vaddr_t, int32_t *); +vaddr_t pmap_tmpmap_pa_86(paddr_t); +pt_entry_t *pmap_tmpmap_pvepte_86(struct pv_entry *); +void pmap_tmpunmap_pa_86(void); +void pmap_tmpunmap_pvepte_86(struct pv_entry *); +boolean_t pmap_try_steal_pv_86(struct pv_head *, + struct pv_entry *, struct pv_entry *); + +void pmap_release(pmap_t); void setcslimit(struct pmap *, struct trapframe *, struct pcb *, vaddr_t); @@ -503,14 +724,13 @@ pmap_is_active(pmap, cpu_id) */ vaddr_t -pmap_tmpmap_pa(pa) - paddr_t pa; +pmap_tmpmap_pa_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); #if defined(DIAGNOSTIC) if (*ptpte) panic("pmap_tmpmap_pa: ptp_pte in use?"); @@ -524,13 +744,13 @@ pmap_tmpmap_pa(pa) */ void -pmap_tmpunmap_pa() +pmap_tmpunmap_pa_86() { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); #if defined(DIAGNOSTIC) if (!pmap_valid_entry(*ptpte)) panic("pmap_tmpunmap_pa: our pte invalid?"); @@ -551,8 +771,7 @@ pmap_tmpunmap_pa() */ pt_entry_t * -pmap_tmpmap_pvepte(pve) - struct pv_entry *pve; +pmap_tmpmap_pvepte_86(struct pv_entry *pve) { #ifdef DIAGNOSTIC if (pve->pv_pmap == pmap_kernel()) @@ -563,7 +782,7 @@ pmap_tmpmap_pvepte(pve) if (pmap_is_curpmap(pve->pv_pmap)) return(vtopte(pve->pv_va)); - return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) + return(((pt_entry_t *)pmap_tmpmap_pa_86(VM_PAGE_TO_PHYS(pve->pv_ptp))) + ptei((unsigned)pve->pv_va)); } @@ -572,14 +791,13 @@ pmap_tmpmap_pvepte(pve) */ void 
-pmap_tmpunmap_pvepte(pve) - struct pv_entry *pve; +pmap_tmpunmap_pvepte_86(struct pv_entry *pve) { /* was it current pmap? if so, return */ if (pmap_is_curpmap(pve->pv_pmap)) return; - pmap_tmpunmap_pa(); + pmap_tmpunmap_pa_86(); } void @@ -622,8 +840,7 @@ pmap_apte_flush(struct pmap *pmap) */ pt_entry_t * -pmap_map_ptes(pmap) - struct pmap *pmap; +pmap_map_ptes_86(struct pmap *pmap) { pd_entry_t opde; @@ -662,12 +879,11 @@ pmap_map_ptes(pmap) */ void -pmap_unmap_ptes(pmap) - struct pmap *pmap; +pmap_unmap_ptes_86(struct pmap *pmap) { - if (pmap == pmap_kernel()) { + if (pmap == pmap_kernel()) return; - } + if (pmap_is_curpmap(pmap)) { simple_unlock(&pmap->pm_obj.vmobjlock); } else { @@ -682,7 +898,7 @@ pmap_unmap_ptes(pmap) void pmap_exec_account(struct pmap *pm, vaddr_t va, - pt_entry_t opte, pt_entry_t npte) + u_int32_t opte, u_int32_t npte) { if (curproc == NULL || curproc->p_vmspace == NULL || pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) @@ -756,6 +972,48 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) return (1); } +u_int32_t +pmap_pte_set_86(vaddr_t va, paddr_t pa, u_int32_t bits) +{ + pt_entry_t pte, *ptep = vtopte(va); + + pte = i386_atomic_testset_ul(ptep, pa | bits); /* zap! 
*/ + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_setbits_86(vaddr_t va, u_int32_t set, u_int32_t clr) +{ + pt_entry_t *ptep = vtopte(va); + pt_entry_t pte = *ptep; + + *ptep = (pte | set) & ~clr; + return (pte & ~PG_FRAME); + +} + +u_int32_t +pmap_pte_bits_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & ~PG_FRAME); +} + +paddr_t +pmap_pte_paddr_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & PG_FRAME); +} + +paddr_t +vtophys(vaddr_t va) +{ + return ((*vtopte(va) & PG_FRAME) | (va & ~PG_FRAME)); +} + void setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb, vaddr_t limit) @@ -813,16 +1071,15 @@ pmap_kenter_pa(va, pa, prot) paddr_t pa; vm_prot_t prot; { - pt_entry_t *pte, opte, npte; + u_int32_t bits; - pte = vtopte(va); - npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g; - opte = i386_atomic_testset_ul(pte, npte); /* zap! */ - if (pmap_valid_entry(opte)) { + bits = pmap_pte_set(va, pa, ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | + PG_V | pmap_pg_g); + if (pmap_valid_entry(bits)) { #ifdef MULTIPROCESSOR int32_t cpumask = 0; - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); pmap_tlb_shootnow(cpumask); #else /* Don't bother deferring in the single CPU case. */ @@ -845,25 +1102,21 @@ pmap_kremove(va, len) vaddr_t va; vsize_t len; { - pt_entry_t *pte, opte; + u_int32_t bits; #ifdef MULTIPROCESSOR int32_t cpumask = 0; #endif len >>= PAGE_SHIFT; for ( /* null */ ; len ; len--, va += PAGE_SIZE) { - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); - opte = i386_atomic_testset_ul(pte, 0); /* zap! 
*/ + bits = pmap_pte_set(va, 0, 0); #ifdef DIAGNOSTIC - if (opte & PG_PVLIST) + if (bits & PG_PVLIST) panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va); #endif - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) + if ((bits & (PG_V | PG_U)) == (PG_V | PG_U)) #ifdef MULTIPROCESSOR - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); #else pmap_update_pg(va); #endif @@ -964,8 +1217,8 @@ pmap_bootstrap(kva_start) kpm->pm_obj.uo_npages = 0; kpm->pm_obj.uo_refs = 1; bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ - kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); - kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_pdir = (vaddr_t)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = atop(kva_start - VM_MIN_KERNEL_ADDRESS); @@ -1008,27 +1261,27 @@ pmap_bootstrap(kva_start) * as well; we could waste less space if we knew the largest * CPU ID beforehand. 
*/ - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; - cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; + pmap_cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; - zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; + pmap_zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; - ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; + pmap_ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL; pte += I386_MAXPROCS * NPTECL; #else - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ virtual_avail += PAGE_SIZE; pte++; /* advance */ - cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + pmap_cdstp = (caddr_t) virtual_avail; cdst_pte = pte; virtual_avail += PAGE_SIZE; pte++; - zerop = (caddr_t) virtual_avail; zero_pte = pte; + pmap_zerop = (caddr_t) virtual_avail; zero_pte = pte; virtual_avail += PAGE_SIZE; pte++; - ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + pmap_ptpp = (caddr_t) virtual_avail; ptp_pte = pte; virtual_avail += PAGE_SIZE; pte++; #endif @@ -1085,7 +1338,7 @@ pmap_bootstrap(kva_start) * initialize the pmap pool. */ - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", + pool_init(&pmap_pmap_pool, sizeof(struct pmap), 32, 0, 0, "pmappl", &pool_allocator_nointr); /* @@ -1135,6 +1388,14 @@ pmap_bootstrap(kva_start) } #endif +#if defined(MULTIPROCESSOR) + /* install the page after boot args as PT page for first 4M */ + pmap_enter(pmap_kernel(), (u_long)vtopte(0), + round_page((vaddr_t)(bootargv + bootargc)), + VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); + memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */ +#endif + /* * ensure the TLB is sync'd with reality by flushing it... 
*/ @@ -1442,9 +1703,8 @@ steal_one: */ boolean_t -pmap_try_steal_pv(pvh, cpv, prevpv) - struct pv_head *pvh; - struct pv_entry *cpv, *prevpv; +pmap_try_steal_pv_86(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv) { pt_entry_t *ptep, opte; #ifdef MULTIPROCESSOR @@ -1464,7 +1724,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv) * mapping from the pmap. */ - ptep = pmap_tmpmap_pvepte(cpv); + ptep = pmap_tmpmap_pvepte_86(cpv); if (*ptep & PG_W) { ptep = NULL; /* wired page, avoid stealing this one */ } else { @@ -1477,7 +1737,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv) if (pmap_is_curpmap(cpv->pv_pmap)) pmap_update_pg(cpv->pv_va); #endif - pmap_tmpunmap_pvepte(cpv); + pmap_tmpunmap_pvepte_86(cpv); } if (ptep == NULL) { simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); @@ -1772,7 +2032,7 @@ pmap_remove_pv(pvh, pmap, va) */ struct vm_page * -pmap_alloc_ptp(pmap, pde_index, just_try) +pmap_alloc_ptp_86(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; boolean_t just_try; @@ -1784,7 +2044,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try) if (ptp == NULL) { if (just_try) return(NULL); - ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + ptp = pmap_steal_ptp_86(&pmap->pm_obj, ptp_i2o(pde_index)); if (ptp == NULL) { return (NULL); } @@ -1795,8 +2055,8 @@ pmap_alloc_ptp(pmap, pde_index, just_try) /* got one! 
*/ ptp->flags &= ~PG_BUSY; /* never busy */ ptp->wire_count = 1; /* no mappings yet */ - pmap->pm_pdir[pde_index] = - (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + PDE(pmap, pde_index) = + (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; return(ptp); @@ -1814,9 +2074,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try) */ struct vm_page * -pmap_steal_ptp(obj, offset) - struct uvm_object *obj; - vaddr_t offset; +pmap_steal_ptp_86(struct uvm_object *obj, vaddr_t offset) { struct vm_page *ptp = NULL; struct pmap *firstpmap; @@ -1850,21 +2108,21 @@ pmap_steal_ptp(obj, offset) idx = ptp_o2i(ptp->offset); #ifdef DIAGNOSTIC if (VM_PAGE_TO_PHYS(ptp) != - (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + (PDE(pmaps_hand, idx) & PG_FRAME)) panic("pmap_steal_ptp: PTP mismatch!"); #endif ptes = (pt_entry_t *) - pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + pmap_tmpmap_pa_86(VM_PAGE_TO_PHYS(ptp)); for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) if ((ptes[lcv] & (PG_V|PG_W)) == (PG_V|PG_W)) break; if (lcv == PTES_PER_PTP) - pmap_remove_ptes(pmaps_hand, ptp, + pmap_remove_ptes_86(pmaps_hand, ptp, (vaddr_t)ptes, ptp_i2v(idx), ptp_i2v(idx+1), &cpumask); - pmap_tmpunmap_pa(); + pmap_tmpunmap_pa_86(); if (lcv != PTES_PER_PTP) /* wired, try next PTP */ @@ -1874,7 +2132,7 @@ pmap_steal_ptp(obj, offset) * got it!!! */ - pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + PDE(pmaps_hand, idx) = 0; /* zap! */ pmaps_hand->pm_stats.resident_count--; #ifdef MULTIPROCESSOR pmap_apte_flush(pmaps_hand); @@ -1918,18 +2176,15 @@ pmap_steal_ptp(obj, offset) */ struct vm_page * -pmap_get_ptp(pmap, pde_index, just_try) - struct pmap *pmap; - int pde_index; - boolean_t just_try; +pmap_get_ptp_86(struct pmap *pmap, int pde_index, boolean_t just_try) { struct vm_page *ptp; - if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { + if (pmap_valid_entry(PDE(pmap, pde_index))) { /* valid... 
check hint (saves us a PA->PG lookup) */ if (pmap->pm_ptphint && - (pmap->pm_pdir[pde_index] & PG_FRAME) == + (PDE(pmap, pde_index) & PG_FRAME) == VM_PAGE_TO_PHYS(pmap->pm_ptphint)) return(pmap->pm_ptphint); @@ -1943,7 +2198,7 @@ pmap_get_ptp(pmap, pde_index, just_try) } /* allocate a new PTP (updates ptphint) */ - return(pmap_alloc_ptp(pmap, pde_index, just_try)); + return(pmap_alloc_ptp_86(pmap, pde_index, just_try)); } /* @@ -1963,18 +2218,7 @@ pmap_create() struct pmap *pmap; pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); - pmap_pinit(pmap); - return(pmap); -} -/* - * pmap_pinit: given a zero'd pmap structure, init it. - */ - -void -pmap_pinit(pmap) - struct pmap *pmap; -{ /* init uvm_object */ simple_lock_init(&pmap->pm_obj.vmobjlock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ @@ -1987,26 +2231,37 @@ pmap_pinit(pmap) pmap->pm_hiexec = 0; pmap->pm_flags = 0; + /* init the LDT */ + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1); + pmap_pinit_pd(pmap); + return(pmap); +} + +/* + * pmap_pinit: given a zero'd pmap structure, init it. 
+ */ + +void +pmap_pinit_pd_86(struct pmap *pmap) +{ /* allocate PDP */ - pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + pmap->pm_pdir = uvm_km_alloc(kernel_map, NBPG); if (pmap->pm_pdir == NULL) panic("pmap_pinit: kernel_map out of virtual space!"); - (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, - (paddr_t *)&pmap->pm_pdirpa); + pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, &pmap->pm_pdirpa); + pmap->pm_pdirsize = NBPG; /* init PDP */ /* zero init area */ - bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); /* put in recursive PDE to map the PTEs */ - pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; - - /* init the LDT */ - pmap->pm_ldt = NULL; - pmap->pm_ldt_len = 0; - pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + PDE(pmap, PDSLOT_PTE) = pmap->pm_pdirpa | PG_V | PG_KW; /* * we need to lock pmaps_lock to prevent nkpde from changing on @@ -2016,10 +2271,10 @@ pmap_pinit(pmap) */ simple_lock(&pmaps_lock); /* put in kernel VM PDEs */ - bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN), nkpde * sizeof(pd_entry_t)); /* zero the rest */ - bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + bzero(&PDE(pmap, PDSLOT_KERN + nkpde), NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); LIST_INSERT_HEAD(&pmaps, pmap, pm_list); simple_unlock(&pmaps_lock); @@ -2043,9 +2298,8 @@ pmap_destroy(pmap) simple_lock(&pmap->pm_obj.vmobjlock); refs = --pmap->pm_obj.uo_refs; simple_unlock(&pmap->pm_obj.vmobjlock); - if (refs > 0) { + if (refs > 0) return; - } /* * reference count is zero, free pmap resources and then free pmap. @@ -2100,7 +2354,7 @@ pmap_release(pmap) * MULTIPROCESSOR -- no need to flush out of other processors' * APTE space because we do that in pmap_unmap_ptes(). 
*/ - uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); + uvm_km_free(kernel_map, pmap->pm_pdir, pmap->pm_pdirsize); #ifdef USER_LDT if (pmap->pm_flags & PMF_USER_LDT) { @@ -2286,18 +2540,15 @@ pmap_deactivate(p) */ boolean_t -pmap_extract(pmap, va, pap) - struct pmap *pmap; - vaddr_t va; - paddr_t *pap; +pmap_extract_86(struct pmap *pmap, vaddr_t va, paddr_t *pap) { paddr_t retval; pt_entry_t *ptes; - if (pmap->pm_pdir[pdei(va)]) { - ptes = pmap_map_ptes(pmap); + if (PDE(pmap, pdei(va))) { + ptes = pmap_map_ptes_86(pmap); retval = (paddr_t)(ptes[atop(va)] & PG_FRAME); - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); if (pap != NULL) *pap = retval | (va & ~PG_FRAME); return (TRUE); @@ -2335,13 +2586,13 @@ pmap_zero_page(struct vm_page *pg) * initialized. */ void -pmap_zero_phys(paddr_t pa) +pmap_zero_phys_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -2359,14 +2610,13 @@ pmap_zero_phys(paddr_t pa) */ boolean_t -pmap_zero_page_uncached(pa) - paddr_t pa; +pmap_zero_page_uncached_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -2387,7 +2637,7 @@ pmap_zero_page_uncached(pa) */ void -pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) +pmap_copy_page_86(struct vm_page *srcpg, struct vm_page *dstpg) { paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); @@ -2396,8 +2646,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) #endif pt_entry_t *spte = PTESLEW(csrc_pte,id); pt_entry_t *dpte = PTESLEW(cdst_pte,id); - caddr_t csrcva = VASLEW(csrcp, id); - caddr_t cdstva = VASLEW(cdstp, id); + caddr_t csrcva = VASLEW(pmap_csrcp, id); + caddr_t cdstva = VASLEW(pmap_cdstp, id); 
#ifdef DIAGNOSTIC if (*spte || *dpte) @@ -2431,12 +2681,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) */ void -pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) - struct pmap *pmap; - struct vm_page *ptp; - vaddr_t ptpva; - vaddr_t startva, endva; - int32_t *cpumaskp; +pmap_remove_ptes_86(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, + vaddr_t startva, vaddr_t endva, int32_t *cpumaskp) { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; @@ -2494,7 +2740,7 @@ pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) #ifdef DIAGNOSTIC if (bank == -1) panic("pmap_remove_ptes: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", + "PG_PVLIST, va = 0x%lx, pa = 0x%llx", startva, (u_long)(opte & PG_FRAME)); #endif @@ -2528,7 +2774,7 @@ pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) */ boolean_t -pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) +pmap_remove_pte_86(pmap, ptp, pte, va, cpumaskp) struct pmap *pmap; struct vm_page *ptp; pt_entry_t *pte; @@ -2579,7 +2825,7 @@ pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) #ifdef DIAGNOSTIC if (bank == -1) panic("pmap_remove_pte: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, + "PG_PVLIST, va = 0x%lx, pa = 0x%llx", va, (u_long)(opte & PG_FRAME)); #endif @@ -2601,9 +2847,7 @@ pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) */ void -pmap_remove(pmap, sva, eva) - struct pmap *pmap; - vaddr_t sva, eva; +pmap_remove_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva) { pt_entry_t *ptes, opte; boolean_t result; @@ -2617,7 +2861,7 @@ pmap_remove(pmap, sva, eva) */ PMAP_MAP_TO_HEAD_LOCK(); - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * removing one page? take shortcut function. 
@@ -2625,10 +2869,10 @@ pmap_remove(pmap, sva, eva) if (sva + PAGE_SIZE == eva) { - if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { + if (pmap_valid_entry(PDE(pmap, pdei(sva)))) { /* PA of the PTP */ - ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME; + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; /* get PTP if non-kernel mapping */ @@ -2651,7 +2895,7 @@ pmap_remove(pmap, sva, eva) } /* do it! */ - result = pmap_remove_pte(pmap, ptp, + result = pmap_remove_pte_86(pmap, ptp, &ptes[atop(sva)], sva, &cpumask); /* @@ -2662,7 +2906,7 @@ pmap_remove(pmap, sva, eva) if (result && ptp && ptp->wire_count <= 1) { /* zap! */ opte = i386_atomic_testset_ul( - &pmap->pm_pdir[pdei(sva)], 0); + &PDE(pmap, pdei(sva)), 0); #ifdef MULTIPROCESSOR /* * XXXthorpej Redundant shootdown can happen @@ -2693,7 +2937,7 @@ pmap_remove(pmap, sva, eva) } } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlock pmap */ + pmap_unmap_ptes_86(pmap); /* unlock pmap */ PMAP_MAP_TO_HEAD_UNLOCK(); return; } @@ -2723,12 +2967,12 @@ pmap_remove(pmap, sva, eva) /* XXXCDC: ugly hack to avoid freeing PDP here */ continue; - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) /* valid block? */ continue; /* PA of the PTP */ - ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME); + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; /* get PTP if non-kernel mapping */ if (pmap == pmap_kernel()) { @@ -2747,14 +2991,14 @@ pmap_remove(pmap, sva, eva) #endif } } - pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(sva)], + pmap_remove_ptes_86(pmap, ptp, (vaddr_t)&ptes[atop(sva)], sva, blkendva, &cpumask); /* if PTP is no longer being used, free it! */ if (ptp && ptp->wire_count <= 1) { /* zap! 
*/ opte = i386_atomic_testset_ul( - &pmap->pm_pdir[pdei(sva)], 0); + &PDE(pmap, pdei(sva)), 0); #if defined(MULTIPROCESSOR) /* * XXXthorpej Redundant shootdown can happen here @@ -2783,7 +3027,7 @@ pmap_remove(pmap, sva, eva) } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); } @@ -2795,8 +3039,7 @@ pmap_remove(pmap, sva, eva) */ void -pmap_page_remove(pg) - struct vm_page *pg; +pmap_page_remove_86(struct vm_page *pg) { int bank, off; struct pv_head *pvh; @@ -2823,20 +3066,19 @@ pmap_page_remove(pg) simple_lock(&pvh->pvh_lock); for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ #ifdef DIAGNOSTIC - if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) { + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) printf("pmap_page_remove: found pager VA on pv_list\n"); - } - if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & - PG_FRAME) - != VM_PAGE_TO_PHYS(pve->pv_ptp)) { + if (pve->pv_ptp && (PDE(pve->pv_pmap, + pdei(pve->pv_va)) & PG_FRAME) != + VM_PAGE_TO_PHYS(pve->pv_ptp)) { printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", pg, pve->pv_va, pve->pv_ptp); printf("pmap_page_remove: PTP's phys addr: " "actual=%x, recorded=%lx\n", - (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + (PDE(pve->pv_pmap, pdei(pve->pv_va)) & PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); panic("pmap_page_remove: mapped managed page has " "invalid pv_ptp field"); @@ -2872,8 +3114,7 @@ pmap_page_remove(pg) /* zap! 
*/ opte = i386_atomic_testset_ul( - &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)], - 0); + &PDE(pve->pv_pmap, pdei(pve->pv_va)), 0); pmap_tlb_shootdown(curpcb->pcb_pmap, ((vaddr_t)ptes) + pve->pv_ptp->offset, opte, &cpumask); @@ -2895,7 +3136,7 @@ pmap_page_remove(pg) uvm_pagefree(pve->pv_ptp); } } - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } pmap_free_pvs(NULL, pvh->pvh_list); pvh->pvh_list = NULL; @@ -2918,9 +3159,7 @@ pmap_page_remove(pg) */ boolean_t -pmap_test_attrs(pg, testbits) - struct vm_page *pg; - int testbits; +pmap_test_attrs_86(struct vm_page *pg, int testbits) { int bank, off; char *myattrs; @@ -2957,9 +3196,9 @@ pmap_test_attrs(pg, testbits) for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); + ptes = pmap_map_ptes_86(pve->pv_pmap); pte = ptes[atop(pve->pv_va)]; - pmap_unmap_ptes(pve->pv_pmap); + pmap_unmap_ptes_86(pve->pv_pmap); *myattrs |= pte; } @@ -2981,9 +3220,7 @@ pmap_test_attrs(pg, testbits) */ boolean_t -pmap_change_attrs(pg, setbits, clearbits) - struct vm_page *pg; - int setbits, clearbits; +pmap_change_attrs_86(struct vm_page *pg, int setbits, int clearbits) { u_int32_t result; int bank, off; @@ -3011,12 +3248,12 @@ pmap_change_attrs(pg, setbits, clearbits) for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { #ifdef DIAGNOSTIC - if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) + if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) panic("pmap_change_attrs: mapping without PTP " "detected"); #endif - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ npte = ptes[atop(pve->pv_va)]; result |= (npte & clearbits); npte = (npte | setbits) & ~clearbits; @@ -3026,7 +3263,7 @@ pmap_change_attrs(pg, setbits, clearbits) pmap_tlb_shootdown(pve->pv_pmap, atop(pve->pv_va), opte, &cpumask); } - pmap_unmap_ptes(pve->pv_pmap); /* 
unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } simple_unlock(&pvh->pvh_lock); @@ -3062,17 +3299,15 @@ pmap_change_attrs(pg, setbits, clearbits) */ void -pmap_write_protect(pmap, sva, eva, prot) - struct pmap *pmap; - vaddr_t sva, eva; - vm_prot_t prot; +pmap_write_protect_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva, + vm_prot_t prot) { pt_entry_t *ptes, *spte, *epte, npte; vaddr_t blockend; u_int32_t md_prot; int32_t cpumask = 0; - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* should be ok, but just in case ... */ sva &= PG_FRAME; @@ -3098,7 +3333,7 @@ pmap_write_protect(pmap, sva, eva, prot) continue; /* empty block? */ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) continue; md_prot = protection_codes[prot]; @@ -3127,7 +3362,7 @@ pmap_write_protect(pmap, sva, eva, prot) } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pmap); /* unlocks pmap */ } /* @@ -3141,14 +3376,12 @@ pmap_write_protect(pmap, sva, eva, prot) */ void -pmap_unwire(pmap, va) - struct pmap *pmap; - vaddr_t va; +pmap_unwire_86(struct pmap *pmap, vaddr_t va) { pt_entry_t *ptes; - if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { - ptes = pmap_map_ptes(pmap); /* locks pmap */ + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ #ifdef DIAGNOSTIC if (!pmap_valid_entry(ptes[atop(va)])) @@ -3164,7 +3397,7 @@ pmap_unwire(pmap, va) "didn't change!\n", pmap, va); } #endif - pmap_unmap_ptes(pmap); /* unlocks map */ + pmap_unmap_ptes_86(pmap); /* unlocks map */ } #ifdef DIAGNOSTIC else { @@ -3211,12 +3444,8 @@ pmap_collect(pmap) */ int -pmap_enter(pmap, va, pa, prot, flags) - struct pmap *pmap; - vaddr_t va; - paddr_t pa; - vm_prot_t prot; - int flags; +pmap_enter_86(struct pmap *pmap, vaddr_t va, paddr_t pa, + vm_prot_t prot, int flags) { pt_entry_t *ptes, opte, npte; struct vm_page *ptp; 
@@ -3235,7 +3464,7 @@ pmap_enter(pmap, va, pa, prot, flags) /* sanity check: kernel PTPs should already have been pre-allocated */ if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + !pmap_valid_entry(PDE(pmap, pdei(va)))) panic("pmap_enter: missing kernel PTP!"); #endif @@ -3246,11 +3475,11 @@ pmap_enter(pmap, va, pa, prot, flags) * map in ptes and get a pointer to our PTP (unless we are the kernel) */ - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + ptp = pmap_get_ptp_86(pmap, pdei(va), FALSE); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { error = ENOMEM; @@ -3292,7 +3521,7 @@ pmap_enter(pmap, va, pa, prot, flags) if (bank == -1) panic("pmap_enter: same pa PG_PVLIST " "mapping with unmanaged page " - "pa = 0x%lx (0x%lx)", pa, + "pa = 0x%llx (0x%lx)", pa, atop(pa)); #endif pvh = &vm_physmem[bank].pmseg.pvhead[off]; @@ -3320,7 +3549,7 @@ pmap_enter(pmap, va, pa, prot, flags) if (bank == -1) panic("pmap_enter: PG_PVLIST mapping with " "unmanaged page " - "pa = 0x%lx (0x%lx)", pa, atop(pa)); + "pa = 0x%llx (0x%lx)", pa, atop(pa)); #endif pvh = &vm_physmem[bank].pmseg.pvhead[off]; simple_lock(&pvh->pvh_lock); @@ -3406,7 +3635,7 @@ enter_now: error = 0; out: - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); return error; @@ -3420,8 +3649,7 @@ out: */ vaddr_t -pmap_growkernel(maxkvaddr) - vaddr_t maxkvaddr; +pmap_growkernel_86(vaddr_t maxkvaddr) { struct pmap *kpm = pmap_kernel(), *pm; int needed_kpde; /* needed number of kernel PTPs */ @@ -3452,10 +3680,9 @@ pmap_growkernel(maxkvaddr) if (uvm_page_physget(&ptaddr) == FALSE) panic("pmap_growkernel: out of memory"); - pmap_zero_phys(ptaddr); + pmap_zero_phys_86(ptaddr); - kpm->pm_pdir[PDSLOT_KERN + nkpde] = - ptaddr | PG_RW | PG_V; + PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V; /* count PTP as resident */ 
kpm->pm_stats.resident_count++; @@ -3468,18 +3695,18 @@ pmap_growkernel(maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { + if (pmap_alloc_ptp_86(kpm, PDSLOT_KERN+nkpde, FALSE) == NULL) { panic("pmap_growkernel: alloc ptp failed"); } /* PG_u not for kernel */ - kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u; + PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u; /* distribute new kernel PTP to all active pmaps */ simple_lock(&pmaps_lock); LIST_FOREACH(pm, &pmaps, pm_list) { - pm->pm_pdir[PDSLOT_KERN + nkpde] = - kpm->pm_pdir[PDSLOT_KERN + nkpde]; + PDE(pm, PDSLOT_KERN + nkpde) = + PDE(kpm, PDSLOT_KERN + nkpde); } simple_unlock(&pmaps_lock); } @@ -3492,7 +3719,7 @@ out: } #ifdef DEBUG -void pmap_dump(struct pmap *, vaddr_t, vaddr_t); +void pmap_dump_86(struct pmap *, vaddr_t, vaddr_t); /* * pmap_dump: dump all the mappings from a pmap @@ -3501,7 +3728,7 @@ void pmap_dump(struct pmap *, vaddr_t, vaddr_t); */ void -pmap_dump(pmap, sva, eva) +pmap_dump_86(pmap, sva, eva) struct pmap *pmap; vaddr_t sva, eva; { @@ -3521,7 +3748,7 @@ pmap_dump(pmap, sva, eva) */ PMAP_MAP_TO_HEAD_LOCK(); - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * dumping a range of pages: we dump in PTP sized blocks (4MB) @@ -3535,7 +3762,7 @@ pmap_dump(pmap, sva, eva) blkendva = eva; /* valid block? 
*/ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) continue; pte = &ptes[atop(sva)]; @@ -3546,7 +3773,7 @@ pmap_dump(pmap, sva, eva) sva, *pte, *pte & PG_FRAME); } } - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); } #endif @@ -3615,7 +3842,7 @@ void pmap_tlb_shootdown(pmap, va, pte, cpumaskp) pmap_t pmap; vaddr_t va; - pt_entry_t pte; + u_int32_t pte; int32_t *cpumaskp; { struct cpu_info *ci, *self; @@ -3839,3 +4066,31 @@ pmap_tlb_shootdown_job_put(pq, pj) pq->pq_count--; } + +#ifndef SMALL_KERNEL +u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t) = + pmap_pte_set_86; +u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t) = + pmap_pte_setbits_86; +u_int32_t (*pmap_pte_bits_p)(vaddr_t) = pmap_pte_bits_86; +paddr_t (*pmap_pte_paddr_p)(vaddr_t) = pmap_pte_paddr_86; +boolean_t (*pmap_change_attrs_p)(struct vm_page *, int, int) = + pmap_change_attrs_86; +int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int) = + pmap_enter_86; +boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *) = pmap_extract_86; +vaddr_t (*pmap_growkernel_p)(vaddr_t) = pmap_growkernel_86; +void (*pmap_page_remove_p)(struct vm_page *) = pmap_page_remove_86; +void (*pmap_remove_p)(struct pmap *, vaddr_t, vaddr_t) = pmap_remove_86; +boolean_t (*pmap_test_attrs_p)(struct vm_page *, int) = pmap_test_attrs_86; +void (*pmap_unwire_p)(struct pmap *, vaddr_t) = pmap_unwire_86; +void (*pmap_write_protect_p)(struct pmap *, vaddr_t, vaddr_t, vm_prot_t) = + pmap_write_protect_86; +void (*pmap_pinit_pd_p)(pmap_t) = pmap_pinit_pd_86; +void (*pmap_zero_phys_p)(paddr_t) = pmap_zero_phys_86; +boolean_t (*pmap_zero_page_uncached_p)(paddr_t) = pmap_zero_page_uncached_86; +void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *) = + pmap_copy_page_86; +boolean_t (*pmap_try_steal_pv_p)(struct pv_head *, struct pv_entry *, + struct pv_entry *) = pmap_try_steal_pv_86; +#endif /* !SMALL_KERNEL */ diff --git 
a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c new file mode 100644 index 00000000000..a121fd9d4d3 --- /dev/null +++ b/sys/arch/i386/i386/pmapae.c @@ -0,0 +1,2420 @@ +/* $OpenBSD: pmapae.c,v 1.1 2006/04/27 15:37:51 mickey Exp $ */ + +/* + * Copyright (c) 2006 Michael Shalayeff + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from OpenBSD: pmap.c,v 1.85 2005/11/18 17:05:04 brad Exp + */ +/* + * pmap.c: i386 pmap module rewrite + * Chuck Cranor <chuck@ccrc.wustl.edu> + * 11-Aug-97 + * + * history of this pmap module: in addition to my own input, i used + * the following references for this rewrite of the i386 pmap: + * + * [1] the NetBSD i386 pmap. this pmap appears to be based on the + * BSD hp300 pmap done by Mike Hibler at University of Utah. + * it was then ported to the i386 by William Jolitz of UUNET + * Technologies, Inc. Then Charles M. Hannum of the NetBSD + * project fixed some bugs and provided some speed ups. + * + * [2] the FreeBSD i386 pmap. this pmap seems to be the + * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson + * and David Greenman. + * + * [3] the Mach pmap. this pmap, from CMU, seems to have migrated + * between several processors. the VAX version was done by + * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 + * version was done by Lance Berc, Mike Kupfer, Bob Baron, + * David Golub, and Richard Draves. 
the alpha version was + * done by Alessandro Forin (CMU/Mach) and Chris Demetriou + * (NetBSD/alpha). + */ +/* + * PAE support + * Michael Shalayeff <mickey@lucifier.net> + * + * This module implements PAE mode for i386. + * + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> +#include <sys/user.h> +#include <sys/kernel.h> +#include <sys/mutex.h> + +#include <uvm/uvm.h> + +#include <machine/atomic.h> +#include <machine/cpu.h> +#include <machine/specialreg.h> +#include <machine/gdt.h> + +#include <dev/isa/isareg.h> +#ifdef __NetBSD__ +#include <machine/isa_machdep.h> +#endif +#ifdef __OpenBSD__ +#include <sys/msgbuf.h> +#include <stand/boot/bootarg.h> +#endif + +/* + * this file contains the code for the "pmap module." the module's + * job is to manage the hardware's virtual to physical address mappings. + * note that there are two levels of mapping in the VM system: + * + * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's + * to map ranges of virtual address space to objects/files. for + * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only + * to the file /bin/ls starting at offset zero." note that + * the upper layer mapping is not concerned with how individual + * vm_pages are mapped. + * + * [2] the lower layer of the VM system (the pmap) maintains the mappings + * from virtual addresses. it is concerned with which vm_page is + * mapped where. for example, when you run /bin/ls and start + * at page 0x1000 the fault routine may lookup the correct page + * of the /bin/ls file and then ask the pmap layer to establish + * a mapping for it. + * + * note that information in the lower layer of the VM system can be + * thrown away since it can easily be reconstructed from the info + * in the upper layer. 
+ * + * data structures we use include: + * + * - struct pmap: describes the address space of one thread + * - struct pv_entry: describes one <PMAP,VA> mapping of a PA + * - struct pv_head: there is one pv_head per managed page of + * physical memory. the pv_head points to a list of pv_entry + * structures which describe all the <PMAP,VA> pairs that this + * page is mapped in. this is critical for page based operations + * such as pmap_page_protect() [change protection on _all_ mappings + * of a page] + * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. + * if we run out of pv_entry's we allocate a new pv_page and free + * its pv_entrys. + */ +/* + * i386 PAE hardware Page Tables structure: + * + * the i386 PAE Page Table is a three-level PT which maps 4GB of VA. + * the pagesize is 4K (4096 [0x1000] bytes) or 2MB. + * + * the first level table is called "page directory index" and consists + * of 4 page directory index entries (PDIE) each 64 bits in size. + * + * the second level table is called a "page directory" and it contains + * 512 page directory entries (PDEs). each PDE is + * 8 bytes (a long long), so a PD fits in a single 4K page. this page is + * the page directory page (PDP). each PDE in a PDP maps 2MB of space + * (512 * 2MB = 1GB per PDP). a PDE contains the physical address of the + * third level table: the page table. or, if 2MB pages are being used, + * then the PDE contains the PA of the 2MB page being mapped. + * + * a page table consists of 512 page table entries (PTEs). each PTE is + * 8 bytes (a long long), so a page table also fits in a single 4K page. + * a 4K page being used as a page table is called a page table page (PTP). + * each PTE in a PTP maps one 4K page (512 * 4K = 2MB). a PTE contains + * the physical address of the page it maps and some flag bits (described + * below).
+ * + * the processor has a special register, "cr3", which points to + * the PDP which is currently controlling the mappings of the virtual + * address space. + * + * the following picture shows the translation process for a 4K page: + * + * %cr3 register [PA of PDPT] + * | + * | bits <31-30> of VA + * | index the PDIE (0-3) + * | | + * v v + * +-----------+ + * | PDP Ptr | + * | 4 entries | + * +-----------+ + * | + * PA of PDP + * | + * | + * | bits <29-21> of VA bits <20-12> of VA bits <11-0> + * | index the PDP (0 - 511) index the PTP are the page offset + * | | | | + * | v | | + * +-->+---------+ | | + * | PD Page | PA of v | + * | |-----PTP----->+------------+ | + * | 512 PDE | | page table |--PTE--+ | + * | entries | | (aka PTP) | | | + * +---------+ | 512 PTE | | | + * | entries | | | + * +------------+ | | + * | | + * bits <35-12> bits <11-0> + * p h y s i c a l a d d r + * + * the i386 caches PTEs in a TLB. it is important to flush out old + * TLB mappings when making a change to a mapping. writing to the + * %cr3 will flush the entire TLB. newer processors also have an + * instruction that will invalidate the mapping of a single page (which + * is useful if you are changing a single mapping because it preserves + * all the cached TLB entries). + * + * as shown, bits 35-12 of the PTE contain PA of the page being mapped.
+ * the rest of the PTE is defined as follows: + * bit# name use + * 63 NX no-execute bit (0=ITLB, 1=DTLB), optional + * 11 n/a available for OS use, hardware ignores it + * 10 n/a available for OS use, hardware ignores it + * 9 n/a available for OS use, hardware ignores it + * 8 G global bit (see discussion below) + * 7 PS page size [for PDEs] (0=4k, 1=2M <if supported>) + * 6 D dirty (modified) page + * 5 A accessed (referenced) page + * 4 PCD cache disable + * 3 PWT prevent write through (cache) + * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) + * 1 R/W read/write bit (0=read only, 1=read-write) + * 0 P present (valid) + * + * notes: + * - on the i386 the R/W bit is ignored if processor is in supervisor + * state (bug!) + * - PS is only supported on newer processors + * - PTEs with the G bit are global in the sense that they are not + * flushed from the TLB when %cr3 is written (to flush, use the + * "flush single page" instruction). this is only supported on + * newer processors. this bit can be used to keep the kernel's + * TLB entries around while context switching. since the kernel + * is mapped into all processes at the same place it does not make + * sense to flush these entries when switching from one process' + * pmap to another. + */ +/* + * A pmap describes a process' 4GB virtual address space. This + * virtual address space can be broken up into 2048 2MB regions which + * are described by PDEs in the PDP. The PDEs are defined as follows: + * + * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. + * The following assumes that KERNBASE is 0xd0000000. + * + * PDE#s VA range Usage + * 0->1660 0x0 -> 0xcf800000 user address space, note that the + * max user address is 0xcfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used. + * 1660->1664 0xcf800000-> recursive mapping of PDP (used for + * 0xd0000000 linear mapping of PTPs).
+ * 1664->2044 0xd0000000-> kernel address space (constant + * 0xff800000 across all pmaps/processes). + * 2044->2048 0xff800000-> "alternate" recursive PDP mapping + * <end> (for other pmaps). + * + * + * Note: A recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. In other + * words, the PTE for page 0 is the first pt_entry_t mapped into the 8MB + * recursive area. The PTE for page 1 is the second pt_entry_t. The very + * last pt_entry_t in the 8MB range is the PTE that maps VA 0xfffff000 + * (the last page in a 4GB address space). + * + * All pmaps' PDs must have the same values in slots 1664->2043 so that + * the kernel is always mapped in every process. These values are loaded + * into the PD at pmap creation time. + * + * At any one time only one pmap can be active on a processor. This is + * the pmap whose PDP is pointed to by processor register %cr3. This pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slots #1660-3 as shown above). When the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * Address of PTE = (1660 * 2MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xcf800000 + (VA / 4096) * 8 + * + * What happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? In that + * case we take the PA of the PDP of non-active pmap and put it in + * slots 2044-7 of the active pmap. This causes the non-active pmap's + * PTEs to get mapped in the final 8MB of the 4GB address space + * (i.e. starting at 0xff800000).
+ * + * The following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3->PDPTP) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x200000 + * | | + * | | + * |1660| -> points back to PDP (%cr3) mapping VA 0xcf800000 -> 0xd0000000 + * |1661| (PDP is 4 pages) + * |1662| + * |1663| + * |1664| -> first kernel PTP (maps 0xd0000000 -> 0xd0200000) + * | | + * |2044| -> points to alternate pmap's PDP (maps 0xff800000 -> end) + * |2045| + * |2046| + * |2047| + * +----+ + * + * Note that the PDE#1660 VA (0xcf800000) is defined as "PTE_BASE". + * Note that the PDE#2044 VA (0xff800000) is defined as "APTE_BASE". + * + * Starting at VA 0xcf800000 the current active PDPs (%cr3) act as a + * PDPTP and reference four consecutively mapped pages: + * + * PTP#1660-3 == PDP(%cr3) => maps VA 0xcf800000 -> 0xd0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xcf800000->0xcf801000 + * | | + * | | + * |1660| -> maps the contents of PTP#1660 (the PDP) at VA 0xcfe7c000 + * |1661| + * |1662| + * |1663| + * |1664| -> maps the contents of first kernel PTP + * | | + * |2047| + * +----+ + * + * Note that mapping of the PDP at PTP#1660's VA (0xcfe7c000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xcfe7f3e0) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xcfe7ffe0) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * To set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * Note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xffffc000). + * + * unfortunately we cannot use recursive PDPT from the page tables + * because in their infinite wisdom they have defined cr3 32 bits!
+ * + */ +/* + * memory allocation + * + * - there are three data structures that we must dynamically allocate: + * + * [A] new process' page directory page (PDP) + * - plan 1: done at pmap_create() we use + * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this + * allocation. + * + * if we are low in free physical memory then we sleep in + * uvm_km_alloc -- in this case this is ok since we are creating + * a new pmap and should not be holding any locks. + * + * if the kernel is totally out of virtual space + * (i.e. uvm_km_alloc returns NULL), then we panic. + * + * XXX: the fork code currently has no way to return an "out of + * memory, try again" error code since uvm_fork [fka vm_fork] + * is a void function. + * + * [B] new page tables pages (PTP) + * call uvm_pagealloc() + * => success: zero page, add to pm_pdir + * => failure: we are out of free vm_pages, let pmap_enter() + * tell UVM about it. + * + * note: for kernel PTPs, we start with NKPTP of them. as we map + * kernel memory (at uvm_map time) we check to see if we've grown + * the kernel pmap. if so, we call the optional function + * pmap_growkernel() to grow the kernel PTPs in advance. + * + * [C] pv_entry structures + * - plan 1: try to allocate one off the free list + * => success: done! + * => failure: no more free pv_entrys on the list + * - plan 2: try to allocate a new pv_page to add a chunk of + * pv_entrys to the free list + * [a] obtain a free, unmapped, VA in kmem_map. either + * we have one saved from a previous call, or we allocate + * one now using a "vm_map_lock_try" in uvm_map + * => success: we have an unmapped VA, continue to [b] + * => failure: unable to lock kmem_map or out of VA in it. + * move on to plan 3. + * [b] allocate a page in kmem_object for the VA + * => success: map it in, free the pv_entry's, DONE! + * => failure: kmem_object locked, no free vm_pages, etc. + * save VA for later call to [a], go to plan 3. 
+ * If we fail, we simply let pmap_enter() tell UVM about it. + */ +/* + * locking + * + * we have the following locks that we must contend with: + * + * "normal" locks: + * + * - pmap_main_lock + * this lock is used to prevent deadlock and/or provide mutex + * access to the pmap system. most operations lock the pmap + * structure first, then they lock the pv_lists (if needed). + * however, some operations such as pmap_page_protect lock + * the pv_lists and then lock pmaps. in order to prevent a + * cycle, we require a mutex lock when locking the pv_lists + * first. thus, the "pmap => pv_list" lockers must gain a + * read-lock on pmap_main_lock before locking the pmap. and + * the "pv_list => pmap" lockers must gain a write-lock on + * pmap_main_lock before locking. since only one thread + * can write-lock a lock at a time, this provides mutex. + * + * "simple" locks: + * + * - pmap lock (per pmap, part of uvm_object) + * this lock protects the fields in the pmap structure including + * the non-kernel PDEs in the PDP, and the PTEs. it also locks + * in the alternate PTE space (since that is determined by the + * entry in the PDP). + * + * - pvh_lock (per pv_head) + * this lock protects the pv_entry list which is chained off the + * pv_head structure for a specific managed PA. it is locked + * when traversing the list (e.g. adding/removing mappings, + * syncing R/M bits, etc.) + * + * - pvalloc_lock + * this lock protects the data structures which are used to manage + * the free list of pv_entry structures. + * + * - pmaps_lock + * this lock protects the list of active pmaps (headed by "pmaps"). + * we lock it when adding or removing pmaps from this list.
+ * + */ + +/* + * locking data structures + */ + +struct simplelock pvalloc_lock; +struct simplelock pmaps_lock; + +#if defined(MULTIPROCESSOR) && 0 + +extern struct lock pmap_main_lock; + +#define PMAP_MAP_TO_HEAD_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0) +#define PMAP_MAP_TO_HEAD_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#define PMAP_HEAD_TO_MAP_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0) +#define PMAP_HEAD_TO_MAP_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +#define PG_FRAME 0xffffff000ULL /* page frame mask */ +#define PG_LGFRAME 0xfffe00000ULL /* large (2M) page frame mask */ + +/* + * Redefine the PDSHIFT, NBPD + */ +#undef PDSHIFT +#define PD_MASK 0xffe00000 /* page directory address bits */ +#define PDSHIFT 21 /* page directory address shift */ +#define PT_MASK 0x001ff000 /* page table address bits */ +#undef NBPD +#define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */ + +/* + * + */ +#undef PDSLOT_PTE +#define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */ +#undef PDSLOT_KERN +#define PDSLOT_KERN (1664U) /* 1664: start of kernel space */ +#undef PDSLOT_APTE +#define PDSLOT_APTE (2044U) /* 2044: alternative recursive slot */ + +/* + * The following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE 
((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +#define PTES_PER_PTP (NBPG / sizeof(pt_entry_t)) /* # of PTEs in a PTP */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * Mach derived conversion macros + */ +#define i386_round_pdr(x) ((((unsigned)(x)) + ~PD_MASK) & PD_MASK) + +/* + * PTP macros: + * A PTP's index is the PD index of the PDE that points to it. + * A PTP's offset is the byte-offset in the PTE space that this PTP is at. + * A PTP's VA is the first VA mapped by that PTP. + * + * Note that NBPG == number of bytes in a PTP (4096 bytes == 512 entries) + * NBPD == number of bytes a PTP can map (2MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * Access PD and PT + */ +#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)]) + +/* + * here we define the data types for PDEs and PTEs + */ +typedef u_int64_t pd_entry_t; /* PDE */ +typedef u_int64_t pt_entry_t; /* PTE */ + +/* + * Number of PTE's per cache line. 8 byte pte, 32-byte cache line + * Used to avoid false sharing of cache lines. + */ +#define NPTECL 4 + +/* + * other data structures + */ + +extern u_int32_t protection_codes[]; /* maps MI prot to i386 prot code */ +extern boolean_t pmap_initialized; /* pmap_init done yet? */ + +/* + * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a + * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing + * due to false sharing.
+ */ + +#ifdef MULTIPROCESSOR +#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) +#define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG) +#else +#define PTESLEW(pte, id) (pte) +#define VASLEW(va,id) (va) +#endif + +/* + * special VAs and the PTEs that map them + */ + +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; +extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp; + +extern int pmap_pg_g; +extern struct pmap_head pmaps; +extern struct pmap *pmaps_hand; + +/* + * local prototypes + */ + +struct vm_page *pmap_alloc_ptp_pae(struct pmap *, int, boolean_t); +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +struct vm_page *pmap_get_ptp_pae(struct pmap *, int, boolean_t); +pt_entry_t *pmap_map_ptes_pae(struct pmap *); +void pmap_remove_ptes_pae(struct pmap *, struct vm_page *, + vaddr_t, vaddr_t, vaddr_t, int32_t *); +boolean_t pmap_remove_pte_pae(struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t, int32_t *); +void pmap_unmap_ptes_pae(struct pmap *); +struct vm_page *pmap_steal_ptp_pae(struct uvm_object *, vaddr_t); +vaddr_t pmap_tmpmap_pa_pae(paddr_t); +pt_entry_t *pmap_tmpmap_pvepte_pae(struct pv_entry *); +void pmap_tmpunmap_pa_pae(void); +void pmap_tmpunmap_pvepte_pae(struct pv_entry *); + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + */ + +vaddr_t +pmap_tmpmap_pa_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (*ptpte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpva); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + */ + +void +pmap_tmpunmap_pa_pae() +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = 
VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptpte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + *ptpte = 0; /* zap! */ + pmap_update_pg((vaddr_t)ptpva); +#ifdef MULTIPROCESSOR + /* + * No need for tlb shootdown here, since ptp_pte is per-CPU. + */ +#endif +} + +/* + * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry + * + * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] + */ + +pt_entry_t * +pmap_tmpmap_pvepte_pae(struct pv_entry *pve) +{ +#ifdef DIAGNOSTIC + if (pve->pv_pmap == pmap_kernel()) + panic("pmap_tmpmap_pvepte: attempt to map kernel"); +#endif + + /* is it current pmap? use direct mapping... */ + if (pmap_is_curpmap(pve->pv_pmap)) + return(vtopte(pve->pv_va)); + + return(((pt_entry_t *)pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(pve->pv_ptp))) + + ptei((unsigned)pve->pv_va)); +} + +/* + * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte + */ + +void +pmap_tmpunmap_pvepte_pae(struct pv_entry *pve) +{ + /* was it current pmap? 
if so, return */ + if (pmap_is_curpmap(pve->pv_pmap)) + return; + + pmap_tmpunmap_pa_pae(); +} + +/* + * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in + * + * => we lock enough pmaps to keep things locked in + * => must be undone with pmap_unmap_ptes before returning + */ + +pt_entry_t * +pmap_map_ptes_pae(struct pmap *pmap) +{ + pd_entry_t opde; + + /* the kernel's pmap is always accessible */ + if (pmap == pmap_kernel()) { + return(PTE_BASE); + } + + /* if curpmap then we are always mapped */ + if (pmap_is_curpmap(pmap)) { + simple_lock(&pmap->pm_obj.vmobjlock); + return(PTE_BASE); + } + + /* need to lock both curpmap and pmap: use ordered locking */ + if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + simple_lock(&pmap->pm_obj.vmobjlock); + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } else { + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&pmap->pm_obj.vmobjlock); + } + + /* need to load a new alternate pt space into curpmap? */ + opde = *APDP_PDE; + if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdidx[0]) { + APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V; + APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V; + APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V; + APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V; + if (pmap_valid_entry(opde)) + pmap_apte_flush(curpcb->pcb_pmap); + } + return(APTE_BASE); +} + +/* + * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + */ + +void +pmap_unmap_ptes_pae(struct pmap *pmap) +{ + if (pmap == pmap_kernel()) + return; + + if (pmap_is_curpmap(pmap)) { + simple_unlock(&pmap->pm_obj.vmobjlock); + } else { +#if defined(MULTIPROCESSOR) + APDP_PDE[0] = 0; + APDP_PDE[1] = 0; + APDP_PDE[2] = 0; + APDP_PDE[3] = 0; + pmap_apte_flush(curpcb->pcb_pmap); +#endif + simple_unlock(&pmap->pm_obj.vmobjlock); + simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } +} + +u_int32_t +pmap_pte_set_pae(vaddr_t va, paddr_t pa, u_int32_t bits) +{ + pt_entry_t pte, *ptep = vtopte(va); + + pte = 
i386_atomic_testset_uq(ptep, pa | bits); + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_setbits_pae(vaddr_t va, u_int32_t set, u_int32_t clr) +{ + pt_entry_t *ptep = vtopte(va); + pt_entry_t pte = *ptep; + + i386_atomic_testset_uq(ptep, (pte | set) & ~(pt_entry_t)clr); + return (pte & ~PG_FRAME); + +} + +u_int32_t +pmap_pte_bits_pae(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & ~PG_FRAME); +} + +paddr_t +pmap_pte_paddr_pae(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & PG_FRAME); +} + +/* + * Switch over to PAE page tables + */ +void +pmap_bootstrap_pae() +{ + extern paddr_t avail_end, avail_end2; + extern int cpu_pae, nkpde; + struct pmap *kpm = pmap_kernel(); + struct vm_page *ptp; + paddr_t ptaddr; + u_int32_t bits; + vaddr_t va, eva; + int i, pn, pe; + + if (!cpu_pae || avail_end >= avail_end2 || !(cpu_feature & CPUID_PAE)) + return; + + va = (vaddr_t)kpm->pm_pdir; + kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[2] = (va + 2*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[3] = (va + 3*NBPG - KERNBASE) | PG_V; + /* map pde recursively into itself */ + PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW; + PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW; + PDE(kpm, PDSLOT_PTE+2) = kpm->pm_pdidx[2] | PG_KW; + PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW; + + /* transfer all kernel mappings over into pae tables */ + for (va = KERNBASE, eva = va + (nkpde << 22); + va < eva; va += PAGE_SIZE) { + if (!pmap_valid_entry(PDE(kpm, pdei(va)))) { + ptp = uvm_pagealloc(&kpm->pm_obj, va, NULL, + UVM_PGA_ZERO); + ptaddr = VM_PAGE_TO_PHYS(ptp); + PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V; + pmap_pte_set_86((vaddr_t)vtopte(va), + ptaddr, PG_KW | PG_V); + + /* count PTP as resident */ + kpm->pm_stats.resident_count++; + } + bits = pmap_pte_bits_86(va) | pmap_pg_g; + if (pmap_valid_entry(bits)) + pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits); + } + + if 
(!cpu_paenable(&kpm->pm_pdidx[0])) { + extern struct user *proc0paddr; + + proc0paddr->u_pcb.pcb_cr3 = kpm->pm_pdirpa = + (vaddr_t)kpm - KERNBASE; + kpm->pm_pdirsize = 4 * NBPG; + + csrc_pte = vtopte(pmap_csrcp); + cdst_pte = vtopte(pmap_cdstp); + zero_pte = vtopte(pmap_zerop); + ptp_pte = vtopte(pmap_ptpp); + + nkpde *= 2; + nkptp_max = 2048 - PDSLOT_KERN - 4; + vm_max_address = (PDSLOT_PTE << PDSHIFT) + + (PDSLOT_PTE << PGSHIFT); + avail_end = avail_end2; + + pmap_pte_set_p = pmap_pte_set_pae; + pmap_pte_setbits_p = pmap_pte_setbits_pae; + pmap_pte_bits_p = pmap_pte_bits_pae; + pmap_pte_paddr_p = pmap_pte_paddr_pae; + pmap_change_attrs_p = pmap_change_attrs_pae; + pmap_enter_p = pmap_enter_pae; + pmap_extract_p = pmap_extract_pae; + pmap_growkernel_p = pmap_growkernel_pae; + pmap_page_remove_p = pmap_page_remove_pae; + pmap_remove_p = pmap_remove_pae; + pmap_test_attrs_p = pmap_test_attrs_pae; + pmap_unwire_p = pmap_unwire_pae; + pmap_write_protect_p = pmap_write_protect_pae; + pmap_pinit_pd_p = pmap_pinit_pd_pae; + pmap_zero_phys_p = pmap_zero_phys_pae; + pmap_zero_page_uncached_p = pmap_zero_page_uncached_pae; + pmap_copy_page_p = pmap_copy_page_pae; + pmap_try_steal_pv_p = pmap_try_steal_pv_pae; + + bzero((void *)kpm->pm_pdir + 8, (PDSLOT_PTE-1) * 8); + /* TODO also reclaim old PDPs */ + for (i = 0; i < vm_nphysseg; i++) + if (vm_physmem[i].start > atop(0xfffff000)) { + vm_physmem[i].avail_end = vm_physmem[i].end; + /* free vm_pages (uvm had already zeroed 'em) */ + for (pn = 0, pe = vm_physmem[i].end - + vm_physmem[i].start; pn < pe ; pn++) { + uvmexp.npages++; + /* add page to free pool */ + uvm_pagefree(&vm_physmem[i].pgs[pn]); + } + + } + uvm_page_rehash(); + } +} + +/* + * p v _ e n t r y f u n c t i o n s + */ + +/* + * pv_entry allocation functions: + * the main pv_entry allocation functions are: + * pmap_alloc_pv: allocate a pv_entry structure + * pmap_free_pv: free one pv_entry + * pmap_free_pvs: free a list of pv_entrys + * + * the rest are helper 
functions + */ + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! + */ + +boolean_t +pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv) +{ + pt_entry_t *ptep, opte; +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; +#endif + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte_pae(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + opte = i386_atomic_testset_uq(ptep, 0); /* zap! */ +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); +#endif + pmap_tmpunmap_pvepte_pae(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. 
+ */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); +} + +/* + * p t p f u n c t i o n s + */ + +/* + * pmap_alloc_ptp: allocate a PTP for a PMAP + * + * => pmap should already be locked by caller + * => we use the ptp's wire_count to count the number of active mappings + * in the PTP (we start it at one to prevent any chance this PTP + * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) + */ + +struct vm_page * +pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +{ + struct vm_page *ptp; + + ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, + UVM_PGA_USERESERVE|UVM_PGA_ZERO); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp_pae(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + /* stole one; zero it. */ + pmap_zero_page(ptp); + } + + /* got one! */ + ptp->flags &= ~PG_BUSY; /* never busy */ + ptp->wire_count = 1; /* no mappings yet */ + PDE(pmap, pde_index) = + (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + pmap->pm_stats.resident_count++; /* count PTP as resident */ + pmap->pm_ptphint = ptp; + return(ptp); +} + +/* + * pmap_steal_ptp: steal a PTP from any pmap that we can access + * + * => obj is locked by caller. 
+ * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +struct vm_page * +pmap_steal_ptp_pae(struct uvm_object *obj, vaddr_t offset) +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + int32_t cpumask = 0; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + TAILQ_FOREACH(ptp, &curobj->memq, listq) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). + */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (PDE(pmaps_hand, idx) & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes_pae(pmaps_hand, ptp, + (vaddr_t)ptes, ptp_i2v(idx), + ptp_i2v(idx+1), &cpumask); + pmap_tmpunmap_pa_pae(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + PDE(pmaps_hand, idx) = 0; /* zap! 
*/ + pmaps_hand->pm_stats.resident_count--; +#ifdef MULTIPROCESSOR + pmap_apte_flush(pmaps_hand); +#else + if (pmap_is_curpmap(pmaps_hand)) + pmap_apte_flush(pmaps_hand); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdidx[0]) + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); +#endif + + /* put it in our pmap! */ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + pmap_tlb_shootnow(cpumask); + return(ptp); +} + +/* + * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) + * + * => pmap should NOT be pmap_kernel() + * => pmap should be locked + */ + +struct vm_page * +pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +{ + struct vm_page *ptp; + + if (pmap_valid_entry(PDE(pmap, pde_index))) { + + /* valid... check hint (saves us a PA->PG lookup) */ + if (pmap->pm_ptphint && + (PDE(pmap, pde_index) & PG_FRAME) == + VM_PAGE_TO_PHYS(pmap->pm_ptphint)) + return(pmap->pm_ptphint); + + ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_get_ptp: unmanaged user PTP"); +#endif + pmap->pm_ptphint = ptp; + return(ptp); + } + + /* allocate a new PTP (updates ptphint) */ + return(pmap_alloc_ptp_pae(pmap, pde_index, just_try)); +} + +/* + * pmap_pinit_pd: given a freshly allocated pmap structure, give it a PD + */ +void +pmap_pinit_pd_pae(struct pmap *pmap) +{ + extern int nkpde; + vaddr_t va; + + /* allocate PDP */ + pmap->pm_pdir = uvm_km_alloc(kernel_map, 4 * NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + /* page index is in the pmap! 
*/ + pmap_extract(pmap_kernel(), (vaddr_t)pmap, &pmap->pm_pdirpa); + /* fill out the PDPT entries */ + va = (vaddr_t)pmap->pm_pdir; + pmap_extract(pmap_kernel(), va + 0*NBPG, &pmap->pm_pdidx[0]); + pmap_extract(pmap_kernel(), va + 1*NBPG, &pmap->pm_pdidx[1]); + pmap_extract(pmap_kernel(), va + 2*NBPG, &pmap->pm_pdidx[2]); + pmap_extract(pmap_kernel(), va + 3*NBPG, &pmap->pm_pdidx[3]); + pmap->pm_pdidx[0] |= PG_V; + pmap->pm_pdidx[1] |= PG_V; + pmap->pm_pdidx[2] |= PG_V; + pmap->pm_pdidx[3] |= PG_V; + pmap->pm_pdirsize = 4 * NBPG; + + /* init PDP */ + /* zero init area */ + bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW; + PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW; + PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW; + PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW; + + /* + * we need to lock pmaps_lock to prevent nkpde from changing on + * us. note that there is no need to splvm to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... + */ + simple_lock(&pmaps_lock); + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN), + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize - + ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + simple_unlock(&pmaps_lock); +} + +/* + * some misc. 
functions + */ + +/* + * pmap_extract: extract a PA for the given VA + */ + +boolean_t +pmap_extract_pae(struct pmap *pmap, vaddr_t va, paddr_t *pap) +{ + paddr_t retval; + pt_entry_t *ptes; + + if (PDE(pmap, pdei(va))) { + ptes = pmap_map_ptes_pae(pmap); + retval = (paddr_t)(ptes[atop(va)] & PG_FRAME); + pmap_unmap_ptes_pae(pmap); + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); + } + return (FALSE); +} + +extern void (*pagezero)(void *, size_t); + +/* + * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are + * initialized. + */ +void +pmap_zero_phys_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(pmap_zerop, id); + +#ifdef DIAGNOSTIC + if (*zpte) + panic("pmap_zero_phys: lock botch"); +#endif + *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! */ +} + +/* + * pmap_zero_page_uncached: the same, except uncached. + */ + +boolean_t +pmap_zero_page_uncached_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(pmap_zerop, id); + +#ifdef DIAGNOSTIC + if (*zpte) + panic("pmap_zero_page_uncached: lock botch"); +#endif + + *zpte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ + ((cpu_class != CPUCLASS_386) ? PG_N : 0); + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! 
*/ + + return (TRUE); +} + +/* + * pmap_copy_page: copy a page + */ + +void +pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) +{ + paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); + paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *spte = PTESLEW(csrc_pte,id); + pt_entry_t *dpte = PTESLEW(cdst_pte,id); + caddr_t csrcva = VASLEW(pmap_csrcp, id); + caddr_t cdstva = VASLEW(pmap_cdstp, id); + +#ifdef DIAGNOSTIC + if (*spte || *dpte) + panic("pmap_copy_page: lock botch"); +#endif + + *spte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW; + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); + bcopy(csrcva, cdstva, PAGE_SIZE); + *spte = *dpte = 0; /* zap! */ + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); +#ifdef MULTIPROCESSOR + /* Using per-cpu VA; no shootdown required here. */ +#endif +} + +/* + * p m a p r e m o v e f u n c t i o n s + * + * functions that remove mappings + */ + +/* + * pmap_remove_ptes: remove PTEs from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + */ + +void +pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, + vaddr_t startva, vaddr_t endva, int32_t *cpumaskp) +{ + struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ + struct pv_entry *pve; + pt_entry_t *pte = (pt_entry_t *) ptpva; + pt_entry_t opte; + int bank, off; + + /* + * note that ptpva points to the PTE that maps startva. this may + * or may not be the first PTE in the PTP. + * + * we loop through the PTP while there are still PTEs to look at + * and the wire_count is greater than 1 (because we use the wire_count + * to keep track of the number of real PTEs in the PTP). 
+ */ + + for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) + ; pte++, startva += NBPG) { + if (!pmap_valid_entry(*pte)) + continue; /* VA not mapped */ + + opte = i386_atomic_testset_uq(pte, 0); /* zap! */ + + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (opte & PG_U) + pmap_tlb_shootdown(pmap, startva, opte, cpumaskp); + + if (ptp) { + ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, startva, opte, + cpumaskp); + } + + /* + * if we are not on a pv_head list we are done. + */ + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(atop(opte & PG_FRAME), &off) + != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", startva); +#endif + continue; + } + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_ptes: unmanaged page marked " + "PG_PVLIST, va = 0x%lx, pa = 0x%lx", + startva, (u_long)(opte & PG_FRAME)); +#endif + + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, + startva); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) { + pve->pv_next = pv_tofree; + pv_tofree = pve; + } + + /* end of "for" loop: time for next pte */ + } + if (pv_tofree) + pmap_free_pvs(pmap, pv_tofree); +} + + +/* + * pmap_remove_pte: remove a single PTE from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + * => returns true if we removed a mapping + */ + +boolean_t +pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, + vaddr_t va, int32_t *cpumaskp) +{ + pt_entry_t 
opte; + int bank, off; + struct pv_entry *pve; + + if (!pmap_valid_entry(*pte)) + return(FALSE); /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ + + pmap_exec_account(pmap, va, opte, 0); + + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (opte & PG_U) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + + if (ptp) { + ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + + } + + /* + * if we are not on a pv_head list we are done. + */ + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(atop(opte & PG_FRAME), &off) != -1) + panic("pmap_remove_pte: managed page without " + "PG_PVLIST for 0x%lx", va); +#endif + return(TRUE); + } + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_pte: unmanaged page marked " + "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, + (u_long)(opte & PG_FRAME)); +#endif + + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) + pmap_free_pv(pmap, pve); + return(TRUE); +} + +/* + * pmap_remove: top level mapping removal function + * + * => caller should not be holding any pmap locks + */ + +void +pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + pt_entry_t *ptes, opte; + boolean_t result; + paddr_t ptppa; + vaddr_t blkendva; + struct vm_page *ptp; + int32_t cpumask = 0; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + /* + * removing one page? take shortcut function. 
+ */ + + if (sva + PAGE_SIZE == eva) { + + if (pmap_valid_entry(PDE(pmap, pdei(sva)))) { + + /* PA of the PTP */ + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == + ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged " + "PTP detected"); +#endif + } + } + + /* do it! */ + result = pmap_remove_pte_pae(pmap, ptp, + &ptes[atop(sva)], sva, &cpumask); + + /* + * if mapping removed and the PTP is no longer + * being used, free it! + */ + + if (result && ptp && ptp->wire_count <= 1) { + opte = i386_atomic_testset_uq(&PDE(pmap, + pdei(sva)), 0); /* zap! */ +#ifdef MULTIPROCESSOR + /* + * XXXthorpej Redundant shootdown can happen + * here if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, + &cpumask); +#ifdef MULTIPROCESSOR + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen + * here if pmap == curpcb->pcb_pmap (not APTE + * space). + */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, + &cpumask); +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) + pmap->pm_ptphint = + TAILQ_FIRST(&pmap->pm_obj.memq); + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); /* unlock pmap */ + PMAP_MAP_TO_HEAD_UNLOCK(); + return; + } + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* + * XXXCDC: our PTE mappings should never be removed + * with pmap_remove! if we allow this (and why would + * we?) 
then we end up freeing the pmap's page + * directory page (PDP) before we are finished using + * it when we hit in in the recursive mapping. this + * is BAD. + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + if (pdei(sva) == PDSLOT_PTE) + /* XXXCDC: ugly hack to avoid freeing PDP here */ + continue; + + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + /* valid block? */ + continue; + + /* PA of the PTP */ + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; + + /* get PTP if non-kernel mapping */ + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP " + "detected"); +#endif + } + } + pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(sva)], + sva, blkendva, &cpumask); + + /* if PTP is no longer being used, free it! */ + if (ptp && ptp->wire_count <= 1) { + opte = i386_atomic_testset_uq(&PDE(pmap, pdei(sva)),0); +#if defined(MULTIPROCESSOR) + /* + * XXXthorpej Redundant shootdown can happen here + * if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen here + * if pmap == curpcb->pcb_pmap (not APTE space). + */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? 
*/ + pmap->pm_ptphint = + TAILQ_FIRST(&pmap->pm_obj.memq); + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } + + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); +} + +/* + * pmap_page_remove: remove a managed vm_page from all pmaps that map it + * + * => we set pv_head => pmap locking + * => R/M bits are sync'd back to attrs + */ + +void +pmap_page_remove_pae(struct vm_page *pg) +{ + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, opte; + int32_t cpumask = 0; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_page_remove: unmanaged page?\n"); + return; + } + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return; + } + + /* set pv_head => pmap locking */ + PMAP_HEAD_TO_MAP_LOCK(); + + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { + ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) + printf("pmap_page_remove: found pager VA on pv_list\n"); + if (pve->pv_ptp && (PDE(pve->pv_pmap, + pdei(pve->pv_va)) & PG_FRAME) != + VM_PAGE_TO_PHYS(pve->pv_ptp)) { + printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", + pg, pve->pv_va, pve->pv_ptp); + printf("pmap_page_remove: PTP's phys addr: " + "actual=%llx, recorded=%llx\n", + (PDE(pve->pv_pmap, pdei(pve->pv_va)) & + PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); + panic("pmap_page_remove: mapped managed page has " + "invalid pv_ptp field"); + } +#endif + + opte = ptes[atop(pve->pv_va)]; + ptes[atop(pve->pv_va)] = 0; /* zap! 
*/ + + if (opte & PG_W) + pve->pv_pmap->pm_stats.wired_count--; + pve->pv_pmap->pm_stats.resident_count--; + + /* Shootdown only if referenced */ + if (opte & PG_U) + pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, + &cpumask); + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* update the PTP reference count. free if last reference. */ + if (pve->pv_ptp) { + pve->pv_ptp->wire_count--; + if (pve->pv_ptp->wire_count <= 1) { + /* + * Do we have to shootdown the page just to + * get the pte out of the TLB ? + */ + if(!(opte & PG_U)) + pmap_tlb_shootdown(pve->pv_pmap, + pve->pv_va, opte, &cpumask); + + opte = i386_atomic_testset_uq(&PDE(pve->pv_pmap, + pdei(pve->pv_va)), 0); + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + pve->pv_ptp->offset, + opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the other pmap's + * self-mapping of the PTP. + */ + pmap_tlb_shootdown(pve->pv_pmap, + ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset, + opte, &cpumask); +#endif + pve->pv_pmap->pm_stats.resident_count--; + /* update hint? */ + if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) + pve->pv_pmap->pm_ptphint = + TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq); + pve->pv_ptp->wire_count = 0; + uvm_pagefree(pve->pv_ptp); + } + } + pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ + } + pmap_free_pvs(NULL, pvh->pvh_list); + pvh->pvh_list = NULL; + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + pmap_tlb_shootnow(cpumask); +} + +/* + * p m a p a t t r i b u t e f u n c t i o n s + * functions that test/change managed page's attributes + * since a page can be mapped multiple times we must check each PTE that + * maps it by going down the pv lists. 
+ */ + +/* + * pmap_test_attrs: test a page's attributes + * + * => we set pv_head => pmap locking + */ + +boolean_t +pmap_test_attrs_pae(struct vm_page *pg, int testbits) +{ + int bank, off; + char *myattrs; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, pte; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_test_attrs: unmanaged page?\n"); + return(FALSE); + } + + /* + * before locking: see if attributes are already set and if so, + * return! + */ + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + if (*myattrs & testbits) + return(TRUE); + + /* test to see if there is a list before bothering to lock */ + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return(FALSE); + } + + /* nope, gonna have to do it the hard way */ + PMAP_HEAD_TO_MAP_LOCK(); + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; + pve = pve->pv_next) { + ptes = pmap_map_ptes_pae(pve->pv_pmap); + pte = ptes[atop(pve->pv_va)]; + pmap_unmap_ptes_pae(pve->pv_pmap); + *myattrs |= pte; + } + + /* + * note that we will exit the for loop with a non-null pve if + * we have found the bits we are testing for. 
+ */ + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + return((*myattrs & testbits) != 0); +} + +/* + * pmap_change_attrs: change a page's attributes + * + * => we set pv_head => pmap locking + * => we return TRUE if we cleared one of the bits we were asked to + */ + +boolean_t +pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits) +{ + u_int32_t result; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, npte, opte; + char *myattrs; + int32_t cpumask = 0; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_change_attrs: unmanaged page?\n"); + return(FALSE); + } + + PMAP_HEAD_TO_MAP_LOCK(); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + result = *myattrs & clearbits; + *myattrs = (*myattrs | setbits) & ~clearbits; + + for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) + panic("pmap_change_attrs: mapping without PTP " + "detected"); +#endif + + ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ + npte = ptes[atop(pve->pv_va)]; + result |= (npte & clearbits); + npte = (npte | setbits) & ~(pt_entry_t)clearbits; + if (ptes[atop(pve->pv_va)] != npte) { + opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)], + npte); + pmap_tlb_shootdown(pve->pv_pmap, + atop(pve->pv_va), opte, &cpumask); + } + pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ + } + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + pmap_tlb_shootnow(cpumask); + + return(result != 0); +} + +/* + * p m a p p r o t e c t i o n f u n c t i o n s + */ + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => NOTE: this is an inline function 
in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_protect: set the protection in of the pages in a pmap + * + * => NOTE: this is an inline function in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_write_protect: write-protect pages in a pmap + */ +void +pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, + vm_prot_t prot) +{ + pt_entry_t *ptes, *spte, *epte, opte, npte; + vaddr_t blockend; + u_int32_t md_prot; + int32_t cpumask = 0; + + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + + /* should be ok, but just in case ... */ + sva &= PG_FRAME; + eva &= PG_FRAME; + + for (/* null */ ; sva < eva ; sva = blockend) { + + blockend = (sva & PD_MASK) + NBPD; + if (blockend > eva) + blockend = eva; + + /* + * XXXCDC: our PTE mappings should never be write-protected! + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + /* XXXCDC: ugly hack to avoid freeing PDP here */ + if (pdei(sva) == PDSLOT_PTE) + continue; + + /* empty block? */ + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + continue; + + md_prot = protection_codes[prot]; + if (sva < VM_MAXUSER_ADDRESS) + md_prot |= PG_u; + else if (sva < VM_MAX_ADDRESS) + /* XXX: write-prot our PTES? never! */ + md_prot |= (PG_u | PG_RW); + + spte = &ptes[atop(sva)]; + epte = &ptes[atop(blockend)]; + + for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) { + + if (!pmap_valid_entry(*spte)) /* no mapping? 
*/ + continue; + + npte = (*spte & ~(pt_entry_t)PG_PROT) | md_prot; + + if (npte != *spte) { + pmap_exec_account(pmap, sva, *spte, npte); + opte = *spte; + *spte = npte; + pmap_tlb_shootdown(pmap, sva, opte, &cpumask); + } + } + } + + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); /* unlocks pmap */ +} + +/* + * end of protection functions + */ + +/* + * pmap_unwire: clear the wired bit in the PTE + * + * => mapping should already be in map + */ + +void +pmap_unwire_pae(struct pmap *pmap, vaddr_t va) +{ + pt_entry_t *ptes; + + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(ptes[atop(va)])) + panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); +#endif + if ((ptes[atop(va)] & PG_W) != 0) { + ptes[atop(va)] &= ~PG_W; + pmap->pm_stats.wired_count--; + } +#ifdef DIAGNOSTIC + else { + printf("pmap_unwire: wiring for pmap %p va 0x%lx " + "didn't change!\n", pmap, va); + } +#endif + pmap_unmap_ptes_pae(pmap); /* unlocks map */ + } +#ifdef DIAGNOSTIC + else { + panic("pmap_unwire: invalid PDE"); + } +#endif +} + +/* + * pmap_copy: copy mappings from one pmap to another + * + * => optional function + * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro in pmap.h + */ + +/* + * pmap_enter: enter a mapping into a pmap + * + * => must be done "now" ... no lazy-evaluation + * => we set pmap => pv_head locking + */ + +int +pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, + int flags) +{ + pt_entry_t *ptes, opte, npte; + struct vm_page *ptp; + struct pv_head *pvh; + struct pv_entry *pve; + int bank, off, error; + boolean_t wired = (flags & PMAP_WIRED) != 0; + +#ifdef DIAGNOSTIC + /* sanity check: totally out of range? 
*/ + if (va >= VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: too big"); + + if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) + panic("pmap_enter: trying to map over PDP/APDP!"); + + /* sanity check: kernel PTPs should already have been pre-allocated */ + if (va >= VM_MIN_KERNEL_ADDRESS && + !pmap_valid_entry(PDE(pmap, pdei(va)))) + panic("pmap_enter: missing kernel PTP!"); +#endif + + /* get lock */ + PMAP_MAP_TO_HEAD_LOCK(); + + /* + * map in ptes and get a pointer to our PTP (unless we are the kernel) + */ + + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + if (pmap == pmap_kernel()) { + ptp = NULL; + } else { + ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE); + if (ptp == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: get ptp failed"); + } + } + opte = ptes[atop(va)]; /* old PTE */ + + /* + * is there currently a valid mapping at our VA? + */ + + if (pmap_valid_entry(opte)) { + + /* + * first, update pm_stats. resident count will not + * change since we are replacing/changing a valid + * mapping. wired count might change... + */ + + if (wired && (opte & PG_W) == 0) + pmap->pm_stats.wired_count++; + else if (!wired && (opte & PG_W) != 0) + pmap->pm_stats.wired_count--; + + /* + * is the currently mapped PA the same as the one we + * want to map? 
+ */ + + if ((opte & PG_FRAME) == pa) { + + /* if this is on the PVLIST, sync R/M bit */ + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(pa), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: same pa PG_PVLIST " + "mapping with unmanaged page " + "pa = 0x%lx (0x%lx)", pa, + atop(pa)); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pvh = NULL; /* ensure !PG_PVLIST */ + } + goto enter_now; + } + + /* + * changing PAs: we must remove the old one first + */ + + /* + * if current mapping is on a pvlist, + * remove it (sync R/M bits) + */ + + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with " + "unmanaged page " + "pa = 0x%lx (0x%lx)", pa, atop(pa)); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + pve = pmap_remove_pv(pvh, pmap, va); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pve = NULL; + } + } else { /* opte not valid */ + pve = NULL; + pmap->pm_stats.resident_count++; + if (wired) + pmap->pm_stats.wired_count++; + if (ptp) + ptp->wire_count++; /* count # of valid entrys */ + } + + /* + * at this point pm_stats has been updated. pve is either NULL + * or points to a now-free pv_entry structure (the latter case is + * if we called pmap_remove_pv above). + * + * if this entry is to be on a pvlist, enter it now. 
+ */ + + bank = vm_physseg_find(atop(pa), &off); + if (pmap_initialized && bank != -1) { + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pve == NULL) { + pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: no pv entries available"); + } + } + /* lock pvh when adding */ + pmap_enter_pv(pvh, pve, pmap, va, ptp); + } else { + + /* new mapping is not PG_PVLIST. free pve if we've got one */ + pvh = NULL; /* ensure !PG_PVLIST */ + if (pve) + pmap_free_pv(pmap, pve); + } + +enter_now: + /* + * at this point pvh is !NULL if we want the PG_PVLIST bit set + */ + + npte = pa | protection_codes[prot] | PG_V; + pmap_exec_account(pmap, va, opte, npte); + if (pvh) + npte |= PG_PVLIST; + if (wired) + npte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + npte |= PG_u; + else if (va < VM_MAX_ADDRESS) + npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ + if (pmap == pmap_kernel()) + npte |= pmap_pg_g; + + ptes[atop(va)] = npte; /* zap! */ + + if ((opte & ~(pt_entry_t)(PG_M|PG_U)) != npte) { +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; + + pmap_tlb_shootdown(pmap, va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. */ + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); +#endif + } + + error = 0; + +out: + pmap_unmap_ptes_pae(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + return error; +} + +/* + * pmap_growkernel: increase usage of KVM space + * + * => we allocate new PTPs for the kernel and install them in all + * the pmaps on the system. + */ + +vaddr_t +pmap_growkernel_pae(vaddr_t maxkvaddr) +{ + extern int nkpde; + struct pmap *kpm = pmap_kernel(), *pm; + int needed_kpde; /* needed number of kernel PTPs */ + int s; + paddr_t ptaddr; + + needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) + / NBPD; + if (needed_kpde <= nkpde) + goto out; /* we are OK */ + + /* + * whoops! 
#ifdef DEBUG
void		 pmap_dump_pae(struct pmap *, vaddr_t, vaddr_t);

/*
 * pmap_dump: dump all the mappings from a pmap
 *
 * => prints one line per valid PTE in [sva, eva)
 * => eva is clamped to VM_MAXUSER_ADDRESS when it is above it or
 *	not greater than sva
 * => caller should not be holding any pmap locks
 */

void
pmap_dump_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
	pt_entry_t *ptes, *pte;
	vaddr_t blkendva;

	/*
	 * if end is out of range truncate.
	 * if (end == start) update to max.
	 */

	if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
		eva = VM_MAXUSER_ADDRESS;

	/*
	 * we lock in the pmap => pv_head direction
	 */

	PMAP_MAP_TO_HEAD_LOCK();
	ptes = pmap_map_ptes_pae(pmap);	/* locks pmap */

	/*
	 * dumping a range of pages: we dump one PTP sized block
	 * (as rounded by i386_round_pdr) at a time
	 */

	for (/* null */ ; sva < eva ; sva = blkendva) {

		/* determine range of block */
		blkendva = i386_round_pdr(sva+1);
		if (blkendva > eva)
			blkendva = eva;

		/* valid block? */
		if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
			continue;

		pte = &ptes[atop(sva)];
		for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
			if (!pmap_valid_entry(*pte))
				continue;
			/*
			 * PTEs are 64 bits wide in this pmap: print them
			 * with %llx, frame first to match the "pa" label.
			 */
			printf("va %#lx -> pa %#llx (pte=%#llx)\n",
			    sva, (unsigned long long)(*pte & PG_FRAME),
			    (unsigned long long)*pte);
		}
	}
	pmap_unmap_ptes_pae(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}
#endif
a/sys/arch/i386/include/_types.h b/sys/arch/i386/include/_types.h index 8d54ca43d8c..f731aefd89f 100644 --- a/sys/arch/i386/include/_types.h +++ b/sys/arch/i386/include/_types.h @@ -1,4 +1,4 @@ -/* $OpenBSD: _types.h,v 1.2 2006/01/13 17:50:06 millert Exp $ */ +/* $OpenBSD: _types.h,v 1.3 2006/04/27 15:37:53 mickey Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -86,9 +86,9 @@ typedef __int32_t __register_t; /* VM system types */ typedef unsigned long __vaddr_t; -typedef unsigned long __paddr_t; typedef unsigned long __vsize_t; -typedef unsigned long __psize_t; +typedef unsigned long long __paddr_t; +typedef unsigned long long __psize_t; /* Standard system types */ typedef int __clock_t; diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h index e3be6b68b1b..a06878e87b0 100644 --- a/sys/arch/i386/include/atomic.h +++ b/sys/arch/i386/include/atomic.h @@ -1,4 +1,4 @@ -/* $OpenBSD: atomic.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $OpenBSD: atomic.h,v 1.3 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */ /*- @@ -44,6 +44,13 @@ #ifndef _LOCORE +static __inline u_int64_t +i386_atomic_testset_uq (volatile u_int64_t *ptr, u_int64_t val) { + __asm__ volatile ("\n1:\tlock; cmpxchg8b (%1); jnz 1b" : "+A" (val) : + "r" (ptr), "b" ((u_int32_t)val), "c" ((u_int32_t)(val >> 32))); + return val; +} + static __inline u_int32_t i386_atomic_testset_ul (volatile u_int32_t *ptr, unsigned long val) { __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr)); diff --git a/sys/arch/i386/include/bus.h b/sys/arch/i386/include/bus.h index 0b26d524f49..9900c76d4f0 100644 --- a/sys/arch/i386/include/bus.h +++ b/sys/arch/i386/include/bus.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bus.h,v 1.38 2006/04/27 15:17:16 mickey Exp $ */ +/* $OpenBSD: bus.h,v 1.39 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: bus.h,v 1.6 1996/11/10 03:19:25 thorpej Exp $ */ /*- @@ -741,7 +741,7 @@ void bus_space_free(bus_space_tag_t t, 
bus_space_handle_t bsh, #define BUS_DMA_COHERENT 0x004 /* hint: map memory DMA coherent */ #define BUS_DMA_BUS1 0x010 /* placeholders for bus functions... */ #define BUS_DMA_BUS2 0x020 -#define BUS_DMA_BUS3 0x040 +#define BUS_DMA_64BIT 0x040 /* large memory high segment is ok */ #define BUS_DMA_24BIT 0x080 /* isadma map */ #define BUS_DMA_STREAMING 0x100 /* hint: sequential, unidirectional */ #define BUS_DMA_READ 0x200 /* mapping is device -> memory only */ @@ -771,7 +771,10 @@ typedef struct i386_bus_dmamap *bus_dmamap_t; */ struct i386_bus_dma_segment { bus_addr_t ds_addr; /* DMA address */ + paddr_t ds_addr2; /* replacement store */ bus_size_t ds_len; /* length of transfer */ + vaddr_t ds_va; /* mapped loaded data */ + vaddr_t ds_va2; /* mapped replacement data */ }; typedef struct i386_bus_dma_segment bus_dma_segment_t; @@ -863,6 +866,11 @@ struct i386_bus_dmamap { void *_dm_cookie; /* cookie for bus-specific functions */ + struct vm_page **_dm_pages; /* replacement pages */ + vaddr_t _dm_pgva; /* those above -- mapped */ + int _dm_npages; /* number of pages allocated */ + int _dm_nused; /* number of pages replaced */ + /* * PUBLIC MEMBERS: these are used by machine-independent code. 
*/ diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index bf7327f06a4..568a2ef2de5 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.74 2006/01/12 22:39:21 weingart Exp $ */ +/* $OpenBSD: cpu.h,v 1.75 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */ /*- @@ -418,6 +418,10 @@ int kvtop(caddr_t); void vm86_gpfault(struct proc *, int); #endif /* VM86 */ +#ifndef SMALL_KERNEL +int cpu_paenable(void *); +#endif /* !SMALL_KERNEL */ + #ifdef GENERIC /* swapgeneric.c */ void setconf(void); diff --git a/sys/arch/i386/include/loadfile_machdep.h b/sys/arch/i386/include/loadfile_machdep.h index a121e81d4ef..5903231fc58 100644 --- a/sys/arch/i386/include/loadfile_machdep.h +++ b/sys/arch/i386/include/loadfile_machdep.h @@ -1,4 +1,4 @@ -/* $OpenBSD: loadfile_machdep.h,v 1.1 2003/04/17 03:42:14 drahn Exp $ */ +/* $OpenBSD: loadfile_machdep.h,v 1.2 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: loadfile_machdep.h,v 1.1 1999/04/29 03:17:12 tsubai Exp $ */ /*- @@ -43,7 +43,7 @@ #define LOAD_KERNEL (LOAD_ALL & ~LOAD_TEXTA) #define COUNT_KERNEL (COUNT_ALL & ~COUNT_TEXTA) -#define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) +#define LOADADDR(a) (((u_long)(a) + (u_long)offset)&0xfffffff) #define ALIGNENTRY(a) ((u_long)(a)) #define READ(f, b, c) read((f), (void *)LOADADDR(b), (c)) #define BCOPY(s, d, c) memcpy((void *)LOADADDR(d), (void *)(s), (c)) diff --git a/sys/arch/i386/include/param.h b/sys/arch/i386/include/param.h index 439a9859587..433644f782e 100644 --- a/sys/arch/i386/include/param.h +++ b/sys/arch/i386/include/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.35 2006/03/19 01:47:23 martin Exp $ */ +/* $OpenBSD: param.h,v 1.36 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: param.h,v 1.29 1996/03/04 05:04:26 cgd Exp $ */ /*- @@ -75,8 +75,6 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (PAGE_SIZE - 1) -#define NPTEPG 
(NBPG/(sizeof (pt_entry_t))) - /* * Start of kernel virtual space. Remember to alter the memory and * page table layout description in pmap.h when changing this. @@ -131,9 +129,3 @@ /* bytes to disk blocks */ #define dbtob(x) ((x) << DEV_BSHIFT) #define btodb(x) ((x) >> DEV_BSHIFT) - -/* - * Mach derived conversion macros - */ -#define i386_round_pdr(x) ((((unsigned)(x)) + PDOFSET) & ~PDOFSET) -#define i386_trunc_pdr(x) ((unsigned)(x) & ~PDOFSET) diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 4a350f4201f..b1e1c2ec4b8 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.41 2006/01/12 22:39:21 weingart Exp $ */ +/* $OpenBSD: pmap.h,v 1.42 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -47,123 +47,11 @@ #include <uvm/uvm_object.h> /* - * See pte.h for a description of i386 MMU terminology and hardware - * interface. - * - * A pmap describes a process' 4GB virtual address space. This - * virtual address space can be broken up into 1024 4MB regions which - * are described by PDEs in the PDP. The PDEs are defined as follows: - * - * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. - * The following assumes that KERNBASE is 0xd0000000. - * - * PDE#s VA range Usage - * 0->831 0x0 -> 0xcfc00000 user address space, note that the - * max user address is 0xcfbfe000 - * the final two pages in the last 4MB - * used to be reserved for the UAREA - * but now are no longer used. - * 831 0xcfc00000-> recursive mapping of PDP (used for - * 0xd0000000 linear mapping of PTPs). - * 832->1023 0xd0000000-> kernel address space (constant - * 0xffc00000 across all pmaps/processes). - * 1023 0xffc00000-> "alternate" recursive PDP mapping - * <end> (for other pmaps). - * - * - * Note: A recursive PDP mapping provides a way to map all the PTEs for - * a 4GB address space into a linear chunk of virtual memory. 
In other - * words, the PTE for page 0 is the first int mapped into the 4MB recursive - * area. The PTE for page 1 is the second int. The very last int in the - * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB - * address). - * - * All pmaps' PDs must have the same values in slots 832->1023 so that - * the kernel is always mapped in every process. These values are loaded - * into the PD at pmap creation time. - * - * At any one time only one pmap can be active on a processor. This is - * the pmap whose PDP is pointed to by processor register %cr3. This pmap - * will have all its PTEs mapped into memory at the recursive mapping - * point (slot #831 as show above). When the pmap code wants to find the - * PTE for a virtual address, all it has to do is the following: - * - * Address of PTE = (831 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) - * = 0xcfc00000 + (VA / 4096) * 4 - * - * What happens if the pmap layer is asked to perform an operation - * on a pmap that is not the one which is currently active? In that - * case we take the PA of the PDP of non-active pmap and put it in - * slot 1023 of the active pmap. This causes the non-active pmap's - * PTEs to get mapped in the final 4MB of the 4GB address space - * (e.g. starting at 0xffc00000). - * - * The following figure shows the effects of the recursive PDP mapping: - * - * PDP (%cr3) - * +----+ - * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 - * | | - * | | - * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000 - * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xe0400000) - * | | - * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) - * +----+ - * - * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE". - * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE". 
- * - * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a - * PTP: - * - * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000 - * +----+ - * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000 - * | | - * | | - * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000 - * | 832| -> maps the contents of first kernel PTP - * | | - * |1023| - * +----+ - * - * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is - * defined as "PDP_BASE".... within that mapping there are two - * defines: - * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP - * which points back to itself. - * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which - * establishes the recursive mapping of the alternate pmap. - * To set the alternate PDP, one just has to put the correct - * PA info in *APDP_PDE. - * - * Note that in the APTE_BASE space, the APDP appears at VA - * "APDP_BASE" (0xfffff000). + * The following defines identify the slots used as described in pmap.c . */ - -/* - * The following defines identify the slots used as described above. 
- */ - -#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 831: for recursive PDP map */ -#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ -#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ - -/* - * The following defines give the virtual addresses of various MMU - * data structures: - * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings - * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD - * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP - */ - -#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) -#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) -#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) -#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) -#define PDP_PDE (PDP_BASE + PDSLOT_PTE) -#define APDP_PDE (PDP_BASE + PDSLOT_APTE) +#define PDSLOT_PTE ((KERNBASE/NBPD)-2) /* 830: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1022) /* 1022: alternative recursive slot */ /* * The following define determines how many PTPs should be set up for the @@ -171,55 +59,10 @@ * get the VM system running. Once the VM system is running, the * pmap module can add more PTPs to the kernel area on demand. 
*/ - #ifndef NKPTP -#define NKPTP 4 /* 16MB to start */ +#define NKPTP 8 /* 16/32MB to start */ #endif #define NKPTP_MIN 4 /* smallest value we allow */ -#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) - /* largest value (-1 for APTP space) */ - -/* - * various address macros - * - * vtopte: return a pointer to the PTE mapping a VA - * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap) - * ptetov: given a pointer to a PTE, return the VA that it maps - * vtophys: translate a VA to the PA mapped to it - * - * plus alternative versions of the above - */ - -#define vtopte(VA) (PTE_BASE + atop(VA)) -#define kvtopte(VA) vtopte(VA) -#define ptetov(PT) (ptoa(PT - PTE_BASE)) -#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ - ((unsigned)(VA) & ~PG_FRAME)) -#define avtopte(VA) (APTE_BASE + atop(VA)) -#define ptetoav(PT) (ptoa(PT - APTE_BASE)) -#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ - ((unsigned)(VA) & ~PG_FRAME)) - -/* - * pdei/ptei: generate index into PDP/PTP from a VA - */ -#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) -#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) - -/* - * PTP macros: - * A PTP's index is the PD index of the PDE that points to it. - * A PTP's offset is the byte-offset in the PTE space that this PTP is at. - * A PTP's VA is the first VA mapped by that PTP. - * - * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) - * NBPD == number of bytes a PTP can map (4MB) - */ - -#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ -#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ -#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ -#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ /* * PG_AVAIL usage: we make use of the ignored bits of the PTE @@ -229,12 +72,6 @@ #define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ #define PG_X PG_AVAIL3 /* executable mapping */ -/* - * Number of PTE's per cache line. 4 byte pte, 32-byte cache line - * Used to avoid false sharing of cache lines. 
- */ -#define NPTECL 8 - #ifdef _KERNEL /* * pmap data structures: see pmap.c for details of locking. @@ -257,13 +94,15 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ */ struct pmap { + paddr_t pm_pdidx[4]; /* PDIEs for PAE mode */ + paddr_t pm_pdirpa; /* PA of PD (read-only after create) */ + vaddr_t pm_pdir; /* VA of PD (lck by object lock) */ + int pm_pdirsize; /* PD size (4k vs 16k on pae */ struct uvm_object pm_obj; /* object (lck by object lock) */ #define pm_lock pm_obj.vmobjlock LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ - pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ - paddr_t pm_pdirpa; /* PA of PD (read-only after create) */ struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */ - struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ + struct pmap_statistics pm_stats;/* pmap stats (lck by object lock) */ vaddr_t pm_hiexec; /* highest executable mapping */ int pm_flags; /* see below */ @@ -333,67 +172,185 @@ struct pv_page { /* * global kernel variables */ - -extern pd_entry_t PTD[]; - -/* PTDpaddr: is the physical address of the kernel's PDP */ -extern u_int32_t PTDpaddr; - +extern char PTD[]; extern struct pmap kernel_pmap_store; /* kernel pmap */ -extern int nkpde; /* current # of PDEs for kernel */ -extern int pmap_pg_g; /* do we support PG_G? */ +extern int nkptp_max; /* - * Macros + * Our dual-pmap design requires to play a pointer-and-seek. + * Although being nice folks we are handle single-pmap kernels special. 
*/ +#define PMAP_EXCLUDE_DECLS /* tells uvm_pmap.h *not* to include decls */ +/* + * Dumb macros + */ #define pmap_kernel() (&kernel_pmap_store) #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) #define pmap_update(pm) /* nada */ -#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) -#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) -#define pmap_copy(DP,SP,D,L,S) -#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) -#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) -#define pmap_phys_address(ppn) ptoa(ppn) -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ - -#define pmap_proc_iflush(p,va,len) /* nothing */ -#define pmap_unuse_final(p) /* nothing */ +#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) +#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) +#define pmap_copy(DP,SP,D,L,S) /* nicht */ +#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) +#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) +#define pmap_phys_address(ppn) ptoa(ppn) +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? 
*/ +#define pmap_proc_iflush(p,va,len) /* nothing */ +#define pmap_unuse_final(p) /* 4anaEB u nycToTa */ /* * Prototypes */ - void pmap_bootstrap(vaddr_t); -boolean_t pmap_change_attrs(struct vm_page *, int, int); +void pmap_bootstrap_pae(void); +void pmap_virtual_space(vaddr_t *, vaddr_t *); +void pmap_init(void); +struct pmap * pmap_create(void); +void pmap_destroy(struct pmap *); +void pmap_reference(struct pmap *); +void pmap_fork(struct pmap *, struct pmap *); +void pmap_collect(struct pmap *); +void pmap_activate(struct proc *); +void pmap_deactivate(struct proc *); +void pmap_kenter_pa(vaddr_t, paddr_t, vm_prot_t); +void pmap_kremove(vaddr_t, vsize_t); +void pmap_zero_page(struct vm_page *); +void pmap_copy_page(struct vm_page *, struct vm_page *); + +struct pv_entry*pmap_alloc_pv(struct pmap *, int); +void pmap_enter_pv(struct pv_head *, struct pv_entry *, + struct pmap *, vaddr_t, struct vm_page *); +void pmap_free_pv(struct pmap *, struct pv_entry *); +void pmap_free_pvs(struct pmap *, struct pv_entry *); +void pmap_free_pv_doit(struct pv_entry *); +void pmap_free_pvpage(void); static void pmap_page_protect(struct vm_page *, vm_prot_t); -void pmap_page_remove(struct vm_page *); -static void pmap_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); -void pmap_remove(struct pmap *, vaddr_t, vaddr_t); -boolean_t pmap_test_attrs(struct vm_page *, int); +static void pmap_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); static void pmap_update_pg(vaddr_t); -static void pmap_update_2pg(vaddr_t,vaddr_t); -void pmap_write_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); +static void pmap_update_2pg(vaddr_t, vaddr_t); int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *); +void pmap_exec_account(struct pmap *, vaddr_t, u_int32_t, + u_int32_t); vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */ +paddr_t vtophys(vaddr_t va); -void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *); +void pmap_tlb_shootdown(pmap_t, 
vaddr_t, u_int32_t, int32_t *); void pmap_tlb_shootnow(int32_t); void pmap_do_tlb_shootdown(struct cpu_info *); +boolean_t pmap_is_curpmap(struct pmap *); +boolean_t pmap_is_active(struct pmap *, int); +void pmap_apte_flush(struct pmap *); +struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, vaddr_t); + +#ifdef SMALL_KERNEL +#define pmap_pte_set_86 pmap_pte_set +#define pmap_pte_setbits_86 pmap_pte_setbits +#define pmap_pte_bits_86 pmap_pte_bits +#define pmap_pte_paddr_86 pmap_pte_paddr +#define pmap_change_attrs_86 pmap_change_attrs +#define pmap_enter_86 pmap_enter +#define pmap_extract_86 pmap_extract +#define pmap_growkernel_86 pmap_growkernel +#define pmap_page_remove_86 pmap_page_remove +#define pmap_remove_86 pmap_remove +#define pmap_test_attrs_86 pmap_test_attrs +#define pmap_unwire_86 pmap_unwire +#define pmap_write_protect_86 pmap_write_protect +#define pmap_pinit_pd_86 pmap_pinit_pd +#define pmap_zero_phys_86 pmap_zero_phys +#define pmap_zero_page_uncached_86 pmap_zero_page_uncached +#define pmap_copy_page_86 pmap_copy_page +#define pmap_try_steal_pv_86 pmap_try_steal_pv +#else +extern u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t); +extern u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t); +extern u_int32_t (*pmap_pte_bits_p)(vaddr_t); +extern paddr_t (*pmap_pte_paddr_p)(vaddr_t); +extern boolean_t (*pmap_change_attrs_p)(struct vm_page *, int, int); +extern int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +extern boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); +extern vaddr_t (*pmap_growkernel_p)(vaddr_t); +extern void (*pmap_page_remove_p)(struct vm_page *); +extern void (*pmap_remove_p)(struct pmap *, vaddr_t, vaddr_t); +extern boolean_t (*pmap_test_attrs_p)(struct vm_page *, int); +extern void (*pmap_unwire_p)(struct pmap *, vaddr_t); +extern void (*pmap_write_protect_p)(struct pmap*, vaddr_t, vaddr_t, vm_prot_t); +extern void (*pmap_pinit_pd_p)(pmap_t); +extern void 
(*pmap_zero_phys_p)(paddr_t); +extern boolean_t (*pmap_zero_page_uncached_p)(paddr_t); +extern void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *); +extern boolean_t (*pmap_try_steal_pv_p)(struct pv_head *pvh, + struct pv_entry *cpv, struct pv_entry *prevpv); + +u_int32_t pmap_pte_set_pae(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_pae(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_pae(vaddr_t); +paddr_t pmap_pte_paddr_pae(vaddr_t); +boolean_t pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv); +boolean_t pmap_change_attrs_pae(struct vm_page *, int, int); +int pmap_enter_pae(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_pae(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_pae(vaddr_t); +void pmap_page_remove_pae(struct vm_page *); +void pmap_remove_pae(struct pmap *, vaddr_t, vaddr_t); +boolean_t pmap_test_attrs_pae(struct vm_page *, int); +void pmap_unwire_pae(struct pmap *, vaddr_t); +void pmap_write_protect_pae(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_pae(pmap_t); +void pmap_zero_phys_pae(paddr_t); +boolean_t pmap_zero_page_uncached_pae(paddr_t); +void pmap_copy_page_pae(struct vm_page *, struct vm_page *); + +#define pmap_pte_set (*pmap_pte_set_p) +#define pmap_pte_setbits (*pmap_pte_setbits_p) +#define pmap_pte_bits (*pmap_pte_bits_p) +#define pmap_pte_paddr (*pmap_pte_paddr_p) +#define pmap_change_attrs (*pmap_change_attrs_p) +#define pmap_enter (*pmap_enter_p) +#define pmap_extract (*pmap_extract_p) +#define pmap_growkernel (*pmap_growkernel_p) +#define pmap_page_remove (*pmap_page_remove_p) +#define pmap_remove (*pmap_remove_p) +#define pmap_test_attrs (*pmap_test_attrs_p) +#define pmap_unwire (*pmap_unwire_p) +#define pmap_write_protect (*pmap_write_protect_p) +#define pmap_pinit_pd (*pmap_pinit_pd_p) +#define pmap_zero_phys (*pmap_zero_phys_p) +#define pmap_zero_page_uncached (*pmap_zero_page_uncached_p) +#define pmap_copy_page 
(*pmap_copy_page_p) +#define pmap_try_steal_pv (*pmap_try_steal_pv_p) +#endif + +u_int32_t pmap_pte_set_86(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_86(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_86(vaddr_t); +paddr_t pmap_pte_paddr_86(vaddr_t); +boolean_t pmap_try_steal_pv_86(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv); +boolean_t pmap_change_attrs_86(struct vm_page *, int, int); +int pmap_enter_86(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_86(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_86(vaddr_t); +void pmap_page_remove_86(struct vm_page *); +void pmap_remove_86(struct pmap *, vaddr_t, vaddr_t); +boolean_t pmap_test_attrs_86(struct vm_page *, int); +void pmap_unwire_86(struct pmap *, vaddr_t); +void pmap_write_protect_86(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_86(pmap_t); +void pmap_zero_phys_86(paddr_t); +boolean_t pmap_zero_page_uncached_86(paddr_t); +void pmap_copy_page_86(struct vm_page *, struct vm_page *); #define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ /* * Do idle page zero'ing uncached to avoid polluting the cache. */ -boolean_t pmap_zero_page_uncached(paddr_t); #define PMAP_PAGEIDLEZERO(pg) pmap_zero_page_uncached(VM_PAGE_TO_PHYS(pg)) /* diff --git a/sys/arch/i386/include/pte.h b/sys/arch/i386/include/pte.h index e27c072c19d..73a3bc3e7b0 100644 --- a/sys/arch/i386/include/pte.h +++ b/sys/arch/i386/include/pte.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pte.h,v 1.7 2004/02/06 00:23:21 deraadt Exp $ */ +/* $OpenBSD: pte.h,v 1.8 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: pte.h,v 1.11 1998/02/06 21:58:05 thorpej Exp $ */ /* @@ -45,114 +45,11 @@ #define _I386_PTE_H_ /* - * i386 MMU hardware structure: - * - * the i386 MMU is a two-level MMU which maps 4GB of virtual memory. - * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium - * processors can support a 4MB pagesize as well. 
- * - * the first level table (segment table?) is called a "page directory" - * and it contains 1024 page directory entries (PDEs). each PDE is - * 4 bytes (an int), so a PD fits in a single 4K page. this page is - * the page directory page (PDP). each PDE in a PDP maps 4MB of space - * (1024 * 4MB = 4GB). a PDE contains the physical address of the - * second level table: the page table. or, if 4MB pages are being used, - * then the PDE contains the PA of the 4MB page being mapped. - * - * a page table consists of 1024 page table entries (PTEs). each PTE is - * 4 bytes (an int), so a page table also fits in a single 4K page. a - * 4K page being used as a page table is called a page table page (PTP). - * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains - * the physical address of the page it maps and some flag bits (described - * below). - * - * the processor has a special register, "cr3", which points to the - * the PDP which is currently controlling the mappings of the virtual - * address space. - * - * the following picture shows the translation process for a 4K page: - * - * %cr3 register [PA of PDP] - * | - * | - * | bits <31-22> of VA bits <21-12> of VA bits <11-0> - * | index the PDP (0 - 1023) index the PTP are the page offset - * | | | | - * | v | | - * +--->+----------+ | | - * | PD Page | PA of v | - * | |---PTP-------->+------------+ | - * | 1024 PDE | | page table |--PTE--+ | - * | entries | | (aka PTP) | | | - * +----------+ | 1024 PTE | | | - * | entries | | | - * +------------+ | | - * | | - * bits <31-12> bits <11-0> - * p h y s i c a l a d d r - * - * the i386 caches PTEs in a TLB. it is important to flush out old - * TLB mappings when making a change to a mappings. writing to the - * %cr3 will flush the entire TLB. newer processors also have an - * instruction that will invalidate the mapping of a single page (which - * is useful if you are changing a single mappings because it preserves - * all the cached TLB entries). 
- * - * as shows, bits 31-12 of the PTE contain PA of the page being mapped. - * the rest of the PTE is defined as follows: - * bit# name use - * 11 n/a available for OS use, hardware ignores it - * 10 n/a available for OS use, hardware ignores it - * 9 n/a available for OS use, hardware ignores it - * 8 G global bit (see discussion below) - * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>) - * 6 D dirty (modified) page - * 5 A accessed (referenced) page - * 4 PCD cache disable - * 3 PWT prevent write through (cache) - * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) - * 1 R/W read/write bit (0=read only, 1=read-write) - * 0 P present (valid) - * - * notes: - * - on the i386 the R/W bit is ignored if processor is in supervisor - * state (bug!) - * - PS is only supported on newer processors - * - PTEs with the G bit are global in the sense that they are not - * flushed from the TLB when %cr3 is written (to flush, use the - * "flush single page" instruction). this is only supported on - * newer processors. this bit can be used to keep the kernel's - * TLB entries around while context switching. since the kernel - * is mapped into all processes at the same place it does not make - * sense to flush these entries when switching from one process' - * pmap to another. - */ - -#if !defined(_LOCORE) - -/* - * here we define the data types for PDEs and PTEs - */ - -typedef u_int32_t pd_entry_t; /* PDE */ -typedef u_int32_t pt_entry_t; /* PTE */ - -#endif - -/* * now we define various for playing with virtual addresses */ #define PDSHIFT 22 /* offset of PD index in VA */ #define NBPD (1 << PDSHIFT) /* # bytes mapped by PD (4MB) */ -#define PDOFSET (NBPD-1) /* mask for non-PD part of VA */ -#if 0 /* not used? 
*/ -#define NPTEPD (NBPD / NBPG) /* # of PTEs in a PD */ -#else -#define PTES_PER_PTP (NBPD / NBPG) /* # of PTEs in a PTP */ -#endif -#define PD_MASK 0xffc00000 /* page directory address bits */ -#define PT_MASK 0x003ff000 /* page table address bits */ /* * here we define the bits of the PDE/PTE, as described above: @@ -173,8 +70,6 @@ typedef u_int32_t pt_entry_t; /* PTE */ #define PG_AVAIL1 0x00000200 /* ignored by hardware */ #define PG_AVAIL2 0x00000400 /* ignored by hardware */ #define PG_AVAIL3 0x00000800 /* ignored by hardware */ -#define PG_FRAME 0xfffff000 /* page frame mask */ -#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ /* * various short-hand protection codes diff --git a/sys/arch/i386/include/tss.h b/sys/arch/i386/include/tss.h index 20f6f38f7d5..7590b8ce3a1 100644 --- a/sys/arch/i386/include/tss.h +++ b/sys/arch/i386/include/tss.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tss.h,v 1.6 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: tss.h,v 1.7 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: tss.h,v 1.6 1995/10/11 04:20:28 mycroft Exp $ */ /*- @@ -50,7 +50,7 @@ struct i386tss { int __tss_ss1; int __tss_esp2; int __tss_ss2; - int tss_cr3; /* page directory paddr */ + int tss_cr3; /* page directory [pointer] paddr */ int __tss_eip; int __tss_eflags; int __tss_eax; diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h index 43edd842463..6174c378725 100644 --- a/sys/arch/i386/include/vmparam.h +++ b/sys/arch/i386/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.34 2006/03/15 17:56:06 mickey Exp $ */ +/* $OpenBSD: vmparam.h,v 1.35 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */ /*- @@ -91,22 +91,23 @@ /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vaddr_t)0) -#define VM_MAXUSER_ADDRESS ((vaddr_t)((PDSLOT_PTE<<PDSHIFT) - USPACE)) -#define VM_MAX_ADDRESS ((vaddr_t)((PDSLOT_PTE<<PDSHIFT) + \ - (PDSLOT_PTE<<PGSHIFT))) +#define VM_MAXUSER_ADDRESS 
((vaddr_t)0xcf800000) +#define VM_MAX_ADDRESS (vm_max_address) +extern vaddr_t vm_max_address; #define VM_MIN_KERNEL_ADDRESS ((vaddr_t)KERNBASE) -#define VM_MAX_KERNEL_ADDRESS ((vaddr_t)(PDSLOT_APTE<<PDSHIFT)) +#define VM_MAX_KERNEL_ADDRESS ((vaddr_t)0xff800000) /* virtual sizes (bytes) for various kernel submaps */ #define VM_PHYS_SIZE (USRIOSIZE*PAGE_SIZE) -#define VM_PHYSSEG_MAX 5 /* actually we could have this many segments */ +#define VM_PHYSSEG_MAX 8 /* actually we could have this many segments */ #define VM_PHYSSEG_STRAT VM_PSTRAT_BSEARCH #define VM_PHYSSEG_NOADD /* can't add RAM after vm_mem_init */ -#define VM_NFREELIST 2 +#define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_FIRST16 1 +#define VM_FREELIST_ABOVE4G 2 /* * pmap specific data stored in the vm_physmem[] array diff --git a/sys/arch/i386/pci/pci_addr_fixup.c b/sys/arch/i386/pci/pci_addr_fixup.c index 63c88142917..d6721f6aeb7 100644 --- a/sys/arch/i386/pci/pci_addr_fixup.c +++ b/sys/arch/i386/pci/pci_addr_fixup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_addr_fixup.c,v 1.16 2005/11/22 09:09:58 mickey Exp $ */ +/* $OpenBSD: pci_addr_fixup.c,v 1.17 2006/04/27 15:37:55 mickey Exp $ */ /* $NetBSD: pci_addr_fixup.c,v 1.7 2000/08/03 20:10:45 nathanw Exp $ */ /*- @@ -45,19 +45,17 @@ #include <i386/pci/pcibiosvar.h> typedef int (*pciaddr_resource_manage_func_t)(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t, int, - struct extent *, int, bus_addr_t *, bus_size_t); + struct extent *, int, u_long *, bus_size_t); void pciaddr_resource_manage(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t, pciaddr_resource_manage_func_t); void pciaddr_resource_reserve(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t); -int pciaddr_do_resource_reserve(struct pcibios_softc *, - pci_chipset_tag_t, pcitag_t, int, struct extent *, int, - bus_addr_t *, bus_size_t); +int pciaddr_do_resource_reserve(struct pcibios_softc *, pci_chipset_tag_t, + pcitag_t, int, struct extent *, int, u_long *, bus_size_t); void 
pciaddr_resource_allocate(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t); -int pciaddr_do_resource_allocate(struct pcibios_softc *, - pci_chipset_tag_t, pcitag_t, int, struct extent *, int, bus_addr_t *, - bus_size_t); +int pciaddr_do_resource_allocate(struct pcibios_softc *, pci_chipset_tag_t, + pcitag_t, int, struct extent *, int, u_long *, bus_size_t); bus_addr_t pciaddr_ioaddr(u_int32_t); void pciaddr_print_devid(pci_chipset_tag_t, pcitag_t); @@ -180,7 +178,7 @@ pciaddr_resource_manage(sc, pc, tag, func) { struct extent *ex; pcireg_t val, mask; - bus_addr_t addr; + u_long addr; bus_size_t size; int error, mapreg, type, reg_start, reg_end, width; @@ -273,7 +271,7 @@ pciaddr_do_resource_allocate(sc, pc, tag, mapreg, ex, type, addr, size) pcitag_t tag; struct extent *ex; int mapreg, type; - bus_addr_t *addr; + u_long *addr; bus_size_t size; { bus_addr_t start; @@ -324,7 +322,7 @@ pciaddr_do_resource_reserve(sc, pc, tag, mapreg, ex, type, addr, size) pcitag_t tag; struct extent *ex; int type, mapreg; - bus_addr_t *addr; + u_long *addr; bus_size_t size; { int error; diff --git a/sys/arch/i386/pci/pci_machdep.c b/sys/arch/i386/pci/pci_machdep.c index a84bd091173..c4378af043f 100644 --- a/sys/arch/i386/pci/pci_machdep.c +++ b/sys/arch/i386/pci/pci_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_machdep.c,v 1.30 2005/11/23 09:24:57 mickey Exp $ */ +/* $OpenBSD: pci_machdep.c,v 1.31 2006/04/27 15:37:55 mickey Exp $ */ /* $NetBSD: pci_machdep.c,v 1.28 1997/06/06 23:29:17 thorpej Exp $ */ /*- @@ -113,9 +113,7 @@ extern bios_pciinfo_t *bios_pciinfo; #endif #include "pcibios.h" -#if NPCIBIOS > 0 #include <i386/pci/pcibiosvar.h> -#endif int pci_mode = -1; diff --git a/sys/arch/i386/pci/pcibios.c b/sys/arch/i386/pci/pcibios.c index a4784398e20..ea83f89717e 100644 --- a/sys/arch/i386/pci/pcibios.c +++ b/sys/arch/i386/pci/pcibios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pcibios.c,v 1.32 2005/01/08 18:17:58 mickey Exp $ */ +/* $OpenBSD: pcibios.c,v 1.33 2006/04/27 15:37:55 mickey Exp $ 
*/ /* $NetBSD: pcibios.c,v 1.5 2000/08/01 05:23:59 uch Exp $ */ /* @@ -270,7 +270,7 @@ pcibios_pir_init(sc) for (i = 0; i < pirh->tablesize; i++) cksum += p[i]; - printf("%s: PCI IRQ Routing Table rev %d.%d @ 0x%lx/%d " + printf("%s: PCI IRQ Routing Table rev %d.%d @ 0x%llx/%d " "(%d entries)\n", sc->sc_dev.dv_xname, pirh->version >> 8, pirh->version & 0xff, pa, pirh->tablesize, (pirh->tablesize - sizeof(*pirh)) / 16); |