Diffstat (limited to 'sys'): 27 files changed, 3421 insertions, 727 deletions
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index d3130003116..72a268d8bb6 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.144 2006/03/04 16:27:03 grange Exp $ +# $OpenBSD: files.i386,v 1.145 2006/04/27 15:37:48 mickey Exp $ # # new style config file for i386 architecture # @@ -32,6 +32,7 @@ file arch/i386/i386/k6_mem.c mtrr file arch/i386/i386/microtime.s file arch/i386/i386/p4tcc.c !small_kernel & i686_cpu file arch/i386/i386/pmap.c +file arch/i386/i386/pmapae.c !small_kernel file arch/i386/i386/powernow.c !small_kernel & i586_cpu file arch/i386/i386/powernow-k7.c !small_kernel & i686_cpu file arch/i386/i386/powernow-k8.c !small_kernel & i686_cpu diff --git a/sys/arch/i386/i386/autoconf.c b/sys/arch/i386/i386/autoconf.c index 117d866677e..6d872952748 100644 --- a/sys/arch/i386/i386/autoconf.c +++ b/sys/arch/i386/i386/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: autoconf.c,v 1.57 2006/03/15 20:04:36 miod Exp $ */ +/* $OpenBSD: autoconf.c,v 1.58 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: autoconf.c,v 1.20 1996/05/03 19:41:56 christos Exp $ */ /*- @@ -45,6 +45,7 @@ */ #include <sys/param.h> #include <sys/systm.h> +#include <sys/user.h> #include <sys/buf.h> #include <sys/dkstat.h> #include <sys/disklabel.h> @@ -108,6 +109,9 @@ cpu_configure() gdt_init(); /* XXX - pcibios uses gdt stuff */ +#ifndef SMALL_KERNEL + pmap_bootstrap_pae(); +#endif if (config_rootfound("mainbus", NULL) == NULL) panic("cpu_configure: mainbus not configured"); diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index 95d82a9d06b..4d86a3af627 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.17 2006/01/12 22:39:20 weingart Exp $ */ +/* $OpenBSD: cpu.c,v 1.18 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ /*- @@ -253,8 +253,7 @@ cpu_attach(parent, self, aux) pcb->pcb_tss.tss_esp = kstack + USPACE - 16 - sizeof (struct trapframe); pcb->pcb_pmap = pmap_kernel(); - pcb->pcb_cr3 = vtophys((vaddr_t)pcb->pcb_pmap->pm_pdir); - /* pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdir - KERNBASE; XXX ??? */ + pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; cpu_default_ldt(ci); /* Use the `global' ldt until one alloc'd */ #endif @@ -417,7 +416,7 @@ cpu_boot_secondary (ci) printf("%s: starting", ci->ci_dev.dv_xname); /* XXX move elsewhere, not per CPU. 
*/ - mp_pdirpa = vtophys((vaddr_t)kpm->pm_pdir); + mp_pdirpa = kpm->pm_pdirpa; pcb = ci->ci_idle_pcb; diff --git a/sys/arch/i386/i386/db_memrw.c b/sys/arch/i386/i386/db_memrw.c index 979c327178b..a282d61c9a7 100644 --- a/sys/arch/i386/i386/db_memrw.c +++ b/sys/arch/i386/i386/db_memrw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_memrw.c,v 1.8 2005/11/22 12:52:55 mickey Exp $ */ +/* $OpenBSD: db_memrw.c,v 1.9 2006/04/27 15:37:50 mickey Exp $ */ /* $NetBSD: db_memrw.c,v 1.6 1999/04/12 20:38:19 pk Exp $ */ /* @@ -63,28 +63,19 @@ db_read_bytes(vaddr_t addr, size_t size, char *data) void db_write_bytes(vaddr_t addr, size_t size, char *data) { - char *dst; - - pt_entry_t *ptep0 = 0; - pt_entry_t oldmap0 = { 0 }; - vaddr_t addr1; - pt_entry_t *ptep1 = 0; - pt_entry_t oldmap1 = { 0 }; extern char etext; + u_int32_t bits, bits1; + vaddr_t addr1 = 0; + char *dst; if (addr >= VM_MIN_KERNEL_ADDRESS && addr < (vaddr_t)&etext) { - ptep0 = kvtopte(addr); - oldmap0 = *ptep0; - *(int *)ptep0 |= /* INTEL_PTE_WRITE */ PG_RW; + bits = pmap_pte_setbits(addr, PG_RW, 0) & PG_RW; addr1 = trunc_page(addr + size - 1); - if (trunc_page(addr) != addr1) { + if (trunc_page(addr) != addr1) /* data crosses a page boundary */ - ptep1 = kvtopte(addr1); - oldmap1 = *ptep1; - *(int *)ptep1 |= /* INTEL_PTE_WRITE */ PG_RW; - } + bits1 = pmap_pte_setbits(addr1, PG_RW, 0) & PG_RW; tlbflush(); } @@ -93,10 +84,10 @@ db_write_bytes(vaddr_t addr, size_t size, char *data) while (size-- > 0) *dst++ = *data++; - if (ptep0) { - *ptep0 = oldmap0; - if (ptep1) - *ptep1 = oldmap1; + if (addr1) { + pmap_pte_setbits(addr, 0, bits); + if (bits1) + pmap_pte_setbits(addr1, 0, bits1); tlbflush(); } } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index 17ea5424f2d..cb328569a85 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.23 2006/01/12 22:39:20 weingart Exp $ +# $OpenBSD: genassym.cf,v 1.24 2006/04/27 15:37:50 mickey Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. 
@@ -80,7 +80,6 @@ export PDSLOT_KERN export PDSLOT_PTE export PDSLOT_APTE export NKPTP_MIN -export NKPTP_MAX # values for virtual memory export VM_MAXUSER_ADDRESS diff --git a/sys/arch/i386/i386/kgdb_machdep.c b/sys/arch/i386/i386/kgdb_machdep.c index a7cf21cacad..2520d07fd93 100644 --- a/sys/arch/i386/i386/kgdb_machdep.c +++ b/sys/arch/i386/i386/kgdb_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kgdb_machdep.c,v 1.4 2005/11/13 17:50:44 fgsch Exp $ */ +/* $OpenBSD: kgdb_machdep.c,v 1.5 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: kgdb_machdep.c,v 1.6 1998/08/13 21:36:03 thorpej Exp $ */ /*- @@ -97,15 +97,13 @@ kgdb_acc(va, len) size_t len; { vaddr_t last_va; - pt_entry_t *pte; last_va = va + len; va &= ~PGOFSET; last_va &= ~PGOFSET; do { - pte = kvtopte(va); - if ((*pte & PG_V) == 0) + if ((pmap_pte_bits(va) & PG_V) == 0) return (0); va += NBPG; } while (va < last_va); diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c index 77bb270c8b6..8c455833c69 100644 --- a/sys/arch/i386/i386/lapic.c +++ b/sys/arch/i386/i386/lapic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lapic.c,v 1.7 2006/03/13 18:42:16 mickey Exp $ */ +/* $OpenBSD: lapic.c,v 1.8 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */ /*- @@ -77,9 +77,8 @@ void lapic_map(lapic_base) paddr_t lapic_base; { - int s; - pt_entry_t *pte; vaddr_t va = (vaddr_t)&local_apic; + int s; disable_intr(); s = lapic_tpr; @@ -93,8 +92,7 @@ lapic_map(lapic_base) * might have changed the value of cpu_number().. */ - pte = kvtopte(va); - *pte = lapic_base | PG_RW | PG_V | PG_N; + pmap_pte_set(va, lapic_base, PG_RW | PG_V | PG_N); invlpg(va); #ifdef MULTIPROCESSOR diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index 4e0a0eb3e92..91010290ac4 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.99 2006/04/19 14:19:30 mickey Exp $ */ +/* $OpenBSD: locore.s,v 1.100 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -200,6 +200,7 @@ .globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx) .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) .globl _C_LABEL(esym) + .globl _C_LABEL(nkptp_max) .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr) .globl _C_LABEL(gdt) @@ -531,9 +532,9 @@ try586: /* Use the `cpuid' instruction. */ * 0 1 2 3 */ #define PROC0PDIR ((0) * NBPG) -#define PROC0STACK ((1) * NBPG) -#define SYSMAP ((1+UPAGES) * NBPG) -#define TABLESIZE ((1+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ +#define PROC0STACK ((4) * NBPG) +#define SYSMAP ((4+UPAGES) * NBPG) +#define TABLESIZE ((4+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ /* Clear the BSS. */ movl $RELOC(_C_LABEL(edata)),%edi @@ -572,9 +573,9 @@ try586: /* Use the `cpuid' instruction. */ jge 1f movl $NKPTP_MIN,%ecx # set at min jmp 2f -1: cmpl $NKPTP_MAX,%ecx # larger than max? +1: cmpl RELOC(_C_LABEL(nkptp_max)),%ecx # larger than max? jle 2f - movl $NKPTP_MAX,%ecx + movl RELOC(_C_LABEL(nkptp_max)),%ecx 2: movl %ecx,RELOC(_C_LABEL(nkpde)) # and store it back /* Clear memory for bootstrap tables. */ @@ -659,6 +660,8 @@ try586: /* Use the `cpuid' instruction. */ /* Install a PDE recursively mapping page directory as a page table! 
*/ leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot + addl $NBPG, %eax # pte for ptd[1] + movl %eax,(PROC0PDIR+(PDSLOT_PTE+1)*4)(%esi) # recursive PD slot /* Save phys. addr of PTD, for libkvm. */ movl %esi,RELOC(_C_LABEL(PTDpaddr)) @@ -2310,6 +2313,40 @@ ENTRY(i686_pagezero) ret #endif +#ifndef SMALL_KERNEL +/* + * int cpu_paenable(void *); + */ +ENTRY(cpu_paenable) + movl $-1, %eax + testl $CPUID_PAE, _C_LABEL(cpu_feature) + jz 1f + + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl %cr3, %edi + orl $0xfe0, %edi /* PDPT will be in the last four slots! */ + movl %edi, %cr3 + addl $KERNBASE, %edi /* and make it back virtual again */ + movl $8, %ecx + cld + rep + movsl + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 /* BANG!!! */ + movl 12(%esp), %eax + subl $KERNBASE, %eax + movl %eax, %cr3 /* reload real PDPT */ + + xorl %eax, %eax + popl %edi + popl %esi +1: + ret +#endif /* !SMALL_KERNEL */ + #if NLAPIC > 0 #include <i386/i386/apicvec.s> #endif diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 61479795f5e..5291fb73dda 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.352 2006/04/18 17:39:15 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.353 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -218,6 +218,9 @@ int bufcachepercent = BUFCACHEPERCENT; extern int boothowto; int physmem; +#ifndef SMALL_KERNEL +int pae_copy; +#endif struct dumpmem { paddr_t start; @@ -243,7 +246,7 @@ int i386_has_sse2; int i386_has_xcrypt; bootarg_t *bootargp; -paddr_t avail_end; +paddr_t avail_end, avail_end2; struct vm_map *exec_map = NULL; struct vm_map *phys_map = NULL; @@ -323,6 +326,12 @@ int allowaperture = 0; #endif #endif +#ifdef I686_PAE +int cpu_pae = 1; +#else +int cpu_pae = 0; +#endif + void winchip_cpu_setup(struct cpu_info *); void amd_family5_setup(struct cpu_info *); void amd_family6_setup(struct cpu_info *); @@ -416,7 +425,8 @@ cpu_startup() curcpu()->ci_feature_flags = cpu_feature; identifycpu(curcpu()); - printf("real mem = %u (%uK)\n", ctob(physmem), ctob(physmem)/1024U); + printf("real mem = %llu (%uK)\n", ctob((paddr_t)physmem), + ctob((paddr_t)physmem)/1024U); /* * Find out how much space we need, allocate it, @@ -447,8 +457,8 @@ cpu_startup() phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE, 0, FALSE, NULL); - printf("avail mem = %lu (%uK)\n", ptoa(uvmexp.free), - ptoa(uvmexp.free)/1024U); + printf("avail mem = %llu (%uK)\n", ptoa((paddr_t)uvmexp.free), + ptoa((paddr_t)uvmexp.free) / 1024U); printf("using %d buffers containing %u bytes (%uK) of memory\n", nbuf, bufpages * PAGE_SIZE, bufpages * PAGE_SIZE / 1024); @@ -2708,7 +2718,6 @@ fix_f00f(void) { struct region_descriptor region; vaddr_t va; - pt_entry_t *pte; void *p; /* Allocate two new pages */ @@ -2724,8 +2733,7 @@ fix_f00f(void) GCODE_SEL); /* Map first page RO */ - pte = PTE_BASE + atop(va); - *pte &= ~PG_RW; + pmap_pte_setbits(va, 0, PG_RW); /* Reload idtr */ setregion(®ion, idt, sizeof(idt_region) - 1); @@ -2880,11 +2888,11 @@ init386(paddr_t first_avail) if (bootargc > NBPG) panic("too many boot args"); - if (extent_alloc_region(iomem_ex, (paddr_t)bootargv, bootargc, + if (extent_alloc_region(iomem_ex, (u_long)bootargv, bootargc, EX_NOWAIT)) panic("cannot reserve /boot args memory"); - pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (paddr_t)bootargv, + 
pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (u_long)bootargv, VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); @@ -2898,15 +2906,6 @@ init386(paddr_t first_avail) panic("no BIOS memory map supplied"); #endif -#if defined(MULTIPROCESSOR) - /* install the page after boot args as PT page for first 4M */ - pmap_enter(pmap_kernel(), (u_long)vtopte(0), - round_page((vaddr_t)(bootargv + bootargc)), - VM_PROT_READ|VM_PROT_WRITE, - VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); - memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */ -#endif - /* * account all the memory passed in the map from /boot * calculate avail_end and count the physmem. @@ -2919,27 +2918,12 @@ init386(paddr_t first_avail) for(i = 0, im = bios_memmap; im->type != BIOS_MAP_END; im++) if (im->type == BIOS_MAP_FREE) { register paddr_t a, e; -#ifdef DEBUG - printf(" %llx-%llx", im->addr, im->addr + im->size); -#endif - - if (im->addr >= 0x100000000ULL) { -#ifdef DEBUG - printf("-H"); -#endif - continue; - } a = round_page(im->addr); - if (im->addr + im->size <= 0xfffff000ULL) - e = trunc_page(im->addr + im->size); - else { + e = trunc_page(im->addr + im->size); #ifdef DEBUG - printf("-T"); + printf(" %llx-%llx", a, e); #endif - e = 0xfffff000; - } - /* skip first eight pages */ if (a < 8 * NBPG) a = 8 * NBPG; @@ -2959,7 +2943,16 @@ init386(paddr_t first_avail) continue; } - if (extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT)) + if (a >= 0x100000000ULL) { +#ifdef DEBUG + printf("-H"); +#endif + if (!cpu_pae) + continue; + } + + if (e <= 0x100000000ULL && + extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT)) /* XXX What should we do? */ printf("\nWARNING: CAN'T ALLOCATE RAM (%x-%x)" " FROM IOMEM EXTENT MAP!\n", a, e); @@ -2968,11 +2961,15 @@ init386(paddr_t first_avail) dumpmem[i].start = atop(a); dumpmem[i].end = atop(e); i++; - avail_end = max(avail_end, e); + avail_end2 = MAX(avail_end2, e); + if (avail_end2 < 0x100000000ULL) + avail_end = avail_end2; } ndumpmem = i; avail_end -= round_page(MSGBUFSIZE); + if (avail_end2 < 0x100000000ULL) + avail_end2 = avail_end; #ifdef DEBUG printf(": %lx\n", avail_end); @@ -3003,30 +3000,34 @@ init386(paddr_t first_avail) e = dumpmem[i].end; if (a < atop(first_avail) && e > atop(first_avail)) a = atop(first_avail); - if (e > atop(avail_end)) + if (a < atop(avail_end) && e > atop(avail_end)) e = atop(avail_end); if (a < e) { if (a < atop(16 * 1024 * 1024)) { lim = MIN(atop(16 * 1024 * 1024), e); #ifdef DEBUG - printf(" %x-%x (<16M)", a, lim); + printf(" %llx-%llx (<16M)", a, lim); #endif uvm_page_physload(a, lim, a, lim, VM_FREELIST_FIRST16); if (e > lim) { #ifdef DEBUG - printf(" %x-%x", lim, e); + printf(" %llx-%llx", lim, e); #endif uvm_page_physload(lim, e, lim, e, VM_FREELIST_DEFAULT); } } else { #ifdef DEBUG - printf(" %x-%x", a, e); + printf(" %llx-%llx", a, e); #endif - uvm_page_physload(a, e, a, e, - VM_FREELIST_DEFAULT); + if (a >= atop(0x100000000ULL)) + uvm_page_physload(a, e, a, a - 1, + VM_FREELIST_ABOVE4G); + else + uvm_page_physload(a, e, a, e, + VM_FREELIST_DEFAULT); } } } @@ -3464,8 +3465,8 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) bus_space_handle_t *bshp; { u_long pa, endpa; + u_int32_t bits; vaddr_t va; - pt_entry_t *pte; bus_size_t map_size; #ifdef MULTIPROCESSOR u_int32_t cpumask = 0; @@ -3497,13 +3498,12 @@ bus_mem_add_mapping(bpa, size, cacheable, bshp) * on those machines. 
*/ if (cpu_class != CPUCLASS_386) { - pte = kvtopte(va); if (cacheable) - *pte &= ~PG_N; + bits = pmap_pte_setbits(va, 0, PG_N); else - *pte |= PG_N; + bits = pmap_pte_setbits(va, PG_N, 0); #ifdef MULTIPROCESSOR - pmap_tlb_shootdown(pmap_kernel(), va, *pte, + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); #else pmap_update_pg(va); @@ -3526,7 +3526,7 @@ bus_space_unmap(t, bsh, size) { struct extent *ex; u_long va, endva; - bus_addr_t bpa; + paddr_t bpa; /* * Find the correct extent and bus physical address. @@ -3536,7 +3536,7 @@ bus_space_unmap(t, bsh, size) bpa = bsh; } else if (t == I386_BUS_SPACE_MEM) { ex = iomem_ex; - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); + bpa = (u_long)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; @@ -3572,7 +3572,7 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, bus_addr_t *adrp) { u_long va, endva; - bus_addr_t bpa; + paddr_t bpa; /* * Find the correct bus physical address. @@ -3580,7 +3580,7 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, if (t == I386_BUS_SPACE_IO) { bpa = bsh; } else if (t == I386_BUS_SPACE_MEM) { - bpa = (bus_addr_t)ISA_PHYSADDR(bsh); + bpa = (u_long)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; @@ -3603,9 +3603,8 @@ _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, panic("bus_space_unmap: bad bus space tag"); ok: - if (adrp != NULL) { + if (adrp != NULL) *adrp = bpa; - } } void @@ -3647,6 +3646,7 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) struct i386_bus_dmamap *map; void *mapstore; size_t mapsize; + int npages; /* * Allocate and initialize the DMA map. The end of the map @@ -3662,6 +3662,17 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) */ mapsize = sizeof(struct i386_bus_dmamap) + (sizeof(bus_dma_segment_t) * (nsegments - 1)); + npages = 0; +#ifndef SMALL_KERNEL + if (avail_end2 > avail_end && + (flags & (BUS_DMA_64BIT|BUS_DMA_24BIT)) == 0) { + /* this many pages plus one in case we get split */ + npages = round_page(size) / PAGE_SIZE + 1; + if (npages < nsegments) /* looks stupid, but possible */ + npages = nsegments; + mapsize += sizeof(struct vm_page *) * npages; + } +#endif /* !SMALL_KERNEL */ if ((mapstore = malloc(mapsize, M_DEVBUF, (flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK)) == NULL) return (ENOMEM); @@ -3672,10 +3683,55 @@ _bus_dmamap_create(t, size, nsegments, maxsegsz, boundary, flags, dmamp) map->_dm_segcnt = nsegments; map->_dm_maxsegsz = maxsegsz; map->_dm_boundary = boundary; + map->_dm_pages = npages? 
(void *)&map->dm_segs[nsegments] : NULL; + map->_dm_npages = npages; map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT); map->dm_mapsize = 0; /* no valid mappings */ map->dm_nsegs = 0; +#ifndef SMALL_KERNEL + if (npages) { + struct pglist mlist; + vaddr_t va; + int error; + + size = npages << PGSHIFT; + va = uvm_km_valloc(kernel_map, size); + if (va == 0) { + map->_dm_npages = 0; + free(map, M_DEVBUF); + return (ENOMEM); + } + + TAILQ_INIT(&mlist); + /* if not a 64bit map -- allocate some bouncy-bouncy */ + error = uvm_pglistalloc(size, + round_page(ISA_DMA_BOUNCE_THRESHOLD), 0xfffff000, + PAGE_SIZE, boundary, &mlist, nsegments, + (flags & BUS_DMA_NOWAIT) == 0); + if (error) { + map->_dm_npages = 0; + uvm_km_free(kernel_map, (vaddr_t)va, size); + free(map, M_DEVBUF); + return (ENOMEM); + } else { + struct vm_page **pg = map->_dm_pages; + + npages--; + *pg = TAILQ_FIRST(&mlist); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(*pg), + VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED); + for (pg++, va += PAGE_SIZE; npages--; + pg++, va += PAGE_SIZE) { + *pg = TAILQ_NEXT(pg[-1], pageq); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(*pg), + VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED); + } + } + map->_dm_pgva = va; + } +#endif /* !SMALL_KERNEL */ + *dmamp = map; return (0); } @@ -3706,7 +3762,7 @@ _bus_dmamap_load(t, map, buf, buflen, p, flags) struct proc *p; int flags; { - bus_addr_t lastaddr; + paddr_t lastaddr; int seg, error; /* @@ -3887,6 +3943,7 @@ _bus_dmamap_unload(t, map) */ map->dm_mapsize = 0; map->dm_nsegs = 0; + map->_dm_nused = 0; } /* @@ -3894,15 +3951,47 @@ _bus_dmamap_unload(t, map) * by bus-specific DMA map synchronization functions. */ void -_bus_dmamap_sync(t, map, addr, size, op) +_bus_dmamap_sync(t, map, offset, size, op) bus_dma_tag_t t; bus_dmamap_t map; - bus_addr_t addr; + bus_addr_t offset; bus_size_t size; int op; { +#ifndef SMALL_KERNEL + bus_dma_segment_t *sg; + int i, off = offset; + bus_size_t l; + + /* scan the segment list performing necessary copies */ + if (!(map->_dm_flags & BUS_DMA_64BIT) && map->_dm_nused) { + for (i = map->_dm_segcnt, sg = map->dm_segs; + size && i--; sg++) { + if (off >= sg->ds_len) { + off -= sg->ds_len; + continue; + } - /* Nothing to do here. */ + l = sg->ds_len - off; + if (l > size) + l = size; + size -= l; + if (sg->ds_addr2) { + if (op & BUS_DMASYNC_POSTREAD) { + bcopy((void *)(sg->ds_va2 + off), + (void *)(sg->ds_va + off), l); + pae_copy++; + } + if (op & BUS_DMASYNC_PREWRITE) { + bcopy((void *)(sg->ds_va + off), + (void *)(sg->ds_va2 + off), l); + pae_copy++; + } + } + off = 0; + } + } +#endif /* !SMALL_KERNEL */ } /* @@ -4072,8 +4161,8 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) int first; { bus_size_t sgsize; - bus_addr_t curaddr, lastaddr, baddr, bmask; - vaddr_t vaddr = (vaddr_t)buf; + paddr_t curaddr, lastaddr, oaddr, baddr, bmask; + vaddr_t pgva, vaddr = (vaddr_t)buf; int seg; pmap_t pmap; @@ -4089,7 +4178,24 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) /* * Get the physical address for this segment. 
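The bus_dma changes above give every map created without BUS_DMA_64BIT a set of bounce pages allocated below 4GB, and _bus_dmamap_sync() copies through them on PREWRITE/POSTREAD; the load path in the hunks just below substitutes the bounce page's physical address whenever the real buffer sits above 4GB. A hypothetical driver fragment (sc and sc_dmat are assumed names, not from this commit) that opts out of bouncing would look like:

	/*
	 * Hypothetical attach fragment: a device that can address 64 bits
	 * passes BUS_DMA_64BIT so the bounce pages above are never
	 * allocated or copied; leaving the flag out keeps the old
	 * below-4GB behaviour at the price of copies in _bus_dmamap_sync().
	 */
	bus_dmamap_t map;

	if (bus_dmamap_create(sc->sc_dmat, MAXPHYS, 1, MAXPHYS, 0,
	    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &map) != 0)
		return;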
*/ - pmap_extract(pmap, vaddr, (paddr_t *)&curaddr); + pmap_extract(pmap, vaddr, &curaddr); + oaddr = 0; + pgva = 0; +#ifndef SMALL_KERNEL + if (!(map->_dm_flags & BUS_DMA_64BIT) && + curaddr >= 0x100000000ULL) { + struct vm_page *pg; + int page, off; + + if (map->_dm_nused + 1 >= map->_dm_npages) + return (ENOMEM); + off = vaddr & PAGE_MASK; + pg = map->_dm_pages[page = map->_dm_nused++]; + oaddr = curaddr; + curaddr = VM_PAGE_TO_PHYS(pg) + off; + pgva = map->_dm_pgva + (page << PGSHIFT) + off; + } +#endif /* !SMALL_KERNEL */ /* * Compute the segment size, and adjust counts. @@ -4113,7 +4219,10 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) */ if (first) { map->dm_segs[seg].ds_addr = curaddr; + map->dm_segs[seg].ds_addr2 = oaddr; map->dm_segs[seg].ds_len = sgsize; + map->dm_segs[seg].ds_va = vaddr; + map->dm_segs[seg].ds_va2 = pgva; first = 0; } else { if (curaddr == lastaddr && @@ -4127,7 +4236,10 @@ _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, lastaddrp, segp, first) if (++seg >= map->_dm_segcnt) break; map->dm_segs[seg].ds_addr = curaddr; + map->dm_segs[seg].ds_addr2 = oaddr; map->dm_segs[seg].ds_len = sgsize; + map->dm_segs[seg].ds_va = vaddr; + map->dm_segs[seg].ds_va2 = pgva; } } @@ -4170,6 +4282,19 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs, /* Always round the size. */ size = round_page(size); + if (flags & BUS_DMA_64BIT) { + if (high > 0x100000000ULL && low < 0x100000000ULL) + low = 0x100000000ULL; + } else if (high > 0x100000000ULL) { + if (low >= 0x100000000ULL) { +#ifdef DIAGNOSTIC + printf("_bus_dmamem_alloc_range: " + "32bit request in above 4GB space\n"); +#endif + return (EINVAL); + } else + high = 0x100000000ULL; + } TAILQ_INIT(&mlist); /* @@ -4215,7 +4340,6 @@ _bus_dmamem_alloc_range(t, size, alignment, boundary, segs, nsegs, rsegs, } lastaddr = curaddr; } - *rsegs = curseg + 1; return (0); diff --git a/sys/arch/i386/i386/mpbios.c b/sys/arch/i386/i386/mpbios.c index ad9e667364c..5a8ba2f5829 100644 --- a/sys/arch/i386/i386/mpbios.c +++ b/sys/arch/i386/i386/mpbios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mpbios.c,v 1.8 2006/04/18 17:42:24 kettenis Exp $ */ +/* $OpenBSD: mpbios.c,v 1.9 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: mpbios.c,v 1.2 2002/10/01 12:56:57 fvdl Exp $ */ /*- @@ -1043,7 +1043,7 @@ mpbios_ioapic(ent, self) aaa.aaa_name = "ioapic"; aaa.apic_id = entry->apic_id; aaa.apic_version = entry->apic_version; - aaa.apic_address = (paddr_t)entry->apic_address; + aaa.apic_address = (u_long)entry->apic_address; aaa.flags = (mp_fps->mpfb2 & 0x80) ? IOAPIC_PICMODE : IOAPIC_VWIRE; config_found_sm(self, &aaa, mp_print, mp_match); diff --git a/sys/arch/i386/i386/mptramp.s b/sys/arch/i386/i386/mptramp.s index 75621628849..38e83191ff2 100644 --- a/sys/arch/i386/i386/mptramp.s +++ b/sys/arch/i386/i386/mptramp.s @@ -1,4 +1,4 @@ -/* $OpenBSD: mptramp.s,v 1.5 2006/03/14 14:44:37 mickey Exp $ */ +/* $OpenBSD: mptramp.s,v 1.6 2006/04/27 15:37:51 mickey Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -165,10 +165,20 @@ _TRMP_LABEL(mp_startup) /* Load base of page directory and enable mapping. */ movl %ecx,%cr3 # load ptd addr into mmu - movl %cr0,%eax # get control word - # enable paging & NPX emulation - orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax - movl %eax,%cr0 # and let's page NOW! 
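In the mptramp.s hunk that follows, a secondary CPU decides whether to set CR4_PAE by checking which flavour of the PTE accessors pmap_bootstrap_pae() installed on the boot processor. A minimal sketch of that dispatch, using the pointer and function names that appear in this diff but with illustrative bodies only:

/*
 * Sketch only: pmap_pte_set_p and the _86/_pae functions are named in
 * this commit, but the real pmap_bootstrap_pae() does more work
 * (building the PDPT, calling cpu_paenable(), remapping the kernel).
 */
u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t) = pmap_pte_set_86;
u_int32_t (*pmap_pte_bits_p)(vaddr_t) = pmap_pte_bits_86;

#define pmap_pte_set	(*pmap_pte_set_p)
#define pmap_pte_bits	(*pmap_pte_bits_p)

void
pmap_bootstrap_pae(void)
{
	if (!(cpu_feature & CPUID_PAE))
		return;				/* CPU lacks PAE */
	/* ... allocate PDPT + 4-page directory, cpu_paenable(pdpt) ... */
	pmap_pte_set_p = pmap_pte_set_pae;	/* APs key off this pointer */
	pmap_pte_bits_p = pmap_pte_bits_pae;	/* (assumed name) */
}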
+#ifndef SMALL_KERNEL + movl $_C_LABEL(pmap_pte_set_pae),%eax + cmpl RELOC(_C_LABEL(pmap_pte_set_p)),%eax + jne nopae + + movl %cr4,%eax + orl $CR4_PAE,%eax + movl %eax, %cr4 +nopae: +#endif + movl %cr0,%eax # get control word + # enable paging & NPX emulation + orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax + movl %eax,%cr0 # and let's page NOW! #ifdef MPDEBUG leal _C_LABEL(cpu_trace),%edi diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 765b2b9d233..ec49253f621 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.91 2006/03/13 18:42:16 mickey Exp $ */ +/* $OpenBSD: pmap.c,v 1.92 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -86,15 +86,6 @@ #endif /* - * general info: - * - * - for an explanation of how the i386 MMU hardware works see - * the comments in <machine/pte.h>. - * - * - for an explanation of the general memory structure used by - * this pmap (including the recursive mapping), see the comments - * in <machine/pmap.h>. - * * this file contains the code for the "pmap module." the module's * job is to manage the hardware's virtual to physical address mappings. * note that there are two levels of mapping in the VM system: @@ -131,7 +122,181 @@ * if we run out of pv_entry's we allocate a new pv_page and free * its pv_entrys. */ - +/* + * i386 MMU hardware structure: + * + * the i386 MMU is a two-level MMU which maps 4GB of virtual memory. + * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium + * processors can support a 4MB pagesize as well. + * + * the first level table (segment table?) is called a "page directory" + * and it contains 1024 page directory entries (PDEs). each PDE is + * 4 bytes (an int), so a PD fits in a single 4K page. this page is + * the page directory page (PDP). each PDE in a PDP maps 4MB of space + * (1024 * 4MB = 4GB). a PDE contains the physical address of the + * second level table: the page table. or, if 4MB pages are being used, + * then the PDE contains the PA of the 4MB page being mapped. + * + * a page table consists of 1024 page table entries (PTEs). each PTE is + * 4 bytes (an int), so a page table also fits in a single 4K page. a + * 4K page being used as a page table is called a page table page (PTP). + * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains + * the physical address of the page it maps and some flag bits (described + * below). + * + * the processor has a special register, "cr3", which points to the + * the PDP which is currently controlling the mappings of the virtual + * address space. + * + * the following picture shows the translation process for a 4K page: + * + * %cr3 register [PA of PDP] + * | + * | + * | bits <31-22> of VA bits <21-12> of VA bits <11-0> + * | index the PDP (0 - 1023) index the PTP are the page offset + * | | | | + * | v | | + * +--->+----------+ | | + * | PD Page | PA of v | + * | |---PTP-------->+------------+ | + * | 1024 PDE | | page table |--PTE--+ | + * | entries | | (aka PTP) | | | + * +----------+ | 1024 PTE | | | + * | entries | | | + * +------------+ | | + * | | + * bits <31-12> bits <11-0> + * p h y s i c a l a d d r + * + * the i386 caches PTEs in a TLB. it is important to flush out old + * TLB mappings when making a change to a mappings. writing to the + * %cr3 will flush the entire TLB. 
newer processors also have an + * instruction that will invalidate the mapping of a single page (which + * is useful if you are changing a single mappings because it preserves + * all the cached TLB entries). + * + * as shows, bits 31-12 of the PTE contain PA of the page being mapped. + * the rest of the PTE is defined as follows: + * bit# name use + * 11 n/a available for OS use, hardware ignores it + * 10 n/a available for OS use, hardware ignores it + * 9 n/a available for OS use, hardware ignores it + * 8 G global bit (see discussion below) + * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>) + * 6 D dirty (modified) page + * 5 A accessed (referenced) page + * 4 PCD cache disable + * 3 PWT prevent write through (cache) + * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) + * 1 R/W read/write bit (0=read only, 1=read-write) + * 0 P present (valid) + * + * notes: + * - on the i386 the R/W bit is ignored if processor is in supervisor + * state (bug!) + * - PS is only supported on newer processors + * - PTEs with the G bit are global in the sense that they are not + * flushed from the TLB when %cr3 is written (to flush, use the + * "flush single page" instruction). this is only supported on + * newer processors. this bit can be used to keep the kernel's + * TLB entries around while context switching. since the kernel + * is mapped into all processes at the same place it does not make + * sense to flush these entries when switching from one process' + * pmap to another. + */ +/* + * A pmap describes a process' 4GB virtual address space. This + * virtual address space can be broken up into 1024 4MB regions which + * are described by PDEs in the PDP. The PDEs are defined as follows: + * + * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. + * The following assumes that KERNBASE is 0xd0000000. + * + * PDE#s VA range Usage + * 0->831 0x0 -> 0xcfc00000 user address space, note that the + * max user address is 0xcfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used. + * 831 0xcfc00000-> recursive mapping of PDP (used for + * 0xd0000000 linear mapping of PTPs). + * 832->1023 0xd0000000-> kernel address space (constant + * 0xffc00000 across all pmaps/processes). + * 1023 0xffc00000-> "alternate" recursive PDP mapping + * <end> (for other pmaps). + * + * + * Note: A recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. In other + * words, the PTE for page 0 is the first int mapped into the 4MB recursive + * area. The PTE for page 1 is the second int. The very last int in the + * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). + * + * All pmaps' PDs must have the same values in slots 832->1023 so that + * the kernel is always mapped in every process. These values are loaded + * into the PD at pmap creation time. + * + * At any one time only one pmap can be active on a processor. This is + * the pmap whose PDP is pointed to by processor register %cr3. This pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slot #831 as show above). 
When the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * Address of PTE = (831 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xcfc00000 + (VA / 4096) * 4 + * + * What happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? In that + * case we take the PA of the PDP of non-active pmap and put it in + * slot 1023 of the active pmap. This causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). + * + * The following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 + * | | + * | | + * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000 + * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xe0400000) + * | | + * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) + * +----+ + * + * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE". + * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE". + * + * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a + * PTP: + * + * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000 + * | | + * | | + * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000 + * | 832| -> maps the contents of first kernel PTP + * | | + * |1023| + * +----+ + * + * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * To set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * Note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xfffff000). + */ /* * memory allocation * @@ -182,7 +347,6 @@ * save VA for later call to [a], go to plan 3. * If we fail, we simply let pmap_enter() tell UVM about it. 
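The address arithmetic quoted just above is easy to sanity-check outside the kernel. A minimal user-space sketch, using only the non-PAE constants given in the comment (PDSLOT_PTE == 831, 4K pages, 4MB per PDE):

#include <stdio.h>
#include <stdint.h>

#define NBPG		4096u
#define NBPD		(1024u * NBPG)		/* 4MB mapped per PDE */
#define PDSLOT_PTE	831u			/* recursive PD slot */
#define PTE_BASE	(PDSLOT_PTE * NBPD)	/* == 0xcfc00000 */

int
main(void)
{
	uint32_t va = 0xd0400123;		/* arbitrary kernel VA */
	uint32_t pte_va = PTE_BASE + (va / NBPG) * 4;	/* sizeof(pt_entry_t) */

	/* pdei/ptei mirror the macros added further down in pmap.c */
	printf("va=%#x pdei=%u ptei=%u pte mapped at %#x\n",
	    va, va >> 22, (va >> 12) & 0x3ff, pte_va);
	return (0);
}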
*/ - /* * locking * @@ -258,6 +422,79 @@ struct lock pmap_main_lock; #endif +#define PG_FRAME 0xfffff000 /* page frame mask */ +#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ + +/* + * The following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +#define PDOFSET (NBPD-1) /* mask for non-PD part of VA */ +#define PTES_PER_PTP (NBPD / NBPG) /* # of PTEs in a PTP */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + +/* + * Mach derived conversion macros + */ +#define i386_round_pdr(x) ((((unsigned)(x)) + PDOFSET) & ~PDOFSET) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define PD_MASK 0xffc00000 /* page directory address bits */ +#define PT_MASK 0x003ff000 /* page table address bits */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * PTP macros: + * A PTP's index is the PD index of the PDE that points to it. + * A PTP's offset is the byte-offset in the PTE space that this PTP is at. + * A PTP's VA is the first VA mapped by that PTP. + * + * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * Access PD and PT + */ +#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)]) + +/* + * here we define the data types for PDEs and PTEs + */ +typedef u_int32_t pd_entry_t; /* PDE */ +typedef u_int32_t pt_entry_t; /* PTE */ + +/* + * Number of PTE's per cache line. 4 byte pte, 32-byte cache line + * Used to avoid false sharing of cache lines. 
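NPTECL, defined just below, exists so that on MULTIPROCESSOR kernels each CPU's private csrc/cdst/zero/ptp PTEs sit on their own 32-byte cache line. The PTESLEW()/VASLEW() macros used later in this diff are not shown in the excerpt; a plausible rendering of the idea (assumed, not quoted from the commit) is:

/*
 * Assumed definitions (matching how PTESLEW/VASLEW are used further
 * down): CPU id n gets PTE slot n*NPTECL and the VA n*NPTECL pages
 * further along, so the temporary-mapping PTEs of different CPUs are
 * 32 bytes apart and never share a cache line.
 */
#ifdef MULTIPROCESSOR
#define PTESLEW(pte, id)	((pte) + (id) * NPTECL)
#define VASLEW(va, id)		((va) + (id) * NPTECL * NBPG)
#else
#define PTESLEW(pte, id)	(pte)
#define VASLEW(va, id)		(va)
#endif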
+ */ +#define NPTECL 8 + /* * TLB Shootdown: * @@ -277,13 +514,13 @@ struct pmap_tlb_shootdown_job { TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list; vaddr_t pj_va; /* virtual address */ pmap_t pj_pmap; /* the pmap which maps the address */ - pt_entry_t pj_pte; /* the PTE bits */ + u_int32_t pj_pte; /* the PTE bits */ struct pmap_tlb_shootdown_job *pj_nextfree; }; struct pmap_tlb_shootdown_q { TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head; - int pq_pte; /* aggregate PTE bits */ + u_int32_t pq_pte; /* aggregate PTE bits */ int pq_count; /* number of pending requests */ struct mutex pq_mutex; /* mutex on queue */ int pq_flushg; /* pending flush global */ @@ -305,7 +542,8 @@ struct pmap_tlb_shootdown_job *pj_page, *pj_free; * global data structures */ -struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ +struct pmap kernel_pmap_store /* the kernel's pmap (proc0) */ + __attribute__((aligned(32))); /* * nkpde is the number of kernel PTPs allocated for the kernel at @@ -315,6 +553,7 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ */ int nkpde = NKPTP; +int nkptp_max = 1024 - (KERNBASE/NBPD) - 1; /* largest value (-1 for APTP space) */ #ifdef NKPDE #error "obsolete NKPDE: use NKPTP" #endif @@ -341,8 +580,8 @@ paddr_t hole_end; /* PA of end of "hole" */ * other data structures */ -static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ -static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ +u_int32_t protection_codes[8]; /* maps MI prot to i386 prot code */ +boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ /* * the following two vaddr_t's are used during system startup @@ -351,9 +590,10 @@ static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ * VM space is turned over to the kernel_map vm_map. */ -static vaddr_t virtual_avail; /* VA of first free KVA */ -static vaddr_t virtual_end; /* VA of last free KVA */ +vaddr_t virtual_avail; /* VA of first free KVA */ +vaddr_t virtual_end; /* VA of last free KVA */ +vaddr_t vm_max_address = (PDSLOT_PTE << PDSHIFT) + (PDSLOT_PTE << PGSHIFT); /* * pv_page management structures: locked by pvalloc_lock @@ -374,8 +614,8 @@ static vaddr_t pv_cachedva; /* cached VA for later use */ * linked list of all non-kernel pmaps */ -static struct pmap_head pmaps; -static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ +struct pmap_head pmaps; +struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ /* * pool that pmap structures are allocated from @@ -402,7 +642,7 @@ struct pool pmap_pmap_pool; */ static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; -static caddr_t csrcp, cdstp, zerop, ptpp; +caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp; caddr_t vmmap; /* XXX: used by mem.c... 
it should really uvm_map_reserve it */ #ifdef __NetBSD__ @@ -424,46 +664,27 @@ extern vaddr_t pentium_idt_vaddr; */ struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); -struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t); -struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ #define ALLOCPV_NEED 0 /* need PV now */ #define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ #define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); -void pmap_enter_pv(struct pv_head *, - struct pv_entry *, struct pmap *, - vaddr_t, struct vm_page *); -void pmap_free_pv(struct pmap *, struct pv_entry *); -void pmap_free_pvs(struct pmap *, struct pv_entry *); -void pmap_free_pv_doit(struct pv_entry *); -void pmap_free_pvpage(void); -struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); -boolean_t pmap_is_curpmap(struct pmap *); -boolean_t pmap_is_active(struct pmap *, int); -pt_entry_t *pmap_map_ptes(struct pmap *); -struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, - vaddr_t); -boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, - vaddr_t, int32_t *); -void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, - vaddr_t, vaddr_t, int32_t *); -struct vm_page *pmap_steal_ptp(struct uvm_object *, vaddr_t); -vaddr_t pmap_tmpmap_pa(paddr_t); -pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *); -void pmap_tmpunmap_pa(void); -void pmap_tmpunmap_pvepte(struct pv_entry *); -void pmap_apte_flush(struct pmap *); -boolean_t pmap_try_steal_pv(struct pv_head *, - struct pv_entry *, - struct pv_entry *); -void pmap_unmap_ptes(struct pmap *); -void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, - pt_entry_t); - -void pmap_pinit(pmap_t); -void pmap_release(pmap_t); - -void pmap_zero_phys(paddr_t); +struct vm_page *pmap_alloc_ptp_86(struct pmap *, int, boolean_t); +struct vm_page *pmap_get_ptp_86(struct pmap *, int, boolean_t); +struct vm_page *pmap_steal_ptp_86(struct uvm_object *, vaddr_t); +pt_entry_t *pmap_map_ptes_86(struct pmap *); +void pmap_unmap_ptes_86(struct pmap *); +boolean_t pmap_remove_pte_86(struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t, int32_t *); +void pmap_remove_ptes_86(struct pmap *, struct vm_page *, vaddr_t, + vaddr_t, vaddr_t, int32_t *); +vaddr_t pmap_tmpmap_pa_86(paddr_t); +pt_entry_t *pmap_tmpmap_pvepte_86(struct pv_entry *); +void pmap_tmpunmap_pa_86(void); +void pmap_tmpunmap_pvepte_86(struct pv_entry *); +boolean_t pmap_try_steal_pv_86(struct pv_head *, + struct pv_entry *, struct pv_entry *); + +void pmap_release(pmap_t); void setcslimit(struct pmap *, struct trapframe *, struct pcb *, vaddr_t); @@ -503,14 +724,13 @@ pmap_is_active(pmap, cpu_id) */ vaddr_t -pmap_tmpmap_pa(pa) - paddr_t pa; +pmap_tmpmap_pa_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); #if defined(DIAGNOSTIC) if (*ptpte) panic("pmap_tmpmap_pa: ptp_pte in use?"); @@ -524,13 +744,13 @@ pmap_tmpmap_pa(pa) */ void -pmap_tmpunmap_pa() +pmap_tmpunmap_pa_86() { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); #if defined(DIAGNOSTIC) if (!pmap_valid_entry(*ptpte)) panic("pmap_tmpunmap_pa: our pte invalid?"); @@ -551,8 +771,7 @@ pmap_tmpunmap_pa() */ pt_entry_t * -pmap_tmpmap_pvepte(pve) - struct pv_entry *pve; 
+pmap_tmpmap_pvepte_86(struct pv_entry *pve) { #ifdef DIAGNOSTIC if (pve->pv_pmap == pmap_kernel()) @@ -563,7 +782,7 @@ pmap_tmpmap_pvepte(pve) if (pmap_is_curpmap(pve->pv_pmap)) return(vtopte(pve->pv_va)); - return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) + return(((pt_entry_t *)pmap_tmpmap_pa_86(VM_PAGE_TO_PHYS(pve->pv_ptp))) + ptei((unsigned)pve->pv_va)); } @@ -572,14 +791,13 @@ pmap_tmpmap_pvepte(pve) */ void -pmap_tmpunmap_pvepte(pve) - struct pv_entry *pve; +pmap_tmpunmap_pvepte_86(struct pv_entry *pve) { /* was it current pmap? if so, return */ if (pmap_is_curpmap(pve->pv_pmap)) return; - pmap_tmpunmap_pa(); + pmap_tmpunmap_pa_86(); } void @@ -622,8 +840,7 @@ pmap_apte_flush(struct pmap *pmap) */ pt_entry_t * -pmap_map_ptes(pmap) - struct pmap *pmap; +pmap_map_ptes_86(struct pmap *pmap) { pd_entry_t opde; @@ -662,12 +879,11 @@ pmap_map_ptes(pmap) */ void -pmap_unmap_ptes(pmap) - struct pmap *pmap; +pmap_unmap_ptes_86(struct pmap *pmap) { - if (pmap == pmap_kernel()) { + if (pmap == pmap_kernel()) return; - } + if (pmap_is_curpmap(pmap)) { simple_unlock(&pmap->pm_obj.vmobjlock); } else { @@ -682,7 +898,7 @@ pmap_unmap_ptes(pmap) void pmap_exec_account(struct pmap *pm, vaddr_t va, - pt_entry_t opte, pt_entry_t npte) + u_int32_t opte, u_int32_t npte) { if (curproc == NULL || curproc->p_vmspace == NULL || pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) @@ -756,6 +972,48 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) return (1); } +u_int32_t +pmap_pte_set_86(vaddr_t va, paddr_t pa, u_int32_t bits) +{ + pt_entry_t pte, *ptep = vtopte(va); + + pte = i386_atomic_testset_ul(ptep, pa | bits); /* zap! */ + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_setbits_86(vaddr_t va, u_int32_t set, u_int32_t clr) +{ + pt_entry_t *ptep = vtopte(va); + pt_entry_t pte = *ptep; + + *ptep = (pte | set) & ~clr; + return (pte & ~PG_FRAME); + +} + +u_int32_t +pmap_pte_bits_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & ~PG_FRAME); +} + +paddr_t +pmap_pte_paddr_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & PG_FRAME); +} + +paddr_t +vtophys(vaddr_t va) +{ + return ((*vtopte(va) & PG_FRAME) | (va & ~PG_FRAME)); +} + void setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb, vaddr_t limit) @@ -813,16 +1071,15 @@ pmap_kenter_pa(va, pa, prot) paddr_t pa; vm_prot_t prot; { - pt_entry_t *pte, opte, npte; + u_int32_t bits; - pte = vtopte(va); - npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V | pmap_pg_g; - opte = i386_atomic_testset_ul(pte, npte); /* zap! */ - if (pmap_valid_entry(opte)) { + bits = pmap_pte_set(va, pa, ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | + PG_V | pmap_pg_g); + if (pmap_valid_entry(bits)) { #ifdef MULTIPROCESSOR int32_t cpumask = 0; - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); pmap_tlb_shootnow(cpumask); #else /* Don't bother deferring in the single CPU case. */ @@ -845,25 +1102,21 @@ pmap_kremove(va, len) vaddr_t va; vsize_t len; { - pt_entry_t *pte, opte; + u_int32_t bits; #ifdef MULTIPROCESSOR int32_t cpumask = 0; #endif len >>= PAGE_SHIFT; for ( /* null */ ; len ; len--, va += PAGE_SIZE) { - if (va < VM_MIN_KERNEL_ADDRESS) - pte = vtopte(va); - else - pte = kvtopte(va); - opte = i386_atomic_testset_ul(pte, 0); /* zap! 
*/ + bits = pmap_pte_set(va, 0, 0); #ifdef DIAGNOSTIC - if (opte & PG_PVLIST) + if (bits & PG_PVLIST) panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va); #endif - if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) + if ((bits & (PG_V | PG_U)) == (PG_V | PG_U)) #ifdef MULTIPROCESSOR - pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask); + pmap_tlb_shootdown(pmap_kernel(), va, bits, &cpumask); #else pmap_update_pg(va); #endif @@ -964,8 +1217,8 @@ pmap_bootstrap(kva_start) kpm->pm_obj.uo_npages = 0; kpm->pm_obj.uo_refs = 1; bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ - kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); - kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_pdir = (vaddr_t)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = atop(kva_start - VM_MIN_KERNEL_ADDRESS); @@ -1008,27 +1261,27 @@ pmap_bootstrap(kva_start) * as well; we could waste less space if we knew the largest * CPU ID beforehand. */ - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; - cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; + pmap_cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; - zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; + pmap_zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; - ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; + pmap_ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL; pte += I386_MAXPROCS * NPTECL; #else - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ virtual_avail += PAGE_SIZE; pte++; /* advance */ - cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + pmap_cdstp = (caddr_t) virtual_avail; cdst_pte = pte; virtual_avail += PAGE_SIZE; pte++; - zerop = (caddr_t) virtual_avail; zero_pte = pte; + pmap_zerop = (caddr_t) virtual_avail; zero_pte = pte; virtual_avail += PAGE_SIZE; pte++; - ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + pmap_ptpp = (caddr_t) virtual_avail; ptp_pte = pte; virtual_avail += PAGE_SIZE; pte++; #endif @@ -1085,7 +1338,7 @@ pmap_bootstrap(kva_start) * initialize the pmap pool. */ - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", + pool_init(&pmap_pmap_pool, sizeof(struct pmap), 32, 0, 0, "pmappl", &pool_allocator_nointr); /* @@ -1135,6 +1388,14 @@ pmap_bootstrap(kva_start) } #endif +#if defined(MULTIPROCESSOR) + /* install the page after boot args as PT page for first 4M */ + pmap_enter(pmap_kernel(), (u_long)vtopte(0), + round_page((vaddr_t)(bootargv + bootargc)), + VM_PROT_READ|VM_PROT_WRITE, VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED); + memset(vtopte(0), 0, NBPG); /* make sure it is clean before using */ +#endif + /* * ensure the TLB is sync'd with reality by flushing it... */ @@ -1442,9 +1703,8 @@ steal_one: */ boolean_t -pmap_try_steal_pv(pvh, cpv, prevpv) - struct pv_head *pvh; - struct pv_entry *cpv, *prevpv; +pmap_try_steal_pv_86(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv) { pt_entry_t *ptep, opte; #ifdef MULTIPROCESSOR @@ -1464,7 +1724,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv) * mapping from the pmap. 
*/ - ptep = pmap_tmpmap_pvepte(cpv); + ptep = pmap_tmpmap_pvepte_86(cpv); if (*ptep & PG_W) { ptep = NULL; /* wired page, avoid stealing this one */ } else { @@ -1477,7 +1737,7 @@ pmap_try_steal_pv(pvh, cpv, prevpv) if (pmap_is_curpmap(cpv->pv_pmap)) pmap_update_pg(cpv->pv_va); #endif - pmap_tmpunmap_pvepte(cpv); + pmap_tmpunmap_pvepte_86(cpv); } if (ptep == NULL) { simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); @@ -1772,7 +2032,7 @@ pmap_remove_pv(pvh, pmap, va) */ struct vm_page * -pmap_alloc_ptp(pmap, pde_index, just_try) +pmap_alloc_ptp_86(pmap, pde_index, just_try) struct pmap *pmap; int pde_index; boolean_t just_try; @@ -1784,7 +2044,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try) if (ptp == NULL) { if (just_try) return(NULL); - ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); + ptp = pmap_steal_ptp_86(&pmap->pm_obj, ptp_i2o(pde_index)); if (ptp == NULL) { return (NULL); } @@ -1795,8 +2055,8 @@ pmap_alloc_ptp(pmap, pde_index, just_try) /* got one! */ ptp->flags &= ~PG_BUSY; /* never busy */ ptp->wire_count = 1; /* no mappings yet */ - pmap->pm_pdir[pde_index] = - (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + PDE(pmap, pde_index) = + (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; return(ptp); @@ -1814,9 +2074,7 @@ pmap_alloc_ptp(pmap, pde_index, just_try) */ struct vm_page * -pmap_steal_ptp(obj, offset) - struct uvm_object *obj; - vaddr_t offset; +pmap_steal_ptp_86(struct uvm_object *obj, vaddr_t offset) { struct vm_page *ptp = NULL; struct pmap *firstpmap; @@ -1850,21 +2108,21 @@ pmap_steal_ptp(obj, offset) idx = ptp_o2i(ptp->offset); #ifdef DIAGNOSTIC if (VM_PAGE_TO_PHYS(ptp) != - (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + (PDE(pmaps_hand, idx) & PG_FRAME)) panic("pmap_steal_ptp: PTP mismatch!"); #endif ptes = (pt_entry_t *) - pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + pmap_tmpmap_pa_86(VM_PAGE_TO_PHYS(ptp)); for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) if ((ptes[lcv] & (PG_V|PG_W)) == (PG_V|PG_W)) break; if (lcv == PTES_PER_PTP) - pmap_remove_ptes(pmaps_hand, ptp, + pmap_remove_ptes_86(pmaps_hand, ptp, (vaddr_t)ptes, ptp_i2v(idx), ptp_i2v(idx+1), &cpumask); - pmap_tmpunmap_pa(); + pmap_tmpunmap_pa_86(); if (lcv != PTES_PER_PTP) /* wired, try next PTP */ @@ -1874,7 +2132,7 @@ pmap_steal_ptp(obj, offset) * got it!!! */ - pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + PDE(pmaps_hand, idx) = 0; /* zap! */ pmaps_hand->pm_stats.resident_count--; #ifdef MULTIPROCESSOR pmap_apte_flush(pmaps_hand); @@ -1918,18 +2176,15 @@ pmap_steal_ptp(obj, offset) */ struct vm_page * -pmap_get_ptp(pmap, pde_index, just_try) - struct pmap *pmap; - int pde_index; - boolean_t just_try; +pmap_get_ptp_86(struct pmap *pmap, int pde_index, boolean_t just_try) { struct vm_page *ptp; - if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { + if (pmap_valid_entry(PDE(pmap, pde_index))) { /* valid... 
check hint (saves us a PA->PG lookup) */ if (pmap->pm_ptphint && - (pmap->pm_pdir[pde_index] & PG_FRAME) == + (PDE(pmap, pde_index) & PG_FRAME) == VM_PAGE_TO_PHYS(pmap->pm_ptphint)) return(pmap->pm_ptphint); @@ -1943,7 +2198,7 @@ pmap_get_ptp(pmap, pde_index, just_try) } /* allocate a new PTP (updates ptphint) */ - return(pmap_alloc_ptp(pmap, pde_index, just_try)); + return(pmap_alloc_ptp_86(pmap, pde_index, just_try)); } /* @@ -1963,18 +2218,7 @@ pmap_create() struct pmap *pmap; pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); - pmap_pinit(pmap); - return(pmap); -} -/* - * pmap_pinit: given a zero'd pmap structure, init it. - */ - -void -pmap_pinit(pmap) - struct pmap *pmap; -{ /* init uvm_object */ simple_lock_init(&pmap->pm_obj.vmobjlock); pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ @@ -1987,26 +2231,37 @@ pmap_pinit(pmap) pmap->pm_hiexec = 0; pmap->pm_flags = 0; + /* init the LDT */ + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1); + pmap_pinit_pd(pmap); + return(pmap); +} + +/* + * pmap_pinit: given a zero'd pmap structure, init it. + */ + +void +pmap_pinit_pd_86(struct pmap *pmap) +{ /* allocate PDP */ - pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + pmap->pm_pdir = uvm_km_alloc(kernel_map, NBPG); if (pmap->pm_pdir == NULL) panic("pmap_pinit: kernel_map out of virtual space!"); - (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, - (paddr_t *)&pmap->pm_pdirpa); + pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, &pmap->pm_pdirpa); + pmap->pm_pdirsize = NBPG; /* init PDP */ /* zero init area */ - bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); /* put in recursive PDE to map the PTEs */ - pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; - - /* init the LDT */ - pmap->pm_ldt = NULL; - pmap->pm_ldt_len = 0; - pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + PDE(pmap, PDSLOT_PTE) = pmap->pm_pdirpa | PG_V | PG_KW; /* * we need to lock pmaps_lock to prevent nkpde from changing on @@ -2016,10 +2271,10 @@ pmap_pinit(pmap) */ simple_lock(&pmaps_lock); /* put in kernel VM PDEs */ - bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN), nkpde * sizeof(pd_entry_t)); /* zero the rest */ - bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + bzero(&PDE(pmap, PDSLOT_KERN + nkpde), NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); LIST_INSERT_HEAD(&pmaps, pmap, pm_list); simple_unlock(&pmaps_lock); @@ -2043,9 +2298,8 @@ pmap_destroy(pmap) simple_lock(&pmap->pm_obj.vmobjlock); refs = --pmap->pm_obj.uo_refs; simple_unlock(&pmap->pm_obj.vmobjlock); - if (refs > 0) { + if (refs > 0) return; - } /* * reference count is zero, free pmap resources and then free pmap. @@ -2100,7 +2354,7 @@ pmap_release(pmap) * MULTIPROCESSOR -- no need to flush out of other processors' * APTE space because we do that in pmap_unmap_ptes(). 
*/ - uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); + uvm_km_free(kernel_map, pmap->pm_pdir, pmap->pm_pdirsize); #ifdef USER_LDT if (pmap->pm_flags & PMF_USER_LDT) { @@ -2286,18 +2540,15 @@ pmap_deactivate(p) */ boolean_t -pmap_extract(pmap, va, pap) - struct pmap *pmap; - vaddr_t va; - paddr_t *pap; +pmap_extract_86(struct pmap *pmap, vaddr_t va, paddr_t *pap) { paddr_t retval; pt_entry_t *ptes; - if (pmap->pm_pdir[pdei(va)]) { - ptes = pmap_map_ptes(pmap); + if (PDE(pmap, pdei(va))) { + ptes = pmap_map_ptes_86(pmap); retval = (paddr_t)(ptes[atop(va)] & PG_FRAME); - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); if (pap != NULL) *pap = retval | (va & ~PG_FRAME); return (TRUE); @@ -2335,13 +2586,13 @@ pmap_zero_page(struct vm_page *pg) * initialized. */ void -pmap_zero_phys(paddr_t pa) +pmap_zero_phys_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -2359,14 +2610,13 @@ pmap_zero_phys(paddr_t pa) */ boolean_t -pmap_zero_page_uncached(pa) - paddr_t pa; +pmap_zero_page_uncached_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -2387,7 +2637,7 @@ pmap_zero_page_uncached(pa) */ void -pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) +pmap_copy_page_86(struct vm_page *srcpg, struct vm_page *dstpg) { paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); @@ -2396,8 +2646,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) #endif pt_entry_t *spte = PTESLEW(csrc_pte,id); pt_entry_t *dpte = PTESLEW(cdst_pte,id); - caddr_t csrcva = VASLEW(csrcp, id); - caddr_t cdstva = VASLEW(cdstp, id); + caddr_t csrcva = VASLEW(pmap_csrcp, id); + caddr_t cdstva = VASLEW(pmap_cdstp, id); #ifdef DIAGNOSTIC if (*spte || *dpte) @@ -2431,12 +2681,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) */ void -pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) - struct pmap *pmap; - struct vm_page *ptp; - vaddr_t ptpva; - vaddr_t startva, endva; - int32_t *cpumaskp; +pmap_remove_ptes_86(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, + vaddr_t startva, vaddr_t endva, int32_t *cpumaskp) { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; @@ -2494,7 +2740,7 @@ pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) #ifdef DIAGNOSTIC if (bank == -1) panic("pmap_remove_ptes: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", + "PG_PVLIST, va = 0x%lx, pa = 0x%llx", startva, (u_long)(opte & PG_FRAME)); #endif @@ -2528,7 +2774,7 @@ pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp) */ boolean_t -pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) +pmap_remove_pte_86(pmap, ptp, pte, va, cpumaskp) struct pmap *pmap; struct vm_page *ptp; pt_entry_t *pte; @@ -2579,7 +2825,7 @@ pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) #ifdef DIAGNOSTIC if (bank == -1) panic("pmap_remove_pte: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, + "PG_PVLIST, va = 0x%lx, pa = 0x%llx", va, (u_long)(opte & PG_FRAME)); #endif @@ -2601,9 +2847,7 @@ pmap_remove_pte(pmap, ptp, pte, va, cpumaskp) */ void -pmap_remove(pmap, sva, eva) - struct pmap *pmap; - vaddr_t sva, eva; +pmap_remove_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva) { pt_entry_t 
*ptes, opte; boolean_t result; @@ -2617,7 +2861,7 @@ pmap_remove(pmap, sva, eva) */ PMAP_MAP_TO_HEAD_LOCK(); - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * removing one page? take shortcut function. @@ -2625,10 +2869,10 @@ pmap_remove(pmap, sva, eva) if (sva + PAGE_SIZE == eva) { - if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { + if (pmap_valid_entry(PDE(pmap, pdei(sva)))) { /* PA of the PTP */ - ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME; + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; /* get PTP if non-kernel mapping */ @@ -2651,7 +2895,7 @@ pmap_remove(pmap, sva, eva) } /* do it! */ - result = pmap_remove_pte(pmap, ptp, + result = pmap_remove_pte_86(pmap, ptp, &ptes[atop(sva)], sva, &cpumask); /* @@ -2662,7 +2906,7 @@ pmap_remove(pmap, sva, eva) if (result && ptp && ptp->wire_count <= 1) { /* zap! */ opte = i386_atomic_testset_ul( - &pmap->pm_pdir[pdei(sva)], 0); + &PDE(pmap, pdei(sva)), 0); #ifdef MULTIPROCESSOR /* * XXXthorpej Redundant shootdown can happen @@ -2693,7 +2937,7 @@ pmap_remove(pmap, sva, eva) } } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlock pmap */ + pmap_unmap_ptes_86(pmap); /* unlock pmap */ PMAP_MAP_TO_HEAD_UNLOCK(); return; } @@ -2723,12 +2967,12 @@ pmap_remove(pmap, sva, eva) /* XXXCDC: ugly hack to avoid freeing PDP here */ continue; - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) /* valid block? */ continue; /* PA of the PTP */ - ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME); + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; /* get PTP if non-kernel mapping */ if (pmap == pmap_kernel()) { @@ -2747,14 +2991,14 @@ pmap_remove(pmap, sva, eva) #endif } } - pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(sva)], + pmap_remove_ptes_86(pmap, ptp, (vaddr_t)&ptes[atop(sva)], sva, blkendva, &cpumask); /* if PTP is no longer being used, free it! */ if (ptp && ptp->wire_count <= 1) { /* zap! 
*/ opte = i386_atomic_testset_ul( - &pmap->pm_pdir[pdei(sva)], 0); + &PDE(pmap, pdei(sva)), 0); #if defined(MULTIPROCESSOR) /* * XXXthorpej Redundant shootdown can happen here @@ -2783,7 +3027,7 @@ pmap_remove(pmap, sva, eva) } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); } @@ -2795,8 +3039,7 @@ pmap_remove(pmap, sva, eva) */ void -pmap_page_remove(pg) - struct vm_page *pg; +pmap_page_remove_86(struct vm_page *pg) { int bank, off; struct pv_head *pvh; @@ -2823,20 +3066,19 @@ pmap_page_remove(pg) simple_lock(&pvh->pvh_lock); for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ #ifdef DIAGNOSTIC - if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) { + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) printf("pmap_page_remove: found pager VA on pv_list\n"); - } - if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & - PG_FRAME) - != VM_PAGE_TO_PHYS(pve->pv_ptp)) { + if (pve->pv_ptp && (PDE(pve->pv_pmap, + pdei(pve->pv_va)) & PG_FRAME) != + VM_PAGE_TO_PHYS(pve->pv_ptp)) { printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", pg, pve->pv_va, pve->pv_ptp); printf("pmap_page_remove: PTP's phys addr: " "actual=%x, recorded=%lx\n", - (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + (PDE(pve->pv_pmap, pdei(pve->pv_va)) & PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); panic("pmap_page_remove: mapped managed page has " "invalid pv_ptp field"); @@ -2872,8 +3114,7 @@ pmap_page_remove(pg) /* zap! */ opte = i386_atomic_testset_ul( - &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)], - 0); + &PDE(pve->pv_pmap, pdei(pve->pv_va)), 0); pmap_tlb_shootdown(curpcb->pcb_pmap, ((vaddr_t)ptes) + pve->pv_ptp->offset, opte, &cpumask); @@ -2895,7 +3136,7 @@ pmap_page_remove(pg) uvm_pagefree(pve->pv_ptp); } } - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } pmap_free_pvs(NULL, pvh->pvh_list); pvh->pvh_list = NULL; @@ -2918,9 +3159,7 @@ pmap_page_remove(pg) */ boolean_t -pmap_test_attrs(pg, testbits) - struct vm_page *pg; - int testbits; +pmap_test_attrs_86(struct vm_page *pg, int testbits) { int bank, off; char *myattrs; @@ -2957,9 +3196,9 @@ pmap_test_attrs(pg, testbits) for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); + ptes = pmap_map_ptes_86(pve->pv_pmap); pte = ptes[atop(pve->pv_va)]; - pmap_unmap_ptes(pve->pv_pmap); + pmap_unmap_ptes_86(pve->pv_pmap); *myattrs |= pte; } @@ -2981,9 +3220,7 @@ pmap_test_attrs(pg, testbits) */ boolean_t -pmap_change_attrs(pg, setbits, clearbits) - struct vm_page *pg; - int setbits, clearbits; +pmap_change_attrs_86(struct vm_page *pg, int setbits, int clearbits) { u_int32_t result; int bank, off; @@ -3011,12 +3248,12 @@ pmap_change_attrs(pg, setbits, clearbits) for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { #ifdef DIAGNOSTIC - if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) + if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) panic("pmap_change_attrs: mapping without PTP " "detected"); #endif - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ npte = ptes[atop(pve->pv_va)]; result |= (npte & clearbits); npte = (npte | setbits) & ~clearbits; @@ -3026,7 +3263,7 @@ pmap_change_attrs(pg, setbits, clearbits) pmap_tlb_shootdown(pve->pv_pmap, 
atop(pve->pv_va), opte, &cpumask); } - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } simple_unlock(&pvh->pvh_lock); @@ -3062,17 +3299,15 @@ pmap_change_attrs(pg, setbits, clearbits) */ void -pmap_write_protect(pmap, sva, eva, prot) - struct pmap *pmap; - vaddr_t sva, eva; - vm_prot_t prot; +pmap_write_protect_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva, + vm_prot_t prot) { pt_entry_t *ptes, *spte, *epte, npte; vaddr_t blockend; u_int32_t md_prot; int32_t cpumask = 0; - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* should be ok, but just in case ... */ sva &= PG_FRAME; @@ -3098,7 +3333,7 @@ pmap_write_protect(pmap, sva, eva, prot) continue; /* empty block? */ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) continue; md_prot = protection_codes[prot]; @@ -3127,7 +3362,7 @@ pmap_write_protect(pmap, sva, eva, prot) } pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes(pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pmap); /* unlocks pmap */ } /* @@ -3141,14 +3376,12 @@ pmap_write_protect(pmap, sva, eva, prot) */ void -pmap_unwire(pmap, va) - struct pmap *pmap; - vaddr_t va; +pmap_unwire_86(struct pmap *pmap, vaddr_t va) { pt_entry_t *ptes; - if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { - ptes = pmap_map_ptes(pmap); /* locks pmap */ + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ #ifdef DIAGNOSTIC if (!pmap_valid_entry(ptes[atop(va)])) @@ -3164,7 +3397,7 @@ pmap_unwire(pmap, va) "didn't change!\n", pmap, va); } #endif - pmap_unmap_ptes(pmap); /* unlocks map */ + pmap_unmap_ptes_86(pmap); /* unlocks map */ } #ifdef DIAGNOSTIC else { @@ -3211,12 +3444,8 @@ pmap_collect(pmap) */ int -pmap_enter(pmap, va, pa, prot, flags) - struct pmap *pmap; - vaddr_t va; - paddr_t pa; - vm_prot_t prot; - int flags; +pmap_enter_86(struct pmap *pmap, vaddr_t va, paddr_t pa, + vm_prot_t prot, int flags) { pt_entry_t *ptes, opte, npte; struct vm_page *ptp; @@ -3235,7 +3464,7 @@ pmap_enter(pmap, va, pa, prot, flags) /* sanity check: kernel PTPs should already have been pre-allocated */ if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + !pmap_valid_entry(PDE(pmap, pdei(va)))) panic("pmap_enter: missing kernel PTP!"); #endif @@ -3246,11 +3475,11 @@ pmap_enter(pmap, va, pa, prot, flags) * map in ptes and get a pointer to our PTP (unless we are the kernel) */ - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + ptp = pmap_get_ptp_86(pmap, pdei(va), FALSE); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { error = ENOMEM; @@ -3292,7 +3521,7 @@ pmap_enter(pmap, va, pa, prot, flags) if (bank == -1) panic("pmap_enter: same pa PG_PVLIST " "mapping with unmanaged page " - "pa = 0x%lx (0x%lx)", pa, + "pa = 0x%llx (0x%lx)", pa, atop(pa)); #endif pvh = &vm_physmem[bank].pmseg.pvhead[off]; @@ -3320,7 +3549,7 @@ pmap_enter(pmap, va, pa, prot, flags) if (bank == -1) panic("pmap_enter: PG_PVLIST mapping with " "unmanaged page " - "pa = 0x%lx (0x%lx)", pa, atop(pa)); + "pa = 0x%llx (0x%lx)", pa, atop(pa)); #endif pvh = &vm_physmem[bank].pmseg.pvhead[off]; simple_lock(&pvh->pvh_lock); @@ -3406,7 +3635,7 @@ enter_now: error = 0; out: - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); return error; @@ -3420,8 +3649,7 @@ out: */ vaddr_t 
-pmap_growkernel(maxkvaddr) - vaddr_t maxkvaddr; +pmap_growkernel_86(vaddr_t maxkvaddr) { struct pmap *kpm = pmap_kernel(), *pm; int needed_kpde; /* needed number of kernel PTPs */ @@ -3452,10 +3680,9 @@ pmap_growkernel(maxkvaddr) if (uvm_page_physget(&ptaddr) == FALSE) panic("pmap_growkernel: out of memory"); - pmap_zero_phys(ptaddr); + pmap_zero_phys_86(ptaddr); - kpm->pm_pdir[PDSLOT_KERN + nkpde] = - ptaddr | PG_RW | PG_V; + PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V; /* count PTP as resident */ kpm->pm_stats.resident_count++; @@ -3468,18 +3695,18 @@ pmap_growkernel(maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { + if (pmap_alloc_ptp_86(kpm, PDSLOT_KERN+nkpde, FALSE) == NULL) { panic("pmap_growkernel: alloc ptp failed"); } /* PG_u not for kernel */ - kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u; + PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u; /* distribute new kernel PTP to all active pmaps */ simple_lock(&pmaps_lock); LIST_FOREACH(pm, &pmaps, pm_list) { - pm->pm_pdir[PDSLOT_KERN + nkpde] = - kpm->pm_pdir[PDSLOT_KERN + nkpde]; + PDE(pm, PDSLOT_KERN + nkpde) = + PDE(kpm, PDSLOT_KERN + nkpde); } simple_unlock(&pmaps_lock); } @@ -3492,7 +3719,7 @@ out: } #ifdef DEBUG -void pmap_dump(struct pmap *, vaddr_t, vaddr_t); +void pmap_dump_86(struct pmap *, vaddr_t, vaddr_t); /* * pmap_dump: dump all the mappings from a pmap @@ -3501,7 +3728,7 @@ void pmap_dump(struct pmap *, vaddr_t, vaddr_t); */ void -pmap_dump(pmap, sva, eva) +pmap_dump_86(pmap, sva, eva) struct pmap *pmap; vaddr_t sva, eva; { @@ -3521,7 +3748,7 @@ pmap_dump(pmap, sva, eva) */ PMAP_MAP_TO_HEAD_LOCK(); - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * dumping a range of pages: we dump in PTP sized blocks (4MB) @@ -3535,7 +3762,7 @@ pmap_dump(pmap, sva, eva) blkendva = eva; /* valid block? 
*/ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) continue; pte = &ptes[atop(sva)]; @@ -3546,7 +3773,7 @@ pmap_dump(pmap, sva, eva) sva, *pte, *pte & PG_FRAME); } } - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); PMAP_MAP_TO_HEAD_UNLOCK(); } #endif @@ -3615,7 +3842,7 @@ void pmap_tlb_shootdown(pmap, va, pte, cpumaskp) pmap_t pmap; vaddr_t va; - pt_entry_t pte; + u_int32_t pte; int32_t *cpumaskp; { struct cpu_info *ci, *self; @@ -3839,3 +4066,31 @@ pmap_tlb_shootdown_job_put(pq, pj) pq->pq_count--; } + +#ifndef SMALL_KERNEL +u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t) = + pmap_pte_set_86; +u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t) = + pmap_pte_setbits_86; +u_int32_t (*pmap_pte_bits_p)(vaddr_t) = pmap_pte_bits_86; +paddr_t (*pmap_pte_paddr_p)(vaddr_t) = pmap_pte_paddr_86; +boolean_t (*pmap_change_attrs_p)(struct vm_page *, int, int) = + pmap_change_attrs_86; +int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int) = + pmap_enter_86; +boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *) = pmap_extract_86; +vaddr_t (*pmap_growkernel_p)(vaddr_t) = pmap_growkernel_86; +void (*pmap_page_remove_p)(struct vm_page *) = pmap_page_remove_86; +void (*pmap_remove_p)(struct pmap *, vaddr_t, vaddr_t) = pmap_remove_86; +boolean_t (*pmap_test_attrs_p)(struct vm_page *, int) = pmap_test_attrs_86; +void (*pmap_unwire_p)(struct pmap *, vaddr_t) = pmap_unwire_86; +void (*pmap_write_protect_p)(struct pmap *, vaddr_t, vaddr_t, vm_prot_t) = + pmap_write_protect_86; +void (*pmap_pinit_pd_p)(pmap_t) = pmap_pinit_pd_86; +void (*pmap_zero_phys_p)(paddr_t) = pmap_zero_phys_86; +boolean_t (*pmap_zero_page_uncached_p)(paddr_t) = pmap_zero_page_uncached_86; +void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *) = + pmap_copy_page_86; +boolean_t (*pmap_try_steal_pv_p)(struct pv_head *, struct pv_entry *, + struct pv_entry *) = pmap_try_steal_pv_86; +#endif /* !SMALL_KERNEL */ diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c new file mode 100644 index 00000000000..a121fd9d4d3 --- /dev/null +++ b/sys/arch/i386/i386/pmapae.c @@ -0,0 +1,2420 @@ +/* $OpenBSD: pmapae.c,v 1.1 2006/04/27 15:37:51 mickey Exp $ */ + +/* + * Copyright (c) 2006 Michael Shalayeff + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from OpenBSD: pmap.c,v 1.85 2005/11/18 17:05:04 brad Exp + */ +/* + * pmap.c: i386 pmap module rewrite + * Chuck Cranor <chuck@ccrc.wustl.edu> + * 11-Aug-97 + * + * history of this pmap module: in addition to my own input, i used + * the following references for this rewrite of the i386 pmap: + * + * [1] the NetBSD i386 pmap. this pmap appears to be based on the + * BSD hp300 pmap done by Mike Hibler at University of Utah. + * it was then ported to the i386 by William Jolitz of UUNET + * Technologies, Inc. Then Charles M. Hannum of the NetBSD + * project fixed some bugs and provided some speed ups. + * + * [2] the FreeBSD i386 pmap. this pmap seems to be the + * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson + * and David Greenman. + * + * [3] the Mach pmap. this pmap, from CMU, seems to have migrated + * between several processors. the VAX version was done by + * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 + * version was done by Lance Berc, Mike Kupfer, Bob Baron, + * David Golub, and Richard Draves. the alpha version was + * done by Alessandro Forin (CMU/Mach) and Chris Demetriou + * (NetBSD/alpha). + */ +/* + * PAE support + * Michael Shalayeff <mickey@lucifier.net> + * + * This module implements PAE mode for i386. + * + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> +#include <sys/user.h> +#include <sys/kernel.h> +#include <sys/mutex.h> + +#include <uvm/uvm.h> + +#include <machine/atomic.h> +#include <machine/cpu.h> +#include <machine/specialreg.h> +#include <machine/gdt.h> + +#include <dev/isa/isareg.h> +#ifdef __NetBSD__ +#include <machine/isa_machdep.h> +#endif +#ifdef __OpenBSD__ +#include <sys/msgbuf.h> +#include <stand/boot/bootarg.h> +#endif + +/* + * this file contains the code for the "pmap module." the module's + * job is to manage the hardware's virtual to physical address mappings. + * note that there are two levels of mapping in the VM system: + * + * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's + * to map ranges of virtual address space to objects/files. 
for + * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only + * to the file /bin/ls starting at offset zero." note that + * the upper layer mapping is not concerned with how individual + * vm_pages are mapped. + * + * [2] the lower layer of the VM system (the pmap) maintains the mappings + * from virtual addresses. it is concerned with which vm_page is + * mapped where. for example, when you run /bin/ls and start + * at page 0x1000 the fault routine may lookup the correct page + * of the /bin/ls file and then ask the pmap layer to establish + * a mapping for it. + * + * note that information in the lower layer of the VM system can be + * thrown away since it can easily be reconstructed from the info + * in the upper layer. + * + * data structures we use include: + * + * - struct pmap: describes the address space of one thread + * - struct pv_entry: describes one <PMAP,VA> mapping of a PA + * - struct pv_head: there is one pv_head per managed page of + * physical memory. the pv_head points to a list of pv_entry + * structures which describe all the <PMAP,VA> pairs that this + * page is mapped in. this is critical for page based operations + * such as pmap_page_protect() [change protection on _all_ mappings + * of a page] + * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. + * if we run out of pv_entry's we allocate a new pv_page and free + * its pv_entrys. + */ +/* + * i386 PAE hardware Page Tables structure: + * + * the i386 PAE Page Table is a three-level PT which maps 4GB of VA. + * the pagesize is 4K (4096 [0x1000] bytes) or 2MB. + * + * the first level table is called "page directory index" and consists + * of 4 page directory index entries (PDIE) each 64 bits in size. + * + * the second level table is called a "page directory" and it contains + * 512 page directory entries (PDEs). each PDE is + * 8 bytes (a long long), so a PD fits in a single 4K page. this page is + * the page directory page (PDP). each PDE in a PDP maps 1GB of space + * (512 * 2MB = 1GB). a PDE contains the physical address of the + * second level table: the page table. or, if 2MB pages are being used, + * then the PDE contains the PA of the 2MB page being mapped. + * + * a page table consists of 512 page table entries (PTEs). each PTE is + * 8 bytes (a long long), so a page table also fits in a single 4K page. + * a 4K page being used as a page table is called a page table page (PTP). + * each PTE in a PTP maps one 4K page (512 * 4K = 2MB). a PTE contains + * the physical address of the page it maps and some flag bits (described + * below). + * + * the processor has a special register, "cr3", which points to the + * the PDP which is currently controlling the mappings of the virtual + * address space. + * + * the following picture shows the translation process for a 4K page: + * + * %cr3 register [PA of PDPT] + * | + * | bits <31-30> of VA + * | index the DPE (0-3) + * | | + * v v + * +-----------+ + * | PDP Ptr | + * | 4 entries | + * +-----------+ + * | + * PA of PDP + * | + * | + * | bits <29-21> of VA bits <20-12> of VA bits <11-0> + * | index the PDP (0 - 512) index the PTP are the page offset + * | | | | + * | v | | + * +-->+---------+ | | + * | PD Page | PA of v | + * | |-----PTP----->+------------+ | + * | 512 PDE | | page table |--PTE--+ | + * | entries | | (aka PTP) | | | + * +---------+ | 512 PTE | | | + * | entries | | | + * +------------+ | | + * | | + * bits <35-12> bits <11-0> + * p h y s i c a l a d d r + * + * the i386 caches PTEs in a TLB. 
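To make the translation picture above concrete, the following minimal user-space sketch splits a 32-bit VA with the same 2/9/9/12 split (bits <31-30>, <29-21>, <20-12>, <11-0>). It is not part of the patch; pae_va_split is an invented name used only for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * pae_va_split: illustrative helper (not kernel code).  Splits a VA the
     * way the PAE MMU does: 2 bits of PDPT index, 9 bits of PD index,
     * 9 bits of PT index and a 12-bit page offset.
     */
    static void
    pae_va_split(uint32_t va, unsigned *pdpt, unsigned *pde, unsigned *pte,
        unsigned *off)
    {
        *pdpt = (va >> 30) & 0x3;    /* bits <31-30>: 1 of 4 PDPT entries */
        *pde  = (va >> 21) & 0x1ff;  /* bits <29-21>: 1 of 512 PDEs */
        *pte  = (va >> 12) & 0x1ff;  /* bits <20-12>: 1 of 512 PTEs */
        *off  = va & 0xfff;          /* bits <11-0>: offset within the page */
    }

    int
    main(void)
    {
        unsigned pdpt, pde, pte, off;

        pae_va_split(0xd0201234, &pdpt, &pde, &pte, &off);
        printf("pdpt=%u pde=%u pte=%u off=0x%x\n", pdpt, pde, pte, off);
        return (0);
    }

With KERNBASE at 0xd0000000, pdei(KERNBASE) comes out to 0x680 = 1664, which lines up with the PDSLOT_KERN value used further down (1664 = 3*512 + 128, i.e. the kernel starts in the fourth PDP page).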
it is important to flush out old + * TLB mappings when making a change to a mappings. writing to the + * %cr3 will flush the entire TLB. newer processors also have an + * instruction that will invalidate the mapping of a single page (which + * is useful if you are changing a single mappings because it preserves + * all the cached TLB entries). + * + * as shows, bits 31-12 of the PTE contain PA of the page being mapped. + * the rest of the PTE is defined as follows: + * bit# name use + * 63 NX no-execute bit (0=ITLB, 1=DTLB), optional + * 11 n/a available for OS use, hardware ignores it + * 10 n/a available for OS use, hardware ignores it + * 9 n/a available for OS use, hardware ignores it + * 8 G global bit (see discussion below) + * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>) + * 6 D dirty (modified) page + * 5 A accessed (referenced) page + * 4 PCD cache disable + * 3 PWT prevent write through (cache) + * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) + * 1 R/W read/write bit (0=read only, 1=read-write) + * 0 P present (valid) + * + * notes: + * - on the i386 the R/W bit is ignored if processor is in supervisor + * state (bug!) + * - PS is only supported on newer processors + * - PTEs with the G bit are global in the sense that they are not + * flushed from the TLB when %cr3 is written (to flush, use the + * "flush single page" instruction). this is only supported on + * newer processors. this bit can be used to keep the kernel's + * TLB entries around while context switching. since the kernel + * is mapped into all processes at the same place it does not make + * sense to flush these entries when switching from one process' + * pmap to another. + */ +/* + * A pmap describes a process' 4GB virtual address space. This + * virtual address space can be broken up into 2048 2MB regions which + * are described by PDEs in the PDP. The PDEs are defined as follows: + * + * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. + * The following assumes that KERNBASE is 0xd0000000. + * + * PDE#s VA range Usage + * 0->1660 0x0 -> 0xcf800000 user address space, note that the + * max user address is 0xcfbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used. + * 1660 0xcf800000-> recursive mapping of PDP (used for + * 0xd0000000 linear mapping of PTPs). + * 1664->2044 0xd0000000-> kernel address space (constant + * 0xff800000 across all pmaps/processes). + * 2044 0xff800000-> "alternate" recursive PDP mapping + * <end> (for other pmaps). + * + * + * Note: A recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. In other + * words, the PTE for page 0 is the first int mapped into the 2MB recursive + * area. The PTE for page 1 is the second int. The very last int in the + * 2MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). + * + * All pmaps' PDs must have the same values in slots 1660->2043 so that + * the kernel is always mapped in every process. These values are loaded + * into the PD at pmap creation time. + * + * At any one time only one pmap can be active on a processor. This is + * the pmap whose PDP is pointed to by processor register %cr3. This pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slots #1660-3 as show above). 
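As a rough, self-contained illustration of the PTE flag bits tabulated above, the sketch below decodes a 64-bit PAE PTE. It is not patch code; the X_* macro names are local to the example, and the frame value deliberately sits above 4GB to show why a 32-bit frame mask would no longer do.

    #include <stdint.h>
    #include <stdio.h>

    /* example-local names for the bits described above */
    #define X_P      0x0000000000000001ULL  /* bit 0: present */
    #define X_RW     0x0000000000000002ULL  /* bit 1: read/write */
    #define X_US     0x0000000000000004ULL  /* bit 2: user/supervisor */
    #define X_A      0x0000000000000020ULL  /* bit 5: accessed */
    #define X_D      0x0000000000000040ULL  /* bit 6: dirty */
    #define X_G      0x0000000000000100ULL  /* bit 8: global */
    #define X_NX     0x8000000000000000ULL  /* bit 63: no-execute */
    #define X_FRAME  0x0000000ffffff000ULL  /* 36-bit page frame */

    int
    main(void)
    {
        /* a present, writable mapping of a physical page above 4GB */
        uint64_t pte = 0x123456000ULL | X_P | X_RW | X_A | X_NX;

        printf("frame=0x%llx p=%d rw=%d u=%d a=%d d=%d g=%d nx=%d\n",
            (unsigned long long)(pte & X_FRAME), !!(pte & X_P),
            !!(pte & X_RW), !!(pte & X_US), !!(pte & X_A),
            !!(pte & X_D), !!(pte & X_G), !!(pte & X_NX));
        return (0);
    }

This is also why the PG_FRAME and PG_LGFRAME redefinitions further down in this file are 64-bit (ULL) constants.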
When the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * Address of PTE = (1660 * 2MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xcf800000 + (VA / 4096) * 8 + * + * What happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? In that + * case we take the PA of the PDP of non-active pmap and put it in + * slots 2044-7 of the active pmap. This causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). + * + * The following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3->PDPTP) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x200000 + * | | + * | | + * |1660| -> points back to PDP (%cr3) mapping VA 0xcf800000 -> 0xd0000000 + * |1661| (PDP is 4 pages) + * |1662| + * |1663| + * |1664| -> first kernel PTP (maps 0xd0000000 -> 0xe0200000) + * | | + * |2044| -> points to alternate pmap's PDP (maps 0xff800000 -> end) + * |2045| + * |2046| + * |2047| + * +----+ + * + * Note that the PDE#1660 VA (0xcf8033e0) is defined as "PTE_BASE". + * Note that the PDE#2044 VA (0xff803fe0) is defined as "APTE_BASE". + * + * Starting at VA 0xcf8033e0 the current active PDPs (%cr3) acts as a + * PDPTP and references four consequetly mapped pages: + * + * PTP#1660-3 == PDP(%cr3) => maps VA 0xcf800000 -> 0xd0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xcf800000->0xcf801000 + * | | + * | | + * |1660| -> maps the contents of PTP#1660 (the PDP) at VA 0xcfe7c000 + * |1661| + * |1662| + * |1663| + * |1664| -> maps the contents of first kernel PTP + * | | + * |2047| + * +----+ + * + * Note that mapping of the PDP at PTP#1660's VA (0xcfe7c000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xcfe7f3e0) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xfff02fe0) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * To set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * Note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xffffc000). + * + * unfortunately we cannot use recursive PDPT from the page tables + * because in their infinite wisdom they have defined cr3 32 bits! + * + */ +/* + * memory allocation + * + * - there are three data structures that we must dynamically allocate: + * + * [A] new process' page directory page (PDP) + * - plan 1: done at pmap_create() we use + * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this + * allocation. + * + * if we are low in free physical memory then we sleep in + * uvm_km_alloc -- in this case this is ok since we are creating + * a new pmap and should not be holding any locks. + * + * if the kernel is totally out of virtual space + * (i.e. uvm_km_alloc returns NULL), then we panic. + * + * XXX: the fork code currently has no way to return an "out of + * memory, try again" error code since uvm_fork [fka vm_fork] + * is a void function. + * + * [B] new page tables pages (PTP) + * call uvm_pagealloc() + * => success: zero page, add to pm_pdir + * => failure: we are out of free vm_pages, let pmap_enter() + * tell UVM about it. + * + * note: for kernel PTPs, we start with NKPTP of them. as we map + * kernel memory (at uvm_map time) we check to see if we've grown + * the kernel pmap. 
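The recursive-mapping arithmetic quoted above can be sanity-checked with a few lines of plain C. This is only an illustration, not patch code; EX_PDSLOT_PTE, EX_NBPD and ex_pte_va are invented names mirroring PDSLOT_PTE, NBPD and the formula.

    #include <stdio.h>

    #define EX_PDSLOT_PTE  1660U       /* recursive PDP slot, as above */
    #define EX_NBPD        (1U << 21)  /* 2MB of VA mapped per PDE */

    /* VA inside the linear PTE window at which the PTE mapping 'va' appears */
    static unsigned
    ex_pte_va(unsigned va)
    {
        return (EX_PDSLOT_PTE * EX_NBPD + (va / 4096) * 8);
    }

    int
    main(void)
    {
        /* PTE_BASE itself: 1660 * 2MB == 0xcf800000 */
        printf("PTE_BASE = 0x%x\n", EX_PDSLOT_PTE * EX_NBPD);
        /* the PTE mapping KERNBASE (0xd0000000) shows up at 0xcfe80000 */
        printf("pte VA   = 0x%x\n", ex_pte_va(0xd0000000U));
        return (0);
    }

The same computation with PDSLOT_APTE (2044) gives 0xff800000, matching the alternate (APTE) window described above.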
if so, we call the optional function + * pmap_growkernel() to grow the kernel PTPs in advance. + * + * [C] pv_entry structures + * - plan 1: try to allocate one off the free list + * => success: done! + * => failure: no more free pv_entrys on the list + * - plan 2: try to allocate a new pv_page to add a chunk of + * pv_entrys to the free list + * [a] obtain a free, unmapped, VA in kmem_map. either + * we have one saved from a previous call, or we allocate + * one now using a "vm_map_lock_try" in uvm_map + * => success: we have an unmapped VA, continue to [b] + * => failure: unable to lock kmem_map or out of VA in it. + * move on to plan 3. + * [b] allocate a page in kmem_object for the VA + * => success: map it in, free the pv_entry's, DONE! + * => failure: kmem_object locked, no free vm_pages, etc. + * save VA for later call to [a], go to plan 3. + * If we fail, we simply let pmap_enter() tell UVM about it. + */ +/* + * locking + * + * we have the following locks that we must contend with: + * + * "normal" locks: + * + * - pmap_main_lock + * this lock is used to prevent deadlock and/or provide mutex + * access to the pmap system. most operations lock the pmap + * structure first, then they lock the pv_lists (if needed). + * however, some operations such as pmap_page_protect lock + * the pv_lists and then lock pmaps. in order to prevent a + * cycle, we require a mutex lock when locking the pv_lists + * first. thus, the "pmap = >pv_list" lockers must gain a + * read-lock on pmap_main_lock before locking the pmap. and + * the "pv_list => pmap" lockers must gain a write-lock on + * pmap_main_lock before locking. since only one thread + * can write-lock a lock at a time, this provides mutex. + * + * "simple" locks: + * + * - pmap lock (per pmap, part of uvm_object) + * this lock protects the fields in the pmap structure including + * the non-kernel PDEs in the PDP, and the PTEs. it also locks + * in the alternate PTE space (since that is determined by the + * entry in the PDP). + * + * - pvh_lock (per pv_head) + * this lock protects the pv_entry list which is chained off the + * pv_head structure for a specific managed PA. it is locked + * when traversing the list (e.g. adding/removing mappings, + * syncing R/M bits, etc.) + * + * - pvalloc_lock + * this lock protects the data structures which are used to manage + * the free list of pv_entry structures. + * + * - pmaps_lock + * this lock protects the list of active pmaps (headed by "pmaps"). + * we lock it when adding or removing pmaps from this list. 
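One concrete fixed-ordering trick used later in this file is in pmap_map_ptes_pae(): when both the target pmap and curpcb->pcb_pmap must be locked, the lower-addressed one is taken first, so two threads can never end up waiting on each other in a cycle. Below is a minimal user-space sketch of that pattern, using pthreads instead of the kernel's simple locks; lock_both is an invented name.

    #include <stdint.h>
    #include <pthread.h>

    /* illustration only: take two locks in address order to avoid a cycle */
    static void
    lock_both(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if ((uintptr_t)a < (uintptr_t)b) {
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }

    int
    main(void)
    {
        pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

        lock_both(&m1, &m2);
        pthread_mutex_unlock(&m2);
        pthread_mutex_unlock(&m1);
        return (0);
    }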
+ * + */ + +/* + * locking data structures + */ + +struct simplelock pvalloc_lock; +struct simplelock pmaps_lock; + +#if defined(MULTIPROCESSOR) && 0 + +extern struct lock pmap_main_lock; + +#define PMAP_MAP_TO_HEAD_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0) +#define PMAP_MAP_TO_HEAD_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#define PMAP_HEAD_TO_MAP_LOCK() \ + spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0) +#define PMAP_HEAD_TO_MAP_UNLOCK() \ + spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0) + +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +#define PG_FRAME 0xffffff000ULL /* page frame mask */ +#define PG_LGFRAME 0xfffe00000ULL /* large (2M) page frame mask */ + +/* + * Redefine the PDSHIFT, NBPD + */ +#undef PDSHIFT +#define PD_MASK 0xffe00000 /* page directory address bits */ +#define PDSHIFT 21 /* page directory address shift */ +#define PT_MASK 0x001ff000 /* page table address bits */ +#undef NBPD +#define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */ + +/* + * + */ +#undef PDSLOT_PTE +#define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */ +#undef PDSLOT_KERN +#define PDSLOT_KERN (1664U) /* 1664: start of kernel space */ +#undef PDSLOT_APTE +#define PDSLOT_APTE (2044U) /* 2044: alternative recursive slot */ + +/* + * The following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +#define PTES_PER_PTP (NBPG / sizeof(pt_entry_t)) /* # of PTEs in a PTP */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * Mach derived conversion macros + */ +#define i386_round_pdr(x) ((((unsigned)(x)) + ~PD_MASK) & PD_MASK) + +/* + * PTP macros: + * A PTP's index is the PD index of the PDE that points to it. + * A PTP's offset is the byte-offset in the PTE space that this PTP is at. + * A PTP's VA is the first VA mapped by that PTP. + * + * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * Access PD and PT + */ +#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)]) + +/* + * here we define the data types for PDEs and PTEs + */ +typedef u_int64_t pd_entry_t; /* PDE */ +typedef u_int64_t pt_entry_t; /* PTE */ + +/* + * Number of PTE's per cache line. 
8 byte pte, 32-byte cache line + * Used to avoid false sharing of cache lines. + */ +#define NPTECL 4 + +/* + * other data structures + */ + +extern u_int32_t protection_codes[]; /* maps MI prot to i386 prot code */ +extern boolean_t pmap_initialized; /* pmap_init done yet? */ + +/* + * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a + * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing + * due to false sharing. + */ + +#ifdef MULTIPROCESSOR +#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) +#define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG) +#else +#define PTESLEW(pte, id) (pte) +#define VASLEW(va,id) (va) +#endif + +/* + * special VAs and the PTEs that map them + */ + +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; +extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp; + +extern int pmap_pg_g; +extern struct pmap_head pmaps; +extern struct pmap *pmaps_hand; + +/* + * local prototypes + */ + +struct vm_page *pmap_alloc_ptp_pae(struct pmap *, int, boolean_t); +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +struct vm_page *pmap_get_ptp_pae(struct pmap *, int, boolean_t); +pt_entry_t *pmap_map_ptes_pae(struct pmap *); +void pmap_remove_ptes_pae(struct pmap *, struct vm_page *, + vaddr_t, vaddr_t, vaddr_t, int32_t *); +boolean_t pmap_remove_pte_pae(struct pmap *, struct vm_page *, + pt_entry_t *, vaddr_t, int32_t *); +void pmap_unmap_ptes_pae(struct pmap *); +struct vm_page *pmap_steal_ptp_pae(struct uvm_object *, vaddr_t); +vaddr_t pmap_tmpmap_pa_pae(paddr_t); +pt_entry_t *pmap_tmpmap_pvepte_pae(struct pv_entry *); +void pmap_tmpunmap_pa_pae(void); +void pmap_tmpunmap_pvepte_pae(struct pv_entry *); + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + */ + +vaddr_t +pmap_tmpmap_pa_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (*ptpte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpva); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + */ + +void +pmap_tmpunmap_pa_pae() +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptpte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + *ptpte = 0; /* zap! */ + pmap_update_pg((vaddr_t)ptpva); +#ifdef MULTIPROCESSOR + /* + * No need for tlb shootdown here, since ptp_pte is per-CPU. + */ +#endif +} + +/* + * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry + * + * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] + */ + +pt_entry_t * +pmap_tmpmap_pvepte_pae(struct pv_entry *pve) +{ +#ifdef DIAGNOSTIC + if (pve->pv_pmap == pmap_kernel()) + panic("pmap_tmpmap_pvepte: attempt to map kernel"); +#endif + + /* is it current pmap? use direct mapping... */ + if (pmap_is_curpmap(pve->pv_pmap)) + return(vtopte(pve->pv_va)); + + return(((pt_entry_t *)pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(pve->pv_ptp))) + + ptei((unsigned)pve->pv_va)); +} + +/* + * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte + */ + +void +pmap_tmpunmap_pvepte_pae(struct pv_entry *pve) +{ + /* was it current pmap? 
if so, return */ + if (pmap_is_curpmap(pve->pv_pmap)) + return; + + pmap_tmpunmap_pa_pae(); +} + +/* + * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in + * + * => we lock enough pmaps to keep things locked in + * => must be undone with pmap_unmap_ptes before returning + */ + +pt_entry_t * +pmap_map_ptes_pae(struct pmap *pmap) +{ + pd_entry_t opde; + + /* the kernel's pmap is always accessible */ + if (pmap == pmap_kernel()) { + return(PTE_BASE); + } + + /* if curpmap then we are always mapped */ + if (pmap_is_curpmap(pmap)) { + simple_lock(&pmap->pm_obj.vmobjlock); + return(PTE_BASE); + } + + /* need to lock both curpmap and pmap: use ordered locking */ + if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + simple_lock(&pmap->pm_obj.vmobjlock); + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } else { + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&pmap->pm_obj.vmobjlock); + } + + /* need to load a new alternate pt space into curpmap? */ + opde = *APDP_PDE; + if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdidx[0]) { + APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V; + APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V; + APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V; + APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V; + if (pmap_valid_entry(opde)) + pmap_apte_flush(curpcb->pcb_pmap); + } + return(APTE_BASE); +} + +/* + * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + */ + +void +pmap_unmap_ptes_pae(struct pmap *pmap) +{ + if (pmap == pmap_kernel()) + return; + + if (pmap_is_curpmap(pmap)) { + simple_unlock(&pmap->pm_obj.vmobjlock); + } else { +#if defined(MULTIPROCESSOR) + APDP_PDE[0] = 0; + APDP_PDE[1] = 0; + APDP_PDE[2] = 0; + APDP_PDE[3] = 0; + pmap_apte_flush(curpcb->pcb_pmap); +#endif + simple_unlock(&pmap->pm_obj.vmobjlock); + simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + } +} + +u_int32_t +pmap_pte_set_pae(vaddr_t va, paddr_t pa, u_int32_t bits) +{ + pt_entry_t pte, *ptep = vtopte(va); + + pte = i386_atomic_testset_uq(ptep, pa | bits); + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_setbits_pae(vaddr_t va, u_int32_t set, u_int32_t clr) +{ + pt_entry_t *ptep = vtopte(va); + pt_entry_t pte = *ptep; + + i386_atomic_testset_uq(ptep, (pte | set) & ~(pt_entry_t)clr); + return (pte & ~PG_FRAME); + +} + +u_int32_t +pmap_pte_bits_pae(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & ~PG_FRAME); +} + +paddr_t +pmap_pte_paddr_pae(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & PG_FRAME); +} + +/* + * Switch over to PAE page tables + */ +void +pmap_bootstrap_pae() +{ + extern paddr_t avail_end, avail_end2; + extern int cpu_pae, nkpde; + struct pmap *kpm = pmap_kernel(); + struct vm_page *ptp; + paddr_t ptaddr; + u_int32_t bits; + vaddr_t va, eva; + int i, pn, pe; + + if (!cpu_pae || avail_end >= avail_end2 || !(cpu_feature & CPUID_PAE)) + return; + + va = (vaddr_t)kpm->pm_pdir; + kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[2] = (va + 2*NBPG - KERNBASE) | PG_V; + kpm->pm_pdidx[3] = (va + 3*NBPG - KERNBASE) | PG_V; + /* map pde recursively into itself */ + PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW; + PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW; + PDE(kpm, PDSLOT_PTE+2) = kpm->pm_pdidx[2] | PG_KW; + PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW; + + /* transfer all kernel mappings over into pae tables */ + for (va = KERNBASE, eva = va + (nkpde << 22); + va < eva; va += PAGE_SIZE) { + if 
(!pmap_valid_entry(PDE(kpm, pdei(va)))) { + ptp = uvm_pagealloc(&kpm->pm_obj, va, NULL, + UVM_PGA_ZERO); + ptaddr = VM_PAGE_TO_PHYS(ptp); + PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V; + pmap_pte_set_86((vaddr_t)vtopte(va), + ptaddr, PG_KW | PG_V); + + /* count PTP as resident */ + kpm->pm_stats.resident_count++; + } + bits = pmap_pte_bits_86(va) | pmap_pg_g; + if (pmap_valid_entry(bits)) + pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits); + } + + if (!cpu_paenable(&kpm->pm_pdidx[0])) { + extern struct user *proc0paddr; + + proc0paddr->u_pcb.pcb_cr3 = kpm->pm_pdirpa = + (vaddr_t)kpm - KERNBASE; + kpm->pm_pdirsize = 4 * NBPG; + + csrc_pte = vtopte(pmap_csrcp); + cdst_pte = vtopte(pmap_cdstp); + zero_pte = vtopte(pmap_zerop); + ptp_pte = vtopte(pmap_ptpp); + + nkpde *= 2; + nkptp_max = 2048 - PDSLOT_KERN - 4; + vm_max_address = (PDSLOT_PTE << PDSHIFT) + + (PDSLOT_PTE << PGSHIFT); + avail_end = avail_end2; + + pmap_pte_set_p = pmap_pte_set_pae; + pmap_pte_setbits_p = pmap_pte_setbits_pae; + pmap_pte_bits_p = pmap_pte_bits_pae; + pmap_pte_paddr_p = pmap_pte_paddr_pae; + pmap_change_attrs_p = pmap_change_attrs_pae; + pmap_enter_p = pmap_enter_pae; + pmap_extract_p = pmap_extract_pae; + pmap_growkernel_p = pmap_growkernel_pae; + pmap_page_remove_p = pmap_page_remove_pae; + pmap_remove_p = pmap_remove_pae; + pmap_test_attrs_p = pmap_test_attrs_pae; + pmap_unwire_p = pmap_unwire_pae; + pmap_write_protect_p = pmap_write_protect_pae; + pmap_pinit_pd_p = pmap_pinit_pd_pae; + pmap_zero_phys_p = pmap_zero_phys_pae; + pmap_zero_page_uncached_p = pmap_zero_page_uncached_pae; + pmap_copy_page_p = pmap_copy_page_pae; + pmap_try_steal_pv_p = pmap_try_steal_pv_pae; + + bzero((void *)kpm->pm_pdir + 8, (PDSLOT_PTE-1) * 8); + /* TODO also reclaim old PDPs */ + for (i = 0; i < vm_nphysseg; i++) + if (vm_physmem[i].start > atop(0xfffff000)) { + vm_physmem[i].avail_end = vm_physmem[i].end; + /* free vm_pages (uvm had already zeroed 'em) */ + for (pn = 0, pe = vm_physmem[i].end - + vm_physmem[i].start; pn < pe ; pn++) { + uvmexp.npages++; + /* add page to free pool */ + uvm_pagefree(&vm_physmem[i].pgs[pn]); + } + + } + uvm_page_rehash(); + } +} + +/* + * p v _ e n t r y f u n c t i o n s + */ + +/* + * pv_entry allocation functions: + * the main pv_entry allocation functions are: + * pmap_alloc_pv: allocate a pv_entry structure + * pmap_free_pv: free one pv_entry + * pmap_free_pvs: free a list of pv_entrys + * + * the rest are helper functions + */ + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! + */ + +boolean_t +pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv) +{ + pt_entry_t *ptep, opte; +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; +#endif + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte_pae(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + opte = i386_atomic_testset_uq(ptep, 0); /* zap! */ +#ifdef MULTIPROCESSOR + pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. 
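pmap_bootstrap_pae() above is also where the kernel commits to PAE: once cpu_paenable() succeeds it reassigns the pmap_*_p function pointers (declared at the end of pmap.c with _86 defaults) to the _pae implementations, so later pmap calls dispatch through whichever mode is active. Below is a stripped-down sketch of that dispatch pattern; enter_86, enter_pae, enter_p and bootstrap_pae are invented names, not the real kernel interfaces.

    #include <stdio.h>

    /* invented stand-ins for the two pmap flavours */
    static int
    enter_86(unsigned long va)
    {
        return (printf("86:  enter 0x%lx\n", va));
    }

    static int
    enter_pae(unsigned long va)
    {
        return (printf("pae: enter 0x%lx\n", va));
    }

    /* default to the 32-bit page table code, as pmap.c does */
    static int (*enter_p)(unsigned long) = enter_86;

    static void
    bootstrap_pae(int cpu_has_pae)
    {
        if (cpu_has_pae)
            enter_p = enter_pae;    /* flip the whole pmap over */
    }

    int
    main(void)
    {
        enter_p(0x1000);    /* dispatches to enter_86 */
        bootstrap_pae(1);
        enter_p(0x1000);    /* now dispatches to enter_pae */
        return (0);
    }

A GENERIC kernel thus carries both implementations and picks one at boot; SMALL_KERNEL builds, which leave pmapae.c out, do not define the pointers at all (they sit under #ifndef SMALL_KERNEL).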
*/ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); +#endif + pmap_tmpunmap_pvepte_pae(cpv); + } + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) + /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + return(TRUE); +} + +/* + * p t p f u n c t i o n s + */ + +/* + * pmap_alloc_ptp: allocate a PTP for a PMAP + * + * => pmap should already be locked by caller + * => we use the ptp's wire_count to count the number of active mappings + * in the PTP (we start it at one to prevent any chance this PTP + * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) + */ + +struct vm_page * +pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +{ + struct vm_page *ptp; + + ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, + UVM_PGA_USERESERVE|UVM_PGA_ZERO); + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp_pae(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) { + return (NULL); + } + /* stole one; zero it. */ + pmap_zero_page(ptp); + } + + /* got one! */ + ptp->flags &= ~PG_BUSY; /* never busy */ + ptp->wire_count = 1; /* no mappings yet */ + PDE(pmap, pde_index) = + (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + pmap->pm_stats.resident_count++; /* count PTP as resident */ + pmap->pm_ptphint = ptp; + return(ptp); +} + +/* + * pmap_steal_ptp: steal a PTP from any pmap that we can access + * + * => obj is locked by caller. + * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in <obj,offset> pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +struct vm_page * +pmap_steal_ptp_pae(struct uvm_object *obj, vaddr_t offset) +{ + struct vm_page *ptp = NULL; + struct pmap *firstpmap; + struct uvm_object *curobj; + pt_entry_t *ptes; + int idx, lcv; + boolean_t caller_locked, we_locked; + int32_t cpumask = 0; + + simple_lock(&pmaps_lock); + if (pmaps_hand == NULL) + pmaps_hand = LIST_FIRST(&pmaps); + firstpmap = pmaps_hand; + + do { /* while we haven't looped back around to firstpmap */ + + curobj = &pmaps_hand->pm_obj; + we_locked = FALSE; + caller_locked = (curobj == obj); + if (!caller_locked) { + we_locked = simple_lock_try(&curobj->vmobjlock); + } + if (caller_locked || we_locked) { + TAILQ_FOREACH(ptp, &curobj->memq, listq) { + + /* + * might have found a PTP we can steal + * (unless it has wired pages). 
+ */ + + idx = ptp_o2i(ptp->offset); +#ifdef DIAGNOSTIC + if (VM_PAGE_TO_PHYS(ptp) != + (PDE(pmaps_hand, idx) & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); +#endif + + ptes = (pt_entry_t *) + pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == + (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes_pae(pmaps_hand, ptp, + (vaddr_t)ptes, ptp_i2v(idx), + ptp_i2v(idx+1), &cpumask); + pmap_tmpunmap_pa_pae(); + + if (lcv != PTES_PER_PTP) + /* wired, try next PTP */ + continue; + + /* + * got it!!! + */ + + PDE(pmaps_hand, idx) = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; +#ifdef MULTIPROCESSOR + pmap_apte_flush(pmaps_hand); +#else + if (pmap_is_curpmap(pmaps_hand)) + pmap_apte_flush(pmaps_hand); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == + pmaps_hand->pm_pdidx[0]) + pmap_update_pg(((vaddr_t)APTE_BASE) + + ptp->offset); +#endif + + /* put it in our pmap! */ + uvm_pagerealloc(ptp, obj, offset); + break; /* break out of "for" loop */ + } + if (we_locked) { + simple_unlock(&curobj->vmobjlock); + } + } + + /* advance the pmaps_hand */ + pmaps_hand = LIST_NEXT(pmaps_hand, pm_list); + if (pmaps_hand == NULL) { + pmaps_hand = LIST_FIRST(&pmaps); + } + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + pmap_tlb_shootnow(cpumask); + return(ptp); +} + +/* + * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) + * + * => pmap should NOT be pmap_kernel() + * => pmap should be locked + */ + +struct vm_page * +pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +{ + struct vm_page *ptp; + + if (pmap_valid_entry(PDE(pmap, pde_index))) { + + /* valid... check hint (saves us a PA->PG lookup) */ + if (pmap->pm_ptphint && + (PDE(pmap, pde_index) & PG_FRAME) == + VM_PAGE_TO_PHYS(pmap->pm_ptphint)) + return(pmap->pm_ptphint); + + ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_get_ptp: unmanaged user PTP"); +#endif + pmap->pm_ptphint = ptp; + return(ptp); + } + + /* allocate a new PTP (updates ptphint) */ + return(pmap_alloc_ptp_pae(pmap, pde_index, just_try)); +} + +/* + * pmap_pinit_pd: given a freshly allocated pmap structure, give it a PD + */ +void +pmap_pinit_pd_pae(struct pmap *pmap) +{ + extern int nkpde; + vaddr_t va; + + /* allocate PDP */ + pmap->pm_pdir = uvm_km_alloc(kernel_map, 4 * NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + /* page index is in the pmap! 
*/ + pmap_extract(pmap_kernel(), (vaddr_t)pmap, &pmap->pm_pdirpa); + /* fill out the PDPT entries */ + va = (vaddr_t)pmap->pm_pdir; + pmap_extract(pmap_kernel(), va + 0*NBPG, &pmap->pm_pdidx[0]); + pmap_extract(pmap_kernel(), va + 1*NBPG, &pmap->pm_pdidx[1]); + pmap_extract(pmap_kernel(), va + 2*NBPG, &pmap->pm_pdidx[2]); + pmap_extract(pmap_kernel(), va + 3*NBPG, &pmap->pm_pdidx[3]); + pmap->pm_pdidx[0] |= PG_V; + pmap->pm_pdidx[1] |= PG_V; + pmap->pm_pdidx[2] |= PG_V; + pmap->pm_pdidx[3] |= PG_V; + pmap->pm_pdirsize = 4 * NBPG; + + /* init PDP */ + /* zero init area */ + bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW; + PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW; + PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW; + PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW; + + /* + * we need to lock pmaps_lock to prevent nkpde from changing on + * us. note that there is no need to splvm to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... + */ + simple_lock(&pmaps_lock); + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN), + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize - + ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + simple_unlock(&pmaps_lock); +} + +/* + * some misc. functions + */ + +/* + * pmap_extract: extract a PA for the given VA + */ + +boolean_t +pmap_extract_pae(struct pmap *pmap, vaddr_t va, paddr_t *pap) +{ + paddr_t retval; + pt_entry_t *ptes; + + if (PDE(pmap, pdei(va))) { + ptes = pmap_map_ptes_pae(pmap); + retval = (paddr_t)(ptes[atop(va)] & PG_FRAME); + pmap_unmap_ptes_pae(pmap); + if (pap != NULL) + *pap = retval | (va & ~PG_FRAME); + return (TRUE); + } + return (FALSE); +} + +extern void (*pagezero)(void *, size_t); + +/* + * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are + * initialized. + */ +void +pmap_zero_phys_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(pmap_zerop, id); + +#ifdef DIAGNOSTIC + if (*zpte) + panic("pmap_zero_phys: lock botch"); +#endif + *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! */ +} + +/* + * pmap_zero_page_uncached: the same, except uncached. + */ + +boolean_t +pmap_zero_page_uncached_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *zpte = PTESLEW(zero_pte, id); + caddr_t zerova = VASLEW(pmap_zerop, id); + +#ifdef DIAGNOSTIC + if (*zpte) + panic("pmap_zero_page_uncached: lock botch"); +#endif + + *zpte = (pa & PG_FRAME) | PG_V | PG_RW | /* map in */ + ((cpu_class != CPUCLASS_386) ? PG_N : 0); + pmap_update_pg((vaddr_t)zerova); /* flush TLB */ + pagezero(zerova, PAGE_SIZE); /* zero */ + *zpte = 0; /* zap! 
*/ + + return (TRUE); +} + +/* + * pmap_copy_page: copy a page + */ + +void +pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) +{ + paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); + paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *spte = PTESLEW(csrc_pte,id); + pt_entry_t *dpte = PTESLEW(cdst_pte,id); + caddr_t csrcva = VASLEW(pmap_csrcp, id); + caddr_t cdstva = VASLEW(pmap_cdstp, id); + +#ifdef DIAGNOSTIC + if (*spte || *dpte) + panic("pmap_copy_page: lock botch"); +#endif + + *spte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW; + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); + bcopy(csrcva, cdstva, PAGE_SIZE); + *spte = *dpte = 0; /* zap! */ + pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); +#ifdef MULTIPROCESSOR + /* Using per-cpu VA; no shootdown required here. */ +#endif +} + +/* + * p m a p r e m o v e f u n c t i o n s + * + * functions that remove mappings + */ + +/* + * pmap_remove_ptes: remove PTEs from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + */ + +void +pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, + vaddr_t startva, vaddr_t endva, int32_t *cpumaskp) +{ + struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ + struct pv_entry *pve; + pt_entry_t *pte = (pt_entry_t *) ptpva; + pt_entry_t opte; + int bank, off; + + /* + * note that ptpva points to the PTE that maps startva. this may + * or may not be the first PTE in the PTP. + * + * we loop through the PTP while there are still PTEs to look at + * and the wire_count is greater than 1 (because we use the wire_count + * to keep track of the number of real PTEs in the PTP). + */ + + for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) + ; pte++, startva += NBPG) { + if (!pmap_valid_entry(*pte)) + continue; /* VA not mapped */ + + opte = i386_atomic_testset_uq(pte, 0); /* zap! */ + + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (opte & PG_U) + pmap_tlb_shootdown(pmap, startva, opte, cpumaskp); + + if (ptp) { + ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, startva, opte, + cpumaskp); + } + + /* + * if we are not on a pv_head list we are done. 
+ */ + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(atop(opte & PG_FRAME), &off) + != -1) + panic("pmap_remove_ptes: managed page without " + "PG_PVLIST for 0x%lx", startva); +#endif + continue; + } + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_ptes: unmanaged page marked " + "PG_PVLIST, va = 0x%lx, pa = 0x%lx", + startva, (u_long)(opte & PG_FRAME)); +#endif + + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, + startva); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) { + pve->pv_next = pv_tofree; + pv_tofree = pve; + } + + /* end of "for" loop: time for next pte */ + } + if (pv_tofree) + pmap_free_pvs(pmap, pv_tofree); +} + + +/* + * pmap_remove_pte: remove a single PTE from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + * => returns true if we removed a mapping + */ + +boolean_t +pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, + vaddr_t va, int32_t *cpumaskp) +{ + pt_entry_t opte; + int bank, off; + struct pv_entry *pve; + + if (!pmap_valid_entry(*pte)) + return(FALSE); /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ + + pmap_exec_account(pmap, va, opte, 0); + + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (opte & PG_U) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + + if (ptp) { + ptp->wire_count--; /* dropping a PTE */ + /* Make sure that the PDE is flushed */ + if ((ptp->wire_count <= 1) && !(opte & PG_U)) + pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + + } + + /* + * if we are not on a pv_head list we are done. + */ + + if ((opte & PG_PVLIST) == 0) { +#ifdef DIAGNOSTIC + if (vm_physseg_find(atop(opte & PG_FRAME), &off) != -1) + panic("pmap_remove_pte: managed page without " + "PG_PVLIST for 0x%lx", va); +#endif + return(TRUE); + } + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_remove_pte: unmanaged page marked " + "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, + (u_long)(opte & PG_FRAME)); +#endif + + /* sync R/M bits */ + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) + pmap_free_pv(pmap, pve); + return(TRUE); +} + +/* + * pmap_remove: top level mapping removal function + * + * => caller should not be holding any pmap locks + */ + +void +pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + pt_entry_t *ptes, opte; + boolean_t result; + paddr_t ptppa; + vaddr_t blkendva; + struct vm_page *ptp; + int32_t cpumask = 0; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + /* + * removing one page? take shortcut function. 
+ */ + + if (sva + PAGE_SIZE == eva) { + + if (pmap_valid_entry(PDE(pmap, pdei(sva)))) { + + /* PA of the PTP */ + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == + ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged " + "PTP detected"); +#endif + } + } + + /* do it! */ + result = pmap_remove_pte_pae(pmap, ptp, + &ptes[atop(sva)], sva, &cpumask); + + /* + * if mapping removed and the PTP is no longer + * being used, free it! + */ + + if (result && ptp && ptp->wire_count <= 1) { + opte = i386_atomic_testset_uq(&PDE(pmap, + pdei(sva)), 0); /* zap! */ +#ifdef MULTIPROCESSOR + /* + * XXXthorpej Redundant shootdown can happen + * here if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, + &cpumask); +#ifdef MULTIPROCESSOR + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen + * here if pmap == curpcb->pcb_pmap (not APTE + * space). + */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, + &cpumask); +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) + pmap->pm_ptphint = + TAILQ_FIRST(&pmap->pm_obj.memq); + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); /* unlock pmap */ + PMAP_MAP_TO_HEAD_UNLOCK(); + return; + } + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* + * XXXCDC: our PTE mappings should never be removed + * with pmap_remove! if we allow this (and why would + * we?) then we end up freeing the pmap's page + * directory page (PDP) before we are finished using + * it when we hit in in the recursive mapping. this + * is BAD. + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + if (pdei(sva) == PDSLOT_PTE) + /* XXXCDC: ugly hack to avoid freeing PDP here */ + continue; + + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + /* valid block? */ + continue; + + /* PA of the PTP */ + ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; + + /* get PTP if non-kernel mapping */ + if (pmap == pmap_kernel()) { + /* we never free kernel PTPs */ + ptp = NULL; + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + ptp = pmap->pm_ptphint; + } else { + ptp = PHYS_TO_VM_PAGE(ptppa); +#ifdef DIAGNOSTIC + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP " + "detected"); +#endif + } + } + pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(sva)], + sva, blkendva, &cpumask); + + /* if PTP is no longer being used, free it! */ + if (ptp && ptp->wire_count <= 1) { + opte = i386_atomic_testset_uq(&PDE(pmap, pdei(sva)),0); +#if defined(MULTIPROCESSOR) + /* + * XXXthorpej Redundant shootdown can happen here + * if we're using APTE space. + */ +#endif + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the pmap's self-mapping + * of the PTP. + * XXXthorpej Redundant shootdown can happen here + * if pmap == curpcb->pcb_pmap (not APTE space). 
+ */ + pmap_tlb_shootdown(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = + TAILQ_FIRST(&pmap->pm_obj.memq); + ptp->wire_count = 0; + uvm_pagefree(ptp); + } + } + + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); +} + +/* + * pmap_page_remove: remove a managed vm_page from all pmaps that map it + * + * => we set pv_head => pmap locking + * => R/M bits are sync'd back to attrs + */ + +void +pmap_page_remove_pae(struct vm_page *pg) +{ + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, opte; + int32_t cpumask = 0; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_page_remove: unmanaged page?\n"); + return; + } + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return; + } + + /* set pv_head => pmap locking */ + PMAP_HEAD_TO_MAP_LOCK(); + + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { + ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) + printf("pmap_page_remove: found pager VA on pv_list\n"); + if (pve->pv_ptp && (PDE(pve->pv_pmap, + pdei(pve->pv_va)) & PG_FRAME) != + VM_PAGE_TO_PHYS(pve->pv_ptp)) { + printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", + pg, pve->pv_va, pve->pv_ptp); + printf("pmap_page_remove: PTP's phys addr: " + "actual=%llx, recorded=%llx\n", + (PDE(pve->pv_pmap, pdei(pve->pv_va)) & + PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); + panic("pmap_page_remove: mapped managed page has " + "invalid pv_ptp field"); + } +#endif + + opte = ptes[atop(pve->pv_va)]; + ptes[atop(pve->pv_va)] = 0; /* zap! */ + + if (opte & PG_W) + pve->pv_pmap->pm_stats.wired_count--; + pve->pv_pmap->pm_stats.resident_count--; + + /* Shootdown only if referenced */ + if (opte & PG_U) + pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, + &cpumask); + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* update the PTP reference count. free if last reference. */ + if (pve->pv_ptp) { + pve->pv_ptp->wire_count--; + if (pve->pv_ptp->wire_count <= 1) { + /* + * Do we have to shootdown the page just to + * get the pte out of the TLB ? + */ + if(!(opte & PG_U)) + pmap_tlb_shootdown(pve->pv_pmap, + pve->pv_va, opte, &cpumask); + + opte = i386_atomic_testset_uq(&PDE(pve->pv_pmap, + pdei(pve->pv_va)), 0); + pmap_tlb_shootdown(curpcb->pcb_pmap, + ((vaddr_t)ptes) + pve->pv_ptp->offset, + opte, &cpumask); +#if defined(MULTIPROCESSOR) + /* + * Always shoot down the other pmap's + * self-mapping of the PTP. + */ + pmap_tlb_shootdown(pve->pv_pmap, + ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset, + opte, &cpumask); +#endif + pve->pv_pmap->pm_stats.resident_count--; + /* update hint? 
*/ + if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) + pve->pv_pmap->pm_ptphint = + TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq); + pve->pv_ptp->wire_count = 0; + uvm_pagefree(pve->pv_ptp); + } + } + pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ + } + pmap_free_pvs(NULL, pvh->pvh_list); + pvh->pvh_list = NULL; + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + pmap_tlb_shootnow(cpumask); +} + +/* + * p m a p a t t r i b u t e f u n c t i o n s + * functions that test/change managed page's attributes + * since a page can be mapped multiple times we must check each PTE that + * maps it by going down the pv lists. + */ + +/* + * pmap_test_attrs: test a page's attributes + * + * => we set pv_head => pmap locking + */ + +boolean_t +pmap_test_attrs_pae(struct vm_page *pg, int testbits) +{ + int bank, off; + char *myattrs; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, pte; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_test_attrs: unmanaged page?\n"); + return(FALSE); + } + + /* + * before locking: see if attributes are already set and if so, + * return! + */ + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + if (*myattrs & testbits) + return(TRUE); + + /* test to see if there is a list before bothering to lock */ + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return(FALSE); + } + + /* nope, gonna have to do it the hard way */ + PMAP_HEAD_TO_MAP_LOCK(); + /* XXX: needed if we hold head->map lock? */ + simple_lock(&pvh->pvh_lock); + + for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; + pve = pve->pv_next) { + ptes = pmap_map_ptes_pae(pve->pv_pmap); + pte = ptes[atop(pve->pv_va)]; + pmap_unmap_ptes_pae(pve->pv_pmap); + *myattrs |= pte; + } + + /* + * note that we will exit the for loop with a non-null pve if + * we have found the bits we are testing for. + */ + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + return((*myattrs & testbits) != 0); +} + +/* + * pmap_change_attrs: change a page's attributes + * + * => we set pv_head => pmap locking + * => we return TRUE if we cleared one of the bits we were asked to + */ + +boolean_t +pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits) +{ + u_int32_t result; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, npte, opte; + char *myattrs; + int32_t cpumask = 0; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_change_attrs: unmanaged page?\n"); + return(FALSE); + } + + PMAP_HEAD_TO_MAP_LOCK(); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + /* XXX: needed if we hold head->map lock? 
*/ + simple_lock(&pvh->pvh_lock); + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + result = *myattrs & clearbits; + *myattrs = (*myattrs | setbits) & ~clearbits; + + for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) + panic("pmap_change_attrs: mapping without PTP " + "detected"); +#endif + + ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ + npte = ptes[atop(pve->pv_va)]; + result |= (npte & clearbits); + npte = (npte | setbits) & ~(pt_entry_t)clearbits; + if (ptes[atop(pve->pv_va)] != npte) { + opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)], + npte); + pmap_tlb_shootdown(pve->pv_pmap, + atop(pve->pv_va), opte, &cpumask); + } + pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ + } + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + pmap_tlb_shootnow(cpumask); + + return(result != 0); +} + +/* + * p m a p p r o t e c t i o n f u n c t i o n s + */ + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => NOTE: this is an inline function in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_protect: set the protection in of the pages in a pmap + * + * => NOTE: this is an inline function in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_write_protect: write-protect pages in a pmap + */ +void +pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, + vm_prot_t prot) +{ + pt_entry_t *ptes, *spte, *epte, opte, npte; + vaddr_t blockend; + u_int32_t md_prot; + int32_t cpumask = 0; + + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + + /* should be ok, but just in case ... */ + sva &= PG_FRAME; + eva &= PG_FRAME; + + for (/* null */ ; sva < eva ; sva = blockend) { + + blockend = (sva & PD_MASK) + NBPD; + if (blockend > eva) + blockend = eva; + + /* + * XXXCDC: our PTE mappings should never be write-protected! + * + * long term solution is to move the PTEs out of user + * address space. and into kernel address space (up + * with APTE). then we can set VM_MAXUSER_ADDRESS to + * be VM_MAX_ADDRESS. + */ + + /* XXXCDC: ugly hack to avoid freeing PDP here */ + if (pdei(sva) == PDSLOT_PTE) + continue; + + /* empty block? */ + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + continue; + + md_prot = protection_codes[prot]; + if (sva < VM_MAXUSER_ADDRESS) + md_prot |= PG_u; + else if (sva < VM_MAX_ADDRESS) + /* XXX: write-prot our PTES? never! */ + md_prot |= (PG_u | PG_RW); + + spte = &ptes[atop(sva)]; + epte = &ptes[atop(blockend)]; + + for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) { + + if (!pmap_valid_entry(*spte)) /* no mapping? 
*/ + continue; + + npte = (*spte & ~(pt_entry_t)PG_PROT) | md_prot; + + if (npte != *spte) { + pmap_exec_account(pmap, sva, *spte, npte); + opte = *spte; + *spte = npte; + pmap_tlb_shootdown(pmap, sva, opte, &cpumask); + } + } + } + + pmap_tlb_shootnow(cpumask); + pmap_unmap_ptes_pae(pmap); /* unlocks pmap */ +} + +/* + * end of protection functions + */ + +/* + * pmap_unwire: clear the wired bit in the PTE + * + * => mapping should already be in map + */ + +void +pmap_unwire_pae(struct pmap *pmap, vaddr_t va) +{ + pt_entry_t *ptes; + + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (!pmap_valid_entry(ptes[atop(va)])) + panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); +#endif + if ((ptes[atop(va)] & PG_W) != 0) { + ptes[atop(va)] &= ~PG_W; + pmap->pm_stats.wired_count--; + } +#ifdef DIAGNOSTIC + else { + printf("pmap_unwire: wiring for pmap %p va 0x%lx " + "didn't change!\n", pmap, va); + } +#endif + pmap_unmap_ptes_pae(pmap); /* unlocks map */ + } +#ifdef DIAGNOSTIC + else { + panic("pmap_unwire: invalid PDE"); + } +#endif +} + +/* + * pmap_copy: copy mappings from one pmap to another + * + * => optional function + * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro in pmap.h + */ + +/* + * pmap_enter: enter a mapping into a pmap + * + * => must be done "now" ... no lazy-evaluation + * => we set pmap => pv_head locking + */ + +int +pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, + int flags) +{ + pt_entry_t *ptes, opte, npte; + struct vm_page *ptp; + struct pv_head *pvh; + struct pv_entry *pve; + int bank, off, error; + boolean_t wired = (flags & PMAP_WIRED) != 0; + +#ifdef DIAGNOSTIC + /* sanity check: totally out of range? */ + if (va >= VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: too big"); + + if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) + panic("pmap_enter: trying to map over PDP/APDP!"); + + /* sanity check: kernel PTPs should already have been pre-allocated */ + if (va >= VM_MIN_KERNEL_ADDRESS && + !pmap_valid_entry(PDE(pmap, pdei(va)))) + panic("pmap_enter: missing kernel PTP!"); +#endif + + /* get lock */ + PMAP_MAP_TO_HEAD_LOCK(); + + /* + * map in ptes and get a pointer to our PTP (unless we are the kernel) + */ + + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + if (pmap == pmap_kernel()) { + ptp = NULL; + } else { + ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE); + if (ptp == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: get ptp failed"); + } + } + opte = ptes[atop(va)]; /* old PTE */ + + /* + * is there currently a valid mapping at our VA? + */ + + if (pmap_valid_entry(opte)) { + + /* + * first, update pm_stats. resident count will not + * change since we are replacing/changing a valid + * mapping. wired count might change... + */ + + if (wired && (opte & PG_W) == 0) + pmap->pm_stats.wired_count++; + else if (!wired && (opte & PG_W) != 0) + pmap->pm_stats.wired_count--; + + /* + * is the currently mapped PA the same as the one we + * want to map? 
+ */ + + if ((opte & PG_FRAME) == pa) { + + /* if this is on the PVLIST, sync R/M bit */ + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(pa), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: same pa PG_PVLIST " + "mapping with unmanaged page " + "pa = 0x%lx (0x%lx)", pa, + atop(pa)); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pvh = NULL; /* ensure !PG_PVLIST */ + } + goto enter_now; + } + + /* + * changing PAs: we must remove the old one first + */ + + /* + * if current mapping is on a pvlist, + * remove it (sync R/M bits) + */ + + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); +#ifdef DIAGNOSTIC + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with " + "unmanaged page " + "pa = 0x%lx (0x%lx)", pa, atop(pa)); +#endif + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + pve = pmap_remove_pv(pvh, pmap, va); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pve = NULL; + } + } else { /* opte not valid */ + pve = NULL; + pmap->pm_stats.resident_count++; + if (wired) + pmap->pm_stats.wired_count++; + if (ptp) + ptp->wire_count++; /* count # of valid entrys */ + } + + /* + * at this point pm_stats has been updated. pve is either NULL + * or points to a now-free pv_entry structure (the latter case is + * if we called pmap_remove_pv above). + * + * if this entry is to be on a pvlist, enter it now. + */ + + bank = vm_physseg_find(atop(pa), &off); + if (pmap_initialized && bank != -1) { + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pve == NULL) { + pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + if (pve == NULL) { + if (flags & PMAP_CANFAIL) { + error = KERN_RESOURCE_SHORTAGE; + goto out; + } + panic("pmap_enter: no pv entries available"); + } + } + /* lock pvh when adding */ + pmap_enter_pv(pvh, pve, pmap, va, ptp); + } else { + + /* new mapping is not PG_PVLIST. free pve if we've got one */ + pvh = NULL; /* ensure !PG_PVLIST */ + if (pve) + pmap_free_pv(pmap, pve); + } + +enter_now: + /* + * at this point pvh is !NULL if we want the PG_PVLIST bit set + */ + + npte = pa | protection_codes[prot] | PG_V; + pmap_exec_account(pmap, va, opte, npte); + if (pvh) + npte |= PG_PVLIST; + if (wired) + npte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + npte |= PG_u; + else if (va < VM_MAX_ADDRESS) + npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ + if (pmap == pmap_kernel()) + npte |= pmap_pg_g; + + ptes[atop(va)] = npte; /* zap! */ + + if ((opte & ~(pt_entry_t)(PG_M|PG_U)) != npte) { +#ifdef MULTIPROCESSOR + int32_t cpumask = 0; + + pmap_tlb_shootdown(pmap, va, opte, &cpumask); + pmap_tlb_shootnow(cpumask); +#else + /* Don't bother deferring in the single CPU case. */ + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); +#endif + } + + error = 0; + +out: + pmap_unmap_ptes_pae(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + return error; +} + +/* + * pmap_growkernel: increase usage of KVM space + * + * => we allocate new PTPs for the kernel and install them in all + * the pmaps on the system. + */ + +vaddr_t +pmap_growkernel_pae(vaddr_t maxkvaddr) +{ + extern int nkpde; + struct pmap *kpm = pmap_kernel(), *pm; + int needed_kpde; /* needed number of kernel PTPs */ + int s; + paddr_t ptaddr; + + needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) + / NBPD; + if (needed_kpde <= nkpde) + goto out; /* we are OK */ + + /* + * whoops! 
we need to add kernel PTPs + */ + + s = splhigh(); /* to be safe */ + simple_lock(&kpm->pm_obj.vmobjlock); + + for (/*null*/ ; nkpde < needed_kpde ; nkpde++) { + + if (uvm.page_init_done == FALSE) { + + /* + * we're growing the kernel pmap early (from + * uvm_pageboot_alloc()). this case must be + * handled a little differently. + */ + + if (uvm_page_physget(&ptaddr) == FALSE) + panic("pmap_growkernel: out of memory"); + pmap_zero_phys(ptaddr); + + PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V; + + /* count PTP as resident */ + kpm->pm_stats.resident_count++; + continue; + } + + /* + * THIS *MUST* BE CODED SO AS TO WORK IN THE + * pmap_initialized == FALSE CASE! WE MAY BE + * INVOKED WHILE pmap_init() IS RUNNING! + */ + + if (pmap_alloc_ptp_pae(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) { + panic("pmap_growkernel: alloc ptp failed"); + } + + /* PG_u not for kernel */ + PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u; + + /* distribute new kernel PTP to all active pmaps */ + simple_lock(&pmaps_lock); + LIST_FOREACH(pm, &pmaps, pm_list) { + PDE(pm, PDSLOT_KERN + nkpde) = + PDE(kpm, PDSLOT_KERN + nkpde); + } + simple_unlock(&pmaps_lock); + } + + simple_unlock(&kpm->pm_obj.vmobjlock); + splx(s); + +out: + return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)); +} + +#ifdef DEBUG +void pmap_dump_pae(struct pmap *, vaddr_t, vaddr_t); + +/* + * pmap_dump: dump all the mappings from a pmap + * + * => caller should not be holding any pmap locks + */ + +void +pmap_dump_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + pt_entry_t *ptes, *pte; + vaddr_t blkendva; + + /* + * if end is out of range truncate. + * if (end == start) update to max. + */ + + if (eva > VM_MAXUSER_ADDRESS || eva <= sva) + eva = VM_MAXUSER_ADDRESS; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + + /* + * dumping a range of pages: we dump in PTP sized blocks (4MB) + */ + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* valid block? 
*/ + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + continue; + + pte = &ptes[atop(sva)]; + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { + if (!pmap_valid_entry(*pte)) + continue; + printf("va %#lx -> pa %#x (pte=%#x)\n", + sva, *pte, *pte & PG_FRAME); + } + } + pmap_unmap_ptes_pae(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); +} +#endif diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c index ebba0686b8f..433fcfd3724 100644 --- a/sys/arch/i386/i386/vm_machdep.c +++ b/sys/arch/i386/i386/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.42 2005/11/25 14:07:17 mickey Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.43 2006/04/27 15:37:51 mickey Exp $ */ /* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */ /*- @@ -242,8 +242,7 @@ pagemove(from, to, size) caddr_t from, to; size_t size; { - pt_entry_t *fpte, *tpte; - pt_entry_t ofpte, otpte; + u_int32_t ofpte, otpte; #ifdef MULTIPROCESSOR u_int32_t cpumask = 0; #endif @@ -252,13 +251,12 @@ pagemove(from, to, size) if ((size & PAGE_MASK) != 0) panic("pagemove"); #endif - fpte = kvtopte((vaddr_t)from); - tpte = kvtopte((vaddr_t)to); while (size > 0) { - ofpte = *fpte; - otpte = *tpte; - *tpte++ = *fpte; - *fpte++ = 0; + ofpte = pmap_pte_bits((vaddr_t)from); + otpte = pmap_pte_bits((vaddr_t)to); + pmap_pte_set((vaddr_t)to, + pmap_pte_paddr((vaddr_t)from), ofpte); + pmap_pte_set((vaddr_t)from, 0, 0); #if defined(I386_CPU) && !defined(MULTIPROCESSOR) if (cpu_class != CPUCLASS_386) #endif diff --git a/sys/arch/i386/include/_types.h b/sys/arch/i386/include/_types.h index 8d54ca43d8c..f731aefd89f 100644 --- a/sys/arch/i386/include/_types.h +++ b/sys/arch/i386/include/_types.h @@ -1,4 +1,4 @@ -/* $OpenBSD: _types.h,v 1.2 2006/01/13 17:50:06 millert Exp $ */ +/* $OpenBSD: _types.h,v 1.3 2006/04/27 15:37:53 mickey Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -86,9 +86,9 @@ typedef __int32_t __register_t; /* VM system types */ typedef unsigned long __vaddr_t; -typedef unsigned long __paddr_t; typedef unsigned long __vsize_t; -typedef unsigned long __psize_t; +typedef unsigned long long __paddr_t; +typedef unsigned long long __psize_t; /* Standard system types */ typedef int __clock_t; diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h index e3be6b68b1b..a06878e87b0 100644 --- a/sys/arch/i386/include/atomic.h +++ b/sys/arch/i386/include/atomic.h @@ -1,4 +1,4 @@ -/* $OpenBSD: atomic.h,v 1.2 2004/06/13 21:49:16 niklas Exp $ */ +/* $OpenBSD: atomic.h,v 1.3 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $ */ /*- @@ -44,6 +44,13 @@ #ifndef _LOCORE +static __inline u_int64_t +i386_atomic_testset_uq (volatile u_int64_t *ptr, u_int64_t val) { + __asm__ volatile ("\n1:\tlock; cmpxchg8b (%1); jnz 1b" : "+A" (val) : + "r" (ptr), "b" ((u_int32_t)val), "c" ((u_int32_t)(val >> 32))); + return val; +} + static __inline u_int32_t i386_atomic_testset_ul (volatile u_int32_t *ptr, unsigned long val) { __asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr)); diff --git a/sys/arch/i386/include/bus.h b/sys/arch/i386/include/bus.h index 0b26d524f49..9900c76d4f0 100644 --- a/sys/arch/i386/include/bus.h +++ b/sys/arch/i386/include/bus.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bus.h,v 1.38 2006/04/27 15:17:16 mickey Exp $ */ +/* $OpenBSD: bus.h,v 1.39 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: bus.h,v 1.6 1996/11/10 03:19:25 thorpej Exp $ */ /*- @@ -741,7 +741,7 @@ void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, #define BUS_DMA_COHERENT 
0x004 /* hint: map memory DMA coherent */ #define BUS_DMA_BUS1 0x010 /* placeholders for bus functions... */ #define BUS_DMA_BUS2 0x020 -#define BUS_DMA_BUS3 0x040 +#define BUS_DMA_64BIT 0x040 /* large memory high segment is ok */ #define BUS_DMA_24BIT 0x080 /* isadma map */ #define BUS_DMA_STREAMING 0x100 /* hint: sequential, unidirectional */ #define BUS_DMA_READ 0x200 /* mapping is device -> memory only */ @@ -771,7 +771,10 @@ typedef struct i386_bus_dmamap *bus_dmamap_t; */ struct i386_bus_dma_segment { bus_addr_t ds_addr; /* DMA address */ + paddr_t ds_addr2; /* replacement store */ bus_size_t ds_len; /* length of transfer */ + vaddr_t ds_va; /* mapped loaded data */ + vaddr_t ds_va2; /* mapped replacement data */ }; typedef struct i386_bus_dma_segment bus_dma_segment_t; @@ -863,6 +866,11 @@ struct i386_bus_dmamap { void *_dm_cookie; /* cookie for bus-specific functions */ + struct vm_page **_dm_pages; /* replacement pages */ + vaddr_t _dm_pgva; /* those above -- mapped */ + int _dm_npages; /* number of pages allocated */ + int _dm_nused; /* number of pages replaced */ + /* * PUBLIC MEMBERS: these are used by machine-independent code. */ diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index bf7327f06a4..568a2ef2de5 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.74 2006/01/12 22:39:21 weingart Exp $ */ +/* $OpenBSD: cpu.h,v 1.75 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */ /*- @@ -418,6 +418,10 @@ int kvtop(caddr_t); void vm86_gpfault(struct proc *, int); #endif /* VM86 */ +#ifndef SMALL_KERNEL +int cpu_paenable(void *); +#endif /* !SMALL_KERNEL */ + #ifdef GENERIC /* swapgeneric.c */ void setconf(void); diff --git a/sys/arch/i386/include/loadfile_machdep.h b/sys/arch/i386/include/loadfile_machdep.h index a121e81d4ef..5903231fc58 100644 --- a/sys/arch/i386/include/loadfile_machdep.h +++ b/sys/arch/i386/include/loadfile_machdep.h @@ -1,4 +1,4 @@ -/* $OpenBSD: loadfile_machdep.h,v 1.1 2003/04/17 03:42:14 drahn Exp $ */ +/* $OpenBSD: loadfile_machdep.h,v 1.2 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: loadfile_machdep.h,v 1.1 1999/04/29 03:17:12 tsubai Exp $ */ /*- @@ -43,7 +43,7 @@ #define LOAD_KERNEL (LOAD_ALL & ~LOAD_TEXTA) #define COUNT_KERNEL (COUNT_ALL & ~COUNT_TEXTA) -#define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) +#define LOADADDR(a) (((u_long)(a) + (u_long)offset)&0xfffffff) #define ALIGNENTRY(a) ((u_long)(a)) #define READ(f, b, c) read((f), (void *)LOADADDR(b), (c)) #define BCOPY(s, d, c) memcpy((void *)LOADADDR(d), (void *)(s), (c)) diff --git a/sys/arch/i386/include/param.h b/sys/arch/i386/include/param.h index 439a9859587..433644f782e 100644 --- a/sys/arch/i386/include/param.h +++ b/sys/arch/i386/include/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.35 2006/03/19 01:47:23 martin Exp $ */ +/* $OpenBSD: param.h,v 1.36 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: param.h,v 1.29 1996/03/04 05:04:26 cgd Exp $ */ /*- @@ -75,8 +75,6 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (PAGE_SIZE - 1) -#define NPTEPG (NBPG/(sizeof (pt_entry_t))) - /* * Start of kernel virtual space. Remember to alter the memory and * page table layout description in pmap.h when changing this. 
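The pmapae.c routines above repeatedly "zap" live PTEs with i386_atomic_testset_uq(), and the atomic.h hunk earlier in this diff builds that primitive from a locked cmpxchg8b retry loop: a PAE PTE is 64 bits wide, and it has to be replaced in one atomic step while other CPUs (or the MMU setting PG_U/PG_M) may touch it concurrently. The fragment below is only a minimal user-space sketch of those retry-loop semantics, written against GCC's __atomic compare-exchange builtin instead of inline assembly; the builtin and the standalone function are assumptions of this sketch, not part of the commit.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the i386_atomic_testset_uq() idea: atomically store `val'
 * into a 64-bit word and return the previous contents.  The
 * compare-and-exchange loop retries until no other CPU has modified
 * the word between our read and our write, which is the same effect
 * the cmpxchg8b loop in atomic.h achieves.
 */
static uint64_t
atomic_testset_u64(volatile uint64_t *p, uint64_t val)
{
	uint64_t old = *p;

	while (!__atomic_compare_exchange_n(p, &old, val, 0,
	    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		;	/* `old' was reloaded with the current value; retry */
	return (old);
}

int
main(void)
{
	volatile uint64_t pte = 0x12345000ULL | 0x63;	/* fake PTE, fake flag bits */
	uint64_t opte = atomic_testset_u64(&pte, 0);	/* "zap!" */

	printf("old pte %#llx, new pte %#llx\n",
	    (unsigned long long)opte, (unsigned long long)pte);
	return (0);
}
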
@@ -131,9 +129,3 @@ /* bytes to disk blocks */ #define dbtob(x) ((x) << DEV_BSHIFT) #define btodb(x) ((x) >> DEV_BSHIFT) - -/* - * Mach derived conversion macros - */ -#define i386_round_pdr(x) ((((unsigned)(x)) + PDOFSET) & ~PDOFSET) -#define i386_trunc_pdr(x) ((unsigned)(x) & ~PDOFSET) diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 4a350f4201f..b1e1c2ec4b8 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.41 2006/01/12 22:39:21 weingart Exp $ */ +/* $OpenBSD: pmap.h,v 1.42 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -47,123 +47,11 @@ #include <uvm/uvm_object.h> /* - * See pte.h for a description of i386 MMU terminology and hardware - * interface. - * - * A pmap describes a process' 4GB virtual address space. This - * virtual address space can be broken up into 1024 4MB regions which - * are described by PDEs in the PDP. The PDEs are defined as follows: - * - * Ranges are inclusive -> exclusive, just like vm_map_entry start/end. - * The following assumes that KERNBASE is 0xd0000000. - * - * PDE#s VA range Usage - * 0->831 0x0 -> 0xcfc00000 user address space, note that the - * max user address is 0xcfbfe000 - * the final two pages in the last 4MB - * used to be reserved for the UAREA - * but now are no longer used. - * 831 0xcfc00000-> recursive mapping of PDP (used for - * 0xd0000000 linear mapping of PTPs). - * 832->1023 0xd0000000-> kernel address space (constant - * 0xffc00000 across all pmaps/processes). - * 1023 0xffc00000-> "alternate" recursive PDP mapping - * <end> (for other pmaps). - * - * - * Note: A recursive PDP mapping provides a way to map all the PTEs for - * a 4GB address space into a linear chunk of virtual memory. In other - * words, the PTE for page 0 is the first int mapped into the 4MB recursive - * area. The PTE for page 1 is the second int. The very last int in the - * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB - * address). - * - * All pmaps' PDs must have the same values in slots 832->1023 so that - * the kernel is always mapped in every process. These values are loaded - * into the PD at pmap creation time. - * - * At any one time only one pmap can be active on a processor. This is - * the pmap whose PDP is pointed to by processor register %cr3. This pmap - * will have all its PTEs mapped into memory at the recursive mapping - * point (slot #831 as show above). When the pmap code wants to find the - * PTE for a virtual address, all it has to do is the following: - * - * Address of PTE = (831 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) - * = 0xcfc00000 + (VA / 4096) * 4 - * - * What happens if the pmap layer is asked to perform an operation - * on a pmap that is not the one which is currently active? In that - * case we take the PA of the PDP of non-active pmap and put it in - * slot 1023 of the active pmap. This causes the non-active pmap's - * PTEs to get mapped in the final 4MB of the 4GB address space - * (e.g. starting at 0xffc00000). 
- * - * The following figure shows the effects of the recursive PDP mapping: - * - * PDP (%cr3) - * +----+ - * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 - * | | - * | | - * | 831| -> points back to PDP (%cr3) mapping VA 0xcfc00000 -> 0xd0000000 - * | 832| -> first kernel PTP (maps 0xd0000000 -> 0xe0400000) - * | | - * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) - * +----+ - * - * Note that the PDE#831 VA (0xcfc00000) is defined as "PTE_BASE". - * Note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE". - * - * Starting at VA 0xcfc00000 the current active PDP (%cr3) acts as a - * PTP: - * - * PTP#831 == PDP(%cr3) => maps VA 0xcfc00000 -> 0xd0000000 - * +----+ - * | 0| -> maps the contents of PTP#0 at VA 0xcfc00000->0xcfc01000 - * | | - * | | - * | 831| -> maps the contents of PTP#831 (the PDP) at VA 0xcff3f000 - * | 832| -> maps the contents of first kernel PTP - * | | - * |1023| - * +----+ - * - * Note that mapping of the PDP at PTP#831's VA (0xcff3f000) is - * defined as "PDP_BASE".... within that mapping there are two - * defines: - * "PDP_PDE" (0xcff3fcfc) is the VA of the PDE in the PDP - * which points back to itself. - * "APDP_PDE" (0xcff3fffc) is the VA of the PDE in the PDP which - * establishes the recursive mapping of the alternate pmap. - * To set the alternate PDP, one just has to put the correct - * PA info in *APDP_PDE. - * - * Note that in the APTE_BASE space, the APDP appears at VA - * "APDP_BASE" (0xfffff000). + * The following defines identify the slots used as described in pmap.c . */ - -/* - * The following defines identify the slots used as described above. - */ - -#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 831: for recursive PDP map */ -#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ -#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ - -/* - * The following defines give the virtual addresses of various MMU - * data structures: - * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings - * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD - * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP - */ - -#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) -#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) -#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) -#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) -#define PDP_PDE (PDP_BASE + PDSLOT_PTE) -#define APDP_PDE (PDP_BASE + PDSLOT_APTE) +#define PDSLOT_PTE ((KERNBASE/NBPD)-2) /* 830: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1022) /* 1022: alternative recursive slot */ /* * The following define determines how many PTPs should be set up for the @@ -171,55 +59,10 @@ * get the VM system running. Once the VM system is running, the * pmap module can add more PTPs to the kernel area on demand. 
 */
-
 #ifndef NKPTP
-#define NKPTP 4 /* 16MB to start */
+#define NKPTP 8 /* 16/32MB to start */
 #endif
 #define NKPTP_MIN 4 /* smallest value we allow */
-#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1)
- /* largest value (-1 for APTP space) */
-
-/*
- * various address macros
- *
- * vtopte: return a pointer to the PTE mapping a VA
- * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap)
- * ptetov: given a pointer to a PTE, return the VA that it maps
- * vtophys: translate a VA to the PA mapped to it
- *
- * plus alternative versions of the above
- */
-
-#define vtopte(VA) (PTE_BASE + atop(VA))
-#define kvtopte(VA) vtopte(VA)
-#define ptetov(PT) (ptoa(PT - PTE_BASE))
-#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \
- ((unsigned)(VA) & ~PG_FRAME))
-#define avtopte(VA) (APTE_BASE + atop(VA))
-#define ptetoav(PT) (ptoa(PT - APTE_BASE))
-#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \
- ((unsigned)(VA) & ~PG_FRAME))
-
-/*
- * pdei/ptei: generate index into PDP/PTP from a VA
- */
-#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT)
-#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT)
-
-/*
- * PTP macros:
- * A PTP's index is the PD index of the PDE that points to it.
- * A PTP's offset is the byte-offset in the PTE space that this PTP is at.
- * A PTP's VA is the first VA mapped by that PTP.
- *
- * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries)
- * NBPD == number of bytes a PTP can map (4MB)
- */
-
-#define ptp_i2o(I) ((I) * NBPG) /* index => offset */
-#define ptp_o2i(O) ((O) / NBPG) /* offset => index */
-#define ptp_i2v(I) ((I) * NBPD) /* index => VA */
-#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */

 /*
 * PG_AVAIL usage: we make use of the ignored bits of the PTE
@@ -229,12 +72,6 @@
 #define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
 #define PG_X PG_AVAIL3 /* executable mapping */

-/*
- * Number of PTE's per cache line. 4 byte pte, 32-byte cache line
- * Used to avoid false sharing of cache lines.
- */
-#define NPTECL 8
-
 #ifdef _KERNEL
 /*
 * pmap data structures: see pmap.c for details of locking.
@@ -257,13 +94,15 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
 */

 struct pmap {
+ paddr_t pm_pdidx[4]; /* PDIEs for PAE mode */
+ paddr_t pm_pdirpa; /* PA of PD (read-only after create) */
+ vaddr_t pm_pdir; /* VA of PD (lck by object lock) */
+ int pm_pdirsize; /* PD size (4k vs 16k on pae) */
 struct uvm_object pm_obj; /* object (lck by object lock) */
 #define pm_lock pm_obj.vmobjlock
 LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */
- pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */
- paddr_t pm_pdirpa; /* PA of PD (read-only after create) */
 struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */
- struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
+ struct pmap_statistics pm_stats;/* pmap stats (lck by object lock) */

 vaddr_t pm_hiexec; /* highest executable mapping */
 int pm_flags; /* see below */
@@ -333,67 +172,185 @@ struct pv_page {
 /*
 * global kernel variables
 */
-
-extern pd_entry_t PTD[];
-
-/* PTDpaddr: is the physical address of the kernel's PDP */
-extern u_int32_t PTDpaddr;
-
+extern char PTD[];
 extern struct pmap kernel_pmap_store; /* kernel pmap */
-extern int nkpde; /* current # of PDEs for kernel */
-extern int pmap_pg_g; /* do we support PG_G? */
+extern int nkptp_max;

 /*
- * Macros
+ * Our dual-pmap design requires us to play pointer-and-seek.
+ * Being nice folks, though, we handle single-pmap kernels specially.
*/ +#define PMAP_EXCLUDE_DECLS /* tells uvm_pmap.h *not* to include decls */ +/* + * Dumb macros + */ #define pmap_kernel() (&kernel_pmap_store) #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) #define pmap_update(pm) /* nada */ -#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) -#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) -#define pmap_copy(DP,SP,D,L,S) -#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) -#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) -#define pmap_phys_address(ppn) ptoa(ppn) -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ - -#define pmap_proc_iflush(p,va,len) /* nothing */ -#define pmap_unuse_final(p) /* nothing */ +#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) +#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) +#define pmap_copy(DP,SP,D,L,S) /* nicht */ +#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) +#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) +#define pmap_phys_address(ppn) ptoa(ppn) +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ +#define pmap_proc_iflush(p,va,len) /* nothing */ +#define pmap_unuse_final(p) /* 4anaEB u nycToTa */ /* * Prototypes */ - void pmap_bootstrap(vaddr_t); -boolean_t pmap_change_attrs(struct vm_page *, int, int); +void pmap_bootstrap_pae(void); +void pmap_virtual_space(vaddr_t *, vaddr_t *); +void pmap_init(void); +struct pmap * pmap_create(void); +void pmap_destroy(struct pmap *); +void pmap_reference(struct pmap *); +void pmap_fork(struct pmap *, struct pmap *); +void pmap_collect(struct pmap *); +void pmap_activate(struct proc *); +void pmap_deactivate(struct proc *); +void pmap_kenter_pa(vaddr_t, paddr_t, vm_prot_t); +void pmap_kremove(vaddr_t, vsize_t); +void pmap_zero_page(struct vm_page *); +void pmap_copy_page(struct vm_page *, struct vm_page *); + +struct pv_entry*pmap_alloc_pv(struct pmap *, int); +void pmap_enter_pv(struct pv_head *, struct pv_entry *, + struct pmap *, vaddr_t, struct vm_page *); +void pmap_free_pv(struct pmap *, struct pv_entry *); +void pmap_free_pvs(struct pmap *, struct pv_entry *); +void pmap_free_pv_doit(struct pv_entry *); +void pmap_free_pvpage(void); static void pmap_page_protect(struct vm_page *, vm_prot_t); -void pmap_page_remove(struct vm_page *); -static void pmap_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); -void pmap_remove(struct pmap *, vaddr_t, vaddr_t); -boolean_t pmap_test_attrs(struct vm_page *, int); +static void pmap_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); static void pmap_update_pg(vaddr_t); -static void pmap_update_2pg(vaddr_t,vaddr_t); -void pmap_write_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); +static void pmap_update_2pg(vaddr_t, vaddr_t); int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *); +void pmap_exec_account(struct pmap *, vaddr_t, u_int32_t, + u_int32_t); vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */ +paddr_t vtophys(vaddr_t va); -void pmap_tlb_shootdown(pmap_t, vaddr_t, pt_entry_t, int32_t *); +void pmap_tlb_shootdown(pmap_t, vaddr_t, u_int32_t, int32_t *); void pmap_tlb_shootnow(int32_t); void pmap_do_tlb_shootdown(struct cpu_info *); +boolean_t pmap_is_curpmap(struct pmap *); +boolean_t pmap_is_active(struct pmap *, int); +void pmap_apte_flush(struct pmap *); +struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *, vaddr_t); + +#ifdef SMALL_KERNEL +#define pmap_pte_set_86 pmap_pte_set +#define pmap_pte_setbits_86 pmap_pte_setbits +#define 
pmap_pte_bits_86 pmap_pte_bits +#define pmap_pte_paddr_86 pmap_pte_paddr +#define pmap_change_attrs_86 pmap_change_attrs +#define pmap_enter_86 pmap_enter +#define pmap_extract_86 pmap_extract +#define pmap_growkernel_86 pmap_growkernel +#define pmap_page_remove_86 pmap_page_remove +#define pmap_remove_86 pmap_remove +#define pmap_test_attrs_86 pmap_test_attrs +#define pmap_unwire_86 pmap_unwire +#define pmap_write_protect_86 pmap_write_protect +#define pmap_pinit_pd_86 pmap_pinit_pd +#define pmap_zero_phys_86 pmap_zero_phys +#define pmap_zero_page_uncached_86 pmap_zero_page_uncached +#define pmap_copy_page_86 pmap_copy_page +#define pmap_try_steal_pv_86 pmap_try_steal_pv +#else +extern u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t); +extern u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t); +extern u_int32_t (*pmap_pte_bits_p)(vaddr_t); +extern paddr_t (*pmap_pte_paddr_p)(vaddr_t); +extern boolean_t (*pmap_change_attrs_p)(struct vm_page *, int, int); +extern int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +extern boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); +extern vaddr_t (*pmap_growkernel_p)(vaddr_t); +extern void (*pmap_page_remove_p)(struct vm_page *); +extern void (*pmap_remove_p)(struct pmap *, vaddr_t, vaddr_t); +extern boolean_t (*pmap_test_attrs_p)(struct vm_page *, int); +extern void (*pmap_unwire_p)(struct pmap *, vaddr_t); +extern void (*pmap_write_protect_p)(struct pmap*, vaddr_t, vaddr_t, vm_prot_t); +extern void (*pmap_pinit_pd_p)(pmap_t); +extern void (*pmap_zero_phys_p)(paddr_t); +extern boolean_t (*pmap_zero_page_uncached_p)(paddr_t); +extern void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *); +extern boolean_t (*pmap_try_steal_pv_p)(struct pv_head *pvh, + struct pv_entry *cpv, struct pv_entry *prevpv); + +u_int32_t pmap_pte_set_pae(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_pae(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_pae(vaddr_t); +paddr_t pmap_pte_paddr_pae(vaddr_t); +boolean_t pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv); +boolean_t pmap_change_attrs_pae(struct vm_page *, int, int); +int pmap_enter_pae(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_pae(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_pae(vaddr_t); +void pmap_page_remove_pae(struct vm_page *); +void pmap_remove_pae(struct pmap *, vaddr_t, vaddr_t); +boolean_t pmap_test_attrs_pae(struct vm_page *, int); +void pmap_unwire_pae(struct pmap *, vaddr_t); +void pmap_write_protect_pae(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_pae(pmap_t); +void pmap_zero_phys_pae(paddr_t); +boolean_t pmap_zero_page_uncached_pae(paddr_t); +void pmap_copy_page_pae(struct vm_page *, struct vm_page *); + +#define pmap_pte_set (*pmap_pte_set_p) +#define pmap_pte_setbits (*pmap_pte_setbits_p) +#define pmap_pte_bits (*pmap_pte_bits_p) +#define pmap_pte_paddr (*pmap_pte_paddr_p) +#define pmap_change_attrs (*pmap_change_attrs_p) +#define pmap_enter (*pmap_enter_p) +#define pmap_extract (*pmap_extract_p) +#define pmap_growkernel (*pmap_growkernel_p) +#define pmap_page_remove (*pmap_page_remove_p) +#define pmap_remove (*pmap_remove_p) +#define pmap_test_attrs (*pmap_test_attrs_p) +#define pmap_unwire (*pmap_unwire_p) +#define pmap_write_protect (*pmap_write_protect_p) +#define pmap_pinit_pd (*pmap_pinit_pd_p) +#define pmap_zero_phys (*pmap_zero_phys_p) +#define pmap_zero_page_uncached (*pmap_zero_page_uncached_p) +#define pmap_copy_page 
(*pmap_copy_page_p) +#define pmap_try_steal_pv (*pmap_try_steal_pv_p) +#endif + +u_int32_t pmap_pte_set_86(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_86(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_86(vaddr_t); +paddr_t pmap_pte_paddr_86(vaddr_t); +boolean_t pmap_try_steal_pv_86(struct pv_head *pvh, struct pv_entry *cpv, + struct pv_entry *prevpv); +boolean_t pmap_change_attrs_86(struct vm_page *, int, int); +int pmap_enter_86(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_86(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_86(vaddr_t); +void pmap_page_remove_86(struct vm_page *); +void pmap_remove_86(struct pmap *, vaddr_t, vaddr_t); +boolean_t pmap_test_attrs_86(struct vm_page *, int); +void pmap_unwire_86(struct pmap *, vaddr_t); +void pmap_write_protect_86(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_86(pmap_t); +void pmap_zero_phys_86(paddr_t); +boolean_t pmap_zero_page_uncached_86(paddr_t); +void pmap_copy_page_86(struct vm_page *, struct vm_page *); #define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ /* * Do idle page zero'ing uncached to avoid polluting the cache. */ -boolean_t pmap_zero_page_uncached(paddr_t); #define PMAP_PAGEIDLEZERO(pg) pmap_zero_page_uncached(VM_PAGE_TO_PHYS(pg)) /* diff --git a/sys/arch/i386/include/pte.h b/sys/arch/i386/include/pte.h index e27c072c19d..73a3bc3e7b0 100644 --- a/sys/arch/i386/include/pte.h +++ b/sys/arch/i386/include/pte.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pte.h,v 1.7 2004/02/06 00:23:21 deraadt Exp $ */ +/* $OpenBSD: pte.h,v 1.8 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: pte.h,v 1.11 1998/02/06 21:58:05 thorpej Exp $ */ /* @@ -45,114 +45,11 @@ #define _I386_PTE_H_ /* - * i386 MMU hardware structure: - * - * the i386 MMU is a two-level MMU which maps 4GB of virtual memory. - * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium - * processors can support a 4MB pagesize as well. - * - * the first level table (segment table?) is called a "page directory" - * and it contains 1024 page directory entries (PDEs). each PDE is - * 4 bytes (an int), so a PD fits in a single 4K page. this page is - * the page directory page (PDP). each PDE in a PDP maps 4MB of space - * (1024 * 4MB = 4GB). a PDE contains the physical address of the - * second level table: the page table. or, if 4MB pages are being used, - * then the PDE contains the PA of the 4MB page being mapped. - * - * a page table consists of 1024 page table entries (PTEs). each PTE is - * 4 bytes (an int), so a page table also fits in a single 4K page. a - * 4K page being used as a page table is called a page table page (PTP). - * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB). a PTE contains - * the physical address of the page it maps and some flag bits (described - * below). - * - * the processor has a special register, "cr3", which points to the - * the PDP which is currently controlling the mappings of the virtual - * address space. 
- * - * the following picture shows the translation process for a 4K page: - * - * %cr3 register [PA of PDP] - * | - * | - * | bits <31-22> of VA bits <21-12> of VA bits <11-0> - * | index the PDP (0 - 1023) index the PTP are the page offset - * | | | | - * | v | | - * +--->+----------+ | | - * | PD Page | PA of v | - * | |---PTP-------->+------------+ | - * | 1024 PDE | | page table |--PTE--+ | - * | entries | | (aka PTP) | | | - * +----------+ | 1024 PTE | | | - * | entries | | | - * +------------+ | | - * | | - * bits <31-12> bits <11-0> - * p h y s i c a l a d d r - * - * the i386 caches PTEs in a TLB. it is important to flush out old - * TLB mappings when making a change to a mappings. writing to the - * %cr3 will flush the entire TLB. newer processors also have an - * instruction that will invalidate the mapping of a single page (which - * is useful if you are changing a single mappings because it preserves - * all the cached TLB entries). - * - * as shows, bits 31-12 of the PTE contain PA of the page being mapped. - * the rest of the PTE is defined as follows: - * bit# name use - * 11 n/a available for OS use, hardware ignores it - * 10 n/a available for OS use, hardware ignores it - * 9 n/a available for OS use, hardware ignores it - * 8 G global bit (see discussion below) - * 7 PS page size [for PDEs] (0=4k, 1=4M <if supported>) - * 6 D dirty (modified) page - * 5 A accessed (referenced) page - * 4 PCD cache disable - * 3 PWT prevent write through (cache) - * 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s) - * 1 R/W read/write bit (0=read only, 1=read-write) - * 0 P present (valid) - * - * notes: - * - on the i386 the R/W bit is ignored if processor is in supervisor - * state (bug!) - * - PS is only supported on newer processors - * - PTEs with the G bit are global in the sense that they are not - * flushed from the TLB when %cr3 is written (to flush, use the - * "flush single page" instruction). this is only supported on - * newer processors. this bit can be used to keep the kernel's - * TLB entries around while context switching. since the kernel - * is mapped into all processes at the same place it does not make - * sense to flush these entries when switching from one process' - * pmap to another. - */ - -#if !defined(_LOCORE) - -/* - * here we define the data types for PDEs and PTEs - */ - -typedef u_int32_t pd_entry_t; /* PDE */ -typedef u_int32_t pt_entry_t; /* PTE */ - -#endif - -/* * now we define various for playing with virtual addresses */ #define PDSHIFT 22 /* offset of PD index in VA */ #define NBPD (1 << PDSHIFT) /* # bytes mapped by PD (4MB) */ -#define PDOFSET (NBPD-1) /* mask for non-PD part of VA */ -#if 0 /* not used? 
*/ -#define NPTEPD (NBPD / NBPG) /* # of PTEs in a PD */ -#else -#define PTES_PER_PTP (NBPD / NBPG) /* # of PTEs in a PTP */ -#endif -#define PD_MASK 0xffc00000 /* page directory address bits */ -#define PT_MASK 0x003ff000 /* page table address bits */ /* * here we define the bits of the PDE/PTE, as described above: @@ -173,8 +70,6 @@ typedef u_int32_t pt_entry_t; /* PTE */ #define PG_AVAIL1 0x00000200 /* ignored by hardware */ #define PG_AVAIL2 0x00000400 /* ignored by hardware */ #define PG_AVAIL3 0x00000800 /* ignored by hardware */ -#define PG_FRAME 0xfffff000 /* page frame mask */ -#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ /* * various short-hand protection codes diff --git a/sys/arch/i386/include/tss.h b/sys/arch/i386/include/tss.h index 20f6f38f7d5..7590b8ce3a1 100644 --- a/sys/arch/i386/include/tss.h +++ b/sys/arch/i386/include/tss.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tss.h,v 1.6 2003/06/02 23:27:47 millert Exp $ */ +/* $OpenBSD: tss.h,v 1.7 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: tss.h,v 1.6 1995/10/11 04:20:28 mycroft Exp $ */ /*- @@ -50,7 +50,7 @@ struct i386tss { int __tss_ss1; int __tss_esp2; int __tss_ss2; - int tss_cr3; /* page directory paddr */ + int tss_cr3; /* page directory [pointer] paddr */ int __tss_eip; int __tss_eflags; int __tss_eax; diff --git a/sys/arch/i386/include/vmparam.h b/sys/arch/i386/include/vmparam.h index 43edd842463..6174c378725 100644 --- a/sys/arch/i386/include/vmparam.h +++ b/sys/arch/i386/include/vmparam.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmparam.h,v 1.34 2006/03/15 17:56:06 mickey Exp $ */ +/* $OpenBSD: vmparam.h,v 1.35 2006/04/27 15:37:53 mickey Exp $ */ /* $NetBSD: vmparam.h,v 1.15 1994/10/27 04:16:34 cgd Exp $ */ /*- @@ -91,22 +91,23 @@ /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vaddr_t)0) -#define VM_MAXUSER_ADDRESS ((vaddr_t)((PDSLOT_PTE<<PDSHIFT) - USPACE)) -#define VM_MAX_ADDRESS ((vaddr_t)((PDSLOT_PTE<<PDSHIFT) + \ - (PDSLOT_PTE<<PGSHIFT))) +#define VM_MAXUSER_ADDRESS ((vaddr_t)0xcf800000) +#define VM_MAX_ADDRESS (vm_max_address) +extern vaddr_t vm_max_address; #define VM_MIN_KERNEL_ADDRESS ((vaddr_t)KERNBASE) -#define VM_MAX_KERNEL_ADDRESS ((vaddr_t)(PDSLOT_APTE<<PDSHIFT)) +#define VM_MAX_KERNEL_ADDRESS ((vaddr_t)0xff800000) /* virtual sizes (bytes) for various kernel submaps */ #define VM_PHYS_SIZE (USRIOSIZE*PAGE_SIZE) -#define VM_PHYSSEG_MAX 5 /* actually we could have this many segments */ +#define VM_PHYSSEG_MAX 8 /* actually we could have this many segments */ #define VM_PHYSSEG_STRAT VM_PSTRAT_BSEARCH #define VM_PHYSSEG_NOADD /* can't add RAM after vm_mem_init */ -#define VM_NFREELIST 2 +#define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_FIRST16 1 +#define VM_FREELIST_ABOVE4G 2 /* * pmap specific data stored in the vm_physmem[] array diff --git a/sys/arch/i386/pci/pci_addr_fixup.c b/sys/arch/i386/pci/pci_addr_fixup.c index 63c88142917..d6721f6aeb7 100644 --- a/sys/arch/i386/pci/pci_addr_fixup.c +++ b/sys/arch/i386/pci/pci_addr_fixup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_addr_fixup.c,v 1.16 2005/11/22 09:09:58 mickey Exp $ */ +/* $OpenBSD: pci_addr_fixup.c,v 1.17 2006/04/27 15:37:55 mickey Exp $ */ /* $NetBSD: pci_addr_fixup.c,v 1.7 2000/08/03 20:10:45 nathanw Exp $ */ /*- @@ -45,19 +45,17 @@ #include <i386/pci/pcibiosvar.h> typedef int (*pciaddr_resource_manage_func_t)(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t, int, - struct extent *, int, bus_addr_t *, bus_size_t); + struct extent *, int, u_long *, bus_size_t); void pciaddr_resource_manage(struct pcibios_softc *, 
pci_chipset_tag_t, pcitag_t, pciaddr_resource_manage_func_t); void pciaddr_resource_reserve(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t); -int pciaddr_do_resource_reserve(struct pcibios_softc *, - pci_chipset_tag_t, pcitag_t, int, struct extent *, int, - bus_addr_t *, bus_size_t); +int pciaddr_do_resource_reserve(struct pcibios_softc *, pci_chipset_tag_t, + pcitag_t, int, struct extent *, int, u_long *, bus_size_t); void pciaddr_resource_allocate(struct pcibios_softc *, pci_chipset_tag_t, pcitag_t); -int pciaddr_do_resource_allocate(struct pcibios_softc *, - pci_chipset_tag_t, pcitag_t, int, struct extent *, int, bus_addr_t *, - bus_size_t); +int pciaddr_do_resource_allocate(struct pcibios_softc *, pci_chipset_tag_t, + pcitag_t, int, struct extent *, int, u_long *, bus_size_t); bus_addr_t pciaddr_ioaddr(u_int32_t); void pciaddr_print_devid(pci_chipset_tag_t, pcitag_t); @@ -180,7 +178,7 @@ pciaddr_resource_manage(sc, pc, tag, func) { struct extent *ex; pcireg_t val, mask; - bus_addr_t addr; + u_long addr; bus_size_t size; int error, mapreg, type, reg_start, reg_end, width; @@ -273,7 +271,7 @@ pciaddr_do_resource_allocate(sc, pc, tag, mapreg, ex, type, addr, size) pcitag_t tag; struct extent *ex; int mapreg, type; - bus_addr_t *addr; + u_long *addr; bus_size_t size; { bus_addr_t start; @@ -324,7 +322,7 @@ pciaddr_do_resource_reserve(sc, pc, tag, mapreg, ex, type, addr, size) pcitag_t tag; struct extent *ex; int type, mapreg; - bus_addr_t *addr; + u_long *addr; bus_size_t size; { int error; diff --git a/sys/arch/i386/pci/pci_machdep.c b/sys/arch/i386/pci/pci_machdep.c index a84bd091173..c4378af043f 100644 --- a/sys/arch/i386/pci/pci_machdep.c +++ b/sys/arch/i386/pci/pci_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pci_machdep.c,v 1.30 2005/11/23 09:24:57 mickey Exp $ */ +/* $OpenBSD: pci_machdep.c,v 1.31 2006/04/27 15:37:55 mickey Exp $ */ /* $NetBSD: pci_machdep.c,v 1.28 1997/06/06 23:29:17 thorpej Exp $ */ /*- @@ -113,9 +113,7 @@ extern bios_pciinfo_t *bios_pciinfo; #endif #include "pcibios.h" -#if NPCIBIOS > 0 #include <i386/pci/pcibiosvar.h> -#endif int pci_mode = -1; diff --git a/sys/arch/i386/pci/pcibios.c b/sys/arch/i386/pci/pcibios.c index a4784398e20..ea83f89717e 100644 --- a/sys/arch/i386/pci/pcibios.c +++ b/sys/arch/i386/pci/pcibios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pcibios.c,v 1.32 2005/01/08 18:17:58 mickey Exp $ */ +/* $OpenBSD: pcibios.c,v 1.33 2006/04/27 15:37:55 mickey Exp $ */ /* $NetBSD: pcibios.c,v 1.5 2000/08/01 05:23:59 uch Exp $ */ /* @@ -270,7 +270,7 @@ pcibios_pir_init(sc) for (i = 0; i < pirh->tablesize; i++) cksum += p[i]; - printf("%s: PCI IRQ Routing Table rev %d.%d @ 0x%lx/%d " + printf("%s: PCI IRQ Routing Table rev %d.%d @ 0x%llx/%d " "(%d entries)\n", sc->sc_dev.dv_xname, pirh->version >> 8, pirh->version & 0xff, pa, pirh->tablesize, (pirh->tablesize - sizeof(*pirh)) / 16); |
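The pmap.h part of this diff routes the low-level PTE operations (pmap_pte_bits, pmap_enter, pmap_remove and friends) through function pointers, so a single kernel can boot on classic 32-bit page tables and have pmap_bootstrap_pae() swap in the _pae implementations once PAE is enabled, while SMALL_KERNEL builds simply #define the public names straight to the _86 functions. The stand-alone program below sketches only that dispatch pattern; the names and the enable_pae() switch are simplified stand-ins chosen for illustration, not the kernel's actual interfaces.

#include <stdint.h>
#include <stdio.h>

typedef uintptr_t vaddr_t;	/* stand-in for the kernel's vaddr_t */

/* Classic 4K-PTE ("_86") flavor of one primitive. */
static uint32_t
pte_bits_86(vaddr_t va)
{
	printf("32-bit PTE lookup for va %#lx\n", (unsigned long)va);
	return (0);
}

/* PAE ("_pae") flavor of the same primitive. */
static uint32_t
pte_bits_pae(vaddr_t va)
{
	printf("PAE PTE lookup for va %#lx\n", (unsigned long)va);
	return (0);
}

/*
 * The public name dispatches through a pointer that starts out at the
 * 32-bit implementation, mirroring the pmap_pte_bits_p machinery in
 * pmap.h.
 */
static uint32_t (*pte_bits_p)(vaddr_t) = pte_bits_86;
#define pte_bits	(*pte_bits_p)

/* What pmap_bootstrap_pae() does in spirit: flip the pointers over. */
static void
enable_pae(void)
{
	pte_bits_p = pte_bits_pae;
}

int
main(void)
{
	pte_bits(0xd0000000);	/* routed to the 32-bit flavor */
	enable_pae();
	pte_bits(0xd0000000);	/* now routed to the PAE flavor */
	return (0);
}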