-rw-r--r--  sys/arch/i386/i386/cpu.c            |   3
-rw-r--r--  sys/arch/i386/i386/gdt.c            |   3
-rw-r--r--  sys/arch/i386/i386/locore.s         |   8
-rw-r--r--  sys/arch/i386/i386/locore0.S        |  44
-rw-r--r--  sys/arch/i386/i386/machdep.c        |   8
-rw-r--r--  sys/arch/i386/i386/pmap.c           | 227
-rw-r--r--  sys/arch/i386/i386/pmapae.c         | 274
-rw-r--r--  sys/arch/i386/include/cpu_full.h    |  15
-rw-r--r--  sys/arch/i386/include/pmap.h        |   5
-rw-r--r--  sys/arch/i386/include/specialreg.h  |   8
10 files changed, 553 insertions, 42 deletions
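
The heart of the change is the CPU check added to locore0.S below: cpu_meltdown and pg_g_kern start out assuming a vulnerable CPU and are only relaxed for non-Intel parts, or for Intel parts whose IA32_ARCH_CAPABILITIES MSR advertises RDCL_NO. The following is an illustrative C rendering of that assembly, not code from the commit; cpuid() and rdmsr() stand in for the corresponding instructions.

#include <stdint.h>
#include <string.h>

#define SEFF0EDX_ARCH_CAP		0x20000000	/* has IA32_ARCH_CAPABILITIES MSR */
#define MSR_ARCH_CAPABILITIES		0x10a
#define ARCH_CAPABILITIES_RDCL_NO	(1 << 0)	/* Meltdown safe */
#define PG_G				0x100		/* global-page PTE bit */

uint32_t cpu_meltdown = 1;	/* assume affected until proven otherwise */
uint32_t pg_g_kern = 0;		/* PG_G for kernel mappings only when safe */

/* assumed wrappers: cpuid() runs the instruction with the subleaf (%ecx) set to 0 */
extern void cpuid(uint32_t leaf, uint32_t regs[4]);	/* eax, ebx, ecx, edx */
extern uint64_t rdmsr(uint32_t msr);

void
cpu_check_meltdown(void)
{
	uint32_t regs[4];
	char vendor[12 + 1];

	cpuid(0, regs);
	memcpy(vendor + 0, &regs[1], 4);	/* ebx: "Genu" */
	memcpy(vendor + 4, &regs[3], 4);	/* edx: "ineI" */
	memcpy(vendor + 8, &regs[2], 4);	/* ecx: "ntel" */
	vendor[12] = '\0';

	if (strcmp(vendor, "GenuineIntel") != 0)
		goto secure;		/* non-Intel CPUs are treated as safe */

	if (regs[0] < 7)
		return;			/* no SEFF leaf: assume affected */
	cpuid(7, regs);
	if ((regs[3] & SEFF0EDX_ARCH_CAP) == 0)
		return;			/* no IA32_ARCH_CAPABILITIES MSR: assume affected */
	if ((rdmsr(MSR_ARCH_CAPABILITIES) & ARCH_CAPABILITIES_RDCL_NO) == 0)
		return;			/* MSR says the CPU is affected */

secure:
	cpu_meltdown = 0;		/* safe: no user/kernel page-table split needed */
	pg_g_kern = PG_G;		/* and kernel mappings may use PG_G */
}
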
diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index cf3f9e12205..4cb39e6308e 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.91 2018/04/28 15:44:59 jasper Exp $ */ +/* $OpenBSD: cpu.c,v 1.92 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ /*- @@ -250,6 +250,7 @@ cpu_attach(struct device *parent, struct device *self, void *aux) ci = &cif->cif_cpu; #ifdef MULTIPROCESSOR ci->ci_tss = &cif->cif_tss; + ci->ci_gdt = (void *)&cif->cif_gdt; cpu_enter_pages(cif); if (cpu_info[cpunum] != NULL) panic("cpu at apic id %d already attached?", cpunum); diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c index ba8eb01907f..095019655a2 100644 --- a/sys/arch/i386/i386/gdt.c +++ b/sys/arch/i386/i386/gdt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.c,v 1.41 2018/04/11 15:44:08 bluhm Exp $ */ +/* $OpenBSD: gdt.c,v 1.42 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: gdt.c,v 1.28 2002/12/14 09:38:50 junyoung Exp $ */ /*- @@ -97,7 +97,6 @@ gdt_init(void) void gdt_alloc_cpu(struct cpu_info *ci) { - ci->ci_gdt = (void *)(ci->ci_tss + 1); bcopy(cpu_info_primary.ci_gdt, ci->ci_gdt, GDT_SIZE); setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0); diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index 8a8b22f7a38..58096e2e00f 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.186 2018/05/11 15:27:43 bluhm Exp $ */ +/* $OpenBSD: locore.s,v 1.187 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -265,6 +265,8 @@ INTRENTRY_LABEL(label): /* from kernel */ ; \ .globl _C_LABEL(gdt) .globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv) .globl _C_LABEL(lapic_tpr) + .globl _C_LABEL(pg_g_kern) + .globl _C_LABEL(cpu_meltdown) #if NLAPIC > 0 .align NBPG @@ -318,6 +320,10 @@ _C_LABEL(bootdev): .long 0 # device we booted from _C_LABEL(proc0paddr): .long 0 _C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm _C_LABEL(PTDsize): .long NBPG # size of PTD, for libkvm +_C_LABEL(pg_g_kern): .long 0 # 0x100 if global pages should be used + # in kernel mappings, 0 otherwise (for + # insecure CPUs) +_C_LABEL(cpu_meltdown): .long 0 # 1 if this CPU has Meltdown .text diff --git a/sys/arch/i386/i386/locore0.S b/sys/arch/i386/i386/locore0.S index cdc1f522f04..b0c2fd7851d 100644 --- a/sys/arch/i386/i386/locore0.S +++ b/sys/arch/i386/i386/locore0.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore0.S,v 1.3 2017/12/10 21:44:07 deraadt Exp $ */ +/* $OpenBSD: locore0.S,v 1.4 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -234,6 +234,48 @@ start: movw $0x1234,0x472 # warm boot movl %ecx,RELOC(_C_LABEL(cpu_vendor))+8 movl $0, RELOC(_C_LABEL(cpu_vendor))+12 + /* + * Determine if CPU has meltdown. Certain Intel CPUs do not properly + * respect page permissions when speculatively loading data into + * the cache ("Meltdown" CVE). These CPUs must utilize a secondary + * sanitized page table lacking kernel mappings when executing user + * processes, and may not use PG_G global PTEs for kernel VAs. 
+ */ + movl $0x1, RELOC(_C_LABEL(cpu_meltdown)) + movl $0x0, RELOC(_C_LABEL(pg_g_kern)) + + cmpl $0x756e6547,%ebx # "Genu" + jne .Lcpu_secure + cmpl $0x6c65746e,%ecx # "ntel" + jne .Lcpu_secure + cmpl $0x49656e69,%edx # "ineI" + jne .Lcpu_secure + + /* + * Intel CPU, now check if IA32_ARCH_CAPABILITIES is supported and + * if it says this CPU is safe. + */ + movl $0x0,%eax + cpuid + cmpl $0x7,%eax + jl .Lcpu_check_finished + + movl $0x7,%eax + cpuid + testl $SEFF0EDX_ARCH_CAP,%edx + jz .Lcpu_check_finished + + /* IA32_ARCH_CAPABILITIES MSR avaialble, use it to check CPU security */ + movl $MSR_ARCH_CAPABILITIES,%ecx + rdmsr + testl $ARCH_CAPABILITIES_RDCL_NO,%eax + jz .Lcpu_check_finished + +.Lcpu_secure: + movl $0x0, RELOC(_C_LABEL(cpu_meltdown)) + movl $PG_G, RELOC(_C_LABEL(pg_g_kern)) + +.Lcpu_check_finished: movl $1,%eax xorl %ecx,%ecx cpuid diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 6f7c5d4fca1..02ed064740a 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.616 2018/04/12 17:13:43 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.617 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -1698,6 +1698,7 @@ identifycpu(struct cpu_info *ci) char *brandstr_from, *brandstr_to; char *cpu_device = ci->ci_dev->dv_xname; int skipspace; + extern uint32_t cpu_meltdown; if (cpuid_level == -1) { #ifdef DIAGNOSTIC @@ -2020,6 +2021,9 @@ identifycpu(struct cpu_info *ci) printf(",%s", cpu_tpm_eaxfeatures[i].feature_name); } + if (cpu_meltdown) + printf(",MELTDOWN"); + printf("\n"); } @@ -3098,7 +3102,7 @@ init386(paddr_t first_avail) cpu_info_primary.ci_self = &cpu_info_primary; cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb; cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss; - cpu_info_primary.ci_gdt = (void *)(cpu_info_primary.ci_tss + 1); + cpu_info_primary.ci_gdt = (void *)&cpu_info_full_primary.cif_gdt; /* make bootstrap gdt gates and memory segments */ setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff, diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index 340bc4fd789..ed2c99c8aad 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.201 2018/04/20 07:27:54 mlarkin Exp $ */ +/* $OpenBSD: pmap.c,v 1.202 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -75,6 +75,14 @@ #include "vmm.h" +/* #define PMAP_DEBUG */ + +#ifdef PMAP_DEBUG +#define DPRINTF(x...) do { printf(x); } while(0) +#else +#define DPRINTF(x...) +#endif /* PMAP_DEBUG */ + /* * this file contains the code for the "pmap module." the module's * job is to manage the hardware's virtual to physical address mappings. @@ -372,6 +380,13 @@ int nkptp_max = 1024 - (KERNBASE / NBPD) - 1; extern int cpu_pae; /* + * pg_g_kern: if CPU is affected by Meltdown pg_g_kern is 0, + * otherwise it is is set to PG_G. pmap_pg_g will be dervied + * from pg_g_kern, see pmap_bootstrap(). + */ +extern int pg_g_kern; + +/* * pmap_pg_g: if our processor supports PG_G in the PTE then we * set pmap_pg_g to PG_G (otherwise it is zero). */ @@ -445,6 +460,8 @@ pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *flsh_pte; caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp, pmap_flshp; caddr_t vmmap; /* XXX: used by mem.c... 
it should really uvm_map_reserve it */ +extern uint32_t cpu_meltdown; + /* * local prototypes */ @@ -684,7 +701,7 @@ pmap_pte_paddr_86(vaddr_t va) */ vaddr_t -pmap_tmpmap_pa(paddr_t pa) +pmap_tmpmap_pa_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); @@ -692,9 +709,6 @@ pmap_tmpmap_pa(paddr_t pa) pt_entry_t *ptpte; caddr_t ptpva; - if (cpu_pae) - return pmap_tmpmap_pa_pae(pa); - ptpte = PTESLEW(ptp_pte, id); ptpva = VASLEW(pmap_ptpp, id); @@ -706,12 +720,22 @@ pmap_tmpmap_pa(paddr_t pa) return((vaddr_t)ptpva); } + +vaddr_t +pmap_tmpmap_pa(paddr_t pa) +{ + if (cpu_pae) + return pmap_tmpmap_pa_pae(pa); + + return pmap_tmpmap_pa_86(pa); +} + /* * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) */ void -pmap_tmpunmap_pa(void) +pmap_tmpunmap_pa_86(void) { #ifdef MULTIPROCESSOR int id = cpu_number(); @@ -719,11 +743,6 @@ pmap_tmpunmap_pa(void) pt_entry_t *ptpte; caddr_t ptpva; - if (cpu_pae) { - pmap_tmpunmap_pa_pae(); - return; - } - ptpte = PTESLEW(ptp_pte, id); ptpva = VASLEW(pmap_ptpp, id); @@ -741,6 +760,17 @@ pmap_tmpunmap_pa(void) #endif } +void +pmap_tmpunmap_pa(void) +{ + if (cpu_pae) { + pmap_tmpunmap_pa_pae(); + return; + } + + pmap_tmpunmap_pa_86(); +} + paddr_t vtophys(vaddr_t va) { @@ -946,18 +976,19 @@ pmap_bootstrap(vaddr_t kva_start) */ /* - * enable global TLB entries if they are supported + * enable global TLB entries if they are supported and the + * CPU is not affected by Meltdown. */ if (cpu_feature & CPUID_PGE) { lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */ - pmap_pg_g = PG_G; /* enable software */ + pmap_pg_g = pg_g_kern; /* if safe to use, enable software */ /* add PG_G attribute to already mapped kernel pages */ for (kva = VM_MIN_KERNEL_ADDRESS; kva < virtual_avail; kva += PAGE_SIZE) if (pmap_valid_entry(PTE_BASE[atop(kva)])) - PTE_BASE[atop(kva)] |= PG_G; + PTE_BASE[atop(kva)] |= pmap_pg_g; } /* @@ -1195,6 +1226,7 @@ struct vm_page * pmap_alloc_ptp_86(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) { struct vm_page *ptp; + pd_entry_t *pva_intel; ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); @@ -1206,6 +1238,21 @@ pmap_alloc_ptp_86(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) ptp->wire_count = 1; /* no mappings yet */ PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_RW | PG_V | PG_M | PG_U | pde_flags); + + /* + * Meltdown special case - if we are adding a new PDE for + * usermode addresses, just copy the PDE to the U-K page + * table. + */ + if (pmap->pm_pdir_intel && ptp_i2v(pde_index) < VM_MAXUSER_ADDRESS) { + pva_intel = (pd_entry_t *)pmap->pm_pdir_intel; + pva_intel[pde_index] = PDE(pmap, pde_index); + DPRINTF("%s: copying usermode PDE (content=0x%x) pde_index %d " + "from 0x%x -> 0x%x\n", __func__, PDE(pmap, pde_index), + pde_index, (uint32_t)&PDE(pmap, pde_index), + (uint32_t)&(pva_intel[pde_index])); + } + pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; return(ptp); @@ -1247,6 +1294,8 @@ void pmap_drop_ptp_86(struct pmap *pm, vaddr_t va, struct vm_page *ptp, pt_entry_t *ptes) { + pd_entry_t *pva_intel; + i386_atomic_testset_ul(&PDE(pm, pdei(va)), 0); pmap_tlb_shootpage(curcpu()->ci_curpmap, ((vaddr_t)ptes) + ptp->offset); #ifdef MULTIPROCESSOR @@ -1263,6 +1312,16 @@ pmap_drop_ptp_86(struct pmap *pm, vaddr_t va, struct vm_page *ptp, ptp->wire_count = 0; /* Postpone free to after shootdown. 
*/ uvm_pagerealloc(ptp, NULL, 0); + + if (pm->pm_pdir_intel) { + KASSERT(va < VM_MAXUSER_ADDRESS); + /* Zap special meltdown PDE */ + pva_intel = (pd_entry_t *)pm->pm_pdir_intel; + i386_atomic_testset_ul(&pva_intel[pdei(va)], 0); + DPRINTF("%s: cleared meltdown PDE @ index %lu " + "(va range start 0x%x)\n", __func__, pdei(va), + (uint32_t)va); + } } /* @@ -1318,10 +1377,6 @@ pmap_pinit_pd_86(struct pmap *pmap) &pmap->pm_pdirpa); pmap->pm_pdirsize = NBPG; - /* XXX hshoexer */ - pmap->pm_pdir_intel = pmap->pm_pdir; - pmap->pm_pdirpa_intel = pmap->pm_pdirpa; - /* init PDP */ /* zero init area */ bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); @@ -1341,6 +1396,34 @@ pmap_pinit_pd_86(struct pmap *pmap) /* zero the rest */ bzero(&PDE(pmap, PDSLOT_KERN + nkpde), NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + + /* + * Intel CPUs need a special page table to be used during usermode + * execution, one that lacks all kernel mappings. + */ + if (cpu_meltdown) { + pmap->pm_pdir_intel = uvm_km_zalloc(kernel_map, NBPG); + if (pmap->pm_pdir_intel == 0) + panic("%s: kernel_map out of virtual space!", __func__); + + if (!pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir_intel, + &pmap->pm_pdirpa_intel)) + panic("%s: unknown PA mapping for meltdown PD\n", + __func__); + + /* Copy PDEs from pmap_kernel's U-K view */ + bcopy((void *)pmap_kernel()->pm_pdir_intel, + (void *)pmap->pm_pdir_intel, NBPG); + + DPRINTF("%s: pmap %p pm_pdir 0x%lx pm_pdirpa 0x%lx " + "pdir_intel 0x%lx pdirpa_intel 0x%lx\n", + __func__, pmap, pmap->pm_pdir, pmap->pm_pdirpa, + pmap->pm_pdir_intel, pmap->pm_pdirpa_intel); + } else { + pmap->pm_pdir_intel = 0; + pmap->pm_pdirpa_intel = 0; + } + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); } @@ -1443,7 +1526,11 @@ pmap_switch(struct proc *o, struct proc *p) */ if (pmap->pm_pdirpa_intel) { self->ci_kern_cr3 = pmap->pm_pdirpa; +#if 0 /* XXX hshoexer: Do not unmap kernel, yet */ self->ci_user_cr3 = pmap->pm_pdirpa_intel; +#else + self->ci_user_cr3 = pmap->pm_pdirpa; +#endif } /* @@ -2421,10 +2508,112 @@ out: return error; } +/* + * Allocate an extra PD page and PT pages as needed to map kernel + * pages used for the U-K mappings. These special mappings are set + * up during bootstrap and get never removed and are part of + * pmap_kernel. + * + * New pmaps inherit the kernel portion of pmap_kernel including + * the special mappings (see pmap_pinit_pd_86()). + * + * To be able to release PT pages when migrating to PAE paging, use + * wire_count for number of PTEs in the PT page. 
+ */ void pmap_enter_special_86(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags) { - /* XXX hshoexer nothing yet */ + struct pmap *pmap = pmap_kernel(); + struct vm_page *ptppg = NULL; + pd_entry_t *pd, *ptp; + pt_entry_t *ptes; + uint32_t l2idx, l1idx; + paddr_t npa; + + /* If CPU is secure, no need to do anything */ + if (!cpu_meltdown) + return; + + /* Must be kernel VA */ + if (va < VM_MIN_KERNEL_ADDRESS) + panic("%s: invalid special mapping va 0x%lx requested", + __func__, va); + + if (!pmap->pm_pdir_intel) { + if ((pmap->pm_pdir_intel = uvm_km_zalloc(kernel_map, NBPG)) + == 0) + panic("%s: kernel_map out of virtual space!", __func__); + if (!pmap_extract(pmap, pmap->pm_pdir_intel, + &pmap->pm_pdirpa_intel)) + panic("%s: can't locate PD page\n", __func__); + } + + DPRINTF("%s: pm_pdir_intel 0x%x pm_pdirpa_intel 0x%x\n", __func__, + (uint32_t)pmap->pm_pdir_intel, (uint32_t)pmap->pm_pdirpa_intel); + + l2idx = pdei(va); + l1idx = ptei(va); + + DPRINTF("%s: va 0x%08lx pa 0x%08lx prot 0x%08lx flags 0x%08x " + "l2idx %u l1idx %u\n", __func__, va, pa, (unsigned long)prot, + flags, l2idx, l1idx); + + if ((pd = (pd_entry_t *)pmap->pm_pdir_intel) == NULL) + panic("%s: PD not initialized for pmap @ %p\n", __func__, pmap); + + /* npa = physaddr of PT page */ + npa = pd[l2idx] & PMAP_PA_MASK; + + /* Valid PDE for the 4MB region containing va? */ + if (!npa) { + /* + * No valid PDE - allocate PT page and set PDE. We + * get it from pm_obj, which is used for PT pages. + * We calculate the offset from l2idx+1024, so we are + * beyond the regular PT pages. For their l2dix + * 0 <= l2idx < 1024 holds. + */ + ptppg = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(l2idx + 1024), + NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); + if (ptppg == NULL) + panic("%s: failed to allocate PT page", __func__); + + atomic_clearbits_int(&ptppg->pg_flags, PG_BUSY); + ptppg->wire_count = 1; /* no mappings yet */ + + npa = VM_PAGE_TO_PHYS(ptppg); + pd[l2idx] = (npa | PG_RW | PG_V | PG_M | PG_U); + + DPRINTF("%s: allocated new PT page at phys 0x%x, " + "setting PDE[%d] = 0x%x\n", __func__, (uint32_t)npa, + l2idx, pd[l2idx]); + } + + /* temporarily map PT page and set PTE for U-K mapping */ + if (ptppg == NULL && (ptppg = PHYS_TO_VM_PAGE(npa)) == NULL) + panic("%s: no vm_page for PT page", __func__); + mtx_enter(&ptppg->mdpage.pv_mtx); + ptp = (pd_entry_t *)pmap_tmpmap_pa(npa); + ptp[l1idx] = (pa | protection_codes[prot] | PG_V | PG_M | PG_U | flags); + ptppg->wire_count++; + DPRINTF("%s: setting PTE[%d] = 0x%x (wire_count %d)\n", __func__, + l1idx, ptp[l1idx], ptppg->wire_count); + pmap_tmpunmap_pa(); + mtx_leave(&ptppg->mdpage.pv_mtx); + + /* + * if supported, set the PG_G flag on the corresponding U+K + * entry. U+K mappings can use PG_G, as they are mapped + * along with user land anyway. 
+ */ + if (!(cpu_feature & CPUID_PGE)) + return; + ptes = pmap_map_ptes_86(pmap); /* pmap_kernel -> PTE_BASE */ + if (pmap_valid_entry(ptes[atop(va)])) + ptes[atop(va)] |= PG_G; + else + DPRINTF("%s: no U+K mapping for special mapping?\n", __func__); + pmap_unmap_ptes_86(pmap); /* pmap_kernel -> nothing */ } /* diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c index 5f076400690..20575eda6f9 100644 --- a/sys/arch/i386/i386/pmapae.c +++ b/sys/arch/i386/i386/pmapae.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmapae.c,v 1.54 2018/04/20 07:27:54 mlarkin Exp $ */ +/* $OpenBSD: pmapae.c,v 1.55 2018/05/28 20:52:44 bluhm Exp $ */ /* * Copyright (c) 2006-2008 Michael Shalayeff @@ -100,6 +100,14 @@ #include "ksyms.h" +/* #define PMAPAE_DEBUG */ + +#ifdef PMAPAE_DEBUG +#define DPRINTF(x...) do { printf(x); } while(0) +#else +#define DPRINTF(x...) +#endif /* PMAPAE_DEBUG */ + /* * this file contains the code for the "pmap module." the module's * job is to manage the hardware's virtual to physical address mappings. @@ -347,6 +355,8 @@ #undef NBPD #define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */ +#define PDSHIFT86 22 /* for pmap86 transfer */ + #undef PDSLOT_PTE #define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */ #undef PDSLOT_KERN @@ -375,6 +385,9 @@ #define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) #define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) +#define PD_MASK86 0xffc00000 /* for pmap86 transfer */ +#define PT_MASK86 0x003ff000 /* for pmap86 transfer */ + /* * Mach derived conversion macros */ @@ -457,6 +470,8 @@ extern int pmap_pg_g; extern int pmap_pg_wc; extern struct pmap_head pmaps; +extern uint32_t cpu_meltdown; + /* * local prototypes */ @@ -616,6 +631,10 @@ pmap_bootstrap_pae(void) cpu_pae = 1; + DPRINTF("%s: pm_pdir 0x%x pm_pdirpa 0x%x pm_pdirsize %d\n", __func__, + (uint32_t)kpm->pm_pdir, (uint32_t)kpm->pm_pdirpa, + kpm->pm_pdirsize); + va = (vaddr_t)kpm->pm_pdir; kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V; kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V; @@ -628,11 +647,13 @@ pmap_bootstrap_pae(void) PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW | PG_M | PG_U; /* transfer all kernel mappings over into pae tables */ - for (va = KERNBASE, eva = va + (nkpde << 22); + for (va = KERNBASE, eva = va + (nkpde << PDSHIFT86); va < eva; va += PAGE_SIZE) { if (!pmap_valid_entry(PDE(kpm, pdei(va)))) { ptp = uvm_pagealloc(&kpm->pm_obj, va, NULL, UVM_PGA_ZERO); + if (ptp == NULL) + panic("%s: uvm_pagealloc() failed", __func__); ptaddr = VM_PAGE_TO_PHYS(ptp); PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V | PG_U | PG_M; @@ -663,6 +684,64 @@ pmap_bootstrap_pae(void) pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits); } + /* Transfer special mappings */ + if (kpm->pm_pdir_intel) { + uint32_t *pd, *ptp; + uint32_t l1idx, l2idx; + paddr_t npa; + struct vm_page *ptppg; + + pd = (uint32_t *)kpm->pm_pdir_intel; + kpm->pm_pdir_intel = kpm->pm_pdirpa_intel = 0; + + for (va = KERNBASE, eva = va + (nkpde << PDSHIFT86); va < eva; + va += PAGE_SIZE) { + l1idx = ((va & PT_MASK86) >> PGSHIFT); + l2idx = ((va & PD_MASK86) >> PDSHIFT86); + + if (!pmap_valid_entry(pd[l2idx])) + continue; + + npa = pd[l2idx] & PMAP_PA_MASK; + ptppg = PHYS_TO_VM_PAGE(npa); + mtx_enter(&ptppg->mdpage.pv_mtx); + + /* still running on pmap86 */ + ptp = (uint32_t *)pmap_tmpmap_pa_86(npa); + + if (!pmap_valid_entry(ptp[l1idx])) { + mtx_leave(&ptppg->mdpage.pv_mtx); + pmap_tmpunmap_pa_86(); + continue; + } + DPRINTF("%s: va 0x%x l2idx %u 0x%x lx1idx %u 0x%x\n", + __func__, (uint32_t)va, l2idx, 
(uint32_t)pd[l2idx], + l1idx, (uint32_t)ptp[l1idx]); + + /* protection and cacheability */ + bits = ptp[l1idx] & (PG_PROT|PG_N|PG_WT); + npa = ptp[l1idx] & PMAP_PA_MASK; + + /* still running on pmap86 */ + pmap_tmpunmap_pa_86(); + mtx_leave(&ptppg->mdpage.pv_mtx); + + /* enforce use of pmap86 */ + cpu_pae = 0; + pmap_enter_special_pae(va, npa, 0, bits); + cpu_pae = 1; + + if (--ptppg->wire_count == 1) { + ptppg->wire_count = 0; + uvm_pagerealloc(ptppg, NULL, 0); + DPRINTF("%s: freeing PT page 0x%x\n", __func__, + (uint32_t)VM_PAGE_TO_PHYS(ptppg)); + } + } + uvm_km_free(kernel_map, (vaddr_t)pd, NBPG); + DPRINTF("%s: freeing PDP 0x%x\n", __func__, (uint32_t)pd); + } + if (!cpu_paenable(&kpm->pm_pdidx[0])) { extern struct user *proc0paddr; @@ -670,6 +749,10 @@ pmap_bootstrap_pae(void) (vaddr_t)kpm - KERNBASE; kpm->pm_pdirsize = 4 * NBPG; + DPRINTF("%s: pm_pdir 0x%x pm_pdirpa 0x%x pm_pdirsize %d\n", + __func__, (uint32_t)kpm->pm_pdir, (uint32_t)kpm->pm_pdirpa, + kpm->pm_pdirsize); + csrc_pte = vtopte(pmap_csrcp); cdst_pte = vtopte(pmap_cdstp); zero_pte = vtopte(pmap_zerop); @@ -748,6 +831,7 @@ struct vm_page * pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) { struct vm_page *ptp; + pd_entry_t *pva_intel; ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); @@ -759,6 +843,22 @@ pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) ptp->wire_count = 1; /* no mappings yet */ PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_RW | PG_V | PG_M | PG_U | pde_flags); + + /* + * Meltdown special case - if we are adding a new PDE for + * usermode addresses, just copy the PDE to the U-K + * table. + */ + if (pmap->pm_pdir_intel && ptp_i2v(pde_index) < VM_MAXUSER_ADDRESS) { + pva_intel = (pd_entry_t *)pmap->pm_pdir_intel; + pva_intel[pde_index] = PDE(pmap, pde_index); + DPRINTF("%s: copying usermode PDE (content=0x%llx) pde_index " + "%d from 0x%llx -> 0x%llx\n", __func__, + PDE(pmap, pde_index), pde_index, + (uint64_t)&PDE(pmap, pde_index), + (uint64_t)&(pva_intel[pde_index])); + } + pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; return(ptp); @@ -800,6 +900,8 @@ void pmap_drop_ptp_pae(struct pmap *pm, vaddr_t va, struct vm_page *ptp, pt_entry_t *ptes) { + pd_entry_t *pva_intel; + i386_atomic_testset_uq(&PDE(pm, pdei(va)), 0); pmap_tlb_shootpage(curcpu()->ci_curpmap, ((vaddr_t)ptes) + ptp->offset); #ifdef MULTIPROCESSOR @@ -816,6 +918,16 @@ pmap_drop_ptp_pae(struct pmap *pm, vaddr_t va, struct vm_page *ptp, ptp->wire_count = 0; /* Postpone free to after shootdown. 
*/ uvm_pagerealloc(ptp, NULL, 0); + + if (pm->pm_pdir_intel) { + KASSERT(va < VM_MAXUSER_ADDRESS); + /* Zap special meltdown PDE */ + pva_intel = (pd_entry_t *)pm->pm_pdir_intel; + i386_atomic_testset_uq(&pva_intel[pdei(va)], 0); + DPRINTF("%s: cleared meltdown PDE @ index %lu " + "(va range start 0x%x)\n", __func__, pdei(va), + (uint32_t)va); + } } /* @@ -849,10 +961,6 @@ pmap_pinit_pd_pae(struct pmap *pmap) pmap->pm_pdidx[3] |= PG_V; pmap->pm_pdirsize = 4 * NBPG; - /* XXX hshoexer */ - pmap->pm_pdir_intel = pmap->pm_pdir; - pmap->pm_pdirpa_intel = pmap->pm_pdirpa; - /* init PDP */ /* zero init area */ bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); @@ -878,6 +986,44 @@ pmap_pinit_pd_pae(struct pmap *pmap) /* zero the rest */ bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + + /* + * Intel CPUs need a special page table to be used during usermode + * execution, one that lacks all kernel mappings. + */ + if (cpu_meltdown) { + int i; + + if ((va = uvm_km_zalloc(kernel_map, 4 * NBPG)) == 0) + panic("%s: kernel_map out of virtual space!", __func__); + if (!pmap_extract(pmap_kernel(), + (vaddr_t)&pmap->pm_pdidx_intel, &pmap->pm_pdirpa_intel)) + panic("%s: can't locate PDPT\n", __func__); + pmap->pm_pdir_intel = va; + + for (i = 0; i < 4; i++) { + pmap->pm_pdidx_intel[i] = 0; + if (!pmap_extract(pmap, va + i * NBPG, + (paddr_t *)&pmap->pm_pdidx_intel[i])) + panic("%s: can't locate PD page\n", __func__); + pmap->pm_pdidx_intel[i] |= PG_V; + DPRINTF("%s: pm_pdidx_intel[%d] = 0x%llx\n", __func__, + i, pmap->pm_pdidx_intel[i]); + } + + /* Copy PDEs from pmap_kernel's U-K view */ + bcopy((void *)pmap_kernel()->pm_pdir_intel, + (void *)pmap->pm_pdir_intel, 4 * NBPG); + + DPRINTF("%s: pmap %p pm_pdir 0x%lx pm_pdirpa 0x%lx " + "pdir_intel 0x%lx pdirpa_intel 0x%lx\n", + __func__, pmap, pmap->pm_pdir, pmap->pm_pdirpa, + pmap->pm_pdir_intel, pmap->pm_pdirpa_intel); + } else { + pmap->pm_pdir_intel = 0; + pmap->pm_pdirpa_intel = 0; + } + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); } @@ -1757,10 +1903,124 @@ out: return error; } +/* + * Allocate an extra PDPT and PT pages as needed to map kernel pages + * used for the U-K mappings. These special mappings are set up + * during bootstrap and get never removed and are part of pmap_kernel. + * + * New pmaps inherit the kernel portion of pmap_kernel including + * the special mappings (see pmap_pinit_pd_pae()). + */ void pmap_enter_special_pae(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags) { - /* XXX hshoexer nothing yet */ + struct pmap *pmap = pmap_kernel(); + struct vm_page *ptppg = NULL, *pdppg; + pd_entry_t *pd, *ptp; + pt_entry_t *ptes; + uint32_t l2idx, l1idx; + vaddr_t vapd; + paddr_t npa; + int i; + + /* If CPU is secure, no need to do anything */ + if (!cpu_meltdown) + return; + + /* Must be kernel VA */ + if (va < VM_MIN_KERNEL_ADDRESS) + panic("%s: invalid special mapping va 0x%lx requested", + __func__, va); + + if (!pmap->pm_pdir_intel) { + if ((vapd = uvm_km_zalloc(kernel_map, 4 * NBPG)) == 0) + panic("%s: kernel_map out of virtual space!", __func__); + pmap->pm_pdir_intel = vapd; + if (!pmap_extract(pmap, (vaddr_t)&pmap->pm_pdidx_intel, + &pmap->pm_pdirpa_intel)) + panic("%s: can't locate PDPT\n", __func__); + + for (i = 0; i < 4; i++) { + pmap->pm_pdidx_intel[i] = 0; + if (!pmap_extract(pmap, vapd + i*NBPG, + (paddr_t *)&pmap->pm_pdidx_intel[i])) + panic("%s: can't locate PD page\n", __func__); + + /* ensure PDPs are wired down XXX hshoexer why? 
*/ + pdppg = PHYS_TO_VM_PAGE(pmap->pm_pdidx_intel[i]); + if (pdppg == NULL) + panic("%s: no vm_page for pdidx %d", __func__, i); + atomic_clearbits_int(&pdppg->pg_flags, PG_BUSY); + pdppg->wire_count = 1; /* no mappings yet */ + + pmap->pm_pdidx_intel[i] |= PG_V; + + DPRINTF("%s: pm_pdidx_intel[%d] = 0x%llx\n", __func__, + i, pmap->pm_pdidx_intel[i]); + } + } + + DPRINTF("%s: pm_pdir_intel 0x%x pm_pdirpa_intel 0x%x\n", __func__, + (uint32_t)pmap->pm_pdir_intel, (uint32_t)pmap->pm_pdirpa_intel); + + /* These are the PAE versions of pdei() and ptei() */ + l2idx = pdei(va); + l1idx = ptei(va); + + DPRINTF("%s: va 0x%08lx pa 0x%08lx prot 0x%08lx flags 0x%08x " + "l2idx %u l1idx %u\n", __func__, va, pa, (unsigned long)prot, + flags, l2idx, l1idx); + + if ((pd = (pd_entry_t *)pmap->pm_pdir_intel) == 0) + panic("%s: PD not initialized for pmap @ %p\n", __func__, pmap); + + /* npa = phsyaddr of PT page */ + npa = pd[l2idx] & PMAP_PA_MASK; + + /* Valide PDE for the 2MB region containing va? */ + if (!npa) { + /* + * No valid PDE - allocate PT page and set PDE. We + * get it from pm_obj, which is used for PT pages. + * We calculate the offset from l2idx+2048, so we are + * beyond the regular PT pages. For their l2dix + * 0 <= l2idx < 2048 holds. + */ + ptppg = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(l2idx + 2048), + NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO); + if (ptppg == NULL) + panic("%s: failed to allocate PT page", __func__); + + atomic_clearbits_int(&ptppg->pg_flags, PG_BUSY); + ptppg->wire_count = 1; /* no mappings yet */ + + npa = VM_PAGE_TO_PHYS(ptppg); + pd[l2idx] = (npa | PG_RW | PG_V | PG_M | PG_U); + + DPRINTF("%s: allocated new PT page at phys 0x%x, " + "setting PDE[%d] = 0x%llx\n", __func__, (uint32_t)npa, + l2idx, pd[l2idx]); + } + + /* temporarily map PT page and set PTE for U-K mapping */ + if (ptppg == NULL && (ptppg = PHYS_TO_VM_PAGE(npa)) == NULL) + panic("%s: no vm_page for PT page", __func__); + mtx_enter(&ptppg->mdpage.pv_mtx); + ptp = (pd_entry_t *)pmap_tmpmap_pa(npa); + ptp[l1idx] = (pa | protection_codes[prot] | PG_V | PG_M | PG_U | flags); + DPRINTF("%s: setting PTE[%d] = 0x%llx\n", __func__, l1idx, ptp[l1idx]); + pmap_tmpunmap_pa(); + mtx_leave(&ptppg->mdpage.pv_mtx); + + /* if supported, set the PG_G flag on the corresponding U+K entry */ + if (!(cpu_feature & CPUID_PGE)) + return; + ptes = pmap_map_ptes_pae(pmap); /* pmap_kernel -> PTE_BASE */ + if (pmap_valid_entry(ptes[atop(va)])) + ptes[atop(va)] |= PG_G; + else + DPRINTF("%s: no U+K mapping for special mapping?\n", __func__); + pmap_unmap_ptes_pae(pmap); /* pmap_kernel -> nothing */ } /* diff --git a/sys/arch/i386/include/cpu_full.h b/sys/arch/i386/include/cpu_full.h index ef820a4fd2c..da2dee89e72 100644 --- a/sys/arch/i386/include/cpu_full.h +++ b/sys/arch/i386/include/cpu_full.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu_full.h,v 1.1 2018/04/11 15:44:08 bluhm Exp $ */ +/* $OpenBSD: cpu_full.h,v 1.2 2018/05/28 20:52:44 bluhm Exp $ */ /* * Copyright (c) 2018 Philip Guenther <guenther@openbsd.org> * Copyright (c) 2018 Hans-Joerg Hoexer <hshoexer@genua.de> @@ -26,10 +26,14 @@ struct cpu_info_full { /* page mapped kRO in u-k */ union { - struct i386tss u_tss; /* followed by gdt */ - char u_align[PAGE_SIZE]; + struct { + struct i386tss uu_tss; + union descriptor uu_gdt[NGDT]; + } u_tssgdt; + char u_align[PAGE_SIZE]; } cif_TSS_RO; -#define cif_tss cif_TSS_RO.u_tss +#define cif_tss cif_TSS_RO.u_tssgdt.uu_tss +#define cif_gdt cif_TSS_RO.u_tssgdt.uu_gdt /* start of page mapped kRW in u-k */ uint32_t cif_tramp_stack[(PAGE_SIZE @@ -42,9 
+46,6 @@ struct cpu_info_full { struct cpu_info cif_cpu; } __aligned(PAGE_SIZE); -/* idt and align shim must fit exactly in a page */ -CTASSERT(_ALIGN(sizeof(struct gate_descriptor) * NIDT) <= PAGE_SIZE); - /* tss, align shim, and gdt must fit in a page */ CTASSERT(_ALIGN(sizeof(struct i386tss)) + sizeof(struct segment_descriptor) * NGDT < PAGE_SIZE); diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index d8992fd9763..91df8dfc14d 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.84 2018/04/11 15:44:08 bluhm Exp $ */ +/* $OpenBSD: pmap.h,v 1.85 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -95,6 +95,7 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ struct pmap { uint64_t pm_pdidx[4]; /* PDIEs for PAE mode */ + uint64_t pm_pdidx_intel[4]; /* PDIEs for PAE mode U-K */ struct mutex pm_mtx; struct mutex pm_apte_mtx; @@ -226,7 +227,9 @@ extern struct pool pmap_pv_pool; * Prototypes */ +vaddr_t pmap_tmpmap_pa_86(paddr_t); vaddr_t pmap_tmpmap_pa(paddr_t); +void pmap_tmpunmap_pa_86(void); void pmap_tmpunmap_pa(void); void pmap_bootstrap(vaddr_t); diff --git a/sys/arch/i386/include/specialreg.h b/sys/arch/i386/include/specialreg.h index a21292b7088..3c5de81b402 100644 --- a/sys/arch/i386/include/specialreg.h +++ b/sys/arch/i386/include/specialreg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specialreg.h,v 1.65 2018/02/10 09:46:58 jsg Exp $ */ +/* $OpenBSD: specialreg.h,v 1.66 2018/05/28 20:52:44 bluhm Exp $ */ /* $NetBSD: specialreg.h,v 1.7 1994/10/27 04:16:26 cgd Exp $ */ /*- @@ -168,6 +168,10 @@ #define CPUIDECX_F16C 0x20000000 /* 16bit fp conversion */ #define CPUIDECX_RDRAND 0x40000000 /* RDRAND instruction */ #define CPUIDECX_HV 0x80000000 /* Running on hypervisor */ +/* SEFF EDX bits */ +#define SEFF0EDX_IBRS 0x04000000 /* IBRS / IBPB Speculation Control */ +#define SEFF0EDX_STIBP 0x08000000 /* STIBP Speculation Control */ +#define SEFF0EDX_ARCH_CAP 0x20000000 /* Has IA32_ARCH_CAPABILITIES MSR */ /* * "Structured Extended Feature Flags Parameters" (CPUID function 0x7, leaf 0) @@ -329,6 +333,8 @@ #define MTRRcap_FIXED 0x100 /* bit 8 - fixed MTRRs supported */ #define MTRRcap_WC 0x400 /* bit 10 - WC type supported */ #define MTRRcap_SMRR 0x800 /* bit 11 - SMM range reg supported */ +#define MSR_ARCH_CAPABILITIES 0x10a +#define ARCH_CAPABILITIES_RDCL_NO (1 << 0) /* Meltdown safe */ #define MSR_BBL_CR_ADDR 0x116 /* PII+ only */ #define MSR_BBL_CR_DECC 0x118 /* PII+ only */ #define MSR_BBL_CR_CTL 0x119 /* PII+ only */ |
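
Taken together, pmap.c and pmapae.c keep a second, sanitized page directory per pmap on Meltdown-affected CPUs: pm_pdir_intel mirrors only the user-space PDEs plus the few kernel pages entered through pmap_enter_special_86()/_pae(). A condensed sketch of that mirroring, as done inline in pmap_alloc_ptp_86() and pmap_drop_ptp_86() above (the PAE variants differ only in using 64-bit PDEs and i386_atomic_testset_uq), might look like the following; the helper names are illustrative and do not exist in the tree, and the types and macros are the kernel's own.

/* Sketch only: assumes the i386 pmap headers; these helpers are hypothetical. */
static inline void
uk_mirror_user_pde(struct pmap *pm, vaddr_t va)
{
	pd_entry_t *pva_intel;

	/*
	 * Only pmaps on affected CPUs carry a U-K directory, and only
	 * user-space PDEs are ever mirrored into it.
	 */
	if (pm->pm_pdir_intel == 0 || va >= VM_MAXUSER_ADDRESS)
		return;
	pva_intel = (pd_entry_t *)pm->pm_pdir_intel;
	pva_intel[pdei(va)] = PDE(pm, pdei(va));	/* copy the live PDE */
}

static inline void
uk_drop_user_pde(struct pmap *pm, vaddr_t va)
{
	pd_entry_t *pva_intel;

	if (pm->pm_pdir_intel == 0)
		return;
	KASSERT(va < VM_MAXUSER_ADDRESS);
	pva_intel = (pd_entry_t *)pm->pm_pdir_intel;
	i386_atomic_testset_ul(&pva_intel[pdei(va)], 0);	/* zap the mirror */
}

Kernel PDEs, by contrast, are copied into pm_pdir_intel exactly once, when pmap_pinit_pd_86()/_pae() clones pmap_kernel's U-K view, and pmap_switch() records both directories in ci_kern_cr3/ci_user_cr3. Note that this commit still points ci_user_cr3 at the full directory (the #if 0 block in pmap_switch()), so the actual unmapping of the kernel from userland is prepared here but not yet switched on.
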