 sys/arch/i386/i386/cpu.c           |   3
 sys/arch/i386/i386/gdt.c           |   3
 sys/arch/i386/i386/locore.s        |   8
 sys/arch/i386/i386/locore0.S       |  44
 sys/arch/i386/i386/machdep.c       |   8
 sys/arch/i386/i386/pmap.c          | 227
 sys/arch/i386/i386/pmapae.c        | 274
 sys/arch/i386/include/cpu_full.h   |  15
 sys/arch/i386/include/pmap.h       |   5
 sys/arch/i386/include/specialreg.h |   8
 10 files changed, 553 insertions(+), 42 deletions(-)
diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c
index cf3f9e12205..4cb39e6308e 100644
--- a/sys/arch/i386/i386/cpu.c
+++ b/sys/arch/i386/i386/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.91 2018/04/28 15:44:59 jasper Exp $ */
+/* $OpenBSD: cpu.c,v 1.92 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */
/*-
@@ -250,6 +250,7 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
ci = &cif->cif_cpu;
#ifdef MULTIPROCESSOR
ci->ci_tss = &cif->cif_tss;
+ ci->ci_gdt = (void *)&cif->cif_gdt;
cpu_enter_pages(cif);
if (cpu_info[cpunum] != NULL)
panic("cpu at apic id %d already attached?", cpunum);
diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c
index ba8eb01907f..095019655a2 100644
--- a/sys/arch/i386/i386/gdt.c
+++ b/sys/arch/i386/i386/gdt.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: gdt.c,v 1.41 2018/04/11 15:44:08 bluhm Exp $ */
+/* $OpenBSD: gdt.c,v 1.42 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: gdt.c,v 1.28 2002/12/14 09:38:50 junyoung Exp $ */
/*-
@@ -97,7 +97,6 @@ gdt_init(void)
void
gdt_alloc_cpu(struct cpu_info *ci)
{
- ci->ci_gdt = (void *)(ci->ci_tss + 1);
bcopy(cpu_info_primary.ci_gdt, ci->ci_gdt, GDT_SIZE);
setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1,
SDT_MEMRWA, SEL_KPL, 0, 0);
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 8a8b22f7a38..58096e2e00f 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.186 2018/05/11 15:27:43 bluhm Exp $ */
+/* $OpenBSD: locore.s,v 1.187 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */
/*-
@@ -265,6 +265,8 @@ INTRENTRY_LABEL(label): /* from kernel */ ; \
.globl _C_LABEL(gdt)
.globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
.globl _C_LABEL(lapic_tpr)
+ .globl _C_LABEL(pg_g_kern)
+ .globl _C_LABEL(cpu_meltdown)
#if NLAPIC > 0
.align NBPG
@@ -318,6 +320,10 @@ _C_LABEL(bootdev): .long 0 # device we booted from
_C_LABEL(proc0paddr): .long 0
_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm
_C_LABEL(PTDsize): .long NBPG # size of PTD, for libkvm
+_C_LABEL(pg_g_kern): .long 0 # 0x100 if global pages should be used
+ # in kernel mappings, 0 otherwise (for
+ # insecure CPUs)
+_C_LABEL(cpu_meltdown): .long 0 # 1 if this CPU has Meltdown
.text
diff --git a/sys/arch/i386/i386/locore0.S b/sys/arch/i386/i386/locore0.S
index cdc1f522f04..b0c2fd7851d 100644
--- a/sys/arch/i386/i386/locore0.S
+++ b/sys/arch/i386/i386/locore0.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore0.S,v 1.3 2017/12/10 21:44:07 deraadt Exp $ */
+/* $OpenBSD: locore0.S,v 1.4 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */
/*-
@@ -234,6 +234,48 @@ start: movw $0x1234,0x472 # warm boot
movl %ecx,RELOC(_C_LABEL(cpu_vendor))+8
movl $0, RELOC(_C_LABEL(cpu_vendor))+12
+ /*
+ * Determine if CPU has meltdown. Certain Intel CPUs do not properly
+ * respect page permissions when speculatively loading data into
+ * the cache ("Meltdown" CVE). These CPUs must utilize a secondary
+ * sanitized page table lacking kernel mappings when executing user
+ * processes, and may not use PG_G global PTEs for kernel VAs.
+ */
+ movl $0x1, RELOC(_C_LABEL(cpu_meltdown))
+ movl $0x0, RELOC(_C_LABEL(pg_g_kern))
+
+ cmpl $0x756e6547,%ebx # "Genu"
+ jne .Lcpu_secure
+ cmpl $0x6c65746e,%ecx # "ntel"
+ jne .Lcpu_secure
+ cmpl $0x49656e69,%edx # "ineI"
+ jne .Lcpu_secure
+
+ /*
+ * Intel CPU, now check if IA32_ARCH_CAPABILITIES is supported and
+ * if it says this CPU is safe.
+ */
+ movl $0x0,%eax
+ cpuid
+ cmpl $0x7,%eax
+ jl .Lcpu_check_finished
+
+ movl $0x7,%eax
+ cpuid
+ testl $SEFF0EDX_ARCH_CAP,%edx
+ jz .Lcpu_check_finished
+
+ /* IA32_ARCH_CAPABILITIES MSR available, use it to check CPU security */
+ movl $MSR_ARCH_CAPABILITIES,%ecx
+ rdmsr
+ testl $ARCH_CAPABILITIES_RDCL_NO,%eax
+ jz .Lcpu_check_finished
+
+.Lcpu_secure:
+ movl $0x0, RELOC(_C_LABEL(cpu_meltdown))
+ movl $PG_G, RELOC(_C_LABEL(pg_g_kern))
+
+.Lcpu_check_finished:
movl $1,%eax
xorl %ecx,%ecx
cpuid
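
The assembly above encodes a simple decision tree: assume every CPU is vulnerable (cpu_meltdown = 1, pg_g_kern = 0) and only relax that for non-Intel vendors or for Intel parts whose IA32_ARCH_CAPABILITIES MSR reports RDCL_NO. A C rendering of the same logic is sketched below; cpuid() and rdmsr() are hypothetical helpers used purely for illustration (the real check runs in early-boot assembly, and rdmsr requires ring 0).

/*
 * Illustrative C version of the Meltdown check done in locore0.S.
 * cpuid() and rdmsr() are hypothetical stand-ins, not kernel APIs.
 */
#include <stdint.h>

#define SEFF0EDX_ARCH_CAP          0x20000000  /* CPUID 7.0 %edx bit 29 */
#define MSR_ARCH_CAPABILITIES      0x10a
#define ARCH_CAPABILITIES_RDCL_NO  (1 << 0)    /* Meltdown safe */
#define PG_G                       0x100       /* global-page PTE bit */

extern void     cpuid(uint32_t leaf, uint32_t subleaf, uint32_t r[4]); /* eax,ebx,ecx,edx */
extern uint64_t rdmsr(uint32_t msr);           /* privileged instruction */

uint32_t cpu_meltdown = 1;      /* assume vulnerable ... */
uint32_t pg_g_kern = 0;         /* ... and no PG_G on kernel mappings */

void
check_meltdown(void)
{
        uint32_t r[4];

        cpuid(0, 0, r);
        /* vendor "GenuineIntel" comes back as ebx="Genu" edx="ineI" ecx="ntel" */
        if (r[1] != 0x756e6547 || r[3] != 0x49656e69 || r[2] != 0x6c65746e)
                goto secure;            /* non-Intel: not affected */
        if (r[0] < 7)
                return;                 /* no SEFF leaf: stay pessimistic */
        cpuid(7, 0, r);
        if ((r[3] & SEFF0EDX_ARCH_CAP) == 0)
                return;                 /* no IA32_ARCH_CAPABILITIES MSR */
        if ((rdmsr(MSR_ARCH_CAPABILITIES) & ARCH_CAPABILITIES_RDCL_NO) == 0)
                return;                 /* MSR present, RDCL_NO not set */
secure:
        cpu_meltdown = 0;
        pg_g_kern = PG_G;
}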
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 6f7c5d4fca1..02ed064740a 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.616 2018/04/12 17:13:43 deraadt Exp $ */
+/* $OpenBSD: machdep.c,v 1.617 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */
/*-
@@ -1698,6 +1698,7 @@ identifycpu(struct cpu_info *ci)
char *brandstr_from, *brandstr_to;
char *cpu_device = ci->ci_dev->dv_xname;
int skipspace;
+ extern uint32_t cpu_meltdown;
if (cpuid_level == -1) {
#ifdef DIAGNOSTIC
@@ -2020,6 +2021,9 @@ identifycpu(struct cpu_info *ci)
printf(",%s", cpu_tpm_eaxfeatures[i].feature_name);
}
+ if (cpu_meltdown)
+ printf(",MELTDOWN");
+
printf("\n");
}
@@ -3098,7 +3102,7 @@ init386(paddr_t first_avail)
cpu_info_primary.ci_self = &cpu_info_primary;
cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;
cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss;
- cpu_info_primary.ci_gdt = (void *)(cpu_info_primary.ci_tss + 1);
+ cpu_info_primary.ci_gdt = (void *)&cpu_info_full_primary.cif_gdt;
/* make bootstrap gdt gates and memory segments */
setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff,
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 340bc4fd789..ed2c99c8aad 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.201 2018/04/20 07:27:54 mlarkin Exp $ */
+/* $OpenBSD: pmap.c,v 1.202 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
/*
@@ -75,6 +75,14 @@
#include "vmm.h"
+/* #define PMAP_DEBUG */
+
+#ifdef PMAP_DEBUG
+#define DPRINTF(x...) do { printf(x); } while(0)
+#else
+#define DPRINTF(x...)
+#endif /* PMAP_DEBUG */
+
/*
* this file contains the code for the "pmap module." the module's
* job is to manage the hardware's virtual to physical address mappings.
@@ -372,6 +380,13 @@ int nkptp_max = 1024 - (KERNBASE / NBPD) - 1;
extern int cpu_pae;
/*
+ * pg_g_kern: if the CPU is affected by Meltdown, pg_g_kern is 0;
+ * otherwise it is set to PG_G. pmap_pg_g will be derived
+ * from pg_g_kern, see pmap_bootstrap().
+ */
+extern int pg_g_kern;
+
+/*
* pmap_pg_g: if our processor supports PG_G in the PTE then we
* set pmap_pg_g to PG_G (otherwise it is zero).
*/
@@ -445,6 +460,8 @@ pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *flsh_pte;
caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp, pmap_flshp;
caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
+extern uint32_t cpu_meltdown;
+
/*
* local prototypes
*/
@@ -684,7 +701,7 @@ pmap_pte_paddr_86(vaddr_t va)
*/
vaddr_t
-pmap_tmpmap_pa(paddr_t pa)
+pmap_tmpmap_pa_86(paddr_t pa)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
@@ -692,9 +709,6 @@ pmap_tmpmap_pa(paddr_t pa)
pt_entry_t *ptpte;
caddr_t ptpva;
- if (cpu_pae)
- return pmap_tmpmap_pa_pae(pa);
-
ptpte = PTESLEW(ptp_pte, id);
ptpva = VASLEW(pmap_ptpp, id);
@@ -706,12 +720,22 @@ pmap_tmpmap_pa(paddr_t pa)
return((vaddr_t)ptpva);
}
+
+vaddr_t
+pmap_tmpmap_pa(paddr_t pa)
+{
+ if (cpu_pae)
+ return pmap_tmpmap_pa_pae(pa);
+
+ return pmap_tmpmap_pa_86(pa);
+}
+
/*
* pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
*/
void
-pmap_tmpunmap_pa(void)
+pmap_tmpunmap_pa_86(void)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
@@ -719,11 +743,6 @@ pmap_tmpunmap_pa(void)
pt_entry_t *ptpte;
caddr_t ptpva;
- if (cpu_pae) {
- pmap_tmpunmap_pa_pae();
- return;
- }
-
ptpte = PTESLEW(ptp_pte, id);
ptpva = VASLEW(pmap_ptpp, id);
@@ -741,6 +760,17 @@ pmap_tmpunmap_pa(void)
#endif
}
+void
+pmap_tmpunmap_pa(void)
+{
+ if (cpu_pae) {
+ pmap_tmpunmap_pa_pae();
+ return;
+ }
+
+ pmap_tmpunmap_pa_86();
+}
+
paddr_t
vtophys(vaddr_t va)
{
@@ -946,18 +976,19 @@ pmap_bootstrap(vaddr_t kva_start)
*/
/*
- * enable global TLB entries if they are supported
+ * enable global TLB entries if they are supported and the
+ * CPU is not affected by Meltdown.
*/
if (cpu_feature & CPUID_PGE) {
lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */
- pmap_pg_g = PG_G; /* enable software */
+ pmap_pg_g = pg_g_kern; /* if safe to use, enable software */
/* add PG_G attribute to already mapped kernel pages */
for (kva = VM_MIN_KERNEL_ADDRESS; kva < virtual_avail;
kva += PAGE_SIZE)
if (pmap_valid_entry(PTE_BASE[atop(kva)]))
- PTE_BASE[atop(kva)] |= PG_G;
+ PTE_BASE[atop(kva)] |= pmap_pg_g;
}
/*
@@ -1195,6 +1226,7 @@ struct vm_page *
pmap_alloc_ptp_86(struct pmap *pmap, int pde_index, pt_entry_t pde_flags)
{
struct vm_page *ptp;
+ pd_entry_t *pva_intel;
ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
@@ -1206,6 +1238,21 @@ pmap_alloc_ptp_86(struct pmap *pmap, int pde_index, pt_entry_t pde_flags)
ptp->wire_count = 1; /* no mappings yet */
PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) |
PG_RW | PG_V | PG_M | PG_U | pde_flags);
+
+ /*
+ * Meltdown special case - if we are adding a new PDE for
+ * usermode addresses, just copy the PDE to the U-K page
+ * table.
+ */
+ if (pmap->pm_pdir_intel && ptp_i2v(pde_index) < VM_MAXUSER_ADDRESS) {
+ pva_intel = (pd_entry_t *)pmap->pm_pdir_intel;
+ pva_intel[pde_index] = PDE(pmap, pde_index);
+ DPRINTF("%s: copying usermode PDE (content=0x%x) pde_index %d "
+ "from 0x%x -> 0x%x\n", __func__, PDE(pmap, pde_index),
+ pde_index, (uint32_t)&PDE(pmap, pde_index),
+ (uint32_t)&(pva_intel[pde_index]));
+ }
+
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
@@ -1247,6 +1294,8 @@ void
pmap_drop_ptp_86(struct pmap *pm, vaddr_t va, struct vm_page *ptp,
pt_entry_t *ptes)
{
+ pd_entry_t *pva_intel;
+
i386_atomic_testset_ul(&PDE(pm, pdei(va)), 0);
pmap_tlb_shootpage(curcpu()->ci_curpmap, ((vaddr_t)ptes) + ptp->offset);
#ifdef MULTIPROCESSOR
@@ -1263,6 +1312,16 @@ pmap_drop_ptp_86(struct pmap *pm, vaddr_t va, struct vm_page *ptp,
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
+
+ if (pm->pm_pdir_intel) {
+ KASSERT(va < VM_MAXUSER_ADDRESS);
+ /* Zap special meltdown PDE */
+ pva_intel = (pd_entry_t *)pm->pm_pdir_intel;
+ i386_atomic_testset_ul(&pva_intel[pdei(va)], 0);
+ DPRINTF("%s: cleared meltdown PDE @ index %lu "
+ "(va range start 0x%x)\n", __func__, pdei(va),
+ (uint32_t)va);
+ }
}
/*
@@ -1318,10 +1377,6 @@ pmap_pinit_pd_86(struct pmap *pmap)
&pmap->pm_pdirpa);
pmap->pm_pdirsize = NBPG;
- /* XXX hshoexer */
- pmap->pm_pdir_intel = pmap->pm_pdir;
- pmap->pm_pdirpa_intel = pmap->pm_pdirpa;
-
/* init PDP */
/* zero init area */
bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
@@ -1341,6 +1396,34 @@ pmap_pinit_pd_86(struct pmap *pmap)
/* zero the rest */
bzero(&PDE(pmap, PDSLOT_KERN + nkpde),
NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
+
+ /*
+ * Intel CPUs need a special page table to be used during usermode
+ * execution, one that lacks all kernel mappings.
+ */
+ if (cpu_meltdown) {
+ pmap->pm_pdir_intel = uvm_km_zalloc(kernel_map, NBPG);
+ if (pmap->pm_pdir_intel == 0)
+ panic("%s: kernel_map out of virtual space!", __func__);
+
+ if (!pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir_intel,
+ &pmap->pm_pdirpa_intel))
+ panic("%s: unknown PA mapping for meltdown PD\n",
+ __func__);
+
+ /* Copy PDEs from pmap_kernel's U-K view */
+ bcopy((void *)pmap_kernel()->pm_pdir_intel,
+ (void *)pmap->pm_pdir_intel, NBPG);
+
+ DPRINTF("%s: pmap %p pm_pdir 0x%lx pm_pdirpa 0x%lx "
+ "pdir_intel 0x%lx pdirpa_intel 0x%lx\n",
+ __func__, pmap, pmap->pm_pdir, pmap->pm_pdirpa,
+ pmap->pm_pdir_intel, pmap->pm_pdirpa_intel);
+ } else {
+ pmap->pm_pdir_intel = 0;
+ pmap->pm_pdirpa_intel = 0;
+ }
+
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
}
@@ -1443,7 +1526,11 @@ pmap_switch(struct proc *o, struct proc *p)
*/
if (pmap->pm_pdirpa_intel) {
self->ci_kern_cr3 = pmap->pm_pdirpa;
+#if 0 /* XXX hshoexer: Do not unmap kernel, yet */
self->ci_user_cr3 = pmap->pm_pdirpa_intel;
+#else
+ self->ci_user_cr3 = pmap->pm_pdirpa;
+#endif
}
/*
@@ -2421,10 +2508,112 @@ out:
return error;
}
+/*
+ * Allocate an extra PD page and PT pages as needed to map kernel
+ * pages used for the U-K mappings. These special mappings are set
+ * up during bootstrap, are never removed and are part of
+ * pmap_kernel.
+ *
+ * New pmaps inherit the kernel portion of pmap_kernel including
+ * the special mappings (see pmap_pinit_pd_86()).
+ *
+ * To be able to release PT pages when migrating to PAE paging, use
+ * wire_count for the number of PTEs in the PT page.
+ */
void
pmap_enter_special_86(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags)
{
- /* XXX hshoexer nothing yet */
+ struct pmap *pmap = pmap_kernel();
+ struct vm_page *ptppg = NULL;
+ pd_entry_t *pd, *ptp;
+ pt_entry_t *ptes;
+ uint32_t l2idx, l1idx;
+ paddr_t npa;
+
+ /* If CPU is secure, no need to do anything */
+ if (!cpu_meltdown)
+ return;
+
+ /* Must be kernel VA */
+ if (va < VM_MIN_KERNEL_ADDRESS)
+ panic("%s: invalid special mapping va 0x%lx requested",
+ __func__, va);
+
+ if (!pmap->pm_pdir_intel) {
+ if ((pmap->pm_pdir_intel = uvm_km_zalloc(kernel_map, NBPG))
+ == 0)
+ panic("%s: kernel_map out of virtual space!", __func__);
+ if (!pmap_extract(pmap, pmap->pm_pdir_intel,
+ &pmap->pm_pdirpa_intel))
+ panic("%s: can't locate PD page\n", __func__);
+ }
+
+ DPRINTF("%s: pm_pdir_intel 0x%x pm_pdirpa_intel 0x%x\n", __func__,
+ (uint32_t)pmap->pm_pdir_intel, (uint32_t)pmap->pm_pdirpa_intel);
+
+ l2idx = pdei(va);
+ l1idx = ptei(va);
+
+ DPRINTF("%s: va 0x%08lx pa 0x%08lx prot 0x%08lx flags 0x%08x "
+ "l2idx %u l1idx %u\n", __func__, va, pa, (unsigned long)prot,
+ flags, l2idx, l1idx);
+
+ if ((pd = (pd_entry_t *)pmap->pm_pdir_intel) == NULL)
+ panic("%s: PD not initialized for pmap @ %p\n", __func__, pmap);
+
+ /* npa = physaddr of PT page */
+ npa = pd[l2idx] & PMAP_PA_MASK;
+
+ /* Valid PDE for the 4MB region containing va? */
+ if (!npa) {
+ /*
+ * No valid PDE - allocate PT page and set PDE. We
+ * get it from pm_obj, which is used for PT pages.
+ * We calculate the offset from l2idx+1024, so we are
+ * beyond the regular PT pages. For their l2dix
+ * 0 <= l2idx < 1024 holds.
+ */
+ ptppg = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(l2idx + 1024),
+ NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
+ if (ptppg == NULL)
+ panic("%s: failed to allocate PT page", __func__);
+
+ atomic_clearbits_int(&ptppg->pg_flags, PG_BUSY);
+ ptppg->wire_count = 1; /* no mappings yet */
+
+ npa = VM_PAGE_TO_PHYS(ptppg);
+ pd[l2idx] = (npa | PG_RW | PG_V | PG_M | PG_U);
+
+ DPRINTF("%s: allocated new PT page at phys 0x%x, "
+ "setting PDE[%d] = 0x%x\n", __func__, (uint32_t)npa,
+ l2idx, pd[l2idx]);
+ }
+
+ /* temporarily map PT page and set PTE for U-K mapping */
+ if (ptppg == NULL && (ptppg = PHYS_TO_VM_PAGE(npa)) == NULL)
+ panic("%s: no vm_page for PT page", __func__);
+ mtx_enter(&ptppg->mdpage.pv_mtx);
+ ptp = (pd_entry_t *)pmap_tmpmap_pa(npa);
+ ptp[l1idx] = (pa | protection_codes[prot] | PG_V | PG_M | PG_U | flags);
+ ptppg->wire_count++;
+ DPRINTF("%s: setting PTE[%d] = 0x%x (wire_count %d)\n", __func__,
+ l1idx, ptp[l1idx], ptppg->wire_count);
+ pmap_tmpunmap_pa();
+ mtx_leave(&ptppg->mdpage.pv_mtx);
+
+ /*
+ * if supported, set the PG_G flag on the corresponding U+K
+ * entry. U+K mappings can use PG_G, as they are mapped
+ * along with user land anyway.
+ */
+ if (!(cpu_feature & CPUID_PGE))
+ return;
+ ptes = pmap_map_ptes_86(pmap); /* pmap_kernel -> PTE_BASE */
+ if (pmap_valid_entry(ptes[atop(va)]))
+ ptes[atop(va)] |= PG_G;
+ else
+ DPRINTF("%s: no U+K mapping for special mapping?\n", __func__);
+ pmap_unmap_ptes_86(pmap); /* pmap_kernel -> nothing */
}
/*
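
The index arithmetic behind pmap_enter_special_86() is easy to check by hand: without PAE a page directory has 1024 entries and each PDE covers 4 MB (VA bits 31..22 select the PDE, bits 21..12 the PTE), which is why the special U-K page table pages are keyed into pm_obj at ptp_i2o(l2idx + 1024), safely beyond the regular PT pages at indices 0..1023. A minimal sketch of that calculation follows; the example address is arbitrary, and the masks and shift mirror the PD_MASK86/PT_MASK86/PDSHIFT86 values defined further down in pmapae.c.

/* Non-PAE i386 page-table indices for a sample kernel VA.  The masks
 * and shift mirror PD_MASK86/PT_MASK86/PDSHIFT86 from pmapae.c; the
 * example address is arbitrary. */
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT    12
#define PDSHIFT86  22           /* a non-PAE PDE maps 4 MB */
#define PD_MASK86  0xffc00000U
#define PT_MASK86  0x003ff000U

int
main(void)
{
        uint32_t va = 0xd0200000U;      /* example kernel VA */
        uint32_t l2idx = (va & PD_MASK86) >> PDSHIFT86; /* PDE slot: 832 */
        uint32_t l1idx = (va & PT_MASK86) >> PGSHIFT;   /* PTE slot: 512 */

        printf("l2idx %u l1idx %u, U-K PT page keyed at %u\n",
            l2idx, l1idx, l2idx + 1024);
        return 0;
}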
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index 5f076400690..20575eda6f9 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmapae.c,v 1.54 2018/04/20 07:27:54 mlarkin Exp $ */
+/* $OpenBSD: pmapae.c,v 1.55 2018/05/28 20:52:44 bluhm Exp $ */
/*
* Copyright (c) 2006-2008 Michael Shalayeff
@@ -100,6 +100,14 @@
#include "ksyms.h"
+/* #define PMAPAE_DEBUG */
+
+#ifdef PMAPAE_DEBUG
+#define DPRINTF(x...) do { printf(x); } while(0)
+#else
+#define DPRINTF(x...)
+#endif /* PMAPAE_DEBUG */
+
/*
* this file contains the code for the "pmap module." the module's
* job is to manage the hardware's virtual to physical address mappings.
@@ -347,6 +355,8 @@
#undef NBPD
#define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */
+#define PDSHIFT86 22 /* for pmap86 transfer */
+
#undef PDSLOT_PTE
#define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */
#undef PDSLOT_KERN
@@ -375,6 +385,9 @@
#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT)
#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT)
+#define PD_MASK86 0xffc00000 /* for pmap86 transfer */
+#define PT_MASK86 0x003ff000 /* for pmap86 transfer */
+
/*
* Mach derived conversion macros
*/
@@ -457,6 +470,8 @@ extern int pmap_pg_g;
extern int pmap_pg_wc;
extern struct pmap_head pmaps;
+extern uint32_t cpu_meltdown;
+
/*
* local prototypes
*/
@@ -616,6 +631,10 @@ pmap_bootstrap_pae(void)
cpu_pae = 1;
+ DPRINTF("%s: pm_pdir 0x%x pm_pdirpa 0x%x pm_pdirsize %d\n", __func__,
+ (uint32_t)kpm->pm_pdir, (uint32_t)kpm->pm_pdirpa,
+ kpm->pm_pdirsize);
+
va = (vaddr_t)kpm->pm_pdir;
kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V;
kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V;
@@ -628,11 +647,13 @@ pmap_bootstrap_pae(void)
PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW | PG_M | PG_U;
/* transfer all kernel mappings over into pae tables */
- for (va = KERNBASE, eva = va + (nkpde << 22);
+ for (va = KERNBASE, eva = va + (nkpde << PDSHIFT86);
va < eva; va += PAGE_SIZE) {
if (!pmap_valid_entry(PDE(kpm, pdei(va)))) {
ptp = uvm_pagealloc(&kpm->pm_obj, va, NULL,
UVM_PGA_ZERO);
+ if (ptp == NULL)
+ panic("%s: uvm_pagealloc() failed", __func__);
ptaddr = VM_PAGE_TO_PHYS(ptp);
PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V |
PG_U | PG_M;
@@ -663,6 +684,64 @@ pmap_bootstrap_pae(void)
pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits);
}
+ /* Transfer special mappings */
+ if (kpm->pm_pdir_intel) {
+ uint32_t *pd, *ptp;
+ uint32_t l1idx, l2idx;
+ paddr_t npa;
+ struct vm_page *ptppg;
+
+ pd = (uint32_t *)kpm->pm_pdir_intel;
+ kpm->pm_pdir_intel = kpm->pm_pdirpa_intel = 0;
+
+ for (va = KERNBASE, eva = va + (nkpde << PDSHIFT86); va < eva;
+ va += PAGE_SIZE) {
+ l1idx = ((va & PT_MASK86) >> PGSHIFT);
+ l2idx = ((va & PD_MASK86) >> PDSHIFT86);
+
+ if (!pmap_valid_entry(pd[l2idx]))
+ continue;
+
+ npa = pd[l2idx] & PMAP_PA_MASK;
+ ptppg = PHYS_TO_VM_PAGE(npa);
+ mtx_enter(&ptppg->mdpage.pv_mtx);
+
+ /* still running on pmap86 */
+ ptp = (uint32_t *)pmap_tmpmap_pa_86(npa);
+
+ if (!pmap_valid_entry(ptp[l1idx])) {
+ mtx_leave(&ptppg->mdpage.pv_mtx);
+ pmap_tmpunmap_pa_86();
+ continue;
+ }
+ DPRINTF("%s: va 0x%x l2idx %u 0x%x lx1idx %u 0x%x\n",
+ __func__, (uint32_t)va, l2idx, (uint32_t)pd[l2idx],
+ l1idx, (uint32_t)ptp[l1idx]);
+
+ /* protection and cacheability */
+ bits = ptp[l1idx] & (PG_PROT|PG_N|PG_WT);
+ npa = ptp[l1idx] & PMAP_PA_MASK;
+
+ /* still running on pmap86 */
+ pmap_tmpunmap_pa_86();
+ mtx_leave(&ptppg->mdpage.pv_mtx);
+
+ /* enforce use of pmap86 */
+ cpu_pae = 0;
+ pmap_enter_special_pae(va, npa, 0, bits);
+ cpu_pae = 1;
+
+ if (--ptppg->wire_count == 1) {
+ ptppg->wire_count = 0;
+ uvm_pagerealloc(ptppg, NULL, 0);
+ DPRINTF("%s: freeing PT page 0x%x\n", __func__,
+ (uint32_t)VM_PAGE_TO_PHYS(ptppg));
+ }
+ }
+ uvm_km_free(kernel_map, (vaddr_t)pd, NBPG);
+ DPRINTF("%s: freeing PDP 0x%x\n", __func__, (uint32_t)pd);
+ }
+
if (!cpu_paenable(&kpm->pm_pdidx[0])) {
extern struct user *proc0paddr;
@@ -670,6 +749,10 @@ pmap_bootstrap_pae(void)
(vaddr_t)kpm - KERNBASE;
kpm->pm_pdirsize = 4 * NBPG;
+ DPRINTF("%s: pm_pdir 0x%x pm_pdirpa 0x%x pm_pdirsize %d\n",
+ __func__, (uint32_t)kpm->pm_pdir, (uint32_t)kpm->pm_pdirpa,
+ kpm->pm_pdirsize);
+
csrc_pte = vtopte(pmap_csrcp);
cdst_pte = vtopte(pmap_cdstp);
zero_pte = vtopte(pmap_zerop);
@@ -748,6 +831,7 @@ struct vm_page *
pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, pt_entry_t pde_flags)
{
struct vm_page *ptp;
+ pd_entry_t *pva_intel;
ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
@@ -759,6 +843,22 @@ pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, pt_entry_t pde_flags)
ptp->wire_count = 1; /* no mappings yet */
PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) |
PG_RW | PG_V | PG_M | PG_U | pde_flags);
+
+ /*
+ * Meltdown special case - if we are adding a new PDE for
+ * usermode addresses, just copy the PDE to the U-K
+ * table.
+ */
+ if (pmap->pm_pdir_intel && ptp_i2v(pde_index) < VM_MAXUSER_ADDRESS) {
+ pva_intel = (pd_entry_t *)pmap->pm_pdir_intel;
+ pva_intel[pde_index] = PDE(pmap, pde_index);
+ DPRINTF("%s: copying usermode PDE (content=0x%llx) pde_index "
+ "%d from 0x%llx -> 0x%llx\n", __func__,
+ PDE(pmap, pde_index), pde_index,
+ (uint64_t)&PDE(pmap, pde_index),
+ (uint64_t)&(pva_intel[pde_index]));
+ }
+
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
@@ -800,6 +900,8 @@ void
pmap_drop_ptp_pae(struct pmap *pm, vaddr_t va, struct vm_page *ptp,
pt_entry_t *ptes)
{
+ pd_entry_t *pva_intel;
+
i386_atomic_testset_uq(&PDE(pm, pdei(va)), 0);
pmap_tlb_shootpage(curcpu()->ci_curpmap, ((vaddr_t)ptes) + ptp->offset);
#ifdef MULTIPROCESSOR
@@ -816,6 +918,16 @@ pmap_drop_ptp_pae(struct pmap *pm, vaddr_t va, struct vm_page *ptp,
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
+
+ if (pm->pm_pdir_intel) {
+ KASSERT(va < VM_MAXUSER_ADDRESS);
+ /* Zap special meltdown PDE */
+ pva_intel = (pd_entry_t *)pm->pm_pdir_intel;
+ i386_atomic_testset_uq(&pva_intel[pdei(va)], 0);
+ DPRINTF("%s: cleared meltdown PDE @ index %lu "
+ "(va range start 0x%x)\n", __func__, pdei(va),
+ (uint32_t)va);
+ }
}
/*
@@ -849,10 +961,6 @@ pmap_pinit_pd_pae(struct pmap *pmap)
pmap->pm_pdidx[3] |= PG_V;
pmap->pm_pdirsize = 4 * NBPG;
- /* XXX hshoexer */
- pmap->pm_pdir_intel = pmap->pm_pdir;
- pmap->pm_pdirpa_intel = pmap->pm_pdirpa;
-
/* init PDP */
/* zero init area */
bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
@@ -878,6 +986,44 @@ pmap_pinit_pd_pae(struct pmap *pmap)
/* zero the rest */
bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize -
((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
+
+ /*
+ * Intel CPUs need a special page table to be used during usermode
+ * execution, one that lacks all kernel mappings.
+ */
+ if (cpu_meltdown) {
+ int i;
+
+ if ((va = uvm_km_zalloc(kernel_map, 4 * NBPG)) == 0)
+ panic("%s: kernel_map out of virtual space!", __func__);
+ if (!pmap_extract(pmap_kernel(),
+ (vaddr_t)&pmap->pm_pdidx_intel, &pmap->pm_pdirpa_intel))
+ panic("%s: can't locate PDPT\n", __func__);
+ pmap->pm_pdir_intel = va;
+
+ for (i = 0; i < 4; i++) {
+ pmap->pm_pdidx_intel[i] = 0;
+ if (!pmap_extract(pmap, va + i * NBPG,
+ (paddr_t *)&pmap->pm_pdidx_intel[i]))
+ panic("%s: can't locate PD page\n", __func__);
+ pmap->pm_pdidx_intel[i] |= PG_V;
+ DPRINTF("%s: pm_pdidx_intel[%d] = 0x%llx\n", __func__,
+ i, pmap->pm_pdidx_intel[i]);
+ }
+
+ /* Copy PDEs from pmap_kernel's U-K view */
+ bcopy((void *)pmap_kernel()->pm_pdir_intel,
+ (void *)pmap->pm_pdir_intel, 4 * NBPG);
+
+ DPRINTF("%s: pmap %p pm_pdir 0x%lx pm_pdirpa 0x%lx "
+ "pdir_intel 0x%lx pdirpa_intel 0x%lx\n",
+ __func__, pmap, pmap->pm_pdir, pmap->pm_pdirpa,
+ pmap->pm_pdir_intel, pmap->pm_pdirpa_intel);
+ } else {
+ pmap->pm_pdir_intel = 0;
+ pmap->pm_pdirpa_intel = 0;
+ }
+
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
}
@@ -1757,10 +1903,124 @@ out:
return error;
}
+/*
+ * Allocate an extra PDPT and PT pages as needed to map kernel pages
+ * used for the U-K mappings. These special mappings are set up
+ * during bootstrap, are never removed and are part of pmap_kernel.
+ *
+ * New pmaps inherit the kernel portion of pmap_kernel including
+ * the special mappings (see pmap_pinit_pd_pae()).
+ */
void
pmap_enter_special_pae(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags)
{
- /* XXX hshoexer nothing yet */
+ struct pmap *pmap = pmap_kernel();
+ struct vm_page *ptppg = NULL, *pdppg;
+ pd_entry_t *pd, *ptp;
+ pt_entry_t *ptes;
+ uint32_t l2idx, l1idx;
+ vaddr_t vapd;
+ paddr_t npa;
+ int i;
+
+ /* If CPU is secure, no need to do anything */
+ if (!cpu_meltdown)
+ return;
+
+ /* Must be kernel VA */
+ if (va < VM_MIN_KERNEL_ADDRESS)
+ panic("%s: invalid special mapping va 0x%lx requested",
+ __func__, va);
+
+ if (!pmap->pm_pdir_intel) {
+ if ((vapd = uvm_km_zalloc(kernel_map, 4 * NBPG)) == 0)
+ panic("%s: kernel_map out of virtual space!", __func__);
+ pmap->pm_pdir_intel = vapd;
+ if (!pmap_extract(pmap, (vaddr_t)&pmap->pm_pdidx_intel,
+ &pmap->pm_pdirpa_intel))
+ panic("%s: can't locate PDPT\n", __func__);
+
+ for (i = 0; i < 4; i++) {
+ pmap->pm_pdidx_intel[i] = 0;
+ if (!pmap_extract(pmap, vapd + i*NBPG,
+ (paddr_t *)&pmap->pm_pdidx_intel[i]))
+ panic("%s: can't locate PD page\n", __func__);
+
+ /* ensure PDPs are wired down XXX hshoexer why? */
+ pdppg = PHYS_TO_VM_PAGE(pmap->pm_pdidx_intel[i]);
+ if (pdppg == NULL)
+ panic("%s: no vm_page for pdidx %d", __func__, i);
+ atomic_clearbits_int(&pdppg->pg_flags, PG_BUSY);
+ pdppg->wire_count = 1; /* no mappings yet */
+
+ pmap->pm_pdidx_intel[i] |= PG_V;
+
+ DPRINTF("%s: pm_pdidx_intel[%d] = 0x%llx\n", __func__,
+ i, pmap->pm_pdidx_intel[i]);
+ }
+ }
+
+ DPRINTF("%s: pm_pdir_intel 0x%x pm_pdirpa_intel 0x%x\n", __func__,
+ (uint32_t)pmap->pm_pdir_intel, (uint32_t)pmap->pm_pdirpa_intel);
+
+ /* These are the PAE versions of pdei() and ptei() */
+ l2idx = pdei(va);
+ l1idx = ptei(va);
+
+ DPRINTF("%s: va 0x%08lx pa 0x%08lx prot 0x%08lx flags 0x%08x "
+ "l2idx %u l1idx %u\n", __func__, va, pa, (unsigned long)prot,
+ flags, l2idx, l1idx);
+
+ if ((pd = (pd_entry_t *)pmap->pm_pdir_intel) == 0)
+ panic("%s: PD not initialized for pmap @ %p\n", __func__, pmap);
+
+ /* npa = physaddr of PT page */
+ npa = pd[l2idx] & PMAP_PA_MASK;
+
+ /* Valid PDE for the 2MB region containing va? */
+ if (!npa) {
+ /*
+ * No valid PDE - allocate PT page and set PDE. We
+ * get it from pm_obj, which is used for PT pages.
+ * We calculate the offset from l2idx+2048, so we are
+ * beyond the regular PT pages. For their l2dix
+ * 0 <= l2idx < 2048 holds.
+ */
+ ptppg = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(l2idx + 2048),
+ NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
+ if (ptppg == NULL)
+ panic("%s: failed to allocate PT page", __func__);
+
+ atomic_clearbits_int(&ptppg->pg_flags, PG_BUSY);
+ ptppg->wire_count = 1; /* no mappings yet */
+
+ npa = VM_PAGE_TO_PHYS(ptppg);
+ pd[l2idx] = (npa | PG_RW | PG_V | PG_M | PG_U);
+
+ DPRINTF("%s: allocated new PT page at phys 0x%x, "
+ "setting PDE[%d] = 0x%llx\n", __func__, (uint32_t)npa,
+ l2idx, pd[l2idx]);
+ }
+
+ /* temporarily map PT page and set PTE for U-K mapping */
+ if (ptppg == NULL && (ptppg = PHYS_TO_VM_PAGE(npa)) == NULL)
+ panic("%s: no vm_page for PT page", __func__);
+ mtx_enter(&ptppg->mdpage.pv_mtx);
+ ptp = (pd_entry_t *)pmap_tmpmap_pa(npa);
+ ptp[l1idx] = (pa | protection_codes[prot] | PG_V | PG_M | PG_U | flags);
+ DPRINTF("%s: setting PTE[%d] = 0x%llx\n", __func__, l1idx, ptp[l1idx]);
+ pmap_tmpunmap_pa();
+ mtx_leave(&ptppg->mdpage.pv_mtx);
+
+ /* if supported, set the PG_G flag on the corresponding U+K entry */
+ if (!(cpu_feature & CPUID_PGE))
+ return;
+ ptes = pmap_map_ptes_pae(pmap); /* pmap_kernel -> PTE_BASE */
+ if (pmap_valid_entry(ptes[atop(va)]))
+ ptes[atop(va)] |= PG_G;
+ else
+ DPRINTF("%s: no U+K mapping for special mapping?\n", __func__);
+ pmap_unmap_ptes_pae(pmap); /* pmap_kernel -> nothing */
}
/*
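
Under PAE the same special-mapping machinery changes shape: the page directory is split over four pages of 512 entries each, a PDE covers only 2 MB, and the four U-K PD pages are published to the hardware through the pm_pdidx_intel[] PDPT entries. Consequently l2idx runs from 0 to 2047 and the special PT pages are keyed at l2idx + 2048. A companion sketch to the non-PAE one above, again with an arbitrary example address:

/* PAE page-table indices for the same sample kernel VA.  A PAE PDE
 * maps 2 MB and there are 4 * 512 = 2048 PDE slots, hence the +2048
 * offset used for the U-K PT pages.  Values are for illustration. */
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT  12
#define PDSHIFT  21             /* a PAE PDE maps 2 MB */
#define PD_MASK  0xffe00000U
#define PT_MASK  0x001ff000U

int
main(void)
{
        uint32_t va = 0xd0200000U;      /* example kernel VA */
        uint32_t l2idx = (va & PD_MASK) >> PDSHIFT;     /* PDE slot: 1665 */
        uint32_t l1idx = (va & PT_MASK) >> PGSHIFT;     /* PTE slot: 0 */

        printf("PAE l2idx %u l1idx %u, U-K PT page keyed at %u\n",
            l2idx, l1idx, l2idx + 2048);
        return 0;
}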
diff --git a/sys/arch/i386/include/cpu_full.h b/sys/arch/i386/include/cpu_full.h
index ef820a4fd2c..da2dee89e72 100644
--- a/sys/arch/i386/include/cpu_full.h
+++ b/sys/arch/i386/include/cpu_full.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu_full.h,v 1.1 2018/04/11 15:44:08 bluhm Exp $ */
+/* $OpenBSD: cpu_full.h,v 1.2 2018/05/28 20:52:44 bluhm Exp $ */
/*
* Copyright (c) 2018 Philip Guenther <guenther@openbsd.org>
* Copyright (c) 2018 Hans-Joerg Hoexer <hshoexer@genua.de>
@@ -26,10 +26,14 @@
struct cpu_info_full {
/* page mapped kRO in u-k */
union {
- struct i386tss u_tss; /* followed by gdt */
- char u_align[PAGE_SIZE];
+ struct {
+ struct i386tss uu_tss;
+ union descriptor uu_gdt[NGDT];
+ } u_tssgdt;
+ char u_align[PAGE_SIZE];
} cif_TSS_RO;
-#define cif_tss cif_TSS_RO.u_tss
+#define cif_tss cif_TSS_RO.u_tssgdt.uu_tss
+#define cif_gdt cif_TSS_RO.u_tssgdt.uu_gdt
/* start of page mapped kRW in u-k */
uint32_t cif_tramp_stack[(PAGE_SIZE
@@ -42,9 +46,6 @@ struct cpu_info_full {
struct cpu_info cif_cpu;
} __aligned(PAGE_SIZE);
-/* idt and align shim must fit exactly in a page */
-CTASSERT(_ALIGN(sizeof(struct gate_descriptor) * NIDT) <= PAGE_SIZE);
-
/* tss, align shim, and gdt must fit in a page */
CTASSERT(_ALIGN(sizeof(struct i386tss)) +
sizeof(struct segment_descriptor) * NGDT < PAGE_SIZE);
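
The cpu_full.h change relies on a small layout pattern: pack the TSS and the GDT into one struct, union it with a PAGE_SIZE char array so the payload is padded out to a full page, and let a CTASSERT prove at compile time that the payload really fits. A generic sketch of the pattern is given below; the member sizes and NGDT value are illustrative stand-ins, not the kernel's actual definitions.

/* Generic "pack two objects into one read-only page" pattern, as used
 * for cif_TSS_RO above.  Sizes and NGDT are illustrative stand-ins. */
#define PAGE_SIZE 4096
#define CTASSERT(x) extern char ctassert[(x) ? 1 : -1]

struct tss_like  { char bytes[104]; };  /* stand-in for struct i386tss */
struct gdt_entry { char bytes[8]; };    /* stand-in for union descriptor */
#define NGDT 32                         /* illustrative descriptor count */

union ro_page {
        struct {
                struct tss_like  uu_tss;
                struct gdt_entry uu_gdt[NGDT];
        } u_tssgdt;
        char u_align[PAGE_SIZE];        /* pads the union out to one page */
};

/* TSS plus GDT must fit in the page that u_align reserves */
CTASSERT(sizeof(struct tss_like) + sizeof(struct gdt_entry) * NGDT <= PAGE_SIZE);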
diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h
index d8992fd9763..91df8dfc14d 100644
--- a/sys/arch/i386/include/pmap.h
+++ b/sys/arch/i386/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.84 2018/04/11 15:44:08 bluhm Exp $ */
+/* $OpenBSD: pmap.h,v 1.85 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */
/*
@@ -95,6 +95,7 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
struct pmap {
uint64_t pm_pdidx[4]; /* PDIEs for PAE mode */
+ uint64_t pm_pdidx_intel[4]; /* PDIEs for PAE mode U-K */
struct mutex pm_mtx;
struct mutex pm_apte_mtx;
@@ -226,7 +227,9 @@ extern struct pool pmap_pv_pool;
* Prototypes
*/
+vaddr_t pmap_tmpmap_pa_86(paddr_t);
vaddr_t pmap_tmpmap_pa(paddr_t);
+void pmap_tmpunmap_pa_86(void);
void pmap_tmpunmap_pa(void);
void pmap_bootstrap(vaddr_t);
diff --git a/sys/arch/i386/include/specialreg.h b/sys/arch/i386/include/specialreg.h
index a21292b7088..3c5de81b402 100644
--- a/sys/arch/i386/include/specialreg.h
+++ b/sys/arch/i386/include/specialreg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: specialreg.h,v 1.65 2018/02/10 09:46:58 jsg Exp $ */
+/* $OpenBSD: specialreg.h,v 1.66 2018/05/28 20:52:44 bluhm Exp $ */
/* $NetBSD: specialreg.h,v 1.7 1994/10/27 04:16:26 cgd Exp $ */
/*-
@@ -168,6 +168,10 @@
#define CPUIDECX_F16C 0x20000000 /* 16bit fp conversion */
#define CPUIDECX_RDRAND 0x40000000 /* RDRAND instruction */
#define CPUIDECX_HV 0x80000000 /* Running on hypervisor */
+/* SEFF EDX bits */
+#define SEFF0EDX_IBRS 0x04000000 /* IBRS / IBPB Speculation Control */
+#define SEFF0EDX_STIBP 0x08000000 /* STIBP Speculation Control */
+#define SEFF0EDX_ARCH_CAP 0x20000000 /* Has IA32_ARCH_CAPABILITIES MSR */
/*
* "Structured Extended Feature Flags Parameters" (CPUID function 0x7, leaf 0)
@@ -329,6 +333,8 @@
#define MTRRcap_FIXED 0x100 /* bit 8 - fixed MTRRs supported */
#define MTRRcap_WC 0x400 /* bit 10 - WC type supported */
#define MTRRcap_SMRR 0x800 /* bit 11 - SMM range reg supported */
+#define MSR_ARCH_CAPABILITIES 0x10a
+#define ARCH_CAPABILITIES_RDCL_NO (1 << 0) /* Meltdown safe */
#define MSR_BBL_CR_ADDR 0x116 /* PII+ only */
#define MSR_BBL_CR_DECC 0x118 /* PII+ only */
#define MSR_BBL_CR_CTL 0x119 /* PII+ only */