summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
author: Philip Guenther <guenther@cvs.openbsd.org>	2018-10-04 05:00:41 +0000
committer: Philip Guenther <guenther@cvs.openbsd.org>	2018-10-04 05:00:41 +0000
commit: abcaa6db282c51f6c0ca44abef525048f4dc5654 (patch)
tree: 6e391ad3aa4216eb439b0383b06346af77900049 /sys
parent: e8b69426f1fc858f731daa3e39ff8a11a8b32953 (diff)
Use PCIDs where they and the INVPCID instruction are available.
This uses one PCID for kernel threads, one for the U+K tables of normal processes, one for the matching U-K tables (when meltdown is in effect), and one for temporary mappings when poking other processes. Some further tweaks are envisioned, but this is good enough to provide more separation and has (finally) been stable under ports testing. Lots of ports testing and valid complaints from naddy@ and sthen@; feedback from mlarkin@ and sf@.
Diffstat (limited to 'sys')
-rw-r--r--sys/arch/amd64/amd64/acpi_wakecode.S12
-rw-r--r--sys/arch/amd64/amd64/cpu.c13
-rw-r--r--sys/arch/amd64/amd64/genassym.cf5
-rw-r--r--sys/arch/amd64/amd64/identcpu.c5
-rw-r--r--sys/arch/amd64/amd64/lapic.c14
-rw-r--r--sys/arch/amd64/amd64/locore.S10
-rw-r--r--sys/arch/amd64/amd64/pmap.c164
-rw-r--r--sys/arch/amd64/amd64/vector.S120
-rw-r--r--sys/arch/amd64/include/codepatch.h17
-rw-r--r--sys/arch/amd64/include/cpufunc.h13
-rw-r--r--sys/arch/amd64/include/i82489var.h5
-rw-r--r--sys/arch/amd64/include/pmap.h14
12 files changed, 345 insertions, 47 deletions
diff --git a/sys/arch/amd64/amd64/acpi_wakecode.S b/sys/arch/amd64/amd64/acpi_wakecode.S
index 59b5f2398f8..ccca99ef73c 100644
--- a/sys/arch/amd64/amd64/acpi_wakecode.S
+++ b/sys/arch/amd64/amd64/acpi_wakecode.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: acpi_wakecode.S,v 1.45 2018/08/14 16:43:02 deraadt Exp $ */
+/* $OpenBSD: acpi_wakecode.S,v 1.46 2018/10/04 05:00:40 guenther Exp $ */
/*
* Copyright (c) 2001 Takanori Watanabe <takawata@jp.freebsd.org>
* Copyright (c) 2001 Mitsuru IWASAKI <iwasaki@jp.freebsd.org>
@@ -404,6 +404,11 @@ _ACPI_TRMP_LABEL(.Lhibernate_resume_vector_2)
_ACPI_TRMP_OFFSET(.Lhibernate_resume_vector_3)
.code16
+ /* must clear CR4.PCIDE before clearing CR0.PG */
+ movl %cr4, %eax
+ andl $(~CR4_PCIDE), %eax
+ movl %eax, %cr4
+
movl %cr0, %eax
/* Disable CR0.PG - no paging */
andl $(~CR0_PG), %eax
@@ -441,6 +446,11 @@ _ACPI_TRMP_LABEL(.Lhibernate_resume_vector_2b)
_ACPI_TRMP_OFFSET(.Lhibernate_resume_vector_3b)
.code16
+ /* must clear CR4.PCIDE before clearing CR0.PG */
+ movl %cr4, %eax
+ andl $(~CR4_PCIDE), %eax
+ movl %eax, %cr4
+
movl %cr0, %eax
/* Disable CR0.PG - no paging */
andl $(~CR0_PG), %eax
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 26fd458db95..14ef556ecc6 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.128 2018/09/26 03:05:53 deraadt Exp $ */
+/* $OpenBSD: cpu.c,v 1.129 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */
/*-
@@ -174,7 +174,14 @@ replacemeltdown(void)
replacedone = 1;
s = splhigh();
- codepatch_nop(CPTAG_MELTDOWN_NOP);
+ if (!cpu_meltdown)
+ codepatch_nop(CPTAG_MELTDOWN_NOP);
+ else if (pmap_use_pcid) {
+ extern long _pcid_set_reuse;
+ DPRINTF("%s: codepatching PCID use", __func__);
+ codepatch_replace(CPTAG_PCID_SET_REUSE, &_pcid_set_reuse,
+ PCID_SET_REUSE_SIZE);
+ }
splx(s);
}
@@ -563,6 +570,8 @@ cpu_init(struct cpu_info *ci)
cr4 |= CR4_UMIP;
if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
cr4 |= CR4_OSXSAVE;
+ if (pmap_use_pcid)
+ cr4 |= CR4_PCIDE;
lcr4(cr4);
if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd) {
diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index 59235d4c8d9..de40400d03c 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.38 2018/07/10 08:57:44 guenther Exp $
+# $OpenBSD: genassym.cf,v 1.39 2018/10/04 05:00:40 guenther Exp $
# Written by Artur Grabowski art@openbsd.org, Public Domain
include <sys/param.h>
@@ -178,3 +178,6 @@ export NBPD_L2
export NPDPG
export PDIR_SLOT_DIRECT
+export PCID_PROC
+export PCID_PROC_INTEL
+export INVPCID_PCID
diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
index d21871bf464..a212651fcf5 100644
--- a/sys/arch/amd64/amd64/identcpu.c
+++ b/sys/arch/amd64/amd64/identcpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: identcpu.c,v 1.108 2018/08/24 06:25:40 jsg Exp $ */
+/* $OpenBSD: identcpu.c,v 1.109 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */
/*
@@ -639,11 +639,10 @@ identifycpu(struct cpu_info *ci)
if (cpu_meltdown)
printf(",MELTDOWN");
- else
- replacemeltdown();
printf("\n");
+ replacemeltdown();
x86_print_cacheinfo(ci);
/*
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index 222ad6fbf15..8643073023b 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lapic.c,v 1.52 2018/07/27 21:11:31 kettenis Exp $ */
+/* $OpenBSD: lapic.c,v 1.53 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */
/*-
@@ -361,11 +361,17 @@ lapic_boot_init(paddr_t lapic_base)
idt_allocmap[LAPIC_IPI_VECTOR] = 1;
idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
- idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
idt_allocmap[LAPIC_IPI_INVLPG] = 1;
- idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
- idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+ if (!pmap_use_pcid) {
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
+ idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+ } else {
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb_pcid);
+ idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_pcid);
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_pcid);
+ }
#endif
idt_allocmap[LAPIC_SPURIOUS_VECTOR] = 1;
idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index a205e681012..01f0d354f5d 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.109 2018/09/12 06:12:59 guenther Exp $ */
+/* $OpenBSD: locore.S,v 1.110 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */
/*
@@ -480,6 +480,8 @@ restore_saved:
* interrupt trampolines.
*/
movq PM_PDIRPA_INTEL(%rcx),%rdx
+ orq cr3_reuse_pcid,%rax
+ orq cr3_pcid_proc_intel,%rdx
movq %rax,CPUVAR(KERN_CR3)
movq %rdx,CPUVAR(USER_CR3)
CODEPATCH_END(CPTAG_MELTDOWN_NOP)
@@ -688,6 +690,7 @@ IDTVEC_NOALIGN(syscall)
CODEPATCH_START
movq %rax,CPUVAR(SCRATCH)
movq CPUVAR(USER_CR3),%rax
+ PCID_SET_REUSE_NOP
movq %rax,%cr3
Xsyscall_trampback:
0: pause
@@ -864,6 +867,7 @@ intr_user_exit_post_ast:
CODEPATCH_START
movq %rax,CPUVAR(SCRATCH)
movq CPUVAR(USER_CR3),%rax
+ PCID_SET_REUSE_NOP
movq %rax,%cr3
Xiretq_trampback:
KTEXT_PAGE_END
@@ -1099,6 +1103,10 @@ _C_LABEL(_xsave):
_C_LABEL(_xsaveopt):
.byte 0x48; xsaveopt (%rdi) /* really xsaveopt64 */
+ .globl _C_LABEL(_pcid_set_reuse)
+_C_LABEL(_pcid_set_reuse):
+ orl $(CR3_REUSE_PCID >> 32),CPUVAR(USER_CR3 + 4)
+
ENTRY(pagezero)
RETGUARD_SETUP(pagezero, r11)
movq $-PAGE_SIZE,%rdx
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 8c1c946a687..92284383385 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.120 2018/09/12 07:00:51 guenther Exp $ */
+/* $OpenBSD: pmap.c,v 1.121 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -230,6 +230,24 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
int pmap_pg_wc = PG_UCMINUS;
/*
+ * pmap_use_pcid: nonzero if PCID use is enabled (currently we require INVPCID)
+ *
+ * The next three are zero unless and until PCID support is enabled so code
+ * can just 'or' them in as needed without tests.
+ * cr3_pcid: CR3_REUSE_PCID
+ * cr3_pcid_proc and cr3_pcid_temp: PCID_PROC and PCID_TEMP
+ */
+#if PCID_KERN != 0
+# error "pmap.c assumes PCID_KERN is zero"
+#endif
+int pmap_use_pcid;
+static u_int cr3_pcid_proc;
+static u_int cr3_pcid_temp;
+/* these two are accessed from locore.o */
+paddr_t cr3_reuse_pcid;
+paddr_t cr3_pcid_proc_intel;
+
+/*
* other data structures
*/
@@ -312,6 +330,7 @@ boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *);
boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t *);
void pmap_alloc_level(vaddr_t, int, long *);
+static inline
void pmap_sync_flags_pte(struct vm_page *, u_long);
void pmap_tlb_shootpage(struct pmap *, vaddr_t, int);
@@ -336,7 +355,7 @@ static __inline boolean_t
pmap_is_curpmap(struct pmap *pmap)
{
return((pmap == pmap_kernel()) ||
- (pmap->pm_pdirpa == (paddr_t) rcr3()));
+ (pmap->pm_pdirpa == (rcr3() & CR3_PADDR)));
}
/*
@@ -359,7 +378,7 @@ pmap_pte2flags(u_long pte)
((pte & PG_M) ? PG_PMAP_MOD : 0));
}
-void
+static inline void
pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
{
if (pte & (PG_U|PG_M)) {
@@ -391,10 +410,13 @@ pmap_map_ptes(struct pmap *pmap)
mtx_enter(&pmap->pm_mtx);
cr3 = rcr3();
- if (pmap->pm_pdirpa == cr3)
+ KASSERT((cr3 & CR3_PCID) == PCID_KERN ||
+ (cr3 & CR3_PCID) == PCID_PROC);
+ if (pmap->pm_pdirpa == (cr3 & CR3_PADDR))
cr3 = 0;
else {
- lcr3(pmap->pm_pdirpa);
+ cr3 |= cr3_reuse_pcid;
+ lcr3(pmap->pm_pdirpa | cr3_pcid_temp);
}
return cr3;
@@ -597,6 +619,23 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
curpcb->pcb_pmap = kpm; /* proc0's pcb */
/*
+ * Configure and enable PCID use if supported.
+ * Currently we require INVPCID support.
+ */
+ if ((cpu_ecxfeature & CPUIDECX_PCID) && cpuid_level >= 0x07) {
+ uint32_t ebx, dummy;
+ CPUID_LEAF(0x7, 0, dummy, ebx, dummy, dummy);
+ if (ebx & SEFF0EBX_INVPCID) {
+ pmap_use_pcid = 1;
+ lcr4( rcr4() | CR4_PCIDE );
+ cr3_pcid_proc = PCID_PROC;
+ cr3_pcid_temp = PCID_TEMP;
+ cr3_reuse_pcid = CR3_REUSE_PCID;
+ cr3_pcid_proc_intel = PCID_PROC_INTEL;
+ }
+ }
+
+ /*
* Add PG_G attribute to already mapped kernel pages. pg_g_kern
* is calculated in locore0.S and may be set to:
*
@@ -1183,6 +1222,9 @@ pmap_activate(struct proc *p)
pcb->pcb_pmap = pmap;
pcb->pcb_cr3 = pmap->pm_pdirpa;
+ pcb->pcb_cr3 |= (pmap != pmap_kernel()) ? cr3_pcid_proc :
+ (PCID_KERN | cr3_reuse_pcid);
+
if (p == curproc) {
lcr3(pcb->pcb_cr3);
@@ -1190,8 +1232,9 @@ pmap_activate(struct proc *p)
if (cpu_meltdown) {
struct cpu_info *self = curcpu();
- self->ci_kern_cr3 = pcb->pcb_cr3;
- self->ci_user_cr3 = pmap->pm_pdirpa_intel;
+ self->ci_kern_cr3 = pcb->pcb_cr3 | cr3_reuse_pcid;
+ self->ci_user_cr3 = pmap->pm_pdirpa_intel |
+ cr3_pcid_proc_intel;
}
/*
@@ -1552,7 +1595,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
goto cleanup;
}
- if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ if ((eva - sva > 32 * PAGE_SIZE) && sva < VM_MIN_KERNEL_ADDRESS)
shootall = 1;
for (va = sva; va < eva; va = blkendva) {
@@ -1853,7 +1896,7 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
if (!(prot & PROT_EXEC))
nx = pg_nx;
- if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
+ if ((eva - sva > 32 * PAGE_SIZE) && sva < VM_MIN_KERNEL_ADDRESS)
shootall = 1;
for (va = sva; va < eva ; va = blockend) {
@@ -2854,6 +2897,7 @@ volatile long tlb_shoot_wait __attribute__((section(".kudata")));
volatile vaddr_t tlb_shoot_addr1 __attribute__((section(".kudata")));
volatile vaddr_t tlb_shoot_addr2 __attribute__((section(".kudata")));
+volatile int tlb_shoot_first_pcid __attribute__((section(".kudata")));
void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
@@ -2892,6 +2936,7 @@ pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
#endif
}
}
+ tlb_shoot_first_pcid = is_kva ? PCID_KERN : PCID_PROC;
tlb_shoot_addr1 = va;
CPU_INFO_FOREACH(cii, ci) {
if ((mask & (1ULL << ci->ci_cpuid)) == 0)
@@ -2902,8 +2947,17 @@ pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
splx(s);
}
- if (shootself)
- pmap_update_pg(va);
+ if (!pmap_use_pcid) {
+ if (shootself)
+ pmap_update_pg(va);
+ } else if (is_kva) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_KERN, va);
+ } else if (shootself) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ if (cpu_meltdown)
+ invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
+ }
}
void
@@ -2944,6 +2998,7 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
#endif
}
}
+ tlb_shoot_first_pcid = is_kva ? PCID_KERN : PCID_PROC;
tlb_shoot_addr1 = sva;
tlb_shoot_addr2 = eva;
CPU_INFO_FOREACH(cii, ci) {
@@ -2955,9 +3010,27 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
splx(s);
}
- if (shootself)
- for (va = sva; va < eva; va += PAGE_SIZE)
- pmap_update_pg(va);
+ if (!pmap_use_pcid) {
+ if (shootself) {
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
+ }
+ } else if (is_kva) {
+ for (va = sva; va < eva; va += PAGE_SIZE) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_KERN, va);
+ }
+ } else if (shootself) {
+ if (cpu_meltdown) {
+ for (va = sva; va < eva; va += PAGE_SIZE) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
+ }
+ } else {
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ }
+ }
}
void
@@ -3007,8 +3080,15 @@ pmap_tlb_shoottlb(struct pmap *pm, int shootself)
splx(s);
}
- if (shootself)
- tlbflush();
+ if (shootself) {
+ if (!pmap_use_pcid)
+ tlbflush();
+ else {
+ invpcid(INVPCID_PCID, PCID_PROC, 0);
+ if (cpu_meltdown)
+ invpcid(INVPCID_PCID, PCID_PROC_INTEL, 0);
+ }
+ }
}
void
@@ -3034,9 +3114,17 @@ pmap_tlb_shootwait(void)
void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
{
- if (shootself)
- pmap_update_pg(va);
-
+ if (!pmap_use_pcid) {
+ if (shootself)
+ pmap_update_pg(va);
+ } else if (va >= VM_MIN_KERNEL_ADDRESS) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_KERN, va);
+ } else if (shootself) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ if (cpu_meltdown)
+ invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
+ }
}
void
@@ -3044,18 +3132,40 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
{
vaddr_t va;
- if (!shootself)
- return;
-
- for (va = sva; va < eva; va += PAGE_SIZE)
- pmap_update_pg(va);
-
+ if (!pmap_use_pcid) {
+ if (shootself) {
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ pmap_update_pg(va);
+ }
+ } else if (sva >= VM_MIN_KERNEL_ADDRESS) {
+ for (va = sva; va < eva; va += PAGE_SIZE) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_KERN, va);
+ }
+ } else if (shootself) {
+ if (cpu_meltdown) {
+ for (va = sva; va < eva; va += PAGE_SIZE) {
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ invpcid(INVPCID_ADDR, PCID_PROC_INTEL, va);
+ }
+ } else {
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ invpcid(INVPCID_ADDR, PCID_PROC, va);
+ }
+ }
}
void
pmap_tlb_shoottlb(struct pmap *pm, int shootself)
{
- if (shootself)
- tlbflush();
+ if (shootself) {
+ if (!pmap_use_pcid)
+ tlbflush();
+ else {
+ invpcid(INVPCID_PCID, PCID_PROC, 0);
+ if (cpu_meltdown)
+ invpcid(INVPCID_PCID, PCID_PROC_INTEL, 0);
+ }
+ }
}
#endif /* MULTIPROCESSOR */
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index f4d4c4ec3b5..fb5f02b749f 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: vector.S,v 1.75 2018/07/24 02:42:25 guenther Exp $ */
+/* $OpenBSD: vector.S,v 1.76 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */
/*
@@ -519,6 +519,11 @@ KIDTVEC_FALLTHROUGH(resume_lapic_ipi)
orq %rax,CPUVAR(IPENDING)
INTRFASTEXIT
+/*
+ * "Fast" IPI handlers. These are the IPIs which are handled without
+ * unblocking interrupts, so no need for 'recurse' or 'resume' entry points
+ */
+/* invalidate the entire TLB, no PCIDs version */
IDTVEC(ipi_invltlb)
pushq %rax
@@ -533,6 +538,7 @@ IDTVEC(ipi_invltlb)
popq %rax
iretq
+/* invalidate a single page, no PCIDs version */
IDTVEC(ipi_invlpg)
pushq %rax
@@ -547,6 +553,7 @@ IDTVEC(ipi_invlpg)
popq %rax
iretq
+/* invalidate a range of pages, no PCIDs version */
IDTVEC(ipi_invlrange)
pushq %rax
pushq %rdx
@@ -567,6 +574,117 @@ IDTVEC(ipi_invlrange)
popq %rax
iretq
+/*
+ * Invalidate the userspace PCIDs.
+ */
+IDTVEC(ipi_invltlb_pcid)
+ pushq %rax
+
+ ioapic_asm_ack()
+
+ /* set the type */
+ movl $INVPCID_PCID,%eax
+
+ /* finish getting space for the INVPCID descriptor */
+#if INVPCID_PCID == PCID_PROC
+ pushq %rax
+#else
+ pushq $PCID_PROC
+#endif
+
+ invpcid (%rsp),%rax
+
+ /* bump the pcid in the descriptor and invpcid again */
+ movl $PCID_PROC_INTEL,(%rsp)
+ invpcid (%rsp),%rax
+
+ lock
+ decq tlb_shoot_wait
+
+ /* restore the stack */
+ popq %rax
+ popq %rax
+ iretq
+
+/*
+ * Invalidate a VA in two PCIDs. Kernel VAs are present in PCIDs 0 and 1,
+ * while userspace VAs are present in PCIDs 1 and 2.
+ */
+IDTVEC(ipi_invlpg_pcid)
+ pushq %rax
+
+ ioapic_asm_ack()
+
+ /* space for the INVPCID descriptor */
+ subq $16,%rsp
+
+ /* set the PCID in the descriptor */
+ movl tlb_shoot_first_pcid,%eax
+ movq %rax,(%rsp)
+
+ /* set the address in the descriptor */
+ movq tlb_shoot_addr1,%rax
+ movq %rax,8(%rsp)
+
+ /* set the type to zero, and invpcid */
+ xorl %eax,%eax
+ invpcid (%rsp),%rax
+
+ /* bump the pcid in the descriptor and invpcid again */
+ addl $1,(%rsp)
+ invpcid (%rsp),%rax
+
+ lock
+ decq tlb_shoot_wait
+
+ /* restore the stack */
+ addq $16,%rsp
+ popq %rax
+ iretq
+
+/*
+ * Invalidate a range of VA in two PCIDs. Kernel VAs are present in
+ * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
+ */
+IDTVEC(ipi_invlrange_pcid)
+ pushq %rax
+ pushq %rdx
+ pushq %rcx
+
+ ioapic_asm_ack()
+
+ /* space for the INVPCID descriptor */
+ subq $16,%rsp
+
+ /* set the PCID in the descriptor */
+ movl tlb_shoot_first_pcid,%eax
+ movq %rax,(%rsp)
+
+ /* set up for the loop: load the limit and set the type to zero */
+ movq tlb_shoot_addr2,%rdx
+ xorl %ecx,%ecx
+
+ /* set the address in the descriptor and loop the invalidate */
+ movq tlb_shoot_addr1,%rax
+1: movq %rax,8(%rsp)
+ invpcid (%rsp),%rcx
+ addl $1,(%rsp)
+ invpcid (%rsp),%rcx
+ subl $1,(%rsp)
+ addq $PAGE_SIZE,%rax
+ cmpq %rdx,%rax
+ jb 1b
+
+ lock
+ decq tlb_shoot_wait
+
+ /* restore the stack */
+ addq $16,%rsp
+ popq %rcx
+ popq %rdx
+ popq %rax
+ iretq
+
#endif /* MULTIPROCESSOR */
/*
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index a5bfc304e1f..74f8f198113 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: codepatch.h,v 1.7 2018/07/13 08:30:34 sf Exp $ */
+/* $OpenBSD: codepatch.h,v 1.8 2018/10/04 05:00:40 guenther Exp $ */
/*
* Copyright (c) 2014-2015 Stefan Fritsch <sf@sfritsch.de>
*
@@ -42,14 +42,15 @@ void codepatch_disable(void);
/*
* Mark the end of some code to be patched, and assign the given tag.
*/
-#define CODEPATCH_END(tag) \
+#define CODEPATCH_END2(startnum,tag) \
999: \
.section .codepatch, "a" ;\
- .quad 998b ;\
- .short (999b - 998b) ;\
+ .quad startnum##b ;\
+ .short (999b - startnum##b) ;\
.short tag ;\
.int 0 ;\
.previous
+#define CODEPATCH_END(tag) CODEPATCH_END2(998,tag)
#define CPTAG_STAC 1
#define CPTAG_CLAC 2
@@ -57,6 +58,7 @@ void codepatch_disable(void);
#define CPTAG_XRSTOR 4
#define CPTAG_XSAVE 5
#define CPTAG_MELTDOWN_NOP 6
+#define CPTAG_PCID_SET_REUSE 7
/*
* As stac/clac SMAP instructions are 3 bytes, we want the fastest
@@ -75,4 +77,11 @@ void codepatch_disable(void);
SMAP_NOP ;\
CODEPATCH_END(CPTAG_CLAC)
+#define PCID_SET_REUSE_SIZE 12
+#define PCID_SET_REUSE_NOP \
+ 997: ;\
+ .byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 ;\
+ .byte 0x0f, 0x1f, 0x40, 0x00 ;\
+ CODEPATCH_END2(997, CPTAG_PCID_SET_REUSE)
+
#endif /* _MACHINE_CODEPATCH_H_ */
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 8a483549f25..5e2437e1b95 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.30 2018/07/27 21:11:31 kettenis Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.31 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */
/*-
@@ -145,6 +145,17 @@ tlbflush(void)
__asm volatile("movq %0,%%cr3" : : "r" (val));
}
+static inline void
+invpcid(uint64_t type, paddr_t pcid, paddr_t addr)
+{
+ uint64_t desc[2] = { pcid, addr };
+ asm volatile("invpcid %0,%1" : : "m"(desc[0]), "r"(type));
+}
+#define INVPCID_ADDR 0
+#define INVPCID_PCID 1
+#define INVPCID_ALL 2
+#define INVPCID_NON_GLOBAL 3
+
#ifdef notyet
void setidt(int idx, /*XXX*/caddr_t func, int typ, int dpl);
#endif
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index 746e3436d93..374c614749f 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: i82489var.h,v 1.17 2016/06/22 01:12:38 mikeb Exp $ */
+/* $OpenBSD: i82489var.h,v 1.18 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: i82489var.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */
/*-
@@ -72,8 +72,11 @@ extern void Xresume_lapic_ipi(void);
#define LAPIC_IPI_INVLRANGE (LAPIC_IPI_OFFSET + 2)
extern void Xipi_invltlb(void);
+extern void Xipi_invltlb_pcid(void);
extern void Xipi_invlpg(void);
+extern void Xipi_invlpg_pcid(void);
extern void Xipi_invlrange(void);
+extern void Xipi_invlrange_pcid(void);
/*
* Vector used for local apic timer interrupts.
diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h
index f69af43e8eb..d0e7fad264c 100644
--- a/sys/arch/amd64/include/pmap.h
+++ b/sys/arch/amd64/include/pmap.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.h,v 1.68 2018/09/30 18:46:09 guenther Exp $ */
+/* $OpenBSD: pmap.h,v 1.69 2018/10/04 05:00:40 guenther Exp $ */
/* $NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */
/*
@@ -243,6 +243,18 @@
/* PG_AVAIL3 not used */
/*
+ * PCID assignments.
+ * The shootdown code assumes KERN, PROC, and PROC_INTEL are both
+ * consecutive and in that order.
+ */
+#define PCID_KERN 0 /* for pmap_kernel() */
+#define PCID_PROC 1 /* non-pmap_kernel(), U+K */
+#define PCID_PROC_INTEL 2 /* non-pmap_kernel(), U-K (meltdown) */
+#define PCID_TEMP 3 /* temp mapping of another non-pmap_kernel() */
+
+extern int pmap_use_pcid; /* non-zero if PCID support is enabled */
+
+/*
* Number of PTEs per cache line. 8 byte pte, 64-byte cache line
* Used to avoid false sharing of cache lines.
*/