diff options
author | Philip Guenther <guenther@cvs.openbsd.org> | 2018-06-05 06:39:12 +0000 |
---|---|---|
committer | Philip Guenther <guenther@cvs.openbsd.org> | 2018-06-05 06:39:12 +0000 |
commit | 8e680f241bb5bb67367999a49badc74bd9ebdb49 (patch) | |
tree | 81653238f19b23de3afab07e92861309bac11e9b /sys/arch/amd64 | |
parent | 8959fd48e4ccfafb5a9e130e20d8faafc31ff66f (diff) |
Switch from lazy FPU switching to semi-eager FPU switching: track whether
curproc's xstate ("extended state") is loaded in the CPU or not.
- context switch, sendsig(), vmm, and doing CPU crypto in the kernel all
check the flag and, if set, save the old thread's state to the PCB,
clear the flag, and then load the _blank_ state
- when returning to userspace, if the flag is clear then set it and restore
the thread's state
This simpler tracking also fixes the restoring of FPU state after nested
signal handlers.
With this, %cr0's TS flag is never set, the FPU #DNA trap can no
longer happen, and IPIs are no longer necessary for flushing or
syncing FPU state; on the other hand, restoring xstate while returning
to userspace means we have to handle xrstor faulting if we could
be loading an altered state. If that happens, reset the state,
fake a #GP fault (SIGBUS), and recheck for ASTs.
While here, regularize fxsave/fxrstor vs xsave/xrstor handling, by
using codepatching to switch to xsave/xrstor when present in the
CPU. In addition, code patch in use of xsaveopt in most places
when the CPU supports that. Use the 64bit-wide variants of the
instructions in all cases so that x87 instruction fault IPs are
reported correctly.
This change has three motivations:
1) with modern clang, SSE registers are used even in rcrt0.o, making
lazy FPU switching a smaller benefit vs trap costs
2) the Intel SDM warns that lazy FPU switching may increase power costs
3) post-Spectre rumors suggest that the %cr0 TS flag might not block
speculation, permitting leaking of information about FPU state
(AES keys?) across protection boundaries.
tested by many in snaps; prodding from deraadt@
Diffstat (limited to 'sys/arch/amd64')
-rw-r--r-- | sys/arch/amd64/amd64/acpi_machdep.c | 5 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/cpu.c | 47 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/fpu.c | 244 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/genassym.cf | 8 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/ipifuncs.c | 24 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/locore.S | 238 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/machdep.c | 64 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/mptramp.S | 4 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/process_machdep.c | 24 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/trap.c | 3 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/vector.S | 16 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/via.c | 23 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/vm_machdep.c | 22 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/vmm.c | 79 | ||||
-rw-r--r-- | sys/arch/amd64/include/codepatch.h | 4 | ||||
-rw-r--r-- | sys/arch/amd64/include/cpu.h | 10 | ||||
-rw-r--r-- | sys/arch/amd64/include/fpu.h | 28 | ||||
-rw-r--r-- | sys/arch/amd64/include/intrdefs.h | 8 | ||||
-rw-r--r-- | sys/arch/amd64/include/pcb.h | 5 | ||||
-rw-r--r-- | sys/arch/amd64/include/proc.h | 3 | ||||
-rw-r--r-- | sys/arch/amd64/include/specialreg.h | 4 |
21 files changed, 360 insertions, 503 deletions
diff --git a/sys/arch/amd64/amd64/acpi_machdep.c b/sys/arch/amd64/amd64/acpi_machdep.c index 268bab12c7c..4f1809b63aa 100644 --- a/sys/arch/amd64/amd64/acpi_machdep.c +++ b/sys/arch/amd64/amd64/acpi_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: acpi_machdep.c,v 1.80 2018/01/11 22:31:09 patrick Exp $ */ +/* $OpenBSD: acpi_machdep.c,v 1.81 2018/06/05 06:39:10 guenther Exp $ */ /* * Copyright (c) 2005 Thorsten Lockert <tholo@sigmasoft.com> * @@ -384,7 +384,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, int state) */ if (acpi_savecpu()) { /* Suspend path */ - fpusave_cpu(curcpu(), 1); + KASSERT((curcpu()->ci_flags & CPUF_USERXSTATE) == 0); wbinvd(); #ifdef HIBERNATE @@ -411,6 +411,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, int state) return (ECANCELED); } /* Resume path */ + fpureset(); /* Reset the vectors */ sc->sc_facs->wakeup_vector = 0; diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c index 00bec4839ed..cd3c73903e7 100644 --- a/sys/arch/amd64/amd64/cpu.c +++ b/sys/arch/amd64/amd64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.120 2018/05/26 23:09:39 guenther Exp $ */ +/* $OpenBSD: cpu.c,v 1.121 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ /*- @@ -70,6 +70,7 @@ #include "pvbus.h" #include <sys/param.h> +#include <sys/proc.h> #include <sys/timeout.h> #include <sys/systm.h> #include <sys/device.h> @@ -77,6 +78,7 @@ #include <sys/memrange.h> #include <dev/rndvar.h> #include <sys/atomic.h> +#include <sys/user.h> #include <uvm/uvm_extern.h> @@ -408,7 +410,6 @@ cpu_attach(struct device *parent, struct device *self, void *aux) pcb->pcb_kstack = kstack + USPACE - 16; pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE - 16; pcb->pcb_pmap = pmap_kernel(); - pcb->pcb_cr0 = rcr0(); pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; #endif @@ -496,6 +497,29 @@ cpu_attach(struct device *parent, struct device *self, void *aux) #endif /* NVMM > 0 */ } +static void +replacexsave(void) +{ + extern long _xrstor, _xsave, _xsaveopt; + 
u_int32_t eax, ebx, ecx, edx; + static int replacedone = 0; + int s; + + if (replacedone) + return; + replacedone = 1; + + /* find out whether xsaveopt is supported */ + CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx); + printf("using xsave%s\n", (eax & 1) ? "opt" : ""); + + s = splhigh(); + codepatch_replace(CPTAG_XRSTOR, &_xrstor, 4); + codepatch_replace(CPTAG_XSAVE, (eax & 1) ? &_xsaveopt : &_xsave, 4); + splx(s); +} + + /* * Initialize the processor appropriately. */ @@ -503,6 +527,7 @@ cpu_attach(struct device *parent, struct device *self, void *aux) void cpu_init(struct cpu_info *ci) { + struct savefpu *sfp; u_int cr4; /* configure the CPU if needed */ @@ -540,8 +565,19 @@ cpu_init(struct cpu_info *ci) } else { KASSERT(ebx == fpu_save_len); } + + replacexsave(); } + /* Give proc0 a clean FPU save area */ + sfp = &proc0.p_addr->u_pcb.pcb_savefpu; + memset(sfp, 0, fpu_save_len); + if (xsave_mask) { + /* must not use xsaveopt here */ + xsave(sfp, xsave_mask); + } else + fxsave(sfp); + #if NVMM > 0 /* Re-enable VMM if needed */ if (ci->ci_flags & CPUF_VMM) @@ -769,15 +805,14 @@ cpu_debug_dump(void) struct cpu_info *ci; CPU_INFO_ITERATOR cii; - db_printf("addr dev id flags ipis curproc fpcurproc\n"); + db_printf("addr dev id flags ipis curproc\n"); CPU_INFO_FOREACH(cii, ci) { - db_printf("%p %s %u %x %x %10p %10p\n", + db_printf("%p %s %u %x %x %10p\n", ci, ci->ci_dev == NULL ? 
"BOOT" : ci->ci_dev->dv_xname, ci->ci_cpuid, ci->ci_flags, ci->ci_ipis, - ci->ci_curproc, - ci->ci_fpcurproc); + ci->ci_curproc); } } #endif diff --git a/sys/arch/amd64/amd64/fpu.c b/sys/arch/amd64/amd64/fpu.c index 07e16a0efb6..a853bd2c342 100644 --- a/sys/arch/amd64/amd64/fpu.c +++ b/sys/arch/amd64/amd64/fpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fpu.c,v 1.39 2017/10/25 22:29:41 mikeb Exp $ */ +/* $OpenBSD: fpu.c,v 1.40 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: fpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */ /*- @@ -48,33 +48,13 @@ #include <machine/specialreg.h> #include <machine/fpu.h> -int xrstor_user(struct savefpu *_addr, uint64_t _mask); void trap(struct trapframe *); /* - * We do lazy initialization and switching using the TS bit in cr0 and the - * MDP_USEDFPU bit in mdproc. - * - * DNA exceptions are handled like this: - * - * 1) If there is no FPU, return and go to the emulator. - * 2) If someone else has used the FPU, save its state into that process' PCB. - * 3a) If MDP_USEDFPU is not set, set it and initialize the FPU. - * 3b) Otherwise, reload the process' previous FPU state. - * - * When a process is created or exec()s, its saved cr0 image has the TS bit - * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the - * process first gets a DNA and the FPU is initialized. The TS bit is turned - * off when the FPU is used, and turned on again later when the process' FPU - * state is saved. - */ - -/* * The mask of enabled XSAVE features. 
*/ uint64_t xsave_mask; -void fpudna(struct cpu_info *, struct trapframe *); static int x86fpflags_to_siginfo(u_int32_t); /* @@ -94,7 +74,6 @@ uint32_t fpu_mxcsr_mask; void fpuinit(struct cpu_info *ci) { - lcr0(rcr0() & ~(CR0_EM|CR0_TS)); fninit(); if (fpu_mxcsr_mask == 0) { struct fxsave64 fx __attribute__((aligned(16))); @@ -106,7 +85,6 @@ fpuinit(struct cpu_info *ci) else fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__; } - lcr0(rcr0() | (CR0_TS)); } /* @@ -119,23 +97,18 @@ fpuinit(struct cpu_info *ci) void fputrap(struct trapframe *frame) { - struct proc *p = curcpu()->ci_fpcurproc; + struct cpu_info *ci = curcpu(); + struct proc *p = curproc; struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu; u_int32_t mxcsr, statbits; u_int16_t cw; int code; union sigval sv; -#ifdef DIAGNOSTIC - /* - * At this point, fpcurproc should be curproc. If it wasn't, - * the TS bit should be set, and we should have gotten a DNA exception. - */ - if (p != curproc) - panic("fputrap: wrong proc"); -#endif + KASSERT(ci->ci_flags & CPUF_USERXSTATE); + ci->ci_flags &= ~CPUF_USERXSTATE; + fpusavereset(sfp); - fxsave(sfp); if (frame->tf_trapno == T_XMM) { mxcsr = sfp->fp_fxsave.fx_mxcsr; statbits = mxcsr; @@ -180,212 +153,21 @@ x86fpflags_to_siginfo(u_int32_t flags) return (FPE_FLTINV); } -/* - * Implement device not available (DNA) exception - * - * If we were the last process to use the FPU, we can simply return. - * Otherwise, we save the previous state, if necessary, and restore our last - * saved state. - */ -void -fpudna(struct cpu_info *ci, struct trapframe *frame) -{ - struct savefpu *sfp; - struct proc *p; - int s; - - if (ci->ci_fpsaving) { - printf("recursive fpu trap; cr0=%x\n", rcr0()); - return; - } - - s = splipi(); - -#ifdef MULTIPROCESSOR - p = ci->ci_curproc; -#else - p = curproc; -#endif - - /* - * Initialize the FPU state to clear any exceptions. If someone else - * was using the FPU, save their state. 
- */ - if (ci->ci_fpcurproc != NULL && ci->ci_fpcurproc != p) { - fpusave_cpu(ci, ci->ci_fpcurproc != &proc0); - uvmexp.fpswtch++; - } - splx(s); - - if (p == NULL) { - clts(); - return; - } - - KDASSERT(ci->ci_fpcurproc == NULL); -#ifndef MULTIPROCESSOR - KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL); -#else - if (p->p_addr->u_pcb.pcb_fpcpu != NULL) - fpusave_proc(p, 1); -#endif - - p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS; - clts(); - - s = splipi(); - ci->ci_fpcurproc = p; - p->p_addr->u_pcb.pcb_fpcpu = ci; - splx(s); - - sfp = &p->p_addr->u_pcb.pcb_savefpu; - - if ((p->p_md.md_flags & MDP_USEDFPU) == 0) { - fninit(); - bzero(&sfp->fp_fxsave, sizeof(sfp->fp_fxsave)); - sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__; - sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; - fxrstor(&sfp->fp_fxsave); - p->p_md.md_flags |= MDP_USEDFPU; - } else { - if (xsave_mask) { - if (xrstor_user(sfp, xsave_mask)) { - fpusave_proc(p, 0); /* faulted */ - frame->tf_trapno = T_PROTFLT; - trap(frame); - return; - } - } else { - static double zero = 0.0; - - /* - * amd fpu does not restore fip, fdp, fop on fxrstor - * thus leaking other process's execution history. - */ - fnclex(); - __asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero)); - fxrstor(sfp); - } - } -} - - -void -fpusave_cpu(struct cpu_info *ci, int save) -{ - struct proc *p; - int s; - - KDASSERT(ci == curcpu()); - - p = ci->ci_fpcurproc; - if (p == NULL) - return; - - if (save) { -#ifdef DIAGNOSTIC - if (ci->ci_fpsaving != 0) - panic("fpusave_cpu: recursive save!"); -#endif - /* - * Set ci->ci_fpsaving, so that any pending exception will be - * thrown away. (It will be caught again if/when the FPU - * state is restored.) 
- */ - clts(); - ci->ci_fpsaving = 1; - if (xsave_mask) - xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask); - else - fxsave(&p->p_addr->u_pcb.pcb_savefpu); - ci->ci_fpsaving = 0; - } - - stts(); - p->p_addr->u_pcb.pcb_cr0 |= CR0_TS; - - s = splipi(); - p->p_addr->u_pcb.pcb_fpcpu = NULL; - ci->ci_fpcurproc = NULL; - splx(s); -} - -/* - * Save p's FPU state, which may be on this processor or another processor. - */ -void -fpusave_proc(struct proc *p, int save) -{ - struct cpu_info *ci = curcpu(); - struct cpu_info *oci; - - KDASSERT(p->p_addr != NULL); - - oci = p->p_addr->u_pcb.pcb_fpcpu; - if (oci == NULL) - return; - -#if defined(MULTIPROCESSOR) - if (oci == ci) { - int s = splipi(); - fpusave_cpu(ci, save); - splx(s); - } else { - oci->ci_fpsaveproc = p; - x86_send_ipi(oci, - save ? X86_IPI_SYNCH_FPU : X86_IPI_FLUSH_FPU); - while (p->p_addr->u_pcb.pcb_fpcpu != NULL) - CPU_BUSY_CYCLE(); - } -#else - KASSERT(ci->ci_fpcurproc == p); - fpusave_cpu(ci, save); -#endif -} - void fpu_kernel_enter(void) { - struct cpu_info *ci = curcpu(); - struct savefpu *sfp; - int s; - - /* - * Fast path. If the kernel was using the FPU before, there - * is no work to do besides clearing TS. 
- */ - if (ci->ci_fpcurproc == &proc0) { - clts(); - return; - } - - s = splipi(); + struct cpu_info *ci = curcpu(); - if (ci->ci_fpcurproc != NULL) { - fpusave_cpu(ci, 1); - uvmexp.fpswtch++; + /* save curproc's FPU state if we haven't already */ + if (ci->ci_flags & CPUF_USERXSTATE) { + ci->ci_flags &= ~CPUF_USERXSTATE; + fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu); } - - /* Claim the FPU */ - ci->ci_fpcurproc = &proc0; - - splx(s); - - /* Disable DNA exceptions */ - clts(); - - /* Initialize the FPU */ - fninit(); - sfp = &proc0.p_addr->u_pcb.pcb_savefpu; - memset(&sfp->fp_fxsave, 0, sizeof(sfp->fp_fxsave)); - sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__; - sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; - fxrstor(&sfp->fp_fxsave); } void fpu_kernel_exit(void) { - /* Enable DNA exceptions */ - stts(); + /* make sure we don't leave anything in the registers */ + fpureset(); } diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index 1d71bf8a2eb..bd6892d9979 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.35 2018/05/23 05:37:01 guenther Exp $ +# $OpenBSD: genassym.cf,v 1.36 2018/06/05 06:39:10 guenther Exp $ # Written by Artur Grabowski art@openbsd.org, Public Domain include <sys/param.h> @@ -94,9 +94,8 @@ member pcb_rbp member pcb_kstack member pcb_fsbase member pcb_onfault -member pcb_fpcpu member pcb_pmap -member pcb_cr0 +member pcb_savefpu struct pmap member pm_cpus @@ -131,7 +130,8 @@ member CPU_INFO_USER_CR3 ci_user_cr3 member CPU_INFO_KERN_RSP ci_kern_rsp member CPU_INFO_INTR_RSP ci_intr_rsp -export CPUF_USERSEGS_BIT +export CPUF_USERSEGS +export CPUF_USERXSTATE struct intrsource member is_recurse diff --git a/sys/arch/amd64/amd64/ipifuncs.c b/sys/arch/amd64/amd64/ipifuncs.c index 81b5d079b67..d8b01111354 100644 --- a/sys/arch/amd64/amd64/ipifuncs.c +++ b/sys/arch/amd64/amd64/ipifuncs.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ipifuncs.c,v 1.30 2018/04/24 
20:29:15 guenther Exp $ */ +/* $OpenBSD: ipifuncs.c,v 1.31 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: ipifuncs.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */ /*- @@ -61,9 +61,6 @@ void x86_64_ipi_nop(struct cpu_info *); void x86_64_ipi_halt(struct cpu_info *); -void x86_64_ipi_synch_fpu(struct cpu_info *); -void x86_64_ipi_flush_fpu(struct cpu_info *); - #if NVMM > 0 void x86_64_ipi_start_vmm(struct cpu_info *); void x86_64_ipi_stop_vmm(struct cpu_info *); @@ -84,8 +81,8 @@ void (*ipifunc[X86_NIPI])(struct cpu_info *) = { x86_64_ipi_halt, x86_64_ipi_nop, - x86_64_ipi_flush_fpu, - x86_64_ipi_synch_fpu, + NULL, + NULL, NULL, x86_64_ipi_reload_mtrr, x86_setperf_ipi, @@ -114,7 +111,6 @@ x86_64_ipi_halt(struct cpu_info *ci) SCHED_ASSERT_UNLOCKED(); KASSERT(!_kernel_lock_held()); - fpusave_cpu(ci, 1); disable_intr(); lapic_disable(); wbinvd(); @@ -126,20 +122,6 @@ x86_64_ipi_halt(struct cpu_info *ci) } } -void -x86_64_ipi_flush_fpu(struct cpu_info *ci) -{ - if (ci->ci_fpsaveproc == ci->ci_fpcurproc) - fpusave_cpu(ci, 0); -} - -void -x86_64_ipi_synch_fpu(struct cpu_info *ci) -{ - if (ci->ci_fpsaveproc == ci->ci_fpcurproc) - fpusave_cpu(ci, 1); -} - #ifdef MTRR void x86_64_ipi_reload_mtrr(struct cpu_info *ci) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 8e94a2aae9b..7cea1392a31 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.97 2018/06/05 05:04:31 guenther Exp $ */ +/* $OpenBSD: locore.S,v 1.98 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ /* @@ -113,9 +113,11 @@ #include <sys/syscall.h> #include <machine/param.h> +#include <machine/codepatch.h> #include <machine/psl.h> #include <machine/segments.h> #include <machine/specialreg.h> +#include <machine/trap.h> /* T_PROTFLT */ #include <machine/frameasm.h> #if NLAPIC > 0 @@ -344,7 +346,12 @@ ENTRY(cpu_switchto) movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC 
SET_CURPROC(%r12,%rcx) - movl CPUVAR(CPUID),%edi + movl CPUVAR(CPUID),%r9d + + /* for the FPU/"extended CPU state" handling below */ + movq xsave_mask(%rip),%rdx + movl %edx,%eax + shrq $32,%rdx /* If old proc exited, don't bother. */ testq %r13,%r13 @@ -357,7 +364,7 @@ ENTRY(cpu_switchto) * %rax, %rcx - scratch * %r13 - old proc, then old pcb * %r12 - new proc - * %edi - cpuid + * %r9d - cpuid */ movq P_ADDR(%r13),%r13 @@ -365,16 +372,46 @@ ENTRY(cpu_switchto) /* clear the old pmap's bit for the cpu */ movq PCB_PMAP(%r13),%rcx lock - btrq %rdi,PM_CPUS(%rcx) + btrq %r9,PM_CPUS(%rcx) /* Save stack pointers. */ movq %rsp,PCB_RSP(%r13) movq %rbp,PCB_RBP(%r13) + /* + * If the old proc ran in userspace then save the + * floating-point/"extended state" registers + */ + testl $CPUF_USERXSTATE,CPUVAR(FLAGS) + jz .Lxstate_reset + + movq %r13, %rdi +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + CODEPATCH_START + .byte 0x48; fxsave (%rdi) /* really fxsave64 */ + CODEPATCH_END(CPTAG_XSAVE) + switch_exited: - /* did old proc run in userspace? 
then reset the segment regs */ - btrl $CPUF_USERSEGS_BIT, CPUVAR(FLAGS) - jnc restore_saved + /* now clear the xstate */ + movq proc0paddr(%rip),%rdi +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + CODEPATCH_START + .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */ + CODEPATCH_END(CPTAG_XRSTOR) + andl $~CPUF_USERXSTATE,CPUVAR(FLAGS) + +.Lxstate_reset: + /* + * If the segment registers haven't been reset since the old proc + * ran in userspace then reset them now + */ + testl $CPUF_USERSEGS,CPUVAR(FLAGS) + jz restore_saved + andl $~CPUF_USERSEGS,CPUVAR(FLAGS) /* set %ds, %es, %fs, and %gs to expected value to prevent info leak */ movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax @@ -431,32 +468,17 @@ restore_saved: 0: /* set the new pmap's bit for the cpu */ - movl CPUVAR(CPUID),%edi lock - btsq %rdi,PM_CPUS(%rcx) + btsq %r9,PM_CPUS(%rcx) #ifdef DIAGNOSTIC jc _C_LABEL(switch_pmcpu_set) #endif switch_restored: - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%r13),%ecx -#ifdef MULTIPROCESSOR - movq PCB_FPCPU(%r13),%r8 - cmpq CPUVAR(SELF),%r8 - jz 1f - orl $CR0_TS,%ecx -1: -#endif - movq %rcx,%cr0 - SET_CURPCB(%r13) /* Interrupts are okay again. */ sti - -switch_return: - popq %r15 popq %r14 popq %r13 @@ -528,7 +550,7 @@ IDTVEC(syscall) * %rip and the original rflags has been copied to %r11. %cs and * %ss have been updated to the kernel segments, but %rsp is still * the user-space value. - * First order of business is to swap to the kernel gs.base so that + * First order of business is to swap to the kernel GS.base so that * we can access our struct cpu_info and use the scratch space there * to switch to the kernel page tables (thank you, Intel), then * switch to our kernel stack. 
Once that's in place we can @@ -591,6 +613,15 @@ NENTRY(Xsyscall_untramp) testl $MDP_IRET, P_MD_FLAGS(%r14) jne intr_user_exit_post_ast + /* Restore FPU/"extended CPU state" if it's not already in the CPU */ + testl $CPUF_USERXSTATE,CPUVAR(FLAGS) + jz .Lsyscall_restore_xstate + + /* Restore FS.base if it's not already in the CPU */ + testl $CPUF_USERSEGS,CPUVAR(FLAGS) + jz .Lsyscall_restore_fsbase + +.Lsyscall_restore_registers: movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_R8(%rsp),%r8 @@ -603,17 +634,6 @@ NENTRY(Xsyscall_untramp) movq TF_RBP(%rsp),%rbp movq TF_RBX(%rsp),%rbx - /* Restore FS.base if it's not already in the CPU */ - btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS) - jc 99f - movq CPUVAR(CURPCB),%rdx - movq PCB_FSBASE(%rdx),%rax - movq %rax,%rdx - shrq $32,%rdx - movl $MSR_FSBASE,%ecx - wrmsr -99: - /* * We need to finish reading from the trapframe, then switch * to the user page tables, swapgs, and return. We need @@ -641,6 +661,37 @@ KUENTRY(syscall_trampback) sysretq .text + .align 16,0xcc + /* in this case, need FS.base but not xstate, rarely happens */ +.Lsyscall_restore_fsbase: /* CPU doesn't have curproc's FS.base */ + orl $CPUF_USERSEGS,CPUVAR(FLAGS) + movq CPUVAR(CURPCB),%rdi + jmp .Lsyscall_restore_fsbase_real + + .align 16,0xcc +.Lsyscall_restore_xstate: /* CPU doesn't have curproc's xstate */ + orl $(CPUF_USERXSTATE|CPUF_USERSEGS),CPUVAR(FLAGS) + movq CPUVAR(CURPCB),%rdi + movq xsave_mask(%rip),%rdx + movl %edx,%eax + shrq $32,%rdx +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + /* untouched state so can't fault */ + CODEPATCH_START + .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */ + CODEPATCH_END(CPTAG_XRSTOR) +#if PCB_SAVEFPU != 0 + subq $PCB_SAVEFPU,%rdi +#endif +.Lsyscall_restore_fsbase_real: + movq PCB_FSBASE(%rdi),%rdx + movl %edx,%eax + shrq $32,%rdx + movl $MSR_FSBASE,%ecx + wrmsr + jmp .Lsyscall_restore_registers #ifdef DIAGNOSTIC .Lsyscall_spl_not_lowered: @@ -677,9 +728,10 @@ NENTRY(proc_trampoline) /* * Returning to 
userspace via iretq. We do things in this order: * - check for ASTs + * - restore FPU/"extended CPU state" if it's not already in the CPU * - DIAGNOSTIC: no more C calls after this, so check the SPL - * - restore most registers * - restore FS.base if it's not already in the CPU + * - restore most registers * - update the iret frame from the trapframe * - finish reading from the trapframe * - switch to the trampoline stack \ @@ -706,14 +758,22 @@ NENTRY(intr_user_exit) cli jmp intr_user_exit - .global intr_user_exit_post_ast intr_user_exit_post_ast: + /* Restore FPU/"extended CPU state" if it's not already in the CPU */ + testl $CPUF_USERXSTATE,CPUVAR(FLAGS) + jz .Lintr_restore_xstate + #ifdef DIAGNOSTIC /* no more C calls after this, so check the SPL */ cmpl $0,CPUVAR(ILEVEL) jne .Luser_spl_not_lowered #endif /* DIAGNOSTIC */ + /* Restore FS.base if it's not already in the CPU */ + testl $CPUF_USERSEGS,CPUVAR(FLAGS) + jz .Lintr_restore_fsbase + +.Lintr_restore_registers: movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_R8(%rsp),%r8 @@ -726,16 +786,6 @@ intr_user_exit_post_ast: movq TF_RBP(%rsp),%rbp movq TF_RBX(%rsp),%rbx - /* restore FS.base if it's not already in the CPU */ - btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS) - jc 99f - movq CPUVAR(CURPCB),%rdx /* for below */ - movq PCB_FSBASE(%rdx),%rax - movq %rax,%rdx - shrq $32,%rdx - movl $MSR_FSBASE,%ecx - wrmsr -99: /* * To get the final value for the register that was used * for the mov to %cr3, we need access to somewhere accessible @@ -774,8 +824,45 @@ KUENTRY(iretq_tramp) _C_LABEL(doreti_iret): iretq -#ifdef DIAGNOSTIC .text + .align 16,0xcc +.Lintr_restore_xstate: /* CPU doesn't have curproc's xstate */ + orl $CPUF_USERXSTATE,CPUVAR(FLAGS) + movq CPUVAR(CURPCB),%rdi +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + movq xsave_mask(%rip),%rsi + call xrstor_user + testl %eax,%eax + jnz .Lintr_xrstor_faulted +.Lintr_restore_fsbase: /* CPU doesn't have curproc's FS.base */ + orl 
$CPUF_USERSEGS,CPUVAR(FLAGS) + movq CPUVAR(CURPCB),%rdx + movq PCB_FSBASE(%rdx),%rdx + movl %edx,%eax + shrq $32,%rdx + movl $MSR_FSBASE,%ecx + wrmsr + jmp .Lintr_restore_registers + +.Lintr_xrstor_faulted: + /* + * xrstor faulted; we need to reset the FPU state and call trap() + * to post a signal, which requires interrupts be enabled. + */ + sti + movq proc0paddr(%rip),%rdi +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + CODEPATCH_START + .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */ + CODEPATCH_END(CPTAG_XRSTOR) + movq $T_PROTFLT,TF_TRAPNO(%rsp) + jmp recall_trap + +#ifdef DIAGNOSTIC .Lintr_user_exit_not_blocked: movl warn_once(%rip),%edi testl %edi,%edi @@ -909,18 +996,71 @@ warn_once: .text #endif +/* + * FPU/"extended CPU state" handling + * int xrstor_user(sfp, mask) + * load given state, returns 0/1 if okay/it trapped + * void fpusave(sfp) + * save current state, but retain it in the FPU + * void fpusavereset(sfp) + * save current state and reset FPU to initial/kernel state + */ + ENTRY(xrstor_user) movq %rsi, %rdx movl %esi, %eax shrq $32, %rdx .globl xrstor_fault xrstor_fault: - xrstor (%rdi) + CODEPATCH_START + .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */ + CODEPATCH_END(CPTAG_XRSTOR) xorl %eax, %eax ret NENTRY(xrstor_resume) movl $1, %eax ret +END(xrstor_user) + +ENTRY(fpusave) + movq xsave_mask(%rip),%rdx + movl %edx,%eax + shrq $32,%rdx + CODEPATCH_START + .byte 0x48; fxsave (%rdi) /* really fxsave64 */ + CODEPATCH_END(CPTAG_XSAVE) + ret +END(fpusave) + +ENTRY(fpusavereset) + movq xsave_mask(%rip),%rdx + movl %edx,%eax + shrq $32,%rdx + CODEPATCH_START + .byte 0x48; fxsave (%rdi) /* really fxsave64 */ + CODEPATCH_END(CPTAG_XSAVE) + movq proc0paddr(%rip),%rdi +#if PCB_SAVEFPU != 0 + addq $PCB_SAVEFPU,%rdi +#endif + CODEPATCH_START + .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */ + CODEPATCH_END(CPTAG_XRSTOR) + ret +END(fpusavereset) + + .section .rodata + .globl _C_LABEL(_xrstor) +_C_LABEL(_xrstor): + .byte 0x48; xrstor (%rdi) /* 
really xrstor64 */ + + .globl _C_LABEL(_xsave) +_C_LABEL(_xsave): + .byte 0x48; xsave (%rdi) /* really xsave64 */ + + .globl _C_LABEL(_xsaveopt) +_C_LABEL(_xsaveopt): + .byte 0x48; xsaveopt (%rdi) /* really xsaveopt64 */ ENTRY(pagezero) movq $-PAGE_SIZE,%rdx diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index c336de2bd07..8400402779d 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.244 2018/05/26 18:02:01 guenther Exp $ */ +/* $OpenBSD: machdep.c,v 1.245 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */ /*- @@ -192,7 +192,7 @@ paddr_t tramp_pdirpa; int kbd_reset; int lid_action = 1; -int forceukbd; +int forceukbd = 1; /* * safepri is a safe priority for sleep to set for a spin-wait @@ -386,7 +386,6 @@ x86_64_proc0_tss_ldt_init(void) struct pcb *pcb; cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb; - pcb->pcb_cr0 = rcr0(); pcb->pcb_fsbase = 0; pcb->pcb_kstack = (u_int64_t)proc0.p_addr + USPACE - 16; proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1; @@ -562,6 +561,7 @@ sendsig(sig_t catcher, int sig, int mask, u_long code, int type, struct trapframe *tf = p->p_md.md_regs; struct sigacts *psp = p->p_p->ps_sigacts; struct sigcontext ksc; + struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu; siginfo_t ksi; register_t sp, scp, sip; u_long sss; @@ -599,17 +599,19 @@ sendsig(sig_t catcher, int sig, int mask, u_long code, int type, sp &= ~15ULL; /* just in case */ sss = (sizeof(ksc) + 15) & ~15; - if (p->p_md.md_flags & MDP_USEDFPU) { - fpusave_proc(p, 1); - sp -= fpu_save_len; - ksc.sc_fpstate = (struct fxsave64 *)sp; - if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, - (void *)sp, fpu_save_len)) - sigexit(p, SIGILL); - - /* Signal handlers get a completely clean FP state */ - p->p_md.md_flags &= ~MDP_USEDFPU; + /* Save FPU state to PCB if necessary, then copy it out */ + if (curcpu()->ci_flags 
& CPUF_USERXSTATE) { + curcpu()->ci_flags &= ~CPUF_USERXSTATE; + fpusavereset(&p->p_addr->u_pcb.pcb_savefpu); } + sp -= fpu_save_len; + ksc.sc_fpstate = (struct fxsave64 *)sp; + if (copyout(sfp, (void *)sp, fpu_save_len)) + sigexit(p, SIGILL); + + /* Now reset the FPU state in PCB */ + memcpy(&p->p_addr->u_pcb.pcb_savefpu, + &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); sip = 0; if (psp->ps_siginfo & sigmask(sig)) { @@ -639,6 +641,9 @@ sendsig(sig_t catcher, int sig, int mask, u_long code, int type, tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC); tf->tf_rsp = scp; tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); + + /* The reset state _is_ the userspace state for this thread now */ + curcpu()->ci_flags |= CPUF_USERXSTATE; } /* @@ -683,16 +688,23 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) !USERMODE(ksc.sc_cs, ksc.sc_eflags)) return (EINVAL); - if (p->p_md.md_flags & MDP_USEDFPU) - fpusave_proc(p, 0); + /* Current state is obsolete; toss it and force a reload */ + if (curcpu()->ci_flags & CPUF_USERXSTATE) { + curcpu()->ci_flags &= ~CPUF_USERXSTATE; + fpureset(); + } - if (ksc.sc_fpstate) { + /* Copy in the FPU state to restore */ + if (__predict_true(ksc.sc_fpstate != NULL)) { struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave; if ((error = copyin(ksc.sc_fpstate, fx, fpu_save_len))) return (error); fx->fx_mxcsr &= fpu_mxcsr_mask; - p->p_md.md_flags |= MDP_USEDFPU; + } else { + /* shouldn't happen, but handle it */ + memcpy(&p->p_addr->u_pcb.pcb_savefpu, + &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); } tf->tf_rdi = ksc.sc_rdi; @@ -726,6 +738,7 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) * when a signal was being delivered, the process will be * completely restored, including the userland %rcx and %r11 * registers which the 'sysretq' instruction cannot restore. + * Also need to make sure we can handle faulting on xrstor. 
*/ p->p_md.md_flags |= MDP_IRET; @@ -1111,10 +1124,19 @@ setregs(struct proc *p, struct exec_package *pack, u_long stack, { struct trapframe *tf; - /* If we were using the FPU, forget about it. */ - if (p->p_addr->u_pcb.pcb_fpcpu != NULL) - fpusave_proc(p, 0); - p->p_md.md_flags &= ~MDP_USEDFPU; + /* Reset FPU state in PCB */ + memcpy(&p->p_addr->u_pcb.pcb_savefpu, + &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len); + + if (curcpu()->ci_flags & CPUF_USERXSTATE) { + /* state in CPU is obsolete; reset it */ + fpureset(); + } else { + /* the reset state _is_ the userspace state now */ + curcpu()->ci_flags |= CPUF_USERXSTATE; + } + + /* To reset all registers we have to return via iretq */ p->p_md.md_flags |= MDP_IRET; reset_segs(); diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S index d84a4571afc..aa813186150 100644 --- a/sys/arch/amd64/amd64/mptramp.S +++ b/sys/arch/amd64/amd64/mptramp.S @@ -1,4 +1,4 @@ -/* $OpenBSD: mptramp.S,v 1.16 2018/05/22 15:55:30 guenther Exp $ */ +/* $OpenBSD: mptramp.S,v 1.17 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: mptramp.S,v 1.1 2003/04/26 18:39:30 fvdl Exp $ */ /*- @@ -230,7 +230,7 @@ _C_LABEL(cpu_spinup_trampoline_end): #end of code copied to MP_TRAMPOLINE /* Switch address space. 
*/ movq PCB_CR3(%rsi),%rax movq %rax,%cr3 - movl PCB_CR0(%rsi),%eax + movl $CR0_DEFAULT,%eax movq %rax,%cr0 call _C_LABEL(cpu_hatch) /* NOTREACHED */ diff --git a/sys/arch/amd64/amd64/process_machdep.c b/sys/arch/amd64/amd64/process_machdep.c index 42eff548aae..e61409c3b4d 100644 --- a/sys/arch/amd64/amd64/process_machdep.c +++ b/sys/arch/amd64/amd64/process_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: process_machdep.c,v 1.15 2017/10/14 04:44:43 jsg Exp $ */ +/* $OpenBSD: process_machdep.c,v 1.16 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: process_machdep.c,v 1.1 2003/04/26 18:39:31 fvdl Exp $ */ /*- @@ -122,19 +122,6 @@ process_read_fpregs(struct proc *p, struct fpreg *regs) { struct fxsave64 *frame = process_fpframe(p); - if (p->p_md.md_flags & MDP_USEDFPU) { - fpusave_proc(p, 1); - } else { - /* Fake a FNINIT. */ - memset(frame, 0, sizeof(*regs)); - frame->fx_fcw = __INITIAL_NPXCW__; - frame->fx_fsw = 0x0000; - frame->fx_ftw = 0x00; - frame->fx_mxcsr = __INITIAL_MXCSR__; - frame->fx_mxcsr_mask = fpu_mxcsr_mask; - p->p_md.md_flags |= MDP_USEDFPU; - } - memcpy(&regs->fxstate, frame, sizeof(*regs)); return (0); } @@ -184,14 +171,11 @@ process_write_fpregs(struct proc *p, struct fpreg *regs) { struct fxsave64 *frame = process_fpframe(p); - if (p->p_md.md_flags & MDP_USEDFPU) { - fpusave_proc(p, 0); - } else { - p->p_md.md_flags |= MDP_USEDFPU; - } - memcpy(frame, &regs->fxstate, sizeof(*regs)); frame->fx_mxcsr &= fpu_mxcsr_mask; + + /* force target to return via iretq so bogus xstate can be handled */ + p->p_md.md_flags |= MDP_IRET; return (0); } diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index e1febc2622c..2cc00a2429f 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.68 2018/05/13 22:01:13 guenther Exp $ */ +/* $OpenBSD: trap.c,v 1.69 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ /*- @@ -224,6 +224,7 @@ copyfault: case 
T_FPOPFLT|T_USER: /* impossible without 32bit compat */ case T_BOUND|T_USER: case T_OFLOW|T_USER: + case T_DNA|T_USER: panic("impossible trap"); case T_DIVIDE|T_USER: sv.sival_ptr = (void *)frame->tf_rip; diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S index a4c337971f8..a5cb2d5e835 100644 --- a/sys/arch/amd64/amd64/vector.S +++ b/sys/arch/amd64/amd64/vector.S @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.S,v 1.59 2018/06/05 05:04:31 guenther Exp $ */ +/* $OpenBSD: vector.S,v 1.60 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */ /* @@ -179,17 +179,7 @@ IDTVEC(trap05) IDTVEC(trap06) ZTRAP(T_PRIVINFLT) IDTVEC(trap07) - pushq $0 # dummy error code - pushq $T_DNA - INTRENTRY(trap07) - sti - cld - SMAP_CLAC - movq CPUVAR(SELF),%rdi - movq %rsp, %rsi - call _C_LABEL(fpudna) - cli - jmp intr_user_exit_post_ast + ZTRAP(T_DNA) # impossible: we don't do lazy FPU IDTVEC(trap08) pushq $T_DOUBLEFLT jmp calltrap_specstk @@ -321,6 +311,8 @@ KUENTRY(alltraps) jz 2f .Lreal_trap: #endif /* !defined(GPROF) && defined(DDBPROF) */ + .globl recall_trap +recall_trap: movq %rsp, %rdi call _C_LABEL(trap) 2: /* Check for ASTs on exit to user mode. 
*/ diff --git a/sys/arch/amd64/amd64/via.c b/sys/arch/amd64/amd64/via.c index a27852922a0..7f7035a23f8 100644 --- a/sys/arch/amd64/amd64/via.c +++ b/sys/arch/amd64/amd64/via.c @@ -1,4 +1,4 @@ -/* $OpenBSD: via.c,v 1.31 2018/06/01 14:23:48 fcambus Exp $ */ +/* $OpenBSD: via.c,v 1.32 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -306,18 +306,11 @@ static __inline void viac3_cbc(void *cw, void *src, void *dst, void *key, int rep, void *iv) { - unsigned int creg0; - - creg0 = rcr0(); /* Permit access to SIMD/FPU path */ - lcr0(creg0 & ~(CR0_EM|CR0_TS)); - /* Do the deed */ __asm volatile("pushfq; popfq"); __asm volatile("rep xcryptcbc" : : "b" (key), "a" (iv), "c" (rep), "d" (cw), "S" (src), "D" (dst) : "memory", "cc"); - - lcr0(creg0); } int @@ -507,14 +500,8 @@ void viac3_rnd(void *v) { struct timeout *tmo = v; - unsigned int *p, i, rv, creg0, len = VIAC3_RNG_BUFSIZ; + unsigned int *p, i, rv, len = VIAC3_RNG_BUFSIZ; static int buffer[VIAC3_RNG_BUFSIZ + 2]; /* XXX why + 2? */ -#ifdef MULTIPROCESSOR - int s = splipi(); -#endif - - creg0 = rcr0(); /* Permit access to SIMD/FPU path */ - lcr0(creg0 & ~(CR0_EM|CR0_TS)); /* * Here we collect the random data from the VIA C3 RNG. 
We make @@ -525,12 +512,6 @@ viac3_rnd(void *v) : "=a" (rv) : "d" (3), "D" (buffer), "c" (len*sizeof(int)) : "memory", "cc"); - lcr0(creg0); - -#ifdef MULTIPROCESSOR - splx(s); -#endif - for (i = 0, p = buffer; i < VIAC3_RNG_BUFSIZ; i++, p++) enqueue_randomness(*p); diff --git a/sys/arch/amd64/amd64/vm_machdep.c b/sys/arch/amd64/amd64/vm_machdep.c index 58a23136a49..fea5d268eca 100644 --- a/sys/arch/amd64/amd64/vm_machdep.c +++ b/sys/arch/amd64/amd64/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.41 2017/10/14 04:44:43 jsg Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.42 2018/06/05 06:39:10 guenther Exp $ */ /* $NetBSD: vm_machdep.c,v 1.1 2003/04/26 18:39:33 fvdl Exp $ */ /*- @@ -66,19 +66,12 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, void (*func)(void *), void *arg) { struct pcb *pcb = &p2->p_addr->u_pcb; + struct pcb *pcb1 = &p1->p_addr->u_pcb; struct trapframe *tf; struct switchframe *sf; - /* - * If fpuproc != p1, then the fpu h/w state is irrelevant and the - * state had better already be in the pcb. This is true for forks - * but not for dumps. - * - * If fpuproc == p1, then we have to save the fpu h/w state to - * p1's pcb so that we can copy it. - */ - if (p1->p_addr->u_pcb.pcb_fpcpu != NULL) - fpusave_proc(p1, 1); + /* Save the fpu h/w state to p1's pcb so that we can copy it. */ + fpusave(&pcb1->pcb_savefpu); p2->p_md.md_flags = p1->p_md.md_flags; @@ -86,7 +79,7 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, if (p1 != curproc && p1 != &proc0) panic("cpu_fork: curproc"); #endif - *pcb = p1->p_addr->u_pcb; + *pcb = *pcb1; /* * Activate the address space. @@ -130,11 +123,6 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, void cpu_exit(struct proc *p) { - - /* If we were using the FPU, forget about it. 
*/ - if (p->p_addr->u_pcb.pcb_fpcpu != NULL) - fpusave_proc(p, 0); - pmap_deactivate(p); sched_exit(p); } diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c index e6bc703ef65..56e08db99d5 100644 --- a/sys/arch/amd64/amd64/vmm.c +++ b/sys/arch/amd64/amd64/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.199 2018/05/22 06:33:35 guenther Exp $ */ +/* $OpenBSD: vmm.c,v 1.200 2018/06/05 06:39:10 guenther Exp $ */ /* * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> * @@ -3818,60 +3818,33 @@ vcpu_must_stop(struct vcpu *vcpu) int vmm_fpurestore(struct vcpu *vcpu) { - struct proc *p; struct cpu_info *ci = curcpu(); - clts(); - p = ci->ci_fpcurproc; - if (p != NULL) { - uvmexp.fpswtch++; - - if (ci->ci_fpsaving != 0) - panic("%s: recursive save!", __func__); - /* - * Set ci->ci_fpsaving, so that any pending exception will be - * thrown away. (It will be caught again if/when the FPU - * state is restored.) - */ - ci->ci_fpsaving = 1; - if (xsave_mask) - xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask); - else - fxsave(&p->p_addr->u_pcb.pcb_savefpu); - - ci->ci_fpsaving = 0; - - p->p_addr->u_pcb.pcb_cr0 |= CR0_TS; - - p->p_addr->u_pcb.pcb_fpcpu = NULL; - ci->ci_fpcurproc = NULL; - } - - /* Initialize the guest FPU if not inited already */ - if (!vcpu->vc_fpuinited) { - fninit(); - bzero(&vcpu->vc_g_fpu.fp_fxsave, - sizeof(vcpu->vc_g_fpu.fp_fxsave)); - vcpu->vc_g_fpu.fp_fxsave.fx_fcw = __INITIAL_NPXCW__; - vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; - fxrstor(&vcpu->vc_g_fpu.fp_fxsave); - vcpu->vc_fpuinited = 1; + /* save vmmd's FPU state if we haven't already */ + if (ci->ci_flags & CPUF_USERXSTATE) { + ci->ci_flags &= ~CPUF_USERXSTATE; + fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu); } - if (xsave_mask) { + if (vcpu->vc_fpuinited) { /* Restore guest XCR0 and FPU context */ if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) { - DPRINTF("%s: guest attempted to set invalid " - "bits in xcr0\n", __func__); - stts(); + DPRINTF("%s: guest attempted 
to set invalid %s\n" + __func__, "bits in xcr0"); + return EINVAL; + } + + if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) { + DPRINTF("%s: guest attempted to set invalid %s\n" + __func__, "xsave/xrstor state"); return EINVAL; } + } + if (xsave_mask) { /* Restore guest %xcr0 */ - xrstor(&vcpu->vc_g_fpu, xsave_mask); xsetbv(0, vcpu->vc_gueststate.vg_xcr0); - } else - fxrstor(&vcpu->vc_g_fpu.fp_fxsave); + } return 0; } @@ -3890,20 +3863,14 @@ vmm_fpusave(struct vcpu *vcpu) /* Restore host %xcr0 */ xsetbv(0, xsave_mask); - - /* - * Save full copy of FPU state - guest content is always - * a subset of host's save area (see xsetbv exit handler) - */ - xsave(&vcpu->vc_g_fpu, xsave_mask); - } else - fxsave(&vcpu->vc_g_fpu); + } /* - * FPU state is invalid, set CR0_TS to force DNA trap on next - * access. - */ - stts(); + * Save full copy of FPU state - guest content is always + * a subset of host's save area (see xsetbv exit handler) + */ + fpusavereset(&vcpu->vc_g_fpu); + vcpu->vc_fpuinited = 1; } /* diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h index 185b5b73abc..159d9109451 100644 --- a/sys/arch/amd64/include/codepatch.h +++ b/sys/arch/amd64/include/codepatch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: codepatch.h,v 1.4 2017/08/25 19:28:48 guenther Exp $ */ +/* $OpenBSD: codepatch.h,v 1.5 2018/06/05 06:39:11 guenther Exp $ */ /* * Copyright (c) 2014-2015 Stefan Fritsch <sf@sfritsch.de> * @@ -50,6 +50,8 @@ void codepatch_call(uint16_t tag, void *func); #define CPTAG_STAC 1 #define CPTAG_CLAC 2 #define CPTAG_EOI 3 +#define CPTAG_XRSTOR 4 +#define CPTAG_XSAVE 5 /* * As stac/clac SMAP instructions are 3 bytes, we want the fastest diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h index 70e2d4642f7..b9520284548 100644 --- a/sys/arch/amd64/include/cpu.h +++ b/sys/arch/amd64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.122 2018/05/26 18:02:01 guenther Exp $ */ +/* $OpenBSD: cpu.h,v 1.123 2018/06/05 06:39:11 guenther Exp 
$ */ /* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */ /*- @@ -115,10 +115,6 @@ struct cpu_info { u_int64_t ci_intr_rsp; /* U<-->K trampoline stack */ u_int64_t ci_user_cr3; /* U-K page table */ - struct proc *ci_fpcurproc; - struct proc *ci_fpsaveproc; - int ci_fpsaving; - struct pcb *ci_curpcb; struct pcb *ci_idle_pcb; @@ -215,9 +211,9 @@ struct cpu_info { #define CPUF_IDENTIFIED 0x0020 /* CPU has been identified */ #define CPUF_CONST_TSC 0x0040 /* CPU has constant TSC */ -#define CPUF_USERSEGS_BIT 7 /* CPU has curproc's segments */ -#define CPUF_USERSEGS (1<<CPUF_USERSEGS_BIT) /* and FS.base */ +#define CPUF_USERSEGS 0x0080 /* CPU has curproc's segs and FS.base */ #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */ +#define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */ #define CPUF_PRESENT 0x1000 /* CPU is present */ #define CPUF_RUNNING 0x2000 /* CPU is running */ diff --git a/sys/arch/amd64/include/fpu.h b/sys/arch/amd64/include/fpu.h index 96536774abc..78db1e413d3 100644 --- a/sys/arch/amd64/include/fpu.h +++ b/sys/arch/amd64/include/fpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fpu.h,v 1.13 2018/05/26 17:28:17 guenther Exp $ */ +/* $OpenBSD: fpu.h,v 1.14 2018/06/05 06:39:11 guenther Exp $ */ /* $NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $ */ #ifndef _MACHINE_FPU_H_ @@ -64,23 +64,22 @@ extern uint32_t fpu_mxcsr_mask; extern uint64_t xsave_mask; void fpuinit(struct cpu_info *); -void fpudrop(void); -void fpudiscard(struct proc *); void fputrap(struct trapframe *); -void fpusave_proc(struct proc *, int); -void fpusave_cpu(struct cpu_info *, int); +void fpusave(struct savefpu *); +void fpusavereset(struct savefpu *); void fpu_kernel_enter(void); void fpu_kernel_exit(void); +int xrstor_user(struct savefpu *_addr, uint64_t _mask); +#define fpureset() \ + xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask) + #define fninit() __asm("fninit") #define fwait() __asm("fwait") -#define fnclex() __asm("fnclex") +/* should be fxsave64, but 
where we use this it doesn't matter */ #define fxsave(addr) __asm("fxsave %0" : "=m" (*addr)) -#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*addr)) #define ldmxcsr(addr) __asm("ldmxcsr %0" : : "m" (*addr)) #define fldcw(addr) __asm("fldcw %0" : : "m" (*addr)) -#define clts() __asm("clts") -#define stts() lcr0(rcr0() | CR0_TS) static inline void xsave(struct savefpu *addr, uint64_t mask) @@ -89,20 +88,11 @@ xsave(struct savefpu *addr, uint64_t mask) lo = mask; hi = mask >> 32; + /* should be xsave64, but where we use this it doesn't matter */ __asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) : "memory"); } -static inline void -xrstor(struct savefpu *addr, uint64_t mask) -{ - uint32_t lo, hi; - - lo = mask; - hi = mask >> 32; - __asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi)); -} - #endif #endif /* _MACHINE_FPU_H_ */ diff --git a/sys/arch/amd64/include/intrdefs.h b/sys/arch/amd64/include/intrdefs.h index f435c672b6e..ca1ec3bf84e 100644 --- a/sys/arch/amd64/include/intrdefs.h +++ b/sys/arch/amd64/include/intrdefs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: intrdefs.h,v 1.17 2018/01/13 15:18:11 mpi Exp $ */ +/* $OpenBSD: intrdefs.h,v 1.18 2018/06/05 06:39:11 guenther Exp $ */ /* $NetBSD: intrdefs.h,v 1.2 2003/05/04 22:01:56 fvdl Exp $ */ #ifndef _AMD64_INTRDEFS_H @@ -76,8 +76,6 @@ #define X86_IPI_HALT 0x00000001 #define X86_IPI_NOP 0x00000002 -#define X86_IPI_FLUSH_FPU 0x00000004 -#define X86_IPI_SYNCH_FPU 0x00000008 #define X86_IPI_TLB 0x00000010 #define X86_IPI_MTRR 0x00000020 #define X86_IPI_SETPERF 0x00000040 @@ -87,8 +85,8 @@ #define X86_NIPI 10 -#define X86_IPI_NAMES { "halt IPI", "nop IPI", "FPU flush IPI", \ - "FPU synch IPI", "TLB shootdown IPI", \ +#define X86_IPI_NAMES { "halt IPI", "nop IPI", NULL, \ + NULL, "TLB shootdown IPI", \ "MTRR update IPI", "setperf IPI", "ddb IPI", \ "VMM start IPI", "VMM stop IPI" } diff --git a/sys/arch/amd64/include/pcb.h b/sys/arch/amd64/include/pcb.h index a4a1d752d95..d974172cf0f 100644 --- 
a/sys/arch/amd64/include/pcb.h +++ b/sys/arch/amd64/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.16 2017/04/26 07:05:24 mlarkin Exp $ */ +/* $OpenBSD: pcb.h,v 1.17 2018/06/05 06:39:11 guenther Exp $ */ /* $NetBSD: pcb.h,v 1.1 2003/04/26 18:39:45 fvdl Exp $ */ /*- @@ -69,7 +69,6 @@ #include <sys/signal.h> -#include <machine/tss.h> #include <machine/fpu.h> /* @@ -84,9 +83,7 @@ struct pcb { u_int64_t pcb_kstack; /* kernel stack address */ u_int64_t pcb_fsbase; /* per-thread offset: %fs */ caddr_t pcb_onfault; /* copyin/out fault recovery */ - struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */ struct pmap *pcb_pmap; /* back pointer to our pmap */ - int pcb_cr0; /* saved image of CR0 */ }; #ifdef _KERNEL diff --git a/sys/arch/amd64/include/proc.h b/sys/arch/amd64/include/proc.h index 9cb65df8735..a2ca633f601 100644 --- a/sys/arch/amd64/include/proc.h +++ b/sys/arch/amd64/include/proc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proc.h,v 1.9 2017/04/13 03:52:25 guenther Exp $ */ +/* $OpenBSD: proc.h,v 1.10 2018/06/05 06:39:11 guenther Exp $ */ /* $NetBSD: proc.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */ /* @@ -46,7 +46,6 @@ struct mdproc { }; /* md_flags */ -#define MDP_USEDFPU 0x0001 /* has used the FPU */ #define MDP_IRET 0x0002 /* return via iret, not sysret */ /* (iret can restore r11 and rcx) */ diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h index 9a298597abf..f0933c7de99 100644 --- a/sys/arch/amd64/include/specialreg.h +++ b/sys/arch/amd64/include/specialreg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specialreg.h,v 1.72 2018/05/23 05:37:01 guenther Exp $ */ +/* $OpenBSD: specialreg.h,v 1.73 2018/06/05 06:39:11 guenther Exp $ */ /* $NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $ */ /* $NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $ */ @@ -1400,5 +1400,5 @@ /* * Default cr0 and cr4 flags. 
*/ -#define CR0_DEFAULT (CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP) +#define CR0_DEFAULT (CR0_PE|CR0_PG|CR0_NE|CR0_WP) #define CR4_DEFAULT (CR4_PAE|CR4_PGE|CR4_PSE|CR4_OSFXSR|CR4_OSXMMEXCPT) |