diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2012-10-31 03:30:23 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2012-10-31 03:30:23 +0000 |
commit | ae838b87cfb9ec31994d844f5c2d450b1402a0b6 (patch) | |
tree | 18a76cb557d5e7fa2354b3c06fc5f9b7bc1443a0 | |
parent | 07be4e2f33cfae1bb1e37a79b17512b65d630dec (diff) |
Add support for Intel's Supervisor Mode Access Prevention (SMAP) feature.
When enabled, SMAP generates a page fault whenever the kernel attempts
to read or write user data pages, unless an override flag is set.
Instructions that modify the flag (stac/clac) are patched into copyin/copyout
and friends at boot if the CPU reports SMAP support.
Those with access to hardware with SMAP can contact me for a test case.
joint work with deraadt@
ok miod@ deraadt@
-rw-r--r-- | sys/arch/amd64/amd64/copy.S | 43 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/cpu.c | 74 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/identcpu.c | 7 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/trap.c | 7 | ||||
-rw-r--r-- | sys/arch/i386/i386/cpu.c | 78 | ||||
-rw-r--r-- | sys/arch/i386/i386/locore.s | 48 | ||||
-rw-r--r-- | sys/arch/i386/i386/machdep.c | 8 | ||||
-rw-r--r-- | sys/arch/i386/i386/trap.c | 7 |
8 files changed, 264 insertions, 8 deletions
diff --git a/sys/arch/amd64/amd64/copy.S b/sys/arch/amd64/amd64/copy.S index 36ca5ad4dab..32e18fc5b56 100644 --- a/sys/arch/amd64/amd64/copy.S +++ b/sys/arch/amd64/amd64/copy.S @@ -1,4 +1,4 @@ -/* $OpenBSD: copy.S,v 1.4 2008/06/09 20:43:41 miod Exp $ */ +/* $OpenBSD: copy.S,v 1.5 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: copy.S,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ /* @@ -44,6 +44,17 @@ #include <machine/asm.h> /* + * As stac/clac SMAP instructions are 3 bytes, we want the fastest + * 3 byte nop sequence possible here. This will be replaced by + * stac/clac instructions if SMAP is detected after booting. + * + * This would be 'nop (%rax)' if binutils could cope. + * Intel documents multi-byte NOP sequences as being available + * on all family 0x6 and 0xf processors (ie 686+) + */ +#define SMAP_NOP .byte 0x0f, 0x1f, 0x00 + +/* * Copy routines from and to userland, plus a few more. See the * section 9 manpages for info. Some cases can be optimized more. * @@ -99,6 +110,7 @@ ENTRY(kcopy) xorq %rax,%rax ret +.globl _C_LABEL(_copyout_stac), _C_LABEL(_copyout_clac) ENTRY(copyout) pushq $0 @@ -115,6 +127,8 @@ ENTRY(copyout) movq CPUVAR(CURPCB),%rdx leaq _C_LABEL(copy_fault)(%rip),%r11 movq %r11,PCB_ONFAULT(%rdx) +_C_LABEL(_copyout_stac): + SMAP_NOP cld movq %rax,%rcx @@ -126,15 +140,20 @@ ENTRY(copyout) rep movsb +_C_LABEL(_copyout_clac): + SMAP_NOP popq PCB_ONFAULT(%rdx) xorl %eax,%eax ret +.globl _C_LABEL(_copyin_stac), _C_LABEL(_copyin_clac) ENTRY(copyin) movq CPUVAR(CURPCB),%rax pushq $0 leaq _C_LABEL(copy_fault)(%rip),%r11 movq %r11,PCB_ONFAULT(%rax) +_C_LABEL(_copyin_stac): + SMAP_NOP xchgq %rdi,%rsi movq %rdx,%rax @@ -157,6 +176,8 @@ ENTRY(copyin) rep movsb +_C_LABEL(_copyin_clac): + SMAP_NOP movq CPUVAR(CURPCB),%rdx popq PCB_ONFAULT(%rdx) xorl %eax,%eax @@ -165,11 +186,15 @@ ENTRY(copyin) NENTRY(copy_efault) movq $EFAULT,%rax +.globl _C_LABEL(_copy_fault_clac) NENTRY(copy_fault) +_C_LABEL(_copy_fault_clac): + SMAP_NOP movq CPUVAR(CURPCB),%rdx popq 
PCB_ONFAULT(%rdx) ret +.globl _C_LABEL(_copyoutstr_stac) ENTRY(copyoutstr) xchgq %rdi,%rsi movq %rdx,%r8 @@ -178,6 +203,8 @@ ENTRY(copyoutstr) 5: movq CPUVAR(CURPCB),%rax leaq _C_LABEL(copystr_fault)(%rip),%r11 movq %r11,PCB_ONFAULT(%rax) +_C_LABEL(_copyoutstr_stac): + SMAP_NOP /* * Get min(%rdx, VM_MAXUSER_ADDRESS-%rdi). */ @@ -211,6 +238,7 @@ ENTRY(copyoutstr) movq $ENAMETOOLONG,%rax jmp copystr_return +.globl _C_LABEL(_copyinstr_stac) ENTRY(copyinstr) xchgq %rdi,%rsi movq %rdx,%r8 @@ -219,6 +247,8 @@ ENTRY(copyinstr) movq CPUVAR(CURPCB),%rcx leaq _C_LABEL(copystr_fault)(%rip),%r11 movq %r11,PCB_ONFAULT(%rcx) +_C_LABEL(_copyinstr_stac): + SMAP_NOP /* * Get min(%rdx, VM_MAXUSER_ADDRESS-%rsi). @@ -256,8 +286,11 @@ ENTRY(copyinstr) ENTRY(copystr_efault) movl $EFAULT,%eax +.globl _C_LABEL(_copystr_fault_clac) ENTRY(copystr_fault) copystr_return: +_C_LABEL(_copystr_fault_clac): + SMAP_NOP /* Set *lencopied and return %eax. */ movq CPUVAR(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) @@ -297,3 +330,11 @@ ENTRY(copystr) movq %r8,(%rcx) 7: ret + +.globl _C_LABEL(_stac) +_C_LABEL(_stac): + stac + +.globl _C_LABEL(_clac) +_C_LABEL(_clac): + clac diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c index f931c80771c..0b843d32dad 100644 --- a/sys/arch/amd64/amd64/cpu.c +++ b/sys/arch/amd64/amd64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.52 2012/10/09 04:40:36 jsg Exp $ */ +/* $OpenBSD: cpu.c,v 1.53 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ /*- @@ -112,6 +112,74 @@ struct cpu_softc { struct cpu_info *sc_info; /* pointer to CPU info */ }; +#ifndef SMALL_KERNEL +void replacesmap(void); + +extern long _copyout_stac; +extern long _copyout_clac; +extern long _copyin_stac; +extern long _copyin_clac; +extern long _copy_fault_clac; +extern long _copyoutstr_stac; +extern long _copyinstr_stac; +extern long _copystr_fault_clac; +extern long _stac; +extern long _clac; + +static const struct { + void *daddr; + void *saddr; +} 
ireplace[] = { + { &_copyout_stac, &_stac }, + { &_copyout_clac, &_clac }, + { &_copyin_stac, &_stac }, + { &_copyin_clac, &_clac }, + { &_copy_fault_clac, &_clac }, + { &_copyoutstr_stac, &_stac }, + { &_copyinstr_stac, &_stac }, + { &_copystr_fault_clac, &_clac }, +}; + +void +replacesmap(void) +{ + static int replacedone = 0; + int i, s; + vaddr_t nva; + + if (replacedone) + return; + replacedone = 1; + + s = splhigh(); + /* + * Create writeable aliases of memory we need + * to write to as kernel is mapped read-only + */ + nva = uvm_km_valloc(kernel_map, 2); + + for (i = 0; i < nitems(ireplace); i++) { + paddr_t kva = trunc_page((paddr_t)ireplace[i].daddr); + paddr_t po = (paddr_t)ireplace[i].daddr & PAGE_MASK; + paddr_t pa1, pa2; + + pmap_extract(pmap_kernel(), kva, &pa1); + pmap_extract(pmap_kernel(), kva + PAGE_SIZE, &pa2); + pmap_kenter_pa(nva, pa1, VM_PROT_READ | VM_PROT_WRITE); + pmap_kenter_pa(nva + PAGE_SIZE, pa2, VM_PROT_READ | + VM_PROT_WRITE); + pmap_update(pmap_kernel()); + + /* replace 3 byte nops with stac/clac instructions */ + bcopy(ireplace[i].saddr, (void *)(nva + po), 3); + } + + uvm_km_free(kernel_map, nva, 2); + + splx(s); +} +#endif /* !SMALL_KERNEL */ + #ifdef MULTIPROCESSOR int mp_cpu_start(struct cpu_info *); void mp_cpu_start_cleanup(struct cpu_info *); @@ -377,6 +445,10 @@ cpu_init(struct cpu_info *ci) lcr0(rcr0() | CR0_WP); lcr4(rcr4() | CR4_DEFAULT | (ci->ci_feature_sefflags & SEFF0EBX_SMEP ? 
CR4_SMEP : 0)); +#ifndef SMALL_KERNEL + if (ci->ci_feature_sefflags & SEFF0EBX_SMAP) + lcr4(rcr4() | CR4_SMAP); +#endif #ifdef MULTIPROCESSOR ci->ci_flags |= CPUF_RUNNING; diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c index c597bb099f7..8812d0c26e1 100644 --- a/sys/arch/amd64/amd64/identcpu.c +++ b/sys/arch/amd64/amd64/identcpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: identcpu.c,v 1.41 2012/10/09 09:16:09 jsg Exp $ */ +/* $OpenBSD: identcpu.c,v 1.42 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */ /* @@ -44,6 +44,8 @@ #include <machine/cpu.h> #include <machine/cpufunc.h> +void replacesmap(void); + /* sysctl wants this. */ char cpu_model[48]; int cpuspeed; @@ -445,6 +447,9 @@ identifycpu(struct cpu_info *ci) if (cpu_ecxfeature & CPUIDECX_RDRAND) has_rdrand = 1; + + if (ci->ci_feature_sefflags & SEFF0EBX_SMAP) + replacesmap(); } if (!strncmp(mycpu_model, "Intel", 5)) { u_int32_t cflushsz; diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index 27343ddca33..6bbeda1b2e8 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.30 2012/10/09 04:40:36 jsg Exp $ */ +/* $OpenBSD: trap.c,v 1.31 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ /*- @@ -324,6 +324,11 @@ copyfault: if (cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_I) panic("attempt to execute user address %p " "in supervisor mode", (void *)cr2); + /* This will only trigger if SMAP is enabled */ + if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS && + frame->tf_err & PGEX_P) + panic("attempt to access user address %p " + "in supervisor mode", (void *)cr2); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index 2640339e113..7334d685c76 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: 
cpu.c,v 1.46 2012/10/09 04:40:36 jsg Exp $ */ +/* $OpenBSD: cpu.c,v 1.47 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ /*- @@ -158,6 +158,78 @@ struct cfdriver cpu_cd = { NULL, "cpu", DV_DULL /* XXX DV_CPU */ }; +#ifndef SMALL_KERNEL +void replacesmap(void); + +extern int _copyout_stac; +extern int _copyout_clac; +extern int _copyin_stac; +extern int _copyin_clac; +extern int _copy_fault_clac; +extern int _copyoutstr_stac; +extern int _copyinstr_stac; +extern int _copystr_fault_clac; +extern int _ucas_32_stac; +extern int _ucas_32_clac; +extern int _stac; +extern int _clac; + +static const struct { + void *daddr; + void *saddr; +} ireplace[] = { + { &_copyout_stac, &_stac }, + { &_copyout_clac, &_clac }, + { &_copyin_stac, &_stac }, + { &_copyin_clac, &_clac }, + { &_copy_fault_clac, &_clac }, + { &_copyoutstr_stac, &_stac }, + { &_copyinstr_stac, &_stac }, + { &_copystr_fault_clac, &_clac }, + { &_ucas_32_stac, &_stac }, + { &_ucas_32_clac, &_clac }, +}; + +void +replacesmap(void) +{ + static int replacedone = 0; + int i, s; + vaddr_t nva; + + if (replacedone) + return; + replacedone = 1; + + s = splhigh(); + /* + * Create writeable aliases of memory we need + * to write to as kernel is mapped read-only + */ + nva = uvm_km_valloc(kernel_map, 2); + + for (i = 0; i < nitems(ireplace); i++) { + paddr_t kva = trunc_page((paddr_t)ireplace[i].daddr); + paddr_t po = (paddr_t)ireplace[i].daddr & PAGE_MASK; + paddr_t pa1, pa2; + + pmap_extract(pmap_kernel(), kva, &pa1); + pmap_extract(pmap_kernel(), kva + PAGE_SIZE, &pa2); + pmap_kenter_pa(nva, pa1, VM_PROT_READ | VM_PROT_WRITE); + pmap_kenter_pa(nva + PAGE_SIZE, pa2, VM_PROT_READ | + VM_PROT_WRITE); + pmap_update(pmap_kernel()); + + /* replace 3 byte nops with stac/clac instructions */ + bcopy(ireplace[i].saddr, (void *)(nva + po), 3); + } + + uvm_km_free(kernel_map, nva, 2); + + splx(s); +} +#endif /* !SMALL_KERNEL */ + int cpu_match(struct device *parent, void 
*match, void *aux) { @@ -335,6 +407,10 @@ cpu_init(struct cpu_info *ci) if (ci->ci_feature_sefflags & SEFF0EBX_SMEP) lcr4(rcr4() | CR4_SMEP); +#ifndef SMALL_KERNEL + if (ci->ci_feature_sefflags & SEFF0EBX_SMAP) + lcr4(rcr4() | CR4_SMAP); +#endif #ifdef MULTIPROCESSOR ci->ci_flags |= CPUF_RUNNING; diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index e07b5993701..fccbe7e1a75 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.142 2012/09/25 09:58:57 pirofti Exp $ */ +/* $OpenBSD: locore.s,v 1.143 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -65,6 +65,17 @@ #endif /* + * As stac/clac SMAP instructions are 3 bytes, we want the fastest + * 3 byte nop sequence possible here. This will be replaced by + * stac/clac instructions if SMAP is detected after booting. + * + * Intel documents multi-byte NOP sequences as being available + * on all family 0x6 and 0xf processors (ie 686+) + * So use 3 of the single byte nops for compatibility + */ +#define SMAP_NOP .byte 0x90, 0x90, 0x90 + +/* * override user-land alignment before including asm.h */ @@ -817,6 +828,7 @@ ENTRY(memcpy) * copyout(caddr_t from, caddr_t to, size_t len); * Copy len bytes into the user's address space. */ +.globl _C_LABEL(_copyout_stac), _C_LABEL(_copyout_clac) ENTRY(copyout) #ifdef DDB pushl %ebp @@ -845,6 +857,8 @@ ENTRY(copyout) GET_CURPCB(%edx) movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) +_C_LABEL(_copyout_stac): + SMAP_NOP /* bcopy(%esi, %edi, %eax); */ cld @@ -857,6 +871,8 @@ ENTRY(copyout) rep movsb +_C_LABEL(_copyout_clac): + SMAP_NOP popl PCB_ONFAULT(%edx) popl %edi popl %esi @@ -870,6 +886,7 @@ ENTRY(copyout) * copyin(caddr_t from, caddr_t to, size_t len); * Copy len bytes from the user's address space. 
*/ +.globl _C_LABEL(_copyin_stac), _C_LABEL(_copyin_clac) ENTRY(copyin) #ifdef DDB pushl %ebp @@ -880,6 +897,8 @@ ENTRY(copyin) GET_CURPCB(%eax) pushl $0 movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) +_C_LABEL(_copyin_stac): + SMAP_NOP movl 16+FPADD(%esp),%esi movl 20+FPADD(%esp),%edi @@ -907,6 +926,8 @@ ENTRY(copyin) rep movsb +_C_LABEL(_copyin_clac): + SMAP_NOP GET_CURPCB(%edx) popl PCB_ONFAULT(%edx) popl %edi @@ -917,7 +938,10 @@ ENTRY(copyin) #endif ret +.globl _C_LABEL(_copy_fault_clac) ENTRY(copy_fault) +_C_LABEL(_copy_fault_clac): + SMAP_NOP GET_CURPCB(%edx) popl PCB_ONFAULT(%edx) popl %edi @@ -935,6 +959,7 @@ ENTRY(copy_fault) * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else * return 0 or EFAULT. */ +.globl _C_LABEL(_copyoutstr_stac) ENTRY(copyoutstr) #ifdef DDB pushl %ebp @@ -949,6 +974,8 @@ ENTRY(copyoutstr) 5: GET_CURPCB(%eax) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) +_C_LABEL(_copyoutstr_stac): + SMAP_NOP /* * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). */ @@ -991,6 +1018,7 @@ ENTRY(copyoutstr) * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else * return 0 or EFAULT. */ +.globl _C_LABEL(_copyinstr_stac) ENTRY(copyinstr) #ifdef DDB pushl %ebp @@ -1000,6 +1028,8 @@ ENTRY(copyinstr) pushl %edi GET_CURPCB(%ecx) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) +_C_LABEL(_copyinstr_stac): + SMAP_NOP movl 12+FPADD(%esp),%esi # %esi = from movl 16+FPADD(%esp),%edi # %edi = to @@ -1039,10 +1069,13 @@ ENTRY(copyinstr) movl $ENAMETOOLONG,%eax jmp copystr_return +.globl _C_LABEL(_copystr_fault_clac) ENTRY(copystr_fault) movl $EFAULT,%eax copystr_return: +_C_LABEL(_copystr_fault_clac): + SMAP_NOP /* Set *lencopied and return %eax. 
*/ GET_CURPCB(%ecx) movl $0,PCB_ONFAULT(%ecx) @@ -1654,6 +1687,7 @@ ENTRY(i686_pagezero) /* * ucas_32(volatile int32_t *uptr, int32_t old, int32_t new); */ +.global _C_LABEL(_ucas_32_stac), _C_LABEL(_ucas_32_clac) ENTRY(ucas_32) #ifdef DDB pushl %ebp @@ -1672,10 +1706,14 @@ ENTRY(ucas_32) GET_CURPCB(%edx) movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) +_C_LABEL(_ucas_32_stac): + SMAP_NOP lock cmpxchgl %edi, (%esi) +_C_LABEL(_ucas_32_clac): + SMAP_NOP popl PCB_ONFAULT(%edx) popl %edi popl %esi @@ -1690,3 +1728,11 @@ ENTRY(ucas_32) #endif #include <i386/i386/mutex.S> + +.globl _C_LABEL(_stac) +_C_LABEL(_stac): + stac + +.globl _C_LABEL(_clac) +_C_LABEL(_clac): + clac diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index 2c11ebedf5a..7795286ea43 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.515 2012/10/09 09:16:09 jsg Exp $ */ +/* $OpenBSD: machdep.c,v 1.516 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -165,6 +165,8 @@ extern struct proc *npxproc; #include <dev/ic/comvar.h> #endif /* NCOM > 0 */ +void replacesmap(void); + /* the following is used externally (sysctl_hw) */ char machine[] = MACHINE; @@ -1921,6 +1923,10 @@ identifycpu(struct cpu_info *ci) if (ci->ci_flags & CPUF_PRIMARY) { if (cpu_ecxfeature & CPUIDECX_RDRAND) has_rdrand = 1; +#ifndef SMALL_KERNEL + if (ci->ci_feature_sefflags & SEFF0EBX_SMAP) + replacesmap(); +#endif } #ifndef SMALL_KERNEL diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index 9bff6910df9..1da69bf5848 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.105 2012/10/09 04:40:36 jsg Exp $ */ +/* $OpenBSD: trap.c,v 1.106 2012/10/31 03:30:22 jsg Exp $ */ /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */ /*- @@ -407,6 +407,11 @@ trap(struct trapframe *frame) if (cr2 <= VM_MAXUSER_ADDRESS && 
frame->tf_err & PGEX_I) panic("attempt to execute user address %p " "in supervisor mode", (void *)cr2); + /* This will only trigger if SMAP is enabled */ + if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS && + frame->tf_err & PGEX_P) + panic("attempt to access user address %p " + "in supervisor mode", (void *)cr2); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ |