diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2023-01-20 16:01:05 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2023-01-20 16:01:05 +0000 |
commit | 64eda274b19d1687a786d431c866b2ab12f39bce (patch) | |
tree | 4263d1983e6c5a31450b6e36924d37aede035f5e | |
parent | 73e5f6974865e5906ed4dc82dbeff3a360557272 (diff) |
On CPUs with the PKU feature, prot=PROT_EXEC pages now create PTEs which
contain PG_XO, which is PKU key1. On every exit from kernel to userland,
force the PKRU register to inhibit data reads against key1 memory. On
(some) traps into the kernel, if the PKRU register has been changed, abort the
process (processes have no reason to change the PKRU register). This
provides us with viable xonly functionality on most modern Intel & AMD
CPUs. I started with an xsave-based diff from dv@, but discovered the
FPU save/restore logic wasn't a good fit and went to direct register management.
Disabled on HV (VM) systems until we know they handle PKU correctly.
ok kettenis, dv, guenther, etc
-rw-r--r-- | sys/arch/amd64/amd64/cpu.c | 4 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/locore.S | 18 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/pmap.c | 24 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/trap.c | 22 | ||||
-rw-r--r-- | sys/arch/amd64/amd64/vector.S | 3 | ||||
-rw-r--r-- | sys/arch/amd64/include/cpufunc.h | 10 | ||||
-rw-r--r-- | sys/arch/amd64/include/pte.h | 3 |
7 files changed, 76 insertions, 8 deletions
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c index 56c76df9333..38efd28f59b 100644 --- a/sys/arch/amd64/amd64/cpu.c +++ b/sys/arch/amd64/amd64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.163 2022/11/29 21:41:39 guenther Exp $ */ +/* $OpenBSD: cpu.c,v 1.164 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ /*- @@ -737,6 +737,8 @@ cpu_init(struct cpu_info *ci) cr4 |= CR4_UMIP; if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd) cr4 |= CR4_OSXSAVE; + if (pg_xo) + cr4 |= CR4_PKE; if (pmap_use_pcid) cr4 |= CR4_PCIDE; lcr4(cr4); diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 838b11e3e1f..50e6f07ca37 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.131 2022/12/01 00:26:15 guenther Exp $ */ +/* $OpenBSD: locore.S,v 1.132 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ /* @@ -597,6 +597,7 @@ IDTVEC_NOALIGN(syscall) jz .Lsyscall_restore_fsbase .Lsyscall_restore_registers: + call pku_xonly RET_STACK_REFILL_WITH_RCX movq TF_R8(%rsp),%r8 @@ -773,6 +774,7 @@ intr_user_exit_post_ast: jz .Lintr_restore_fsbase .Lintr_restore_registers: + call pku_xonly RET_STACK_REFILL_WITH_RCX movq TF_R8(%rsp),%r8 @@ -940,6 +942,7 @@ NENTRY(intr_fast_exit) testq $PSL_I,%rdx jnz .Lintr_exit_not_blocked #endif /* DIAGNOSTIC */ + call pku_xonly /* XXX guenther disapproves, but foo3 locks */ movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_R8(%rsp),%r8 @@ -1105,6 +1108,19 @@ ENTRY(pagezero) lfence END(pagezero) +/* void pku_xonly(void) */ +ENTRY(pku_xonly) + movq pg_xo,%rax /* have PKU support? 
*/ + cmpq $0,%rax + je 1f + movl $0,%ecx /* force PKRU for xonly restriction */ + movl $0,%edx + movl $PGK_VALUE,%eax /* key0 normal, key1 is exec without read */ + wrpkru +1: ret + lfence +END(pku_xonly) + /* int rdmsr_safe(u_int msr, uint64_t *data) */ ENTRY(rdmsr_safe) RETGUARD_SETUP(rdmsr_safe, r10) diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index ea9b2c142e3..12b0e7c00f0 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.157 2023/01/19 20:17:11 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.158 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ /* @@ -232,6 +232,9 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ pt_entry_t pg_nx = 0; pt_entry_t pg_g_kern = 0; +/* pg_nx: XO PTE bits, set to PKU key1 (if cpu supports PKU) */ +pt_entry_t pg_xo; + /* * pmap_pg_wc: if our processor supports PAT then we set this * to be the pte bits for Write Combining. Else we fall back to @@ -656,13 +659,28 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa) virtual_avail = kva_start; /* first free KVA */ /* + * If PKU is available, initialize PROT_EXEC entry correctly, + * and enable the feature before it gets used + * XXX Some Hypervisors forget to save/restore PKU + */ + if (cpuid_level >= 0x7) { + uint32_t ecx, dummy; + CPUID_LEAF(0x7, 0, dummy, dummy, ecx, dummy); + if ((ecx & SEFF0ECX_PKU) && + (cpu_ecxfeature & CPUIDECX_HV) == 0) { + lcr4(rcr4() | CR4_PKE); + pg_xo = PG_XO; + } + } + + /* * set up protection_codes: we need to be able to convert from * a MI protection code (some combo of VM_PROT...) to something * we can jam into a i386 PTE. 
*/ protection_codes[PROT_NONE] = pg_nx; /* --- */ - protection_codes[PROT_EXEC] = PG_RO; /* --x */ + protection_codes[PROT_EXEC] = pg_xo; ; /* --x */ protection_codes[PROT_READ] = PG_RO | pg_nx; /* -r- */ protection_codes[PROT_READ | PROT_EXEC] = PG_RO; /* -rx */ protection_codes[PROT_WRITE] = PG_RW | pg_nx; /* w-- */ @@ -2119,6 +2137,8 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) sva &= PG_FRAME; eva &= PG_FRAME; + if (!(prot & PROT_READ)) + set |= pg_xo; if (!(prot & PROT_WRITE)) clear = PG_RW; if (!(prot & PROT_EXEC)) diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index 09574bc2a39..33f73911ff8 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.95 2023/01/17 08:03:51 kettenis Exp $ */ +/* $OpenBSD: trap.c,v 1.96 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ /*- @@ -132,6 +132,7 @@ static void trap_print(struct trapframe *, int _type); static inline void frame_dump(struct trapframe *_tf, struct proc *_p, const char *_sig, uint64_t _cr2); static inline void verify_smap(const char *_func); +static inline int verify_pkru(struct proc *); static inline void debug_trap(struct trapframe *_frame, struct proc *_p, long _type); @@ -357,6 +358,17 @@ kerntrap(struct trapframe *frame) } } +/* If we find out userland changed the pkru register, punish the process */ +static inline int +verify_pkru(struct proc *p) +{ + if (pg_xo == 0 || rdpkru(0) == PGK_VALUE) + return 0; + KERNEL_LOCK(); + sigabort(p); + KERNEL_UNLOCK(); + return 1; +} /* * usertrap(frame): handler for exceptions, faults, and traps from userspace @@ -380,6 +392,9 @@ usertrap(struct trapframe *frame) p->p_md.md_regs = frame; refreshcreds(p); + if (verify_pkru(p)) + goto out; + switch (type) { case T_TSSFLT: sig = SIGBUS; @@ -548,6 +563,11 @@ syscall(struct trapframe *frame) uvmexp.syscalls++; p = curproc; + if (verify_pkru(p)) { + 
userret(p); + return; + } + code = frame->tf_rax; argp = &args[0]; argoff = 0; diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S index 160a267c09a..c0e2a76a405 100644 --- a/sys/arch/amd64/amd64/vector.S +++ b/sys/arch/amd64/amd64/vector.S @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.S,v 1.87 2022/12/01 00:26:15 guenther Exp $ */ +/* $OpenBSD: vector.S,v 1.88 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */ /* @@ -149,6 +149,7 @@ INTRENTRY_LABEL(calltrap_specstk): movq %r12,%rax movq %r13,%rdx wrmsr + call pku_xonly popq %rdi popq %rsi popq %rdx diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h index 2a8ddd34fd1..cd2d2db753d 100644 --- a/sys/arch/amd64/include/cpufunc.h +++ b/sys/arch/amd64/include/cpufunc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc.h,v 1.37 2022/09/22 04:57:08 robert Exp $ */ +/* $OpenBSD: cpufunc.h,v 1.38 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */ /*- @@ -232,6 +232,14 @@ rdmsr(u_int msr) return (((uint64_t)hi << 32) | (uint64_t) lo); } +static __inline int +rdpkru(u_int ecx) +{ + uint32_t edx, pkru; + asm volatile("rdpkru " : "=a" (pkru), "=d" (edx) : "c" (ecx)); + return pkru; +} + static __inline void wrmsr(u_int msr, u_int64_t newval) { diff --git a/sys/arch/amd64/include/pte.h b/sys/arch/amd64/include/pte.h index fc6f9cc4206..c2bd8793c7d 100644 --- a/sys/arch/amd64/include/pte.h +++ b/sys/arch/amd64/include/pte.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pte.h,v 1.16 2023/01/16 00:04:47 deraadt Exp $ */ +/* $OpenBSD: pte.h,v 1.17 2023/01/20 16:01:04 deraadt Exp $ */ /* $NetBSD: pte.h,v 1.1 2003/04/26 18:39:47 fvdl Exp $ */ /* @@ -162,6 +162,7 @@ typedef u_int64_t pt_entry_t; /* PTE */ #define PGEX_PK 0x20 /* protection-key violation */ #ifdef _KERNEL +extern pt_entry_t pg_xo; /* XO pte bits using PKU key1 */ extern pt_entry_t pg_nx; /* NX pte bit */ extern pt_entry_t pg_g_kern; /* PG_G if glbl mappings can 
be used in kern */ #endif /* _KERNEL */ |