summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Theo de Raadt <deraadt@cvs.openbsd.org> 2023-01-20 16:01:05 +0000
committer Theo de Raadt <deraadt@cvs.openbsd.org> 2023-01-20 16:01:05 +0000
commit 64eda274b19d1687a786d431c866b2ab12f39bce (patch)
tree 4263d1983e6c5a31450b6e36924d37aede035f5e
parent 73e5f6974865e5906ed4dc82dbeff3a360557272 (diff)
On CPUs with the PKU feature, prot=PROT_EXEC pages now create PTEs which
contain PG_XO, which is PKU key1. On every exit from kernel to userland, force the PKU register to inhibit data reads against key1 memory. On (some) traps into the kernel, if the PKU register has been changed, abort the process (processes have no reason to change the PKU register). This provides us with viable xonly functionality on most modern Intel & AMD CPUs. I started with an xsave-based diff from dv@, but discovered the fpu save/restore logic wasn't a good fit and went to direct register management. Disabled on HV (vm) systems until we know they handle PKU correctly. ok kettenis, dv, guenther, etc
-rw-r--r-- sys/arch/amd64/amd64/cpu.c 4
-rw-r--r-- sys/arch/amd64/amd64/locore.S 18
-rw-r--r-- sys/arch/amd64/amd64/pmap.c 24
-rw-r--r-- sys/arch/amd64/amd64/trap.c 22
-rw-r--r-- sys/arch/amd64/amd64/vector.S 3
-rw-r--r-- sys/arch/amd64/include/cpufunc.h 10
-rw-r--r-- sys/arch/amd64/include/pte.h 3
7 files changed, 76 insertions, 8 deletions
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 56c76df9333..38efd28f59b 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.163 2022/11/29 21:41:39 guenther Exp $ */
+/* $OpenBSD: cpu.c,v 1.164 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */
/*-
@@ -737,6 +737,8 @@ cpu_init(struct cpu_info *ci)
cr4 |= CR4_UMIP;
if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
cr4 |= CR4_OSXSAVE;
+ if (pg_xo)
+ cr4 |= CR4_PKE;
if (pmap_use_pcid)
cr4 |= CR4_PCIDE;
lcr4(cr4);
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index 838b11e3e1f..50e6f07ca37 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.131 2022/12/01 00:26:15 guenther Exp $ */
+/* $OpenBSD: locore.S,v 1.132 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */
/*
@@ -597,6 +597,7 @@ IDTVEC_NOALIGN(syscall)
jz .Lsyscall_restore_fsbase
.Lsyscall_restore_registers:
+ call pku_xonly
RET_STACK_REFILL_WITH_RCX
movq TF_R8(%rsp),%r8
@@ -773,6 +774,7 @@ intr_user_exit_post_ast:
jz .Lintr_restore_fsbase
.Lintr_restore_registers:
+ call pku_xonly
RET_STACK_REFILL_WITH_RCX
movq TF_R8(%rsp),%r8
@@ -940,6 +942,7 @@ NENTRY(intr_fast_exit)
testq $PSL_I,%rdx
jnz .Lintr_exit_not_blocked
#endif /* DIAGNOSTIC */
+ call pku_xonly /* XXX guenther disapproves, but foo3 locks */
movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
movq TF_R8(%rsp),%r8
@@ -1105,6 +1108,19 @@ ENTRY(pagezero)
lfence
END(pagezero)
+/* void pku_xonly(void): if pg_xo is nonzero, write PGK_VALUE to PKRU; else do nothing. Overwrites %rax, %ecx, %edx. */
+ENTRY(pku_xonly)
+ movq pg_xo,%rax /* have PKU support? */
+ cmpq $0,%rax /* pg_xo == 0 means PKU is not in use */
+ je 1f /* nothing to enforce: skip the PKRU write */
+ movl $0,%ecx /* force PKRU for xonly restriction */
+ movl $0,%edx /* wrpkru wants %ecx and %edx zero (per Intel SDM) */
+ movl $PGK_VALUE,%eax /* key0 normal, key1 is exec without read */
+ wrpkru /* PKRU <- %eax */
+1: ret
+ lfence /* after ret, same placement as in pagezero above; presumably a speculation barrier - confirm */
+END(pku_xonly)
+
/* int rdmsr_safe(u_int msr, uint64_t *data) */
ENTRY(rdmsr_safe)
RETGUARD_SETUP(rdmsr_safe, r10)
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index ea9b2c142e3..12b0e7c00f0 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.157 2023/01/19 20:17:11 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.158 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */
/*
@@ -232,6 +232,9 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
pt_entry_t pg_nx = 0;
pt_entry_t pg_g_kern = 0;
+/* pg_xo: XO PTE bits, set to PKU key1 (if cpu supports PKU) */
+pt_entry_t pg_xo;
+
/*
* pmap_pg_wc: if our processor supports PAT then we set this
* to be the pte bits for Write Combining. Else we fall back to
@@ -656,13 +659,28 @@ pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
virtual_avail = kva_start; /* first free KVA */
/*
+ * If PKU is available, initialize PROT_EXEC entry correctly,
+ * and enable the feature before it gets used
+ * XXX Some Hypervisors forget to save/restore PKU
+ */
+ if (cpuid_level >= 0x7) {
+ uint32_t ecx, dummy;
+ CPUID_LEAF(0x7, 0, dummy, dummy, ecx, dummy);
+ if ((ecx & SEFF0ECX_PKU) &&
+ (cpu_ecxfeature & CPUIDECX_HV) == 0) {
+ lcr4(rcr4() | CR4_PKE);
+ pg_xo = PG_XO;
+ }
+ }
+
+ /*
* set up protection_codes: we need to be able to convert from
* a MI protection code (some combo of VM_PROT...) to something
* we can jam into a i386 PTE.
*/
protection_codes[PROT_NONE] = pg_nx; /* --- */
- protection_codes[PROT_EXEC] = PG_RO; /* --x */
+ protection_codes[PROT_EXEC] = pg_xo; ; /* --x */
protection_codes[PROT_READ] = PG_RO | pg_nx; /* -r- */
protection_codes[PROT_READ | PROT_EXEC] = PG_RO; /* -rx */
protection_codes[PROT_WRITE] = PG_RW | pg_nx; /* w-- */
@@ -2119,6 +2137,8 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
sva &= PG_FRAME;
eva &= PG_FRAME;
+ if (!(prot & PROT_READ))
+ set |= pg_xo;
if (!(prot & PROT_WRITE))
clear = PG_RW;
if (!(prot & PROT_EXEC))
diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 09574bc2a39..33f73911ff8 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: trap.c,v 1.95 2023/01/17 08:03:51 kettenis Exp $ */
+/* $OpenBSD: trap.c,v 1.96 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */
/*-
@@ -132,6 +132,7 @@ static void trap_print(struct trapframe *, int _type);
static inline void frame_dump(struct trapframe *_tf, struct proc *_p,
const char *_sig, uint64_t _cr2);
static inline void verify_smap(const char *_func);
+static inline int verify_pkru(struct proc *);
static inline void debug_trap(struct trapframe *_frame, struct proc *_p,
long _type);
@@ -357,6 +358,17 @@ kerntrap(struct trapframe *frame)
}
}
+/* If we find out userland changed the pkru register, punish the process: calls sigabort(p) and returns 1; returns 0 when nothing is wrong. */
+static inline int
+verify_pkru(struct proc *p)
+{
+ if (pg_xo == 0 || rdpkru(0) == PGK_VALUE) /* PKU not in use, or PKRU still holds the enforced value */
+ return 0;
+ KERNEL_LOCK(); /* sigabort() is called with the kernel lock held */
+ sigabort(p);
+ KERNEL_UNLOCK();
+ return 1; /* callers in this change skip their normal trap/syscall handling on 1 */
+}
/*
* usertrap(frame): handler for exceptions, faults, and traps from userspace
@@ -380,6 +392,9 @@ usertrap(struct trapframe *frame)
p->p_md.md_regs = frame;
refreshcreds(p);
+ if (verify_pkru(p))
+ goto out;
+
switch (type) {
case T_TSSFLT:
sig = SIGBUS;
@@ -548,6 +563,11 @@ syscall(struct trapframe *frame)
uvmexp.syscalls++;
p = curproc;
+ if (verify_pkru(p)) {
+ userret(p);
+ return;
+ }
+
code = frame->tf_rax;
argp = &args[0];
argoff = 0;
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 160a267c09a..c0e2a76a405 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: vector.S,v 1.87 2022/12/01 00:26:15 guenther Exp $ */
+/* $OpenBSD: vector.S,v 1.88 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */
/*
@@ -149,6 +149,7 @@ INTRENTRY_LABEL(calltrap_specstk):
movq %r12,%rax
movq %r13,%rdx
wrmsr
+ call pku_xonly
popq %rdi
popq %rsi
popq %rdx
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 2a8ddd34fd1..cd2d2db753d 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.37 2022/09/22 04:57:08 robert Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.38 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */
/*-
@@ -232,6 +232,14 @@ rdmsr(u_int msr)
return (((uint64_t)hi << 32) | (uint64_t) lo);
}
+static __inline int /* returns the value read from the PKRU register */
+rdpkru(u_int ecx)
+{
+ uint32_t edx, pkru; /* edx only receives the instruction's %edx output and is otherwise unused */
+ asm volatile("rdpkru " : "=a" (pkru), "=d" (edx) : "c" (ecx)); /* ecx goes in %ecx; NOTE(review): the Intel SDM requires it to be 0 - confirm callers */
+ return pkru; /* uint32_t value converted to the int return type */
+}
+
static __inline void
wrmsr(u_int msr, u_int64_t newval)
{
diff --git a/sys/arch/amd64/include/pte.h b/sys/arch/amd64/include/pte.h
index fc6f9cc4206..c2bd8793c7d 100644
--- a/sys/arch/amd64/include/pte.h
+++ b/sys/arch/amd64/include/pte.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pte.h,v 1.16 2023/01/16 00:04:47 deraadt Exp $ */
+/* $OpenBSD: pte.h,v 1.17 2023/01/20 16:01:04 deraadt Exp $ */
/* $NetBSD: pte.h,v 1.1 2003/04/26 18:39:47 fvdl Exp $ */
/*
@@ -162,6 +162,7 @@ typedef u_int64_t pt_entry_t; /* PTE */
#define PGEX_PK 0x20 /* protection-key violation */
#ifdef _KERNEL
+extern pt_entry_t pg_xo; /* XO pte bits using PKU key1 */
extern pt_entry_t pg_nx; /* NX pte bit */
extern pt_entry_t pg_g_kern; /* PG_G if glbl mappings can be used in kern */
#endif /* _KERNEL */