author     pd <pd@cvs.openbsd.org>    2020-04-08 07:39:49 +0000
committer  pd <pd@cvs.openbsd.org>    2020-04-08 07:39:49 +0000
commit     1c744abd28db81ba9b2296822c9a37011a2e6fa4 (patch)
tree       8c23b35c219d6def3e277397732f8e60058be9a4 /sys/arch
parent     77a98cd68d34fb32a9c3b40980cfd46d5ebbc95a (diff)
vmm(4): add IOCTL handler to set the access protections of the EPT
This exposes VMM_IOC_MPROTECT_EPT, which vmd can use to lock in physical pages. For now, vmd simply terminates the VM if such a page later takes a protection fault. This feature is used by solo5, which uses vmm(4) as a backend hypervisor.

ok mpi@

Patch from Adam Steen <adam@adamsteen.com.au>
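Below is a minimal usage sketch (an editorial addition, not part of the patch) of how a userspace monitor such as vmd might lock a guest-physical range with the new ioctl. The struct fields and VMM_IOC_MPROTECT_EPT come from the vmmvar.h hunk further down; the helper name, headers, and the choice of PROT_READ|PROT_EXEC are illustrative assumptions.

    /*
     * Hedged sketch: lock a page-aligned guest-physical range to
     * read+execute via VMM_IOC_MPROTECT_EPT. Field names and the ioctl
     * request come from the diff below; everything else is assumed.
     */
    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>           /* PROT_READ, PROT_WRITE, PROT_EXEC */
    #include <machine/vmmvar.h>     /* VMM_IOC_MPROTECT_EPT, vm_mprotect_ept_params */
    #include <string.h>

    int
    lock_guest_range(int vmm_fd, uint32_t vm_id, uint32_t vcpu_id,
        vaddr_t sgpa, size_t size)
    {
            struct vm_mprotect_ept_params vmep;

            memset(&vmep, 0, sizeof(vmep));
            vmep.vmep_vm_id = vm_id;
            vmep.vmep_vcpu_id = vcpu_id;    /* vcpu must be stopped, see EBUSY check */
            vmep.vmep_sgpa = sgpa;          /* page aligned, outside the PCI MMIO BAR */
            vmep.vmep_size = size;          /* page aligned, non-zero, < 512GB */
            vmep.vmep_prot = PROT_READ | PROT_EXEC; /* W+X and write-only are rejected */

            return (ioctl(vmm_fd, VMM_IOC_MPROTECT_EPT, &vmep));
    }

With such a range locked, a later guest write to it raises an EPT protection fault instead of being silently faulted in.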
Diffstat (limited to 'sys/arch')
-rw-r--r--   sys/arch/amd64/amd64/vmm.c            327
-rw-r--r--   sys/arch/amd64/include/specialreg.h     9
-rw-r--r--   sys/arch/amd64/include/vmmvar.h        27
3 files changed, 349 insertions(+), 14 deletions(-)
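The patch also reports EPT protection faults back to the monitor: vmx_fault_page() now sets vee_fault_type to VEE_FAULT_PROTECT and returns EAGAIN instead of faulting the page in. The following hedged sketch (again an editorial addition) shows how an exit handler might check that field; VEE_FAULT_PROTECT and vee_fault_type come from the vmmvar.h hunk, the terminate-the-VM policy comes from the commit message, and the function name and surrounding run-loop plumbing are assumptions.

    /*
     * Hedged sketch: inspect an EPT violation exit reported by vmm(4).
     * How 'vexit' is obtained from the run ioctl is left out here.
     */
    #include <machine/vmmvar.h>

    static int
    guest_faulted_protected_page(const struct vm_exit *vexit)
    {
            /* Only meaningful when the exit was an EPT violation. */
            if (vexit->vee.vee_fault_type == VEE_FAULT_PROTECT)
                    return (1);     /* vmd currently tears the VM down here */
            return (0);             /* not a protection fault */
    }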
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index f95c6e51ac7..8b2bc9f0192 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm.c,v 1.270 2020/04/08 07:32:56 pd Exp $ */
+/* $OpenBSD: vmm.c,v 1.271 2020/04/08 07:39:48 pd Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
*
@@ -124,6 +124,7 @@ int vm_get_info(struct vm_info_params *);
int vm_resetcpu(struct vm_resetcpu_params *);
int vm_intr_pending(struct vm_intr_params *);
int vm_rwregs(struct vm_rwregs_params *, int);
+int vm_mprotect_ept(struct vm_mprotect_ept_params *);
int vm_rwvmparams(struct vm_rwvmparams_params *, int);
int vm_find(uint32_t, struct vm **);
int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
@@ -186,6 +187,8 @@ int svm_fault_page(struct vcpu *, paddr_t);
int vmx_fault_page(struct vcpu *, paddr_t);
int vmx_handle_np_fault(struct vcpu *);
int svm_handle_np_fault(struct vcpu *);
+int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
+pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
int vmm_alloc_vpid(uint16_t *);
void vmm_free_vpid(uint16_t);
const char *vcpu_state_decode(u_int);
@@ -494,6 +497,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
case VMM_IOC_WRITEREGS:
ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
break;
+ case VMM_IOC_MPROTECT_EPT:
+ ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
+ break;
case VMM_IOC_READVMPARAMS:
ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
break;
@@ -532,6 +538,7 @@ pledge_ioctl_vmm(struct proc *p, long com)
case VMM_IOC_INTR:
case VMM_IOC_READREGS:
case VMM_IOC_WRITEREGS:
+ case VMM_IOC_MPROTECT_EPT:
case VMM_IOC_READVMPARAMS:
case VMM_IOC_WRITEVMPARAMS:
return (0);
@@ -807,6 +814,288 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
}
/*
+ * vm_mprotect_ept
+ *
+ * IOCTL handler to set the access protections of the EPT
+ *
+ * Parameters:
+ * vmep: describes the memory range to which the protections will be applied
+ *
+ * Return values:
+ * 0: if successful
+ * ENOENT: if the VM defined by 'vmep' cannot be found
+ * EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
+ * the size is too large (>= 512GB), there is wraparound
+ * (e.g. start = 512GB-1 and end = 512GB-2),
+ * the address specified is not within the vm's mem range,
+ * or the address lies inside reserved (MMIO) memory
+ */
+int
+vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
+{
+ struct vm *vm;
+ struct vcpu *vcpu;
+ vaddr_t sgpa;
+ size_t size;
+ vm_prot_t prot;
+ uint64_t msr;
+ int ret, memtype;
+
+ /* If not EPT or RVI, nothing to do here */
+ if (!(vmm_softc->mode == VMM_MODE_EPT
+ || vmm_softc->mode == VMM_MODE_RVI))
+ return (0);
+
+ /* Find the desired VM */
+ rw_enter_read(&vmm_softc->vm_lock);
+ ret = vm_find(vmep->vmep_vm_id, &vm);
+ rw_exit_read(&vmm_softc->vm_lock);
+
+ /* Not found? exit. */
+ if (ret != 0) {
+ DPRINTF("%s: vm id %u not found\n", __func__,
+ vmep->vmep_vm_id);
+ return (ret);
+ }
+
+ rw_enter_read(&vm->vm_vcpu_lock);
+ SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+ if (vcpu->vc_id == vmep->vmep_vcpu_id)
+ break;
+ }
+ rw_exit_read(&vm->vm_vcpu_lock);
+
+ if (vcpu == NULL) {
+ DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
+ vmep->vmep_vcpu_id, vmep->vmep_vm_id);
+ return (ENOENT);
+ }
+
+ if (vcpu->vc_state != VCPU_STATE_STOPPED) {
+ DPRINTF("%s: mprotect_ept %u on vm %u attempted "
+ "while vcpu was in state %u (%s)\n", __func__,
+ vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
+ vcpu_state_decode(vcpu->vc_state));
+
+ return (EBUSY);
+ }
+
+ /* Only proceed if the pmap is in the correct mode */
+ KASSERT((vmm_softc->mode == VMM_MODE_EPT &&
+ vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||
+ (vmm_softc->mode == VMM_MODE_RVI &&
+ vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI));
+
+ sgpa = vmep->vmep_sgpa;
+ size = vmep->vmep_size;
+ prot = vmep->vmep_prot;
+
+ /* No W^X permissions */
+ if ((prot & PROT_MASK) != prot &&
+ (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
+ DPRINTF("%s: W+X permission requested\n", __func__);
+ return (EINVAL);
+ }
+
+ /* No Write only permissions */
+ if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
+ DPRINTF("%s: No Write only permissions\n", __func__);
+ return (EINVAL);
+ }
+
+ /* No empty permissions */
+ if (prot == 0) {
+ DPRINTF("%s: No empty permissions\n", __func__);
+ return (EINVAL);
+ }
+
+ /* No execute only on EPT CPUs that don't have that capability */
+ if (vmm_softc->mode == VMM_MODE_EPT) {
+ msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
+ if (prot == PROT_EXEC &&
+ (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS) == 0) {
+ DPRINTF("%s: Execute only permissions unsupported,"
+ " adding read permission\n", __func__);
+
+ prot |= PROT_READ;
+ }
+ }
+
+ /* Must be page aligned */
+ if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
+ return (EINVAL);
+
+ /* size must be less than 512GB */
+ if (size >= NBPD_L4)
+ return (EINVAL);
+
+ /* no wraparound */
+ if (sgpa + size < sgpa)
+ return (EINVAL);
+
+ /*
+ * Specifying addresses within the PCI MMIO space is forbidden.
+ * Disallow addresses that start inside the MMIO space:
+ * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
+ */
+ if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END)
+ return (EINVAL);
+
+ /*
+ * ... and disallow addresses that end inside the MMIO space:
+ * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
+ */
+ if (sgpa + size > VMM_PCI_MMIO_BAR_BASE &&
+ sgpa + size <= VMM_PCI_MMIO_BAR_END)
+ return (EINVAL);
+
+ memtype = vmm_get_guest_memtype(vm, sgpa);
+ if (memtype == VMM_MEM_TYPE_UNKNOWN)
+ return (EINVAL);
+
+ if (vmm_softc->mode == VMM_MODE_EPT)
+ ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
+ else if (vmm_softc->mode == VMM_MODE_RVI) {
+ pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
+ /* XXX requires an invlpga */
+ ret = 0;
+ } else
+ return (EINVAL);
+
+ return (ret);
+}
+
+/*
+ * vmx_mprotect_ept
+ *
+ * Apply the EPT protections to the requested pages, faulting a page in if
+ * required.
+ */
+int
+vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
+{
+ struct vmx_invept_descriptor vid;
+ pmap_t pmap;
+ pt_entry_t *pte;
+ paddr_t addr;
+ int ret = 0;
+
+ pmap = vm_map->pmap;
+
+ for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
+ pte = vmx_pmap_find_pte_ept(pmap, addr);
+ if (pte == NULL) {
+ ret = uvm_fault(vm_map, addr, VM_FAULT_INVALID,
+ PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (ret)
+ printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
+ __func__, ret, (uint64_t)addr);
+
+ pte = vmx_pmap_find_pte_ept(pmap, addr);
+ if (pte == NULL)
+ return EFAULT;
+ }
+
+ if (prot & PROT_READ)
+ *pte |= EPT_R;
+ else
+ *pte &= ~EPT_R;
+
+ if (prot & PROT_WRITE)
+ *pte |= EPT_W;
+ else
+ *pte &= ~EPT_W;
+
+ if (prot & PROT_EXEC)
+ *pte |= EPT_X;
+ else
+ *pte &= ~EPT_X;
+ }
+
+ /*
+ * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
+ * the first bullet point seems to say we should call invept.
+ *
+ * Software should use the INVEPT instruction with the “single-context”
+ * INVEPT type after making any of the following changes to an EPT
+ * paging-structure entry (the INVEPT descriptor should contain an
+ * EPTP value that references — directly or indirectly
+ * — the modified EPT paging structure):
+ * — Changing any of the privilege bits 2:0 from 1 to 0.
+ */
+ if (pmap->eptp != 0) {
+ memset(&vid, 0, sizeof(vid));
+ vid.vid_eptp = pmap->eptp;
+ DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
+ vid.vid_eptp);
+ invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
+ }
+
+ return ret;
+}
+
+/*
+ * vmx_pmap_find_pte_ept
+ *
+ * find the page table entry specified by addr in the pmap supplied.
+ */
+pt_entry_t *
+vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
+{
+ int l4idx, l3idx, l2idx, l1idx;
+ pd_entry_t *pd;
+ paddr_t pdppa;
+ pt_entry_t *ptes, *pte;
+
+ l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */
+ l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */
+ l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */
+ l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */
+
+ pd = (pd_entry_t *)pmap->pm_pdir;
+ if (pd == NULL)
+ return NULL;
+
+ /*
+ * l4idx should always be 0 since we don't support more than 512GB
+ * guest physical memory.
+ */
+ if (l4idx > 0)
+ return NULL;
+
+ /*
+ * l3idx should always be < MAXDSIZ/1GB because we don't support more
+ * than MAXDSIZ guest phys mem.
+ */
+ if (l3idx >= MAXDSIZ / ((paddr_t)1024 * 1024 * 1024))
+ return NULL;
+
+ pdppa = pd[l4idx] & PG_FRAME;
+ if (pdppa == 0)
+ return NULL;
+
+ ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+ pdppa = ptes[l3idx] & PG_FRAME;
+ if (pdppa == 0)
+ return NULL;
+
+ ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+ pdppa = ptes[l2idx] & PG_FRAME;
+ if (pdppa == 0)
+ return NULL;
+
+ ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+ pte = &ptes[l1idx];
+ if (*pte == 0)
+ return NULL;
+
+ return pte;
+}
+
+/*
* vm_find
*
* Function to find an existing VM by its identifier.
@@ -5107,19 +5396,35 @@ vmx_get_exit_qualification(uint64_t *exit_qualification)
int
vmx_get_guest_faulttype(void)
{
- uint64_t exit_qualification;
+ uint64_t exit_qual;
uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
- uint64_t protmask = IA32_VMX_EPT_FAULT_READ |
- IA32_VMX_EPT_FAULT_WRITE | IA32_VMX_EPT_FAULT_EXEC;
+ vm_prot_t prot, was_prot;
- if (vmx_get_exit_qualification(&exit_qualification))
+ if (vmx_get_exit_qualification(&exit_qual))
return (-1);
- if ((exit_qualification & presentmask) == 0)
+ if ((exit_qual & presentmask) == 0)
return VM_FAULT_INVALID;
- if (exit_qualification & protmask)
+
+ was_prot = 0;
+ if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
+ was_prot |= PROT_READ;
+ if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
+ was_prot |= PROT_WRITE;
+ if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
+ was_prot |= PROT_EXEC;
+
+ if (exit_qual & IA32_VMX_EPT_FAULT_READ)
+ prot = PROT_READ;
+ else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
+ prot = PROT_WRITE;
+ else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
+ prot = PROT_EXEC;
+
+ if ((was_prot & prot) == 0)
return VM_FAULT_PROTECT;
+
return (-1);
}
@@ -5196,6 +5501,9 @@ svm_handle_np_fault(struct vcpu *vcpu)
*
* Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
* at address 'gpa'.
+ *
+ * Returns EAGAIN to indicate a protection fault, i.e. writing to a read-only
+ * page.
*/
int
vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
@@ -5208,6 +5516,11 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
return (EINVAL);
}
+ if (fault_type == VM_FAULT_PROTECT) {
+ vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
+ return (EAGAIN);
+ }
+
ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
PROT_READ | PROT_WRITE | PROT_EXEC);
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index c551b6fca26..b291d00b619 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: specialreg.h,v 1.86 2019/12/20 07:49:31 jsg Exp $ */
+/* $OpenBSD: specialreg.h,v 1.87 2020/04/08 07:39:48 pd Exp $ */
/* $NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $ */
/* $NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $ */
@@ -954,9 +954,10 @@
#define IA32_VMX_TRUE_ENTRY_CTLS 0x490
#define IA32_VMX_VMFUNC 0x491
-#define IA32_EPT_VPID_CAP_PAGE_WALK_4 (1ULL << 6)
-#define IA32_EPT_VPID_CAP_WB (1ULL << 14)
-#define IA32_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define IA32_EPT_VPID_CAP_XO_TRANSLATIONS 0x0
+#define IA32_EPT_VPID_CAP_PAGE_WALK_4 (1ULL << 6)
+#define IA32_EPT_VPID_CAP_WB (1ULL << 14)
+#define IA32_EPT_VPID_CAP_AD_BITS (1ULL << 21)
#define IA32_EPT_PAGING_CACHE_TYPE_UC 0x0
#define IA32_EPT_PAGING_CACHE_TYPE_WB 0x6
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 43858558a73..4990a5c5343 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmmvar.h,v 1.69 2020/04/08 07:32:56 pd Exp $ */
+/* $OpenBSD: vmmvar.h,v 1.70 2020/04/08 07:39:48 pd Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
*
@@ -322,6 +322,10 @@ enum {
};
enum {
+ VEE_FAULT_PROTECT
+};
+
+enum {
VMM_CPU_MODE_REAL,
VMM_CPU_MODE_PROT,
VMM_CPU_MODE_PROT32,
@@ -350,6 +354,12 @@ struct vm_exit_inout {
uint16_t vei_port; /* port */
uint32_t vei_data; /* data */
};
+/*
+ * vm_exit_eptviolation : describes an EPT VIOLATION exit
+ */
+struct vm_exit_eptviolation {
+ uint8_t vee_fault_type;
+};
/*
* struct vcpu_segment_info
@@ -447,7 +457,8 @@ struct vm_mem_range {
*/
struct vm_exit {
union {
- struct vm_exit_inout vei; /* IN/OUT exit */
+ struct vm_exit_inout vei; /* IN/OUT exit */
+ struct vm_exit_eptviolation vee; /* EPT VIOLATION exit */
};
struct vcpu_reg_state vrs;
@@ -558,6 +569,15 @@ struct vm_rwregs_params {
struct vcpu_reg_state vrwp_regs;
};
+struct vm_mprotect_ept_params {
+ /* Input parameters to VMM_IOC_MPROTECT_EPT */
+ uint32_t vmep_vm_id;
+ uint32_t vmep_vcpu_id;
+ vaddr_t vmep_sgpa;
+ size_t vmep_size;
+ int vmep_prot;
+};
+
/* IOCTL definitions */
#define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
#define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
@@ -571,7 +591,8 @@ struct vm_rwregs_params {
#define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params)
/* Set VM params */
#define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params)
-
+/* Control the protection of EPT pages */
+#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
/* CPUID masks */
/*