author    | pd <pd@cvs.openbsd.org> | 2020-04-08 07:39:49 +0000
committer | pd <pd@cvs.openbsd.org> | 2020-04-08 07:39:49 +0000
commit    | 1c744abd28db81ba9b2296822c9a37011a2e6fa4
tree      | 8c23b35c219d6def3e277397732f8e60058be9a4 /sys/arch
parent    | 77a98cd68d34fb32a9c3b40980cfd46d5ebbc95a
vmm(4): add IOCTL handler to set the access protections of the EPT

This exposes VMM_IOC_MPROTECT_EPT, which vmd can use to lock in physical
pages. For now, vmd simply terminates the VM if it later takes a protection
fault on such a page.

This feature is used by solo5, which uses vmm(4) as a backend hypervisor.
ok mpi@
Patch from Adam Steen <adam@adamsteen.com.au>
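
For illustration only (not part of the commit): a minimal sketch of how a userspace monitor such as vmd might drive the new ioctl to make a range of guest-physical pages read/execute only. The struct fields and the VMM_IOC_MPROTECT_EPT request come from the vmmvar.h hunk below; the /dev/vmm device path, the headers chosen, and the VM id and address range are assumptions, and the target vcpu must be stopped or the kernel returns EBUSY.

/*
 * Hedged sketch: invoke VMM_IOC_MPROTECT_EPT from userland.
 * Assumptions: /dev/vmm is the vmm(4) device node, VM id 1 exists,
 * and the guest-physical range 0x100000..0x101000 is ordinary RAM
 * (page aligned, outside the PCI MMIO window, well under 512GB).
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <machine/vmmvar.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct vm_mprotect_ept_params vmep;
	int fd;

	if ((fd = open("/dev/vmm", O_RDWR)) == -1)
		err(1, "open");

	memset(&vmep, 0, sizeof(vmep));
	vmep.vmep_vm_id = 1;			/* hypothetical VM id */
	vmep.vmep_vcpu_id = 0;			/* vcpu must be stopped */
	vmep.vmep_sgpa = 0x100000;		/* page-aligned guest-physical start */
	vmep.vmep_size = 4096;			/* page-aligned length */
	vmep.vmep_prot = PROT_READ | PROT_EXEC;	/* W^X: no W+X, no write-only */

	/* _IOW request: the kernel only reads the parameter block */
	if (ioctl(fd, VMM_IOC_MPROTECT_EPT, &vmep) == -1)
		err(1, "VMM_IOC_MPROTECT_EPT");

	close(fd);
	return (0);
}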
Diffstat (limited to 'sys/arch')
-rw-r--r-- | sys/arch/amd64/amd64/vmm.c          | 327
-rw-r--r-- | sys/arch/amd64/include/specialreg.h |   9
-rw-r--r-- | sys/arch/amd64/include/vmmvar.h     |  27
3 files changed, 349 insertions, 14 deletions
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index f95c6e51ac7..8b2bc9f0192 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm.c,v 1.270 2020/04/08 07:32:56 pd Exp $ */
+/* $OpenBSD: vmm.c,v 1.271 2020/04/08 07:39:48 pd Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -124,6 +124,7 @@ int vm_get_info(struct vm_info_params *);
 int vm_resetcpu(struct vm_resetcpu_params *);
 int vm_intr_pending(struct vm_intr_params *);
 int vm_rwregs(struct vm_rwregs_params *, int);
+int vm_mprotect_ept(struct vm_mprotect_ept_params *);
 int vm_rwvmparams(struct vm_rwvmparams_params *, int);
 int vm_find(uint32_t, struct vm **);
 int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
@@ -186,6 +187,8 @@ int svm_fault_page(struct vcpu *, paddr_t);
 int vmx_fault_page(struct vcpu *, paddr_t);
 int vmx_handle_np_fault(struct vcpu *);
 int svm_handle_np_fault(struct vcpu *);
+int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
+pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
 int vmm_alloc_vpid(uint16_t *);
 void vmm_free_vpid(uint16_t);
 const char *vcpu_state_decode(u_int);
@@ -494,6 +497,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
 	case VMM_IOC_WRITEREGS:
 		ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
 		break;
+	case VMM_IOC_MPROTECT_EPT:
+		ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
+		break;
 	case VMM_IOC_READVMPARAMS:
 		ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
 		break;
@@ -532,6 +538,7 @@ pledge_ioctl_vmm(struct proc *p, long com)
 	case VMM_IOC_INTR:
 	case VMM_IOC_READREGS:
 	case VMM_IOC_WRITEREGS:
+	case VMM_IOC_MPROTECT_EPT:
 	case VMM_IOC_READVMPARAMS:
 	case VMM_IOC_WRITEVMPARAMS:
 		return (0);
@@ -807,6 +814,288 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
 }
 
 /*
+ * vm_mprotect_ept
+ *
+ * IOCTL handler to sets the access protections of the ept
+ *
+ * Parameters:
+ *   vmep: decribes the memory for which the protect will be applied..
+ *
+ * Return values:
+ *  0: if successful
+ *  ENOENT: if the VM defined by 'vmep' cannot be found
+ *  EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
+ *          size is too large (512GB), there is wraparound
+ *          (like start = 512GB-1 and end = 512GB-2),
+ *          the address specified is not within the vm's mem range
+ *          or the address lies inside reserved (MMIO) memory
+ */
+int
+vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
+{
+	struct vm *vm;
+	struct vcpu *vcpu;
+	vaddr_t sgpa;
+	size_t size;
+	vm_prot_t prot;
+	uint64_t msr;
+	int ret, memtype;
+
+	/* If not EPT or RVI, nothing to do here */
+	if (!(vmm_softc->mode == VMM_MODE_EPT
+	    || vmm_softc->mode == VMM_MODE_RVI))
+		return (0);
+
+	/* Find the desired VM */
+	rw_enter_read(&vmm_softc->vm_lock);
+	ret = vm_find(vmep->vmep_vm_id, &vm);
+	rw_exit_read(&vmm_softc->vm_lock);
+
+	/* Not found? exit. */
+	if (ret != 0) {
+		DPRINTF("%s: vm id %u not found\n", __func__,
+		    vmep->vmep_vm_id);
+		return (ret);
+	}
+
+	rw_enter_read(&vm->vm_vcpu_lock);
+	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+		if (vcpu->vc_id == vmep->vmep_vcpu_id)
+			break;
+	}
+	rw_exit_read(&vm->vm_vcpu_lock);
+
+	if (vcpu == NULL) {
+		DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
+		    vmep->vmep_vcpu_id, vmep->vmep_vm_id);
+		return (ENOENT);
+	}
+
+	if (vcpu->vc_state != VCPU_STATE_STOPPED) {
+		DPRINTF("%s: mprotect_ept %u on vm %u attempted "
+		    "while vcpu was in state %u (%s)\n", __func__,
+		    vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
+		    vcpu_state_decode(vcpu->vc_state));
+
+		return (EBUSY);
+	}
+
+	/* Only proceed if the pmap is in the correct mode */
+	KASSERT((vmm_softc->mode == VMM_MODE_EPT &&
+	    vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||
+	    (vmm_softc->mode == VMM_MODE_RVI &&
+	    vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI));
+
+	sgpa = vmep->vmep_sgpa;
+	size = vmep->vmep_size;
+	prot = vmep->vmep_prot;
+
+	/* No W^X permissions */
+	if ((prot & PROT_MASK) != prot &&
+	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
+		DPRINTF("%s: W+X permission requested\n", __func__);
+		return (EINVAL);
+	}
+
+	/* No Write only permissions */
+	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
+		DPRINTF("%s: No Write only permissions\n", __func__);
+		return (EINVAL);
+	}
+
+	/* No empty permissions */
+	if (prot == 0) {
+		DPRINTF("%s: No empty permissions\n", __func__);
+		return (EINVAL);
+	}
+
+	/* No execute only on EPT CPUs that don't have that capability */
+	if (vmm_softc->mode == VMM_MODE_EPT) {
+		msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
+		if (prot == PROT_EXEC &&
+		    (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS) == 0) {
+			DPRINTF("%s: Execute only permissions unsupported,"
+			    " adding read permission\n", __func__);
+
+			prot |= PROT_READ;
+		}
+	}
+
+	/* Must be page aligned */
+	if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
+		return (EINVAL);
+
+	/* size must be less then 512GB */
+	if (size >= NBPD_L4)
+		return (EINVAL);
+
+	/* no wraparound */
+	if (sgpa + size < sgpa)
+		return (EINVAL);
+
+	/*
+	 * Specifying addresses within the PCI MMIO space is forbidden.
+	 * Disallow addresses that start inside the MMIO space:
+	 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
+	 */
+	if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END)
+		return (EINVAL);
+
+	/*
+	 * ... and disallow addresses that end inside the MMIO space:
+	 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
+	 */
+	if (sgpa + size > VMM_PCI_MMIO_BAR_BASE &&
+	    sgpa + size <= VMM_PCI_MMIO_BAR_END)
+		return (EINVAL);
+
+	memtype = vmm_get_guest_memtype(vm, sgpa);
+	if (memtype == VMM_MEM_TYPE_UNKNOWN)
+		return (EINVAL);
+
+	if (vmm_softc->mode == VMM_MODE_EPT)
+		ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
+	else if (vmm_softc->mode == VMM_MODE_RVI) {
+		pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
+		/* XXX requires a invlpga */
+		ret = 0;
+	} else
+		return (EINVAL);
+
+	return (ret);
+}
+
+/*
+ * vmx_mprotect_ept
+ *
+ * apply the ept protections to the requested pages, faulting in the page if
+ * required.
+ */
+int
+vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
+{
+	struct vmx_invept_descriptor vid;
+	pmap_t pmap;
+	pt_entry_t *pte;
+	paddr_t addr;
+	int ret = 0;
+
+	pmap = vm_map->pmap;
+
+	for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
+		pte = vmx_pmap_find_pte_ept(pmap, addr);
+		if (pte == NULL) {
+			ret = uvm_fault(vm_map, addr, VM_FAULT_INVALID,
+			    PROT_READ | PROT_WRITE | PROT_EXEC);
+			if (ret)
+				printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
+				    __func__, ret, (uint64_t)addr);
+
+			pte = vmx_pmap_find_pte_ept(pmap, addr);
+			if (pte == NULL)
+				return EFAULT;
+		}
+
+		if (prot & PROT_READ)
+			*pte |= EPT_R;
+		else
+			*pte &= ~EPT_R;
+
+		if (prot & PROT_WRITE)
+			*pte |= EPT_W;
+		else
+			*pte &= ~EPT_W;
+
+		if (prot & PROT_EXEC)
+			*pte |= EPT_X;
+		else
+			*pte &= ~EPT_X;
+	}
+
+	/*
+	 * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
+	 * the first bullet point seems to say we should call invept.
+	 *
+	 * Software should use the INVEPT instruction with the “single-context”
+	 * INVEPT type after making any of the following changes to an EPT
+	 * paging-structure entry (the INVEPT descriptor should contain an
+	 * EPTP value that references — directly or indirectly
+	 * — the modified EPT paging structure):
+	 * — Changing any of the privilege bits 2:0 from 1 to 0.
+	 * */
+	if (pmap->eptp != 0) {
+		memset(&vid, 0, sizeof(vid));
+		vid.vid_eptp = pmap->eptp;
+		DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
+		    vid.vid_eptp);
+		invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
+	}
+
+	return ret;
+}
+
+/*
+ * vmx_pmap_find_pte_ept
+ *
+ * find the page table entry specified by addr in the pmap supplied.
+ */
+pt_entry_t *
+vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
+{
+	int l4idx, l3idx, l2idx, l1idx;
+	pd_entry_t *pd;
+	paddr_t pdppa;
+	pt_entry_t *ptes, *pte;
+
+	l4idx = (addr & L4_MASK) >> L4_SHIFT; /* PML4E idx */
+	l3idx = (addr & L3_MASK) >> L3_SHIFT; /* PDPTE idx */
+	l2idx = (addr & L2_MASK) >> L2_SHIFT; /* PDE idx */
+	l1idx = (addr & L1_MASK) >> L1_SHIFT; /* PTE idx */
+
+	pd = (pd_entry_t *)pmap->pm_pdir;
+	if (pd == NULL)
+		return NULL;
+
+	/*
+	 * l4idx should always be 0 since we don't support more than 512GB
+	 * guest physical memory.
+	 */
+	if (l4idx > 0)
+		return NULL;
+
+	/*
+	 * l3idx should always be < MAXDSIZ/1GB because we don't support more
+	 * than MAXDSIZ guest phys mem.
+	 */
+	if (l3idx >= MAXDSIZ / ((paddr_t)1024 * 1024 * 1024))
+		return NULL;
+
+	pdppa = pd[l4idx] & PG_FRAME;
+	if (pdppa == 0)
+		return NULL;
+
+	ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+	pdppa = ptes[l3idx] & PG_FRAME;
+	if (pdppa == 0)
+		return NULL;
+
+	ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+	pdppa = ptes[l2idx] & PG_FRAME;
+	if (pdppa == 0)
+		return NULL;
+
+	ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
+
+	pte = &ptes[l1idx];
+	if (*pte == 0)
+		return NULL;
+
+	return pte;
+}
+
+/*
  * vm_find
  *
  * Function to find an existing VM by its identifier.
@@ -5107,19 +5396,35 @@ vmx_get_exit_qualification(uint64_t *exit_qualification)
 int
 vmx_get_guest_faulttype(void)
 {
-	uint64_t exit_qualification;
+	uint64_t exit_qual;
 	uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE |
 	    IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE;
-	uint64_t protmask = IA32_VMX_EPT_FAULT_READ |
-	    IA32_VMX_EPT_FAULT_WRITE | IA32_VMX_EPT_FAULT_EXEC;
+	vm_prot_t prot, was_prot;
 
-	if (vmx_get_exit_qualification(&exit_qualification))
+	if (vmx_get_exit_qualification(&exit_qual))
 		return (-1);
 
-	if ((exit_qualification & presentmask) == 0)
+	if ((exit_qual & presentmask) == 0)
 		return VM_FAULT_INVALID;
-	if (exit_qualification & protmask)
+
+	was_prot = 0;
+	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE)
+		was_prot |= PROT_READ;
+	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE)
+		was_prot |= PROT_WRITE;
+	if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE)
+		was_prot |= PROT_EXEC;
+
+	if (exit_qual & IA32_VMX_EPT_FAULT_READ)
+		prot = PROT_READ;
+	else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE)
+		prot = PROT_WRITE;
+	else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC)
+		prot = PROT_EXEC;
+
+	if ((was_prot & prot) == 0)
 		return VM_FAULT_PROTECT;
+
 	return (-1);
 }
 
@@ -5196,6 +5501,9 @@ svm_handle_np_fault(struct vcpu *vcpu)
  *
  * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
  * at address 'gpa'.
+ *
+ * Returns EAGAIN to indication a protection fault, ie writing to a read only
+ * page.
  */
 int
 vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
@@ -5208,6 +5516,11 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
 		return (EINVAL);
 	}
 
+	if (fault_type == VM_FAULT_PROTECT) {
+		vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
+		return (EAGAIN);
+	}
+
 	ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
 	    PROT_READ | PROT_WRITE | PROT_EXEC);
 
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index c551b6fca26..b291d00b619 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: specialreg.h,v 1.86 2019/12/20 07:49:31 jsg Exp $ */
+/* $OpenBSD: specialreg.h,v 1.87 2020/04/08 07:39:48 pd Exp $ */
 /* $NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $ */
 /* $NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $ */
 
@@ -954,9 +954,10 @@
 #define IA32_VMX_TRUE_ENTRY_CTLS	0x490
 #define IA32_VMX_VMFUNC			0x491
 
-#define IA32_EPT_VPID_CAP_PAGE_WALK_4	(1ULL << 6)
-#define IA32_EPT_VPID_CAP_WB		(1ULL << 14)
-#define IA32_EPT_VPID_CAP_AD_BITS	(1ULL << 21)
+#define IA32_EPT_VPID_CAP_XO_TRANSLATIONS	0x0
+#define IA32_EPT_VPID_CAP_PAGE_WALK_4		(1ULL << 6)
+#define IA32_EPT_VPID_CAP_WB			(1ULL << 14)
+#define IA32_EPT_VPID_CAP_AD_BITS		(1ULL << 21)
 
 #define IA32_EPT_PAGING_CACHE_TYPE_UC	0x0
 #define IA32_EPT_PAGING_CACHE_TYPE_WB	0x6
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 43858558a73..4990a5c5343 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmmvar.h,v 1.69 2020/04/08 07:32:56 pd Exp $ */
+/* $OpenBSD: vmmvar.h,v 1.70 2020/04/08 07:39:48 pd Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -322,6 +322,10 @@ enum {
 };
 
 enum {
+	VEE_FAULT_PROTECT
+};
+
+enum {
 	VMM_CPU_MODE_REAL,
 	VMM_CPU_MODE_PROT,
 	VMM_CPU_MODE_PROT32,
@@ -350,6 +354,12 @@ struct vm_exit_inout {
 	uint16_t		vei_port;	/* port */
 	uint32_t		vei_data;	/* data */
 };
+/*
+ * vm_exit_eptviolation : describes an EPT VIOLATION exit
+ */
+struct vm_exit_eptviolation {
+	uint8_t			vee_fault_type;
+};
 
 /*
  * struct vcpu_segment_info
@@ -447,7 +457,8 @@ struct vm_mem_range {
  */
 struct vm_exit {
 	union {
-		struct vm_exit_inout		vei;	/* IN/OUT exit */
+		struct vm_exit_inout		vei;	/* IN/OUT exit */
+		struct vm_exit_eptviolation	vee;	/* EPT VIOLATION exit*/
 	};
 
 	struct vcpu_reg_state		vrs;
@@ -558,6 +569,15 @@ struct vm_rwregs_params {
 	struct vcpu_reg_state	vrwp_regs;
 };
 
+struct vm_mprotect_ept_params {
+	/* Input parameters to VMM_IOC_MPROTECT_EPT */
+	uint32_t		vmep_vm_id;
+	uint32_t		vmep_vcpu_id;
+	vaddr_t			vmep_sgpa;
+	size_t			vmep_size;
+	int			vmep_prot;
+};
+
 /* IOCTL definitions */
 #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
 #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
@@ -571,7 +591,8 @@ struct vm_rwregs_params {
 #define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params)
 /* Set VM params */
 #define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params)
-
+/* Control the protection of ept pages*/
+#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)
 
 /* CPUID masks */
 /*