diff options
Diffstat (limited to 'sys/arch')
-rw-r--r-- | sys/arch/amd64/amd64/vmm.c | 157 | ||||
-rw-r--r-- | sys/arch/amd64/include/vmmvar.h | 3 |
2 files changed, 158 insertions, 2 deletions
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c index c651f1e88bc..66bd38c77f4 100644 --- a/sys/arch/amd64/amd64/vmm.c +++ b/sys/arch/amd64/amd64/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.242 2019/05/10 20:17:41 pd Exp $ */ +/* $OpenBSD: vmm.c,v 1.243 2019/05/12 20:56:34 pd Exp $ */ /* * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> * @@ -3950,6 +3950,160 @@ vmm_fpusave(struct vcpu *vcpu) } /* + * vmm_translate_gva + * + * Translates a guest virtual address to a guest physical address by walking + * the currently active page table (if needed). + * + * Note - this function can possibly alter the supplied VCPU state. + * Specifically, it may inject exceptions depending on the current VCPU + * configuration, and may alter %cr2 on #PF. Consequently, this function + * should only be used as part of instruction emulation. + * + * Parameters: + * vcpu: The VCPU this translation should be performed for (guest MMU settings + * are gathered from this VCPU) + * va: virtual address to translate + * pa: pointer to paddr_t variable that will receive the translated physical + * address. 'pa' is unchanged on error. + * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which + * the address should be translated + * + * Return values: + * 0: the address was successfully translated - 'pa' contains the physical + * address currently mapped by 'va'. + * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case + * and %cr2 set in the vcpu structure. + * EINVAL: an error occurred reading paging table structures + */ +int +vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode) +{ + int level, shift, pdidx; + uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask; + uint64_t shift_width, pte_size, *hva; + paddr_t hpa; + struct vcpu_reg_state vrs; + + level = 0; + + if (vmm_softc->mode == VMM_MODE_EPT || + vmm_softc->mode == VMM_MODE_VMX) { + if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, &vrs)) + return (EINVAL); + } else if (vmm_softc->mode == VMM_MODE_RVI || + vmm_softc->mode == VMM_MODE_SVM) { + if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs)) + return (EINVAL); + } + + DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__, + vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]); + + if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) { + DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__, + va); + *pa = va; + return (0); + } + + pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3]; + + if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) { + if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) { + pte_size = sizeof(uint64_t); + shift_width = 9; + + if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) { + level = 4; + mask = L4_MASK; + shift = L4_SHIFT; + } else { + level = 3; + mask = L3_MASK; + shift = L3_SHIFT; + } + } else { + level = 2; + shift_width = 10; + mask = 0xFFC00000; + shift = 22; + pte_size = sizeof(uint32_t); + } + } else { + return (EINVAL); + } + + DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, " + "shift_width=%lld\n", __func__, pte_size, level, mask, shift, + shift_width); + + /* XXX: Check for R bit in segment selector and set A bit */ + + for (;level > 0; level--) { + pdidx = (va & mask) >> shift; + pte_paddr = (pt_paddr) + (pdidx * pte_size); + + DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__, + level, pte_paddr); + if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr, + &hpa)) { + DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n", + __func__, pte_paddr); + return (EINVAL); + } + + hpa = hpa | (pte_paddr & 0xFFF); + hva = (uint64_t *)PMAP_DIRECT_MAP(hpa); + DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n", + __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva); + if (pte_size == 8) + pte = *hva; + else + pte = *(uint32_t *)hva; + + DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr, + pte); + + /* XXX: Set CR2 */ + if (!(pte & PG_V)) + return (EFAULT); + + /* XXX: Check for SMAP */ + if ((mode == PROT_WRITE) && !(pte & PG_RW)) + return (EPERM); + + if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u)) + return (EPERM); + + pte = pte | PG_U; + if (mode == PROT_WRITE) + pte = pte | PG_M; + *hva = pte; + + /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */ + if (pte & PG_PS) + break; + + if (level > 1) { + pt_paddr = pte & PG_FRAME; + shift -= shift_width; + mask = mask >> shift_width; + } + } + + low_mask = (1 << shift) - 1; + high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask; + *pa = (pte & high_mask) | (va & low_mask); + + DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, + va, *pa); + + return (0); +} + + +/* * vcpu_run_vmx * * VMX main loop used to run a VCPU. @@ -4355,6 +4509,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) /* Copy the VCPU register state to the exit structure */ if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs)) ret = EINVAL; + vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu); /* * We are heading back to userspace (vmd), either because we need help * handling an exit, a guest interrupt is pending, or we failed in some diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h index a6d7d4ad22e..5bbc099740f 100644 --- a/sys/arch/amd64/include/vmmvar.h +++ b/sys/arch/amd64/include/vmmvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmmvar.h,v 1.63 2019/04/01 12:02:43 mlarkin Exp $ */ +/* $OpenBSD: vmmvar.h,v 1.64 2019/05/12 20:56:34 pd Exp $ */ /* * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> * @@ -451,6 +451,7 @@ struct vm_exit { }; struct vcpu_reg_state vrs; + int cpl; }; struct vm_create_params { |