author | pd <pd@cvs.openbsd.org> | 2019-01-18 01:34:51 +0000
---|---|---
committer | pd <pd@cvs.openbsd.org> | 2019-01-18 01:34:51 +0000
commit | d5ea264072f2d4b0d4480de6fee2698391e06e32 (patch) |
tree | 91c4033f3bc1e168d291303d4355fb12357b0f58 /sys/arch/i386 |
parent | 532bade6dd16ec7ebee5cd8c05f5955dac154f68 (diff) |
delete vmm(4) in i386
We will still be able to run i386 guests on amd64 vmm.
Reasons to delete i386 vmm:
- It has been broken for a while, and almost no one complained.
- It had been falling out of sync with amd64 even while it worked.
- If your machine has VMX, you can most probably run amd64, so why not run that?
ok deraadt@ mlarkin@
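For context on what the patch below unwires: vmm(4) was hooked into the i386 kernel through config(8)'s needs-flag mechanism (the "file arch/i386/i386/vmm.c vmm needs-flag" line removed from files.i386), which generates a vmm.h header defining NVMM as the number of configured vmm devices; the hooks in cpu.c, machdep.c, ipifuncs.c and mainbus.c are all guarded by it. Below is a minimal, illustrative userland sketch of that pattern — NVMM is hard-coded here in place of the generated header, and cpu_init_vmm() is only a stand-in for the real kernel hooks:

	/*
	 * Illustrative sketch only: in a real kernel build, config(8)
	 * generates vmm.h with NVMM set to the number of "vmm" devices
	 * in the kernel configuration.
	 */
	#include <stdio.h>

	#define NVMM 0	/* what a generated vmm.h defines when vmm is absent */

	#if NVMM > 0
	static void
	cpu_init_vmm(void)
	{
		printf("vmm hooks compiled in\n");
	}
	#endif /* NVMM > 0 */

	int
	main(void)
	{
	#if NVMM > 0
		cpu_init_vmm();
	#else
		printf("NVMM == 0: every vmm hook compiles away\n");
	#endif
		return (0);
	}

With the device and its needs-flag entry deleted, these guards serve no purpose, which is why the patch removes the "#if NVMM > 0" blocks outright rather than leaving them to compile away.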
Diffstat (limited to 'sys/arch/i386')
-rw-r--r-- | sys/arch/i386/conf/GENERIC | 3
-rw-r--r-- | sys/arch/i386/conf/Makefile.i386 | 4
-rw-r--r-- | sys/arch/i386/conf/files.i386 | 10
-rw-r--r-- | sys/arch/i386/i386/conf.c | 15
-rw-r--r-- | sys/arch/i386/i386/cpu.c | 26
-rw-r--r-- | sys/arch/i386/i386/ipifuncs.c | 29
-rw-r--r-- | sys/arch/i386/i386/machdep.c | 116
-rw-r--r-- | sys/arch/i386/i386/mainbus.c | 13
-rw-r--r-- | sys/arch/i386/i386/pmap.c | 24
-rw-r--r-- | sys/arch/i386/i386/pmapae.c | 64
-rw-r--r-- | sys/arch/i386/i386/vmm.c | 6805
-rw-r--r-- | sys/arch/i386/i386/vmm_support.S | 290
-rw-r--r-- | sys/arch/i386/include/conf.h | 6
-rw-r--r-- | sys/arch/i386/include/cpu.h | 41
-rw-r--r-- | sys/arch/i386/include/intrdefs.h | 9
-rw-r--r-- | sys/arch/i386/include/pmap.h | 11
-rw-r--r-- | sys/arch/i386/include/pte.h | 9
-rw-r--r-- | sys/arch/i386/include/specialreg.h | 455
-rw-r--r-- | sys/arch/i386/include/vmmvar.h | 809
19 files changed, 20 insertions, 8719 deletions
diff --git a/sys/arch/i386/conf/GENERIC b/sys/arch/i386/conf/GENERIC
index e34f28d01cf..01eddb941ef 100644
--- a/sys/arch/i386/conf/GENERIC
+++ b/sys/arch/i386/conf/GENERIC
@@ -1,4 +1,4 @@
-# $OpenBSD: GENERIC,v 1.838 2018/11/23 12:37:40 reyk Exp $
+# $OpenBSD: GENERIC,v 1.839 2019/01/18 01:34:50 pd Exp $
 #
 # For further information on compiling OpenBSD kernels, see the config(8)
 # man page.
@@ -78,7 +78,6 @@ isa0 at gscpcib?
 isa0 at glxpcib?
 eisa0 at mainbus0
 pci* at mainbus0
-vmm0 at mainbus0
 pchb* at pci?  # PCI-Host bridges
 ppb* at pci?   # PCI-PCI bridges
diff --git a/sys/arch/i386/conf/Makefile.i386 b/sys/arch/i386/conf/Makefile.i386
index 3ee65a9256e..e72bde2cd87 100644
--- a/sys/arch/i386/conf/Makefile.i386
+++ b/sys/arch/i386/conf/Makefile.i386
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile.i386,v 1.130 2018/10/30 11:08:30 kettenis Exp $
+# $OpenBSD: Makefile.i386,v 1.131 2019/01/18 01:34:50 pd Exp $
 
 # For instructions on building kernels consult the config(8) and options(4)
 # manual pages.
@@ -162,7 +162,7 @@ depend obj:
 locore0.o: ${_machdir}/${_mach}/locore0.S assym.h
 locore.o mutex.o in_cksum.o mptramp.o: assym.h
-kvm86call.o acpi_wakecode.o vmm_support.o: assym.h
+kvm86call.o acpi_wakecode.o: assym.h
 
 hardlink-obsd:
 	[[ ! -f /bsd ]] || cmp -s bsd /bsd || ln -f /bsd /obsd
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386
index e44323853ea..ed0eaaea075 100644
--- a/sys/arch/i386/conf/files.i386
+++ b/sys/arch/i386/conf/files.i386
@@ -1,4 +1,4 @@
-# $OpenBSD: files.i386,v 1.243 2018/08/23 14:47:52 jsg Exp $
+# $OpenBSD: files.i386,v 1.244 2019/01/18 01:34:50 pd Exp $
 #
 # new style config file for i386 architecture
 #
@@ -392,14 +392,6 @@ file arch/i386/i386/acpi_machdep.c acpi
 file arch/i386/i386/acpi_wakecode.S acpi & !small_kernel
 
 #
-# VMM
-#
-device vmm {}
-attach vmm at mainbus
-file arch/i386/i386/vmm.c vmm needs-flag
-file arch/i386/i386/vmm_support.S vmm
-
-#
 # IPMI
 #
 attach ipmi at mainbus
diff --git a/sys/arch/i386/i386/conf.c b/sys/arch/i386/i386/conf.c
index 3565bc5ba13..346bee93262 100644
--- a/sys/arch/i386/i386/conf.c
+++ b/sys/arch/i386/i386/conf.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: conf.c,v 1.161 2018/08/31 04:20:37 visa Exp $	*/
+/*	$OpenBSD: conf.c,v 1.162 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: conf.c,v 1.75 1996/05/03 19:40:20 christos Exp $	*/
 
 /*
@@ -105,15 +105,6 @@ int nblkdev = nitems(bdevsw);
 	(dev_type_stop((*))) enodev, 0, seltrue, \
 	(dev_type_mmap((*))) enodev, 0 }
 
-/* open, close, ioctl */
-#define cdev_vmm_init(c,n) { \
-	dev_init(c,n,open), dev_init(c,n,close), \
-	(dev_type_read((*))) enodev, \
-	(dev_type_write((*))) enodev, \
-	dev_init(c,n,ioctl), \
-	(dev_type_stop((*))) enodev, 0, seltrue, \
-	(dev_type_mmap((*))) enodev }
-
 #define mmread mmrw
 #define mmwrite mmrw
 cdev_decl(mm);
@@ -187,8 +178,6 @@ cdev_decl(pci);
 #include "pvbus.h"
 #include "ipmi.h"
 #include "switch.h"
-#include "vmm.h"
-cdev_decl(vmm);
 
 struct cdevsw cdevsw[] =
 {
@@ -202,7 +191,7 @@ struct cdevsw cdevsw[] =
 	cdev_log_init(1,log),		/* 7: /dev/klog */
 	cdev_tty_init(NCOM,com),	/* 8: serial port */
 	cdev_disk_init(NFD,fd),		/* 9: floppy disk */
-	cdev_vmm_init(NVMM,vmm),	/* 10: vmm */
+	cdev_notdef(),			/* 10 */
 	cdev_notdef(),			/* 11 */
 	cdev_wsdisplay_init(NWSDISPLAY,	/* 12: frame buffers, etc.
	 */ wsdisplay),
diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c
index dd8e404d305..ea81df66231 100644
--- a/sys/arch/i386/i386/cpu.c
+++ b/sys/arch/i386/i386/cpu.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpu.c,v 1.96 2018/10/23 17:51:32 kettenis Exp $	*/
+/*	$OpenBSD: cpu.c,v 1.97 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $	*/
 
 /*-
@@ -66,7 +66,6 @@
 
 #include "lapic.h"
 #include "ioapic.h"
-#include "vmm.h"
 #include "pvbus.h"
 
 #include <sys/param.h>
@@ -133,9 +132,6 @@ void	cpu_idle_mwait_cycle(void);
 void	cpu_init_mwait(struct cpu_softc *);
 void	cpu_init_tss(struct i386tss *, void *, void *);
 void	cpu_update_nmi_cr3(vaddr_t);
-#if NVMM > 0
-void	cpu_init_vmm(struct cpu_info *ci);
-#endif /* NVMM > 0 */
 
 u_int	cpu_mwait_size, cpu_mwait_states;
@@ -386,9 +382,6 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
 	}
 #endif
 
-#if NVMM > 0
-	cpu_init_vmm(ci);
-#endif /* NVMM > 0 */
 }
 
 /*
@@ -474,23 +467,6 @@ cpu_init(struct cpu_info *ci)
 }
 
 void
-cpu_init_vmm(struct cpu_info *ci)
-{
-	/*
-	 * Allocate a per-cpu VMXON region
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_VMX) {
-		ci->ci_vmxon_region_pa = 0;
-		ci->ci_vmxon_region = (struct vmxon_region *)malloc(PAGE_SIZE,
-		    M_DEVBUF, M_WAITOK|M_ZERO);
-		if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
-		    (paddr_t *)&ci->ci_vmxon_region_pa))
-			panic("Can't locate VMXON region in phys mem\n");
-	}
-}
-
-
-void
 patinit(struct cpu_info *ci)
 {
 	extern int pmap_pg_wc;
diff --git a/sys/arch/i386/i386/ipifuncs.c b/sys/arch/i386/i386/ipifuncs.c
index 2ef0e4d9429..6b705741d37 100644
--- a/sys/arch/i386/i386/ipifuncs.c
+++ b/sys/arch/i386/i386/ipifuncs.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ipifuncs.c,v 1.31 2018/07/30 14:19:12 kettenis Exp $	*/
+/*	$OpenBSD: ipifuncs.c,v 1.32 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: ipifuncs.c,v 1.1.2.3 2000/06/26 02:04:06 sommerfeld Exp $	*/
 
 /*-
@@ -37,7 +37,6 @@
  */
 
 #include "npx.h"
-#include "vmm.h"
 
 #include <sys/param.h>
 #include <sys/device.h>
@@ -72,11 +71,6 @@ void i386_ipi_reload_mtrr(struct cpu_info *);
 #define i386_ipi_reload_mtrr 0
 #endif
 
-#if NVMM > 0
-void i386_ipi_start_vmm(struct cpu_info *);
-void i386_ipi_stop_vmm(struct cpu_info *);
-#endif /* NVMM > 0 */
-
 void (*ipifunc[I386_NIPI])(struct cpu_info *) =
 {
 	i386_ipi_halt,
@@ -95,13 +89,6 @@ void (*ipifunc[I386_NIPI])(struct cpu_info *) =
 	NULL,
 #endif
 	i386_setperf_ipi,
-#if NVMM > 0
-	i386_ipi_start_vmm,
-	i386_ipi_stop_vmm,
-#else
-	NULL,
-	NULL,
-#endif /* NVMM > 0 */
 };
 
 void
@@ -223,17 +210,3 @@ i386_ipi_handler(void)
 	}
 }
 
-#if NVMM > 0
-void
-i386_ipi_start_vmm(struct cpu_info *ci)
-{
-	start_vmm_on_cpu(ci);
-}
-
-void
-i386_ipi_stop_vmm(struct cpu_info *ci)
-{
-	stop_vmm_on_cpu(ci);
-}
-#endif /* NVMM > 0 */
-
diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c
index 94a7107abc7..7add707307a 100644
--- a/sys/arch/i386/i386/machdep.c
+++ b/sys/arch/i386/i386/machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: machdep.c,v 1.627 2018/08/24 06:25:40 jsg Exp $	*/
+/*	$OpenBSD: machdep.c,v 1.628 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $	*/
 
 /*-
@@ -174,8 +174,6 @@ extern struct proc *npxproc;
 #define DPRINTF(x...)
 #endif /* MACHDEP_DEBUG */
 
-#include "vmm.h"
-
 void replacesmap(void);
 int intr_handler(struct intrframe *, struct intrhand *);
@@ -320,9 +318,6 @@ void	p4_update_cpuspeed(void);
 void	p3_update_cpuspeed(void);
 int	pentium_cpuspeed(int *);
 void	enter_shared_special_pages(void);
-#if NVMM > 0
-void	cpu_check_vmm_cap(struct cpu_info *);
-#endif /* NVMM > 0 */
 
 static __inline u_char
 cyrix_read_reg(u_char reg)
@@ -2160,10 +2155,6 @@ identifycpu(struct cpu_info *ci)
 	} else
 		i386_use_fxsave = 0;
 
-#if NVMM > 0
-	cpu_check_vmm_cap(ci);
-#endif /* NVMM > 0 */
-
 }
 
 char *
@@ -4040,108 +4031,3 @@ intr_barrier(void *ih)
 	sched_barrier(NULL);
 }
 
-#if NVMM > 0
-/*
- * cpu_check_vmm_cap
- *
- * Checks for VMM capabilities for 'ci'. Initializes certain per-cpu VMM
- * state in 'ci' if virtualization extensions are found.
- *
- * Parameters:
- *  ci: the cpu being checked
- */
-void
-cpu_check_vmm_cap(struct cpu_info *ci)
-{
-	uint64_t msr;
-	uint32_t cap, dummy;
-
-	/*
-	 * Check for workable VMX
-	 */
-	if (cpu_ecxfeature & CPUIDECX_VMX) {
-		msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
-
-		if (!(msr & IA32_FEATURE_CONTROL_LOCK))
-			ci->ci_vmm_flags |= CI_VMM_VMX;
-		else {
-			if (msr & IA32_FEATURE_CONTROL_VMX_EN)
-				ci->ci_vmm_flags |= CI_VMM_VMX;
-			else
-				ci->ci_vmm_flags |= CI_VMM_DIS;
-		}
-	}
-
-	/*
-	 * Check for EPT (Intel Nested Paging) and other secondary
-	 * controls
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_VMX) {
-		/* Secondary controls available? */
-		/* XXX should we check true procbased ctls here if avail? */
-		msr = rdmsr(IA32_VMX_PROCBASED_CTLS);
-		if (msr & (IA32_VMX_ACTIVATE_SECONDARY_CONTROLS) << 32) {
-			msr = rdmsr(IA32_VMX_PROCBASED2_CTLS);
-			/* EPT available? */
-			if (msr & (IA32_VMX_ENABLE_EPT) << 32)
-				ci->ci_vmm_flags |= CI_VMM_EPT;
-			/* VM Functions available? */
-			if (msr & (IA32_VMX_ENABLE_VM_FUNCTIONS) << 32) {
-				ci->ci_vmm_cap.vcc_vmx.vmx_vm_func =
-				    rdmsr(IA32_VMX_VMFUNC);
-			}
-		}
-	}
-
-	/*
-	 * Check startup config (VMX)
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_VMX) {
-		/* CR0 fixed and flexible bits */
-		msr = rdmsr(IA32_VMX_CR0_FIXED0);
-		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0 = msr;
-		msr = rdmsr(IA32_VMX_CR0_FIXED1);
-		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1 = msr;
-
-		/* CR4 fixed and flexible bits */
-		msr = rdmsr(IA32_VMX_CR4_FIXED0);
-		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0 = msr;
-		msr = rdmsr(IA32_VMX_CR4_FIXED1);
-		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1 = msr;
-
-		/* VMXON region revision ID (bits 30:0 of IA32_VMX_BASIC) */
-		msr = rdmsr(IA32_VMX_BASIC);
-		ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision =
-		    (uint32_t)(msr & 0x7FFFFFFF);
-
-		/* MSR save / load table size */
-		msr = rdmsr(IA32_VMX_MISC);
-		ci->ci_vmm_cap.vcc_vmx.vmx_msr_table_size =
-		    (uint32_t)(msr & IA32_VMX_MSR_LIST_SIZE_MASK) >> 25;
-
-		/* CR3 target count size */
-		ci->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count =
-		    (uint32_t)(msr & IA32_VMX_CR3_TGT_SIZE_MASK) >> 16;
-	}
-
-	/*
-	 * Check for workable SVM
-	 */
-	if (ecpu_ecxfeature & CPUIDECX_SVM) {
-		msr = rdmsr(MSR_AMD_VM_CR);
-
-		if (!(msr & AMD_SVMDIS))
-			ci->ci_vmm_flags |= CI_VMM_SVM;
-	}
-
-	/*
-	 * Check for SVM Nested Paging
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_SVM) {
-		CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
-		if (cap & AMD_SVM_NESTED_PAGING_CAP)
-			ci->ci_vmm_flags |= CI_VMM_RVI;
-	}
-}
-#endif /* NVMM > 0 */
-
diff --git a/sys/arch/i386/i386/mainbus.c b/sys/arch/i386/i386/mainbus.c
index f86e56e87dd..6b0b62a3131 100644
--- a/sys/arch/i386/i386/mainbus.c
+++ b/sys/arch/i386/i386/mainbus.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mainbus.c,v 1.58 2018/07/02 04:26:58 mlarkin Exp $	*/
+/*	$OpenBSD: mainbus.c,v 1.59 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: mainbus.c,v 1.21 1997/06/06 23:14:20 thorpej Exp $	*/
 
 /*
@@ -54,7 +54,6 @@
 #include "ipmi.h"
 #include "esm.h"
 #include "amdmsr.h"
-#include "vmm.h"
 #include "pvbus.h"
 
 #include <machine/cpuvar.h>
@@ -138,9 +137,6 @@ mainbus_match(struct device *parent, void *match, void *aux)
 void
 mainbus_attach(struct device *parent, struct device *self, void *aux)
 {
-#if NVMM > 0
-	extern int vmm_enabled(void);
-#endif
 	union mainbus_attach_args mba;
 	extern void (*setperf_setup)(struct cpu_info *);
 	extern void (*cpusensors_setup)(struct cpu_info *);
@@ -273,13 +269,6 @@ mainbus_attach(struct device *parent, struct device *self, void *aux)
 #endif
 		config_found(self, &mba.mba_iba, mainbus_print);
 	}
-
-#if NVMM > 0
-	if (vmm_enabled()) {
-		mba.mba_busname = "vmm";
-		config_found(self, &mba.mba_busname, mainbus_print);
-	}
-#endif /* NVMM > 0 */
 }
 
 int
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index e4a639400fa..7708f447612 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.203 2018/06/22 13:21:14 bluhm Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.204 2019/01/18 01:34:50 pd Exp $	*/
 /*	$NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $	*/
 
 /*
@@ -73,8 +73,6 @@
 #include <sys/msgbuf.h>
 #include <stand/boot/bootarg.h>
 
-#include "vmm.h"
-
 /* #define PMAP_DEBUG */
 
 #ifdef PMAP_DEBUG
@@ -964,11 +962,6 @@ pmap_bootstrap(vaddr_t kva_start)
 	kpm->pm_pdirpa_intel = 0;
 	kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
 	    atop(kva_start - VM_MIN_KERNEL_ADDRESS);
-	kpm->pm_type = PMAP_TYPE_NORMAL;
-#if NVMM > 0
-	kpm->pm_npt_pml4 = 0;
-	kpm->pm_npt_pdpt = 0;
-#endif /* NVMM > 0 */
 
 	/*
 	 * the above is just a rough estimate and not critical to the proper
@@ -1356,12 +1349,6 @@ pmap_create(void)
 	setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
 	    SDT_MEMERA, SEL_UPL, 1, 1);
 
-	pmap->pm_type = PMAP_TYPE_NORMAL;
-#if NVMM > 0
-	pmap->pm_npt_pml4 = 0;
-	pmap->pm_npt_pdpt = 0;
-#endif /* NVMM > 0 */
-
 	pmap_pinit_pd(pmap);
 	return (pmap);
 }
@@ -1457,15 +1444,6 @@ pmap_destroy(struct pmap *pmap)
 	uvm_km_free(kernel_map, pmap->pm_pdir, pmap->pm_pdirsize);
 	pmap->pm_pdir = 0;
 
-#if NVMM > 0
-	if (pmap->pm_npt_pml4)
-		km_free((void *)pmap->pm_npt_pml4, PAGE_SIZE, &kv_any,
-		    &kp_zero);
-	if (pmap->pm_npt_pdpt)
-		km_free((void *)pmap->pm_npt_pdpt, PAGE_SIZE, &kv_any,
-		    &kp_zero);
-#endif /* NVMM > 0 */
-
 	if (pmap->pm_pdir_intel) {
 		uvm_km_free(kernel_map, pmap->pm_pdir_intel, pmap->pm_pdirsize);
 		pmap->pm_pdir_intel = 0;
diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c
index 4ea8707d776..fef0815e148 100644
--- a/sys/arch/i386/i386/pmapae.c
+++ b/sys/arch/i386/i386/pmapae.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmapae.c,v 1.56 2018/06/22 13:21:14 bluhm Exp $	*/
+/*	$OpenBSD: pmapae.c,v 1.57 2019/01/18 01:34:50 pd Exp $	*/
 
 /*
  * Copyright (c) 2006-2008 Michael Shalayeff
@@ -2189,65 +2189,3 @@ pmap_flush_page_pae(paddr_t pa)
 	*pte = 0;
 	pmap_update_pg(va);
 }
-
-int
-pmap_convert(struct pmap *pmap, int mode)
-{
-	int ret;
-	pt_entry_t *pte;
-	paddr_t pml4_pa, pdpt_pa;
-
-	pmap->pm_type = mode;
-
-	ret = 0;
-	if (mode == PMAP_TYPE_EPT) {
-		pmap->pm_npt_pml4 = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
-		    &kp_zero, &kd_nowait);
-		if (!pmap->pm_npt_pml4) {
-			ret = ENOMEM;
-			goto error;
-		}
-
-		pmap->pm_npt_pdpt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
-		    &kp_zero, &kd_nowait);
-		if (!pmap->pm_npt_pdpt) {
-			ret = ENOMEM;
-			goto error;
-		}
-
-		if (!pmap_extract(pmap_kernel(), pmap->pm_npt_pml4,
-		    &pml4_pa)) {
-			ret = ENOMEM;
-			goto error;
-		}
-		pmap->pm_npt_pa = pml4_pa;
-
-		if (!pmap_extract(pmap_kernel(), pmap->pm_npt_pdpt,
-		    &pdpt_pa)) {
-			ret = ENOMEM;
-			goto error;
-		}
-
-		pte = (pt_entry_t *)pmap->pm_npt_pml4;
-		pte[0] = (pdpt_pa & PG_FRAME) | EPT_R | EPT_W | EPT_X;
-		pte = (pt_entry_t *)pmap->pm_npt_pdpt;
-		pte[0] = (pmap->pm_pdidx[0] & PG_FRAME) |
-		    EPT_R | EPT_W | EPT_X;
-		pte[1] = (pmap->pm_pdidx[1] & PG_FRAME) |
-		    EPT_R | EPT_W | EPT_X;
-		pte[2] = (pmap->pm_pdidx[2] & PG_FRAME) |
-		    EPT_R | EPT_W | EPT_X;
-		pte[3] = (pmap->pm_pdidx[3] & PG_FRAME) |
-		    EPT_R | EPT_W | EPT_X;
-	}
-
-	return (ret);
-
-error:
-	if (pmap->pm_npt_pml4)
-		km_free((void *)pmap->pm_npt_pml4, PAGE_SIZE, &kv_any, &kp_zero);
-	if (pmap->pm_npt_pdpt)
-		km_free((void *)pmap->pm_npt_pdpt, PAGE_SIZE, &kv_any, &kp_zero);
-
-	return (ret);
-}
diff --git a/sys/arch/i386/i386/vmm.c b/sys/arch/i386/i386/vmm.c
deleted file mode 100644
index 82988f37eb0..00000000000
--- a/sys/arch/i386/i386/vmm.c
+++ /dev/null
@@ -1,6805 +0,0 @@
-/*	$OpenBSD: vmm.c,v 1.42 2018/08/29 04:51:12 pd Exp $	*/
-/*
- * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/signalvar.h>
-#include <sys/malloc.h>
-#include <sys/device.h>
-#include <sys/pool.h>
-#include <sys/proc.h>
-#include <sys/ioctl.h>
-#include <sys/queue.h>
-#include <sys/rwlock.h>
-#include <sys/pledge.h>
-#include <sys/memrange.h>
-
-#include <uvm/uvm_extern.h>
-
-#include <machine/pmap.h>
-#include <machine/biosvar.h>
-#include <machine/segments.h>
-#include <machine/cpufunc.h>
-#include <machine/vmmvar.h>
-#include <machine/i82489reg.h>
-
-#include <dev/isa/isareg.h>
-
-/* #define VMM_DEBUG */
-
-#ifdef VMM_DEBUG
-int vmm_debug = 0;
-#define DPRINTF(x...)	do { if (vmm_debug) printf(x); } while(0)
-#else
-#define DPRINTF(x...)
-#endif /* VMM_DEBUG */
-
-#define DEVNAME(s)  ((s)->sc_dev.dv_xname)
-
-#define CTRL_DUMP(x,y,z) printf("     %s: Can set:%s Can clear:%s\n", #z , \
-				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
-				IA32_VMX_##z, 1) ? "Yes" : "No", \
-				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
-				IA32_VMX_##z, 0) ? "Yes" : "No");
"Yes" : "No"); - -#define VMX_EXIT_INFO_HAVE_RIP 0x1 -#define VMX_EXIT_INFO_HAVE_REASON 0x2 -#define VMX_EXIT_INFO_COMPLETE \ - (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON) - -struct vm { - vm_map_t vm_map; - uint32_t vm_id; - pid_t vm_creator_pid; - size_t vm_nmemranges; - size_t vm_memory_size; - char vm_name[VMM_MAX_NAME_LEN]; - struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES]; - - struct vcpu_head vm_vcpu_list; - uint32_t vm_vcpu_ct; - u_int vm_vcpus_running; - struct rwlock vm_vcpu_lock; - - SLIST_ENTRY(vm) vm_link; -}; - -SLIST_HEAD(vmlist_head, vm); - -struct vmm_softc { - struct device sc_dev; - - /* Capabilities */ - uint32_t nr_vmx_cpus; - uint32_t nr_svm_cpus; - uint32_t nr_rvi_cpus; - uint32_t nr_ept_cpus; - - /* Managed VMs */ - struct vmlist_head vm_list; - - int mode; - - struct rwlock vm_lock; - size_t vm_ct; /* number of in-memory VMs */ - size_t vm_idx; /* next unique VM index */ - - struct rwlock vpid_lock; - uint16_t max_vpid; - uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */ -}; - -int vmm_enabled(void); -int vmm_probe(struct device *, void *, void *); -void vmm_attach(struct device *, struct device *, void *); -int vmmopen(dev_t, int, int, struct proc *); -int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *); -int vmmclose(dev_t, int, int, struct proc *); -int vmm_start(void); -int vmm_stop(void); -size_t vm_create_check_mem_ranges(struct vm_create_params *); -int vm_create(struct vm_create_params *, struct proc *); -int vm_run(struct vm_run_params *); -int vm_terminate(struct vm_terminate_params *); -int vm_get_info(struct vm_info_params *); -int vm_resetcpu(struct vm_resetcpu_params *); -int vm_intr_pending(struct vm_intr_params *); -int vm_rwregs(struct vm_rwregs_params *, int); -int vm_find(uint32_t, struct vm **); -int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *); -int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); -int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *); -int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); -int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); -int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *); -int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *); -int vcpu_reload_vmcs_vmx(uint64_t *); -int vcpu_init(struct vcpu *); -int vcpu_init_vmx(struct vcpu *); -int vcpu_init_svm(struct vcpu *); -int vcpu_must_stop(struct vcpu *); -int vcpu_run_vmx(struct vcpu *, struct vm_run_params *); -int vcpu_run_svm(struct vcpu *, struct vm_run_params *); -void vcpu_deinit(struct vcpu *); -void vcpu_deinit_vmx(struct vcpu *); -void vcpu_deinit_svm(struct vcpu *); -int vm_impl_init(struct vm *, struct proc *); -int vm_impl_init_vmx(struct vm *, struct proc *); -int vm_impl_init_svm(struct vm *, struct proc *); -void vm_impl_deinit(struct vm *); -void vm_impl_deinit_vmx(struct vm *); -void vm_impl_deinit_svm(struct vm *); -void vm_teardown(struct vm *); -int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int); -int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *); -int vmx_get_exit_info(uint32_t *, uint32_t *); -int vmx_load_pdptes(struct vcpu *); -int vmx_handle_exit(struct vcpu *); -int vmm_handle_cpuid(struct vcpu *); -int vmx_handle_rdmsr(struct vcpu *); -int vmx_handle_wrmsr(struct vcpu *); -int vmx_handle_cr0_write(struct vcpu *, uint32_t); -int vmx_handle_cr4_write(struct vcpu *, uint32_t); -int vmx_handle_cr(struct vcpu *); -int vmx_handle_inout(struct vcpu *); -int 
-int vmm_inject_ud(struct vcpu *);
-int vmm_inject_gp(struct vcpu *);
-int vmm_inject_db(struct vcpu *);
-void vmx_handle_intr(struct vcpu *);
-void vmx_handle_intwin(struct vcpu *);
-int vmm_get_guest_memtype(struct vm *, paddr_t);
-int vmm_get_guest_faulttype(void);
-int vmx_get_guest_faulttype(void);
-int svm_get_guest_faulttype(void);
-int vmx_get_exit_qualification(uint32_t *);
-int vmx_fault_page(struct vcpu *, paddr_t);
-int vmx_handle_np_fault(struct vcpu *);
-int vmm_alloc_vpid(uint16_t *);
-void vmm_free_vpid(uint16_t);
-const char *vcpu_state_decode(u_int);
-const char *vmx_exit_reason_decode(uint32_t);
-const char *vmx_instruction_error_decode(uint32_t);
-void svm_setmsrbr(struct vcpu *, uint32_t);
-void svm_setmsrbw(struct vcpu *, uint32_t);
-void svm_setmsrbrw(struct vcpu *, uint32_t);
-void vmx_setmsrbr(struct vcpu *, uint32_t);
-void vmx_setmsrbw(struct vcpu *, uint32_t);
-void vmx_setmsrbrw(struct vcpu *, uint32_t);
-
-#ifdef VMM_DEBUG
-void dump_vcpu(struct vcpu *);
-void vmx_vcpu_dump_regs(struct vcpu *);
-void vmx_dump_vmcs(struct vcpu *);
-const char *msr_name_decode(uint32_t);
-void vmm_segment_desc_decode(uint32_t);
-void vmm_decode_cr0(uint32_t);
-void vmm_decode_cr3(uint32_t);
-void vmm_decode_cr4(uint32_t);
-void vmm_decode_msr_value(uint64_t, uint64_t);
-void vmm_decode_apicbase_msr_value(uint64_t);
-void vmm_decode_ia32_fc_value(uint64_t);
-void vmm_decode_mtrrcap_value(uint64_t);
-void vmm_decode_perf_status_value(uint64_t);
-void vmm_decode_perf_ctl_value(uint64_t);
-void vmm_decode_mtrrdeftype_value(uint64_t);
-void vmm_decode_efer_value(uint64_t);
-
-extern int mtrr2mrt(int);
-
-struct vmm_reg_debug_info {
-	uint64_t	vrdi_bit;
-	const char	*vrdi_present;
-	const char	*vrdi_absent;
-};
-#endif /* VMM_DEBUG */
-
-const char *vmm_hv_signature = VMM_HV_SIGNATURE;
-
-const struct kmem_pa_mode vmm_kp_contig = {
-	.kp_constraint = &no_constraint,
-	.kp_maxseg = 1,
-	.kp_align = 4096,
-	.kp_zero = 1,
-};
-
-struct cfdriver vmm_cd = {
-	NULL, "vmm", DV_DULL
-};
-
-const struct cfattach vmm_ca = {
-	sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL, NULL
-};
-
-/*
- * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
- * to access the individual fields of the guest segment registers. This
- * struct is indexed by VCPU_REGS_* id.
- */
-const struct {
-	uint64_t selid;
-	uint64_t limitid;
-	uint64_t arid;
-	uint64_t baseid;
-} vmm_vmx_sreg_vmcs_fields[] = {
-	{ VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT,
-	  VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE },
-	{ VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT,
-	  VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE },
-	{ VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT,
-	  VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE },
-	{ VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT,
-	  VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE },
-	{ VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT,
-	  VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE },
-	{ VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT,
-	  VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE },
-	{ VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT,
-	  VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE },
-	{ VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT,
-	  VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE }
-};
-
-/* Pools for VMs and VCPUs */
-struct pool vm_pool;
-struct pool vcpu_pool;
-
-struct vmm_softc *vmm_softc;
-
-/* IDT information used when populating host state area */
-extern vaddr_t idt_vaddr;
-extern struct gate_descriptor *idt;
-
-/* CPU info (i386) */
-extern char cpu_brandstr[];
-extern uint32_t ecpu_eaxfeature;
-extern int cpu_pae;
-
-/* Constants used in "CR access exit" */
-#define CR_WRITE	0
-#define CR_READ		1
-#define CR_CLTS		2
-#define CR_LMSW		3
-
-/*
- * vmm_enabled
- *
- * Checks if we have at least one CPU with either VMX or SVM.
- * Returns 1 if we have at least one of either type, but not both, 0 otherwise.
- */
-int
-vmm_enabled(void)
-{
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-	int found_vmx = 0, found_svm = 0, vmm_disabled = 0;
-
-	/* i386 must be using PAE */
-	if (!cpu_pae)
-		return (0);
-
-	/* Check if we have at least one CPU with either VMX or SVM */
-	CPU_INFO_FOREACH(cii, ci) {
-		if (ci->ci_vmm_flags & CI_VMM_VMX)
-			found_vmx = 1;
-		if (ci->ci_vmm_flags & CI_VMM_SVM)
-			found_svm = 1;
-		if (ci->ci_vmm_flags & CI_VMM_DIS)
-			vmm_disabled = 1;
-	}
-
-	/* Don't support both SVM and VMX at the same time */
-	if (found_vmx && found_svm)
-		return (0);
-
-	/* SVM is not implemented yet */
-	if (found_vmx)
-		return 1;
-
-	return 0;
-}
-
-int
-vmm_probe(struct device *parent, void *match, void *aux)
-{
-	const char **busname = (const char **)aux;
-
-	if (strcmp(*busname, vmm_cd.cd_name) != 0)
-		return (0);
-	return (1);
-}
-
-/*
- * vmm_attach
- *
- * Calculates how many of each type of CPU we have, prints this into dmesg
- * during attach. Initializes various locks, pools, and list structures for the
- * VMM.
- */
-void
-vmm_attach(struct device *parent, struct device *self, void *aux)
-{
-	struct vmm_softc *sc = (struct vmm_softc *)self;
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-
-	sc->nr_vmx_cpus = 0;
-	sc->nr_svm_cpus = 0;
-	sc->nr_rvi_cpus = 0;
-	sc->nr_ept_cpus = 0;
-	sc->vm_ct = 0;
-	sc->vm_idx = 0;
-
-	/* Calculate CPU features */
-	CPU_INFO_FOREACH(cii, ci) {
-		if (ci->ci_vmm_flags & CI_VMM_VMX)
-			sc->nr_vmx_cpus++;
-		if (ci->ci_vmm_flags & CI_VMM_SVM)
-			sc->nr_svm_cpus++;
-		if (ci->ci_vmm_flags & CI_VMM_RVI)
-			sc->nr_rvi_cpus++;
-		if (ci->ci_vmm_flags & CI_VMM_EPT)
-			sc->nr_ept_cpus++;
-	}
-
-	SLIST_INIT(&sc->vm_list);
-	rw_init(&sc->vm_lock, "vmlistlock");
-
-	if (sc->nr_ept_cpus) {
-		printf(": VMX/EPT\n");
-		sc->mode = VMM_MODE_EPT;
-	} else if (sc->nr_vmx_cpus) {
-		printf(": VMX\n");
-		sc->mode = VMM_MODE_VMX;
-	} else if (sc->nr_rvi_cpus) {
-		printf(": SVM/RVI\n");
-		sc->mode = VMM_MODE_RVI;
-	} else if (sc->nr_svm_cpus) {
-		printf(": SVM\n");
-		sc->mode = VMM_MODE_SVM;
-	} else {
-		printf(": unknown\n");
-		sc->mode = VMM_MODE_UNKNOWN;
-	}
-
-	if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) {
-		/* XXX SVM not supported */
-	} else {
-		sc->max_vpid = 0xFFF;
-	}
-
-	bzero(&sc->vpids, sizeof(sc->vpids));
-	rw_init(&sc->vpid_lock, "vpidlock");
-
-	pool_init(&vm_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
-	    "vmpool", NULL);
-	pool_init(&vcpu_pool, sizeof(struct vcpu), 0, IPL_NONE, PR_WAITOK,
-	    "vcpupl", NULL);
-
-	vmm_softc = sc;
-}
-
-/*
- * vmmopen
- *
- * Called during open of /dev/vmm. Presently unused.
- */
-int
-vmmopen(dev_t dev, int flag, int mode, struct proc *p)
-{
-	/* Don't allow open if we didn't attach */
-	if (vmm_softc == NULL)
-		return (ENODEV);
-
-	/* Don't allow open if we didn't detect any supported CPUs */
-	/* XXX presently this means EPT until SP and SVM are back */
-	if (vmm_softc->mode != VMM_MODE_EPT)
-		return (ENODEV);
-
-	return 0;
-}
-
-/*
- * vmmioctl
- *
- * Main ioctl dispatch routine for /dev/vmm. Parses ioctl type and calls
- * appropriate lower level handler routine. Returns result to ioctl caller.
- */
-int
-vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
-{
-	int ret;
-
-	switch (cmd) {
-	case VMM_IOC_CREATE:
-		if ((ret = vmm_start()) != 0) {
-			vmm_stop();
-			break;
-		}
-		ret = vm_create((struct vm_create_params *)data, p);
-		break;
-	case VMM_IOC_RUN:
-		ret = vm_run((struct vm_run_params *)data);
-		break;
-	case VMM_IOC_INFO:
-		ret = vm_get_info((struct vm_info_params *)data);
-		break;
-	case VMM_IOC_TERM:
-		ret = vm_terminate((struct vm_terminate_params *)data);
-		break;
-	case VMM_IOC_RESETCPU:
-		ret = vm_resetcpu((struct vm_resetcpu_params *)data);
-		break;
-	case VMM_IOC_INTR:
-		ret = vm_intr_pending((struct vm_intr_params *)data);
-		break;
-	case VMM_IOC_READREGS:
-		ret = vm_rwregs((struct vm_rwregs_params *)data, 0);
-		break;
-	case VMM_IOC_WRITEREGS:
-		ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
-		break;
-	default:
-		DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
-		ret = ENOTTY;
-	}
-
-	return (ret);
-}
-
-/*
- * pledge_ioctl_vmm
- *
- * Restrict the allowed ioctls in a pledged process context.
- * Is called from pledge_ioctl().
- */
-int
-pledge_ioctl_vmm(struct proc *p, long com)
-{
-	switch (com) {
-	case VMM_IOC_CREATE:
-	case VMM_IOC_INFO:
-		/* The "parent" process in vmd forks and manages VMs */
-		if (p->p_p->ps_pledge & PLEDGE_PROC)
-			return (0);
-		break;
-	case VMM_IOC_TERM:
-		/* XXX VM processes should only terminate themselves */
-	case VMM_IOC_RUN:
-	case VMM_IOC_RESETCPU:
-	case VMM_IOC_INTR:
-	case VMM_IOC_READREGS:
-	case VMM_IOC_WRITEREGS:
-		return (0);
-	}
-
-	return (EPERM);
-}
-
-/*
- * vmmclose
- *
- * Called when /dev/vmm is closed. Presently unused.
- */
-int
-vmmclose(dev_t dev, int flag, int mode, struct proc *p)
-{
-	return 0;
-}
-
-/*
- * vm_resetcpu
- *
- * Resets the vcpu defined in 'vrp' to power-on-init register state
- *
- * Parameters:
- *  vrp: ioctl structure defining the vcpu to reset (see vmmvar.h)
- *
- * Returns 0 if successful, or various error codes on failure:
- *  ENOENT if the VM id contained in 'vrp' refers to an unknown VM or
- *      if vrp describes an unknown vcpu for this VM
- *  EBUSY if the indicated VCPU is not stopped
- *  EIO if the indicated VCPU failed to reset
- */
-int
-vm_resetcpu(struct vm_resetcpu_params *vrp)
-{
-	struct vm *vm;
-	struct vcpu *vcpu;
-	int error;
-
-	/* Find the desired VM */
-	rw_enter_read(&vmm_softc->vm_lock);
-	error = vm_find(vrp->vrp_vm_id, &vm);
-	rw_exit_read(&vmm_softc->vm_lock);
-
-	/* Not found? exit. */
-	if (error != 0) {
-		DPRINTF("%s: vm id %u not found\n", __func__,
-		    vrp->vrp_vm_id);
-		return (error);
-	}
-
-	rw_enter_read(&vm->vm_vcpu_lock);
-	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-		if (vcpu->vc_id == vrp->vrp_vcpu_id)
-			break;
-	}
-	rw_exit_read(&vm->vm_vcpu_lock);
-
-	if (vcpu == NULL) {
-		DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
-		    vrp->vrp_vcpu_id, vrp->vrp_vm_id);
-		return (ENOENT);
-	}
-
-	if (vcpu->vc_state != VCPU_STATE_STOPPED) {
-		DPRINTF("%s: reset of vcpu %u on vm %u attempted "
-		    "while vcpu was in state %u (%s)\n", __func__,
-		    vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state,
-		    vcpu_state_decode(vcpu->vc_state));
-
-		return (EBUSY);
-	}
-
-	DPRINTF("%s: resetting vm %d vcpu %d to power on defaults\n",
-	    __func__, vm->vm_id, vcpu->vc_id);
-
-	if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
-		printf("%s: failed\n", __func__);
-#ifdef VMM_DEBUG
-		dump_vcpu(vcpu);
-#endif /* VMM_DEBUG */
-		return (EIO);
-	}
-
-	return (0);
-}
-
-/*
- * vm_intr_pending
- *
- * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
- * interrupt is pending and needs acknowledgment
- *
- * Parameters:
- *  vip: Describes the vm/vcpu for which the interrupt is pending
- *
- * Return values:
- *  0: if successful
- *  ENOENT: if the VM/VCPU defined by 'vip' cannot be found
- */
-int
-vm_intr_pending(struct vm_intr_params *vip)
-{
-	struct vm *vm;
-	struct vcpu *vcpu;
-	int error;
-
-	/* Find the desired VM */
-	rw_enter_read(&vmm_softc->vm_lock);
-	error = vm_find(vip->vip_vm_id, &vm);
-
-	/* Not found? exit. */
-	if (error != 0) {
-		rw_exit_read(&vmm_softc->vm_lock);
-		return (error);
-	}
-
-	rw_enter_read(&vm->vm_vcpu_lock);
-	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-		if (vcpu->vc_id == vip->vip_vcpu_id)
-			break;
-	}
-	rw_exit_read(&vm->vm_vcpu_lock);
-	rw_exit_read(&vmm_softc->vm_lock);
-
-	if (vcpu == NULL)
-		return (ENOENT);
-
-	vcpu->vc_intr = vip->vip_intr;
-
-#ifdef MULTIPROCESSOR
-	/*
-	 * If the vcpu is running on another PCPU, attempt to force it
-	 * to exit to process the pending interrupt. This could race as
-	 * it could be running when we do the check but be stopped by the
-	 * time we send the IPI. In this case, there is a small extra
-	 * overhead to process the IPI but no other side effects.
-	 *
-	 * There is also a chance that the vcpu may have interrupts blocked.
-	 * That's ok as that condition will be checked on exit, and we will
-	 * simply re-enter the guest. This "fast notification" is done only
-	 * as an optimization.
-	 */
-	if (vcpu->vc_state == VCPU_STATE_RUNNING &&
-	    vip->vip_intr == 1)
-		i386_send_ipi(vcpu->vc_last_pcpu, I386_IPI_NOP);
-#endif /* MULTIPROCESSOR */
-
-	return (0);
-}
-
-/*
- * vm_readregs
- *
- * IOCTL handler to read/write the current register values of a guest VCPU.
- * The VCPU must not be running.
- *
- * Parameters:
- *   vrwp: Describes the VM and VCPU to get/set the registers from. The
- *    register values are returned here as well.
- *   dir: 0 for reading, 1 for writing
- *
- * Return values:
- *  0: if successful
- *  ENOENT: if the VM/VCPU defined by 'vgp' cannot be found
- *  EINVAL: if an error occured reading the registers of the guest
- *  EPERM: if the vm cannot be accessed from the calling process
- */
-int
-vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
-{
-	struct vm *vm;
-	struct vcpu *vcpu;
-	struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
-	int error;
-
-	/* Find the desired VM */
-	rw_enter_read(&vmm_softc->vm_lock);
-	error = vm_find(vrwp->vrwp_vm_id, &vm);
-
-	/* Not found? exit. */
-	if (error != 0) {
-		rw_exit_read(&vmm_softc->vm_lock);
-		return (error);
-	}
-
-	rw_enter_read(&vm->vm_vcpu_lock);
-	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-		if (vcpu->vc_id == vrwp->vrwp_vcpu_id)
-			break;
-	}
-	rw_exit_read(&vm->vm_vcpu_lock);
-	rw_exit_read(&vmm_softc->vm_lock);
-
-	if (vcpu == NULL)
-		return (ENOENT);
-
-	if (vmm_softc->mode == VMM_MODE_VMX ||
-	    vmm_softc->mode == VMM_MODE_EPT)
-		return (dir == 0) ?
-		    vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, vrs) :
-		    vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
-	else if (vmm_softc->mode == VMM_MODE_SVM ||
-	    vmm_softc->mode == VMM_MODE_RVI)
-		return (dir == 0) ?
-		    vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
-		    vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
-	else {
-		DPRINTF("%s: unknown vmm mode", __func__);
-		return (EINVAL);
-	}
-}
-
-/*
- * vm_find
- *
- * Function to find an existing VM by its identifier.
- * Must be called under the global vm_lock.
- *
- * Parameters:
- *  id: The VM identifier.
- *  *res: A pointer to the VM or NULL if not found
- *
- * Return values:
- *  0: if successful
- *  ENOENT: if the VM defined by 'id' cannot be found
- *  EPERM: if the VM cannot be accessed by the current process
- */
-int
-vm_find(uint32_t id, struct vm **res)
-{
-	struct proc *p = curproc;
-	struct vm *vm;
-
-	*res = NULL;
-	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
-		if (vm->vm_id == id) {
-			/*
-			 * In the pledged VM process, only allow to find
-			 * the VM that is running in the current process.
-			 * The managing vmm parent process can lookup all
-			 * all VMs and is indicated by PLEDGE_PROC.
-			 */
-			if (((p->p_p->ps_pledge &
-			    (PLEDGE_VMM|PLEDGE_PROC)) == PLEDGE_VMM) &&
-			    (vm->vm_creator_pid != p->p_p->ps_pid))
-				return (pledge_fail(p, EPERM, PLEDGE_VMM));
-			*res = vm;
-			return (0);
-		}
-	}
-
-	return (ENOENT);
-}
-
-/*
- * vmm_start
- *
- * Starts VMM mode on the system
- */
-int
-vmm_start(void)
-{
-	struct cpu_info *self = curcpu();
-	int ret = 0;
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-	int i;
-#endif
-
-	/* VMM is already running */
-	if (self->ci_flags & CPUF_VMM)
-		return (0);
-
-#ifdef MULTIPROCESSOR
-	/* Broadcast start VMM IPI */
-	i386_broadcast_ipi(I386_IPI_START_VMM);
-
-	CPU_INFO_FOREACH(cii, ci) {
-		if (ci == self)
-			continue;
-		for (i = 100000; (!(ci->ci_flags & CPUF_VMM)) && i>0;i--)
-			delay(10);
-		if (!(ci->ci_flags & CPUF_VMM)) {
-			printf("%s: failed to enter VMM mode\n",
-			    ci->ci_dev->dv_xname);
-			ret = EIO;
-		}
-	}
-#endif /* MULTIPROCESSOR */
-
-	/* Start VMM on this CPU */
-	start_vmm_on_cpu(self);
-	if (!(self->ci_flags & CPUF_VMM)) {
-		printf("%s: failed to enter VMM mode\n",
-		    self->ci_dev->dv_xname);
-		ret = EIO;
-	}
-
-	return (ret);
-}
-
-/*
- * vmm_stop
- *
- * Stops VMM mode on the system
- */
-int
-vmm_stop(void)
-{
-	struct cpu_info *self = curcpu();
-	int ret = 0;
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-	int i;
-#endif
-
-	/* VMM is not running */
-	if (!(self->ci_flags & CPUF_VMM))
-		return (0);
-
-#ifdef MULTIPROCESSOR
-	/* Stop VMM on other CPUs */
-	i386_broadcast_ipi(I386_IPI_STOP_VMM);
-
-	CPU_INFO_FOREACH(cii, ci) {
-		if (ci == self)
-			continue;
-		for (i = 100000; (ci->ci_flags & CPUF_VMM) && i>0 ;i--)
-			delay(10);
-		if (ci->ci_flags & CPUF_VMM) {
-			printf("%s: failed to exit VMM mode\n",
-			    ci->ci_dev->dv_xname);
-			ret = EIO;
-		}
-	}
-#endif /* MULTIPROCESSOR */
-
-	/* Stop VMM on this CPU */
-	stop_vmm_on_cpu(self);
-	if (self->ci_flags & CPUF_VMM) {
-		printf("%s: failed to exit VMM mode\n",
-		    self->ci_dev->dv_xname);
-		ret = EIO;
-	}
-
-	return (ret);
-}
-
-/*
- * start_vmm_on_cpu
- *
- * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
- * sequence to enter VMM mode (eg, VMXON)
- */
-void
-start_vmm_on_cpu(struct cpu_info *ci)
-{
-	uint64_t msr;
-	uint32_t cr4;
-
-	/* No VMM mode? exit. */
-	if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 &&
-	    (ci->ci_vmm_flags & CI_VMM_SVM) == 0)
-		return;
-
-	/*
-	 * AMD SVM
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_SVM) {
-		msr = rdmsr(MSR_EFER);
-		msr |= EFER_SVME;
-		wrmsr(MSR_EFER, msr);
-	}
-
-	/*
-	 * Intel VMX
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_VMX) {
-		if (ci->ci_vmxon_region == 0)
-			return;
-		else {
-			bzero(ci->ci_vmxon_region, PAGE_SIZE);
-			ci->ci_vmxon_region->vr_revision =
-			    ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
-
-			/* Set CR4.VMXE */
-			cr4 = rcr4();
-			cr4 |= CR4_VMXE;
-			lcr4(cr4);
-
-			/* Enable VMX */
-			msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
-			if (msr & IA32_FEATURE_CONTROL_LOCK) {
-				if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
-					return;
-			} else {
-				msr |= IA32_FEATURE_CONTROL_VMX_EN |
-				    IA32_FEATURE_CONTROL_LOCK;
-				wrmsr(MSR_IA32_FEATURE_CONTROL, msr);
-			}
-
-			/* Enter VMX mode */
-			if (vmxon(&ci->ci_vmxon_region_pa))
-				return;
-		}
-	}
-
-	ci->ci_flags |= CPUF_VMM;
-}
-
-/*
- * stop_vmm_on_cpu
- *
- * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
- * sequence to exit VMM mode (eg, VMXOFF)
- */
-void
-stop_vmm_on_cpu(struct cpu_info *ci)
-{
-	uint64_t msr;
-	uint32_t cr4;
-
-	if (!(ci->ci_flags & CPUF_VMM))
-		return;
-
-	/*
-	 * AMD SVM
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_SVM) {
-		msr = rdmsr(MSR_EFER);
-		msr &= ~EFER_SVME;
-		wrmsr(MSR_EFER, msr);
-	}
-
-	/*
-	 * Intel VMX
-	 */
-	if (ci->ci_vmm_flags & CI_VMM_VMX) {
-		if (vmxoff())
-			panic("VMXOFF failed");
-
-		cr4 = rcr4();
-		cr4 &= ~CR4_VMXE;
-		lcr4(cr4);
-	}
-
-	ci->ci_flags &= ~CPUF_VMM;
-}
-
-/*
- * vm_create_check_mem_ranges
- *
- * Make sure that the guest physical memory ranges given by the user process
- * do not overlap and are in ascending order.
- *
- * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE.
- *
- * Return Values:
- *   The total memory size in MB if the checks were successful
- *   0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was
- *   exceeded
- */
-size_t
-vm_create_check_mem_ranges(struct vm_create_params *vcp)
-{
-	size_t i, memsize = 0;
-	struct vm_mem_range *vmr, *pvmr;
-	const paddr_t maxgpa = (uint32_t)VMM_MAX_VM_MEM_SIZE * 1024 * 1024;
-
-	if (vcp->vcp_nmemranges == 0 ||
-	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
-		return (0);
-
-	for (i = 0; i < vcp->vcp_nmemranges; i++) {
-		vmr = &vcp->vcp_memranges[i];
-
-		/* Only page-aligned addresses and sizes are permitted */
-		if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
-		    (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0)
-			return (0);
-
-		/* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
-		if (vmr->vmr_gpa >= maxgpa ||
-		    vmr->vmr_size > maxgpa - vmr->vmr_gpa)
-			return (0);
-
-		/*
-		 * Make sure that all virtual addresses are within the address
-		 * space of the process and that they do not wrap around.
-		 * Calling uvm_share() when creating the VM will take care of
-		 * further checks.
-		 */
-		if (vmr->vmr_va < VM_MIN_ADDRESS ||
-		    vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
-		    vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va)
-			return (0);
-
-		/*
-		 * Specifying ranges within the PCI MMIO space is forbidden.
-		 * Disallow ranges that start inside the MMIO space:
-		 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
-		 */
-		if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE &&
-		    vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END)
-			return (0);
-
-		/*
-		 * ... and disallow ranges that end inside the MMIO space:
-		 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
-		 */
-		if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE &&
-		    vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END)
-			return (0);
-
-		/*
-		 * Make sure that guest physcal memory ranges do not overlap
-		 * and that they are ascending.
-		 */
-		if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
-			return (0);
-
-		memsize += vmr->vmr_size;
-		pvmr = vmr;
-	}
-
-	if (memsize % (1024 * 1024) != 0)
-		return (0);
-	memsize /= 1024 * 1024;
-	return (memsize);
-}
-
-/*
- * vm_create
- *
- * Creates the in-memory VMM structures for the VM defined by 'vcp'. The
- * parent of this VM shall be the process defined by 'p'.
- * This function does not start the VCPU(s) - see vm_start.
- *
- * Return Values:
- *  0: the create operation was successful
- *  ENOMEM: out of memory
- *  various other errors from vcpu_init/vm_impl_init
- */
-int
-vm_create(struct vm_create_params *vcp, struct proc *p)
-{
-	int i, ret;
-	size_t memsize;
-	struct vm *vm;
-	struct vcpu *vcpu;
-
-	if (!(curcpu()->ci_flags & CPUF_VMM))
-		return (EINVAL);
-
-	memsize = vm_create_check_mem_ranges(vcp);
-	if (memsize == 0)
-		return (EINVAL);
-
-	/* XXX - support UP only (for now) */
-	if (vcp->vcp_ncpus != 1)
-		return (EINVAL);
-
-	vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
-	SLIST_INIT(&vm->vm_vcpu_list);
-	rw_init(&vm->vm_vcpu_lock, "vcpulock");
-
-	vm->vm_creator_pid = p->p_p->ps_pid;
-	vm->vm_nmemranges = vcp->vcp_nmemranges;
-	memcpy(vm->vm_memranges, vcp->vcp_memranges,
-	    vm->vm_nmemranges * sizeof(vm->vm_memranges[0]));
-	vm->vm_memory_size = memsize;
-	strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN);
-
-	if (vm_impl_init(vm, p)) {
-		printf("failed to init arch-specific features for vm 0x%p\n",
-		    vm);
-		vm_teardown(vm);
-		return (ENOMEM);
-	}
-
-	rw_enter_write(&vmm_softc->vm_lock);
-	vmm_softc->vm_ct++;
-	vmm_softc->vm_idx++;
-
-	/*
-	 * XXX we use the vm_id for the VPID/ASID, so we need to prevent
-	 * wrapping around 65536/4096 entries here
-	 */
-	vm->vm_id = vmm_softc->vm_idx;
-	vm->vm_vcpu_ct = 0;
-	vm->vm_vcpus_running = 0;
-
-	/* Initialize each VCPU defined in 'vcp' */
-	for (i = 0; i < vcp->vcp_ncpus; i++) {
-		vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);
-		vcpu->vc_parent = vm;
-		if ((ret = vcpu_init(vcpu)) != 0) {
-			printf("failed to init vcpu %d for vm 0x%p\n", i, vm);
-			vm_teardown(vm);
-			vmm_softc->vm_ct--;
-			vmm_softc->vm_idx--;
-			rw_exit_write(&vmm_softc->vm_lock);
-			return (ret);
-		}
-		rw_enter_write(&vm->vm_vcpu_lock);
-		vcpu->vc_id = vm->vm_vcpu_ct;
-		vm->vm_vcpu_ct++;
-		SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
-		rw_exit_write(&vm->vm_vcpu_lock);
-	}
-
-	/* XXX init various other hardware parts (vlapic, vioapic, etc) */
-
-	SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
-	rw_exit_write(&vmm_softc->vm_lock);
-
-	vcp->vcp_id = vm->vm_id;
-
-	return (0);
-}
-
-/*
- * vm_impl_init_vmx
- *
- * Intel VMX specific VM initialization routine
- *
- * Parameters:
- *  vm: the VM being initialized
- *   p: vmd process owning the VM
- *
- * Return values:
- *  0: the initialization was successful
- *  ENOMEM: the initialization failed (lack of resources)
- */
-int
-vm_impl_init_vmx(struct vm *vm, struct proc *p)
-{
-	int i, ret;
-	vaddr_t mingpa, maxgpa;
-	struct pmap *pmap;
-	struct vm_mem_range *vmr;
-
-	/* If not EPT, nothing to do here */
-	if (vmm_softc->mode != VMM_MODE_EPT)
-		return (0);
-
-	/* Create a new pmap for this VM */
-	pmap = pmap_create();
-	if (!pmap) {
-		printf("%s: pmap_create failed\n", __func__);
-		return (ENOMEM);
-	}
-
-	/*
-	 * Create a new UVM map for this VM, and assign it the pmap just
-	 * created.
-	 */
-	vmr = &vm->vm_memranges[0];
-	mingpa = vmr->vmr_gpa;
-	vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
-	maxgpa = vmr->vmr_gpa + vmr->vmr_size;
-	vm->vm_map = uvm_map_create(pmap, mingpa, maxgpa,
-	    VM_MAP_ISVMSPACE | VM_MAP_PAGEABLE);
-
-	if (!vm->vm_map) {
-		printf("%s: uvm_map_create failed\n", __func__);
-		pmap_destroy(pmap);
-		return (ENOMEM);
-	}
-
-	/* Map the new map with an anon */
-	DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
-	for (i = 0; i < vm->vm_nmemranges; i++) {
-		vmr = &vm->vm_memranges[i];
-		ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
-		    PROT_READ | PROT_WRITE | PROT_EXEC,
-		    &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
-		if (ret) {
-			printf("%s: uvm_share failed (%d)\n", __func__, ret);
-			/* uvm_map_deallocate calls pmap_destroy for us */
-			uvm_map_deallocate(vm->vm_map);
-			vm->vm_map = NULL;
-			return (ENOMEM);
-		}
-	}
-
-	/* Convert the low 512GB of the pmap to EPT */
-	ret = pmap_convert(pmap, PMAP_TYPE_EPT);
-	if (ret) {
-		printf("%s: pmap_convert failed\n", __func__);
-		/* uvm_map_deallocate calls pmap_destroy for us */
-		uvm_map_deallocate(vm->vm_map);
-		vm->vm_map = NULL;
-		return (ENOMEM);
-	}
-
-	return (0);
-}
-
-/*
- * vm_impl_init_svm
- *
- * AMD SVM specific VM initialization routine
- *
- * Parameters:
- *  vm: the VM being initialized
- *   p: vmd process owning the VM
- *
- * Return values:
- *  0: the initialization was successful
- *  ENOMEM: the initialization failed (lack of resources)
- */
-int
-vm_impl_init_svm(struct vm *vm, struct proc *p)
-{
-	int i, ret;
-	vaddr_t mingpa, maxgpa;
-	struct pmap *pmap;
-	struct vm_mem_range *vmr;
-
-	/* If not RVI, nothing to do here */
-	if (vmm_softc->mode != VMM_MODE_RVI)
-		return (0);
-
-	/* Create a new pmap for this VM */
-	pmap = pmap_create();
-	if (!pmap) {
-		printf("%s: pmap_create failed\n", __func__);
-		return (ENOMEM);
-	}
-
-	DPRINTF("%s: RVI pmap allocated @ %p\n", __func__, pmap);
-
-	/*
-	 * Create a new UVM map for this VM, and assign it the pmap just
-	 * created.
-	 */
-	vmr = &vm->vm_memranges[0];
-	mingpa = vmr->vmr_gpa;
-	vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
-	maxgpa = vmr->vmr_gpa + vmr->vmr_size;
-	vm->vm_map = uvm_map_create(pmap, mingpa, maxgpa,
-	    VM_MAP_ISVMSPACE | VM_MAP_PAGEABLE);
-
-	if (!vm->vm_map) {
-		printf("%s: uvm_map_create failed\n", __func__);
-		pmap_destroy(pmap);
-		return (ENOMEM);
-	}
-
-	/* Map the new map with an anon */
-	DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
-	for (i = 0; i < vm->vm_nmemranges; i++) {
-		vmr = &vm->vm_memranges[i];
-		ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
-		    PROT_READ | PROT_WRITE | PROT_EXEC,
-		    &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
-		if (ret) {
-			printf("%s: uvm_share failed (%d)\n", __func__, ret);
-			/* uvm_map_deallocate calls pmap_destroy for us */
-			uvm_map_deallocate(vm->vm_map);
-			vm->vm_map = NULL;
-			return (ENOMEM);
-		}
-	}
-
-	return (0);
-}
-
-/*
- * vm_impl_init
- *
- * Calls the architecture-specific VM init routine
- *
- * Parameters:
- *  vm: the VM being initialized
- *   p: vmd process owning the VM
- *
- * Return values (from architecture-specific init routines):
- *  0: the initialization was successful
- *  ENOMEM: the initialization failed (lack of resources)
- */
-int
-vm_impl_init(struct vm *vm, struct proc *p)
-{
-	if (vmm_softc->mode == VMM_MODE_VMX ||
-	    vmm_softc->mode == VMM_MODE_EPT)
-		return vm_impl_init_vmx(vm, p);
-	else if (vmm_softc->mode == VMM_MODE_SVM ||
-	    vmm_softc->mode == VMM_MODE_RVI)
-		return vm_impl_init_svm(vm, p);
-	else
-		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
-}
-
-/*
- * vm_impl_deinit_vmx
- *
- * Intel VMX specific VM initialization routine
- */
-void
-vm_impl_deinit_vmx(struct vm *vm)
-{
-	/* Unused */
-}
-
-/*
- * vm_impl_deinit_svm
- *
- * AMD SVM specific VM initialization routine
- */
-void
-vm_impl_deinit_svm(struct vm *vm)
-{
-	/* Unused */
-}
-
-/*
- * vm_impl_deinit
- *
- * Calls the architecture-specific VM init routine
- */
-void
-vm_impl_deinit(struct vm *vm)
-{
-	if (vmm_softc->mode == VMM_MODE_VMX ||
-	    vmm_softc->mode == VMM_MODE_EPT)
-		vm_impl_deinit_vmx(vm);
-	else if (vmm_softc->mode == VMM_MODE_SVM ||
-	    vmm_softc->mode == VMM_MODE_RVI)
-		vm_impl_deinit_svm(vm);
-	else
-		panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
-}
-
-/*
- * vcpu_reload_vmcs_vmx
- *
- * Loads 'vmcs' on the current CPU, possibly flushing any old vmcs state
- * of the previous occupant.
- *
- * Parameters:
- *  vmcs: Pointer to uint64_t containing the PA of the vmcs to load
- *
- * Return values:
- *  0: if successful
- *  EINVAL: an error occurred during flush or reload
- */
-int
-vcpu_reload_vmcs_vmx(uint64_t *vmcs)
-{
-	uint64_t old;
-
-	/* Flush any old state */
-	if (!vmptrst(&old)) {
-		if (old != 0xFFFFFFFFFFFFFFFFULL) {
-			if (vmclear(&old))
-				return (EINVAL);
-		}
-	} else
-		return (EINVAL);
-
-	/*
-	 * Load the VMCS onto this PCPU
-	 */
-	if (vmptrld(vmcs))
-		return (EINVAL);
-
-	return (0);
-}
-
-/*
- * vcpu_readregs_vmx
- *
- * Reads 'vcpu's registers
- *
- * Parameters:
- *  vcpu: the vcpu to read register values from
- *  regmask: the types of registers to read
- *  vrs: output parameter where register values are stored
- *
- * Return values:
- *  0: if successful
- *  EINVAL: an error reading registers occured
- */
-int
-vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask,
-    struct vcpu_reg_state *vrs)
-{
-	int i, ret = 0;
-	uint32_t sel, limit, ar;
-	uint32_t *gprs = vrs->vrs_gprs;
-	uint32_t *crs = vrs->vrs_crs;
-	uint32_t *msrs = vrs->vrs_msrs;
-	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
-	struct vmx_msr_store *msr_store;
-
-	if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-		return (EINVAL);
-
-	if (regmask & VM_RWREGS_GPRS) {
-		gprs[VCPU_REGS_EAX] = vcpu->vc_gueststate.vg_eax;
-		gprs[VCPU_REGS_EBX] = vcpu->vc_gueststate.vg_ebx;
-		gprs[VCPU_REGS_ECX] = vcpu->vc_gueststate.vg_ecx;
-		gprs[VCPU_REGS_EDX] = vcpu->vc_gueststate.vg_edx;
-		gprs[VCPU_REGS_ESI] = vcpu->vc_gueststate.vg_esi;
-		gprs[VCPU_REGS_EDI] = vcpu->vc_gueststate.vg_edi;
-		gprs[VCPU_REGS_EBP] = vcpu->vc_gueststate.vg_ebp;
-		gprs[VCPU_REGS_EIP] = vcpu->vc_gueststate.vg_eip;
-		if (vmread(VMCS_GUEST_IA32_RSP, &gprs[VCPU_REGS_ESP]))
-			goto errout;
-		if (vmread(VMCS_GUEST_IA32_RFLAGS, &gprs[VCPU_REGS_EFLAGS]))
-			goto errout;
-	}
-	if (regmask & VM_RWREGS_SREGS) {
-		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
-			if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
-				goto errout;
-			if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
-				goto errout;
-			if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
-				goto errout;
-			if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
-			    &sregs[i].vsi_base))
-				goto errout;
-
-			sregs[i].vsi_sel = sel;
-			sregs[i].vsi_limit = limit;
-			sregs[i].vsi_ar = ar;
-		}
-
-		if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &limit))
-			goto errout;
-		if (vmread(VMCS_GUEST_IA32_GDTR_BASE,
-		    &vrs->vrs_gdtr.vsi_base))
-			goto errout;
-		vrs->vrs_gdtr.vsi_limit = limit;
-
-		if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &limit))
-			goto errout;
-		if (vmread(VMCS_GUEST_IA32_IDTR_BASE,
-		    &vrs->vrs_idtr.vsi_base))
-			goto errout;
-		vrs->vrs_idtr.vsi_limit = limit;
-	}
-	if (regmask & VM_RWREGS_CRS) {
-		crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
-		if (vmread(VMCS_GUEST_IA32_CR0, &crs[VCPU_REGS_CR0]))
-			goto errout;
-		if (vmread(VMCS_GUEST_IA32_CR3, &crs[VCPU_REGS_CR3]))
-			goto errout;
-		if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4]))
-			goto errout;
-		if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0]))
-			goto errout;
-		if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1]))
-			goto errout;
-		if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2]))
-			goto errout;
-		if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3]))
-			goto errout;
-	}
-
-	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
-
-	if (regmask & VM_RWREGS_MSRS) {
-		for (i = 0; i < VCPU_REGS_NMSRS; i++) {
-			msrs[i] = msr_store[i].vms_data;
-		}
-	}
-
-	goto out;
-
-errout:
-	ret = EINVAL;
-out:
-	if (vmclear(&vcpu->vc_control_pa))
-		ret = EINVAL;
-	return (ret);
-}
-
-/*
- * vcpu_readregs_svm
- *
- * XXX - unimplemented
- */
-int
-vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
-    struct vcpu_reg_state *regs)
-{
-	return (0);
-}
-
-/*
- * vcpu_writeregs_vmx
- *
- * Writes VCPU registers
- *
- * Parameters:
- *  vcpu: the vcpu that has to get its registers written to
- *  regmask: the types of registers to write
- *  loadvmcs: bit to indicate whether the VMCS has to be loaded first
- *  vrs: the register values to write
- *
- * Return values:
- *  0: if successful
- *  EINVAL an error writing registers occured
- */
-int
-vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
-    struct vcpu_reg_state *vrs)
-{
-	int i, ret = 0;
-	uint16_t sel;
-	uint32_t limit, ar;
-	uint32_t *gprs = vrs->vrs_gprs;
-	uint32_t *crs = vrs->vrs_crs;
-	uint32_t *msrs = vrs->vrs_msrs;
-	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
-	struct vmx_msr_store *msr_store;
-
-	if (loadvmcs) {
-		if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-			return (EINVAL);
-	}
-
-	if (regmask & VM_RWREGS_GPRS) {
-		vcpu->vc_gueststate.vg_eax = gprs[VCPU_REGS_EAX];
-		vcpu->vc_gueststate.vg_ebx = gprs[VCPU_REGS_EBX];
-		vcpu->vc_gueststate.vg_ecx = gprs[VCPU_REGS_ECX];
-		vcpu->vc_gueststate.vg_edx = gprs[VCPU_REGS_EDX];
-		vcpu->vc_gueststate.vg_esi = gprs[VCPU_REGS_ESI];
-		vcpu->vc_gueststate.vg_edi = gprs[VCPU_REGS_EDI];
-		vcpu->vc_gueststate.vg_ebp = gprs[VCPU_REGS_EBP];
-		vcpu->vc_gueststate.vg_eip = gprs[VCPU_REGS_EIP];
-		if (vmwrite(VMCS_GUEST_IA32_RIP, gprs[VCPU_REGS_EIP]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_RSP, gprs[VCPU_REGS_ESP]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_RFLAGS, gprs[VCPU_REGS_EFLAGS]))
-			goto errout;
-	}
-	if (regmask & VM_RWREGS_SREGS) {
-		for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
-			sel = sregs[i].vsi_sel;
-			limit = sregs[i].vsi_limit;
-			ar = sregs[i].vsi_ar;
-
-			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
-				goto errout;
-			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
-				goto errout;
-			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
-				goto errout;
-			if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
-			    sregs[i].vsi_base))
-				goto errout;
-		}
-
-		if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT,
-		    vrs->vrs_gdtr.vsi_limit))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE,
-		    vrs->vrs_gdtr.vsi_base))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT,
-		    vrs->vrs_idtr.vsi_limit))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE,
-		    vrs->vrs_idtr.vsi_base))
-			goto errout;
-	}
-	if (regmask & VM_RWREGS_CRS) {
-		if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2]))
-			goto errout;
-		if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3]))
-			goto errout;
-	}
-
-	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
-
-	if (regmask & VM_RWREGS_MSRS) {
-		for (i = 0; i < VCPU_REGS_NMSRS; i++) {
-			msr_store[i].vms_data = msrs[i];
-		}
-	}
-
-	goto out;
-
-errout:
-	ret = EINVAL;
-out:
-	if (loadvmcs) {
-		if (vmclear(&vcpu->vc_control_pa))
-			ret = EINVAL;
-	}
-	return (ret);
-}
-
-/*
- * vcpu_writeregs_svm
- *
- * Writes 'vcpu's registers
- *
- * Parameters:
- *  vcpu: the vcpu that has to get its registers written to
get its registers written to - * regmask: the types of registers to write - * vrs: the register values to write - * - * Return values: - * 0: if successful - * EINVAL an error writing registers occured - */ -int -vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask, - struct vcpu_reg_state *vrs) -{ - uint32_t *gprs = vrs->vrs_gprs; - uint32_t *crs = vrs->vrs_crs; - uint16_t attr; - struct vcpu_segment_info *sregs = vrs->vrs_sregs; - struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; - - if (regmask & VM_RWREGS_GPRS) { - vcpu->vc_gueststate.vg_eax = gprs[VCPU_REGS_EAX]; - vcpu->vc_gueststate.vg_ebx = gprs[VCPU_REGS_EBX]; - vcpu->vc_gueststate.vg_ecx = gprs[VCPU_REGS_ECX]; - vcpu->vc_gueststate.vg_edx = gprs[VCPU_REGS_EDX]; - vcpu->vc_gueststate.vg_esi = gprs[VCPU_REGS_ESI]; - vcpu->vc_gueststate.vg_edi = gprs[VCPU_REGS_EDI]; - vcpu->vc_gueststate.vg_ebp = gprs[VCPU_REGS_EBP]; - vcpu->vc_gueststate.vg_eip = gprs[VCPU_REGS_EIP]; - - vmcb->v_rip = gprs[VCPU_REGS_EIP]; - vmcb->v_rsp = gprs[VCPU_REGS_ESP]; - vmcb->v_rflags = gprs[VCPU_REGS_EFLAGS]; - } - - if (regmask & VM_RWREGS_SREGS) { - vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS].vsi_sel; - vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS].vsi_limit; - attr = sregs[VCPU_REGS_CS].vsi_ar; - vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS].vsi_base; - vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS].vsi_sel; - vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS].vsi_limit; - attr = sregs[VCPU_REGS_DS].vsi_ar; - vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS].vsi_base; - vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES].vsi_sel; - vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES].vsi_limit; - attr = sregs[VCPU_REGS_ES].vsi_ar; - vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_es.vs_base = sregs[VCPU_REGS_ES].vsi_base; - vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS].vsi_sel; - vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS].vsi_limit; - attr = sregs[VCPU_REGS_FS].vsi_ar; - vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS].vsi_base; - vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS].vsi_sel; - vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS].vsi_limit; - attr = sregs[VCPU_REGS_GS].vsi_ar; - vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS].vsi_base; - vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS].vsi_sel; - vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS].vsi_limit; - attr = sregs[VCPU_REGS_SS].vsi_ar; - vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS].vsi_base; - vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR].vsi_sel; - vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR].vsi_limit; - attr = sregs[VCPU_REGS_LDTR].vsi_ar; - vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR].vsi_base; - vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR].vsi_sel; - vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR].vsi_limit; - attr = sregs[VCPU_REGS_TR].vsi_ar; - vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); - vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR].vsi_base; - vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit; - vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base; - vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit; - vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base; - } - - if (regmask & VM_RWREGS_CRS) { - vmcb->v_cr0 = crs[VCPU_REGS_CR0]; - vmcb->v_cr3 = crs[VCPU_REGS_CR3]; - vmcb->v_cr4 = crs[VCPU_REGS_CR4]; - } - - return (0); -} - -/* - * 
vcpu_reset_regs_svm - * - * Initializes 'vcpu's registers to supplied state - * - * Parameters: - * vcpu: the vcpu whose register state is to be initialized - * vrs: the register state to set - * - * Return values: - * 0: registers init'ed successfully - * EINVAL: an error occurred setting register state - */ -int -vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs) -{ - struct vmcb *vmcb; - int ret; - - vmcb = (struct vmcb *)vcpu->vc_control_va; - - /* - * Intercept controls - * - * External Interrupt exiting (SVM_INTERCEPT_INTR) - * External NMI exiting (SVM_INTERCEPT_NMI) - * CPUID instruction (SVM_INTERCEPT_CPUID) - * HLT instruction (SVM_INTERCEPT_HLT) - * I/O instructions (SVM_INTERCEPT_INOUT) - * MSR access (SVM_INTERCEPT_MSR) - * - * VMRUN instruction (SVM_INTERCEPT_VMRUN) - * VMMCALL instruction (SVM_INTERCEPT_VMMCALL) - * VMLOAD instruction (SVM_INTERCEPT_VMLOAD) - * VMSAVE instruction (SVM_INTERCEPT_VMSAVE) - * STGI instruction (SVM_INTERCEPT_STGI) - * CLGI instruction (SVM_INTERCEPT_CLGI) - * SKINIT instruction (SVM_INTERCEPT_SKINIT) - * ICEBP instruction (SVM_INTERCEPT_ICEBP) - * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND) - */ - vmcb->v_intercept1 = SVM_INTERCEPT_INTR | SVM_INTERCEPT_NMI | - SVM_INTERCEPT_CPUID | SVM_INTERCEPT_HLT | SVM_INTERCEPT_INOUT | - SVM_INTERCEPT_MSR; - - vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN | SVM_INTERCEPT_VMMCALL | - SVM_INTERCEPT_VMLOAD | SVM_INTERCEPT_VMSAVE | SVM_INTERCEPT_STGI | - SVM_INTERCEPT_CLGI | SVM_INTERCEPT_SKINIT | SVM_INTERCEPT_ICEBP | - SVM_INTERCEPT_MWAIT_UNCOND; - - /* Setup I/O bitmap */ - memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE); - vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa); - - /* Setup MSR bitmap */ - memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE); - vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa); - svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL); - svm_setmsrbrw(vcpu, MSR_SYSENTER_CS); - svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP); - svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP); - svm_setmsrbrw(vcpu, MSR_STAR); - svm_setmsrbrw(vcpu, MSR_LSTAR); - svm_setmsrbrw(vcpu, MSR_CSTAR); - svm_setmsrbrw(vcpu, MSR_SFMASK); - svm_setmsrbrw(vcpu, MSR_FSBASE); - svm_setmsrbrw(vcpu, MSR_GSBASE); - svm_setmsrbrw(vcpu, MSR_KERNELGSBASE); - - /* EFER is R/O so we can ensure the guest always has SVME */ - svm_setmsrbr(vcpu, MSR_EFER); - - /* Guest VCPU ASID */ - vmcb->v_asid = vcpu->vc_parent->vm_id; - - /* TLB Control */ - vmcb->v_tlb_control = 2; /* Flush this guest's TLB entries */ - - /* NPT */ - if (vmm_softc->mode == VMM_MODE_RVI) { - vmcb->v_np_enable = 1; - vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa; - } - - /* Enable SVME in EFER (must always be set) */ - vmcb->v_efer |= EFER_SVME; - - ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs); - - vmcb->v_efer |= (EFER_LME | EFER_LMA); - vmcb->v_cr4 |= CR4_PAE; - - return ret; -} - -/* - * svm_setmsrbr - * - * Allow read access to the specified msr on the supplied vcpu. 
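A note on the attribute conversion in vcpu_writeregs_svm above: vsi_ar carries access rights in the VMX layout (type/S/DPL/P in bits 0-7, AVL/L/DB/G up at bits 12-15), while the VMCB wants that high nibble packed down at bits 8-11. A minimal sketch of the transform; svm_pack_attr is an illustrative name, not one from this file:

    /*
     * Compress VMX-style access rights into VMCB segment attributes.
     * Assumes ar uses the VMX layout described above.
     */
    static inline uint16_t
    svm_pack_attr(uint32_t ar)
    {
        /* e.g. ar = 0xc09b (present 32-bit code, G|DB set)
         *  -> (0xc09b & 0xff) | ((0xc09b >> 4) & 0xf00) = 0xc9b */
        return (ar & 0xff) | ((ar >> 4) & 0xf00);
    }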
- * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -svm_setmsrbr(struct vcpu *vcpu, uint32_t msr) -{ - uint8_t *msrs; - uint16_t idx; - - msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; - - /* - * MSR Read bitmap layout: - * Pentium MSRs (0x0 - 0x1fff) @ 0x0 - * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800 - * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000 - * - * Read enable bit is low order bit of 2-bit pair - * per MSR (eg, MSR 0x0 write bit is at bit 0 @ 0x0) - */ - if (msr <= 0x1fff) { - idx = SVM_MSRIDX(msr); - msrs[idx] &= ~(SVM_MSRBIT_R(msr)); - } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { - idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800; - msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000)); - } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) { - idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000; - msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000)); - } else { - printf("%s: invalid msr 0x%x\n", __func__, msr); - return; - } -} - -/* - * svm_setmsrbw - * - * Allow write access to the specified msr on the supplied vcpu - * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -svm_setmsrbw(struct vcpu *vcpu, uint32_t msr) -{ - uint8_t *msrs; - uint16_t idx; - - msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; - - /* - * MSR Write bitmap layout: - * Pentium MSRs (0x0 - 0x1fff) @ 0x0 - * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800 - * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000 - * - * Write enable bit is high order bit of 2-bit pair - * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0) - */ - if (msr <= 0x1fff) { - idx = SVM_MSRIDX(msr); - msrs[idx] &= ~(SVM_MSRBIT_W(msr)); - } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { - idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800; - msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000)); - } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) { - idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000; - msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000)); - } else { - printf("%s: invalid msr 0x%x\n", __func__, msr); - return; - } -} - -/* - * svm_setmsrbrw - * - * Allow read/write access to the specified msr on the supplied vcpu - * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr) -{ - svm_setmsrbr(vcpu, msr); - svm_setmsrbw(vcpu, msr); -} - -/* - * vmx_setmsrbr - * - * Allow read access to the specified msr on the supplied vcpu. 
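The three-range layout just described gives every MSR a two-bit read/write pair (the VMX bitmap further down is simpler: one read bit per MSR at 0x0/0x400 and one write bit at 0x800/0xc00). Assuming SVM_MSRIDX(m) expands to (m) / 4 and SVM_MSRBIT_R(m) to 1 << (((m) % 4) * 2), which is what the layout comments imply although the macro bodies are not shown in this hunk, the arithmetic for a concrete MSR works out as:

    /* Worked example: MSR_EFER (0xc0000080) sits in the second range,
     * which starts at byte offset 0x800 of the permission map. */
    uint32_t off  = 0xc0000080 - 0xc0000000;   /* 0x80                    */
    uint16_t idx  = 0x800 + off / 4;           /* 0x820: 4 MSRs per byte  */
    uint8_t  rbit = 1 << ((off % 4) * 2);      /* read bit, low of pair   */
    uint8_t  wbit = rbit << 1;                 /* write bit, high of pair */

    msrs[idx] &= ~rbit;    /* cleared bit = guest reads allowed        */
    msrs[idx] |=  wbit;    /* set bit = writes intercepted (EFER R/O)  */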
- * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr) -{ - uint8_t *msrs; - uint16_t idx; - - msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; - - /* - * MSR Read bitmap layout: - * "Low" MSRs (0x0 - 0x1fff) @ 0x0 - * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400 - */ - if (msr <= 0x1fff) { - idx = VMX_MSRIDX(msr); - msrs[idx] &= ~(VMX_MSRBIT(msr)); - } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { - idx = VMX_MSRIDX(msr - 0xc0000000) + 0x400; - msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)); - } else - printf("%s: invalid msr 0x%x\n", __func__, msr); -} - -/* - * vmx_setmsrbw - * - * Allow write access to the specified msr on the supplied vcpu - * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr) -{ - uint8_t *msrs; - uint16_t idx; - - msrs = (uint8_t *)vcpu->vc_msr_bitmap_va; - - /* - * MSR Write bitmap layout: - * "Low" MSRs (0x0 - 0x1fff) @ 0x800 - * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00 - */ - if (msr <= 0x1fff) { - idx = VMX_MSRIDX(msr) + 0x800; - msrs[idx] &= ~(VMX_MSRBIT(msr)); - } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { - idx = VMX_MSRIDX(msr - 0xc0000000) + 0xc00; - msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)); - } else - printf("%s: invalid msr 0x%x\n", __func__, msr); -} - -/* - * vmx_setmsrbrw - * - * Allow read/write access to the specified msr on the supplied vcpu - * - * Parameters: - * vcpu: the VCPU to allow access - * msr: the MSR number to allow access to - */ -void -vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr) -{ - vmx_setmsrbr(vcpu, msr); - vmx_setmsrbw(vcpu, msr); -} - -/* - * vcpu_reset_regs_vmx - * - * Initializes 'vcpu's registers to supplied state - * - * Parameters: - * vcpu: the vcpu whose register state is to be initialized - * vrs: the register state to set - * - * Return values: - * 0: registers init'ed successfully - * EINVAL: an error occurred setting register state - */ -int -vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs) -{ - int ret, ug; - uint32_t cr0, cr4; - uint32_t pinbased, procbased, procbased2, exit, entry; - uint32_t want1, want0; - uint64_t msr, ctrlval, eptp, cr3; - uint16_t ctrl, vpid; - struct vmx_msr_store *msr_store; - - ret = 0; - ug = 0; - - cr0 = vrs->vrs_crs[VCPU_REGS_CR0]; - - if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa)) { - DPRINTF("%s: error reloading VMCS\n", __func__); - return (EINVAL); - } - - /* Compute Basic Entry / Exit Controls */ - vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC); - vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS); - vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS); - vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS); - vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS); - - /* Compute True Entry / Exit Controls (if applicable) */ - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS); - vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS); - vcpu->vc_vmx_true_pinbased_ctls = - rdmsr(IA32_VMX_TRUE_PINBASED_CTLS); - vcpu->vc_vmx_true_procbased_ctls = - rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS); - } - - /* Compute Secondary Procbased Controls (if applicable) */ - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) - vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS); - - /* - * Pinbased ctrls - * - * We must be able to set the 
following: - * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt - * IA32_VMX_NMI_EXITING - exit on host NMI - */ - want1 = IA32_VMX_EXTERNAL_INT_EXITING | - IA32_VMX_NMI_EXITING; - want0 = 0; - - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - ctrl = IA32_VMX_TRUE_PINBASED_CTLS; - ctrlval = vcpu->vc_vmx_true_pinbased_ctls; - } else { - ctrl = IA32_VMX_PINBASED_CTLS; - ctrlval = vcpu->vc_vmx_pinbased_ctls; - } - - if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) { - DPRINTF("%s: error computing pinbased controls\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_PINBASED_CTLS, pinbased)) { - DPRINTF("%s: error setting pinbased controls\n", __func__); - ret = EINVAL; - goto exit; - } - - /* - * Procbased ctrls - * - * We must be able to set the following: - * IA32_VMX_HLT_EXITING - exit on HLT instruction - * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction - * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions - * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses - * IA32_VMX_CR8_LOAD_EXITING - guest TPR access - * IA32_VMX_CR8_STORE_EXITING - guest TPR access - * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow) - * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction - * - * If we have EPT, we must be able to clear the following - * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses - * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses - */ - want1 = IA32_VMX_HLT_EXITING | - IA32_VMX_MWAIT_EXITING | - IA32_VMX_UNCONDITIONAL_IO_EXITING | - IA32_VMX_USE_MSR_BITMAPS | - IA32_VMX_CR8_LOAD_EXITING | - IA32_VMX_CR8_STORE_EXITING | - IA32_VMX_MONITOR_EXITING | - IA32_VMX_USE_TPR_SHADOW; - want0 = 0; - - if (vmm_softc->mode == VMM_MODE_EPT) { - want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS; - want0 |= IA32_VMX_CR3_LOAD_EXITING | - IA32_VMX_CR3_STORE_EXITING; - } - - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - ctrl = IA32_VMX_TRUE_PROCBASED_CTLS; - ctrlval = vcpu->vc_vmx_true_procbased_ctls; - } else { - ctrl = IA32_VMX_PROCBASED_CTLS; - ctrlval = vcpu->vc_vmx_procbased_ctls; - } - - if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) { - DPRINTF("%s: error computing procbased controls\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { - DPRINTF("%s: error setting procbased controls\n", __func__); - ret = EINVAL; - goto exit; - } - - /* - * Secondary Procbased ctrls - * - * We want to be able to set the following, if available: - * IA32_VMX_ENABLE_VPID - use VPIDs where available - * - * If we have EPT, we must be able to set the following: - * IA32_VMX_ENABLE_EPT - enable EPT - * - * If we have unrestricted guest capability, we must be able to set - * the following: - * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller - * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter) - */ - want1 = 0; - - /* XXX checking for 2ndary controls can be combined here */ - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_VPID, 1)) - want1 |= IA32_VMX_ENABLE_VPID; - } - - if (vmm_softc->mode == VMM_MODE_EPT) - want1 |= IA32_VMX_ENABLE_EPT; - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_UNRESTRICTED_GUEST, 1)) { - if ((cr0 & (CR0_PE | CR0_PG)) == 0) { - want1 |= 
IA32_VMX_UNRESTRICTED_GUEST; - ug = 1; - } - } - } - - want0 = ~want1; - ctrlval = vcpu->vc_vmx_procbased2_ctls; - ctrl = IA32_VMX_PROCBASED2_CTLS; - - if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) { - DPRINTF("%s: error computing secondary procbased controls\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_PROCBASED2_CTLS, procbased2)) { - DPRINTF("%s: error setting secondary procbased controls\n", - __func__); - ret = EINVAL; - goto exit; - } - - /* - * Exit ctrls - * - * We must be able to set the following: - * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit - * XXX clear save_debug_ctrls on exit ? - */ - want1 = IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT; - want0 = 0; - - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - ctrl = IA32_VMX_TRUE_EXIT_CTLS; - ctrlval = vcpu->vc_vmx_true_exit_ctls; - } else { - ctrl = IA32_VMX_EXIT_CTLS; - ctrlval = vcpu->vc_vmx_exit_ctls; - } - - if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) { - DPRINTF("%s: error computing exit controls\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_CTLS, exit)) { - DPRINTF("%s: error setting exit controls\n", __func__); - ret = EINVAL; - goto exit; - } - - /* - * Entry ctrls - * - * We must be able to set the following: - * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest) - * We must be able to clear the following: - * IA32_VMX_ENTRY_TO_SMM - enter to SMM - * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT - * IA32_VMX_LOAD_DEBUG_CONTROLS - * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY - */ - want0 = IA32_VMX_ENTRY_TO_SMM | - IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT | - IA32_VMX_LOAD_DEBUG_CONTROLS | - IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY; - - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - ctrl = IA32_VMX_TRUE_ENTRY_CTLS; - ctrlval = vcpu->vc_vmx_true_entry_ctls; - } else { - ctrl = IA32_VMX_ENTRY_CTLS; - ctrlval = vcpu->vc_vmx_entry_ctls; - } - - if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) { - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_ENTRY_CTLS, entry)) { - ret = EINVAL; - goto exit; - } - - if (vmm_softc->mode == VMM_MODE_EPT) { - eptp = vcpu->vc_parent->vm_map->pmap->pm_npt_pa; - msr = rdmsr(IA32_VMX_EPT_VPID_CAP); - if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) { - /* Page walk length 4 supported */ - eptp |= ((IA32_EPT_PAGE_WALK_LENGTH - 1) << 3); - } else { - DPRINTF("EPT page walk length 4 not supported"); - ret = EINVAL; - goto exit; - } - - if (msr & IA32_EPT_VPID_CAP_WB) { - /* WB cache type supported */ - eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB; - } - - DPRINTF("guest eptp = 0x%llx\n", eptp); - if (vmwrite(VMCS_GUEST_IA32_EPTP, - (uint32_t)(eptp & 0xFFFFFFFFUL))) { - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_IA32_EPTP_HI, 0)) { - ret = EINVAL; - goto exit; - } - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_VPID, 1)) { - if (vmm_alloc_vpid(&vpid)) { - DPRINTF("%s: could not allocate VPID\n", - __func__); - ret = EINVAL; - goto exit; - } - if (vmwrite(VMCS_GUEST_VPID, vpid)) { - DPRINTF("%s: error setting guest VPID\n", - __func__); - ret = EINVAL; - goto exit; - } - - vcpu->vc_vpid = vpid; - } - } - - - /* - * Determine which bits in CR0 have to be set to a fixed - * value as per Intel SDM A.7. - * CR0 bits in the vrs parameter must match these. 
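The EPTP assembled a few lines up packs its configuration into the low bits of the PML4 physical address before being split across the 32-bit VMCS_GUEST_IA32_EPTP/_HI fields. Assuming IA32_EPT_PAGE_WALK_LENGTH is 4 and the WB cache-type encoding is 6, the final value looks like this (pml4_pa is a stand-in for pm_npt_pa):

    /* EPTP: bits 2:0 memory type, bits 5:3 page-walk length minus one,
     * the rest is the 4 KB-aligned PML4 physical address. */
    uint64_t eptp = pml4_pa;
    eptp |= (4 - 1) << 3;    /* 4-level page walk, encoded as 3 */
    eptp |= 6;               /* IA32_EPT_PAGING_CACHE_TYPE_WB   */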
- */ - - want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & - (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); - want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & - ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); - - /* - * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as - * fixed to 1 even if the CPU supports the unrestricted guest - * feature. Update want1 and want0 accordingly to allow - * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if - * the CPU has the unrestricted guest capability. - */ - if (ug) { - want1 &= ~(CR0_PG | CR0_PE); - want0 &= ~(CR0_PG | CR0_PE); - } - - /* - * VMX may require some bits to be set that userland should not have - * to care about. Set those here. - */ - if (want1 & CR0_NE) - cr0 |= CR0_NE; - - if ((cr0 & want1) != want1) { - ret = EINVAL; - goto exit; - } - if ((~cr0 & want0) != want0) { - ret = EINVAL; - goto exit; - } - - /* - * Determine which bits in CR4 have to be set to a fixed - * value as per Intel SDM A.8. - * CR4 bits in the vrs parameter must match these, except - * CR4_VMXE - we add that here since it must always be set. - */ - want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & - (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); - want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & - ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); - - cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE; - - if ((cr4 & want1) != want1) { - ret = EINVAL; - goto exit; - } - - if ((~cr4 & want0) != want0) { - ret = EINVAL; - goto exit; - } - - cr3 = vrs->vrs_crs[VCPU_REGS_CR3]; - - /* Restore PDPTEs if 32-bit PAE paging is being used */ - if (cr3 && (cr4 & CR4_PAE) && - !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) { - if (vmwrite(VMCS_GUEST_PDPTE0, - vrs->vrs_crs[VCPU_REGS_PDPTE0])) { - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE1, - vrs->vrs_crs[VCPU_REGS_PDPTE1])) { - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE2, - vrs->vrs_crs[VCPU_REGS_PDPTE2])) { - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE3, - vrs->vrs_crs[VCPU_REGS_PDPTE3])) { - ret = EINVAL; - goto exit; - } - } - - vrs->vrs_crs[VCPU_REGS_CR0] = cr0; - vrs->vrs_crs[VCPU_REGS_CR4] = cr4; - - /* - * Select host MSRs to be loaded on exit - */ - msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va; - msr_store[0].vms_index = MSR_EFER; - msr_store[0].vms_data = rdmsr(MSR_EFER); - msr_store[1].vms_index = MSR_STAR; - msr_store[1].vms_data = rdmsr(MSR_STAR); - msr_store[2].vms_index = MSR_LSTAR; - msr_store[2].vms_data = rdmsr(MSR_LSTAR); - msr_store[3].vms_index = MSR_CSTAR; - msr_store[3].vms_data = rdmsr(MSR_CSTAR); - msr_store[4].vms_index = MSR_SFMASK; - msr_store[4].vms_data = rdmsr(MSR_SFMASK); - msr_store[5].vms_index = MSR_KERNELGSBASE; - msr_store[5].vms_data = rdmsr(MSR_KERNELGSBASE); - msr_store[6].vms_index = MSR_MISC_ENABLE; - msr_store[6].vms_data = rdmsr(MSR_MISC_ENABLE); - - /* - * Select guest MSRs to be loaded on entry / saved on exit - */ - msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; - - msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER; - msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR; - msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR; - msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR; - msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK; - msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE; - msr_store[VCPU_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE; - - /* - * Initialize MSR_MISC_ENABLE as it can't be read and populated from vmd - * and 
some of the content is based on the host. - */ - msr_store[VCPU_REGS_MISC_ENABLE].vms_data = rdmsr(MSR_MISC_ENABLE); - msr_store[VCPU_REGS_MISC_ENABLE].vms_data &= - ~(MISC_ENABLE_TCC | MISC_ENABLE_PERF_MON_AVAILABLE | - MISC_ENABLE_EIST_ENABLED | MISC_ENABLE_ENABLE_MONITOR_FSM | - MISC_ENABLE_xTPR_MESSAGE_DISABLE); - msr_store[VCPU_REGS_MISC_ENABLE].vms_data |= - MISC_ENABLE_BTS_UNAVAILABLE | MISC_ENABLE_PEBS_UNAVAILABLE; - - /* - * Currently we have the same count of entry/exit MSRs loads/stores - * but this is not an architectural requirement. - */ - if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VMX_NUM_MSR_STORE)) { - DPRINTF("%s: error setting guest MSR exit store count\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, VMX_NUM_MSR_STORE)) { - DPRINTF("%s: error setting guest MSR exit load count\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, VMX_NUM_MSR_STORE)) { - DPRINTF("%s: error setting guest MSR entry load count\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS, - vcpu->vc_vmx_msr_exit_save_pa)) { - DPRINTF("%s: error setting guest MSR exit store address\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS_HI, 0)) { - DPRINTF("%s: error setting guest MSR exit store address HI\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS, - vcpu->vc_vmx_msr_exit_load_pa)) { - DPRINTF("%s: error setting guest MSR exit load address\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS_HI, 0)) { - DPRINTF("%s: error setting guest MSR exit load address HI\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS, - vcpu->vc_vmx_msr_exit_save_pa)) { - DPRINTF("%s: error setting guest MSR entry load address\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS_HI, 0)) { - DPRINTF("%s: error setting guest MSR entry load address HI\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_MSR_BITMAP_ADDRESS, - vcpu->vc_msr_bitmap_pa)) { - DPRINTF("%s: error setting guest MSR bitmap address\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_MSR_BITMAP_ADDRESS_HI, 0)) { - DPRINTF("%s: error setting guest MSR bitmap address HI\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_CR4_MASK, CR4_VMXE)) { - DPRINTF("%s: error setting guest CR4 mask\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_CR0_MASK, CR0_NE)) { - DPRINTF("%s: error setting guest CR0 mask\n", __func__); - ret = EINVAL; - goto exit; - } - - /* - * Set up the VMCS for the register state we want during VCPU start. - * This matches what the CPU state would be after a bootloader - * transition to 'start'. 
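The FIXED0/FIXED1 handling above (SDM A.7/A.8) reduces to two masks: a bit must be 1 where both MSRs read 1, and must be 0 where both read 0. Restated with typical CR0 values (illustrative, not read from any particular part):

    uint32_t fixed0 = 0x80000021;         /* PG, NE, PE forced on */
    uint32_t fixed1 = 0xffffffff;         /* nothing forced off   */
    uint32_t must1  = fixed0 & fixed1;    /* 0x80000021           */
    uint32_t must0  = ~fixed0 & ~fixed1;  /* 0x00000000           */

    /* cr0 is acceptable iff both checks pass; the ug case above
     * additionally strips CR0_PG|CR0_PE out of both masks. */
    int ok = ((cr0 & must1) == must1) && ((~cr0 & must0) == must0);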
- */ - ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs); - - /* - * Set up the MSR bitmap - */ - memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE); - vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL); - vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS); - vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP); - vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP); - vmx_setmsrbrw(vcpu, MSR_EFER); - vmx_setmsrbrw(vcpu, MSR_STAR); - vmx_setmsrbrw(vcpu, MSR_LSTAR); - vmx_setmsrbrw(vcpu, MSR_CSTAR); - vmx_setmsrbrw(vcpu, MSR_SFMASK); - vmx_setmsrbrw(vcpu, MSR_FSBASE); - vmx_setmsrbrw(vcpu, MSR_GSBASE); - vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE); - vmx_setmsrbr(vcpu, MSR_MISC_ENABLE); - - /* XXX CR0 shadow */ - /* XXX CR4 shadow */ - - /* Flush the VMCS */ - if (vmclear(&vcpu->vc_control_pa)) { - DPRINTF("%s: vmclear failed\n", __func__); - ret = EINVAL; - goto exit; - } - -exit: - return (ret); -} - -/* - * vcpu_init_vmx - * - * Intel VMX specific VCPU initialization routine. - * - * This function allocates various per-VCPU memory regions, sets up initial - * VCPU VMCS controls, and sets initial register values. - * - * Parameters: - * vcpu: the VCPU structure being initialized - * - * Return values: - * 0: the VCPU was initialized successfully - * ENOMEM: insufficient resources - * EINVAL: an error occurred during VCPU initialization - */ -int -vcpu_init_vmx(struct vcpu *vcpu) -{ - struct vmcs *vmcs; - uint32_t cr0, cr4; - int ret; - - ret = 0; - - /* Allocate VMCS VA */ - vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, - &kd_waitok); - - if (!vcpu->vc_control_va) - return (ENOMEM); - - /* Compute VMCS PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, - (paddr_t *)&vcpu->vc_control_pa)) { - ret = ENOMEM; - goto exit; - } - - /* Allocate MSR bitmap VA */ - vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, - &kd_waitok); - - if (!vcpu->vc_msr_bitmap_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute MSR bitmap PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va, - (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { - ret = ENOMEM; - goto exit; - } - - /* Allocate MSR exit load area VA */ - vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, - &kp_zero, &kd_waitok); - - if (!vcpu->vc_vmx_msr_exit_load_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute MSR exit load area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_load_va, - &vcpu->vc_vmx_msr_exit_load_pa)) { - ret = ENOMEM; - goto exit; - } - - /* Allocate MSR exit save area VA */ - vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, - &kp_zero, &kd_waitok); - - if (!vcpu->vc_vmx_msr_exit_save_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute MSR exit save area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_save_va, - &vcpu->vc_vmx_msr_exit_save_pa)) { - ret = ENOMEM; - goto exit; - } - - /* Allocate MSR entry load area VA */ - vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, - &kp_zero, &kd_waitok); - - if (!vcpu->vc_vmx_msr_entry_load_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute MSR entry load area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_entry_load_va, - &vcpu->vc_vmx_msr_entry_load_pa)) { - ret = ENOMEM; - goto exit; - } - - vmcs = (struct vmcs *)vcpu->vc_control_va; - vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; - - /* - * Load the VMCS onto this PCPU so we can write registers - */ - if (vmptrld(&vcpu->vc_control_pa)) { - ret = EINVAL; - goto exit; - } - - /* Host 
CR0 */ - cr0 = rcr0() & ~CR0_TS; - if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) { - DPRINTF("%s: error writing host CR0\n", __func__); - ret = EINVAL; - goto exit; - } - - /* Host CR4 */ - cr4 = rcr4(); - if (vmwrite(VMCS_HOST_IA32_CR4, cr4)) { - DPRINTF("%s: error writing host CR4\n", __func__); - ret = EINVAL; - goto exit; - } - - /* Host Segment Selectors */ - if (vmwrite(VMCS_HOST_IA32_CS_SEL, GSEL(GCODE_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host CS selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_DS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host DS selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_ES_SEL, GSEL(GDATA_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host ES selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_FS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host FS selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_GS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host GS selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_SS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host SS selector\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_HOST_IA32_TR_SEL, GSEL(GTSS_SEL, SEL_KPL))) { - DPRINTF("%s: error writing host TR selector\n", __func__); - ret = EINVAL; - goto exit; - } - - /* Host IDTR base */ - if (vmwrite(VMCS_HOST_IA32_IDTR_BASE, (uint32_t)idt)) { - DPRINTF("%s: error writing host IDTR base\n", __func__); - ret = EINVAL; - goto exit; - } - - /* VMCS link */ - if (vmwrite(VMCS_LINK_POINTER, 0xFFFFFFFF)) { - DPRINTF("%s: error writing VMCS link pointer\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_LINK_POINTER_HI, 0xFFFFFFFF)) { - DPRINTF("%s: error writing VMCS link pointer HI\n", __func__); - ret = EINVAL; - goto exit; - } - -exit: - if (ret) { - if (vcpu->vc_control_va) - km_free((void *)vcpu->vc_control_va, PAGE_SIZE, - &kv_page, &kp_zero); - if (vcpu->vc_msr_bitmap_va) - km_free((void *)vcpu->vc_msr_bitmap_va, PAGE_SIZE, - &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_exit_save_va) - km_free((void *)vcpu->vc_vmx_msr_exit_save_va, - PAGE_SIZE, &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_exit_load_va) - km_free((void *)vcpu->vc_vmx_msr_exit_load_va, - PAGE_SIZE, &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_entry_load_va) - km_free((void *)vcpu->vc_vmx_msr_entry_load_va, - PAGE_SIZE, &kv_page, &kp_zero); - } else { - if (vmclear(&vcpu->vc_control_pa)) { - DPRINTF("%s: vmclear failed\n", __func__); - ret = EINVAL; - } - } - - return (ret); -} - -/* - * vcpu_reset_regs - * - * Resets a vcpu's registers to the provided state - * - * Parameters: - * vcpu: the vcpu whose registers shall be reset - * vrs: the desired register state - * - * Return values: - * 0: the vcpu's registers were successfully reset - * !0: the vcpu's registers could not be reset (see arch-specific reset - * function for various values that can be returned here) - */ -int -vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs) -{ - int ret; - - if (vmm_softc->mode == VMM_MODE_VMX || - vmm_softc->mode == VMM_MODE_EPT) - ret = vcpu_reset_regs_vmx(vcpu, vrs); - else if (vmm_softc->mode == VMM_MODE_SVM || - vmm_softc->mode == VMM_MODE_RVI) - ret = vcpu_reset_regs_svm(vcpu, vrs); - else - panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); - - return (ret); -} - -/* - * vcpu_init_svm - * - * 
AMD SVM specific VCPU initialization routine. - * - * This function allocates various per-VCPU memory regions, sets up initial - * VCPU VMCB controls, and sets initial register values. - * - * Parameters: - * vcpu: the VCPU structure being initialized - * - * Return values: - * 0: the VCPU was initialized successfully - * ENOMEM: insufficient resources - * EINVAL: an error occurred during VCPU initialization - */ -int -vcpu_init_svm(struct vcpu *vcpu) -{ - int ret; - - ret = 0; - - /* Allocate VMCB VA */ - vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, - &kd_waitok); - - if (!vcpu->vc_control_va) - return (ENOMEM); - - /* Compute VMCB PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, - (paddr_t *)&vcpu->vc_control_pa)) { - ret = ENOMEM; - goto exit; - } - - DPRINTF("%s: VMCB va @ 0x%x, pa @ 0x%x\n", __func__, - (uint32_t)vcpu->vc_control_va, - (uint32_t)vcpu->vc_control_pa); - - - /* Allocate MSR bitmap VA (2 pages) */ - vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any, - &vmm_kp_contig, &kd_waitok); - - if (!vcpu->vc_msr_bitmap_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute MSR bitmap PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va, - (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { - ret = ENOMEM; - goto exit; - } - - DPRINTF("%s: MSR bitmap va @ 0x%x, pa @ 0x%x\n", __func__, - (uint32_t)vcpu->vc_msr_bitmap_va, - (uint32_t)vcpu->vc_msr_bitmap_pa); - - /* Allocate host state area VA */ - vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, - &kp_zero, &kd_waitok); - - if (!vcpu->vc_svm_hsa_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute host state area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_hsa_va, - &vcpu->vc_svm_hsa_pa)) { - ret = ENOMEM; - goto exit; - } - - DPRINTF("%s: HSA va @ 0x%x, pa @ 0x%x\n", __func__, - (uint32_t)vcpu->vc_svm_hsa_va, - (uint32_t)vcpu->vc_svm_hsa_pa); - - /* Allocate IOIO area VA (3 pages) */ - vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any, - &vmm_kp_contig, &kd_waitok); - - if (!vcpu->vc_svm_ioio_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute IOIO area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_ioio_va, - &vcpu->vc_svm_ioio_pa)) { - ret = ENOMEM; - goto exit; - } - - DPRINTF("%s: IOIO va @ 0x%x, pa @ 0x%x\n", __func__, - (uint32_t)vcpu->vc_svm_ioio_va, - (uint32_t)vcpu->vc_svm_ioio_pa); - -exit: - if (ret) { - if (vcpu->vc_control_va) - km_free((void *)vcpu->vc_control_va, PAGE_SIZE, - &kv_page, &kp_zero); - if (vcpu->vc_msr_bitmap_va) - km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, - &kv_any, &vmm_kp_contig); - if (vcpu->vc_svm_hsa_va) - km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, - &kv_page, &kp_zero); - if (vcpu->vc_svm_ioio_va) - km_free((void *)vcpu->vc_svm_ioio_va, - 3 * PAGE_SIZE, &kv_any, &vmm_kp_contig); - } - - return (ret); -} - -/* - * vcpu_init - * - * Calls the architecture-specific VCPU init routine - */ -int -vcpu_init(struct vcpu *vcpu) -{ - int ret = 0; - - vcpu->vc_virt_mode = vmm_softc->mode; - vcpu->vc_state = VCPU_STATE_STOPPED; - vcpu->vc_vpid = 0; - if (vmm_softc->mode == VMM_MODE_VMX || - vmm_softc->mode == VMM_MODE_EPT) - ret = vcpu_init_vmx(vcpu); - else if (vmm_softc->mode == VMM_MODE_SVM || - vmm_softc->mode == VMM_MODE_RVI) - ret = vcpu_init_svm(vcpu); - else - panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); - - return (ret); -} - -/* - * vcpu_deinit_vmx - * - * Deinitializes the vcpu described by 'vcpu' - * - * Parameters: - * vcpu: the vcpu to be deinited - */ -void 
-vcpu_deinit_vmx(struct vcpu *vcpu) -{ - if (vcpu->vc_control_va) - km_free((void *)vcpu->vc_control_va, PAGE_SIZE, - &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_exit_save_va) - km_free((void *)vcpu->vc_vmx_msr_exit_save_va, - PAGE_SIZE, &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_exit_load_va) - km_free((void *)vcpu->vc_vmx_msr_exit_load_va, - PAGE_SIZE, &kv_page, &kp_zero); - if (vcpu->vc_vmx_msr_entry_load_va) - km_free((void *)vcpu->vc_vmx_msr_entry_load_va, - PAGE_SIZE, &kv_page, &kp_zero); - - if (vcpu->vc_vmx_vpid_enabled) - vmm_free_vpid(vcpu->vc_vpid); -} - -/* - * vcpu_deinit_svm - * - * Deinitializes the vcpu described by 'vcpu' - * - * Parameters: - * vcpu: the vcpu to be deinited - */ -void -vcpu_deinit_svm(struct vcpu *vcpu) -{ - if (vcpu->vc_control_va) - km_free((void *)vcpu->vc_control_va, PAGE_SIZE, &kv_page, - &kp_zero); - if (vcpu->vc_msr_bitmap_va) - km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, &kv_any, - &vmm_kp_contig); - if (vcpu->vc_svm_hsa_va) - km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, &kv_page, - &kp_zero); - if (vcpu->vc_svm_ioio_va) - km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any, - &vmm_kp_contig); -} - -/* - * vcpu_deinit - * - * Calls the architecture-specific VCPU deinit routine - * - * Parameters: - * vcpu: the vcpu to be deinited - */ -void -vcpu_deinit(struct vcpu *vcpu) -{ - if (vmm_softc->mode == VMM_MODE_VMX || - vmm_softc->mode == VMM_MODE_EPT) - vcpu_deinit_vmx(vcpu); - else if (vmm_softc->mode == VMM_MODE_SVM || - vmm_softc->mode == VMM_MODE_RVI) - vcpu_deinit_svm(vcpu); - else - panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); -} - -/* - * vm_teardown - * - * Tears down (destroys) the vm indicated by 'vm'. - */ -void -vm_teardown(struct vm *vm) -{ - struct vcpu *vcpu, *tmp; - - /* Free VCPUs */ - rw_enter_write(&vm->vm_vcpu_lock); - SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp) { - SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link); - vcpu_deinit(vcpu); - pool_put(&vcpu_pool, vcpu); - } - - vm_impl_deinit(vm); - - /* teardown guest vmspace */ - if (vm->vm_map != NULL) - uvm_map_deallocate(vm->vm_map); - - vmm_softc->vm_ct--; - if (vmm_softc->vm_ct < 1) - vmm_stop(); - rw_exit_write(&vm->vm_vcpu_lock); - pool_put(&vm_pool, vm); -} - -/* - * vcpu_vmx_check_cap - * - * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1 - * or set = 0, respectively). - * - * When considering 'msr', we check to see if true controls are available, - * and use those if so. - * - * Returns 1 of 'cap' can be set/cleared as requested, 0 otherwise. 
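For readers without the SDM open: every VMX capability MSR splits into an allowed-0 half (low 32 bits; a 1 there means the control bit is forced to 1) and an allowed-1 half (high 32 bits; a 0 there means the control bit is forced to 0). That is all the function below tests:

    /* Hypothetical capability MSR: allowed-1 = 0x0000003f,
     * allowed-0 = 0x00000016, so bits 1, 2 and 4 are forced on
     * and bits 0, 3 and 5 are free to take either value. */
    uint64_t ctl = 0x0000003f00000016ULL;

    int can_set   = (ctl & ((uint64_t)cap << 32)) != 0;  /* allowed-1 */
    int can_clear = (ctl & cap) == 0;                    /* allowed-0 */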
- */ -int -vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set) -{ - uint64_t ctl; - - if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { - switch (msr) { - case IA32_VMX_PINBASED_CTLS: - ctl = vcpu->vc_vmx_true_pinbased_ctls; - break; - case IA32_VMX_PROCBASED_CTLS: - ctl = vcpu->vc_vmx_true_procbased_ctls; - break; - case IA32_VMX_PROCBASED2_CTLS: - ctl = vcpu->vc_vmx_procbased2_ctls; - break; - case IA32_VMX_ENTRY_CTLS: - ctl = vcpu->vc_vmx_true_entry_ctls; - break; - case IA32_VMX_EXIT_CTLS: - ctl = vcpu->vc_vmx_true_exit_ctls; - break; - default: - return (0); - } - } else { - switch (msr) { - case IA32_VMX_PINBASED_CTLS: - ctl = vcpu->vc_vmx_pinbased_ctls; - break; - case IA32_VMX_PROCBASED_CTLS: - ctl = vcpu->vc_vmx_procbased_ctls; - break; - case IA32_VMX_PROCBASED2_CTLS: - ctl = vcpu->vc_vmx_procbased2_ctls; - break; - case IA32_VMX_ENTRY_CTLS: - ctl = vcpu->vc_vmx_entry_ctls; - break; - case IA32_VMX_EXIT_CTLS: - ctl = vcpu->vc_vmx_exit_ctls; - break; - default: - return (0); - } - } - - if (set) { - /* Check bit 'cap << 32', must be !0 */ - return (ctl & ((uint64_t)cap << 32)) != 0; - } else { - /* Check bit 'cap', must be 0 */ - return (ctl & cap) == 0; - } -} - -/* - * vcpu_vmx_compute_ctrl - * - * Computes the appropriate control value, given the supplied parameters - * and CPU capabilities. - * - * Intel has made somewhat of a mess of this computation - it is described - * using no fewer than three different approaches, spread across many - * pages of the SDM. Further compounding the problem is the fact that now - * we have "true controls" for each type of "control", and each needs to - * be examined to get the calculation right, but only if "true" controls - * are present on the CPU we're on. - * - * Parameters: - * ctrlval: the control value, as read from the CPU MSR - * ctrl: which control is being set (eg, pinbased, procbased, etc) - * want0: the set of desired 0 bits - * want1: the set of desired 1 bits - * out: (out) the correct value to write into the VMCS for this VCPU, - * for the 'ctrl' desired. - * - * Returns 0 if successful, or EINVAL if the supplied parameters define - * an unworkable control setup. - */ -int -vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1, - uint32_t want0, uint32_t *out) -{ - int i, set, clear; - - /* - * The Intel SDM gives three formulae for determining which bits to - * set/clear for a given control and desired functionality. Formula - * 1 is the simplest but disallows use of newer features that are - * enabled by functionality in later CPUs. - * - * Formulas 2 and 3 allow such extra functionality. We use formula - * 2 - this requires us to know the identity of controls in the - * "default1" class for each control register, but allows us to not - * have to pass along and/or query both sets of capability MSRs for - * each control lookup. This makes the code slightly longer, - * however. - */ - for (i = 0; i < 32; i++) { - /* Figure out if we can set and / or clear this bit */ - set = (ctrlval & (1ULL << (i + 32))) != 0; - clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0; - - /* If the bit can't be set nor cleared, something's wrong */ - if (!set && !clear) - return (EINVAL); - - /* - * Formula 2.c.i - "If the relevant VMX capability MSR - * reports that a control has a single setting, use that - * setting." 
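Summarized, formula 2 collapses to a four-way decision per control bit; only the both-flexible case needs the default1-class tables that follow:

    /*
     *  can_set  can_clear  action
     *  -------  ---------  -----------------------------------------
     *     0         0      broken capability MSR -> EINVAL
     *     1         0      fixed to 1; EINVAL if the caller wants 0
     *     0         1      fixed to 0; EINVAL if the caller wants 1
     *     1         1      honor want1/want0, else default1 rules
     */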
- */ - if (set && !clear) { - if (want0 & (1ULL << i)) - return (EINVAL); - else - *out |= (1ULL << i); - } else if (clear && !set) { - if (want1 & (1ULL << i)) - return (EINVAL); - else - *out &= ~(1ULL << i); - } else { - /* - * 2.c.ii - "If the relevant VMX capability MSR - * reports that a control can be set to 0 or 1 - * and that control's meaning is known to the VMM, - * set the control based on the functionality desired." - */ - if (want1 & (1ULL << i)) - *out |= (1ULL << i); - else if (want0 & (1 << i)) - *out &= ~(1ULL << i); - else { - /* - * ... assuming the control's meaning is not - * known to the VMM ... - * - * 2.c.iii - "If the relevant VMX capability - * MSR reports that a control can be set to 0 - * or 1 and the control is not in the default1 - * class, set the control to 0." - * - * 2.c.iv - "If the relevant VMX capability - * MSR reports that a control can be set to 0 - * or 1 and the control is in the default1 - * class, set the control to 1." - */ - switch (ctrl) { - case IA32_VMX_PINBASED_CTLS: - case IA32_VMX_TRUE_PINBASED_CTLS: - /* - * A.3.1 - default1 class of pinbased - * controls comprises bits 1,2,4 - */ - switch (i) { - case 1: - case 2: - case 4: - *out |= (1ULL << i); - break; - default: - *out &= ~(1ULL << i); - break; - } - break; - case IA32_VMX_PROCBASED_CTLS: - case IA32_VMX_TRUE_PROCBASED_CTLS: - /* - * A.3.2 - default1 class of procbased - * controls comprises bits 1, 4-6, 8, - * 13-16, 26 - */ - switch (i) { - case 1: - case 4 ... 6: - case 8: - case 13 ... 16: - case 26: - *out |= (1ULL << i); - break; - default: - *out &= ~(1ULL << i); - break; - } - break; - /* - * Unknown secondary procbased controls - * can always be set to 0 - */ - case IA32_VMX_PROCBASED2_CTLS: - *out &= ~(1ULL << i); - break; - case IA32_VMX_EXIT_CTLS: - case IA32_VMX_TRUE_EXIT_CTLS: - /* - * A.4 - default1 class of exit - * controls comprises bits 0-8, 10, - * 11, 13, 14, 16, 17 - */ - switch (i) { - case 0 ... 8: - case 10 ... 11: - case 13 ... 14: - case 16 ... 17: - *out |= (1ULL << i); - break; - default: - *out &= ~(1ULL << i); - break; - } - break; - case IA32_VMX_ENTRY_CTLS: - case IA32_VMX_TRUE_ENTRY_CTLS: - /* - * A.5 - default1 class of entry - * controls comprises bits 0-8, 12 - */ - switch (i) { - case 0 ... 8: - case 12: - *out |= (1ULL << i); - break; - default: - *out &= ~(1ULL << i); - break; - } - break; - } - } - } - } - - return (0); -} - -/* - * vm_get_info - * - * Returns information about the VM indicated by 'vip'. The 'vip_size' field - * in the 'vip' parameter is used to indicate the size of the caller's buffer. - * If insufficient space exists in that buffer, the required size needed is - * returned in vip_size and the number of VM information structures returned - * in vip_info_count is set to 0. The caller should then try the ioctl again - * after allocating a sufficiently large buffer. 
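From userland this is the usual two-pass sizing dance. A sketch against the VMM_IOC_INFO ioctl defined in vmmvar.h (error handling and the no-VMs case elided):

    struct vm_info_params vip;

    memset(&vip, 0, sizeof(vip));
    ioctl(fd, VMM_IOC_INFO, &vip);      /* pass 1: vip_size = bytes needed */
    vip.vip_info = malloc(vip.vip_size);
    ioctl(fd, VMM_IOC_INFO, &vip);      /* pass 2: vip_info_ct records     */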
- * - * Parameters: - * vip: information structure identifying the VM to queery - * - * Return values: - * 0: the operation succeeded - * ENOMEM: memory allocation error during processng - * EFAULT: error copying data to user process - */ -int -vm_get_info(struct vm_info_params *vip) -{ - struct vm_info_result *out; - struct vm *vm; - struct vcpu *vcpu; - int i, j; - size_t need; - - rw_enter_read(&vmm_softc->vm_lock); - need = vmm_softc->vm_ct * sizeof(struct vm_info_result); - if (vip->vip_size < need) { - vip->vip_info_ct = 0; - vip->vip_size = need; - rw_exit_read(&vmm_softc->vm_lock); - return (0); - } - - out = malloc(need, M_DEVBUF, M_NOWAIT|M_ZERO); - if (out == NULL) { - vip->vip_info_ct = 0; - rw_exit_read(&vmm_softc->vm_lock); - return (ENOMEM); - } - - i = 0; - vip->vip_info_ct = vmm_softc->vm_ct; - SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) { - out[i].vir_memory_size = vm->vm_memory_size; - out[i].vir_used_size = - pmap_resident_count(vm->vm_map->pmap) * PAGE_SIZE; - out[i].vir_ncpus = vm->vm_vcpu_ct; - out[i].vir_id = vm->vm_id; - out[i].vir_creator_pid = vm->vm_creator_pid; - strncpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN); - rw_enter_read(&vm->vm_vcpu_lock); - for (j = 0; j < vm->vm_vcpu_ct; j++) { - out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN; - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, - vc_vcpu_link) { - if (vcpu->vc_id == j) - out[i].vir_vcpu_state[j] = - vcpu->vc_state; - } - } - rw_exit_read(&vm->vm_vcpu_lock); - i++; - } - rw_exit_read(&vmm_softc->vm_lock); - if (copyout(out, vip->vip_info, need) == EFAULT) { - free(out, M_DEVBUF, need); - return (EFAULT); - } - - free(out, M_DEVBUF, need); - return (0); -} - -/* - * vm_terminate - * - * Terminates the VM indicated by 'vtp'. - * - * Parameters: - * vtp: structure defining the VM to terminate - * - * Return values: - * 0: the VM was terminated - * !0: the VM could not be located - */ -int -vm_terminate(struct vm_terminate_params *vtp) -{ - struct vm *vm; - struct vcpu *vcpu; - u_int old, next; - int error; - - /* - * Find desired VM - */ - rw_enter_write(&vmm_softc->vm_lock); - error = vm_find(vtp->vtp_vm_id, &vm); - - if (error == 0) { - rw_enter_read(&vm->vm_vcpu_lock); - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) { - do { - old = vcpu->vc_state; - if (old == VCPU_STATE_RUNNING) - next = VCPU_STATE_REQTERM; - else if (old == VCPU_STATE_STOPPED) - next = VCPU_STATE_TERMINATED; - else /* must be REQTERM or TERMINATED */ - break; - } while (old != atomic_cas_uint(&vcpu->vc_state, - old, next)); - } - rw_exit_read(&vm->vm_vcpu_lock); - } else { - rw_exit_write(&vmm_softc->vm_lock); - return (error); - } - - SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link); - if (vm->vm_vcpus_running == 0) - vm_teardown(vm); - - rw_exit_write(&vmm_softc->vm_lock); - - return (0); -} - -/* - * vm_run - * - * Run the vm / vcpu specified by 'vrp' - * - * Parameters: - * vrp: structure defining the VM to run - * - * Return value: - * ENOENT: the VM defined in 'vrp' could not be located - * EBUSY: the VM defined in 'vrp' is already running - * EFAULT: error copying data from userspace (vmd) on return from previous - * exit. - * EAGAIN: help is needed from vmd(8) (device I/O or exit vmm(4) cannot - * handle in-kernel.) 
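The lock-free transition loop in vm_terminate above is a pattern used throughout this file: observe the current state, compute a successor, publish only if nothing changed in between, retry otherwise. In generic shape (statep stands in for &vcpu->vc_state and transition() for the if/else ladder):

    u_int old, next;

    do {
        old = *statep;
        next = transition(old);    /* propose a successor state */
        if (next == old)
            break;                 /* nothing to do             */
    } while (atomic_cas_uint(statep, old, next) != old);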
- * 0: the run loop exited and no help is needed from vmd(8) - */ -int -vm_run(struct vm_run_params *vrp) -{ - struct vm *vm; - struct vcpu *vcpu; - int ret = 0, error; - u_int old, next; - - /* - * Find desired VM - */ - rw_enter_read(&vmm_softc->vm_lock); - error = vm_find(vrp->vrp_vm_id, &vm); - - /* - * Attempt to locate the requested VCPU. If found, attempt to - * to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING. - * Failure to make the transition indicates the VCPU is busy. - */ - if (error == 0) { - rw_enter_read(&vm->vm_vcpu_lock); - SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) { - if (vcpu->vc_id == vrp->vrp_vcpu_id) - break; - } - - if (vcpu != NULL) { - old = VCPU_STATE_STOPPED; - next = VCPU_STATE_RUNNING; - - if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) - ret = EBUSY; - else - atomic_inc_int(&vm->vm_vcpus_running); - } - rw_exit_read(&vm->vm_vcpu_lock); - - if (vcpu == NULL) - ret = ENOENT; - } - rw_exit_read(&vmm_softc->vm_lock); - - if (error != 0) - ret = error; - - /* Bail if errors detected in the previous steps */ - if (ret) - return (ret); - - /* - * We may be returning from userland helping us from the last exit. - * If so (vrp_continue == 1), copy in the exit data from vmd. The - * exit data will be consumed before the next entry (this typically - * comprises VCPU register changes as the result of vmd(8)'s actions). - */ - if (vrp->vrp_continue) { - if (copyin(vrp->vrp_exit, &vcpu->vc_exit, - sizeof(struct vm_exit)) == EFAULT) { - return (EFAULT); - } - } - - /* Run the VCPU specified in vrp */ - if (vcpu->vc_virt_mode == VMM_MODE_VMX || - vcpu->vc_virt_mode == VMM_MODE_EPT) { - ret = vcpu_run_vmx(vcpu, vrp); - } else if (vcpu->vc_virt_mode == VMM_MODE_SVM || - vcpu->vc_virt_mode == VMM_MODE_RVI) { - ret = vcpu_run_svm(vcpu, vrp); - } - - /* - * We can set the VCPU states here without CAS because once - * a VCPU is in state RUNNING or REQTERM, only the VCPU itself - * can switch the state. - */ - atomic_dec_int(&vm->vm_vcpus_running); - if (vcpu->vc_state == VCPU_STATE_REQTERM) { - vrp->vrp_exit_reason = VM_EXIT_TERMINATED; - vcpu->vc_state = VCPU_STATE_TERMINATED; - if (vm->vm_vcpus_running == 0) - vm_teardown(vm); - ret = 0; - } else if (ret == EAGAIN) { - /* If we are exiting, populate exit data so vmd can help. */ - vrp->vrp_exit_reason = vcpu->vc_gueststate.vg_exit_reason; - vrp->vrp_irqready = vcpu->vc_irqready; - vcpu->vc_state = VCPU_STATE_STOPPED; - - if (copyout(&vcpu->vc_exit, vrp->vrp_exit, - sizeof(struct vm_exit)) == EFAULT) { - ret = EFAULT; - } else - ret = 0; - } else if (ret == 0) { - vrp->vrp_exit_reason = VM_EXIT_NONE; - vcpu->vc_state = VCPU_STATE_STOPPED; - } else { - vrp->vrp_exit_reason = VM_EXIT_TERMINATED; - vcpu->vc_state = VCPU_STATE_TERMINATED; - } - - return (ret); -} - -/* - * vcpu_must_stop - * - * Check if we need to (temporarily) stop running the VCPU for some reason, - * such as: - * - the VM was requested to terminate - * - the proc running this VCPU has pending signals - */ -int -vcpu_must_stop(struct vcpu *vcpu) -{ - struct proc *p = curproc; - - if (vcpu->vc_state == VCPU_STATE_REQTERM) - return (1); - if (CURSIG(p) != 0) - return (1); - return (0); -} - -/* - * vcpu_run_vmx - * - * VMX main loop used to run a VCPU. 
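Seen from the vmd(8) side, this contract is a simple pump: every successful ioctl returns with vrp_exit_reason filled in, and re-entry with vrp_continue set tells the kernel to consume the (possibly modified) exit structure first. A hypothetical sketch, not the actual vmd loop:

    struct vm_run_params vrp;
    /* ... vrp_vm_id, vrp_vcpu_id and the vrp_exit buffer set up ... */

    vrp.vrp_continue = 0;
    for (;;) {
        if (ioctl(fd, VMM_IOC_RUN, &vrp) == -1)
            break;                          /* hard error          */
        if (vrp.vrp_exit_reason == VM_EXIT_TERMINATED)
            break;                          /* vcpu is gone        */
        if (vrp.vrp_exit_reason != VM_EXIT_NONE)
            handle_exit(&vrp);              /* hypothetical helper */
        vrp.vrp_continue = 1;
    }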
- * - * Parameters: - * vcpu: The VCPU to run - * vrp: run parameters - * - * Return values: - * 0: The run loop exited and no help is needed from vmd - * EAGAIN: The run loop exited and help from vmd is needed - * EINVAL: an error occured - */ -int -vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) -{ - int ret = 0, resume, locked, exitinfo; - struct region_descriptor gdt; - struct cpu_info *ci; - uint64_t cr3, vmcs_ptr; - uint32_t insn_error, exit_reason; - struct schedstate_percpu *spc; - struct vmx_invvpid_descriptor vid; - uint32_t eii, procbased, int_st; - uint16_t irq; - - resume = 0; - irq = vrp->vrp_irq; - - /* - * If we are returning from userspace (vmd) because we exited - * last time, fix up any needed vcpu state first. Which state - * needs to be fixed up depends on what vmd populated in the - * exit data structure. - */ - if (vrp->vrp_continue) { - switch (vcpu->vc_gueststate.vg_exit_reason) { - case VMX_EXIT_IO: - if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) - vcpu->vc_gueststate.vg_eax = - vcpu->vc_exit.vei.vei_data; - break; - case VMX_EXIT_HLT: - break; - case VMX_EXIT_INT_WINDOW: - break; - case VMX_EXIT_EXTINT: - break; - case VMX_EXIT_EPT_VIOLATION: - break; - case VMX_EXIT_CPUID: - break; - case VMX_EXIT_XSETBV: - break; -#ifdef VMM_DEBUG - case VMX_EXIT_TRIPLE_FAULT: - DPRINTF("%s: vm %d vcpu %d triple fault\n", - __func__, vcpu->vc_parent->vm_id, - vcpu->vc_id); - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); - vmx_dump_vmcs(vcpu); - break; - case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: - DPRINTF("%s: vm %d vcpu %d failed entry " - "due to invalid guest state\n", - __func__, vcpu->vc_parent->vm_id, - vcpu->vc_id); - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); - return EINVAL; - default: - DPRINTF("%s: unimplemented exit type %d (%s)\n", - __func__, - vcpu->vc_gueststate.vg_exit_reason, - vmx_exit_reason_decode( - vcpu->vc_gueststate.vg_exit_reason)); - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); - break; -#endif /* VMM_DEBUG */ - } - } - - while (ret == 0) { - if (!resume) { - /* - * We are launching for the first time, or we are - * resuming from a different pcpu, so we need to - * reset certain pcpu-specific values. - */ - ci = curcpu(); - setregion(&gdt, ci->ci_gdt, - NGDT * sizeof(union descriptor) - 1); - - vcpu->vc_last_pcpu = ci; - - if (vmptrld(&vcpu->vc_control_pa)) { - ret = EINVAL; - break; - } - - if (gdt.rd_base == 0) { - ret = EINVAL; - break; - } - - /* Host GDTR base */ - if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) { - ret = EINVAL; - break; - } - - /* Host TR base */ - if (vmwrite(VMCS_HOST_IA32_TR_BASE, - GSEL(GTSS_SEL, SEL_KPL))) { - ret = EINVAL; - break; - } - - /* Host CR3 */ - cr3 = rcr3(); - if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) { - ret = EINVAL; - break; - } - } - - /* Handle vmd(8) injected interrupts */ - /* Is there an interrupt pending injection? 
*/ - if (irq != 0xFFFF) { - if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) { - printf("%s: can't get interruptibility state\n", - __func__); - ret = EINVAL; - break; - } - - /* Interruptbility state 0x3 covers NMIs and STI */ - if (!(int_st & 0x3) && vcpu->vc_irqready) { - eii = (irq & 0xFF); - eii |= (1ULL << 31); /* Valid */ - eii |= (0ULL << 8); /* Hardware Interrupt */ - if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) { - printf("vcpu_run_vmx: can't vector " - "interrupt to guest\n"); - ret = EINVAL; - break; - } - - irq = 0xFFFF; - } - } else if (!vcpu->vc_intr) { - /* - * Disable window exiting - */ - if (vmread(VMCS_PROCBASED_CTLS, &procbased)) { - printf("%s: can't read procbased ctls on " - "exit\n", __func__); - ret = EINVAL; - break; - } else { - procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING; - if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { - printf("%s: can't write procbased ctls " - "on exit\n", __func__); - ret = EINVAL; - break; - } - } - } - - /* Inject event if present */ - if (vcpu->vc_event != 0) { - eii = (vcpu->vc_event & 0xFF); - eii |= (1ULL << 31); /* Valid */ - - /* Set the "Send error code" flag for certain vectors */ - switch (vcpu->vc_event & 0xFF) { - case VMM_EX_DF: - case VMM_EX_TS: - case VMM_EX_NP: - case VMM_EX_SS: - case VMM_EX_GP: - case VMM_EX_PF: - case VMM_EX_AC: - eii |= (1ULL << 11); - } - - eii |= (3ULL << 8); /* Hardware Exception */ - if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) { - printf("%s: can't vector event to guest\n", - __func__); - ret = EINVAL; - break; - } - - if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 0)) { - printf("%s: can't write error code to guest\n", - __func__); - ret = EINVAL; - break; - } - - vcpu->vc_event = 0; - } - - if (vcpu->vc_vmx_vpid_enabled) { - /* Invalidate old TLB mappings */ - vid.vid_vpid = vcpu->vc_parent->vm_id; - vid.vid_addr = 0; - invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid); - } - - /* Start / resume the VCPU */ -#ifdef VMM_DEBUG - KERNEL_ASSERT_LOCKED(); -#endif /* VMM_DEBUG */ - KERNEL_UNLOCK(); - ret = vmx_enter_guest(&vcpu->vc_control_pa, - &vcpu->vc_gueststate, resume, gdt.rd_base); - - exit_reason = VM_EXIT_NONE; - if (ret == 0) { - /* - * ret == 0 implies we entered the guest, and later - * exited for some valid reason - */ - exitinfo = vmx_get_exit_info( - &vcpu->vc_gueststate.vg_eip, &exit_reason); - if (vmread(VMCS_GUEST_IA32_RFLAGS, - &vcpu->vc_gueststate.vg_eflags)) { - printf("%s: can't read guest rflags during " - "exit\n", __func__); - ret = EINVAL; - break; - } - } - - if (ret || exitinfo != VMX_EXIT_INFO_COMPLETE || - exit_reason != VMX_EXIT_EXTINT) { - KERNEL_LOCK(); - locked = 1; - } else - locked = 0; - - /* If we exited successfully ... */ - if (ret == 0) { - resume = 1; - if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) { - printf("%s: cannot read guest rip\n", __func__); - if (!locked) - KERNEL_LOCK(); - ret = EINVAL; - break; - } - - if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) { - printf("%s: cant read exit reason\n", __func__); - if (!locked) - KERNEL_LOCK(); - ret = EINVAL; - break; - } - - /* - * Handle the exit. This will alter "ret" to EAGAIN if - * the exit handler determines help from vmd is needed. - */ - vcpu->vc_gueststate.vg_exit_reason = exit_reason; - ret = vmx_handle_exit(vcpu); - - /* - * When the guest exited due to an external interrupt, - * we do not yet hold the kernel lock: we need to - * handle interrupts first before grabbing the lock: - * the interrupt handler might do work that - * another CPU holding the kernel lock waits for. 
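All of the interruption-information writes above build the same 32-bit VM-entry field. Spelled out with the SDM bit positions, injecting a #GP with error code 0 looks like:

    uint32_t eii;

    eii  = 13;          /* bits 7:0  - vector (VMM_EX_GP)         */
    eii |= 3UL << 8;    /* bits 10:8 - type 3, hardware exception */
    eii |= 1UL << 11;   /* bit 11    - deliver error code         */
    eii |= 1UL << 31;   /* bit 31    - valid                      */

    vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii);
    vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 0);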
- * - * Example: the TLB shootdown code in the pmap module - * sends an IPI to all other CPUs and busy-waits for - * them to decrement tlb_shoot_wait to zero. While - * busy-waiting, the kernel lock is held. - * - * If this code here attempted to grab the kernel lock - * before handling the interrupt, it would block - * forever. - */ - if (!locked) - KERNEL_LOCK(); - - if (vcpu->vc_gueststate.vg_eflags & PSL_I) - vcpu->vc_irqready = 1; - else - vcpu->vc_irqready = 0; - - /* - * If not ready for interrupts, but interrupts pending, - * enable interrupt window exiting. - */ - if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { - if (vmread(VMCS_PROCBASED_CTLS, &procbased)) { - printf("%s: can't read procbased ctls " - "on intwin exit\n", __func__); - ret = EINVAL; - break; - } - - procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING; - if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { - printf("%s: can't write procbased ctls " - "on intwin exit\n", __func__); - ret = EINVAL; - break; - } - } - - /* - * Exit to vmd if we are terminating, failed to enter, - * or need help (device I/O) - */ - if (ret || vcpu_must_stop(vcpu)) - break; - - if (vcpu->vc_intr && vcpu->vc_irqready) { - ret = EAGAIN; - break; - } - - /* Check if we should yield - don't hog the cpu */ - spc = &ci->ci_schedstate; - if (spc->spc_schedflags & SPCF_SHOULDYIELD) { - resume = 0; - if (vmclear(&vcpu->vc_control_pa)) { - ret = EINVAL; - break; - } - yield(); - } - } else if (ret == VMX_FAIL_LAUNCH_INVALID_VMCS) { - printf("%s: failed launch with invalid vmcs\n", - __func__); -#ifdef VMM_DEBUG - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); -#endif /* VMM_DEBUG */ - ret = EINVAL; - } else if (ret == VMX_FAIL_LAUNCH_VALID_VMCS) { - exit_reason = vcpu->vc_gueststate.vg_exit_reason; - printf("%s: failed launch with valid vmcs, code=%d " - "(%s)\n", __func__, exit_reason, - vmx_instruction_error_decode(exit_reason)); - if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) { - printf("%s: can't read insn error field\n", - __func__); - } else - printf("%s: insn error code = %d\n", __func__, - insn_error); -#ifdef VMM_DEBUG - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); -#endif /* VMM_DEBUG */ - ret = EINVAL; - } else { - printf("%s: failed launch for unknown reason %d\n", - __func__, ret); -#ifdef VMM_DEBUG - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); -#endif /* VMM_DEBUG */ - ret = EINVAL; - } - } - - /* Copy the VCPU register state to the exit structure */ - if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs)) - ret = EINVAL; - - /* - * We are heading back to userspace (vmd), either because we need help - * handling an exit, a guest interrupt is pending, or we failed in some - * way to enter the guest. Clear any current VMCS pointer as we may end - * up coming back on a different CPU. - */ - if (!vmptrst(&vmcs_ptr)) { - if (vmcs_ptr != 0xFFFFFFFFFFFFFFFFULL) - if (vmclear(&vcpu->vc_control_pa)) - ret = EINVAL; - } else - ret = EINVAL; - -#ifdef VMM_DEBUG - KERNEL_ASSERT_LOCKED(); -#endif /* VMM_DEBUG */ - return (ret); -} - -/* - * vmx_handle_intr - * - * Handle host (external) interrupts. We read which interrupt fired by - * extracting the vector from the VMCS and dispatch the interrupt directly - * to the host using vmm_dispatch_intr. 
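- *
- * (A minimal sketch of the lookup done below, for reference only: i386
- * gate descriptors split the handler offset into two 16-bit halves, so
- * the entry point is reassembled before the dispatch call:
- *
- *	vec = eii & 0xFF;
- *	idte = &idt[vec];
- *	handler = idte->gd_looffset | (idte->gd_hioffset << 16);
- *	vmm_dispatch_intr(handler);
- *
- * The interrupt is thus delivered through the host's own IDT entry, as
- * if it had arrived while the host was running.)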
- */
-void
-vmx_handle_intr(struct vcpu *vcpu)
-{
-	uint8_t vec;
-	uint32_t eii;
-	struct gate_descriptor *idte;
-	vaddr_t handler;
-
-	if (vmread(VMCS_EXIT_INTERRUPTION_INFO, &eii)) {
-		printf("%s: can't obtain intr info\n", __func__);
-		return;
-	}
-
-	vec = eii & 0xFF;
-
-	/* XXX check "error valid" code in eii, abort if 0 */
-	idte = &idt[vec];
-	handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
-	vmm_dispatch_intr(handler);
-}
-
-/*
- * vmx_handle_hlt
- *
- * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
- * the guest (no NMIs handled) by returning EIO to vmd.
- *
- * Parameters:
- *  vcpu: The VCPU that executed the HLT instruction
- *
- * Return Values:
- *  EINVAL: An error occurred extracting information from the VMCS, or an
- *      invalid HLT instruction was encountered
- *  EIO: The guest halted with interrupts disabled
- *  EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
- *      until a virtual interrupt is ready to inject
- *
- */
-int
-vmx_handle_hlt(struct vcpu *vcpu)
-{
-	uint32_t insn_length, eflags;
-
-	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
-		printf("%s: can't obtain instruction length\n", __func__);
-		return (EINVAL);
-	}
-
-	if (vmread(VMCS_GUEST_IA32_RFLAGS, &eflags)) {
-		printf("%s: can't obtain guest eflags\n", __func__);
-		return (EINVAL);
-	}
-
-	if (insn_length != 1) {
-		DPRINTF("%s: HLT with instruction length %d not supported\n",
-		    __func__, insn_length);
-		return (EINVAL);
-	}
-
-	if (!(eflags & PSL_I)) {
-		DPRINTF("%s: guest halted with interrupts disabled\n",
-		    __func__);
-		return (EIO);
-	}
-
-	vcpu->vc_gueststate.vg_eip += insn_length;
-	return (EAGAIN);
-}
-
-/*
- * vmx_get_exit_info
- *
- * Returns exit information containing the current guest EIP and exit reason
- * in eip and exit_reason. The return value is a bitmask indicating whether
- * reading the EIP and exit reason was successful.
- */
-int
-vmx_get_exit_info(uint32_t *eip, uint32_t *exit_reason)
-{
-	int rv = 0;
-
-	if (vmread(VMCS_GUEST_IA32_RIP, eip) == 0) {
-		rv |= VMX_EXIT_INFO_HAVE_RIP;
-		if (vmread(VMCS_EXIT_REASON, exit_reason) == 0)
-			rv |= VMX_EXIT_INFO_HAVE_REASON;
-	}
-	return (rv);
-}
-
-/*
- * vmx_handle_exit
- *
- * Handle exits from the VM by decoding the exit reason and calling various
- * subhandlers as needed.
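- *
- * (Overview, added editorially: every exit that emulates an instruction
- * -- CPUID, I/O, HLT, RDMSR, WRMSR, CR access -- sets update_rip so the
- * guest %eip is advanced past the emulated instruction, the STI/MOV-SS
- * interruptibility bits are cleared, and a #DB is queued if the guest
- * was single-stepping, roughly:
- *
- *	vmwrite(VMCS_GUEST_IA32_RIP, vcpu->vc_gueststate.vg_eip);
- *	if (eflags & PSL_T)
- *		vmm_inject_db(vcpu);
- *
- * Exits that need no emulation leave update_rip at 0.)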
- */ -int -vmx_handle_exit(struct vcpu *vcpu) -{ - uint64_t exit_reason; - uint32_t eflags, istate; - int update_rip, ret = 0; - - update_rip = 0; - exit_reason = vcpu->vc_gueststate.vg_exit_reason; - eflags = vcpu->vc_gueststate.vg_eflags; - - switch (exit_reason) { - case VMX_EXIT_INT_WINDOW: - if (!(eflags & PSL_I)) { - DPRINTF("%s: impossible intr window exit config\n", - __func__); - ret = EINVAL; - break; - } - - ret = EAGAIN; - update_rip = 0; - break; - case VMX_EXIT_EPT_VIOLATION: - ret = vmx_handle_np_fault(vcpu); - break; - case VMX_EXIT_CPUID: - ret = vmm_handle_cpuid(vcpu); - update_rip = 1; - break; - case VMX_EXIT_IO: - ret = vmx_handle_inout(vcpu); - update_rip = 1; - break; - case VMX_EXIT_EXTINT: - vmx_handle_intr(vcpu); - update_rip = 0; - break; - case VMX_EXIT_CR_ACCESS: - ret = vmx_handle_cr(vcpu); - update_rip = 1; - break; - case VMX_EXIT_HLT: - ret = vmx_handle_hlt(vcpu); - update_rip = 1; - break; - case VMX_EXIT_RDMSR: - ret = vmx_handle_rdmsr(vcpu); - update_rip = 1; - break; - case VMX_EXIT_WRMSR: - ret = vmx_handle_wrmsr(vcpu); - update_rip = 1; - break; - case VMX_EXIT_MWAIT: - case VMX_EXIT_MONITOR: - case VMX_EXIT_VMXON: - case VMX_EXIT_VMWRITE: - case VMX_EXIT_VMREAD: - case VMX_EXIT_VMLAUNCH: - case VMX_EXIT_VMRESUME: - case VMX_EXIT_VMPTRLD: - case VMX_EXIT_VMPTRST: - case VMX_EXIT_VMCLEAR: - case VMX_EXIT_VMCALL: - case VMX_EXIT_VMFUNC: - case VMX_EXIT_VMXOFF: - case VMX_EXIT_INVVPID: - case VMX_EXIT_INVEPT: - ret = vmm_inject_ud(vcpu); - update_rip = 0; - break; - case VMX_EXIT_TRIPLE_FAULT: -#ifdef VMM_DEBUG - DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__, - vcpu->vc_parent->vm_id, vcpu->vc_id); - vmx_vcpu_dump_regs(vcpu); - dump_vcpu(vcpu); - vmx_dump_vmcs(vcpu); -#endif /* VMM_DEBUG */ - ret = EAGAIN; - update_rip = 0; - break; - default: - DPRINTF("%s: unhandled exit %lld (%s)\n", __func__, - exit_reason, vmx_exit_reason_decode(exit_reason)); - return (EINVAL); - } - - if (update_rip) { - if (vmwrite(VMCS_GUEST_IA32_RIP, - vcpu->vc_gueststate.vg_eip)) { - printf("%s: can't advance rip\n", __func__); - return (EINVAL); - } - - if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, - &istate)) { - printf("%s: can't read interruptibility state\n", - __func__); - return (EINVAL); - } - - /* Interruptibilty state 0x3 covers NMIs and STI */ - istate &= ~0x3; - - if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST, - istate)) { - printf("%s: can't write interruptibility state\n", - __func__); - return (EINVAL); - } - - if (eflags & PSL_T) { - if (vmm_inject_db(vcpu)) { - printf("%s: can't inject #DB exception to " - "guest", __func__); - return (EINVAL); - } - } - } - - return (ret); -} - -/* - * vmm_inject_gp - * - * Injects an #GP exception into the guest VCPU. - * - * Parameters: - * vcpu: vcpu to inject into - * - * Return values: - * Always 0 - */ -int -vmm_inject_gp(struct vcpu *vcpu) -{ - DPRINTF("%s: injecting #GP at guest %%eip 0x%x\n", __func__, - vcpu->vc_gueststate.vg_eip); - vcpu->vc_event = VMM_EX_GP; - - return (0); -} - -/* - * vmm_inject_ud - * - * Injects an #UD exception into the guest VCPU. - * - * Parameters: - * vcpu: vcpu to inject into - * - * Return values: - * Always 0 - */ -int -vmm_inject_ud(struct vcpu *vcpu) -{ - DPRINTF("%s: injecting #UD at guest %%eip 0x%x\n", __func__, - vcpu->vc_gueststate.vg_eip); - vcpu->vc_event = VMM_EX_UD; - - return (0); -} - -/* - * vmm_inject_db - * - * Injects a #DB exception into the guest VCPU. 
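- *
- * (Note added for clarity: like vmm_inject_gp() and vmm_inject_ud()
- * above, this only latches the vector in vc_event; the run loop injects
- * it on the next VM entry as a type-3 hardware exception, setting the
- * error-code-valid bit only for vectors that architecturally push one
- * (#DF, #TS, #NP, #SS, #GP, #PF, #AC). A condensed sketch of that
- * consumer in vcpu_run_vmx():
- *
- *	eii = (vcpu->vc_event & 0xFF) | (3UL << 8) | (1UL << 31);
- *	vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii);
- *	vcpu->vc_event = 0;
- *
- * #DB is not in that set, so no error code accompanies it.)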
- * - * Parameters: - * vcpu: vcpu to inject into - * - * Return values: - * Always 0 - */ -int -vmm_inject_db(struct vcpu *vcpu) -{ - DPRINTF("%s: injecting #DB at guest %%eip 0x%x\n", __func__, - vcpu->vc_gueststate.vg_eip); - vcpu->vc_event = VMM_EX_DB; - - return (0); -} - -/* - * vmm_get_guest_memtype - * - * Returns the type of memory 'gpa' refers to in the context of vm 'vm' - */ -int -vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) -{ - int i; - struct vm_mem_range *vmr; - - if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) { - DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); - return (VMM_MEM_TYPE_REGULAR); - } - - /* XXX Use binary search? */ - for (i = 0; i < vm->vm_nmemranges; i++) { - vmr = &vm->vm_memranges[i]; - - /* - * vm_memranges are ascending. gpa can no longer be in one of - * the memranges - */ - if (gpa < vmr->vmr_gpa) - break; - - if (gpa < vmr->vmr_gpa + vmr->vmr_size) - return (VMM_MEM_TYPE_REGULAR); - } - - DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); - return (VMM_MEM_TYPE_UNKNOWN); -} - -/* - * vmm_get_guest_faulttype - * - * Determines the type (R/W/X) of the last fault on the VCPU last run on - * this PCPU. Calls the appropriate architecture-specific subroutine. - */ -int -vmm_get_guest_faulttype(void) -{ - if (vmm_softc->mode == VMM_MODE_EPT) - return vmx_get_guest_faulttype(); - else if (vmm_softc->mode == VMM_MODE_RVI) - return vmx_get_guest_faulttype(); - else - panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); -} - -/* - * vmx_get_exit_qualification - * - * Return the current VMCS' exit qualification information - */ -int -vmx_get_exit_qualification(uint32_t *exit_qualification) -{ - if (vmread(VMCS_GUEST_EXIT_QUALIFICATION, exit_qualification)) { - printf("%s: cant extract exit qual\n", __func__); - return (EINVAL); - } - - return (0); -} - -/* - * vmx_get_guest_faulttype - * - * Determines the type (R/W/X) of the last fault on the VCPU last run on - * this PCPU. - */ -int -vmx_get_guest_faulttype(void) -{ - uint32_t exit_qualification; - uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE | - IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE; - uint64_t protmask = IA32_VMX_EPT_FAULT_READ | - IA32_VMX_EPT_FAULT_WRITE | IA32_VMX_EPT_FAULT_EXEC; - - if (vmx_get_exit_qualification(&exit_qualification)) - return (-1); - - if ((exit_qualification & presentmask) == 0) - return VM_FAULT_INVALID; - if (exit_qualification & protmask) - return VM_FAULT_PROTECT; - return (-1); -} - -/* - * svm_get_guest_faulttype - * - * Determines the type (R/W/X) of the last fault on the VCPU last run on - * this PCPU. - */ -int -svm_get_guest_faulttype(void) -{ - /* XXX removed due to rot */ - return (-1); -} - -/* - * vmx_fault_page - * - * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' - * at address 'gpa'. - */ -int -vmx_fault_page(struct vcpu *vcpu, paddr_t gpa) -{ - int fault_type, ret; - - fault_type = vmx_get_guest_faulttype(); - if (fault_type == -1) { - printf("%s: invalid fault type\n", __func__); - return (EINVAL); - } - - ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type, - PROT_READ | PROT_WRITE | PROT_EXEC); - - if (ret) - printf("%s: uvm_fault returns %d, GPA=0x%llx, eip=0x%x\n", - __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_eip); - - return (ret); -} - -/* - * vmx_handle_np_fault - * - * High level nested paging handler for VMX. Verifies that a fault is for a - * valid memory region, then faults a page, or aborts otherwise. 
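- *
- * (Implementation note, added editorially: i386 vmread() moves 32 bits
- * at a time, so the faulting guest-physical address is read as two
- * halves and reassembled before being classified:
- *
- *	gpa = (uint64_t)gpa_lo | (uint64_t)gpa_hi << 32;
- *
- * e.g. gpa_hi = 0x1 and gpa_lo = 0x2000 yield GPA 0x100002000.)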
- */ -int -vmx_handle_np_fault(struct vcpu *vcpu) -{ - uint64_t gpa; - uint32_t gpa_lo, gpa_hi; - int gpa_memtype, ret; - - ret = 0; - if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa_lo)) { - printf("%s: cannot extract faulting pa lo\n", __func__); - return (EINVAL); - } - - if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS_HI, &gpa_hi)) { - printf("%s: cannot extract faulting pa hi\n", __func__); - return (EINVAL); - } - - gpa = (uint64_t)gpa_lo | (uint64_t)gpa_hi << 32ULL; - - gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); - switch (gpa_memtype) { - case VMM_MEM_TYPE_REGULAR: - ret = vmx_fault_page(vcpu, gpa); - break; - default: - printf("unknown memory type %d for GPA 0x%llx\n", - gpa_memtype, gpa); - return (EINVAL); - } - - return (ret); -} - -/* - * vmx_handle_inout - * - * Exit handler for IN/OUT instructions. - * - * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these - * will be passed to vmd for completion. - */ -int -vmx_handle_inout(struct vcpu *vcpu) -{ - uint32_t insn_length, exit_qual; - int ret; - - if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { - printf("%s: can't obtain instruction length\n", __func__); - return (EINVAL); - } - - if (vmx_get_exit_qualification(&exit_qual)) { - printf("%s: can't get exit qual\n", __func__); - return (EINVAL); - } - - /* Bits 0:2 - size of exit */ - vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1; - /* Bit 3 - direction */ - vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x8) >> 3; - /* Bit 4 - string instruction? */ - vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4; - /* Bit 5 - REP prefix? */ - vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5; - /* Bit 6 - Operand encoding */ - vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6; - /* Bit 16:31 - port */ - vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; - /* Data */ - vcpu->vc_exit.vei.vei_data = vcpu->vc_gueststate.vg_eax; - - vcpu->vc_gueststate.vg_eip += insn_length; - - /* - * The following ports usually belong to devices owned by vmd. - * Return EAGAIN to signal help needed from userspace (vmd). - * Return 0 to indicate we don't care about this port. - * - * XXX something better than a hardcoded list here, maybe - * configure via vmd via the device list in vm create params? - * - * XXX handle not eax target - */ - switch (vcpu->vc_exit.vei.vei_port) { - case IO_ICU1 ... IO_ICU1 + 1: - case 0x40 ... 0x43: - case PCKBC_AUX: - case IO_RTC ... IO_RTC + 1: - case IO_ICU2 ... IO_ICU2 + 1: - case 0x3f8 ... 0x3ff: - case ELCR0 ... ELCR1: - case 0xcf8: - case 0xcfc ... 0xcff: - case 0x500 ... 0x50f: - case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END: - ret = EAGAIN; - break; - default: - /* Read from unsupported ports returns FFs */ - if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) { - if (vcpu->vc_exit.vei.vei_size == 4) - vcpu->vc_gueststate.vg_eax |= 0xFFFFFFFF; - else if (vcpu->vc_exit.vei.vei_size == 2) - vcpu->vc_gueststate.vg_eax |= 0xFFFF; - else if (vcpu->vc_exit.vei.vei_size == 1) - vcpu->vc_gueststate.vg_eax |= 0xFF; - } - ret = 0; - } - - return (ret); -} - -/* - * vmx_load_pdptes - * - * Update the PDPTEs in the VMCS with the values currently indicated by the - * guest CR3. This is used for 32-bit PAE guests when enabling paging. 
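- *
- * (Background, added for clarity: a PAE %cr3 points at a 32-byte table
- * of four 64-bit page-directory-pointer entries, which VMX caches in
- * the VMCS. When a PAE guest sets CR0_PG, the four entries are copied
- * from the guest page into VMCS_GUEST_PDPTE0..3, roughly:
- *
- *	pdptes = (pd_entry_t *)cr3_host_virt;
- *	vmwrite(VMCS_GUEST_PDPTE0, pdptes[0]);
- *	...
- *	vmwrite(VMCS_GUEST_PDPTE3, pdptes[3]);
- *
- * A guest %cr3 with no backing page instead zeroes all four fields.)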
- * - * Parameters - * vcpu: The vcpu whose PDPTEs should be loaded - * - * Return values: - * 0: if successful - * EINVAL: if the PDPTEs could not be loaded - * ENOMEM: memory allocation failure - */ -int -vmx_load_pdptes(struct vcpu *vcpu) -{ - uint32_t cr3, cr3_host_phys; - vaddr_t cr3_host_virt; - pd_entry_t *pdptes; - int ret; - - if (vmread(VMCS_GUEST_IA32_CR3, &cr3)) { - printf("%s: can't read guest cr3\n", __func__); - return (EINVAL); - } - - if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3, - (paddr_t *)&cr3_host_phys)) { - DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n", - __func__); - if (vmwrite(VMCS_GUEST_PDPTE0, 0)) { - printf("%s: can't write guest PDPTE0\n", __func__); - return (EINVAL); - } - - if (vmwrite(VMCS_GUEST_PDPTE1, 0)) { - printf("%s: can't write guest PDPTE1\n", __func__); - return (EINVAL); - } - - if (vmwrite(VMCS_GUEST_PDPTE2, 0)) { - printf("%s: can't write guest PDPTE2\n", __func__); - return (EINVAL); - } - - if (vmwrite(VMCS_GUEST_PDPTE3, 0)) { - printf("%s: can't write guest PDPTE3\n", __func__); - return (EINVAL); - } - return (0); - } - - ret = 0; - - cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_waitok); - if (!cr3_host_virt) { - printf("%s: can't allocate address for guest CR3 mapping\n", - __func__); - return (ENOMEM); - } - - pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ); - - pdptes = (pd_entry_t *)cr3_host_virt; - if (vmwrite(VMCS_GUEST_PDPTE0, pdptes[0])) { - printf("%s: can't write guest PDPTE0\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE1, pdptes[1])) { - printf("%s: can't write guest PDPTE1\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE2, pdptes[2])) { - printf("%s: can't write guest PDPTE2\n", __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_PDPTE3, pdptes[3])) { - printf("%s: can't write guest PDPTE3\n", __func__); - ret = EINVAL; - goto exit; - } - -exit: - pmap_kremove(cr3_host_virt, PAGE_SIZE); - km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none); - return (ret); -} - -/* - * vmx_handle_cr0_write - * - * Write handler for CR0. This function ensures valid values are written into - * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc). - * - * Parameters - * vcpu: The vcpu taking the cr0 write exit - * r: The guest's desired (incoming) cr0 value - * - * Return values: - * 0: if succesful - * EINVAL: if an error occurred - */ -int -vmx_handle_cr0_write(struct vcpu *vcpu, uint32_t r) -{ - struct vmx_msr_store *msr_store; - uint32_t ectls, oldcr0, cr4, mask; - int ret; - - /* Check must-be-0 bits */ - mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); - if (r & mask) { - /* Inject #GP, let the guest handle it */ - DPRINTF("%s: guest set invalid bits in %%cr0. Zeros " - "mask=0x%llx, data=0x%x\n", __func__, - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1, - r); - vmm_inject_gp(vcpu); - return (0); - } - - /* Check must-be-1 bits */ - mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0; - if ((r & mask) != mask) { - /* Inject #GP, let the guest handle it */ - DPRINTF("%s: guest set invalid bits in %%cr0. 
Ones " - "mask=0x%llx, data=0x%x\n", __func__, - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0, - r); - vmm_inject_gp(vcpu); - return (0); - } - - if (vmread(VMCS_GUEST_IA32_CR0, &oldcr0)) { - printf("%s: can't read guest cr0\n", __func__); - return (EINVAL); - } - - if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) { - printf("%s: can't read guest cr4\n", __func__); - return (EINVAL); - } - - /* CR0 must always have NE set */ - r |= CR0_NE; - - if (vmwrite(VMCS_GUEST_IA32_CR0, r)) { - printf("%s: can't write guest cr0\n", __func__); - return (EINVAL); - } - - /* If the guest hasn't enabled paging, nothing more to do. */ - if (!(r & CR0_PG)) - return (0); - - /* - * Since the guest has enabled paging, then the IA32_VMX_IA32E_MODE_GUEST - * control must be set to the same as EFER_LME. - */ - msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; - - if (vmread(VMCS_ENTRY_CTLS, &ectls)) { - printf("%s: can't read entry controls", __func__); - return (EINVAL); - } - - if (msr_store[VCPU_REGS_EFER].vms_data & EFER_LME) - ectls |= IA32_VMX_IA32E_MODE_GUEST; - else - ectls &= ~IA32_VMX_IA32E_MODE_GUEST; - - if (vmwrite(VMCS_ENTRY_CTLS, ectls)) { - printf("%s: can't write entry controls", __func__); - return (EINVAL); - } - - /* Load PDPTEs if PAE guest enabling paging */ - if (!(oldcr0 & CR0_PG) && (r & CR0_PG) && (cr4 & CR4_PAE)) { - ret = vmx_load_pdptes(vcpu); - - if (ret) { - printf("%s: updating PDPTEs failed\n", __func__); - return (ret); - } - } - - return (0); -} - -/* - * vmx_handle_cr4_write - * - * Write handler for CR4. This function ensures valid values are written into - * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc). - * - * Parameters - * vcpu: The vcpu taking the cr4 write exit - * r: The guest's desired (incoming) cr4 value - * - * Return values: - * 0: if succesful - * EINVAL: if an error occurred - */ -int -vmx_handle_cr4_write(struct vcpu *vcpu, uint32_t r) -{ - uint64_t mask; - - /* Check must-be-0 bits */ - mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); - if (r & mask) { - /* Inject #GP, let the guest handle it */ - DPRINTF("%s: guest set invalid bits in %%cr4. Zeros " - "mask=0x%llx, data=0x%x\n", __func__, - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1, - r); - vmm_inject_gp(vcpu); - return (0); - } - - /* Check must-be-1 bits */ - mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0; - if ((r & mask) != mask) { - /* Inject #GP, let the guest handle it */ - DPRINTF("%s: guest set invalid bits in %%cr4. 
Ones " - "mask=0x%llx, data=0x%x\n", __func__, - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0, - r); - vmm_inject_gp(vcpu); - return (0); - } - - /* CR4_VMXE must always be enabled */ - r |= CR4_VMXE; - - if (vmwrite(VMCS_GUEST_IA32_CR4, r)) { - printf("%s: can't write guest cr4\n", __func__); - return (EINVAL); - } - - return (0); -} - -/* - * vmx_handle_cr - * - * Handle reads/writes to control registers (except CR3) - */ -int -vmx_handle_cr(struct vcpu *vcpu) -{ - uint32_t insn_length, exit_qual, r; - uint8_t crnum, dir, reg; - - if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { - printf("%s: can't obtain instruction length\n", __func__); - return (EINVAL); - } - - if (vmx_get_exit_qualification(&exit_qual)) { - printf("%s: can't get exit qual\n", __func__); - return (EINVAL); - } - - /* Low 4 bits of exit_qual represent the CR number */ - crnum = exit_qual & 0xf; - - /* - * Bits 5:4 indicate the direction of operation (or special CR-modifying - * instruction - */ - dir = (exit_qual & 0x30) >> 4; - - /* Bits 11:8 encode the source/target register */ - reg = (exit_qual & 0xf00) >> 8; - - switch (dir) { - case CR_WRITE: - if (crnum == 0 || crnum == 4) { - switch (reg) { - case 0: r = vcpu->vc_gueststate.vg_eax; break; - case 1: r = vcpu->vc_gueststate.vg_ecx; break; - case 2: r = vcpu->vc_gueststate.vg_edx; break; - case 3: r = vcpu->vc_gueststate.vg_ebx; break; - case 4: if (vmread(VMCS_GUEST_IA32_RSP, &r)) { - printf("%s: unable to read guest " - "RSP\n", __func__); - return (EINVAL); - } - break; - case 5: r = vcpu->vc_gueststate.vg_ebp; break; - case 6: r = vcpu->vc_gueststate.vg_esi; break; - case 7: r = vcpu->vc_gueststate.vg_edi; break; - } - DPRINTF("%s: mov to cr%d @ %x, data=0x%x\n", - __func__, crnum, vcpu->vc_gueststate.vg_eip, r); - } - - if (crnum == 0) - vmx_handle_cr0_write(vcpu, r); - - if (crnum == 4) - vmx_handle_cr4_write(vcpu, r); - - break; - case CR_READ: - DPRINTF("%s: mov from cr%d @ %x\n", __func__, - crnum, vcpu->vc_gueststate.vg_eip); - break; - case CR_CLTS: - DPRINTF("%s: clts instruction @ %x\n", __func__, - vcpu->vc_gueststate.vg_eip); - break; - case CR_LMSW: - DPRINTF("%s: lmsw instruction @ %x\n", __func__, - vcpu->vc_gueststate.vg_eip); - break; - default: - DPRINTF("%s: unknown cr access @ %x\n", __func__, - vcpu->vc_gueststate.vg_eip); - } - - vcpu->vc_gueststate.vg_eip += insn_length; - - return (0); -} - -/* - * vmx_handle_rdmsr - * - * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access - * and won't end up here. This handler is primarily intended to catch otherwise - * unknown MSR access for possible later inclusion in the bitmap list. 
For - * each MSR access that ends up here, we log the access (when VMM_DEBUG is - * enabled) - * - * Parameters: - * vcpu: vcpu structure containing instruction info causing the exit - * - * Return value: - * 0: The operation was successful - * EINVAL: An error occurred - */ -int -vmx_handle_rdmsr(struct vcpu *vcpu) -{ - uint32_t insn_length; - uint32_t *eax, *edx; -#ifdef VMM_DEBUG - uint32_t *ecx; -#endif /* VMM_DEBUG */ - - if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { - printf("%s: can't obtain instruction length\n", __func__); - return (EINVAL); - } - - if (insn_length != 2) { - DPRINTF("%s: RDMSR with instruction length %d not " - "supported\n", __func__, insn_length); - return (EINVAL); - } - - eax = &vcpu->vc_gueststate.vg_eax; - edx = &vcpu->vc_gueststate.vg_edx; - - *eax = 0; - *edx = 0; - -#ifdef VMM_DEBUG - /* Log the access, to be able to identify unknown MSRs */ - ecx = &vcpu->vc_gueststate.vg_ecx; - DPRINTF("%s: rdmsr exit, msr=0x%x, data returned to " - "guest=0x%x:0x%x\n", __func__, *ecx, *edx, *eax); -#endif /* VMM_DEBUG */ - - vcpu->vc_gueststate.vg_eip += insn_length; - - return (0); -} - -/* - * vmx_handle_misc_enable_msr - * - * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We - * limit what the guest can write to this MSR (certain hardware-related - * settings like speedstep, etc). - * - * Parameters: - * vcpu: vcpu structure containing information about the wrmsr causing this - * exit - */ -void -vmx_handle_misc_enable_msr(struct vcpu *vcpu) -{ - uint32_t *eax, *edx; - struct vmx_msr_store *msr_store; - - eax = &vcpu->vc_gueststate.vg_eax; - edx = &vcpu->vc_gueststate.vg_edx; - msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; - - /* Filter out guest writes to TCC, EIST, and xTPR */ - *eax &= ~(MISC_ENABLE_TCC | MISC_ENABLE_EIST_ENABLED | - MISC_ENABLE_xTPR_MESSAGE_DISABLE); - - msr_store[VCPU_REGS_MISC_ENABLE].vms_data = *eax | (((uint64_t) *edx << 32)); -} - -/* - * vmx_handle_wrmsr - * - * Handler for wrmsr instructions. This handler logs the access, and discards - * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end - * up here (it will be whitelisted in the MSR bitmap). 
- * - * Parameters: - * vcpu: vcpu structure containing instruction info causing the exit - * - * Return value: - * 0: The operation was successful - * 1: An error occurred - */ -int -vmx_handle_wrmsr(struct vcpu *vcpu) -{ - uint32_t insn_length; - uint32_t *eax, *ecx, *edx; - - if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { - printf("%s: can't obtain instruction length\n", __func__); - return (EINVAL); - } - - if (insn_length != 2) { - DPRINTF("%s: WRMSR with instruction length %d not " - "supported\n", __func__, insn_length); - return (EINVAL); - } - - eax = &vcpu->vc_gueststate.vg_eax; - ecx = &vcpu->vc_gueststate.vg_ecx; - edx = &vcpu->vc_gueststate.vg_edx; - - switch (*ecx) { - case MSR_MISC_ENABLE: - vmx_handle_misc_enable_msr(vcpu); - break; -#ifdef VMM_DEBUG - default: - /* - * Log the access, to be able to identify unknown MSRs - */ - DPRINTF("%s: wrmsr exit, msr=0x%x, discarding data " - "written from guest=0x%x:0x%x\n", __func__, - *ecx, *edx, *eax); -#endif /* VMM_DEBUG */ - } - - vcpu->vc_gueststate.vg_eip += insn_length; - - return (0); -} - -/* - * vmm_handle_cpuid - * - * Exit handler for CPUID instruction - * - * Parameters: - * vcpu: vcpu causing the CPUID exit - * - * Return value: - * 0: the exit was processed successfully - * EINVAL: error occurred validating the CPUID instruction arguments - */ -int -vmm_handle_cpuid(struct vcpu *vcpu) -{ - uint32_t insn_length; - uint32_t *eax, *ebx, *ecx, *edx, cpuid_limit; - uint32_t new_eax, new_ebx, new_ecx, new_edx; - struct vmx_msr_store *msr_store; - - if (vmm_softc->mode == VMM_MODE_VMX || - vmm_softc->mode == VMM_MODE_EPT) { - if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { - DPRINTF("%s: can't obtain instruction length\n", - __func__); - return (EINVAL); - } - - eax = &vcpu->vc_gueststate.vg_eax; - msr_store = - (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; - cpuid_limit = msr_store[VCPU_REGS_MISC_ENABLE].vms_data & - MISC_ENABLE_LIMIT_CPUID_MAXVAL; - } - - eax = &vcpu->vc_gueststate.vg_eax; - ebx = &vcpu->vc_gueststate.vg_ebx; - ecx = &vcpu->vc_gueststate.vg_ecx; - edx = &vcpu->vc_gueststate.vg_edx; - - /* - * "CPUID leaves above 02H and below 80000000H are only visible when - * IA32_MISC_ENABLE MSR has bit 22 set to its default value 0" - */ - if ((vmm_softc->mode == VMM_MODE_VMX || vmm_softc->mode == VMM_MODE_EPT) - && cpuid_limit && (*eax > 0x02 && *eax < 0x80000000)) { - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - return (0); - } - - switch (*eax) { - case 0x00: /* Max level and vendor ID */ - *eax = 0x07; /* cpuid_level */ - *ebx = *((uint32_t *)&cpu_vendor); - *edx = *((uint32_t *)&cpu_vendor + 1); - *ecx = *((uint32_t *)&cpu_vendor + 2); - break; - case 0x01: /* Version, brand, feature info */ - *eax = cpu_id; - /* mask off host's APIC ID, reset to vcpu id */ - *ebx = cpu_miscinfo & 0x00FFFFFF; - *ebx |= (vcpu->vc_id & 0xFF) << 24; - *ecx = (cpu_ecxfeature | CPUIDECX_HV) & - VMM_CPUIDECX_MASK; - *edx = curcpu()->ci_feature_flags & - VMM_CPUIDEDX_MASK; - break; - case 0x02: /* Cache and TLB information */ - DPRINTF("%s: function 0x02 (cache/TLB) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x03: /* Processor serial number (not supported) */ - DPRINTF("%s: function 0x03 (processor serial number) not " - "supported\n", __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x04: - if (*ecx == 0) { - CPUID_LEAF(*eax, 0, new_eax, new_ebx, new_ecx, new_edx); - *eax = new_eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK; - *ebx = new_ebx; - 
*ecx = new_ecx; - *edx = new_edx; - } else { - CPUID_LEAF(*eax, *ecx, new_eax, new_ebx, new_ecx, new_edx); - *eax = new_eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK; - *ebx = new_ebx; - *ecx = new_ecx; - *edx = new_edx; - } - break; - case 0x05: /* MONITOR/MWAIT (not supported) */ - DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x06: /* Thermal / Power management (not supported) */ - DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x07: /* SEFF */ - if (*ecx == 0) { - *eax = 0; /* Highest subleaf supported */ - *ebx = curcpu()->ci_feature_sefflags_ebx & - VMM_SEFF0EBX_MASK; - *ecx = curcpu()->ci_feature_sefflags_ecx & - VMM_SEFF0ECX_MASK; - *edx = 0; - } else { - /* Unsupported subleaf */ - DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf " - "0x%x not supported\n", __func__, *ecx); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - } - break; - case 0x09: /* Direct Cache Access (not supported) */ - DPRINTF("%s: function 0x09 (direct cache access) not " - "supported\n", __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x0a: /* Architectural perf monitoring (not supported) */ - DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x0b: /* Extended topology enumeration (not supported) */ - DPRINTF("%s: function 0x0b (topology enumeration) not " - "supported\n", __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x0d: /* Processor ext. state information (not supported) */ - DPRINTF("%s: function 0x0d (ext. state info) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x0f: /* QoS info (not supported) */ - DPRINTF("%s: function 0x0f (QoS info) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x14: /* Processor Trace info (not supported) */ - DPRINTF("%s: function 0x14 (processor trace info) not " - "supported\n", __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x15: /* TSC / Core Crystal Clock info (not supported) */ - DPRINTF("%s: function 0x15 (TSC / CCC info) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x16: /* Processor frequency info (not supported) */ - DPRINTF("%s: function 0x16 (frequency info) not supported\n", - __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - case 0x40000000: /* Hypervisor information */ - *eax = 0; - *ebx = *((uint32_t *)&vmm_hv_signature[0]); - *ecx = *((uint32_t *)&vmm_hv_signature[4]); - *edx = *((uint32_t *)&vmm_hv_signature[8]); - break; - case 0x80000000: /* Extended function level */ - *eax = 0x80000007; /* curcpu()->ci_pnfeatset */ - *ebx = 0; - *ecx = 0; - *edx = 0; - case 0x80000001: /* Extended function info */ - *eax = ecpu_eaxfeature; - *ebx = 0; /* Reserved */ - *ecx = ecpu_ecxfeature; - *edx = ecpu_feature; - break; - case 0x80000002: /* Brand string */ - *eax = cpu_brandstr[0]; - *ebx = cpu_brandstr[1]; - *ecx = cpu_brandstr[2]; - *edx = cpu_brandstr[3]; - break; - case 0x80000003: /* Brand string */ - *eax = cpu_brandstr[4]; - *ebx = cpu_brandstr[5]; - *ecx = cpu_brandstr[6]; - *edx = cpu_brandstr[7]; - break; - case 0x80000004: /* Brand string */ - *eax = cpu_brandstr[8]; - *ebx = cpu_brandstr[9]; - *ecx = cpu_brandstr[10]; - *edx = 
cpu_brandstr[11]; - break; - case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */ - *eax = curcpu()->ci_amdcacheinfo[0]; - *ebx = curcpu()->ci_amdcacheinfo[1]; - *ecx = curcpu()->ci_amdcacheinfo[2]; - *edx = curcpu()->ci_amdcacheinfo[3]; - break; - case 0x80000006: /* ext. cache info */ - *eax = curcpu()->ci_extcacheinfo[0]; - *ebx = curcpu()->ci_extcacheinfo[1]; - *ecx = curcpu()->ci_extcacheinfo[2]; - *edx = curcpu()->ci_extcacheinfo[3]; - break; - case 0x80000007: /* apmi */ - *eax = 0; /* Reserved */ - *ebx = 0; /* Reserved */ - *ecx = 0; /* Reserved */ - *edx = 0; /* unsupported ITSC */ - break; - case 0x80000008: /* Phys bits info and topology (AMD) */ - DPRINTF("%s: function 0x80000008 (phys bits info) not " - "supported\n", __func__); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - break; - default: - DPRINTF("%s: unsupported eax=0x%x\n", __func__, *eax); - *eax = 0; - *ebx = 0; - *ecx = 0; - *edx = 0; - } - - vcpu->vc_gueststate.vg_eip += insn_length; - - return (0); -} - -/* - * vcpu_run_svm - * - * VMM main loop used to run a VCPU. - */ -int -vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp) -{ - /* XXX removed due to rot */ - return (0); -} - -/* - * vmm_alloc_vpid - * - * Sets the memory location pointed to by "vpid" to the next available VPID - * or ASID. - * - * Parameters: - * vpid: Pointer to location to receive the next VPID/ASID - * - * Return Values: - * 0: The operation completed successfully - * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged. - */ -int -vmm_alloc_vpid(uint16_t *vpid) -{ - uint16_t i; - uint8_t idx, bit; - struct vmm_softc *sc = vmm_softc; - - rw_enter_write(&vmm_softc->vpid_lock); - for (i = 1; i <= sc->max_vpid; i++) { - idx = i / 8; - bit = i - (idx * 8); - - if (!(sc->vpids[idx] & (1 << bit))) { - sc->vpids[idx] |= (1 << bit); - *vpid = i; - DPRINTF("%s: allocated VPID/ASID %d\n", __func__, - i); - rw_exit_write(&vmm_softc->vpid_lock); - return 0; - } - } - - printf("%s: no available %ss\n", __func__, - (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) ? "VPID" : - "ASID"); - - rw_exit_write(&vmm_softc->vpid_lock); - return ENOMEM; -} - -/* - * vmm_free_vpid - * - * Frees the VPID/ASID id supplied in "vpid". - * - * Parameters: - * vpid: VPID/ASID to free. 
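- *
- * (Reference sketch, added editorially: the allocator above and this
- * function share one bitmap, with ID n stored at byte n / 8, bit n % 8
- * of sc->vpids, so freeing clears a single bit:
- *
- *	idx = vpid / 8;
- *	bit = vpid % 8;
- *	sc->vpids[idx] &= ~(1 << bit);
- *
- * Allocation scans from 1, leaving ID 0 unused -- VPID 0 is
- * architecturally reserved for the host.)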
- */ -void -vmm_free_vpid(uint16_t vpid) -{ - uint8_t idx, bit; - struct vmm_softc *sc = vmm_softc; - - rw_enter_write(&vmm_softc->vpid_lock); - idx = vpid / 8; - bit = vpid - (idx * 8); - sc->vpids[idx] &= ~(1 << bit); - - DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid); - rw_exit_write(&vmm_softc->vpid_lock); -} - -/* - * vmx_exit_reason_decode - * - * Returns a human readable string describing exit type 'code' - */ -const char * -vmx_exit_reason_decode(uint32_t code) -{ - switch (code) { - case VMX_EXIT_NMI: return "NMI"; - case VMX_EXIT_EXTINT: return "external interrupt"; - case VMX_EXIT_TRIPLE_FAULT: return "triple fault"; - case VMX_EXIT_INIT: return "INIT signal"; - case VMX_EXIT_SIPI: return "SIPI signal"; - case VMX_EXIT_IO_SMI: return "I/O SMI"; - case VMX_EXIT_OTHER_SMI: return "other SMI"; - case VMX_EXIT_INT_WINDOW: return "interrupt window"; - case VMX_EXIT_NMI_WINDOW: return "NMI window"; - case VMX_EXIT_TASK_SWITCH: return "task switch"; - case VMX_EXIT_CPUID: return "CPUID instruction"; - case VMX_EXIT_GETSEC: return "GETSEC instruction"; - case VMX_EXIT_HLT: return "HLT instruction"; - case VMX_EXIT_INVD: return "INVD instruction"; - case VMX_EXIT_INVLPG: return "INVLPG instruction"; - case VMX_EXIT_RDPMC: return "RDPMC instruction"; - case VMX_EXIT_RDTSC: return "RDTSC instruction"; - case VMX_EXIT_RSM: return "RSM instruction"; - case VMX_EXIT_VMCALL: return "VMCALL instruction"; - case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction"; - case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction"; - case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction"; - case VMX_EXIT_VMPTRST: return "VMPTRST instruction"; - case VMX_EXIT_VMREAD: return "VMREAD instruction"; - case VMX_EXIT_VMRESUME: return "VMRESUME instruction"; - case VMX_EXIT_VMWRITE: return "VMWRITE instruction"; - case VMX_EXIT_VMXOFF: return "VMXOFF instruction"; - case VMX_EXIT_VMXON: return "VMXON instruction"; - case VMX_EXIT_CR_ACCESS: return "CR access"; - case VMX_EXIT_MOV_DR: return "MOV DR instruction"; - case VMX_EXIT_IO: return "I/O instruction"; - case VMX_EXIT_RDMSR: return "RDMSR instruction"; - case VMX_EXIT_WRMSR: return "WRMSR instruction"; - case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid"; - case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed"; - case VMX_EXIT_MWAIT: return "MWAIT instruction"; - case VMX_EXIT_MTF: return "monitor trap flag"; - case VMX_EXIT_MONITOR: return "MONITOR instruction"; - case VMX_EXIT_PAUSE: return "PAUSE instruction"; - case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry"; - case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold"; - case VMX_EXIT_APIC_ACCESS: return "APIC access"; - case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI"; - case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access"; - case VMX_EXIT_LDTR_TR: return "LDTR/TR access"; - case VMX_EXIT_EPT_VIOLATION: return "EPT violation"; - case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration"; - case VMX_EXIT_INVEPT: return "INVEPT instruction"; - case VMX_EXIT_RDTSCP: return "RDTSCP instruction"; - case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED: - return "preemption timer expired"; - case VMX_EXIT_INVVPID: return "INVVPID instruction"; - case VMX_EXIT_WBINVD: return "WBINVD instruction"; - case VMX_EXIT_XSETBV: return "XSETBV instruction"; - case VMX_EXIT_APIC_WRITE: return "APIC write"; - case VMX_EXIT_RDRAND: return "RDRAND instruction"; - case VMX_EXIT_INVPCID: return "INVPCID instruction"; - case VMX_EXIT_VMFUNC: return "VMFUNC instruction"; - case 
VMX_EXIT_RDSEED: return "RDSEED instruction"; - case VMX_EXIT_XSAVES: return "XSAVES instruction"; - case VMX_EXIT_XRSTORS: return "XRSTORS instruction"; - default: return "unknown"; - } -} - -/* - * vmx_instruction_error_decode - * - * Returns a human readable string describing the instruction error in 'code' - */ -const char * -vmx_instruction_error_decode(uint32_t code) -{ - switch (code) { - case 1: return "VMCALL: unsupported in VMX root"; - case 2: return "VMCLEAR: invalid paddr"; - case 3: return "VMCLEAR: VMXON pointer"; - case 4: return "VMLAUNCH: non-clear VMCS"; - case 5: return "VMRESUME: non-launched VMCS"; - case 6: return "VMRESUME: executed after VMXOFF"; - case 7: return "VM entry: invalid control field(s)"; - case 8: return "VM entry: invalid host state field(s)"; - case 9: return "VMPTRLD: invalid paddr"; - case 10: return "VMPTRLD: VMXON pointer"; - case 11: return "VMPTRLD: incorrect VMCS revid"; - case 12: return "VMREAD/VMWRITE: unsupported VMCS field"; - case 13: return "VMWRITE: RO VMCS field"; - case 15: return "VMXON: unsupported in VMX root"; - case 20: return "VMCALL: invalid VM exit control fields"; - case 26: return "VM entry: blocked by MOV SS"; - case 28: return "Invalid operand to INVEPT/INVVPID"; - default: return "unknown"; - } -} - -/* - * vcpu_state_decode - * - * Returns a human readable string describing the vcpu state in 'state'. - */ -const char * -vcpu_state_decode(u_int state) -{ - switch (state) { - case VCPU_STATE_STOPPED: return "stopped"; - case VCPU_STATE_RUNNING: return "running"; - case VCPU_STATE_REQTERM: return "requesting termination"; - case VCPU_STATE_TERMINATED: return "terminated"; - case VCPU_STATE_UNKNOWN: return "unknown"; - default: return "invalid"; - } -} - -#ifdef VMM_DEBUG -/* - * dump_vcpu - * - * Dumps the VMX capabilites of vcpu 'vcpu' - */ -void -dump_vcpu(struct vcpu *vcpu) -{ - printf("vcpu @ %p\n", vcpu); - printf(" parent vm @ %p\n", vcpu->vc_parent); - printf(" mode: "); - if (vcpu->vc_virt_mode == VMM_MODE_VMX || - vcpu->vc_virt_mode == VMM_MODE_EPT) { - printf("VMX\n"); - printf(" pinbased ctls: 0x%llx\n", - vcpu->vc_vmx_pinbased_ctls); - printf(" true pinbased ctls: 0x%llx\n", - vcpu->vc_vmx_true_pinbased_ctls); - CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING); - CTRL_DUMP(vcpu, PINBASED, NMI_EXITING); - CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS); - CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER); - CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS); - printf(" procbased ctls: 0x%llx\n", - vcpu->vc_vmx_procbased_ctls); - printf(" true procbased ctls: 0x%llx\n", - vcpu->vc_vmx_true_procbased_ctls); - CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING); - CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING); - CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING); - CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING); - CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING); - CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING); - CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING); - CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING); - CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING); - CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING); - CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING); - CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW); - CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING); - CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING); - CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING); - CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS); - CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG); - CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS); - CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING); - 
CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING); - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { - printf(" procbased2 ctls: 0x%llx\n", - vcpu->vc_vmx_procbased2_ctls); - CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT); - CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP); - CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID); - CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST); - CTRL_DUMP(vcpu, PROCBASED2, - APIC_REGISTER_VIRTUALIZATION); - CTRL_DUMP(vcpu, PROCBASED2, - VIRTUAL_INTERRUPT_DELIVERY); - CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS); - CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML); - CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE); - CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS); - CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING); - } - printf(" entry ctls: 0x%llx\n", - vcpu->vc_vmx_entry_ctls); - printf(" true entry ctls: 0x%llx\n", - vcpu->vc_vmx_true_entry_ctls); - CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS); - CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST); - CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM); - CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT); - CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY); - CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY); - CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY); - CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY); - CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT); - printf(" exit ctls: 0x%llx\n", - vcpu->vc_vmx_exit_ctls); - printf(" true exit ctls: 0x%llx\n", - vcpu->vc_vmx_true_exit_ctls); - CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS); - CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE); - CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER); - CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT); - CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT); - } -} - -/* - * vmx_dump_vmcs_field - * - * Debug function to dump the contents of a single VMCS field - * - * Parameters: - * fieldid: VMCS Field ID - * msg: string to display - */ -void -vmx_dump_vmcs_field(uint16_t fieldid, const char *msg) -{ - uint8_t width; - uint64_t val; - uint32_t val_lo, val_hi; - - DPRINTF("%s (0x%04x): ", msg, fieldid); - width = (fieldid >> 13) & 0x3; - - if (width == 1) { - if (vmread(fieldid, &val_lo)) { - DPRINTF("???? "); - return; - } - if (vmread(fieldid + 1, &val_hi)) { - DPRINTF("???? 
"); - return; - } - - val = (uint64_t)val_lo | (uint64_t)val_hi << 32ULL; - } - - /* - * Field width encoding : bits 13:14 - * - * 0: 16-bit - * 1: 64-bit - * 2: 32-bit - * 3: natural width - */ - switch (width) { - case 0: DPRINTF("0x%04llx ", val); break; - case 1: - case 3: DPRINTF("0x%016llx ", val); break; - case 2: DPRINTF("0x%08llx ", val); - } -} - -/* - * vmx_dump_vmcs - * - * Debug function to dump the contents of the current VMCS. - */ -void -vmx_dump_vmcs(struct vcpu *vcpu) -{ - int has_sec, i; - uint32_t cr3_tgt_ct; - - /* XXX save and load new vmcs, restore at end */ - - DPRINTF("--CURRENT VMCS STATE--\n"); - DPRINTF("VMXON revision : 0x%x\n", - curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision); - DPRINTF("CR0 fixed0: 0x%llx\n", - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0); - DPRINTF("CR0 fixed1: 0x%llx\n", - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); - DPRINTF("CR4 fixed0: 0x%llx\n", - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0); - DPRINTF("CR4 fixed1: 0x%llx\n", - curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); - DPRINTF("MSR table size: 0x%x\n", - 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1)); - - has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1); - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_VPID, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_VPID, "VPID"); - } - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, - IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) { - vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR, - "Posted Int Notif Vec"); - } - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_EPT_VIOLATION_VE, 1)) { - vmx_dump_vmcs_field(VMCS_EPTP_INDEX, "EPTP idx"); - } - } - - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL, "G.ES"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL, "G.CS"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL, "G.SS"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL, "G.DS"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL, "G.FS"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL, "G.GS"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL, "LDTR"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL, "G.TR"); - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS, - "Int sts"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_PML, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX, "PML Idx"); - } - } - - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL, "H.ES"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL, "H.CS"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL, "H.SS"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL, "H.DS"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL, "H.FS"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL, "H.GS"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_IO_BITMAP_A, "I/O Bitmap A"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_IO_BITMAP_B, "I/O Bitmap B"); - DPRINTF("\n"); - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_USE_MSR_BITMAPS, 1)) { - vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS, "MSR Bitmap"); - DPRINTF("\n"); - } - - vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS, "Exit Store MSRs"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS, "Exit Load MSRs"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS, "Entry Load 
MSRs"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER, "Exec VMCS Ptr"); - DPRINTF("\n"); - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_PML, 1)) { - vmx_dump_vmcs_field(VMCS_PML_ADDRESS, "PML Addr"); - DPRINTF("\n"); - } - } - - vmx_dump_vmcs_field(VMCS_TSC_OFFSET, "TSC Offset"); - DPRINTF("\n"); - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_USE_TPR_SHADOW, 1)) { - vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS, - "Virtual APIC Addr"); - DPRINTF("\n"); - } - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_VIRTUALIZE_APIC, 1)) { - vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS, - "APIC Access Addr"); - DPRINTF("\n"); - } - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, - IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) { - vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC, - "Posted Int Desc Addr"); - DPRINTF("\n"); - } - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) { - vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS, - "VM Function Controls"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_EPT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP, - "EPT Pointer"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) { - vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_0, - "EOI Exit Bitmap 0"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_1, - "EOI Exit Bitmap 1"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_2, - "EOI Exit Bitmap 2"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_3, - "EOI Exit Bitmap 3"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) { - /* We assume all CPUs have the same VMFUNC caps */ - if (curcpu()->ci_vmm_cap.vcc_vmx.vmx_vm_func & 0x1) { - vmx_dump_vmcs_field(VMCS_EPTP_LIST_ADDRESS, - "EPTP List Addr"); - DPRINTF("\n"); - } - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_VMCS_SHADOWING, 1)) { - vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS, - "VMREAD Bitmap Addr"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS, - "VMWRITE Bitmap Addr"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_EPT_VIOLATION_VE, 1)) { - vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS, - "#VE Addr"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_XSAVES_XRSTORS, 1)) { - vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP, - "XSS exiting bitmap addr"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_ENCLS_EXITING, 1)) { - vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP, - "Encls exiting bitmap addr"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_TSC_SCALING, 1)) { - vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER, - "TSC scaling factor"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_EPT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS, - "Guest PA"); - DPRINTF("\n"); - } - } - - vmx_dump_vmcs_field(VMCS_LINK_POINTER, "VMCS Link Pointer"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL, "Guest DEBUGCTL"); - DPRINTF("\n"); - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, - 
IA32_VMX_LOAD_IA32_PAT_ON_ENTRY, 1) || - vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_SAVE_IA32_PAT_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT, - "Guest PAT"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, - IA32_VMX_LOAD_IA32_EFER_ON_ENTRY, 1) || - vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_SAVE_IA32_EFER_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER, - "Guest EFER"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, - IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL, - "Guest Perf Global Ctrl"); - DPRINTF("\n"); - } - - if (has_sec) { - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_ENABLE_EPT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_PDPTE0, "Guest PDPTE0"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_PDPTE1, "Guest PDPTE1"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_PDPTE2, "Guest PDPTE2"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_PDPTE3, "Guest PDPTE3"); - DPRINTF("\n"); - } - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, - IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY, 1) || - vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS, - "Guest BNDCFGS"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_LOAD_IA32_PAT_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT, - "Host PAT"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_LOAD_IA32_EFER_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER, - "Host EFER"); - DPRINTF("\n"); - } - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, - IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT, 1)) { - vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL, - "Host Perf Global Ctrl"); - DPRINTF("\n"); - } - - vmx_dump_vmcs_field(VMCS_PINBASED_CTLS, "Pinbased Ctrls"); - vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS, "Procbased Ctrls"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP, "Exception Bitmap"); - vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK, "#PF Err Code Mask"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH, "#PF Err Code Match"); - vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT, "CR3 Tgt Count"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EXIT_CTLS, "Exit Ctrls"); - vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT, "Exit MSR Store Ct"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT, "Exit MSR Load Ct"); - vmx_dump_vmcs_field(VMCS_ENTRY_CTLS, "Entry Ctrls"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT, "Entry MSR Load Ct"); - vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO, "Entry Int. Info"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE, - "Entry Ex. 
Err Code"); - vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH, "Entry Insn Len"); - DPRINTF("\n"); - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, - IA32_VMX_USE_TPR_SHADOW, 1)) { - vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD, "TPR Threshold"); - DPRINTF("\n"); - } - - if (has_sec) { - vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS, "2ndary Ctrls"); - DPRINTF("\n"); - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, - IA32_VMX_PAUSE_LOOP_EXITING, 1)) { - vmx_dump_vmcs_field(VMCS_PLE_GAP, "PLE Gap"); - vmx_dump_vmcs_field(VMCS_PLE_WINDOW, "PLE Window"); - } - DPRINTF("\n"); - } - - vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR, "Insn Error"); - vmx_dump_vmcs_field(VMCS_EXIT_REASON, "Exit Reason"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO, "Exit Int. Info"); - vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE, - "Exit Int. Err Code"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO, "IDT vect info"); - vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE, - "IDT vect err code"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH, "Insn Len"); - vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO, "Exit Insn Info"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT, "G. ES Lim"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT, "G. CS Lim"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT, "G. SS Lim"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT, "G. DS Lim"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT, "G. FS Lim"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT, "G. GS Lim"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT, "G. LDTR Lim"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT, "G. TR Lim"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT, "G. GDTR Lim"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT, "G. IDTR Lim"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR, "G. ES AR"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR, "G. CS AR"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR, "G. SS AR"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR, "G. DS AR"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR, "G. FS AR"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR, "G. GS AR"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR, "G. LDTR AR"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR, "G. TR AR"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST, "G. Int St."); - vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE, "G. Act St."); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_GUEST_SMBASE, "G. SMBASE"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS, "G. SYSENTER CS"); - DPRINTF("\n"); - - if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, - IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER, 1)) { - vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL, - "VMX Preempt Timer"); - DPRINTF("\n"); - } - - vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS, "H. 
SYSENTER CS"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_CR0_MASK, "CR0 Mask"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_CR4_MASK, "CR4 Mask"); - DPRINTF("\n"); - - vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW, "CR0 RD Shadow"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW, "CR4 RD Shadow"); - DPRINTF("\n"); - - /* We assume all CPUs have the same max CR3 target ct */ - cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count; - DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct); - if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) { - for (i = 0 ; i < cr3_tgt_ct; i++) { - vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i), - "CR3 Target"); - DPRINTF("\n"); - } - } else { - DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS); - } - - vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION, "G. Exit Qual"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_IO_RCX, "I/O RCX"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_IO_RSI, "I/O RSI"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_IO_RDI, "I/O RDI"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_IO_RIP, "I/O RIP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS, "G. Lin Addr"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR0, "G. CR0"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR3, "G. CR3"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR4, "G. CR4"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE, "G. ES Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE, "G. CS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE, "G. SS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE, "G. DS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE, "G. FS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE, "G. GS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE, "G. LDTR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE, "G. TR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE, "G. GDTR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE, "G. IDTR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR7, "G. DR7"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP, "G. RSP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP, "G. RIP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS, "G. RFLAGS"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC, "G. Pend Dbg Exc"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP, "G. SYSENTER ESP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP, "G. SYSENTER EIP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_CR0, "H. CR0"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_CR3, "H. CR3"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_CR4, "H. CR4"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE, "H. FS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE, "H. GS Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE, "H. TR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE, "H. GDTR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE, "H. IDTR Base"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP, "H. SYSENTER ESP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP, "H. 
SYSENTER EIP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP, "H. RSP"); - DPRINTF("\n"); - vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP, "H. RIP"); - DPRINTF("\n"); -} - -/* - * vmx_vcpu_dump_regs - * - * Debug function to print vcpu regs from the current vcpu - * note - vmcs for 'vcpu' must be on this pcpu. - * - * Parameters: - * vcpu - vcpu whose registers should be dumped - */ -void -vmx_vcpu_dump_regs(struct vcpu *vcpu) -{ - uint32_t r; - int i; - struct vmx_msr_store *msr_store; - - DPRINTF("vcpu @ %p\n", vcpu); - DPRINTF(" eax=0x%08x ebx=0x%08x ecx=0x%08x\n", - vcpu->vc_gueststate.vg_eax, vcpu->vc_gueststate.vg_ebx, - vcpu->vc_gueststate.vg_ecx); - DPRINTF(" edx=0x%08x ebp=0x%08x edi=0x%08x\n", - vcpu->vc_gueststate.vg_edx, vcpu->vc_gueststate.vg_ebp, - vcpu->vc_gueststate.vg_edi); - DPRINTF(" esi=0x%08x\n", vcpu->vc_gueststate.vg_esi); - - DPRINTF(" eip=0x%08x rsp=", vcpu->vc_gueststate.vg_eip); - if (vmread(VMCS_GUEST_IA32_RSP, &r)) - DPRINTF("(error reading)\n"); - else - DPRINTF("0x%08x\n", r); - - DPRINTF(" cr0="); - if (vmread(VMCS_GUEST_IA32_CR0, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%08x ", r); - vmm_decode_cr0(r); - } - - DPRINTF(" cr2=0x%08x\n", vcpu->vc_gueststate.vg_cr2); - - DPRINTF(" cr3="); - if (vmread(VMCS_GUEST_IA32_CR3, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%08x ", r); - vmm_decode_cr3(r); - } - - DPRINTF(" cr4="); - if (vmread(VMCS_GUEST_IA32_CR4, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%08x ", r); - vmm_decode_cr4(r); - } - - DPRINTF(" --Guest Segment Info--\n"); - - DPRINTF(" cs="); - if (vmread(VMCS_GUEST_IA32_CS_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_CS_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_CS_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_CS_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" ds="); - if (vmread(VMCS_GUEST_IA32_DS_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_DS_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_DS_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_DS_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" es="); - if (vmread(VMCS_GUEST_IA32_ES_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_ES_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_ES_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_ES_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" fs="); - if (vmread(VMCS_GUEST_IA32_FS_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_FS_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" 
limit="); - if (vmread(VMCS_GUEST_IA32_FS_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_FS_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" gs="); - if (vmread(VMCS_GUEST_IA32_GS_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_GS_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_GS_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_GS_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" ss="); - if (vmread(VMCS_GUEST_IA32_SS_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x rpl=%d", r, r & 0x3); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_SS_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_SS_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" tr="); - if (vmread(VMCS_GUEST_IA32_TR_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x", r); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_TR_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_TR_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_TR_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" gdtr base="); - if (vmread(VMCS_GUEST_IA32_GDTR_BASE, &r)) - DPRINTF("(error reading) "); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &r)) - DPRINTF("(error reading)\n"); - else - DPRINTF("0x%08x\n", r); - - DPRINTF(" idtr base="); - if (vmread(VMCS_GUEST_IA32_IDTR_BASE, &r)) - DPRINTF("(error reading) "); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &r)) - DPRINTF("(error reading)\n"); - else - DPRINTF("0x%08x\n", r); - - DPRINTF(" ldtr="); - if (vmread(VMCS_GUEST_IA32_LDTR_SEL, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%04x", r); - - DPRINTF(" base="); - if (vmread(VMCS_GUEST_IA32_LDTR_BASE, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" limit="); - if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT, &r)) - DPRINTF("(error reading)"); - else - DPRINTF("0x%08x", r); - - DPRINTF(" a/r="); - if (vmread(VMCS_GUEST_IA32_LDTR_AR, &r)) - DPRINTF("(error reading)\n"); - else { - DPRINTF("0x%04x\n ", r); - vmm_segment_desc_decode(r); - } - - DPRINTF(" --Guest MSRs @ 0x%08x (paddr: 0x%08x)--\n", - (uint32_t)vcpu->vc_vmx_msr_exit_save_va, - (uint32_t)vcpu->vc_vmx_msr_exit_save_pa); - - msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; - - for (i = 0; i < VMX_NUM_MSR_STORE; i++) { - DPRINTF(" MSR %d @ %p : 0x%08llx (%s), " - "value=0x%016llx ", - i, &msr_store[i], msr_store[i].vms_index, - msr_name_decode(msr_store[i].vms_index), - msr_store[i].vms_data); - vmm_decode_msr_value(msr_store[i].vms_index, - msr_store[i].vms_data); 
- } - - DPRINTF(" last PIC irq=%d\n", vcpu->vc_intr); -} - -/* - * msr_name_decode - * - * Returns a human-readable name for the MSR supplied in 'msr'. - * - * Parameters: - * msr - The MSR to decode - * - * Return value: - * NULL-terminated character string containing the name of the MSR requested - */ -const char * -msr_name_decode(uint32_t msr) -{ - /* - * Add as needed. Also consider adding a decode function when - * adding to this table. - */ - - switch (msr) { - case MSR_TSC: return "TSC"; - case MSR_APICBASE: return "APIC base"; - case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control"; - case MSR_PERFCTR0: return "perf counter 0"; - case MSR_PERFCTR1: return "perf counter 1"; - case MSR_TEMPERATURE_TARGET: return "temperature target"; - case MSR_MTRRcap: return "MTRR cap"; - case MSR_PERF_STATUS: return "perf status"; - case MSR_PERF_CTL: return "perf control"; - case MSR_MTRRvarBase: return "MTRR variable base"; - case MSR_MTRRfix64K_00000: return "MTRR fixed 64K"; - case MSR_MTRRfix16K_80000: return "MTRR fixed 16K"; - case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K"; - case MSR_CR_PAT: return "PAT"; - case MSR_MTRRdefType: return "MTRR default type"; - case MSR_EFER: return "EFER"; - case MSR_STAR: return "STAR"; - case MSR_LSTAR: return "LSTAR"; - case MSR_CSTAR: return "CSTAR"; - case MSR_SFMASK: return "SFMASK"; - case MSR_FSBASE: return "FSBASE"; - case MSR_GSBASE: return "GSBASE"; - case MSR_KERNELGSBASE: return "KGSBASE"; - default: return "Unknown MSR"; - } -} - -/* - * vmm_segment_desc_decode - * - * Debug function to print segment information for supplied descriptor - * - * Parameters: - * val - The A/R bytes for the segment descriptor to decode - */ -void -vmm_segment_desc_decode(uint32_t val) -{ - uint16_t ar; - uint8_t g, type, s, dpl, p, dib, l; - uint32_t unusable; - - /* Exit early on unusable descriptors */ - unusable = val & 0x10000; - if (unusable) { - DPRINTF("(unusable)\n"); - return; - } - - ar = (uint16_t)val; - - g = (ar & 0x8000) >> 15; - dib = (ar & 0x4000) >> 14; - l = (ar & 0x2000) >> 13; - p = (ar & 0x80) >> 7; - dpl = (ar & 0x60) >> 5; - s = (ar & 0x10) >> 4; - type = (ar & 0xf); - - DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ", - g, dib, l, p, s); - - DPRINTF("type="); - if (!s) { - switch (type) { - case SDT_SYSLDT: DPRINTF("ldt\n"); break; - case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break; - case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break; - case SDT_SYS386CGT: DPRINTF("call gate\n"); break; - case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break; - case SDT_SYS386TGT: DPRINTF("trap gate\n"); break; - /* XXX handle 32 bit segment types by inspecting mode */ - default: DPRINTF("unknown"); - } - } else { - switch (type + 16) { - case SDT_MEMRO: DPRINTF("data, r/o\n"); break; - case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break; - case SDT_MEMRW: DPRINTF("data, r/w\n"); break; - case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break; - case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break; - case SDT_MEMRODA: DPRINTF("data, r/o, expand down, " - "accessed\n"); - break; - case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break; - case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, " - "accessed\n"); - break; - case SDT_MEME: DPRINTF("code, x only\n"); break; - case SDT_MEMEA: DPRINTF("code, x only, accessed\n"); - case SDT_MEMER: DPRINTF("code, r/x\n"); break; - case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break; - case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break; - case 
SDT_MEMEAC: DPRINTF("code, x only, conforming, " - "accessed\n"); - break; - case SDT_MEMERC: DPRINTF("code, r/x, conforming\n"); break; - case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n"); - break; - } - } -} - -void -vmm_decode_cr0(uint32_t cr0) -{ - struct vmm_reg_debug_info cr0_info[11] = { - { CR0_PG, "PG ", "pg " }, - { CR0_CD, "CD ", "cd " }, - { CR0_NW, "NW ", "nw " }, - { CR0_AM, "AM ", "am " }, - { CR0_WP, "WP ", "wp " }, - { CR0_NE, "NE ", "ne " }, - { CR0_ET, "ET ", "et " }, - { CR0_TS, "TS ", "ts " }, - { CR0_EM, "EM ", "em " }, - { CR0_MP, "MP ", "mp " }, - { CR0_PE, "PE", "pe" } - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(cr0_info); i++) - if (cr0 & cr0_info[i].vrdi_bit) - DPRINTF("%s", cr0_info[i].vrdi_present); - else - DPRINTF("%s", cr0_info[i].vrdi_absent); - - DPRINTF(")\n"); -} - -void -vmm_decode_cr3(uint32_t cr3) -{ - struct vmm_reg_debug_info cr3_info[2] = { - { CR3_PWT, "PWT ", "pwt "}, - { CR3_PCD, "PCD", "pcd"} - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0 ; i < nitems(cr3_info) ; i++) - if (cr3 & cr3_info[i].vrdi_bit) - DPRINTF("%s", cr3_info[i].vrdi_present); - else - DPRINTF("%s", cr3_info[i].vrdi_absent); - - DPRINTF(")\n"); -} - -void -vmm_decode_cr4(uint32_t cr4) -{ - struct vmm_reg_debug_info cr4_info[19] = { - { CR4_PKE, "PKE ", "pke "}, - { CR4_SMAP, "SMAP ", "smap "}, - { CR4_SMEP, "SMEP ", "smep "}, - { CR4_OSXSAVE, "OSXSAVE ", "osxsave "}, - { CR4_PCIDE, "PCIDE ", "pcide "}, - { CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "}, - { CR4_SMXE, "SMXE ", "smxe "}, - { CR4_VMXE, "VMXE ", "vmxe "}, - { CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "}, - { CR4_OSFXSR, "OSFXSR ", "osfxsr "}, - { CR4_PCE, "PCE ", "pce "}, - { CR4_PGE, "PGE ", "pge "}, - { CR4_MCE, "MCE ", "mce "}, - { CR4_PAE, "PAE ", "pae "}, - { CR4_PSE, "PSE ", "pse "}, - { CR4_DE, "DE ", "de "}, - { CR4_TSD, "TSD ", "tsd "}, - { CR4_PVI, "PVI ", "pvi "}, - { CR4_VME, "VME", "vme"} - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(cr4_info); i++) - if (cr4 & cr4_info[i].vrdi_bit) - DPRINTF("%s", cr4_info[i].vrdi_present); - else - DPRINTF("%s", cr4_info[i].vrdi_absent); - - DPRINTF(")\n"); -} - -void -vmm_decode_apicbase_msr_value(uint64_t apicbase) -{ - struct vmm_reg_debug_info apicbase_info[3] = { - { APICBASE_BSP, "BSP ", "bsp "}, - { APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "}, - { APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"} - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(apicbase_info); i++) - if (apicbase & apicbase_info[i].vrdi_bit) - DPRINTF("%s", apicbase_info[i].vrdi_present); - else - DPRINTF("%s", apicbase_info[i].vrdi_absent); - - DPRINTF(")\n"); -} - -void -vmm_decode_ia32_fc_value(uint64_t fcr) -{ - struct vmm_reg_debug_info fcr_info[4] = { - { IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "}, - { IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "}, - { IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "}, - { IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "} - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(fcr_info); i++) - if (fcr & fcr_info[i].vrdi_bit) - DPRINTF("%s", fcr_info[i].vrdi_present); - else - DPRINTF("%s", fcr_info[i].vrdi_absent); - - if (fcr & IA32_FEATURE_CONTROL_SENTER_EN) - DPRINTF(" [SENTER param = 0x%llx]", - (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8); - - DPRINTF(")\n"); -} - -void -vmm_decode_mtrrcap_value(uint64_t val) -{ - struct vmm_reg_debug_info mtrrcap_info[3] = { - { MTRRcap_FIXED, "FIXED ", "fixed "}, - { MTRRcap_WC, "WC ", "wc "}, - { MTRRcap_SMRR, "SMRR ", "smrr "} - }; - - 
uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(mtrrcap_info); i++) - if (val & mtrrcap_info[i].vrdi_bit) - DPRINTF("%s", mtrrcap_info[i].vrdi_present); - else - DPRINTF("%s", mtrrcap_info[i].vrdi_absent); - - if (val & MTRRcap_FIXED) - DPRINTF(" [nr fixed ranges = 0x%llx]", - (val & 0xff)); - - DPRINTF(")\n"); -} - -void -vmm_decode_perf_status_value(uint64_t val) -{ - DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff)); -} - -void vmm_decode_perf_ctl_value(uint64_t val) -{ - DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo"); - DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF)); -} - -void -vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype) -{ - struct vmm_reg_debug_info mtrrdeftype_info[2] = { - { MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "}, - { MTRRdefType_ENABLE, "ENABLED ", "enabled "}, - }; - - uint8_t i; - int type; - - DPRINTF("("); - for (i = 0; i < nitems(mtrrdeftype_info); i++) - if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit) - DPRINTF("%s", mtrrdeftype_info[i].vrdi_present); - else - DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent); - - DPRINTF("type = "); - type = mtrr2mrt(mtrrdeftype & 0xff); - switch (type) { - case MDF_UNCACHEABLE: DPRINTF("UC"); break; - case MDF_WRITECOMBINE: DPRINTF("WC"); break; - case MDF_WRITETHROUGH: DPRINTF("WT"); break; - case MDF_WRITEPROTECT: DPRINTF("RO"); break; - case MDF_WRITEBACK: DPRINTF("WB"); break; - case MDF_UNKNOWN: - default: - DPRINTF("??"); - break; - } - - DPRINTF(")\n"); -} - -void -vmm_decode_efer_value(uint64_t efer) -{ - struct vmm_reg_debug_info efer_info[4] = { - { EFER_SCE, "SCE ", "sce "}, - { EFER_LME, "LME ", "lme "}, - { EFER_LMA, "LMA ", "lma "}, - { EFER_NXE, "NXE", "nxe"}, - }; - - uint8_t i; - - DPRINTF("("); - for (i = 0; i < nitems(efer_info); i++) - if (efer & efer_info[i].vrdi_bit) - DPRINTF("%s", efer_info[i].vrdi_present); - else - DPRINTF("%s", efer_info[i].vrdi_absent); - - DPRINTF(")\n"); -} - -void -vmm_decode_msr_value(uint64_t msr, uint64_t val) -{ - switch (msr) { - case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break; - case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break; - case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break; - case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break; - case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break; - case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break; - case MSR_EFER: vmm_decode_efer_value(val); break; - default: DPRINTF("\n"); - } -} -#endif /* VMM_DEBUG */ diff --git a/sys/arch/i386/i386/vmm_support.S b/sys/arch/i386/i386/vmm_support.S deleted file mode 100644 index ef4c3c644bd..00000000000 --- a/sys/arch/i386/i386/vmm_support.S +++ /dev/null @@ -1,290 +0,0 @@ -/* $OpenBSD: vmm_support.S,v 1.3 2017/07/06 04:32:30 mlarkin Exp $ */ -/* - * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "assym.h" -#include <machine/asm.h> -#include <machine/specialreg.h> - -/* - * XXX duplicated in vmmvar.h due to song-and-dance with sys/rwlock.h inclusion - * here - */ -#define VMX_FAIL_LAUNCH_UNKNOWN 1 -#define VMX_FAIL_LAUNCH_INVALID_VMCS 2 -#define VMX_FAIL_LAUNCH_VALID_VMCS 3 - - .text - .global _C_LABEL(vmxon) - .global _C_LABEL(vmxoff) - .global _C_LABEL(vmclear) - .global _C_LABEL(vmptrld) - .global _C_LABEL(vmptrst) - .global _C_LABEL(vmwrite) - .global _C_LABEL(vmread) - .global _C_LABEL(invvpid) - .global _C_LABEL(invept) - .global _C_LABEL(vmx_enter_guest) - .global _C_LABEL(vmm_dispatch_intr) - -_C_LABEL(vmm_dispatch_intr): - movl %esp, %eax - andl $0xFFFFFFF0, %esp - pushl %ss - pushl %eax - pushfl - pushl %cs - cli - movl 4(%eax), %eax - calll *%eax - addl $0x8, %esp - ret - -_C_LABEL(vmxon): - movl 4(%esp), %eax - vmxon (%eax) - jz failed_on - jc failed_on - xorl %eax, %eax - ret -failed_on: - movl $0x01, %eax - ret - -_C_LABEL(vmxoff): - vmxoff - jz failed_off - jc failed_off - xorl %eax, %eax - ret -failed_off: - movl $0x01, %eax - ret - -_C_LABEL(vmclear): - movl 0x04(%esp), %eax - vmclear (%eax) - jz failed_clear - jc failed_clear - xorl %eax, %eax - ret -failed_clear: - movl $0x01, %eax - ret - -_C_LABEL(vmptrld): - movl 4(%esp), %eax - vmptrld (%eax) - jz failed_ptrld - jc failed_ptrld - xorl %eax, %eax - ret -failed_ptrld: - movl $0x01, %eax - ret - -_C_LABEL(vmptrst): - movl 0x04(%esp), %eax - vmptrst (%eax) - jz failed_ptrst - jc failed_ptrst - xorl %eax, %eax - ret -failed_ptrst: - movl $0x01, %eax - ret - -_C_LABEL(vmwrite): - movl 0x04(%esp), %eax - vmwrite 0x08(%esp), %eax - jz failed_write - jc failed_write - xorl %eax, %eax - ret -failed_write: - movl $0x01, %eax - ret - -_C_LABEL(vmread): - pushl %ebx - movl 0x08(%esp), %ebx - movl 0x0c(%esp), %eax - vmread %ebx, (%eax) - jz failed_read - jc failed_read - popl %ebx - xorl %eax, %eax - ret -failed_read: - popl %ebx - movl $0x01, %eax - ret - -_C_LABEL(invvpid): - pushl %ebx - movl 0x08(%esp), %eax - movl 0x0c(%esp), %ebx - invvpid (%ebx), %eax - popl %ebx - ret - -_C_LABEL(invept): - movl 0x04(%esp), %eax - invept 0x08(%esp), %eax - ret - -_C_LABEL(vmx_enter_guest): - pushl %ebx - pushl %ecx - pushl %edx - movl 0x14(%esp), %edx /* Guest Regs Pointer */ - movl 0x18(%esp), %ebx /* resume flag */ - testl %ebx, %ebx - jnz skip_init - - /* - * XXX make vmx_exit_handler a global and put this in the per-vcpu - * init code - */ - movl $VMCS_HOST_IA32_RIP, %eax - movl $vmx_exit_handler_asm, %ecx - vmwrite %ecx, %eax - -skip_init: - pushfl - - strw %ax - pushw %ax - movw %es, %ax - pushw %ax - movw %ds, %ax - pushw %ax - movw %ss, %ax - pushw %ax - pushw %fs - pushw %gs - - pushl %ebp - pushl %esi - pushl %edi - pushl %edx /* Guest Regs Pointer */ - - movl $VMCS_HOST_IA32_RSP, %edi - movl %esp, %eax - vmwrite %eax, %edi - - testl %ebx, %ebx - jnz do_resume - - /* Restore guest registers */ - movl 0x1c(%edx), %eax - movl %eax, %cr2 - movl 0x18(%edx), %ebp - movl 0x14(%edx), %edi - movl 0x0c(%edx), %ecx - movl 0x08(%edx), %ebx - movl 0x04(%edx), %eax - movl (%edx), %esi - movl 0x10(%edx), %edx - - vmlaunch - jmp fail_launch_or_resume -do_resume: - /* Restore guest registers 
*/ - movl 0x1c(%edx), %eax - movl %eax, %cr2 - movl 0x18(%edx), %ebp - movl 0x14(%edx), %edi - movl 0x0c(%edx), %ecx - movl 0x08(%edx), %ebx - movl 0x04(%edx), %eax - movl (%edx), %esi - movl 0x10(%edx), %edx - vmresume -fail_launch_or_resume: - /* Failed launch/resume (fell through) */ - jc fail_launch_invalid_vmcs /* Invalid VMCS */ - jz fail_launch_valid_vmcs /* Valid VMCS, failed launch/resume */ - - /* Unknown failure mode (not documented as per Intel SDM) */ - movl $VMX_FAIL_LAUNCH_UNKNOWN, %eax - popl %edx - jmp restore_host - -fail_launch_invalid_vmcs: - movl $VMX_FAIL_LAUNCH_INVALID_VMCS, %eax - popl %edx - jmp restore_host - -fail_launch_valid_vmcs: - movl $VMCS_INSTRUCTION_ERROR, %edi - popl %edx - vmread %edi, %eax - /* XXX check failure of vmread */ - movl %eax, 0x20(%edx) - movl $VMX_FAIL_LAUNCH_VALID_VMCS, %eax - jmp restore_host - -vmx_exit_handler_asm: - /* Preserve guest registers not saved in VMCS */ - pushl %esi - pushl %edi - movl 0x8(%esp), %edi - movl 0x4(%esp), %esi - movl %esi, (%edi) - popl %edi - popl %esi /* discard */ - - popl %esi - movl %eax, 0x4(%esi) - movl %ebx, 0x8(%esi) - movl %ecx, 0xc(%esi) - movl %edx, 0x10(%esi) - movl %edi, 0x14(%esi) - movl %ebp, 0x18(%esi) - movl %cr2, %eax - movl %eax, 0x1c(%esi) - -restore_host: - popl %edi - popl %esi - popl %ebp - - popw %gs - popw %fs - popw %ax - movw %ax, %ss - popw %ax - movw %ax, %ds - popw %ax - movw %ax, %es - xorl %ecx, %ecx - popw %cx - - popfl - - movl 0x1c(%esp), %ebx - leal (%ebx, %ecx), %eax - andb $0xF9, 5(%eax) - ltr %cx - - popl %edx - popl %ecx - popl %ebx - - xorl %eax, %eax - - ret diff --git a/sys/arch/i386/include/conf.h b/sys/arch/i386/include/conf.h index 09c82ee00d0..ce73591fb1c 100644 --- a/sys/arch/i386/include/conf.h +++ b/sys/arch/i386/include/conf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.h,v 1.16 2016/10/21 06:56:38 mlarkin Exp $ */ +/* $OpenBSD: conf.h,v 1.17 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: conf.h,v 1.2 1996/05/05 19:28:34 christos Exp $ */ /* @@ -76,7 +76,3 @@ cdev_decl(acpiapm); #define pctrpoll seltrue cdev_decl(pctr); - -#include "vmm.h" -cdev_decl(vmm); - diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index ce71e371f4d..5c53af46020 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.166 2018/12/05 10:28:21 jsg Exp $ */ +/* $OpenBSD: cpu.h,v 1.167 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */ /*- @@ -71,36 +71,6 @@ struct intrsource; -/* VMXON region (Intel) */ -struct vmxon_region { - uint32_t vr_revision; -}; - -/* - * VMX for Intel CPUs - */ -struct vmx { - uint64_t vmx_cr0_fixed0; - uint64_t vmx_cr0_fixed1; - uint64_t vmx_cr4_fixed0; - uint64_t vmx_cr4_fixed1; - uint32_t vmx_vmxon_revision; - uint32_t vmx_msr_table_size; - uint32_t vmx_cr3_tgt_count; - uint64_t vmx_vm_func; -}; - -/* - * SVM for AMD CPUs - */ -struct svm { -}; - -union vmm_cpu_cap { - struct vmx vcc_vmx; - struct svm vcc_svm; -}; - #ifdef _KERNEL /* XXX stuff to move to cpuvar.h later */ struct cpu_info { @@ -200,15 +170,6 @@ struct cpu_info { #if defined(GPROF) || defined(DDBPROF) struct gmonparam *ci_gmon; #endif - u_int32_t ci_vmm_flags; -#define CI_VMM_VMX (1 << 0) -#define CI_VMM_SVM (1 << 1) -#define CI_VMM_RVI (1 << 2) -#define CI_VMM_EPT (1 << 3) -#define CI_VMM_DIS (1 << 4) - union vmm_cpu_cap ci_vmm_cap; - uint64_t ci_vmxon_region_pa; /* Must be 64 bit */ - struct vmxon_region *ci_vmxon_region; }; /* diff --git a/sys/arch/i386/include/intrdefs.h 
b/sys/arch/i386/include/intrdefs.h index c95b7890396..26dac01251e 100644 --- a/sys/arch/i386/include/intrdefs.h +++ b/sys/arch/i386/include/intrdefs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: intrdefs.h,v 1.16 2018/01/13 15:18:11 mpi Exp $ */ +/* $OpenBSD: intrdefs.h,v 1.17 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: intrdefs.h,v 1.2 2003/05/04 22:01:56 fvdl Exp $ */ #ifndef _I386_INTRDEFS_H @@ -116,16 +116,13 @@ #define I386_IPI_GDT 0x00000020 #define I386_IPI_DDB 0x00000040 /* synchronize while in ddb */ #define I386_IPI_SETPERF 0x00000080 -#define I386_IPI_START_VMM 0x00000100 -#define I386_IPI_STOP_VMM 0x00000200 -#define I386_NIPI 10 +#define I386_NIPI 8 #define I386_IPI_NAMES { "halt IPI", "nop IPI", "FPU flush IPI", \ "FPU synch IPI", \ "MTRR update IPI", "GDT update IPI", \ - "DDB IPI", "setperf IPI", "VMM start IPI", \ - "VMM stop IPI" } + "DDB IPI", "setperf IPI" } #define IREENT_MAGIC 0x18041969 diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 91df8dfc14d..8a126935a97 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.85 2018/05/28 20:52:44 bluhm Exp $ */ +/* $OpenBSD: pmap.h,v 1.86 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -88,11 +88,6 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ * page list, and number of PTPs within the pmap. */ -#define PMAP_TYPE_NORMAL 1 -#define PMAP_TYPE_EPT 2 -#define PMAP_TYPE_RVI 3 -#define pmap_nested(pm) ((pm)->pm_type != PMAP_TYPE_NORMAL) - struct pmap { uint64_t pm_pdidx[4]; /* PDIEs for PAE mode */ uint64_t pm_pdidx_intel[4]; /* PDIEs for PAE mode U-K */ @@ -122,10 +117,6 @@ struct pmap { int pm_flags; /* see below */ struct segment_descriptor pm_codeseg; /* cs descriptor for process */ - int pm_type; /* Type of pmap this is (PMAP_TYPE_x) */ - vaddr_t pm_npt_pml4; /* Nested paging PML4 VA */ - paddr_t pm_npt_pa; /* Nested paging PML4 PA */ - vaddr_t pm_npt_pdpt; /* Nested paging PDPT */ }; /* diff --git a/sys/arch/i386/include/pte.h b/sys/arch/i386/include/pte.h index aa9b62341d6..5977b9a1ccd 100644 --- a/sys/arch/i386/include/pte.h +++ b/sys/arch/i386/include/pte.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pte.h,v 1.22 2016/10/21 06:20:59 mlarkin Exp $ */ +/* $OpenBSD: pte.h,v 1.23 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: pte.h,v 1.11 1998/02/06 21:58:05 thorpej Exp $ */ /* @@ -67,13 +67,6 @@ #define PG_AVAIL3 0x00000800 /* ignored by hardware */ #define PG_PATLG 0x00001000 /* PAT on large pages */ -/* EPT PTE bits */ -#define EPT_R (1ULL << 0) -#define EPT_W (1ULL << 1) -#define EPT_X (1ULL << 2) -#define EPT_WB (6ULL << 3) -#define EPT_PS (1ULL << 7) - /* Cacheability bits when we are using PAT */ #define PG_WB (0) /* The default */ #define PG_WC (PG_WT) /* WT and CD is WC */ diff --git a/sys/arch/i386/include/specialreg.h b/sys/arch/i386/include/specialreg.h index 81c9ef2240c..094acfff322 100644 --- a/sys/arch/i386/include/specialreg.h +++ b/sys/arch/i386/include/specialreg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specialreg.h,v 1.72 2018/09/11 07:13:23 jsg Exp $ */ +/* $OpenBSD: specialreg.h,v 1.73 2019/01/18 01:34:50 pd Exp $ */ /* $NetBSD: specialreg.h,v 1.7 1994/10/27 04:16:26 cgd Exp $ */ /*- @@ -779,459 +779,6 @@ #define IA32_DEBUG_INTERFACE_MASK 0x80000000 /* - * VMX - */ -#define IA32_FEATURE_CONTROL_LOCK 0x01 -#define IA32_FEATURE_CONTROL_SMX_EN 0x02 -#define IA32_FEATURE_CONTROL_VMX_EN 0x04 -#define IA32_FEATURE_CONTROL_SENTER_EN (1ULL << 15) -#define 
IA32_FEATURE_CONTROL_SENTER_PARAM_MASK 0x7f00 -#define IA32_VMX_BASIC 0x480 -#define IA32_VMX_PINBASED_CTLS 0x481 -#define IA32_VMX_PROCBASED_CTLS 0x482 -#define IA32_VMX_EXIT_CTLS 0x483 -#define IA32_VMX_ENTRY_CTLS 0x484 -#define IA32_VMX_MISC 0x485 -#define IA32_VMX_CR0_FIXED0 0x486 -#define IA32_VMX_CR0_FIXED1 0x487 -#define IA32_VMX_CR4_FIXED0 0x488 -#define IA32_VMX_CR4_FIXED1 0x489 -#define IA32_VMX_PROCBASED2_CTLS 0x48B -#define IA32_VMX_EPT_VPID_CAP 0x48C -#define IA32_VMX_TRUE_PINBASED_CTLS 0x48D -#define IA32_VMX_TRUE_PROCBASED_CTLS 0x48E -#define IA32_VMX_TRUE_EXIT_CTLS 0x48F -#define IA32_VMX_TRUE_ENTRY_CTLS 0x490 -#define IA32_VMX_VMFUNC 0x491 - -#define IA32_EPT_VPID_CAP_PAGE_WALK_4 (1ULL << 6) -#define IA32_EPT_VPID_CAP_WB (1ULL << 14) -#define IA32_EPT_VPID_CAP_AD_BITS (1ULL << 21) - -#define IA32_EPT_PAGING_CACHE_TYPE_UC 0x0 -#define IA32_EPT_PAGING_CACHE_TYPE_WB 0x6 -#define IA32_EPT_AD_BITS_ENABLE (1ULL << 6) -#define IA32_EPT_PAGE_WALK_LENGTH 0x4 - -/* VMX : IA32_VMX_BASIC bits */ -#define IA32_VMX_TRUE_CTLS_AVAIL (1ULL << 55) - -/* VMX : IA32_VMX_PINBASED_CTLS bits */ -#define IA32_VMX_EXTERNAL_INT_EXITING (1ULL << 0) -#define IA32_VMX_NMI_EXITING (1ULL << 3) -#define IA32_VMX_VIRTUAL_NMIS (1ULL << 5) -#define IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER (1ULL << 6) -#define IA32_VMX_PROCESS_POSTED_INTERRUPTS (1ULL << 7) - -/* VMX : IA32_VMX_PROCBASED_CTLS bits */ -#define IA32_VMX_INTERRUPT_WINDOW_EXITING (1ULL << 2) -#define IA32_VMX_USE_TSC_OFFSETTING (1ULL << 3) -#define IA32_VMX_HLT_EXITING (1ULL << 7) -#define IA32_VMX_INVLPG_EXITING (1ULL << 9) -#define IA32_VMX_MWAIT_EXITING (1ULL << 10) -#define IA32_VMX_RDPMC_EXITING (1ULL << 11) -#define IA32_VMX_RDTSC_EXITING (1ULL << 12) -#define IA32_VMX_CR3_LOAD_EXITING (1ULL << 15) -#define IA32_VMX_CR3_STORE_EXITING (1ULL << 16) -#define IA32_VMX_CR8_LOAD_EXITING (1ULL << 19) -#define IA32_VMX_CR8_STORE_EXITING (1ULL << 20) -#define IA32_VMX_USE_TPR_SHADOW (1ULL << 21) -#define IA32_VMX_NMI_WINDOW_EXITING (1ULL << 22) -#define IA32_VMX_MOV_DR_EXITING (1ULL << 23) -#define IA32_VMX_UNCONDITIONAL_IO_EXITING (1ULL << 24) -#define IA32_VMX_USE_IO_BITMAPS (1ULL << 25) -#define IA32_VMX_MONITOR_TRAP_FLAG (1ULL << 27) -#define IA32_VMX_USE_MSR_BITMAPS (1ULL << 28) -#define IA32_VMX_MONITOR_EXITING (1ULL << 29) -#define IA32_VMX_PAUSE_EXITING (1ULL << 30) -#define IA32_VMX_ACTIVATE_SECONDARY_CONTROLS (1ULL << 31) - -/* VMX : IA32_VMX_PROCBASED2_CTLS bits */ -#define IA32_VMX_VIRTUALIZE_APIC (1ULL << 0) -#define IA32_VMX_ENABLE_EPT (1ULL << 1) -#define IA32_VMX_DESCRIPTOR_TABLE_EXITING (1ULL << 2) -#define IA32_VMX_ENABLE_RDTSCP (1ULL << 3) -#define IA32_VMX_VIRTUALIZE_X2APIC_MODE (1ULL << 4) -#define IA32_VMX_ENABLE_VPID (1ULL << 5) -#define IA32_VMX_WBINVD_EXITING (1ULL << 6) -#define IA32_VMX_UNRESTRICTED_GUEST (1ULL << 7) -#define IA32_VMX_APIC_REGISTER_VIRTUALIZATION (1ULL << 8) -#define IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY (1ULL << 9) -#define IA32_VMX_PAUSE_LOOP_EXITING (1ULL << 10) -#define IA32_VMX_RDRAND_EXITING (1ULL << 11) -#define IA32_VMX_ENABLE_INVPCID (1ULL << 12) -#define IA32_VMX_ENABLE_VM_FUNCTIONS (1ULL << 13) -#define IA32_VMX_VMCS_SHADOWING (1ULL << 14) -#define IA32_VMX_ENABLE_ENCLS_EXITING (1ULL << 15) -#define IA32_VMX_RDSEED_EXITING (1ULL << 16) -#define IA32_VMX_ENABLE_PML (1ULL << 17) -#define IA32_VMX_EPT_VIOLATION_VE (1ULL << 18) -#define IA32_VMX_CONCEAL_VMX_FROM_PT (1ULL << 19) -#define IA32_VMX_ENABLE_XSAVES_XRSTORS (1ULL << 20) -#define IA32_VMX_ENABLE_TSC_SCALING (1ULL << 25) - -/* VMX : 
IA32_VMX_EXIT_CTLS bits */ -#define IA32_VMX_SAVE_DEBUG_CONTROLS (1ULL << 2) -#define IA32_VMX_HOST_SPACE_ADDRESS_SIZE (1ULL << 9) -#define IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT (1ULL << 12) -#define IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT (1ULL << 15) -#define IA32_VMX_SAVE_IA32_PAT_ON_EXIT (1ULL << 18) -#define IA32_VMX_LOAD_IA32_PAT_ON_EXIT (1ULL << 19) -#define IA32_VMX_SAVE_IA32_EFER_ON_EXIT (1ULL << 20) -#define IA32_VMX_LOAD_IA32_EFER_ON_EXIT (1ULL << 21) -#define IA32_VMX_SAVE_VMX_PREEMPTION_TIMER (1ULL << 22) -#define IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT (1ULL << 23) -#define IA32_VMX_CONCEAL_VM_EXITS_FROM_PT (1ULL << 24) - -/* VMX: IA32_VMX_ENTRY_CTLS bits */ -#define IA32_VMX_LOAD_DEBUG_CONTROLS (1ULL << 2) -#define IA32_VMX_IA32E_MODE_GUEST (1ULL << 9) -#define IA32_VMX_ENTRY_TO_SMM (1ULL << 10) -#define IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT (1ULL << 11) -#define IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY (1ULL << 13) -#define IA32_VMX_LOAD_IA32_PAT_ON_ENTRY (1ULL << 14) -#define IA32_VMX_LOAD_IA32_EFER_ON_ENTRY (1ULL << 15) -#define IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY (1ULL << 16) -#define IA32_VMX_CONCEAL_VM_ENTRIES_FROM_PT (1ULL << 17) - -/* - * VMX : VMCS Fields - */ - -/* 16-bit control fields */ -#define VMCS_GUEST_VPID 0x0000 -#define VMCS_POSTED_INT_NOTIF_VECTOR 0x0002 -#define VMCS_EPTP_INDEX 0x0004 - -/* 16-bit guest state fields */ -#define VMCS_GUEST_IA32_ES_SEL 0x0800 -#define VMCS_GUEST_IA32_CS_SEL 0x0802 -#define VMCS_GUEST_IA32_SS_SEL 0x0804 -#define VMCS_GUEST_IA32_DS_SEL 0x0806 -#define VMCS_GUEST_IA32_FS_SEL 0x0808 -#define VMCS_GUEST_IA32_GS_SEL 0x080A -#define VMCS_GUEST_IA32_LDTR_SEL 0x080C -#define VMCS_GUEST_IA32_TR_SEL 0x080E -#define VMCS_GUEST_INTERRUPT_STATUS 0x0810 -#define VMCS_GUEST_PML_INDEX 0x0812 - -/* 16-bit host state fields */ -#define VMCS_HOST_IA32_ES_SEL 0x0C00 -#define VMCS_HOST_IA32_CS_SEL 0x0C02 -#define VMCS_HOST_IA32_SS_SEL 0x0C04 -#define VMCS_HOST_IA32_DS_SEL 0x0C06 -#define VMCS_HOST_IA32_FS_SEL 0x0C08 -#define VMCS_HOST_IA32_GS_SEL 0x0C0A -#define VMCS_HOST_IA32_TR_SEL 0x0C0C - -/* 64-bit control fields */ -#define VMCS_IO_BITMAP_A 0x2000 -#define VMCS_IO_BITMAP_B 0x2002 -#define VMCS_MSR_BITMAP_ADDRESS 0x2004 -#define VMCS_MSR_BITMAP_ADDRESS_HI 0x2005 -#define VMCS_EXIT_STORE_MSR_ADDRESS 0x2006 -#define VMCS_EXIT_STORE_MSR_ADDRESS_HI 0x2007 -#define VMCS_EXIT_LOAD_MSR_ADDRESS 0x2008 -#define VMCS_EXIT_LOAD_MSR_ADDRESS_HI 0x2009 -#define VMCS_ENTRY_LOAD_MSR_ADDRESS 0x200A -#define VMCS_ENTRY_LOAD_MSR_ADDRESS_HI 0x200B -#define VMCS_EXECUTIVE_VMCS_POINTER 0x200C -#define VMCS_PML_ADDRESS 0x200E -#define VMCS_TSC_OFFSET 0x2010 -#define VMCS_VIRTUAL_APIC_ADDRESS 0x2012 -#define VMCS_APIC_ACCESS_ADDRESS 0x2014 -#define VMCS_POSTED_INTERRUPT_DESC 0x2016 -#define VMCS_VM_FUNCTION_CONTROLS 0x2018 -#define VMCS_GUEST_IA32_EPTP 0x201A -#define VMCS_GUEST_IA32_EPTP_HI 0x201B -#define VMCS_EOI_EXIT_BITMAP_0 0x201C -#define VMCS_EOI_EXIT_BITMAP_1 0x201E -#define VMCS_EOI_EXIT_BITMAP_2 0x2020 -#define VMCS_EOI_EXIT_BITMAP_3 0x2022 -#define VMCS_EPTP_LIST_ADDRESS 0x2024 -#define VMCS_VMREAD_BITMAP_ADDRESS 0x2026 -#define VMCS_VMWRITE_BITMAP_ADDRESS 0x2028 -#define VMCS_VIRTUALIZATION_EXC_ADDRESS 0x202A -#define VMCS_XSS_EXITING_BITMAP 0x202C -#define VMCS_ENCLS_EXITING_BITMAP 0x202E -#define VMCS_TSC_MULTIPLIER 0x2032 - -/* 64-bit RO data field */ -#define VMCS_GUEST_PHYSICAL_ADDRESS 0x2400 -#define VMCS_GUEST_PHYSICAL_ADDRESS_HI 0x2401 - -/* 64-bit guest state fields */ -#define VMCS_LINK_POINTER 0x2800 -#define 
VMCS_LINK_POINTER_HI 0x2801 -#define VMCS_GUEST_IA32_DEBUGCTL 0x2802 -#define VMCS_GUEST_IA32_PAT 0x2804 -#define VMCS_GUEST_IA32_EFER 0x2806 -#define VMCS_GUEST_IA32_PERF_GBL_CTRL 0x2808 -#define VMCS_GUEST_PDPTE0 0x280A -#define VMCS_GUEST_PDPTE1 0x280C -#define VMCS_GUEST_PDPTE2 0x280E -#define VMCS_GUEST_PDPTE3 0x2810 -#define VMCS_GUEST_IA32_BNDCFGS 0x2812 - -/* 64-bit host state fields */ -#define VMCS_HOST_IA32_PAT 0x2C00 -#define VMCS_HOST_IA32_EFER 0x2C02 -#define VMCS_HOST_IA32_PERF_GBL_CTRL 0x2C04 - -/* 32-bit control fields */ -#define VMCS_PINBASED_CTLS 0x4000 -#define VMCS_PROCBASED_CTLS 0x4002 -#define VMCS_EXCEPTION_BITMAP 0x4004 -#define VMCS_PF_ERROR_CODE_MASK 0x4006 -#define VMCS_PF_ERROR_CODE_MATCH 0x4008 -#define VMCS_CR3_TARGET_COUNT 0x400A -#define VMCS_EXIT_CTLS 0x400C -#define VMCS_EXIT_MSR_STORE_COUNT 0x400E -#define VMCS_EXIT_MSR_LOAD_COUNT 0x4010 -#define VMCS_ENTRY_CTLS 0x4012 -#define VMCS_ENTRY_MSR_LOAD_COUNT 0x4014 -#define VMCS_ENTRY_INTERRUPTION_INFO 0x4016 -#define VMCS_ENTRY_EXCEPTION_ERROR_CODE 0x4018 -#define VMCS_ENTRY_INSTRUCTION_LENGTH 0x401A -#define VMCS_TPR_THRESHOLD 0x401C -#define VMCS_PROCBASED2_CTLS 0x401E -#define VMCS_PLE_GAP 0x4020 -#define VMCS_PLE_WINDOW 0x4022 - -/* 32-bit RO data fields */ -#define VMCS_INSTRUCTION_ERROR 0x4400 -#define VMCS_EXIT_REASON 0x4402 -#define VMCS_EXIT_INTERRUPTION_INFO 0x4404 -#define VMCS_EXIT_INTERRUPTION_ERR_CODE 0x4406 -#define VMCS_IDT_VECTORING_INFO 0x4408 -#define VMCS_IDT_VECTORING_ERROR_CODE 0x440A -#define VMCS_INSTRUCTION_LENGTH 0x440C -#define VMCS_EXIT_INSTRUCTION_INFO 0x440E - -/* 32-bit guest state fields */ -#define VMCS_GUEST_IA32_ES_LIMIT 0x4800 -#define VMCS_GUEST_IA32_CS_LIMIT 0x4802 -#define VMCS_GUEST_IA32_SS_LIMIT 0x4804 -#define VMCS_GUEST_IA32_DS_LIMIT 0x4806 -#define VMCS_GUEST_IA32_FS_LIMIT 0x4808 -#define VMCS_GUEST_IA32_GS_LIMIT 0x480A -#define VMCS_GUEST_IA32_LDTR_LIMIT 0x480C -#define VMCS_GUEST_IA32_TR_LIMIT 0x480E -#define VMCS_GUEST_IA32_GDTR_LIMIT 0x4810 -#define VMCS_GUEST_IA32_IDTR_LIMIT 0x4812 -#define VMCS_GUEST_IA32_ES_AR 0x4814 -#define VMCS_GUEST_IA32_CS_AR 0x4816 -#define VMCS_GUEST_IA32_SS_AR 0x4818 -#define VMCS_GUEST_IA32_DS_AR 0x481A -#define VMCS_GUEST_IA32_FS_AR 0x481C -#define VMCS_GUEST_IA32_GS_AR 0x481E -#define VMCS_GUEST_IA32_LDTR_AR 0x4820 -#define VMCS_GUEST_IA32_TR_AR 0x4822 -#define VMCS_GUEST_INTERRUPTIBILITY_ST 0x4824 -#define VMCS_GUEST_ACTIVITY_STATE 0x4826 -#define VMCS_GUEST_SMBASE 0x4828 -#define VMCS_GUEST_IA32_SYSENTER_CS 0x482A -#define VMCS_VMX_PREEMPTION_TIMER_VAL 0x482E - -/* 32-bit host state field */ -#define VMCS_HOST_IA32_SYSENTER_CS 0x4C00 - -/* Natural-width control fields */ -#define VMCS_CR0_MASK 0x6000 -#define VMCS_CR4_MASK 0x6002 -#define VMCS_CR0_READ_SHADOW 0x6004 -#define VMCS_CR4_READ_SHADOW 0x6006 -#define VMCS_CR3_TARGET_0 0x6008 -#define VMCS_CR3_TARGET_1 0x600A -#define VMCS_CR3_TARGET_2 0x600C -#define VMCS_CR3_TARGET_3 0x600E - -/* Natural-width RO fields */ -#define VMCS_GUEST_EXIT_QUALIFICATION 0x6400 -#define VMCS_IO_RCX 0x6402 -#define VMCS_IO_RSI 0x6404 -#define VMCS_IO_RDI 0x6406 -#define VMCS_IO_RIP 0x6408 -#define VMCS_GUEST_LINEAR_ADDRESS 0x640A - -/* Natural-width guest state fields */ -#define VMCS_GUEST_IA32_CR0 0x6800 -#define VMCS_GUEST_IA32_CR3 0x6802 -#define VMCS_GUEST_IA32_CR4 0x6804 -#define VMCS_GUEST_IA32_ES_BASE 0x6806 -#define VMCS_GUEST_IA32_CS_BASE 0x6808 -#define VMCS_GUEST_IA32_SS_BASE 0x680A -#define VMCS_GUEST_IA32_DS_BASE 0x680C -#define VMCS_GUEST_IA32_FS_BASE 0x680E -#define 
VMCS_GUEST_IA32_GS_BASE 0x6810 -#define VMCS_GUEST_IA32_LDTR_BASE 0x6812 -#define VMCS_GUEST_IA32_TR_BASE 0x6814 -#define VMCS_GUEST_IA32_GDTR_BASE 0x6816 -#define VMCS_GUEST_IA32_IDTR_BASE 0x6818 -#define VMCS_GUEST_IA32_DR7 0x681A -#define VMCS_GUEST_IA32_RSP 0x681C -#define VMCS_GUEST_IA32_RIP 0x681E -#define VMCS_GUEST_IA32_RFLAGS 0x6820 -#define VMCS_GUEST_PENDING_DBG_EXC 0x6822 -#define VMCS_GUEST_IA32_SYSENTER_ESP 0x6824 -#define VMCS_GUEST_IA32_SYSENTER_EIP 0x6826 - -/* Natural-width host state fields */ -#define VMCS_HOST_IA32_CR0 0x6C00 -#define VMCS_HOST_IA32_CR3 0x6C02 -#define VMCS_HOST_IA32_CR4 0x6C04 -#define VMCS_HOST_IA32_FS_BASE 0x6C06 -#define VMCS_HOST_IA32_GS_BASE 0x6C08 -#define VMCS_HOST_IA32_TR_BASE 0x6C0A -#define VMCS_HOST_IA32_GDTR_BASE 0x6C0C -#define VMCS_HOST_IA32_IDTR_BASE 0x6C0E -#define VMCS_HOST_IA32_SYSENTER_ESP 0x6C10 -#define VMCS_HOST_IA32_SYSENTER_EIP 0x6C12 -#define VMCS_HOST_IA32_RSP 0x6C14 -#define VMCS_HOST_IA32_RIP 0x6C16 - -#define IA32_VMX_INVVPID_INDIV_ADDR_CTX 0x0 -#define IA32_VMX_INVVPID_SINGLE_CTX 0x1 -#define IA32_VMX_INVVPID_ALL_CTX 0x2 -#define IA32_VMX_INVVPID_SINGLE_CTX_GLB 0x3 - -#define IA32_VMX_INVEPT_SINGLE_CTX 0x1 -#define IA32_VMX_INVEPT_GLOBAL_CTX 0x2 - -#define IA32_VMX_EPT_FAULT_READ (1ULL << 0) -#define IA32_VMX_EPT_FAULT_WRITE (1ULL << 1) -#define IA32_VMX_EPT_FAULT_EXEC (1ULL << 2) - -#define IA32_VMX_EPT_FAULT_WAS_READABLE (1ULL << 3) -#define IA32_VMX_EPT_FAULT_WAS_WRITABLE (1ULL << 4) -#define IA32_VMX_EPT_FAULT_WAS_EXECABLE (1ULL << 5) - -#define IA32_VMX_MSR_LIST_SIZE_MASK (7ULL << 25) -#define IA32_VMX_CR3_TGT_SIZE_MASK (0x1FFULL << 16) - -/* - * SVM - */ -#define MSR_AMD_VM_CR 0xc0010114 -#define MSR_AMD_VM_HSAVE_PA 0xc0010117 -#define CPUID_AMD_SVM_CAP 0x8000000A -#define AMD_SVMDIS 0x10 -#define AMD_SVM_NESTED_PAGING_CAP (1 << 0) - -/* - * SVM : VMCB intercepts - */ -#define SVM_INTERCEPT_CR0_READ (1UL << 0) -#define SVM_INTERCEPT_CR1_READ (1UL << 1) -#define SVM_INTERCEPT_CR2_READ (1UL << 2) -#define SVM_INTERCEPT_CR3_READ (1UL << 2) -#define SVM_INTERCEPT_CR4_READ (1UL << 4) -#define SVM_INTERCEPT_CR5_READ (1UL << 5) -#define SVM_INTERCEPT_CR6_READ (1UL << 6) -#define SVM_INTERCEPT_CR7_READ (1UL << 7) -#define SVM_INTERCEPT_CR8_READ (1UL << 8) -#define SVM_INTERCEPT_CR9_READ (1UL << 9) -#define SVM_INTERCEPT_CR10_READ (1UL << 10) -#define SVM_INTERCEPT_CR11_READ (1UL << 11) -#define SVM_INTERCEPT_CR12_READ (1UL << 12) -#define SVM_INTERCEPT_CR13_READ (1UL << 13) -#define SVM_INTERCEPT_CR14_READ (1UL << 14) -#define SVM_INTERCEPT_CR15_READ (1UL << 15) -#define SVM_INTERCEPT_CR0_WRITE (1UL << 16) -#define SVM_INTERCEPT_CR1_WRITE (1UL << 17) -#define SVM_INTERCEPT_CR2_WRITE (1UL << 18) -#define SVM_INTERCEPT_CR3_WRITE (1UL << 19) -#define SVM_INTERCEPT_CR4_WRITE (1UL << 20) -#define SVM_INTERCEPT_CR5_WRITE (1UL << 21) -#define SVM_INTERCEPT_CR6_WRITE (1UL << 22) -#define SVM_INTERCEPT_CR7_WRITE (1UL << 23) -#define SVM_INTERCEPT_CR8_WRITE (1UL << 24) -#define SVM_INTERCEPT_CR9_WRITE (1UL << 25) -#define SVM_INTERCEPT_CR10_WRITE (1UL << 26) -#define SVM_INTERCEPT_CR11_WRITE (1UL << 27) -#define SVM_INTERCEPT_CR12_WRITE (1UL << 28) -#define SVM_INTERCEPT_CR13_WRITE (1UL << 29) -#define SVM_INTERCEPT_CR14_WRITE (1UL << 30) -#define SVM_INTERCEPT_CR15_WRITE (1UL << 31) -#define SVM_INTERCEPT_DR0_READ (1UL << 0) -#define SVM_INTERCEPT_DR1_READ (1UL << 1) -#define SVM_INTERCEPT_DR2_READ (1UL << 2) -#define SVM_INTERCEPT_DR3_READ (1UL << 2) -#define SVM_INTERCEPT_DR4_READ (1UL << 4) -#define SVM_INTERCEPT_DR5_READ (1UL << 
5) -#define SVM_INTERCEPT_DR6_READ (1UL << 6) -#define SVM_INTERCEPT_DR7_READ (1UL << 7) -#define SVM_INTERCEPT_DR8_READ (1UL << 8) -#define SVM_INTERCEPT_DR9_READ (1UL << 9) -#define SVM_INTERCEPT_DR10_READ (1UL << 10) -#define SVM_INTERCEPT_DR11_READ (1UL << 11) -#define SVM_INTERCEPT_DR12_READ (1UL << 12) -#define SVM_INTERCEPT_DR13_READ (1UL << 13) -#define SVM_INTERCEPT_DR14_READ (1UL << 14) -#define SVM_INTERCEPT_DR15_READ (1UL << 15) -#define SVM_INTERCEPT_DR0_WRITE (1UL << 16) -#define SVM_INTERCEPT_DR1_WRITE (1UL << 17) -#define SVM_INTERCEPT_DR2_WRITE (1UL << 18) -#define SVM_INTERCEPT_DR3_WRITE (1UL << 19) -#define SVM_INTERCEPT_DR4_WRITE (1UL << 20) -#define SVM_INTERCEPT_DR5_WRITE (1UL << 21) -#define SVM_INTERCEPT_DR6_WRITE (1UL << 22) -#define SVM_INTERCEPT_DR7_WRITE (1UL << 23) -#define SVM_INTERCEPT_DR8_WRITE (1UL << 24) -#define SVM_INTERCEPT_DR9_WRITE (1UL << 25) -#define SVM_INTERCEPT_DR10_WRITE (1UL << 26) -#define SVM_INTERCEPT_DR11_WRITE (1UL << 27) -#define SVM_INTERCEPT_DR12_WRITE (1UL << 28) -#define SVM_INTERCEPT_DR13_WRITE (1UL << 29) -#define SVM_INTERCEPT_DR14_WRITE (1UL << 30) -#define SVM_INTERCEPT_DR15_WRITE (1UL << 31) -#define SVM_INTERCEPT_INTR (1UL << 0) -#define SVM_INTERCEPT_NMI (1UL << 1) -#define SVM_INTERCEPT_SMI (1UL << 2) -#define SVM_INTERCEPT_INIT (1UL << 3) -#define SVM_INTERCEPT_VINTR (1UL << 4) -#define SVM_INTERCEPT_CR0_SEL_WRITE (1UL << 5) -#define SVM_INTERCEPT_IDTR_READ (1UL << 6) -#define SVM_INTERCEPT_GDTR_READ (1UL << 7) -#define SVM_INTERCEPT_LDTR_READ (1UL << 8) -#define SVM_INTERCEPT_TR_READ (1UL << 9) -#define SVM_INTERCEPT_IDTR_WRITE (1UL << 10) -#define SVM_INTERCEPT_GDTR_WRITE (1UL << 11) -#define SVM_INTERCEPT_LDTR_WRITE (1UL << 12) -#define SVM_INTERCEPT_TR_WRITE (1UL << 13) -#define SVM_INTERCEPT_RDTSC (1UL << 14) -#define SVM_INTERCEPT_RDPMC (1UL << 15) -#define SVM_INTERCEPT_PUSHF (1UL << 16) -#define SVM_INTERCEPT_POPF (1UL << 17) -#define SVM_INTERCEPT_CPUID (1UL << 18) -#define SVM_INTERCEPT_RSM (1UL << 19) -#define SVM_INTERCEPT_IRET (1UL << 20) -#define SVM_INTERCEPT_INTN (1UL << 21) -#define SVM_INTERCEPT_INVD (1UL << 22) -#define SVM_INTERCEPT_PAUSE (1UL << 23) -#define SVM_INTERCEPT_HLT (1UL << 24) -#define SVM_INTERCEPT_INVLPG (1UL << 25) -#define SVM_INTERCEPT_INVLPGA (1UL << 26) -#define SVM_INTERCEPT_INOUT (1UL << 27) -#define SVM_INTERCEPT_MSR (1UL << 28) -#define SVM_INTERCEPT_TASK_SWITCH (1UL << 29) -#define SVM_INTERCEPT_FERR_FREEZE (1UL << 30) -#define SVM_INTERCEPT_SHUTDOWN (1UL << 31) -#define SVM_INTERCEPT_VMRUN (1UL << 0) -#define SVM_INTERCEPT_VMMCALL (1UL << 1) -#define SVM_INTERCEPT_VMLOAD (1UL << 2) -#define SVM_INTERCEPT_VMSAVE (1UL << 3) -#define SVM_INTERCEPT_STGI (1UL << 4) -#define SVM_INTERCEPT_CLGI (1UL << 5) -#define SVM_INTERCEPT_SKINIT (1UL << 6) -#define SVM_INTERCEPT_RDTSCP (1UL << 7) -#define SVM_INTERCEPT_ICEBP (1UL << 8) -#define SVM_INTERCEPT_WBINVD (1UL << 9) -#define SVM_INTERCEPT_MONITOR (1UL << 10) -#define SVM_INTERCEPT_MWAIT_UNCOND (1UL << 11) -#define SVM_INTERCEPT_MWAIT_COND (1UL << 12) - -/* * PAT */ #define PATENTRY(n, type) ((uint64_t)type << ((n) * 8)) diff --git a/sys/arch/i386/include/vmmvar.h b/sys/arch/i386/include/vmmvar.h index 51c03e53c37..d80938a0001 100644 --- a/sys/arch/i386/include/vmmvar.h +++ b/sys/arch/i386/include/vmmvar.h @@ -14,813 +14,4 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -/* - * CPU capabilities for VMM operation - */ -#ifndef _MACHINE_VMMVAR_H_ -#define _MACHINE_VMMVAR_H_ - #define VMM_HV_SIGNATURE "OpenBSDVMM58" - -#define VMM_MAX_MEM_RANGES 16 -#define VMM_MAX_DISKS_PER_VM 4 -#define VMM_MAX_PATH_DISK 128 -#define VMM_MAX_PATH_CDROM 128 -#define VMM_MAX_NAME_LEN 32 -#define VMM_MAX_KERNEL_PATH 128 -#define VMM_MAX_VCPUS_PER_VM 64 -#define VMM_MAX_VM_MEM_SIZE 3072 -#define VMM_MAX_NICS_PER_VM 4 - -#define VMM_PCI_MMIO_BAR_BASE 0xF0000000 -#define VMM_PCI_MMIO_BAR_END 0xF0FFFFFF -#define VMM_PCI_MMIO_BAR_SIZE 0x00010000 -#define VMM_PCI_IO_BAR_BASE 0x1000 -#define VMM_PCI_IO_BAR_END 0xFFFF -#define VMM_PCI_IO_BAR_SIZE 0x1000 - -/* VMX: Basic Exit Reasons */ -#define VMX_EXIT_NMI 0 -#define VMX_EXIT_EXTINT 1 -#define VMX_EXIT_TRIPLE_FAULT 2 -#define VMX_EXIT_INIT 3 -#define VMX_EXIT_SIPI 4 -#define VMX_EXIT_IO_SMI 5 -#define VMX_EXIT_OTHER_SMI 6 -#define VMX_EXIT_INT_WINDOW 7 -#define VMX_EXIT_NMI_WINDOW 8 -#define VMX_EXIT_TASK_SWITCH 9 -#define VMX_EXIT_CPUID 10 -#define VMX_EXIT_GETSEC 11 -#define VMX_EXIT_HLT 12 -#define VMX_EXIT_INVD 13 -#define VMX_EXIT_INVLPG 14 -#define VMX_EXIT_RDPMC 15 -#define VMX_EXIT_RDTSC 16 -#define VMX_EXIT_RSM 17 -#define VMX_EXIT_VMCALL 18 -#define VMX_EXIT_VMCLEAR 19 -#define VMX_EXIT_VMLAUNCH 20 -#define VMX_EXIT_VMPTRLD 21 -#define VMX_EXIT_VMPTRST 22 -#define VMX_EXIT_VMREAD 23 -#define VMX_EXIT_VMRESUME 24 -#define VMX_EXIT_VMWRITE 25 -#define VMX_EXIT_VMXOFF 26 -#define VMX_EXIT_VMXON 27 -#define VMX_EXIT_CR_ACCESS 28 -#define VMX_EXIT_MOV_DR 29 -#define VMX_EXIT_IO 30 -#define VMX_EXIT_RDMSR 31 -#define VMX_EXIT_WRMSR 32 -#define VMX_EXIT_ENTRY_FAILED_GUEST_STATE 33 -#define VMX_EXIT_ENTRY_FAILED_MSR_LOAD 34 -#define VMX_EXIT_MWAIT 36 -#define VMX_EXIT_MTF 37 -#define VMX_EXIT_MONITOR 39 -#define VMX_EXIT_PAUSE 40 -#define VMX_EXIT_ENTRY_FAILED_MCE 41 -#define VMX_EXIT_TPR_BELOW_THRESHOLD 43 -#define VMX_EXIT_APIC_ACCESS 44 -#define VMX_EXIT_VIRTUALIZED_EOI 45 -#define VMX_EXIT_GDTR_IDTR 46 -#define VMX_EXIT_LDTR_TR 47 -#define VMX_EXIT_EPT_VIOLATION 48 -#define VMX_EXIT_EPT_MISCONFIGURATION 49 -#define VMX_EXIT_INVEPT 50 -#define VMX_EXIT_RDTSCP 51 -#define VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED 52 -#define VMX_EXIT_INVVPID 53 -#define VMX_EXIT_WBINVD 54 -#define VMX_EXIT_XSETBV 55 -#define VMX_EXIT_APIC_WRITE 56 -#define VMX_EXIT_RDRAND 57 -#define VMX_EXIT_INVPCID 58 -#define VMX_EXIT_VMFUNC 59 -#define VMX_EXIT_RDSEED 61 -#define VMX_EXIT_XSAVES 63 -#define VMX_EXIT_XRSTORS 64 - -/* - * VMX: Misc defines - */ -#define VMX_MAX_CR3_TARGETS 256 - -#define VM_EXIT_TERMINATED 0xFFFE -#define VM_EXIT_NONE 0xFFFF - -/* - * SVM: Intercept codes (exit reasons) - */ -#define SVM_VMEXIT_CR0_READ 0x00 -#define SVM_VMEXIT_CR1_READ 0x01 -#define SVM_VMEXIT_CR2_READ 0x02 -#define SVM_VMEXIT_CR3_READ 0x03 -#define SVM_VMEXIT_CR4_READ 0x04 -#define SVM_VMEXIT_CR5_READ 0x05 -#define SVM_VMEXIT_CR6_READ 0x06 -#define SVM_VMEXIT_CR7_READ 0x07 -#define SVM_VMEXIT_CR8_READ 0x08 -#define SVM_VMEXIT_CR9_READ 0x09 -#define SVM_VMEXIT_CR10_READ 0x0A -#define SVM_VMEXIT_CR11_READ 0x0B -#define SVM_VMEXIT_CR12_READ 0x0C -#define SVM_VMEXIT_CR13_READ 0x0D -#define SVM_VMEXIT_CR14_READ 0x0E -#define SVM_VMEXIT_CR15_READ 0x0F -#define SVM_VMEXIT_CR0_WRITE 0x10 -#define SVM_VMEXIT_CR1_WRITE 0x11 -#define SVM_VMEXIT_CR2_WRITE 0x12 -#define SVM_VMEXIT_CR3_WRITE 0x13 -#define SVM_VMEXIT_CR4_WRITE 0x14 -#define SVM_VMEXIT_CR5_WRITE 0x15 -#define SVM_VMEXIT_CR6_WRITE 0x16 -#define SVM_VMEXIT_CR7_WRITE 0x17 -#define SVM_VMEXIT_CR8_WRITE 0x18 
-#define SVM_VMEXIT_CR9_WRITE 0x19 -#define SVM_VMEXIT_CR10_WRITE 0x1A -#define SVM_VMEXIT_CR11_WRITE 0x1B -#define SVM_VMEXIT_CR12_WRITE 0x1C -#define SVM_VMEXIT_CR13_WRITE 0x1D -#define SVM_VMEXIT_CR14_WRITE 0x1E -#define SVM_VMEXIT_CR15_WRITE 0x1F -#define SVM_VMEXIT_DR0_READ 0x20 -#define SVM_VMEXIT_DR1_READ 0x21 -#define SVM_VMEXIT_DR2_READ 0x22 -#define SVM_VMEXIT_DR3_READ 0x23 -#define SVM_VMEXIT_DR4_READ 0x24 -#define SVM_VMEXIT_DR5_READ 0x25 -#define SVM_VMEXIT_DR6_READ 0x26 -#define SVM_VMEXIT_DR7_READ 0x27 -#define SVM_VMEXIT_DR8_READ 0x28 -#define SVM_VMEXIT_DR9_READ 0x29 -#define SVM_VMEXIT_DR10_READ 0x2A -#define SVM_VMEXIT_DR11_READ 0x2B -#define SVM_VMEXIT_DR12_READ 0x2C -#define SVM_VMEXIT_DR13_READ 0x2D -#define SVM_VMEXIT_DR14_READ 0x2E -#define SVM_VMEXIT_DR15_READ 0x2F -#define SVM_VMEXIT_DR0_WRITE 0x30 -#define SVM_VMEXIT_DR1_WRITE 0x31 -#define SVM_VMEXIT_DR2_WRITE 0x32 -#define SVM_VMEXIT_DR3_WRITE 0x33 -#define SVM_VMEXIT_DR4_WRITE 0x34 -#define SVM_VMEXIT_DR5_WRITE 0x35 -#define SVM_VMEXIT_DR6_WRITE 0x36 -#define SVM_VMEXIT_DR7_WRITE 0x37 -#define SVM_VMEXIT_DR8_WRITE 0x38 -#define SVM_VMEXIT_DR9_WRITE 0x39 -#define SVM_VMEXIT_DR10_WRITE 0x3A -#define SVM_VMEXIT_DR11_WRITE 0x3B -#define SVM_VMEXIT_DR12_WRITE 0x3C -#define SVM_VMEXIT_DR13_WRITE 0x3D -#define SVM_VMEXIT_DR14_WRITE 0x3E -#define SVM_VMEXIT_DR15_WRITE 0x3F -#define SVM_VMEXIT_EXCP0 0x40 -#define SVM_VMEXIT_EXCP1 0x41 -#define SVM_VMEXIT_EXCP2 0x42 -#define SVM_VMEXIT_EXCP3 0x43 -#define SVM_VMEXIT_EXCP4 0x44 -#define SVM_VMEXIT_EXCP5 0x45 -#define SVM_VMEXIT_EXCP6 0x46 -#define SVM_VMEXIT_EXCP7 0x47 -#define SVM_VMEXIT_EXCP8 0x48 -#define SVM_VMEXIT_EXCP9 0x49 -#define SVM_VMEXIT_EXCP10 0x4A -#define SVM_VMEXIT_EXCP11 0x4B -#define SVM_VMEXIT_EXCP12 0x4C -#define SVM_VMEXIT_EXCP13 0x4D -#define SVM_VMEXIT_EXCP14 0x4E -#define SVM_VMEXIT_EXCP15 0x4F -#define SVM_VMEXIT_EXCP16 0x50 -#define SVM_VMEXIT_EXCP17 0x51 -#define SVM_VMEXIT_EXCP18 0x52 -#define SVM_VMEXIT_EXCP19 0x53 -#define SVM_VMEXIT_EXCP20 0x54 -#define SVM_VMEXIT_EXCP21 0x55 -#define SVM_VMEXIT_EXCP22 0x56 -#define SVM_VMEXIT_EXCP23 0x57 -#define SVM_VMEXIT_EXCP24 0x58 -#define SVM_VMEXIT_EXCP25 0x59 -#define SVM_VMEXIT_EXCP26 0x5A -#define SVM_VMEXIT_EXCP27 0x5B -#define SVM_VMEXIT_EXCP28 0x5C -#define SVM_VMEXIT_EXCP29 0x5D -#define SVM_VMEXIT_EXCP30 0x5E -#define SVM_VMEXIT_EXCP31 0x5F -#define SVM_VMEXIT_INTR 0x60 -#define SVM_VMEXIT_NMI 0x61 -#define SVM_VMEXIT_SMI 0x62 -#define SVM_VMEXIT_INIT 0x63 -#define SVM_VMEXIT_VINTR 0x64 -#define SVM_VMEXIT_CR0_SEL_WRITE 0x65 -#define SVM_VMEXIT_IDTR_READ 0x66 -#define SVM_VMEXIT_GDTR_READ 0x67 -#define SVM_VMEXIT_LDTR_READ 0x68 -#define SVM_VMEXIT_TR_READ 0x69 -#define SVM_VMEXIT_IDTR_WRITE 0x6A -#define SVM_VMEXIT_GDTR_WRITE 0x6B -#define SVM_VMEXIT_LDTR_WRITE 0x6C -#define SVM_VMEXIT_TR_WRITE 0x6D -#define SVM_VMEXIT_RDTSC 0x6E -#define SVM_VMEXIT_RDPMC 0x6F -#define SVM_VMEXIT_PUSHF 0x70 -#define SVM_VMEXIT_POPF 0x71 -#define SVM_VMEXIT_CPUID 0x72 -#define SVM_VMEXIT_RSM 0x73 -#define SVM_VMEXIT_IRET 0x74 -#define SVM_VMEXIT_SWINT 0x75 -#define SVM_VMEXIT_INVD 0x76 -#define SVM_VMEXIT_PAUSE 0x77 -#define SVM_VMEXIT_HLT 0x78 -#define SVM_VMEXIT_INVLPG 0x79 -#define SVM_VMEXIT_INVLPGA 0x7A -#define SVM_VMEXIT_IOIO 0x7B -#define SVM_VMEXIT_MSR 0x7C -#define SVM_VMEXIT_TASK_SWITCH 0x7D -#define SVM_VMEXIT_FERR_FREEZE 0x7E -#define SVM_VMEXIT_SHUTDOWN 0x7F -#define SVM_VMEXIT_VMRUN 0x80 -#define SVM_VMEXIT_VMMCALL 0x81 -#define SVM_VMEXIT_VMLOAD 0x82 -#define SVM_VMEXIT_VMSAVE 0x83 -#define 
SVM_VMEXIT_STGI 0x84 -#define SVM_VMEXIT_CLGI 0x85 -#define SVM_VMEXIT_SKINIT 0x86 -#define SVM_VMEXIT_RDTSCP 0x87 -#define SVM_VMEXIT_ICEBP 0x88 -#define SVM_VMEXIT_WBINVD 0x89 -#define SVM_VMEXIT_MONITOR 0x8A -#define SVM_VMEXIT_MWAIT 0x8B -#define SVM_VMEXIT_MWAIT_CONDITIONAL 0x8C -#define SVM_VMEXIT_NPF 0x400 -#define SVM_VMEXIT_INVALID -1 - -/* - * Exception injection vectors (these correspond to the CPU exception types - * defined in the SDM.) - */ -#define VMM_EX_DE 0 /* Divide Error #DE */ -#define VMM_EX_DB 1 /* Debug Exception #DB */ -#define VMM_EX_NMI 2 /* NMI */ -#define VMM_EX_BP 3 /* Breakpoint #BP */ -#define VMM_EX_OF 4 /* Overflow #OF */ -#define VMM_EX_BR 5 /* Bound range exceeded #BR */ -#define VMM_EX_UD 6 /* Undefined opcode #UD */ -#define VMM_EX_NM 7 /* Device not available #NM */ -#define VMM_EX_DF 8 /* Double fault #DF */ -#define VMM_EX_CP 9 /* Coprocessor segment overrun (unused) */ -#define VMM_EX_TS 10 /* Invalid TSS #TS */ -#define VMM_EX_NP 11 /* Segment not present #NP */ -#define VMM_EX_SS 12 /* Stack segment fault #SS */ -#define VMM_EX_GP 13 /* General protection #GP */ -#define VMM_EX_PF 14 /* Page fault #PF */ -#define VMM_EX_MF 16 /* x87 FPU floating point error #MF */ -#define VMM_EX_AC 17 /* Alignment check #AC */ -#define VMM_EX_MC 18 /* Machine check #MC */ -#define VMM_EX_XM 19 /* SIMD floating point exception #XM */ -#define VMM_EX_VE 20 /* Virtualization exception #VE */ - -/* - * VCPU state values. Note that there is a conversion function in vmm.c - * (vcpu_state_decode) that converts these to human readable strings, - * so this enum and vcpu_state_decode should be kept in sync. - */ -enum { - VCPU_STATE_STOPPED, - VCPU_STATE_RUNNING, - VCPU_STATE_REQTERM, - VCPU_STATE_TERMINATED, - VCPU_STATE_UNKNOWN, -}; - -enum { - VEI_DIR_OUT, - VEI_DIR_IN -}; - -/* - * Port definitions not found elsewhere - */ -#define PCKBC_AUX 0x61 -#define ELCR0 0x4D0 -#define ELCR1 0x4D1 - -/* - * vm exit data - * vm_exit_inout : describes an IN/OUT exit - */ -struct vm_exit_inout { - uint8_t vei_size; /* Size of access */ - uint8_t vei_dir; /* Direction */ - uint8_t vei_rep; /* REP prefix? */ - uint8_t vei_string; /* string variety? 
-	uint8_t vei_encoding;	/* operand encoding */
-	uint16_t vei_port;	/* port */
-	uint32_t vei_data;	/* data (for IN insns) */
-};
-
-/*
- * struct vcpu_segment_info describes a segment + selector set, used
- * in constructing the initial vcpu register content
- */
-struct vcpu_segment_info {
-	uint16_t vsi_sel;
-	uint32_t vsi_limit;
-	uint32_t vsi_ar;
-	uint32_t vsi_base;
-};
-
-#define VCPU_REGS_EAX 0
-#define VCPU_REGS_EBX 1
-#define VCPU_REGS_ECX 2
-#define VCPU_REGS_EDX 3
-#define VCPU_REGS_ESI 4
-#define VCPU_REGS_EDI 5
-#define VCPU_REGS_ESP 6
-#define VCPU_REGS_EBP 7
-#define VCPU_REGS_EIP 8
-#define VCPU_REGS_EFLAGS 9
-#define VCPU_REGS_NGPRS (VCPU_REGS_EFLAGS + 1)
-
-#define VCPU_REGS_CR0 0
-#define VCPU_REGS_CR2 1
-#define VCPU_REGS_CR3 2
-#define VCPU_REGS_CR4 3
-#define VCPU_REGS_CR8 4
-#define VCPU_REGS_PDPTE0 5
-#define VCPU_REGS_PDPTE1 6
-#define VCPU_REGS_PDPTE2 7
-#define VCPU_REGS_PDPTE3 8
-#define VCPU_REGS_NCRS (VCPU_REGS_PDPTE3 + 1)
-
-#define VCPU_REGS_CS 0
-#define VCPU_REGS_DS 1
-#define VCPU_REGS_ES 2
-#define VCPU_REGS_FS 3
-#define VCPU_REGS_GS 4
-#define VCPU_REGS_SS 5
-#define VCPU_REGS_LDTR 6
-#define VCPU_REGS_TR 7
-#define VCPU_REGS_NSREGS (VCPU_REGS_TR + 1)
-
-#define VCPU_REGS_EFER 0
-#define VCPU_REGS_STAR 1
-#define VCPU_REGS_LSTAR 2
-#define VCPU_REGS_CSTAR 3
-#define VCPU_REGS_SFMASK 4
-#define VCPU_REGS_KGSBASE 5
-#define VCPU_REGS_MISC_ENABLE 6
-#define VCPU_REGS_NMSRS (VCPU_REGS_MISC_ENABLE + 1)
-
-struct vcpu_reg_state {
-	uint32_t vrs_gprs[VCPU_REGS_NGPRS];
-	uint32_t vrs_crs[VCPU_REGS_NCRS];
-	uint32_t vrs_msrs[VCPU_REGS_NMSRS];
-	struct vcpu_segment_info vrs_sregs[VCPU_REGS_NSREGS];
-	struct vcpu_segment_info vrs_gdtr;
-	struct vcpu_segment_info vrs_idtr;
-};
-
-struct vm_mem_range {
-	paddr_t vmr_gpa;
-	vaddr_t vmr_va;
-	size_t vmr_size;
-};
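The VCPU_REGS_* constants above index the flat arrays in struct vcpu_reg_state. As a minimal sketch of how a monitor process might have assembled an initial register file with them: the helper name and most values below are illustrative assumptions, not taken from this tree; only CR0's architectural reset value and the usual real-mode CS reset convention are standard x86.

#include <string.h>

/*
 * Hypothetical sketch: fill in a real-mode-style reset state using
 * the VCPU_REGS_* indices above.  Values are illustrative.
 */
static void
init_reset_state(struct vcpu_reg_state *vrs)
{
	memset(vrs, 0, sizeof(*vrs));
	vrs->vrs_gprs[VCPU_REGS_EIP] = 0xFFF0;		/* reset vector offset */
	vrs->vrs_gprs[VCPU_REGS_EFLAGS] = 0x2;		/* bit 1 is always set */
	vrs->vrs_crs[VCPU_REGS_CR0] = 0x60000010;	/* CD | NW | ET */
	vrs->vrs_sregs[VCPU_REGS_CS] = (struct vcpu_segment_info){
		.vsi_sel = 0xF000, .vsi_limit = 0xFFFF,
		.vsi_ar = 0x9B, .vsi_base = 0xFFFF0000
	};
}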
-
-/*
- * struct vm_exit
- *
- * Contains VM exit information communicated to vmd(8). This information is
- * gathered by vmm(4) from the CPU on each exit that requires help from vmd.
- */
-struct vm_exit {
-	union {
-		struct vm_exit_inout vei;	/* IN/OUT exit */
-	};
-
-	struct vcpu_reg_state vrs;
-};
-
-struct vm_create_params {
-	/* Input parameters to VMM_IOC_CREATE */
-	size_t vcp_nmemranges;
-	size_t vcp_ncpus;
-	size_t vcp_ndisks;
-	size_t vcp_nnics;
-	struct vm_mem_range vcp_memranges[VMM_MAX_MEM_RANGES];
-	char vcp_disks[VMM_MAX_DISKS_PER_VM][VMM_MAX_PATH_DISK];
-	char vcp_cdrom[VMM_MAX_PATH_CDROM];
-	char vcp_name[VMM_MAX_NAME_LEN];
-	char vcp_kernel[VMM_MAX_KERNEL_PATH];
-	uint8_t vcp_macs[VMM_MAX_NICS_PER_VM][6];
-
-	/* Output parameter from VMM_IOC_CREATE */
-	uint32_t vcp_id;
-};
-
-struct vm_run_params {
-	/* Input parameters to VMM_IOC_RUN */
-	uint32_t vrp_vm_id;
-	uint32_t vrp_vcpu_id;
-	uint8_t vrp_continue;		/* Continuing from an exit */
-	uint16_t vrp_irq;		/* IRQ to inject */
-
-	/* Input/output parameter to VMM_IOC_RUN */
-	struct vm_exit *vrp_exit;	/* updated exit data */
-
-	/* Output parameter from VMM_IOC_RUN */
-	uint16_t vrp_exit_reason;	/* exit reason */
-	uint8_t vrp_irqready;		/* ready for IRQ on entry */
-};
-
-struct vm_info_result {
-	/* Output parameters from VMM_IOC_INFO */
-	size_t vir_memory_size;
-	size_t vir_used_size;
-	size_t vir_ncpus;
-	uint8_t vir_vcpu_state[VMM_MAX_VCPUS_PER_VM];
-	pid_t vir_creator_pid;
-	uint32_t vir_id;
-	char vir_name[VMM_MAX_NAME_LEN];
-};
-
-struct vm_info_params {
-	/* Input parameters to VMM_IOC_INFO */
-	size_t vip_size;		/* Output buffer size */
-
-	/* Output Parameters from VMM_IOC_INFO */
-	size_t vip_info_ct;		/* # of entries returned */
-	struct vm_info_result *vip_info; /* Output buffer */
-};
-
-struct vm_terminate_params {
-	/* Input parameters to VMM_IOC_TERM */
-	uint32_t vtp_vm_id;
-};
-
-struct vm_resetcpu_params {
-	/* Input parameters to VMM_IOC_RESETCPU */
-	uint32_t vrp_vm_id;
-	uint32_t vrp_vcpu_id;
-	struct vcpu_reg_state vrp_init_state;
-};
-
-struct vm_intr_params {
-	/* Input parameters to VMM_IOC_INTR */
-	uint32_t vip_vm_id;
-	uint32_t vip_vcpu_id;
-	uint16_t vip_intr;
-};
-
-#define VM_RWREGS_GPRS 0x1	/* read/write GPRs */
-#define VM_RWREGS_SREGS 0x2	/* read/write segment registers */
-#define VM_RWREGS_CRS 0x4	/* read/write CRs */
-#define VM_RWREGS_MSRS 0x8	/* read/write MSRs */
-#define VM_RWREGS_ALL (VM_RWREGS_GPRS | VM_RWREGS_SREGS | VM_RWREGS_CRS | \
-    VM_RWREGS_MSRS)
-
-struct vm_rwregs_params {
-	uint32_t vrwp_vm_id;
-	uint32_t vrwp_vcpu_id;
-	uint64_t vrwp_mask;
-	struct vcpu_reg_state vrwp_regs;
-};
-
-/* IOCTL definitions */
-#define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
-#define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
-#define VMM_IOC_INFO _IOWR('V', 3, struct vm_info_params) /* Get VM Info */
-#define VMM_IOC_TERM _IOW('V', 4, struct vm_terminate_params) /* Terminate VM */
-#define VMM_IOC_RESETCPU _IOW('V', 5, struct vm_resetcpu_params) /* Reset */
-#define VMM_IOC_INTR _IOW('V', 6, struct vm_intr_params) /* Intr pending */
-#define VMM_IOC_READREGS _IOWR('V', 7, struct vm_rwregs_params) /* Get registers */
-#define VMM_IOC_WRITEREGS _IOW('V', 8, struct vm_rwregs_params) /* Set registers */
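These ioctls were the whole userspace interface to the driver; vmd(8) issued them against the vmm(4) device node. A hedged sketch of the create path follows; the /dev/vmm path, the open mode, and the error handling are assumptions for illustration, not taken from this diff.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdint.h>
#include <err.h>

/*
 * Hypothetical usage sketch, not from this tree: create a VM and
 * return its kernel-assigned id.  The fd stays open for subsequent
 * VMM_IOC_RUN/VMM_IOC_TERM calls.
 */
static int vmm_fd = -1;

static uint32_t
create_vm(struct vm_create_params *vcp)
{
	if (vmm_fd == -1 && (vmm_fd = open("/dev/vmm", O_RDWR)) == -1)
		err(1, "open");
	if (ioctl(vmm_fd, VMM_IOC_CREATE, vcp) == -1)
		err(1, "VMM_IOC_CREATE");
	return (vcp->vcp_id);	/* output parameter filled in by the kernel */
}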
-
-/* CPUID masks */
-/*
- * clone host capabilities minus:
- *  debug store (CPUIDECX_DTES64, CPUIDECX_DSCPL, CPUID_DS)
- *  monitor/mwait (CPUIDECX_MWAIT)
- *  vmx (CPUIDECX_VMX)
- *  smx (CPUIDECX_SMX)
- *  speedstep (CPUIDECX_EST)
- *  thermal (CPUIDECX_TM2, CPUID_ACPI, CPUID_TM)
- *  context id (CPUIDECX_CNXTID)
- *  silicon debug (CPUIDECX_SDBG)
- *  xTPR (CPUIDECX_XTPR)
- *  perf/debug (CPUIDECX_PDCM)
- *  pcid (CPUIDECX_PCID)
- *  direct cache access (CPUIDECX_DCA)
- *  x2APIC (CPUIDECX_X2APIC)
- *  apic deadline (CPUIDECX_DEADLINE)
- *  timestamp (CPUID_TSC)
- *  apic (CPUID_APIC)
- *  psn (CPUID_PSN)
- *  self snoop (CPUID_SS)
- *  hyperthreading (CPUID_HTT)
- *  pending break enabled (CPUID_PBE)
- *  MTRR (CPUID_MTRR)
- *  PAT (CPUID_PAT)
- */
-#define VMM_CPUIDECX_MASK ~(CPUIDECX_EST | CPUIDECX_TM2 | \
-    CPUIDECX_MWAIT | CPUIDECX_PDCM | CPUIDECX_VMX | CPUIDECX_DTES64 | \
-    CPUIDECX_DSCPL | CPUIDECX_SMX | CPUIDECX_CNXTID | CPUIDECX_SDBG | \
-    CPUIDECX_XTPR | CPUIDECX_PCID | CPUIDECX_DCA | CPUIDECX_X2APIC | \
-    CPUIDECX_DEADLINE)
-#define VMM_CPUIDEDX_MASK ~(CPUID_ACPI | CPUID_TM | CPUID_TSC | \
-    CPUID_HTT | CPUID_DS | CPUID_APIC | CPUID_PSN | CPUID_SS | CPUID_PBE | \
-    CPUID_MTRR | CPUID_PAT)
-
-
-/*
- * SEFF flags - copy from host minus:
- *  SGX (SEFF0EBX_SGX)
- *  HLE (SEFF0EBX_HLE)
- *  INVPCID (SEFF0EBX_INVPCID)
- *  RTM (SEFF0EBX_RTM)
- *  PQM (SEFF0EBX_PQM)
- *  MPX (SEFF0EBX_MPX)
- *  PCOMMIT (SEFF0EBX_PCOMMIT)
- *  PT (SEFF0EBX_PT)
- */
-#define VMM_SEFF0EBX_MASK ~(SEFF0EBX_SGX | SEFF0EBX_HLE | SEFF0EBX_INVPCID | \
-    SEFF0EBX_RTM | SEFF0EBX_PQM | SEFF0EBX_MPX | \
-    SEFF0EBX_PCOMMIT | SEFF0EBX_PT)
-#define VMM_SEFF0ECX_MASK 0xFFFFFFFF
-
-/*
- * CPUID[0x4] deterministic cache info
- */
-#define VMM_CPUID4_CACHE_TOPOLOGY_MASK 0x3FF
-
-#ifdef _KERNEL
-
-#define VMX_FAIL_LAUNCH_UNKNOWN 1
-#define VMX_FAIL_LAUNCH_INVALID_VMCS 2
-#define VMX_FAIL_LAUNCH_VALID_VMCS 3
-
-#define VMX_NUM_MSR_STORE 1
-
-/* MSR bitmap manipulation macros */
-#define VMX_MSRIDX(m) ((m) / 8)
-#define VMX_MSRBIT(m) (1 << (m) % 8)
-
-#define SVM_MSRIDX(m) ((m) / 4)
-#define SVM_MSRBIT_R(m) (1 << (((m) % 4) * 2))
-#define SVM_MSRBIT_W(m) (1 << (((m) % 4) * 2 + 1))
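The bitmap math above packs eight MSRs per byte for VMX and four per byte for SVM (a read bit and a write bit per MSR). In both schemes a set bit means the access is intercepted, so granting the guest direct access means clearing bits. A sketch with hypothetical helper names; a real implementation also adds a region offset for the high MSR ranges (0xC0000000 and up).

#include <stdint.h>

/* Illustrative helpers, not from this tree.  "idx" is the MSR's
 * offset within its bitmap region. */
static void
vmx_allow_msr(uint8_t *bitmap, uint32_t idx)
{
	bitmap[VMX_MSRIDX(idx)] &= ~VMX_MSRBIT(idx);	/* one bit per MSR */
}

static void
svm_allow_msr(uint8_t *msrpm, uint32_t idx)
{
	/* two bits per MSR: read intercept and write intercept */
	msrpm[SVM_MSRIDX(idx)] &= ~(SVM_MSRBIT_R(idx) | SVM_MSRBIT_W(idx));
}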
-
-enum {
-	VMM_MODE_UNKNOWN,
-	VMM_MODE_VMX,
-	VMM_MODE_EPT,
-	VMM_MODE_SVM,
-	VMM_MODE_RVI
-};
-
-enum {
-	VMM_MEM_TYPE_REGULAR,
-	VMM_MEM_TYPE_UNKNOWN
-};
-
-/* Forward declarations */
-struct vm;
-
-/*
- * Implementation-specific cpu state
- */
-struct vmcb_segment {
-	uint16_t vs_sel;			/* 000h */
-	uint16_t vs_attr;			/* 002h */
-	uint32_t vs_lim;			/* 004h */
-	uint64_t vs_base;			/* 008h */
-};
-
-struct vmcb {
-	union {
-		struct {
-			uint32_t v_cr_rw;		/* 000h */
-			uint32_t v_dr_rw;		/* 004h */
-			uint32_t v_excp;		/* 008h */
-			uint32_t v_intercept1;		/* 00Ch */
-			uint32_t v_intercept2;		/* 010h */
-			uint8_t v_pad1[0x28];		/* 014h-03Bh */
-			uint16_t v_pause_thr;		/* 03Ch */
-			uint16_t v_pause_ct;		/* 03Eh */
-			uint64_t v_iopm_pa;		/* 040h */
-			uint64_t v_msrpm_pa;		/* 048h */
-			uint64_t v_tsc_offset;		/* 050h */
-			uint32_t v_asid;		/* 058h */
-			uint8_t v_tlb_control;		/* 05Ch */
-			uint8_t v_pad2[0x3];		/* 05Dh-05Fh */
-			uint8_t v_tpr;			/* 060h */
-			uint8_t v_irq;			/* 061h */
-			uint8_t v_misc1;		/* 062h */
-			uint8_t v_misc2;		/* 063h */
-			uint8_t v_misc3;		/* 064h */
-			uint8_t v_pad3[0x3];		/* 065h-067h */
-			uint64_t v_intr_shadow;		/* 068h */
-			uint64_t v_exitcode;		/* 070h */
-			uint64_t v_exitinfo1;		/* 078h */
-			uint64_t v_exitinfo2;		/* 080h */
-			uint64_t v_exitintinfo;		/* 088h */
-			uint64_t v_np_enable;		/* 090h */
-			uint64_t v_avic_apic_bar;	/* 098h */
-			uint64_t v_pad4;		/* 0A0h */
-			uint64_t v_eventinj;		/* 0A8h */
-			uint64_t v_n_cr3;		/* 0B0h */
-			uint64_t v_lbr_virt_enable;	/* 0B8h */
-			uint64_t v_vmcb_clean_bits;	/* 0C0h */
-			uint64_t v_nrip;		/* 0C8h */
-			uint8_t v_n_bytes_fetched;	/* 0D0h */
-			uint8_t v_guest_ins_bytes[0xf];	/* 0D1h-0DFh */
-			uint64_t v_avic_apic_back_page;	/* 0E0h */
-			uint64_t v_pad5;		/* 0E8h-0EFh */
-			uint64_t v_avic_logical_table;	/* 0F0h */
-			uint64_t v_avic_phys;		/* 0F8h */
-
-		};
-
-		uint8_t vmcb_control[0x400];
-	};
-
-	union {
-		struct {
-			/* Offsets here are relative to start of VMCB SSA */
-			struct vmcb_segment v_es;	/* 000h */
-			struct vmcb_segment v_cs;	/* 010h */
-			struct vmcb_segment v_ss;	/* 020h */
-			struct vmcb_segment v_ds;	/* 030h */
-			struct vmcb_segment v_fs;	/* 040h */
-			struct vmcb_segment v_gs;	/* 050h */
-			struct vmcb_segment v_gdtr;	/* 060h */
-			struct vmcb_segment v_ldtr;	/* 070h */
-			struct vmcb_segment v_idtr;	/* 080h */
-			struct vmcb_segment v_tr;	/* 090h */
-			uint8_t v_pad6[0x2B];		/* 0A0h-0CAh */
-			uint8_t v_cpl;			/* 0CBh */
-			uint32_t v_pad7;		/* 0CCh-0CFh */
-			uint64_t v_efer;		/* 0D0h */
-			uint8_t v_pad8[0x70];		/* 0D8h-147h */
-			uint64_t v_cr4;			/* 148h */
-			uint64_t v_cr3;			/* 150h */
-			uint64_t v_cr0;			/* 158h */
-			uint64_t v_dr7;			/* 160h */
-			uint64_t v_dr6;			/* 168h */
-			uint64_t v_rflags;		/* 170h */
-			uint64_t v_rip;			/* 178h */
-			uint64_t v_pad9[0xB];		/* 180h-1D7h */
-			uint64_t v_rsp;			/* 1D8h */
-			uint64_t v_pad10[0x3];		/* 1E0h-1F7h */
-			uint64_t v_rax;			/* 1F8h */
-			uint64_t v_star;		/* 200h */
-			uint64_t v_lstar;		/* 208h */
-			uint64_t v_cstar;		/* 210h */
-			uint64_t v_sfmask;		/* 218h */
-			uint64_t v_kgsbase;		/* 220h */
-			uint64_t v_sysenter_cs;		/* 228h */
-			uint64_t v_sysenter_esp;	/* 230h */
-			uint64_t v_sysenter_eip;	/* 238h */
-			uint64_t v_cr2;			/* 240h */
-			uint64_t v_pad11[0x4];		/* 248h-267h */
-			uint64_t v_g_pat;		/* 268h */
-			uint64_t v_dbgctl;		/* 270h */
-			uint64_t v_br_from;		/* 278h */
-			uint64_t v_br_to;		/* 280h */
-			uint64_t v_lastexcpfrom;	/* 288h */
-			uint64_t v_lastexcpto;		/* 290h */
-		};
-
-		uint8_t vmcb_layout[PAGE_SIZE - 0x400];
-	};
-};
-
-struct vmcs {
-	uint32_t vmcs_revision;
-};
-
-struct vmx_invvpid_descriptor
-{
-	uint64_t vid_vpid;
-	uint64_t vid_addr;
-};
-
-struct vmx_invept_descriptor
-{
-	uint64_t vid_eptp;
-	uint64_t vid_reserved;
-};
-
-struct vmx_msr_store
-{
-	uint64_t vms_index;
-	uint64_t vms_data;
-};
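The offset comments in struct vmcb track the fixed layout the hardware consumes, so they are cheap to verify at build time. A sketch using C11 _Static_assert; the checks themselves are illustrative and were not part of the original header.

#include <stddef.h>

/* Spot-check one control-area field and one save-area field against
 * the commented offsets; the save area starts after the 0x400-byte
 * control area. */
_Static_assert(offsetof(struct vmcb, v_exitcode) == 0x070,
    "vmcb exit code offset");
_Static_assert(offsetof(struct vmcb, v_rip) == 0x400 + 0x178,
    "vmcb save-area rip offset");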
-
-/*
- * Storage for guest registers not preserved in VMCS and various exit
- * information.
- *
- * Note that vmx_enter_guest depends on the layout of this struct for
- * field access.
- */
-struct vmx_gueststate
-{
-	/* %esi should be first */
-	uint32_t vg_esi;		/* 0x00 */
-	uint32_t vg_eax;		/* 0x04 */
-	uint32_t vg_ebx;		/* 0x08 */
-	uint32_t vg_ecx;		/* 0x0c */
-	uint32_t vg_edx;		/* 0x10 */
-	uint32_t vg_edi;		/* 0x14 */
-	uint32_t vg_ebp;		/* 0x18 */
-	uint32_t vg_cr2;		/* 0x1c */
-	uint32_t vg_eip;		/* 0x20 */
-	uint32_t vg_exit_reason;	/* 0x24 */
-	uint32_t vg_eflags;		/* 0x28 */
-};
-
-/*
- * Virtual Machine
- */
-struct vm;
-
-/*
- * Virtual CPU
- */
-struct vcpu {
-	/* VMCS / VMCB pointer */
-	vaddr_t vc_control_va;
-	uint64_t vc_control_pa;
-
-	/* VLAPIC pointer */
-	vaddr_t vc_vlapic_va;
-	uint64_t vc_vlapic_pa;
-
-	/* MSR bitmap address */
-	vaddr_t vc_msr_bitmap_va;
-	uint64_t vc_msr_bitmap_pa;
-
-	struct vm *vc_parent;
-	uint32_t vc_id;
-	uint16_t vc_vpid;
-	u_int vc_state;
-	SLIST_ENTRY(vcpu) vc_vcpu_link;
-
-	uint8_t vc_virt_mode;
-
-	struct cpu_info *vc_last_pcpu;
-	struct vm_exit vc_exit;
-
-	uint16_t vc_intr;
-	uint8_t vc_irqready;
-
-	uint8_t vc_event;
-
-	/* VMX only */
-	uint64_t vc_vmx_basic;
-	uint64_t vc_vmx_entry_ctls;
-	uint64_t vc_vmx_true_entry_ctls;
-	uint64_t vc_vmx_exit_ctls;
-	uint64_t vc_vmx_true_exit_ctls;
-	uint64_t vc_vmx_pinbased_ctls;
-	uint64_t vc_vmx_true_pinbased_ctls;
-	uint64_t vc_vmx_procbased_ctls;
-	uint64_t vc_vmx_true_procbased_ctls;
-	uint64_t vc_vmx_procbased2_ctls;
-	struct vmx_gueststate vc_gueststate;
-	vaddr_t vc_vmx_msr_exit_save_va;
-	paddr_t vc_vmx_msr_exit_save_pa;
-	vaddr_t vc_vmx_msr_exit_load_va;
-	paddr_t vc_vmx_msr_exit_load_pa;
-	vaddr_t vc_vmx_msr_entry_load_va;
-	paddr_t vc_vmx_msr_entry_load_pa;
-	uint8_t vc_vmx_vpid_enabled;
-
-	/* SVM only */
-	vaddr_t vc_svm_hsa_va;
-	paddr_t vc_svm_hsa_pa;
-	vaddr_t vc_svm_ioio_va;
-	paddr_t vc_svm_ioio_pa;
-};
-
-SLIST_HEAD(vcpu_head, vcpu);
-
-void vmm_dispatch_intr(vaddr_t);
-int vmxon(uint64_t *);
-int vmxoff(void);
-int vmclear(uint64_t *);
-int vmptrld(uint64_t *);
-int vmptrst(uint64_t *);
-int vmwrite(uint32_t, uint32_t);
-int vmread(uint32_t, uint32_t *);
-void invvpid(uint32_t, struct vmx_invvpid_descriptor *);
-void invept(uint32_t, struct vmx_invept_descriptor *);
-int vmx_enter_guest(uint64_t *, struct vmx_gueststate *, int, vaddr_t);
-void start_vmm_on_cpu(struct cpu_info *);
-void stop_vmm_on_cpu(struct cpu_info *);
-
-typedef u_int64_t pd_entry_t;
-
-#endif /* _KERNEL */
-
-#endif /* ! _MACHINE_VMMVAR_H_ */
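The comment above struct vmx_gueststate warns that vmx_enter_guest, implemented in the (also deleted) vmm_support.S, hard-codes these field offsets when saving and restoring guest registers. Layout checks in the same spirit could have looked like the following sketch, which is illustrative and not part of the original header.

#include <stddef.h>

/* Pin the offsets the structure's comments claim, so a reordered
 * field breaks the build instead of the world switch. */
_Static_assert(offsetof(struct vmx_gueststate, vg_esi) == 0x00,
    "vg_esi must be first");
_Static_assert(offsetof(struct vmx_gueststate, vg_eip) == 0x20,
    "vg_eip offset");
_Static_assert(offsetof(struct vmx_gueststate, vg_eflags) == 0x28,
    "vg_eflags offset");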