Diffstat (limited to 'sys')
 sys/arch/amd64/amd64/vmm.c      | 517
 sys/arch/amd64/include/vmmvar.h |  66
 2 files changed, 344 insertions(+), 239 deletions(-)
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index b230bffa410..f790f5a73f2 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm.c,v 1.315 2022/06/27 15:12:14 dv Exp $ */
+/* $OpenBSD: vmm.c,v 1.316 2022/06/30 13:17:58 dv Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
*
@@ -71,56 +71,79 @@ void *l1tf_flush_region;
#define VMX_EXIT_INFO_COMPLETE \
(VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON)
+/*
+ * Virtual Machine
+ *
+ * Methods used to protect vm struct members:
+ * a atomic operations
+ * I immutable after create
+ * K kernel lock
+ * r reference count
+ * v vcpu list rwlock (vm_vcpu_list)
+ * V vmm_softc's vm_lock
+ */
struct vm {
- struct vmspace *vm_vmspace;
- vm_map_t vm_map;
- uint32_t vm_id;
- pid_t vm_creator_pid;
- size_t vm_nmemranges;
- size_t vm_memory_size;
+ struct vmspace *vm_vmspace; /* [K] */
+ vm_map_t vm_map; /* [K] */
+ uint32_t vm_id; /* [I] */
+ pid_t vm_creator_pid; /* [I] */
+ size_t vm_nmemranges; /* [I] */
+ size_t vm_memory_size; /* [I] */
char vm_name[VMM_MAX_NAME_LEN];
struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES];
+ struct refcnt vm_refcnt; /* [a] */
- struct vcpu_head vm_vcpu_list;
- uint32_t vm_vcpu_ct;
- u_int vm_vcpus_running;
+ struct vcpu_head vm_vcpu_list; /* [v] */
+ uint32_t vm_vcpu_ct; /* [v] */
+ u_int vm_vcpus_running; /* [a] */
struct rwlock vm_vcpu_lock;
- SLIST_ENTRY(vm) vm_link;
+ SLIST_ENTRY(vm) vm_link; /* [V] */
};
SLIST_HEAD(vmlist_head, vm);
+/*
+ * Virtual Machine Monitor
+ *
+ * Methods used to protect struct members in the global vmm device:
+ * a atomic operations
+ * I immutable after create
+ * K kernel lock
+ * p virtual process id (vpid/asid) rwlock
+ * r reference count
+ * v vm list rwlock (vm_lock)
+ */
struct vmm_softc {
- struct device sc_dev;
+ struct device sc_dev; /* [r] */
/* Suspend/Resume Synchronization */
struct refcnt sc_refcnt;
- volatile unsigned int sc_status;
+ volatile unsigned int sc_status; /* [a] */
#define VMM_SUSPENDED (unsigned int) 0
#define VMM_ACTIVE (unsigned int) 1
/* Capabilities */
- uint32_t nr_vmx_cpus;
- uint32_t nr_svm_cpus;
- uint32_t nr_rvi_cpus;
- uint32_t nr_ept_cpus;
+ uint32_t nr_vmx_cpus; /* [I] */
+ uint32_t nr_svm_cpus; /* [I] */
+ uint32_t nr_rvi_cpus; /* [I] */
+ uint32_t nr_ept_cpus; /* [I] */
/* Managed VMs */
- struct vmlist_head vm_list;
+ struct vmlist_head vm_list; /* [v] */
- int mode;
+ int mode; /* [I] */
- size_t vcpu_ct;
- size_t vcpu_max;
+ size_t vcpu_ct; /* [v] */
+ size_t vcpu_max; /* [I] */
struct rwlock vm_lock;
- size_t vm_ct; /* number of in-memory VMs */
- size_t vm_idx; /* next unique VM index */
+ size_t vm_ct; /* [v] no. of in-memory VMs */
+ size_t vm_idx; /* [a] next unique VM index */
struct rwlock vpid_lock;
- uint16_t max_vpid;
- uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */
+ uint16_t max_vpid; /* [I] */
+ uint8_t vpids[512]; /* [p] bitmap of VPID/ASIDs */
};
void vmx_dump_vmcs_field(uint16_t, const char *);
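
Taken together, the two legends define how much synchronization each access needs: [I] members may be read lock-free once the object is created, [v]/[V] members only under the named rwlock, and [a] members only through atomic or refcnt(9) operations. A minimal sketch of the convention in use, mirroring the list walk this diff adds to vm_find():

	rw_enter_read(&vmm_softc->vm_lock);	/* vm_list is [v] */
	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
		refcnt_take(&vm->vm_refcnt);	/* vm_refcnt is [a] */
		id = vm->vm_id;			/* vm_id is [I]: no lock */
		refcnt_rele_wake(&vm->vm_refcnt);
	}
	rw_exit_read(&vmm_softc->vm_lock);
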
@@ -168,7 +191,7 @@ int vm_impl_init_svm(struct vm *, struct proc *);
void vm_impl_deinit(struct vm *);
void vm_impl_deinit_vmx(struct vm *);
void vm_impl_deinit_svm(struct vm *);
-void vm_teardown(struct vm *);
+void vm_teardown(struct vm **);
int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
int vmx_get_exit_info(uint64_t *, uint64_t *);
@@ -391,6 +414,7 @@ vmm_attach(struct device *parent, struct device *self, void *aux)
sc->nr_rvi_cpus = 0;
sc->nr_ept_cpus = 0;
sc->vcpu_ct = 0;
+ sc->vcpu_max = VMM_MAX_VCPUS;
sc->vm_ct = 0;
sc->vm_idx = 0;
@@ -761,12 +785,16 @@ vm_find_vcpu(struct vm *vm, uint32_t id)
if (vm == NULL)
return NULL;
+
rw_enter_read(&vm->vm_vcpu_lock);
SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+ refcnt_take(&vcpu->vc_refcnt);
if (vcpu->vc_id == id)
break;
+ refcnt_rele_wake(&vcpu->vc_refcnt);
}
rw_exit_read(&vm->vm_vcpu_lock);
+
return vcpu;
}
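
Note the contract this creates: on a hit, the loop breaks with the reference still held, so vm_find_vcpu() returns a pinned vcpu (or NULL) and every caller owes a matching release. A minimal caller sketch, following the shape of vm_intr_pending() below:

	vcpu = vm_find_vcpu(vm, id);
	if (vcpu == NULL)
		return (ENOENT);
	/* ... use vcpu, taking vc_lock where the annotations require ... */
	refcnt_rele_wake(&vcpu->vc_refcnt);	/* drop the lookup reference */
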
@@ -790,12 +818,10 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
{
struct vm *vm;
struct vcpu *vcpu;
- int error;
+ int error, ret = 0;
/* Find the desired VM */
- rw_enter_read(&vmm_softc->vm_lock);
error = vm_find(vrp->vrp_vm_id, &vm);
- rw_exit_read(&vmm_softc->vm_lock);
/* Not found? exit. */
if (error != 0) {
@@ -809,35 +835,31 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
vrp->vrp_vcpu_id, vrp->vrp_vm_id);
- return (ENOENT);
+ ret = ENOENT;
+ goto out;
}
rw_enter_write(&vcpu->vc_lock);
- if (vcpu->vc_state != VCPU_STATE_STOPPED) {
- DPRINTF("%s: reset of vcpu %u on vm %u attempted "
- "while vcpu was in state %u (%s)\n", __func__,
- vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state,
- vcpu_state_decode(vcpu->vc_state));
-
- rw_exit_write(&vcpu->vc_lock);
- return (EBUSY);
- }
-
- DPRINTF("%s: resetting vm %d vcpu %d to power on defaults\n", __func__,
- vm->vm_id, vcpu->vc_id);
-
- if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
- printf("%s: failed\n", __func__);
+ if (vcpu->vc_state != VCPU_STATE_STOPPED ||
+ refcnt_shared(&vcpu->vc_refcnt)) {
+ ret = EBUSY;
+ } else {
+ if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
+ printf("%s: failed\n", __func__);
#ifdef VMM_DEBUG
- dump_vcpu(vcpu);
+ dump_vcpu(vcpu);
#endif /* VMM_DEBUG */
- rw_exit_write(&vcpu->vc_lock);
- return (EIO);
+ ret = EIO;
+ }
}
-
rw_exit_write(&vcpu->vc_lock);
- return (0);
+out:
+ if (vcpu != NULL)
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+ refcnt_rele_wake(&vm->vm_refcnt);
+
+ return (ret);
}
/*
@@ -858,29 +880,30 @@ vm_intr_pending(struct vm_intr_params *vip)
{
struct vm *vm;
struct vcpu *vcpu;
- int error;
+ int error, ret = 0;
/* Find the desired VM */
- rw_enter_read(&vmm_softc->vm_lock);
error = vm_find(vip->vip_vm_id, &vm);
/* Not found? exit. */
- if (error != 0) {
- rw_exit_read(&vmm_softc->vm_lock);
+ if (error != 0)
return (error);
- }
vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- return (ENOENT);
+ if (vcpu == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
rw_enter_write(&vcpu->vc_lock);
vcpu->vc_intr = vip->vip_intr;
rw_exit_write(&vcpu->vc_lock);
- return (0);
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+out:
+ refcnt_rele_wake(&vm->vm_refcnt);
+ return (ret);
}
/*
@@ -902,23 +925,21 @@ int
vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
struct vm *vm;
struct vcpu *vcpu;
- int error;
+ int error, ret = 0;
/* Find the desired VM */
- rw_enter_read(&vmm_softc->vm_lock);
error = vm_find(vpp->vpp_vm_id, &vm);
/* Not found? exit. */
- if (error != 0) {
- rw_exit_read(&vmm_softc->vm_lock);
+ if (error != 0)
return (error);
- }
vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- return (ENOENT);
+ if (vcpu == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
if (dir == 0) {
if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
@@ -926,16 +947,17 @@ vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA)
vpp->vpp_pvclock_system_gpa = \
vcpu->vc_pvclock_system_gpa;
- return (0);
- }
-
- if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
- vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
- if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) {
- vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
+ } else {
+ if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION)
+ vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
+ if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) {
+ vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
+ }
}
- return (0);
-
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+out:
+ refcnt_rele_wake(&vm->vm_refcnt);
+ return (ret);
}
/*
@@ -961,23 +983,21 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
struct vm *vm;
struct vcpu *vcpu;
struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
- int error, ret;
+ int error, ret = 0;
/* Find the desired VM */
- rw_enter_read(&vmm_softc->vm_lock);
error = vm_find(vrwp->vrwp_vm_id, &vm);
/* Not found? exit. */
- if (error != 0) {
- rw_exit_read(&vmm_softc->vm_lock);
+ if (error != 0)
return (error);
- }
vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
- rw_exit_read(&vmm_softc->vm_lock);
- if (vcpu == NULL)
- return (ENOENT);
+ if (vcpu == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
rw_enter_write(&vcpu->vc_lock);
if (vmm_softc->mode == VMM_MODE_VMX ||
@@ -995,7 +1015,9 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
ret = EINVAL;
}
rw_exit_write(&vcpu->vc_lock);
-
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+out:
+ refcnt_rele_wake(&vm->vm_refcnt);
return (ret);
}
@@ -1025,7 +1047,7 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
size_t size;
vm_prot_t prot;
uint64_t msr;
- int ret, memtype;
+ int ret = 0, memtype;
/* If not EPT or RVI, nothing to do here */
if (!(vmm_softc->mode == VMM_MODE_EPT
@@ -1033,9 +1055,7 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
return (0);
/* Find the desired VM */
- rw_enter_read(&vmm_softc->vm_lock);
ret = vm_find(vmep->vmep_vm_id, &vm);
- rw_exit_read(&vmm_softc->vm_lock);
/* Not found? exit. */
if (ret != 0) {
@@ -1049,16 +1069,19 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
vmep->vmep_vcpu_id, vmep->vmep_vm_id);
- return (ENOENT);
+ ret = ENOENT;
+ goto out_nolock;
}
+ rw_enter_write(&vcpu->vc_lock);
+
if (vcpu->vc_state != VCPU_STATE_STOPPED) {
DPRINTF("%s: mprotect_ept %u on vm %u attempted "
"while vcpu was in state %u (%s)\n", __func__,
vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
vcpu_state_decode(vcpu->vc_state));
-
- return (EBUSY);
+ ret = EBUSY;
+ goto out;
}
/* Only proceed if the pmap is in the correct mode */
@@ -1075,19 +1098,22 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
if ((prot & PROT_MASK) != prot &&
(prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) {
DPRINTF("%s: W+X permission requested\n", __func__);
- return (EINVAL);
+ ret = EINVAL;
+ goto out;
}
/* No Write only permissions */
if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
DPRINTF("%s: No Write only permissions\n", __func__);
- return (EINVAL);
+ ret = EINVAL;
+ goto out;
}
/* No empty permissions */
if (prot == 0) {
DPRINTF("%s: No empty permissions\n", __func__);
- return (EINVAL);
+ ret = EINVAL;
+ goto out;
}
/* No execute only on EPT CPUs that don't have that capability */
@@ -1103,36 +1129,48 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
}
/* Must be page aligned */
- if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0)
- return (EINVAL);
+ if ((sgpa & PAGE_MASK) || (size & PAGE_MASK) || size == 0) {
+ ret = EINVAL;
+ goto out;
+ }
/* size must be less than 512GB */
- if (size >= NBPD_L4)
- return (EINVAL);
+ if (size >= NBPD_L4) {
+ ret = EINVAL;
+ goto out;
+ }
/* no wraparound */
- if (sgpa + size < sgpa)
- return (EINVAL);
+ if (sgpa + size < sgpa) {
+ ret = EINVAL;
+ goto out;
+ }
/*
* Specifying addresses within the PCI MMIO space is forbidden.
* Disallow addresses that start inside the MMIO space:
* [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
*/
- if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END)
- return (EINVAL);
+ if (sgpa >= VMM_PCI_MMIO_BAR_BASE && sgpa <= VMM_PCI_MMIO_BAR_END) {
+ ret = EINVAL;
+ goto out;
+ }
/*
* ... and disallow addresses that end inside the MMIO space:
* (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
*/
if (sgpa + size > VMM_PCI_MMIO_BAR_BASE &&
- sgpa + size <= VMM_PCI_MMIO_BAR_END)
- return (EINVAL);
+ sgpa + size <= VMM_PCI_MMIO_BAR_END) {
+ ret = EINVAL;
+ goto out;
+ }
memtype = vmm_get_guest_memtype(vm, sgpa);
- if (memtype == VMM_MEM_TYPE_UNKNOWN)
- return (EINVAL);
+ if (memtype == VMM_MEM_TYPE_UNKNOWN) {
+ ret = EINVAL;
+ goto out;
+ }
if (vmm_softc->mode == VMM_MODE_EPT)
ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
@@ -1141,8 +1179,14 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
/* XXX requires an invlpga */
ret = 0;
} else
- return (EINVAL);
-
+ ret = EINVAL;
+out:
+ if (vcpu != NULL) {
+ rw_exit_write(&vcpu->vc_lock);
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+ }
+out_nolock:
+ refcnt_rele_wake(&vm->vm_refcnt);
return (ret);
}
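
The two MMIO guards first reject a range whose start lies inside the window, then one whose end lies inside it. A hypothetical helper (not part of the diff) folding both conditions into one predicate:

	/* True if [gpa, gpa + len) must be rejected because it starts or
	 * ends inside the PCI MMIO window, per the two checks above. */
	static inline int
	touches_mmio(paddr_t gpa, size_t len)
	{
		return ((gpa >= VMM_PCI_MMIO_BAR_BASE &&
		    gpa <= VMM_PCI_MMIO_BAR_END) ||
		    (gpa + len > VMM_PCI_MMIO_BAR_BASE &&
		    gpa + len <= VMM_PCI_MMIO_BAR_END));
	}
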
@@ -1302,9 +1346,13 @@ vm_find(uint32_t id, struct vm **res)
{
struct proc *p = curproc;
struct vm *vm;
+ int ret = ENOENT;
*res = NULL;
+
+ rw_enter_read(&vmm_softc->vm_lock);
SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
+ refcnt_take(&vm->vm_refcnt);
if (vm->vm_id == id) {
/*
* In the pledged VM process, only allow to find
@@ -1315,13 +1363,20 @@ vm_find(uint32_t id, struct vm **res)
if (((p->p_p->ps_pledge &
(PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
(vm->vm_creator_pid != p->p_p->ps_pid))
- return (pledge_fail(p, EPERM, PLEDGE_VMM));
- *res = vm;
- return (0);
+ ret = EPERM;
+ else {
+ *res = vm;
+ ret = 0;
+ }
+ break;
}
+ refcnt_rele_wake(&vm->vm_refcnt);
}
+ rw_exit_read(&vmm_softc->vm_lock);
- return (ENOENT);
+ if (ret == EPERM)
+ return (pledge_fail(p, EPERM, PLEDGE_VMM));
+ return (ret);
}
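
vm_find() now takes vmm_softc->vm_lock itself and, on success, returns with a reference held on *res, so the per-ioctl lock dance disappears from the callers. Every caller in this diff follows the same shape; a minimal sketch:

	error = vm_find(id, &vm);
	if (error != 0)
		return (error);
	/* ... operate on the pinned vm ... */
	refcnt_rele_wake(&vm->vm_refcnt);	/* drop the lookup reference */
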
/*
@@ -1692,16 +1747,20 @@ vm_create(struct vm_create_params *vcp, struct proc *p)
if (vcp->vcp_ncpus != 1)
return (EINVAL);
- rw_enter_write(&vmm_softc->vm_lock);
- if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > VMM_MAX_VCPUS) {
+ /* Bail early if we're already at vcpu capacity. */
+ rw_enter_read(&vmm_softc->vm_lock);
+ if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__,
vmm_softc->vcpu_max);
- rw_exit_write(&vmm_softc->vm_lock);
+ rw_exit_read(&vmm_softc->vm_lock);
return (ENOMEM);
}
- vmm_softc->vcpu_ct += vcp->vcp_ncpus;
+ rw_exit_read(&vmm_softc->vm_lock);
+ /* Instantiate and configure the new vm. */
vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
+ refcnt_init(&vm->vm_refcnt); /* Do not release yet. */
+
SLIST_INIT(&vm->vm_vcpu_list);
rw_init(&vm->vm_vcpu_lock, "vcpu_list");
@@ -1714,43 +1773,57 @@ vm_create(struct vm_create_params *vcp, struct proc *p)
if (vm_impl_init(vm, p)) {
printf("failed to init arch-specific features for vm %p\n", vm);
- vm_teardown(vm);
- rw_exit_write(&vmm_softc->vm_lock);
+ vm_teardown(&vm);
return (ENOMEM);
}
- vmm_softc->vm_ct++;
- vmm_softc->vm_idx++;
-
- vm->vm_id = vmm_softc->vm_idx;
vm->vm_vcpu_ct = 0;
vm->vm_vcpus_running = 0;
/* Initialize each VCPU defined in 'vcp' */
for (i = 0; i < vcp->vcp_ncpus; i++) {
vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);
+ refcnt_init(&vcpu->vc_refcnt);
+ refcnt_rele(&vcpu->vc_refcnt);
+
vcpu->vc_parent = vm;
if ((ret = vcpu_init(vcpu)) != 0) {
printf("failed to init vcpu %d for vm %p\n", i, vm);
- vm_teardown(vm);
- vmm_softc->vm_idx--;
- rw_exit_write(&vmm_softc->vm_lock);
+ vm_teardown(&vm);
return (ret);
}
- rw_enter_write(&vm->vm_vcpu_lock);
vcpu->vc_id = vm->vm_vcpu_ct;
vm->vm_vcpu_ct++;
SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
- rw_exit_write(&vm->vm_vcpu_lock);
}
/* XXX init various other hardware parts (vlapic, vioapic, etc) */
- SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
- rw_exit_write(&vmm_softc->vm_lock);
+ /* Attempt to register the vm now that it's configured. */
+ rw_enter_write(&vmm_softc->vm_lock);
+
+ if (vmm_softc->vcpu_ct + vm->vm_vcpu_ct > vmm_softc->vcpu_max) {
+ /* Someone already took our capacity. */
+ printf("%s: maximum vcpus (%lu) reached\n", __func__,
+ vmm_softc->vcpu_max);
+ rw_exit_write(&vmm_softc->vm_lock);
+ vm_teardown(&vm);
+ return (ENOMEM);
+ }
+ /* Update the global index and identify the vm. */
+ vmm_softc->vm_idx++;
+ vm->vm_id = vmm_softc->vm_idx;
vcp->vcp_id = vm->vm_id;
+ /* Publish the vm into the list and update list count. */
+ SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
+ vmm_softc->vm_ct++;
+ vmm_softc->vcpu_ct += vm->vm_vcpu_ct;
+
+ refcnt_rele(&vm->vm_refcnt); /* No need for wake. */
+ rw_exit_write(&vmm_softc->vm_lock);
+
return (0);
}
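
The capacity check runs twice on purpose: once under the read lock so an over-limit request fails before any expensive pool_get()/vm_impl_init() work, and again under the write lock before publishing, because another creator may have claimed the remaining vcpu slots in between. Condensed from the function above (illustrative sketch):

	rw_enter_read(&vmm_softc->vm_lock);	/* phase 1: optimistic */
	if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
		rw_exit_read(&vmm_softc->vm_lock);
		return (ENOMEM);
	}
	rw_exit_read(&vmm_softc->vm_lock);

	/* ... expensive construction, no lock held ... */

	rw_enter_write(&vmm_softc->vm_lock);	/* phase 2: recheck, commit */
	if (vmm_softc->vcpu_ct + vm->vm_vcpu_ct > vmm_softc->vcpu_max) {
		rw_exit_write(&vmm_softc->vm_lock);
		vm_teardown(&vm);		/* lost the race: undo */
		return (ENOMEM);
	}
	vmm_softc->vcpu_ct += vm->vm_vcpu_ct;	/* publish */
	rw_exit_write(&vmm_softc->vm_lock);
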
@@ -3967,43 +4040,51 @@ vcpu_deinit(struct vcpu *vcpu)
*
* Tears down (destroys) the vm indicated by 'vm'.
*
+ * Assumes the vm is already removed from the global vm list (or was never
+ * added).
+ *
* Parameters:
* vm: vm to be torn down
*/
void
-vm_teardown(struct vm *vm)
+vm_teardown(struct vm **target)
{
+ size_t nvcpu = 0;
struct vcpu *vcpu, *tmp;
+ struct vm *vm = *target;
+ struct vmspace *vm_vmspace;
- rw_assert_wrlock(&vmm_softc->vm_lock);
- KERNEL_LOCK();
+ KERNEL_ASSERT_UNLOCKED();
+
+ refcnt_finalize(&vm->vm_refcnt, "vmteardown");
/* Free VCPUs */
rw_enter_write(&vm->vm_vcpu_lock);
SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp) {
+ refcnt_take(&vcpu->vc_refcnt);
+ refcnt_finalize(&vcpu->vc_refcnt, "vcputeardown");
+
SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
vcpu_deinit(vcpu);
+
pool_put(&vcpu_pool, vcpu);
- vmm_softc->vcpu_ct--;
+ nvcpu++;
}
+ rw_exit_write(&vm->vm_vcpu_lock);
vm_impl_deinit(vm);
/* teardown guest vmspace */
- if (vm->vm_vmspace != NULL) {
- uvmspace_free(vm->vm_vmspace);
+ KERNEL_LOCK();
+ vm_vmspace = vm->vm_vmspace;
+ if (vm_vmspace != NULL) {
vm->vm_vmspace = NULL;
+ uvmspace_free(vm_vmspace);
}
+ KERNEL_UNLOCK();
- if (vm->vm_id > 0) {
- vmm_softc->vm_ct--;
- if (vmm_softc->vm_ct < 1)
- vmm_stop();
- }
pool_put(&vm_pool, vm);
-
- KERNEL_UNLOCK();
- rw_exit_write(&vm->vm_vcpu_lock);
+ *target = NULL;
}
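
Teardown now keys off refcnt(9) instead of the vm list lock: refcnt_finalize(9) drops the caller's own reference and sleeps under the given wait message until every other holder has called refcnt_rele_wake(), so by the time it returns the structure is provably unshared and can be freed without further locking. The lifetime protocol in miniature:

	/* Reader side: */
	refcnt_take(&vm->vm_refcnt);		/* pin the object */
	/* ... use vm ... */
	refcnt_rele_wake(&vm->vm_refcnt);	/* unpin, wake a finalizer */

	/* Teardown side (vm already unlinked, so no new refs arrive): */
	refcnt_finalize(&vm->vm_refcnt, "vmteardown");
	pool_put(&vm_pool, vm);			/* sole owner: safe to free */
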
/*
@@ -4280,28 +4361,32 @@ vm_get_info(struct vm_info_params *vip)
struct vm_info_result *out;
struct vm *vm;
struct vcpu *vcpu;
- int i, j;
- size_t need;
+ int i = 0, j;
+ size_t need, vm_ct;
rw_enter_read(&vmm_softc->vm_lock);
- need = vmm_softc->vm_ct * sizeof(struct vm_info_result);
+ vm_ct = vmm_softc->vm_ct;
+ rw_exit_read(&vmm_softc->vm_lock);
+
+ need = vm_ct * sizeof(struct vm_info_result);
if (vip->vip_size < need) {
vip->vip_info_ct = 0;
vip->vip_size = need;
- rw_exit_read(&vmm_softc->vm_lock);
return (0);
}
out = malloc(need, M_DEVBUF, M_NOWAIT|M_ZERO);
if (out == NULL) {
vip->vip_info_ct = 0;
- rw_exit_read(&vmm_softc->vm_lock);
return (ENOMEM);
}
- i = 0;
- vip->vip_info_ct = vmm_softc->vm_ct;
+ vip->vip_info_ct = vm_ct;
+
+ rw_enter_read(&vmm_softc->vm_lock);
SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
+ refcnt_take(&vm->vm_refcnt);
+
out[i].vir_memory_size = vm->vm_memory_size;
out[i].vir_used_size =
pmap_resident_count(vm->vm_map->pmap) * PAGE_SIZE;
@@ -4309,20 +4394,28 @@ vm_get_info(struct vm_info_params *vip)
out[i].vir_id = vm->vm_id;
out[i].vir_creator_pid = vm->vm_creator_pid;
strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN);
+
rw_enter_read(&vm->vm_vcpu_lock);
for (j = 0; j < vm->vm_vcpu_ct; j++) {
out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN;
SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,
vc_vcpu_link) {
+ refcnt_take(&vcpu->vc_refcnt);
if (vcpu->vc_id == j)
out[i].vir_vcpu_state[j] =
vcpu->vc_state;
+ refcnt_rele_wake(&vcpu->vc_refcnt);
}
}
rw_exit_read(&vm->vm_vcpu_lock);
+
+ refcnt_rele_wake(&vm->vm_refcnt);
i++;
+ if (i == vm_ct)
+ break; /* Truncate to keep within bounds of 'out'. */
}
rw_exit_read(&vmm_softc->vm_lock);
+
if (copyout(out, vip->vip_info, need) == EFAULT) {
free(out, M_DEVBUF, need);
return (EFAULT);
@@ -4350,40 +4443,51 @@ vm_terminate(struct vm_terminate_params *vtp)
struct vm *vm;
struct vcpu *vcpu;
u_int old, next;
- int error;
+ int error, nvcpu, vm_id;
/*
* Find desired VM
*/
- rw_enter_write(&vmm_softc->vm_lock);
error = vm_find(vtp->vtp_vm_id, &vm);
-
- if (error == 0) {
- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- do {
- old = vcpu->vc_state;
- if (old == VCPU_STATE_RUNNING)
- next = VCPU_STATE_REQTERM;
- else if (old == VCPU_STATE_STOPPED)
- next = VCPU_STATE_TERMINATED;
- else /* must be REQTERM or TERMINATED */
- break;
- } while (old != atomic_cas_uint(&vcpu->vc_state,
- old, next));
- }
- rw_exit_read(&vm->vm_vcpu_lock);
- } else {
- rw_exit_write(&vmm_softc->vm_lock);
+ if (error)
return (error);
+
+ /* Stop all vcpus for the vm. */
+ rw_enter_read(&vm->vm_vcpu_lock);
+ SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+ refcnt_take(&vcpu->vc_refcnt);
+ do {
+ old = vcpu->vc_state;
+ if (old == VCPU_STATE_RUNNING)
+ next = VCPU_STATE_REQTERM;
+ else if (old == VCPU_STATE_STOPPED)
+ next = VCPU_STATE_TERMINATED;
+ else /* must be REQTERM or TERMINATED */
+ break;
+ } while (old != atomic_cas_uint(&vcpu->vc_state, old, next));
+ refcnt_rele_wake(&vcpu->vc_refcnt);
}
+ rw_exit_read(&vm->vm_vcpu_lock);
+ /* Pop the vm out of the global vm list. */
+ rw_enter_write(&vmm_softc->vm_lock);
SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link);
- if (vm->vm_vcpus_running == 0)
- vm_teardown(vm);
-
rw_exit_write(&vmm_softc->vm_lock);
+ vm_id = vm->vm_id;
+ nvcpu = vm->vm_vcpu_ct;
+
+ vm_teardown(&vm);
+
+ if (vm_id > 0) {
+ rw_enter_write(&vmm_softc->vm_lock);
+ vmm_softc->vm_ct--;
+ vmm_softc->vcpu_ct -= nvcpu;
+ if (vmm_softc->vm_ct < 1)
+ vmm_stop();
+ rw_exit_write(&vmm_softc->vm_lock);
+ }
+
return (0);
}
@@ -4409,50 +4513,34 @@ vm_run(struct vm_run_params *vrp)
{
struct vm *vm;
struct vcpu *vcpu;
- int ret = 0, error;
+ int ret = 0;
u_int old, next;
/*
* Find desired VM
*/
- rw_enter_read(&vmm_softc->vm_lock);
- error = vm_find(vrp->vrp_vm_id, &vm);
+ ret = vm_find(vrp->vrp_vm_id, &vm);
+ if (ret)
+ return (ret);
+
+ vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
+ if (vcpu == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
/*
- * Attempt to locate the requested VCPU. If found, attempt to
- * to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
+ * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
* Failure to make the transition indicates the VCPU is busy.
*/
- if (error == 0) {
- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vrp->vrp_vcpu_id)
- break;
- }
-
- if (vcpu != NULL) {
- old = VCPU_STATE_STOPPED;
- next = VCPU_STATE_RUNNING;
-
- if (atomic_cas_uint(&vcpu->vc_state, old, next) != old)
- ret = EBUSY;
- else {
- atomic_inc_int(&vm->vm_vcpus_running);
- rw_enter_write(&vcpu->vc_lock);
- }
- } else
- ret = ENOENT;
-
- rw_exit_read(&vm->vm_vcpu_lock);
+ rw_enter_write(&vcpu->vc_lock);
+ old = VCPU_STATE_STOPPED;
+ next = VCPU_STATE_RUNNING;
+ if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) {
+ ret = EBUSY;
+ goto out_unlock;
}
- rw_exit_read(&vmm_softc->vm_lock);
-
- if (error != 0)
- ret = error;
-
- /* Bail if errors detected in the previous steps */
- if (ret)
- return (ret);
+ atomic_inc_int(&vm->vm_vcpus_running);
/*
* We may be returning from userland helping us from the last exit.
@@ -4463,8 +4551,8 @@ vm_run(struct vm_run_params *vrp)
if (vrp->vrp_continue) {
if (copyin(vrp->vrp_exit, &vcpu->vc_exit,
sizeof(struct vm_exit)) == EFAULT) {
- rw_exit_write(&vcpu->vc_lock);
- return (EFAULT);
+ ret = EFAULT;
+ goto out_unlock;
}
}
@@ -4494,9 +4582,12 @@ vm_run(struct vm_run_params *vrp)
vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
vcpu->vc_state = VCPU_STATE_TERMINATED;
}
-
+out_unlock:
rw_exit_write(&vcpu->vc_lock);
-
+out:
+ if (vcpu != NULL)
+ refcnt_rele_wake(&vcpu->vc_refcnt);
+ refcnt_rele_wake(&vm->vm_refcnt);
return (ret);
}
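
The actual state change is one compare-and-swap: atomic_cas_uint(9) returns the value it found, so the transition succeeds only if the vcpu was observed in VCPU_STATE_STOPPED, and a concurrent runner or terminator turns into a clean EBUSY rather than a race. The primitive in isolation:

	old = VCPU_STATE_STOPPED;
	next = VCPU_STATE_RUNNING;
	if (atomic_cas_uint(&vcpu->vc_state, old, next) != old)
		return (EBUSY);		/* vcpu was not stopped */
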
@@ -4538,6 +4629,8 @@ vmm_fpurestore(struct vcpu *vcpu)
{
struct cpu_info *ci = curcpu();
+ rw_assert_wrlock(&vcpu->vc_lock);
+
/* save vmm's FPU state if we haven't already */
if (ci->ci_flags & CPUF_USERXSTATE) {
ci->ci_flags &= ~CPUF_USERXSTATE;
@@ -4573,6 +4666,8 @@ vmm_fpurestore(struct vcpu *vcpu)
void
vmm_fpusave(struct vcpu *vcpu)
{
+ rw_assert_wrlock(&vcpu->vc_lock);
+
if (xsave_mask) {
/* Save guest %xcr0 */
vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 933348a5b32..6fbb5e1bdc2 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmmvar.h,v 1.78 2022/06/26 06:49:09 dv Exp $ */
+/* $OpenBSD: vmmvar.h,v 1.79 2022/06/30 13:17:58 dv Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
*
@@ -894,57 +894,67 @@ struct vm;
/*
* Virtual CPU
+ *
+ * Methods used to protect vcpu struct members:
+ * a atomic operations
+ * I immutable after create
+ * K kernel lock
+ * r reference count
+ * v vcpu rwlock
+ * V vm struct's vcpu list lock (vm_vcpu_lock)
*/
struct vcpu {
/*
* Guest FPU state - this must remain as the first member of the struct
* to ensure 64-byte alignment (set up during vcpu_pool init)
*/
- struct savefpu vc_g_fpu;
+ struct savefpu vc_g_fpu; /* [v] */
/* VMCS / VMCB pointer */
- vaddr_t vc_control_va;
- paddr_t vc_control_pa;
+ vaddr_t vc_control_va; /* [I] */
+ paddr_t vc_control_pa; /* [I] */
/* VLAPIC pointer */
- vaddr_t vc_vlapic_va;
- uint64_t vc_vlapic_pa;
+ vaddr_t vc_vlapic_va; /* [I] */
+ uint64_t vc_vlapic_pa; /* [I] */
/* MSR bitmap address */
- vaddr_t vc_msr_bitmap_va;
- uint64_t vc_msr_bitmap_pa;
+ vaddr_t vc_msr_bitmap_va; /* [I] */
+ uint64_t vc_msr_bitmap_pa; /* [I] */
- struct vm *vc_parent;
- uint32_t vc_id;
- uint16_t vc_vpid;
- u_int vc_state;
- SLIST_ENTRY(vcpu) vc_vcpu_link;
+ struct vm *vc_parent; /* [I] */
+ uint32_t vc_id; /* [I] */
+ uint16_t vc_vpid; /* [I] */
+ u_int vc_state; /* [a] */
+ SLIST_ENTRY(vcpu) vc_vcpu_link; /* [V] */
- uint8_t vc_virt_mode;
+ uint8_t vc_virt_mode; /* [I] */
struct rwlock vc_lock;
- struct cpu_info *vc_last_pcpu;
- struct vm_exit vc_exit;
+ struct refcnt vc_refcnt; /* [a] */
+
+ struct cpu_info *vc_last_pcpu; /* [v] */
+ struct vm_exit vc_exit; /* [v] */
- uint16_t vc_intr;
- uint8_t vc_irqready;
+ uint16_t vc_intr; /* [v] */
+ uint8_t vc_irqready; /* [v] */
- uint8_t vc_fpuinited;
+ uint8_t vc_fpuinited; /* [v] */
- uint64_t vc_h_xcr0;
+ uint64_t vc_h_xcr0; /* [v] */
- struct vcpu_gueststate vc_gueststate;
+ struct vcpu_gueststate vc_gueststate; /* [v] */
uint8_t vc_event;
- uint32_t vc_pvclock_version;
- paddr_t vc_pvclock_system_gpa;
- uint32_t vc_pvclock_system_tsc_mul;
+ uint32_t vc_pvclock_version; /* [v] */
+ paddr_t vc_pvclock_system_gpa; /* [v] */
+ uint32_t vc_pvclock_system_tsc_mul; /* [v] */
/* Shadowed MSRs */
- uint64_t vc_shadow_pat;
+ uint64_t vc_shadow_pat; /* [v] */
- /* VMX only */
+ /* VMX only (all requiring [v]) */
uint64_t vc_vmx_basic;
uint64_t vc_vmx_entry_ctls;
uint64_t vc_vmx_true_entry_ctls;
@@ -964,11 +974,11 @@ struct vcpu {
uint8_t vc_vmx_vpid_enabled;
uint64_t vc_vmx_cr0_fixed1;
uint64_t vc_vmx_cr0_fixed0;
- uint32_t vc_vmx_vmcs_state;
+ uint32_t vc_vmx_vmcs_state; /* [a] */
#define VMCS_CLEARED 0
#define VMCS_LAUNCHED 1
- /* SVM only */
+ /* SVM only (all requiring [v]) */
vaddr_t vc_svm_hsa_va;
paddr_t vc_svm_hsa_pa;
vaddr_t vc_svm_ioio_va;