diff options
author | Dave Voutila <dv@cvs.openbsd.org> | 2023-05-13 23:15:29 +0000 |
---|---|---|
committer | Dave Voutila <dv@cvs.openbsd.org> | 2023-05-13 23:15:29 +0000 |
commit | 0450f77e1971ab09e8861ab1cab51909d3e1adbb (patch) | |
tree | 54405ab1af285f1b5301105e31cc0e9c675cda5d /sys/dev | |
parent | 7698cea1a4c2fa7eff4cc2405547ca04b294213b (diff) |
vmm(4)/vmd(8): switch to anonymous shared mappings.
While splitting out emulated virtio network and block devices into
separate processes, I originally used named mappings via shm_mkstemp(3).
While this functionally achieved the desired result, it had two
unintended consequences:
1) tearing down a vm process and its child processes required
excessive locking as the guest memory was tied into the VFS layer.
2) it was observed by mlarkin@ that actions in other parts of the
VFS layer could cause some of the guest memory to flush to storage,
possibly filling /tmp.
This commit adds a new vmm(4) ioctl dedicated to allowing a process to
request that the kernel share a mapping of guest memory into its own vm
space. This requires an open fd to /dev/vmm (requiring root) and
both the "vmm" and "proc" pledge(2) promises. In addition, the caller
must know enough about the original memory ranges to reconstruct them
to match the vm's ranges.
Tested with help from Mischa Peters.
ok mlarkin@
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/vmm/vmm.c | 85 | ||||
-rw-r--r-- | sys/dev/vmm/vmm.h | 12 |
2 files changed, 94 insertions, 3 deletions
diff --git a/sys/dev/vmm/vmm.c b/sys/dev/vmm/vmm.c index d46b3431081..4d4866f70dc 100644 --- a/sys/dev/vmm/vmm.c +++ b/sys/dev/vmm/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.1 2023/04/26 15:11:21 mlarkin Exp $ */ +/* $OpenBSD: vmm.c,v 1.2 2023/05/13 23:15:28 dv Exp $ */ /* * Copyright (c) 2014-2023 Mike Larkin <mlarkin@openbsd.org> * @@ -262,6 +262,9 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) case VMM_IOC_WRITEVMPARAMS: ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1); break; + case VMM_IOC_SHAREMEM: + ret = vm_share_mem((struct vm_sharemem_params *)data, p); + break; default: ret = vmmioctl_machdep(dev, cmd, data, flag, p); break; @@ -286,6 +289,7 @@ pledge_ioctl_vmm(struct proc *p, long com) switch (com) { case VMM_IOC_CREATE: case VMM_IOC_INFO: + case VMM_IOC_SHAREMEM: /* The "parent" process in vmd forks and manages VMs */ if (p->p_p->ps_pledge & PLEDGE_PROC) return (0); @@ -780,3 +784,82 @@ vcpu_must_stop(struct vcpu *vcpu) return (1); return (0); } + +/* + * vm_share_mem + * + * Share a uvm mapping for the vm guest memory ranges into the calling process. + * + * Return values: + * 0: if successful + * ENOENT: if the vm cannot be found by vm_find + * EPERM: if the vm cannot be accessed by the current process + * EINVAL: if the provide memory ranges fail checks + * ENOMEM: if uvm_share fails to find available memory in the destination map + */ +int +vm_share_mem(struct vm_sharemem_params *vsp, struct proc *p) +{ + int ret = EINVAL; + size_t i, n; + struct vm *vm; + struct vm_mem_range *src, *dst; + + ret = vm_find(vsp->vsp_vm_id, &vm); + if (ret) + return (ret); + + /* Check we have the expected number of ranges. */ + if (vm->vm_nmemranges != vsp->vsp_nmemranges) + goto out; + n = vm->vm_nmemranges; + + /* Check their types, sizes, and gpa's (implying page alignment). 
*/ + for (i = 0; i < n; i++) { + src = &vm->vm_memranges[i]; + dst = &vsp->vsp_memranges[i]; + + /* + * The vm memranges were already checked during creation, so + * compare to them to confirm validity of mapping request. + */ + if (src->vmr_type != dst->vmr_type) + goto out; + if (src->vmr_gpa != dst->vmr_gpa) + goto out; + if (src->vmr_size != dst->vmr_size) + goto out; + + /* Check our intended destination is page-aligned. */ + if (dst->vmr_va & PAGE_MASK) + goto out; + } + + /* + * Share each range individually with the calling process. We do + * not need PROC_EXEC as the emulated devices do not need to execute + * instructions from guest memory. + */ + for (i = 0; i < n; i++) { + src = &vm->vm_memranges[i]; + dst = &vsp->vsp_memranges[i]; + + /* Skip MMIO range. */ + if (src->vmr_type == VM_MEM_MMIO) + continue; + + DPRINTF("sharing gpa=0x%lx for pid %d @ va=0x%lx\n", + src->vmr_gpa, p->p_p->ps_pid, dst->vmr_va); + ret = uvm_share(&p->p_vmspace->vm_map, dst->vmr_va, + PROT_READ | PROT_WRITE, vm->vm_map, src->vmr_gpa, + src->vmr_size); + if (ret) { + printf("%s: uvm_share failed (%d)\n", __func__, ret); + break; + } + } + ret = 0; +out: + refcnt_rele_wake(&vm->vm_refcnt); + return (ret); +} diff --git a/sys/dev/vmm/vmm.h b/sys/dev/vmm/vmm.h index d2355d42b44..38b4a3f85f1 100644 --- a/sys/dev/vmm/vmm.h +++ b/sys/dev/vmm/vmm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.h,v 1.2 2023/04/26 16:13:19 mlarkin Exp $ */ +/* $OpenBSD: vmm.h,v 1.3 2023/05/13 23:15:28 dv Exp $ */ /* * Copyright (c) 2014-2023 Mike Larkin <mlarkin@openbsd.org> * @@ -76,6 +76,13 @@ struct vm_resetcpu_params { struct vcpu_reg_state vrp_init_state; }; +struct vm_sharemem_params { + /* Input parameters to VMM_IOC_SHAREMEM */ + uint32_t vsp_vm_id; + size_t vsp_nmemranges; + struct vm_mem_range vsp_memranges[VMM_MAX_MEM_RANGES]; +}; + /* IOCTL definitions */ #define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */ #define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */ 
@@ -88,7 +95,7 @@ struct vm_resetcpu_params { #define VMM_IOC_READVMPARAMS _IOWR('V', 9, struct vm_rwvmparams_params) /* Set VM params */ #define VMM_IOC_WRITEVMPARAMS _IOW('V', 10, struct vm_rwvmparams_params) - +#define VMM_IOC_SHAREMEM _IOW('V', 11, struct vm_sharemem_params) #ifdef _KERNEL @@ -194,6 +201,7 @@ int vm_get_info(struct vm_info_params *); int vm_terminate(struct vm_terminate_params *); int vm_resetcpu(struct vm_resetcpu_params *); int vcpu_must_stop(struct vcpu *); +int vm_share_mem(struct vm_sharemem_params *, struct proc *); #endif /* _KERNEL */ #endif /* DEV_VMM_H */ |