diff options
-rw-r--r-- | usr.sbin/vmd/config.c | 27 | ||||
-rw-r--r-- | usr.sbin/vmd/vioblk.c | 7 | ||||
-rw-r--r-- | usr.sbin/vmd/vionet.c | 7 | ||||
-rw-r--r-- | usr.sbin/vmd/virtio.c | 83 | ||||
-rw-r--r-- | usr.sbin/vmd/vm.c | 18 | ||||
-rw-r--r-- | usr.sbin/vmd/vmd.c | 38 | ||||
-rw-r--r-- | usr.sbin/vmd/vmm.c | 39 |
7 files changed, 108 insertions, 111 deletions
diff --git a/usr.sbin/vmd/config.c b/usr.sbin/vmd/config.c index 0022ce2a28c..3fb2c9b4003 100644 --- a/usr.sbin/vmd/config.c +++ b/usr.sbin/vmd/config.c @@ -1,4 +1,4 @@ -/* $OpenBSD: config.c,v 1.74 2024/01/18 14:49:59 claudio Exp $ */ +/* $OpenBSD: config.c,v 1.75 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> @@ -284,7 +284,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) { if (vm->vm_kernel_path != NULL) { /* Open external kernel for child */ - kernfd = open(vm->vm_kernel_path, O_RDONLY); + kernfd = open(vm->vm_kernel_path, O_RDONLY | O_CLOEXEC); if (kernfd == -1) { ret = errno; log_warn("%s: can't open kernel or BIOS " @@ -301,7 +301,8 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) * license. */ if (kernfd == -1) { - if ((kernfd = open(VM_DEFAULT_BIOS, O_RDONLY)) == -1) { + if ((kernfd = open(VM_DEFAULT_BIOS, + O_RDONLY | O_CLOEXEC)) == -1) { log_warn("can't open %s", VM_DEFAULT_BIOS); ret = VMD_BIOS_MISSING; goto fail; @@ -341,14 +342,17 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) } } - /* Open disk images for child */ + /* + * Open disk images for child. Don't set O_CLOEXEC as these must be + * explicitly closed by the vm process during virtio subprocess launch. + */ for (i = 0 ; i < vmc->vmc_ndisks; i++) { if (strlcpy(path, vmc->vmc_disks[i], sizeof(path)) >= sizeof(path)) log_warnx("disk path %s too long", vmc->vmc_disks[i]); memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases)); - oflags = O_RDWR|O_EXLOCK|O_NONBLOCK; - aflags = R_OK|W_OK; + oflags = O_RDWR | O_EXLOCK | O_NONBLOCK; + aflags = R_OK | W_OK; for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { /* Stat disk[i] to ensure it is a regular file */ if ((diskfds[i][j] = open(path, oflags)) == -1) { @@ -372,7 +376,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) * All writes should go to the top image, allowing them * to be shared. */ - oflags = O_RDONLY|O_NONBLOCK; + oflags = O_RDONLY | O_NONBLOCK; aflags = R_OK; n = virtio_get_base(diskfds[i][j], base, sizeof(base), vmc->vmc_disktypes[i], path); @@ -407,7 +411,9 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) /* * Either open the requested tap(4) device or get - * the next available one. + * the next available one. Don't set O_CLOEXEC as these + * should be closed by the vm process during virtio device + * launch. */ if (s != NULL) { snprintf(path, PATH_MAX, "/dev/%s", s); @@ -454,7 +460,10 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid) vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK); } - /* Open TTY */ + /* + * Open TTY. Duplicate the fd before sending so the privileged parent + * process can perform permissions cleanup of the pty on vm termination. + */ if (vm->vm_ttyname[0] == '\0') { if (vm_opentty(vm) == -1) { log_warn("%s: can't open tty %s", __func__, diff --git a/usr.sbin/vmd/vioblk.c b/usr.sbin/vmd/vioblk.c index b1d393f8a8a..d97e5cc113a 100644 --- a/usr.sbin/vmd/vioblk.c +++ b/usr.sbin/vmd/vioblk.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vioblk.c,v 1.11 2024/02/04 14:54:51 dv Exp $ */ +/* $OpenBSD: vioblk.c,v 1.12 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2023 Dave Voutila <dv@openbsd.org> @@ -176,11 +176,6 @@ vioblk_main(int fd, int fd_vmm) /* Configure our sync channel event handler. */ log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, dev.sync_fd); - if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto fail; - } imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); dev.sync_iev.handler = handle_sync_io; dev.sync_iev.data = &dev; diff --git a/usr.sbin/vmd/vionet.c b/usr.sbin/vmd/vionet.c index 5f7e146b32b..5ed06fb8a4c 100644 --- a/usr.sbin/vmd/vionet.c +++ b/usr.sbin/vmd/vionet.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vionet.c,v 1.9 2024/02/03 21:41:35 dv Exp $ */ +/* $OpenBSD: vionet.c,v 1.10 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2023 Dave Voutila <dv@openbsd.org> @@ -203,11 +203,6 @@ vionet_main(int fd, int fd_vmm) /* Configure our sync channel event handler. */ log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, dev.sync_fd); - if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto fail; - } imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); dev.sync_iev.handler = handle_sync_io; dev.sync_iev.data = &dev; diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c index cca6e041046..afe3dd8f7a4 100644 --- a/usr.sbin/vmd/virtio.c +++ b/usr.sbin/vmd/virtio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio.c,v 1.110 2023/11/03 11:16:43 dv Exp $ */ +/* $OpenBSD: virtio.c,v 1.111 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> @@ -73,6 +73,7 @@ SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs; static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *); static void virtio_dispatch_dev(int, short, void *); static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *); +static int virtio_dev_closefds(struct virtio_dev *); const char * virtio_reg_name(uint8_t reg) @@ -1303,6 +1304,7 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) int data_fds[VM_MAX_BASE_PER_DISK], sync_fds[2], async_fds[2], ret = 0; size_t i, data_fds_sz, sz = 0; struct viodev_msg msg; + struct virtio_dev *dev_entry; struct imsg imsg; struct imsgev *iev = &dev->sync_iev; @@ -1326,27 +1328,17 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) } /* We need two channels: one synchronous (IO reads) and one async. */ - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, sync_fds) == -1) { + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, + sync_fds) == -1) { log_warn("failed to create socketpair"); return (errno); } - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, async_fds) == -1) { + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, + async_fds) == -1) { log_warn("failed to create async socketpair"); return (errno); } - /* Keep communication channels open after exec. */ - if (fcntl(sync_fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - if (fcntl(async_fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcnt", __func__); - goto err; - } - /* Fork... */ dev_pid = fork(); if (dev_pid == -1) { @@ -1371,13 +1363,6 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) for (i = 0; i < data_fds_sz; i++) close_fd(data_fds[i]); - /* Set our synchronous channel to non-blocking. */ - if (fcntl(sync_fds[0], F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - /* 1. Send over our configured device. */ log_debug("%s: sending '%c' type device struct", __func__, dev->dev_type); @@ -1446,15 +1431,21 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev) close_fd(async_fds[0]); close_fd(sync_fds[0]); + /* Close pty. Virtio devices do not need it. */ + close_fd(vm->vm_tty); + vm->vm_tty = -1; + + if (vm->vm_cdrom != -1) { + close_fd(vm->vm_cdrom); + vm->vm_cdrom = -1; + } + /* Keep data file descriptors open after exec. */ - for (i = 0; i < data_fds_sz; i++) { - log_debug("%s: marking fd %d !close-on-exec", __func__, - data_fds[i]); - if (fcntl(data_fds[i], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } + SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) { + if (dev_entry == dev) + continue; + if (virtio_dev_closefds(dev_entry) == -1) + fatalx("unable to close other virtio devs"); } memset(&nargv, 0, sizeof(nargv)); @@ -1520,11 +1511,6 @@ vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *)) log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__, dev->dev_type, fd); - if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { - log_warn("failed to set nonblocking mode on vm device pipe"); - return (-1); - } - imsg_init(&iev->ibuf, fd); iev->handler = cb; iev->data = dev; @@ -1764,3 +1750,30 @@ virtio_deassert_pic_irq(struct virtio_dev *dev, int vcpu) if (ret == -1) log_warnx("%s: failed to deassert irq %d", __func__, dev->irq); } + +/* + * Close all underlying file descriptors for a given virtio device. + */ +static int +virtio_dev_closefds(struct virtio_dev *dev) +{ + size_t i; + + switch (dev->dev_type) { + case VMD_DEVTYPE_DISK: + for (i = 0; i < dev->vioblk.ndisk_fd; i++) { + close_fd(dev->vioblk.disk_fd[i]); + dev->vioblk.disk_fd[i] = -1; + } + break; + case VMD_DEVTYPE_NET: + close_fd(dev->vionet.data_fd); + dev->vionet.data_fd = -1; + break; + default: + log_warnx("%s: invalid device type", __func__); + return (-1); + } + + return (0); +} diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c index 8d20b4c3a93..772d044604d 100644 --- a/usr.sbin/vmd/vm.c +++ b/usr.sbin/vmd/vm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm.c,v 1.96 2024/01/18 14:49:59 claudio Exp $ */ +/* $OpenBSD: vm.c,v 1.97 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> @@ -221,7 +221,7 @@ static const struct vcpu_reg_state vcpu_init_flat16 = { * fd_vmm: file descriptor for communicating with vmm(4) device */ void -vm_main(int fd, int vmm_fd) +vm_main(int fd, int fd_vmm) { struct vm_create_params *vcp = NULL; struct vmd_vm vm; @@ -229,6 +229,11 @@ vm_main(int fd, int vmm_fd) int ret = 0; /* + * The vm process relies on global state. Set the fd for /dev/vmm. + */ + env->vmd_fd = fd_vmm; + + /* * We aren't root, so we can't chroot(2). Use unveil(2) instead. */ if (unveil(env->argv0, "x") == -1) @@ -277,11 +282,6 @@ vm_main(int fd, int vmm_fd) vcp->vcp_name); _exit(EINVAL); } - if (fcntl(vm.vm_kernel, F_SETFL, O_NONBLOCK) == -1) { - ret = errno; - log_warn("failed to set nonblocking mode on boot fd"); - _exit(ret); - } } ret = start_vm(&vm, fd); @@ -2454,7 +2454,7 @@ vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *)) memset(p, 0, sizeof(struct vm_dev_pipe)); - ret = pipe(fds); + ret = pipe2(fds, O_CLOEXEC); if (ret) fatal("failed to create vm_dev_pipe pipe"); @@ -2507,7 +2507,7 @@ vm_pipe_recv(struct vm_dev_pipe *p) } /* - * Re-map the guest address space using the shared memory file descriptor. + * Re-map the guest address space using vmm(4)'s VMM_IOC_SHARE * * Returns 0 on success, non-zero in event of failure. */ diff --git a/usr.sbin/vmd/vmd.c b/usr.sbin/vmd/vmd.c index f821b7e3c8f..887e1cc9bf8 100644 --- a/usr.sbin/vmd/vmd.c +++ b/usr.sbin/vmd/vmd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmd.c,v 1.154 2024/02/04 14:56:45 dv Exp $ */ +/* $OpenBSD: vmd.c,v 1.155 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org> @@ -800,6 +800,8 @@ main(int argc, char **argv) if ((env = calloc(1, sizeof(*env))) == NULL) fatal("calloc: env"); + env->vmd_fd = -1; + env->vmd_fd6 = -1; while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { switch (ch) { @@ -891,7 +893,6 @@ main(int argc, char **argv) ps = &env->vmd_ps; ps->ps_env = env; - env->vmd_fd = vmm_fd; if (config_init(env) == -1) fatal("failed to initialize configuration"); @@ -924,7 +925,7 @@ main(int argc, char **argv) /* Open /dev/vmm early. */ if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { - env->vmd_fd = open(VMM_NODE, O_RDWR); + env->vmd_fd = open(VMM_NODE, O_RDWR | O_CLOEXEC); if (env->vmd_fd == -1) fatal("%s", VMM_NODE); } @@ -1014,9 +1015,6 @@ vmd_configure(void) int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE}; size_t ncpus_sz = sizeof(ncpus); - if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1) - fatal("open %s", PATH_PTMDEV); - /* * pledge in the parent process: * stdio - for malloc and basic I/O including events. @@ -1034,6 +1032,9 @@ vmd_configure(void) " chown fattr flock", NULL) == -1) fatal("pledge"); + if ((env->vmd_ptmfd = getptmfd()) == -1) + fatal("getptmfd %s", PATH_PTMDEV); + if (parse_config(env->vmd_conffile) == -1) { proc_kill(&env->vmd_ps); exit(1); @@ -1846,32 +1847,29 @@ vm_checkaccess(int fd, unsigned int uflag, uid_t uid, int amode) int vm_opentty(struct vmd_vm *vm) { - struct ptmget ptm; struct stat st; struct group *gr; uid_t uid; gid_t gid; mode_t mode; - int on; + int on = 1, tty_slave; /* * Open tty with pre-opened PTM fd */ - if ((ioctl(env->vmd_ptmfd, PTMGET, &ptm) == -1)) + if (fdopenpty(env->vmd_ptmfd, &vm->vm_tty, &tty_slave, vm->vm_ttyname, + NULL, NULL) == -1) { + log_warn("fdopenpty"); return (-1); + } + close(tty_slave); /* * We use user ioctl(2) mode to pass break commands. */ - on = 1; - if (ioctl(ptm.cfd, TIOCUCNTL, &on) == -1) - fatal("could not enable user ioctl mode"); - - vm->vm_tty = ptm.cfd; - close(ptm.sfd); - if (strlcpy(vm->vm_ttyname, ptm.sn, sizeof(vm->vm_ttyname)) - >= sizeof(vm->vm_ttyname)) { - log_warnx("%s: truncated ttyname", __func__); + if (ioctl(vm->vm_tty, TIOCUCNTL, &on) == -1) { + log_warn("could not enable user ioctl mode on %s", + vm->vm_ttyname); goto fail; } @@ -1896,8 +1894,10 @@ vm_opentty(struct vmd_vm *vm) * Change ownership and mode of the tty as required. * Loosely based on the implementation of sshpty.c */ - if (stat(vm->vm_ttyname, &st) == -1) + if (fstat(vm->vm_tty, &st) == -1) { + log_warn("fstat failed for %s", vm->vm_ttyname); goto fail; + } if (st.st_uid != uid || st.st_gid != gid) { if (chown(vm->vm_ttyname, uid, gid) == -1) { diff --git a/usr.sbin/vmd/vmm.c b/usr.sbin/vmd/vmm.c index 1f7678fbaff..dcd9a91fe4f 100644 --- a/usr.sbin/vmd/vmm.c +++ b/usr.sbin/vmd/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.118 2024/02/04 14:57:00 dv Exp $ */ +/* $OpenBSD: vmm.c,v 1.119 2024/02/05 21:58:09 dv Exp $ */ /* * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> @@ -52,6 +52,7 @@ #include "vmd.h" #include "vmm.h" #include "atomicio.h" +#include "proc.h" void vmm_sighdlr(int, short, void *); int vmm_start_vm(struct imsg *, uint32_t *, pid_t *); @@ -467,8 +468,14 @@ vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *)) { struct imsgev *iev = &vm->vm_iev; - if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { - log_warn("failed to set nonblocking mode on vm pipe"); + /* + * Set to close-on-exec as vmm_pipe is used after fork+exec to + * establish async ipc between vm and vmd's vmm process. This + * prevents future vm processes or virtio subprocesses from + * inheriting this control channel. + */ + if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { + log_warn("failed to set close-on-exec for vmm ipc channel"); return (-1); } @@ -661,16 +668,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) } } - if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1) + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds) + == -1) fatal("socketpair"); - /* Keep our channel open after exec. */ - if (fcntl(fds[1], F_SETFD, 0)) { - ret = errno; - log_warn("%s: fcntl", __func__); - goto err; - } - /* Start child vmd for this VM (fork, chroot, drop privs) */ vm_pid = fork(); if (vm_pid == -1) { @@ -745,7 +746,6 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) /* Wire up our pipe into the event handling. */ if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1) fatal("setup vm pipe"); - } else { /* Child. Create a new session. */ if (setsid() == -1) @@ -764,21 +764,6 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) close(fd); } - /* Toggle all fds to not close on exec. */ - for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) - for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) - if (vm->vm_disks[i][j] != -1) - fcntl(vm->vm_disks[i][j], F_SETFD, 0); - for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) - fcntl(vm->vm_ifs[i].vif_fd, F_SETFD, 0); - if (vm->vm_kernel != -1) - fcntl(vm->vm_kernel, F_SETFD, 0); - if (vm->vm_cdrom != -1) - fcntl(vm->vm_cdrom, F_SETFD, 0); - if (vm->vm_tty != -1) - fcntl(vm->vm_tty, F_SETFD, 0); - fcntl(env->vmd_fd, F_SETFD, 0); /* vmm device fd */ - /* * Prepare our new argv for execvp(2) with the fd of our open * pipe to the parent/vmm process as an argument. |