Dave Voutila <d...@sisu.io> writes:
> It turns out not only does vmd have numerous error paths for handling > when something is amiss with a guest, most of the paths don't check if > it's a known vm defined in vm.conf. > > As a result, vmd often removes the vm from the SLIST of vm's meaning > one can't easily attempt to start it again or see it in vmctl's status > output. > > A simple reproduction: > > 1. define a vm with memory > 4gb in vm.conf > 2. run vmd in the foreground (doas vmd -d) so it's not started by rc.d > 3. try to start with `vmctl start -c ${vm_name}`, you should trigger > an ENOMEM and get the "Cannot allocate memory" message from vmctl. > 4. try to start the same vm again...now you get EPERM! > 5. the vm is no longer visible in the output from `vmctl status` :( > > The problem is most of the error paths call vm_remove, which not only > tears down the vm via vm_stop, but also removes it from the vm list and > frees it. Only clean stops or restarts seem to perform this check > currently. > > Below diff refactors into checking if the vm is defined in the global > config before deciding to call vm_stop or vm_remove. Slight tweak... __func__->caller to actually pass the correct name to vm_{stop,remove}() from vm_terminate() diff refs/heads/master refs/heads/vmd-accounting commit - d4e23fe7544b01187ebf3ac8ae32e955445ee666 commit + 46503195403bfab50cd34bd8682f35a17d54d03d blob - 6bffb2519a31464836aa573dbccb7aa14ea97722 blob + f30dc14de1ff9d5cf121cbc08b6db183a06d0c07 --- usr.sbin/vmd/vmd.c +++ usr.sbin/vmd/vmd.c @@ -67,6 +67,8 @@ struct vmd *env; int vm_claimid(const char *, int, uint32_t *); void start_vm_batch(int, short, void*); +static inline void vm_terminate(struct vmd_vm *, const char *); + struct vmd *env; static struct privsep_proc procs[] = { @@ -395,14 +397,14 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc errno = vmr.vmr_result; log_warn("%s: failed to forward vm result", vcp->vcp_name); - vm_remove(vm, __func__); + vm_terminate(vm, __func__); return (-1); } } if (vmr.vmr_result) { log_warnx("%s: failed to start vm", vcp->vcp_name); - vm_remove(vm, __func__); + vm_terminate(vm, __func__); errno = vmr.vmr_result; break; } @@ -410,7 +412,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc /* Now configure all the interfaces */ if (vm_priv_ifconfig(ps, vm) == -1) { log_warn("%s: failed to configure vm", vcp->vcp_name); - vm_remove(vm, __func__); + vm_terminate(vm, __func__); break; } @@ -441,10 +443,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc log_info("%s: sent vm %d successfully.", vm->vm_params.vmc_params.vcp_name, vm->vm_vmid); - if (vm->vm_from_config) - vm_stop(vm, 0, __func__); - else - vm_remove(vm, __func__); + vm_terminate(vm, __func__); } /* Send a response if a control client is waiting for it */ @@ -470,10 +469,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc } if (vmr.vmr_result != EAGAIN || vm->vm_params.vmc_bootdevice) { - if (vm->vm_from_config) - vm_stop(vm, 0, __func__); - else - vm_remove(vm, __func__); + vm_terminate(vm, __func__); } else { /* Stop VM instance but keep the tty open */ vm_stop(vm, 1, __func__); @@ -509,7 +505,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { log_debug("%s: GET_INFO_VM failed for vm %d, removing", __func__, vm->vm_vmid); - vm_remove(vm, __func__); + vm_terminate(vm, __func__); return (-1); } break; @@ -545,7 +541,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc sizeof(vir)) == -1) { log_debug("%s: GET_INFO_VM_END failed", __func__); - vm_remove(vm, __func__); + vm_terminate(vm, __func__); return (-1); } } @@ -1943,3 +1943,14 @@ getmonotime(struct timeval *tv) TIMESPEC_TO_TIMEVAL(tv, &ts); } + +static inline void +vm_terminate(struct vmd_vm *vm, const char *caller) +{ + if (vm->vm_from_config) + vm_stop(vm, 0, caller); + else { + /* vm_remove calls vm_stop */ + vm_remove(vm, caller); + } +}