On Sun, Jan 15, 2023 at 09:08:29AM -0500, Dave Voutila wrote: > > Dave Voutila <d...@sisu.io> writes: > > > It turns out not only does vmd have numerous error paths for handling > > when something is amiss with a guest, most of the paths don't check if > > it's a known vm defined in vm.conf. > > > > As a result, vmd often removes the vm from the SLIST of vm's meaning > > one can't easily attempt to start it again or see it in vmctl's status > > output. > > > > A simple reproduction: > > > > 1. define a vm with memory > 4gb in vm.conf > > 2. run vmd in the foreground (doas vmd -d) so it's not started by rc.d > > 3. try to start with `vmctl start -c ${vm_name}`, you should trigger > > an ENOMEM and get the "Cannot allocate memory" message from vmctl. > > 4. try to start the same vm again...now you get EPERM! > > 5. the vm is no longer visible in the output from `vmctl status` :( > > > > The problem is most of the error paths call vm_remove, which not only > > tears down the vm via vm_stop, but also removes it from the vm list and > > frees it. Only clean stops or restarts seem to perform this check > > currently. > > > > Below diff refactors into checking if the vm is defined in the global > > config before deciding to call vm_stop or vm_remove. > > Slight tweak... __func__->caller to actually pass the correct name to > vm_{stop,remove}() from vm_terminate() >
Finally getting caught up. ok mlarkin on this if you didn't commit it already. -ml > > diff refs/heads/master refs/heads/vmd-accounting > commit - d4e23fe7544b01187ebf3ac8ae32e955445ee666 > commit + 46503195403bfab50cd34bd8682f35a17d54d03d > blob - 6bffb2519a31464836aa573dbccb7aa14ea97722 > blob + f30dc14de1ff9d5cf121cbc08b6db183a06d0c07 > --- usr.sbin/vmd/vmd.c > +++ usr.sbin/vmd/vmd.c > @@ -67,6 +67,8 @@ struct vmd *env; > int vm_claimid(const char *, int, uint32_t *); > void start_vm_batch(int, short, void*); > > +static inline void vm_terminate(struct vmd_vm *, const char *); > + > struct vmd *env; > > static struct privsep_proc procs[] = { > @@ -395,14 +397,14 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > errno = vmr.vmr_result; > log_warn("%s: failed to forward vm result", > vcp->vcp_name); > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > return (-1); > } > } > > if (vmr.vmr_result) { > log_warnx("%s: failed to start vm", vcp->vcp_name); > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > errno = vmr.vmr_result; > break; > } > @@ -410,7 +412,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > /* Now configure all the interfaces */ > if (vm_priv_ifconfig(ps, vm) == -1) { > log_warn("%s: failed to configure vm", vcp->vcp_name); > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > break; > } > > @@ -441,10 +443,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > log_info("%s: sent vm %d successfully.", > vm->vm_params.vmc_params.vcp_name, > vm->vm_vmid); > - if (vm->vm_from_config) > - vm_stop(vm, 0, __func__); > - else > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > } > > /* Send a response if a control client is waiting for it */ > @@ -470,10 +469,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > } > if (vmr.vmr_result != EAGAIN || > vm->vm_params.vmc_bootdevice) { > - if (vm->vm_from_config) > - vm_stop(vm, 0, __func__); > - else > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > } else { > /* Stop VM instance but keep the tty open */ > vm_stop(vm, 1, __func__); > @@ -509,7 +505,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) { > log_debug("%s: GET_INFO_VM failed for vm %d, removing", > __func__, vm->vm_vmid); > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > return (-1); > } > break; > @@ -545,7 +541,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc > sizeof(vir)) == -1) { > log_debug("%s: GET_INFO_VM_END failed", > __func__); > - vm_remove(vm, __func__); > + vm_terminate(vm, __func__); > return (-1); > } > } > > @@ -1943,3 +1943,14 @@ getmonotime(struct timeval *tv) > > TIMESPEC_TO_TIMEVAL(tv, &ts); > } > + > +static inline void > +vm_terminate(struct vmd_vm *vm, const char *caller) > +{ > + if (vm->vm_from_config) > + vm_stop(vm, 0, caller); > + else { > + /* vm_remove calls vm_stop */ > + vm_remove(vm, caller); > + } > +} >