It turns out not only does vmd have numerous error paths for handling
when something is amiss with a guest, most of the paths don't check if
it's a known vm defined in vm.conf.

As a result, vmd often removes the vm from the SLIST of vm's meaning
one can't easily attempt to start it again or see it in vmctl's status
output.

A simple reproduction:

  1. define a vm with memory > 4gb in vm.conf
  2. run vmd in the foreground (doas vmd -d) so it's not started by rc.d
  3. try to start with `vmctl start -c ${vm_name}`, you should trigger
     an ENOMEM and get the "Cannot allocate memory" message from vmctl.
  4. try to start the same vm again...now you get EPERM!
  5. the vm is no longer visible in the output from `vmctl status` :(

The problem is most of the error paths call vm_remove, which not only
tears down the vm via vm_stop, but also removes it from the vm list and
frees it. Only clean stops or restarts seem to perform this check
currently.

Below diff refactors into checking if the vm is defined in the global
config before deciding to call vm_stop or vm_remove.

ok?
-dv


diff refs/heads/master refs/heads/vmd-accounting
commit - d4e23fe7544b01187ebf3ac8ae32e955445ee666
commit + 46503195403bfab50cd34bd8682f35a17d54d03d
blob - 6bffb2519a31464836aa573dbccb7aa14ea97722
blob + f30dc14de1ff9d5cf121cbc08b6db183a06d0c07
--- usr.sbin/vmd/vmd.c
+++ usr.sbin/vmd/vmd.c
@@ -67,6 +67,8 @@ struct vmd    *env;
 int     vm_claimid(const char *, int, uint32_t *);
 void    start_vm_batch(int, short, void*);

+static inline void vm_terminate(struct vmd_vm *, const char *);
+
 struct vmd     *env;

 static struct privsep_proc procs[] = {
@@ -395,14 +397,14 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                                errno = vmr.vmr_result;
                                log_warn("%s: failed to forward vm result",
                                    vcp->vcp_name);
-                               vm_remove(vm, __func__);
+                               vm_terminate(vm, __func__);
                                return (-1);
                        }
                }

                if (vmr.vmr_result) {
                        log_warnx("%s: failed to start vm", vcp->vcp_name);
-                       vm_remove(vm, __func__);
+                       vm_terminate(vm, __func__);
                        errno = vmr.vmr_result;
                        break;
                }
@@ -410,7 +412,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                /* Now configure all the interfaces */
                if (vm_priv_ifconfig(ps, vm) == -1) {
                        log_warn("%s: failed to configure vm", vcp->vcp_name);
-                       vm_remove(vm, __func__);
+                       vm_terminate(vm, __func__);
                        break;
                }

@@ -441,10 +443,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                        log_info("%s: sent vm %d successfully.",
                            vm->vm_params.vmc_params.vcp_name,
                            vm->vm_vmid);
-                       if (vm->vm_from_config)
-                               vm_stop(vm, 0, __func__);
-                       else
-                               vm_remove(vm, __func__);
+                       vm_terminate(vm, __func__);
                }

                /* Send a response if a control client is waiting for it */
@@ -470,10 +469,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                }
                if (vmr.vmr_result != EAGAIN ||
                    vm->vm_params.vmc_bootdevice) {
-                       if (vm->vm_from_config)
-                               vm_stop(vm, 0, __func__);
-                       else
-                               vm_remove(vm, __func__);
+                       vm_terminate(vm, __func__);
                } else {
                        /* Stop VM instance but keep the tty open */
                        vm_stop(vm, 1, __func__);
@@ -509,7 +505,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                    imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
                        log_debug("%s: GET_INFO_VM failed for vm %d, removing",
                            __func__, vm->vm_vmid);
-                       vm_remove(vm, __func__);
+                       vm_terminate(vm, __func__);
                        return (-1);
                }
                break;
@@ -545,7 +541,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
                                    sizeof(vir)) == -1) {
                                        log_debug("%s: GET_INFO_VM_END failed",
                                            __func__);
-                                       vm_remove(vm, __func__);
+                                       vm_terminate(vm, __func__);
                                        return (-1);
                                }
                        }
@@ -1943,3 +1943,14 @@ getmonotime(struct timeval *tv)

        TIMESPEC_TO_TIMEVAL(tv, &ts);
 }
+
+static inline void
+vm_terminate(struct vmd_vm *vm, const char *caller)
+{
+       if (vm->vm_from_config)
+               vm_stop(vm, 0, __func__);
+       else {
+               /* vm_remove calls vm_stop */
+               vm_remove(vm, __func__);
+       }
+}

Reply via email to