Hi!

This is an attempt to address the 'thundering herd' problem that occurs
when a lot of vms are configured in vm.conf.  A lot of vms booting in
parallel can overload the host and also mess up TSC calibration in OpenBSD
guests, because calibration uses the PIT, which doesn't fire reliably if
the host is overloaded.


This diff makes vmd start vms in a staggered fashion, with a default
parallelism equal to the number of cpus on the host and a delay of 30s.
The defaults can be overridden with a line like the following in vm.conf:

staggered start parallel 4 delay 30


Every non-disabled vm starts in the waiting state.  If you are eager to
start a vm that is much further down the list, you can vmctl start it.

Discussed the idea with ori@, mlarkin@ and phessler@.

Comments / ok?
--
Pratik

Index: usr.sbin/vmctl/vmctl.c
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmctl/vmctl.c,v
retrieving revision 1.71
diff -u -p -a -u -r1.71 vmctl.c
--- usr.sbin/vmctl/vmctl.c      7 Sep 2019 09:11:14 -0000       1.71
+++ usr.sbin/vmctl/vmctl.c      8 Dec 2019 09:29:39 -0000
@@ -716,6 +716,8 @@ vm_state(unsigned int mask)
{
        if (mask & VM_STATE_PAUSED)
                return "paused";
+       else if (mask & VM_STATE_WAITING)
+               return "waiting";
        else if (mask & VM_STATE_RUNNING)
                return "running";
        else if (mask & VM_STATE_SHUTDOWN)
Index: usr.sbin/vmd/parse.y
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/parse.y,v
retrieving revision 1.52
diff -u -p -a -u -r1.52 parse.y
--- usr.sbin/vmd/parse.y        14 May 2019 06:05:45 -0000      1.52
+++ usr.sbin/vmd/parse.y        8 Dec 2019 09:29:39 -0000
@@ -122,7 +122,8 @@ typedef struct {
%token  INCLUDE ERROR
%token  ADD ALLOW BOOT CDROM DEVICE DISABLE DISK DOWN ENABLE FORMAT GROUP
%token  INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
-%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID
+%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
+%token  PARALLEL DELAY
%token  <v.number>        NUMBER
%token  <v.string>        STRING
%type   <v.lladdr>        lladdr
@@ -217,6 +218,11 @@ main               : LOCAL INET6 {
                        env->vmd_ps.ps_csock.cs_uid = $3.uid;
                        env->vmd_ps.ps_csock.cs_gid = $3.gid == -1 ? 0 : $3.gid;
                }
+               | STAGGERED START PARALLEL NUMBER DELAY NUMBER {
+                       env->vmd_cfg.cfg_flags |= VMD_CFG_STAGGERED_START;
+                       env->vmd_cfg.delay.tv_sec = $6;
+                       env->vmd_cfg.parallelism = $4;
+               }
                ;

switch          : SWITCH string                 {
@@ -368,6 +374,8 @@ vm          : VM string vm_instance         {
                                } else {
                                        if (vcp_disable)
                                                vm->vm_state |= 
VM_STATE_DISABLED;
+                                       else
+                                               vm->vm_state |= 
VM_STATE_WAITING;
                                        log_debug("%s:%d: vm \"%s\" "
                                            "registered (%s)",
                                            file->name, yylval.lineno,
@@ -766,6 +774,7 @@ lookup(char *s)
                { "allow",            ALLOW },
                { "boot",             BOOT },
                { "cdrom",            CDROM },
+               { "delay",            DELAY },
                { "device",           DEVICE },
                { "disable",          DISABLE },
                { "disk",             DISK },
@@ -785,10 +794,13 @@ lookup(char *s)
                { "memory",           MEMORY },
                { "net",              NET },
                { "owner",            OWNER },
+               { "parallel",         PARALLEL },
                { "prefix",           PREFIX },
                { "rdomain",          RDOMAIN },
                { "size",             SIZE },
                { "socket",           SOCKET },
+               { "staggered",                STAGGERED },
+               { "start",            START  },
                { "switch",           SWITCH },
                { "up",                       UP },
                { "vm",                       VM }
Index: usr.sbin/vmd/vm.conf.5
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vm.conf.5,v
retrieving revision 1.44
diff -u -p -a -u -r1.44 vm.conf.5
--- usr.sbin/vmd/vm.conf.5      14 May 2019 12:47:17 -0000      1.44
+++ usr.sbin/vmd/vm.conf.5      8 Dec 2019 09:29:39 -0000
@@ -91,6 +91,16 @@ vm "vm1.example.com" {
.Sh GLOBAL CONFIGURATION
The following setting can be configured globally:
.Bl -tag -width Ds
+.It Ic staggered start parallel Ar parallelism Ic delay Ar seconds
+Start all configured vms in a staggered fashion, with
+.Ar parallelism
+instances started in parallel every
+.Ar seconds
+seconds.
+By default,
+.Ar parallelism
+is the number of cpus on the host and the delay is
+30 seconds.
.It Ic local prefix Ar address Ns Li / Ns Ar prefix
Set the network prefix that is used to allocate subnets for
local interfaces, see
Index: usr.sbin/vmd/vmd.c
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vmd.c,v
retrieving revision 1.116
diff -u -p -a -u -r1.116 vmd.c
--- usr.sbin/vmd/vmd.c  4 Sep 2019 07:02:03 -0000       1.116
+++ usr.sbin/vmd/vmd.c  8 Dec 2019 09:29:39 -0000
@@ -21,6 +21,7 @@
#include <sys/wait.h>
#include <sys/cdefs.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/tty.h>
#include <sys/ttycom.h>
#include <sys/ioctl.h>
@@ -63,6 +64,7 @@ int    vm_instance(struct privsep *, struc
            struct vmop_create_params *, uid_t);
int      vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int      vm_claimid(const char *, int, uint32_t *);
+void    start_vm_batch(int, short, void*);

struct vmd      *env;

@@ -73,6 +75,8 @@ static struct privsep_proc procs[] = {
        { "vmm",      PROC_VMM,       vmd_dispatch_vmm, vmm, vmm_shutdown },
};

+struct event staggered_start_timer;
+
/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;
@@ -854,11 +858,40 @@ main(int argc, char **argv)
        return (0);
}

+/* evtimer callback: start the next batch of waiting vms; re-arm if needed. */
+void
+start_vm_batch(int fd, short type, void *args)
+{
+       struct vmd_vm   *vm;
+       int              started = 0, limit = env->vmd_cfg.parallelism;
+       /* A parallelism below 1 would re-arm forever without starting a vm. */
+       if (limit < 1)
+               limit = 1;
+       log_debug("%s: starting batch of %d vms", __func__, limit);
+       TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
+               /* Only vms still marked as waiting are started from here. */
+               if (!(vm->vm_state & VM_STATE_WAITING))
+                       continue;
+               if (started++ >= limit) {
+                       evtimer_add(&staggered_start_timer,
+                           &env->vmd_cfg.delay);
+                       break;
+               }
+               vm->vm_state &= ~VM_STATE_WAITING;
+               if (config_setvm(&env->vmd_ps, vm, -1,
+                   vm->vm_params.vmc_owner.uid) == -1)
+                       log_warnx("%s: failed to start a vm", __func__);
+       }
+       log_debug("%s: done starting vms", __func__);
+}
+
int
vmd_configure(void)
{
-       struct vmd_vm           *vm;
+       int                     ncpus;
        struct vmd_switch       *vsw;
+       int ncpu_mib[] = {CTL_HW, HW_NCPU};
+       size_t ncpus_sz = sizeof(ncpus);

        if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
                fatal("open %s", PATH_PTMDEV);
@@ -906,17 +939,21 @@ vmd_configure(void)
                }
        }

-       TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
-               if (vm->vm_state & VM_STATE_DISABLED) {
-                       log_debug("%s: not creating vm %s (disabled)",
-                           __func__,
-                           vm->vm_params.vmc_params.vcp_name);
-                       continue;
-               }
-               if (config_setvm(&env->vmd_ps, vm,
-                   -1, vm->vm_params.vmc_owner.uid) == -1)
-                       return (-1);
+       if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
+               env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
+               if (sysctl(ncpu_mib, 2, &ncpus, &ncpus_sz, NULL, 0) == -1)
+                       ncpus = 1;
+               env->vmd_cfg.parallelism = ncpus;
+               log_debug("%s: setting staggered start configuration to "
+                   "parallelism: %d and delay: %lld",
+                   __func__, ncpus, env->vmd_cfg.delay.tv_sec);
+
        }
+
+       log_debug("%s: starting vms in staggered fashion", __func__);
+       evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
+       /* start first batch */
+       start_vm_batch(0, 0, NULL);

        return (0);
}
Index: usr.sbin/vmd/vmd.h
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.97
diff -u -p -a -u -r1.97 vmd.h
--- usr.sbin/vmd/vmd.h  7 Sep 2019 09:11:14 -0000       1.97
+++ usr.sbin/vmd/vmd.h  8 Dec 2019 09:29:39 -0000
@@ -56,6 +56,8 @@
#define VMD_SWITCH_TYPE         "bridge"
#define VM_DEFAULT_MEMORY       512

+#define VMD_DEFAULT_STAGGERED_START_DELAY 30
+
/* Rate-limit fast reboots */
#define VM_START_RATE_SEC       6       /* min. seconds since last reboot */
#define VM_START_RATE_LIMIT     3       /* max. number of fast reboots */
@@ -280,6 +282,7 @@ struct vmd_vm {
#define VM_STATE_SHUTDOWN       0x04
#define VM_STATE_RECEIVED       0x08
#define VM_STATE_PAUSED         0x10
+#define VM_STATE_WAITING       0x20

        /* For rate-limiting */
        struct timeval           vm_start_tv;
@@ -319,7 +322,10 @@ struct vmd_config {
        unsigned int             cfg_flags;
#define VMD_CFG_INET6           0x01
#define VMD_CFG_AUTOINET6       0x02
+#define VMD_CFG_STAGGERED_START        0x04

+       struct timeval           delay;
+       int                      parallelism;
        struct address           cfg_localprefix;
        struct address           cfg_localprefix6;
};

Reply via email to