> On 8 Dec 2019, at 11:08, Pratik Vyas <m...@pd.io> wrote:
> 
> Hi!
> 
> This is an attempt to address the 'thundering herd' problem when a lot of
> vms are configured in vm.conf.  A lot of vms booting in parallel can
> overload the host and also mess up tsc calibration in openbsd guests, as
> it uses the PIT, which doesn't fire reliably if the host is overloaded.
> 
> 
> This diff makes vmd start vms in a staggered fashion, with a default
> parallelism equal to the number of cpus on the host and a delay of 30s.
> The default can be overridden with a line like the following in vm.conf:
> 
> staggered start parallel 4 delay 30
> 
> 
> Every non-disabled vm starts in the waiting state.  If you are eager to
> start a vm that is much further down the list, you can start it by hand
> with vmctl start.
> 
> Discussed the idea with ori@, mlarkin@ and phessler@.
> 
> Comments / ok? 

Great addition to stop -w. Like it!

Mischa


> --
> Pratik
> 
> Index: usr.sbin/vmctl/vmctl.c
> ===================================================================
> RCS file: /home/cvs/src/usr.sbin/vmctl/vmctl.c,v
> retrieving revision 1.71
> diff -u -p -a -u -r1.71 vmctl.c
> --- usr.sbin/vmctl/vmctl.c    7 Sep 2019 09:11:14 -0000    1.71
> +++ usr.sbin/vmctl/vmctl.c    8 Dec 2019 09:29:39 -0000
> @@ -716,6 +716,8 @@ vm_state(unsigned int mask)
> {
>    if (mask & VM_STATE_PAUSED)
>        return "paused";
> +    else if (mask & VM_STATE_WAITING)
> +        return "waiting";
>    else if (mask & VM_STATE_RUNNING)
>        return "running";
>    else if (mask & VM_STATE_SHUTDOWN)
> Index: usr.sbin/vmd/parse.y
> ===================================================================
> RCS file: /home/cvs/src/usr.sbin/vmd/parse.y,v
> retrieving revision 1.52
> diff -u -p -a -u -r1.52 parse.y
> --- usr.sbin/vmd/parse.y    14 May 2019 06:05:45 -0000    1.52
> +++ usr.sbin/vmd/parse.y    8 Dec 2019 09:29:39 -0000
> @@ -122,7 +122,8 @@ typedef struct {
> %token    INCLUDE ERROR
> %token    ADD ALLOW BOOT CDROM DEVICE DISABLE DISK DOWN ENABLE FORMAT GROUP
> %token    INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
> -%token    PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID
> +%token    PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
> +%token  PARALLEL DELAY
> %token    <v.number>    NUMBER
> %token    <v.string>    STRING
> %type    <v.lladdr>    lladdr
> @@ -217,6 +218,11 @@ main        : LOCAL INET6 {
>            env->vmd_ps.ps_csock.cs_uid = $3.uid;
>            env->vmd_ps.ps_csock.cs_gid = $3.gid == -1 ? 0 : $3.gid;
>        }
> +        | STAGGERED START PARALLEL NUMBER DELAY NUMBER {
> +            env->vmd_cfg.cfg_flags |= VMD_CFG_STAGGERED_START;
> +            env->vmd_cfg.delay.tv_sec = $6;
> +            env->vmd_cfg.parallelism = $4;
> +        }
>        ;
> switch        : SWITCH string            {
> @@ -368,6 +374,8 @@ vm        : VM string vm_instance        {
>                } else {
>                    if (vcp_disable)
>                        vm->vm_state |= VM_STATE_DISABLED;
> +                    else
> +                        vm->vm_state |= VM_STATE_WAITING;
>                    log_debug("%s:%d: vm \"%s\" "
>                        "registered (%s)",
>                        file->name, yylval.lineno,
> @@ -766,6 +774,7 @@ lookup(char *s)
>        { "allow",        ALLOW },
>        { "boot",        BOOT },
>        { "cdrom",        CDROM },
> +        { "delay",        DELAY },
>        { "device",        DEVICE },
>        { "disable",        DISABLE },
>        { "disk",        DISK },
> @@ -785,10 +794,13 @@ lookup(char *s)
>        { "memory",        MEMORY },
>        { "net",        NET },
>        { "owner",        OWNER },
> +        { "parallel",        PARALLEL },
>        { "prefix",        PREFIX },
>        { "rdomain",        RDOMAIN },
>        { "size",        SIZE },
>        { "socket",        SOCKET },
> +        { "staggered",        STAGGERED },
> +        { "start",        START  },
>        { "switch",        SWITCH },
>        { "up",            UP },
>        { "vm",            VM }
> Index: usr.sbin/vmd/vm.conf.5
> ===================================================================
> RCS file: /home/cvs/src/usr.sbin/vmd/vm.conf.5,v
> retrieving revision 1.44
> diff -u -p -a -u -r1.44 vm.conf.5
> --- usr.sbin/vmd/vm.conf.5    14 May 2019 12:47:17 -0000    1.44
> +++ usr.sbin/vmd/vm.conf.5    8 Dec 2019 09:29:39 -0000
> @@ -91,6 +91,16 @@ vm "vm1.example.com" {
> .Sh GLOBAL CONFIGURATION
> The following setting can be configured globally:
> .Bl -tag -width Ds
> +.It Ic staggered start parallel Ar parallelism Ic delay Ar seconds
> +Start all configured vms in staggered fashion with
> +.Ar parallelism
> +instances in parallel every
> +.Ar delay
> +seconds.  Defaults to
> +.Ar parallelism
> +equal to number of cpus and
> +.Ar delay
> +of 30 seconds.
> .It Ic local prefix Ar address Ns Li / Ns Ar prefix
> Set the network prefix that is used to allocate subnets for
> local interfaces, see
> Index: usr.sbin/vmd/vmd.c
> ===================================================================
> RCS file: /home/cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.116
> diff -u -p -a -u -r1.116 vmd.c
> --- usr.sbin/vmd/vmd.c    4 Sep 2019 07:02:03 -0000    1.116
> +++ usr.sbin/vmd/vmd.c    8 Dec 2019 09:29:39 -0000
> @@ -21,6 +21,7 @@
> #include <sys/wait.h>
> #include <sys/cdefs.h>
> #include <sys/stat.h>
> +#include <sys/sysctl.h>
> #include <sys/tty.h>
> #include <sys/ttycom.h>
> #include <sys/ioctl.h>
> @@ -63,6 +64,7 @@ int     vm_instance(struct privsep *, struc
>        struct vmop_create_params *, uid_t);
> int     vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
> int     vm_claimid(const char *, int, uint32_t *);
> +void     start_vm_batch(int, short, void*);
> struct vmd    *env;
> @@ -73,6 +75,8 @@ static struct privsep_proc procs[] = {
>    { "vmm",    PROC_VMM,    vmd_dispatch_vmm, vmm, vmm_shutdown },
> };
> +struct event staggered_start_timer;
> +
> /* For the privileged process */
> static struct privsep_proc *proc_priv = &procs[0];
> static struct passwd proc_privpw;
> @@ -854,11 +858,40 @@ main(int argc, char **argv)
>    return (0);
> }
> +void
> +start_vm_batch(int fd, short type, void *args)
> +{
> +    int        i = 0;
> +    struct vmd_vm    *vm;
> +    log_debug("%s: starting batch of %d vms", __func__,
> +        env->vmd_cfg.parallelism);
> +    TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
> +        if (!(vm->vm_state & VM_STATE_WAITING)) {
> +            log_debug("%s: not creating vm %s (disabled)",
> +                __func__,
> +                vm->vm_params.vmc_params.vcp_name);
> +            continue;
> +        }
> +        i++;
> +        if (i > env->vmd_cfg.parallelism) {
> +            evtimer_add(&staggered_start_timer,
> +                &env->vmd_cfg.delay);
> +            break;
> +        }
> +        vm->vm_state &= ~VM_STATE_WAITING;
> +        config_setvm(&env->vmd_ps, vm, -1,
> +            vm->vm_params.vmc_owner.uid);
> +    }
> +    log_debug("%s: done starting vms", __func__);
> +}
> +
> int
> vmd_configure(void)
> {
> -    struct vmd_vm        *vm;
> +    int            ncpus;
>    struct vmd_switch    *vsw;
> +    int ncpu_mib[] = {CTL_HW, HW_NCPU};
> +    size_t ncpus_sz = sizeof(ncpus);
>    if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
>        fatal("open %s", PATH_PTMDEV);
> @@ -906,17 +939,21 @@ vmd_configure(void)
>        }
>    }
> -    TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
> -        if (vm->vm_state & VM_STATE_DISABLED) {
> -            log_debug("%s: not creating vm %s (disabled)",
> -                __func__,
> -                vm->vm_params.vmc_params.vcp_name);
> -            continue;
> -        }
> -        if (config_setvm(&env->vmd_ps, vm,
> -            -1, vm->vm_params.vmc_owner.uid) == -1)
> -            return (-1);
> +    if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
> +        env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
> +        if (sysctl(ncpu_mib, 2, &ncpus, &ncpus_sz, NULL, 0) == -1)
> +            ncpus = 1;
> +        env->vmd_cfg.parallelism = ncpus;
> +        log_debug("%s: setting staggered start configuration to "
> +            "parallelism: %d and delay: %lld",
> +            __func__, ncpus, env->vmd_cfg.delay.tv_sec);
> +
>    }
> +
> +    log_debug("%s: starting vms in staggered fashion", __func__);
> +    evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
> +    /* start first batch */
> +    start_vm_batch(0, 0, NULL);
>    return (0);
> }
> Index: usr.sbin/vmd/vmd.h
> ===================================================================
> RCS file: /home/cvs/src/usr.sbin/vmd/vmd.h,v
> retrieving revision 1.97
> diff -u -p -a -u -r1.97 vmd.h
> --- usr.sbin/vmd/vmd.h    7 Sep 2019 09:11:14 -0000    1.97
> +++ usr.sbin/vmd/vmd.h    8 Dec 2019 09:29:39 -0000
> @@ -56,6 +56,8 @@
> #define VMD_SWITCH_TYPE        "bridge"
> #define VM_DEFAULT_MEMORY    512
> +#define VMD_DEFAULT_STAGGERED_START_DELAY 30
> +
> /* Rate-limit fast reboots */
> #define VM_START_RATE_SEC    6    /* min. seconds since last reboot */
> #define VM_START_RATE_LIMIT    3    /* max. number of fast reboots */
> @@ -280,6 +282,7 @@ struct vmd_vm {
> #define VM_STATE_SHUTDOWN    0x04
> #define VM_STATE_RECEIVED    0x08
> #define VM_STATE_PAUSED        0x10
> +#define VM_STATE_WAITING    0x20
>    /* For rate-limiting */
>    struct timeval         vm_start_tv;
> @@ -319,7 +322,10 @@ struct vmd_config {
>    unsigned int         cfg_flags;
> #define VMD_CFG_INET6        0x01
> #define VMD_CFG_AUTOINET6    0x02
> +#define VMD_CFG_STAGGERED_START    0x04
> +    struct timeval         delay;
> +    int             parallelism;
>    struct address         cfg_localprefix;
>    struct address         cfg_localprefix6;
> };
> 

Reply via email to