Hi!
This is an attempt to address the 'thundering herd' problem that occurs when
a lot of vms are configured in vm.conf. A lot of vms booting in parallel can
overload the host and also mess up TSC calibration in OpenBSD guests, as
it uses the PIT, which doesn't fire reliably if the host is overloaded.
This diff makes vmd start vms in a staggered fashion, with a default parallelism
equal to the number of cpus on the host and a delay of 30s. The defaults can be
overridden with a line like the following in vm.conf:
staggered start parallel 4 delay 30
Every non-disabled vm starts in the waiting state. If you are eager to
start a vm that is much further down the list, you can vmctl start it.
Discussed the idea with ori@, mlarkin@ and phessler@.
Comments / ok?
--
Pratik
Index: usr.sbin/vmctl/vmctl.c
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmctl/vmctl.c,v
retrieving revision 1.71
diff -u -p -a -u -r1.71 vmctl.c
--- usr.sbin/vmctl/vmctl.c 7 Sep 2019 09:11:14 -0000 1.71
+++ usr.sbin/vmctl/vmctl.c 8 Dec 2019 09:29:39 -0000
@@ -716,6 +716,8 @@ vm_state(unsigned int mask)
{
if (mask & VM_STATE_PAUSED)
return "paused";
+ else if (mask & VM_STATE_WAITING)
+ return "waiting";
else if (mask & VM_STATE_RUNNING)
return "running";
else if (mask & VM_STATE_SHUTDOWN)
Index: usr.sbin/vmd/parse.y
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/parse.y,v
retrieving revision 1.52
diff -u -p -a -u -r1.52 parse.y
--- usr.sbin/vmd/parse.y 14 May 2019 06:05:45 -0000 1.52
+++ usr.sbin/vmd/parse.y 8 Dec 2019 09:29:39 -0000
@@ -122,7 +122,8 @@ typedef struct {
%token INCLUDE ERROR
%token ADD ALLOW BOOT CDROM DEVICE DISABLE DISK DOWN ENABLE FORMAT GROUP
%token INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
-%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID
+%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
+%token PARALLEL DELAY
%token <v.number> NUMBER
%token <v.string> STRING
%type <v.lladdr> lladdr
@@ -217,6 +218,11 @@ main : LOCAL INET6 {
env->vmd_ps.ps_csock.cs_uid = $3.uid;
env->vmd_ps.ps_csock.cs_gid = $3.gid == -1 ? 0 : $3.gid;
}
+ | STAGGERED START PARALLEL NUMBER DELAY NUMBER {
+ env->vmd_cfg.cfg_flags |= VMD_CFG_STAGGERED_START;
+ env->vmd_cfg.delay.tv_sec = $6;
+ env->vmd_cfg.parallelism = $4;
+ }
;
switch : SWITCH string {
@@ -368,6 +374,8 @@ vm : VM string vm_instance {
} else {
if (vcp_disable)
vm->vm_state |=
VM_STATE_DISABLED;
+ else
+ vm->vm_state |=
VM_STATE_WAITING;
log_debug("%s:%d: vm \"%s\" "
"registered (%s)",
file->name, yylval.lineno,
@@ -766,6 +774,7 @@ lookup(char *s)
{ "allow", ALLOW },
{ "boot", BOOT },
{ "cdrom", CDROM },
+ { "delay", DELAY },
{ "device", DEVICE },
{ "disable", DISABLE },
{ "disk", DISK },
@@ -785,10 +794,13 @@ lookup(char *s)
{ "memory", MEMORY },
{ "net", NET },
{ "owner", OWNER },
+ { "parallel", PARALLEL },
{ "prefix", PREFIX },
{ "rdomain", RDOMAIN },
{ "size", SIZE },
{ "socket", SOCKET },
+ { "staggered", STAGGERED },
+ { "start", START },
{ "switch", SWITCH },
{ "up", UP },
{ "vm", VM }
Index: usr.sbin/vmd/vm.conf.5
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vm.conf.5,v
retrieving revision 1.44
diff -u -p -a -u -r1.44 vm.conf.5
--- usr.sbin/vmd/vm.conf.5 14 May 2019 12:47:17 -0000 1.44
+++ usr.sbin/vmd/vm.conf.5 8 Dec 2019 09:29:39 -0000
@@ -91,6 +91,16 @@ vm "vm1.example.com" {
.Sh GLOBAL CONFIGURATION
The following setting can be configured globally:
.Bl -tag -width Ds
+.It Ic staggered start parallel Ar parallelism Ic delay Ar seconds
+Start all configured vms in a staggered fashion with
+.Ar parallelism
+instances in parallel every
+.Ar delay
+seconds. Defaults to
+.Ar parallelism
+equal to the number of cpus and a
+.Ar delay
+of 30 seconds.
.It Ic local prefix Ar address Ns Li / Ns Ar prefix
Set the network prefix that is used to allocate subnets for
local interfaces, see
Index: usr.sbin/vmd/vmd.c
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vmd.c,v
retrieving revision 1.116
diff -u -p -a -u -r1.116 vmd.c
--- usr.sbin/vmd/vmd.c 4 Sep 2019 07:02:03 -0000 1.116
+++ usr.sbin/vmd/vmd.c 8 Dec 2019 09:29:39 -0000
@@ -21,6 +21,7 @@
#include <sys/wait.h>
#include <sys/cdefs.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/tty.h>
#include <sys/ttycom.h>
#include <sys/ioctl.h>
@@ -63,6 +64,7 @@ int vm_instance(struct privsep *, struc
struct vmop_create_params *, uid_t);
int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int vm_claimid(const char *, int, uint32_t *);
+void start_vm_batch(int, short, void*);
struct vmd *env;
@@ -73,6 +75,8 @@ static struct privsep_proc procs[] = {
{ "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown },
};
+struct event staggered_start_timer;
+
/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;
@@ -854,11 +858,40 @@ main(int argc, char **argv)
return (0);
}
+/*
+ * start_vm_batch
+ *
+ * Start the next batch of vms that are still in the waiting state.
+ * Walks env->vmd_vms, hands at most env->vmd_cfg.parallelism waiting vms
+ * to config_setvm() and, when another waiting vm is found after the batch
+ * is full, arms staggered_start_timer to run this function again after
+ * env->vmd_cfg.delay.
+ *
+ * The signature is that of an event callback; fd, type and args are not
+ * used.
+ */
+void
+start_vm_batch(int fd, short type, void *args)
+{
+	int i = 0;	/* waiting vms seen during this pass */
+	struct vmd_vm *vm;
+
+	log_debug("%s: starting batch of %d vms", __func__,
+	    env->vmd_cfg.parallelism);
+	TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
+		/*
+		 * Anything not flagged as waiting is skipped.  That covers
+		 * disabled vms, but also vms whose waiting flag was cleared
+		 * by an earlier batch, so do not call them "disabled".
+		 */
+		if (!(vm->vm_state & VM_STATE_WAITING)) {
+			log_debug("%s: not starting vm %s (not waiting)",
+			    __func__,
+			    vm->vm_params.vmc_params.vcp_name);
+			continue;
+		}
+		i++;
+		if (i > env->vmd_cfg.parallelism) {
+			/* Batch is full; the rest waits for the next run. */
+			evtimer_add(&staggered_start_timer,
+			    &env->vmd_cfg.delay);
+			break;
+		}
+		/* Clear the flag first so later passes skip this vm. */
+		vm->vm_state &= ~VM_STATE_WAITING;
+		if (config_setvm(&env->vmd_ps, vm, -1,
+		    vm->vm_params.vmc_owner.uid) == -1) {
+			/*
+			 * NOTE(review): the loop this replaces returned as
+			 * soon as config_setvm() failed.  Whether vm is
+			 * still valid after a failure is not visible here,
+			 * so it is not touched again (not even to advance
+			 * the list walk); confirm against config_setvm().
+			 * The remaining waiting vms are picked up by the
+			 * next timer run, which restarts from the list head.
+			 */
+			log_debug("%s: failed to start a vm, deferring "
+			    "the rest of the batch", __func__);
+			evtimer_add(&staggered_start_timer,
+			    &env->vmd_cfg.delay);
+			break;
+		}
+	}
+	log_debug("%s: done starting vms", __func__);
+}
+
int
vmd_configure(void)
{
- struct vmd_vm *vm;
+ int ncpus;
struct vmd_switch *vsw;
+ int ncpu_mib[] = {CTL_HW, HW_NCPU};
+ size_t ncpus_sz = sizeof(ncpus);
if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
fatal("open %s", PATH_PTMDEV);
@@ -906,17 +939,21 @@ vmd_configure(void)
}
}
- TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
- if (vm->vm_state & VM_STATE_DISABLED) {
- log_debug("%s: not creating vm %s (disabled)",
- __func__,
- vm->vm_params.vmc_params.vcp_name);
- continue;
- }
- if (config_setvm(&env->vmd_ps, vm,
- -1, vm->vm_params.vmc_owner.uid) == -1)
- return (-1);
+ if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
+ env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
+ if (sysctl(ncpu_mib, 2, &ncpus, &ncpus_sz, NULL, 0) == -1)
+ ncpus = 1;
+ env->vmd_cfg.parallelism = ncpus;
+ log_debug("%s: setting staggered start configuration to "
+ "parallelism: %d and delay: %lld",
+ __func__, ncpus, env->vmd_cfg.delay.tv_sec);
+
}
+
+ log_debug("%s: starting vms in staggered fashion", __func__);
+ evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
+ /* start first batch */
+ start_vm_batch(0, 0, NULL);
return (0);
}
Index: usr.sbin/vmd/vmd.h
===================================================================
RCS file: /home/cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.97
diff -u -p -a -u -r1.97 vmd.h
--- usr.sbin/vmd/vmd.h 7 Sep 2019 09:11:14 -0000 1.97
+++ usr.sbin/vmd/vmd.h 8 Dec 2019 09:29:39 -0000
@@ -56,6 +56,8 @@
#define VMD_SWITCH_TYPE "bridge"
#define VM_DEFAULT_MEMORY 512
+#define VMD_DEFAULT_STAGGERED_START_DELAY 30
+
/* Rate-limit fast reboots */
#define VM_START_RATE_SEC 6 /* min. seconds since last reboot */
#define VM_START_RATE_LIMIT 3 /* max. number of fast reboots */
@@ -280,6 +282,7 @@ struct vmd_vm {
#define VM_STATE_SHUTDOWN 0x04
#define VM_STATE_RECEIVED 0x08
#define VM_STATE_PAUSED 0x10
+#define VM_STATE_WAITING 0x20
/* For rate-limiting */
struct timeval vm_start_tv;
@@ -319,7 +322,10 @@ struct vmd_config {
unsigned int cfg_flags;
#define VMD_CFG_INET6 0x01
#define VMD_CFG_AUTOINET6 0x02
+#define VMD_CFG_STAGGERED_START 0x04
+ struct timeval delay;
+ int parallelism;
struct address cfg_localprefix;
struct address cfg_localprefix6;
};