Hi, On Fri, Oct 22, 2021 at 12:35:17AM +0700, Dio Putra wrote: > Hi, this bug just happened right in front of me (see my picture attachment). > Fortunately, I was able to create a rebase patch to Debian Bullseye: > https://listman.redhat.com/archives/libvir-list/2021-April/msg00756.html > > Here's my patch: > >From ea7d0ca37cce76e1327945c4864b996d7fd6d2e6 Mon Sep 17 00:00:00 > 2001
thanks. I've moved that to an MR [URL missing from archive] for a point release update. Testing is appreciated. Cheers, -- Guido > Message-Id: > <ea7d0ca37cce76e1327945c4864b996d7fd6d2e6.1618903455.git.mpriv...@redhat.com> > From: Michal Privoznik <mpriv...@redhat.com> > Date: Fri, 16 Apr 2021 16:39:14 +0200 > Subject: [PATCH] vircgroup: Fix virCgroupKillRecursive() wrt nested > controllers > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > I've encountered the following bug, but only on Gentoo with > systemd and CGroupsV2. I've started an LXC container successfully > but destroying it reported the following error: > > error: Failed to destroy domain 'amd64' > error: internal error: failed to get cgroup backend for 'pathOfController' > > Debugging showed, that CGroup hierarchy is full of surprises: > > /sys/fs/cgroup/machine.slice/machine-lxc\x2d861\x2damd64.scope/ > └── libvirt > ├── dev-hugepages.mount > ├── dev-mqueue.mount > ├── init.scope > ├── sys-fs-fuse-connections.mount > ├── sys-kernel-config.mount > ├── sys-kernel-debug.mount > ├── sys-kernel-tracing.mount > ├── system.slice > │ ├── console-getty.service > │ ├── dbus.service > │ ├── system-getty.slice > │ ├── system-modprobe.slice > │ ├── systemd-journald.service > │ ├── systemd-logind.service > │ └── tmp.mount > └── user.slice > > For comparison, here's the same container on recent Rawhide: > > /sys/fs/cgroup/machine.slice/machine-lxc\x2d13550\x2damd64.scope/ > └── libvirt > > Anyway, those nested directories should not be a problem, because > virCgroupKillRecursiveInternal() removes them recursively, right? > Sort of. The function really does remove nested directories, but > it assumes that every directory has the same controller as the > rest. Just take a look at virCgroupV2KillRecursive() - it gets > 'Any' controller (the first one it found in ".scope") and then > passes it to virCgroupKillRecursiveInternal(). > > This assumption is not true though. 
The controllers found in > ".scope" are the following: > > cpuset cpu io memory pids > > while "libvirt" has fewer: > > cpuset cpu io memory > > Up until now it's not problem, because of how we order > controllers internally - "cpu" is the first and thus picking > "Any" controller returns just that. But the rest of directories > has no controllers, their "cgroup.controllers" is just empty. > > What fixes the bug is dropping @controller argument from > virCgroupKillRecursiveInternal() and letting each iteration work > pick its own controller. > > Signed-off-by: Michal Privoznik <mpriv...@redhat.com> > Reviewed-by: Pavel Hrdina <phrd...@redhat.com> > --- > src/util/vircgroup.c | 29 +++++++++++++++++++++++++---- > src/util/vircgrouppriv.h | 1 - > src/util/vircgroupv1.c | 7 +------ > src/util/vircgroupv2.c | 7 +------ > 4 files changed, 27 insertions(+), 17 deletions(-) > > Signed-off-by: Dio Putra <diopu...@gmail.com> > --- > --- libvirt-7.0.0.orig/src/util/vircgroup.c > +++ libvirt-7.0.0/src/util/vircgroup.c > @@ -1380,6 +1380,24 @@ > } > > > +static int > +virCgroupGetAnyController(virCgroup *cgroup) > +{ > + size_t i; > + > + for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { > + if (!cgroup->backends[i]) > + continue; > + > + return cgroup->backends[i]->getAnyController(cgroup); > + } > + > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Unable to get any controller")); > + return -1; > +} > + > + > int > virCgroupPathOfController(virCgroupPtr group, > unsigned int controller, > @@ -2548,18 +2566,21 @@ > virCgroupKillRecursiveInternal(virCgroupPtr group, > int signum, > GHashTable *pids, > - int controller, > const char *taskFile, > bool dormdir) > { > int rc; > + int controller; > bool killedAny = false; > g_autofree char *keypath = NULL; > g_autoptr(DIR) dp = NULL; > struct dirent *ent; > int direrr; > - VIR_DEBUG("group=%p signum=%d pids=%p", > - group, signum, pids); > + VIR_DEBUG("group=%p signum=%d pids=%p taskFile=%s dormdir=%d", > + group, signum, 
pids, taskFile, dormdir); > + > + if ((controller = virCgroupGetAnyController(group)) < 0) > + return -1; > > if (virCgroupPathOfController(group, controller, "", &keypath) < 0) > return -1; > @@ -2593,7 +2614,7 @@ > return -1; > > if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, > - controller, > taskFile, true)) < 0) > + taskFile, true)) < 0) > return -1; > if (rc == 1) > killedAny = true; > --- libvirt-7.0.0.orig/src/util/vircgrouppriv.h > +++ libvirt-7.0.0/src/util/vircgrouppriv.h > @@ -128,6 +128,5 @@ int virCgroupRemoveRecursively(char *grp > int virCgroupKillRecursiveInternal(virCgroupPtr group, > int signum, > GHashTable *pids, > - int controller, > const char *taskFile, > bool dormdir); > --- libvirt-7.0.0.orig/src/util/vircgroupv1.c > +++ libvirt-7.0.0/src/util/vircgroupv1.c > @@ -771,12 +771,7 @@ virCgroupV1KillRecursive(virCgroupPtr gr > int signum, > GHashTable *pids) > { > - int controller = virCgroupV1GetAnyController(group); > - > - if (controller < 0) > - return -1; > - > - return virCgroupKillRecursiveInternal(group, signum, pids, controller, > + return virCgroupKillRecursiveInternal(group, signum, pids, > "tasks", false); > } > > --- libvirt-7.0.0.orig/src/util/vircgroupv2.c > +++ libvirt-7.0.0/src/util/vircgroupv2.c > @@ -543,12 +543,7 @@ virCgroupV2KillRecursive(virCgroupPtr gr > int signum, > GHashTable *pids) > { > - int controller = virCgroupV2GetAnyController(group); > - > - if (controller < 0) > - return -1; > - > - return virCgroupKillRecursiveInternal(group, signum, pids, controller, > + return virCgroupKillRecursiveInternal(group, signum, pids, > "cgroup.threads", false); > } > > > On Tue, 02 Mar 2021 15:37:38 +0100 Thorsten Glaser <t...@mirbsd.de> wrote: > > Package: libvirt-daemon > > Version: 7.0.0-2 > > Severity: important > > X-Debbugs-Cc: t...@mirbsd.de > > > > After an upgrade+reboot I cannot start VMs *again* with some cgroup error: > > > > $ wirrsh start Netboot > > error: Failed to start domain 'Netboot' > > 
error: internal error: failed to get cgroup backend for 'pathOfController' > > > > To unconfuse: > > > > $ alias wirrsh > > wirrsh='virsh -c qemu:///system' > > > > -- System Information: > > Debian Release: bullseye/sid > > APT prefers unreleased > > APT policy: (500, 'unreleased'), (500, 'buildd-unstable'), (500, > > 'unstable'), (100, 'experimental') > > Architecture: x32 (x86_64) > > Foreign Architectures: i386, amd64 > > > > Kernel: Linux 5.10.0-3-amd64 (SMP w/4 CPU threads) > > Kernel taint flags: TAINT_FIRMWARE_WORKAROUND > > Locale: LANG=C, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), LANGUAGE not set > > Shell: /bin/sh linked to /bin/lksh > > Init: sysvinit (via /sbin/init) > > > > Versions of packages libvirt-daemon depends on: > > ii libblkid1 2.36.1-7 > > ii libc6 2.31-9 > > ii libdevmapper1.02.1 2:1.02.175-2.1 > > ii libgcc-s1 10.2.1-6 > > ii libglib2.0-0 2.66.7-1 > > ii libnetcf1 1:0.2.8-1.1 > > ii libparted2 3.4-1 > > ii libpcap0.8 1.10.0-2 > > ii libpciaccess0 0.16-1 > > ii libselinux1 3.1-3 > > ii libudev1 247.3-1 > > ii libvirt-daemon-driver-qemu 7.0.0-2 > > ii libvirt0 7.0.0-2 > > ii libxml2 2.9.10+dfsg-6.3+b1 > > > > Versions of packages libvirt-daemon recommends: > > pn libvirt-daemon-driver-lxc <none> > > pn libvirt-daemon-driver-vbox <none> > > pn libvirt-daemon-driver-xen <none> > > ii libxml2-utils 2.9.10+dfsg-6.3+b1 > > ii netcat-openbsd 1.217-3 > > ii qemu-system 1:5.2+dfsg-6 > > > > Versions of packages libvirt-daemon suggests: > > pn libvirt-daemon-driver-storage-gluster <none> > > pn libvirt-daemon-driver-storage-iscsi-direct <none> > > pn libvirt-daemon-driver-storage-rbd <none> > > pn libvirt-daemon-driver-storage-zfs <none> > > ii libvirt-daemon-system 7.0.0-2 > > pn numad <none> > From ea7d0ca37cce76e1327945c4864b996d7fd6d2e6 Mon Sep 17 00:00:00 2001 > Message-Id: > <ea7d0ca37cce76e1327945c4864b996d7fd6d2e6.1618903455.git.mpriv...@redhat.com> > From: Michal Privoznik <mpriv...@redhat.com> > Date: Fri, 16 Apr 2021 16:39:14 +0200 > Subject: 
[PATCH] vircgroup: Fix virCgroupKillRecursive() wrt nested > controllers > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > I've encountered the following bug, but only on Gentoo with > systemd and CGroupsV2. I've started an LXC container successfully > but destroying it reported the following error: > > error: Failed to destroy domain 'amd64' > error: internal error: failed to get cgroup backend for 'pathOfController' > > Debugging showed, that CGroup hierarchy is full of surprises: > > /sys/fs/cgroup/machine.slice/machine-lxc\x2d861\x2damd64.scope/ > └── libvirt > ├── dev-hugepages.mount > ├── dev-mqueue.mount > ├── init.scope > ├── sys-fs-fuse-connections.mount > ├── sys-kernel-config.mount > ├── sys-kernel-debug.mount > ├── sys-kernel-tracing.mount > ├── system.slice > │ ├── console-getty.service > │ ├── dbus.service > │ ├── system-getty.slice > │ ├── system-modprobe.slice > │ ├── systemd-journald.service > │ ├── systemd-logind.service > │ └── tmp.mount > └── user.slice > > For comparison, here's the same container on recent Rawhide: > > /sys/fs/cgroup/machine.slice/machine-lxc\x2d13550\x2damd64.scope/ > └── libvirt > > Anyway, those nested directories should not be a problem, because > virCgroupKillRecursiveInternal() removes them recursively, right? > Sort of. The function really does remove nested directories, but > it assumes that every directory has the same controller as the > rest. Just take a look at virCgroupV2KillRecursive() - it gets > 'Any' controller (the first one it found in ".scope") and then > passes it to virCgroupKillRecursiveInternal(). > > This assumption is not true though. The controllers found in > ".scope" are the following: > > cpuset cpu io memory pids > > while "libvirt" has fewer: > > cpuset cpu io memory > > Up until now it's not problem, because of how we order > controllers internally - "cpu" is the first and thus picking > "Any" controller returns just that. 
But the rest of directories > has no controllers, their "cgroup.controllers" is just empty. > > What fixes the bug is dropping @controller argument from > virCgroupKillRecursiveInternal() and letting each iteration work > pick its own controller. > > Signed-off-by: Michal Privoznik <mpriv...@redhat.com> > Reviewed-by: Pavel Hrdina <phrd...@redhat.com> > --- > src/util/vircgroup.c | 29 +++++++++++++++++++++++++---- > src/util/vircgrouppriv.h | 1 - > src/util/vircgroupv1.c | 7 +------ > src/util/vircgroupv2.c | 7 +------ > 4 files changed, 27 insertions(+), 17 deletions(-) > > Signed-by: Dio Putra <diopu...@gmail.com> > --- > --- libvirt-7.0.0.orig/src/util/vircgroup.c > +++ libvirt-7.0.0/src/util/vircgroup.c > @@ -1380,6 +1380,24 @@ > } > > > +static int > +virCgroupGetAnyController(virCgroup *cgroup) > +{ > + size_t i; > + > + for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { > + if (!cgroup->backends[i]) > + continue; > + > + return cgroup->backends[i]->getAnyController(cgroup); > + } > + > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Unable to get any controller")); > + return -1; > +} > + > + > int > virCgroupPathOfController(virCgroupPtr group, > unsigned int controller, > @@ -2548,18 +2566,21 @@ > virCgroupKillRecursiveInternal(virCgroupPtr group, > int signum, > GHashTable *pids, > - int controller, > const char *taskFile, > bool dormdir) > { > int rc; > + int controller; > bool killedAny = false; > g_autofree char *keypath = NULL; > g_autoptr(DIR) dp = NULL; > struct dirent *ent; > int direrr; > - VIR_DEBUG("group=%p signum=%d pids=%p", > - group, signum, pids); > + VIR_DEBUG("group=%p signum=%d pids=%p taskFile=%s dormdir=%d", > + group, signum, pids, taskFile, dormdir); > + > + if ((controller = virCgroupGetAnyController(group)) < 0) > + return -1; > > if (virCgroupPathOfController(group, controller, "", &keypath) < 0) > return -1; > @@ -2593,7 +2614,7 @@ > return -1; > > if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, > - 
controller, taskFile, > true)) < 0) > + taskFile, true)) < 0) > return -1; > if (rc == 1) > killedAny = true; > --- libvirt-7.0.0.orig/src/util/vircgrouppriv.h > +++ libvirt-7.0.0/src/util/vircgrouppriv.h > @@ -128,6 +128,5 @@ int virCgroupRemoveRecursively(char *grp > int virCgroupKillRecursiveInternal(virCgroupPtr group, > int signum, > GHashTable *pids, > - int controller, > const char *taskFile, > bool dormdir); > --- libvirt-7.0.0.orig/src/util/vircgroupv1.c > +++ libvirt-7.0.0/src/util/vircgroupv1.c > @@ -771,12 +771,7 @@ virCgroupV1KillRecursive(virCgroupPtr gr > int signum, > GHashTable *pids) > { > - int controller = virCgroupV1GetAnyController(group); > - > - if (controller < 0) > - return -1; > - > - return virCgroupKillRecursiveInternal(group, signum, pids, controller, > + return virCgroupKillRecursiveInternal(group, signum, pids, > "tasks", false); > } > > --- libvirt-7.0.0.orig/src/util/vircgroupv2.c > +++ libvirt-7.0.0/src/util/vircgroupv2.c > @@ -543,12 +543,7 @@ virCgroupV2KillRecursive(virCgroupPtr gr > int signum, > GHashTable *pids) > { > - int controller = virCgroupV2GetAnyController(group); > - > - if (controller < 0) > - return -1; > - > - return virCgroupKillRecursiveInternal(group, signum, pids, controller, > + return virCgroupKillRecursiveInternal(group, signum, pids, > "cgroup.threads", false); > } >