Hi, we've moved the development of systemd to github, it's best to open pull requests there. I uploaded your patches as https://github.com/systemd/systemd/pull/2902. Let's discuss things there.
Zbyszek On Fri, Mar 25, 2016 at 02:34:00PM -0400, Tejun Heo wrote: > (sorry, of course forgot to attach the patches) > (bounced for not being subscribed, resending...) > > Hello, > > Unified hierarchy is available on the 4.5 kernel but there have been > several updates. > > 1. The __DEVEL__sane_behavior flag is gone. Unified hierarchy is now > available as "cgroup2" filesystem type with its own super magic > number. > > 2. "cgroup.populated" file is replaced with "populated" field of > "cgroup.events" file. > > 3. A zombie task remains associated with the cgroup it was associated > with at the time of death instead of being moved immediately to > root. This means that pid to unit lookup may return a slice if the > session or service unit the pid belonged to is already gone. > > Three patches are attached addressing each of the above. > > Thanks! > > -- > tejun > From 278a39f0a8fa34cd899c6a08e76626c987a4713e Mon Sep 17 00:00:00 2001 > From: Tejun Heo <[email protected]> > Date: Fri, 25 Mar 2016 11:38:50 -0400 > Subject: [PATCH 1/3] core: update unified hierarchy support > > Unified hierarchy is official as of Linux v4.5 and now available through a new > filesystem type, cgroup2, with its own super magic. Update mount logic > accordingly. > > Signed-off-by: Tejun Heo <[email protected]> > --- > src/basic/cgroup-util.c | 2 +- > src/basic/missing.h | 4 ++++ > src/core/mount-setup.c | 2 +- > 3 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c > index 56c1fca..5124b5b 100644 > --- a/src/basic/cgroup-util.c > +++ b/src/basic/cgroup-util.c > @@ -2129,7 +2129,7 @@ int cg_unified(void) { > if (statfs("/sys/fs/cgroup/", &fs) < 0) > return -errno; > > - if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) > + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) > unified_cache = true; > else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) > unified_cache = false; > diff --git a/src/basic/missing.h b/src/basic/missing.h > index 034e334..66cd592 100644 > --- a/src/basic/missing.h > +++ b/src/basic/missing.h > @@ -437,6 +437,10 @@ struct btrfs_ioctl_quota_ctl_args { > #define CGROUP_SUPER_MAGIC 0x27e0eb > #endif > > +#ifndef CGROUP2_SUPER_MAGIC > +#define CGROUP2_SUPER_MAGIC 0x63677270 > +#endif > + > #ifndef TMPFS_MAGIC > #define TMPFS_MAGIC 0x01021994 > #endif > diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c > index de1a361..32fe51c 100644 > --- a/src/core/mount-setup.c > +++ b/src/core/mount-setup.c > @@ -94,7 +94,7 @@ static const MountPoint mount_table[] = { > #endif > { "tmpfs", "/run", "tmpfs", > "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, > NULL, MNT_FATAL|MNT_IN_CONTAINER }, > - { "cgroup", "/sys/fs/cgroup", "cgroup", > "__DEVEL__sane_behavior", MS_NOSUID|MS_NOEXEC|MS_NODEV, > + { "cgroup", "/sys/fs/cgroup", "cgroup2", NULL, > MS_NOSUID|MS_NOEXEC|MS_NODEV, > cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, > { "tmpfs", "/sys/fs/cgroup", "tmpfs", > "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, > cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, > -- > 2.5.5 > > From 0fed0c3cdebe72557db528572ed2c531e32e7d5a Mon Sep 17 00:00:00 2001 > From: Tejun Heo <[email protected]> > Date: Fri, 25 Mar 2016 11:38:50 -0400 > Subject: [PATCH 2/3] core: update populated event handling in unified > hierarchy > > Earlier during the development of unified hierarchy, the populated event was > reported through by the dedicated "cgroup.populated" file; however, the > interface was updated so that it's reported through the "populated" field of > "cgroup.events" file. Update populated event handling logic accordingly. > > Signed-off-by: Tejun Heo <[email protected]> > --- > src/basic/cgroup-util.c | 45 ++++++++++++++++++++++++++++++++++++--------- > src/basic/cgroup-util.h | 2 ++ > src/core/cgroup.c | 6 +++--- > src/nspawn/nspawn-cgroup.c | 3 +-- > 4 files changed, 42 insertions(+), 14 deletions(-) > > diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c > index 5124b5b..5043180 100644 > --- a/src/basic/cgroup-util.c > +++ b/src/basic/cgroup-util.c > @@ -101,6 +101,39 @@ int cg_read_pid(FILE *f, pid_t *_pid) { > return 1; > } > > +int cg_read_event(const char *controller, const char *path, const char > *event, > + char **val) > +{ > + _cleanup_free_ char *events = NULL, *content = NULL; > + char *p, *line; > + int r; > + > + r = cg_get_path(controller, path, "cgroup.events", &events); > + if (r < 0) > + return r; > + > + r = read_full_file(events, &content, NULL); > + if (r < 0) > + return r; > + > + p = content; > + while ((line = strsep(&p, "\n"))) { > + char *key; > + > + key = strsep(&line, " "); > + if (!key || !line) > + return -EINVAL; > + > + if (strcmp(key, event)) > + continue; > + > + *val = strdup(line); > + return 0; > + } > + > + return -ENOENT; > +} > + > int cg_enumerate_subgroups(const char *controller, const char *path, DIR > **_d) { > _cleanup_free_ char *fs = NULL; > int r; > @@ -1007,18 +1040,12 @@ int cg_is_empty_recursive(const char *controller, > const char *path) { > return unified; > > if (unified > 0) { > - _cleanup_free_ char *populated = NULL, *t = NULL; > + _cleanup_free_ char *t = NULL; > > /* On the unified hierarchy we can check empty state > - * via the "cgroup.populated" attribute. */ > + * via the "populated" attribute of "cgroup.events". */ > > - r = cg_get_path(controller, path, "cgroup.populated", > &populated); > - if (r < 0) > - return r; > - > - r = read_one_line_file(populated, &t); > - if (r == -ENOENT) > - return 1; > + r = cg_read_event(controller, path, "populated", &t); > if (r < 0) > return r; > > diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h > index ad1edd9..4254e51 100644 > --- a/src/basic/cgroup-util.h > +++ b/src/basic/cgroup-util.h > @@ -96,6 +96,8 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { > > int cg_enumerate_processes(const char *controller, const char *path, FILE > **_f); > int cg_read_pid(FILE *f, pid_t *_pid); > +int cg_read_event(const char *controller, const char *path, const char > *event, > + char **val); > > int cg_enumerate_subgroups(const char *controller, const char *path, DIR > **_d); > int cg_read_subgroup(DIR *d, char **fn); > diff --git a/src/core/cgroup.c b/src/core/cgroup.c > index 39235a9..9c34928 100644 > --- a/src/core/cgroup.c > +++ b/src/core/cgroup.c > @@ -765,7 +765,7 @@ int unit_set_cgroup_path(Unit *u, const char *path) { > } > > int unit_watch_cgroup(Unit *u) { > - _cleanup_free_ char *populated = NULL; > + _cleanup_free_ char *events = NULL; > int r; > > assert(u); > @@ -791,11 +791,11 @@ int unit_watch_cgroup(Unit *u) { > if (r < 0) > return log_oom(); > > - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, > "cgroup.populated", &populated); > + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, > "cgroup.events", &events); > if (r < 0) > return log_oom(); > > - u->cgroup_inotify_wd = > inotify_add_watch(u->manager->cgroup_inotify_fd, populated, IN_MODIFY); > + u->cgroup_inotify_wd = > inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY); > if (u->cgroup_inotify_wd < 0) { > > if (errno == ENOENT) /* If the directory is already > diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c > index 9f9a475..a63c30e 100644 > --- a/src/nspawn/nspawn-cgroup.c > +++ b/src/nspawn/nspawn-cgroup.c > @@ -55,8 +55,7 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) { > "cgroup.events", > "cgroup.clone_children", > "cgroup.controllers", > - "cgroup.subtree_control", > - "cgroup.populated") > + "cgroup.subtree_control") > if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0) > log_full_errno(errno == ENOENT ? LOG_DEBUG : > LOG_WARNING, errno, > "Failed to chown() cgroup file %s, > ignoring: %m", fn); > -- > 2.5.5 > > From 80d7d0ad42ad910acd06517f3f3862eeb1c9dc59 Mon Sep 17 00:00:00 2001 > From: Tejun Heo <[email protected]> > Date: Fri, 25 Mar 2016 11:38:50 -0400 > Subject: [PATCH 3/3] core: update invoke_sigchld_event() to handle NULL > ->sigchld_event() > > After receiving SIGCHLD, one of the ways manager_dispatch_sigchld() maps the > now zombie $PID to its unit is through manager_get_unit_by_pid_cgroup() which > reads /proc/$PID/cgroup and looks up the unit associated with the cgroup path. > > On non-unified cgroup hierarchies, a process is immediately migrated to the > root cgroup on death and the cgroup lookup would always have returned the unit > associated with it, making it rather pointless but safe. On unified > hierarchy, > a zombie remains associated with the cgroup that it was associated with at the > time of death and thus manager_get_unit_by_pid_cgroup() will look up the unit > properly. > > However, by the time manager_dispatch_sigchld() is running, the original > cgroup > may have become empty and it and its associated unit might already have been > removed. If the cgroup path doesn't yield a match, manager_dispatch_sigchld() > keeps pruning the leaf component. This means that the function may return a > slice unit for a pid and as a slice doesn't have ->sigchld_event() handler, > calling invoke_sigchld_event() on it causes a segfault. > > This patch updates invoke_sigchld_event() so that it skips calling if the > handler is not set. > > Signed-off-by: Tejun Heo <[email protected]> > --- > src/core/manager.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/core/manager.c b/src/core/manager.c > index f13e933..78f2a21 100644 > --- a/src/core/manager.c > +++ b/src/core/manager.c > @@ -1631,7 +1631,9 @@ static void invoke_sigchld_event(Manager *m, Unit *u, > const siginfo_t *si) { > log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, > u->id); > > unit_unwatch_pid(u, si->si_pid); > - UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, > si->si_status); > + > + if (UNIT_VTABLE(u)->sigchld_event) > + UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, > si->si_status); > } > > static int manager_dispatch_sigchld(Manager *m) { > -- > 2.5.5 > > _______________________________________________ > systemd-devel mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/systemd-devel _______________________________________________ systemd-devel mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/systemd-devel
