After PSBM-34089 is done, there's no need for the hacks that allowed us to attach tasks to cpuset cgroups with empty cpuset.cpus or cpuset.mems. So let's revert them.
https://jira.sw.ru/browse/PSBM-42087 Signed-off-by: Vladimir Davydov <[email protected]> --- kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 17 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 81030b340dbd..123cdc5b58cf 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex); static DEFINE_MUTEX(callback_mutex); /* - * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() - * but we can't allocate it dynamically there. Define it global and - * allocate from cpuset_init(). - */ -static cpumask_var_t cpus_attach; - - -/* * CPU / memory hotplug is handled asynchronously. */ static struct workqueue_struct *cpuset_propagate_hotplug_wq; @@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) goto out; } + /* + * Cpusets with tasks - existing or newly being attached - can't + * have empty cpus_allowed or mems_allowed. 
+ */ + ret = -ENOSPC; + if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) && + (cpumask_empty(trial->cpus_allowed) || + nodes_empty(trial->mems_allowed))) + goto out; + ret = 0; out: rcu_read_unlock(); @@ -812,7 +814,8 @@ void rebuild_sched_domains(void) static int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - return !cpumask_equal(&tsk->cpus_allowed, cpus_attach); + return !cpumask_equal(&tsk->cpus_allowed, + (cgroup_cs(scan->cg))->cpus_allowed); } /** @@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk, static void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - set_cpus_allowed_ptr(tsk, cpus_attach); + set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed)); } /** @@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) { struct cgroup_scanner scan; - guarantee_online_cpus(cs, cpus_attach); scan.cg = cs->css.cgroup; scan.test_task = cpuset_test_cpumask; scan.process_task = cpuset_change_cpumask; @@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf) return -ENOMEM; /* + * An empty cpus_allowed is ok only if the cpuset has no tasks. * Since cpulist_parse() fails on an empty mask, we special case - * that parsing. + * that parsing. The validate_change() call ensures that cpusets + * with tasks have cpus. 
*/ if (!*buf) cpumask_clear(cpus_allowed); @@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p, migrate = is_memory_migrate(cs); - mpol_rebind_mm(mm, &newmems); + mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) - cpuset_migrate_mm(mm, oldmem, &newmems); + cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); mmput(mm); } @@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs, trialcs->mems_allowed = *mems_allowed; - guarantee_online_mems(cs, oldmem); + *oldmem = cs->mems_allowed; if (nodes_equal(*oldmem, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; @@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf) return -ENOMEM; /* + * An empty mems_allowed is ok iff there are no tasks in the cpuset. * Since nodelist_parse() fails on an empty mask, we special case - * that parsing. + * that parsing. The validate_change() call ensures that cpusets + * with tasks have memory. */ if (!*buf) nodes_clear(*mems_allowed); @@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) mutex_lock(&cpuset_mutex); + ret = -ENOSPC; + if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) + goto out_unlock; + cgroup_taskset_for_each(task, cgrp, tset) { /* * Kthreads which disallow setaffinity shouldn't be moved @@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp, mutex_unlock(&cpuset_mutex); } +/* + * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach() + * but we can't allocate it dynamically there. Define it global and + * allocate from cpuset_init(). 
+ */ +static cpumask_var_t cpus_attach; + static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { /* static bufs protected by cpuset_mutex */ @@ -2103,18 +2120,48 @@ int __init cpuset_init(void) return 0; } +/* + * If CPU and/or memory hotplug handlers, below, unplug any CPUs + * or memory nodes, we need to walk over the cpuset hierarchy, + * removing that CPU or node from all cpusets. If this removes the + * last CPU or node from a cpuset, then move the tasks in the empty + * cpuset to its next-highest non-empty parent. + */ +static void remove_tasks_in_empty_cpuset(struct cpuset *cs) +{ + struct cpuset *parent; + + /* + * Find its next-highest non-empty parent, (top cpuset + * has online cpus, so can't be empty). + */ + parent = parent_cs(cs); + while (cpumask_empty(parent->cpus_allowed) || + nodes_empty(parent->mems_allowed)) + parent = parent_cs(parent); + + if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { + rcu_read_lock(); + printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n", + cgroup_name(cs->css.cgroup)); + rcu_read_unlock(); + } +} + /** * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset * @cs: cpuset in interest * * Compare @cs's cpu and mem masks against top_cpuset and if some have gone - * offline, update @cs accordingly. + * offline, update @cs accordingly. If @cs ends up with no CPU or memory, + * all its tasks are moved to the nearest ancestor with both resources. 
*/ static void cpuset_propagate_hotplug_workfn(struct work_struct *work) { static cpumask_t off_cpus; static nodemask_t off_mems, tmp_mems; struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); + bool is_empty; mutex_lock(&cpuset_mutex); @@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(cs, &tmp_mems, NULL); } + is_empty = cpumask_empty(cs->cpus_allowed) || + nodes_empty(cs->mems_allowed); + mutex_unlock(&cpuset_mutex); + /* + * If @cs became empty, move tasks to the nearest ancestor with + * execution resources. This is full cgroup operation which will + * also call back into cpuset. Should be done outside any lock. + */ + if (is_empty) + remove_tasks_in_empty_cpuset(cs); + /* the following may free @cs, should be the last operation */ css_put(&cs->css); } -- 2.1.4 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
