After PSBM-34089 is done, there's no need for the hacks that allowed us to
attach tasks to cpuset cgroups with empty cpuset.cpus or cpuset.mems.
So let's revert them.

https://jira.sw.ru/browse/PSBM-42087

Signed-off-by: Vladimir Davydov <[email protected]>
---
 kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 17 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 81030b340dbd..123cdc5b58cf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
 /*
- * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
- * but we can't allocate it dynamically there.  Define it global and
- * allocate from cpuset_init().
- */
-static cpumask_var_t cpus_attach;
-
-
-/*
  * CPU / memory hotplug is handled asynchronously.
  */
 static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
                        goto out;
        }
 
+       /*
+        * Cpusets with tasks - existing or newly being attached - can't
+        * have empty cpus_allowed or mems_allowed.
+        */
+       ret = -ENOSPC;
+       if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
+           (cpumask_empty(trial->cpus_allowed) ||
+            nodes_empty(trial->mems_allowed)))
+               goto out;
+
        ret = 0;
 out:
        rcu_read_unlock();
@@ -812,7 +814,8 @@ void rebuild_sched_domains(void)
 static int cpuset_test_cpumask(struct task_struct *tsk,
                               struct cgroup_scanner *scan)
 {
-       return !cpumask_equal(&tsk->cpus_allowed, cpus_attach);
+       return !cpumask_equal(&tsk->cpus_allowed,
+                       (cgroup_cs(scan->cg))->cpus_allowed);
 }
 
 /**
@@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
 static void cpuset_change_cpumask(struct task_struct *tsk,
                                  struct cgroup_scanner *scan)
 {
-       set_cpus_allowed_ptr(tsk, cpus_attach);
+       set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
 }
 
 /**
@@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
 {
        struct cgroup_scanner scan;
 
-       guarantee_online_cpus(cs, cpus_attach);
        scan.cg = cs->css.cgroup;
        scan.test_task = cpuset_test_cpumask;
        scan.process_task = cpuset_change_cpumask;
@@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
                return -ENOMEM;
 
        /*
+        * An empty cpus_allowed is ok only if the cpuset has no tasks.
         * Since cpulist_parse() fails on an empty mask, we special case
-        * that parsing.
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have cpus.
         */
        if (!*buf)
                cpumask_clear(cpus_allowed);
@@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p,
 
        migrate = is_memory_migrate(cs);
 
-       mpol_rebind_mm(mm, &newmems);
+       mpol_rebind_mm(mm, &cs->mems_allowed);
        if (migrate)
-               cpuset_migrate_mm(mm, oldmem, &newmems);
+               cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
        mmput(mm);
 }
 
@@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs,
 
        trialcs->mems_allowed = *mems_allowed;
 
-       guarantee_online_mems(cs, oldmem);
+       *oldmem = cs->mems_allowed;
        if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
                retval = 0;             /* Too easy - nothing to do */
                goto done;
@@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
                return -ENOMEM;
 
        /*
+        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
         * Since nodelist_parse() fails on an empty mask, we special case
-        * that parsing.
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have memory.
         */
        if (!*buf)
                nodes_clear(*mems_allowed);
@@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
        mutex_lock(&cpuset_mutex);
 
+       ret = -ENOSPC;
+       if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+               goto out_unlock;
+
        cgroup_taskset_for_each(task, cgrp, tset) {
                /*
                 * Kthreads which disallow setaffinity shouldn't be moved
@@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
        mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
+ * but we can't allocate it dynamically there.  Define it global and
+ * allocate from cpuset_init().
+ */
+static cpumask_var_t cpus_attach;
+
 static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
        /* static bufs protected by cpuset_mutex */
@@ -2103,18 +2120,48 @@ int __init cpuset_init(void)
        return 0;
 }
 
+/*
+ * If CPU and/or memory hotplug handlers, below, unplug any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets.  If this removes the
+ * last CPU or node from a cpuset, then move the tasks in the empty
+ * cpuset to its next-highest non-empty parent.
+ */
+static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
+{
+       struct cpuset *parent;
+
+       /*
+        * Find its next-highest non-empty parent, (top cpuset
+        * has online cpus, so can't be empty).
+        */
+       parent = parent_cs(cs);
+       while (cpumask_empty(parent->cpus_allowed) ||
+                       nodes_empty(parent->mems_allowed))
+               parent = parent_cs(parent);
+
+       if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+               rcu_read_lock();
+               printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+                      cgroup_name(cs->css.cgroup));
+               rcu_read_unlock();
+       }
+}
+
 /**
  * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
  *
  * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
- * offline, update @cs accordingly.
+ * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
  */
 static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
 {
        static cpumask_t off_cpus;
        static nodemask_t off_mems, tmp_mems;
        struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
+       bool is_empty;
 
        mutex_lock(&cpuset_mutex);
 
@@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
                update_tasks_nodemask(cs, &tmp_mems, NULL);
        }
 
+       is_empty = cpumask_empty(cs->cpus_allowed) ||
+               nodes_empty(cs->mems_allowed);
+
        mutex_unlock(&cpuset_mutex);
 
+       /*
+        * If @cs became empty, move tasks to the nearest ancestor with
+        * execution resources.  This is full cgroup operation which will
+        * also call back into cpuset.  Should be done outside any lock.
+        */
+       if (is_empty)
+               remove_tasks_in_empty_cpuset(cs);
+
        /* the following may free @cs, should be the last operation */
        css_put(&cs->css);
 }
-- 
2.1.4

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to