Migration may occur when waking up a process, so we must update
rq->nr_uninterruptible before set_task_cpu(), otherwise we will
decrement nr_uninterruptible on the wrong rq. Over time this makes
the per-rq accounting drift: some runqueues grow too large while
others go negative.

Also change the type of rq->nr_uninterruptible to atomic_t.
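
For illustration only, a minimal user-space sketch (not kernel code;
the array merely stands in for per-rq counters) of how decrementing
on the wrong rq makes per-rq accounting drift while the global sum
stays balanced:

	#include <stdio.h>

	int main(void)
	{
		long nr_uninterruptible[2] = { 0, 0 };

		for (int i = 0; i < 1000; i++) {
			nr_uninterruptible[0]++; /* deactivate_task() on rq0 */
			nr_uninterruptible[1]--; /* buggy wakeup: decrement lands on rq1 */
		}

		/* rq0 = 1000, rq1 = -1000, sum = 0: the global sum used by
		 * loadavg still balances out, but each rq's own accounting
		 * is wrong. */
		printf("rq0=%ld rq1=%ld sum=%ld\n",
		       nr_uninterruptible[0], nr_uninterruptible[1],
		       nr_uninterruptible[0] + nr_uninterruptible[1]);
		return 0;
	}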

Signed-off-by: Cheng Jian <[email protected]>
---
 kernel/sched/core.c    | 14 +++++++++-----
 kernel/sched/debug.c   |  2 +-
 kernel/sched/loadavg.c |  2 +-
 kernel/sched/sched.h   |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f1..4d3bbc1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1198,7 +1198,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
        if (task_contributes_to_load(p))
-               rq->nr_uninterruptible--;
+               atomic_dec(&rq->nr_uninterruptible);
 
        enqueue_task(rq, p, flags);
 
@@ -1210,7 +1210,7 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
        p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
 
        if (task_contributes_to_load(p))
-               rq->nr_uninterruptible++;
+               atomic_inc(&rq->nr_uninterruptible);
 
        dequeue_task(rq, p, flags);
 }
@@ -2135,9 +2135,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
        lockdep_assert_held(&rq->lock);
 
 #ifdef CONFIG_SMP
-       if (p->sched_contributes_to_load)
-               rq->nr_uninterruptible--;
-
        if (wake_flags & WF_MIGRATED)
                en_flags |= ENQUEUE_MIGRATED;
 #endif
@@ -2500,11 +2497,15 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
 
+       /* update the old rq's accounting before set_task_cpu() */
        if (p->in_iowait) {
                delayacct_blkio_end(p);
                atomic_dec(&task_rq(p)->nr_iowait);
        }
 
+       if (p->sched_contributes_to_load)
+               atomic_dec(&task_rq(p)->nr_uninterruptible);
+
        cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
        if (task_cpu(p) != cpu) {
                wake_flags |= WF_MIGRATED;
@@ -2519,6 +2520,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                atomic_dec(&task_rq(p)->nr_iowait);
        }
 
+       if (p->sched_contributes_to_load)
+               atomic_dec(&task_rq(p)->nr_uninterruptible);
+
 #endif /* CONFIG_SMP */
 
        ttwu_queue(p, cpu, wake_flags);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f7e4579..fa2c1bc 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -641,7 +641,7 @@ do {                                                        \
        P(nr_running);
        P(nr_switches);
        P(nr_load_updates);
-       P(nr_uninterruptible);
+       SEQ_printf(m, "  .%-30s: %d\n", "nr_uninterruptible", atomic_read(&rq->nr_uninterruptible));
        PN(next_balance);
        SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
        PN(clock);
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a5165..cae7643 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -81,7 +81,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
        long nr_active, delta = 0;
 
        nr_active = this_rq->nr_running - adjust;
-       nr_active += (long)this_rq->nr_uninterruptible;
+       nr_active += (long)atomic_read(&this_rq->nr_uninterruptible);
 
        if (nr_active != this_rq->calc_load_active) {
                delta = nr_active - this_rq->calc_load_active;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3..8429281 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -890,7 +890,7 @@ struct rq {
         * one CPU and if it got migrated afterwards it may decrease
         * it on another CPU. Always updated under the runqueue lock:
         */
-       unsigned long           nr_uninterruptible;
+       atomic_t                nr_uninterruptible;
 
        struct task_struct      *curr;
        struct task_struct      *idle;
-- 
2.7.4
