On Sat, Mar 07 2026 at 23:29, Thomas Gleixner wrote:
> I'll look at it more tomorrow in the hope that this rested brain
> approach works out again.
There is another one of the same category. Combo patch below.
Thanks,
tglx
---
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10584,6 +10584,11 @@ static void mm_cid_fixup_cpus_to_tasks(s
/* Remote access to mm::mm_cid::pcpu requires rq_lock */
guard(rq_lock_irq)(rq);
+
+ /* If the transit bit is set already, nothing to do anymore. */
+ if (cid_in_transit(pcp->cid))
+ continue;
+
/* Is the CID still owned by the CPU? */
if (cid_on_cpu(pcp->cid)) {
/*
@@ -10598,12 +10603,9 @@ static void mm_cid_fixup_cpus_to_tasks(s
} else if (rq->curr->mm == mm && rq->curr->mm_cid.active) {
unsigned int cid = rq->curr->mm_cid.cid;
- /* Ensure it has the transition bit set */
- if (!cid_in_transit(cid)) {
- cid = cid_to_transit_cid(cid);
- rq->curr->mm_cid.cid = cid;
- pcp->cid = cid;
- }
+ cid = cid_to_transit_cid(cid);
+ rq->curr->mm_cid.cid = cid;
+ pcp->cid = cid;
}
}
mm_cid_complete_transit(mm, 0);
@@ -10733,11 +10735,30 @@ void sched_mm_cid_fork(struct task_struc
static bool sched_mm_cid_remove_user(struct task_struct *t)
{
t->mm_cid.active = 0;
- scoped_guard(preempt) {
- /* Clear the transition bit */
+ /*
+ * If @t is current and the CID is in transition mode, then this has to
+ * handle both the task and the per CPU storage.
+ *
+ * If the CID has TRANSIT and ONCPU set, then mm_unset_cid_on_task()
+ * won't drop the CID. As @t already has mm_cid::active cleared,
+ * mm_cid_schedout() won't drop it either.
+ *
+ * A failed fork cleanup can't have the transit bit set because the task
+ * never showed up in the task list or got on a CPU.
+ */
+ if (t == current) {
+ /* Invalidate the per CPU CID */
+ this_cpu_ptr(t->mm->mm_cid.pcpu)->cid = 0;
+ /*
+ * Clear TRANSIT and ONCPU, so the CID gets actually dropped
+ * below.
+ */
t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid);
- mm_unset_cid_on_task(t);
+ t->mm_cid.cid = cpu_cid_to_cid(t->mm_cid.cid);
}
+
+ mm_unset_cid_on_task(t);
+
t->mm->mm_cid.users--;
return mm_update_max_cids(t->mm);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3809,7 +3809,8 @@ static __always_inline bool cid_on_task(
static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
{
- clear_bit(cid, mm_cidmask(mm));
+ if (!WARN_ON_ONCE(cid >= num_possible_cpus()))
+ clear_bit(cid, mm_cidmask(mm));
}
static __always_inline void mm_unset_cid_on_task(struct task_struct *t)
@@ -3978,7 +3979,13 @@ static __always_inline void mm_cid_sched
return;
mode = READ_ONCE(mm->mm_cid.mode);
+
+ /*
+ * Needs to clear both TRANSIT and ONCPU to make the range comparison
+ * and mm_drop_cid() work correctly.
+ */
cid = cid_from_transit_cid(prev->mm_cid.cid);
+ cid = cpu_cid_to_cid(cid);
/*
* If transition mode is done, transfer ownership when the CID is
@@ -3994,6 +4001,11 @@ static __always_inline void mm_cid_sched
} else {
mm_drop_cid(mm, cid);
prev->mm_cid.cid = MM_CID_UNSET;
+ /*
+ * Invalidate the per CPU CID so that the next mm_cid_schedin()
+ * can't observe MM_CID_ONCPU on the per CPU CID.
+ */
+ mm_cid_update_pcpu_cid(mm, 0);
}
}