On Wed, 2019-08-14 at 19:35 +0200, Dario Faggioli wrote: > On Wed, 2019-08-14 at 09:27 -0700, Stefano Stabellini wrote: > > On Wed, 14 Aug 2019, Dario Faggioli wrote: > > > On Tue, 2019-08-13 at 14:14 -0700, Stefano Stabellini wrote: > > > Now, while staring at the code of that loop, I've seen that > > > pick_cpu() > > > may mess up with the scratch cpumask for the CPU, which I don't > > > think > > > is a good thing. > > > > > > So, can you also try this third debug-patch? > > > > Yep, see attached > > > Ok, thanks again. So, cpumask_scratch() being mishandled was part of > the problem, but not the root-cause. > > Well, it was worth a shot. :-P > > I think we need to get rid of the loop in which we're stuck. > Hey, Stefano, Julien,
Here's another patch. Rather than a debug patch, this one is an actual "proposed solution". Can you give it a go? If it works, I'll spin it as a proper patch. Thanks! -- Dario Faggioli, Ph.D http://about.me/dario.faggioli Virtualization Software Engineer SUSE Labs, SUSE https://www.suse.com/ ------------------------------------------------------------------- <<This happens because _I_ choose it to happen!>> (Raistlin Majere)
diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 26c6f0f129..4fc6f3a3c5 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -565,50 +565,52 @@ static void null_vcpu_wake(const struct scheduler *ops, struct vcpu *v)
else
SCHED_STAT_CRANK(vcpu_wake_not_runnable);
+ if ( likely(per_cpu(npc, cpu).vcpu == v) )
+ {
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+ return;
+ }
+
/*
* If a vcpu is neither on a pCPU nor in the waitqueue, it means it was
- * offline, and that it is now coming back being online.
+ * offline, and that it is now coming back being online. If we're lucky,
+ * and v->processor is free (and affinities match), we can just assign
+ * the vcpu to it (we own the proper lock already) and be done.
*/
- if ( unlikely(per_cpu(npc, cpu).vcpu != v && list_empty(&nvc->waitq_elem)) )
+ if ( per_cpu(npc, cpu).vcpu == NULL &&
+ vcpu_check_affinity(v, cpu, BALANCE_HARD_AFFINITY) )
{
- spin_lock(&prv->waitq_lock);
- list_add_tail(&nvc->waitq_elem, &prv->waitq);
- spin_unlock(&prv->waitq_lock);
-
- cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
- cpupool_domain_cpumask(v->domain));
-
- if ( !cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
+ if ( !has_soft_affinity(v) ||
+ vcpu_check_affinity(v, cpu, BALANCE_SOFT_AFFINITY) )
{
- dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
- v->domain->domain_id, v->vcpu_id);
+ vcpu_assign(prv, v, cpu);
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
return;
}
+ }
- /*
- * Now we would want to assign the vcpu to cpu, but we can't, because
- * we don't have the lock. So, let's do the following:
- * - try to remove cpu from the list of free cpus, to avoid races with
- * other onlining, inserting or migrating operations;
- * - tickle the cpu, which will pickup work from the waitqueue, and
- * assign it to itself;
- * - if we're racing already, and if there still are free cpus, try
- * again.
- */
- while ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
- {
- unsigned int new_cpu = pick_cpu(prv, v);
+ /*
+ * If v->processor is not free (or affinities do not match) we need
+ * to assign v to some other CPU, but we can't do it here, as:
+ * - we don't own the proper lock,
+ * - we can't change v->processor under vcpu_wake()'s feet.
+ * So we add it to the waitqueue, and tickle all the free CPUs (if any)
+ * on which v can run. The first one that schedules will pick it up.
+ */
+ spin_lock(&prv->waitq_lock);
+ list_add_tail(&nvc->waitq_elem, &prv->waitq);
+ spin_unlock(&prv->waitq_lock);
- if ( test_and_clear_bit(new_cpu, &prv->cpus_free) )
- {
- cpu_raise_softirq(new_cpu, SCHEDULE_SOFTIRQ);
- return;
- }
- }
- }
+ cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+ cpupool_domain_cpumask(v->domain));
+ cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+ &prv->cpus_free);
- /* Note that we get here only for vCPUs assigned to a pCPU */
- cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
+ if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+ dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
+ v->domain->domain_id, v->vcpu_id);
+ else
+ cpumask_raise_softirq(cpumask_scratch_cpu(cpu), SCHEDULE_SOFTIRQ);
}
static void null_vcpu_sleep(const struct scheduler *ops, struct vcpu *v)
@@ -822,6 +824,8 @@ static struct task_slice null_schedule(const struct scheduler *ops,
*/
if ( unlikely(ret.task == NULL) )
{
+ bool vcpu_found;
+
spin_lock(&prv->waitq_lock);
if ( list_empty(&prv->waitq) )
@@ -834,6 +838,7 @@ static struct task_slice null_schedule(const struct scheduler *ops,
* it only in cases where a pcpu has no vcpu associated (e.g., as
* said above, the cpu has just joined a cpupool).
*/
+ vcpu_found = false;
for_each_affinity_balance_step( bs )
{
list_for_each_entry( wvc, &prv->waitq, waitq_elem )
@@ -844,13 +849,44 @@ static struct task_slice null_schedule(const struct scheduler *ops,
if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
{
- vcpu_assign(prv, wvc->vcpu, cpu);
- list_del_init(&wvc->waitq_elem);
- ret.task = wvc->vcpu;
- goto unlock;
+ spinlock_t *lock;
+
+ vcpu_found = true;
+
+ /*
+ * If the vcpu in the waitqueue has just come up online,
+ * we risk racing with vcpu_wake(). To avoid this, sync
+ * on the spinlock that vcpu_wake() holds, while waking up
+ * this vcpu (but only with trylock, or we may deadlock).
+ */
+ lock = pcpu_schedule_trylock(wvc->vcpu->processor);
+
+ /*
+ * We know the vcpu's lock is not this cpu's lock. In
+ * fact, if it were, since this cpu is free, vcpu_wake()
+ * would have assigned the vcpu to this cpu directly.
+ */
+ ASSERT(lock != per_cpu(schedule_data, cpu).schedule_lock);
+
+ if ( lock ) {
+ vcpu_assign(prv, wvc->vcpu, cpu);
+ list_del_init(&wvc->waitq_elem);
+ ret.task = wvc->vcpu;
+ spin_unlock(lock);
+ goto unlock;
+ }
}
}
}
+ /*
+ * If we did find a vcpu with suitable affinity in the waitqueue, but
+ * we could not pick it up (due to lock contention), and hence we are
+ * still free, plan for another try. In fact, we don't want such vcpu
+ * to be stuck in the waitqueue, when there are free cpus where it
+ * could run.
+ */
+ if ( unlikely( vcpu_found && ret.task == NULL && !list_empty(&prv->waitq)) )
+ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
unlock:
spin_unlock(&prv->waitq_lock);
signature.asc
Description: This is a digitally signed message part
_______________________________________________ Xen-devel mailing list [email protected] https://lists.xenproject.org/mailman/listinfo/xen-devel
