On Tue, 2019-08-13 at 14:14 -0700, Stefano Stabellini wrote:
> On Tue, 13 Aug 2019, Dario Faggioli wrote:
> >
> > I am attaching an updated debug patch, with an additional printk at
> > the point, within the null scheduler, where the vcpu would wake up
> > (to check whether the problem is that we never reach that point, or
> > something else).
>
> See attached.
>
Ok, so we're missing neither an "online call" nor a wakeup.
As Julien has identified, we seem to be stuck in a loop. Now, while staring at the code of that loop, I've seen that pick_cpu() may clobber the scratch cpumask for the CPU. The loop condition in null_vcpu_wake() re-reads that same scratch mask on every iteration, so I don't think that is a good thing.

So, can you also try this third debug patch, which uses a local mask there instead?

Thanks and Regards
--
Dario Faggioli, Ph.D
http://about.me/dario.faggioli
Virtualization Software Engineer
SUSE Labs, SUSE https://www.suse.com/
-------------------------------------------------------------------
<<This happens because _I_ choose it to happen!>> (Raistlin Majere)
diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 26c6f0f129..f90b146209 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -455,6 +455,7 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
if ( unlikely(!is_vcpu_online(v)) )
{
+ dprintk(XENLOG_G_INFO, "Not inserting %pv (not online!)\n", v);
vcpu_schedule_unlock_irq(lock, v);
return;
}
@@ -516,6 +517,7 @@ static void null_vcpu_remove(const struct scheduler *ops, struct vcpu *v)
/* If offline, the vcpu shouldn't be assigned, nor in the waitqueue */
if ( unlikely(!is_vcpu_online(v)) )
{
+ dprintk(XENLOG_G_INFO, "Not removing %pv (wasn't online!)\n", v);
ASSERT(per_cpu(npc, v->processor).vcpu != v);
ASSERT(list_empty(&nvc->waitq_elem));
goto out;
@@ -571,14 +573,17 @@ static void null_vcpu_wake(const struct scheduler *ops, struct vcpu *v)
*/
if ( unlikely(per_cpu(npc, cpu).vcpu != v && list_empty(&nvc->waitq_elem)) )
{
+ cpumask_t mask;
+
+ dprintk(XENLOG_G_INFO, "%pv is waking up after having been offline\n", v);
spin_lock(&prv->waitq_lock);
list_add_tail(&nvc->waitq_elem, &prv->waitq);
spin_unlock(&prv->waitq_lock);
- cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+ cpumask_and(&mask, v->cpu_hard_affinity,
cpupool_domain_cpumask(v->domain));
- if ( !cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
+ if ( !cpumask_intersects(&prv->cpus_free, &mask) )
{
dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
v->domain->domain_id, v->vcpu_id);
@@ -595,7 +600,7 @@ static void null_vcpu_wake(const struct scheduler *ops, struct vcpu *v)
* - if we're racing already, and if there still are free cpus, try
* again.
*/
- while ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
+ while ( cpumask_intersects(&prv->cpus_free, &mask) )
{
unsigned int new_cpu = pick_cpu(prv, v);
@@ -635,6 +640,8 @@ static void null_vcpu_sleep(const struct scheduler *ops, struct vcpu *v)
}
else if ( per_cpu(npc, cpu).vcpu == v )
tickled = vcpu_deassign(prv, v);
+
+ dprintk(XENLOG_G_INFO, "%pv is, apparently, going offline (tickled=%d)\n", v, tickled);
}
/* If v is not assigned to a pCPU, or is not running, no need to bother */
@@ -697,6 +704,8 @@ static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
*/
if ( unlikely(!is_vcpu_online(v)) )
{
+ dprintk(XENLOG_G_INFO, "%pv is, apparently, going offline\n", v);
+
spin_lock(&prv->waitq_lock);
list_del_init(&nvc->waitq_elem);
spin_unlock(&prv->waitq_lock);
signature.asc
Description: This is a digitally signed message part
_______________________________________________ Xen-devel mailing list [email protected] https://lists.xenproject.org/mailman/listinfo/xen-devel
