On 13.03.25 07:51, Choi, Anderson wrote:
We are observing incorrect or unexpected behavior with the ARINC653 scheduler 
when we set up multiple ARINC653 CPU pools and assign a different number of 
domains to each pool.

...

It seems there is a corner case in the use of the static variables "sched_index" and 
"next_switch_time" when multiple ARINC653 cpupools are running on different physical CPUs.

The variables sched_index and next_switch_time are defined as function-local statics in 
xen/common/sched/arinc653.c, as shown below.

static void cf_check
a653sched_do_schedule(
     const struct scheduler *ops,
     struct sched_unit *prev,
     s_time_t now,
     bool tasklet_work_scheduled)
{
     struct sched_unit *new_task = NULL;
     static unsigned int sched_index = 0;    <==
     static s_time_t next_switch_time;       <==
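
To illustrate the failure mode outside of Xen, here is a minimal standalone sketch 
(hypothetical code, not Xen's; the pool names and schedule lengths are made up). A 
function-local static is a single shared object, no matter how many scheduler 
instances end up calling the function:

#include <stdio.h>

static unsigned int next_entry(unsigned int num_entries)
{
    static unsigned int sched_index = 0;   /* one copy for ALL callers */

    sched_index = (sched_index + 1) % num_entries;
    return sched_index;
}

int main(void)
{
    printf("pool A: %u\n", next_entry(4));  /* 1 */
    printf("pool A: %u\n", next_entry(4));  /* 2 */
    printf("pool B: %u\n", next_entry(2));  /* (2+1) %% 2 = 1: B inherits A's index */
    printf("pool A: %u\n", next_entry(4));  /* 2 instead of 3: A's progression is corrupted */
    return 0;
}

With two cpupools of different schedule lengths, each pool's calls advance the other 
pool's index, exactly the cross-pool interference reported above.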

Thanks for the report!

Could you please verify that the attached patch fixes your problem?

And please tell me whether adding you as "Reported-by:" is fine with you!


Juergen
From 2766aeaf3ccbe9fec2c3faa6fde314cda891f34c Mon Sep 17 00:00:00 2001
From: Juergen Gross <[email protected]>
Date: Thu, 13 Mar 2025 08:12:00 +0100
Subject: [PATCH] xen/sched: fix arinc653 to not use variables across cpupools

a653sched_do_schedule() uses two function-local static variables,
which results in bad behavior when more than one cpupool is using
the arinc653 scheduler.

Fix that by moving those variables to the scheduler private data.

Fixes: 22787f2e107c ("ARINC 653 scheduler")
Reported-by: Choi, Anderson <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
 xen/common/sched/arinc653.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/xen/common/sched/arinc653.c b/xen/common/sched/arinc653.c
index a82c0d7314..9ebae6d7ae 100644
--- a/xen/common/sched/arinc653.c
+++ b/xen/common/sched/arinc653.c
@@ -143,6 +143,12 @@ typedef struct a653sched_priv_s
      * pointers to all Xen UNIT structures for iterating through
      */
     struct list_head unit_list;
+
+    /**
+     * scheduling housekeeping variables
+     */
+    unsigned int sched_index;
+    s_time_t next_switch_time;
 } a653sched_priv_t;
 
 /**************************************************************************
@@ -513,8 +519,6 @@ a653sched_do_schedule(
     bool tasklet_work_scheduled)
 {
     struct sched_unit *new_task = NULL;
-    static unsigned int sched_index = 0;
-    static s_time_t next_switch_time;
     a653sched_priv_t *sched_priv = SCHED_PRIV(ops);
     const unsigned int cpu = sched_get_resource_cpu(smp_processor_id());
     unsigned long flags;
@@ -528,18 +532,19 @@ a653sched_do_schedule(
         /* time to enter a new major frame
          * the first time this function is called, this will be true */
         /* start with the first domain in the schedule */
-        sched_index = 0;
+        sched_priv->sched_index = 0;
         sched_priv->next_major_frame = now + sched_priv->major_frame;
-        next_switch_time = now + sched_priv->schedule[0].runtime;
+        sched_priv->next_switch_time = now + sched_priv->schedule[0].runtime;
     }
     else
     {
-        while ( (now >= next_switch_time)
-                && (sched_index < sched_priv->num_schedule_entries) )
+        while ( (now >= sched_priv->next_switch_time) &&
+                (sched_priv->sched_index < sched_priv->num_schedule_entries) )
         {
             /* time to switch to the next domain in this major frame */
-            sched_index++;
-            next_switch_time += sched_priv->schedule[sched_index].runtime;
+            sched_priv->sched_index++;
+            sched_priv->next_switch_time +=
+                sched_priv->schedule[sched_priv->sched_index].runtime;
         }
     }
 
@@ -547,8 +552,8 @@ a653sched_do_schedule(
      * If we exhausted the domains in the schedule and still have time left
      * in the major frame then switch next at the next major frame.
      */
-    if ( sched_index >= sched_priv->num_schedule_entries )
-        next_switch_time = sched_priv->next_major_frame;
+    if ( sched_priv->sched_index >= sched_priv->num_schedule_entries )
+        sched_priv->next_switch_time = sched_priv->next_major_frame;
 
     /*
      * If there are more domains to run in the current major frame, set
@@ -556,8 +561,8 @@ a653sched_do_schedule(
      * Otherwise, set new_task equal to the address of the idle task's
      * sched_unit structure.
      */
-    new_task = (sched_index < sched_priv->num_schedule_entries)
-        ? sched_priv->schedule[sched_index].unit
+    new_task = (sched_priv->sched_index < sched_priv->num_schedule_entries)
+        ? sched_priv->schedule[sched_priv->sched_index].unit
         : IDLETASK(cpu);
 
     /* Check to see if the new task can be run (awake & runnable). */
@@ -589,7 +594,7 @@ a653sched_do_schedule(
      * Return the amount of time the next domain has to run and the address
      * of the selected task's UNIT structure.
      */
-    prev->next_time = next_switch_time - now;
+    prev->next_time = sched_priv->next_switch_time - now;
     prev->next_task = new_task;
     new_task->migrated = false;
 
-- 
2.43.0
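
For reference, here is the earlier sketch rewritten with the pattern the patch applies: 
the index lives in per-instance private data, so every pool keeps its own copy. 
struct pool_priv and its fields are illustrative stand-ins, not Xen's actual types:

#include <stdio.h>

struct pool_priv {
    unsigned int sched_index;   /* per pool, as in a653sched_priv_t after the patch */
    unsigned int num_entries;
};

static unsigned int next_entry(struct pool_priv *p)
{
    p->sched_index = (p->sched_index + 1) % p->num_entries;
    return p->sched_index;
}

int main(void)
{
    struct pool_priv a = { .sched_index = 0, .num_entries = 4 };
    struct pool_priv b = { .sched_index = 0, .num_entries = 2 };

    printf("pool A: %u\n", next_entry(&a));  /* 1 */
    printf("pool A: %u\n", next_entry(&a));  /* 2 */
    printf("pool B: %u\n", next_entry(&b));  /* 1, independent of A */
    printf("pool A: %u\n", next_entry(&a));  /* 3, A is unaffected */
    return 0;
}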
