A CDAN wakes the worker through the portal's VFIO eventfd, which is
signalled regardless of the core that took the MSI, so the wake is
correct either way. By default the MSI affinity is spread across the
housekeeping cores, so the wake crosses cores and adds a little latency.

Pin each DPIO's MSI to the lcore that arms it (as the event driver does)
so the wake lands on the worker's own core. This is a latency
optimisation, not a correctness fix.

Split dpaa2_affine_dpio_intr_to_respective_core out of the RTE_EVENT_DPAA2
guard, expose dpaa2_dpio_affine_intr_to_core, and call it from the rx-queue
interrupt arm path. The pin is best-effort: it writes
/proc/irq/<n>/smp_affinity (needs privilege) and irqbalance may move it
back.

Signed-off-by: Maxime Leroy <[email protected]>
---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c | 45 +++++++++++++-----------
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.h |  4 +++
 drivers/net/dpaa2/dpaa2_ethdev.c         |  2 ++
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 354d81b4d1..057f2bb70d 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -149,7 +149,6 @@ dpaa2_core_cluster_sdest(int cpu_id)
        return dpaa2_core_cluster_base + x;
 }
 
-#ifdef RTE_EVENT_DPAA2
 static void
 dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
 {
@@ -204,7 +203,19 @@ dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
 
        fclose(file);
 }
-#endif /* RTE_EVENT_DPAA2 */
+
+/* Pin the dpio's MSI IRQ to the calling lcore's core (best-effort), so a
+ * worker sleeping on this portal's eventfd is woken on its own core.
+ */
+RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_affine_intr_to_core)
+void
+dpaa2_dpio_affine_intr_to_core(int32_t dpio_id)
+{
+       int cpu_id = dpaa2_get_core_id();
+
+       if (cpu_id >= 0)
+               dpaa2_affine_dpio_intr_to_respective_core(dpio_id, cpu_id);
+}
 
 /* arm the portal DQRI (threshold/timeout); idempotent, first caller per portal wins */
 RTE_EXPORT_INTERNAL_SYMBOL(dpaa2_dpio_intr_init)
@@ -299,31 +310,25 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id, bool ethrx
        }
 
 #ifdef RTE_EVENT_DPAA2
-       {
-               /* ethrx portal: immediate DQRI (1, 0); event portal: coalesced (3, 0xFF).
-                * Each mode is tunable through its own env vars.
-                */
-               const char *thr_env = "DPAA2_PORTAL_INTR_THRESHOLD";
-               const char *to_env = "DPAA2_PORTAL_INTR_TIMEOUT";
+       /* only the event PMD's portal is set up here; the net ethrx portal is
+        * configured by the net PMD in rx_queue_intr_enable
+        */
+       if (!ethrx) {
                int threshold = 3, timeout = 0xFF;
 
-               if (ethrx) {
-                       thr_env = "DPAA2_PORTAL_ETHRX_INTR_THRESHOLD";
-                       to_env = "DPAA2_PORTAL_ETHRX_INTR_TIMEOUT";
-                       threshold = 1;
-                       timeout = 0;
-               }
-               if (getenv(thr_env))
-                       threshold = atoi(getenv(thr_env));
-               if (getenv(to_env))
-                       sscanf(getenv(to_env), "%x", &timeout);
+               if (getenv("DPAA2_PORTAL_INTR_THRESHOLD"))
+                       threshold = atoi(getenv("DPAA2_PORTAL_INTR_THRESHOLD"));
+               if (getenv("DPAA2_PORTAL_INTR_TIMEOUT"))
+                       sscanf(getenv("DPAA2_PORTAL_INTR_TIMEOUT"), "%x", &timeout);
 
-               if (dpaa2_dpio_intr_init(dpio_dev, threshold, timeout, !ethrx)) {
+               if (dpaa2_dpio_intr_init(dpio_dev, threshold, timeout, true)) {
                        DPAA2_BUS_ERR("Interrupt registration failed for dpio");
                        return -1;
                }
+               dpaa2_affine_dpio_intr_to_respective_core(dpio_dev->hw_id, cpu_id);
        }
-       dpaa2_affine_dpio_intr_to_respective_core(dpio_dev->hw_id, cpu_id);
+#else
+       RTE_SET_USED(ethrx);
 #endif
 
        return 0;
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
index 1f6e521341..7e2271cc35 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
@@ -58,6 +58,10 @@ int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, int threshold,
 __rte_internal
 void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev);
 
+/* pin a DPIO's MSI IRQ to the calling lcore's core (rx-queue interrupt mode) */
+__rte_internal
+void dpaa2_dpio_affine_intr_to_core(int32_t dpio_id);
+
 /* allocate memory for FQ - dq storage */
 __rte_internal
 int
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 36f8669644..2a721eb005 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -3148,6 +3148,8 @@ dpaa2_napi_subscribe(struct rte_eth_dev *dev, uint16_t queue_id,
        }
        rte_atomic_store_explicit(&dpaa2_q->napi_sub_dpio, dpio,
                        rte_memory_order_release);
+       /* pin this portal's MSI to the worker's core so a CDAN wake lands locally */
+       dpaa2_dpio_affine_intr_to_core(dpio->hw_id);
        return 0;
 }
 
-- 
2.43.0

Reply via email to