The current probe-time ownership check for Soft Reserved memory, based solely on CXL window intersection, is insufficient. dax_hmem probing is not guaranteed to run after CXL enumeration and region assembly, which can lead to incorrect ownership decisions before the CXL stack has finished publishing windows and assembling committed regions.
Introduce deferred ownership handling for Soft Reserved ranges that intersect CXL windows. When such a range is encountered during the initial dax_hmem probe, schedule deferred work to wait for the CXL stack to complete enumeration and region assembly before deciding ownership. Once the deferred work runs, evaluate each Soft Reserved range individually: if a CXL region fully contains the range, skip it and let dax_cxl bind; otherwise, register it with dax_hmem. This per-range ownership model avoids the need for CXL region teardown, and alloc_dax_region() resource exclusion prevents double claiming. Introduce a boolean flag, dax_hmem_initial_probe, that lives in device.c so that it survives module reload. Ensure dax_cxl defers driver registration until dax_hmem has completed ownership resolution: dax_cxl calls dax_hmem_flush_work() before cxl_driver_register(), which both waits for the deferred work to complete and creates a module symbol dependency that forces dax_hmem.ko to load before dax_cxl.
Co-developed-by: Dan Williams <[email protected]> Signed-off-by: Dan Williams <[email protected]> Signed-off-by: Smita Koralahalli <[email protected]> --- drivers/dax/bus.h | 7 +++++ drivers/dax/cxl.c | 1 + drivers/dax/hmem/device.c | 3 ++ drivers/dax/hmem/hmem.c | 66 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 2 deletions(-) diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index cbbf64443098..ebbfe2d6da14 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -49,6 +49,13 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); bool static_dev_dax(struct dev_dax *dev_dax); +#if IS_ENABLED(CONFIG_DEV_DAX_HMEM) +extern bool dax_hmem_initial_probe; +void dax_hmem_flush_work(void); +#else +static inline void dax_hmem_flush_work(void) { } +#endif + #define MODULE_ALIAS_DAX_DEVICE(type) \ MODULE_ALIAS("dax:t" __stringify(type) "*") #define DAX_DEVICE_MODALIAS_FMT "dax:t%d" diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index a2136adfa186..3ab39b77843d 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -44,6 +44,7 @@ static struct cxl_driver cxl_dax_region_driver = { static void cxl_dax_region_driver_register(struct work_struct *work) { + dax_hmem_flush_work(); cxl_driver_register(&cxl_dax_region_driver); } diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 56e3cbd181b5..991a4bf7d969 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,9 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +bool dax_hmem_initial_probe; +EXPORT_SYMBOL_GPL(dax_hmem_initial_probe); + static bool platform_initialized; static DEFINE_MUTEX(hmem_resource_lock); static struct resource hmem_active = { diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 1e3424358490..8c574123bd3b 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -3,6 +3,7 @@ #include <linux/memregion.h> #include 
<linux/module.h> #include <linux/dax.h> +#include <cxl/cxl.h> #include "../bus.h" static bool region_idle; @@ -58,6 +59,19 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } +struct dax_defer_work { + struct platform_device *pdev; + struct work_struct work; +}; + +static struct dax_defer_work dax_hmem_work; + +void dax_hmem_flush_work(void) +{ + flush_work(&dax_hmem_work.work); +} +EXPORT_SYMBOL_GPL(dax_hmem_flush_work); + static int hmem_register_device(struct device *host, int target_nid, const struct resource *res) { @@ -69,8 +83,11 @@ static int hmem_register_device(struct device *host, int target_nid, if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && region_intersects(res->start, resource_size(res), IORESOURCE_MEM, IORES_DESC_CXL) != REGION_DISJOINT) { - dev_dbg(host, "deferring range to CXL: %pr\n", res); - return 0; + if (!dax_hmem_initial_probe) { + dev_dbg(host, "deferring range to CXL: %pr\n", res); + queue_work(system_long_wq, &dax_hmem_work.work); + return 0; + } } rc = region_intersects_soft_reserve(res->start, resource_size(res)); @@ -123,8 +140,48 @@ static int hmem_register_device(struct device *host, int target_nid, return rc; } +static int hmem_register_cxl_device(struct device *host, int target_nid, + const struct resource *res) +{ + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) == REGION_DISJOINT) + return 0; + + if (cxl_region_contains_resource((struct resource *)res)) { + dev_dbg(host, "CXL claims resource, dropping: %pr\n", res); + return 0; + } + + dev_dbg(host, "CXL did not claim resource, registering: %pr\n", res); + return hmem_register_device(host, target_nid, res); +} + +static void process_defer_work(struct work_struct *w) +{ + struct dax_defer_work *work = container_of(w, typeof(*work), work); + struct platform_device *pdev = work->pdev; + + wait_for_device_probe(); + + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + dax_hmem_initial_probe = true; + 
walk_hmem_resources(&pdev->dev, hmem_register_cxl_device); +} + static int dax_hmem_platform_probe(struct platform_device *pdev) { + if (work_pending(&dax_hmem_work.work)) + return -EBUSY; + + if (!dax_hmem_work.pdev) { + get_device(&pdev->dev); + dax_hmem_work.pdev = pdev; + INIT_WORK(&dax_hmem_work.work, process_defer_work); + } + return walk_hmem_resources(&pdev->dev, hmem_register_device); } @@ -162,6 +219,11 @@ static __init int dax_hmem_init(void) static __exit void dax_hmem_exit(void) { + flush_work(&dax_hmem_work.work); + + if (dax_hmem_work.pdev) + put_device(&dax_hmem_work.pdev->dev); + platform_driver_unregister(&dax_hmem_driver); platform_driver_unregister(&dax_hmem_platform_driver); } -- 2.17.1

