This breaks on my system:

On 7.0.0 it boots fine. With net-next/main currently at this commit:


commit 8737d7194d6d5947c3d7d8813895b44a25b84477 (net-next/main, net-next/HEAD)
Author: Lorenzo Bianconi <[email protected]>
Date:   Fri Mar 13 17:28:36 2026 +0100

I get:

[   21.859081] mlx5_core 0005:01:00.0: probe_one:2017:(pid 10): mlx5_shd_init failed with error code -2
[   21.863266] mlx5_core 0005:01:00.0: probe with driver mlx5_core failed with error -2
[   21.866360] mlx5_core 0005:01:00.1: probe_one:2017:(pid 10): mlx5_shd_init failed with error code -2
[   21.869937] mlx5_core 0005:01:00.1: probe with driver mlx5_core failed with error -2


I am happy to help debug. What do you need from me?


On 3/12/26 06:04, Jiri Pirko wrote:
From: Jiri Pirko <[email protected]>

Use the previously introduced shared devlink infrastructure to create
a shared devlink instance for mlx5 PFs that reside on the same physical
chip. The shared instance is identified by the chip's serial number
extracted from PCI VPD (V3 keyword, with fallback to serial number
for older devices).

Each PF that probes calls mlx5_shd_init() which extracts the chip serial
number and uses devlink_shd_get() to get or create the shared instance.
When a PF is removed, mlx5_shd_uninit() calls devlink_shd_put()
to release the reference. The shared instance is automatically destroyed
when the last PF is removed.

Make the PF devlink instances nested in this shared devlink instance,
allowing userspace to identify which PFs belong to the same physical
chip.

Example:

pci/0000:08:00.0: index 0
   nested_devlink:
     auxiliary/mlx5_core.eth.0
devlink_index/1: index 1
   nested_devlink:
     pci/0000:08:00.0
     pci/0000:08:00.1
auxiliary/mlx5_core.eth.0: index 2
pci/0000:08:00.1: index 3
   nested_devlink:
     auxiliary/mlx5_core.eth.1
auxiliary/mlx5_core.eth.1: index 4

Signed-off-by: Jiri Pirko <[email protected]>
---
v2->v3:
- removed "const" from "sn"
- passing driver pointer to devlink_shd_get()
---
  .../net/ethernet/mellanox/mlx5/core/Makefile  |  5 +-
  .../net/ethernet/mellanox/mlx5/core/main.c    | 17 ++++++
  .../ethernet/mellanox/mlx5/core/sh_devlink.c  | 61 +++++++++++++++++++
  .../ethernet/mellanox/mlx5/core/sh_devlink.h  | 12 ++++
  include/linux/mlx5/driver.h                   |  1 +
  5 files changed, 94 insertions(+), 2 deletions(-)
  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8ffa286a18f5..d39fe9c4a87c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,8 +16,9 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
                transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o 
events.o wq.o lib/gid.o \
                lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o 
diag/fs_tracepoint.o \
-               diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o 
diag/reporter_vnic.o \
-               fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o 
lib/nv_param.o
+               diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o 
diag/rsc_dump.o \
+               diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o 
fs_pool.o \
+               lib/nv_param.o
#
  # Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..1c35c3fc3bb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -74,6 +74,7 @@
  #include "mlx5_irq.h"
  #include "hwmon.h"
  #include "lag/lag.h"
+#include "sh_devlink.h"
MODULE_AUTHOR("Eli Cohen <[email protected]>");
  MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) 
core driver");
@@ -1520,10 +1521,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
        int err;
devl_lock(devlink);
+       if (dev->shd) {
+               err = devl_nested_devlink_set(dev->shd, devlink);
+               if (err)
+                       goto unlock;
+       }
        devl_register(devlink);
        err = mlx5_init_one_devl_locked(dev);
        if (err)
                devl_unregister(devlink);
+unlock:
        devl_unlock(devlink);
        return err;
  }
@@ -2005,6 +2012,13 @@ static int probe_one(struct pci_dev *pdev, const struct 
pci_device_id *id)
                goto pci_init_err;
        }
+ err = mlx5_shd_init(dev);
+       if (err) {
+               mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n",
+                             err);
+               goto shd_init_err;
+       }
+
        err = mlx5_init_one(dev);
        if (err) {
                mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
@@ -2018,6 +2032,8 @@ static int probe_one(struct pci_dev *pdev, const struct 
pci_device_id *id)
        return 0;
err_init_one:
+       mlx5_shd_uninit(dev);
+shd_init_err:
        mlx5_pci_close(dev);
  pci_init_err:
        mlx5_mdev_uninit(dev);
@@ -2039,6 +2055,7 @@ static void remove_one(struct pci_dev *pdev)
        mlx5_drain_health_wq(dev);
        mlx5_sriov_disable(pdev, false);
        mlx5_uninit_one(dev);
+       mlx5_shd_uninit(dev);
        mlx5_pci_close(dev);
        mlx5_mdev_uninit(dev);
        mlx5_adev_idx_free(dev->priv.adev_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c 
b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
new file mode 100644
index 000000000000..bc33f95302df
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
+
+#include "sh_devlink.h"
+
+static const struct devlink_ops mlx5_shd_ops = {
+};
+
+int mlx5_shd_init(struct mlx5_core_dev *dev)
+{
+       u8 *vpd_data __free(kfree) = NULL;
+       struct pci_dev *pdev = dev->pdev;
+       unsigned int vpd_size, kw_len;
+       struct devlink *devlink;
+       char *sn, *end;
+       int start;
+       int err;
+
+       if (!mlx5_core_is_pf(dev))
+               return 0;
+
+       vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+       if (IS_ERR(vpd_data)) {
+               err = PTR_ERR(vpd_data);
+               return err == -ENODEV ? 0 : err;
+       }
+       start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len);
+       if (start < 0) {
+               /* Fall-back to SN for older devices. */
+               start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+                                                    PCI_VPD_RO_KEYWORD_SERIALNO, 
&kw_len);
+               if (start < 0)
+                       return -ENOENT;
+       }
+       sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
+       if (!sn)
+               return -ENOMEM;
+       /* Firmware may return spaces at the end of the string, strip it. */
+       end = strchrnul(sn, ' ');
+       *end = '\0';
+
+       /* Get or create shared devlink instance */
+       devlink = devlink_shd_get(sn, &mlx5_shd_ops, 0, pdev->dev.driver);
+       kfree(sn);
+       if (!devlink)
+               return -ENOMEM;
+
+       dev->shd = devlink;
+       return 0;
+}
+
+void mlx5_shd_uninit(struct mlx5_core_dev *dev)
+{
+       if (!dev->shd)
+               return;
+
+       devlink_shd_put(dev->shd);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h 
b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
new file mode 100644
index 000000000000..8ab8d6940227
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_SH_DEVLINK_H__
+#define __MLX5_SH_DEVLINK_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_shd_init(struct mlx5_core_dev *dev);
+void mlx5_shd_uninit(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_SH_DEVLINK_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04dcd09f7517..1268fcf35ec7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -798,6 +798,7 @@ struct mlx5_core_dev {
        enum mlx5_wc_state wc_state;
        /* sync write combining state */
        struct mutex wc_state_lock;
+       struct devlink *shd;
  };
struct mlx5_db {

Reply via email to