On Mon, May 18, 2026 at 12:52 PM Dipayaan Roy
<[email protected]> wrote:
>
> When queue allocation fails partway through, the error cleanup frees
> and NULLs apc->tx_qp and apc->rxqs. Multiple teardown paths such as
> mana_remove(), mana_change_mtu() recovery, and internal error handling
> in mana_alloc_queues() can subsequently call into functions that
> dereference these pointers without NULL checks:
>
> - mana_chn_setxdp() dereferences apc->rxqs[0], causing a NULL pointer
>   dereference panic (CR2: 0000000000000000 at mana_chn_setxdp+0x26).
> - mana_destroy_vport() iterates apc->rxqs without a NULL check.
> - mana_fence_rqs() iterates apc->rxqs without a NULL check.
> - mana_dealloc_queues() iterates apc->tx_qp without a NULL check.
>
> Add NULL guards for apc->rxqs in mana_fence_rqs(),
> mana_destroy_vport(), and before the mana_chn_setxdp() call. Add a
> NULL guard for apc->tx_qp in mana_dealloc_queues() to skip TX queue
> draining when TX queues were never allocated or already freed.
>
> Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
> Reviewed-by: Haiyang Zhang <[email protected]>
> Signed-off-by: Dipayaan Roy <[email protected]>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 70 +++++++++++--------
>  1 file changed, 41 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c 
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 9afc786b297a..0582803907a8 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1727,6 +1727,9 @@ static void mana_fence_rqs(struct mana_port_context 
> *apc)
>         struct mana_rxq *rxq;
>         int err;
>
> +       if (!apc->rxqs)
> +               return;
> +
>         for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
>                 rxq = apc->rxqs[rxq_idx];
>                 err = mana_fence_rq(apc, rxq);
> @@ -2858,13 +2861,16 @@ static void mana_destroy_vport(struct 
> mana_port_context *apc)
>         struct mana_rxq *rxq;
>         u32 rxq_idx;
>
> -       for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
> -               rxq = apc->rxqs[rxq_idx];
> -               if (!rxq)
> -                       continue;
> +       if (apc->rxqs) {
>
> -               mana_destroy_rxq(apc, rxq, true);
> -               apc->rxqs[rxq_idx] = NULL;
> +               for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
> +                       rxq = apc->rxqs[rxq_idx];
> +                       if (!rxq)
> +                               continue;
> +
> +                       mana_destroy_rxq(apc, rxq, true);
> +                       apc->rxqs[rxq_idx] = NULL;
> +               }
>         }
>
>         mana_destroy_txq(apc);
> @@ -3269,7 +3275,8 @@ static int mana_dealloc_queues(struct net_device *ndev)
>         if (apc->port_is_up)
>                 return -EINVAL;
>
> -       mana_chn_setxdp(apc, NULL);
> +       if (apc->rxqs)
> +               mana_chn_setxdp(apc, NULL);

Is this check required? mana_dealloc_queues() is only called by
mana_detach(), and mana_detach() calls mana_dealloc_queues() only if
the port was previously up. apc->port_is_up seems to be set to true
only after a successful queue allocation, so can apc->rxqs actually be
NULL here?

>
>         if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode)
>                 mana_pf_deregister_filter(apc);
> @@ -3287,33 +3294,38 @@ static int mana_dealloc_queues(struct net_device 
> *ndev)
>          * number of queues.
>          */
>
> -       for (i = 0; i < apc->num_queues; i++) {
> -               txq = &apc->tx_qp[i].txq;
> -               tsleep = 1000;
> -               while (atomic_read(&txq->pending_sends) > 0 &&
> -                      time_before(jiffies, timeout)) {
> -                       usleep_range(tsleep, tsleep + 1000);
> -                       tsleep <<= 1;
> -               }
> -               if (atomic_read(&txq->pending_sends)) {
> -                       err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
> -                       if (err) {
> -                               netdev_err(ndev, "flr failed %d with %d pkts 
> pending in txq %u\n",
> -                                          err, 
> atomic_read(&txq->pending_sends),
> -                                          txq->gdma_txq_id);
> +       if (apc->tx_qp) {

Same question as above: is it possible for apc->tx_qp to be NULL here?

> +               for (i = 0; i < apc->num_queues; i++) {
> +                       txq = &apc->tx_qp[i].txq;
> +                       tsleep = 1000;
> +                       while (atomic_read(&txq->pending_sends) > 0 &&
> +                              time_before(jiffies, timeout)) {
> +                               usleep_range(tsleep, tsleep + 1000);
> +                               tsleep <<= 1;
> +                       }
> +                       if (atomic_read(&txq->pending_sends)) {
> +                               err =
> +                                   
> pcie_flr(to_pci_dev(gd->gdma_context->dev));
> +                               if (err) {
> +                                       netdev_err(ndev, "flr failed %d with 
> %d pkts pending in txq %u\n",
> +                                                  err,
> +                                           atomic_read(&txq->pending_sends),
> +                                           txq->gdma_txq_id);
> +                               }
> +                               break;
>                         }
> -                       break;
>                 }
> -       }
>
> -       for (i = 0; i < apc->num_queues; i++) {
> -               txq = &apc->tx_qp[i].txq;
> -               while ((skb = skb_dequeue(&txq->pending_skbs))) {
> -                       mana_unmap_skb(skb, apc);
> -                       dev_kfree_skb_any(skb);
> +               for (i = 0; i < apc->num_queues; i++) {
> +                       txq = &apc->tx_qp[i].txq;
> +                       while ((skb = skb_dequeue(&txq->pending_skbs))) {
> +                               mana_unmap_skb(skb, apc);
> +                               dev_kfree_skb_any(skb);
> +                       }
> +                       atomic_set(&txq->pending_sends, 0);
>                 }
> -               atomic_set(&txq->pending_sends, 0);
>         }
> +
>         /* We're 100% sure the queues can no longer be woken up, because
>          * we're sure now mana_poll_tx_cq() can't be running.
>          */
> --
> 2.43.0
>
>

Reply via email to