When queue allocation fails partway through, the error cleanup frees
and NULLs apc->tx_qp and apc->rxqs. Multiple teardown paths such as
mana_remove(), mana_change_mtu() recovery, and internal error handling
in mana_alloc_queues() can subsequently call into functions that
dereference these pointers without NULL checks:

- mana_chn_setxdp() dereferences apc->rxqs[0], causing a NULL pointer
  dereference panic (CR2: 0000000000000000 at mana_chn_setxdp+0x26).
- mana_destroy_vport() iterates apc->rxqs without a NULL check.
- mana_fence_rqs() iterates apc->rxqs without a NULL check.
- mana_dealloc_queues() iterates apc->tx_qp without a NULL check.

Add NULL guards for apc->rxqs in mana_fence_rqs() and
mana_destroy_vport(), and before the mana_chn_setxdp() call in
mana_dealloc_queues(). Also add a NULL guard for apc->tx_qp in
mana_dealloc_queues() so that TX queue draining is skipped when the TX
queues were never allocated or have already been freed.

Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Reviewed-by: Haiyang Zhang <[email protected]>
Signed-off-by: Dipayaan Roy <[email protected]>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 70 +++++++++++--------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 9afc786b297a..0582803907a8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1727,6 +1727,9 @@ static void mana_fence_rqs(struct mana_port_context *apc)
        struct mana_rxq *rxq;
        int err;
 
+       if (!apc->rxqs)
+               return;
+
        for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
                rxq = apc->rxqs[rxq_idx];
                err = mana_fence_rq(apc, rxq);
@@ -2858,13 +2861,16 @@ static void mana_destroy_vport(struct mana_port_context *apc)
        struct mana_rxq *rxq;
        u32 rxq_idx;
 
-       for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
-               rxq = apc->rxqs[rxq_idx];
-               if (!rxq)
-                       continue;
+       if (apc->rxqs) {
 
-               mana_destroy_rxq(apc, rxq, true);
-               apc->rxqs[rxq_idx] = NULL;
+               for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
+                       rxq = apc->rxqs[rxq_idx];
+                       if (!rxq)
+                               continue;
+
+                       mana_destroy_rxq(apc, rxq, true);
+                       apc->rxqs[rxq_idx] = NULL;
+               }
        }
 
        mana_destroy_txq(apc);
@@ -3269,7 +3275,8 @@ static int mana_dealloc_queues(struct net_device *ndev)
        if (apc->port_is_up)
                return -EINVAL;
 
-       mana_chn_setxdp(apc, NULL);
+       if (apc->rxqs)
+               mana_chn_setxdp(apc, NULL);
 
        if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode)
                mana_pf_deregister_filter(apc);
@@ -3287,33 +3294,38 @@ static int mana_dealloc_queues(struct net_device *ndev)
         * number of queues.
         */
 
-       for (i = 0; i < apc->num_queues; i++) {
-               txq = &apc->tx_qp[i].txq;
-               tsleep = 1000;
-               while (atomic_read(&txq->pending_sends) > 0 &&
-                      time_before(jiffies, timeout)) {
-                       usleep_range(tsleep, tsleep + 1000);
-                       tsleep <<= 1;
-               }
-               if (atomic_read(&txq->pending_sends)) {
-                       err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
-                       if (err) {
-                               netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
-                                          err, atomic_read(&txq->pending_sends),
-                                          txq->gdma_txq_id);
+       if (apc->tx_qp) {
+               for (i = 0; i < apc->num_queues; i++) {
+                       txq = &apc->tx_qp[i].txq;
+                       tsleep = 1000;
+                       while (atomic_read(&txq->pending_sends) > 0 &&
+                              time_before(jiffies, timeout)) {
+                               usleep_range(tsleep, tsleep + 1000);
+                               tsleep <<= 1;
+                       }
+                       if (atomic_read(&txq->pending_sends)) {
+                               err =
+                                   pcie_flr(to_pci_dev(gd->gdma_context->dev));
+                               if (err) {
+                                       netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+                                                  err,
+                                           atomic_read(&txq->pending_sends),
+                                           txq->gdma_txq_id);
+                               }
+                               break;
                        }
-                       break;
                }
-       }
 
-       for (i = 0; i < apc->num_queues; i++) {
-               txq = &apc->tx_qp[i].txq;
-               while ((skb = skb_dequeue(&txq->pending_skbs))) {
-                       mana_unmap_skb(skb, apc);
-                       dev_kfree_skb_any(skb);
+               for (i = 0; i < apc->num_queues; i++) {
+                       txq = &apc->tx_qp[i].txq;
+                       while ((skb = skb_dequeue(&txq->pending_skbs))) {
+                               mana_unmap_skb(skb, apc);
+                               dev_kfree_skb_any(skb);
+                       }
+                       atomic_set(&txq->pending_sends, 0);
                }
-               atomic_set(&txq->pending_sends, 0);
        }
+
        /* We're 100% sure the queues can no longer be woken up, because
         * we're sure now mana_poll_tx_cq() can't be running.
         */
-- 
2.43.0


Reply via email to