During VF reset, multiple issues can lead to initialization
instability.

The first issue is a race condition in the VF-initiated reset path,
where VFR state VFACTIVE is treated as both "reset not started" and
"reset completed" in iavf_check_vf_reset_done(). When a VF initiates
a reset, the PF may not have begun processing it by the time
iavf_check_vf_reset_done() is called. Since VFACTIVE satisfies the
completion check, the VF proceeds before the PF has acknowledged the
reset, resulting in inconsistent initialization and virtchnl command
failures (e.g., OP_VERSION timeout).

The second issue is the presence of stale messages in the Admin
Receive Queue (ARQ) after VF reset. After the admin queue is
re-initialized during reset recovery, the PF may post responses to
pre-reset commands or unsolicited events. These may include opcode 0
(VIRTCHNL_OP_UNKNOWN) or responses to commands issued before reset,
which can interfere with API negotiation and cause command mismatch
errors.

Additionally, opcode 0 messages generate excessive warning logs,
causing unnecessary noise during initialization.

The solution involves:

1. Introducing a wait-for-reset-start helper that polls RSTAT until
   it leaves VFACTIVE. This helper is used in VF-initiated reset paths
   to ensure that the PF has started processing the reset before VF
   reinitialization proceeds. It is invoked from iavf_handle_hw_reset()
   for event-driven resets and from iavf_queues_req_reset() for
   queue-change-triggered resets. It is intentionally not used in
   iavf_dev_reset() to avoid a redundant wait and an unnecessary delay
   when reset completion is already confirmed.

2. Draining stale ARQ messages after admin queue initialization
   during reset recovery only (vf->in_reset_recovery == true).
   During initial device probe, the admin queue is freshly allocated
   and does not contain stale entries.

3. Downgrading the log for opcode 0 (VIRTCHNL_OP_UNKNOWN) messages to
   DEBUG level while keeping the WARNING for any other mismatched
   opcode. Both cases still return IAVF_MSG_ERR, allowing polling to
   continue until a valid response is received.

4. Refactoring reset-start detection and ARQ drain logic into helper
   functions (iavf_wait_for_reset_start() and iavf_drain_arq()) to
   improve readability and maintainability.

5. Introducing a short (50 ms) delay in iavf_dev_reset() between
   iavf_dev_uninit() and iavf_dev_init() to mitigate timing issues
   between VF reinitialization and PF reset processing. This helps
   avoid virtchnl command failures when PF reset completion is not yet
   fully synchronized.

This fix primarily targets VF-initiated reset handling, while ARQ
drain and opcode handling improvements also benefit PF-initiated
reset recovery scenarios.

Fixes: 28a1a72eac26 ("net/iavf: add VF initiated reset")
Cc: [email protected]

Signed-off-by: Talluri Chaitanyababu <[email protected]>

v2:
- Removed iavf_wait_for_reset_start() from iavf_dev_reset() to avoid a
  redundant wait and a 1-second delay after reset completion.
- Added iavf_wait_for_reset_start() to iavf_queues_req_reset() to
  properly handle queue-change-triggered resets.
- Retained usage in iavf_handle_hw_reset() for VF-initiated reset flow.
- Restricted iavf_drain_arq() to reset recovery paths only.
- Added a delay in iavf_dev_reset() to mitigate reset timing issues.
---
 drivers/net/intel/iavf/iavf_ethdev.c | 65 ++++++++++++++++++++++++++++
 drivers/net/intel/iavf/iavf_vchnl.c  | 16 +++++--
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/net/intel/iavf/iavf_ethdev.c 
b/drivers/net/intel/iavf/iavf_ethdev.c
index 1eca20bc9a..2a4b8096d2 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -101,6 +101,7 @@ static int iavf_dev_start(struct rte_eth_dev *dev);
 static int iavf_dev_stop(struct rte_eth_dev *dev);
 static int iavf_dev_close(struct rte_eth_dev *dev);
 static int iavf_dev_reset(struct rte_eth_dev *dev);
+static int iavf_wait_for_reset_start(struct iavf_hw *hw);
 static int iavf_dev_info_get(struct rte_eth_dev *dev,
                             struct rte_eth_dev_info *dev_info);
 static const uint32_t *iavf_dev_supported_ptypes_get(struct rte_eth_dev *dev,
@@ -591,6 +592,7 @@ iavf_queues_req_reset(struct rte_eth_dev *dev, uint16_t num)
        struct iavf_adapter *ad =
                IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct iavf_info *vf =  IAVF_DEV_PRIVATE_TO_VF(ad);
+       struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(ad);
        int ret;
 
        ret = iavf_request_queues(dev, num);
@@ -602,6 +604,8 @@ iavf_queues_req_reset(struct rte_eth_dev *dev, uint16_t num)
                        vf->vsi_res->num_queue_pairs, num);
 
        iavf_dev_watchdog_disable(ad);
+       /* Wait for PF to start processing reset triggered by queue change */
+       iavf_wait_for_reset_start(hw);
        ret = iavf_dev_reset(dev);
        if (ret) {
                PMD_DRV_LOG(ERR, "vf reset failed");
@@ -2002,6 +2006,30 @@ iavf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, 
uint16_t queue_id)
        return 0;
 }
 
+/* Wait until PF acknowledges VF reset (RSTAT leaves VFACTIVE) */
+static int
+iavf_wait_for_reset_start(struct iavf_hw *hw)
+{
+       int i;
+       uint32_t rstat;
+
+       for (i = 0; i < 100; i++) {
+               rte_delay_ms(10);
+
+               rstat = IAVF_READ_REG(hw, IAVF_VFGEN_RSTAT);
+               rstat &= IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+               rstat >>= IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT;
+
+               if (rstat != VIRTCHNL_VFR_VFACTIVE)
+                       return 0;
+       }
+
+       PMD_DRV_LOG(DEBUG, "VF reset did not start within timeout");
+       return -1;
+}
+
+static void iavf_drain_arq(struct iavf_hw *hw, struct iavf_info *vf);
+
 static int
 iavf_check_vf_reset_done(struct iavf_hw *hw)
 {
@@ -2517,6 +2545,30 @@ iavf_init_proto_xtr(struct rte_eth_dev *dev)
        }
 }
 
+/* Drain stale Admin Receive Queue messages after reset */
+static void
+iavf_drain_arq(struct iavf_hw *hw, struct iavf_info *vf)
+{
+       struct iavf_arq_event_info event;
+       int drain_count = 0;
+
+       memset(&event, 0, sizeof(event));
+       event.msg_buf = vf->aq_resp;
+
+       while (drain_count < IAVF_AQ_LEN) {
+               event.buf_len = IAVF_AQ_BUF_SZ;
+
+               if (iavf_clean_arq_element(hw, &event, NULL) != IAVF_SUCCESS)
+                       break;
+
+               drain_count++;
+       }
+
+       if (drain_count > 0)
+               PMD_INIT_LOG(DEBUG, "Drained %d stale ARQ messages",
+                               drain_count);
+}
+
 static int
 iavf_init_vf(struct rte_eth_dev *dev)
 {
@@ -2558,6 +2610,11 @@ iavf_init_vf(struct rte_eth_dev *dev)
                PMD_INIT_LOG(ERR, "unable to allocate vf_aq_resp memory");
                goto err_aq;
        }
+
+       /* Drain stale ARQ messages only during reset recovery */
+       if (vf->in_reset_recovery)
+               iavf_drain_arq(hw, vf);
+
        if (iavf_check_api_version(adapter) != 0) {
                PMD_INIT_LOG(ERR, "check_api version failed");
                goto err_api;
@@ -3070,6 +3127,8 @@ iavf_dev_reset(struct rte_eth_dev *dev)
        ret = iavf_dev_uninit(dev);
        if (ret)
                return ret;
+       /* Add delay before re-initialization */
+       rte_delay_ms(50);
 
        return iavf_dev_init(dev);
 }
@@ -3105,6 +3164,7 @@ iavf_handle_hw_reset(struct rte_eth_dev *dev, bool 
vf_initiated_reset)
 {
        struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct iavf_adapter *adapter = dev->data->dev_private;
+       struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int ret;
        bool restart_device = false;
 
@@ -3123,6 +3183,11 @@ iavf_handle_hw_reset(struct rte_eth_dev *dev, bool 
vf_initiated_reset)
        vf->in_reset_recovery = true;
        iavf_set_no_poll(adapter, false);
 
+       /* For VF-initiated reset, wait for PF to start processing it */
+       if (vf_initiated_reset)
+               if (iavf_wait_for_reset_start(hw) != 0)
+                       PMD_DRV_LOG(WARNING, "PF did not acknowledge VF reset");
+
        /* Call the pre reset callback */
        if (vf->pre_reset_cb != NULL)
                vf->pre_reset_cb(dev->data->port_id, vf->pre_reset_cb_arg);
diff --git a/drivers/net/intel/iavf/iavf_vchnl.c 
b/drivers/net/intel/iavf/iavf_vchnl.c
index 08dd6f2d7f..e6209e1f18 100644
--- a/drivers/net/intel/iavf/iavf_vchnl.c
+++ b/drivers/net/intel/iavf/iavf_vchnl.c
@@ -296,11 +296,21 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, 
uint16_t buf_len,
                                        __func__, vpe->event);
                }
        }  else {
-               /* async reply msg on command issued by vf previously */
+               /* Async reply for previously issued VF command.
+                * Stale messages from before reset are ignored, and polling
+                * continues until the expected response is received.
+                */
                result = IAVF_MSG_CMD;
                if (opcode != vf->pend_cmd) {
-                       PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get 
%u",
-                                       vf->pend_cmd, opcode);
+                       if (opcode == VIRTCHNL_OP_UNKNOWN)
+                               PMD_DRV_LOG(DEBUG,
+                                           "Ignoring stale msg (opcode 0), 
pending cmd %u",
+                                           vf->pend_cmd);
+                       else
+                               PMD_DRV_LOG(WARNING,
+                                           "command mismatch, expect %u, get 
%u",
+                                           vf->pend_cmd, opcode);
+
                        result = IAVF_MSG_ERR;
                }
        }
-- 
2.43.0

Reply via email to