From: Björn Töpel <bjorn.to...@intel.com>

This patch adds proper XDP_TX support.
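Each Rx ring gets a dedicated, Tx-only XDP sibling ring (xdp_sibling). Frames
for which the XDP program returns XDP_TX are queued on that sibling ring by
i40e_xdp_xmit(), which only fills a descriptor and sets xdp_needs_tail_bump;
the tail register is then written once per NAPI poll, after the Rx clean loop,
by i40e_xdp_xmit_tail_bump(). A minimal, self-contained userspace model of that
deferred tail-bump batching (illustrative names only, not driver code):

/* Model of the per-packet queue + per-poll doorbell split used for the
 * XDP Tx rings.  All names here are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

struct xdp_ring_model {
	uint64_t desc[RING_SIZE];	/* stand-in for the HW descriptors */
	uint16_t next_to_use;		/* next free descriptor slot */
	uint16_t curr_in_use;		/* last descriptor handed to "HW" */
	bool needs_tail_bump;		/* mirrors xdp_needs_tail_bump */
	uint16_t tail;			/* stand-in for the MMIO tail register */
};

/* Per-packet path: queue a frame without touching the tail register. */
static bool model_xmit(struct xdp_ring_model *r, uint64_t frame)
{
	uint16_t i = r->next_to_use;

	r->desc[i] = frame;
	r->curr_in_use = i;
	r->next_to_use = (uint16_t)((i + 1) % RING_SIZE);
	r->needs_tail_bump = true;
	return true;
}

/* Per-poll path: one doorbell write covering all frames queued so far. */
static void model_tail_bump(struct xdp_ring_model *r)
{
	r->tail = r->curr_in_use;
	r->needs_tail_bump = false;
}

int main(void)
{
	struct xdp_ring_model r = { { 0 }, 0, 0, false, 0 };
	uint64_t frame;

	for (frame = 1; frame <= 3; frame++)	/* three XDP_TX frames in one poll */
		model_xmit(&r, frame);

	if (r.needs_tail_bump)			/* done once, after the Rx loop */
		model_tail_bump(&r);

	printf("tail written once, now at descriptor %u\n", (unsigned)r.tail);
	return 0;
}

The real i40e_xdp_xmit() additionally checks for a full ring, in which case it
forces the pending tail bump and rejects the frame; the model omits that for
brevity.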
Acked-by: John Fastabend <john.r.fastab...@intel.com>
Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |   5 +
 drivers/net/ethernet/intel/i40e/i40e_main.c | 273 ++++++++++++++++++++-----
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 304 +++++++++++++++++++++++-----
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |   5 +
 4 files changed, 491 insertions(+), 96 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 05d805f439e6..adc1f3f32729 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -545,6 +545,10 @@ struct i40e_vsi {
 	struct i40e_ring **rx_rings;
 	struct i40e_ring **tx_rings;
+	/* The XDP rings are Tx only, and follow the count of the
+	 * regular rings, i.e. alloc_queue_pairs/num_queue_pairs
+	 */
+	struct i40e_ring **xdp_rings;
 	struct bpf_prog *xdp_prog;
 	u32 active_filters;
@@ -622,6 +626,7 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
+	struct i40e_ring_container xdp;

 	u8 num_ringpairs;	/* total number of ring pairs in vector */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index db0240213f3b..9310a5712ae3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -107,6 +107,18 @@ MODULE_VERSION(DRV_VERSION);
 static struct workqueue_struct *i40e_wq;

 /**
+ * i40e_alloc_queue_pairs_xdp_vsi - required # of XDP queue pairs
+ * @vsi: pointer to a vsi
+ **/
+static u16 i40e_alloc_queue_pairs_xdp_vsi(const struct i40e_vsi *vsi)
+{
+	if (i40e_enabled_xdp_vsi(vsi))
+		return vsi->alloc_queue_pairs;
+
+	return 0;
+}
+
+/**
  * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
  * @hw: pointer to the HW structure
  * @mem: ptr to mem struct to fill out
@@ -2828,6 +2840,12 @@ static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
 	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
 		err = i40e_setup_tx_descriptors(vsi->tx_rings[i]);

+	if (!i40e_enabled_xdp_vsi(vsi))
+		return err;
+
+	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+		err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]);
+
 	return err;
 }

@@ -2841,12 +2859,17 @@ static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
 {
 	int i;

-	if (!vsi->tx_rings)
-		return;
+	if (vsi->tx_rings) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+				i40e_free_tx_resources(vsi->tx_rings[i]);
+	}

-	for (i = 0; i < vsi->num_queue_pairs; i++)
-		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
-			i40e_free_tx_resources(vsi->tx_rings[i]);
+	if (vsi->xdp_rings) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+				i40e_free_tx_resources(vsi->xdp_rings[i]);
+	}
 }

 /**
@@ -3121,6 +3144,12 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
 	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
 		err = i40e_configure_tx_ring(vsi->tx_rings[i]);

+	if (!i40e_enabled_xdp_vsi(vsi))
+		return err;
+
+	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
+		err = i40e_configure_tx_ring(vsi->xdp_rings[i]);
+
 	return err;
 }

@@ -3269,7 +3298,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 	struct i40e_hw *hw = &pf->hw;
 	u16 vector;
 	int i, q;
-	u32 qp;
+	u32 qp, qp_idx = 0;

 	/* The interrupt indexing is offset by 1 in the PFINT_ITRn
 	 * and PFINT_LNKLSTn registers, e.g.:
@@ -3296,16 +3325,33 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 		wr32(hw,
I40E_PFINT_LNKLSTN(vector - 1), qp); for (q = 0; q < q_vector->num_ringpairs; q++) { u32 val; + u32 nqp = qp; + + if (i40e_enabled_xdp_vsi(vsi)) { + nqp = vsi->base_queue + + vsi->xdp_rings[qp_idx]->queue_index; + } val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | - (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | - (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | - (qp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)| + (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | + (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | + (nqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(qp), val); + if (i40e_enabled_xdp_vsi(vsi)) { + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | + (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | + (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | + (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | + (I40E_QUEUE_TYPE_TX + << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); + + wr32(hw, I40E_QINT_TQCTL(nqp), val); + } + val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | @@ -3320,6 +3366,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) wr32(hw, I40E_QINT_TQCTL(qp), val); qp++; + qp_idx++; } } @@ -3562,6 +3609,10 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), 0); wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), 0); + if (i40e_enabled_xdp_vsi(vsi)) { + wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), + 0); + } } if (pf->flags & I40E_FLAG_MSIX_ENABLED) { @@ -3871,6 +3922,24 @@ static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) } /** + * i40e_map_vector_to_xdp_ring - Assigns the XDP Tx queue to the vector + * @vsi: the VSI being configured + * @v_idx: vector index + * @xdp_idx: XDP Tx queue index + **/ +static void i40e_map_vector_to_xdp_ring(struct i40e_vsi *vsi, int v_idx, + int xdp_idx) +{ + struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct i40e_ring *xdp_ring = vsi->xdp_rings[xdp_idx]; + + xdp_ring->q_vector = q_vector; + xdp_ring->next = q_vector->xdp.ring; + q_vector->xdp.ring = xdp_ring; + q_vector->xdp.count++; +} + +/** * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors * @vsi: the VSI being configured * @@ -3903,11 +3972,17 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) q_vector->rx.count = 0; q_vector->tx.count = 0; + q_vector->xdp.count = 0; q_vector->rx.ring = NULL; q_vector->tx.ring = NULL; + q_vector->xdp.ring = NULL; while (num_ringpairs--) { i40e_map_vector_to_qp(vsi, v_start, qp_idx); + if (i40e_enabled_xdp_vsi(vsi)) { + i40e_map_vector_to_xdp_ring(vsi, v_start, + qp_idx); + } qp_idx++; qp_remaining--; } @@ -4001,56 +4076,82 @@ static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_control_txq - Start or stop a VSI's queue * @vsi: the VSI being configured * @enable: start or stop the rings + * @pf_q: the PF queue **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_control_txq(struct i40e_vsi *vsi, bool enable, int pf_q) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - int i, j, pf_q, ret = 0; + int j, ret = 0; u32 tx_reg; - pf_q = vsi->base_queue; - for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { + /* warn the TX unit of coming changes */ + i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable); + if (!enable) + 
usleep_range(10, 20); - /* warn the TX unit of coming changes */ - i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable); - if (!enable) - usleep_range(10, 20); + for (j = 0; j < 50; j++) { + tx_reg = rd32(hw, I40E_QTX_ENA(pf_q)); + if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) == + ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1)) + break; + usleep_range(1000, 2000); + } + /* Skip if the queue is already in the requested state */ + if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) + return 0; - for (j = 0; j < 50; j++) { - tx_reg = rd32(hw, I40E_QTX_ENA(pf_q)); - if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) == - ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1)) - break; - usleep_range(1000, 2000); - } - /* Skip if the queue is already in the requested state */ - if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) - continue; + /* turn on/off the queue */ + if (enable) { + wr32(hw, I40E_QTX_HEAD(pf_q), 0); + tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK; + } else { + tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; + } - /* turn on/off the queue */ - if (enable) { - wr32(hw, I40E_QTX_HEAD(pf_q), 0); - tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK; - } else { - tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; - } + wr32(hw, I40E_QTX_ENA(pf_q), tx_reg); + /* No waiting for the Tx queue to disable */ + if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state)) + return 0; - wr32(hw, I40E_QTX_ENA(pf_q), tx_reg); - /* No waiting for the Tx queue to disable */ - if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state)) - continue; + /* wait for the change to finish */ + ret = i40e_pf_txq_wait(pf, pf_q, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + return 0; +} - /* wait for the change to finish */ - ret = i40e_pf_txq_wait(pf, pf_q, enable); - if (ret) { - dev_info(&pf->pdev->dev, - "VSI seid %d Tx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? 
"en" : "dis")); +/** + * i40e_vsi_control_tx - Start or stop a VSI's rings + * @vsi: the VSI being configured + * @enable: start or stop the rings + **/ +static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int i, pf_q, ret = 0; + + pf_q = vsi->base_queue; + for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { + ret = i40e_vsi_control_txq(vsi, enable, pf_q); + if (ret) break; + } + + if (!ret && i40e_enabled_xdp_vsi(vsi)) { + for (i = 0; i < vsi->num_queue_pairs; i++) { + pf_q = vsi->base_queue + vsi->xdp_rings[i]->queue_index; + ret = i40e_vsi_control_txq(vsi, enable, pf_q); + if (ret) + break; } } @@ -4311,6 +4412,9 @@ static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx) i40e_for_each_ring(ring, q_vector->rx) ring->q_vector = NULL; + i40e_for_each_ring(ring, q_vector->xdp) + ring->q_vector = NULL; + /* only VSI w/ an associated netdev is set up w/ NAPI */ if (vsi->netdev) netif_napi_del(&q_vector->napi); @@ -4534,6 +4638,21 @@ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) } } + if (!i40e_enabled_xdp_vsi(vsi)) + return 0; + + for (i = 0; i < vsi->num_queue_pairs; i++) { + pf_q = vsi->base_queue + vsi->xdp_rings[i]->queue_index; + /* Check and wait for the disable status of the queue */ + ret = i40e_pf_txq_wait(pf, pf_q, false); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d XDP Tx ring %d disable timeout\n", + vsi->seid, pf_q); + return ret; + } + } + return 0; } @@ -5474,6 +5593,8 @@ void i40e_down(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); + if (i40e_enabled_xdp_vsi(vsi)) + i40e_clean_tx_ring(vsi->xdp_rings[i]); i40e_clean_rx_ring(vsi->rx_rings[i]); } @@ -7446,6 +7567,16 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors) return -ENOMEM; vsi->rx_rings = &vsi->tx_rings[vsi->alloc_queue_pairs]; + if (i40e_enabled_xdp_vsi(vsi)) { + size = sizeof(struct i40e_ring *) * + i40e_alloc_queue_pairs_xdp_vsi(vsi); + vsi->xdp_rings = kzalloc(size, GFP_KERNEL); + if (!vsi->xdp_rings) { + ret = -ENOMEM; + goto err_xdp_rings; + } + } + if (alloc_qvectors) { /* allocate memory for q_vector pointers */ size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors; @@ -7458,6 +7589,8 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors) return ret; err_vectors: + kfree(vsi->xdp_rings); +err_xdp_rings: kfree(vsi->tx_rings); return ret; } @@ -7564,6 +7697,8 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) kfree(vsi->tx_rings); vsi->tx_rings = NULL; vsi->rx_rings = NULL; + kfree(vsi->xdp_rings); + vsi->xdp_rings = NULL; } /** @@ -7649,6 +7784,13 @@ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) vsi->rx_rings[i] = NULL; } } + + if (vsi->xdp_rings && vsi->xdp_rings[0]) { + for (i = 0; i < vsi->alloc_queue_pairs; i++) { + kfree_rcu(vsi->xdp_rings[i], rcu); + vsi->xdp_rings[i] = NULL; + } + } } /** @@ -7696,6 +7838,31 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) vsi->rx_rings[i] = rx_ring; } + if (!i40e_enabled_xdp_vsi(vsi)) + return 0; + + for (i = 0; i < vsi->alloc_queue_pairs; i++) { + tx_ring = kzalloc(sizeof(*tx_ring), GFP_KERNEL); + if (!tx_ring) + goto err_out; + + tx_ring->queue_index = vsi->alloc_queue_pairs + i; + tx_ring->reg_idx = vsi->base_queue + vsi->alloc_queue_pairs + i; + tx_ring->ring_active = false; + tx_ring->vsi = vsi; + tx_ring->netdev = NULL; + tx_ring->dev = &pf->pdev->dev; + tx_ring->count = vsi->num_desc; + 
tx_ring->size = 0; + tx_ring->dcb_tc = 0; + if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) + tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; + tx_ring->tx_itr_setting = pf->tx_itr_default; + tx_ring->xdp_sibling = vsi->rx_rings[i]; + vsi->xdp_rings[i] = tx_ring; + vsi->rx_rings[i]->xdp_sibling = tx_ring; + } + return 0; err_out: @@ -9921,6 +10088,7 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) struct i40e_pf *pf; u8 enabled_tc; int ret; + u16 alloc_queue_pairs; if (!vsi) return NULL; @@ -9936,11 +10104,13 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) if (ret) goto err_vsi; - ret = i40e_get_lump(pf, pf->qp_pile, vsi->alloc_queue_pairs, vsi->idx); + alloc_queue_pairs = vsi->alloc_queue_pairs + + i40e_alloc_queue_pairs_xdp_vsi(vsi); + ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err %d\n", - vsi->alloc_queue_pairs, vsi->seid, ret); + alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; @@ -9998,6 +10168,7 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, struct i40e_veb *veb = NULL; int ret, i; int v_idx; + u16 alloc_queue_pairs; /* The requested uplink_seid must be either * - the PF's port seid @@ -10082,13 +10253,15 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, pf->lan_vsi = v_idx; else if (type == I40E_VSI_SRIOV) vsi->vf_id = param1; + + alloc_queue_pairs = vsi->alloc_queue_pairs + + i40e_alloc_queue_pairs_xdp_vsi(vsi); /* assign it some queues */ - ret = i40e_get_lump(pf, pf->qp_pile, vsi->alloc_queue_pairs, - vsi->idx); + ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err=%d\n", - vsi->alloc_queue_pairs, vsi->seid, ret); + alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index d835a51dafa6..fccdec7ae102 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -520,6 +520,8 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, if (tx_buffer->skb) { if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) kfree(tx_buffer->raw_buf); + else if (tx_buffer->tx_flags & I40E_TX_FLAGS_XDP) + put_page(tx_buffer->page); else dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) @@ -620,6 +622,15 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) #define WB_STRIDE 4 /** + * i40e_page_is_reserved - check if reuse is possible + * @page: page struct to check + */ +static inline bool i40e_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); +} + +/** * i40e_clean_tx_irq - Reclaim resources after transmit completes * @vsi: the VSI we care about * @tx_ring: Tx ring to clean @@ -762,6 +773,98 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, return !!budget; } +static bool i40e_clean_xdp_irq(struct i40e_vsi *vsi, + struct i40e_ring *tx_ring) +{ + u16 i = tx_ring->next_to_clean; + struct i40e_tx_buffer *tx_buf; + struct i40e_tx_desc *tx_head; + struct i40e_tx_desc *tx_desc; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = vsi->work_limit; + + tx_buf = &tx_ring->tx_bi[i]; + tx_desc = I40E_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + tx_head = I40E_TX_DESC(tx_ring, 
i40e_get_head(tx_ring)); + + do { + struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + + /* we have caught up to head, no work left to do */ + if (tx_head == tx_desc) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_packets += tx_buf->gso_segs; + + put_page(tx_buf->page); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_bi; + tx_desc = I40E_TX_DESC(tx_ring, 0); + } + + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.packets += total_packets; + u64_stats_update_end(&tx_ring->syncp); + tx_ring->q_vector->tx.total_bytes += total_bytes; + tx_ring->q_vector->tx.total_packets += total_packets; + + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. + */ + unsigned int j = i40e_get_tx_pending(tx_ring, false); + + if (budget && + ((j / WB_STRIDE) == 0) && (j > 0) && + !test_bit(__I40E_DOWN, &vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; + } + + return !!budget; +} + /** * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about @@ -1496,35 +1599,46 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) } /** - * i40e_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused + * i40e_try_flip_rx_page - attempts to flip a page for reuse + * @rx_buffer: The buffer to alter * - * Synchronizes page for reuse by the adapter - **/ -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *old_buff) + * Returns true if the page was successfully flipped and can be + * reused. + */ +static bool i40e_try_flip_rx_page(struct i40e_rx_buffer *rx_buffer) { - struct i40e_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; +#if (PAGE_SIZE < 8192) + unsigned int truesize = I40E_RXBUFFER_2048; +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif - new_buff = &rx_ring->rx_bi[nta]; + /* avoid re-using remote pages */ + if (unlikely(i40e_page_is_reserved(rx_buffer->page))) + return false; - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(rx_buffer->page) != 1)) + return false; - /* transfer page from old buffer to new buffer */ - *new_buff = *old_buff; -} + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; -/** - * i40e_page_is_reserved - check if reuse is possible - * @page: page struct to check - */ -static inline bool i40e_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + get_page(rx_buffer->page); + + return true; } /** @@ -1555,7 +1669,6 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif /* will the data fit in the skb we allocated? if so, just @@ -1578,34 +1691,107 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, rx_buffer->page_offset, size, truesize); - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; + return i40e_try_flip_rx_page(rx_buffer); +} -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) +/** + * i40e_xdp_xmit_tail_bump - updates the tail and sets the RS bit + * @xdp_ring: XDP Tx ring + **/ +static +void i40e_xdp_xmit_tail_bump(struct i40e_ring *xdp_ring) +{ + struct i40e_tx_desc *tx_desc; + + /* Set RS and bump tail */ + tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->curr_in_use); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); + /* Force memory writes to complete before letting h/w know + * there are new descriptors to fetch. (Only applicable for + * weak-ordered memory model archs, such as IA-64). + */ + wmb(); + writel(xdp_ring->curr_in_use, xdp_ring->tail); + + xdp_ring->xdp_needs_tail_bump = false; +} + +/** + * i40e_xdp_xmit - transmit a frame on the XDP Tx queue + * @xdp_ring: XDP Tx ring + * @page: current page containing the frame + * @page_offset: offset where the frame resides + * @dma: Bus address of the frame + * @size: size of the frame + * + * Returns true successfully sent. 
+ **/ +static bool i40e_xdp_xmit(void *data, size_t size, struct page *page, + struct i40e_ring *xdp_ring) +{ + struct i40e_tx_buffer *tx_bi; + struct i40e_tx_desc *tx_desc; + u16 i = xdp_ring->next_to_use; + dma_addr_t dma; + + if (unlikely(I40E_DESC_UNUSED(xdp_ring) < 1)) { + if (xdp_ring->xdp_needs_tail_bump) + i40e_xdp_xmit_tail_bump(xdp_ring); + xdp_ring->tx_stats.tx_busy++; return false; + } - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + tx_bi = &xdp_ring->tx_bi[i]; + tx_bi->bytecount = size; + tx_bi->gso_segs = 1; + tx_bi->tx_flags = I40E_TX_FLAGS_XDP; + tx_bi->page = page; - if (rx_buffer->page_offset > last_offset) + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) return false; -#endif - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + tx_desc = I40E_TX_DESC(xdp_ring, i); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC + | I40E_TX_DESC_CMD_EOP, + 0, size, 0); + tx_bi->next_to_watch = tx_desc; + xdp_ring->curr_in_use = i++; + xdp_ring->next_to_use = (i < xdp_ring->count) ? i : 0; + xdp_ring->xdp_needs_tail_bump = true; return true; } /** + * i40e_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_buff) +{ + struct i40e_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_bi[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; + + /* transfer page from old buffer to new buffer */ + *new_buff = *old_buff; +} + +/** * i40e_run_xdp - Runs an XDP program for an Rx ring * @rx_ring: Rx ring used for XDP * @rx_buffer: current Rx buffer @@ -1625,6 +1811,7 @@ static bool i40e_run_xdp(struct i40e_ring *rx_ring, I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct xdp_buff xdp; u32 xdp_action; + bool tx_ok; WARN_ON(!i40e_test_staterr(rx_desc, BIT(I40E_RX_DESC_STATUS_EOF_SHIFT))); @@ -1636,21 +1823,34 @@ static bool i40e_run_xdp(struct i40e_ring *rx_ring, switch (xdp_action) { case XDP_PASS: return false; - default: - bpf_warn_invalid_xdp_action(xdp_action); - case XDP_ABORTED: case XDP_TX: + tx_ok = i40e_xdp_xmit(xdp.data, size, rx_buffer->page, + rx_ring->xdp_sibling); + if (likely(tx_ok)) { + if (i40e_try_flip_rx_page(rx_buffer)) { + i40e_reuse_rx_page(rx_ring, rx_buffer); + rx_ring->rx_stats.page_reuse_count++; + } else { + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + break; + } + case XDP_ABORTED: case XDP_DROP: +do_drop: if (likely(!i40e_page_is_reserved(rx_buffer->page))) { i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; break; } - - /* we are not reusing the buffer so unmap it */ dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, DMA_FROM_DEVICE); __free_pages(rx_buffer->page, 0); + break; + default: + bpf_warn_invalid_xdp_action(xdp_action); + goto do_drop; } /* clear contents of buffer_info */ @@ -2065,6 +2265,15 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) ring->arm_wb = false; } + i40e_for_each_ring(ring, q_vector->xdp) { + if (!i40e_clean_xdp_irq(vsi, ring)) { + clean_complete = false; + continue; + } + arm_wb |= ring->arm_wb; + ring->arm_wb = false; + } + /* Handle case where we are called by netpoll with a budget of 0 */ if (budget <= 0) goto tx_only; @@ -2077,6 +2286,9 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) i40e_for_each_ring(ring, q_vector->rx) { int cleaned = i40e_clean_rx_irq(ring, budget_per_ring); + if (ring->xdp_sibling && ring->xdp_sibling->xdp_needs_tail_bump) + i40e_xdp_xmit_tail_bump(ring->xdp_sibling); + work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 957d856a82c4..4d9459134e69 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -220,6 +220,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size) #define I40E_TX_FLAGS_TSYN BIT(8) #define I40E_TX_FLAGS_FD_SB BIT(9) #define I40E_TX_FLAGS_UDP_TUNNEL BIT(10) +#define I40E_TX_FLAGS_XDP BIT(11) #define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 #define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -230,6 +231,7 @@ struct i40e_tx_buffer { union { struct sk_buff *skb; void *raw_buf; + struct page *page; }; unsigned int bytecount; unsigned short gso_segs; @@ -343,6 +345,9 @@ struct i40e_ring { u16 next_to_alloc; struct bpf_prog *xdp_prog; + struct i40e_ring *xdp_sibling; /* rx to xdp, and xdp to rx */ + bool xdp_needs_tail_bump; + u16 curr_in_use; } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { -- 2.9.3
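For reference, a minimal XDP program that exercises the new path simply returns
XDP_TX for every frame, bouncing packets back out of the receiving interface.
This is illustrative only and not part of the patch; build with
clang -O2 -target bpf -c xdp_tx.c -o xdp_tx.o and attach with e.g.
"ip link set dev <dev> xdp obj xdp_tx.o sec xdp".

/* Illustrative only, not part of this patch. */
#include <linux/bpf.h>

#ifndef __section
#define __section(NAME) __attribute__((section(NAME), used))
#endif

__section("xdp")
int xdp_tx_all(struct xdp_md *ctx)
{
	return XDP_TX;	/* bounce the frame back out of the same port */
}

char _license[] __section("license") = "GPL";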