From: Björn Töpel <bjorn.to...@intel.com>

In this commit we add support for the two xsk ndo_bpf sub-commands,
XDP_REGISTER_XSK and XDP_UNREGISTER_XSK, used for registering an xsk to
and unregistering it from the driver.
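
The sub-commands are driven via the existing ndo_bpf hook. For reference,
a rough sketch of what a core-side caller could look like (the function
name and error handling below are made up for illustration; the command
names and the xdp->xsk fields are the ones consumed by i40e_xdp() in this
patch):

	/* Hypothetical core-side caller -- not part of this patch. */
	static int xsk_register_with_driver(struct net_device *dev, u32 queue_id,
					    struct xsk_rx_parms *parms)
	{
		struct netdev_bpf xdp = {};

		if (!dev->netdev_ops->ndo_bpf)
			return -EOPNOTSUPP;

		xdp.command = XDP_REGISTER_XSK;
		xdp.xsk.queue_id = queue_id;
		xdp.xsk.rx_parms = parms;

		/* For i40e this ends up in i40e_xdp() -> i40e_xsk_enable(). */
		return dev->netdev_ops->ndo_bpf(dev, &xdp);
	}

Unregistering is symmetric, using XDP_UNREGISTER_XSK.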

NB! There's code here for disabling/enabling a queue pair in i40e. That
part should probably be split out from the ndo implementation into a
commit of its own.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |  24 ++
 drivers/net/ethernet/intel/i40e/i40e_main.c | 434 +++++++++++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c |  17 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |  32 ++
 4 files changed, 493 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 46e9f4e0a02c..6452ac5caa76 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -806,6 +806,10 @@ struct i40e_vsi {
 
 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	struct i40e_xsk_ctx **xsk_ctxs;
+	u16 num_xsk_ctxs;
+	u16 xsk_ctxs_in_use;
 } ____cacheline_internodealigned_in_smp;
 
 struct i40e_netdev_priv {
@@ -1109,4 +1113,24 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 
 int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
 int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+
+static inline bool i40e_xsk_attached(struct i40e_ring *rxr)
+{
+	bool xdp_on = i40e_enabled_xdp_vsi(rxr->vsi);
+	int qid = rxr->queue_index;
+
+	return rxr->vsi->xsk_ctxs && rxr->vsi->xsk_ctxs[qid] && xdp_on;
+}
+
+static inline struct buff_pool *i40e_xsk_buff_pool(struct i40e_ring *rxr)
+{
+	bool xdp_on = i40e_enabled_xdp_vsi(rxr->vsi);
+	int qid = rxr->queue_index;
+
+	if (!rxr->vsi->xsk_ctxs || !rxr->vsi->xsk_ctxs[qid] || !xdp_on)
+		return NULL;
+
+	return rxr->vsi->xsk_ctxs[qid]->buff_pool;
+}
+
 #endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0e1445af6b01..0c1ac8564f77 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -29,6 +29,7 @@
 #include <linux/pci.h>
 #include <linux/bpf.h>
 #include <linux/buff_pool.h>
+#include <net/xdp_sock.h>
 
 /* Local includes */
 #include "i40e.h"
@@ -3211,6 +3212,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
 	u16 pf_q = vsi->base_queue + ring->queue_index;
 	struct i40e_hw *hw = &vsi->back->hw;
+	struct buff_pool *xsk_buff_pool;
 	struct i40e_hmc_obj_rxq rx_ctx;
 	bool reserve_headroom;
 	unsigned int mtu = 0;
@@ -3229,9 +3231,20 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	} else {
 		reserve_headroom = false;
 	}
-	ring->bpool = i40e_buff_pool_recycle_create(mtu, reserve_headroom,
-						    ring->dev,
-						    ring->count);
+
+	xsk_buff_pool = i40e_xsk_buff_pool(ring);
+	if (xsk_buff_pool) {
+		ring->bpool = xsk_buff_pool;
+		ring->xdp_rxq.bpool = xsk_buff_pool;
+		set_ring_xsk_buff_pool(ring);
+	} else {
+		ring->bpool = i40e_buff_pool_recycle_create(mtu,
+							    reserve_headroom,
+							    ring->dev,
+							    ring->count);
+		ring->xdp_rxq.bpool = NULL;
+		clear_ring_xsk_buff_pool(ring);
+	}
 	ring->rx_buf_hr = (u16)bpool_buff_headroom(ring->bpool);
 	ring->rx_buf_len = (u16)bpool_buff_size(ring->bpool);
 
@@ -9923,6 +9936,25 @@ static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
 	vsi->rss_lut_user = NULL;
 }
 
+static void i40e_free_xsk_ctxs(struct i40e_vsi *vsi)
+{
+	struct i40e_xsk_ctx *ctx;
+	u16 i;
+
+	if (!vsi->xsk_ctxs)
+		return;
+
+	for (i = 0; i < vsi->num_xsk_ctxs; i++) {
+		ctx = vsi->xsk_ctxs[i];
+		/* ctx free'd by error handle */
+		if (ctx)
+			ctx->err_handler(ctx->err_ctx, -1 /* XXX wat? */);
+	}
+
+	kfree(vsi->xsk_ctxs);
+	vsi->xsk_ctxs = NULL;
+}
+
 /**
  * i40e_vsi_clear - Deallocate the VSI provided
  * @vsi: the VSI being un-configured
  **/
@@ -9938,6 +9970,8 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 		goto free_vsi;
 	pf = vsi->back;
 
+	i40e_free_xsk_ctxs(vsi);
+
 	mutex_lock(&pf->switch_mutex);
 	if (!pf->vsi[vsi->idx]) {
 		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
@@ -11635,6 +11669,394 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
 	return 0;
 }
 
+/**
+ * i40e_enter_busy_conf - Enters busy config state
+ * @vsi: vsi
+ *
+ * Returns 0 on success, <0 for failure.
+ **/
+static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int timeout = 50;
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_exit_busy_conf - Exits busy config state
+ * @vsi: vsi
+ **/
+static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+}
+
+/**
+ * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
+{
+	memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[queue_pair]->rx_stats));
+	memset(&vsi->tx_rings[queue_pair]->stats, 0,
+	       sizeof(vsi->tx_rings[queue_pair]->stats));
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		memset(&vsi->xdp_rings[queue_pair]->stats, 0,
+		       sizeof(vsi->xdp_rings[queue_pair]->stats));
+	}
+}
+
+/**
+ * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
+{
+	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
+	if (i40e_enabled_xdp_vsi(vsi))
+		i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+}
+
+/**
+ * i40e_queue_pair_control_napi - Enables/disables NAPI for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ **/
+static void i40e_queue_pair_control_napi(struct i40e_vsi *vsi, int queue_pair,
+					 bool enable)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_q_vector *q_vector = rxr->q_vector;
+
+	if (!vsi->netdev)
+		return;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (q_vector->rx.ring || q_vector->tx.ring) {
+		if (enable)
+			napi_enable(&q_vector->napi);
+		else
+			napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * i40e_queue_pair_control_rings - Enables/disables all rings for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_control_rings(struct i40e_vsi *vsi, int queue_pair,
+					 bool enable)
+{
+	struct i40e_pf *pf = vsi->back;
+	int pf_q, ret = 0;
+
+	pf_q = vsi->base_queue + queue_pair;
+	ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
+				     false /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	i40e_control_rx_q(pf, pf_q, enable);
+	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Rx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	/* Due to HW errata, on Rx disable only, the register can
+	 * indicate done before it really is. Needs 50ms to be sure
+	 */
+	if (!enable)
+		mdelay(50);
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return ret;
+
+	ret = i40e_control_wait_tx_q(vsi->seid, pf,
+				     pf_q + vsi->alloc_queue_pairs,
+				     true /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d XDP Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
+	else
+		i40e_irq_dynamic_enable_icr0(pf);
+
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* For simplicity, instead of removing the qp interrupt causes
+	 * from the interrupt linked list, we simply disable the interrupt, and
+	 * leave the list intact.
+	 *
+	 * All rings in a qp belong to the same qvector.
+	 */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
+
+		wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->msix_entries[intpf].vector);
+	} else {
+		/* Legacy and MSI mode - this stops all interrupt handling */
+		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->pdev->irq);
+	}
+}
+
+/**
+ * i40e_queue_pair_disable - Disables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
+	err = i40e_enter_busy_conf(vsi);
+	if (err)
+		return err;
+
+	i40e_queue_pair_disable_irq(vsi, queue_pair);
+	err = i40e_queue_pair_control_rings(vsi, queue_pair,
+					    false /* disable */);
+	i40e_queue_pair_control_napi(vsi, queue_pair, false /* disable */);
+	i40e_queue_pair_clean_rings(vsi, queue_pair);
+	i40e_queue_pair_reset_stats(vsi, queue_pair);
+
+	return err;
+}
+
+/**
+ * i40e_queue_pair_enable - Enables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
+	err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
+		if (err)
+			return err;
+	}
+
+	err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	err = i40e_queue_pair_control_rings(vsi, queue_pair, true /* enable */);
+	i40e_queue_pair_control_napi(vsi, queue_pair, true /* enable */);
+	i40e_queue_pair_enable_irq(vsi, queue_pair);
+
+	i40e_exit_busy_conf(vsi);
+
+	return err;
+}
+
+static void i40e_free_xsk_ctxs_if_last(struct i40e_vsi *vsi)
+{
+	if (vsi->xsk_ctxs_in_use > 0)
+		return;
+
+	kfree(vsi->xsk_ctxs);
+	vsi->xsk_ctxs = NULL;
+	vsi->num_xsk_ctxs = 0;
+}
+
+static int i40e_alloc_xsk_ctxs(struct i40e_vsi *vsi)
+{
+	if (vsi->xsk_ctxs)
+		return 0;
+
+	vsi->num_xsk_ctxs = vsi->alloc_queue_pairs;
+	vsi->xsk_ctxs = kcalloc(vsi->num_xsk_ctxs, sizeof(*vsi->xsk_ctxs),
+				GFP_KERNEL);
+	if (!vsi->xsk_ctxs) {
+		vsi->num_xsk_ctxs = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int i40e_add_xsk_ctx(struct i40e_vsi *vsi,
+			    int queue_id,
+			    struct buff_pool *buff_pool,
+			    void *err_ctx,
+			    void (*err_handler)(void *, int))
+{
+	struct i40e_xsk_ctx *ctx;
+	int err;
+
+	err = i40e_alloc_xsk_ctxs(vsi);
+	if (err)
+		return err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		i40e_free_xsk_ctxs_if_last(vsi);
+		return -ENOMEM;
+	}
+
+	vsi->xsk_ctxs_in_use++;
+	ctx->buff_pool = buff_pool;
+	ctx->err_ctx = err_ctx;
+	ctx->err_handler = err_handler;
+
+	vsi->xsk_ctxs[queue_id] = ctx;
+
+	return 0;
+}
+
+static void i40e_remove_xsk_ctx(struct i40e_vsi *vsi, int queue_id)
+{
+	kfree(vsi->xsk_ctxs[queue_id]);
+	vsi->xsk_ctxs[queue_id] = NULL;
+	vsi->xsk_ctxs_in_use--;
+	i40e_free_xsk_ctxs_if_last(vsi);
+}
+
+static int i40e_xsk_enable(struct net_device *netdev, u32 qid,
+			   struct xsk_rx_parms *parms)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	bool if_running;
+	int err;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	if (qid >= vsi->num_queue_pairs)
+		return -EINVAL;
+
+	if (vsi->xsk_ctxs && vsi->xsk_ctxs[qid])
+		return -EBUSY;
+
+	err = parms->dma_map(parms->buff_pool, &vsi->back->pdev->dev,
+			     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	if_running = netif_running(netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	err = i40e_add_xsk_ctx(vsi, qid, parms->buff_pool,
+			       parms->error_report_ctx, parms->error_report);
+	if (err)
+		return err;
+
+	if (if_running) {
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int i40e_xsk_disable(struct net_device *netdev, u32 qid,
+			    struct xsk_rx_parms *parms)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	bool if_running;
+	int err;
+
+	if (!vsi->xsk_ctxs || qid >= vsi->num_xsk_ctxs || !vsi->xsk_ctxs[qid])
+		return -EINVAL;
+
+	if_running = netif_running(netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	i40e_remove_xsk_ctx(vsi, qid);
+
+	if (if_running) {
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /**
  * i40e_xdp - implements ndo_bpf for i40e
  * @dev: netdevice
@@ -11656,6 +12078,12 @@ static int i40e_xdp(struct net_device *dev,
 		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
+	case XDP_REGISTER_XSK:
+		return i40e_xsk_enable(dev, xdp->xsk.queue_id,
+				       xdp->xsk.rx_parms);
+	case XDP_UNREGISTER_XSK:
+		return i40e_xsk_disable(dev, xdp->xsk.queue_id,
+					xdp->xsk.rx_parms);
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index fffc254abd8c..4fb5bc030df7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1256,8 +1256,11 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 	kfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
 
-	bpool_destroy(rx_ring->bpool);
+	if (!ring_has_xsk_buff_pool(rx_ring))
+		bpool_destroy(rx_ring->bpool);
+
 	rx_ring->bpool = NULL;
+	clear_ring_xsk_buff_pool(rx_ring);
 
 	if (rx_ring->desc) {
 		dma_free_coherent(rx_ring->dev, rx_ring->size,
@@ -1917,6 +1920,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 	xdp.data = xdp.data_hard_start + *headroom;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *size;
+	xdp.bp_handle = handle;
 	xdp.rxq = &rx_ring->xdp_rxq;
 
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -1943,17 +1947,8 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 		}
 		break;
 	case XDP_REDIRECT:
-		err = i40e_xdp_buff_convert_page(rx_ring, &xdp, handle, *size,
-						 *headroom);
-		if (err) {
-			result = I40E_XDP_CONSUMED;
-			break;
-		}
-
 		err = xdp_do_redirect(rx_ring->netdev, &xdp, xdp_prog);
-		result = I40E_XDP_TX;
-		if (err)
-			page_frag_free(xdp.data);
+		result = err ? I40E_XDP_CONSUMED : I40E_XDP_TX;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index d8345265db1e..906a562507a9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -245,6 +245,14 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT	29
 #define I40E_TX_FLAGS_VLAN_SHIFT	16
 
+/* Signals completion of a TX packet for an XDP socket. */
+typedef void (*tx_completion_func)(u32 start, u32 npackets,
+				   unsigned long ctx1, unsigned long ctx2);
+/* Returns the next packet to send for an XDP socket. */
+typedef int (*get_tx_packet_func)(struct net_device *dev, u32 queue_id,
+				  dma_addr_t *dma, void **data, u32 *len,
+				  u32 *offset);
+
 struct i40e_tx_buffer {
 	struct i40e_tx_desc *next_to_watch;
 	union {
@@ -291,6 +299,12 @@ enum i40e_ring_state_t {
 	__I40E_RING_STATE_NBITS /* must be last */
 };
 
+struct i40e_xsk_ctx {
+	struct buff_pool *buff_pool;
+	void *err_ctx;
+	void (*err_handler)(void *ctx, int errno);
+};
+
 /* some useful defines for virtchannel interface, which
  * is the only remaining user of header split
  */
@@ -346,6 +360,7 @@ struct i40e_ring {
 #define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
 #define I40E_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)
 #define I40E_TXR_FLAGS_XDP			BIT(2)
+#define I40E_RXR_FLAGS_XSK_BUFF_POOL		BIT(3)
 
 	/* stats structs */
 	struct i40e_queue_stats stats;
@@ -374,6 +389,7 @@ struct i40e_ring {
 	struct i40e_channel *ch;
 	struct xdp_rxq_info xdp_rxq;
 	struct buff_pool *bpool;
+	struct i40e_xsk_ctx *xsk;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
@@ -401,6 +417,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
 	ring->flags |= I40E_TXR_FLAGS_XDP;
 }
 
+static inline bool ring_has_xsk_buff_pool(struct i40e_ring *ring)
+{
+	return !!(ring->flags & I40E_RXR_FLAGS_XSK_BUFF_POOL);
+}
+
+static inline void clear_ring_xsk_buff_pool(struct i40e_ring *ring)
+{
+	ring->flags &= ~I40E_RXR_FLAGS_XSK_BUFF_POOL;
+}
+
+static inline void set_ring_xsk_buff_pool(struct i40e_ring *ring)
+{
+	ring->flags |= I40E_RXR_FLAGS_XSK_BUFF_POOL;
+}
+
 enum i40e_latency_range {
 	I40E_LOWEST_LATENCY = 0,
 	I40E_LOW_LATENCY = 1,
@@ -536,4 +567,5 @@ static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
 {
 	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
 }
+
 #endif /* _I40E_TXRX_H_ */
-- 
2.14.1
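
P.S. The two typedefs added to i40e_txrx.h, tx_completion_func and
get_tx_packet_func, are not referenced anywhere in this patch; they are
forward declarations for the Tx side of the series. A rough sketch of how
a later Tx patch might consume get_tx_packet_func (the function below and
its locals are made up, only the typedef signature comes from this patch,
and a zero-on-success return convention is assumed):

	/* Hypothetical Tx-side consumer -- not part of this patch. */
	static u32 i40e_xmit_xsk(struct net_device *dev, u32 queue_id,
				 get_tx_packet_func get_pkt)
	{
		u32 npackets = 0;
		dma_addr_t dma;
		void *data;
		u32 len, offset;

		/* Pull frames off the xsk Tx queue and post descriptors. */
		while (!get_pkt(dev, queue_id, &dma, &data, &len, &offset)) {
			/* write a Tx descriptor for (dma + offset, len) */
			npackets++;
		}

		return npackets;
	}

The completion side would then amount to calling the registered
tx_completion_func(start, npackets, ctx1, ctx2) from the Tx clean path
once the posted descriptors have been written back.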