Intel NIC mbuf fast free

Morten Brørup Thu, 05 Jun 2025 10:17:04 -0700

Anatoly,

I noticed you are consolidating the Intel NIC drivers into common code, which 
is good.


While you are at it, please also consider replacing some ancient open-coded 
logic with the library functions that do the same thing:
https://git.dpdk.org/dpdk/tree/drivers/net/intel/common/tx.h#n157

Something like (untested):

static __rte_always_inline int
ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool ctx_descs)
{
-       int nb_free = 0;
-       struct rte_mbuf *free[IETH_VPMD_TX_MAX_FREE_BUF];
-       struct rte_mbuf *m;

        /* check DD bits on threshold descriptor */
        if (!desc_done(txq, txq->tx_next_dd))
                return 0;

        const uint32_t n = txq->tx_rs_thresh >> ctx_descs;

        /* first buffer to free from S/W ring is at index
         * tx_next_dd - (tx_rs_thresh - 1)
         */
        struct ci_tx_entry_vec *txep = txq->sw_ring_vec;
        txep += (txq->tx_next_dd >> ctx_descs) - (n - 1);

-       if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
-               struct rte_mempool *mp = txep[0].mbuf->pool;
-               void **cache_objs;
-               struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
-
-               if (cache == NULL)
-                       goto normal;
-
-               cache_objs = &cache->objs[cache->len];
-
-               if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
-                       rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
-                       goto done;
-               }
-
-               /* The cache follows the following algorithm
-                *   1. Add the objects to the cache
-                *   2. Anything greater than the cache min value (if it
-                *   crosses the cache flush threshold) is flushed to the ring.
-                */
-               /* Add elements back into the cache */
-               uint32_t copied = 0;
-               /* n is multiple of 32 */
-               while (copied < n) {
-                       memcpy(&cache_objs[copied], &txep[copied], 32 * sizeof(void *));
-                       copied += 32;
-               }
-               cache->len += n;
-
-               if (cache->len >= cache->flushthresh) {
-                       rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
-                                       cache->len - cache->size);
-                       cache->len = cache->size;
-               }
-               goto done;
-       }
-
-normal:
-       m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
-       if (likely(m)) {
-               free[0] = m;
-               nb_free = 1;
-               for (uint32_t i = 1; i < n; i++) {
-                       m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (likely(m)) {
-                               if (likely(m->pool == free[0]->pool)) {
-                                       free[nb_free++] = m;
-                               } else {
-                                       rte_mempool_put_bulk(free[0]->pool, (void *)free, nb_free);
-                                       free[0] = m;
-                                       nb_free = 1;
-                               }
-                       }
-               }
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
-       } else {
-               for (uint32_t i = 1; i < n; i++) {
-                       m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-                       if (m)
-                               rte_mempool_put(m->pool, m);
-               }
-       }
-
-done:
+       if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
+               rte_mempool_put_bulk(txep[0].mbuf->pool, (void **)txep, n);
+       else
+               rte_pktmbuf_free_bulk((struct rte_mbuf **)txep, n);
+
        /* buffers were freed, update counters */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
        if (txq->tx_next_dd >= txq->nb_tx_desc)
                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

        return txq->tx_rs_thresh;
}

Note:
My suggestion relies on the ci_tx_entry_vec structure effectively being the 
same as an mbuf pointer, so that the sw_ring_vec array can be passed directly 
as an array of mbuf pointers.
The existing code path for RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE also relies on 
this.


-Morten

Intel NIC mbuf fast free

Reply via email to