Hi,

On 9/28/21 03:43, Gaoxiang Liu wrote:
To improve performance in vhost Tx/Rx, merge vhost stats loop.
eth_vhost_tx has 2 loop of send num iteraion.
It can be merge into one.
eth_vhost_rx has the same issue as Tx.

Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Please remove the Fixes tag, this is an optimization, not a fix.


Signed-off-by: Gaoxiang Liu <[email protected]>
---

v2:
  * Fix coding style issues.
---
  drivers/net/vhost/rte_eth_vhost.c | 62 ++++++++++++++-----------------
  1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c 
b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..a4129980f2 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
  }
static void
-vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
-                          uint16_t count, uint64_t nb_bytes,
-                          uint64_t nb_missed)
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)

I tried to build without and with your patch, and I think that what can
explain most of the performance difference is that without your patch
the function is not inlined, whereas it is implicitely inlined with your
patch applied.

I agree with your patch, but I think we might add __rte_always_inline to
this function to make it explicit. What do you think?

Other than that:

Reviewed-by: Maxime Coquelin <[email protected]>

Thanks,
Maxime


  {
        uint32_t pkt_len = 0;
-       uint64_t i = 0;
        uint64_t index;
        struct vhost_stats *pstats = &vq->stats;
- pstats->xstats[VHOST_BYTE] += nb_bytes;
-       pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
-       pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
-
-       for (i = 0; i < count ; i++) {
-               pstats->xstats[VHOST_PKT]++;
-               pkt_len = bufs[i]->pkt_len;
-               if (pkt_len == 64) {
-                       pstats->xstats[VHOST_64_PKT]++;
-               } else if (pkt_len > 64 && pkt_len < 1024) {
-                       index = (sizeof(pkt_len) * 8)
-                               - __builtin_clz(pkt_len) - 5;
-                       pstats->xstats[index]++;
-               } else {
-                       if (pkt_len < 64)
-                               pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-                       else if (pkt_len <= 1522)
-                               pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-                       else if (pkt_len > 1522)
-                               pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-               }
-               vhost_count_xcast_packets(vq, bufs[i]);
+       pstats->xstats[VHOST_PKT]++;
+       pkt_len = buf->pkt_len;
+       if (pkt_len == 64) {
+               pstats->xstats[VHOST_64_PKT]++;
+       } else if (pkt_len > 64 && pkt_len < 1024) {
+               index = (sizeof(pkt_len) * 8)
+                       - __builtin_clz(pkt_len) - 5;
+               pstats->xstats[index]++;
+       } else {
+               if (pkt_len < 64)
+                       pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+               else if (pkt_len <= 1522)
+                       pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+               else if (pkt_len > 1522)
+                       pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
        }
+       vhost_count_xcast_packets(vq, buf);
  }
static uint16_t
@@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
        struct vhost_queue *r = q;
        uint16_t i, nb_rx = 0;
        uint16_t nb_receive = nb_bufs;
-       uint64_t nb_bytes = 0;
if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
                return 0;
@@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
                if (r->internal->vlan_strip)
                        rte_vlan_strip(bufs[i]);
- nb_bytes += bufs[i]->pkt_len;
-       }
+               r->stats.bytes += bufs[i]->pkt_len;
+               r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
- r->stats.bytes += nb_bytes;
-       vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
+               vhost_update_single_packet_xstats(r, bufs[i]);
+       }
out:
        rte_atomic32_set(&r->while_queuing, 0);
@@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
                        break;
        }
- for (i = 0; likely(i < nb_tx); i++)
+       for (i = 0; likely(i < nb_tx); i++) {
                nb_bytes += bufs[i]->pkt_len;
+               vhost_update_single_packet_xstats(r, bufs[i]);
+       }
nb_missed = nb_bufs - nb_tx; r->stats.pkts += nb_tx;
        r->stats.bytes += nb_bytes;
-       r->stats.missed_pkts += nb_bufs - nb_tx;
+       r->stats.missed_pkts += nb_missed;
- vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
+       r->stats.xstats[VHOST_BYTE] += nb_bytes;
+       r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+       r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
         * ifHCOutBroadcastPkts counters are increased when packets are not


Reply via email to