> Using volatile for statistics generates expensive atomic rmw
> operations when not necessary.

I am not sure that is true; AFAIK, volatile only affects the compiler. In this
case it mostly guards against these variables being cached entirely in
registers. Your new version is actually more correct, but also potentially
slower, since a general fence affects all reads or writes, not just the
counters in question. I think it is common among drivers not to guarantee
visibility of the most recent counter values for the sake of performance.

> 
> Signed-off-by: Stephen Hemminger <[email protected]>
> ---
>  drivers/net/pcap/pcap_ethdev.c | 38 ++++++++++++++++++++++++++--------
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index 1658685a28..175d6998f9 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -47,10 +47,10 @@ static uint64_t timestamp_rx_dynflag;
>  static int timestamp_dynfield_offset = -1;
> 
>  struct queue_stat {
> -     volatile unsigned long pkts;
> -     volatile unsigned long bytes;
> -     volatile unsigned long err_pkts;
> -     volatile unsigned long rx_nombuf;
> +     uint64_t pkts;
> +     uint64_t bytes;
> +     uint64_t err_pkts;
> +     uint64_t rx_nombuf;
>  };
> 
>  struct queue_missed_stat {
> @@ -267,6 +267,9 @@ eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, 
> uint16_t nb_pkts)
>       pcap_q->rx_stat.pkts += i;
>       pcap_q->rx_stat.bytes += rx_bytes;
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
> +
>       return i;
>  }
> 
> @@ -345,6 +348,9 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t 
> nb_pkts)
>       pcap_q->rx_stat.pkts += num_rx;
>       pcap_q->rx_stat.bytes += rx_bytes;
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
> +
>       return num_rx;
>  }
> 
> @@ -440,6 +446,9 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, 
> uint16_t nb_pkts)
>       dumper_q->tx_stat.bytes += tx_bytes;
>       dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
> +
>       return nb_pkts;
>  }
> 
> @@ -464,6 +473,9 @@ eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t 
> nb_pkts)
>       tx_queue->tx_stat.pkts += nb_pkts;
>       tx_queue->tx_stat.bytes += tx_bytes;
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
> +
>       return nb_pkts;
>  }
> 
> @@ -515,6 +527,9 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
> nb_pkts)
>       tx_queue->tx_stat.bytes += tx_bytes;
>       tx_queue->tx_stat.err_pkts += nb_pkts - num_tx;
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
> +
>       return nb_pkts;
>  }
> 
> @@ -821,13 +836,16 @@ eth_stats_get(struct rte_eth_dev *dev, struct 
> rte_eth_stats *stats,
>             struct eth_queue_stats *qstats)
>  {
>       unsigned int i;
> -     unsigned long rx_packets_total = 0, rx_bytes_total = 0;
> -     unsigned long rx_missed_total = 0;
> -     unsigned long rx_nombuf_total = 0, rx_err_total = 0;
> -     unsigned long tx_packets_total = 0, tx_bytes_total = 0;
> -     unsigned long tx_packets_err_total = 0;
> +     uint64_t rx_packets_total = 0, rx_bytes_total = 0;
> +     uint64_t rx_missed_total = 0;
> +     uint64_t rx_nombuf_total = 0, rx_err_total = 0;
> +     uint64_t tx_packets_total = 0, tx_bytes_total = 0;
> +     uint64_t tx_packets_err_total = 0;
>       const struct pmd_internals *internal = dev->data->dev_private;
> 
> +     /* ensure that current statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_acquire);
> +
>       for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
>                       i < dev->data->nb_rx_queues; i++) {
>               if (qstats != NULL) {
> @@ -884,6 +902,8 @@ eth_stats_reset(struct rte_eth_dev *dev)
>               internal->tx_queue[i].tx_stat.err_pkts = 0;
>       }
> 
> +     /* ensure store operations of statistics are visible */
> +     rte_atomic_thread_fence(rte_memory_order_release);
>       return 0;
>  }
> 
> --
> 2.51.0
> 

Reply via email to