As in the Siena/EF10 case, using the cached old_read_count minimises
cacheline ping-pong between the TX and completion paths.

Signed-off-by: Edward Cree <ec...@solarflare.com>
---
 drivers/net/ethernet/sfc/ef100_tx.c   |  8 ++++++--
 drivers/net/ethernet/sfc/net_driver.h | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
index ce1b462efd17..078c7ec2a70e 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.c
+++ b/drivers/net/ethernet/sfc/ef100_tx.c
@@ -360,15 +360,19 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                goto err;
        ef100_tx_make_descriptors(tx_queue, skb, segments);
 
-       fill_level = efx_channel_tx_fill_level(tx_queue->channel);
+       fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
        if (fill_level > efx->txq_stop_thresh) {
+               struct efx_tx_queue *txq2;
+
                netif_tx_stop_queue(tx_queue->core_txq);
                /* Re-read after a memory barrier in case we've raced with
                 * the completion path. Otherwise there's a danger we'll never
                 * restart the queue if all completions have just happened.
                 */
                smp_mb();
-               fill_level = efx_channel_tx_fill_level(tx_queue->channel);
+               efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
+                       txq2->old_read_count = READ_ONCE(txq2->read_count);
+               fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
                if (fill_level < efx->txq_stop_thresh)
                        netif_tx_start_queue(tx_queue->core_txq);
        }
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index adc138f9d15f..366e649fa869 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1692,6 +1692,20 @@ efx_channel_tx_fill_level(struct efx_channel *channel)
        return fill_level;
 }
 
+/* Conservative approximation of efx_channel_tx_fill_level using cached value */
+static inline unsigned int
+efx_channel_tx_old_fill_level(struct efx_channel *channel)
+{
+       struct efx_tx_queue *tx_queue;
+       unsigned int fill_level = 0;
+
+       efx_for_each_channel_tx_queue(tx_queue, channel)
+               fill_level = max(fill_level,
+                                tx_queue->insert_count - tx_queue->old_read_count);
+
+       return fill_level;
+}
+
 /* Get all supported features.
  * If a feature is not fixed, it is present in hw_features.
  * If a feature is fixed, it does not present in hw_features, but

Reply via email to