From: Ilias Apalodimas <ilias.apalodi...@linaro.org>

The driver's memcpy for packets < 256b and its recycle tricks are no
longer needed, since the previous patch introduced buffer recycling
using the page_pool API (although recycling does not get fully
activated in this patch).  With those removed, switch to using
build_skb().
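
For reference, a condensed sketch of the new first-descriptor path
(same calls as the hunk below; error handling and the left_size
bookkeeping are omitted):

	/* sync the page_pool buffer for the CPU before touching it */
	dma_sync_single_range_for_cpu(dev->dev.parent, phys_addr, 0,
				      rx_bytes, DMA_FROM_DEVICE);
	/* wrap the buffer directly instead of allocating + copying */
	rxq->skb = build_skb(data, PAGE_SIZE);
	skb_reserve(rxq->skb, MVNETA_MH_SIZE + NET_SKB_PAD);
	skb_put(rxq->skb, min(rx_bytes, (int)PAGE_SIZE));
	page_pool_unmap_page(rxq->page_pool, page);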

This patch also implicitly fixes a driver bug in the < 256b case,
where the memory was copied before being synced for the CPU (that
case is now removed).
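
Illustrative only (this path is deleted below): the removed < 256b
code effectively did

	memcpy(rxq->skb->data, data + MVNETA_MH_SIZE, copy_size);
	...
	dma_sync_single_range_for_cpu(dev->dev.parent, phys_addr, 0,
				      size, DMA_FROM_DEVICE);

i.e. the copy ran before the buffer was synced for the CPU, so the CPU
could read stale data.  The sync must happen before any access to the
buffer, which the build_skb() path above gets right.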

We also remove the data prefetch completely.  The original driver had
the prefetch misplaced, before any dma_sync operation took place.
Based on Jesper's analysis, even if the prefetch is placed after the
DMA sync ops it ends up hurting performance.
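
For illustration (not part of the patch), the removed prefetch ran as

	data = page_address(page);
	prefetch(data);		/* before any dma_sync -- premature */

and even the corrected ordering

	dma_sync_single_range_for_cpu(...);
	prefetch(data);

still hurts performance in Jesper's tests, so the prefetch is dropped
rather than moved.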

Signed-off-by: Ilias Apalodimas <ilias.apalodi...@linaro.org>
Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 drivers/net/ethernet/marvell/mvneta.c |   81 +++++++++------------------------
 1 file changed, 22 insertions(+), 59 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 2354421fe96f..78f1fcdc1f00 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -643,7 +643,6 @@ static int txq_number = 8;
 static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
-static int rx_header_size __read_mostly = 128;
 
 /* HW BM need that each port be identify by a unique ID */
 static int global_port_id;
@@ -1823,7 +1822,7 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
 
        phys_addr = page_pool_get_dma_addr(page);
 
-       phys_addr += pp->rx_offset_correction;
+       phys_addr += pp->rx_offset_correction + NET_SKB_PAD;
        mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
        return 0;
 }
@@ -1944,14 +1943,12 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                struct page *page;
                dma_addr_t phys_addr;
                u32 rx_status, index;
-               int rx_bytes, skb_size, copy_size;
-               int frag_num, frag_size, frag_offset;
+               int frag_num, frag_size;
+               int rx_bytes;
 
                index = rx_desc - rxq->descs;
                page = (struct page *)rxq->buf_virt_addr[index];
                data = page_address(page);
-               /* Prefetch header */
-               prefetch(data);
 
                phys_addr = rx_desc->buf_phys_addr;
                rx_status = rx_desc->status;
@@ -1969,49 +1966,25 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                        rx_bytes = rx_desc->data_size -
                                   (ETH_FCS_LEN + MVNETA_MH_SIZE);
 
-                       /* Allocate small skb for each new packet */
-                       skb_size = max(rx_copybreak, rx_header_size);
-                       rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
-                       if (unlikely(!rxq->skb)) {
-                               netdev_err(dev,
-                                          "Can't allocate skb on queue %d\n",
-                                          rxq->id);
-                               dev->stats.rx_dropped++;
-                               rxq->skb_alloc_err++;
-                               continue;
-                       }
-                       copy_size = min(skb_size, rx_bytes);
-
-                       /* Copy data from buffer to SKB, skip Marvell header */
-                       memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
-                              copy_size);
-                       skb_put(rxq->skb, copy_size);
-                       rxq->left_size = rx_bytes - copy_size;
 
-                       mvneta_rx_csum(pp, rx_status, rxq->skb);
-                       if (rxq->left_size == 0) {
-                               int size = copy_size + MVNETA_MH_SIZE;
-
-                               dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                             phys_addr, 0,
-                                                             size,
-                                                             DMA_FROM_DEVICE);
+                       dma_sync_single_range_for_cpu(dev->dev.parent,
+                                                     phys_addr, 0,
+                                                     rx_bytes,
+                                                     DMA_FROM_DEVICE);
 
-                               /* leave the descriptor and buffer untouched */
-                       } else {
-                               /* refill descriptor with new buffer later */
-                               rx_desc->buf_phys_addr = 0;
+                       rxq->skb = build_skb(data, PAGE_SIZE);
+                       if (!rxq->skb)
+                               break;
 
-                               frag_num = 0;
-                               frag_offset = copy_size + MVNETA_MH_SIZE;
-                               frag_size = min(rxq->left_size,
-                                               (int)(PAGE_SIZE - frag_offset));
-                               skb_add_rx_frag(rxq->skb, frag_num, page,
-                                               frag_offset, frag_size,
-                                               PAGE_SIZE);
-                               page_pool_unmap_page(rxq->page_pool, page);
-                               rxq->left_size -= frag_size;
-                       }
+                       rx_desc->buf_phys_addr = 0;
+                       frag_num = 0;
+                       skb_reserve(rxq->skb, MVNETA_MH_SIZE + NET_SKB_PAD);
+                       skb_put(rxq->skb, rx_bytes < PAGE_SIZE ? rx_bytes :
+                               PAGE_SIZE);
+                       mvneta_rx_csum(pp, rx_status, rxq->skb);
+                       page_pool_unmap_page(rxq->page_pool, page);
+                       rxq->left_size = rx_bytes < PAGE_SIZE ? 0 : rx_bytes -
+                               PAGE_SIZE;
                } else {
                        /* Middle or Last descriptor */
                        if (unlikely(!rxq->skb)) {
@@ -2019,24 +1992,14 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
                                         rx_status);
                                continue;
                        }
-                       if (!rxq->left_size) {
-                               /* last descriptor has only FCS */
-                               /* and can be discarded */
-                               dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                             phys_addr, 0,
-                                                             ETH_FCS_LEN,
-                                                             DMA_FROM_DEVICE);
-                               /* leave the descriptor and buffer untouched */
-                       } else {
+                       if (rxq->left_size) {
                                /* refill descriptor with new buffer later */
                                rx_desc->buf_phys_addr = 0;
 
                                frag_num = skb_shinfo(rxq->skb)->nr_frags;
-                               frag_offset = 0;
-                               frag_size = min(rxq->left_size,
-                                               (int)(PAGE_SIZE - frag_offset));
+                               frag_size = min(rxq->left_size, (int)PAGE_SIZE);
                                skb_add_rx_frag(rxq->skb, frag_num, page,
-                                               frag_offset, frag_size,
+                                               0, frag_size,
                                                PAGE_SIZE);
 
                                page_pool_unmap_page(rxq->page_pool, page);
