> -----Original Message-----
> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org]
> On Behalf Of Jeff Kirsher
> Sent: Thursday, February 16, 2017 8:51 PM
> To: da...@davemloft.net
> Cc: Alexander Duyck; netdev@vger.kernel.org; nhor...@redhat.com;
> sassm...@redhat.com; jogre...@redhat.com; Jeff Kirsher
> Subject: [net-next 06/14] ixgbe: Update driver to make use of DMA attributes in Rx path
> 
> From: Alexander Duyck <alexander.h.du...@intel.com>
> 
> This patch adds support for DMA_ATTR_SKIP_CPU_SYNC and
> DMA_ATTR_WEAK_ORDERING.  Enabling both of these for the Rx path yields
> performance improvements on architectures that implement either one,
> because page mapping and unmapping only have to sync what is actually
> being used instead of the entire buffer.  In addition, enabling the weak
> ordering attribute provides a performance improvement on architectures
> that can associate a memory ordering with a DMA buffer, such as Sparc.
> 
> Signed-off-by: Alexander Duyck <alexander.h.du...@intel.com>
> Tested-by: Andrew Bowers <andrewx.bow...@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirs...@intel.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h      |  3 ++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 ++++++++++++++++++---------
>  2 files changed, 40 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index 6530eff01a0b..8167e77b924f 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -104,6 +104,9 @@
>  /* How many Rx Buffers do we bundle into one write to the hardware ? */
>  #define IXGBE_RX_BUFFER_WRITE        16      /* Must be power of 2 */
> 
> +#define IXGBE_RX_DMA_ATTR \
> +     (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
> +
>  enum ixgbe_tx_flags {
>       /* cmd_type flags */
>       IXGBE_TX_FLAGS_HW_VLAN  = 0x01,
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index dde2c852e01d..ddde6759f094 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -1570,8 +1570,10 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
>       }
> 
>       /* map page for use */
> -     dma = dma_map_page(rx_ring->dev, page, 0,
> -                        ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
> +     dma = dma_map_page_attrs(rx_ring->dev, page, 0,
> +                              ixgbe_rx_pg_size(rx_ring),
> +                              DMA_FROM_DEVICE,
> +                              IXGBE_RX_DMA_ATTR);
> 
>       /*
>        * if mapping failed free memory back to system since
> @@ -1614,6 +1616,12 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
>               if (!ixgbe_alloc_mapped_page(rx_ring, bi))
>                       break;
> 
> +             /* sync the buffer for use by the device */
> +             dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
> +                                              bi->page_offset,
> +                                              ixgbe_rx_bufsz(rx_ring),
> +                                              DMA_FROM_DEVICE);
> +
>               /*
>                * Refresh the desc even if buffer_addrs didn't change
>                * because each write-back erases this info.
> @@ -1832,8 +1840,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
>  {
>       /* if the page was released unmap it, else just sync our portion */
>       if (unlikely(IXGBE_CB(skb)->page_released)) {
> -             dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma,
> -                            ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
> +             dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
> +                                  ixgbe_rx_pg_size(rx_ring),
> +                                  DMA_FROM_DEVICE,
> +                                  IXGBE_RX_DMA_ATTR);
>               IXGBE_CB(skb)->page_released = false;
>       } else {
>               struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
> @@ -1917,12 +1927,6 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
> 
>       /* transfer page from old buffer to new buffer */
>       *new_buff = *old_buff;
> -
> -     /* sync the buffer for use by the device */
> -     dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
> -                                      new_buff->page_offset,
> -                                      ixgbe_rx_bufsz(rx_ring),
> -                                      DMA_FROM_DEVICE);
>  }
> 
>  static inline bool ixgbe_page_is_reserved(struct page *page)
> @@ -2089,9 +2093,10 @@ static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring,
>               IXGBE_CB(skb)->page_released = true;
>       } else {
>               /* we are not reusing the buffer so unmap it */
> -             dma_unmap_page(rx_ring->dev, rx_buffer->dma,
> -                            ixgbe_rx_pg_size(rx_ring),
> -                            DMA_FROM_DEVICE);
> +             dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
> +                                  ixgbe_rx_pg_size(rx_ring),
> +                                  DMA_FROM_DEVICE,
> +                                  IXGBE_RX_DMA_ATTR);
>       }
> 
>       /* clear contents of buffer_info */
> @@ -4883,10 +4888,11 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
>               if (rx_buffer->skb) {
>                       struct sk_buff *skb = rx_buffer->skb;
>                       if (IXGBE_CB(skb)->page_released)
> -                             dma_unmap_page(dev,
> -                                            IXGBE_CB(skb)->dma,
> -                                            ixgbe_rx_bufsz(rx_ring),
> -                                            DMA_FROM_DEVICE);
> +                             dma_unmap_page_attrs(dev,
> +                                                  IXGBE_CB(skb)->dma,
> +                                                  ixgbe_rx_pg_size(rx_ring),
> +                                                  DMA_FROM_DEVICE,
> +                                                  IXGBE_RX_DMA_ATTR);
>                       dev_kfree_skb(skb);
>                       rx_buffer->skb = NULL;
>               }
> @@ -4894,8 +4900,20 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
>               if (!rx_buffer->page)
>                       continue;
> 
> -             dma_unmap_page(dev, rx_buffer->dma,
> -                            ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
> +             /* Invalidate cache lines that may have been written to by
> +              * device so that we avoid corrupting memory.
> +              */
> +             dma_sync_single_range_for_cpu(rx_ring->dev,
> +                                           rx_buffer->dma,
> +                                           rx_buffer->page_offset,
> +                                           ixgbe_rx_bufsz(rx_ring),
> +                                           DMA_FROM_DEVICE);
> +
> +             /* free resources associated with mapping */
> +             dma_unmap_page_attrs(dev, rx_buffer->dma,
> +                                  ixgbe_rx_pg_size(rx_ring),
> +                                  DMA_FROM_DEVICE,
> +                                  IXGBE_RX_DMA_ATTR);
>               __free_pages(rx_buffer->page, ixgbe_rx_pg_order(rx_ring));
> 
>               rx_buffer->page = NULL;
> --
> 2.11.0

Hi Alex,
Is this patch applicable to arm64 as well? I remember that it needs IOMMU
support, right?
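
For reference, here is a minimal sketch of the mapping pattern this patch
moves to, as I understand it. This is not ixgbe code: the structure and
function names below are made up for illustration, and error handling is
trimmed to the essentials.

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Hypothetical names, for illustration only. */
#define MY_RX_DMA_ATTR  (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)

struct my_rx_buffer {
        struct page *page;
        dma_addr_t dma;
        unsigned int page_offset;
};

/* Map a whole Rx page once, skipping the implicit full-buffer CPU sync. */
static int my_map_rx_page(struct device *dev, struct my_rx_buffer *buf)
{
        buf->page = alloc_page(GFP_ATOMIC);
        if (!buf->page)
                return -ENOMEM;

        buf->dma = dma_map_page_attrs(dev, buf->page, 0, PAGE_SIZE,
                                      DMA_FROM_DEVICE, MY_RX_DMA_ATTR);
        if (dma_mapping_error(dev, buf->dma)) {
                __free_page(buf->page);
                buf->page = NULL;
                return -ENOMEM;
        }

        buf->page_offset = 0;
        return 0;
}

/* Hand the buffer to the device: sync only the region it will actually use. */
static void my_give_buffer_to_hw(struct device *dev, struct my_rx_buffer *buf,
                                 unsigned int bufsz)
{
        dma_sync_single_range_for_device(dev, buf->dma, buf->page_offset,
                                         bufsz, DMA_FROM_DEVICE);
}

/*
 * Tear down: sync the used region back to the CPU first, then unmap with the
 * same attributes so the unmap itself does not sync the whole page again.
 */
static void my_free_rx_page(struct device *dev, struct my_rx_buffer *buf,
                            unsigned int bufsz)
{
        dma_sync_single_range_for_cpu(dev, buf->dma, buf->page_offset,
                                      bufsz, DMA_FROM_DEVICE);
        dma_unmap_page_attrs(dev, buf->dma, PAGE_SIZE,
                             DMA_FROM_DEVICE, MY_RX_DMA_ATTR);
        __free_page(buf->page);
        buf->page = NULL;
}

In other words, the page is mapped once with both attributes set, only the
region actually handed to the hardware is synced before reuse and before
teardown, and the same attributes are passed again at unmap time so no
full-page sync happens there either, which is what made me wonder about the
arm64/IOMMU case.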
