On Mon, Feb 13, 2017 at 11:58 AM, Eric Dumazet <eduma...@google.com> wrote:
> Use of order-3 pages is problematic in some cases.
>
> This patch might add three kinds of regression :
>
> 1) a CPU performance regression, but we will add later page
> recycling and performance should be back.
>
> 2) TCP receiver could grow its receive window slightly slower,
> because skb->len/skb->truesize ratio will decrease.
> This is mostly ok, we prefer being conservative to not risk OOM,
> and eventually tune TCP better in the future.
> This is consistent with other drivers using 2048 per ethernet frame.
>
> 3) Because we allocate one page per RX slot, we consume more
> memory for the ring buffers. XDP already had this constraint anyway.
>
> Signed-off-by: Eric Dumazet <eduma...@google.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/en_rx.c   | 72 +++++++++++++---------------
>  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |  4 --
>  2 files changed, 33 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index 0c61c1200f2aec4c74d7403d9ec3ed06821c1bac..069ea09185fb0669d5c1f9b1b88f517b2d69c5ed 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -53,38 +53,26 @@
>  static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
>  			    struct mlx4_en_rx_alloc *page_alloc,
>  			    const struct mlx4_en_frag_info *frag_info,
> -			    gfp_t _gfp)
> +			    gfp_t gfp)
>  {
> -	int order;
>  	struct page *page;
>  	dma_addr_t dma;
>
> -	for (order = priv->rx_page_order; ;) {
> -		gfp_t gfp = _gfp;
> -
> -		if (order)
> -			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
> -		page = alloc_pages(gfp, order);
> -		if (likely(page))
> -			break;
> -		if (--order < 0 ||
> -		    ((PAGE_SIZE << order) < frag_info->frag_size))
> -			return -ENOMEM;
> -	}
> -	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
> -			   priv->dma_dir);
> +	page = alloc_page(gfp);
> +	if (unlikely(!page))
> +		return -ENOMEM;
> +	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
>  	if (unlikely(dma_mapping_error(priv->ddev, dma))) {
>  		put_page(page);
>  		return -ENOMEM;
>  	}
> -	page_alloc->page_size = PAGE_SIZE << order;
>  	page_alloc->page = page;
>  	page_alloc->dma = dma;
>  	page_alloc->page_offset = 0;
>  	/* Not doing get_page() for each frag is a big win
>  	 * on asymetric workloads. Note we can not use atomic_set().
>  	 */
> -	page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
> +	page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1);
>  	return 0;
>  }
>
> @@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
>  		page_alloc[i].page_offset += frag_info->frag_stride;
>
>  		if (page_alloc[i].page_offset + frag_info->frag_stride <=
> -		    ring_alloc[i].page_size)
> +		    PAGE_SIZE)
>  			continue;
>
>  		if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
> @@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
>  	while (i--) {
>  		if (page_alloc[i].page != ring_alloc[i].page) {
>  			dma_unmap_page(priv->ddev, page_alloc[i].dma,
> -				       page_alloc[i].page_size,
> -				       priv->dma_dir);
> +				       PAGE_SIZE, priv->dma_dir);
>  			page = page_alloc[i].page;
>  			/* Revert changes done by mlx4_alloc_pages */
> -			page_ref_sub(page, page_alloc[i].page_size /
> +			page_ref_sub(page, PAGE_SIZE /
>  					   priv->frag_info[i].frag_stride - 1);
>  			put_page(page);
>  		}
> @@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
>  	u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
>
>
> -	if (next_frag_end > frags[i].page_size)
> -		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
> +	if (next_frag_end > PAGE_SIZE)
> +		dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE,
>  			       priv->dma_dir);
>
>  	if (frags[i].page)
> @@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
>  				     frag_info, GFP_KERNEL | __GFP_COLD))
>  			goto out;
>
> -		en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n",
> -		       i, ring->page_alloc[i].page_size,
> -		       page_ref_count(ring->page_alloc[i].page));
> +		en_dbg(DRV, priv, " frag %d allocator: - frags:%d\n",
> +		       i, page_ref_count(ring->page_alloc[i].page));
>  	}
>  	return 0;
>
> @@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
>
>  		page_alloc = &ring->page_alloc[i];
>  		dma_unmap_page(priv->ddev, page_alloc->dma,
> -			       page_alloc->page_size,
> -			       priv->dma_dir);
> +			       PAGE_SIZE, priv->dma_dir);
>  		page = page_alloc->page;
>  		/* Revert changes done by mlx4_alloc_pages */
> -		page_ref_sub(page, page_alloc->page_size /
> +		page_ref_sub(page, PAGE_SIZE /
>  				   priv->frag_info[i].frag_stride - 1);
>  		put_page(page);
>  		page_alloc->page = NULL;
You can probably simplify this by using __page_frag_cache_drain, since that way you can just do the sub-and-test for the whole set of references instead of dropping N - 1 and then 1.
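
For example, in the unwind path of mlx4_en_alloc_frags above, the page_ref_sub() + put_page() pair could collapse into a single call. Untested sketch, just to illustrate; the count is meant to mirror the PAGE_SIZE / frag_stride - 1 references that mlx4_alloc_pages adds plus the base reference from alloc_page():

	while (i--) {
		if (page_alloc[i].page != ring_alloc[i].page) {
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
				       PAGE_SIZE, priv->dma_dir);
			page = page_alloc[i].page;
			/* Drop the references taken in mlx4_alloc_pages plus
			 * our own one in a single sub-and-test; the page is
			 * freed once the count reaches zero, so no separate
			 * put_page() is needed.
			 */
			__page_frag_cache_drain(page,
						PAGE_SIZE /
						priv->frag_info[i].frag_stride);
		}
	}

The same pattern would apply to the error path in mlx4_en_init_allocator.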