On Thu, 2021-02-11 at 14:28 +0000, Alexander Lobakin wrote:
> From: Paolo Abeni <pab...@redhat.com> on Thu, 11 Feb 2021 11:16:40 +0100 wrote:
> > What about changing __napi_alloc_skb() to always use
> > the __napi_build_skb(), for both kmalloc and page backed skbs? That is,
> > always doing the 'data' allocation in __napi_alloc_skb() - either via
> > page_frag or via kmalloc() - and then call __napi_build_skb().
> >
> > I think that should avoid adding more checks in __alloc_skb() and
> > should probably reduce the number of conditionals used
> > by __napi_alloc_skb().
>
> I thought of this too. But this will introduce a conditional branch
> to set or not set skb->head_frag. So one branch less in __alloc_skb(),
> one branch more here, and we also lose the ability to __alloc_skb()
> with a decached head.
Just to try to be clear, I mean something like the following (not even
build tested). In the fast path it has fewer branches than the current
code - for both kmalloc and page_frag allocation.
---
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 785daff48030..a242fbe4730e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -506,23 +506,12 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
 	struct napi_alloc_cache *nc;
+	bool head_frag, pfmemalloc;
 	struct sk_buff *skb;
 	void *data;
 
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
-	/* If requested length is either too small or too big,
-	 * we use kmalloc() for skb->head allocation.
-	 */
-	if (len <= SKB_WITH_OVERHEAD(1024) ||
-	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
-	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
-		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
-		if (!skb)
-			goto skb_fail;
-		goto skb_success;
-	}
-
 	nc = this_cpu_ptr(&napi_alloc_cache);
 	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	len = SKB_DATA_ALIGN(len);
@@ -530,25 +519,34 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = page_frag_alloc(&nc->page, len, gfp_mask);
+	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
+	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
+		data = kmalloc_reserve(len, gfp_mask, NUMA_NO_NODE, &pfmemalloc);
+		head_frag = 0;
+		len = 0;
+	} else {
+		data = page_frag_alloc(&nc->page, len, gfp_mask);
+		pfmemalloc = nc->page.pfmemalloc;
+		head_frag = 1;
+	}
 	if (unlikely(!data))
 		return NULL;
 
 	skb = __build_skb(data, len);
 	if (unlikely(!skb)) {
-		skb_free_frag(data);
+		if (head_frag)
+			skb_free_frag(data);
+		else
+			kfree(data);
 		return NULL;
 	}
 
-	if (nc->page.pfmemalloc)
-		skb->pfmemalloc = 1;
-	skb->head_frag = 1;
+	skb->pfmemalloc = pfmemalloc;
+	skb->head_frag = head_frag;
 
-skb_success:
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 	skb->dev = napi->dev;
-
-skb_fail:
 	return skb;
 }
 EXPORT_SYMBOL(__napi_alloc_skb);
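
Side note, for anyone skimming the thread: below is a minimal, standalone
userspace model of the control flow the diff above proposes. It is only a
sketch of the idea - the stub allocators, struct model_skb and the
hard-coded 128/4096 bounds are placeholders of mine, not the kernel's
SKB_WITH_OVERHEAD() arithmetic or real skb handling:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Placeholder stand-ins for the two kernel allocators. */
static void *stub_kmalloc(size_t len)    { return malloc(len); }
static void *stub_frag_alloc(size_t len) { return malloc(len); }

struct model_skb {
	void *data;
	bool head_frag;		/* records which allocator owns ->data */
};

/* Model of the proposed flow: choose the backing allocator up front,
 * remember the choice in head_frag, then share a single build/error
 * path for both cases. */
static struct model_skb *model_napi_alloc(size_t len)
{
	bool head_frag;
	void *data;

	if (len <= 128 || len > 4096) {		/* placeholder size bounds */
		data = stub_kmalloc(len);
		head_frag = false;
	} else {
		data = stub_frag_alloc(len);
		head_frag = true;
	}
	if (!data)
		return NULL;

	struct model_skb *skb = calloc(1, sizeof(*skb));
	if (!skb) {
		/* single error path; in the kernel diff the flag picks
		 * skb_free_frag() vs kfree() here */
		free(data);	/* both stubs happen to use free() */
		return NULL;
	}
	skb->data = data;
	skb->head_frag = head_frag;
	return skb;
}

int main(void)
{
	struct model_skb *skb = model_napi_alloc(1500);
	if (skb) {
		printf("allocated, head_frag=%d\n", skb->head_frag);
		free(skb->data);
		free(skb);
	}
	return 0;
}

The point is that skb->head_frag becomes a plain assignment rather than
a branch; the only head_frag conditional left sits on the unlikely
allocation-failure path, which should answer the "one branch more here"
concern for the fast path.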