From: "David S. Miller" <[EMAIL PROTECTED]>
Date: Mon, 15 Aug 2005 15:45:00 -0700 (PDT)
> Ok, this scheme doesn't work as-is.
FWIW the fclone_ref version works perfectly fine, and
I'm running this right now. I'm including it below
against current net-2.6.14 for reference.
So what do folks think we should do? I'm inclined to put
this in first, as-is, then if we can get the skb->users
variant functional we can add that in as a follow-on
patch.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -162,6 +162,13 @@ struct skb_timeval {
u32 off_usec;
};
+
+enum {
+ SKB_FCLONE_UNAVAILABLE,
+ SKB_FCLONE_ORIG,
+ SKB_FCLONE_CLONE,
+};
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -255,8 +262,10 @@ struct sk_buff {
ip_summed:2,
nohdr:1,
nfctinfo:3;
- __u8 pkt_type;
+ __u8 pkt_type:3,
+ fclone:2;
__u16 protocol;
+ atomic_t fclone_ref;
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
@@ -295,8 +304,20 @@ struct sk_buff {
#include <asm/system.h>
extern void __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size,
- unsigned int __nocast priority);
+extern struct sk_buff *__alloc_skb(unsigned int size,
+ unsigned int __nocast priority, int fclone);
+static inline struct sk_buff *alloc_skb(unsigned int size,
+ unsigned int __nocast priority)
+{
+ return __alloc_skb(size, priority, 0);
+}
+
+static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
+ unsigned int __nocast priority)
+{
+ return __alloc_skb(size, priority, 1);
+}
+
extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
unsigned int size,
unsigned int __nocast priority);
diff --git a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1195,7 +1195,7 @@ static inline struct sk_buff *sk_stream_
int hdr_len;
hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
- skb = alloc_skb(size + hdr_len, gfp);
+ skb = alloc_skb_fclone(size + hdr_len, gfp);
if (skb) {
skb->truesize += mem;
if (sk->sk_forward_alloc >= (int)skb->truesize ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
#include <asm/system.h>
static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_fclone_cache;
struct timeval __read_mostly skb_tv_base;
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb
*/
/**
- * alloc_skb - allocate a network buffer
+ * __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
*
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+ int fclone)
{
struct sk_buff *skb;
u8 *data;
/* Get the HEAD */
- skb = kmem_cache_alloc(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA);
+ if (fclone)
+ skb = kmem_cache_alloc(skbuff_fclone_cache,
+ gfp_mask & ~__GFP_DMA);
+ else
+ skb = kmem_cache_alloc(skbuff_head_cache,
+ gfp_mask & ~__GFP_DMA);
+
if (!skb)
goto out;
@@ -155,7 +162,14 @@ struct sk_buff *alloc_skb(unsigned int s
skb->data = data;
skb->tail = data;
skb->end = data + size;
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ skb->fclone = SKB_FCLONE_ORIG;
+ atomic_set(&skb->fclone_ref, 1);
+
+ child->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +282,31 @@ void skb_release_data(struct sk_buff *sk
*/
void kfree_skbmem(struct sk_buff *skb)
{
+ struct sk_buff *other;
+
skb_release_data(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ switch (skb->fclone) {
+ case SKB_FCLONE_UNAVAILABLE:
+ kmem_cache_free(skbuff_head_cache, skb);
+ break;
+
+ case SKB_FCLONE_ORIG:
+ if (atomic_dec_and_test(&skb->fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, skb);
+ break;
+
+ case SKB_FCLONE_CLONE:
+ other = skb - 1;
+
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+ if (atomic_dec_and_test(&other->fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, other);
+ break;
+ };
}
/**
@@ -324,10 +361,19 @@ void __kfree_skb(struct sk_buff *skb)
struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
- struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ struct sk_buff *n;
- if (!n)
- return NULL;
+ n = skb + 1;
+ if (skb->fclone == SKB_FCLONE_ORIG &&
+ n->fclone == SKB_FCLONE_UNAVAILABLE) {
+ n->fclone = SKB_FCLONE_CLONE;
+ atomic_inc(&skb->fclone_ref);
+ } else {
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+ n->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
#define C(x) n->x = skb->x
@@ -409,6 +455,7 @@ static void copy_skb_header(struct sk_bu
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
+ new->fclone = SKB_FCLONE_UNAVAILABLE;
new->pkt_type = old->pkt_type;
new->tstamp = old->tstamp;
new->destructor = NULL;
@@ -1647,13 +1694,22 @@ void __init skb_init(void)
NULL, NULL);
if (!skbuff_head_cache)
panic("cannot create skbuff cache");
+
+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ 2*sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!skbuff_fclone_cache)
+ panic("cannot create skbuff cache");
+
do_gettimeofday(&skb_tv_base);
}
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1579,7 +1579,7 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
- skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
if (skb)
break;
yield();
@@ -1801,7 +1801,7 @@ int tcp_connect(struct sock *sk)
tcp_connect_init(sk);
- buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+ buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html