The atomic refcnt in struct net_device is probably one of the hottest memory spots in an SMP/NUMA network router or network server.

This counter is constantly incremented/decremented each time a network packet is handled, or an IP route is added to/deleted from the route cache. This is *not* SMP or NUMA friendly (because the locked ops are expensive and the cache line ping-pongs between CPUs).

But as a matter of fact, the counter is almost *never* read: it is only read when the device must be unregistered.

Some devices are *never* unregistered (loopback, or statically linked drivers), so we are refcounting them for nothing.

This patch tries to avoid atomic ops on SMP for the cases where the device won't be unregistered.

An 'int static_dev' field is added next to 'atomic_t refcnt', and may be set to one by drivers that are statically linked.

I changed the SET_MODULE_OWNER(dev) macro to avoid changing all network drivers, but I'm open to other suggestions.

All drivers that are currently using this macro automatically benefit from this SMP optimization: it's better to perform a test/conditional branch (even if badly predicted) than an atomic_{inc|dec}().

Signed-off-by: Eric Dumazet <[EMAIL PROTECTED]>
--- a/include/linux/netdevice.h 2006-02-07 11:55:42.000000000 +0100
+++ b/include/linux/netdevice.h 2006-02-07 13:06:14.000000000 +0100
@@ -417,10 +417,14 @@
        struct timer_list       watchdog_timer;
 
 /*
- * refcnt is a very hot point, so align it on SMP
+ * {static_dev,refcnt} is a very hot point, so align it on SMP
  */
        /* Number of references to this device */
-       atomic_t                refcnt ____cacheline_aligned_in_smp;
+#ifdef CONFIG_SMP
+       /* SMP optimization : if dev is static, no need to modify refcnt */
+       int                     static_dev ____cacheline_aligned_in_smp;
+#endif
+       atomic_t                refcnt;
 
        /* delayed register/unregister */
        struct list_head        todo_list;
@@ -514,7 +518,29 @@
                                & ~NETDEV_ALIGN_CONST);
 }
 
-#define SET_MODULE_OWNER(dev) do { } while (0)
+static inline int netif_static(const struct net_device *dev)
+{
+#if defined(CONFIG_SMP)
+       return dev->static_dev;
+#else
+       return 0;
+#endif
+}
+
+static inline void netif_setstatic(struct net_device *dev, int v)
+{
+#if defined(CONFIG_SMP)
+#if defined(MODULE)
+       v = 0;
+#endif
+       dev->static_dev = v;
+#endif
+}
+/*
+ * If a driver is not a module, dev can be marked as static
+ */
+#define SET_MODULE_OWNER(dev) do { netif_setstatic(dev, 1); } while (0)
+
 /* Set the sysfs physical device reference for the network logical device
  * if set prior to registration will cause a symlink during initialization.
  */
@@ -705,11 +731,12 @@
 
 static inline void dev_put(struct net_device *dev)
 {
-       atomic_dec(&dev->refcnt);
+       if (!netif_static(dev))
+               atomic_dec(&dev->refcnt);
 }
 
-#define __dev_put(dev) atomic_dec(&(dev)->refcnt)
-#define dev_hold(dev) atomic_inc(&(dev)->refcnt)
+#define __dev_put(dev) if (!netif_static(dev)) atomic_dec(&(dev)->refcnt)
+#define dev_hold(dev) if (!netif_static(dev)) atomic_inc(&(dev)->refcnt)
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
--- a/net/core/dev.c    2006-02-07 11:59:53.000000000 +0100
+++ b/net/core/dev.c    2006-02-07 12:52:27.000000000 +0100
@@ -2990,12 +2990,11 @@
        alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
        alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
-       p = kmalloc(alloc_size, GFP_KERNEL);
+       p = kzalloc(alloc_size, GFP_KERNEL);
        if (!p) {
                printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
                return NULL;
        }
-       memset(p, 0, alloc_size);
 
        dev = (struct net_device *)
                (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
@@ -3248,6 +3247,7 @@
                queue->backlog_dev.weight = weight_p;
                queue->backlog_dev.poll = process_backlog;
                atomic_set(&queue->backlog_dev.refcnt, 1);
+               netif_setstatic(&queue->backlog_dev, 1);
        }
 
        dev_boot_phase = 0;
--- a/drivers/net/loopback.c    2006-02-07 12:10:55.000000000 +0100
+++ b/drivers/net/loopback.c    2006-02-07 12:37:49.000000000 +0100
@@ -224,16 +224,18 @@
 int __init loopback_init(void)
 {
        struct net_device_stats *stats;
+       int res;
 
        /* Can survive without statistics */
-       stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+       stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL);
        if (stats) {
-               memset(stats, 0, sizeof(struct net_device_stats));
                loopback_dev.priv = stats;
                loopback_dev.get_stats = &get_stats;
        }
        
-       return register_netdev(&loopback_dev);
+       res = register_netdev(&loopback_dev);
+       SET_MODULE_OWNER(&loopback_dev);
+       return res;
 };
 
 EXPORT_SYMBOL(loopback_dev);

Reply via email to