The atomic refcnt in struct net_device is probably one of the hottest memory
spots in an SMP/NUMA network router or network server.
This counter is constantly incremented/decremented each time a network packet
is handled, or an IP route is added to/deleted from the route cache. This is
neither SMP nor NUMA friendly (because the locked operations are expensive and
the memory ping-pongs between CPUs).
But as a matter of fact, the counter is almost *never* read: it is only read
when the device must be unregistered.
Some devices are *never* unregistered (loopback, or devices of statically
linked drivers), so we are refcounting them for nothing.
This patch tries to avoid atomic ops on SMP for the cases where the device
won't be unregistered.
An 'int static_dev' field is added next to 'atomic_t refcnt', and may be set
to one by drivers that are statically linked.
I changed the SET_MODULE_OWNER(dev) macro to avoid changing all network
drivers, but I'm open to other suggestions.
All drivers that currently use this macro automatically benefit from this SMP
optimization: it is better to perform a test/conditional branch (even if badly
predicted) than an atomic_{inc|dec}().
Signed-off-by: Eric Dumazet <[EMAIL PROTECTED]>
--- a/include/linux/netdevice.h 2006-02-07 11:55:42.000000000 +0100
+++ b/include/linux/netdevice.h 2006-02-07 13:06:14.000000000 +0100
@@ -417,10 +417,14 @@
struct timer_list watchdog_timer;
/*
- * refcnt is a very hot point, so align it on SMP
+ * {static_dev,refcnt} is a very hot point, so align it on SMP
*/
/* Number of references to this device */
- atomic_t refcnt ____cacheline_aligned_in_smp;
+#ifdef CONFIG_SMP
+ /* SMP optimization : if dev is static, no need to modify refcnt */
+ int static_dev ____cacheline_aligned_in_smp;
+#endif
+ atomic_t refcnt;
/* delayed register/unregister */
struct list_head todo_list;
@@ -514,7 +518,29 @@
& ~NETDEV_ALIGN_CONST);
}
-#define SET_MODULE_OWNER(dev) do { } while (0)
+static inline int netif_static(const struct net_device *dev)
+{
+#if defined(CONFIG_SMP)
+ return dev->static_dev;
+#else
+ return 0;
+#endif
+}
+
+static inline void netif_setstatic(struct net_device *dev, int v)
+{
+#if defined(CONFIG_SMP)
+#if defined(MODULE)
+ v = 0;
+#endif
+ dev->static_dev = v;
+#endif
+}
+/*
+ * If a driver is not a module, dev can be marked as static
+ */
+#define SET_MODULE_OWNER(dev) do { netif_setstatic(dev, 1); } while (0)
+
/* Set the sysfs physical device reference for the network logical device
* if set prior to registration will cause a symlink during initialization.
*/
@@ -705,11 +731,17 @@
static inline void dev_put(struct net_device *dev)
{
- atomic_dec(&dev->refcnt);
+ if (!netif_static(dev))
+ atomic_dec(&dev->refcnt);
}
-#define __dev_put(dev) atomic_dec(&(dev)->refcnt)
-#define dev_hold(dev) atomic_inc(&(dev)->refcnt)
+/*
+ * Skip the atomic op when netif_static(dev) is true. The bodies are
+ * wrapped in do { } while (0) so that each macro expands to a single
+ * statement and cannot capture the else of a caller's if/else.
+ */
+#define __dev_put(dev) do { if (!netif_static(dev)) atomic_dec(&(dev)->refcnt); } while (0)
+#define dev_hold(dev) do { if (!netif_static(dev)) atomic_inc(&(dev)->refcnt); } while (0)
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
* and _off may be called from IRQ context, but it is caller
--- a/net/core/dev.c 2006-02-07 11:59:53.000000000 +0100
+++ b/net/core/dev.c 2006-02-07 12:52:27.000000000 +0100
@@ -2990,12 +2990,11 @@
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
- p = kmalloc(alloc_size, GFP_KERNEL);
+ p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
return NULL;
}
- memset(p, 0, alloc_size);
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
@@ -3248,6 +3247,7 @@
queue->backlog_dev.weight = weight_p;
queue->backlog_dev.poll = process_backlog;
atomic_set(&queue->backlog_dev.refcnt, 1);
+ netif_setstatic(&queue->backlog_dev, 1);
}
dev_boot_phase = 0;
--- a/drivers/net/loopback.c 2006-02-07 12:10:55.000000000 +0100
+++ b/drivers/net/loopback.c 2006-02-07 12:37:49.000000000 +0100
@@ -224,16 +224,18 @@
int __init loopback_init(void)
{
struct net_device_stats *stats;
+ int res;
/* Can survive without statistics */
- stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+ stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL);
if (stats) {
- memset(stats, 0, sizeof(struct net_device_stats));
loopback_dev.priv = stats;
loopback_dev.get_stats = &get_stats;
}
- return register_netdev(&loopback_dev);
+ res = register_netdev(&loopback_dev);
+ SET_MODULE_OWNER(&loopback_dev);
+ return res;
};
EXPORT_SYMBOL(loopback_dev);