Folks,
I am not a big readability aficionado, but this piece of code has
become a victim of hairy activities over the last few years. So while i
was furiously chasing Herbert's qdisc_is_running changes[1] i made a
small cleanup just so that i could absorb what was going on.
The patch included is a small side effect of that effort (there's a lot
of WIP as well which is not part of this patch that may never see the
light of day).
Actually the patch itself may be slightly unreadable, so i have attached
how qdisc_restart looks after the patch.
I am not yet asking for inclusion, but if people are fine with it i will
run some performance tests to make sure we at least get the same numbers
as before and submit for 2.6.19. I have tested the code with a lot of
other stuff, but not the version i am attaching. It does compile
however ;->
Also if i am missing some piece of the translation please comment.
Alexey, I know you haven't looked at this creation of yours in years and
a few changes have happened since, so please if you have time stare and
comment.
cheers,
jamal
[1] I wanted to check the qdisc is running change before 2.6.18 came
out. I have to say I have failed to find any issues with it. There are
some theoretical issues but i can't practically create them. Maybe we can
chat over a drink.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 75f02d8..49278c8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -75,8 +75,11 @@ #define MAX_ADDR_LEN 32 /* Largest hard
/* Driver transmit return codes */
#define NETDEV_TX_OK 0 /* driver took care of packet */
-#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/
-#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */
+#define NETDEV_TX_BUSY -1 /* driver tx path was busy*/
+#define NETDEV_TX_LOCKED -2 /* driver tx lock was already taken */
+#define NETDEV_TX_DROP -3 /* request caller to drop packet */
+#define NETDEV_TX_QUEUE -4 /* request caller to requeue packet */
+
/*
* Compute the worst case header length according to the protocols
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0834c2e..0e86927 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -67,7 +67,64 @@ void qdisc_unlock_tree(struct net_device
write_unlock_bh(&qdisc_tree_lock);
}
+static inline int
+handle_dev_cpu_collision(struct net_device *dev)
+{
+ if (dev->xmit_lock_owner == smp_processor_id()) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+ return NETDEV_TX_DROP;
+ }
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+ return NETDEV_TX_QUEUE;
+}
+
+static inline int
+handle_dev_requeue(struct sk_buff *skb, struct net_device *dev,
+ struct Qdisc *q)
+{
+ if (skb->next)
+ dev->gso_skb = skb;
+ else
+ q->ops->requeue(skb, q);
+ netif_schedule(dev);
+ return 1;
+}
+
+static inline int
+try_get_tx_pkt(struct sk_buff **skb, struct net_device *dev,
+ struct Qdisc *q)
+{
+ if (((*skb = dev->gso_skb)) || ((*skb = q->dequeue(q)))) {
+
+ dev->gso_skb = NULL;
+ return -1;
+ }
+
+ BUG_ON((int) q->q.qlen < 0);
+ return q->q.qlen;
+}
+
+static inline int
+handle_tx_locked(struct sk_buff *skb, struct net_device *dev,
+ struct Qdisc *q)
+{
+ int ret = handle_dev_cpu_collision(dev);
+
+ if (ret == NETDEV_TX_DROP) {
+ kfree_skb(skb);
+ return -1;
+ }
+
+ return handle_dev_requeue(skb,dev,q);
+}
+
/*
+ NOTE: Called under dev->queue_lock with locally disabled BH.
+
+ __LINK_STATE_QDISC_RUNNING guarantees only one CPU
+ can enter this region at a time.
+
dev->queue_lock serializes queue accesses for this device
AND dev->qdisc pointer itself.
@@ -75,111 +132,63 @@ void qdisc_unlock_tree(struct net_device
dev->queue_lock and netif_tx_lock are mutually exclusive,
if one is grabbed, another must be free.
- */
+ Multiple CPUs may contend for the two locks.
-/* Kick device.
Note, that this procedure can be called by a watchdog timer, so that
we do not check dev->tbusy flag here.
- Returns: 0 - queue is empty.
- >0 - queue is not empty, but throttled.
- <0 - queue is not empty. Device is throttled, if dev->tbusy != 0.
-
- NOTE: Called under dev->queue_lock with locally disabled BH.
+ Returns to the caller:
+ 0 - queue is empty.
+ >0 - queue is not empty: tx lock access or queue throttle
+ <0 - Theres room to receive more if !netif_queue_stopped.
+ (It is upon the caller to check for netif_queue_stopped
+ before invoking this routine)
*/
-static inline int qdisc_restart(struct net_device *dev)
+int
+qdisc_restart(struct net_device *dev)
{
struct Qdisc *q = dev->qdisc;
- struct sk_buff *skb;
-
- /* Dequeue packet */
- if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
- unsigned nolock = (dev->features & NETIF_F_LLTX);
-
- dev->gso_skb = NULL;
-
- /*
- * When the driver has LLTX set it does its own locking
- * in start_xmit. No need to add additional overhead by
- * locking again. These checks are worth it because
- * even uncongested locks can be quite expensive.
- * The driver can do trylock like here too, in case
- * of lock congestion it should return -1 and the packet
- * will be requeued.
- */
- if (!nolock) {
- if (!netif_tx_trylock(dev)) {
- collision:
- /* So, someone grabbed the driver. */
-
- /* It may be transient configuration error,
- when hard_start_xmit() recurses. We detect
- it by checking xmit owner and drop the
- packet when deadloop is detected.
- */
- if (dev->xmit_lock_owner == smp_processor_id()) {
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
- return -1;
- }
- __get_cpu_var(netdev_rx_stat).cpu_collision++;
- goto requeue;
- }
- }
+ unsigned lockless = (dev->features & NETIF_F_LLTX);
+ struct sk_buff *skb = NULL;
+ int ret;
+
+ ret = try_get_tx_pkt(&skb, dev, q);
+ if (ret >= 0)
+ return ret;
+
+ /* we have a packet to send */
+ if (!lockless) {
+ if (!netif_tx_trylock(dev))
+ return handle_tx_locked(skb,dev,q);
+ }
- {
- /* And release queue */
- spin_unlock(&dev->queue_lock);
-
- if (!netif_queue_stopped(dev)) {
- int ret;
-
- ret = dev_hard_start_xmit(skb, dev);
- if (ret == NETDEV_TX_OK) {
- if (!nolock) {
- netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- return -1;
- }
- if (ret == NETDEV_TX_LOCKED && nolock) {
- spin_lock(&dev->queue_lock);
- goto collision;
- }
- }
+ /* all clear .. */
+ spin_unlock(&dev->queue_lock);
- /* NETDEV_TX_BUSY - we need to requeue */
- /* Release the driver */
- if (!nolock) {
- netif_tx_unlock(dev);
- }
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- }
+ /* churn baby churn .. */
+ ret = dev_hard_start_xmit(skb, dev);
- /* Device kicked us out :(
- This is possible in three cases:
+ if (!lockless)
+ netif_tx_unlock(dev);
- 0. driver is locked
- 1. fastroute is enabled
- 2. device cannot determine busy state
- before start of transmission (f.e. dialout)
- 3. device is buggy (ppp)
- */
+ spin_lock(&dev->queue_lock);
-requeue:
- if (skb->next)
- dev->gso_skb = skb;
- else
- q->ops->requeue(skb, q);
- netif_schedule(dev);
- return 1;
+ /* most likely result, packet went ok */
+ if (ret == NETDEV_TX_OK)
+ return -1;
+ /* only for lockless drivers .. */
+ if (ret == NETDEV_TX_LOCKED && lockless) {
+ return handle_tx_locked(skb,dev,q);
}
- BUG_ON((int) q->q.qlen < 0);
- return q->q.qlen;
+
+ if (unlikely (ret != NETDEV_TX_BUSY)) {
+ /* XXX: Do we need a ratelimit? */
+ printk("BUG %s code %d qlen %d\n",dev->name,ret,q->q.qlen);
+ }
+
+ return handle_dev_requeue(skb,dev,q);
}
void __qdisc_run(struct net_device *dev)
/*
NOTE: Called under dev->queue_lock with locally disabled BH.
__LINK_STATE_QDISC_RUNNING guarantees only one CPU
can enter this region at a time.
dev->queue_lock serializes queue accesses for this device
AND dev->qdisc pointer itself.
netif_tx_lock serializes accesses to device driver.
dev->queue_lock and netif_tx_lock are mutually exclusive,
if one is grabbed, another must be free.
Multiple CPUs may contend for the two locks.
Note, that this procedure can be called by a watchdog timer, so that
we do not check dev->tbusy flag here.
Returns to the caller:
0 - queue is empty.
>0 - queue is not empty: tx lock access or queue throttle
<0 - There's room to receive more if !netif_queue_stopped.
(It is upon the caller to check for netif_queue_stopped
before invoking this routine)
*/
int
qdisc_restart(struct net_device *dev)
{
struct Qdisc *q = dev->qdisc;
unsigned lockless = (dev->features & NETIF_F_LLTX);
struct sk_buff *skb = NULL;
int ret;
ret = try_get_tx_pkt(&skb, dev, q);
if (ret >= 0)
return ret;
/* we have a packet to send */
if (!lockless) {
if (!netif_tx_trylock(dev))
return handle_tx_locked(skb,dev,q);
}
/* all clear .. */
spin_unlock(&dev->queue_lock);
/* churn baby churn .. */
ret = dev_hard_start_xmit(skb, dev);
if (!lockless)
netif_tx_unlock(dev);
spin_lock(&dev->queue_lock);
/* most likely result, packet went ok */
if (ret == NETDEV_TX_OK)
return -1;
/* only for lockless drivers .. */
if (ret == NETDEV_TX_LOCKED && lockless) {
return handle_tx_locked(skb,dev,q);
}
if (unlikely (ret != NETDEV_TX_BUSY)) {
/* XXX: Do we need a ratelimit? */
printk("BUG %s code %d qlen %d\n",dev->name,ret,q->q.qlen);
}
return handle_dev_requeue(skb,dev,q);
}