On Wed, Jun 07, 2006 at 02:44:54PM -0400, Jeff Moyer wrote:
> ==> Regarding Re: [PATCH 1/2] e1000: fix netpoll with NAPI; Auke Kok <[EMAIL PROTECTED]> adds:
>
> auke-jan.h.kok> Hi,
>
> auke-jan.h.kok> we're not too happy with this as it puts a branch right in
> auke-jan.h.kok> the regular receive path. We haven't run the numbers on it
> auke-jan.h.kok> yet but it is likely that this will lower performance
> auke-jan.h.kok> significantly during normal receives for something that is
> auke-jan.h.kok> not common use.
>
> auke-jan.h.kok> Attached below a (revised) patch that adds proper locking
> auke-jan.h.kok> around the rx_clean to prevent the race.
>
> That patch locks around the tx clean routine. As such, it doesn't prevent
> the problem.
>
Further to that, run tests on this if you like, but I would certainly expect a
properly formed conditional to provide better performance than a spin_lock
operation in the receive path. Why not just turn the:
if(netpoll_op)
into an
if(unlikely(netpoll_op))
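For reference, unlikely() there is just a static branch-prediction hint. Approximately
(the exact definition in include/linux/compiler.h varies between kernel versions) it
expands to GCC's __builtin_expect, which lets the compiler lay the netpoll branch out
of line:

	/* approximate definitions; see include/linux/compiler.h in your tree */
	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)
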
I expect that will reduce the overhead of the conditional to effectively zero
for the normal receive case. The following patch does that, and I expect your
performance won't suffer at all:
Signed-off-by: Neil Horman <[EMAIL PROTECTED]>
e1000_main.c | 54 +++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 45 insertions(+), 9 deletions(-)
--- linux-2.6.9/drivers/net/e1000/e1000_main.c.neil	2006-06-06 10:37:42.000000000 -0400
+++ linux-2.6.9/drivers/net/e1000/e1000_main.c	2006-06-07 10:48:22.000000000 -0400
@@ -3207,8 +3207,9 @@ e1000_update_stats(struct e1000_adapter
* @pt_regs: CPU registers structure
**/
+
static irqreturn_t
-e1000_intr(int irq, void *data, struct pt_regs *regs)
+__e1000_intr(int irq, void *data, struct pt_regs *regs, int netpoll_op)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
@@ -3217,6 +3218,7 @@ e1000_intr(int irq, void *data, struct p
#ifndef CONFIG_E1000_NAPI
int i;
#else
+ struct net_device *dev_to_sched;
/* Interrupt Auto-Mask...upon reading ICR,
* interrupts are masked. No need for the
* IMC write, but it does mean we should
@@ -3255,8 +3257,22 @@ e1000_intr(int irq, void *data, struct p
E1000_WRITE_REG(hw, IMC, ~0);
E1000_WRITE_FLUSH(hw);
}
- if (likely(netif_rx_schedule_prep(&adapter->polling_netdev[0])))
- __netif_rx_schedule(&adapter->polling_netdev[0]);
+
+ /*
+ * Netpoll operations, in the interest of efficiency,
+ * only do NAPI polling on the device passed to the
+ * poll_controller. Therefore, if we are performing
+ * a netpoll operation, we can't schedule a receive
+ * on one of the dummy net devices that exist for the
+ * sole purpose of spreading out rx schedules.
+ */
+ if (unlikely(netpoll_op))
+ dev_to_sched = netdev;
+ else
+ dev_to_sched = &adapter->polling_netdev[0];
+
+ if (likely(netif_rx_schedule_prep(dev_to_sched)))
+ __netif_rx_schedule(dev_to_sched);
else
e1000_irq_enable(adapter);
#else
@@ -3288,6 +3304,13 @@ e1000_intr(int irq, void *data, struct p
return IRQ_HANDLED;
}
+static irqreturn_t
+e1000_intr(int irq, void *data, struct pt_regs *regs)
+{
+ return __e1000_intr(irq, data, regs, 0);
+}
+
+
#ifdef CONFIG_E1000_NAPI
/**
* e1000_clean - NAPI Rx polling callback
@@ -3300,7 +3323,6 @@ e1000_clean(struct net_device *poll_dev,
struct e1000_adapter *adapter;
int work_to_do = min(*budget, poll_dev->quota);
int tx_cleaned = 0, i = 0, work_done = 0;
-
/* Must NOT use netdev_priv macro here. */
adapter = poll_dev->priv;
@@ -3308,10 +3330,24 @@ e1000_clean(struct net_device *poll_dev,
if (!netif_carrier_ok(adapter->netdev))
goto quit_polling;
- while (poll_dev != &adapter->polling_netdev[i]) {
- i++;
- if (unlikely(i == adapter->num_rx_queues))
- BUG();
+ /*
+ * Only search for a matching polling_netdev in the event
+ * that this isn't a real registered net_device.
+ * A real net device can be passed in here in the event
+ * that netdump has been activated (this comes through
+ * netpoll_poll_dev). We detect this by virtue of the
+ * fact that each polling_netdev->priv points to the private
+ * data of its parent (registered) netdev. So if
+ * poll_dev->priv == netdev_priv(poll_dev), it's a real device;
+ * otherwise it's a polling_netdev.
+ */
+ if (likely(adapter != netdev_priv(poll_dev))) {
+ while (poll_dev != &adapter->polling_netdev[i]) {
+ i++;
+ if (unlikely(i == adapter->num_rx_queues))
+ BUG();
+ }
+
}
if (likely(adapter->num_tx_queues == 1)) {
@@ -4624,7 +4660,7 @@ e1000_netpoll(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
disable_irq(adapter->pdev->irq);
- e1000_intr(adapter->pdev->irq, netdev, NULL);
+ __e1000_intr(adapter->pdev->irq, netdev, NULL, 1);
e1000_clean_tx_irq(adapter, adapter->tx_ring);
#ifndef CONFIG_E1000_NAPI
adapter->clean_rx(adapter, adapter->rx_ring);
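For clarity, the new check in e1000_clean relies on the relationship described in the
comment above: for the real, registered netdev, netdev_priv() and ->priv both resolve
to the e1000_adapter, while the dummy polling_netdevs only have ->priv pointed back at
the parent adapter. A standalone sketch of that test (the helper name
e1000_is_real_netdev is invented here for illustration and does not exist in the
driver):

	static inline int e1000_is_real_netdev(struct net_device *poll_dev)
	{
		/* For polling_netdev[i], ->priv was set to the parent adapter,
		 * but netdev_priv() points just past the embedded net_device,
		 * so the two differ; for the registered netdev they match.
		 */
		struct e1000_adapter *adapter = poll_dev->priv;

		return adapter == netdev_priv(poll_dev);
	}

With a test like that, the while loop over adapter->polling_netdev[] only runs for the
dummy devices, and a netpoll-driven call that passes in the real netdev skips it.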