IPoIB: implement the new batching API.
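
Convert ipoib_start_xmit() to handle batched xmits: when the stack
passes a NULL skb, or when earlier skbs are still pending on
dev->skb_blist, the handler drains the batch list and coalesces
consecutive skbs going to the same neighbour/QPN into a single
multi-WR ipoib_send() call (flushed via XMIT_PROCESSED_SKBS()
whenever the destination changes or a non-data path - path lookup,
CM, multicast, ARP - is taken). ipoib_dev_init() additionally
allocates per-device tx_sge and tx_wr arrays sized to
ipoib_sendq_size so that multiple work requests can be posted at
once, and the device now advertises NETIF_F_BATCH_SKBS. The handler
returns NETDEV_TX_BUSY if skbs remain on the batch list.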

Signed-off-by: Krishna Kumar <[EMAIL PROTECTED]>
---
 ipoib_main.c |  251 ++++++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 171 insertions(+), 80 deletions(-)
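
For reviewers, a minimal sketch of the caller-side contract the xmit
path below assumes (illustrative only: the core batching API comes
from the batching patchset, not from this patch, and the helper name
queue_and_flush() is hypothetical - the flow is inferred from how
ipoib_start_xmit() consumes dev->skb_blist):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/*
 * Hypothetical core-side flow (not part of this patch): a
 * NETIF_F_BATCH_SKBS device gets its skbs queued on dev->skb_blist
 * and is asked to drain the list by a hard_start_xmit() call with a
 * NULL skb.
 */
static int queue_and_flush(struct sk_buff *skb, struct net_device *dev)
{
	if (dev->features & NETIF_F_BATCH_SKBS) {
		__skb_queue_tail(dev->skb_blist, skb);
		/* NULL skb: drain everything queued on dev->skb_blist. */
		return dev->hard_start_xmit(NULL, dev);
	}
	/* Non-batching path: one skb per call, as before. */
	return dev->hard_start_xmit(skb, dev);
}
/* NETDEV_TX_BUSY from the driver means skbs remain on the list. */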

diff -ruNp org/drivers/infiniband/ulp/ipoib/ipoib_main.c new/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- org/drivers/infiniband/ulp/ipoib/ipoib_main.c       2007-08-20 14:26:26.000000000 +0530
+++ new/drivers/infiniband/ulp/ipoib/ipoib_main.c       2007-08-22 08:33:51.000000000 +0530
@@ -560,7 +560,8 @@ static void neigh_add_path(struct sk_buf
                                goto err_drop;
                        }
                } else
-                       ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+                       ipoib_send(dev, skb, path->ah,
+                                  IPOIB_QPN(skb->dst->neighbour->ha), 1);
        } else {
                neigh->ah  = NULL;
 
@@ -640,7 +641,7 @@ static void unicast_arp_send(struct sk_b
                ipoib_dbg(priv, "Send unicast ARP to %04x\n",
                          be16_to_cpu(path->pathrec.dlid));
 
-               ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+               ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
        } else if ((path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                /* put pseudoheader back on for next time */
@@ -654,105 +655,166 @@ static void unicast_arp_send(struct sk_b
        spin_unlock(&priv->lock);
 }
 
+#define        XMIT_PROCESSED_SKBS()                                   \
+       do {                                                            \
+               if (wr_num) {                                           \
+                       ipoib_send(dev, NULL, old_neigh->ah, old_qpn,   \
+                                  wr_num);                             \
+                       wr_num = 0;                                     \
+               }                                                       \
+       } while (0)
+
 static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_neigh *neigh;
+       struct sk_buff_head *blist;
+       int max_skbs, wr_num = 0;
+       u32 qpn, old_qpn = 0;
+       struct ipoib_neigh *neigh, *old_neigh = NULL;
        unsigned long flags;
 
        if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
                return NETDEV_TX_LOCKED;
 
-       /*
-        * Check if our queue is stopped.  Since we have the LLTX bit
-        * set, we can't rely on netif_stop_queue() preventing our
-        * xmit function from being called with a full queue.
-        */
-       if (unlikely(netif_queue_stopped(dev))) {
-               spin_unlock_irqrestore(&priv->tx_lock, flags);
-               return NETDEV_TX_BUSY;
-       }
-
-       if (likely(skb->dst && skb->dst->neighbour)) {
-               if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
-                       ipoib_path_lookup(skb, dev);
-                       goto out;
-               }
+       blist = dev->skb_blist;
 
-               neigh = *to_ipoib_neigh(skb->dst->neighbour);
+       if (!skb || (blist && skb_queue_len(blist))) {
+               /*
+                * Either this is a batching xmit call, or it is a single-skb
+                * call but earlier skbs are still on the batch list from a
+                * previously failed xmit - send those first to preserve
+                * ordering.
+                */
+
+               if (skb)
+                       __skb_queue_tail(blist, skb);
+
+               /*
+                * Figure out how many skbs can be sent. This prevents the
+                * device from getting full and avoids checking for a stopped
+                * queue after each iteration; the queue can now get stopped
+                * at most once, after the xmit of the last skb.
+                */
+               max_skbs = ipoib_sendq_size - (priv->tx_head - priv->tx_tail);
+               skb = __skb_dequeue(blist);
+       } else {
+               blist = NULL;
+               max_skbs = 1;
+       }
 
-               if (ipoib_cm_get(neigh)) {
-                       if (ipoib_cm_up(neigh)) {
-                               ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
-                               goto out;
-                       }
-               } else if (neigh->ah) {
-                       if (unlikely(memcmp(&neigh->dgid.raw,
-                                           skb->dst->neighbour->ha + 4,
-                                           sizeof(union ib_gid)))) {
-                               spin_lock(&priv->lock);
-                               /*
-                                * It's safe to call ipoib_put_ah() inside
-                                * priv->lock here, because we know that
-                                * path->ah will always hold one more reference,
-                                * so ipoib_put_ah() will never do more than
-                                * decrement the ref count.
-                                */
-                               ipoib_put_ah(neigh->ah);
-                               list_del(&neigh->list);
-                               ipoib_neigh_free(dev, neigh);
-                               spin_unlock(&priv->lock);
+       do {
+               if (likely(skb->dst && skb->dst->neighbour)) {
+                       if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+                               XMIT_PROCESSED_SKBS();
                                ipoib_path_lookup(skb, dev);
-                               goto out;
+                               continue;
                        }
 
-                       ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
-                       goto out;
-               }
-
-               if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-                       spin_lock(&priv->lock);
-                       __skb_queue_tail(&neigh->queue, skb);
-                       spin_unlock(&priv->lock);
-               } else {
-                       ++priv->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-               }
-       } else {
-               struct ipoib_pseudoheader *phdr =
-                       (struct ipoib_pseudoheader *) skb->data;
-               skb_pull(skb, sizeof *phdr);
+                       neigh = *to_ipoib_neigh(skb->dst->neighbour);
 
-               if (phdr->hwaddr[4] == 0xff) {
-                       /* Add in the P_Key for multicast*/
-                       phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
-                       phdr->hwaddr[9] = priv->pkey & 0xff;
+                       if (ipoib_cm_get(neigh)) {
+                               if (ipoib_cm_up(neigh)) {
+                                       XMIT_PROCESSED_SKBS();
+                                       ipoib_cm_send(dev, skb,
+                                                     ipoib_cm_get(neigh));
+                                       continue;
+                               }
+                       } else if (neigh->ah) {
+                               if (unlikely(memcmp(&neigh->dgid.raw,
+                                                   skb->dst->neighbour->ha + 4,
+                                                   sizeof(union ib_gid)))) {
+                                       spin_lock(&priv->lock);
+                                       /*
+                                        * It's safe to call ipoib_put_ah()
+                                        * inside priv->lock here, because we
+                                        * know that path->ah will always hold
+                                        * one more reference, so ipoib_put_ah()
+                                        * will never do more than decrement
+                                        * the ref count.
+                                        */
+                                       ipoib_put_ah(neigh->ah);
+                                       list_del(&neigh->list);
+                                       ipoib_neigh_free(dev, neigh);
+                                       spin_unlock(&priv->lock);
+                                       XMIT_PROCESSED_SKBS();
+                                       ipoib_path_lookup(skb, dev);
+                                       continue;
+                               }
+
+                               qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+                               if (neigh != old_neigh || qpn != old_qpn) {
+                                       /*
+                                        * Sending to a different destination
+                                        * from earlier skbs (or this is the
+                                        * first skb) - send all existing skbs.
+                                        */
+                                       XMIT_PROCESSED_SKBS();
+                                       old_neigh = neigh;
+                                       old_qpn = qpn;
+                               }
+
+                               if (likely(!ipoib_process_skb(dev, skb, priv,
+                                                             neigh->ah, qpn,
+                                                             wr_num)))
+                                       wr_num++;
 
-                       ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
-               } else {
-                       /* unicast GID -- should be ARP or RARP reply */
+                               continue;
+                       }
 
-                       if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
-                           (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
-                               ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
-                                          IPOIB_GID_FMT "\n",
-                                          skb->dst ? "neigh" : "dst",
-                                          be16_to_cpup((__be16 *) skb->data),
-                                          IPOIB_QPN(phdr->hwaddr),
-                                          IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
+                       if (skb_queue_len(&neigh->queue) <
+                           IPOIB_MAX_PATH_REC_QUEUE) {
+                               spin_lock(&priv->lock);
+                               __skb_queue_tail(&neigh->queue, skb);
+                               spin_unlock(&priv->lock);
+                       } else {
                                dev_kfree_skb_any(skb);
                                ++priv->stats.tx_dropped;
-                               goto out;
+                               ++max_skbs;
+                       }
+               } else {
+                       struct ipoib_pseudoheader *phdr =
+                               (struct ipoib_pseudoheader *) skb->data;
+                       skb_pull(skb, sizeof *phdr);
+
+                       if (phdr->hwaddr[4] == 0xff) {
+                               /* Add in the P_Key for multicast */
+                               phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+                               phdr->hwaddr[9] = priv->pkey & 0xff;
+
+                               XMIT_PROCESSED_SKBS();
+                               ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+                       } else {
+                               /* unicast GID -- should be ARP or RARP reply */
+
+                               if ((be16_to_cpup((__be16 *) skb->data) !=
+                                   ETH_P_ARP) &&
+                                   (be16_to_cpup((__be16 *) skb->data) !=
+                                   ETH_P_RARP)) {
+                                       ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+                                               IPOIB_GID_FMT "\n",
+                                               skb->dst ? "neigh" : "dst",
+                                               be16_to_cpup((__be16 *)
+                                               skb->data),
+                                               IPOIB_QPN(phdr->hwaddr),
+                                               IPOIB_GID_RAW_ARG(phdr->hwaddr
+                                                                 + 4));
+                                       dev_kfree_skb_any(skb);
+                                       ++priv->stats.tx_dropped;
+                                       ++max_skbs;
+                                       continue;
+                               }
+                               XMIT_PROCESSED_SKBS();
+                               unicast_arp_send(skb, dev, phdr);
                        }
-
-                       unicast_arp_send(skb, dev, phdr);
                }
-       }
+       } while (--max_skbs && (skb = __skb_dequeue(blist)) != NULL);
+
+       /* Send out last packets (if any) */
+       XMIT_PROCESSED_SKBS();
 
-out:
        spin_unlock_irqrestore(&priv->tx_lock, flags);
 
-       return NETDEV_TX_OK;
+       return (!blist || !skb_queue_len(blist)) ? NETDEV_TX_OK :
+                                                  NETDEV_TX_BUSY;
 }
 
 static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
@@ -900,11 +962,35 @@ int ipoib_dev_init(struct net_device *de
 
        /* priv->tx_head & tx_tail are already 0 */
 
-       if (ipoib_ib_dev_init(dev, ca, port))
+       /* Allocate tx_sge */
+       priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+                              GFP_KERNEL);
+       if (!priv->tx_sge) {
+               printk(KERN_WARNING "%s: failed to allocate TX sge (%d 
entries)\n",
+                      ca->name, ipoib_sendq_size);
                goto out_tx_ring_cleanup;
+       }
+
+       /* Allocate tx_wr */
+       priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+                             GFP_KERNEL);
+       if (!priv->tx_wr) {
+               printk(KERN_WARNING "%s: failed to allocate TX wr (%d 
entries)\n",
+                      ca->name, ipoib_sendq_size);
+               goto out_tx_sge_cleanup;
+       }
+
+       if (ipoib_ib_dev_init(dev, ca, port))
+               goto out_tx_wr_cleanup;
 
        return 0;
 
+out_tx_wr_cleanup:
+       kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+       kfree(priv->tx_sge);
+
 out_tx_ring_cleanup:
        kfree(priv->tx_ring);
 
@@ -932,9 +1018,13 @@ void ipoib_dev_cleanup(struct net_device
 
        kfree(priv->rx_ring);
        kfree(priv->tx_ring);
+       kfree(priv->tx_sge);
+       kfree(priv->tx_wr);
 
        priv->rx_ring = NULL;
        priv->tx_ring = NULL;
+       priv->tx_sge = NULL;
+       priv->tx_wr = NULL;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -965,7 +1055,8 @@ static void ipoib_setup(struct net_devic
        dev->addr_len            = INFINIBAND_ALEN;
        dev->type                = ARPHRD_INFINIBAND;
        dev->tx_queue_len        = ipoib_sendq_size * 2;
-       dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+       dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX |
+                                  NETIF_F_BATCH_SKBS;
 
        /* MTU will be reset when mcast join happens */
        dev->mtu                 = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;