Add VIOC device driver packet transmit code.

Signed-off-by: Misha Tomushev  <[EMAIL PROTECTED]>

diff -uprN linux-2.6.17/drivers/net/vioc/vioc_transmit.c linux-2.6.17.vioc/drivers/net/vioc/vioc_transmit.c
--- linux-2.6.17/drivers/net/vioc/vioc_transmit.c       1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.vioc/drivers/net/vioc/vioc_transmit.c  2006-10-04 10:51:49.000000000 -0700
@@ -0,0 +1,1032 @@
+/*
+ * Fabric7 Systems Virtual IO Controller Driver
+ * Copyright (C) 2003-2005 Fabric7 Systems.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * http://www.fabric7.com/
+ *
+ * Maintainers:
+ *    [EMAIL PROTECTED]
+ *
+ *
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/if_vlan.h>
+#include <linux/timex.h>
+#include <linux/ethtool.h>
+
+#include <net/dst.h>
+#include <net/arp.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/byteorder.h>
+#include <asm/msr.h>
+
+#include "f7/vnic_defs.h"
+#include "f7/vioc_pkts_defs.h"
+
+#include "vioc_vnic.h"
+#include "vioc_api.h"
+
+#define VNIC_MIN_MTU   64
+#define TXQ0            0
+#define NOT_SET        -1
+
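+/*
+ * Helpers for the per-queue Tx descriptor control register.  The driver
+ * keeps a software shadow of the persistent bits (txq->shadow_VREG_VENG_TXD_CTL)
+ * so that one-shot bits such as the doorbell (QRING) or the error-clear mask
+ * can be OR'ed in and pushed with a single writel(), without a
+ * read-modify-write of the device register.
+ */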
+static inline u32 vnic_rd_txd_ctl(struct txq *txq)
+{
+       return readl(txq->va_of_vreg_veng_txd_ctl);
+}
+
+static inline void vnic_ring_tx_bell(struct txq *txq)
+{
+       writel(txq->shadow_VREG_VENG_TXD_CTL | VREG_VENG_TXD_CTL_QRING_MASK,
+              txq->va_of_vreg_veng_txd_ctl);
+       txq->bells++;
+}
+
+static inline void vnic_reset_tx_ring_err(struct txq *txq)
+{
+       writel(txq->shadow_VREG_VENG_TXD_CTL |
+              (VREG_VENG_TXD_CTL_QENABLE_MASK | VREG_VENG_TXD_CTL_CLEARMASK),
+              txq->va_of_vreg_veng_txd_ctl);
+}
+
+static inline void vnic_enable_tx_ring(struct txq *txq)
+{
+       txq->shadow_VREG_VENG_TXD_CTL = VREG_VENG_TXD_CTL_QENABLE_MASK;
+       writel(txq->shadow_VREG_VENG_TXD_CTL, txq->va_of_vreg_veng_txd_ctl);
+}
+
+static inline void vnic_disable_tx_ring(struct txq *txq)
+{
+       txq->shadow_VREG_VENG_TXD_CTL = 0;
+       writel(0, txq->va_of_vreg_veng_txd_ctl);
+}
+
+static inline void vnic_pause_tx_ring(struct txq *txq)
+{
+       txq->shadow_VREG_VENG_TXD_CTL |= VREG_VENG_TXD_CTL_QPAUSE_MASK;
+       writel(txq->shadow_VREG_VENG_TXD_CTL, txq->va_of_vreg_veng_txd_ctl);
+}
+
+static inline void vnic_resume_tx_ring(struct txq *txq)
+{
+       txq->shadow_VREG_VENG_TXD_CTL &= ~VREG_VENG_TXD_CTL_QPAUSE_MASK;
+       writel(txq->shadow_VREG_VENG_TXD_CTL, txq->va_of_vreg_veng_txd_ctl);
+}
+
+
+/* TxQ must be locked */
+static void vnic_reset_txq(struct vnic_device *vnicdev, struct txq *txq)
+{
+
+       struct tx_pktBufDesc_Phys_w *txd;
+       int i;
+
+       vnic_reset_tx_ring_err(txq);
+
+       /* The rest of the code below does not perform a true reset,
+        * because so far we can't reset individual VNICs.
+        * Need to (SW) reset the whole VIOC.
+        */
+
+       vnic_disable_tx_ring(txq);
+       wmb();
+       /*
+        * Clean-up all Tx Descriptors, take ownership of all
+        * descriptors
+        */
+       for (i = 0; i < txq->count; i++) {
+               if (txq->desc) {
+                       txd = TXD_PTR(txq, i);
+                       txd->word_1 = 0;
+                       txd->word_0 = 0;
+               }
+               if (txq->vbuf) {
+                       if (txq->vbuf[i].dma) {
+                               pci_unmap_page(vnicdev->viocdev->pdev,
+                                              txq->vbuf[i].dma,
+                                              txq->vbuf[i].length,
+                                              PCI_DMA_TODEVICE);
+                               txq->vbuf[i].dma = 0;
+                       }
+
+                       /* Free skb; should be for SOP (in case of frags) only */
+                       if (txq->vbuf[i].skb) {
+                               dev_kfree_skb_any((struct sk_buff *)txq->
+                                                 vbuf[i].skb);
+                               txq->vbuf[i].skb = NULL;
+                       }
+               }
+       }
+       txq->next_to_clean = 0;
+       txq->next_to_use = 0;
+       txq->empty = txq->count;
+       wmb();
+       vnic_enable_tx_ring(txq);
+}
+
+/* TxQ must be locked */
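+/*
+ * Reclaim Tx descriptors that the VIOC has finished with: walk the ring from
+ * next_to_clean, collect each complete SOP..EOP range the hardware has marked
+ * done, unmap the buffers, free the skb (kept on the SOP entry), update the
+ * byte/packet counters and advance next_to_clean.  Wakes the netif queue if
+ * it was stopped and there is room again.  Returns non-zero if a descriptor
+ * error was detected.
+ */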
+static int vnic_clean_txq(struct vnic_device *vnicdev, struct txq *txq)
+{
+       struct tx_pktBufDesc_Phys_w *txd;
+       int clean_idx, pkt_len;
+       int sop_idx = NOT_SET;
+       int eop_idx = NOT_SET;
+       int reset_flag = 0;
+
+       if (unlikely(!txq->desc))
+               return reset_flag;
+
+       /*
+        * Clean up all Tx Descriptors whose buffers were
+        * transmitted by the VIOC:
+        * bit 30 (Valid) indicates whether bits 27-29 (Status) have been set
+        * by the VIOC HW, stating that the descriptor was processed by HW.
+        */
+       for (clean_idx = txq->next_to_clean;;
+            clean_idx = VNIC_NEXT_IDX(clean_idx, txq->count)) {
+
+               txd = TXD_PTR(txq, clean_idx);
+
+               if (GET_VNIC_TX_HANDED(txd) != VNIC_TX_HANDED_HW_W)
+                       /* This descriptor has NOT been handed to HW, done! */
+                       break;
+
+               if (GET_VNIC_TX_SOP(txd) == VNIC_TX_SOP_W) {
+                       if (sop_idx != NOT_SET) {
+                               /* Problem - SOP back-to-back without EOP */
+                               dev_err(&vnicdev->viocdev->pdev->dev,
+                                      "vioc%d-vnic%d-txd%d ERROR (back-to-back SOP) (txd->word_1=%08x).\n",
+                                      vnicdev->viocdev->viocdev_idx,
+                                      vnicdev->vnic_id, clean_idx,
+                                      txd->word_1);
+
+                               vnicdev->net_stats.tx_errors++;
+                               reset_flag = 1;
+                               break;
+                       }
+                       sop_idx = clean_idx;
+               }
+
+               if (GET_VNIC_TX_EOP(txd) == VNIC_TX_EOP_W) {
+                       eop_idx = clean_idx;
+                       if (sop_idx == NOT_SET) {
+                               /* Problem - EOP without SOP */
+                               dev_err(&vnicdev->viocdev->pdev->dev,
+                                      "vioc%d-vnic%d-txd%d ERROR (EOP without SOP) (txd->word_1=%08x).\n",
+                                      vnicdev->viocdev->viocdev_idx,
+                                      vnicdev->vnic_id, clean_idx,
+                                      txd->word_1);
+
+                               vnicdev->net_stats.tx_errors++;
+                               reset_flag = 1;
+                               break;
+                       }
+                       if (GET_VNIC_TX_VALID(txd) != VNIC_TX_VALID_W)
+                               /* VIOC is still working on this descriptor */
+                               break;
+               }
+
+               /*
+                * Check for errors: regardless of whether the error was detected
+                * on a SOP, MOP or EOP descriptor, reset the ring.
+                */
+               if (GET_VNIC_TX_STS(txd) != VNIC_TX_TX_OK_W) {
+                       dev_err(&vnicdev->viocdev->pdev->dev,
+                              "vioc%d-vnic%d TxD ERROR (txd->word_1=%08x).\n",
+                              vnicdev->viocdev->viocdev_idx, vnicdev->vnic_id,
+                              txd->word_1);
+
+                       vnicdev->net_stats.tx_errors++;
+                       reset_flag = 1;
+                       break;
+               }
+
+               if (eop_idx != NOT_SET) {
+                       /* Found EOP fragment: start CLEANING */
+                       pkt_len = 0;
+                       for (clean_idx = sop_idx;;
+                            clean_idx = VNIC_NEXT_IDX(clean_idx, txq->count)) {
+
+                               txd = TXD_PTR(txq, clean_idx);
+
+                               /* Clear TxD's Handed bit, indicating that SW owns it now */
+                               CLR_VNIC_TX_HANDED(txd);
+
+                               /* One more empty descriptor */
+                               txq->empty++;
+
+                               if (txq->vbuf[clean_idx].dma) {
+                                       pci_unmap_page(vnicdev->viocdev->pdev,
+                                                      txq->vbuf[clean_idx].dma,
+                                                      txq->vbuf[clean_idx].
+                                                      length,
+                                                      PCI_DMA_TODEVICE);
+                                       txq->vbuf[clean_idx].dma = 0;
+                               }
+
+                               /* Free skb; should be for SOP (in case of frags) only */
+                               if (txq->vbuf[clean_idx].skb) {
+                                       dev_kfree_skb_any((struct sk_buff *)
+                                                         txq->vbuf[clean_idx].
+                                                         skb);
+                                       txq->vbuf[clean_idx].skb = NULL;
+                               }
+
+                               pkt_len += txq->vbuf[clean_idx].length;
+
+                               if (clean_idx == eop_idx)
+                                       goto set_pkt_stats;
+                       }
+
+                     set_pkt_stats:
+                       /*
+                        * Since this Tx Descriptor was already
+                        * transmitted, account for it - update stats.
+                        */
+                       vnicdev->net_stats.tx_bytes += pkt_len;
+                       vnicdev->net_stats.tx_packets++;
+                       /*
+                        * This is the ONLY place where txq->next_to_clean is
+                        * advanced.
+                        * It will point past the EOP descriptor of the just
+                        * cleaned pkt.
+                        */
+                       txq->next_to_clean = VNIC_NEXT_IDX(eop_idx, txq->count);
+                       /*
+                        * Reset sop_idx and eop_idx: start looking for next pkt
+                        */
+                       sop_idx = eop_idx = NOT_SET;
+                       /*
+                        * At this point clean_idx == eop_idx; it will be advanced
+                        * to the next descriptor at the top of the loop.
+                        */
+               }
+       }
+
+       if (reset_flag) {
+               /* For DEBUGGING */
+       }
+
+       /*
+        * If the queue was stopped and we now have enough room,
+        * wake it up.
+        */
+       if ((netif_queue_stopped(vnicdev->netdev)) &&
+           !txq->vbuf[txq->next_to_use].skb) {
+               netif_wake_queue(vnicdev->netdev);
+       }
+
+       return reset_flag;
+}
+
+/*
+ * Only called from interrupt context.
+ */
+static void vnic_tx_interrupt(struct vioc_device *viocdev, int vnic_id,
+                             int clean)
+{
+       struct vnic_device *vnicdev = viocdev->vnic_netdev[vnic_id]->priv;
+       u32 txd_ctl;
+       int txq_was_reset;
+       struct txq *txq;
+       char *txdesc_s = "";
+       char *txring_s = "";
+
+       txq = &vnicdev->txq;
+
+       if (!spin_trylock(&txq->lock)) {
+               /* Retry later */
+               return;
+       }
+
+       /* Get the TxD Control Register */
+       txd_ctl = vnic_rd_txd_ctl(txq);
+
+       if (txd_ctl & VREG_VENG_TXD_CTL_ERROR_MASK)
+               txring_s = "Tx Ring";
+
+       if (txd_ctl & VREG_VENG_TXD_CTL_INVDESC_MASK)
+               txdesc_s = "Tx Descriptor";
+
+       if (txd_ctl &
+           (VREG_VENG_TXD_CTL_INVDESC_MASK | VREG_VENG_TXD_CTL_ERROR_MASK)) {
+               dev_err(&viocdev->pdev->dev,
+                      "vioc%d-vnic%d TxD Ctl=%08x, ERROR %s %s. Reset Tx Ring!\n",
+                      viocdev->viocdev_idx, vnic_id, txd_ctl, txdesc_s,
+                      txring_s);
+
+               vnic_reset_txq(vnicdev, txq);
+               netif_wake_queue(vnicdev->netdev);
+       } else {
+               /* No problem with HW, just clean-up the Tx Ring */
+               if (clean)
+                       txq_was_reset = vnic_clean_txq(vnicdev, txq);
+       }
+
+       if ((txd_ctl & VREG_VENG_TXD_CTL_TXSTATE_MASK) ==
+           VVAL_VENG_TXD_CTL_TXSTATE_EMPTY)
+               vnicdev->vnic_stats.tx_on_empty_interrupts++;
+
+       spin_unlock(&txq->lock);
+}
+
+/*
+ * Must only be called from interrupt context.
+ */
+void vioc_tx_interrupt(void *input_param)
+{
+       struct vioc_device *viocdev;
+       u32 vioc_idx;
+       u32 vnic_idx;
+       u32 vnic_map;
+
+       vioc_idx = VIOC_IRQ_PARAM_VIOC_ID(input_param);
+       viocdev = vioc_viocdev(vioc_idx);
+       // read_lock(&viocdev->lock); /* protect against vnic changes */
+       vnic_map = viocdev->vnics_map;
+       for (vnic_idx = 0; vnic_idx < VIOC_MAX_VNICS; vnic_idx++) {
+               if (vnic_map & (1 << vnic_idx))
+                       vnic_tx_interrupt(viocdev, vnic_idx, 1);
+       }
+       viocdev->vioc_stats.tx_tasklets++;
+       // read_unlock(&viocdev->lock);
+}
+
+void vnic_enqueue_tx_pkt(struct vnic_device *vnicdev, struct txq *txq,
+                        struct sk_buff *skb, struct vioc_prov *prov)
+{
+       int idx, sop_idx, eop_idx, f;
+       struct tx_pktBufDesc_Phys_w *txd;
+
+       /*
+        * Map the Tx buffers into the vbuf queue.
+        */
+       idx = txq->next_to_use;
+       sop_idx = idx;
+
+       txq->vbuf[idx].skb = skb;
+       txq->vbuf[idx].dma = pci_map_single(vnicdev->viocdev->pdev,
+                                           skb->data,
+                                           skb->len, PCI_DMA_TODEVICE);
+       txq->vbuf[idx].length = skb_headlen(skb);
+
+       for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+               struct skb_frag_struct *frag;
+
+               frag = &skb_shinfo(skb)->frags[f];
+
+               idx = VNIC_NEXT_IDX(idx, txq->count);
+
+               txq->vbuf[idx].skb = NULL;
+
+               txq->vbuf[idx].dma = pci_map_page(vnicdev->viocdev->pdev,
+                                                 frag->page,
+                                                 frag->page_offset,
+                                                 frag->size, PCI_DMA_TODEVICE);
+               txq->vbuf[idx].length = frag->size;
+               txq->frags++;
+       }
+
+       eop_idx = idx;
+
+       txq->next_to_use = VNIC_NEXT_IDX(eop_idx, txq->count);
+
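+       /*
+        * Account for the descriptors just consumed.  If next_to_use wrapped
+        * around the end of the ring it is numerically smaller than sop_idx,
+        * so add txq->count to get the real number of descriptors used.
+        */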
+       if (txq->next_to_use < sop_idx)
+               txq->empty -= ((txq->count + txq->next_to_use) - sop_idx);
+       else
+               txq->empty -= (txq->next_to_use - sop_idx);
+
+       /*
+        * We are going backwards (from EOP to SOP) in setting up Tx
+        * Descriptors (idx == eop_idx when we enter the loop).
+        * So, by the time we hand the SOP Tx Descriptor fragment over
+        * to the VIOC HW, ALL following fragments will already have
+        * been handed over, and the VIOC HW should have no trouble
+        * picking up all of them.
+        */
+
+       for (;;) {
+               u32 word_1 = 0;
+
+               txd = TXD_PTR(txq, idx);
+
+               /* Set Tx buffer address */
+               *((dma_addr_t *) txd) = txq->vbuf[idx].dma;
+
+               /*
+                * Force memory writes to complete (FENCE) before letting the
+                * VIOC know that there are new descriptor(s). Do it ONLY for
+                * the SOP descriptor: no point "fencing" on every other
+                * descriptor if there were frags...
+                */
+                */
+               /* Set SOP */
+               if (idx == sop_idx) {
+                       word_1 |= VNIC_TX_SOP_W;
+                       wmb();
+               }
+               /* Set EOP */
+               if (idx == eop_idx)
+                       word_1 |= VNIC_TX_EOP_W;
+
+               /* Set Interrupt request (VNIC_TX_INTR_W), when needed */
+               if (prov->run_param.tx_pkts_per_irq > 0) {
+                       if (txq->tx_pkts_til_irq == 0) {
+                               txq->tx_pkts_til_irq =
+                                   prov->run_param.tx_pkts_per_irq;
+                               word_1 |= VNIC_TX_INTR_W;
+                       } else {
+                               txq->tx_pkts_til_irq--;
+                       }
+               }
+
+               /* Now the rest of it */
+               txd->word_1 |= word_1 |
+                   VNIC_TX_HANDED_HW_W |
+                   ((txq->vbuf[idx].length << VNIC_TX_BUFLEN_SHIFT) &
+                    VNIC_TX_BUFLEN_MASK);
+
+               if (idx == sop_idx)
+                       /* All done, if SOP descriptor was just set */
+                       break;
+               else
+                       /* Go back one more fragment */
+                       idx = VNIC_PREV_IDX(idx, txq->count);
+       }
+
+       /*
+        *  Ring bell here, before checking, if vnic_clean_txq() needs to
+        *  be called.
+        */
+       vnic_ring_tx_bell(txq);
+
+       if (txq->next_to_use == txq->next_to_clean) {
+               txq->wraps++;
+               vnic_clean_txq(vnicdev, txq);
+               if (txq->next_to_use == txq->next_to_clean) {
+                       txq->full++;
+               }
+       }
+
+}
+
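+/*
+ * Single-buffer fast path used by vnic_start_xmit() when the skb has no
+ * page fragments; fragmented skbs go through vnic_enqueue_tx_pkt() above.
+ * One descriptor carries the whole packet, so SOP and EOP are set together.
+ */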
+void vnic_enqueue_tx_buffers(struct vnic_device *vnicdev, struct txq *txq,
+                            struct sk_buff *skb, struct vioc_prov *prov)
+{
+       int len;
+       int idx;
+       struct tx_pktBufDesc_Phys_w *txd;
+
+       idx = txq->next_to_use;
+       len = skb->len;
+
+       txq->vbuf[idx].skb = skb;
+       txq->vbuf[idx].dma = pci_map_single(vnicdev->viocdev->pdev,
+                                           skb->data, len, PCI_DMA_TODEVICE);
+       txq->vbuf[idx].length = skb->len;
+
+       /*
+        * We are going backwards in setting up Tx Descriptors.  So,
+        * by the time we turn the Tx Descriptor with the first
+        * fragment over to the VIOC, the following fragments will
+        * already have been turned over.
+        */
+       txd = TXD_PTR(txq, idx);
+
+       /*
+        * Force memory writes to complete before letting the VIOC know
+        * that there are new descriptor(s), but do it ONLY for the
+        * very first descriptor (in case there were frags). No point
+        * "fencing" on every descriptor in this request.
+        */
+       wmb();
+
+       *((dma_addr_t *) txd) = txq->vbuf[idx].dma;
+
+       if (prov->run_param.tx_pkts_per_irq > 0) {
+               if (txq->tx_pkts_til_irq == 0) {
+                       txq->tx_pkts_til_irq = prov->run_param.tx_pkts_per_irq;
+                       /* Set Interrupt request: VNIC_TX_INTR_W */
+                       txd->word_1 |=
+                           (VNIC_TX_HANDED_HW_W | VNIC_TX_SOP_W | VNIC_TX_EOP_W
+                            | VNIC_TX_INTR_W | ((len << VNIC_TX_BUFLEN_SHIFT) &
+                                                VNIC_TX_BUFLEN_MASK));
+               } else {
+                       /* Set NO Interrupt request... */
+                       txd->word_1 |=
+                           (VNIC_TX_HANDED_HW_W | VNIC_TX_SOP_W | VNIC_TX_EOP_W
+                            | ((len << VNIC_TX_BUFLEN_SHIFT) &
+                               VNIC_TX_BUFLEN_MASK));
+                       txq->tx_pkts_til_irq--;
+               }
+       } else {
+               /* Set NO Interrupt request... */
+               txd->word_1 |=
+                   (VNIC_TX_HANDED_HW_W | VNIC_TX_SOP_W | VNIC_TX_EOP_W |
+                    ((len << VNIC_TX_BUFLEN_SHIFT) & VNIC_TX_BUFLEN_MASK));
+       }
+
+       /*
+        *  Ring bell here, before checking, if vnic_clean_txq() needs to
+        *  be called.
+        */
+       vnic_ring_tx_bell(txq);
+
+       idx = VNIC_NEXT_IDX(idx, txq->count);
+       if (idx == txq->next_to_clean) {
+               txq->wraps++;
+               vnic_clean_txq(vnicdev, txq);
+               if (idx == txq->next_to_clean) {
+                       txq->full++;
+               }
+       }
+
+       txq->next_to_use = idx;
+}
+
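+/*
+ * Every frame transmitted on a VNIC carries an F7PF_HLEN_STD-byte Fabric7
+ * fabric header in front of the Ethernet header (pushed in vnic_start_xmit()).
+ * Fill it in here, picking the encapsulation tag from the L3/L4 protocol;
+ * for TCP/UDP packets with CHECKSUM_HW the transport checksum field is
+ * zeroed and the _CKS tag requests the checksum offload advertised via
+ * NETIF_F_IP_CSUM.
+ */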
+static inline void init_f7_header(struct sk_buff *skb)
+{
+       struct vioc_f7pf_w *f7p;
+       unsigned char tag;
+
+       /*
+        * Initialize F7 Header AFTER processing the skb + frags, because we
+        * need the TOTAL pkt length in the F7 Header.
+        */
+
+       /* Determine packet tag */
+       if (((struct ethhdr *)skb->mac.raw)->h_proto == htons(ETH_P_IP)) {
+               if (skb->ip_summed == CHECKSUM_HW) {
+                       switch (skb->nh.iph->protocol) {
+                       case IPPROTO_TCP:
+                               tag = VIOC_F7PF_ET_ETH_IPV4_CKS;
+                               skb->h.th->check = 0;
+                               break;
+                       case IPPROTO_UDP:
+                               tag = VIOC_F7PF_ET_ETH_IPV4_CKS;
+                               skb->h.uh->check = 0;
+                               break;
+                       default:
+                               tag = VIOC_F7PF_ET_ETH_IPV4;
+                               break;
+                       }
+               } else {
+                       tag = VIOC_F7PF_ET_ETH_IPV4;
+               }
+       } else {
+               tag = VIOC_F7PF_ET_ETH;
+       }
+
+       f7p = (struct vioc_f7pf_w *)skb->data;
+       memset((void *)skb->data, 0, F7PF_HLEN_STD);
+
+       /* Encapsulation Version */
+       SET_HTON_VIOC_F7PF_ENVER_SHIFTED(f7p, VIOC_F7PF_VERSION1);
+       /* Reserved */
+       SET_HTON_VIOC_F7PF_MC_SHIFTED(f7p, 0);
+       /* No Touch Flag */
+       SET_HTON_VIOC_F7PF_NOTOUCH_SHIFTED(f7p, 0);
+       /* Drop Precedence */
+       SET_HTON_VIOC_F7PF_F7DP_SHIFTED(f7p, 0);
+       /* Class of Service */
+       SET_HTON_VIOC_F7PF_F7COS_SHIFTED(f7p, 2);
+       /* Encapsulation Tag */
+       SET_HTON_VIOC_F7PF_ENTAG_SHIFTED(f7p, tag);
+       /* Key Length */
+       SET_HTON_VIOC_F7PF_EKLEN_SHIFTED(f7p, 1);
+       /* Packet Length */
+       SET_HTON_VIOC_F7PF_PKTLEN_SHIFTED(f7p, skb->len);
+
+       /* lifID */
+       SET_HTON_VIOC_F7PF_LIFID_SHIFTED(f7p, 0);
+}
+
+/**
+ * vioc_tx_timer - Tx Timer
+ * @data: pointer to viocdev cast into an unsigned long
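+ *
+ * Periodic fallback for Tx completion: as long as tx_timer_active is set,
+ * this reruns the same per-VNIC cleanup as the Tx interrupt path and then
+ * rearms itself to fire again in HZ/4 (every 250 ms).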
+ **/
+void vioc_tx_timer(unsigned long data)
+{
+       struct vioc_device *viocdev = (struct vioc_device *)data;
+       u32 vnic_idx;
+
+       if (!viocdev->tx_timer_active)
+               return;
+
+       viocdev->vioc_stats.tx_timers++;
+
+       for (vnic_idx = 0; vnic_idx < VIOC_MAX_VNICS; vnic_idx++) {
+               if (viocdev->vnics_map & (1 << vnic_idx)) {
+                       vnic_tx_interrupt(viocdev, vnic_idx, 1);
+               }               /* Process VNIC's TX interrupt */
+       }
+       /* Reset the timer */
+       mod_timer(&viocdev->tx_timer, jiffies + HZ / 4);
+}
+
+
+/*
+ * hard_start_xmit() routine.
+ * NOTE WELL: We don't take a read lock on the VIOC, but rely on the
+ * networking subsystem to guarantee we will not be asked to Tx if
+ * the interface is unregistered.  Revisit if this assumption does
+ * not hold - add a tx_enabled flag to the vnic struct protected
+ * by txq->lock.  Or just read-lock the VIOC.
+ */
+int vnic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       struct txq *txq = &vnicdev->txq;
+       unsigned long flags;
+       int ret;
+
+       local_irq_save(flags);
+       if (!spin_trylock(&txq->lock)) {
+               /* Retry later */
+               local_irq_restore(flags);
+               return NETDEV_TX_LOCKED;
+       }
+
+       if (unlikely(skb_headroom(skb) < F7PF_HLEN_STD)) {
+               vnicdev->vnic_stats.headroom_misses++;
+               if (unlikely(skb_cow(skb, F7PF_HLEN_STD))) {
+                       dev_kfree_skb_any(skb);
+                       vnicdev->vnic_stats.headroom_miss_drops++;
+                       ret = NETDEV_TX_OK;     /* since we freed it */
+                       goto end_start_xmit;
+               }
+       }
+
+       /* Don't rely on the skb pointers being set */
+       skb->mac.raw = skb->data;
+       skb->nh.raw = skb->data + ETH_HLEN;
+       skb_push(skb, F7PF_HLEN_STD);
+
+       init_f7_header(skb);
+
+       if (skb_shinfo(skb)->nr_frags)
+               vnic_enqueue_tx_pkt(vnicdev, txq, skb, &vnicdev->viocdev->prov);
+       else
+               vnic_enqueue_tx_buffers(vnicdev, txq, skb,
+                                       &vnicdev->viocdev->prov);
+
+       /*
+        * Check if there is room on the queue for a worst-case packet.
+        * The skb above has already been queued for transmit, so return
+        * NETDEV_TX_OK even when stopping the queue; returning
+        * NETDEV_TX_BUSY here would make the stack requeue an skb this
+        * driver already owns and will free.
+        */
+       if (txq->empty < MAX_SKB_FRAGS) {
+               netif_stop_queue(netdev);
+               vnicdev->vnic_stats.netif_stops++;
+       }
+       ret = NETDEV_TX_OK;
+
+      end_start_xmit:
+       spin_unlock_irqrestore(&txq->lock, flags);
+       return ret;
+}
+
+/*
+ *      Create Ethernet header
+ *
+ *      saddr=NULL      means use device source address
+ *      daddr=NULL      means leave destination address (eg unresolved arp)
+ */
+int vnic_eth_header(struct sk_buff *skb, struct net_device *dev,
+                   unsigned short type, void *daddr, void *saddr, unsigned len)
+{
+       struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+       skb->mac.raw = skb->data;
+
+       /*
+        *      Set the protocol type. For a packet of type
+        *      ETH_P_802_3 we put the length in here instead. It is
+        *      up to the 802.2 layer to carry protocol information.
+        */
+
+       if (type != ETH_P_802_3)
+               eth->h_proto = htons(type);
+       else
+               eth->h_proto = htons(len);
+
+       if (saddr)
+               memcpy(eth->h_source, saddr, ETH_ALEN);
+       else
+               memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
+
+       if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
+               memset(eth->h_dest, 0, ETH_ALEN);
+               return ETH_HLEN + F7PF_HLEN_STD;
+       }
+
+       if (daddr) {
+               memcpy(eth->h_dest, daddr, ETH_ALEN);
+               return ETH_HLEN + F7PF_HLEN_STD;
+       }
+
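+       /*
+        * Destination not resolved yet (e.g. pending ARP): follow the
+        * eth_header() convention and return the negative header length so
+        * the caller knows the header could not be completed.
+        */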
+       return -(ETH_HLEN + F7PF_HLEN_STD);     /* XXX */
+}
+
+
+
+/**
+ * vnic_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+
+static int vnic_open(struct net_device *netdev)
+{
+       int ret = 0;
+       struct vnic_device *vnicdev = netdev->priv;
+
+       ret = vioc_set_vnic_cfg(vnicdev->viocdev->viocdev_idx,
+                               vnicdev->vnic_id,
+                               (VREG_BMC_VNIC_CFG_ENABLE_MASK |
+                                VREG_BMC_VNIC_CFG_PROMISCUOUS_MASK));
+
+       vnic_enable_tx_ring(&vnicdev->txq);
+
+       netif_start_queue(netdev);
+       netif_carrier_on(netdev);
+
+       return ret;
+}
+
+static int vnic_close(struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       struct txq *txq = &vnicdev->txq;
+       unsigned long flags;
+
+       vioc_set_vnic_cfg(vnicdev->viocdev->viocdev_idx, vnicdev->vnic_id, 0);
+
+       netif_carrier_off(netdev);
+       netif_stop_queue(netdev);
+
+       spin_lock_irqsave(&txq->lock, flags);
+
+       vnic_reset_txq(vnicdev, txq);
+       vnic_disable_tx_ring(&vnicdev->txq);
+
+       spin_unlock_irqrestore(&txq->lock, flags);
+
+       return 0;
+}
+
+/*
+ * Set netdev->dev_addr to this interface's MAC Address
+ */
+static int vnic_set_mac_addr(struct net_device *netdev, void *p)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+
+       /*
+        * Get HW MAC address from VIOC registers
+        */
+       vioc_get_vnic_mac(vnicdev->viocdev->viocdev_idx, vnicdev->vnic_id,
+                         &vnicdev->hw_mac[0]);
+
+       if (!is_valid_ether_addr(vnicdev->hw_mac)) {
+               dev_err(&vnicdev->viocdev->pdev->dev, "Invalid MAC Address\n");
+               return -EINVAL;
+       }
+
+       /*
+        * ...and install it in the netdev structure
+        */
+       memcpy(netdev->dev_addr, vnicdev->hw_mac, netdev->addr_len);
+       netdev->addr_len = ETH_ALEN;
+
+       return 0;
+}
+
+/*
+ * Set netdev->mtu to this interface's MTU
+ */
+static int vnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       int max_frame = new_mtu + ETH_HLEN + F7PF_HLEN_STD;
+
+       if ((max_frame < VNIC_MIN_MTU) || (max_frame > VNIC_MAX_MTU)) {
+               dev_err(&vnicdev->viocdev->pdev->dev, "Invalid MTU setting\n");
+               return -EINVAL;
+       }
+
+       netdev->mtu = new_mtu;
+       return 0;
+}
+
+/**
+ * vnic_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the timer callback.
+ **/
+
+static struct net_device_stats *vnic_get_stats(struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       return &vnicdev->net_stats;
+}
+
+static int vnic_alloc_tx_resources(struct vnic_device *vnicdev)
+{
+       struct vioc_device *viocdev = vnicdev->viocdev;
+       struct net_device *netdev = viocdev->vnic_netdev[vnicdev->vnic_id];
+       struct txq *txq;
+       size_t size;
+
+       vnicdev->vnic_stats.tx_on_empty_interrupts = 0;
+
+       txq = &vnicdev->txq;
+
+       txq->txq_id = TXQ0;
+       txq->vnic_id = vnicdev->vnic_id;
+       txq->next_to_use = 0;
+       txq->next_to_clean = 0;
+       txq->empty = txq->count;
+       txq->tx_pkts_til_irq = viocdev->prov.run_param.tx_pkts_per_irq;
+       txq->tx_pkts_til_bell = viocdev->prov.run_param.tx_pkts_per_bell;
+       txq->do_ring_bell = 0;
+       txq->bells = 0;
+       txq->frags = 0;
+       txq->wraps = 0;
+       txq->full = 0;
+
+       size = TX_DESC_SIZE * txq->count;
+       txq->desc = pci_alloc_consistent(viocdev->pdev, size, &txq->dma);
+       if (!txq->desc) {
+               dev_err(&viocdev->pdev->dev, "%s: Error allocating Tx ring (size %d)\n",
+                      netdev->name, txq->count);
+               return -ENOMEM;
+       }
+
+       txq->vbuf = vmalloc(sizeof(struct vbuf) * txq->count);
+       if (!txq->vbuf) {
+               dev_err(&viocdev->pdev->dev, "%s: Error allocating Tx resource (size %d)\n",
+                      netdev->name, txq->count);
+               pci_free_consistent(viocdev->pdev, size, txq->desc, txq->dma);
+               txq->desc = NULL;
+               return -ENOMEM;
+       }
+       memset(txq->vbuf, 0, sizeof(struct vbuf) * txq->count);
+
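+       /*
+        * Kernel virtual address of this VNIC's TXD control register inside
+        * the mapped VIOC BAR.  The (TXQ0 * 0x14) term presumably selects the
+        * per-queue register bank; with only TXQ0 in use it is a no-op here.
+        */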
+       txq->va_of_vreg_veng_txd_ctl =
+           (&viocdev->ba)->virt +
+           GETRELADDR(VIOC_VENG, vnicdev->vnic_id,
+                      (VREG_VENG_TXD_CTL + (TXQ0 * 0x14)));
+       spin_lock_init(&txq->lock);
+
+       /*
+        * Tell VIOC where TxQ things are
+        */
+       vioc_set_txq(viocdev->viocdev_idx, vnicdev->vnic_id, TXQ0,
+                    txq->dma, txq->count);
+       vnic_enable_tx_ring(txq);
+       vioc_ena_dis_tx_on_empty(viocdev->viocdev_idx,
+                                vnicdev->vnic_id,
+                                TXQ0,
+                                viocdev->prov.run_param.tx_intr_on_empty);
+       return 0;
+}
+
+static void vnic_free_tx_resources(struct vnic_device *vnicdev)
+{
+       pci_free_consistent(vnicdev->viocdev->pdev,
+                           vnicdev->txq.count * TX_DESC_SIZE,
+                           vnicdev->txq.desc, vnicdev->txq.dma);
+       vnicdev->txq.desc = NULL;
+       vnicdev->txq.dma = (dma_addr_t) NULL;
+       vfree(vnicdev->txq.vbuf);
+       vnicdev->txq.vbuf = NULL;
+}
+
+void vioc_reset_if_tx(struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       struct txq *txq = &vnicdev->txq;
+
+       vnic_reset_txq(vnicdev, txq);
+}
+
+extern struct ethtool_ops vioc_ethtool_ops;
+
+/**
+ * vnic_uninit - Device Termination Routine
+ *
+ * Returns 0 on success, negative on failure
+ *
+ **/
+static void vnic_uninit(struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       vnic_free_tx_resources(vnicdev);
+}
+
+/**
+ * vnic_init - Device Initialization Routine
+ *
+ * Returns 0 on success, negative on failure
+ *
+ **/
+int vioc_vnic_init(struct net_device *netdev)
+{
+       struct vnic_device *vnicdev = netdev->priv;
+       struct vioc_device *viocdev = vnicdev->viocdev;
+       int ret;
+
+       SET_ETHTOOL_OPS(netdev, &vioc_ethtool_ops);
+       /*
+        * we're going to reset, so assume we have no link for now
+        */
+       netif_carrier_off(netdev);
+       netif_stop_queue(netdev);
+
+       ether_setup(netdev);
+
+       netdev->hard_header_len = ETH_HLEN + F7PF_HLEN_STD;     /* XXX */
+       netdev->hard_header = &vnic_eth_header;
+       netdev->rebuild_header = NULL;  /* XXX */
+
+       vnic_change_mtu(netdev, 1500);  /* default */
+       vnic_set_mac_addr(netdev, NULL);
+
+       netdev->open = &vnic_open;
+       netdev->stop = &vnic_close;
+       netdev->get_stats = &vnic_get_stats;
+       netdev->uninit = &vnic_uninit;
+       netdev->set_mac_address = &vnic_set_mac_addr;
+       netdev->change_mtu = &vnic_change_mtu;
+       netdev->watchdog_timeo = HZ;
+       if (viocdev->highdma) {
+               netdev->features |= NETIF_F_HIGHDMA;
+       }
+       netdev->features |= NETIF_F_VLAN_CHALLENGED;    /* VLAN locked */
+       netdev->features |= NETIF_F_LLTX;       /* lockless Tx */
+
+       netdev->features |= NETIF_F_IP_CSUM;    /* Tx checksum */
+       dev_info(&viocdev->pdev->dev, "%s: HW IP checksum offload ENABLED\n",
+                netdev->name);
+
+       /* allocate Tx descriptors, tell VIOC where */
+       if ((ret = vnic_alloc_tx_resources(vnicdev)))
+               goto vnic_init_err;
+
+       netdev->hard_start_xmit = &vnic_start_xmit;
+       /* Set standard  Rx callback */
+
+       return 0;
+
+      vnic_init_err:
+       dev_err(&viocdev->pdev->dev, "%s: Error initializing vnic resources\n",
+              netdev->name);
+       return ret;
+}

--
Misha Tomushev
[EMAIL PROTECTED]
