Hi Arjun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Ganesh-Goudar/cxgb4-Add-support-for-FW_ETH_TX_PKT_VM_WR/20180626-163628
config: x86_64-randconfig-x001-201825 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 
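
For instance (an illustrative invocation, not necessarily the bot's exact
one; it assumes the attached config is saved as .config at the top of an
x86_64 tree with the patch applied on net-next/master):

        # expand the attached .config for the local toolchain
        make ARCH=x86_64 olddefconfig
        # build just the affected object to surface the error quickly
        make ARCH=x86_64 drivers/net/ethernet/chelsio/cxgb4/sge.o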

All errors (new ones prefixed by >>):

   drivers/net/ethernet/chelsio/cxgb4/sge.c: In function 'cxgb4_vf_eth_xmit':
>> drivers/net/ethernet/chelsio/cxgb4/sge.c:1646:18: error: assignment of read-only variable 'fw_hdr_copy_len'
     fw_hdr_copy_len = (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
                     ^

vim +/fw_hdr_copy_len +1646 drivers/net/ethernet/chelsio/cxgb4/sge.c
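
The variable is declared const at line 1637 but only assigned at line 1646,
and C permits a const-qualified object to receive a value only through its
initializer.  A minimal sketch of one possible fix (moving the computation
into the declaration; sizeof does not evaluate its operand, so using the
still-uninitialized wr pointer here is fine):

        const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
                                       sizeof(wr->ethmacsrc) +
                                       sizeof(wr->ethtype) +
                                       sizeof(wr->vlantci);

Alternatively, dropping the const qualifier from the declaration would make
the existing assignment at line 1646 legal as-is.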

  1622  
  1623  /**
  1624   *      cxgb4_vf_eth_xmit - add a packet to an Ethernet TX queue
  1625   *      @skb: the packet
  1626   *      @dev: the egress net device
  1627   *
  1628   *      Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
  1629   */
  1630  static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
  1631                                       struct net_device *dev)
  1632  {
  1633          dma_addr_t addr[MAX_SKB_FRAGS + 1];
  1634          const struct skb_shared_info *ssi;
  1635          struct fw_eth_tx_pkt_vm_wr *wr;
  1636          int qidx, credits, max_pkt_len;
  1637          const size_t fw_hdr_copy_len;
  1638          struct cpl_tx_pkt_core *cpl;
  1639          const struct port_info *pi;
  1640          unsigned int flits, ndesc;
  1641          struct sge_eth_txq *txq;
  1642          struct adapter *adapter;
  1643          u64 cntrl, *end;
  1644          u32 wr_mid;
  1645  
> 1646          fw_hdr_copy_len = (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
  1647                             sizeof(wr->ethtype) + sizeof(wr->vlantci));
  1648  
  1649          /* The chip minimum packet length is 10 octets but the firmware
  1650           * command that we are using requires that we copy the Ethernet header
  1651           * (including the VLAN tag) into the header so we reject anything
  1652           * smaller than that ...
  1653           */
  1654          if (unlikely(skb->len < fw_hdr_copy_len))
  1655                  goto out_free;
  1656  
  1657          /* Discard the packet if the length is greater than mtu */
  1658          max_pkt_len = ETH_HLEN + dev->mtu;
  1659          if (skb_vlan_tag_present(skb))
  1660                  max_pkt_len += VLAN_HLEN;
  1661          if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
  1662                  goto out_free;
  1663  
  1664          /* Figure out which TX Queue we're going to use. */
  1665          pi = netdev_priv(dev);
  1666          adapter = pi->adapter;
  1667          qidx = skb_get_queue_mapping(skb);
  1668          WARN_ON(qidx >= pi->nqsets);
  1669          txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
  1670  
  1671          /* Take this opportunity to reclaim any TX Descriptors whose DMA
  1672           * transfers have completed.
  1673           */
  1674          cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
  1675  
  1676          /* Calculate the number of flits and TX Descriptors we're going to
  1677           * need along with how many TX Descriptors will be left over after
  1678           * we inject our Work Request.
  1679           */
  1680          flits = t4vf_calc_tx_flits(skb);
  1681          ndesc = flits_to_desc(flits);
  1682          credits = txq_avail(&txq->q) - ndesc;
  1683  
  1684          if (unlikely(credits < 0)) {
  1685                  /* Not enough room for this packet's Work Request.  Stop the
  1686                   * TX Queue and return a "busy" condition.  The queue will get
  1687                   * started later on when the firmware informs us that space
  1688                   * has opened up.
  1689                   */
  1690                  eth_txq_stop(txq);
  1691                  dev_err(adapter->pdev_dev,
  1692                          "%s: TX ring %u full while queue awake!\n",
  1693                          dev->name, qidx);
  1694                  return NETDEV_TX_BUSY;
  1695          }
  1696  
  1697          if (!t4vf_is_eth_imm(skb) &&
  1698              unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
  1699                  /* We need to map the skb into PCI DMA space (because it can't
  1700                   * be in-lined directly into the Work Request) and the mapping
  1701                   * operation failed.  Record the error and drop the packet.
  1702                   */
  1703                  txq->mapping_err++;
  1704                  goto out_free;
  1705          }
  1706  
  1707          wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
  1708          if (unlikely(credits < ETHTXQ_STOP_THRES)) {
  1709                  /* After we're done injecting the Work Request for this
  1710                   * packet, we'll be below our "stop threshold" so stop the TX
  1711                   * Queue now and schedule a request for an SGE Egress Queue
  1712                   * Update message.  The queue will get started later on when
  1713                   * the firmware processes this Work Request and sends us an
  1714                   * Egress Queue Status Update message indicating that space
  1715                   * has opened up.
  1716                   */
  1717                  eth_txq_stop(txq);
  1718                  wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
  1719          }
  1720  
  1721          /* Start filling in our Work Request.  Note that we do _not_ handle
  1722           * the WR Header wrapping around the TX Descriptor Ring.  If our
  1723           * maximum header size ever exceeds one TX Descriptor, we'll need to
  1724           * do something else here.
  1725           */
  1726          WARN_ON(DIV_ROUND_UP(T4VF_ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
  1727          wr = (void *)&txq->q.desc[txq->q.pidx];
  1728          wr->equiq_to_len16 = cpu_to_be32(wr_mid);
  1729          wr->r3[0] = cpu_to_be32(0);
  1730          wr->r3[1] = cpu_to_be32(0);
  1731          skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
  1732          end = (u64 *)wr + flits;
  1733  
  1734          /* If this is a Large Send Offload packet we'll put in an LSO CPL
  1735           * message with an encapsulated TX Packet CPL message.  Otherwise we
  1736           * just use a TX Packet CPL message.
  1737           */
  1738          ssi = skb_shinfo(skb);
  1739          if (ssi->gso_size) {
  1740                  struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
  1741                  bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
  1742                  int l3hdr_len = skb_network_header_len(skb);
  1743                  int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
  1744  
  1745                  wr->op_immdlen =
  1746                          cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
  1747                                      FW_WR_IMMDLEN_V(sizeof(*lso) +
  1748                                                      sizeof(*cpl)));
  1749                   /* Fill in the LSO CPL message. */
  1750                  lso->lso_ctrl =
  1751                          cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
  1752                                      LSO_FIRST_SLICE_F |
  1753                                      LSO_LAST_SLICE_F |
  1754                                      LSO_IPV6_V(v6) |
  1755                                      LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
  1756                                      LSO_IPHDR_LEN_V(l3hdr_len / 4) |
  1757                                      LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
  1758                  lso->ipid_ofst = cpu_to_be16(0);
  1759                  lso->mss = cpu_to_be16(ssi->gso_size);
  1760                  lso->seqno_offset = cpu_to_be32(0);
  1761                  if (is_t4(adapter->params.chip))
  1762                          lso->len = cpu_to_be32(skb->len);
  1763                  else
  1764                          lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len));
  1765  
  1766                  /* Set up TX Packet CPL pointer, control word and perform
  1767                   * accounting.
  1768                   */
  1769                  cpl = (void *)(lso + 1);
  1770  
  1771                  if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
  1772                          cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
  1773                  else
  1774                          cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
  1775  
  1776                  cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
  1777                                             TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
  1778                           TXPKT_IPHDR_LEN_V(l3hdr_len);
  1779                  txq->tso++;
  1780                  txq->tx_cso += ssi->gso_segs;
  1781          } else {
  1782                  int len;
  1783  
  1784                  len = (t4vf_is_eth_imm(skb)
  1785                         ? skb->len + sizeof(*cpl)
  1786                         : sizeof(*cpl));
  1787                  wr->op_immdlen =
  1788                          cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
  1789                                      FW_WR_IMMDLEN_V(len));
  1790  
  1791                  /* Set up TX Packet CPL pointer, control word and perform
  1792                   * accounting.
  1793                   */
  1794                  cpl = (void *)(wr + 1);
  1795                  if (skb->ip_summed == CHECKSUM_PARTIAL) {
  1796                          cntrl = hwcsum(adapter->params.chip, skb) |
  1797                                  TXPKT_IPCSUM_DIS_F;
  1798                          txq->tx_cso++;
  1799                  } else {
  1800                          cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
  1801                  }
  1802          }
  1803  
  1804          /* If there's a VLAN tag present, add that to the list of things to
  1805           * do in this Work Request.
  1806           */
  1807          if (skb_vlan_tag_present(skb)) {
  1808                  txq->vlan_ins++;
  1809                  cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
  1810          }
  1811  
  1812           /* Fill in the TX Packet CPL message header. */
  1813          cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
  1814                                   TXPKT_INTF_V(pi->port_id) |
  1815                                   TXPKT_PF_V(0));
  1816          cpl->pack = cpu_to_be16(0);
  1817          cpl->len = cpu_to_be16(skb->len);
  1818          cpl->ctrl1 = cpu_to_be64(cntrl);
  1819  
  1820          /* Fill in the body of the TX Packet CPL message with either in-lined
  1821           * data or a Scatter/Gather List.
  1822           */
  1823          if (t4vf_is_eth_imm(skb)) {
  1824                  /* In-line the packet's data and free the skb since we don't
  1825                   * need it any longer.
  1826                   */
  1827                  cxgb4_inline_tx_skb(skb, &txq->q, cpl + 1);
  1828                  dev_consume_skb_any(skb);
  1829          } else {
  1830                  /* Write the skb's Scatter/Gather list into the TX Packet CPL
  1831                   * message and retain a pointer to the skb so we can free it
  1832                   * later when its DMA completes.  (We store the skb pointer
  1833                   * in the Software Descriptor corresponding to the last TX
  1834                   * Descriptor used by the Work Request.)
  1835                   *
  1836                   * The retained skb will be freed when the corresponding TX
  1837                   * Descriptors are reclaimed after their DMAs complete.
  1838                   * However, this could take quite a while since, in general,
  1839                   * the hardware is set up to be lazy about sending DMA
  1840                   * completion notifications to us and we mostly perform TX
  1841                   * reclaims in the transmit routine.
  1842                   *
  1843                   * This is good for performance but means that we rely on new
  1844                   * TX packets arriving to run the destructors of completed
  1845                   * packets, which open up space in their sockets' send queues.
  1846                   * Sometimes we do not get such new packets causing TX to
  1847                   * stall.  A single UDP transmitter is a good example of this
  1848                   * situation.  We have a clean up timer that periodically
  1849                   * reclaims completed packets but it doesn't run often enough
  1850                   * (nor do we want it to) to prevent lengthy stalls.  A
  1851                   * solution to this problem is to run the destructor early,
  1852                   * after the packet is queued but before it's DMAd.  A con is
  1853                   * that we lie to socket memory accounting, but the amount of
  1854                   * extra memory is reasonable (limited by the number of TX
  1855                   * descriptors), the packets do actually get freed quickly by
  1856                   * new packets almost always, and for protocols like TCP that
  1857                   * wait for acks to really free up the data the extra memory
  1858                   * is even less.  On the positive side we run the destructors
  1859                   * on the sending CPU rather than on a potentially different
  1860                   * completing CPU, usually a good thing.
  1861                   *
  1862                   * Run the destructor before telling the DMA engine about the
  1863                   * packet to make sure it doesn't complete and get freed
  1864                   * prematurely.
  1865                   */
  1866                  struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
  1867                  struct sge_txq *tq = &txq->q;
  1868                  int last_desc;
  1869  
  1870                  /* If the Work Request header was an exact multiple of our TX
  1871                   * Descriptor length, then it's possible that the starting SGL
  1872                   * pointer lines up exactly with the end of our TX Descriptor
  1873                   * ring.  If that's the case, wrap around to the beginning
  1874                   * here ...
  1875                   */
  1876                  if (unlikely((void *)sgl == (void *)tq->stat)) {
  1877                          sgl = (void *)tq->desc;
  1878                          end = (void *)((void *)tq->desc +
  1879                                         ((void *)end - (void *)tq->stat));
  1880                  }
  1881  
  1882                  cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
  1883                  skb_orphan(skb);
  1884  
  1885                  last_desc = tq->pidx + ndesc - 1;
  1886                  if (last_desc >= tq->size)
  1887                          last_desc -= tq->size;
  1888                  tq->sdesc[last_desc].skb = skb;
  1889                  tq->sdesc[last_desc].sgl = sgl;
  1890          }
  1891  
  1892          /* Advance our internal TX Queue state, tell the hardware about
  1893           * the new TX descriptors and return success.
  1894           */
  1895          txq_advance(&txq->q, ndesc);
  1896  
  1897          cxgb4_ring_tx_db(adapter, &txq->q, ndesc);
  1898          return NETDEV_TX_OK;
  1899  
  1900  out_free:
  1901          /* An error of some sort happened.  Free the TX skb and tell the
  1902           * OS that we've "dealt" with the packet ...
  1903           */
  1904          dev_kfree_skb_any(skb);
  1905          return NETDEV_TX_OK;
  1906  }
  1907  
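
For reference, the diagnostic can be reproduced in isolation; the file and
identifier names below are made up for illustration and are not from the
driver:

        /* const-demo.c -- compile with: gcc -c const-demo.c */
        #include <stddef.h>

        size_t demo(void)
        {
                const size_t len;       /* const and uninitialized: accepted */

                len = 4;                /* error: assignment of read-only
                                         * variable 'len'
                                         */
                return len;
        }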

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
