Fri, Jan 06, 2017 at 01:27:13AM CET, sridhar.samudr...@intel.com wrote: > > >On 1/5/2017 4:56 AM, Jiri Pirko wrote: >> Tue, Jan 03, 2017 at 07:07:53PM CET, sridhar.samudr...@intel.com wrote: >> > In switchdev mode, broadcast filter is not enabled on VFs. The broadcasts >> > and >> > unknown frames from VFs are received by the PF and passed to corresponding >> > VF >> > port representator netdev. >> > A host based switching entity like a linux bridge or OVS redirects these >> > frames >> > to the right VFs via VFPR netdevs. Any frames sent via VFPR netdevs are >> > sent as >> > directed transmits to the corresponding VFs. To enable directed transmit, >> > skb >> > metadata dst is used to pass the VF id and the frame is requeued to call >> > the PFs >> > transmit routine. >> > >> > Small script to demonstrate inter VF pings in switchdev mode. >> > PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1 >> > >> > # rmmod i40e; modprobe i40e >> > # devlink dev eswitch set pci/0000:05:00.0 mode switchdev >> > # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs >> > # ip link set enp5s0f0 vf 0 mac 00:11:22:33:44:55 >> > # ip link set enp5s0f0 vf 1 mac 00:11:22:33:44:56 >> > # rmmod i40evf; modprobe i40evf >> > >> > /* Create 2 namespaces and move the VFs to the corresponding ns. */ >> > # ip netns add ns0 >> > # ip link set enp5s2 netns ns0 >> > # ip netns exec ns0 ip addr add 192.168.1.10/24 dev enp5s2 >> > # ip netns exec ns0 ip link set enp5s2 up >> > # ip netns add ns1 >> > # ip link set enp5s2f1 netns ns1 >> > # ip netns exec ns1 ip addr add 192.168.1.11/24 dev enp5s2f1 >> > # ip netns exec ns1 ip link set enp5s2f1 up >> > >> > /* bring up pf and vfpr netdevs */ >> > # ip link set enp5s0f0 up >> > # ip link set enp5s0f0-vf0 up >> > # ip link set enp5s0f0-vf1 up >> > >> > /* Create a linux bridge and add vfpr netdevs to it. */ >> > # ip link add vfpr-br type bridge >> > # ip link set enp5s0f0-vf0 master vfpr-br >> > # ip link set enp5s0f0-vf1 master vfpr-br >> > # ip addr add 192.168.1.1/24 dev vfpr-br >> > # ip link set vfpr-br up >> > >> > # ip netns exec ns0 ping -c3 192.168.1.11 >> > # ip netns exec ns1 ping -c3 192.168.1.10 >> > >> > Signed-off-by: Sridhar Samudrala <sridhar.samudr...@intel.com> >> [...] >> >> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c >> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.c >> > index 352cf7c..b46ddaa 100644 >> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c >> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c >> > @@ -1176,16 +1176,37 @@ static bool i40e_alloc_mapped_page(struct >> > i40e_ring *rx_ring, >> > * @rx_ring: rx ring in play >> > * @skb: packet to send up >> > * @vlan_tag: vlan tag for packet >> > + * @lpbk: is it a loopback frame? >> > **/ >> > static void i40e_receive_skb(struct i40e_ring *rx_ring, >> > - struct sk_buff *skb, u16 vlan_tag) >> > + struct sk_buff *skb, u16 vlan_tag, bool lpbk) >> > { >> > struct i40e_q_vector *q_vector = rx_ring->q_vector; >> > + struct i40e_pf *pf = rx_ring->vsi->back; >> > + struct i40e_vf *vf; >> > + struct ethhdr *eth; >> > + int vf_id; >> > >> > if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && >> > (vlan_tag & VLAN_VID_MASK)) >> > __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); >> > >> > + if ((pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) || !lpbk) >> > + goto gro_receive; >> > + >> > + /* If a loopback packet is received from a VF in switchdev mode, pass >> > the >> > + * frame to the corresponding VFPR netdev based on the source MAC in >> > the frame. >> > + */ >> > + eth = (struct ethhdr *)skb_mac_header(skb); >> > + for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) { >> > + vf = &pf->vf[vf_id]; >> > + if (ether_addr_equal(eth->h_source, vf->default_lan_addr.addr)) >> > { >> > + skb->dev = vf->vfpr_netdev; >> This sucks :( Is't there any identification coming from rx ring that >> would tell you what vf this is? To match vrpr according to a single MAC >> address seems a bit awkward. What if there is a macvlan configured >> on the VF? >Unfortunately, with the current HW, RX descriptor only indicates if it is a >loopback packet from a VF, >but not the specific id of the VF.
Is it a FW limitation? If so, I believe that you should consider to implement it. >Multiple macs on VF are not supported with the current patchset. >At this point we are not making switchdev as the default mode because of >these limitations. > >> >> >> >> > + break; >> > + } >> > + } >> > + >> > +gro_receive: >> > napi_gro_receive(&q_vector->napi, skb); >> > } >> > >> [...] >> >> > @@ -2998,3 +3064,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff >> > *skb, struct net_device *netdev) >> > >> > return i40e_xmit_frame_ring(skb, tx_ring); >> > } >> > + >> > +netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb, >> > + struct net_device *dev) >> > +{ >> > + struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev); >> > + struct i40e_vf *vf = priv->vf; >> > + struct i40e_pf *pf = vf->pf; >> > + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; >> > + >> > + skb_dst_drop(skb); >> > + dst_hold(&priv->vfpr_dst->dst); >> > + skb_dst_set(skb, &priv->vfpr_dst->dst); >> > + skb->dev = vsi->netdev; >> This dst dance seems a bit odd to me. Why don't you just call >> i40e_xmit_frame_ring with an extra arg holding the needed metadata? > >We don't have TX/RX queues associated with VFPR netdevs, so we need to set >the dev to PF netdev and requeue the skb. Still, you eventually call a function within same .c file. Using dst does not look right to me. > >> >> >> > + >> > + return dev_queue_xmit(skb); >> > +} >> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h >> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.h >> > index e065321..850723f 100644 >> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h >> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h >> > @@ -366,6 +366,7 @@ struct i40e_ring_container { >> > >> > bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); >> > netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device >> > *netdev); >> > +netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb, struct >> > net_device *netdev); >> > void i40e_clean_tx_ring(struct i40e_ring *tx_ring); >> > void i40e_clean_rx_ring(struct i40e_ring *rx_ring); >> > int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring); >> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h >> > b/drivers/net/ethernet/intel/i40e/i40e_type.h >> > index edc0abd..c136cc9 100644 >> > --- a/drivers/net/ethernet/intel/i40e/i40e_type.h >> > +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h >> > @@ -728,6 +728,9 @@ enum i40e_rx_desc_status_bits { >> > #define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT >> > I40E_RX_DESC_STATUS_TSYNVALID_SHIFT >> > #define I40E_RXD_QW1_STATUS_TSYNVALID_MASK \ >> > BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT) >> > +#define I40E_RXD_QW1_STATUS_LPBK_SHIFT I40E_RX_DESC_STATUS_LPBK_SHIFT >> > +#define I40E_RXD_QW1_STATUS_LPBK_MASK \ >> > + BIT_ULL(I40E_RXD_QW1_STATUS_LPBK_SHIFT) >> > >> > enum i40e_rx_desc_fltstat_values { >> > I40E_RX_DESC_FLTSTAT_NO_DATA = 0, >> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c >> > b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c >> > index 0c8687d..f0860ef 100644 >> > --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c >> > +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c >> > @@ -1062,6 +1062,7 @@ static int i40e_vfpr_netdev_stop(struct net_device >> > *dev) >> > static const struct net_device_ops i40e_vfpr_netdev_ops = { >> > .ndo_open = i40e_vfpr_netdev_open, >> > .ndo_stop = i40e_vfpr_netdev_stop, >> > + .ndo_start_xmit = i40e_vfpr_netdev_start_xmit, >> > }; >> > >> > /** >> > @@ -1121,16 +1122,22 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 >> > vf_num) >> > >> > priv = netdev_priv(vfpr_netdev); >> > priv->vf = &(pf->vf[vf_num]); >> > + priv->vfpr_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, >> > GFP_KERNEL); >> I'm missing a patch here. In net-next, metadata_dst_alloc has 2 args. > >Somehow that patch didn't make it to netdev. You can find it here on >intel-wired-lan archives. >http://lists.osuosl.org/pipermail/intel-wired-lan/Week-of-Mon-20170102/007723.html >I will CC you when i submit V3. Thanks.