On 21/01/17 06:46, Roopa Prabhu wrote:
> From: Roopa Prabhu <ro...@cumulusnetworks.com>
> 
> This patch series makes vxlan COLLECT_METADATA mode bridge
> and layer2 network friendly. Vxlan COLLECT_METADATA mode today
> solves the per-vni netdev scalability problem in l3 networks.
> When a vxlan collect metadata device participates in bridging
> vlan to vn-segments, it can only get the vlan mapped vni in
> the xmit tunnel dst metadata. It will need the vxlan driver to
> continue to learn, hold forwarding state and remote destination
> information similar to how it already does for non COLLECT_METADATA
> vxlan netdevices today.
> 
> Changes introduced by this patch:
>     - allow learning and forwarding database state to vxlan netdev in
>       COLLECT_METADATA mode. Current behaviour is not changed
>       by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used
>       to support the new bridge friendly mode.
>     - A single fdb table hashed by (mac, vni) to allow fdb entries with
>       multiple vnis in the same fdb table
>     - rx path already has the vni
>     - tx path expects a vni in the packet with dst_metadata
>     - prior to this series, fdb remote_dsts carried remote vni and
>       the vxlan device carrying the fdb table represented the
>       source vni. With the vxlan device now representing multiple vnis,
>       this patch adds a src vni attribute to the fdb entry. The remote
>       vni already uses NDA_VNI attribute. This patch introduces
>       NDA_SRC_VNI netlink attribute to represent the src vni in a multi
>       vni fdb table.
> 
> Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com>
> ---
[snip]
> @@ -2173,23 +2221,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, 
> struct net_device *dev)
>       bool did_rsc = false;
>       struct vxlan_rdst *rdst, *fdst = NULL;
>       struct vxlan_fdb *f;
> +     __be32 vni = 0;
>  
>       info = skb_tunnel_info(skb);
>  
>       skb_reset_mac_header(skb);
>  
>       if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
> -             if (info && info->mode & IP_TUNNEL_INFO_TX)
> -                     vxlan_xmit_one(skb, dev, NULL, false);
> -             else
> -                     kfree_skb(skb);
> -             return NETDEV_TX_OK;
> +             if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
> +                 info->mode & IP_TUNNEL_INFO_TX) {

nit: please put parentheses around the (info->mode & IP_TUNNEL_INFO_TX) check
(and likewise around the IP_TUNNEL_INFO_BRIDGE one) for readability.

> +                     vni = tunnel_id_to_key32(info->key.tun_id);
> +             } else {
> +                     if (info && info->mode & IP_TUNNEL_INFO_TX)

nit: please put parentheses around the (info->mode & IP_TUNNEL_INFO_TX) check
here as well.

> +                             vxlan_xmit_one(skb, dev, vni, NULL, false);
> +                     else
> +                             kfree_skb(skb);
> +                     return NETDEV_TX_OK;
> +             }
>       }
>  
>       if (vxlan->flags & VXLAN_F_PROXY) {
>               eth = eth_hdr(skb);
>               if (ntohs(eth->h_proto) == ETH_P_ARP)
> -                     return arp_reduce(dev, skb);
> +                     return arp_reduce(dev, skb, vni);
>  #if IS_ENABLED(CONFIG_IPV6)
>               else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
>                        pskb_may_pull(skb, sizeof(struct ipv6hdr)
> @@ -2200,13 +2254,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, 
> struct net_device *dev)
>                               msg = (struct nd_msg 
> *)skb_transport_header(skb);
>                               if (msg->icmph.icmp6_code == 0 &&
>                                   msg->icmph.icmp6_type == 
> NDISC_NEIGHBOUR_SOLICITATION)
> -                                     return neigh_reduce(dev, skb);
> +                                     return neigh_reduce(dev, skb, vni);
>               }
>  #endif
>       }
>  
>       eth = eth_hdr(skb);
> -     f = vxlan_find_mac(vxlan, eth->h_dest);
> +     f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>       did_rsc = false;
>  
>       if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
> @@ -2214,11 +2268,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, 
> struct net_device *dev)
>            ntohs(eth->h_proto) == ETH_P_IPV6)) {
>               did_rsc = route_shortcircuit(dev, skb);
>               if (did_rsc)
> -                     f = vxlan_find_mac(vxlan, eth->h_dest);
> +                     f = vxlan_find_mac(vxlan, eth->h_dest, vni);
>       }
>  
>       if (f == NULL) {
> -             f = vxlan_find_mac(vxlan, all_zeros_mac);
> +             f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
>               if (f == NULL) {
>                       if ((vxlan->flags & VXLAN_F_L2MISS) &&
>                           !is_multicast_ether_addr(eth->h_dest))
> @@ -2239,11 +2293,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, 
> struct net_device *dev)
>               }
>               skb1 = skb_clone(skb, GFP_ATOMIC);
>               if (skb1)
> -                     vxlan_xmit_one(skb1, dev, rdst, did_rsc);
> +                     vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
>       }
>  
>       if (fdst)
> -             vxlan_xmit_one(skb, dev, fdst, did_rsc);
> +             vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
>       else
>               kfree_skb(skb);
>       return NETDEV_TX_OK;
> @@ -2307,12 +2361,12 @@ static int vxlan_init(struct net_device *dev)
>       return 0;
>  }
>  
> -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
> +static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
>  {
>       struct vxlan_fdb *f;
>  
>       spin_lock_bh(&vxlan->hash_lock);
> -     f = __vxlan_find_mac(vxlan, all_zeros_mac);
> +     f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
>       if (f)
>               vxlan_fdb_destroy(vxlan, f);
>       spin_unlock_bh(&vxlan->hash_lock);
> @@ -2322,7 +2376,7 @@ static void vxlan_uninit(struct net_device *dev)
>  {
>       struct vxlan_dev *vxlan = netdev_priv(dev);
>  
> -     vxlan_fdb_delete_default(vxlan);
> +     vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>  
>       free_percpu(dev->tstats);
>  }
> @@ -2536,6 +2590,8 @@ static void vxlan_setup(struct net_device *dev)
>       dev->vlan_features = dev->features;
>       dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
>       dev->hw_features |= NETIF_F_GSO_SOFTWARE;
> +     dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
> +     dev->features |= dev->hw_features;
>       netif_keep_dst(dev);
>       dev->priv_flags |= IFF_NO_QUEUE;
>  
> @@ -2921,6 +2977,7 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>                                      NLM_F_EXCL|NLM_F_CREATE,
>                                      vxlan->cfg.dst_port,
>                                      vxlan->default_dst.remote_vni,
> +                                    vxlan->default_dst.remote_vni,
>                                      vxlan->default_dst.remote_ifindex,
>                                      NTF_SELF);
>               if (err)
> @@ -2929,7 +2986,7 @@ static int vxlan_dev_configure(struct net *src_net, 
> struct net_device *dev,
>  
>       err = register_netdevice(dev);
>       if (err) {
> -             vxlan_fdb_delete_default(vxlan);
> +             vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
>               return err;
>       }
>  
> @@ -3023,19 +3080,19 @@ static int vxlan_newlink(struct net *src_net, struct 
> net_device *dev,
>               conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>  
>       if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
> -         nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
> +         !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
>               conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
>  
>       if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
> -         nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
> +         !nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
>               conf.flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
>  
>       if (data[IFLA_VXLAN_REMCSUM_TX] &&
> -         nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
> +         !nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
>               conf.flags |= VXLAN_F_REMCSUM_TX;
>  
>       if (data[IFLA_VXLAN_REMCSUM_RX] &&
> -         nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
> +         !nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
>               conf.flags |= VXLAN_F_REMCSUM_RX;

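Aren't these going to break user-space? With the added negation, each of
these flags is now set only when the attribute is present with a zero value,
which is the opposite of the current behaviour.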

>  
>       if (data[IFLA_VXLAN_GBP])
> diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
> index bd99a8d..f3d16db 100644
> --- a/include/uapi/linux/neighbour.h
> +++ b/include/uapi/linux/neighbour.h
> @@ -26,6 +26,7 @@ enum {
>       NDA_IFINDEX,
>       NDA_MASTER,
>       NDA_LINK_NETNSID,
> +     NDA_SRC_VNI,
>       __NDA_MAX
>  };
>  
> 

Reply via email to