Flip the IPv6 output path from using the VRF dst to using the l3mdev
tx out hook.

Signed-off-by: David Ahern <d...@cumulusnetworks.com>
---
 drivers/net/vrf.c     | 156 ++++++++++++++++++++------------------------------
 net/ipv6/ip6_output.c |   9 ++-
 net/ipv6/route.c      |   5 --
 3 files changed, 70 insertions(+), 100 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 7517645347c3..df58bc791cfd 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -140,80 +140,42 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
                                           struct net_device *dev)
 {
-       const struct ipv6hdr *iph = ipv6_hdr(skb);
-       struct net *net = dev_net(skb->dev);
-       struct flowi6 fl6 = {
-               /* needed to match OIF rule */
-               .flowi6_oif = dev->ifindex,
-               .flowi6_iif = LOOPBACK_IFINDEX,
-               .daddr = iph->daddr,
-               .saddr = iph->saddr,
-               .flowlabel = ip6_flowinfo(iph),
-               .flowi6_mark = skb->mark,
-               .flowi6_proto = iph->nexthdr,
-               .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
-       };
-       int ret = NET_XMIT_DROP;
-       struct dst_entry *dst;
-       struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
-
-       dst = ip6_route_output(net, NULL, &fl6);
-       if (dst == dst_null)
-               goto err;
+       struct net_vrf *vrf = netdev_priv(dev);
+       struct dst_entry *dst = NULL;
+       struct rt6_info *rt6_local;
 
        skb_dst_drop(skb);
 
-       /* if dst.dev is loopback or the VRF device again this is locally
-        * originated traffic destined to a local address. Short circuit
-        * to Rx path using our local dst
-        */
-       if (dst->dev == net->loopback_dev || dst->dev == dev) {
-               struct net_vrf *vrf = netdev_priv(dev);
-               struct rt6_info *rt6_local;
-
-               /* release looked up dst and use cached local dst */
-               dst_release(dst);
+       rcu_read_lock();
 
-               rcu_read_lock();
+       rt6_local = rcu_dereference(vrf->rt6_local);
+       if (unlikely(!rt6_local)) {
+               rcu_read_unlock();
+               goto err;
+       }
 
-               rt6_local = rcu_dereference(vrf->rt6_local);
-               if (unlikely(!rt6_local)) {
+       /* Ordering issue: cached local dst is created on newlink
+        * before the IPv6 initialization. Using the local dst
+        * requires rt6i_idev to be set so make sure it is.
+        */
+       if (unlikely(!rt6_local->rt6i_idev)) {
+               rt6_local->rt6i_idev = in6_dev_get(dev);
+               if (!rt6_local->rt6i_idev) {
                        rcu_read_unlock();
                        goto err;
                }
-
-               /* Ordering issue: cached local dst is created on newlink
-                * before the IPv6 initialization. Using the local dst
-                * requires rt6i_idev to be set so make sure it is.
-                */
-               if (unlikely(!rt6_local->rt6i_idev)) {
-                       rt6_local->rt6i_idev = in6_dev_get(dev);
-                       if (!rt6_local->rt6i_idev) {
-                               rcu_read_unlock();
-                               goto err;
-                       }
-               }
-
-               dst = &rt6_local->dst;
-               dst_hold(dst);
-
-               rcu_read_unlock();
-
-               return vrf_local_xmit(skb, dev, &rt6_local->dst);
        }
 
-       skb_dst_set(skb, dst);
+       dst = &rt6_local->dst;
+       if (likely(dst))
+               dst_hold(dst);
 
-       /* strip the ethernet header added for pass through VRF device */
-       __skb_pull(skb, skb_network_offset(skb));
+       rcu_read_unlock();
 
-       ret = ip6_local_out(net, skb->sk, skb);
-       if (unlikely(net_xmit_eval(ret)))
-               dev->stats.tx_errors++;
-       else
-               ret = NET_XMIT_SUCCESS;
+       if (unlikely(!dst))
+               goto err;
 
-       return ret;
+       return vrf_local_xmit(skb, dev, dst);
 err:
        vrf_tx_error(dev, skb);
        return NET_XMIT_DROP;
@@ -286,44 +248,43 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-/* modelled after ip6_finish_output2 */
-static int vrf_finish_output6(struct net *net, struct sock *sk,
-                             struct sk_buff *skb)
-{
-       struct dst_entry *dst = skb_dst(skb);
-       struct net_device *dev = dst->dev;
-       struct neighbour *neigh;
-       struct in6_addr *nexthop;
-       int ret;
+static int vrf_finish_output(struct net *net, struct sock *sk,
+                            struct sk_buff *skb);
 
+/* modelled after ip6_output */
+static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
        skb->protocol = htons(ETH_P_IPV6);
-       skb->dev = dev;
-
-       rcu_read_lock_bh();
-       nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
-       neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
-       if (unlikely(!neigh))
-               neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
-       if (!IS_ERR(neigh)) {
-               ret = dst_neigh_output(dst, neigh, skb);
-               rcu_read_unlock_bh();
-               return ret;
-       }
-       rcu_read_unlock_bh();
 
-       IP6_INC_STATS(dev_net(dst->dev),
-                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
-       kfree_skb(skb);
-       return -EINVAL;
+       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+                           net, sk, skb, NULL, skb->dev,
+                           vrf_finish_output,
+                           !(IPCB(skb)->flags & IP6SKB_REROUTED));
 }
 
-/* modelled after ip6_output */
-static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
 {
-       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
-                           net, sk, skb, NULL, skb_dst(skb)->dev,
-                           vrf_finish_output6,
-                           !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+       struct net *net = dev_net(vrf_dev);
+       struct net_device *dev = skb->dev;
+       int err;
+
+       skb->dev = vrf_dev;
+
+       err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, vrf_dev, vrf_output6);
+       if (likely(err == 1))
+               err = vrf_output6(net, sk, skb);
+
+       if (likely(err == 1)) {
+               skb->dev = dev;
+               nf_reset(skb);
+       } else {
+               skb = NULL;
+       }
+
+       return skb;
 }
 
 /* holding rtnl */
@@ -412,6 +373,13 @@ static int vrf_rt6_create(struct net_device *dev)
        return rc;
 }
 #else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       return skb;
+}
+
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
 }
@@ -482,6 +450,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
        switch (proto) {
        case AF_INET:
                return vrf_ip_out(vrf_dev, sk, skb);
+       case AF_INET6:
+               return vrf_ip6_out(vrf_dev, sk, skb);
        }
 
        return skb;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bcec7e73eb0b..9711f32eedd7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1054,10 +1054,15 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
 {
+       struct net *net = sock_net(sk);
        struct dst_entry *dst = NULL;
        int err;
 
-       err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
+       if (rt6_need_strict(&fl6->daddr) &&
+           netif_index_is_l3_master(net, fl6->flowi6_oif))
+               return ERR_PTR(-ENETUNREACH);
+
+       err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
@@ -1065,7 +1070,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
        if (!fl6->flowi6_oif)
                fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
-       return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+       return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
 }
 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4a0f77aa49cf..65ee42ad2afd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1188,13 +1188,8 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags)
 {
-       struct dst_entry *dst;
        bool any_src;
 
-       dst = l3mdev_get_rt6_dst(net, fl6);
-       if (dst)
-               return dst;
-
        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
        any_src = ipv6_addr_any(&fl6->saddr);
-- 
2.1.4

Reply via email to