On 10/15/20 5:46 PM, Toke Høiland-Jørgensen wrote:
[...]
+struct bpf_redir_neigh {
+       /* network family for lookup (AF_INET, AF_INET6)
+        */
+       __u8    nh_family;
+       /* network address of nexthop; skips fib lookup to find gateway */
+       union {
+               __be32          ipv4_nh;
+               __u32           ipv6_nh[4];  /* in6_addr; network order */
+       };
+};
+
  enum bpf_task_fd_type {
        BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
        BPF_FD_TYPE_TRACEPOINT,         /* tp name */
diff --git a/net/core/filter.c b/net/core/filter.c
index c5e2a1c5fd8d..d073031a3a61 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2165,12 +2165,11 @@ static int __bpf_redirect(struct sk_buff *skb, struct 
net_device *dev,
  }
#if IS_ENABLED(CONFIG_IPV6)
-static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+                           struct net_device *dev, const struct in6_addr 
*nexthop)
  {
-       struct dst_entry *dst = skb_dst(skb);
-       struct net_device *dev = dst->dev;
        u32 hh_len = LL_RESERVED_SPACE(dev);
-       const struct in6_addr *nexthop;
+       struct dst_entry *dst = NULL;
        struct neighbour *neigh;
if (dev_xmit_recursion()) {
@@ -2196,8 +2195,11 @@ static int bpf_out_neigh_v6(struct net *net, struct 
sk_buff *skb)
        }
rcu_read_lock_bh();
-       nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
-                             &ipv6_hdr(skb)->daddr);
+       if (!nexthop) {
+               dst = skb_dst(skb);
+               nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+                                     &ipv6_hdr(skb)->daddr);
+       }
        neigh = ip_neigh_gw6(dev, nexthop);
        if (likely(!IS_ERR(neigh))) {
                int ret;
@@ -2210,36 +2212,46 @@ static int bpf_out_neigh_v6(struct net *net, struct 
sk_buff *skb)
                return ret;
        }
        rcu_read_unlock_bh();
-       IP6_INC_STATS(dev_net(dst->dev),
-                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       if (dst)
+               IP6_INC_STATS(dev_net(dst->dev),
+                             ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  out_drop:
        kfree_skb(skb);
        return -ENETDOWN;
  }
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+                                  struct bpf_nh_params *nh)
  {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       struct in6_addr *nexthop = NULL;
        struct net *net = dev_net(dev);
        int err, ret = NET_XMIT_DROP;
-       struct dst_entry *dst;
-       struct flowi6 fl6 = {
-               .flowi6_flags   = FLOWI_FLAG_ANYSRC,
-               .flowi6_mark    = skb->mark,
-               .flowlabel      = ip6_flowinfo(ip6h),
-               .flowi6_oif     = dev->ifindex,
-               .flowi6_proto   = ip6h->nexthdr,
-               .daddr          = ip6h->daddr,
-               .saddr          = ip6h->saddr,
-       };
- dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
-       if (IS_ERR(dst))
-               goto out_drop;
+       if (!nh->nh_family) {
+               struct dst_entry *dst;
+               struct flowi6 fl6 = {
+                       .flowi6_flags = FLOWI_FLAG_ANYSRC,
+                       .flowi6_mark = skb->mark,
+                       .flowlabel = ip6_flowinfo(ip6h),
+                       .flowi6_oif = dev->ifindex,
+                       .flowi6_proto = ip6h->nexthdr,
+                       .daddr = ip6h->daddr,
+                       .saddr = ip6h->saddr,

nit: It would be good for readability to keep the previous whitespace
alignment intact.

+               };
+
+               dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+               if (IS_ERR(dst))
+                       goto out_drop;
- skb_dst_set(skb, dst);
+               skb_dst_set(skb, dst);
+       } else if (nh->nh_family == AF_INET6) {
+               nexthop = &nh->ipv6_nh;
+       } else {
+               goto out_drop;
+       }
- err = bpf_out_neigh_v6(net, skb);
+       err = bpf_out_neigh_v6(net, skb, dev, nexthop);

I'd probably model bpf_out_neigh_v{4,6}() to be as similar to each other as
possible in terms of the args we pass, etc. In the v6 case you pass the nexthop
in6_addr directly, whereas v4 passes bpf_nh_params; I'd probably stick to the
latter for v6 as well, to keep it symmetric.

        if (unlikely(net_xmit_eval(err)))
                dev->stats.tx_errors++;
        else
@@ -2260,11 +2272,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, 
struct net_device *dev)
  #endif /* CONFIG_IPV6 */
#if IS_ENABLED(CONFIG_INET)
-static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+                           struct net_device *dev, struct bpf_nh_params *nh)
  {
-       struct dst_entry *dst = skb_dst(skb);
-       struct rtable *rt = container_of(dst, struct rtable, dst);
-       struct net_device *dev = dst->dev;
        u32 hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
        bool is_v6gw = false;
@@ -2292,7 +2302,20 @@ static int bpf_out_neigh_v4(struct net *net, struct 
sk_buff *skb)
        }
rcu_read_lock_bh();
-       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       if (!nh) {
+               struct dst_entry *dst = skb_dst(skb);
+               struct rtable *rt = container_of(dst, struct rtable, dst);
+
+               neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       } else if (nh->nh_family == AF_INET6) {
+               neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
+               is_v6gw = true;
+       } else if (nh->nh_family == AF_INET) {
+               neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
+       } else {
+               goto out_drop;
+       }
+
        if (likely(!IS_ERR(neigh))) {
                int ret;

Reply via email to