On 9/28/20 7:38 AM, Daniel Borkmann wrote:
> diff --git a/net/core/filter.c b/net/core/filter.c
> index a0776e48dcc9..64c6e5ec97d7 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device 
> *dev)
> +{
> +     const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +     struct net *net = dev_net(dev);
> +     int err, ret = NET_XMIT_DROP;
> +     struct flowi6 fl6 = {
> +             .flowi6_flags   = FLOWI_FLAG_ANYSRC,
> +             .flowi6_mark    = skb->mark,
> +             .flowlabel      = ip6_flowinfo(ip6h),
> +             .flowi6_proto   = ip6h->nexthdr,
> +             .flowi6_oif     = dev->ifindex,
> +             .daddr          = ip6h->daddr,
> +             .saddr          = ip6h->saddr,
> +     };
> +     struct dst_entry *dst;
> +
> +     skb->dev = dev;

this is not needed here. You set dev in bpf_out_neigh_v6 to the dst dev.
Everything else is an error path where the skb is dropped.


> +     skb->tstamp = 0;
> +
> +     dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> +     if (IS_ERR(dst))
> +             goto out_drop;
> +
> +     skb_dst_set(skb, dst);
> +
> +     err = bpf_out_neigh_v6(net, skb);
> +     if (unlikely(net_xmit_eval(err)))
> +             dev->stats.tx_errors++;
> +     else
> +             ret = NET_XMIT_SUCCESS;
> +     goto out_xmit;
> +out_drop:
> +     dev->stats.tx_errors++;
> +     kfree_skb(skb);
> +out_xmit:
> +     return ret;
> +}
> +#else
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device 
> *dev)
> +{
> +     kfree_skb(skb);
> +     return NET_XMIT_DROP;
> +}
> +#endif /* CONFIG_IPV6 */
> +
> +#if IS_ENABLED(CONFIG_INET)
> +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
> +{
> +     struct dst_entry *dst = skb_dst(skb);
> +     struct rtable *rt = container_of(dst, struct rtable, dst);
> +     struct net_device *dev = dst->dev;
> +     u32 hh_len = LL_RESERVED_SPACE(dev);
> +     struct neighbour *neigh;
> +     bool is_v6gw = false;
> +
> +     if (dev_xmit_recursion())
> +             goto out_rec;
> +     if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {

Why is this check needed for v4 but not v6?

> +             struct sk_buff *skb2;
> +
> +             skb2 = skb_realloc_headroom(skb, hh_len);
> +             if (!skb2) {
> +                     kfree_skb(skb);
> +                     return -ENOMEM;
> +             }
> +             if (skb->sk)
> +                     skb_set_owner_w(skb2, skb->sk);
> +             consume_skb(skb);
> +             skb = skb2;
> +     }
> +     rcu_read_lock_bh();
> +     neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
> +     if (likely(!IS_ERR(neigh))) {
> +             int ret;
> +
> +             sock_confirm_neigh(skb, neigh);
> +             dev_xmit_recursion_inc();
> +             ret = neigh_output(neigh, skb, is_v6gw);
> +             dev_xmit_recursion_dec();
> +             rcu_read_unlock_bh();
> +             return ret;
> +     }
> +     rcu_read_unlock_bh();
> +out_drop:
> +     kfree_skb(skb);
> +     return -EINVAL;
> +out_rec:
> +     net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy 
> bpf program?\n");
> +     goto out_drop;
> +}
> +
> +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device 
> *dev)
> +{
> +     const struct iphdr *ip4h = ip_hdr(skb);
> +     struct net *net = dev_net(dev);
> +     int err, ret = NET_XMIT_DROP;
> +     struct flowi4 fl4 = {
> +             .flowi4_flags   = FLOWI_FLAG_ANYSRC,
> +             .flowi4_mark    = skb->mark,
> +             .flowi4_tos     = RT_TOS(ip4h->tos),

set flowi4_proto here.

> +             .flowi4_oif     = dev->ifindex,
> +             .daddr          = ip4h->daddr,
> +             .saddr          = ip4h->saddr,
> +     };
> +     struct rtable *rt;
> +
> +     skb->dev = dev;
> +     skb->tstamp = 0;
> +
> +     rt = ip_route_output_flow(net, &fl4, NULL);
> +     if (IS_ERR(rt))
> +             goto out_drop;
> +     if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
> +             ip_rt_put(rt);
> +             goto out_drop;
> +     }
> +
> +     skb_dst_set(skb, &rt->dst);
> +
> +     err = bpf_out_neigh_v4(net, skb);
> +     if (unlikely(net_xmit_eval(err)))
> +             dev->stats.tx_errors++;
> +     else
> +             ret = NET_XMIT_SUCCESS;
> +     goto out_xmit;
> +out_drop:
> +     dev->stats.tx_errors++;
> +     kfree_skb(skb);
> +out_xmit:
> +     return ret;
> +}

Reply via email to