On Tue, Sep 29, 2020 at 11:23:03PM +0200, Daniel Borkmann wrote:

[ ... ]

> ---
>  include/linux/skbuff.h         |   5 +
>  include/uapi/linux/bpf.h       |  14 ++
>  net/core/filter.c              | 273 +++++++++++++++++++++++++++++++--
>  tools/include/uapi/linux/bpf.h |  14 ++
>  4 files changed, 293 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 04a18e01b362..3d0cf3722bb4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
>       return skb->mac_header != (typeof(skb->mac_header))~0U;
>  }
>  
> +static inline void skb_unset_mac_header(struct sk_buff *skb)
> +{
> +     skb->mac_header = (typeof(skb->mac_header))~0U;
> +}
> +
>  static inline void skb_reset_mac_header(struct sk_buff *skb)
>  {
>       skb->mac_header = skb->data - skb->head;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6116a7f54c8f..1f17c6752deb 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3652,6 +3652,19 @@ union bpf_attr {
>   *           associated socket instead of the current process.
>   *   Return
>   *           The id is returned or 0 in case the id could not be retrieved.
> + *
> + * long bpf_redirect_neigh(u32 ifindex, u64 flags)
> + *   Description
> + *           Redirect the packet to another net device of index *ifindex*
> + *           and fill in L2 addresses from neighboring subsystem. This helper
> + *           is somewhat similar to **bpf_redirect**\ (), except that it
> + *           fills in e.g. MAC addresses based on the L3 information from
> + *           the packet. This helper is supported for IPv4 and IPv6 protocols.
> + *           The *flags* argument is reserved and must be 0. The helper is
> + *           currently only supported for tc BPF program types.
> + *   Return
> + *           The helper returns **TC_ACT_REDIRECT** on success or
> + *           **TC_ACT_SHOT** on error.
>   */
>  #define __BPF_FUNC_MAPPER(FN)                \
>       FN(unspec),                     \
> @@ -3806,6 +3819,7 @@ union bpf_attr {
>       FN(snprintf_btf),               \
>       FN(seq_printf_btf),             \
>       FN(skb_cgroup_classid),         \
> +     FN(redirect_neigh),             \
>       /* */
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index a0776e48dcc9..14b1534f6b46 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2163,6 +2163,222 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
>               return __bpf_redirect_no_mac(skb, dev, flags);
>  }
>  
> +#if IS_ENABLED(CONFIG_IPV6)
> +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> +{
> +     struct dst_entry *dst = skb_dst(skb);
> +     struct net_device *dev = dst->dev;
> +     u32 hh_len = LL_RESERVED_SPACE(dev);
> +     const struct in6_addr *nexthop;
> +     struct neighbour *neigh;
> +
> +     if (dev_xmit_recursion())
> +             goto out_rec;
> +
> +     skb->dev = dev;
> +     skb->tstamp = 0;
> +
> +     if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
> +             struct sk_buff *skb2;
> +
> +             skb2 = skb_realloc_headroom(skb, hh_len);
> +             if (!skb2) {
> +                     kfree_skb(skb);
> +                     return -ENOMEM;
> +             }
> +             if (skb->sk)
> +                     skb_set_owner_w(skb2, skb->sk);
> +             consume_skb(skb);
> +             skb = skb2;
> +     }
> +
> +     rcu_read_lock_bh();
> +     nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> +                           &ipv6_hdr(skb)->daddr);
> +     neigh = ip_neigh_gw6(dev, nexthop);
> +     if (likely(!IS_ERR(neigh))) {
> +             int ret;
> +
> +             sock_confirm_neigh(skb, neigh);
> +             dev_xmit_recursion_inc();
> +             ret = neigh_output(neigh, skb, false);
> +             dev_xmit_recursion_dec();
> +             rcu_read_unlock_bh();
> +             return ret;
> +     }
> +     rcu_read_unlock_bh();
> +     IP6_INC_STATS(dev_net(dst->dev),
> +                   ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> +out_drop:
> +     kfree_skb(skb);
> +     return -EINVAL;
> +out_rec:
> +     net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
> +     goto out_drop;
nit: maybe log this at the earlier "if (dev_xmit_recursion())" check and
then directly goto out_drop.

> +}
> +

[ ... ]

> +/* Internal, non-exposed redirect flags. */
> +enum {
> +     BPF_F_NEIGH = (1ULL << 1),
> +};
It would be useful to ensure that any future "flags" of BPF_FUNC_redirect
will not overlap with this. Maybe add a BUILD_BUG_ON?

Others LGTM.

Acked-by: Martin KaFai Lau <ka...@fb.com>


>  
>  int skb_do_redirect(struct sk_buff *skb)
>  {
>       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>       struct net_device *dev;
> +     u32 flags = ri->flags;
>  
>       dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
>       ri->tgt_index = 0;
> @@ -2231,7 +2440,22 @@ int skb_do_redirect(struct sk_buff *skb)
>               return -EINVAL;
>       }
>  
> -     return __bpf_redirect(skb, dev, ri->flags);
> +     return flags & BPF_F_NEIGH ?
> +            __bpf_redirect_neigh(skb, dev) :
> +            __bpf_redirect(skb, dev, flags);
> +}

Reply via email to