Daniel Borkmann <dan...@iogearbox.net> writes:

> On 10/15/20 5:46 PM, Toke Høiland-Jørgensen wrote:
>> From: Toke Høiland-Jørgensen <t...@redhat.com>
>> 
>> Based on the discussion in [0], update the bpf_redirect_neigh() helper to
>> accept an optional parameter specifying the nexthop information. This makes
>> it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without
>> incurring a duplicate FIB lookup - since the FIB lookup helper will return
>> the nexthop information even if no neighbour is present, this can simply be
>> passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns
>> BPF_FIB_LKUP_RET_NO_NEIGH.
>> 
>> [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63...@iogearbox.net/
>> 
>> Signed-off-by: Toke Høiland-Jørgensen <t...@redhat.com>
>
> Overall looks good from what I can tell, just small nits below on top of
> David's feedback:
>
> [...]
>> -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
>> +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
>> +                               struct bpf_nh_params *nh)
>>   {
>>      const struct iphdr *ip4h = ip_hdr(skb);
>>      struct net *net = dev_net(dev);
>>      int err, ret = NET_XMIT_DROP;
>> -    struct rtable *rt;
>> -    struct flowi4 fl4 = {
>> -            .flowi4_flags   = FLOWI_FLAG_ANYSRC,
>> -            .flowi4_mark    = skb->mark,
>> -            .flowi4_tos     = RT_TOS(ip4h->tos),
>> -            .flowi4_oif     = dev->ifindex,
>> -            .flowi4_proto   = ip4h->protocol,
>> -            .daddr          = ip4h->daddr,
>> -            .saddr          = ip4h->saddr,
>> -    };
>>   
>> -    rt = ip_route_output_flow(net, &fl4, NULL);
>> -    if (IS_ERR(rt))
>> -            goto out_drop;
>> -    if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
>> -            ip_rt_put(rt);
>> -            goto out_drop;
>> -    }
>> +    if (!nh->nh_family) {
>> +            struct rtable *rt;
>> +            struct flowi4 fl4 = {
>> +                    .flowi4_flags = FLOWI_FLAG_ANYSRC,
>> +                    .flowi4_mark = skb->mark,
>> +                    .flowi4_tos = RT_TOS(ip4h->tos),
>> +                    .flowi4_oif = dev->ifindex,
>> +                    .flowi4_proto = ip4h->protocol,
>> +                    .daddr = ip4h->daddr,
>> +                    .saddr = ip4h->saddr,
>> +            };
>> +
>> +            rt = ip_route_output_flow(net, &fl4, NULL);
>> +            if (IS_ERR(rt))
>> +                    goto out_drop;
>> +            if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
>> +                    ip_rt_put(rt);
>> +                    goto out_drop;
>> +            }
>>   
>> -    skb_dst_set(skb, &rt->dst);
>> +            skb_dst_set(skb, &rt->dst);
>> +            nh = NULL;
>> +    }
>>   
>> -    err = bpf_out_neigh_v4(net, skb);
>> +    err = bpf_out_neigh_v4(net, skb, dev, nh);
>>      if (unlikely(net_xmit_eval(err)))
>>              dev->stats.tx_errors++;
>>      else
>> @@ -2355,7 +2383,8 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
>>   }
>>   #endif /* CONFIG_INET */
>>   
>> -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
>> +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
>> +                            struct bpf_nh_params *nh)
>>   {
>>      struct ethhdr *ethh = eth_hdr(skb);
>>   
>> @@ -2370,9 +2399,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
>>      skb_reset_network_header(skb);
>>   
>>      if (skb->protocol == htons(ETH_P_IP))
>> -            return __bpf_redirect_neigh_v4(skb, dev);
>> +            return __bpf_redirect_neigh_v4(skb, dev, nh);
>>      else if (skb->protocol == htons(ETH_P_IPV6))
>> -            return __bpf_redirect_neigh_v6(skb, dev);
>> +            return __bpf_redirect_neigh_v6(skb, dev, nh);
>>   out:
>>      kfree_skb(skb);
>>      return -ENOTSUPP;
>> @@ -2455,8 +2484,8 @@ int skb_do_redirect(struct sk_buff *skb)
>>              return -EAGAIN;
>>      }
>>      return flags & BPF_F_NEIGH ?
>> -           __bpf_redirect_neigh(skb, dev) :
>> -           __bpf_redirect(skb, dev, flags);
>> +            __bpf_redirect_neigh(skb, dev, &ri->nh) :
>> +            __bpf_redirect(skb, dev, flags);
>>   out_drop:
>>      kfree_skb(skb);
>>      return -EINVAL;
>> @@ -2504,16 +2533,23 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
>>      .arg2_type      = ARG_ANYTHING,
>>   };
>>   
>> -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
>> +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
>> +       int, plen, u64, flags)
>>   {
>>      struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>   
>> -    if (unlikely(flags))
>> +    if (unlikely((plen && plen < sizeof(*params)) || flags))
>>              return TC_ACT_SHOT;
>>   
>>      ri->flags = BPF_F_NEIGH;
>>      ri->tgt_index = ifindex;
>>   
>> +    BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
>> +    if (plen)
>> +            memcpy(&ri->nh, params, sizeof(ri->nh));
>> +    else
>> +            ri->nh.nh_family = 0; /* clear previous value */
>
> I'd probably just add an internal flag and do ...
>
>    ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
>
> ... instead of above clearing, and skb_do_redirect() then becomes:
>
>    __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? &ri->nh : NULL)
>
> ... which would then also avoid this !nh->nh_family check where you later on
> set nh = NULL to pass it onwards.

Ah yes, excellent idea! Will fix :)

-Toke

Reply via email to