From: Toke Høiland-Jørgensen <t...@redhat.com>

The bpf_fib_lookup() helper performs a neighbour lookup for the destination
IP and returns BPF_FIB_LKUP_RET_NO_NEIGH if this fails, with the expectation
that the BPF program will deal with this condition, either by passing the
packet up the stack, or by using bpf_redirect_neigh().

The neighbour lookup is done via a hash table (through ___neigh_lookup_noref()),
which incurs some overhead. If the caller knows this is likely to fail
anyway, it may want to skip that and go unconditionally to
bpf_redirect_neigh(). For this use case, add a flag to bpf_fib_lookup()
that will make it skip the neighbour lookup and instead always return
BPF_FIB_LKUP_RET_NO_NEIGH (but still populate the gateway and target
ifindex).

Signed-off-by: Toke Høiland-Jørgensen <t...@redhat.com>
---
 include/uapi/linux/bpf.h       |   10 ++++++----
 net/core/filter.c              |   16 ++++++++++++++--
 tools/include/uapi/linux/bpf.h |   10 ++++++----
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
        __u64 args[0];
 };
 
-/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
- * OUTPUT:  Do lookup from egress perspective; default is ingress
+/* DIRECT:      Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:      Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH:  Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
  */
 enum {
-       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
-       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_DIRECT     = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT     = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index fa09b4f141ae..9791e6311afa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5382,6 +5382,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
                if (nhc->nhc_gw_family)
                        params->ipv4_dst = nhc->nhc_gw.ipv4;
 
+               if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+                       return BPF_FIB_LKUP_RET_NO_NEIGH;
+
                neigh = __ipv4_neigh_lookup_noref(dev,
                                                 (__force u32)params->ipv4_dst);
        } else {
@@ -5389,6 +5392,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
                params->family = AF_INET6;
                *dst = nhc->nhc_gw.ipv6;
+
+               if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+                       return BPF_FIB_LKUP_RET_NO_NEIGH;
+
                neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
        }
 
@@ -5501,6 +5508,9 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
        params->rt_metric = res.f6i->fib6_metric;
        params->ifindex = dev->ifindex;
 
+       if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+               return BPF_FIB_LKUP_RET_NO_NEIGH;
+
        /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
         * not needed here.
         */
@@ -5518,7 +5528,8 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
        if (plen < sizeof(*params))
                return -EINVAL;
 
-       if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+       if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+                     BPF_FIB_LOOKUP_SKIP_NEIGH))
                return -EINVAL;
 
        switch (params->family) {
@@ -5555,7 +5566,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
        if (plen < sizeof(*params))
                return -EINVAL;
 
-       if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+       if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+                     BPF_FIB_LOOKUP_SKIP_NEIGH))
                return -EINVAL;
 
        switch (params->family) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
        __u64 args[0];
 };
 
-/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
- * OUTPUT:  Do lookup from egress perspective; default is ingress
+/* DIRECT:      Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:      Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH:  Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
  */
 enum {
-       BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
-       BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+       BPF_FIB_LOOKUP_DIRECT     = (1U << 0),
+       BPF_FIB_LOOKUP_OUTPUT     = (1U << 1),
+       BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 };
 
 enum {

Reply via email to