Following the IPv4 stack changes, run a BPF program attached to the netns before
looking up a listening socket. The program can return a listening socket to use
as the result of the socket lookup, fail the lookup, or take no action.

Suggested-by: Marek Majkowski <ma...@cloudflare.com>
Signed-off-by: Jakub Sitnicki <ja...@cloudflare.com>
---

Notes:
    v5:
    - Simplify prog runners now that only SK_DROP/PASS can be returned.
    
    v4:
    - Adapt to changes in BPF prog return codes.
    - Invert return value from bpf_sk_lookup_run_v6: true now means skip reuseport.
    
    v3:
    - Use a static_key to minimize the hook overhead when not used. (Alexei)
    - Don't copy struct in6_addr when populating BPF prog context. (Martin)
    - Adapt for running an array of attached programs. (Alexei)
    - Adapt for optionally skipping reuseport selection. (Martin)

 include/linux/filter.h      | 39 +++++++++++++++++++++++++++++++++++++
 net/ipv6/inet6_hashtables.c | 35 +++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c4f54c216347..8252572db918 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1386,4 +1386,43 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
        return no_reuseport;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
+                                       const struct in6_addr *saddr,
+                                       const __be16 sport,
+                                       const struct in6_addr *daddr,
+                                       const u16 dport,
+                                       struct sock **psk)
+{ /* Run the netns SK_LOOKUP programs for an IPv6 lookup; result goes to *psk */
+       struct bpf_prog_array *run_array;
+       struct sock *selected_sk = NULL; /* stays NULL when run_array is NULL */
+       bool no_reuseport = false;
+
+       rcu_read_lock();
+       run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+       if (run_array) {
+               struct bpf_sk_lookup_kern ctx = {
+                       .family         = AF_INET6,
+                       .protocol       = protocol,
+                       .v6.saddr       = saddr, /* pointer is stored, not a copy */
+                       .v6.daddr       = daddr,
+                       .sport          = sport,
+                       .dport          = dport,
+               };
+               u32 act;
+
+               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+               if (act == SK_PASS) {
+                       selected_sk = ctx.selected_sk;
+                       no_reuseport = ctx.no_reuseport;
+               } else { /* any other verdict fails the lookup */
+                       selected_sk = ERR_PTR(-ECONNREFUSED);
+               }
+       }
+       rcu_read_unlock();
+       *psk = selected_sk;
+       return no_reuseport; /* true: caller skips reuseport selection */
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 03942eef8ab6..2d3add9e6116 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,8 @@
 #include <net/ip.h>
 #include <net/sock_reuseport.h>
 
+extern struct inet_hashinfo tcp_hashinfo;
+
 u32 inet6_ehashfn(const struct net *net,
                  const struct in6_addr *laddr, const u16 lport,
                  const struct in6_addr *faddr, const __be16 fport)
@@ -159,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
        return result;
 }
 
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+                                               struct inet_hashinfo *hashinfo,
+                                               struct sk_buff *skb, int doff,
+                                               const struct in6_addr *saddr,
+                                               const __be16 sport,
+                                               const struct in6_addr *daddr,
+                                               const u16 hnum)
+{ /* Returns NULL (no BPF result), an ERR_PTR (lookup failed), or a socket */
+       struct sock *sk, *reuse_sk;
+       bool no_reuseport;
+
+       if (hashinfo != &tcp_hashinfo)
+               return NULL; /* only TCP is supported */
+
+       no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+                                           saddr, sport, daddr, hnum, &sk);
+       if (no_reuseport || IS_ERR_OR_NULL(sk)) /* NULL/ERR_PTR pass through as-is */
+               return sk;
+
+       reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+       if (reuse_sk) /* a non-NULL result replaces the BPF-selected socket */
+               sk = reuse_sk;
+       return sk;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
                struct inet_hashinfo *hashinfo,
                struct sk_buff *skb, int doff,
@@ -170,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
        struct sock *result = NULL;
        unsigned int hash2;
 
+       /* Lookup redirect from BPF */
+       if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+               result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+                                             saddr, sport, daddr, hnum);
+               if (result)
+                       goto done;
+       }
+
        hash2 = ipv6_portaddr_hash(net, daddr, hnum);
        ilb2 = inet_lhash2_bucket(hashinfo, hash2);
 
-- 
2.25.4

Reply via email to