On Fri, Nov 30, 2018 at 03:32:20PM -0800, Joe Stringer wrote: > + * the netns associated with the *ctx*. *netns* values beyond the > + * range of 32-bit integers are reserved for future use.
Would adding a word or two before "*netns*" here be helpful to placate the english pedants among us (such as myself)? Starting a sentence with a lowercase word, even if it's a variable name, just never really sits right with me. Any *netns* values ... Doesn't that kind of flow a bit better anyway? Anyway, now with that unimportant nitpick if off my chest, the rest looks fine to me. Ack with or without any changes to that comment. Acked-by: Joey Pabalinas <joeypabali...@gmail.com> On Fri, Nov 30, 2018 at 03:32:20PM -0800, Joe Stringer wrote: > David Ahern and Nicolas Dichtel report that the handling of the netns id > 0 is incorrect for the BPF socket lookup helpers: rather than finding > the netns with id 0, it is resolving to the current netns. This renders > the netns_id 0 inaccessible. > > To fix this, adjust the API for the netns to treat all negative s32 > values as a lookup in the current netns (including u64 values which when > truncated to s32 become negative), while any values with a positive > value in the signed 32-bit integer space would result in a lookup for a > socket in the netns corresponding to that id. As before, if the netns > with that ID does not exist, no socket will be found. Any netns outside > of these ranges will fail to find a corresponding socket, as those > values are reserved for future usage. > > Signed-off-by: Joe Stringer <j...@wand.net.nz> > Acked-by: Nicolas Dichtel <nicolas.dich...@6wind.com> > --- > include/uapi/linux/bpf.h | 35 ++++++++++------- > net/core/filter.c | 11 +++--- > tools/include/uapi/linux/bpf.h | 39 ++++++++++++------- > tools/testing/selftests/bpf/bpf_helpers.h | 4 +- > .../selftests/bpf/test_sk_lookup_kern.c | 18 ++++----- > 5 files changed, 63 insertions(+), 44 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 852dc17ab47a..ad68b472dad2 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2170,7 +2170,7 @@ union bpf_attr { > * Return > * 0 on success, or a negative error in case of failure. > * > - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u32 netns, u64 flags) > + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u64 netns, u64 flags) > * Description > * Look for TCP socket matching *tuple*, optionally in a child > * network namespace *netns*. The return value must be checked, > @@ -2187,12 +2187,14 @@ union bpf_attr { > * **sizeof**\ (*tuple*\ **->ipv6**) > * Look for an IPv6 socket. > * > - * If the *netns* is zero, then the socket lookup table in the > - * netns associated with the *ctx* will be used. For the TC hooks, > - * this in the netns of the device in the skb. For socket hooks, > - * this in the netns of the socket. If *netns* is non-zero, then > - * it specifies the ID of the netns relative to the netns > - * associated with the *ctx*. > + * If the *netns* is a negative signed 32-bit integer, then the > + * socket lookup table in the netns associated with the *ctx* will > + * will be used. For the TC hooks, this is the netns of the device > + * in the skb. For socket hooks, this is the netns of the socket. > + * If *netns* is any other signed 32-bit value greater than or > + * equal to zero then it specifies the ID of the netns relative to > + * the netns associated with the *ctx*. *netns* values beyond the > + * range of 32-bit integers are reserved for future use. > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2202,7 +2204,7 @@ union bpf_attr { > * Return > * Pointer to *struct bpf_sock*, or NULL in case of failure. > * > - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u32 netns, u64 flags) > + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u64 netns, u64 flags) > * Description > * Look for UDP socket matching *tuple*, optionally in a child > * network namespace *netns*. The return value must be checked, > @@ -2219,12 +2221,14 @@ union bpf_attr { > * **sizeof**\ (*tuple*\ **->ipv6**) > * Look for an IPv6 socket. > * > - * If the *netns* is zero, then the socket lookup table in the > - * netns associated with the *ctx* will be used. For the TC hooks, > - * this in the netns of the device in the skb. For socket hooks, > - * this in the netns of the socket. If *netns* is non-zero, then > - * it specifies the ID of the netns relative to the netns > - * associated with the *ctx*. > + * If the *netns* is a negative signed 32-bit integer, then the > + * socket lookup table in the netns associated with the *ctx* will > + * will be used. For the TC hooks, this is the netns of the device > + * in the skb. For socket hooks, this is the netns of the socket. > + * If *netns* is any other signed 32-bit value greater than or > + * equal to zero then it specifies the ID of the netns relative to > + * the netns associated with the *ctx*. *netns* values beyond the > + * range of 32-bit integers are reserved for future use. > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2405,6 +2409,9 @@ enum bpf_func_id { > /* BPF_FUNC_perf_event_output for sk_buff input context. */ > #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) > > +/* Current network namespace */ > +#define BPF_F_CURRENT_NETNS (-1L) > + > /* Mode for BPF_FUNC_skb_adjust_room helper. */ > enum bpf_adj_room_mode { > BPF_ADJ_ROOM_NET, > diff --git a/net/core/filter.c b/net/core/filter.c > index 9a1327eb25fa..d00b55929ab8 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct > bpf_sock_tuple *tuple, u32 len, > struct net *net; > > family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; > - if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) > + if (unlikely(family == AF_UNSPEC || flags || > + !((s32)netns_id < 0 || netns_id <= S32_MAX))) > goto out; > > if (skb->dev) > caller_net = dev_net(skb->dev); > else > caller_net = sock_net(skb->sk); > - if (netns_id) { > + if ((s32)netns_id < 0) { > + net = caller_net; > + sk = sk_lookup(net, tuple, skb, family, proto); > + } else { > net = get_net_ns_by_id(caller_net, netns_id); > if (unlikely(!net)) > goto out; > sk = sk_lookup(net, tuple, skb, family, proto); > put_net(net); > - } else { > - net = caller_net; > - sk = sk_lookup(net, tuple, skb, family, proto); > } > > if (sk) > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 852dc17ab47a..de2072ef475b 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -2170,7 +2170,7 @@ union bpf_attr { > * Return > * 0 on success, or a negative error in case of failure. > * > - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u32 netns, u64 flags) > + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u64 netns, u64 flags) > * Description > * Look for TCP socket matching *tuple*, optionally in a child > * network namespace *netns*. The return value must be checked, > @@ -2187,12 +2187,14 @@ union bpf_attr { > * **sizeof**\ (*tuple*\ **->ipv6**) > * Look for an IPv6 socket. > * > - * If the *netns* is zero, then the socket lookup table in the > - * netns associated with the *ctx* will be used. For the TC hooks, > - * this in the netns of the device in the skb. For socket hooks, > - * this in the netns of the socket. If *netns* is non-zero, then > - * it specifies the ID of the netns relative to the netns > - * associated with the *ctx*. > + * If the *netns* is a negative signed 32-bit integer, then the > + * socket lookup table in the netns associated with the *ctx* will > + * will be used. For the TC hooks, this is the netns of the device > + * in the skb. For socket hooks, this is the netns of the socket. > + * If *netns* is any other signed 32-bit value greater than or > + * equal to zero then it specifies the ID of the netns relative to > + * the netns associated with the *ctx*. *netns* values beyond the > + * range of 32-bit integers are reserved for future use. > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2201,8 +2203,10 @@ union bpf_attr { > * **CONFIG_NET** configuration option. > * Return > * Pointer to *struct bpf_sock*, or NULL in case of failure. > + * For sockets with reuseport option, *struct bpf_sock* > + * return is from reuse->socks[] using hash of the packet. > * > - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u32 netns, u64 flags) > + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple > *tuple, u32 tuple_size, u64 netns, u64 flags) > * Description > * Look for UDP socket matching *tuple*, optionally in a child > * network namespace *netns*. The return value must be checked, > @@ -2219,12 +2223,14 @@ union bpf_attr { > * **sizeof**\ (*tuple*\ **->ipv6**) > * Look for an IPv6 socket. > * > - * If the *netns* is zero, then the socket lookup table in the > - * netns associated with the *ctx* will be used. For the TC hooks, > - * this in the netns of the device in the skb. For socket hooks, > - * this in the netns of the socket. If *netns* is non-zero, then > - * it specifies the ID of the netns relative to the netns > - * associated with the *ctx*. > + * If the *netns* is a negative signed 32-bit integer, then the > + * socket lookup table in the netns associated with the *ctx* will > + * will be used. For the TC hooks, this is the netns of the device > + * in the skb. For socket hooks, this is the netns of the socket. > + * If *netns* is any other signed 32-bit value greater than or > + * equal to zero then it specifies the ID of the netns relative to > + * the netns associated with the *ctx*. *netns* values beyond the > + * range of 32-bit integers are reserved for future use. > * > * All values for *flags* are reserved for future usage, and must > * be left at zero. > @@ -2233,6 +2239,8 @@ union bpf_attr { > * **CONFIG_NET** configuration option. > * Return > * Pointer to *struct bpf_sock*, or NULL in case of failure. > + * For sockets with reuseport option, *struct bpf_sock* > + * return is from reuse->socks[] using hash of the packet. > * > * int bpf_sk_release(struct bpf_sock *sk) > * Description > @@ -2405,6 +2413,9 @@ enum bpf_func_id { > /* BPF_FUNC_perf_event_output for sk_buff input context. */ > #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) > > +/* Current network namespace */ > +#define BPF_F_CURRENT_NETNS (-1L) > + > /* Mode for BPF_FUNC_skb_adjust_room helper. */ > enum bpf_adj_room_mode { > BPF_ADJ_ROOM_NET, > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h > b/tools/testing/selftests/bpf/bpf_helpers.h > index 686e57ce40f4..efb6c13ab0de 100644 > --- a/tools/testing/selftests/bpf/bpf_helpers.h > +++ b/tools/testing/selftests/bpf/bpf_helpers.h > @@ -154,12 +154,12 @@ static unsigned long long > (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = > (void *) BPF_FUNC_skb_ancestor_cgroup_id; > static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, > struct bpf_sock_tuple *tuple, > - int size, unsigned int netns_id, > + int size, unsigned long long > netns_id, > unsigned long long flags) = > (void *) BPF_FUNC_sk_lookup_tcp; > static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, > struct bpf_sock_tuple *tuple, > - int size, unsigned int netns_id, > + int size, unsigned long long > netns_id, > unsigned long long flags) = > (void *) BPF_FUNC_sk_lookup_udp; > static int (*bpf_sk_release)(struct bpf_sock *sk) = > diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c > b/tools/testing/selftests/bpf/test_sk_lookup_kern.c > index b745bdc08c2b..e21cd736c196 100644 > --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c > +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c > @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) > return TC_ACT_SHOT; > > tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); > - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); > + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); > if (sk) > bpf_sk_release(sk); > return sk ? TC_ACT_OK : TC_ACT_UNSPEC; > @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) > struct bpf_sock_tuple tuple = {}; > struct bpf_sock *sk; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > if (sk) > bpf_sk_release(sk); > return 0; > @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) > struct bpf_sock *sk; > __u32 family = 0; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > if (sk) { > bpf_sk_release(sk); > family = sk->family; > @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) > struct bpf_sock *sk; > __u32 family; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > if (sk) { > sk += 1; > bpf_sk_release(sk); > @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) > struct bpf_sock *sk; > __u32 family; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > sk += 1; > if (sk) > bpf_sk_release(sk); > @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) > { > struct bpf_sock_tuple tuple = {}; > > - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); > return 0; > } > > @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) > struct bpf_sock_tuple tuple = {}; > struct bpf_sock *sk; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > bpf_sk_release(sk); > bpf_sk_release(sk); > return 0; > @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) > struct bpf_sock_tuple tuple = {}; > struct bpf_sock *sk; > > - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, > 0); > bpf_sk_release(sk); > return 0; > } > @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) > void lookup_no_release(struct __sk_buff *skb) > { > struct bpf_sock_tuple tuple = {}; > - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); > + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); > } > > SEC("fail_no_release_subcall") > -- > 2.19.1 > -- Cheers, Joey Pabalinas
signature.asc
Description: PGP signature