Hi Lawrence, On 12/01/2017 07:15 PM, Lawrence Brakmo wrote: > Adds read access to snd_cwnd and srtt_us fields of tcp_sock. Since these > fields are only valid if the socket associated with the sock_ops program > call is a full socket, the field is_fullsock is also added to the > bpf_sock_ops struct. If the socket is not a full socket, reading these > fields returns 0. > > Note that in most cases it will not be necessary to check is_fullsock to > know if there is a full socket. The context of the call, as specified by > the 'op' field, can sometimes determine whether there is a full socket. > > The struct bpf_sock_ops has the following fields added: > > __u32 is_fullsock; /* Some TCP fields are only valid if > * there is a full socket. If not, the > * fields read as zero. > */ > __u32 snd_cwnd; > __u32 srtt_us; /* Averaged RTT << 3 in usecs */ > > There is a new macro, SOCK_OPS_GET_TCP32(NAME), to make it easier to add > read access to more 32 bit tcp_sock fields. > > Signed-off-by: Lawrence Brakmo <bra...@fb.com> > --- > include/linux/filter.h | 1 + > include/net/tcp.h | 6 ++++-- > include/uapi/linux/bpf.h | 6 ++++++ > net/core/filter.c | 36 ++++++++++++++++++++++++++++++++++++ > 4 files changed, 47 insertions(+), 2 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 80b5b48..0062302 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -985,6 +985,7 @@ struct bpf_sock_ops_kern { > u32 reply; > u32 replylong[4]; > }; > + u32 is_fullsock; > }; > > #endif /* __LINUX_FILTER_H__ */ > diff --git a/include/net/tcp.h b/include/net/tcp.h > index 4e09398..89a6560 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2012,10 +2012,12 @@ static inline int tcp_call_bpf(struct sock *sk, int > op) > struct bpf_sock_ops_kern sock_ops; > int ret; > > - if (sk_fullsock(sk)) > + memset(&sock_ops, 0, sizeof(sock_ops)); > + if (sk_fullsock(sk)) { > + sock_ops.is_fullsock = 1; > sock_owned_by_me(sk); > + } > > - memset(&sock_ops, 0, sizeof(sock_ops)); > sock_ops.sk = sk; > sock_ops.op = op; > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 4c223ab..80d62e8 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -941,6 +941,12 @@ struct bpf_sock_ops { > __u32 local_ip6[4]; /* Stored in network byte order */ > __u32 remote_port; /* Stored in network byte order */ > __u32 local_port; /* stored in host byte order */ > + __u32 is_fullsock; /* Some TCP fields are only valid if > + * there is a full socket. If not, the > + * fields read as zero. > + */
Just a question for clarification: do you need to have this in the uapi struct as well? Meaning, do you have a specific use case where you do this check out of the BPF program given the below two snd_cwnd/srtt_us check this internally in the ctx rewrite? Do you plan to reuse the ctx assignment also for bpf_setsockopt() and bpf_getsockopt() internally? > + __u32 snd_cwnd; > + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ > }; > > /* List of known BPF sock_ops operators. > diff --git a/net/core/filter.c b/net/core/filter.c > index 6a85e67..bf31236 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -4437,6 +4437,42 @@ static u32 sock_ops_convert_ctx_access(enum > bpf_access_type type, > *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, > offsetof(struct sock_common, skc_num)); > break; > + > + case offsetof(struct bpf_sock_ops, is_fullsock): > + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( > + struct bpf_sock_ops_kern, > + is_fullsock), > + si->dst_reg, si->src_reg, > + offsetof(struct bpf_sock_ops_kern, > + is_fullsock)); > + break; > + > +/* Helper macro for adding read access to tcp_sock fields. */ > +#define SOCK_OPS_GET_TCP32(FIELD_NAME) > \ > + do { \ > + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \ > + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ > + struct bpf_sock_ops_kern, \ > + is_fullsock), \ > + si->dst_reg, si->src_reg, \ > + offsetof(struct bpf_sock_ops_kern, \ > + is_fullsock)); \ > + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ > + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ > + struct bpf_sock_ops_kern, sk),\ > + si->dst_reg, si->src_reg, \ > + offsetof(struct bpf_sock_ops_kern, sk));\ > + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \ > + offsetof(struct tcp_sock, FIELD_NAME)); \ > + } while (0) > + > + case offsetof(struct bpf_sock_ops, snd_cwnd): > + SOCK_OPS_GET_TCP32(snd_cwnd); > + break; > + > + case offsetof(struct bpf_sock_ops, srtt_us): > + SOCK_OPS_GET_TCP32(srtt_us); > + break; > } > return insn - insn_buf; > } >