On Mon, Sep 3, 2018 at 1:54 PM, Petar Penkov <ppen...@google.com> wrote: > > On Sun, Sep 2, 2018 at 2:03 PM, Daniel Borkmann <dan...@iogearbox.net> wrote: > > On 08/30/2018 08:22 PM, Petar Penkov wrote: > >> From: Petar Penkov <ppen...@google.com> > >> > >> Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and > >> attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector > >> path. The BPF program is per-network namespace. > >> > >> Signed-off-by: Petar Penkov <ppen...@google.com> > >> Signed-off-by: Willem de Bruijn <will...@google.com> > > [...] > >> + err = check_flow_keys_access(env, off, size); > >> + if (!err && t == BPF_READ && value_regno >= 0) > >> + mark_reg_unknown(env, regs, value_regno); > >> } else { > >> verbose(env, "R%d invalid mem access '%s'\n", regno, > >> reg_type_str[reg->type]); > >> @@ -1925,6 +1954,8 @@ static int check_helper_mem_access(struct > >> bpf_verifier_env *env, int regno, > >> case PTR_TO_PACKET_META: > >> return check_packet_access(env, regno, reg->off, access_size, > >> zero_size_allowed); > >> + case PTR_TO_FLOW_KEYS: > >> + return check_flow_keys_access(env, reg->off, access_size); > >> case PTR_TO_MAP_VALUE: > >> return check_map_access(env, regno, reg->off, access_size, > >> zero_size_allowed); > >> @@ -3976,6 +4007,7 @@ static bool may_access_skb(enum bpf_prog_type type) > >> case BPF_PROG_TYPE_SOCKET_FILTER: > >> case BPF_PROG_TYPE_SCHED_CLS: > >> case BPF_PROG_TYPE_SCHED_ACT: > >> + case BPF_PROG_TYPE_FLOW_DISSECTOR: > >> return true; > > > > This one should not be added here. It would allow for LD_ABS to be used, but > > you already have direct packet access as well as bpf_skb_load_bytes() helper > > enabled. Downside on LD_ABS is that error path will exit the BPF prog with > > return 0 for historical reasons w/o user realizing (here: to BPF_OK > > mapping). > > So we should not encourage use of LD_ABS/IND anymore in eBPF context and > > avoid surprises. 
> > > >> default: > >> return false; > >> @@ -4451,6 +4483,7 @@ static bool regsafe(struct bpf_reg_state *rold, > >> struct bpf_reg_state *rcur, > >> case PTR_TO_CTX: > >> case CONST_PTR_TO_MAP: > >> case PTR_TO_PACKET_END: > >> + case PTR_TO_FLOW_KEYS: > >> /* Only valid matches are exact, which memcmp() above > >> * would have accepted > >> */ > >> diff --git a/net/core/filter.c b/net/core/filter.c > >> index c25eb36f1320..0143b9c0c67e 100644 > >> --- a/net/core/filter.c > >> +++ b/net/core/filter.c > >> @@ -5092,6 +5092,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const > >> struct bpf_prog *prog) > >> } > >> } > >> > >> +static const struct bpf_func_proto * > >> +flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog > >> *prog) > >> +{ > >> + switch (func_id) { > >> + case BPF_FUNC_skb_load_bytes: > >> + return &bpf_skb_load_bytes_proto; > > > > Probably makes sense to also enable bpf_skb_pull_data helper for direct > > packet > > access use to fetch non-linear data from here once.
On closer look, it turns out that __skb_flow_dissect takes a const skb pointer, which conflicts with the realloc in bpf_skb_pull_data. However, we also don't need that helper if I change bpf_flow_dissect_get_header to fall back to bpf_skb_load_bytes when direct packet access fails. This is very similar to the existing use of __skb_header_pointer. See the snippet below:

static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							  __u16 hdr_size,
							  void *buffer)
{
	struct bpf_dissect_cb *cb = (struct bpf_dissect_cb *)(skb->cb);
	void *data_end = (__u8 *)(long)skb->data_end;
	void *data = (__u8 *)(long)skb->data;
	__u8 *hdr;

	/* Verifies this variable offset does not overflow */
	if (cb->nhoff > (USHRT_MAX - hdr_size))
		return NULL;

	hdr = data + cb->nhoff;
	if (hdr + hdr_size <= data_end)
		return hdr;

	if (bpf_skb_load_bytes(skb, cb->nhoff, buffer, hdr_size))
		return NULL;

	return buffer;
}

> > > >> + default: > >> + return bpf_base_func_proto(func_id); > >> + } > >> +} > >> + > >> static const struct bpf_func_proto * > >> lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > >> { > >> @@ -5207,6 +5218,7 @@ static bool bpf_skb_is_valid_access(int off, int > >> size, enum bpf_access_type type > >> case bpf_ctx_range(struct __sk_buff, data): > >> case bpf_ctx_range(struct __sk_buff, data_meta): > >> case bpf_ctx_range(struct __sk_buff, data_end): > >> + case bpf_ctx_range(struct __sk_buff, flow_keys): > >> if (size != size_default) > >> return false; > >> break; > >> @@ -5235,6 +5247,7 @@ static bool sk_filter_is_valid_access(int off, int > >> size, > >> case bpf_ctx_range(struct __sk_buff, data): > >> case bpf_ctx_range(struct __sk_buff, data_meta): > >> case bpf_ctx_range(struct __sk_buff, data_end): > >> + case bpf_ctx_range(struct __sk_buff, flow_keys): > >> case bpf_ctx_range_till(struct __sk_buff, family, local_port): > > [...] > > Thanks, > > Daniel > > Thank you for your feedback, Daniel! I'll make these changes and submit a v2. > Petar