On 09/21/2018 07:10 PM, Joe Stringer wrote:
> Teach the verifier a little bit about a new type of pointer, a
> PTR_TO_SOCKET. This pointer type is accessed from BPF through the
> 'struct bpf_sock' structure.
> 
> Signed-off-by: Joe Stringer <j...@wand.net.nz>
> 
> ---
> 
> v2: Reuse reg_type_mismatch() in more places
>     Reduce the number of passes at convert_ctx_access()
> ---
>  include/linux/bpf.h          |  17 +++++
>  include/linux/bpf_verifier.h |   2 +
>  kernel/bpf/verifier.c        | 120 +++++++++++++++++++++++++++++++----
>  net/core/filter.c            |  30 +++++----
>  4 files changed, 143 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 988a00797bcd..daeb0d343d9c 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -154,6 +154,7 @@ enum bpf_arg_type {
>  
>       ARG_PTR_TO_CTX,         /* pointer to context */
>       ARG_ANYTHING,           /* any (initialized) argument is ok */
> +     ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock */
>  };
>  
>  /* type of values returned from helper functions */
> @@ -162,6 +163,7 @@ enum bpf_return_type {
>       RET_VOID,                       /* function doesn't return anything */
>       RET_PTR_TO_MAP_VALUE,           /* returns a pointer to map elem value 
> */
>       RET_PTR_TO_MAP_VALUE_OR_NULL,   /* returns a pointer to map elem value 
> or NULL */
> +     RET_PTR_TO_SOCKET_OR_NULL,      /* returns a pointer to a socket or 
> NULL */
>  };
>  
>  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF 
> programs
> @@ -213,6 +215,8 @@ enum bpf_reg_type {
>       PTR_TO_PACKET,           /* reg points to skb->data */
>       PTR_TO_PACKET_END,       /* skb->data + headlen */
>       PTR_TO_FLOW_KEYS,        /* reg points to bpf_flow_keys */
> +     PTR_TO_SOCKET,           /* reg points to struct bpf_sock */
> +     PTR_TO_SOCKET_OR_NULL,   /* reg points to struct bpf_sock or NULL */
>  };
>  
>  /* The information passed from prog-specific *_is_valid_access
> @@ -335,6 +339,11 @@ const struct bpf_func_proto 
> *bpf_get_trace_printk_proto(void);
>  
>  typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>                                       unsigned long off, unsigned long len);
> +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
> +                                     const struct bpf_insn *src,
> +                                     struct bpf_insn *dst,
> +                                     struct bpf_prog *prog,
> +                                     u32 *target_size);
>  
>  u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 
> meta_size,
>                    void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
> @@ -828,4 +837,12 @@ extern const struct bpf_func_proto 
> bpf_get_local_storage_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
> +                           struct bpf_insn_access_aux *info);
> +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
> +                             const struct bpf_insn *si,
> +                             struct bpf_insn *insn_buf,
> +                             struct bpf_prog *prog,
> +                             u32 *target_size);
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index af262b97f586..23a2b17bfd75 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -58,6 +58,8 @@ struct bpf_reg_state {
>        * offset, so they can share range knowledge.
>        * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
>        * came from, when one is tested for != NULL.
> +      * For PTR_TO_SOCKET this is used to share which pointers retain the
> +      * same reference to the socket, to determine proper reference freeing.
>        */
>       u32 id;
>       /* For scalar types (SCALAR_VALUE), this represents our knowledge of
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 7dccb18ede03..1fee63d82290 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const 
> bpf_verifier_ops[] = {
>   * (like pointer plus pointer becomes SCALAR_VALUE type)
>   *
>   * When verifier sees load or store instructions the type of base register
> - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three 
> pointer
> - * types recognized by check_mem_access() function.
> + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These 
> are
> + * four pointer types recognized by check_mem_access() function.
>   *
>   * PTR_TO_MAP_VALUE means that this register is pointing to 'map element 
> value'
>   * and the range of [ptr, ptr + map's value_size) is accessible.
> @@ -267,6 +267,8 @@ static const char * const reg_type_str[] = {
>       [PTR_TO_PACKET_META]    = "pkt_meta",
>       [PTR_TO_PACKET_END]     = "pkt_end",
>       [PTR_TO_FLOW_KEYS]      = "flow_keys",
> +     [PTR_TO_SOCKET]         = "sock",
> +     [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
>  };
>  
>  static char slot_type_char[] = {
> @@ -973,6 +975,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
>       case PTR_TO_PACKET_END:
>       case PTR_TO_FLOW_KEYS:
>       case CONST_PTR_TO_MAP:
> +     case PTR_TO_SOCKET:
> +     case PTR_TO_SOCKET_OR_NULL:
>               return true;
>       default:
>               return false;
> @@ -1341,6 +1345,28 @@ static int check_flow_keys_access(struct 
> bpf_verifier_env *env, int off,
>       return 0;
>  }
>  
> +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int 
> off,
> +                          int size, enum bpf_access_type t)
> +{
> +     struct bpf_reg_state *regs = cur_regs(env);
> +     struct bpf_reg_state *reg = &regs[regno];
> +     struct bpf_insn_access_aux info;
> +
> +     if (reg->smin_value < 0) {
> +             verbose(env, "R%d min value is negative, either use unsigned 
> index or do a if (index >=0) check.\n",
> +                     regno);
> +             return -EACCES;
> +     }
> +
> +     if (!bpf_sock_is_valid_access(off, size, t, &info)) {
> +             verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n",
> +                     off, size);
> +             return -EACCES;
> +     }
> +
> +     return 0;
> +}
> +
>  static bool __is_pointer_value(bool allow_ptr_leaks,
>                              const struct bpf_reg_state *reg)
>  {
> @@ -1459,6 +1485,9 @@ static int check_ptr_alignment(struct bpf_verifier_env 
> *env,
>                */
>               strict = true;
>               break;
> +     case PTR_TO_SOCKET:
> +             pointer_desc = "sock ";
> +             break;
>       default:
>               break;
>       }
> @@ -1726,6 +1755,14 @@ static int check_mem_access(struct bpf_verifier_env 
> *env, int insn_idx, u32 regn
>               err = check_flow_keys_access(env, off, size);
>               if (!err && t == BPF_READ && value_regno >= 0)
>                       mark_reg_unknown(env, regs, value_regno);
> +     } else if (reg->type == PTR_TO_SOCKET) {
> +             if (t == BPF_WRITE) {
> +                     verbose(env, "cannot write into socket\n");
> +                     return -EACCES;
> +             }
> +             err = check_sock_access(env, regno, off, size, t);
> +             if (!err && value_regno >= 0)
> +                     mark_reg_unknown(env, regs, value_regno);
>       } else {
>               verbose(env, "R%d invalid mem access '%s'\n", regno,
>                       reg_type_str[reg->type]);
> @@ -1949,6 +1986,10 @@ static int check_func_arg(struct bpf_verifier_env 
> *env, u32 regno,
>               err = check_ctx_reg(env, reg, regno);
>               if (err < 0)
>                       return err;
> +     } else if (arg_type == ARG_PTR_TO_SOCKET) {
> +             expected_type = PTR_TO_SOCKET;
> +             if (type != expected_type)
> +                     goto err_type;
>       } else if (arg_type_is_mem_ptr(arg_type)) {
>               expected_type = PTR_TO_STACK;
>               /* One exception here. In case function allows for NULL to be
> @@ -2542,6 +2583,10 @@ static int check_helper_call(struct bpf_verifier_env 
> *env, int func_id, int insn
>               }
>               regs[BPF_REG_0].map_ptr = meta.map_ptr;
>               regs[BPF_REG_0].id = ++env->id_gen;
> +     } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
> +             mark_reg_known_zero(env, regs, BPF_REG_0);
> +             regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
> +             regs[BPF_REG_0].id = ++env->id_gen;
>       } else {
>               verbose(env, "unknown return type %d of func %s#%d\n",
>                       fn->ret_type, func_id_name(func_id), func_id);
> @@ -2679,6 +2724,8 @@ static int adjust_ptr_min_max_vals(struct 
> bpf_verifier_env *env,
>               return -EACCES;
>       case CONST_PTR_TO_MAP:
>       case PTR_TO_PACKET_END:
> +     case PTR_TO_SOCKET:
> +     case PTR_TO_SOCKET_OR_NULL:
>               verbose(env, "R%d pointer arithmetic on %s prohibited\n",
>                       dst, reg_type_str[ptr_reg->type]);
>               return -EACCES;
> @@ -3626,6 +3673,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state 
> *reg, u32 id,
>                       } else {
>                               reg->type = PTR_TO_MAP_VALUE;
>                       }
> +             } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
> +                     reg->type = PTR_TO_SOCKET;
>               }
>               /* We don't need id from this point onwards anymore, thus we
>                * should better reset it, so that state pruning has chances
> @@ -4401,6 +4450,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct 
> bpf_reg_state *rcur,
>       case CONST_PTR_TO_MAP:
>       case PTR_TO_PACKET_END:
>       case PTR_TO_FLOW_KEYS:
> +     case PTR_TO_SOCKET:
> +     case PTR_TO_SOCKET_OR_NULL:
>               /* Only valid matches are exact, which memcmp() above
>                * would have accepted
>                */
> @@ -4678,6 +4729,37 @@ static int is_state_visited(struct bpf_verifier_env 
> *env, int insn_idx)
>       return 0;
>  }
>  
> +/* Return true if it's OK to have the same insn return a different type. */
> +static bool reg_type_mismatch_ok(enum bpf_reg_type type)
> +{
> +     switch (type) {
> +     case PTR_TO_CTX:
> +     case PTR_TO_SOCKET:
> +     case PTR_TO_SOCKET_OR_NULL:
> +             return false;
> +     default:
> +             return true;
> +     }
> +}
> +
> +/* If an instruction was previously used with particular pointer types, then 
> we
> + * need to be careful to avoid cases such as the below, where it may be ok
> + * for one branch accessing the pointer, but not ok for the other branch:
> + *
> + * R1 = sock_ptr
> + * goto X;
> + * ...
> + * R1 = some_other_valid_ptr;
> + * goto X;
> + * ...
> + * R2 = *(u32 *)(R1 + 0);
> + */
> +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
> +{
> +     return src != prev && (!reg_type_mismatch_ok(src) ||
> +                            !reg_type_mismatch_ok(prev));
> +}
> +
>  static int do_check(struct bpf_verifier_env *env)
>  {
>       struct bpf_verifier_state *state;
> @@ -4810,9 +4892,7 @@ static int do_check(struct bpf_verifier_env *env)
>                                */
>                               *prev_src_type = src_reg_type;
>  
> -                     } else if (src_reg_type != *prev_src_type &&
> -                                (src_reg_type == PTR_TO_CTX ||
> -                                 *prev_src_type == PTR_TO_CTX)) {
> +                     } else if (reg_type_mismatch(src_reg_type, 
> *prev_src_type)) {
>                               /* ABuser program is trying to use the same insn
>                                * dst_reg = *(u32*) (src_reg + off)
>                                * with different pointer types:
> @@ -4857,9 +4937,7 @@ static int do_check(struct bpf_verifier_env *env)
>  
>                       if (*prev_dst_type == NOT_INIT) {
>                               *prev_dst_type = dst_reg_type;
> -                     } else if (dst_reg_type != *prev_dst_type &&
> -                                (dst_reg_type == PTR_TO_CTX ||
> -                                 *prev_dst_type == PTR_TO_CTX)) {
> +                     } else if (reg_type_mismatch(dst_reg_type, 
> *prev_dst_type)) {
>                               verbose(env, "same insn cannot be used with 
> different pointers\n");
>                               return -EINVAL;
>                       }
> @@ -5276,8 +5354,10 @@ static void sanitize_dead_code(struct bpf_verifier_env 
> *env)
>       }
>  }
>  
> -/* convert load instructions that access fields of 'struct __sk_buff'
> - * into sequence of instructions that access fields of 'struct sk_buff'
> +/* convert load instructions that access fields of a context type into a
> + * sequence of instructions that access fields of the underlying structure:
> + *     struct __sk_buff    -> struct sk_buff
> + *     struct bpf_sock_ops -> struct sock
>   */
>  static int convert_ctx_accesses(struct bpf_verifier_env *env)
>  {
> @@ -5306,12 +5386,14 @@ static int convert_ctx_accesses(struct 
> bpf_verifier_env *env)
>               }
>       }
>  
> -     if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> +     if (bpf_prog_is_dev_bound(env->prog->aux))
>               return 0;
>  
>       insn = env->prog->insnsi + delta;
>  
>       for (i = 0; i < insn_cnt; i++, insn++) {
> +             bpf_convert_ctx_access_t convert_ctx_access;
> +
>               if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
>                   insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
>                   insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
> @@ -5353,8 +5435,18 @@ static int convert_ctx_accesses(struct 
> bpf_verifier_env *env)
>                       continue;
>               }
>  
> -             if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
> +             switch (env->insn_aux_data[i + delta].ptr_type) {
> +             case PTR_TO_CTX:
> +                     if (!ops->convert_ctx_access)
> +                             continue;
> +                     convert_ctx_access = ops->convert_ctx_access;
> +                     break;
> +             case PTR_TO_SOCKET:
> +                     convert_ctx_access = bpf_sock_convert_ctx_access;
> +                     break;
> +             default:
>                       continue;
> +             }
>  
>               ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
>               size = BPF_LDST_BYTES(insn);

When you compile without NET, this would need to have a stub implementation as
otherwise it might break build. We recently tripped over it in bpf flow 
dissector
as well.

Reply via email to