Hi David.
Thanks for the patch. Please see a few comments below.
> @@ -975,6 +978,161 @@ static tree bpf_core_compute (tree, vec<unsigned int>
> *);
> static int bpf_core_get_index (const tree);
> static bool is_attr_preserve_access (tree);
>
> +static void
> +maybe_make_core_relo (tree expr, enum btf_core_reloc_kind kind)
This function is missing a comment explaining what it does.
> +{
> + /* If we are not targetting BPF CO-RE, do not make a relocation. We
> + might not be generating any debug info at all. */
> + if (!TARGET_BPF_CORE)
> + return;
> +
> + auto_vec<unsigned int, 16> accessors;
> + tree container = bpf_core_compute (expr, &accessors);
> +
> + /* Any valid use of the builtin must have at least one access. Otherwise,
> + there is nothing to record and nothing to do. This is primarily a
> + guard against optimizations leading to unexpected expressions in the
> + argument of the builtin. For example, if the builtin is used to read
> + a field of a structure which can be statically determined to hold a
> + constant value, the argument to the builtin will be optimized to that
> + constant. This is OK, and means the builtin call is superfluous.
> + e.g.
> + struct S foo;
> + foo.a = 5;
> + int x = __preserve_access_index (foo.a);
> + ... do stuff with x
> + 'foo.a' in the builtin argument will be optimized to '5' with -01+.
> + This sequence does not warrant recording a CO-RE relocation. */
> +
> + if (accessors.length () < 1)
> + return;
> + accessors.reverse ();
> +
> + rtx_code_label *label = gen_label_rtx ();
> + LABEL_PRESERVE_P (label) = 1;
> + emit_label (label);
> +
> + /* Determine what output section this relocation will apply to.
> + If this function is associated with a section, use that. Otherwise,
> + fall back on '.text'. */
> + const char * section_name;
> + if (current_function_decl && DECL_SECTION_NAME (current_function_decl))
> + section_name = DECL_SECTION_NAME (current_function_decl);
> + else
> + section_name = ".text";
> +
> + /* Add the CO-RE relocation information to the BTF container. */
> + bpf_core_reloc_add (TREE_TYPE (container), section_name, &accessors, label,
> + kind);
> +}
> +
> +/* Expand a call to __builtin_preserve_field_info by evaluating the requested
> + information about SRC according to KIND, and return a tree holding
> + the result. */
> +
> +static tree
> +bpf_core_field_info (tree src, enum btf_core_reloc_kind kind)
> +{
> + unsigned int result;
> + poly_int64 bitsize, bitpos;
> + tree var_off;
> + machine_mode mode;
> + int unsignedp, reversep, volatilep;
> +
> + get_inner_reference (src, &bitsize, &bitpos, &var_off, &mode, &unsignedp,
> + &reversep, &volatilep);
Since the information returned by the builtin is always constant
(positions, sizes) I think you will want to adjust the code for the
eventuality of variable-positioned fields and also variable-sized
fields.
get_inner_reference sets var_off to a tree if the position of the field
is variable; in these cases `bitpos' is relative to that position.
Likewise, for a variable-sized field get_inner_reference sets `mode' to
BLKmode and `bitsize' to -1.
I'm not sure what the built-in is supposed to do/return in these cases.
I guess it makes sense to error out, but what does LLVM do?
> +
> + /* Note: Use DECL_BIT_FIELD_TYPE rather than DECL_BIT_FIELD here, because
> it
> + remembers whether the field in question was originally declared as a
> + bitfield, regardless of how it has been optimized. */
> + bool bitfieldp = (TREE_CODE (src) == COMPONENT_REF
> + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (src, 1)));
> +
> + unsigned int align = TYPE_ALIGN (TREE_TYPE (src));
> + if (TREE_CODE (src) == COMPONENT_REF)
> + {
> + tree field = TREE_OPERAND (src, 1);
> + if (DECL_BIT_FIELD_TYPE (field))
> + align = TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field));
> + else
> + align = TYPE_ALIGN (TREE_TYPE (field));
> + }
> +
> + unsigned int start_bitpos = bitpos & ~(align - 1);
> + unsigned int end_bitpos = start_bitpos + align;
> +
> + switch (kind)
> + {
> + case BPF_RELO_FIELD_BYTE_OFFSET:
> + {
> + if (bitfieldp)
> + result = start_bitpos / 8;
> + else
> + result = bitpos / 8;
> + }
> + break;
> +
> + case BPF_RELO_FIELD_BYTE_SIZE:
> + {
> + if (bitfieldp)
> + {
> + /* To match LLVM behavior, byte size of bitfields is recorded as
> + the full size of the base type. A 3-bit bitfield of type int is
> + therefore recorded as having a byte size of 4 bytes. */
> + result = end_bitpos - start_bitpos;
> + if (result & (result - 1))
> + error ("unsupported field expression");
> + result = result / 8;
> + }
> + else
> + result = bitsize / 8;
> + }
> + break;
> +
> + case BPF_RELO_FIELD_EXISTS:
> + /* The field always exists at compile time. */
> + result = 1;
> + break;
> +
> + case BPF_RELO_FIELD_SIGNED:
> + result = !unsignedp;
> + break;
> +
> + case BPF_RELO_FIELD_LSHIFT_U64:
> + case BPF_RELO_FIELD_RSHIFT_U64:
> + {
> + if (!bitfieldp)
> + {
> + if (bitsize > 64)
> + error ("field size too large");
> + result = 64 - bitsize;
> + break;
> + }
> +
> + if (end_bitpos - start_bitpos > 64)
> + error ("field size too large");
> +
> + if (kind == BPF_RELO_FIELD_LSHIFT_U64)
> + {
> + if (TARGET_BIG_ENDIAN)
> + result = bitpos + 64 - start_bitpos - align;
> + else
> + result = start_bitpos + 64 - bitpos - bitsize;
> + }
> + else /* RSHIFT_U64 */
> + result = 64 - bitsize;
> + }
> + break;
> +
> + default:
> + error ("invalid argument to built-in function");
> + return error_mark_node;
> + break;
> + }
> +
> + return build_int_cst (unsigned_type_node, result);
> +}
> +
> /* Expand a call to a BPF-specific built-in function that was set up
> with bpf_init_builtins. */
>
> @@ -1025,17 +1183,15 @@ bpf_expand_builtin (tree exp, rtx target
> ATTRIBUTE_UNUSED,
> /* The result of the load is in R0. */
> return gen_rtx_REG (ops[0].mode, BPF_R0);
> }
> +
> else if (code == -1)
> {
> - /* A resolved overloaded builtin, e.g. __bpf_preserve_access_index_si
> */
> + /* A resolved overloaded __builtin_preserve_access_index. */
> tree arg = CALL_EXPR_ARG (exp, 0);
>
> if (arg == NULL_TREE)
> return NULL_RTX;
>
> - auto_vec<unsigned int, 16> accessors;
> - tree container;
> -
> if (TREE_CODE (arg) == SSA_NAME)
> {
> gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
> @@ -1049,51 +1205,42 @@ bpf_expand_builtin (tree exp, rtx target
> ATTRIBUTE_UNUSED,
> /* Avoid double-recording information if the argument is an access to
> a struct/union marked __attribute__((preserve_access_index)). This
> Will be handled by the attribute handling pass. */
> - if (is_attr_preserve_access (arg))
> - return expand_normal (arg);
> -
> - container = bpf_core_compute (arg, &accessors);
> -
> - /* Any valid use of the builtin must have at least one access.
> Otherwise,
> - there is nothing to record and nothing to do. This is primarily a
> - guard against optimizations leading to unexpected expressions in the
> - argument of the builtin. For example, if the builtin is used to read
> - a field of a structure which can be statically determined to hold a
> - constant value, the argument to the builtin will be optimized to that
> - constant. This is OK, and means the builtin call is superfluous.
> - e.g.
> - struct S foo;
> - foo.a = 5;
> - int x = __preserve_access_index (foo.a);
> - ... do stuff with x
> - 'foo.a' in the builtin argument will be optimized to '5' with -01+.
> - This sequence does not warrant recording a CO-RE relocation. */
> -
> - if (accessors.length () < 1)
> - return expand_normal (arg);
> -
> - accessors.reverse ();
> -
> - container = TREE_TYPE (container);
> -
> - rtx_code_label *label = gen_label_rtx ();
> - LABEL_PRESERVE_P (label) = 1;
> - emit_label (label);
> -
> - /* Determine what output section this relocation will apply to.
> - If this function is associated with a section, use that. Otherwise,
> - fall back on '.text'. */
> - const char * section_name;
> - if (current_function_decl && DECL_SECTION_NAME (current_function_decl))
> - section_name = DECL_SECTION_NAME (current_function_decl);
> + if (!is_attr_preserve_access (arg))
> + maybe_make_core_relo (arg, BPF_RELO_FIELD_BYTE_OFFSET);
> +
> + return expand_normal (arg);
> + }
> +
> + else if (code == -2)
> + {
> + /* A resolved overloaded __builtin_preserve_field_info. */
> + tree src = CALL_EXPR_ARG (exp, 0);
> + tree kind_tree = CALL_EXPR_ARG (exp, 1);
> + unsigned HOST_WIDE_INT kind_val;
> + if (tree_fits_uhwi_p (kind_tree))
> + kind_val = tree_to_uhwi (kind_tree);
> else
> - section_name = ".text";
> + error ("invalid argument to built-in function");
>
> - /* Add the CO-RE relocation information to the BTF container. */
> - bpf_core_reloc_add (container, section_name, &accessors, label);
> + enum btf_core_reloc_kind kind = (enum btf_core_reloc_kind) kind_val;
>
> - return expand_normal (arg);
> + if (TREE_CODE (src) == SSA_NAME)
> + {
> + gimple *def_stmt = SSA_NAME_DEF_STMT (src);
> + if (is_gimple_assign (def_stmt))
> + src = gimple_assign_rhs1 (def_stmt);
> + }
> + if (TREE_CODE (src) == ADDR_EXPR)
> + src = TREE_OPERAND (src, 0);
> +
> + tree result = bpf_core_field_info (src, kind);
If I read this properly, for something like:
__builtin_preserve_field_info (a = foo.bar + bar.baz, KIND)
On one side CO-RE relocations are computed for both foo.bar and bar.baz
(I see bpf_core_compute does that) as expected.
But then the builtin returns information that can only apply to one
access. Which one?
> +
> + if (result != error_mark_node)
> + maybe_make_core_relo (src, kind);
> +
> + return expand_normal (result);
> }
> +
> gcc_unreachable ();
> }
>
> @@ -1259,17 +1406,29 @@ bpf_core_get_index (const tree node)
> __builtin_preserve_access_index. */
>
> static tree
> -bpf_core_newdecl (tree type)
> +bpf_core_newdecl (tree type, bool is_pai)
> {
> - tree rettype = build_function_type_list (type, type, NULL);
> + tree rettype;
> char name[80];
> - int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> + static unsigned long pai_count = 0;
> + static unsigned long pfi_count = 0;
>
> - static unsigned long cnt = 0;
> - len = snprintf (name + len, sizeof (name) - len, "%lu", cnt++);
> + if (is_pai)
> + {
> + rettype = build_function_type_list (type, type, NULL);
> + int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> + len = snprintf (name + len, sizeof (name) - len, "%lu", pai_count++);
> + }
> + else
> + {
> + rettype = build_function_type_list (unsigned_type_node, type,
> + unsigned_type_node, NULL);
> + int len = snprintf (name, sizeof (name), "%s", "__builtin_pfi_");
> + len = snprintf (name + len, sizeof (name) - len, "%lu", pfi_count++);
> + }
>
> - return add_builtin_function_ext_scope (name, rettype, -1, BUILT_IN_MD,
> NULL,
> - NULL_TREE);
> + return add_builtin_function_ext_scope (name, rettype, is_pai ? -1 : -2,
> + BUILT_IN_MD, NULL, NULL_TREE);
> }
>
> /* Return whether EXPR could access some aggregate data structure that
> @@ -1278,22 +1437,33 @@ bpf_core_newdecl (tree type)
> static int
> bpf_core_is_maybe_aggregate_access (tree expr)
> {
> - enum tree_code code = TREE_CODE (expr);
> - if (code == COMPONENT_REF || code == ARRAY_REF)
> - return 1;
> -
> - if (code == ADDR_EXPR)
> + switch (TREE_CODE (expr))
> + {
> + case COMPONENT_REF:
> + case BIT_FIELD_REF:
> + case ARRAY_REF:
> + case ARRAY_RANGE_REF:
> + return 1;
> + case ADDR_EXPR:
> + case NOP_EXPR:
> return bpf_core_is_maybe_aggregate_access (TREE_OPERAND (expr, 0));
> -
> + default:;
> + }
> return 0;
> }
>
> +struct core_walk_data {
> + location_t loc;
> + tree arg;
> +};
> +
> /* Callback function used with walk_tree from
> bpf_resolve_overloaded_builtin. */
>
> static tree
> bpf_core_walk (tree *tp, int *walk_subtrees, void *data)
> {
> - location_t loc = *((location_t *) data);
> + struct core_walk_data *dat = (struct core_walk_data *) data;
> + bool is_pai = dat->arg == NULL_TREE;
>
> /* If this is a type, don't do anything. */
> if (TYPE_P (*tp))
> @@ -1302,10 +1472,18 @@ bpf_core_walk (tree *tp, int *walk_subtrees, void
> *data)
> return NULL_TREE;
> }
>
> + /* Build a new function call to a resolved builtin for the desired
> operation.
> + If this is a preserve_field_info call, pass along the argument to the
> + resolved builtin call. */
> if (bpf_core_is_maybe_aggregate_access (*tp))
> {
> - tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp));
> - tree newcall = build_call_expr_loc (loc, newdecl, 1, *tp);
> + tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp), is_pai);
> + tree newcall;
> + if (is_pai)
> + newcall = build_call_expr_loc (dat->loc, newdecl, 1, *tp);
> + else
> + newcall = build_call_expr_loc (dat->loc, newdecl, 2, *tp, dat->arg);
> +
> *tp = newcall;
> *walk_subtrees = 0;
> }
> @@ -1344,7 +1522,12 @@ bpf_small_register_classes_for_mode_p (machine_mode
> mode)
> static tree
> bpf_resolve_overloaded_builtin (location_t loc, tree fndecl, void *arglist)
> {
> - if (DECL_MD_FUNCTION_CODE (fndecl) != BPF_BUILTIN_PRESERVE_ACCESS_INDEX)
> + bool is_pai = DECL_MD_FUNCTION_CODE (fndecl)
> + == BPF_BUILTIN_PRESERVE_ACCESS_INDEX;
> + bool is_pfi = DECL_MD_FUNCTION_CODE (fndecl)
> + == BPF_BUILTIN_PRESERVE_FIELD_INFO;
> +
> + if (!is_pai && !is_pfi)
> return NULL_TREE;
>
> /* We only expect one argument, but it may be an arbitrarily-complicated
> @@ -1352,16 +1535,17 @@ bpf_resolve_overloaded_builtin (location_t loc, tree
> fndecl, void *arglist)
> vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (arglist);
> unsigned n_params = params ? params->length() : 0;
>
> - if (n_params != 1)
> + if ((is_pai && n_params != 1) || (is_pfi && n_params != 2))
> {
> - error_at (loc, "expected exactly 1 argument");
> + error_at (loc, "wrong number of arguments");
> return NULL_TREE;
> }
>
> tree param = (*params)[0];
>
> - /* If not generating BPF_CORE information, the builtin does nothing. */
> - if (!TARGET_BPF_CORE)
> + /* If not generating BPF_CORE information, preserve_access_index does
> nothing,
> + and simply "resolves to" the argument. */
> + if (!TARGET_BPF_CORE && is_pai)
> return param;
>
> /* Do remove_c_maybe_const_expr for the arg.
> @@ -1387,7 +1571,11 @@ bpf_resolve_overloaded_builtin (location_t loc, tree
> fndecl, void *arglist)
> This ensures that all the relevant information remains within the
> expression trees the builtin finally gets. */
>
> - walk_tree (¶m, bpf_core_walk, (void *) &loc, NULL);
> + struct core_walk_data data;
> + data.loc = loc;
> + data.arg = is_pai ? NULL_TREE : (*params)[1];
> +
> + walk_tree (¶m, bpf_core_walk, (void *) &data, NULL);
>
> return param;
> }
> @@ -1524,7 +1712,8 @@ handle_attr_preserve (function *fn)
> emit_label (label);
>
> /* Add the CO-RE relocation information to the BTF
> container. */
> - bpf_core_reloc_add (container, section_name, &accessors,
> label);
> + bpf_core_reloc_add (container, section_name, &accessors,
> label,
> + BPF_RELO_FIELD_BYTE_OFFSET);
> }
> }
> }
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index 8897a045ea1..9f71040846b 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -152,7 +152,8 @@ static GTY (()) vec<bpf_core_section_ref, va_gc>
> *bpf_core_sections;
>
> void
> bpf_core_reloc_add (const tree type, const char * section_name,
> - vec<unsigned int> *accessors, rtx_code_label *label)
> + vec<unsigned int> *accessors, rtx_code_label *label,
> + enum btf_core_reloc_kind kind)
> {
> char buf[40];
> unsigned int i, n = 0;
> @@ -173,7 +174,7 @@ bpf_core_reloc_add (const tree type, const char *
> section_name,
>
> bpfcr->bpfcr_type = get_btf_id (ctf_lookup_tree_type (ctfc, type));
> bpfcr->bpfcr_insn_label = label;
> - bpfcr->bpfcr_kind = BPF_RELO_FIELD_BYTE_OFFSET;
> + bpfcr->bpfcr_kind = kind;
>
> /* Add the CO-RE reloc to the appropriate section. */
> bpf_core_section_ref sec;
> diff --git a/gcc/config/bpf/coreout.h b/gcc/config/bpf/coreout.h
> index 3c7bdfd8c2f..498853f6e00 100644
> --- a/gcc/config/bpf/coreout.h
> +++ b/gcc/config/bpf/coreout.h
> @@ -103,7 +103,7 @@ extern void btf_ext_init (void);
> extern void btf_ext_output (void);
>
> extern void bpf_core_reloc_add (const tree, const char *, vec<unsigned int>
> *,
> - rtx_code_label *);
> + rtx_code_label *, enum btf_core_reloc_kind);
> extern int bpf_core_get_sou_member_index (ctf_container_ref, const tree);
>
> #ifdef __cplusplus
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 04af0584d82..0d3bcb24ab9 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -15745,6 +15745,27 @@ Load 32-bits from the @code{struct sk_buff} packet
> data pointed by the register
> BPF Compile Once-Run Everywhere (CO-RE) support. Instruct GCC to generate
> CO-RE relocation records for any accesses to aggregate data structures
> (struct, union, array types) in @var{expr}. This builtin is otherwise
> transparent, the return value is whatever @var{expr} evaluates to. It is also
> overloaded: @var{expr} may be of any type (not necessarily a pointer), the
> return type is the same. Has no effect if @code{-mco-re} is not in effect
> (either specified or implied).
> @end deftypefn
>
> +@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info
> (@var{expr}, unsigned int @var{kind})
> +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
> +extract information to aid in struct/union relocations. @var{expr} is
> +an access to a field of a struct or union. Depending on @var{kind}, different
> +information is returned to the program. A CO-RE relocation for the access in
> +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
> +
> +@var{kind} is one of:
> +@smallexample
> +enum
> +@{
> + FIELD_BYTE_OFFSET = 0,
> + FIELD_BYTE_SIZE,
> + FIELD_EXISTENCE,
> + FIELD_SIGNEDNESS,
> + FIELD_LSHIFT_U64,
> + FIELD_RSHIFT_U64,
> +@};
> +@end smallexample
> +@end deftypefn
> +
We need to document what each of these returned values means. It may be
more-or-less obvious, but it is better to be explicit IMO. It would
also be good to document the fact that the returned value is always
known at compile time.