This patch is largely a copy/paste from the aarch64 SVE counterpart, and adds support for tuples to the MVE intrinsics framework.
Introduce function_resolver::infer_tuple_type, which will be used to resolve overloaded vst2q and vst4q function names in a later patch. Update MAX_TUPLE_SIZE to 4: MVE uses 2- and 4-tuples; we just don't use the 3rd entry of the table, which is small, so it's not a big waste. As a result, fix access to acle_vector_types in a few places. The new wrap_type_in_struct, register_type_decl and infer_tuple_type are largely copies of the aarch64 versions, and register_builtin_tuple_types is very similar. The patch adds arm_array_mode, which is used by build_array_type_nelts. gcc/ChangeLog: * config/arm/arm-mve-builtins.cc (wrap_type_in_struct): New. (register_type_decl): New. (register_builtin_tuple_types): Fix support for tuples. (function_resolver::infer_tuple_type): New. * config/arm/arm-mve-builtins.h (MAX_TUPLE_SIZE): Set to 4. (function_resolver::infer_tuple_type): Declare. * config/arm/arm.cc (arm_array_mode): New. (TARGET_ARRAY_MODE): New. --- gcc/config/arm/arm-mve-builtins.cc | 76 ++++++++++++++++++++++++++---- gcc/config/arm/arm-mve-builtins.h | 3 +- gcc/config/arm/arm.cc | 24 ++++++++++ 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 3982d20058b..255aed25600 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -39,6 +39,7 @@ #include "gimple-iterator.h" #include "explow.h" #include "emit-rtl.h" +#include "stor-layout.h" #include "langhooks.h" #include "stringpool.h" #include "attribs.h" @@ -463,6 +464,48 @@ register_vector_type (vector_type_index type) acle_vector_types[0][type] = vectype; } +/* Return a structure type that contains a single field of type FIELD_TYPE. + The field is called __val, but that's an internal detail rather than + an exposed part of the API. 
*/ +static tree +wrap_type_in_struct (tree field_type) +{ + tree field = build_decl (input_location, FIELD_DECL, + get_identifier ("__val"), field_type); + tree struct_type = lang_hooks.types.make_type (RECORD_TYPE); + DECL_FIELD_CONTEXT (field) = struct_type; + TYPE_FIELDS (struct_type) = field; + layout_type (struct_type); + return struct_type; +} + +/* Register a built-in TYPE_DECL called NAME for TYPE. This is used/needed + when TYPE is a structure type. */ +static void +register_type_decl (tree type, const char *name) +{ + tree decl = build_decl (input_location, TYPE_DECL, + get_identifier (name), type); + TYPE_NAME (type) = decl; + TYPE_STUB_DECL (type) = decl; + lang_hooks.decls.pushdecl (decl); + /* ??? Undo the effect of set_underlying_type for C. The C frontend + doesn't recognize DECL as a built-in because (as intended) the decl has + a real location instead of BUILTINS_LOCATION. The frontend therefore + treats the decl like a normal C "typedef struct foo foo;", expecting + the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead + of the named one we attached above. It then sets DECL_ORIGINAL_TYPE + on the supposedly unnamed decl, creating a circularity that upsets + dwarf2out. + + We don't want to follow the normal C model and create "struct foo" + tags for tuple types since (a) the types are supposed to be opaque + and (b) they couldn't be defined as a real struct anyway. Treating + the TYPE_DECLs as "typedef struct foo foo;" without creating + "struct foo" would lead to confusing error messages. */ + DECL_ORIGINAL_TYPE (decl) = NULL_TREE; +} + /* Register tuple types of element type TYPE under their arm_mve_types.h names. 
*/ static void @@ -479,7 +522,7 @@ register_builtin_tuple_types (vector_type_index type) || (info->requires_float && !TARGET_HAVE_MVE_FLOAT)) { for (unsigned int num_vectors = 2; num_vectors <= 4; num_vectors += 2) - acle_vector_types[num_vectors >> 1][type] = void_type_node; + acle_vector_types[num_vectors - 1][type] = void_type_node; return; } @@ -493,15 +536,18 @@ register_builtin_tuple_types (vector_type_index type) tree vectype = acle_vector_types[0][type]; tree arrtype = build_array_type_nelts (vectype, num_vectors); - gcc_assert (TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype)); - tree field = build_decl (input_location, FIELD_DECL, - get_identifier ("val"), arrtype); - - tree t = lang_hooks.types.simulate_record_decl (input_location, buffer, - make_array_slice (&field, - 1)); - gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)); - acle_vector_types[num_vectors >> 1][type] = TREE_TYPE (t); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (arrtype)) + && TYPE_MODE_RAW (arrtype) == TYPE_MODE (arrtype) + && TYPE_ALIGN (arrtype) == 64); + + tree tuple_type = wrap_type_in_struct (arrtype); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type)) + && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type) + && TYPE_ALIGN (tuple_type) == 64); + + register_type_decl (tuple_type, buffer); + + acle_vector_types[num_vectors - 1][type] = tuple_type; } } @@ -1328,6 +1374,16 @@ function_resolver::infer_vector_type (unsigned int argno) return infer_vector_or_tuple_type (argno, 1); } +/* If the function operates on tuples of vectors, require argument ARGNO to be + a tuple with the appropriate number of vectors, otherwise require it to be a + single vector. Return the associated type suffix on success. Report an + error and return NUM_TYPE_SUFFIXES on failure. */ +type_suffix_index +function_resolver::infer_tuple_type (unsigned int argno) +{ + return infer_vector_or_tuple_type (argno, vectors_per_tuple ()); +} + /* Require argument ARGNO to be a vector or scalar argument. 
Return true if it is, otherwise report an appropriate error. */ bool diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h index c6a929c3eee..cdc07b4e51f 100644 --- a/gcc/config/arm/arm-mve-builtins.h +++ b/gcc/config/arm/arm-mve-builtins.h @@ -81,7 +81,7 @@ constant. */ namespace arm_mve { /* The maximum number of vectors in an ACLE tuple type. */ -const unsigned int MAX_TUPLE_SIZE = 3; +const unsigned int MAX_TUPLE_SIZE = 4; /* Used to represent the default merge argument index for _m functions. The actual index depends on how many arguments the function takes. */ @@ -387,6 +387,7 @@ public: type_suffix_index infer_pointer_type (unsigned int); type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); type_suffix_index infer_vector_type (unsigned int); + type_suffix_index infer_tuple_type (unsigned int); bool require_vector_or_scalar_type (unsigned int); diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 6f11b6c816d..24949c6a6b5 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -279,6 +279,7 @@ static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg); static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); +static opt_machine_mode arm_array_mode (machine_mode, unsigned HOST_WIDE_INT); static bool arm_array_mode_supported_p (machine_mode, unsigned HOST_WIDE_INT); static machine_mode arm_preferred_simd_mode (scalar_mode); @@ -516,6 +517,8 @@ static const scoped_attribute_specs *const arm_attribute_table[] = #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p +#undef TARGET_ARRAY_MODE +#define TARGET_ARRAY_MODE arm_array_mode #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P 
arm_array_mode_supported_p #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE @@ -29786,6 +29789,27 @@ arm_vector_mode_supported_p (machine_mode mode) return false; } +/* Implements target hook array_mode. */ +static opt_machine_mode +arm_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_HAVE_MVE + /* MVE accepts only tuples of 2 or 4 vectors. */ + && (nelems == 2 + || nelems == 4)) + { + machine_mode struct_mode; + FOR_EACH_MODE_IN_CLASS (struct_mode, GET_MODE_CLASS (mode)) + { + if (GET_MODE_INNER (struct_mode) == GET_MODE_INNER (mode) + && known_eq (GET_MODE_NUNITS (struct_mode), + GET_MODE_NUNITS (mode) * nelems)) + return struct_mode; + } + } + return opt_machine_mode (); +} + /* Implements target hook array_mode_supported_p. */ static bool -- 2.34.1