Compared to v1, I've added a new function aarch64_get_required_features to avoid having to pass a long list of explicit features. I also changed aarch64_target_switcher to only disable TARGET_GENERAL_REGS_ONLY if the requested flags include FP, to address Richard's comment.
Bootstrapped and regression tested on aarch64. Is this ok for master? --- When initialising intrinsics with `#pragma GCC aarch64 "arm_*.h"`, we often set an explicit target, but currently leave current_target_pragma unchanged. This results in the target pragma being applied to each simulated intrinsic on top of our explicit target, which is clearly undesirable. As far as I can tell this doesn't cause any bugs at the moment, because none of the behaviour for builtin functions depends upon the function specific target. However, the unintended target feature combinations led to unwanted behaviour in an under-developement patch. This patch fixes the issue by extending aarch64_simd_switcher to explicitly unset the current_target_pragma. It also simplifies constructor arguments by automatically including any feature dependencies, which results in FCMA and BF16 being added to the sets of features used when handling arm_sve.h and arm_sme.h pragmas. gcc/ChangeLog: * common/config/aarch64/aarch64-common.cc (struct aarch64_extension_info): Add field. (aarch64_get_required_features): New. * config/aarch64/aarch64-builtins.cc (aarch64_simd_switcher::aarch64_simd_switcher): Rename to... (aarch64_target_switcher::aarch64_target_switcher): ...this, remove default simd flags and save current_target_pragma. (aarch64_simd_switcher::~aarch64_simd_switcher): Rename to... (aarch64_target_switcher::~aarch64_target_switcher): ...this, and restore current_target_pragma. (handle_arm_acle_h): Use aarch64_target_switcher. (handle_arm_neon_h): Rename switcher and pass explicit flags. (aarch64_general_init_builtins): Ditto. * config/aarch64/aarch64-protos.h (class aarch64_simd_switcher): Rename to... (class aarch64_target_switcher): ...this, and add pragma member. (aarch64_get_required_features): New prototype. * config/aarch64/aarch64-sve-builtins.cc (sve_switcher::sve_switcher): Rename to... (sve_target_switcher::sve_target_switcher): ...this. (sve_switcher::~sve_switcher): Rename to... (sve_target_switcher::~sve_target_switcher): ...this. (init_builtins): Rename switcher. (handle_arm_sve_h): Ditto. (handle_arm_neon_sve_bridge_h): Ditto. (handle_arm_sme_h): Ditto. * config/aarch64/aarch64-sve-builtins.h (class sve_switcher): Rename to... (class sve_target_switcher): ...this. (class sme_switcher): Rename to... (class sme_target_switcher): ...this. diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index ef4458fb69308d2bb6785e97be5be85226cf0ebb..500bf784983d851c54ea4ec59cf3cad29e5e309e 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -157,6 +157,8 @@ struct aarch64_extension_info aarch64_feature_flags flags_on; /* If this feature is turned off, these bits also need to be turned off. */ aarch64_feature_flags flags_off; + /* If this feature remains enabled, these bits must also remain enabled. */ + aarch64_feature_flags flags_required; }; /* ISA extensions in AArch64. */ @@ -164,9 +166,10 @@ static constexpr aarch64_extension_info all_extensions[] = { #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, FEATURE_STRING) \ {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \ - feature_deps::get_flags_off (feature_deps::root_off_##IDENT)}, + feature_deps::get_flags_off (feature_deps::root_off_##IDENT), \ + feature_deps::IDENT ().enable}, #include "config/aarch64/aarch64-option-extensions.def" - {NULL, 0, 0, 0} + {NULL, 0, 0, 0, 0} }; struct aarch64_arch_info @@ -204,6 +207,18 @@ static constexpr aarch64_processor_info all_cores[] = {NULL, aarch64_no_cpu, aarch64_no_arch, 0} }; +/* Return the set of feature flags that are required to be enabled when the + features in FLAGS are enabled. */ + +aarch64_feature_flags +aarch64_get_required_features (aarch64_feature_flags flags) +{ + const struct aarch64_extension_info *opt; + for (opt = all_extensions; opt->name != NULL; opt++) + if (flags & opt->flag_canonical) + flags |= opt->flags_required; + return flags; +} /* Print a list of CANDIDATES for an argument, and try to suggest a specific close match. */ diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 128cc365d3d585e01cb69668f285318ee56a36fc..5174fb1daefee2d73a5098e0de1cca73dc103416 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -1877,23 +1877,31 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t) return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t); } -/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD - set. */ -aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags extra_flags) +/* Temporarily set FLAGS as the enabled target features. */ +aarch64_target_switcher::aarch64_target_switcher (aarch64_feature_flags flags) : m_old_asm_isa_flags (aarch64_asm_isa_flags), - m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY) + m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY), + m_old_target_pragma (current_target_pragma) { + /* Include all dependencies. */ + flags = aarch64_get_required_features (flags); + /* Changing the ISA flags should be enough here. We shouldn't need to pay the compile-time cost of a full target switch. */ - global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; - aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags); + if (flags & AARCH64_FL_FP) + global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; + aarch64_set_asm_isa_flags (flags); + + /* Target pragmas are irrelevant when defining intrinsics artificially. */ + current_target_pragma = NULL_TREE; } -aarch64_simd_switcher::~aarch64_simd_switcher () +aarch64_target_switcher::~aarch64_target_switcher () { if (m_old_general_regs_only) global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; aarch64_set_asm_isa_flags (m_old_asm_isa_flags); + current_target_pragma = m_old_target_pragma; } /* Implement #pragma GCC aarch64 "arm_neon.h". @@ -1903,7 +1911,7 @@ aarch64_simd_switcher::~aarch64_simd_switcher () void handle_arm_neon_h (void) { - aarch64_simd_switcher simd; + aarch64_target_switcher switcher (AARCH64_FL_SIMD); /* Register the AdvSIMD vector tuple types. */ for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++) @@ -2353,6 +2361,8 @@ aarch64_init_data_intrinsics (void) void handle_arm_acle_h (void) { + aarch64_target_switcher switcher; + aarch64_init_ls64_builtins (); aarch64_init_tme_builtins (); aarch64_init_memtag_builtins (); @@ -2446,7 +2456,7 @@ aarch64_general_init_builtins (void) aarch64_init_bf16_types (); { - aarch64_simd_switcher simd; + aarch64_target_switcher switcher (AARCH64_FL_SIMD); aarch64_init_simd_builtins (); } diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4235f4a0ca51af49c2852a420f1056727b24f345..3a809d10fa8ce2340672c4eb38168260f2c7d4e0 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -733,15 +733,16 @@ const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1; /* RAII class for enabling enough features to define built-in types and implement the arm_neon.h pragma. */ -class aarch64_simd_switcher +class aarch64_target_switcher { public: - aarch64_simd_switcher (aarch64_feature_flags extra_flags = 0); - ~aarch64_simd_switcher (); + aarch64_target_switcher (aarch64_feature_flags flags = 0); + ~aarch64_target_switcher (); private: aarch64_feature_flags m_old_asm_isa_flags; bool m_old_general_regs_only; + tree m_old_target_pragma; }; /* Represents the ISA requirements of an intrinsic function, or of some @@ -1190,6 +1191,7 @@ void aarch64_set_asm_isa_flags (aarch64_feature_flags); void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags); bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, const struct cl_decoded_option *, location_t); +aarch64_feature_flags aarch64_get_required_features (aarch64_feature_flags); void aarch64_print_hint_for_extensions (const char *); void aarch64_print_hint_for_arch (const char *); void aarch64_print_hint_for_core (const char *); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index 54d213dfe6e0e1cd95e932fc4a04e9cd360f15f5..ea19cfe47bec042e0fb0b4f3c3820b2d37bb222f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -824,11 +824,11 @@ public: /* RAII class for enabling enough SVE features to define the built-in types and implement the arm_sve.h pragma. */ -class sve_switcher : public aarch64_simd_switcher +class sve_target_switcher : public aarch64_target_switcher { public: - sve_switcher (aarch64_feature_flags = 0); - ~sve_switcher (); + sve_target_switcher (aarch64_feature_flags = 0); + ~sve_target_switcher (); private: unsigned int m_old_maximum_field_alignment; @@ -836,10 +836,10 @@ private: }; /* Extends sve_switch enough for defining arm_sme.h. */ -class sme_switcher : public sve_switcher +class sme_target_switcher : public sve_target_switcher { public: - sme_switcher () : sve_switcher (AARCH64_FL_SME) {} + sme_target_switcher () : sve_target_switcher (AARCH64_FL_SME) {} }; extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 5d2062726d6bab31652bc9fa4bbd597704ef46e5..8e9cb5cea2de1d51a853900f9002550606805052 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -1296,8 +1296,8 @@ registered_function_hasher::equal (value_type value, const compare_type &key) return value->instance == key; } -sve_switcher::sve_switcher (aarch64_feature_flags flags) - : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags) +sve_target_switcher::sve_target_switcher (aarch64_feature_flags flags) + : aarch64_target_switcher (AARCH64_FL_SVE | flags) { /* Changing the ISA flags and have_regs_of_mode should be enough here. We shouldn't need to pay the compile-time cost of a full target @@ -1312,7 +1312,7 @@ sve_switcher::sve_switcher (aarch64_feature_flags flags) have_regs_of_mode[i] = true; } -sve_switcher::~sve_switcher () +sve_target_switcher::~sve_target_switcher () { memcpy (have_regs_of_mode, m_old_have_regs_of_mode, sizeof (have_regs_of_mode)); @@ -4726,7 +4726,7 @@ register_builtin_types () void init_builtins () { - sve_switcher sve; + sve_target_switcher switcher; register_builtin_types (); if (in_lto_p) { @@ -4842,7 +4842,7 @@ handle_arm_sve_h (bool function_nulls_p) return; } - sve_switcher sve; + sve_target_switcher switcher; /* Define the vector and tuple types. */ for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) @@ -4873,6 +4873,8 @@ handle_arm_neon_sve_bridge_h (bool function_nulls_p) if (initial_indexes[arm_sme_handle] == 0) handle_arm_sme_h (true); + aarch64_target_switcher switcher; + /* Define the functions. */ function_builder builder (arm_neon_sve_handle, function_nulls_p); for (unsigned int i = 0; i < ARRAY_SIZE (neon_sve_function_groups); ++i) @@ -4900,7 +4902,7 @@ handle_arm_sme_h (bool function_nulls_p) return; } - sme_switcher sme; + sme_target_switcher switcher; function_builder builder (arm_sme_handle, function_nulls_p); for (unsigned int i = 0; i < ARRAY_SIZE (sme_function_groups); ++i)