Adds an optimisation in FMV to redirect a call to a specific target if possible. A call is redirected to a specific target if both: - the caller can always call the callee version - and it is possible to rule out all higher priority versions of the callee FMV set. That is established either by the callee being the highest priority version, or by each higher priority version of the callee implying that, were it resolved, a higher priority version of the caller would have been selected.
For this logic, introduces the new TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A hook. Adds a full implementation for AArch64, and a weaker default version for other targets. This allows the new optimisation to replace the previous one, as it covers the same case in which two function sets implement the same versions. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_functions_b_resolvable_from_a): New function. (TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A): New define. * doc/tm.texi: Regenerate. * doc/tm.texi.in: Add documentation for TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A. * multiple_target.cc (redirect_to_specific_clone): Add new optimisation logic. (ipa_target_clone): Remove check for TARGET_HAS_FMV_TARGET_ATTRIBUTE. * target.def: Document new hook. * attribs.cc (functions_b_resolvable_from_a): New function. * attribs.h (functions_b_resolvable_from_a): New function. gcc/testsuite/ChangeLog: * g++.target/aarch64/fmv-selection1.C: New test. * g++.target/aarch64/fmv-selection2.C: New test. * g++.target/aarch64/fmv-selection3.C: New test. * g++.target/aarch64/fmv-selection4.C: New test. * g++.target/aarch64/fmv-selection5.C: New test. * g++.target/aarch64/fmv-selection6.C: New test. * g++.target/aarch64/fmv-selection7.C: New test. 
--- gcc/attribs.cc | 22 ++++ gcc/attribs.h | 1 + gcc/config/aarch64/aarch64.cc | 28 +++++ gcc/doc/tm.texi | 19 +++ gcc/doc/tm.texi.in | 2 + gcc/multiple_target.cc | 114 ++++++++++++------ gcc/target.def | 22 ++++ .../g++.target/aarch64/fmv-selection1.C | 40 ++++++ .../g++.target/aarch64/fmv-selection2.C | 40 ++++++ .../g++.target/aarch64/fmv-selection3.C | 25 ++++ .../g++.target/aarch64/fmv-selection4.C | 30 +++++ .../g++.target/aarch64/fmv-selection5.C | 28 +++++ .../g++.target/aarch64/fmv-selection6.C | 27 +++++ .../g++.target/aarch64/fmv-selection7.C | 65 ++++++++++ 14 files changed, 425 insertions(+), 38 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection1.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection2.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection3.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection4.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection5.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection6.C create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection7.C diff --git a/gcc/attribs.cc b/gcc/attribs.cc index 2ca82674f7c..7b6ce6e0020 100644 --- a/gcc/attribs.cc +++ b/gcc/attribs.cc @@ -1095,6 +1095,28 @@ common_function_versions (string_slice fn1 ATTRIBUTE_UNUSED, gcc_unreachable (); } +/* Default implementation of TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A. + Used to check very basically if DECL_B is callable from DECL_A. + For now this checks if the version strings are the same. */ + +bool +functions_b_resolvable_from_a (tree decl_a, tree decl_b, + tree base ATTRIBUTE_UNUSED) +{ + const char *attr_name = TARGET_HAS_FMV_TARGET_ATTRIBUTE + ? 
"target" + : "target_version"; + + tree attr_a = lookup_attribute (attr_name, DECL_ATTRIBUTES (decl_a)); + tree attr_b = lookup_attribute (attr_name, DECL_ATTRIBUTES (decl_b)); + + gcc_assert (attr_b); + if (!attr_a) + return false; + + return attribute_value_equal (attr_a, attr_b); +} + /* Comparator function to be used in qsort routine to sort attribute specification strings to "target". */ diff --git a/gcc/attribs.h b/gcc/attribs.h index 6e15b48e1ed..74a793458eb 100644 --- a/gcc/attribs.h +++ b/gcc/attribs.h @@ -57,6 +57,7 @@ extern char *sorted_attr_string (tree); extern bool common_function_versions (string_slice, string_slice); extern tree make_dispatcher_decl (const tree); extern bool is_function_default_version (const tree); +extern bool functions_b_resolvable_from_a (tree, tree, tree); extern void handle_ignored_attributes_option (vec<char *> *); /* Return a type like TTYPE except that its TYPE_ATTRIBUTES diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 9dff3ab61e3..30453b46b0d 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -20574,6 +20574,30 @@ aarch64_compare_version_priority (tree decl1, tree decl2) return compare_feature_masks (mask1, mask2); } +/* Implement TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A. */ + +bool +aarch64_functions_b_resolvable_from_a (tree decl_a, tree decl_b, tree baseline) +{ + auto baseline_isa = aarch64_get_isa_flags + (TREE_TARGET_OPTION (aarch64_fndecl_options (baseline))); + auto isa_a = baseline_isa; + auto isa_b = baseline_isa; + + auto a_version = get_target_version (decl_a); + auto b_version = get_target_version (decl_b); + if (a_version.is_valid ()) + aarch64_parse_fmv_features (a_version, &isa_a, NULL, NULL); + if (b_version.is_valid ()) + aarch64_parse_fmv_features (b_version, &isa_b, NULL, NULL); + + /* Are there any bits of b that arent in a. 
*/ + if (isa_b & (~isa_a)) + return false; + + return true; +} + /* Build the struct __ifunc_arg_t type: struct __ifunc_arg_t @@ -32786,6 +32810,10 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_COMPARE_VERSION_PRIORITY #define TARGET_COMPARE_VERSION_PRIORITY aarch64_compare_version_priority +#undef TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A +#define TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A \ + aarch64_functions_b_resolvable_from_a + #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ aarch64_generate_version_dispatcher_body diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index df89f751d08..6425e3042e6 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -10982,6 +10982,25 @@ This target hook returns @code{true} if the target/target-version strings @var{fn1} and @var{fn2} imply the same function version. @end deftypefn +@deftypefn {Target Hook} bool TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A (tree @var{decl_a}, tree @var{decl_v}, tree @var{base}) +@var{decl_b} is a function declaration with a function multi-versioning +(FMV) attribute; this attribute is either @code{target} or +@code{target_version}, depending on @code{TARGET_HAS_FMV_TARGET_ATTRIBUTE}. +@var{decl_a} is a function declaration that may or may not have an FMV +attribute. + +Return true if we have enough information to determine that the +requirements of @var{decl_b}'s FMV attribute are met whenever @var{decl_a} +is executed, given that the target supports all features required by +function declaration @var{base}. + +The default implementation just checks whether @var{decl_a} has the same +FMV attribute as @var{decl_b}. This is conservatively correct, +but ports can do better by taking the relationships between architecture +features into account. For example, on AArch64, @code{sve} is present +whenever @code{sve2} is present. 
+@end deftypefn + @deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree @var{callee}) This target hook returns @code{false} if the @var{caller} function cannot inline @var{callee}, based on target specific information. By diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index c2e7921192c..5d33158bd48 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7142,6 +7142,8 @@ with the target specific attributes. The default value is @code{','}. @hook TARGET_OPTION_FUNCTION_VERSIONS +@hook TARGET_OPTION_FUNCTIONS_B_RESOLVABLE_FROM_A + @hook TARGET_CAN_INLINE_P @hook TARGET_UPDATE_IPA_FN_TARGET_INFO diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc index e85d7e71442..7a0d22cfc78 100644 --- a/gcc/multiple_target.cc +++ b/gcc/multiple_target.cc @@ -423,61 +423,100 @@ expand_target_clones (struct cgraph_node *node, bool definition) return true; } -/* When NODE is a target clone, consider all callees and redirect - to a clone with equal target attributes. That prevents multiple - multi-versioning dispatches and a call-chain can be optimized. - - This optimisation might pick the wrong version in some cases, since knowing - that we meet the target requirements for a matching callee version does not - tell us that we won't also meet the target requirements for a higher - priority callee version at runtime. Since this is longstanding behaviour - for x86 and powerpc, we preserve it for those targets, but skip the optimisation - for targets that use the "target_version" attribute for multi-versioning. */ +/* When NODE is part of an FMV function set, consider all callees and check if + any can provably always resolve a certain version and then call that version + directly. 
*/ static void redirect_to_specific_clone (cgraph_node *node) { - cgraph_function_version_info *fv = node->function_version (); - if (fv == NULL) - return; - - gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE); - tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl)); - if (attr_target == NULL_TREE) + if (!targetm.compare_version_priority || !optimize) return; /* We need to remember NEXT_CALLER as it could be modified in the loop. */ for (cgraph_edge *e = node->callees; e ; e = e->next_callee) { - cgraph_function_version_info *fv2 = e->callee->function_version (); - if (!fv2) + /* Only if this is a call to a dispatched symbol. */ + if (!e->callee->dispatcher_function) continue; - tree attr_target2 = lookup_attribute ("target", - DECL_ATTRIBUTES (e->callee->decl)); + cgraph_function_version_info *callee_v + = e->callee->function_version (); + cgraph_function_version_info *caller_v + = e->caller->function_version (); + + gcc_assert (callee_v); - /* Function is not calling proper target clone. */ - if (attr_target2 == NULL_TREE - || !attribute_value_equal (attr_target, attr_target2)) + /* Find the default nodes for both callee and caller (if present). */ + cgraph_function_version_info *callee_default_v = callee_v->next; + cgraph_function_version_info *caller_default_v = caller_v; + if (caller_v) { - while (fv2->prev != NULL) - fv2 = fv2->prev; + while (caller_default_v->prev) + caller_default_v = caller_default_v->prev; + if (!is_function_default_version (caller_default_v->this_node->decl)) + caller_default_v = NULL; + } + + /* If this is not the TU that contains the definition of the default + version we are not guaranteed to have visibility of all versions + so cannot reason about them. 
*/ + if (!callee_default_v + || !callee_default_v->this_node->binds_to_current_def_p ()) + continue; + + cgraph_function_version_info *highest_callable_fn = NULL; + for (cgraph_function_version_info *ver = callee_v->next; + ver; + ver = ver->next) + if (targetm.target_option.functions_b_resolvable_from_a + (node->decl, ver->this_node->decl, node->decl)) + highest_callable_fn = ver; - /* Try to find a clone with equal target attribute. */ - for (; fv2 != NULL; fv2 = fv2->next) + if (!highest_callable_fn) + continue; + + bool inlinable = true; + + /* If there are higher priority versions of callee and caller has no + more version information, then not callable. */ + if (highest_callable_fn->next) + { + /* If this is not the decl where the callee default is defined then + cannot reason about the caller versions. */ + if (!caller_default_v + || !caller_default_v->this_node->binds_to_current_def_p ()) + continue; + + /* If every higher priority version would imply a higher priority + version of caller would have been selected, then this is + callable. 
*/ + for (cgraph_function_version_info *callee_ver + = highest_callable_fn->next; + callee_ver; callee_ver = callee_ver->next) { - cgraph_node *callee = fv2->this_node; - attr_target2 = lookup_attribute ("target", - DECL_ATTRIBUTES (callee->decl)); - if (attr_target2 != NULL_TREE - && attribute_value_equal (attr_target, attr_target2)) + bool is_possible = true; + for (cgraph_function_version_info *caller_ver = caller_v->next; + caller_ver; caller_ver = caller_ver->next) + if (targetm.target_option.functions_b_resolvable_from_a + (callee_ver->this_node->decl, caller_ver->this_node->decl, + node->decl)) + { + is_possible = false; + break; + } + if (is_possible) { - e->redirect_callee (callee); - cgraph_edge::redirect_call_stmt_to_callee (e); + inlinable = false; break; } } } + if (inlinable) + { + e->redirect_callee (highest_callable_fn->this_node); + cgraph_edge::redirect_call_stmt_to_callee (e); + } } } @@ -566,9 +605,8 @@ ipa_target_clone (bool early) for (unsigned i = 0; i < to_dispatch.length (); i++) create_dispatcher_calls (to_dispatch[i]); - if (TARGET_HAS_FMV_TARGET_ATTRIBUTE) - FOR_EACH_FUNCTION (node) - redirect_to_specific_clone (node); + FOR_EACH_FUNCTION (node) + redirect_to_specific_clone (node); return 0; } diff --git a/gcc/target.def b/gcc/target.def index 9c51c1989dd..492ad676274 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -6934,6 +6934,28 @@ DEFHOOK bool, (string_slice fn1, string_slice fn2), hook_stringslice_stringslice_unreachable) +/* Checks if we can be certain that function DECL_A could resolve DECL_B. 
*/ +DEFHOOK +(functions_b_resolvable_from_a, + "@var{decl_b} is a function declaration with a function multi-versioning\n\ +(FMV) attribute; this attribute is either @code{target} or\n\ +@code{target_version}, depending on @code{TARGET_HAS_FMV_TARGET_ATTRIBUTE}.\n\ +@var{decl_a} is a function declaration that may or may not have an FMV\n\ +attribute.\n\ +\n\ +Return true if we have enough information to determine that the\n\ +requirements of @var{decl_b}'s FMV attribute are met whenever @var{decl_a}\n\ +is executed, given that the target supports all features required by\n\ +function declaration @var{base}.\n\ +\n\ +The default implementation just checks whether @var{decl_a} has the same\n\ +FMV attribute as @var{decl_b}. This is conservatively correct,\n\ +but ports can do better by taking the relationships between architecture\n\ +features into account. For example, on AArch64, @code{sve} is present\n\ +whenever @code{sve2} is present.", + bool, (tree decl_a, tree decl_v, tree base), + functions_b_resolvable_from_a) + /* Function to determine if one function can inline another function. 
*/ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_" diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection1.C b/gcc/testsuite/g++.target/aarch64/fmv-selection1.C new file mode 100644 index 00000000000..4ee54466c13 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection1.C @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a" } */ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 1; +} + +__attribute__((target_version("rng"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 2; +} + +__attribute__((target_version("flagm"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 3; +} + +__attribute__((target_version("rng+flagm"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 4; +} + +int bar() +{ + return foo (); +} + +/* Cannot optimize */ +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection2.C b/gcc/testsuite/g++.target/aarch64/fmv-selection2.C new file mode 100644 index 00000000000..f580dac4458 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection2.C @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a+rng+flagm" } */ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 1; +} + +__attribute__((target_version("rng"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 2; +} + +__attribute__((target_version("flagm"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 3; +} + +__attribute__((target_version("rng+flagm"))) +__attribute__((optimize("O0"))) +int foo () +{ + return 4; +} + +int bar() +{ + return foo (); +} + +/* Can optimize to highest priority function */ +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._MrngMflagm\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection3.C 
b/gcc/testsuite/g++.target/aarch64/fmv-selection3.C new file mode 100644 index 00000000000..6b52fd4f644 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection3.C @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a" } */ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ return 1; } + +__attribute__((target_version("rng"))) +int foo (); +__attribute__((target_version("flagm"))) +int foo (); +__attribute__((target_version("rng+flagm"))) +int foo (); + +__attribute__((target_version("rng+flagm"))) +int bar() +{ + return foo (); +} + +/* Cannot optimize */ +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._MrngMflagm\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection4.C b/gcc/testsuite/g++.target/aarch64/fmv-selection4.C new file mode 100644 index 00000000000..155145dcd88 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection4.C @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a" } */ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ return 1; } + +__attribute__((target_version("rng"))) +int foo (); +__attribute__((target_version("flagm"))) +int foo (); +__attribute__((target_version("rng+flagm"))) +int foo (); + +__attribute__((target_version("default"))) +int bar() +{ + return foo (); +} + +__attribute__((target_version("rng"))) +int bar(); + +__attribute__((target_version("flagm"))) +int bar(); + +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\.default\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection5.C b/gcc/testsuite/g++.target/aarch64/fmv-selection5.C new file mode 100644 index 00000000000..4d6d38e3754 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection5.C @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a" } 
*/ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ return 1; } + +__attribute__((target_version("rng"))) +int foo (); +__attribute__((target_version("flagm"))) +int foo (); +__attribute__((target_version("rng+flagm"))) +int foo (); + +__attribute__((target_version("default"))) +int bar() +{ + return foo (); +} + +__attribute__((target_version("flagm"))) +int bar(); + +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\.default\n" 0 } } */ +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection6.C b/gcc/testsuite/g++.target/aarch64/fmv-selection6.C new file mode 100644 index 00000000000..db384e16c09 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection6.C @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a+rng" } */ + +__attribute__((target_version("default"))) +__attribute__((optimize("O0"))) +int foo () +{ return 1; } + +__attribute__((target_version("rng"))) +int foo (); +__attribute__((target_version("flagm"))) +int foo (); +__attribute__((target_version("rng+flagm"))) +int foo (); + +__attribute__((target_version("default"))) +int bar() +{ + return foo (); +} + +__attribute__((target_version("flagm"))) +int bar(); + +/* { dg-final { scan-assembler-times "\n\tb\t_Z3foov\._Mrng\n" 1 } } */ + diff --git a/gcc/testsuite/g++.target/aarch64/fmv-selection7.C b/gcc/testsuite/g++.target/aarch64/fmv-selection7.C new file mode 100644 index 00000000000..41e7462ebb1 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/fmv-selection7.C @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -march=armv8-a" } */ + +[[gnu::optimize("O0")]] +[[gnu::target_version ("default")]] +int bar () { + return 1; +} + +[[gnu::optimize("O0")]] +[[gnu::target ("+sve2")]] +[[gnu::target_version ("sve")]] +int bar (); + +[[gnu::target ("+sve")]] +int foo () { + 
return bar(); +} + +/* { dg-final { scan-assembler-times "\n\tb\t_Z3barv\._Msve\n" 1 } } */ + +[[gnu::target_version ("default")]] +int bar2 () { + return 1; +} + +[[gnu::target_version ("sve2")]] +int bar2 (); + +[[gnu::target_version ("default")]] +int foo2 (); + +[[gnu::target_version ("sve")]] +[[gnu::target ("+sve2")]] +int foo2 () { + return bar2(); +} + +/* { dg-final { scan-assembler-times "\n\tb\t_Z4bar2v\._Msve2\n" 1 } } */ + +[[gnu::target_version ("default")]] +int bar3 () { + return 1; +} + +[[gnu::target_version ("sve")]] +int bar3 (); + +[[gnu::target ("+rng")]] +[[gnu::target_version ("sve2")]] +int bar3 (); + +[[gnu::target_version ("default")]] +int foo3 (); + +[[gnu::target_version ("sve")]] +int foo3 () { + return bar3 (); +} + +[[gnu::target_version ("sve2+rng")]] +int foo3 (); + +/* { dg-final { scan-assembler-times "\n\tb\t_Z4bar3v\n" 1 } } */ -- 2.34.1 -- Alfie Richards