Hi. When we have a pair of target clones where foo calls bar, and the target attributes are equal, we can redirect the call and not use the ifunc dispatcher.
Patch survives regression tests on x86_64-linux-gnu.

Ready for trunk?
Martin

gcc/ChangeLog:

2018-10-04  Martin Liska  <mli...@suse.cz>

	PR ipa/82625
	* multiple_target.c (redirect_to_specific_clone): New function.
	(ipa_target_clone): Use it.
	* tree-inline.c: Fix comment.

gcc/testsuite/ChangeLog:

2018-10-04  Martin Liska  <mli...@suse.cz>

	PR ipa/82625
	* g++.dg/ext/pr82625.C: New test.
---
 gcc/multiple_target.c              | 51 ++++++++++++++++++++++++++++++
 gcc/testsuite/g++.dg/ext/pr82625.C | 36 +++++++++++++++++++++
 gcc/tree-inline.c                  |  2 +-
 3 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/pr82625.C
diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c index a610d9a3345..2d892f201c5 100644 --- a/gcc/multiple_target.c +++ b/gcc/multiple_target.c @@ -451,6 +451,54 @@ expand_target_clones (struct cgraph_node *node, bool definition) return ret; } +/* When NODE is a target clone, consider all callees and redirect + to a clone with equal target attributes. That prevents multiple + multi-versioning dispatches and a call-chain can be optimized. */ + +static void +redirect_to_specific_clone (cgraph_node *node) +{ + cgraph_function_version_info *fv = node->function_version (); + if (fv == NULL) + return; + + tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES (node->decl)); + if (attr_target == NULL_TREE) + return; + + /* We need to remember NEXT_CALLER as it could be modified in the loop. */ + for (cgraph_edge *e = node->callees; e ; e = e->next_callee) + { + cgraph_function_version_info *fv2 = e->callee->function_version (); + if (!fv2) + continue; + + tree attr_target2 = lookup_attribute ("target", + DECL_ATTRIBUTES (e->callee->decl)); + + /* Function is not calling proper target clone. */ + if (!attribute_list_equal (attr_target, attr_target2)) + { + while (fv2->prev != NULL) + fv2 = fv2->prev; + + /* Try to find a clone with equal target attribute. 
*/ + for (; fv2 != NULL; fv2 = fv2->next) + { + cgraph_node *callee = fv2->this_node; + attr_target2 = lookup_attribute ("target", + DECL_ATTRIBUTES (callee->decl)); + if (attribute_list_equal (attr_target, attr_target2)) + { + e->redirect_callee (callee); + e->redirect_call_stmt_to_callee (); + break; + } + } + } + } +} + static unsigned int ipa_target_clone (void) { @@ -464,6 +512,9 @@ ipa_target_clone (void) for (unsigned i = 0; i < to_dispatch.length (); i++) create_dispatcher_calls (to_dispatch[i]); + FOR_EACH_FUNCTION (node) + redirect_to_specific_clone (node); + return 0; } diff --git a/gcc/testsuite/g++.dg/ext/pr82625.C b/gcc/testsuite/g++.dg/ext/pr82625.C new file mode 100644 index 00000000000..47bd2df1104 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/pr82625.C @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +__attribute__ ((target ("default"))) +static unsigned foo(const char *buf, unsigned size) { + return 1; +} + +__attribute__ ((target ("avx"))) +static unsigned foo(const char *buf, unsigned size) { + return 2; +} + +__attribute__ ((target ("default"))) +unsigned bar() { + char buf[4096]; + unsigned acc = 0; + for (int i = 0; i < sizeof(buf); i++) { + acc += foo(&buf[i], 1); + } + return acc; +} + +__attribute__ ((target ("avx"))) +unsigned bar() { + char buf[4096]; + unsigned acc = 0; + for (int i = 0; i < sizeof(buf); i++) { + acc += foo(&buf[i], 1); + } + return acc; +} + +/* { dg-final { scan-tree-dump-times "return 4096;" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "return 8192;" 1 "optimized" } } */ diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index ff8ee8ce78f..913425394e0 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -2631,7 +2631,7 @@ copy_loops (copy_body_data *id, } } -/* Call cgraph_redirect_edge_call_stmt_to_callee on all calls in BB */ +/* Call redirect_call_stmt_to_callee on all calls in BB. 
*/ void redirect_all_calls (copy_body_data * id, basic_block bb)