[gcc r15-4620] c++/modules: Prevent maybe_clone_body being called multiple times [PR115007]
https://gcc.gnu.org/g:058ed8705a7b38bef2c107b6ff5de243aebd57b4 commit r15-4620-g058ed8705a7b38bef2c107b6ff5de243aebd57b4 Author: Nathaniel Shead Date: Wed Aug 21 00:50:53 2024 +1000 c++/modules: Prevent maybe_clone_body being called multiple times [PR115007] The ICE in the linked PR is caused because maybe_clone_body is not prepared to be called on a declaration that has already had clones created; what happens otherwise is that start_preparsed_function early exits and never sets up cfun, causing a segfault later on. To fix this we ensure that post_load_processing only calls maybe_clone_body if TREE_ASM_WRITTEN has not been marked on the declaration yet, and (if maybe_clone_body succeeds) marks this flag on the decl so that it doesn't get called again later when finalising deferred vague linkage declarations in c_parse_final_cleanups. As a bonus this now allows us to only keep the DECL_SAVED_TREE around in expand_or_defer_fn_1 for modules which have CMIs, which will have benefits for LTO performance in non-interface TUs. For clarity we also update the streaming code to do post_load_decls for maybe in-charge cdtors rather than any DECL_ABSTRACT_P declaration, as this is more accurate to the decls affected by maybe_clone_body. PR c++/115007 gcc/cp/ChangeLog: * module.cc (module_state::read_cluster): Replace DECL_ABSTRACT_P with DECL_MAYBE_IN_CHARGE_CDTOR_P. (post_load_processing): Check and mark TREE_ASM_WRITTEN. * semantics.cc (expand_or_defer_fn_1): Use the more specific module_maybe_has_cmi_p instead of modules_p. gcc/testsuite/ChangeLog: * g++.dg/modules/virt-6_a.C: New test. * g++.dg/modules/virt-6_b.C: New test. 
Signed-off-by: Nathaniel Shead Diff: --- gcc/cp/module.cc| 7 --- gcc/cp/semantics.cc | 2 +- gcc/testsuite/g++.dg/modules/virt-6_a.C | 13 + gcc/testsuite/g++.dg/modules/virt-6_b.C | 6 ++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index fd9b1d3bf2e1..d7494cc813a0 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -15539,7 +15539,7 @@ module_state::read_cluster (unsigned snum) if (abstract) ; - else if (DECL_ABSTRACT_P (decl)) + else if (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl)) vec_safe_push (post_load_decls, decl); else { @@ -17958,10 +17958,11 @@ post_load_processing () dump () && dump ("Post-load processing of %N", decl); - gcc_checking_assert (DECL_ABSTRACT_P (decl)); + gcc_checking_assert (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl)); /* Cloning can cause loading -- specifically operator delete for the deleting dtor. */ - maybe_clone_body (decl); + if (!TREE_ASM_WRITTEN (decl) && maybe_clone_body (decl)) + TREE_ASM_WRITTEN (decl) = 1; } cfun = old_cfun; diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 8c1a8b3c68dc..266fba11eec8 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -5242,7 +5242,7 @@ expand_or_defer_fn_1 (tree fn) demand, so we also need to keep the body. Otherwise we don't need it anymore. 
*/ if (!DECL_DECLARED_CONSTEXPR_P (fn) - && !(modules_p () && vague_linkage_p (fn))) + && !(module_maybe_has_cmi_p () && vague_linkage_p (fn))) DECL_SAVED_TREE (fn) = NULL_TREE; return false; } diff --git a/gcc/testsuite/g++.dg/modules/virt-6_a.C b/gcc/testsuite/g++.dg/modules/virt-6_a.C new file mode 100644 index ..68e466ace3ff --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/virt-6_a.C @@ -0,0 +1,13 @@ +// PR c++/115007 +// { dg-additional-options "-fmodules-ts -Wno-global-module" } +// { dg-module-cmi M:a } + +module; +struct S { + virtual ~S() = default; + virtual void f() = 0; +}; +module M:a; +extern S* p; +template void format(T) { p->~S(); } +template void format(int); diff --git a/gcc/testsuite/g++.dg/modules/virt-6_b.C b/gcc/testsuite/g++.dg/modules/virt-6_b.C new file mode 100644 index ..c53f5fac742b --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/virt-6_b.C @@ -0,0 +1,6 @@ +// PR c++/115007 +// { dg-additional-options "-fmodules-ts" } +// { dg-module-cmi M } + +export module M; +import :a;
[gcc r15-4621] c++/modules: Support decloned cdtors
https://gcc.gnu.org/g:29efc621b7c66ec67d10fc87cddbb3f1ab709fb2 commit r15-4621-g29efc621b7c66ec67d10fc87cddbb3f1ab709fb2 Author: Nathaniel Shead Date: Wed Aug 21 01:08:36 2024 +1000 c++/modules: Support decloned cdtors When compiling with '-fdeclone-ctor-dtor' (enabled by default with -Os), we run into issues where we don't correctly emit the underlying functions. We also need to ensure that COMDAT constructors are marked as such before 'maybe_clone_body' attempts to propagate COMDAT groups to the new thunks. gcc/cp/ChangeLog: * module.cc (post_load_processing): Mark COMDAT as needed, emit declarations if maybe_clone_body fails. gcc/testsuite/ChangeLog: * g++.dg/modules/clone-2_a.C: New test. * g++.dg/modules/clone-2_b.C: New test. * g++.dg/modules/clone-3_a.C: New test. * g++.dg/modules/clone-3_b.C: New test. Signed-off-by: Nathaniel Shead Diff: --- gcc/cp/module.cc | 20 gcc/testsuite/g++.dg/modules/clone-2_a.C | 7 +++ gcc/testsuite/g++.dg/modules/clone-2_b.C | 5 + gcc/testsuite/g++.dg/modules/clone-3_a.C | 9 + gcc/testsuite/g++.dg/modules/clone-3_b.C | 8 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index d7494cc813a0..90ad67daf72b 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -17959,10 +17959,22 @@ post_load_processing () dump () && dump ("Post-load processing of %N", decl); gcc_checking_assert (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl)); - /* Cloning can cause loading -- specifically operator delete for -the deleting dtor. */ - if (!TREE_ASM_WRITTEN (decl) && maybe_clone_body (decl)) - TREE_ASM_WRITTEN (decl) = 1; + + if (DECL_COMDAT (decl)) + comdat_linkage (decl); + if (!TREE_ASM_WRITTEN (decl)) + { + /* Cloning can cause loading -- specifically operator delete for +the deleting dtor. */ + if (maybe_clone_body (decl)) + TREE_ASM_WRITTEN (decl) = 1; + else + { + /* We didn't clone the cdtor, make sure we emit it. 
*/ + note_vague_linkage_fn (decl); + cgraph_node::finalize_function (decl, true); + } + } } cfun = old_cfun; diff --git a/gcc/testsuite/g++.dg/modules/clone-2_a.C b/gcc/testsuite/g++.dg/modules/clone-2_a.C new file mode 100644 index ..47e21581fdc5 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-2_a.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" } +// { dg-module-cmi M } + +export module M; +export struct S { + inline S(int) {} +}; diff --git a/gcc/testsuite/g++.dg/modules/clone-2_b.C b/gcc/testsuite/g++.dg/modules/clone-2_b.C new file mode 100644 index ..80c1e149518b --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-2_b.C @@ -0,0 +1,5 @@ +// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" } + +import M; + +S s(0); diff --git a/gcc/testsuite/g++.dg/modules/clone-3_a.C b/gcc/testsuite/g++.dg/modules/clone-3_a.C new file mode 100644 index ..87de746f5c2c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-3_a.C @@ -0,0 +1,9 @@ +// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" } +// { dg-module-cmi M } + +export module M; + +struct A {}; +export struct B : virtual A { + inline B (int) {} +}; diff --git a/gcc/testsuite/g++.dg/modules/clone-3_b.C b/gcc/testsuite/g++.dg/modules/clone-3_b.C new file mode 100644 index ..23c9ac4a8046 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-3_b.C @@ -0,0 +1,8 @@ +// { dg-module-do link } +// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" } + +import M; + +int main() { + B b(0); +}
[gcc/aoliva/heads/testme] (6 commits) ifcombine across noncontiguous blocks
The branch 'aoliva/heads/testme' was updated to point to: 1070f7874f55... ifcombine across noncontiguous blocks It previously pointed to: f7a9315f62c2... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- f7a9315... ifcombine across noncontiguous blocks 11bf09a... introduce ifcombine_replace_cond d586ea3... adjust update_profile_after_ifcombine for noncontiguous ifc 3e47b27... introduce ifcombine_replace_cond ebc7c0f... drop redundant ifcombine_ifandif parm 6c0e7c5... allow vuses in ifcombine blocks Summary of changes (added commits): --- 1070f78... ifcombine across noncontiguous blocks cb75ab7... extend ifcombine_replace_cond to handle noncontiguous ifcom b7de5a6... adjust update_profile_after_ifcombine for noncontiguous ifc 5b44192... introduce ifcombine_replace_cond db11a63... drop redundant ifcombine_ifandif parm 683dabf... allow vuses in ifcombine blocks
[gcc(refs/users/aoliva/heads/testme)] drop redundant ifcombine_ifandif parm
https://gcc.gnu.org/g:db11a63b19b436accc5f0b4afebec10c5ab8aae6 commit db11a63b19b436accc5f0b4afebec10c5ab8aae6 Author: Alexandre Oliva Date: Thu Oct 24 05:25:24 2024 -0300 drop redundant ifcombine_ifandif parm In preparation to changes that may modify both inner and outer conditions in ifcombine, drop the redundant parameter result_inv, that is always identical to inner_inv. for gcc/ChangeLog * tree-ssa-ifcombine.cc (ifcombine_ifandif): Drop redundant result_inv parm. Adjust all callers. Diff: --- gcc/tree-ssa-ifcombine.cc | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index ed20a231951a..0a2ba970548c 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -401,14 +401,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. - inner_inv, outer_inv and result_inv indicate whether the conditions - are inverted. + inner_inv, outer_inv indicate whether the conditions are inverted. Returns true if the edges to the common else basic-block were merged. */ static bool ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, - basic_block outer_cond_bb, bool outer_inv, bool result_inv) + basic_block outer_cond_bb, bool outer_inv) { + bool result_inv = inner_inv; gimple_stmt_iterator gsi; tree name1, name2, bit1, bit2, bits1, bits2; @@ -693,8 +693,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false, - false); + return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false); } /* And a version where the outer condition is negated. */ @@ -711,8 +710,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... 
*/ - return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true, - false); + return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true); } /* The || form is characterized by a common then_bb with the @@ -731,8 +729,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true, - true); + return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true); } /* And a version where the outer condition is negated. */ @@ -748,8 +745,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false, - true); + return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false); } return false;
[gcc r15-4607] Record nonzero bits in the irange_bitmask of POLY_INT_CSTs
https://gcc.gnu.org/g:b02503d3fe79778ccc149b0abbc736f7972603da commit r15-4607-gb02503d3fe79778ccc149b0abbc736f7972603da Author: Richard Sandiford Date: Thu Oct 24 14:22:34 2024 +0100 Record nonzero bits in the irange_bitmask of POLY_INT_CSTs At the moment, ranger punts entirely on POLY_INT_CSTs. Numerical ranges are a bit difficult, unless we do start modelling bounds on the indeterminates. But we can at least track the nonzero bits. gcc/ * value-query.cc (range_query::get_tree_range): Use get_nonzero_bits to populate the irange_bitmask of a POLY_INT_CST. gcc/testsuite/ * gcc.target/aarch64/sve/cnt_fold_6.c: New test. Diff: --- gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c | 75 +++ gcc/value-query.cc| 7 +++ 2 files changed, 82 insertions(+) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c new file mode 100644 index ..9d9e1ca93303 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +/* +** f1: +** ... +** cntb(x[0-9]+) +** ... +** add x[0-9]+, \1, #?16 +** ... +** csel[^\n]+ +** ret +*/ +uint64_t +f1 (int x) +{ + uint64_t y = x ? svcnth () : svcnth () + 8; + y >>= 3; + y <<= 4; + return y; +} + +/* +** f2: +** ... +** (?:and|[al]sr) [^\n]+ +** ... +** ret +*/ +uint64_t +f2 (int x) +{ + uint64_t y = x ? svcnth () : svcnth () + 8; + y >>= 4; + y <<= 5; + return y; +} + +/* +** f3: +** ... +** cntw(x[0-9]+) +** ... +** add x[0-9]+, \1, #?16 +** ... +** csel[^\n]+ +** ret +*/ +uint64_t +f3 (int x) +{ + uint64_t y = x ? svcntd () : svcntd () + 8; + y >>= 1; + y <<= 2; + return y; +} + +/* +** f4: +** ... +** (?:and|[al]sr) [^\n]+ +** ... +** ret +*/ +uint64_t +f4 (int x) +{ + uint64_t y = x ? 
svcntd () : svcntd () + 8; + y >>= 2; + y <<= 3; + return y; +} diff --git a/gcc/value-query.cc b/gcc/value-query.cc index cac2cb5b2bc0..34499da1a987 100644 --- a/gcc/value-query.cc +++ b/gcc/value-query.cc @@ -375,6 +375,13 @@ range_query::get_tree_range (vrange &r, tree expr, gimple *stmt, } default: + if (POLY_INT_CST_P (expr)) + { + unsigned int precision = TYPE_PRECISION (type); + r.set_varying (type); + r.update_bitmask ({ wi::zero (precision), get_nonzero_bits (expr) }); + return true; + } break; } if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:a050f30dcecc725fd5b8cdea57101710f030d81a commit a050f30dcecc725fd5b8cdea57101710f030d81a Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Rework ifcombine to support merging conditions from noncontiguous blocks. This depends on earlier preparation changes. The function that attempted to ifcombine a block with its immediate predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks eligible for ifcombine, attempting to combine with them. The function that actually drives the combination of a pair of blocks, tree_ssa_ifcombine_bb_1, now takes an additional parameter: the successor of outer that leads to inner. The function that recognizes if_then_else patterns is modified to enable testing without distinguishing between then and else, or to require nondegenerate conditions, that aren't worth combining with. for gcc/ChangeLog * tree-ssa-ifcombine.cc (recognize_if_then_else): Support relaxed then/else testing; require nondegenerate condition otherwise. (tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it instead of inner_cond_bb. Adjust callers. (tree_ssa_ifcombine_bb): Loop over dominating outer blocks eligible for ifcombine. (pass_tree_ifcombine::execute): Noted potential need for changes to the post-combine logic. Diff: --- gcc/tree-ssa-ifcombine.cc | 140 -- 1 file changed, 111 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index c271d1e86a9b..d7cb75619978 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb) is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. 
If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -899,19 +908,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. - PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. 
*/ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-effects. */ if (phi_pred_bb != else_bb - && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb) + && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb) && same_phi_args_p (outer_cond_bb, phi_pred_bb, el
[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:a8ecc1a3ff1faece3363d50d7c501123c2be6a5b commit a8ecc1a3ff1faece3363d50d7c501123c2be6a5b Author: Michael Meissner Date: Thu Oct 24 12:26:28 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 143 +++--- 1 file changed, 126 insertions(+), 17 deletions(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index fe43d0cb19a8..de75ac6f0e81 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,18 +1,8 @@ - Branch work182-sha, patch #402 - -Add missing test. - -2024-10-16 Michael Meissner - -gcc/testsuite/ - - * gcc.target/powerpc/vector-rotate-left.c: New test. - - Branch work182-sha, patch #401 + Branch work182-sha, patch #411 was reverted Add potential p-future XVRLD and XVRLDI instructions. -2024-10-16 Michael Meissner +2024-10-24 Michael Meissner gcc/ @@ -24,11 +14,128 @@ gcc/ * config/rs6000/rs6000.md (isa attribute): Add xvrlw. (enabled attribute): Add support for xvrlw. - Branch work182-sha, patch #400 +gcc/testsuite/ + + * gcc.target/powerpc/vector-rotate-left.c: New test. + + Branch work182-sha, patch #410 was reverted + +PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations + +The multibuff.c benchmark attached to the PR target/117251 compiled for Power10 +PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14 +compared to GCC 11 - GCC 13, due to excessive amounts of spilling. + +The main function for the multibuf.c file has 3,747 lines, all of which are +using vector unsigned long long. There are 696 vector rotates (all rotates are +constant), 1,824 vector xor's and 600 vector andc's. + +In looking at it, the main thing that steps out is the reason for either +spilling or moving variables is the support in fusion.md (generated by +genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other +vec_xor's feeding into vec_xor. 
+ +On the powerpc for power10, there is a special fusion mode that happens if the +machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction +and the VANDC/VXOR feeds into the 2nd VXOR instruction. + +While the Power10 has 64 vector registers (which uses the XXL prefix to do +logical operations), the fusion only works with the older Altivec instruction +set (which uses the V prefix). The Altivec instruction only has 32 vector +registers (which are overlaid over the VSX vector registers 32-63). + +By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this +fusion, it means that the register allocator has more register pressure for the +traditional Altivec registers instead of the VSX registers. + +In addition, since there are vector rotates, these rotates only work on the +traditional Altivec registers, which adds to the Altivec register pressure. + +Finally in addition to doing the explicit xor, andc, and rotates using the +Altivec registers, we have to also load vector constants for the rotate amount +and these registers also are allocated as Altivec registers. -Initial support for adding xxeval fusion support. +Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has +many more vector moves that the later compilers. Thus even though it has way +less spills, the vector moves are why GCC 11 have the slowest results. -2024-10-16 Michael Meissner +There is an instruction that was added in power10 (XXEVAL) that does provide +fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion. + +The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR, +so I have written the patch to prefer doing the Altivec instructions if they +don't need a temporary register. + +Here are the results for adding support for XXEVAL for the multibuff.c +benchmark attached to the PR. 
Note that we essentially recover the speed with +this patch that were lost with GCC 14 and the current trunk: + + XXEVALTrunk GCC14 GCC13 GCC12GCC11 + --- - - -- +Benchmark time in seconds 5.53 6.156.265.575.61 9.56 + +Fuse VANDC -> VXOR 209 600 600 600 600 600 +Fuse VXOR -> VXOR 0 240 240 120 120 120 +XXEVAL to fuse ANDC -> XOR 391 00 0 0 0 +XXEVAL to fuse XOR -> XOR240 00 0 0 0 + +Spill vector to stack 78 364 364 172 184 110 +Load spilled vector from stack 431 962 962 713 723 166 +Vector moves 10 100 100 70 72 3,055 + +Vector rota
[gcc r15-4623] SVE intrinsics: Fold svaba with op1 all zeros to svabd.
https://gcc.gnu.org/g:0b22f0585348335369298c7d39afd171758eebe9 commit r15-4623-g0b22f0585348335369298c7d39afd171758eebe9 Author: Jennifer Schmitz Date: Thu Oct 24 05:11:31 2024 -0700 SVE intrinsics: Fold svaba with op1 all zeros to svabd. Similar to https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html, this patch implements folding of svaba to svabd if op1 is all zeros, resulting in the use of UABD/SABD instructions instead of UABA/SABA. Tests were added to check the produced assembly for use of UABD/SABD, also for the _n case. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * config/aarch64/aarch64-sve-builtins-sve2.cc (svaba_impl::fold): Fold svaba to svabd if op1 is all zeros. gcc/testsuite/ * gcc.target/aarch64/sve2/acle/asm/aba_s32.c: New tests. * gcc.target/aarch64/sve2/acle/asm/aba_s64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/aba_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/aba_u64.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 18 + .../gcc.target/aarch64/sve2/acle/asm/aba_s32.c | 23 ++ .../gcc.target/aarch64/sve2/acle/asm/aba_s64.c | 22 + .../gcc.target/aarch64/sve2/acle/asm/aba_u32.c | 22 + .../gcc.target/aarch64/sve2/acle/asm/aba_u64.c | 22 + 5 files changed, 107 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index ddd6e466ee3a..d29c2209fdfe 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -80,6 +80,24 @@ unspec_sqrdcmlah (int rot) class svaba_impl : public function_base { +public: + gimple * + fold (gimple_folder &f) const override + { +/* Fold to svabd if op1 is all zeros. 
*/ +tree op1 = gimple_call_arg (f.call, 0); +if (!integer_zerop (op1)) + return NULL; +function_instance instance ("svabd", functions::svabd, + shapes::binary_opt_n, f.mode_suffix_id, + f.type_suffix_ids, GROUP_none, PRED_x); +gcall *call = f.redirect_call (instance); +/* Add a ptrue as predicate, because unlike svaba, svabd is + predicated. */ +gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ())); +return call; + } + public: rtx expand (function_expander &e) const override diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c index 73c002825267..655ad6302414 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c @@ -108,3 +108,26 @@ TEST_UNIFORM_Z (aba_11_s32_tied2, svint32_t, TEST_UNIFORM_Z (aba_11_s32_untied, svint32_t, z0 = svaba_n_s32 (z1, z2, 11), z0 = svaba (z1, z2, 11)) + +/* +** aba_11_s32_zeroop1n: +** ptrue (p[0-7])\.b, all +** mov z0\.s, #11 +** sabdz0\.s, \1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (aba_11_s32_zeroop1n, svint32_t, + z0 = svaba_n_s32 (svdup_s32 (0), z1, 11), + z0 = svaba (svdup_s32 (0), z1, 11)) + + +/* +** aba_11_s32_zeroop1: +** ptrue (p[0-7])\.b, all +** mov z0\.s, #11 +** sabdz0\.s, \1/m, z0\.s, z1\.s +** ret +*/ +TEST_UNIFORM_Z (aba_11_s32_zeroop1, svint32_t, + z0 = svaba_s32 (svdup_s32 (0), z1, svdup_s32 (11)), + z0 = svaba (svdup_s32 (0), z1, svdup_s32 (11))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c index 0c169dbf6136..8b1eb7d2f4e6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c @@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_s64_tied2, svint64_t, TEST_UNIFORM_Z (aba_11_s64_untied, svint64_t, z0 = svaba_n_s64 (z1, z2, 11), z0 = svaba (z1, z2, 11)) + +/* +** aba_11_s64_zeroop1n: +** ptrue 
(p[0-7])\.b, all +** mov z0\.d, #11 +** sabdz0\.d, \1/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (aba_11_s64_zeroop1n, svint64_t, + z0 = svaba_n_s64 (svdup_s64 (0), z1, 11), + z0 = svaba (svdup_s64 (0), z1, 11)) + +/* +** aba_11_s64_zeroop1: +** ptrue (p[0-7])\.b, all +** mov z0\.d, #11 +** sabdz0\.d, \1/m, z0\.d, z1\.d +** ret +*/ +TEST_UNIFORM_Z (aba_11_s64_zeroop1, svint64_t, + z0 = svaba_s64 (svdup_s64 (0), z1, svdup_s64 (11)), + z0 = svaba (svdup_s64 (0), z1, svdup_s64 (11))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve
[gcc r15-4608] aarch64: libstdc++: Use shufflevector instead of shuffle in opt_random.h
https://gcc.gnu.org/g:cc33531df065c20a52cb0c35637f096978128b46 commit r15-4608-gcc33531df065c20a52cb0c35637f096978128b46 Author: Ricardo Jesus Date: Mon Oct 14 14:28:02 2024 +0100 aarch64: libstdc++: Use shufflevector instead of shuffle in opt_random.h This patch modifies the implementation of the vectorized mersenne twister random number generator to use __builtin_shufflevector instead of __builtin_shuffle. This makes it (almost) compatible with Clang. To make the implementation fully compatible with Clang, Clang will need to support internal Neon types like __Uint8x16_t and __Uint32x4_t, which currently it does not. This looks like an oversight in Clang and so will be addressed separately. I see no codegen change with this patch. Bootstrapped and tested on aarch64-none-linux-gnu. libstdc++-v3/ChangeLog: * config/cpu/aarch64/opt/ext/opt_random.h (__VEXT): Replace uses of __builtin_shuffle with __builtin_shufflevector. (__aarch64_lsl_128): Move shift amount to a template parameter. (__aarch64_lsr_128): Move shift amount to a template parameter. (__aarch64_recursion): Update call sites of __aarch64_lsl_128 and __aarch64_lsr_128. 
Signed-off-by: Ricardo Jesus Diff: --- .../config/cpu/aarch64/opt/ext/opt_random.h| 28 -- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h index 7f756d1572f3..7eb816abcd00 100644 --- a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h +++ b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h @@ -35,13 +35,13 @@ #ifdef __ARM_NEON #ifdef __ARM_BIG_ENDIAN -# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \ -{16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \ - 24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C}) +# define __VEXT(_A,_B,_C) __builtin_shufflevector (_A, _B, \ +16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \ +24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C) #else -# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \ -{_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \ - _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15}) +# define __VEXT(_A,_B,_C) __builtin_shufflevector (_B, _A, \ +_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \ +_C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15) #endif #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ @@ -52,9 +52,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION namespace { // Logical Shift right 128-bits by c * 8 bits -__extension__ extern __inline __Uint32x4_t +__extension__ +template +extern __inline __Uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__aarch64_lsr_128 (__Uint8x16_t __a, __const int __c) +__aarch64_lsr_128 (__Uint8x16_t __a) { const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -64,9 +66,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Logical Shift left 128-bits by c * 8 bits -__extension__ extern __inline __Uint32x4_t +__extension__ +template +extern __inline __Uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__aarch64_lsl_128 (__Uint8x16_t 
__a, __const int __c) +__aarch64_lsl_128 (__Uint8x16_t __a) { const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -82,14 +86,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __Uint32x4_t __e) { __Uint32x4_t __y = (__b >> __sr1); - __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2); + __Uint32x4_t __z = __aarch64_lsr_128<__sr2> ((__Uint8x16_t) __c); __Uint32x4_t __v = __d << __sl1; __z = __z ^ __a; __z = __z ^ __v; - __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2); + __Uint32x4_t __x = __aarch64_lsl_128<__sl2> ((__Uint8x16_t) __a); __y = __y & __e; __z = __z ^ __x;
[gcc r15-4600] Make more places handle exact_div like trunc_div
https://gcc.gnu.org/g:3e93828e601c62176ea2b4a1dd0b5a1db5657a8e commit r15-4600-g3e93828e601c62176ea2b4a1dd0b5a1db5657a8e Author: Richard Sandiford Date: Thu Oct 24 14:22:30 2024 +0100 Make more places handle exact_div like trunc_div I tried to look for places where we were handling TRUNC_DIV_EXPR more favourably than EXACT_DIV_EXPR. Most of the places that I looked at but didn't change were handling div/mod pairs. But there's bound to be others I missed... gcc/ * match.pd: Extend some rules to handle exact_div like trunc_div. * tree.h (trunc_or_exact_div_p): New function. * tree-ssa-loop-niter.cc (is_rshift_by_1): Use it. * tree-ssa-loop-ivopts.cc (force_expr_to_var_cost): Handle EXACT_DIV_EXPR. Diff: --- gcc/match.pd| 60 +++-- gcc/tree-ssa-loop-ivopts.cc | 2 ++ gcc/tree-ssa-loop-niter.cc | 2 +- gcc/tree.h | 13 ++ 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 0455dfa69937..9024277e5d34 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -492,27 +492,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) of A starting from shift's type sign bit are zero, as (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL, so it is valid only if A >> 31 is zero. */ -(simplify - (trunc_div (convert?@0 @3) (convert2? 
(lshift integer_onep@1 @2))) - (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) - && (!VECTOR_TYPE_P (type) - || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) - || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) - && (useless_type_conversion_p (type, TREE_TYPE (@1)) - || (element_precision (type) >= element_precision (TREE_TYPE (@1)) - && (TYPE_UNSIGNED (TREE_TYPE (@1)) - || (element_precision (type) - == element_precision (TREE_TYPE (@1))) - || (INTEGRAL_TYPE_P (type) - && (tree_nonzero_bits (@0) - & wi::mask (element_precision (TREE_TYPE (@1)) - 1, - true, - element_precision (type))) == 0) - (if (!VECTOR_TYPE_P (type) - && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) - && element_precision (TREE_TYPE (@3)) < element_precision (type)) -(convert (rshift @3 @2)) -(rshift @0 @2 +(for div (trunc_div exact_div) + (simplify + (div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2))) + (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) + && (!VECTOR_TYPE_P (type) + || target_supports_op_p (type, RSHIFT_EXPR, optab_vector) + || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)) + && (useless_type_conversion_p (type, TREE_TYPE (@1)) + || (element_precision (type) >= element_precision (TREE_TYPE (@1)) + && (TYPE_UNSIGNED (TREE_TYPE (@1)) + || (element_precision (type) + == element_precision (TREE_TYPE (@1))) + || (INTEGRAL_TYPE_P (type) + && (tree_nonzero_bits (@0) + & wi::mask (element_precision (TREE_TYPE (@1)) - 1, + true, + element_precision (type))) == 0) +(if (!VECTOR_TYPE_P (type) +&& useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)) +&& element_precision (TREE_TYPE (@3)) < element_precision (type)) + (convert (rshift @3 @2)) + (rshift @0 @2) /* Preserve explicit divisions by 0: the C++ front-end wants to detect undefined behavior in constexpr evaluation, and assuming that the division @@ -947,13 +948,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { build_one_cst (utype); }))) /* Simplify (unsigned t * 
2)/2 -> unsigned t & 0x7FFF. */ -(simplify - (trunc_div (mult @0 integer_pow2p@1) @1) - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) - (bit_and @0 { wide_int_to_tree - (type, wi::mask (TYPE_PRECISION (type) -- wi::exact_log2 (wi::to_wide (@1)), -false, TYPE_PRECISION (type))); }))) +(for div (trunc_div exact_div) + (simplify + (div (mult @0 integer_pow2p@1) @1) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) + (bit_and @0 { wide_int_to_tree +(type, wi::mask (TYPE_PRECISION (type) + - wi::exact_log2 (wi::to_wide (@1)), + false, TYPE_PRECISION (type))); } /* Simplify (unsigned t / 2) * 2 -> unsigned t & ~1. */ (simplify @@ -5740,7 +5742,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Sink binary operation to branches, but on
[gcc r15-4616] testsuite: Use -fno-ipa-icf in gcc.dg/stack-check-2.c
https://gcc.gnu.org/g:097994003cb3b09af2b07238e54f08b89dd34369 commit r15-4616-g097994003cb3b09af2b07238e54f08b89dd34369 Author: Joseph Myers Date: Thu Oct 24 19:41:26 2024 + testsuite: Use -fno-ipa-icf in gcc.dg/stack-check-2.c One test failing with a -std=gnu23 default that I wanted to investigate further is gcc.dg/stack-check-2.c. The failures are FAIL: gcc.dg/stack-check-2.c scan-tree-dump-not optimized "tail call" FAIL: gcc.dg/stack-check-2.c scan-tree-dump-not tailc "tail call" but it turns out the tail calls in question are not the ones that test is actually checking for. Rather, when () is interpreted as (void) in C23 mode, ICF notices that certain functions are identical and so turns test_indirect_2 into a tail call to test_indirect_1 and test_indirect_casted_2 into a tail call to test_indirect_casted_1 (which it didn't do previously when one function used () and one used (void)). To avoid these spurious failures, make the test use -fno-ipa-icf rather than relying on () and (void) giving different function types to avoid ICF. Tested for x86_64-pc-linux-gnu. * gcc.dg/stack-check-2.c: Use -fno-ipa-icf. Diff: --- gcc/testsuite/gcc.dg/stack-check-2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/stack-check-2.c b/gcc/testsuite/gcc.dg/stack-check-2.c index 196c4bbfbdda..a821c0ef8657 100644 --- a/gcc/testsuite/gcc.dg/stack-check-2.c +++ b/gcc/testsuite/gcc.dg/stack-check-2.c @@ -12,7 +12,7 @@ depend on to elide stack probes. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized -fno-ipa-icf" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ extern void foo (void) __attribute__ ((__noreturn__));
[gcc/aoliva/heads/testme] (2 commits) fold fold_truth_andor field merging into ifcombine
The branch 'aoliva/heads/testme' was updated to point to: 5a9cf11ec7d1... fold fold_truth_andor field merging into ifcombine It previously pointed to: 4f8e1ea7f2e1... fold fold_truth_andor field merging into ifcombine Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 4f8e1ea... fold fold_truth_andor field merging into ifcombine 88b6065... preserve TRUTH_ANDIF handling in ifcombine_replace_cond Summary of changes (added commits): --- 5a9cf11... fold fold_truth_andor field merging into ifcombine 374dec3... handle TRUTH_ANDIF cond exprs in ifcombine_replace_cond
[gcc r15-4618] AVR: target/116953 - Restore recog_data after calling jump_over_one_insn_p.
https://gcc.gnu.org/g:ca0ab7a0ac18911181e9161cfb8b87fb90039612 commit r15-4618-gca0ab7a0ac18911181e9161cfb8b87fb90039612 Author: Georg-Johann Lay Date: Tue Oct 22 11:51:44 2024 +0200 AVR: target/116953 - Restore recog_data after calling jump_over_one_insn_p. The previous fix for PR116953 is incomplete because references to recog_data are escaping avr_out_sbxx_branch() in the form of %-operands in the returned asm code template. This patch reverts the previous fix, and re-extracts the operands by means of extract_constrain_insn_cached() after the call of jump_over_one_insn_p(). PR target/116953 gcc/ * config/avr/avr.cc (avr_out_sbxx_branch): Revert previous fix for PR116953 (r15-4078). Run extract_constrain_insn_cached on the current insn after calling jump_over_one_insn_p. Diff: --- gcc/config/avr/avr.cc | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 735d05b1e747..b69a9c24aa0d 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -13603,16 +13603,16 @@ avr_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg) Operand 3: label to jump to if the test is true. */ const char * -avr_out_sbxx_branch (rtx_insn *insn, rtx xop[]) +avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]) { - // jump_over_one_insn_p may call extract on the next insn, clobbering - // recog_data.operand. Hence make a copy of the operands (PR116953). - rtx operands[] = { xop[0], xop[1], xop[2], xop[3] }; - rtx_code comp = GET_CODE (operands[0]); bool long_jump = get_attr_length (insn) >= 4; bool reverse = long_jump || jump_over_one_insn_p (insn, operands[3]); + // PR116953: jump_over_one_insn_p may call extract on the next insn, + // clobbering recog_data.operand. Thus, restore recog_data. + extract_constrain_insn_cached (insn); + if (comp == GE) comp = EQ; else if (comp == LT)
[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:95ecd3d784f74351018560ac99d6398ca429a252 commit 95ecd3d784f74351018560ac99d6398ca429a252 Author: Michael Meissner Date: Thu Oct 24 12:26:59 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index de75ac6f0e81..dc35b2de5a28 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,4 +1,4 @@ - Branch work182-sha, patch #411 was reverted + Branch work182-sha, patch #411 Add potential p-future XVRLD and XVRLDI instructions. @@ -18,7 +18,7 @@ gcc/testsuite/ * gcc.target/powerpc/vector-rotate-left.c: New test. - Branch work182-sha, patch #410 was reverted + Branch work182-sha, patch #410 PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
[gcc r15-4613] libstdc++: Fix typos in tests using macros for std::float128_t support
https://gcc.gnu.org/g:f7bcdf449148872d576b261e9bf385b0e12be0c0 commit r15-4613-gf7bcdf449148872d576b261e9bf385b0e12be0c0 Author: Jonathan Wakely Date: Thu Oct 24 11:38:39 2024 +0100 libstdc++: Fix typos in tests using macros for std::float128_t support These tests check `_GLIBCXX_DOUBLE_IS_IEEE_BINARY128` but that's never defined, it should be "LDOUBLE" not "DOUBLE". libstdc++-v3/ChangeLog: * testsuite/26_numerics/complex/ext_c++23.cc: Fix typo in macro. * testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc: Likewise. * testsuite/26_numerics/headers/cmath/functions_std_c++23.cc: Likewise. * testsuite/26_numerics/headers/cmath/nextafter_c++23.cc: Likewise. Diff: --- libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc | 2 +- libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc | 2 +- libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc | 2 +- libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc b/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc index 0fc3d6f1a666..5f4ff06448e0 100644 --- a/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc +++ b/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc @@ -70,7 +70,7 @@ main() } #endif #if defined(__STDCPP_FLOAT128_T__) \ -&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \ +&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \ || defined(_GLIBCXX_HAVE_FLOAT128_MATH)) { std::float128_t p[2] = {}; diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc b/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc index 3c2377fd6987..983027ff6546 100644 --- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc +++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc @@ -119,7 +119,7 @@ main() test_functions(); #endif #if 
defined(__STDCPP_FLOAT128_T__) \ -&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \ +&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \ || defined(_GLIBCXX_HAVE_FLOAT128_MATH)) test_functions(); #endif diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc b/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc index ea68ac5da755..bf07493ecd47 100644 --- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc +++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc @@ -131,7 +131,7 @@ main() } #endif #if defined(__STDCPP_FLOAT128_T__) \ -&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \ +&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \ || defined(_GLIBCXX_HAVE_FLOAT128_MATH)) { std::float128_t p[128] = {}; diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc b/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc index 91767d22cc3f..2d0f8017f4aa 100644 --- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc +++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc @@ -117,7 +117,7 @@ main () test (); #endif #if defined(__STDCPP_FLOAT128_T__) \ -&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \ +&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \ || defined(_GLIBCXX_HAVE_FLOAT128_MATH)) test (); #endif
[gcc r15-4605] Handle POLY_INT_CSTs in get_nonzero_bits
https://gcc.gnu.org/g:d6c4badffafa295f6082b7d74de314e131f30a96 commit r15-4605-gd6c4badffafa295f6082b7d74de314e131f30a96 Author: Richard Sandiford Date: Thu Oct 24 14:22:33 2024 +0100 Handle POLY_INT_CSTs in get_nonzero_bits This patch extends get_nonzero_bits to handle POLY_INT_CSTs. The easiest (but also most useful) case is that the number of trailing zeros in the runtime value is at least the number of trailing zeros in each individual component. In principle, we could do this for coeffs 1 and above only, and then OR in coeff 0. This would give ~0x11 for [14, 32], say. But that's future work. gcc/ * tree-ssanames.cc (get_nonzero_bits): Handle POLY_INT_CSTs. * match.pd (with_possible_nonzero_bits): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/cnt_fold_4.c: New test. Diff: --- gcc/match.pd | 2 + gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c | 61 +++ gcc/tree-ssanames.cc | 3 ++ 3 files changed, 66 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 17613ec8ade4..391c60bdfb32 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2872,6 +2872,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) possibly set. */ (match with_possible_nonzero_bits INTEGER_CST@0) +(match with_possible_nonzero_bits + POLY_INT_CST@0) (match with_possible_nonzero_bits SSA_NAME@0 (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c new file mode 100644 index ..b7a53701993c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +/* +** f1: +** cnthx0 +** ret +*/ +uint64_t +f1 () +{ + uint64_t x = svcntw (); + x >>= 2; + return x << 3; +} + +/* +** f2: +** [^\n]+ +** [^\n]+ +** ...
+** ret +*/ +uint64_t +f2 () +{ + uint64_t x = svcntd (); + x >>= 2; + return x << 3; +} + +/* +** f3: +** cntbx0, all, mul #4 +** ret +*/ +uint64_t +f3 () +{ + uint64_t x = svcntd (); + x >>= 1; + return x << 6; +} + +/* +** f4: +** [^\n]+ +** [^\n]+ +** ... +** ret +*/ +uint64_t +f4 () +{ + uint64_t x = svcntd (); + x >>= 2; + return x << 2; +} diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc index 4f83fcbb5171..ae6a0cd48fe6 100644 --- a/gcc/tree-ssanames.cc +++ b/gcc/tree-ssanames.cc @@ -502,6 +502,9 @@ get_nonzero_bits (const_tree name) if (TREE_CODE (name) == INTEGER_CST) return wi::to_wide (name); + if (POLY_INT_CST_P (name)) +return -known_alignment (wi::to_poly_wide (name)); + /* Use element_precision instead of TYPE_PRECISION so complex and vector types get a non-zero precision. */ unsigned int precision = element_precision (TREE_TYPE (name));
[gcc r15-4604] Try to simplify (X >> C1) << (C1 + C2) -> X << C2
https://gcc.gnu.org/g:ec8e8d359690e7347e6e718cc9254d59f694e138 commit r15-4604-gec8e8d359690e7347e6e718cc9254d59f694e138 Author: Richard Sandiford Date: Thu Oct 24 14:22:32 2024 +0100 Try to simplify (X >> C1) << (C1 + C2) -> X << C2 This patch adds a rule to simplify (X >> C1) << (C1 + C2) -> X << C2 when the low C1 bits of X are known to be zero. Any single conversion can take place between the shifts. E.g. for a truncating conversion, any extra bits of X that are preserved by truncating after the shift are immediately lost by the shift left. And the sign bits used for an extending conversion are the same as the sign bits used for the rshift. (A double conversion of say int->unsigned->uint64_t would be wrong though.) gcc/ * match.pd: Simplify (X >> C1) << (C1 + C2) -> X << C2 if the low C1 bits of X are zero. gcc/testsuite/ * gcc.dg/tree-ssa/shifts-1.c: New test. * gcc.dg/tree-ssa/shifts-2.c: Likewise. Diff: --- gcc/match.pd | 13 +++ gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c | 61 gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c | 21 +++ 3 files changed, 95 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index b9621a47cdf1..17613ec8ade4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4906,6 +4906,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - TYPE_PRECISION (TREE_TYPE (@2) (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1 +#if GIMPLE +/* (X >> C1) << (C1 + C2) -> X << C2 if the low C1 bits of X are zero. */ +(simplify + (lshift (convert? 
(rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1)) + INTEGER_CST@2) + (if (INTEGRAL_TYPE_P (type) + && wi::ltu_p (wi::to_wide (@1), element_precision (type)) + && wi::ltu_p (wi::to_wide (@2), element_precision (type)) + && wi::to_widest (@2) >= wi::to_widest (@1) + && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))) + (lshift (convert @0) (minus @2 @1 +#endif + /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for unsigned x OR truncate into the precision(type) - c lowest bits of signed x (if they have mode precision or a precision of 1). */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c new file mode 100644 index ..d88500ca8ddf --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c @@ -0,0 +1,61 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +unsigned int +f1 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return x << 3; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + unsigned char y = x; + y >>= 2; + return y << 3; +} + +unsigned long +f3 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return (unsigned long) x << 3; +} + +int +f4 (int x) +{ + if (x & 15) +__builtin_unreachable (); + x >>= 4; + return x << 5; +} + +unsigned int +f5 (int x) +{ + if (x & 31) +__builtin_unreachable (); + x >>= 5; + return x << 6; +} + +unsigned int +f6 (unsigned int x) +{ + if (x & 1) +__builtin_unreachable (); + x >>= 1; + return x << (sizeof (int) * __CHAR_BIT__ - 1); +} + +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump-not {>= 3; + return x << 4; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return x << 1; +} + +/* { dg-final { scan-tree-dump-times {
[gcc(refs/users/meissner/heads/work182-sha)] Add potential p-future XVRLD and XVRLDI instructions.
https://gcc.gnu.org/g:d7334a2776a8498ac757eed08c6c4989c8e99c86 commit d7334a2776a8498ac757eed08c6c4989c8e99c86 Author: Michael Meissner Date: Thu Oct 24 12:23:17 2024 -0400 Add potential p-future XVRLD and XVRLDI instructions. 2024-10-24 Michael Meissner gcc/ * config/rs6000/altivec.md (altivec_vrl): Add support for a possible XVRLD instruction in the future. (altivec_vrl_immediate): New insns. * config/rs6000/predicates.md (vector_shift_immediate): New predicate. * config/rs6000/rs6000.h (TARGET_XVRLW): New macro. * config/rs6000/rs6000.md (isa attribute): Add xvrlw. (enabled attribute): Add support for xvrlw. gcc/testsuite/ * gcc.target/powerpc/vector-rotate-left.c: New test. Diff: --- gcc/config/rs6000/altivec.md | 35 +++--- gcc/config/rs6000/predicates.md| 26 gcc/config/rs6000/rs6000.h | 3 ++ gcc/config/rs6000/rs6000.md| 6 +++- .../gcc.target/powerpc/vector-rotate-left.c| 34 + 5 files changed, 99 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 00dad4b91f1c..d4ee50322ca1 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1983,12 +1983,39 @@ } [(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl and will match for -mcpu=future, while other cpus will +;; match the generic insn. +;; However for testing, allow other xvrl variants. In particular, XVRLD for +;; the sha3 tests for multibuf/singlebuf. 
(define_insn "altivec_vrl" - [(set (match_operand:VI2 0 "register_operand" "=v") -(rotate:VI2 (match_operand:VI2 1 "register_operand" "v") - (match_operand:VI2 2 "register_operand" "v")))] + [(set (match_operand:VI2 0 "register_operand" "=v,wa") +(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa") + (match_operand:VI2 2 "register_operand" "v,wa")))] "" - "vrl %0,%1,%2" + "@ + vrl %0,%1,%2 + xvrl %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "isa" "*,xvrlw")]) + +(define_insn "*altivec_vrl_immediate" + [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa") + (match_operand:VI2 2 "vector_shift_immediate" "j,wM,wE,wS")))] + "TARGET_XVRLW && " +{ + rtx op2 = operands[2]; + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op2, mode, &num_insns, &value)) +gcc_unreachable (); + + operands[3] = GEN_INT (value & 0xff); + return "xvrli %x0,%x1,%3"; +} [(set_attr "type" "vecsimple")]) (define_insn "altivec_vrlq" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 1d95e34557e5..fccfbd7e4904 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -728,6 +728,32 @@ return num_insns == 1; }) +;; Return 1 if the operand is a CONST_VECTOR whose elements are all the +;; same and the elements can be an immediate shift or rotate factor +(define_predicate "vector_shift_immediate" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) +return true; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) +return false; + + switch (mode) +{ +case V16QImode: return IN_RANGE (value, 0, 7); +case V8HImode: return IN_RANGE (value, 0, 15); +case V4SImode: return IN_RANGE (value, 0, 31); +case V2DImode: return IN_RANGE (value, 0, 63); +default:break; +} + + return false; +}) + ;; Return 1 if the operand is 
a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 8cfd9faf77dc..1a168c2c9596 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -581,6 +581,9 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Whether we have XVRLW support. */ +#define TARGET_XVRLW TARGET_FUTURE + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 68fbfec95546..420f20d4524b 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -369,7 +369,7 @@ (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) ;; The ISA we impl
[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:d658729b2a69cf7e63d7afbdba4d60e66db844f9 commit d658729b2a69cf7e63d7afbdba4d60e66db844f9 Author: Michael Meissner Date: Thu Oct 24 12:27:43 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index dc35b2de5a28..c6151fce09a4 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -74,21 +74,21 @@ this patch that were lost with GCC 14 and the current trunk: --- - - -- Benchmark time in seconds 5.53 6.156.265.575.61 9.56 -Fuse VANDC -> VXOR 209 600 600 600 600 600 -Fuse VXOR -> VXOR 0 240 240 120 120 120 -XXEVAL to fuse ANDC -> XOR 391 00 0 0 0 -XXEVAL to fuse XOR -> XOR240 00 0 0 0 +Fuse VANDC -> VXOR 209 600 600 600 600 600 +Fuse VXOR -> VXOR 0 240 240 120 120 120 +XXEVAL to fuse ANDC -> XOR 391 00 0 00 +XXEVAL to fuse XOR -> XOR240 00 0 00 -Spill vector to stack 78 364 364 172 184 110 -Load spilled vector from stack 431 962 962 713 723 166 -Vector moves 10 100 100 70 72 3,055 +Spill vector to stack 78 364 364 172 184 110 +Load spilled vector from stack 431 962 962 713 723 166 +Vector moves 10 100 100 70 723,055 -Vector rotate right 696 696 696 696 696 696 -XXLANDC or VANDC 209 600 600 600 600 600 -XXLXOR or VXOR 953 1,8241,824 1,824 1,824 1,825 -XXEVAL 631 00 0 0 0 +Vector rotate right 696 696 696 696 696 696 +XXLANDC or VANDC 209 600 600 600 600 600 +XXLXOR or VXOR 953 1,8241,824 1,824 1,8241,825 +XXEVAL 631 00 0 00 -Load vector rotate constants 24 24 24 24 2424 +Load vector rotate constants 24 24 24 24 24 24 Here are the results for adding support for XXEVAL for the singlebuff.c
[gcc r15-4611] libstdc++: Simplify std::__throw_bad_variant_access
https://gcc.gnu.org/g:0dbc588acaa27a3a56bc9173bd577e1293f10046 commit r15-4611-g0dbc588acaa27a3a56bc9173bd577e1293f10046 Author: Jonathan Wakely Date: Tue Oct 22 16:06:12 2024 +0100 libstdc++: Simplify std::__throw_bad_variant_access This removes the overload of __throw_bad_variant_access that must be called with a string literal. This avoids a potential source of undefined behaviour if that function got misused. The other overload that takes a bool parameter can be adjusted to take an integer index selecting one of the four possible string literals to use, ensuring that the std::bad_variant_access constructor is only called with those literals. Passing an index outside the range [0,3] is bogus, but will still select a valid string literal and avoid undefined behaviour. libstdc++-v3/ChangeLog: * include/std/variant (__throw_bad_variant_access(unsigned)): Define new function as inline friend, with namespace-scope declaration using noreturn attribute. (__throw_bad_variant_access(const char*)): Remove. (__throw_bad_variant_access(bool)): Remove. (visit, visit): Adjust calls to __throw_bad_variant_access. 
Reviewed-by: Patrick Palka Diff: --- libstdc++-v3/include/std/variant | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant index cf532126d798..bd0f9c3252a5 100644 --- a/libstdc++-v3/include/std/variant +++ b/libstdc++-v3/include/std/variant @@ -1402,6 +1402,8 @@ namespace __detail::__variant && (is_swappable_v<_Types> && ...))> swap(variant<_Types...>&, variant<_Types...>&) = delete; + [[noreturn]] void __throw_bad_variant_access(unsigned); + class bad_variant_access : public exception { public: @@ -1411,28 +1413,24 @@ namespace __detail::__variant { return _M_reason; } private: +// Must only be called with a string literal bad_variant_access(const char* __reason) noexcept : _M_reason(__reason) { } // Must point to a string with static storage duration: const char* _M_reason = "bad variant access"; -friend void __throw_bad_variant_access(const char* __what); +friend void __throw_bad_variant_access([[maybe_unused]] unsigned __n) +{ + [[maybe_unused]] static constexpr const char* __reasons[] = { + "std::get: wrong index for variant", + "std::get: variant is valueless", + "std::visit: variant is valueless", + "std::visit: variant is valueless", + }; + _GLIBCXX_THROW_OR_ABORT(bad_variant_access(__reasons[__n % 4u])); +} }; - // Must only be called with a string literal - inline void - __throw_bad_variant_access(const char* __what) - { _GLIBCXX_THROW_OR_ABORT(bad_variant_access(__what)); } - - inline void - __throw_bad_variant_access(bool __valueless) - { -if (__valueless) [[__unlikely__]] - __throw_bad_variant_access("std::get: variant is valueless"); -else - __throw_bad_variant_access("std::get: wrong index for variant"); - } - template class variant : private __detail::__variant::_Variant_base<_Types...>, @@ -1941,7 +1939,7 @@ namespace __detail::__variant namespace __variant = std::__detail::__variant; if ((__variant::__as(__variants).valueless_by_exception() || ...)) - 
__throw_bad_variant_access("std::visit: variant is valueless"); + __throw_bad_variant_access(2); using _Result_type = __detail::__variant::__visit_result_t<_Visitor, _Variants...>; @@ -1981,7 +1979,7 @@ namespace __detail::__variant namespace __variant = std::__detail::__variant; if ((__variant::__as(__variants).valueless_by_exception() || ...)) - __throw_bad_variant_access("std::visit: variant is valueless"); + __throw_bad_variant_access(3); return std::__do_visit<_Res>(std::forward<_Visitor>(__visitor), __variant::__as(std::forward<_Variants>(__variants))...);
[gcc(refs/users/meissner/heads/work182-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
https://gcc.gnu.org/g:754997fd268a5e798da4af34ace7db9f6d30a720 commit 754997fd268a5e798da4af34ace7db9f6d30a720 Author: Michael Meissner Date: Thu Oct 24 12:21:09 2024 -0400 PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations The multibuff.c benchmark attached to the PR target/117251 compiled for Power10 PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14 compared to GCC 11 - GCC 13, due to excessive amounts of spilling. The main function for the multibuf.c file has 3,747 lines, all of which are using vector unsigned long long. There are 696 vector rotates (all rotates are constant), 1,824 vector xor's and 600 vector andc's. In looking at it, the main thing that steps out is the reason for either spilling or moving variables is the support in fusion.md (generated by genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other vec_xor's feeding into vec_xor. On the powerpc for power10, there is a special fusion mode that happens if the machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction and the VANDC/VXOR feeds into the 2nd VXOR instruction. While the Power10 has 64 vector registers (which uses the XXL prefix to do logical operations), the fusion only works with the older Altivec instruction set (which uses the V prefix). The Altivec instruction only has 32 vector registers (which are overlaid over the VSX vector registers 32-63). By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this fusion, it means that the register allocator has more register pressure for the traditional Altivec registers instead of the VSX registers. In addition, since there are vector rotates, these rotates only work on the traditional Altivec registers, which adds to the Altivec register pressure. 
Finally in addition to doing the explicit xor, andc, and rotates using the Altivec registers, we have to also load vector constants for the rotate amount and these registers also are allocated as Altivec registers. Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has many more vector moves that the later compilers. Thus even though it has way less spills, the vector moves are why GCC 11 have the slowest results. There is an instruction that was added in power10 (XXEVAL) that does provide fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion. The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR, so I have written the patch to prefer doing the Altivec instructions if they don't need a temporary register. Here are the results for adding support for XXEVAL for the multibuff.c benchmark attached to the PR. Note that we essentially recover the speed with this patch that were lost with GCC 14 and the current trunk: XXEVALTrunk GCC14 GCC13 GCC12 GCC11 --- - - - - Benchmark time in seconds 5.53 6.156.265.575.61 9.56 Fuse VANDC -> VXOR 209 600 600 600 600 600 Fuse VXOR -> VXOR 0 240 240 120 120 120 XXEVAL to fuse ANDC -> XOR 391 00 0 0 0 XXEVAL to fuse XOR -> XOR240 00 0 0 0 Spill vector to stack 78 364 364 172 184 110 Load spilled vector from stack 431 962 962 713 723 166 Vector moves 10 100 100 70 72 3,055 Vector rotate right 696 696 696 696 696 696 XXLANDC or VANDC 209 600 600 600 600 600 XXLXOR or VXOR 953 1,8241,824 1,824 1,824 1,825 XXEVAL 631 00 0 0 0 Load vector rotate constants 24 24 24 24 24 24 Here are the results for adding support for XXEVAL for the singlebuff.c benchmark attached to the PR. Note that adding XXEVAL greatly speeds up this particular benchmark: XXEVALTrunk GCC14 GCC13 GCC12 GCC11 --- - - - - Benchmark time in seconds 4.46 5.405.405.355.36 7.54 Fuse VANDC -> VXOR 210 600 600 600 600 600 Fuse VXOR -> VXOR 0 240 240 120 120 120 XXEVAL to fuse ANDC -> XOR 3900
[gcc(refs/users/meissner/heads/work182-sha)] Revert changes
https://gcc.gnu.org/g:06e655c67c6f696cfedda0e5519c874a1d25a3da commit 06e655c67c6f696cfedda0e5519c874a1d25a3da Author: Michael Meissner Date: Thu Oct 24 12:15:54 2024 -0400 Revert changes Diff: --- gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c | 0 gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c | 0 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c deleted file mode 100644 index e69de29bb2d1.. diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c deleted file mode 100644 index e69de29bb2d1..
[gcc/aoliva/heads/testme] (4 commits) fold fold_truth_andor field merging into ifcombine
The branch 'aoliva/heads/testme' was updated to point to: 4f8e1ea7f2e1... fold fold_truth_andor field merging into ifcombine It previously pointed to: 53a0460c1f49... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 53a0460... ifcombine across noncontiguous blocks aee55fb... extend ifcombine_replace_cond to handle noncontiguous ifcom Summary of changes (added commits): --- 4f8e1ea... fold fold_truth_andor field merging into ifcombine 88b6065... preserve TRUTH_ANDIF handling in ifcombine_replace_cond bb30d95... ifcombine across noncontiguous blocks 46fa49f... extend ifcombine_replace_cond to handle noncontiguous ifcom
[gcc(refs/users/aoliva/heads/testme)] fold fold_truth_andor field merging into ifcombine
https://gcc.gnu.org/g:4f8e1ea7f2e14bbe3b687f9e2911acd46eec0bfe commit 4f8e1ea7f2e14bbe3b687f9e2911acd46eec0bfe Author: Alexandre Oliva Date: Sun Oct 20 21:02:05 2024 -0300 fold fold_truth_andor field merging into ifcombine This patch introduces various improvements to the logic that merges field compares, moving it into ifcombine. Before the patch, we could merge: (a.x1 EQNE b.x1) ANDOR (a.y1 EQNE b.y1) into something like: (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK) if both of A's fields live within the same alignment boundaries, and so do B's, at the same relative positions. Constants may be used instead of the object B. The initial goal of this patch was to enable such combinations when a field crossed alignment boundaries, e.g. for packed types. We can't generally access such fields with a single memory access, so when we come across such a compare, we will attempt to combine each access separately. Some merging opportunities were missed because of right-shifts, compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and narrowing conversions, especially after earlier merges. This patch introduces handlers for several cases involving these. The merging of multiple field accesses into wider bitfield-like accesses is undesirable to do too early in compilation, so we move it from folding to ifcombine, and extend ifcombine to merge noncontiguous compares, absent intervening side effects. VUSEs used to prevent ifcombine; that seemed excessively conservative, since relevant side effects were already tested, including the possibility of trapping loads, so that's removed. Unlike earlier ifcombine, when merging noncontiguous compares the merged compare must replace the earliest compare, which may require moving up the DEFs that contributed to the latter compare. 
When it is the second of a noncontiguous pair of compares that first accesses a word, we may merge the first compare with part of the second compare that refers to the same word, keeping the compare of the remaining bits at the spot where the second compare used to be. Handling compares with non-constant fields was somewhat generalized from what fold used to do, now handling non-adjacent fields, even if a field of one object crosses an alignment boundary but the other doesn't. The -Wno-error for toplev.o on rs6000 is because of toplev.c's: if ((flag_sanitize & SANITIZE_ADDRESS) && !FRAME_GROWS_DOWNWARD) and rs6000.h's: #define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ || (flag_sanitize & SANITIZE_ADDRESS) != 0) The mutually exclusive conditions involving flag_sanitize are now noticed and reported by ifcombine's warning on mutually exclusive compares. i386's needs -Wno-error for insn-attrtab.o for similar reasons. for gcc/ChangeLog * fold-const.cc (make_bit_field): Export. (all_ones_mask_p): Drop. (unextend, decode_field_reference, fold_truth_andor_1): Move field compare merging logic... * gimple-fold.cc: ... here. (ssa_is_substitutable_p, is_cast_p, is_binop_p): New. (prepare_xor, follow_load): New. (compute_split_boundary_from_align): New. (make_bit_field_load, build_split_load): New. (reuse_split_load, mergeable_loads_p): New. (fold_truth_andor_maybe_separate): New. * tree-ssa-ifcombine.cc: Include bitmap.h. (constant_condition_p): New. (recognize_if_then_else_nc, recognize_if_succs): New. (bb_no_side_effects_p): Don't reject VUSEs. (update_profile_after_ifcombine): Adjust for noncontiguous merges. (ifcombine_mark_ssa_name): New. (struct ifcombine_mark_ssa_name_t): New. (ifcombine_mark_ssa_name_walk): New. (ifcombine_replace_cond): Extended for noncontiguous merges after factoring out of... (ifcombine_ifandif): ... this. Drop result_inv arg. Try fold_truth_andor_maybe_separate. (tree_ssa_ifcombine_bb_1): Add outer_succ_bb arg. 
Call recognize_if_then_else_nc. Adjust ifcombine_ifandif calls. (tree_ssa_ifcombine_bb): Return the earliest affected block. Call recognize_if_then_else_nc. Try noncontiguous blocks. (pass_tree_ifcombine::execute): Retry affected blocks. * config/i386/t-i386 (insn-attrtab.o-warn): Disable errors. * config/rs6000/t-rs6000 (toplev.o-warn): Likewise. for gcc/testsuite/ChangeLog * gcc.dg/field-merge-1.c: New. * gcc.dg/field-merge-2.c: New.
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:bb30d957fbed1e33f862cf49109e221dacb36fb0 commit bb30d957fbed1e33f862cf49109e221dacb36fb0 Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Rework ifcombine to support merging conditions from noncontiguous blocks. This depends on earlier preparation changes. The function that attempted to ifcombine a block with its immediate predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks eligible for ifcombine, attempting to combine with them. The function that actually drives the combination of a pair of blocks, tree_ssa_ifcombine_bb_1, now takes an additional parameter: the successor of outer that leads to inner. The function that recognizes if_then_else patterns is modified to enable testing without distinguishing between then and else, or to require nondegenerate conditions, that aren't worth combining with. for gcc/ChangeLog * tree-ssa-ifcombine.cc (recognize_if_then_else): Support relaxed then/else testing; require nondegenerate condition otherwise. (tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it instead of inner_cond_bb. Adjust callers. (tree_ssa_ifcombine_bb): Loop over dominating outer blocks eligible for ifcombine. (pass_tree_ifcombine::execute): Noted potential need for changes to the post-combine logic. Diff: --- gcc/tree-ssa-ifcombine.cc | 152 +- 1 file changed, 123 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 71c7c9074e94..817c95b20252 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb) is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. 
If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -886,19 +895,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. - PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. 
*/ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-effects. */ if (phi_pred_bb != else_bb - && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb) + && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb) && same_phi_args_p (outer_cond_bb, phi_pred_bb, el
[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine
https://gcc.gnu.org/g:46fa49fec2574a95dad05c297eebcb41f8dd8dc1 commit 46fa49fec2574a95dad05c297eebcb41f8dd8dc1 Author: Alexandre Oliva Date: Thu Oct 24 05:25:30 2024 -0300 extend ifcombine_replace_cond to handle noncontiguous ifcombine Prepare to handle noncontiguous ifcombine, introducing logic to modify the outer condition when needed. There are two cases worth mentioning: - when blocks are noncontiguous, we have to place the combined condition in the outer block to avoid pessimizing carefully crafted short-circuited tests; - even when blocks are contiguous, we prepare for situations in which the combined condition has two tests, one to be placed in outer and the other in inner. This circumstance will not come up when noncontiguous ifcombine is first enabled, but it will when an improved fold_truth_andor is integrated with ifcombine. Combining the condition from inner into outer may require moving SSA DEFs used in the inner condition, and the changes implement this as well. for gcc/ChangeLog * tree-ssa-ifcombine.cc: Include bitmap.h. (ifcombine_mark_ssa_name): New. (struct ifcombine_mark_ssa_name_t): New. (ifcombine_mark_ssa_name_walk): New. (ifcombine_replace_cond): Prepare to handle noncontiguous and split-condition ifcombine. Diff: --- gcc/tree-ssa-ifcombine.cc | 173 -- 1 file changed, 168 insertions(+), 5 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index b5b72be29bbf..71c7c9074e94 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa.h" #include "attribs.h" #include "asan.h" +#include "bitmap.h" #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT #define LOGICAL_OP_NON_SHORT_CIRCUIT \ @@ -460,17 +461,57 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, } } -/* Replace the conditions in INNER_COND with COND. - Replace OUTER_COND with a constant. */ +/* Set NAME's bit in USED if OUTER dominates it. 
*/ + +static void +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer) +{ + if (SSA_NAME_IS_DEFAULT_DEF (name)) +return; + + gimple *def = SSA_NAME_DEF_STMT (name); + basic_block bb = gimple_bb (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, outer)) +return; + + bitmap_set_bit (used, SSA_NAME_VERSION (name)); +} + +/* Data structure passed to ifcombine_mark_ssa_name. */ +struct ifcombine_mark_ssa_name_t +{ + /* SSA_NAMEs that have been referenced. */ + bitmap used; + /* Dominating block of DEFs that might need moving. */ + basic_block outer; +}; + +/* Mark in DATA->used any SSA_NAMEs used in *t. */ + +static tree +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) +{ + ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_; + + if (*t && TREE_CODE (*t) == SSA_NAME) +ifcombine_mark_ssa_name (data->used, *t, data->outer); + + return NULL; +} + +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2. + COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND + replaced with a constant, but if there are intervening blocks, it's best to + adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND. */ static bool ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, gcond *outer_cond, bool outer_inv, tree cond, bool must_canon, tree cond2) { - bool result_inv = inner_inv; - - gcc_checking_assert (!cond2); + bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond)) + != gimple_bb (outer_cond)); + bool result_inv = outer_p ? outer_inv : inner_inv; if (result_inv) cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); @@ -480,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, else if (must_canon) return false; + if (outer_p) +{ + { + auto_bitmap used; + basic_block outer_bb = gimple_bb (outer_cond); + + /* Mark SSA DEFs that are referenced by cond and may thus need to be + moved to outer. 
*/ + { + ifcombine_mark_ssa_name_t data = { used, outer_bb }; + walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL); + } + + if (!bitmap_empty_p (used)) + { + /* Iterate up from inner_cond, moving DEFs identified as used by + cond, and marking USEs in the DEFs for moving as well. */ + gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond); + for (basic_block bb = gimple_bb (inner_cond); +bb != outer_bb; bb = single_pred (bb)) + { + for (gimple_stmt_iterator gsitr = gsi_last_bb (bb); +!gsi_end_p (gs
[gcc r15-4614] libstdc++: Fix test broken when using COW std::string
https://gcc.gnu.org/g:d01dc97a26d2f5034ca135f46094aa52c44cc90a commit r15-4614-gd01dc97a26d2f5034ca135f46094aa52c44cc90a Author: François Dumont Date: Thu Oct 24 20:30:16 2024 +0200 libstdc++: Fix test broken when using COW std::string libstdc++-v3/ChangeLog: * testsuite/23_containers/unordered_map/96088.cc (test03): Fix increments value when _GLIBCXX_USE_CXX11_ABI is equal to 0. Diff: --- libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc index b5be7d06aa03..ee41675a16ba 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc @@ -233,9 +233,8 @@ test03() um.insert(v.begin(), v.end()); VERIFY( um.size() == 1 ); -// Allocate array of buckets, a node, the std::string value and the -// std::string key (unless COW). -constexpr std::size_t increments = _GLIBCXX_USE_CXX11_ABI ? 4 : 3; +// Allocate array of buckets, a node, and the 2 std::string (unless COW). +constexpr std::size_t increments = _GLIBCXX_USE_CXX11_ABI ? 4 : 2; VERIFY( __gnu_test::counter::count() == origin + increments ); VERIFY( __gnu_test::counter::get()._M_increments == increments );
[gcc(refs/users/meissner/heads/work182-sha)] Revert changes
https://gcc.gnu.org/g:0f27c445e6267e0a544ae030a2fb9e7058fc8ec0 commit 0f27c445e6267e0a544ae030a2fb9e7058fc8ec0 Author: Michael Meissner Date: Thu Oct 24 12:11:15 2024 -0400 Revert changes Diff: --- gcc/config/rs6000/altivec.md | 35 +- gcc/config/rs6000/predicates.md| 26 - gcc/config/rs6000/rs6000.h | 3 - gcc/config/rs6000/rs6000.md| 6 +- .../gcc.target/powerpc/p10-vector-fused-1.c| 409 - .../gcc.target/powerpc/p10-vector-fused-2.c| 936 - .../gcc.target/powerpc/vector-rotate-left.c| 34 - 7 files changed, 5 insertions(+), 1444 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index d4ee50322ca1..00dad4b91f1c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1983,39 +1983,12 @@ } [(set_attr "type" "vecperm")]) -;; -mcpu=future adds a vector rotate left word variant. There is no vector -;; byte/half-word/double-word/quad-word rotate left. This insn occurs before -;; altivec_vrl and will match for -mcpu=future, while other cpus will -;; match the generic insn. -;; However for testing, allow other xvrl variants. In particular, XVRLD for -;; the sha3 tests for multibuf/singlebuf. 
(define_insn "altivec_vrl" - [(set (match_operand:VI2 0 "register_operand" "=v,wa") -(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa") - (match_operand:VI2 2 "register_operand" "v,wa")))] + [(set (match_operand:VI2 0 "register_operand" "=v") +(rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] "" - "@ - vrl %0,%1,%2 - xvrl %x0,%x1,%x2" - [(set_attr "type" "vecsimple") - (set_attr "isa" "*,xvrlw")]) - -(define_insn "*altivec_vrl_immediate" - [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa") - (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa") - (match_operand:VI2 2 "vector_shift_immediate" "j,wM,wE,wS")))] - "TARGET_XVRLW && " -{ - rtx op2 = operands[2]; - int value = 256; - int num_insns = -1; - - if (!xxspltib_constant_p (op2, mode, &num_insns, &value)) -gcc_unreachable (); - - operands[3] = GEN_INT (value & 0xff); - return "xvrli %x0,%x1,%3"; -} + "vrl %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vrlq" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index fccfbd7e4904..1d95e34557e5 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -728,32 +728,6 @@ return num_insns == 1; }) -;; Return 1 if the operand is a CONST_VECTOR whose elements are all the -;; same and the elements can be an immediate shift or rotate factor -(define_predicate "vector_shift_immediate" - (match_code "const_vector,vec_duplicate,const_int") -{ - int value = 256; - int num_insns = -1; - - if (zero_constant (op, mode) || all_ones_constant (op, mode)) -return true; - - if (!xxspltib_constant_p (op, mode, &num_insns, &value)) -return false; - - switch (mode) -{ -case V16QImode: return IN_RANGE (value, 0, 7); -case V8HImode: return IN_RANGE (value, 0, 15); -case V4SImode: return IN_RANGE (value, 0, 31); -case V2DImode: return IN_RANGE (value, 0, 63); -default:break; -} - - return false; -}) - ;; Return 1 if the operand is 
a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1a168c2c9596..8cfd9faf77dc 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -581,9 +581,6 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 -/* Whether we have XVRLW support. */ -#define TARGET_XVRLW TARGET_FUTURE - /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 420f20d4524b..68fbfec95546 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -369,7 +369,7 @@ (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) ;; The ISA we implement. -(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval,xvrlw" +(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval" (const_string "any")) ;; Is this alternative enabled for the current CPU/ISA/etc.? @@ -426,10 +426,6 @@ (match_test "TARGET_PREFIXED && TARGET_XXEVAL")) (const_int 1) - (and (eq_attr "isa" "xvrlw") - (match_test "TARGET_XVRLW")) - (const_int 1) - ] (const_int 0))) ;; If this instruction is microcoded on the CELL processor diff -
[gcc r15-4619] c++: Handle ABI for non-polymorphic dynamic classes
https://gcc.gnu.org/g:6713f05a2aeb852c3f4d738c8c5dbad816624323 commit r15-4619-g6713f05a2aeb852c3f4d738c8c5dbad816624323 Author: Nathaniel Shead Date: Wed Aug 21 00:42:42 2024 +1000 c++: Handle ABI for non-polymorphic dynamic classes The Itanium ABI has specific rules for when virtual tables for dynamic classes should be emitted. However we didn't consider structures with virtual inheritance but no virtual members as dynamic classes for ABI purposes; this patch fixes this. gcc/cp/ChangeLog: * decl2.cc (import_export_class): Use TYPE_CONTAINS_VPTR_P instead of TYPE_POLYMORPHIC_P. (import_export_decl): Likewise. gcc/testsuite/ChangeLog: * g++.dg/modules/virt-5_a.C: New test. * g++.dg/modules/virt-5_b.C: New test. Signed-off-by: Nathaniel Shead Diff: --- gcc/cp/decl2.cc | 4 ++-- gcc/testsuite/g++.dg/modules/virt-5_a.C | 16 gcc/testsuite/g++.dg/modules/virt-5_b.C | 11 +++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index a455eb481b1a..fa32ce35c8c4 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -2434,7 +2434,7 @@ import_export_class (tree ctype) translation unit, then export the class; otherwise, import it. 
*/ import_export = -1; - else if (TYPE_POLYMORPHIC_P (ctype)) + else if (TYPE_CONTAINS_VPTR_P (ctype)) { tree cdecl = TYPE_NAME (ctype); if (DECL_LANG_SPECIFIC (cdecl) && DECL_MODULE_ATTACH_P (cdecl)) @@ -3530,7 +3530,7 @@ import_export_decl (tree decl) class_type = type; import_export_class (type); if (CLASSTYPE_INTERFACE_KNOWN (type) - && TYPE_POLYMORPHIC_P (type) + && TYPE_CONTAINS_VPTR_P (type) && CLASSTYPE_INTERFACE_ONLY (type) /* If -fno-rtti was specified, then we cannot be sure that RTTI information will be emitted with the diff --git a/gcc/testsuite/g++.dg/modules/virt-5_a.C b/gcc/testsuite/g++.dg/modules/virt-5_a.C new file mode 100644 index ..f4c6abe85ef6 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/virt-5_a.C @@ -0,0 +1,16 @@ +// { dg-additional-options "-fmodules-ts" } +// { dg-module-cmi M } + +export module M; + +struct C {}; +struct B : virtual C {}; + +// Despite no non-inline key function, this is still a dynamic class +// and so by the Itanium ABI 5.2.3 should be uniquely emitted in this TU +export struct A : B { + inline A (int) {} +}; + +// { dg-final { scan-assembler {_ZTTW1M1A:} } } +// { dg-final { scan-assembler {_ZTVW1M1A:} } } diff --git a/gcc/testsuite/g++.dg/modules/virt-5_b.C b/gcc/testsuite/g++.dg/modules/virt-5_b.C new file mode 100644 index ..785dd92ac1ee --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/virt-5_b.C @@ -0,0 +1,11 @@ +// { dg-module-do link } +// { dg-additional-options "-fmodules-ts" } + +import M; + +int main() { + A a(0); +} + +// { dg-final { scan-assembler-not {_ZTTW1M1A:} } } +// { dg-final { scan-assembler-not {_ZTVW1M1A:} } }
[gcc r15-4622] c++/modules: Propagate some missing flags on type definitions
https://gcc.gnu.org/g:6aba48a8cc128e54ee243d451ac9a843ff41c4f9 commit r15-4622-g6aba48a8cc128e54ee243d451ac9a843ff41c4f9 Author: Nathaniel Shead Date: Thu Oct 24 18:10:52 2024 +1100 c++/modules: Propagate some missing flags on type definitions Noticed while testing my fix for PR c++/113814. Not all of these are easily testable but I've tested a couple that were straight-forward. For consistency also adds a new TYPE_WARN_IF_NOT_ALIGN_RAW flag to match the decl version Nathan added. gcc/cp/ChangeLog: * module.cc (trees_in::read_class_def): Propagate some missing flags from the streamed-in definition. gcc/ChangeLog: * tree.h (TYPE_WARN_IF_NOT_ALIGN_RAW): New accessor. (TYPE_WARN_IF_NOT_ALIGN): Use it. (SET_TYPE_WARN_IF_NOT_ALIGN): Likewise. gcc/testsuite/ChangeLog: * g++.dg/modules/class-10_a.H: New test. * g++.dg/modules/class-10_b.C: New test. Signed-off-by: Nathaniel Shead Diff: --- gcc/cp/module.cc | 20 +++- gcc/testsuite/g++.dg/modules/class-10_a.H | 6 ++ gcc/testsuite/g++.dg/modules/class-10_b.C | 19 +++ gcc/tree.h| 8 +--- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 90ad67daf72b..297ef85bb1e9 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -12379,8 +12379,12 @@ trees_in::read_class_def (tree defn, tree maybe_template) /* Core pieces. 
*/ TYPE_MODE_RAW (type) = TYPE_MODE_RAW (type_dup); + TYPE_ALIGN_RAW (type) = TYPE_ALIGN_RAW (type_dup); + TYPE_WARN_IF_NOT_ALIGN_RAW (type) + = TYPE_WARN_IF_NOT_ALIGN_RAW (type_dup); + TYPE_USER_ALIGN (type) = TYPE_USER_ALIGN (type_dup); + SET_DECL_MODE (defn, DECL_MODE (maybe_dup)); - TREE_ADDRESSABLE (type) = TREE_ADDRESSABLE (type_dup); DECL_SIZE (defn) = DECL_SIZE (maybe_dup); DECL_SIZE_UNIT (defn) = DECL_SIZE_UNIT (maybe_dup); DECL_ALIGN_RAW (defn) = DECL_ALIGN_RAW (maybe_dup); @@ -12388,12 +12392,26 @@ trees_in::read_class_def (tree defn, tree maybe_template) = DECL_WARN_IF_NOT_ALIGN_RAW (maybe_dup); DECL_USER_ALIGN (defn) = DECL_USER_ALIGN (maybe_dup); + TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (type_dup); + TYPE_CXX_ODR_P (type) = TYPE_CXX_ODR_P (type_dup); + TYPE_NO_FORCE_BLK (type) = TYPE_NO_FORCE_BLK (type_dup); + TYPE_TRANSPARENT_AGGR (type) = TYPE_TRANSPARENT_AGGR (type_dup); + TYPE_CONTAINS_PLACEHOLDER_INTERNAL (type) + = TYPE_CONTAINS_PLACEHOLDER_INTERNAL (type_dup); + + TYPE_EMPTY_P (type) = TYPE_EMPTY_P (type_dup); + TREE_ADDRESSABLE (type) = TREE_ADDRESSABLE (type_dup); + /* C++ pieces. 
*/ TYPE_POLYMORPHIC_P (type) = TYPE_POLYMORPHIC_P (type_dup); + CLASSTYPE_FINAL (type) = CLASSTYPE_FINAL (type_dup); + TYPE_HAS_USER_CONSTRUCTOR (type) = TYPE_HAS_USER_CONSTRUCTOR (type_dup); TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type) = TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type_dup); + TYPE_NEEDS_CONSTRUCTING (type) + = TYPE_NEEDS_CONSTRUCTING (type_dup); if (auto ls = TYPE_LANG_SPECIFIC (type_dup)) { diff --git a/gcc/testsuite/g++.dg/modules/class-10_a.H b/gcc/testsuite/g++.dg/modules/class-10_a.H new file mode 100644 index ..177cf57fec13 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/class-10_a.H @@ -0,0 +1,6 @@ +// { dg-additional-options "-fmodule-header" } +// { dg-module-cmi {} } + +struct alignas(16) Align {}; +struct Final final {}; +struct NeedsConstructing { NeedsConstructing(); }; diff --git a/gcc/testsuite/g++.dg/modules/class-10_b.C b/gcc/testsuite/g++.dg/modules/class-10_b.C new file mode 100644 index ..2f982124f3e7 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/class-10_b.C @@ -0,0 +1,19 @@ +// { dg-additional-options "-fmodules-ts -Wno-pedantic" } +// Test bits and pieces of merging information +// from class defs into forward declarations + +struct Align; +struct Final; +struct NeedsConstructing; + +import "class-10_a.H"; + +static_assert(alignof(Align) == 16); + +struct TestFinal : Final {}; // { dg-error "cannot derive" } + +struct TestNeedsConstructing { + struct { +NeedsConstructing a; // { dg-error "with constructor not allowed in anonymous aggregate" } + }; +}; diff --git a/gcc/tree.h b/gcc/tree.h index efda032a220c..66e08793c2ec 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -2357,13 +2357,15 @@ extern tree vector_element_bits_tree (const_tree); /* The minimum alignment necessary for objects of this type without warning. The value is an int, measured in bits. */ +#define TYPE_WARN_IF_NOT_ALIGN_RAW(NODE) \ +(TYPE_CHECK (NODE)->type_common.warn_if_not_ali
[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine
https://gcc.gnu.org/g:cb75ab71333fe6fb59bd73288baf5b7170d80cfa commit cb75ab71333fe6fb59bd73288baf5b7170d80cfa Author: Alexandre Oliva Date: Thu Oct 24 05:25:30 2024 -0300 extend ifcombine_replace_cond to handle noncontiguous ifcombine Prepare to handle noncontiguous ifcombine, introducing logic to modify the outer condition when needed. There are two cases worth mentioning: - when blocks are noncontiguous, we have to place the combined condition in the outer block to avoid pessimizing carefully crafted short-circuited tests; - even when blocks are contiguous, we prepare for situations in which the combined condition has two tests, one to be placed in outer and the other in inner. This circumstance will not come up when noncontiguous ifcombine is first enabled, but it will when an improved fold_truth_andor is integrated with ifcombine. Combining the condition from inner into outer may require moving SSA DEFs used in the inner condition, and the changes implement this as well. for gcc/ChangeLog * tree-ssa-ifcombine.cc: Include bitmap.h. (ifcombine_mark_ssa_name): New. (struct ifcombine_mark_ssa_name_t): New. (ifcombine_mark_ssa_name_walk): New. (ifcombine_replace_cond): Prepare to handle noncontiguous and split-condition ifcombine. Diff: --- gcc/tree-ssa-ifcombine.cc | 184 +- 1 file changed, 181 insertions(+), 3 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index b5b72be29bbf..8c6eaaaf5e5c 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa.h" #include "attribs.h" #include "asan.h" +#include "bitmap.h" #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT #define LOGICAL_OP_NON_SHORT_CIRCUIT \ @@ -460,8 +461,48 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, } } -/* Replace the conditions in INNER_COND with COND. - Replace OUTER_COND with a constant. */ +/* Set NAME's bit in USED if OUTER dominates it. 
*/ + +static void +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer) +{ + if (SSA_NAME_IS_DEFAULT_DEF (name)) +return; + + gimple *def = SSA_NAME_DEF_STMT (name); + basic_block bb = gimple_bb (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, outer)) +return; + + bitmap_set_bit (used, SSA_NAME_VERSION (name)); +} + +/* Data structure passed to ifcombine_mark_ssa_name. */ +struct ifcombine_mark_ssa_name_t +{ + /* SSA_NAMEs that have been referenced. */ + bitmap used; + /* Dominating block of DEFs that might need moving. */ + basic_block outer; +}; + +/* Mark in DATA->used any SSA_NAMEs used in *t. */ + +static tree +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) +{ + ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_; + + if (*t && TREE_CODE (*t) == SSA_NAME) +ifcombine_mark_ssa_name (data->used, *t, data->outer); + + return NULL; +} + +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2. + COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND + replaced with a constant, but if there are intervening blocks, it's best to + adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND. */ static bool ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, @@ -470,7 +511,22 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, { bool result_inv = inner_inv; - gcc_checking_assert (!cond2); + /* Split cond into cond2 if they're contiguous. ??? We might be able to + handle ORIF as well, inverting both conditions, but it's not clear that + this would be enough, and it never comes up. */ + if (!cond2 + && TREE_CODE (cond) == TRUTH_ANDIF_EXPR + && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond)) +{ + /* ??? Does this ever hit? */ + gcc_unreachable (); + cond2 = TREE_OPERAND (cond, 1); + cond = TREE_OPERAND (cond, 0); +} + + bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond)) + != gimple_bb (outer_cond)); + bool result_inv = outer_p ? 
outer_inv : inner_inv; if (result_inv) cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); @@ -480,6 +536,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, else if (must_canon) return false; + if (outer_p) +{ + { + auto_bitmap used; + basic_block outer_bb = gimple_bb (outer_cond); + + /* Mark SSA DEFs that are referenced by cond and may thus need to be + moved to outer. */ + { + ifcombine_mark_ssa_name_t data = { used, outer_bb }; + walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL); + } + + if (!bitmap_empty_p (used)) +
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:1070f7874f55ec00c345a3b5c77c8fa0bc0e1612 commit 1070f7874f55ec00c345a3b5c77c8fa0bc0e1612 Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Rework ifcombine to support merging conditions from noncontiguous blocks. This depends on earlier preparation changes. The function that attempted to ifcombine a block with its immediate predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks eligible for ifcombine, attempting to combine with them. The function that actually drives the combination of a pair of blocks, tree_ssa_ifcombine_bb_1, now takes an additional parameter: the successor of outer that leads to inner. The function that recognizes if_then_else patterns is modified to enable testing without distinguishing between then and else, or to require nondegenerate conditions, that aren't worth combining with. for gcc/ChangeLog * tree-ssa-ifcombine.cc (recognize_if_then_else): Support relaxed then/else testing; require nondegenerate condition otherwise. (tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it instead of inner_cond_bb. Adjust callers. (tree_ssa_ifcombine_bb): Loop over dominating outer blocks eligible for ifcombine. (pass_tree_ifcombine::execute): Noted potential need for changes to the post-combine logic. Diff: --- gcc/tree-ssa-ifcombine.cc | 140 -- 1 file changed, 111 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 8c6eaaaf5e5c..d84c450db059 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb) is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. 
If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -901,19 +910,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. - PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. 
*/ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-effects. */ if (phi_pred_bb != else_bb - && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb) + && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb) && same_phi_args_p (outer_cond_bb, phi_pred_bb, el
[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond
https://gcc.gnu.org/g:5b4419273e4384451828861958027a82d5c38806 commit 5b4419273e4384451828861958027a82d5c38806 Author: Alexandre Oliva Date: Thu Oct 24 05:25:26 2024 -0300 introduce ifcombine_replace_cond Refactor ifcombine_ifandif, moving the common code from the various paths that apply the combined condition to a new function. for gcc/ChangeLog * tree-ssa-ifcombine.cc (ifcombine_replace_cond): Factor out of... (ifcombine_ifandif): ... this. Diff: --- gcc/tree-ssa-ifcombine.cc | 137 ++ 1 file changed, 65 insertions(+), 72 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 0a2ba970548c..6dcf5e6efe1d 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -399,6 +399,51 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer2->probability = profile_probability::never (); } +/* Replace the conditions in INNER_COND with COND. + Replace OUTER_COND with a constant. */ + +static bool +ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, + gcond *outer_cond, bool outer_inv, + tree cond, bool must_canon, tree cond2) +{ + bool result_inv = inner_inv; + + gcc_checking_assert (!cond2); + + if (result_inv) +cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); + + if (tree tcanon = canonicalize_cond_expr_cond (cond)) +cond = tcanon; + else if (must_canon) +return false; + +{ + if (!is_gimple_condexpr_for_cond (cond)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond); + cond = force_gimple_operand_gsi_1 (&gsi, cond, +is_gimple_condexpr_for_cond, +NULL, true, GSI_SAME_STMT); + } + gimple_cond_set_condition_from_tree (inner_cond, cond); + update_stmt (inner_cond); + + /* Leave CFG optimization to cfg_cleanup. */ + gimple_cond_set_condition_from_tree (outer_cond, + outer_inv + ? 
boolean_false_node + : boolean_true_node); + update_stmt (outer_cond); +} + + update_profile_after_ifcombine (gimple_bb (inner_cond), + gimple_bb (outer_cond)); + + return true; +} + /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. inner_inv, outer_inv indicate whether the conditions are inverted. @@ -408,7 +453,6 @@ static bool ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, basic_block outer_cond_bb, bool outer_inv) { - bool result_inv = inner_inv; gimple_stmt_iterator gsi; tree name1, name2, bit1, bit2, bits1, bits2; @@ -446,26 +490,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t); t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, true, GSI_SAME_STMT); - t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR, - boolean_type_node, t2, t); - t = canonicalize_cond_expr_cond (t); - if (!t) - return false; - if (!is_gimple_condexpr_for_cond (t)) - { - gsi = gsi_for_stmt (inner_cond); - t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond, - NULL, true, GSI_SAME_STMT); - } - gimple_cond_set_condition_from_tree (inner_cond, t); - update_stmt (inner_cond); - /* Leave CFG optimization to cfg_cleanup. */ - gimple_cond_set_condition_from_tree (outer_cond, - outer_inv ? boolean_false_node : boolean_true_node); - update_stmt (outer_cond); + t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t); - update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); + if (!ifcombine_replace_cond (inner_cond, inner_inv, + outer_cond, outer_inv, + t, true, NULL_TREE)) + return false; if (dump_file) { @@ -485,9 +516,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, In that case remove the outer test and change the inner one to test for name & (bits1 | bits2) != 0. 
*/ else if (recognize_bits_test (inner_cond, &name1, &bits1, !inner_inv) - && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv)) + && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv)) { - gimple_stmt_iterator gsi; tree t; if ((TREE_CODE (name1) == SSA_NAME @@ -530,33 +560,14 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, bits1 = fold_convert (TREE_TYPE (bits2), bits1);
[gcc(refs/users/aoliva/heads/testme)] allow vuses in ifcombine blocks
https://gcc.gnu.org/g:683dabf0a585970652c3a217231f6897922ec1b7 commit 683dabf0a585970652c3a217231f6897922ec1b7 Author: Alexandre Oliva Date: Thu Oct 24 05:25:21 2024 -0300 allow vuses in ifcombine blocks Disallowing vuses in blocks for ifcombine is too strict, and it prevents usefully moving fold_truth_andor into ifcombine. That tree-level folder has long ifcombined loads, absent other relevant side effects. for gcc/ChangeLog * tree-ssa-ifcombine.c (bb_no_side_effects_p): Allow vuses, but not vdefs. Diff: --- gcc/tree-ssa-ifcombine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 6a3bc99190d9..ed20a231951a 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb) enum tree_code rhs_code; if (gimple_has_side_effects (stmt) || gimple_could_trap_p (stmt) - || gimple_vuse (stmt) + || gimple_vdef (stmt) /* We need to rewrite stmts with undefined overflow to use unsigned arithmetic but cannot do so for signed division. */ || ((ass = dyn_cast (stmt))
[gcc r15-4615] Use unique_ptr in more places in pretty_printer/diagnostics: 'gcc/config/gcn/mkoffload.cc' [PR116613
https://gcc.gnu.org/g:b3aa301db1b09b533b3635791a98d6bf906e9a15 commit r15-4615-gb3aa301db1b09b533b3635791a98d6bf906e9a15 Author: Thomas Schwinge Date: Thu Oct 24 20:56:21 2024 +0200 Use unique_ptr in more places in pretty_printer/diagnostics: 'gcc/config/gcn/mkoffload.cc' [PR116613] After recent commit bf43fe6aa966eaf397ea3b8ebd6408d3d124e285 "Use unique_ptr in more places in pretty_printer/diagnostics [PR116613]": [...] In file included from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:0: ../../source-gcc/gcc/diagnostic.h:29:3: error: #error "You must define INCLUDE_MEMORY before including system.h to use diagnostic.h" # error "You must define INCLUDE_MEMORY before including system.h to use diagnostic.h" ^ In file included from ../../source-gcc/gcc/diagnostic.h:34:0, from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31: ../../source-gcc/gcc/pretty-print.h:29:3: error: #error "You must define INCLUDE_MEMORY before including system.h to use pretty-print.h" # error "You must define INCLUDE_MEMORY before including system.h to use pretty-print.h" ^ In file included from ../../source-gcc/gcc/diagnostic.h:34:0, from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31: ../../source-gcc/gcc/pretty-print.h:280:16: error: 'unique_ptr' in namespace 'std' does not name a template type virtual std::unique_ptr clone () const; ^ In file included from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:0: ../../source-gcc/gcc/diagnostic.h:585:32: error: 'std::unique_ptr' has not been declared void set_output_format (std::unique_ptr output_format); ^ [...] PR other/116613 gcc/ * config/gcn/mkoffload.cc: Add '#define INCLUDE_MEMORY'. Diff: --- gcc/config/gcn/mkoffload.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index c1d80aae59c7..17a334211347 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -24,6 +24,7 @@ This is not a complete assembler. 
We presume the source is well formed from the compiler and can die horribly if it is not. */ +#define INCLUDE_MEMORY #include "config.h" #include "system.h" #include "coretypes.h"
[gcc/devel/nothrow-detection] Fixed extract_types_for_resx function
https://gcc.gnu.org/g:c756ee328c82211698ddcaf9e3475c763884d7bb commit c756ee328c82211698ddcaf9e3475c763884d7bb Author: Pranil Dey Date: Fri Oct 25 00:56:59 2024 +0530 Fixed extract_types_for_resx function The function was recursive in nature and there is a chance of running out of stack, so now an iterative approach was used to get the types for resx Diff: --- gcc/tree-eh.cc | 89 +- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc index df90d1bc2049..e62fed90c6dd 100644 --- a/gcc/tree-eh.cc +++ b/gcc/tree-eh.cc @@ -3183,60 +3183,59 @@ stmt_throw_types (function *, gimple *stmt, vec *ret_vector) } } -// To get the all exception types from a resx stmt -static bool -extract_types_for_resx (basic_block bb, vec *ret_vector) +// To get the all exception types from a resx stmt (iterative version) +bool +extract_types_for_resx (gimple *resx_stmt, vec *ret_vector) { - edge e; - edge_iterator ei; + basic_block start_bb = gimple_bb (resx_stmt); + hash_set visited_blocks; + vec block_stack; - // Iterate over edges to walk up the basic blocks - FOR_EACH_EDGE (e, ei, bb->preds) - { -// Get the last stmt of the basic block as it is an EH stmt -bb = e->src; -gimple_stmt_iterator gsi = gsi_last_bb (bb); -gimple *last_stmt = gsi_stmt (gsi); + block_stack.safe_push(start_bb); -if (bb->aux) + while (!block_stack.is_empty()) + { +basic_block bb = block_stack.pop(); +if (visited_blocks.contains(bb)) continue; -bb->aux = (void *)1; -if (last_stmt && (e->flags & EDGE_EH)) +visited_blocks.add(bb); + +edge e; +edge_iterator ei; +gimple_stmt_iterator gsi = gsi_last_bb(bb); +gimple *last_stmt = gsi_stmt(gsi); + + +FOR_EACH_EDGE(e, ei, bb->preds) +{ + basic_block pred_bb = e->src; + + if (e->flags & EDGE_EH) {: -if (gimple_code (last_stmt) == GIMPLE_CALL) - { -// check if its a throw -if (!extract_types_for_call (as_a (last_stmt), - ret_vector)) - return false; -continue; - } -else if (gimple_code (last_stmt) == GIMPLE_RESX) - { -// Recursively
processing resx -// FIXME: to get this linear, we should cache results. -if (!extract_types_for_resx (last_stmt, ret_vector)) - return false; -continue; - } +gimple_stmt_iterator pred_gsi = gsi_last_bb(pred_bb); +gimple *pred_last_stmt = gsi_stmt(pred_gsi); + +if (gimple_code(pred_last_stmt) == GIMPLE_CALL) +{ + if (!extract_types_for_call(as_a(pred_last_stmt), ret_vector)) +return false; +} +else if (gimple_code(pred_last_stmt) == GIMPLE_RESX) +{ + // Add the predecessor block to the stack for further exploration + block_stack.safe_push(pred_bb); +} } -/* FIXME: remove recursion here, so we do not run out of stack. */ -else if (!extract_types_for_resx (e->src, ret_vector)) - return false; + else + { +block_stack.safe_push(pred_bb); + } +} } - return true; -} -// To get the all exception types from a resx stmt -bool -extract_types_for_resx (gimple *resx_stmt, vec *ret_vector) -{ - basic_block bb = gimple_bb (resx_stmt); - bool ret = extract_types_for_resx (bb, ret_vector); - /* FIXME: this is non-linear. */ - clear_aux_for_blocks (); - return ret; + clear_aux_for_blocks(); + return true; } // To get the types being thrown outside of a function
[gcc/aoliva/heads/testme] (2 commits) ifcombine across noncontiguous blocks
The branch 'aoliva/heads/testme' was updated to point to: a050f30dcecc... ifcombine across noncontiguous blocks It previously pointed to: 1070f7874f55... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 1070f78... ifcombine across noncontiguous blocks cb75ab7... extend ifcombine_replace_cond to handle noncontiguous ifcom Summary of changes (added commits): --- a050f30... ifcombine across noncontiguous blocks aee55fb... extend ifcombine_replace_cond to handle noncontiguous ifcom
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:f7a9315f62c2d2bcafb854b6abaaec722907 commit f7a9315f62c2d2bcafb854b6abaaec722907 Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Diff: --- gcc/tree-ssa-ifcombine.cc | 159 +- 1 file changed, 128 insertions(+), 31 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 6be5d969de88..bd46a5242154 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -50,6 +50,21 @@ along with GCC; see the file COPYING3. If not see false) >= 2) #endif +/* Return FALSE iff the COND_BB ends with a conditional whose result is not a + known constant. */ + +static bool +known_succ_p (basic_block cond_bb) +{ + gcond *cond = safe_dyn_cast (*gsi_last_bb (cond_bb)); + + if (!cond) +return true; + + return (CONSTANT_CLASS_P (gimple_cond_lhs (cond)) + && CONSTANT_CLASS_P (gimple_cond_rhs (cond))); +} + /* This pass combines COND_EXPRs to simplify control flow. It currently recognizes bit tests and comparisons in chains that represent logical and or logical or of two COND_EXPRs. @@ -70,25 +85,34 @@ along with GCC; see the file COPYING3. If not see is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. 
If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, gcc_assert (inner_taken->dest == outer2->dest); if (outer_to_inner_bb == inner_cond_bb - && constant_condition_p (outer_cond_bb)) + && known_succ_p (outer_cond_bb)) { /* Path outer_cond_bb->(outer2) needs to be merged into path outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) @@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer_to_inner->probability = profile_probability::always (); outer2->probability = profile_probability::never (); } - else if (constant_condition_p (inner_cond_bb)) + else if (known_succ_p (inner_cond_bb)) { /* Path inner_cond_bb->(inner_taken) needs to be merged into path outer_cond_bb->(outer2). We've accumulated the probabilities from @@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. 
- PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. */ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-ef
[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks
The branch 'aoliva/heads/testme' was updated to point to: f7a9315f62c2... ifcombine across noncontiguous blocks It previously pointed to: 3eb8edeb0ee0... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 3eb8ede... ifcombine across noncontiguous blocks Summary of changes (added commits): --- f7a9315... ifcombine across noncontiguous blocks
[gcc r15-4594] asan: Fix up build_check_stmt gsi handling [PR117209]
https://gcc.gnu.org/g:885143fa77599c44bfdd4e8e6b6987b7824db6ba commit r15-4594-g885143fa77599c44bfdd4e8e6b6987b7824db6ba Author: Jakub Jelinek Date: Thu Oct 24 12:45:34 2024 +0200 asan: Fix up build_check_stmt gsi handling [PR117209] gsi_safe_insert_before properly updates gsi_bb in gimple_stmt_iterator in case it splits objects, but unfortunately build_check_stmt was in some places (but not others) using a copy of the iterator rather than the iterator passed from callers and so didn't propagate that to callers. I guess it didn't matter much before when it was just using gsi_insert_before as that really didn't change the iterator. The !before_p case is apparently dead code, nothing is calling it with before_p=false since around 4.9. 2024-10-24 Jakub Jelinek PR sanitizer/117209 * asan.cc (maybe_cast_to_ptrmode): Formatting fix. (build_check_stmt): Don't copy *iter into gsi, perform all the updates on iter directly. * gcc.dg/asan/pr117209.c: New test. Diff: --- gcc/asan.cc | 14 +- gcc/testsuite/gcc.dg/asan/pr117209.c | 15 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/gcc/asan.cc b/gcc/asan.cc index 5f262d54a3ac..bc92d9c7a792 100644 --- a/gcc/asan.cc +++ b/gcc/asan.cc @@ -2610,7 +2610,7 @@ maybe_cast_to_ptrmode (location_t loc, tree len, gimple_stmt_iterator *iter, if (ptrofftype_p (len)) return len; gimple *g = gimple_build_assign (make_ssa_name (pointer_sized_int_node), - NOP_EXPR, len); + NOP_EXPR, len); gimple_set_location (g, loc); if (before_p) gsi_safe_insert_before (iter, g); @@ -2644,16 +2644,13 @@ build_check_stmt (location_t loc, tree base, tree len, bool is_non_zero_len, bool before_p, bool is_store, bool is_scalar_access, unsigned int align = 0) { - gimple_stmt_iterator gsi = *iter; gimple *g; gcc_assert (!(size_in_bytes > 0 && !is_non_zero_len)); gcc_assert (size_in_bytes == -1 || size_in_bytes >= 1); - gsi = *iter; - base = unshare_expr (base); - base = maybe_create_ssa_name (loc, base, &gsi, before_p); + base = 
maybe_create_ssa_name (loc, base, iter, before_p); if (len) { @@ -2704,12 +2701,11 @@ build_check_stmt (location_t loc, tree base, tree len, align / BITS_PER_UNIT)); gimple_set_location (g, loc); if (before_p) -gsi_safe_insert_before (&gsi, g); +gsi_safe_insert_before (iter, g); else { - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - gsi_next (&gsi); - *iter = gsi; + gsi_insert_after (iter, g, GSI_NEW_STMT); + gsi_next (iter); } } diff --git a/gcc/testsuite/gcc.dg/asan/pr117209.c b/gcc/testsuite/gcc.dg/asan/pr117209.c new file mode 100644 index ..34c71ba260b3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/asan/pr117209.c @@ -0,0 +1,15 @@ +/* PR sanitizer/117209 */ +/* { dg-do compile } */ +/* { dg-options "-fsanitize=address" } */ + +struct A { char a; }; +void foo (void); +__attribute__((returns_twice, const)) int bar (struct A); + +void +baz (struct A *x, int *y, int z) +{ + if (z) +foo (); + *y = bar (*x); +}
[gcc r15-4595] c++: Further fix for get_member_function_from_ptrfunc [PR117259]
https://gcc.gnu.org/g:b25d3201b6338d9f71c64f524ca2974d9a1f38e8 commit r15-4595-gb25d3201b6338d9f71c64f524ca2974d9a1f38e8 Author: Jakub Jelinek Date: Thu Oct 24 12:56:19 2024 +0200 c++: Further fix for get_member_function_from_ptrfunc [PR117259] The following testcase shows that the previous get_member_function_from_ptrfunc changes weren't sufficient and we still have cases where -fsanitize=undefined with pointers to member functions can cause wrong code being generated and related false positive warnings. The problem is that save_expr doesn't always create SAVE_EXPR, it can skip some invariant arithmetics and in the end it could be really large expressions which would be evaluated several times (and what is worse, with -fsanitize=undefined those expressions then can have SAVE_EXPRs added to their subparts for -fsanitize=bounds or -fsanitize=null or -fsanitize=alignment instrumentation). Tried to just build1 a SAVE_EXPR + add TREE_SIDE_EFFECTS instead of save_expr, but that doesn't work either, because cp_fold happily optimizes those SAVE_EXPRs away when it sees SAVE_EXPR operand is tree_invariant_p. So, the following patch instead of using save_expr or building SAVE_EXPR manually builds a TARGET_EXPR. Both types are pointers, so it doesn't need to be destroyed in any way, but TARGET_EXPR is what doesn't get optimized away immediately. 2024-10-24 Jakub Jelinek PR c++/117259 * typeck.cc (get_member_function_from_ptrfunc): Use force_target_expr rather than save_expr for instance_ptr and function. Don't call it for TREE_CONSTANT. * g++.dg/ubsan/pr117259.C: New test. 
Diff: --- gcc/cp/typeck.cc | 31 +-- gcc/testsuite/g++.dg/ubsan/pr117259.C | 13 + 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 71d879abef12..bfc0c560c106 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -4193,24 +4193,27 @@ get_member_function_from_ptrfunc (tree *instance_ptrptr, tree function, if (!nonvirtual && is_dummy_object (instance_ptr)) nonvirtual = true; - /* Use save_expr even when instance_ptr doesn't have side-effects, -unless it is a simple decl (save_expr won't do anything on -constants), so that we don't ubsan instrument the expression -multiple times. See PR116449. */ + /* Use force_target_expr even when instance_ptr doesn't have +side-effects, unless it is a simple decl or constant, so +that we don't ubsan instrument the expression multiple times. +Don't use save_expr, as save_expr can avoid building a SAVE_EXPR +and building a SAVE_EXPR manually can be optimized away during +cp_fold. See PR116449 and PR117259. */ if (TREE_SIDE_EFFECTS (instance_ptr) - || (!nonvirtual && !DECL_P (instance_ptr))) - { - instance_save_expr = save_expr (instance_ptr); - if (instance_save_expr == instance_ptr) - instance_save_expr = NULL_TREE; - else - instance_ptr = instance_save_expr; - } + || (!nonvirtual + && !DECL_P (instance_ptr) + && !TREE_CONSTANT (instance_ptr))) + instance_ptr = instance_save_expr + = force_target_expr (TREE_TYPE (instance_ptr), instance_ptr, + complain); /* See above comment. */ if (TREE_SIDE_EFFECTS (function) - || (!nonvirtual && !DECL_P (function))) - function = save_expr (function); + || (!nonvirtual + && !DECL_P (function) + && !TREE_CONSTANT (function))) + function + = force_target_expr (TREE_TYPE (function), function, complain); /* Start by extracting all the information from the PMF itself. 
*/ e3 = pfn_from_ptrmemfunc (function); diff --git a/gcc/testsuite/g++.dg/ubsan/pr117259.C b/gcc/testsuite/g++.dg/ubsan/pr117259.C new file mode 100644 index ..2b7ba56c2a36 --- /dev/null +++ b/gcc/testsuite/g++.dg/ubsan/pr117259.C @@ -0,0 +1,13 @@ +// PR c++/117259 +// { dg-do compile } +// { dg-options "-Wuninitialized -fsanitize=undefined" } + +struct A { void foo () {} }; +struct B { void (A::*b) (); B (void (A::*x) ()) : b(x) {}; }; +const B c[1] = { &A::foo }; + +void +foo (A *x, int y) +{ + (x->*c[y].b) (); +}
[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:c5a9703abe8ddb40629fce9e098ef5820fdff8e2 commit c5a9703abe8ddb40629fce9e098ef5820fdff8e2 Author: Michael Meissner Date: Thu Oct 24 14:08:35 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 12 1 file changed, 12 insertions(+) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index 80f083698d2a..a48e8bcf5071 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,3 +1,15 @@ + Branch work182-sha, patch #412 + +Add p-future target-supports.exp + +2024-10-24 Michael Meissner + +gcc/testsuite/ + + * lib/target-supports.exp (check_effective_target_powerpc_future_ok): + New target. + (check_effective_target_powerpc_dense_math_ok): Likewise. + Branch work182-sha, patch #411 Add potential p-future XVRLD and XVRLDI instructions.
[gcc(refs/users/meissner/heads/work182-sha)] Add p-future target-supports.exp
https://gcc.gnu.org/g:58f00644caf178cd0b4b0d50ba151b4f156ea03d commit 58f00644caf178cd0b4b0d50ba151b4f156ea03d Author: Michael Meissner Date: Thu Oct 24 14:07:22 2024 -0400 Add p-future target-supports.exp 2024-10-24 Michael Meissner gcc/testsuite/ * lib/target-supports.exp (check_effective_target_powerpc_future_ok): New target. (check_effective_target_powerpc_dense_math_ok): Likewise. Diff: --- gcc/testsuite/lib/target-supports.exp | 35 +++ 1 file changed, 35 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index d113a08dff7b..f104f4295d9f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7366,6 +7366,41 @@ proc check_effective_target_power10_ok { } { } } +# Return 1 if this is a PowerPC target supporting -mcpu=future which enables +# some potential new instructions. +proc check_effective_target_powerpc_future_ok { } { + return [check_no_compiler_messages powerpc_future_ok object { + #ifndef _ARCH_PWR_FUTURE + #error "-mcpu=future is not supported" + #else + int dummy; + #endif + } "-mcpu=future"] +} + +# Return 1 if this is a PowerPC target supporting -mcpu=future which enables +# the dense math operations. +proc check_effective_target_powerpc_dense_math_ok { } { +if { ([istarget powerpc*-*-*]) } { + return [check_no_compiler_messages powerpc_dense_math_ok object { + __vector_quad vq; + int main (void) { + #ifndef __DENSE_MATH__ + #error "target does not have dense math support." + #else + /* Make sure we have dense math support. */ + __vector_quad dmr; + __asm__ ("dmsetaccz %A0" : "=wD" (dmr)); + vq = dmr; + #endif + return 0; + } + } "-mcpu=future"] +} else { + return 0; +} +} + # Return 1 if this is a PowerPC target supporting -mfloat128 via either # software emulation on power7/power8 systems or hardware support on power9.
[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine
https://gcc.gnu.org/g:aee55fb4fdf230ddb0e96564f60e5db527b1a8c4 commit aee55fb4fdf230ddb0e96564f60e5db527b1a8c4 Author: Alexandre Oliva Date: Thu Oct 24 05:25:30 2024 -0300 extend ifcombine_replace_cond to handle noncontiguous ifcombine Prepare to handle noncontiguous ifcombine, introducing logic to modify the outer condition when needed. There are two cases worth mentioning: - when blocks are noncontiguous, we have to place the combined condition in the outer block to avoid pessimizing carefully crafted short-circuited tests; - even when blocks are contiguous, we prepare for situations in which the combined condition has two tests, one to be placed in outer and the other in inner. This circumstance will not come up when noncontiguous ifcombine is first enabled, but it will when an improved fold_truth_andor is integrated with ifcombine. Combining the condition from inner into outer may require moving SSA DEFs used in the inner condition, and the changes implement this as well. for gcc/ChangeLog * tree-ssa-ifcombine.cc: Include bitmap.h. (ifcombine_mark_ssa_name): New. (struct ifcombine_mark_ssa_name_t): New. (ifcombine_mark_ssa_name_walk): New. (ifcombine_replace_cond): Prepare to handle noncontiguous and split-condition ifcombine. Diff: --- gcc/tree-ssa-ifcombine.cc | 184 +- 1 file changed, 180 insertions(+), 4 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index b5b72be29bbf..c271d1e86a9b 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa.h" #include "attribs.h" #include "asan.h" +#include "bitmap.h" #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT #define LOGICAL_OP_NON_SHORT_CIRCUIT \ @@ -460,17 +461,70 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, } } -/* Replace the conditions in INNER_COND with COND. - Replace OUTER_COND with a constant. */ +/* Set NAME's bit in USED if OUTER dominates it. 
*/ + +static void +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer) +{ + if (SSA_NAME_IS_DEFAULT_DEF (name)) +return; + + gimple *def = SSA_NAME_DEF_STMT (name); + basic_block bb = gimple_bb (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, outer)) +return; + + bitmap_set_bit (used, SSA_NAME_VERSION (name)); +} + +/* Data structure passed to ifcombine_mark_ssa_name. */ +struct ifcombine_mark_ssa_name_t +{ + /* SSA_NAMEs that have been referenced. */ + bitmap used; + /* Dominating block of DEFs that might need moving. */ + basic_block outer; +}; + +/* Mark in DATA->used any SSA_NAMEs used in *t. */ + +static tree +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) +{ + ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_; + + if (*t && TREE_CODE (*t) == SSA_NAME) +ifcombine_mark_ssa_name (data->used, *t, data->outer); + + return NULL; +} + +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2. + COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND + replaced with a constant, but if there are intervening blocks, it's best to + adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND. */ static bool ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, gcond *outer_cond, bool outer_inv, tree cond, bool must_canon, tree cond2) { - bool result_inv = inner_inv; + /* Split cond into cond2 if they're contiguous. ??? We might be able to + handle ORIF as well, inverting both conditions, but it's not clear that + this would be enough, and it never comes up. */ + if (!cond2 + && TREE_CODE (cond) == TRUTH_ANDIF_EXPR + && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond)) +{ + /* ??? Does this ever hit? */ + gcc_unreachable (); + cond2 = TREE_OPERAND (cond, 1); + cond = TREE_OPERAND (cond, 0); +} - gcc_checking_assert (!cond2); + bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond)) + != gimple_bb (outer_cond)); + bool result_inv = outer_p ? 
outer_inv : inner_inv; if (result_inv) cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); @@ -480,6 +534,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, else if (must_canon) return false; + if (outer_p) +{ + { + auto_bitmap used; + basic_block outer_bb = gimple_bb (outer_cond); + + /* Mark SSA DEFs that are referenced by cond and may thus need to be + moved to outer. */ + { + ifcombine_mark_ssa_name_t data = { used, outer_bb }; + walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL); + } +
[gcc r15-4603] Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule
https://gcc.gnu.org/g:8d01bc7dc453a44cd5b1913fb8a4104ee56fa410 commit r15-4603-g8d01bc7dc453a44cd5b1913fb8a4104ee56fa410 Author: Richard Sandiford Date: Thu Oct 24 14:22:32 2024 +0100 Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule match.pd had a rule to simplify ((X /[ex] A) +- B) * A -> X +- A * B when A and B are INTEGER_CSTs. This patch extends it to handle the case where the outer multiplication is by a factor of A, not just A itself. It also handles addition and multiplication of poly_ints. (Exact division by a poly_int seems unlikely.) gcc/ * match.pd: Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule to ((X /[ex] C1) +- C2) * (C1 * C3) -> (X * C3) +- (C1 * C2 * C3). gcc/testsuite/ * gcc.dg/tree-ssa/mulexactdiv-5.c: New test. * gcc.dg/tree-ssa/mulexactdiv-6.c: Likewise. * gcc.dg/tree-ssa/mulexactdiv-7.c: Likewise. * gcc.dg/tree-ssa/mulexactdiv-8.c: Likewise. * gcc.target/aarch64/sve/cnt_fold_3.c: Likewise. Diff: --- gcc/match.pd | 38 +-- gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c | 29 +++ gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c | 59 +++ gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c | 22 + gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c | 20 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c | 40 +++ 6 files changed, 194 insertions(+), 14 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index e3dabdc15d5f..b9621a47cdf1 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5497,24 +5497,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) optab_vector))) (eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); }))) -/* ((X /[ex] A) +- B) * A --> X +- A * B. */ +/* ((X /[ex] C1) +- C2) * (C1 * C3) --> (X * C3) +- (C1 * C2 * C3). */ (for op (plus minus) (simplify - (mult (convert1? (op (convert2? 
(exact_div @0 INTEGER_CST@@1)) INTEGER_CST@2)) @1) - (if (tree_nop_conversion_p (type, TREE_TYPE (@2)) - && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))) - (with - { - wi::overflow_type overflow; - wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2), - TYPE_SIGN (type), &overflow); - } + (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@1)) + poly_int_tree_p@2)) + poly_int_tree_p@3) + (with { poly_widest_int factor; } + (if (tree_nop_conversion_p (type, TREE_TYPE (@2)) + && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2)) + && multiple_p (wi::to_poly_widest (@3), wi::to_widest (@1), &factor)) +(with + { + wi::overflow_type overflow; +wide_int mul; + } (if (types_match (type, TREE_TYPE (@2)) -&& types_match (TREE_TYPE (@0), TREE_TYPE (@2)) && !overflow) - (op @0 { wide_int_to_tree (type, mul); }) + && types_match (TREE_TYPE (@0), TREE_TYPE (@2)) + && TREE_CODE (@2) == INTEGER_CST + && TREE_CODE (@3) == INTEGER_CST + && (mul = wi::mul (wi::to_wide (@2), wi::to_wide (@3), +TYPE_SIGN (type), &overflow), + !overflow)) + (op (mult @0 { wide_int_to_tree (type, factor); }) + { wide_int_to_tree (type, mul); }) (with { tree utype = unsigned_type_for (type); } - (convert (op (convert:utype @0) - (mult (convert:utype @1) (convert:utype @2)) + (convert (op (mult (convert:utype @0) + { wide_int_to_tree (utype, factor); }) + (mult (convert:utype @3) (convert:utype @2))) /* Canonicalization of binary operations. 
*/ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c new file mode 100644 index ..37cd676fff69 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c @@ -0,0 +1,29 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +#define TEST_CMP(FN, DIV, ADD, MUL)\ + int \ + FN (int x) \ + {\ +if (x & 7) \ + __builtin_unreachable ();\ +x /= DIV; \ +x += ADD; \ +return x * MUL;\ + } + +TEST_CMP (f1, 2, 1, 6) +TEST_CMP (f2, 2, 2, 10) +TEST_CMP (f3, 4, 3, 80) +TEST_CMP (f4, 8, 4, 200) + +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump-not {> 1, 6) +TEST_CMP (f2, 2, ~(~0U >> 2), 10) + +void +cmp1 (int x) +{ + if (x & 3) +__builtin_unreachable (); + + int y = x / 4; + y += (int) (~0U / 3U); + y *= 8
[gcc r15-4606] Try to simplify (X >> C1) * (C2 << C1) -> X * C2
https://gcc.gnu.org/g:af19e46c88fd75e31127cde239b8f28d8f9c4040 commit r15-4606-gaf19e46c88fd75e31127cde239b8f28d8f9c4040 Author: Richard Sandiford Date: Thu Oct 24 14:22:33 2024 +0100 Try to simplify (X >> C1) * (C2 << C1) -> X * C2 This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2 when the low C1 bits of X are known to be zero. As with the earlier X >> C1 << (C2 + C1) patch, any single conversion is allowed between the shift and the multiplication. gcc/ * match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. gcc/testsuite/ * gcc.dg/tree-ssa/shifts-3.c: New test. * gcc.dg/tree-ssa/shifts-4.c: Likewise. * gcc.target/aarch64/sve/cnt_fold_5.c: Likewise. Diff: --- gcc/match.pd | 13 + gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c | 65 +++ gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c | 23 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c | 38 + 4 files changed, 139 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 391c60bdfb32..148d0bc65d03 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4919,6 +4919,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && wi::to_widest (@2) >= wi::to_widest (@1) && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))) (lshift (convert @0) (minus @2 @1 + +/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. */ +(simplify + (mult (convert? 
(rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1)) + poly_int_tree_p@2) + (with { poly_widest_int factor; } + (if (INTEGRAL_TYPE_P (type) + && wi::ltu_p (wi::to_wide (@1), element_precision (type)) + && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)) + && multiple_p (wi::to_poly_widest (@2), + widest_int (1) << tree_to_uhwi (@1), + &factor)) + (mult (convert @0) { wide_int_to_tree (type, factor); } #endif /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c new file mode 100644 index ..dcff518e630d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c @@ -0,0 +1,65 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +unsigned int +f1 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return x * 20; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + unsigned char y = x; + y >>= 2; + return y * 36; +} + +unsigned long +f3 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return (unsigned long) x * 88; +} + +int +f4 (int x) +{ + if (x & 15) +__builtin_unreachable (); + x >>= 4; + return x * 48; +} + +unsigned int +f5 (int x) +{ + if (x & 31) +__builtin_unreachable (); + x >>= 5; + return x * 3200; +} + +unsigned int +f6 (unsigned int x) +{ + if (x & 1) +__builtin_unreachable (); + x >>= 1; + return x * (~0U / 3 & -2); +} + +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump-not {>= 2; + return x * 10; +} + +unsigned int +f2 (unsigned int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 3; + return x * 24; +} + +/* { dg-final { scan-tree-dump-times { + +/* +** f1: +** ... +** cntd[^\n]+ +** ... +** mul [^\n]+ +** ret +*/ +uint64_t +f1 (int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return (uint64_t) x * svcnth (); +} + +/* +** f2: +** ... +** asr [^\n]+ +** ... 
+** ret +*/ +uint64_t +f2 (int x) +{ + if (x & 3) +__builtin_unreachable (); + x >>= 2; + return (uint64_t) x * svcntw (); +}
[gcc r15-4601] Use get_nonzero_bits to simplify trunc_div to exact_div
https://gcc.gnu.org/g:e2e798b86074010a8d5da16ce0b199fcec70a50e commit r15-4601-ge2e798b86074010a8d5da16ce0b199fcec70a50e Author: Richard Sandiford Date: Thu Oct 24 14:22:31 2024 +0100 Use get_nonzero_bits to simplify trunc_div to exact_div There are a limited number of existing rules that benefit from knowing that a division is exact. Later patches will add more. gcc/ * match.pd: Simplify X / (1 << C) to X /[ex] (1 << C) if the low C bits of X are clear gcc/testsuite/ * gcc.dg/tree-ssa/cmpexactdiv-6.c: New test. Diff: --- gcc/match.pd | 9 + gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c | 29 +++ 2 files changed, 38 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 9024277e5d34..2e7f06ecbe45 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5456,6 +5456,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) TYPE_PRECISION (type)), 0)) (convert @0))) +#if GIMPLE +/* X / (1 << C) -> X /[ex] (1 << C) if the low C bits of X are clear. */ +(simplify + (trunc_div (with_possible_nonzero_bits2 @0) integer_pow2p@1) + (if (INTEGRAL_TYPE_P (type) + && !TYPE_UNSIGNED (type) + && wi::multiple_of_p (get_nonzero_bits (@0), wi::to_wide (@1), SIGNED)) + (exact_div @0 @1))) +#endif /* (X /[ex] A) * A -> X. 
*/ (simplify diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c new file mode 100644 index ..82d517b05abd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c @@ -0,0 +1,29 @@ +/* { dg-options "-O2 -fdump-tree-optimized-raw" } */ + +typedef __INTPTR_TYPE__ intptr_t; + +int +f1 (int x, int y) +{ + if ((x & 1) || (y & 1)) +__builtin_unreachable (); + x /= 2; + y /= 2; + return x < y; +} + +int +f2 (void *ptr1, void *ptr2, void *ptr3) +{ + ptr1 = __builtin_assume_aligned (ptr1, 4); + ptr2 = __builtin_assume_aligned (ptr2, 4); + ptr3 = __builtin_assume_aligned (ptr3, 4); + intptr_t diff1 = (intptr_t) ptr1 - (intptr_t) ptr2; + intptr_t diff2 = (intptr_t) ptr1 - (intptr_t) ptr3; + diff1 /= 2; + diff2 /= 2; + return diff1 < diff2; +} + +/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */ +/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
[gcc r14-10833] c++: remove dg-warning [PR117274]
https://gcc.gnu.org/g:2d47645cd47c9a84a69343b641a6ee741a85dc75 commit r14-10833-g2d47645cd47c9a84a69343b641a6ee741a85dc75 Author: Jason Merrill Date: Thu Oct 24 11:13:30 2024 -0400 c++: remove dg-warning [PR117274] This warning was added for GCC 15, don't expect it. PR c++/117274 PR c++/117107 gcc/testsuite/ChangeLog: * g++.dg/cpp2a/decomp10.C: Remove captured binding warning. Diff: --- gcc/testsuite/g++.dg/cpp2a/decomp10.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp10.C b/gcc/testsuite/g++.dg/cpp2a/decomp10.C index 8fe425b5d01e..cadeee347b4c 100644 --- a/gcc/testsuite/g++.dg/cpp2a/decomp10.C +++ b/gcc/testsuite/g++.dg/cpp2a/decomp10.C @@ -12,7 +12,7 @@ struct tuple { void check_tuple_like() { tuple t; auto [v, r] = t; // { dg-warning "structured bindings" "" { target c++14_down } } -(void)[v, r] { // { dg-warning "captured structured" "" { target c++17_down } } +(void)[v, r] { decltype(v) x; }; }
[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks
The branch 'aoliva/heads/testme' was updated to point to: 53a0460c1f49... ifcombine across noncontiguous blocks It previously pointed to: a050f30dcecc... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- a050f30... ifcombine across noncontiguous blocks Summary of changes (added commits): --- 53a0460... ifcombine across noncontiguous blocks
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:53a0460c1f498ae9bd2e4da34bf2c22bd74f1181 commit 53a0460c1f498ae9bd2e4da34bf2c22bd74f1181 Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Rework ifcombine to support merging conditions from noncontiguous blocks. This depends on earlier preparation changes. The function that attempted to ifcombine a block with its immediate predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks eligible for ifcombine, attempting to combine with them. The function that actually drives the combination of a pair of blocks, tree_ssa_ifcombine_bb_1, now takes an additional parameter: the successor of outer that leads to inner. The function that recognizes if_then_else patterns is modified to enable testing without distinguishing between then and else, or to require nondegenerate conditions, that aren't worth combining with. for gcc/ChangeLog * tree-ssa-ifcombine.cc (recognize_if_then_else): Support relaxed then/else testing; require nondegenerate condition otherwise. (tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it instead of inner_cond_bb. Adjust callers. (tree_ssa_ifcombine_bb): Loop over dominating outer blocks eligible for ifcombine. (pass_tree_ifcombine::execute): Noted potential need for changes to the post-combine logic. Diff: --- gcc/tree-ssa-ifcombine.cc | 152 +- 1 file changed, 123 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index c271d1e86a9b..a21cc22cf589 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb) is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. 
If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -899,19 +908,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. - PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. 
*/ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-effects. */ if (phi_pred_bb != else_bb - && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb) + && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb) && same_phi_args_p (outer_cond_bb, phi_pred_bb, el
[gcc r15-4612] testsuite: Require effective target pie for pr113197
https://gcc.gnu.org/g:bcd56224d74cdd8dc3c77097de51e97bc7b6d181 commit r15-4612-gbcd56224d74cdd8dc3c77097de51e97bc7b6d181 Author: Dimitar Dimitrov Date: Thu Oct 24 19:59:42 2024 +0300 testsuite: Require effective target pie for pr113197 The test for PR113197 explicitly enables PIE. But targets without PIE emit warnings when -fpie is passed (e.g. pru and avr), which causes the test to fail. Fix by adding an effective target requirement for PIE. With this patch, the test now is marked as unsupported for pru-unknown-elf. Testing for x86_64-pc-linux-gnu passes with current mainline, and fails if the fix from r15-4018-g02f4efe3c12cf7 is reverted. PR ipa/113197 gcc/testsuite/ChangeLog: * gcc.dg/lto/pr113197_0.c: Require effective target pie. Signed-off-by: Dimitar Dimitrov Diff: --- gcc/testsuite/gcc.dg/lto/pr113197_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.dg/lto/pr113197_0.c b/gcc/testsuite/gcc.dg/lto/pr113197_0.c index 293c8207dee0..6fd86245d30a 100644 --- a/gcc/testsuite/gcc.dg/lto/pr113197_0.c +++ b/gcc/testsuite/gcc.dg/lto/pr113197_0.c @@ -1,4 +1,5 @@ /* { dg-lto-do link } */ +/* { dg-require-effective-target pie } */ /* { dg-lto-options { { -O -flto -fpie } } } */ /* { dg-extra-ld-options { -r -nostdlib -flinker-output=nolto-rel } } */
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:3eb8edeb0ee01ed4549c7ca3e5705b32688c1cbe commit 3eb8edeb0ee01ed4549c7ca3e5705b32688c1cbe Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Diff: --- gcc/tree-ssa-ifcombine.cc | 144 -- 1 file changed, 115 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 6be5d969de88..d0d1889f2511 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -50,6 +50,21 @@ along with GCC; see the file COPYING3. If not see false) >= 2) #endif +/* Return FALSE iff the COND_BB ends with a conditional whose result is not a + known constant. */ + +static bool +known_succ_p (basic_block cond_bb) +{ + gcond *cond = safe_dyn_cast (*gsi_last_bb (cond_bb)); + + if (!cond) +return true; + + return (CONSTANT_CLASS_P (gimple_cond_lhs (cond)) + && CONSTANT_CLASS_P (gimple_cond_rhs (cond))); +} + /* This pass combines COND_EXPRs to simplify control flow. It currently recognizes bit tests and comparisons in chains that represent logical and or logical or of two COND_EXPRs. @@ -70,25 +85,34 @@ along with GCC; see the file COPYING3. If not see is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. 
If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t->dest == *then_bb && e->dest == *else_bb) + || (t->dest == *else_bb && e->dest == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, gcc_assert (inner_taken->dest == outer2->dest); if (outer_to_inner_bb == inner_cond_bb - && constant_condition_p (outer_cond_bb)) + && known_succ_p (outer_cond_bb)) { /* Path outer_cond_bb->(outer2) needs to be merged into path outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) @@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer_to_inner->probability = profile_probability::always (); outer2->probability = profile_probability::never (); } - else if (constant_condition_p (inner_cond_bb)) + else if (known_succ_p (inner_cond_bb)) { /* Path inner_cond_bb->(inner_taken) needs to be merged into path outer_cond_bb->(outer2). We've accumulated the probabilities from @@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. 
- PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. */ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. */ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-ef
[gcc r15-4593] SVE intrinsics: Fold svsra with op1 all zeros to svlsr/svasr.
https://gcc.gnu.org/g:f6fbc0d2422ce9bea6a23226f4a13a76ffd1784b commit r15-4593-gf6fbc0d2422ce9bea6a23226f4a13a76ffd1784b Author: Jennifer Schmitz Date: Thu Oct 17 02:31:47 2024 -0700 SVE intrinsics: Fold svsra with op1 all zeros to svlsr/svasr. A common idiom in intrinsics loops is to have accumulator intrinsics in an unrolled loop with an accumulator initialized to zero at the beginning. Propagating the initial zero accumulator into the first iteration of the loop and simplifying the first accumulate instruction is a desirable transformation that we should teach GCC. Therefore, this patch folds svsra to svlsr/svasr if op1 is all zeros, producing the lower latency instructions LSR/ASR instead of USRA/SSRA. We implemented this optimization in svsra_impl::fold. Tests were added to check the produced assembly for use of LSR/ASR. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * config/aarch64/aarch64-sve-builtins-sve2.cc (svsra_impl::fold): Fold svsra to svlsr/svasr if op1 is all zeros. gcc/testsuite/ * gcc.target/aarch64/sve2/acle/asm/sra_s32.c: New test. * gcc.target/aarch64/sve2/acle/asm/sra_s64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/sra_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/sra_u64.c: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 28 ++ .../gcc.target/aarch64/sve2/acle/asm/sra_s32.c | 9 +++ .../gcc.target/aarch64/sve2/acle/asm/sra_s64.c | 9 +++ .../gcc.target/aarch64/sve2/acle/asm/sra_u32.c | 9 +++ .../gcc.target/aarch64/sve2/acle/asm/sra_u64.c | 9 +++ 5 files changed, 64 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 6a20a613f832..ddd6e466ee3a 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -417,6 +417,34 @@ public: class svsra_impl : public function_base { +public: + gimple * + fold (gimple_folder &f) const override + { +/* Fold to svlsr/svasr if op1 is all zeros. */ +tree op1 = gimple_call_arg (f.call, 0); +if (!integer_zerop (op1)) + return NULL; +function_instance instance ("svlsr", functions::svlsr, + shapes::binary_uint_opt_n, MODE_n, + f.type_suffix_ids, GROUP_none, PRED_x); +if (!f.type_suffix (0).unsigned_p) + { + instance.base_name = "svasr"; + instance.base = functions::svasr; + } +gcall *call = f.redirect_call (instance); +/* Add a ptrue as predicate, because unlike svsra, svlsr/svasr are + predicated intrinsics. */ +gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ())); +/* For svsra, the shift amount (imm3) is uint64_t for all function types, + but for svlsr/svasr, imm3 has the same width as the function type. 
*/ +tree imm3 = gimple_call_arg (f.call, 2); +tree imm3_prec = wide_int_to_tree (f.scalar_type (0), + wi::to_widest (imm3)); +gimple_call_set_arg (call, 2, imm3_prec); +return call; + } public: rtx expand (function_expander &e) const override diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c index ac992dc7b1c6..86cf4bd8137f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c @@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_32_s32_tied2, svint32_t, TEST_UNIFORM_Z (sra_32_s32_untied, svint32_t, z0 = svsra_n_s32 (z1, z2, 32), z0 = svsra (z1, z2, 32)) + +/* +** sra_2_s32_zeroop1: +** asr z0\.s, z1\.s, #2 +** ret +*/ +TEST_UNIFORM_Z (sra_2_s32_zeroop1, svint32_t, + z0 = svsra_n_s32 (svdup_s32 (0), z1, 2), + z0 = svsra (svdup_s32 (0), z1, 2)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c index 9ea5657ab88d..7b39798ba1d5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c @@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_64_s64_tied2, svint64_t, TEST_UNIFORM_Z (sra_64_s64_untied, svint64_t, z0 = svsra_n_s64 (z1, z2, 64), z0 = svsra (z1, z2, 64)) + +/* +** sra_2_s64_zeroop1: +** asr z0\.d, z1\.d, #2 +** ret +*/ +TEST_UNIFORM_Z (sra_2_s64_zeroop1, svint64_t, + z0 = svsra_n_s64 (svdup_s64 (0), z1, 2), + z0 = svsra (svdup_s64 (0), z1, 2)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c b/gcc/testsuite/gcc.
[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks
The branch 'aoliva/heads/testme' was updated to point to: 3eb8edeb0ee0... ifcombine across noncontiguous blocks It previously pointed to: c0ef27f42446... ifcombine across noncontiguous blocks Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- c0ef27f... ifcombine across noncontiguous blocks Summary of changes (added commits): --- 3eb8ede... ifcombine across noncontiguous blocks
[gcc/aoliva/heads/testbase] (705 commits) SVE intrinsics: Fold division and multiplication by -1 to n
The branch 'aoliva/heads/testbase' was updated to point to: fc40202c1ac5... SVE intrinsics: Fold division and multiplication by -1 to n It previously pointed to: 85910e650a61... x86: Extend AVX512 Vectorization for Popcount in Various Mo Diff: Summary of changes (added commits): --- fc40202... SVE intrinsics: Fold division and multiplication by -1 to n (*) 90e38c4... SVE intrinsics: Add constant folding for svindex. (*) 078f7c4... [PATCH] RISC-V: override alignment of function/jump/loop (*) a616b7e... libffi: LoongArch: Fix soft-float builds of libffi (*) 8df549f... testsuite: Fix up pr116488.c and pr117226.c tests [PR116488 (*) 03b469e... RISC-V: Add testcases for form 4 of signed vector SAT_ADD (*) fa546b6... Daily bump. (*) 820464e... aarch64: Fix warning in aarch64_ptrue_reg (*) 36e3e68... match: Reject non-const internal functions [PR117260] (*) 5467f5b... ginclude: stdalign.h should define __xxx_is_defined macros (*) f342d66... top-level: Add pull request template for Forgejo (*) 779c039... jit: reset state in varasm.cc [PR117275] (*) 9ffcf1f... aarch64: Improve scalar mode popcount expansion by using SV (*) 774ad67... Implement operator_pointer_diff::fold_range (*) 4b0f238... libstdc++: Add -D_GLIBCXX_ASSERTIONS default for -O0 to API (*) 5a661ec... libstdc++: Add GLIBCXX_TESTSUITE_STDS example to docs (*) f565063... diagnostics: implement buffering for non-textual formats [P (*) de2dc62... libstdc++: Replace std::__to_address in C++20 branch in array_type_nelts_minus_one (*) f8687bc... libbacktrace: don't get confused by overlapping address ran (*) aaa855f... hppa: Fix up pa.opt.urls (*) 1f07dea... Handle GFC_STD_UNSIGNED like a standard in error messages. (*) 44a81aa... hppa: Add LRA support (*) b039d06... [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generati (*) 212d868... [PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy (*) f244492... [PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand (*) 3a12ac4... 
i386: Fix the order of operands in andn<mode>3 [PR (*) d0a9ae1... libstdc++: Reuse std::__assign_one in ( (*) 4d8373f... RISC-V: Add testcases for form 4 of vector signed SAT_SUB (*) b976292... RISC-V: Add testcases for form 3 of vector signed SAT_SUB (*) 5920bc8... Match: Support form 3 for vector signed integer SAT_SUB (*) 72d24d2... RISC-V: Add testcases for form 2 of vector signed SAT_SUB (*) 5667400... tree-optimization/116290 - fix compare-debug issue in ldist (*) b717c46... SH: Fix cost estimation of mem load/store (*) 7ec8b4b... SH: Add -fno-math-errno to fsca,fsrra tests. (*) c21402e... Daily bump. (*) 90a4851... libstdc++: testsuite: adjust name_fortify test for pre-defi (*) 27f6b37... libstdc++: Fix ranges::copy_backward for a single memcpyabl (*) 2ef62aa... MAINTAINERS: Add myself to write after approval (*) a4eec6c... Revert "c++: Fix overeager Woverloaded-virtual with convers (*) f0f1155... m68k: replace reload_in_progress by reload_in_progress || l (*) 1506027... tree-optimization/116481 - avoid building function_type[] (*) 3269a72... Fortran: Use OpenACC's acc_on_device builtin, fix OpenMP' _ (*) c38385d... [RISC-V] Avoid unnecessary extensions when value is already (*) f08af08... Daily bump. (*) d09131e... Unsigned constants for ISO_FORTRAN_ENV and ISO_C_BINDING. (*) a9173a5... vect: Fix inconsistency in fully-masked lane-reducing op ge (*) f54d42e... tree-optimization/117104 - add missed guards to max(a,b) != (*) ba773a8... [RISC-V] Slightly improve broadcasting small constants into (*) 34b77d1... Fortran/OpenMP: Warn when mapping polymorphic variables (*) 5cf85a2... bootstrap: Fix genmatch build where system gcc defaults to (*) c1034d7... gcc.target/i386/pr55583.c: Use long long for 64-bit integer (*) 80d0e10... gcc.target/i386/pr115749.c: Use word_mode integer (*) a4ce868... gcc.target/i386/invariant-ternlog-1.c: Also scan (%edx) (*) c397a8c... libcpp, genmatch: Use gcc_diag instead of printf for libcpp (*) c20c9d8... 
Fortran: Unify gfc_get_location handling; fix expr->ts bug (*) a564261... testsuite/i386: Add vector sat_sub testcases [PR112600] (*) 2a865ad... MAINTAINERS: Add myself to write after approval (*) 60163c8... c++: Fix overeager Woverloaded-virtual with conversion oper (*) c4af4fe... RISC-V: Add testcases for form 1 of vector signed SAT_SUB (*) 2a7f490... RISC-V: Implement vector SAT_SUB for signed integer (*) d339dbe... Vect: Try the pattern of vector signed integer SAT_SUB (*) b4f2fcc... Match: Support form 1 for vector signed integer SAT_SUB (*) b45e3ad... Daily bump. (*) 54b3f8e... Introduce GFC_STD_UNSIGNED. (*) c92477e... gcc.target/i386: Replace long with long long (*) 69f91fb... g++.target/i386/pr105953.C: Skip for x32 (*) 3ba65a7... gcc.target/i386/pr115407.c: Only run for lp64 (*) 3b095ac... Fix thinko in previous change (*) 01c4d1f... libstdc++: Rearrange std::move_it
[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond
https://gcc.gnu.org/g:3e47b273d7c0ebe66a739e2e73c98d99b14cff2a commit 3e47b273d7c0ebe66a739e2e73c98d99b14cff2a Author: Alexandre Oliva Date: Thu Oct 24 05:25:26 2024 -0300 introduce ifcombine_replace_cond Diff: --- gcc/tree-ssa-ifcombine.cc | 130 +- 1 file changed, 59 insertions(+), 71 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index eae32e1e1999..d2be5e81c671 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -399,6 +399,50 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer2->probability = profile_probability::never (); } +/* Replace the conditions in INNER_COND with COND. + Replace OUTER_COND with a constant. */ + +static tree +ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, + gcond *outer_cond, bool outer_inv, + tree cond, bool must_canon, tree) +{ + tree ret = cond; + bool result_inv = inner_inv; + + if (result_inv) +cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); + + if (tree tcanon = canonicalize_cond_expr_cond (cond)) +cond = tcanon; + else if (must_canon) +return NULL_TREE; + +{ + if (!is_gimple_condexpr_for_cond (cond)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond); + cond = force_gimple_operand_gsi_1 (&gsi, cond, +is_gimple_condexpr_for_cond, +NULL, true, GSI_SAME_STMT); + } + gimple_cond_set_condition_from_tree (inner_cond, cond); + update_stmt (inner_cond); + + /* Leave CFG optimization to cfg_cleanup. */ + gimple_cond_set_condition_from_tree (outer_cond, + outer_inv + ? boolean_false_node + : boolean_true_node); + update_stmt (outer_cond); +} + + update_profile_after_ifcombine (gimple_bb (inner_cond), + gimple_bb (outer_cond)); + + return ret; +} + /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. inner_inv, outer_inv indicate whether the conditions are inverted. 
@@ -445,26 +489,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t); t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, true, GSI_SAME_STMT); - t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR, - boolean_type_node, t2, t); - t = canonicalize_cond_expr_cond (t); - if (!t) - return false; - if (!is_gimple_condexpr_for_cond (t)) - { - gsi = gsi_for_stmt (inner_cond); - t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond, - NULL, true, GSI_SAME_STMT); - } - gimple_cond_set_condition_from_tree (inner_cond, t); - update_stmt (inner_cond); - /* Leave CFG optimization to cfg_cleanup. */ - gimple_cond_set_condition_from_tree (outer_cond, - outer_inv ? boolean_false_node : boolean_true_node); - update_stmt (outer_cond); + t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t); - update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); + if (!ifcombine_replace_cond (inner_cond, inner_inv, + outer_cond, outer_inv, + t, true, NULL_TREE)) + return false; if (dump_file) { @@ -484,9 +515,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, In that case remove the outer test and change the inner one to test for name & (bits1 | bits2) != 0. */ else if (recognize_bits_test (inner_cond, &name1, &bits1, !inner_inv) - && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv)) + && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv)) { - gimple_stmt_iterator gsi; tree t; if ((TREE_CODE (name1) == SSA_NAME @@ -529,33 +559,14 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, bits1 = fold_convert (TREE_TYPE (bits2), bits1); } - /* Do it. 
*/ - gsi = gsi_for_stmt (inner_cond); t = fold_build2 (BIT_IOR_EXPR, TREE_TYPE (name1), bits1, bits2); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR, boolean_type_node, t, + t = fold_build2 (EQ_EXPR, boolean_type_node, t,
[gcc(refs/users/aoliva/heads/testme)] drop redundant ifcombine_ifandif parm
https://gcc.gnu.org/g:ebc7c0f5cc561a8dd26ab36dec48c2c13b6a9c24 commit ebc7c0f5cc561a8dd26ab36dec48c2c13b6a9c24 Author: Alexandre Oliva Date: Thu Oct 24 05:25:24 2024 -0300 drop redundant ifcombine_ifandif parm Diff: --- gcc/tree-ssa-ifcombine.cc | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index ed20a231951a..eae32e1e1999 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -401,13 +401,12 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. - inner_inv, outer_inv and result_inv indicate whether the conditions - are inverted. + inner_inv, outer_inv indicate whether the conditions are inverted. Returns true if the edges to the common else basic-block were merged. */ static bool ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, - basic_block outer_cond_bb, bool outer_inv, bool result_inv) + basic_block outer_cond_bb, bool outer_inv) { gimple_stmt_iterator gsi; tree name1, name2, bit1, bit2, bits1, bits2; @@ -693,8 +692,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false, - false); + return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false); } /* And a version where the outer condition is negated. */ @@ -711,8 +709,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true, - false); + return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true); } /* The || form is characterized by a common then_bb with the @@ -731,8 +728,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... 
*/ - return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true, - true); + return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true); } /* And a version where the outer condition is negated. */ @@ -748,8 +744,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, ... */ - return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false, - true); + return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false); } return false;
[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks
https://gcc.gnu.org/g:c0ef27f424469ce14023e5e65e434ade2ffc1dc0 commit c0ef27f424469ce14023e5e65e434ade2ffc1dc0 Author: Alexandre Oliva Date: Thu Oct 24 05:25:33 2024 -0300 ifcombine across noncontiguous blocks Diff: --- gcc/tree-ssa-ifcombine.cc | 144 -- 1 file changed, 115 insertions(+), 29 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 6be5d969de88..970556a7801c 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -50,6 +50,21 @@ along with GCC; see the file COPYING3. If not see false) >= 2) #endif +/* Return FALSE iff the COND_BB ends with a conditional whose result is not a + known constant. */ + +static bool +known_succ_p (basic_block cond_bb) +{ + gcond *cond = safe_dyn_cast (*gsi_last_bb (cond_bb)); + + if (!cond) +return true; + + return (CONSTANT_CLASS_P (gimple_cond_lhs (cond)) + && CONSTANT_CLASS_P (gimple_cond_rhs (cond))); +} + /* This pass combines COND_EXPRs to simplify control flow. It currently recognizes bit tests and comparisons in chains that represent logical and or logical or of two COND_EXPRs. @@ -70,25 +85,34 @@ along with GCC; see the file COPYING3. If not see is left to CFG cleanup and DCE. */ -/* Recognize a if-then-else CFG pattern starting to match with the - COND_BB basic-block containing the COND_EXPR. The recognized - then end else blocks are stored to *THEN_BB and *ELSE_BB. If - *THEN_BB and/or *ELSE_BB are already set, they are required to - match the then and else basic-blocks to make the pattern match. - Returns true if the pattern matched, false otherwise. */ +/* Recognize a if-then-else CFG pattern starting to match with the COND_BB + basic-block containing the COND_EXPR. If !SUCCS_ANY, the condition must not + resolve to a constant for a match. Returns true if the pattern matched, + false otherwise. In case of a !SUCCS_ANY match, the recognized then end + else blocks are stored to *THEN_BB and *ELSE_BB. 
If *THEN_BB and/or + *ELSE_BB are already set, they are required to match the then and else + basic-blocks to make the pattern match. If SUCCS_ANY, *THEN_BB and *ELSE_BB + will not be filled in, and they will be found to match even if reversed. */ static bool recognize_if_then_else (basic_block cond_bb, - basic_block *then_bb, basic_block *else_bb) + basic_block *then_bb, basic_block *else_bb, + bool succs_any = false) { edge t, e; - if (EDGE_COUNT (cond_bb->succs) != 2) + if (EDGE_COUNT (cond_bb->succs) != 2 + || (!succs_any && known_succ_p (cond_bb))) return false; /* Find the then/else edges. */ t = EDGE_SUCC (cond_bb, 0); e = EDGE_SUCC (cond_bb, 1); + + if (succs_any) +return ((t == *then_bb && e == *else_bb) + || (t == *else_bb && e == *then_bb)); + if (!(t->flags & EDGE_TRUE_VALUE)) std::swap (t, e); if (!(t->flags & EDGE_TRUE_VALUE) @@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, gcc_assert (inner_taken->dest == outer2->dest); if (outer_to_inner_bb == inner_cond_bb - && constant_condition_p (outer_cond_bb)) + && known_succ_p (outer_cond_bb)) { /* Path outer_cond_bb->(outer2) needs to be merged into path outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) @@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer_to_inner->probability = profile_probability::always (); outer2->probability = profile_probability::never (); } - else if (constant_condition_p (inner_cond_bb)) + else if (known_succ_p (inner_cond_bb)) { /* Path inner_cond_bb->(inner_taken) needs to be merged into path outer_cond_bb->(outer2). We've accumulated the probabilities from @@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, /* Helper function for tree_ssa_ifcombine_bb. Recognize a CFG pattern and dispatch to the appropriate if-conversion helper for a particular set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB. - PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. 
*/ + PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB. + OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards + INNER_COND_BB. */ static bool tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, basic_block then_bb, basic_block else_bb, -basic_block phi_pred_bb) +basic_block phi_pred_bb, basic_block outer_succ_bb) { /* The && form is characterized by a common else_bb with the two edges leading to it mergable. The latter is guaranteed by matching PHI arguments in the else_bb and the inner cond_bb having no side-effects. */ if (phi_pr
[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond
https://gcc.gnu.org/g:11bf09a22577a9ed775a3a47a70afe5ee063d072 commit 11bf09a22577a9ed775a3a47a70afe5ee063d072 Author: Alexandre Oliva Date: Thu Oct 24 05:25:30 2024 -0300 introduce ifcombine_replace_cond Diff: --- gcc/tree-ssa-ifcombine.cc | 187 +- 1 file changed, 183 insertions(+), 4 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index d9595132512f..6be5d969de88 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa.h" #include "attribs.h" #include "asan.h" +#include "bitmap.h" #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT #define LOGICAL_OP_NON_SHORT_CIRCUIT \ @@ -445,16 +446,72 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, } } -/* Replace the conditions in INNER_COND with COND. - Replace OUTER_COND with a constant. */ +/* Set NAME's bit in USED if OUTER dominates it. */ + +static void +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer) +{ + if (SSA_NAME_IS_DEFAULT_DEF (name)) +return; + + gimple *def = SSA_NAME_DEF_STMT (name); + basic_block bb = gimple_bb (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, outer)) +return; + + bitmap_set_bit (used, SSA_NAME_VERSION (name)); +} + +/* Data structure passed to ifcombine_mark_ssa_name. */ +struct ifcombine_mark_ssa_name_t +{ + /* SSA_NAMEs that have been referenced. */ + bitmap used; + /* Dominating block of DEFs that might need moving. */ + basic_block outer; +}; + +/* Mark in DATA->used any SSA_NAMEs used in *t. */ + +static tree +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) +{ + ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_; + + if (*t && TREE_CODE (*t) == SSA_NAME) +ifcombine_mark_ssa_name (data->used, *t, data->outer); + + return NULL; +} + +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2. 
+ COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND + replaced with a constant, but if there are intervening blocks, it's best to + adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND. */ static tree ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, gcond *outer_cond, bool outer_inv, - tree cond, bool must_canon, tree) + tree cond, bool must_canon, tree cond2) { tree ret = cond; - bool result_inv = inner_inv; + if (cond2) +ret = fold_build2 (TRUTH_AND_EXPR, TREE_TYPE (ret), ret, cond2); + + /* Split cond into cond2 if they're contiguous. ??? We might be able to + handle ORIF as well, inverting both conditions, but it's not clear that + this would be enough, and it never comes up. */ + if (!cond2 + && TREE_CODE (cond) == TRUTH_ANDIF_EXPR + && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond)) +{ + cond2 = TREE_OPERAND (cond, 1); + cond = TREE_OPERAND (cond, 0); +} + + bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond)) + != gimple_bb (outer_cond)); + bool result_inv = outer_p ? outer_inv : inner_inv; if (result_inv) cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); @@ -464,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, else if (must_canon) return NULL_TREE; + if (outer_p) +{ + { + auto_bitmap used; + basic_block outer_bb = gimple_bb (outer_cond); + + /* Mark SSA DEFs that are referenced by cond and may thus need to be + moved to outer. */ + { + ifcombine_mark_ssa_name_t data = { used, outer_bb }; + walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL); + } + + if (!bitmap_empty_p (used)) + { + /* Iterate up from inner_cond, moving DEFs identified as used by + cond, and marking USEs in the DEFs for moving as well. 
*/ + gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond); + for (basic_block bb = gimple_bb (inner_cond); +bb != outer_bb; bb = single_pred (bb)) + { + for (gimple_stmt_iterator gsitr = gsi_last_bb (bb); +!gsi_end_p (gsitr); gsi_prev (&gsitr)) + { + gimple *stmt = gsi_stmt (gsitr); + bool move = false; + tree t; + ssa_op_iter it; + + FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_DEF) + if (bitmap_bit_p (used, SSA_NAME_VERSION (t))) + { + move = true; + break; + } + + if (!move) + continue; + + /* Mark uses in STMT before moving it. */ + FOR_EACH_SSA_TREE_OPERAND (t, stmt, it,
[gcc(refs/users/aoliva/heads/testme)] allow vuses in ifcombine blocks
https://gcc.gnu.org/g:6c0e7c5e3db8ae23e01f17fbe37282cac27cc5b7 commit 6c0e7c5e3db8ae23e01f17fbe37282cac27cc5b7 Author: Alexandre Oliva Date: Thu Oct 24 05:25:21 2024 -0300 allow vuses in ifcombine blocks Diff: --- gcc/tree-ssa-ifcombine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 6a3bc99190d9..ed20a231951a 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb) enum tree_code rhs_code; if (gimple_has_side_effects (stmt) || gimple_could_trap_p (stmt) - || gimple_vuse (stmt) + || gimple_vdef (stmt) /* We need to rewrite stmts with undefined overflow to use unsigned arithmetic but cannot do so for signed division. */ || ((ass = dyn_cast (stmt))
[gcc(refs/users/aoliva/heads/testme)] adjust update_profile_after_ifcombine for noncontiguous ifcombine
https://gcc.gnu.org/g:d586ea36dabe213c3d378fc8ed699ecba7b01e66 commit d586ea36dabe213c3d378fc8ed699ecba7b01e66 Author: Alexandre Oliva Date: Thu Oct 24 05:25:28 2024 -0300 adjust update_profile_after_ifcombine for noncontiguous ifcombine Diff: --- gcc/tree-ssa-ifcombine.cc | 94 +++ 1 file changed, 70 insertions(+), 24 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index d2be5e81c671..d9595132512f 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -356,14 +356,28 @@ recognize_bits_test (gcond *cond, tree *name, tree *bits, bool inv) } -/* Update profile after code in outer_cond_bb was adjusted so - outer_cond_bb has no condition. */ +/* Update profile after code in either outer_cond_bb or inner_cond_bb was + adjusted so that it has no condition. */ static void update_profile_after_ifcombine (basic_block inner_cond_bb, basic_block outer_cond_bb) { - edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb); + /* In the following we assume that inner_cond_bb has single predecessor. */ + gcc_assert (single_pred_p (inner_cond_bb)); + + basic_block outer_to_inner_bb = inner_cond_bb; + profile_probability prob = profile_probability::always (); + for (;;) +{ + basic_block parent = single_pred (outer_to_inner_bb); + prob *= find_edge (parent, outer_to_inner_bb)->probability; + if (parent == outer_cond_bb) + break; + outer_to_inner_bb = parent; +} + + edge outer_to_inner = find_edge (outer_cond_bb, outer_to_inner_bb); edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner ? EDGE_SUCC (outer_cond_bb, 1) : EDGE_SUCC (outer_cond_bb, 0)); @@ -374,29 +388,61 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, std::swap (inner_taken, inner_not_taken); gcc_assert (inner_taken->dest == outer2->dest); - /* In the following we assume that inner_cond_bb has single predecessor. 
*/ - gcc_assert (single_pred_p (inner_cond_bb)); - - /* Path outer_cond_bb->(outer2) needs to be merged into path - outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) - and probability of inner_not_taken updated. */ - - inner_cond_bb->count = outer_cond_bb->count; + if (outer_to_inner_bb == inner_cond_bb + && constant_condition_p (outer_cond_bb)) +{ + /* Path outer_cond_bb->(outer2) needs to be merged into path +outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken) +and probability of inner_not_taken updated. */ + + inner_cond_bb->count = outer_cond_bb->count; + + /* Handle special case where inner_taken probability is always. In this +case we know that the overall outcome will be always as well, but +combining probabilities will be conservative because it does not know +that outer2->probability is inverse of +outer_to_inner->probability. */ + if (inner_taken->probability == profile_probability::always ()) + ; + else + inner_taken->probability = outer2->probability + + outer_to_inner->probability * inner_taken->probability; + inner_not_taken->probability = profile_probability::always () + - inner_taken->probability; - /* Handle special case where inner_taken probability is always. In this case - we know that the overall outcome will be always as well, but combining - probabilities will be conservative because it does not know that - outer2->probability is inverse of outer_to_inner->probability. */ - if (inner_taken->probability == profile_probability::always ()) -; + outer_to_inner->probability = profile_probability::always (); + outer2->probability = profile_probability::never (); +} + else if (constant_condition_p (inner_cond_bb)) +{ + /* Path inner_cond_bb->(inner_taken) needs to be merged into path +outer_cond_bb->(outer2). We've accumulated the probabilities from +outer_cond_bb->(outer)->...->inner_cond_bb in prob, so we have to +adjust that by inner_taken, and make inner unconditional. 
*/ + + prob *= inner_taken->probability; + outer2->probability += prob; + outer_to_inner->probability = profile_probability::always () + - outer2->probability; + + inner_taken->probability = profile_probability::never (); + inner_not_taken->probability = profile_probability::always (); +} else -inner_taken->probability = outer2->probability + outer_to_inner->probability - * inner_taken->probability; - inner_not_taken->probability = profile_probability::always () -- inner_taken->probability; - - outer_to_inner->probability = profile_probability::always (); - outer2->probability = profile_probability::never (); +{ + /* We've moved part of the inner cond to outer, but we d
[gcc r15-4590] SVE intrinsics: Add constant folding for svindex.
https://gcc.gnu.org/g:90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7 commit r15-4590-g90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7 Author: Jennifer Schmitz Date: Tue Oct 15 07:58:14 2024 -0700 SVE intrinsics: Add constant folding for svindex. This patch folds svindex with constant arguments into a vector series. We implemented this in svindex_impl::fold using the function build_vec_series. For example, svuint64_t f1 () { return svindex_u642 (10, 3); } compiled with -O2 -march=armv8.2-a+sve, is folded to {10, 13, 16, ...} in the gimple pass lower. This optimization benefits cases where svindex is used in combination with other gimple-level optimizations. For example, svuint64_t f2 () { return svmul_x (svptrue_b64 (), svindex_u64 (10, 3), 5); } has previously been compiled to f2: index z0.d, #10, #3 mul z0.d, z0.d, #5 ret Now, it is compiled to f2: mov x0, 50 index z0.d, x0, #15 ret We added test cases checking - the application of the transform during gimple for constant arguments, - the interaction with another gimple-level optimization. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * config/aarch64/aarch64-sve-builtins-base.cc (svindex_impl::fold): Add constant folding. gcc/testsuite/ * gcc.target/aarch64/sve/index_const_fold.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc| 14 + .../gcc.target/aarch64/sve/index_const_fold.c | 35 ++ 2 files changed, 49 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 70bd83005d7c..e47acb67aeea 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1301,6 +1301,20 @@ public: class svindex_impl : public function_base { +public: + gimple * + fold (gimple_folder &f) const override + { +/* Apply constant folding if base and step are integer constants. 
*/ +tree vec_type = TREE_TYPE (f.lhs); +tree base = gimple_call_arg (f.call, 0); +tree step = gimple_call_arg (f.call, 1); +if (TREE_CODE (base) != INTEGER_CST || TREE_CODE (step) != INTEGER_CST) + return NULL; +return gimple_build_assign (f.lhs, + build_vec_series (vec_type, base, step)); + } + public: rtx expand (function_expander &e) const override diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c new file mode 100644 index ..7abb803f58ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +#include +#include + +#define INDEX_CONST(TYPE, TY) \ + sv##TYPE f_##TY##_index_const () \ + {\ +return svindex_##TY (10, 3); \ + } + +#define MULT_INDEX(TYPE, TY) \ + sv##TYPE f_##TY##_mult_index () \ + {\ +return svmul_x (svptrue_b8 (), \ + svindex_##TY (10, 3), \ + 5); \ + } + +#define ALL_TESTS(TYPE, TY)\ + INDEX_CONST (TYPE, TY) \ + MULT_INDEX (TYPE, TY) + +ALL_TESTS (uint8_t, u8) +ALL_TESTS (uint16_t, u16) +ALL_TESTS (uint32_t, u32) +ALL_TESTS (uint64_t, u64) +ALL_TESTS (int8_t, s8) +ALL_TESTS (int16_t, s16) +ALL_TESTS (int32_t, s32) +ALL_TESTS (int64_t, s64) + +/* { dg-final { scan-tree-dump-times "return \\{ 10, 13, 16, ... \\}" 8 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "return \\{ 50, 65, 80, ... \\}" 8 "optimized" } } */
[gcc r15-4591] SVE intrinsics: Fold division and multiplication by -1 to neg
https://gcc.gnu.org/g:fc40202c1ac5d585bb236cdaf3a3968927e970a0 commit r15-4591-gfc40202c1ac5d585bb236cdaf3a3968927e970a0 Author: Jennifer Schmitz Date: Tue Oct 1 08:01:13 2024 -0700 SVE intrinsics: Fold division and multiplication by -1 to neg Because a neg instruction has lower latency and higher throughput than sdiv and mul, svdiv and svmul by -1 can be folded to svneg. For svdiv, this is already implemented on the RTL level; for svmul, the optimization was still missing. This patch implements folding to svneg for both operations using the gimple_folder. For svdiv, the transform is applied if the divisor is -1. Svmul is folded if either of the operands is -1. A case distinction of the predication is made to account for the fact that svneg_m has 3 arguments (argument 0 holds the values for the inactive lanes), while svneg_x and svneg_z have only 2 arguments. Tests were added or adjusted to check the produced assembly and runtime tests were added to check correctness. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold): Fold division by -1 to svneg. (svmul_impl::fold): Fold multiplication by -1 to svneg. gcc/testsuite/ * gcc.target/aarch64/sve/acle/asm/div_s32.c: New test. * gcc.target/aarch64/sve/acle/asm/mul_s16.c: Adjust expected outcome. * gcc.target/aarch64/sve/acle/asm/mul_s32.c: New test. * gcc.target/aarch64/sve/acle/asm/mul_s64.c: Adjust expected outcome. * gcc.target/aarch64/sve/acle/asm/mul_s8.c: Likewise. * gcc.target/aarch64/sve/div_const_run.c: New test. * gcc.target/aarch64/sve/mul_const_run.c: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc| 73 ++ .../gcc.target/aarch64/sve/acle/asm/div_s32.c | 59 + .../gcc.target/aarch64/sve/acle/asm/mul_s16.c | 5 +- .../gcc.target/aarch64/sve/acle/asm/mul_s32.c | 46 +- .../gcc.target/aarch64/sve/acle/asm/mul_s64.c | 5 +- .../gcc.target/aarch64/sve/acle/asm/mul_s8.c | 7 +-- .../gcc.target/aarch64/sve/div_const_run.c | 10 ++- .../gcc.target/aarch64/sve/mul_const_run.c | 10 ++- 8 files changed, 187 insertions(+), 28 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index e47acb67aeea..327688756d1b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -768,6 +768,27 @@ public: if (integer_zerop (op1) || integer_zerop (op2)) return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs))); +/* If the divisor is all integer -1, fold to svneg. */ +tree pg = gimple_call_arg (f.call, 0); +if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2)) + { + function_instance instance ("svneg", functions::svneg, + shapes::unary, MODE_none, + f.type_suffix_ids, GROUP_none, f.pred); + gcall *call = f.redirect_call (instance); + unsigned offset_index = 0; + if (f.pred == PRED_m) + { + offset_index = 1; + gimple_call_set_arg (call, 0, op1); + } + else + gimple_set_num_ops (call, 5); + gimple_call_set_arg (call, offset_index, pg); + gimple_call_set_arg (call, offset_index + 1, op1); + return call; + } + /* If the divisor is a uniform power of 2, fold to a shift instruction. */ tree op2_cst = uniform_integer_cst_p (op2); @@ -2047,12 +2068,37 @@ public: if (integer_zerop (op1) || integer_zerop (op2)) return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs))); +/* If one of the operands is all integer -1, fold to svneg. 
*/ +tree pg = gimple_call_arg (f.call, 0); +tree negated_op = NULL; +if (integer_minus_onep (op2)) + negated_op = op1; +else if (integer_minus_onep (op1)) + negated_op = op2; +if (!f.type_suffix (0).unsigned_p && negated_op) + { + function_instance instance ("svneg", functions::svneg, + shapes::unary, MODE_none, + f.type_suffix_ids, GROUP_none, f.pred); + gcall *call = f.redirect_call (instance); + unsigned offset_index = 0; + if (f.pred == PRED_m) + { + offset_index = 1; + gimple_call_set_arg (call, 0, op1); + } + else + gimple_set_num_ops (call, 5); + gimple_call_set_arg (call, offset_index, pg); + gimple_call_set_arg (call, offset_index + 1, negated_op); + return call; + } + /* If one of