[gcc r15-4620] c++/modules: Prevent maybe_clone_body being called multiple times [PR115007]

2024-10-24 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:058ed8705a7b38bef2c107b6ff5de243aebd57b4

commit r15-4620-g058ed8705a7b38bef2c107b6ff5de243aebd57b4
Author: Nathaniel Shead 
Date:   Wed Aug 21 00:50:53 2024 +1000

c++/modules: Prevent maybe_clone_body being called multiple times [PR115007]

The ICE in the linked PR is caused because maybe_clone_body is not
prepared to be called on a declaration that has already had clones
created; when it is, start_preparsed_function exits early without
setting up cfun, causing a segfault later on.
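
For context, the "clones" here are the Itanium C++ ABI constructor/destructor
variants that maybe_clone_body materialises from the single maybe-in-charge
declaration the front end keeps.  A rough illustration (the mangled names are
just the usual ABI suffixes, nothing specific to this patch):

  struct S {
    virtual ~S ();        // one maybe-in-charge dtor in the front end...
  };
  S::~S () {}
  // ...from which maybe_clone_body emits up to three clones:
  //   _ZN1SD2Ev  - base ("not-in-charge") destructor
  //   _ZN1SD1Ev  - complete ("in-charge") destructor
  //   _ZN1SD0Ev  - deleting destructor (only for virtual dtors)
  // Running maybe_clone_body a second time on the same decl trips over the
  // clones that already exist, which is roughly the failure mode above.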

To fix this we ensure that post_load_processing only calls
maybe_clone_body if TREE_ASM_WRITTEN has not yet been marked on the
declaration, and (if maybe_clone_body succeeds) marks this flag on the
decl so that it doesn't get called again later when finalising deferred
vague-linkage declarations in c_parse_final_cleanups.

As a bonus this now allows us to only keep the DECL_SAVED_TREE around in
expand_or_defer_fn_1 for modules which have CMIs, which will have
benefits for LTO performance in non-interface TUs.

For clarity we also update the streaming code to do post_load_decls for
maybe-in-charge cdtors rather than any DECL_ABSTRACT_P declaration, as
this more accurately reflects the decls affected by maybe_clone_body.

PR c++/115007

gcc/cp/ChangeLog:

* module.cc (module_state::read_cluster): Replace
DECL_ABSTRACT_P with DECL_MAYBE_IN_CHARGE_CDTOR_P.
(post_load_processing): Check and mark TREE_ASM_WRITTEN.
* semantics.cc (expand_or_defer_fn_1): Use the more specific
module_maybe_has_cmi_p instead of modules_p.

gcc/testsuite/ChangeLog:

* g++.dg/modules/virt-6_a.C: New test.
* g++.dg/modules/virt-6_b.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/module.cc|  7 ---
 gcc/cp/semantics.cc |  2 +-
 gcc/testsuite/g++.dg/modules/virt-6_a.C | 13 +
 gcc/testsuite/g++.dg/modules/virt-6_b.C |  6 ++
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index fd9b1d3bf2e1..d7494cc813a0 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -15539,7 +15539,7 @@ module_state::read_cluster (unsigned snum)
 
   if (abstract)
;
-  else if (DECL_ABSTRACT_P (decl))
+  else if (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl))
vec_safe_push (post_load_decls, decl);
   else
{
@@ -17958,10 +17958,11 @@ post_load_processing ()
 
   dump () && dump ("Post-load processing of %N", decl);
 
-  gcc_checking_assert (DECL_ABSTRACT_P (decl));
+  gcc_checking_assert (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl));
   /* Cloning can cause loading -- specifically operator delete for
 the deleting dtor.  */
-  maybe_clone_body (decl);
+  if (!TREE_ASM_WRITTEN (decl) && maybe_clone_body (decl))
+   TREE_ASM_WRITTEN (decl) = 1;
 }
 
   cfun = old_cfun;
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8c1a8b3c68dc..266fba11eec8 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -5242,7 +5242,7 @@ expand_or_defer_fn_1 (tree fn)
 demand, so we also need to keep the body.  Otherwise we don't
 need it anymore.  */
   if (!DECL_DECLARED_CONSTEXPR_P (fn)
- && !(modules_p () && vague_linkage_p (fn)))
+ && !(module_maybe_has_cmi_p () && vague_linkage_p (fn)))
DECL_SAVED_TREE (fn) = NULL_TREE;
   return false;
 }
diff --git a/gcc/testsuite/g++.dg/modules/virt-6_a.C 
b/gcc/testsuite/g++.dg/modules/virt-6_a.C
new file mode 100644
index ..68e466ace3ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/virt-6_a.C
@@ -0,0 +1,13 @@
+// PR c++/115007
+// { dg-additional-options "-fmodules-ts -Wno-global-module" }
+// { dg-module-cmi M:a }
+
+module;
+struct S {
+  virtual ~S() = default;
+  virtual void f() = 0;
+};
+module M:a;
+extern S* p;
+template  void format(T) { p->~S(); }
+template void format(int);
diff --git a/gcc/testsuite/g++.dg/modules/virt-6_b.C 
b/gcc/testsuite/g++.dg/modules/virt-6_b.C
new file mode 100644
index ..c53f5fac742b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/virt-6_b.C
@@ -0,0 +1,6 @@
+// PR c++/115007
+// { dg-additional-options "-fmodules-ts" }
+// { dg-module-cmi M }
+
+export module M;
+import :a;


[gcc r15-4621] c++/modules: Support decloned cdtors

2024-10-24 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:29efc621b7c66ec67d10fc87cddbb3f1ab709fb2

commit r15-4621-g29efc621b7c66ec67d10fc87cddbb3f1ab709fb2
Author: Nathaniel Shead 
Date:   Wed Aug 21 01:08:36 2024 +1000

c++/modules: Support decloned cdtors

When compiling with '-fdeclone-ctor-dtor' (enabled by default with -Os),
we run into issues where we don't correctly emit the underlying
functions.  We also need to ensure that COMDAT constructors are marked
as such before 'maybe_clone_body' attempts to propagate COMDAT groups to
the new thunks.
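
As a reminder of what the flag does (a sketch of the general scheme, not of
this patch): with -fdeclone-ctor-dtor GCC emits one shared cdtor body and
turns the complete (C1) and base (C2) entry points into small forwarders to
it, so the COMDAT/linkage bookkeeping has to be correct before
maybe_clone_body creates those forwarders.  The clone-2 test boils down to:

  // Interface unit, compiled with -fmodules-ts -fdeclone-ctor-dtor
  export module M;
  export struct S {
    inline S (int) {}   // decloned: one shared body, C1/C2 forward to it
  };

  // Importer (clone-2_b.C): "S s(0);" must find the emitted bodies at link time.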

gcc/cp/ChangeLog:

* module.cc (post_load_processing): Mark COMDAT as needed, emit
declarations if maybe_clone_body fails.

gcc/testsuite/ChangeLog:

* g++.dg/modules/clone-2_a.C: New test.
* g++.dg/modules/clone-2_b.C: New test.
* g++.dg/modules/clone-3_a.C: New test.
* g++.dg/modules/clone-3_b.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/module.cc | 20 
 gcc/testsuite/g++.dg/modules/clone-2_a.C |  7 +++
 gcc/testsuite/g++.dg/modules/clone-2_b.C |  5 +
 gcc/testsuite/g++.dg/modules/clone-3_a.C |  9 +
 gcc/testsuite/g++.dg/modules/clone-3_b.C |  8 
 5 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index d7494cc813a0..90ad67daf72b 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -17959,10 +17959,22 @@ post_load_processing ()
   dump () && dump ("Post-load processing of %N", decl);
 
   gcc_checking_assert (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl));
-  /* Cloning can cause loading -- specifically operator delete for
-the deleting dtor.  */
-  if (!TREE_ASM_WRITTEN (decl) && maybe_clone_body (decl))
-   TREE_ASM_WRITTEN (decl) = 1;
+
+  if (DECL_COMDAT (decl))
+   comdat_linkage (decl);
+  if (!TREE_ASM_WRITTEN (decl))
+   {
+ /* Cloning can cause loading -- specifically operator delete for
+the deleting dtor.  */
+ if (maybe_clone_body (decl))
+   TREE_ASM_WRITTEN (decl) = 1;
+ else
+   {
+ /* We didn't clone the cdtor, make sure we emit it.  */
+ note_vague_linkage_fn (decl);
+ cgraph_node::finalize_function (decl, true);
+   }
+   }
 }
 
   cfun = old_cfun;
diff --git a/gcc/testsuite/g++.dg/modules/clone-2_a.C 
b/gcc/testsuite/g++.dg/modules/clone-2_a.C
new file mode 100644
index ..47e21581fdc5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/clone-2_a.C
@@ -0,0 +1,7 @@
+// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" }
+// { dg-module-cmi M }
+
+export module M;
+export struct S {
+  inline S(int) {}
+};
diff --git a/gcc/testsuite/g++.dg/modules/clone-2_b.C 
b/gcc/testsuite/g++.dg/modules/clone-2_b.C
new file mode 100644
index ..80c1e149518b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/clone-2_b.C
@@ -0,0 +1,5 @@
+// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" }
+
+import M;
+
+S s(0);
diff --git a/gcc/testsuite/g++.dg/modules/clone-3_a.C 
b/gcc/testsuite/g++.dg/modules/clone-3_a.C
new file mode 100644
index ..87de746f5c2c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/clone-3_a.C
@@ -0,0 +1,9 @@
+// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" }
+// { dg-module-cmi M }
+
+export module M;
+
+struct A {};
+export struct B : virtual A {
+  inline B (int) {}
+};
diff --git a/gcc/testsuite/g++.dg/modules/clone-3_b.C 
b/gcc/testsuite/g++.dg/modules/clone-3_b.C
new file mode 100644
index ..23c9ac4a8046
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/clone-3_b.C
@@ -0,0 +1,8 @@
+// { dg-module-do link }
+// { dg-additional-options "-fmodules-ts -fdeclone-ctor-dtor" }
+
+import M;
+
+int main() {
+  B b(0);
+}


[gcc/aoliva/heads/testme] (6 commits) ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 1070f7874f55... ifcombine across noncontiguous blocks

It previously pointed to:

 f7a9315f62c2... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  f7a9315... ifcombine across noncontiguous blocks
  11bf09a... introduce ifcombine_replace_cond
  d586ea3... adjust update_profile_after_ifcombine for noncontiguous ifc
  3e47b27... introduce ifcombine_replace_cond
  ebc7c0f... drop redundant ifcombine_ifandif parm
  6c0e7c5... allow vuses in ifcombine blocks


Summary of changes (added commits):
---

  1070f78... ifcombine across noncontiguous blocks
  cb75ab7... extend ifcombine_replace_cond to handle noncontiguous ifcom
  b7de5a6... adjust update_profile_after_ifcombine for noncontiguous ifc
  5b44192... introduce ifcombine_replace_cond
  db11a63... drop redundant ifcombine_ifandif parm
  683dabf... allow vuses in ifcombine blocks


[gcc(refs/users/aoliva/heads/testme)] drop redundant ifcombine_ifandif parm

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:db11a63b19b436accc5f0b4afebec10c5ab8aae6

commit db11a63b19b436accc5f0b4afebec10c5ab8aae6
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:24 2024 -0300

drop redundant ifcombine_ifandif parm

In preparation to changes that may modify both inner and outer
conditions in ifcombine, drop the redundant parameter result_inv, that
is always identical to inner_inv.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (ifcombine_ifandif): Drop redundant
result_inv parm.  Adjust all callers.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index ed20a231951a..0a2ba970548c 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -401,14 +401,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
-   inner_inv, outer_inv and result_inv indicate whether the conditions
-   are inverted.
+   inner_inv, outer_inv indicate whether the conditions are inverted.
Returns true if the edges to the common else basic-block were merged.  */
 
 static bool
 ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
-  basic_block outer_cond_bb, bool outer_inv, bool result_inv)
+  basic_block outer_cond_bb, bool outer_inv)
 {
+  bool result_inv = inner_inv;
   gimple_stmt_iterator gsi;
   tree name1, name2, bit1, bit2, bits1, bits2;
 
@@ -693,8 +693,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, 
basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false,
-   false);
+  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false);
 }
 
   /* And a version where the outer condition is negated.  */
@@ -711,8 +710,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, 
basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true,
-   false);
+  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true);
 }
 
   /* The || form is characterized by a common then_bb with the
@@ -731,8 +729,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, 
basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true,
-   true);
+  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true);
 }
 
   /* And a version where the outer condition is negated.  */
@@ -748,8 +745,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, 
basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false,
-   true);
+  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false);
 }
 
   return false;


[gcc r15-4607] Record nonzero bits in the irange_bitmask of POLY_INT_CSTs

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:b02503d3fe79778ccc149b0abbc736f7972603da

commit r15-4607-gb02503d3fe79778ccc149b0abbc736f7972603da
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:34 2024 +0100

Record nonzero bits in the irange_bitmask of POLY_INT_CSTs

At the moment, ranger punts entirely on POLY_INT_CSTs.  Numerical
ranges are a bit difficult, unless we do start modelling bounds on
the indeterminates.  But we can at least track the nonzero bits.
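
To make the effect concrete, this is essentially the new f1 test with the
reasoning spelled out in comments (illustrative only):

  #include <stdint.h>
  #include <arm_sve.h>

  /* svcnth () is the POLY_INT_CST 8 + 8x: a multiple of 8 for every vector
     length, so its low three bits -- and those of svcnth () + 8 -- are known
     to be zero.  Once that is recorded in the irange_bitmask, the >>= 3 is
     known to be lossless and the shift pair folds to a multiply by 2.  */
  uint64_t
  scaled_cnth (int x)
  {
    uint64_t y = x ? svcnth () : svcnth () + 8;
    y >>= 3;
    y <<= 4;
    return y;
  }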

gcc/
* value-query.cc (range_query::get_tree_range): Use get_nonzero_bits
to populate the irange_bitmask of a POLY_INT_CST.

gcc/testsuite/
* gcc.target/aarch64/sve/cnt_fold_6.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c | 75 +++
 gcc/value-query.cc|  7 +++
 2 files changed, 82 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c
new file mode 100644
index ..9d9e1ca93303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_6.c
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include 
+
+/*
+** f1:
+** ...
+** cntb(x[0-9]+)
+** ...
+** add x[0-9]+, \1, #?16
+** ...
+** csel[^\n]+
+** ret
+*/
+uint64_t
+f1 (int x)
+{
+  uint64_t y = x ? svcnth () : svcnth () + 8;
+  y >>= 3;
+  y <<= 4;
+  return y;
+}
+
+/*
+** f2:
+** ...
+** (?:and|[al]sr)  [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f2 (int x)
+{
+  uint64_t y = x ? svcnth () : svcnth () + 8;
+  y >>= 4;
+  y <<= 5;
+  return y;
+}
+
+/*
+** f3:
+** ...
+** cntw(x[0-9]+)
+** ...
+** add x[0-9]+, \1, #?16
+** ...
+** csel[^\n]+
+** ret
+*/
+uint64_t
+f3 (int x)
+{
+  uint64_t y = x ? svcntd () : svcntd () + 8;
+  y >>= 1;
+  y <<= 2;
+  return y;
+}
+
+/*
+** f4:
+** ...
+** (?:and|[al]sr)  [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f4 (int x)
+{
+  uint64_t y = x ? svcntd () : svcntd () + 8;
+  y >>= 2;
+  y <<= 3;
+  return y;
+}
diff --git a/gcc/value-query.cc b/gcc/value-query.cc
index cac2cb5b2bc0..34499da1a987 100644
--- a/gcc/value-query.cc
+++ b/gcc/value-query.cc
@@ -375,6 +375,13 @@ range_query::get_tree_range (vrange &r, tree expr, gimple *stmt,
   }
 
 default:
+  if (POLY_INT_CST_P (expr))
+   {
+ unsigned int precision = TYPE_PRECISION (type);
+ r.set_varying (type);
+ r.update_bitmask ({ wi::zero (precision), get_nonzero_bits (expr) });
+ return true;
+   }
   break;
 }
   if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))


[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:a050f30dcecc725fd5b8cdea57101710f030d81a

commit a050f30dcecc725fd5b8cdea57101710f030d81a
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Rework ifcombine to support merging conditions from noncontiguous
blocks.  This depends on earlier preparation changes.

The function that attempted to ifcombine a block with its immediate
predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks
eligible for ifcombine, attempting to combine with them.

The function that actually drives the combination of a pair of blocks,
tree_ssa_ifcombine_bb_1, now takes an additional parameter: the
successor of outer that leads to inner.

The function that recognizes if_then_else patterns is modified so it
can either test without distinguishing between then and else, or
require nondegenerate conditions, since degenerate ones aren't worth
combining with.
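
At the source level, the kind of CFG this enables looks roughly like the
sketch below (illustrative only; whether a given case is actually combined
still depends on the usual ifcombine restrictions, e.g. no stores in the
intervening blocks):

  int
  f (int x, const int *p)
  {
    if (x & (1 << 3))   /* outer condition */
      return 0;
    int t = p[0];       /* intervening block: loads (VUSEs) are tolerated */
    if (x & (1 << 5))   /* inner condition, no longer an immediate
                           successor of the outer one */
      return 0;
    return t;
  }
  /* The two bit tests can be merged into one test of
     x & ((1 << 3) | (1 << 5)).  */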


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (recognize_if_then_else): Support
relaxed then/else testing; require nondegenerate condition
otherwise.
(tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it
instead of inner_cond_bb.  Adjust callers.
(tree_ssa_ifcombine_bb): Loop over dominating outer blocks
eligible for ifcombine.
(pass_tree_ifcombine::execute): Noted potential need for
changes to the post-combine logic.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 140 --
 1 file changed, 111 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index c271d1e86a9b..d7cb75619978 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb)
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -899,19 +908,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-effects.  */
   if (phi_pred_bb != else_bb
-  && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb)
+  && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb)
   && same_phi_args_p (outer_cond_bb, phi_pred_bb, el

[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:a8ecc1a3ff1faece3363d50d7c501123c2be6a5b

commit a8ecc1a3ff1faece3363d50d7c501123c2be6a5b
Author: Michael Meissner 
Date:   Thu Oct 24 12:26:28 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 143 +++---
 1 file changed, 126 insertions(+), 17 deletions(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index fe43d0cb19a8..de75ac6f0e81 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,18 +1,8 @@
- Branch work182-sha, patch #402 
-
-Add missing test.
-
-2024-10-16  Michael Meissner  
-
-gcc/testsuite/
-
-   * gcc.target/powerpc/vector-rotate-left.c: New test.
-
- Branch work182-sha, patch #401 
+ Branch work182-sha, patch #411 was reverted 

 
 Add potential p-future XVRLD and XVRLDI instructions.
 
-2024-10-16  Michael Meissner  
+2024-10-24  Michael Meissner  
 
 gcc/
 
@@ -24,11 +14,128 @@ gcc/
* config/rs6000/rs6000.md (isa attribute): Add xvrlw.
(enabled attribute): Add support for xvrlw.
 
- Branch work182-sha, patch #400 
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+
+ Branch work182-sha, patch #410 was reverted 

+
+PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
+
+The multibuff.c benchmark attached to the PR target/117251 compiled for Power10
+PowerPC that implement SHA3 has a slowdown in the current trunk and GCC 14
+compared to GCC 11 - GCC 13, due to excessive amounts of spilling.
+
+The main function for the multibuf.c file has 3,747 lines, all of which are
+using vector unsigned long long.  There are 696 vector rotates (all rotates are
+constant), 1,824 vector xor's and 600 vector andc's.
+
+In looking at it, the main thing that stands out is that the reason for
+either spilling or moving variables is the support in fusion.md (generated by
+genfusion.pl) that tries to fuse the vec_andc feeding into vec_xor, and other
+vec_xor's feeding into vec_xor.
+
+On the powerpc for power10, there is a special fusion mode that happens if the
+machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
+and the VANDC/VXOR feeds into the 2nd VXOR instruction.
+
+While the Power10 has 64 vector registers (which uses the XXL prefix to do
+logical operations), the fusion only works with the older Altivec instruction
+set (which uses the V prefix).  The Altivec instruction only has 32 vector
+registers (which are overlaid over the VSX vector registers 32-63).
+
+By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor to do this
+fusion, it means that the register allocator has more register pressure for the
+traditional Altivec registers instead of the VSX registers.
+
+In addition, since there are vector rotates, these rotates only work on the
+traditional Altivec registers, which adds to the Altivec register pressure.
+
+Finally in addition to doing the explicit xor, andc, and rotates using the
+Altivec registers, we have to also load vector constants for the rotate amount
+and these registers also are allocated as Altivec registers.
 
-Initial support for adding xxeval fusion support.
+Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11 has
+many more vector moves than the later compilers.  Thus even though it has far
+fewer spills, the vector moves are why GCC 11 has the slowest results.
 
-2024-10-16  Michael Meissner  
+There is an instruction that was added in power10 (XXEVAL) that does provide
+fusion between VSX vectors that includes ANDC->XOR and XOR->XOR fusion.
+
+The latency of XXEVAL is slightly more than the fused VANDC/VXOR or VXOR/VXOR,
+so I have written the patch to prefer doing the Altivec instructions if they
+don't need a temporary register.
+
+Here are the results for adding support for XXEVAL for the multibuff.c
+benchmark attached to the PR.  Note that we essentially recover the speed with
+this patch that were lost with GCC 14 and the current trunk:
+
+  XXEVALTrunk   GCC14   GCC13   GCC12GCC11
+  ---   -   -   --
+Benchmark time in seconds   5.53 6.156.265.575.61 9.56
+
+Fuse VANDC -> VXOR   209 600  600 600 600   600
+Fuse VXOR -> VXOR  0 240  240 120 120   120
+XXEVAL to fuse ANDC -> XOR   391   00   0   0 0
+XXEVAL to fuse XOR -> XOR240   00   0   0 0
+
+Spill vector to stack 78 364  364 172 184   110
+Load spilled vector from stack   431 962  962 713 723   166
+Vector moves  10 100  100  70  72 3,055
+
+Vector rota

[gcc r15-4623] SVE intrinsics: Fold svaba with op1 all zeros to svabd.

2024-10-24 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:0b22f0585348335369298c7d39afd171758eebe9

commit r15-4623-g0b22f0585348335369298c7d39afd171758eebe9
Author: Jennifer Schmitz 
Date:   Thu Oct 24 05:11:31 2024 -0700

SVE intrinsics: Fold svaba with op1 all zeros to svabd.

Similar to
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html,
this patch implements folding of svaba to svabd if op1 is all zeros,
resulting in the use of UABD/SABD instructions instead of UABA/SABA.
Tests were added to check the produced assembly for use of UABD/SABD,
including the _n case.
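
In source terms the fold is just the identity svaba (0, a, b) == 0 + |a - b|
== svabd (a, b).  A minimal sketch with ACLE intrinsics (not one of the new
tests; needs SVE2, e.g. -march=armv8-a+sve2):

  #include <arm_sve.h>

  svint32_t
  via_aba (svint32_t a, svint32_t b)
  {
    /* op1 is all zeros, so this now folds to SABD.  */
    return svaba_s32 (svdup_s32 (0), a, b);
  }

  svint32_t
  direct (svint32_t a, svint32_t b)
  {
    /* Equivalent form; svabd is predicated, hence the added ptrue.  */
    return svabd_s32_x (svptrue_b32 (), a, b);
  }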

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
* config/aarch64/aarch64-sve-builtins-sve2.cc
(svaba_impl::fold): Fold svaba to svabd if op1 is all zeros.

gcc/testsuite/
* gcc.target/aarch64/sve2/acle/asm/aba_s32.c: New tests.
* gcc.target/aarch64/sve2/acle/asm/aba_s64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/aba_u32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/aba_u64.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 18 +
 .../gcc.target/aarch64/sve2/acle/asm/aba_s32.c | 23 ++
 .../gcc.target/aarch64/sve2/acle/asm/aba_s64.c | 22 +
 .../gcc.target/aarch64/sve2/acle/asm/aba_u32.c | 22 +
 .../gcc.target/aarch64/sve2/acle/asm/aba_u64.c | 22 +
 5 files changed, 107 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index ddd6e466ee3a..d29c2209fdfe 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -80,6 +80,24 @@ unspec_sqrdcmlah (int rot)
 
 class svaba_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+/* Fold to svabd if op1 is all zeros.  */
+tree op1 = gimple_call_arg (f.call, 0);
+if (!integer_zerop (op1))
+  return NULL;
+function_instance instance ("svabd", functions::svabd,
+   shapes::binary_opt_n, f.mode_suffix_id,
+   f.type_suffix_ids, GROUP_none, PRED_x);
+gcall *call = f.redirect_call (instance);
+/* Add a ptrue as predicate, because unlike svaba, svabd is
+   predicated.  */
+gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+return call;
+  }
+
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
index 73c002825267..655ad6302414 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
@@ -108,3 +108,26 @@ TEST_UNIFORM_Z (aba_11_s32_tied2, svint32_t,
 TEST_UNIFORM_Z (aba_11_s32_untied, svint32_t,
z0 = svaba_n_s32 (z1, z2, 11),
z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s32_zeroop1n:
+** ptrue   (p[0-7])\.b, all
+** mov z0\.s, #11
+** sabdz0\.s, \1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1n, svint32_t,
+   z0 = svaba_n_s32 (svdup_s32 (0), z1, 11),
+   z0 = svaba (svdup_s32 (0), z1, 11))
+
+
+/*
+** aba_11_s32_zeroop1:
+** ptrue   (p[0-7])\.b, all
+** mov z0\.s, #11
+** sabdz0\.s, \1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1, svint32_t,
+   z0 = svaba_s32 (svdup_s32 (0), z1, svdup_s32 (11)),
+   z0 = svaba (svdup_s32 (0), z1, svdup_s32 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
index 0c169dbf6136..8b1eb7d2f4e6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_s64_tied2, svint64_t,
 TEST_UNIFORM_Z (aba_11_s64_untied, svint64_t,
z0 = svaba_n_s64 (z1, z2, 11),
z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s64_zeroop1n:
+** ptrue   (p[0-7])\.b, all
+** mov z0\.d, #11
+** sabdz0\.d, \1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1n, svint64_t,
+   z0 = svaba_n_s64 (svdup_s64 (0), z1, 11),
+   z0 = svaba (svdup_s64 (0), z1, 11))
+
+/*
+** aba_11_s64_zeroop1:
+** ptrue   (p[0-7])\.b, all
+** mov z0\.d, #11
+** sabdz0\.d, \1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1, svint64_t,
+   z0 = svaba_s64 (svdup_s64 (0), z1, svdup_s64 (11)),
+   z0 = svaba (svdup_s64 (0), z1, svdup_s64 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve

[gcc r15-4608] aarch64: libstdc++: Use shufflevector instead of shuffle in opt_random.h

2024-10-24 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:cc33531df065c20a52cb0c35637f096978128b46

commit r15-4608-gcc33531df065c20a52cb0c35637f096978128b46
Author: Ricardo Jesus 
Date:   Mon Oct 14 14:28:02 2024 +0100

aarch64: libstdc++: Use shufflevector instead of shuffle in opt_random.h

This patch modifies the implementation of the vectorized Mersenne
Twister random number generator to use __builtin_shufflevector instead
of __builtin_shuffle. This makes it (almost) compatible with Clang.
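
The difference between the two builtins, reduced to a standalone sketch
(illustrative, not the opt_random.h code): __builtin_shuffle takes the lane
indices as a mask vector operand, while __builtin_shufflevector takes them
as constant arguments, which is the form Clang also implements.

  typedef unsigned char u8x16 __attribute__ ((vector_size (16)));

  u8x16
  ext_shuffle (u8x16 a, u8x16 b)
  {
    /* GCC-only form: the mask is itself a vector value.  */
    const u8x16 mask = { 1, 2, 3, 4, 5, 6, 7, 8,
                         9, 10, 11, 12, 13, 14, 15, 16 };
    return __builtin_shuffle (a, b, mask);
  }

  u8x16
  ext_shufflevector (u8x16 a, u8x16 b)
  {
    /* GCC and Clang form: indices are integer constant arguments.  */
    return __builtin_shufflevector (a, b, 1, 2, 3, 4, 5, 6, 7, 8,
                                    9, 10, 11, 12, 13, 14, 15, 16);
  }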

To make the implementation fully compatible with Clang, Clang will need
to support internal Neon types like __Uint8x16_t and __Uint32x4_t, which
currently it does not. This looks like an oversight in Clang and so will
be addressed separately.

I see no codegen change with this patch.

Bootstrapped and tested on aarch64-none-linux-gnu.

libstdc++-v3/ChangeLog:

* config/cpu/aarch64/opt/ext/opt_random.h (__VEXT): Replace uses
of __builtin_shuffle with __builtin_shufflevector.
(__aarch64_lsl_128): Move shift amount to a template parameter.
(__aarch64_lsr_128): Move shift amount to a template parameter.
(__aarch64_recursion): Update call sites of __aarch64_lsl_128
and __aarch64_lsr_128.

Signed-off-by: Ricardo Jesus 

Diff:
---
 .../config/cpu/aarch64/opt/ext/opt_random.h| 28 --
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h 
b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
index 7f756d1572f3..7eb816abcd00 100644
--- a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
+++ b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
@@ -35,13 +35,13 @@
 #ifdef __ARM_NEON
 
 #ifdef __ARM_BIG_ENDIAN
-# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
-{16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
- 24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C})
+# define __VEXT(_A,_B,_C) __builtin_shufflevector (_A, _B, \
+16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
+24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C)
 #else
-# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
-{_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
- _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
+# define __VEXT(_A,_B,_C) __builtin_shufflevector (_B, _A, \
+_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
+_C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15)
 #endif
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -52,9 +52,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   namespace {
 // Logical Shift right 128-bits by c * 8 bits
 
-__extension__ extern __inline __Uint32x4_t
+__extension__
+template
+extern __inline __Uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
+__aarch64_lsr_128 (__Uint8x16_t __a)
 {
   const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0};
@@ -64,9 +66,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 // Logical Shift left 128-bits by c * 8 bits
 
-__extension__ extern __inline __Uint32x4_t
+__extension__
+template
+extern __inline __Uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
+__aarch64_lsl_128 (__Uint8x16_t __a)
 {
   const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0};
@@ -82,14 +86,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __Uint32x4_t __e)
 {
   __Uint32x4_t __y = (__b >> __sr1);
-  __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
+  __Uint32x4_t __z = __aarch64_lsr_128<__sr2> ((__Uint8x16_t) __c);
 
   __Uint32x4_t __v = __d << __sl1;
 
   __z = __z ^ __a;
   __z = __z ^ __v;
 
-  __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
+  __Uint32x4_t __x = __aarch64_lsl_128<__sl2> ((__Uint8x16_t) __a);
 
   __y = __y & __e;
   __z = __z ^ __x;


[gcc r15-4600] Make more places handle exact_div like trunc_div

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:3e93828e601c62176ea2b4a1dd0b5a1db5657a8e

commit r15-4600-g3e93828e601c62176ea2b4a1dd0b5a1db5657a8e
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:30 2024 +0100

Make more places handle exact_div like trunc_div

I tried to look for places where we were handling TRUNC_DIV_EXPR
more favourably than EXACT_DIV_EXPR.

Most of the places that I looked at but didn't change were handling
div/mod pairs.  But there are bound to be others I missed...
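
For background (an illustrative note, not part of the patch): EXACT_DIV_EXPR
mostly arises internally rather than from a source-level '/', the classic
producer being pointer subtraction:

  #include <stddef.h>

  ptrdiff_t
  nelts (int *p1, int *p2)
  {
    /* Lowered roughly as ((char *) p2 - (char *) p1) /[ex] 4.  The division
       is exact by construction, so match.pd rules that previously fired
       only for trunc_div -- e.g. rewriting a division by (1 << C) as a
       right shift -- are now allowed to fire here too.  */
    return p2 - p1;
  }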

gcc/
* match.pd: Extend some rules to handle exact_div like trunc_div.
* tree.h (trunc_or_exact_div_p): New function.
* tree-ssa-loop-niter.cc (is_rshift_by_1): Use it.
* tree-ssa-loop-ivopts.cc (force_expr_to_var_cost): Handle
EXACT_DIV_EXPR.

Diff:
---
 gcc/match.pd| 60 +++--
 gcc/tree-ssa-loop-ivopts.cc |  2 ++
 gcc/tree-ssa-loop-niter.cc  |  2 +-
 gcc/tree.h  | 13 ++
 4 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0455dfa69937..9024277e5d34 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -492,27 +492,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
of A starting from shift's type sign bit are zero, as
(unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL,
so it is valid only if A >> 31 is zero.  */
-(simplify
- (trunc_div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2)))
- (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
-  && (!VECTOR_TYPE_P (type)
- || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
- || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
-  && (useless_type_conversion_p (type, TREE_TYPE (@1))
- || (element_precision (type) >= element_precision (TREE_TYPE (@1))
- && (TYPE_UNSIGNED (TREE_TYPE (@1))
- || (element_precision (type)
- == element_precision (TREE_TYPE (@1)))
- || (INTEGRAL_TYPE_P (type)
- && (tree_nonzero_bits (@0)
- & wi::mask (element_precision (TREE_TYPE (@1)) - 1,
- true,
- element_precision (type))) == 0)
-   (if (!VECTOR_TYPE_P (type)
-   && useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))
-   && element_precision (TREE_TYPE (@3)) < element_precision (type))
-(convert (rshift @3 @2))
-(rshift @0 @2
+(for div (trunc_div exact_div)
+ (simplify
+  (div (convert?@0 @3) (convert2? (lshift integer_onep@1 @2)))
+  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
+   && (!VECTOR_TYPE_P (type)
+  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
+  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
+   && (useless_type_conversion_p (type, TREE_TYPE (@1))
+  || (element_precision (type) >= element_precision (TREE_TYPE (@1))
+  && (TYPE_UNSIGNED (TREE_TYPE (@1))
+  || (element_precision (type)
+  == element_precision (TREE_TYPE (@1)))
+  || (INTEGRAL_TYPE_P (type)
+  && (tree_nonzero_bits (@0)
+  & wi::mask (element_precision (TREE_TYPE (@1)) - 1,
+  true,
+  element_precision (type))) == 0)
+(if (!VECTOR_TYPE_P (type)
+&& useless_type_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))
+&& element_precision (TREE_TYPE (@3)) < element_precision (type))
+ (convert (rshift @3 @2))
+ (rshift @0 @2)
 
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
@@ -947,13 +948,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ build_one_cst (utype); })))
 
 /* Simplify (unsigned t * 2)/2 -> unsigned t & 0x7FFF.  */
-(simplify
- (trunc_div (mult @0 integer_pow2p@1) @1)
- (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0)))
-  (bit_and @0 { wide_int_to_tree
-   (type, wi::mask (TYPE_PRECISION (type)
-- wi::exact_log2 (wi::to_wide (@1)),
-false, TYPE_PRECISION (type))); })))
+(for div (trunc_div exact_div)
+ (simplify
+  (div (mult @0 integer_pow2p@1) @1)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (bit_and @0 { wide_int_to_tree
+(type, wi::mask (TYPE_PRECISION (type)
+ - wi::exact_log2 (wi::to_wide (@1)),
+ false, TYPE_PRECISION (type))); }
 
 /* Simplify (unsigned t / 2) * 2 -> unsigned t & ~1.  */
 (simplify
@@ -5740,7 +5742,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Sink binary operation to branches, but on

[gcc r15-4616] testsuite: Use -fno-ipa-icf in gcc.dg/stack-check-2.c

2024-10-24 Thread Joseph Myers via Gcc-cvs
https://gcc.gnu.org/g:097994003cb3b09af2b07238e54f08b89dd34369

commit r15-4616-g097994003cb3b09af2b07238e54f08b89dd34369
Author: Joseph Myers 
Date:   Thu Oct 24 19:41:26 2024 +

testsuite: Use -fno-ipa-icf in gcc.dg/stack-check-2.c

One test failing with a -std=gnu23 default that I wanted to
investigate further is gcc.dg/stack-check-2.c.  The failures are

FAIL: gcc.dg/stack-check-2.c scan-tree-dump-not optimized "tail call"
FAIL: gcc.dg/stack-check-2.c scan-tree-dump-not tailc "tail call"

but it turns out the tail calls in question are not the ones that test
is actually checking for.  Rather, when () is interpreted as (void) in
C23 mode, ICF notices that certain functions are identical and so
turns test_indirect_2 into a tail call to test_indirect_1 and
test_indirect_casted_2 into a tail call to test_indirect_casted_1
(which it didn't do previously when one function used () and one used
(void)).
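
The underlying C23 change, reduced to a sketch (hypothetical functions, not
the ones in stack-check-2.c): once () means (void), the two definitions below
have identical types and bodies, so -fipa-icf may fold one into a tail call
to the other.

  typedef void (*cb_old) ();      /* C17: unspecified parameters */
  typedef void (*cb_new) (void);

  void run_old (cb_old f) { f (); }
  void run_new (cb_new f) { f (); }

  /* With -std=gnu23, cb_old and cb_new are the same type, run_old and
     run_new become structurally identical, and ICF can turn one into a tail
     call to the other -- which is what trips the scan-tree-dump-not checks
     here.  */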

To avoid these spurious failures, make the test use -fno-ipa-icf
rather than relying on () and (void) giving different function types
to avoid ICF.

Tested for x86_64-pc-linux-gnu.

* gcc.dg/stack-check-2.c: Use -fno-ipa-icf.

Diff:
---
 gcc/testsuite/gcc.dg/stack-check-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/stack-check-2.c 
b/gcc/testsuite/gcc.dg/stack-check-2.c
index 196c4bbfbdda..a821c0ef8657 100644
--- a/gcc/testsuite/gcc.dg/stack-check-2.c
+++ b/gcc/testsuite/gcc.dg/stack-check-2.c
@@ -12,7 +12,7 @@
depend on to elide stack probes.  */
 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized -fno-ipa-icf" } */
 /* { dg-require-effective-target supports_stack_clash_protection } */
 
 extern void foo (void) __attribute__ ((__noreturn__));


[gcc/aoliva/heads/testme] (2 commits) fold fold_truth_andor field merging into ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 5a9cf11ec7d1... fold fold_truth_andor field merging into ifcombine

It previously pointed to:

 4f8e1ea7f2e1... fold fold_truth_andor field merging into ifcombine

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  4f8e1ea... fold fold_truth_andor field merging into ifcombine
  88b6065... preserve TRUTH_ANDIF handling in ifcombine_replace_cond


Summary of changes (added commits):
---

  5a9cf11... fold fold_truth_andor field merging into ifcombine
  374dec3... handle TRUTH_ANDIF cond exprs in ifcombine_replace_cond


[gcc r15-4618] AVR: target/116953 - Restore recog_data after calling jump_over_one_insn_p.

2024-10-24 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:ca0ab7a0ac18911181e9161cfb8b87fb90039612

commit r15-4618-gca0ab7a0ac18911181e9161cfb8b87fb90039612
Author: Georg-Johann Lay 
Date:   Tue Oct 22 11:51:44 2024 +0200

AVR: target/116953 - Restore recog_data after calling jump_over_one_insn_p.

The previous fix for PR116953 is incomplete because references to
recog_data are escaping avr_out_sbxx_branch() in the form of %-operands
in the returned asm code template.  This patch reverts the previous fix,
and re-extracts the operands by means of extract_constrain_insn_cached()
after the call of jump_over_one_insn_p().

PR target/116953
gcc/
* config/avr/avr.cc (avr_out_sbxx_branch): Revert previous fix
for PR116953 (r15-4078).  Run extract_constrain_insn_cached
on the current insn after calling jump_over_one_insn_p.

Diff:
---
 gcc/config/avr/avr.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 735d05b1e747..b69a9c24aa0d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -13603,16 +13603,16 @@ avr_hard_regno_rename_ok (unsigned int old_reg, 
unsigned int new_reg)
Operand 3: label to jump to if the test is true.  */
 
 const char *
-avr_out_sbxx_branch (rtx_insn *insn, rtx xop[])
+avr_out_sbxx_branch (rtx_insn *insn, rtx operands[])
 {
-  // jump_over_one_insn_p may call extract on the next insn, clobbering
-  // recog_data.operand.  Hence make a copy of the operands (PR116953).
-  rtx operands[] = { xop[0], xop[1], xop[2], xop[3] };
-
   rtx_code comp = GET_CODE (operands[0]);
   bool long_jump = get_attr_length (insn) >= 4;
   bool reverse = long_jump || jump_over_one_insn_p (insn, operands[3]);
 
+  // PR116953: jump_over_one_insn_p may call extract on the next insn,
+  // clobbering recog_data.operand.  Thus, restore recog_data.
+  extract_constrain_insn_cached (insn);
+
   if (comp == GE)
 comp = EQ;
   else if (comp == LT)


[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:95ecd3d784f74351018560ac99d6398ca429a252

commit 95ecd3d784f74351018560ac99d6398ca429a252
Author: Michael Meissner 
Date:   Thu Oct 24 12:26:59 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index de75ac6f0e81..dc35b2de5a28 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,4 +1,4 @@
- Branch work182-sha, patch #411 was reverted 

+ Branch work182-sha, patch #411 
 
 Add potential p-future XVRLD and XVRLDI instructions.
 
@@ -18,7 +18,7 @@ gcc/testsuite/
 
* gcc.target/powerpc/vector-rotate-left.c: New test.
 
- Branch work182-sha, patch #410 was reverted 

+ Branch work182-sha, patch #410 
 
 PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations


[gcc r15-4613] libstdc++: Fix typos in tests using macros for std::float128_t support

2024-10-24 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:f7bcdf449148872d576b261e9bf385b0e12be0c0

commit r15-4613-gf7bcdf449148872d576b261e9bf385b0e12be0c0
Author: Jonathan Wakely 
Date:   Thu Oct 24 11:38:39 2024 +0100

libstdc++: Fix typos in tests using macros for std::float128_t support

These tests check `_GLIBCXX_DOUBLE_IS_IEEE_BINARY128` but that's never
defined; it should be "LDOUBLE" not "DOUBLE".

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/complex/ext_c++23.cc: Fix typo in macro.
* testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc:
Likewise.
* testsuite/26_numerics/headers/cmath/functions_std_c++23.cc:
Likewise.
* testsuite/26_numerics/headers/cmath/nextafter_c++23.cc:
Likewise.

Diff:
---
 libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc | 2 +-
 libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc | 2 +-
 libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc | 2 +-
 libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc 
b/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc
index 0fc3d6f1a666..5f4ff06448e0 100644
--- a/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc
+++ b/libstdc++-v3/testsuite/26_numerics/complex/ext_c++23.cc
@@ -70,7 +70,7 @@ main()
   }
 #endif
 #if defined(__STDCPP_FLOAT128_T__) \
-&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \
+&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \
|| defined(_GLIBCXX_HAVE_FLOAT128_MATH))
   {
 std::float128_t p[2] = {};
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc
index 3c2377fd6987..983027ff6546 100644
--- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/constexpr_std_c++23.cc
@@ -119,7 +119,7 @@ main()
   test_functions();
 #endif
 #if defined(__STDCPP_FLOAT128_T__) \
-&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \
+&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \
|| defined(_GLIBCXX_HAVE_FLOAT128_MATH))
   test_functions();
 #endif
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc
index ea68ac5da755..bf07493ecd47 100644
--- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc
@@ -131,7 +131,7 @@ main()
   }
 #endif
 #if defined(__STDCPP_FLOAT128_T__) \
-&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \
+&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \
|| defined(_GLIBCXX_HAVE_FLOAT128_MATH))
   {
 std::float128_t p[128] = {};
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc
index 91767d22cc3f..2d0f8017f4aa 100644
--- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/nextafter_c++23.cc
@@ -117,7 +117,7 @@ main ()
   test ();
 #endif
 #if defined(__STDCPP_FLOAT128_T__) \
-&& (defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY128) \
+&& (defined(_GLIBCXX_LDOUBLE_IS_IEEE_BINARY128) \
|| defined(_GLIBCXX_HAVE_FLOAT128_MATH))
   test ();
 #endif


[gcc r15-4605] Handle POLY_INT_CSTs in get_nonzero_bits

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:d6c4badffafa295f6082b7d74de314e131f30a96

commit r15-4605-gd6c4badffafa295f6082b7d74de314e131f30a96
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:33 2024 +0100

Handle POLY_INT_CSTs in get_nonzero_bits

This patch extends get_nonzero_bits to handle POLY_INT_CSTs,
The easiest (but also most useful) case is that the number
of trailing zeros in the runtime value is at least the number
of trailing zeros in each individual component.

In principle, we could do this for coeffs 1 and above only,
and then OR in coeff 0.  This would give ~0x11 for [14, 32], say.
But that's future work.
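
A worked example of the bound being used (illustrative values):

  /* poly value   trailing zeros of each coeff   nonzero-bits mask recorded
     [16, 16]     min (4, 4) = 4                 -16 = ...11110000
     [ 8,  8]     min (3, 3) = 3                  -8 = ...11111000
     [14, 32]     min (1, 5) = 1                  -2 = ...11111110
     (the per-coefficient OR described above would instead give ~0x11 for
      [14, 32], i.e. additionally clear bit 4)  */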

gcc/
* tree-ssanames.cc (get_nonzero_bits): Handle POLY_INT_CSTs.
* match.pd (with_possible_nonzero_bits): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/cnt_fold_4.c: New test.

Diff:
---
 gcc/match.pd  |  2 +
 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c | 61 +++
 gcc/tree-ssanames.cc  |  3 ++
 3 files changed, 66 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 17613ec8ade4..391c60bdfb32 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2872,6 +2872,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
possibly set.  */
 (match with_possible_nonzero_bits
  INTEGER_CST@0)
+(match with_possible_nonzero_bits
+ POLY_INT_CST@0)
 (match with_possible_nonzero_bits
  SSA_NAME@0
  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c
new file mode 100644
index ..b7a53701993c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include 
+
+/*
+** f1:
+** cnthx0
+** ret
+*/
+uint64_t
+f1 ()
+{
+  uint64_t x = svcntw ();
+  x >>= 2;
+  return x << 3;
+}
+
+/*
+** f2:
+** [^\n]+
+** [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f2 ()
+{
+  uint64_t x = svcntd ();
+  x >>= 2;
+  return x << 3;
+}
+
+/*
+** f3:
+** cntbx0, all, mul #4
+** ret
+*/
+uint64_t
+f3 ()
+{
+  uint64_t x = svcntd ();
+  x >>= 1;
+  return x << 6;
+}
+
+/*
+** f4:
+** [^\n]+
+** [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f4 ()
+{
+  uint64_t x = svcntd ();
+  x >>= 2;
+  return x << 2;
+}
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 4f83fcbb5171..ae6a0cd48fe6 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -502,6 +502,9 @@ get_nonzero_bits (const_tree name)
   if (TREE_CODE (name) == INTEGER_CST)
 return wi::to_wide (name);
 
+  if (POLY_INT_CST_P (name))
+return -known_alignment (wi::to_poly_wide (name));
+
   /* Use element_precision instead of TYPE_PRECISION so complex and
  vector types get a non-zero precision.  */
   unsigned int precision = element_precision (TREE_TYPE (name));


[gcc r15-4604] Try to simplify (X >> C1) << (C1 + C2) -> X << C2

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:ec8e8d359690e7347e6e718cc9254d59f694e138

commit r15-4604-gec8e8d359690e7347e6e718cc9254d59f694e138
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:32 2024 +0100

Try to simplify (X >> C1) << (C1 + C2) -> X << C2

This patch adds a rule to simplify (X >> C1) << (C1 + C2) -> X << C2
when the low C1 bits of X are known to be zero.

Any single conversion can take place between the shifts.  E.g. for
a truncating conversion, any extra bits of X that are preserved by
truncating after the shift are immediately lost by the shift left.
And the sign bits used for an extending conversion are the same as
the sign bits used for the rshift.  (A double conversion of say
int->unsigned->uint64_t would be wrong though.)
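
A quick numerical check of that last remark (illustrative values):

  /* Take x = -16 (int, so its low two bits are zero), C1 = 2, and an outer
     left shift by 3 (C2 = 1):

       original: (uint64_t) (unsigned) (x >> 2) << 3
                 = (uint64_t) 0xfffffffcU << 3  = 0x00000007ffffffe0
       "folded": (uint64_t) x << 1
                 = 0xfffffffffffffff0 << 1      = 0xffffffffffffffe0

     The int -> unsigned -> uint64_t chain changes which high bits survive,
     so the simplification is restricted to a single conversion between the
     shifts.  */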

gcc/
* match.pd: Simplify (X >> C1) << (C1 + C2) -> X << C2 if the
low C1 bits of X are zero.

gcc/testsuite/
* gcc.dg/tree-ssa/shifts-1.c: New test.
* gcc.dg/tree-ssa/shifts-2.c: Likewise.

Diff:
---
 gcc/match.pd | 13 +++
 gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c | 61 
 gcc/testsuite/gcc.dg/tree-ssa/shifts-2.c | 21 +++
 3 files changed, 95 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index b9621a47cdf1..17613ec8ade4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4906,6 +4906,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
- TYPE_PRECISION (TREE_TYPE (@2)
   (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1
 
+#if GIMPLE
+/* (X >> C1) << (C1 + C2) -> X << C2 if the low C1 bits of X are zero.  */
+(simplify
+ (lshift (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
+ INTEGER_CST@2)
+ (if (INTEGRAL_TYPE_P (type)
+  && wi::ltu_p (wi::to_wide (@1), element_precision (type))
+  && wi::ltu_p (wi::to_wide (@2), element_precision (type))
+  && wi::to_widest (@2) >= wi::to_widest (@1)
+  && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
+  (lshift (convert @0) (minus @2 @1
+#endif
+
 /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
unsigned x OR truncate into the precision(type) - c lowest bits
of signed x (if they have mode precision or a precision of 1).  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c
new file mode 100644
index ..d88500ca8ddf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-1.c
@@ -0,0 +1,61 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return x << 3;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  unsigned char y = x;
+  y >>= 2;
+  return y << 3;
+}
+
+unsigned long
+f3 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return (unsigned long) x << 3;
+}
+
+int
+f4 (int x)
+{
+  if (x & 15)
+__builtin_unreachable ();
+  x >>= 4;
+  return x << 5;
+}
+
+unsigned int
+f5 (int x)
+{
+  if (x & 31)
+__builtin_unreachable ();
+  x >>= 5;
+  return x << 6;
+}
+
+unsigned int
+f6 (unsigned int x)
+{
+  if (x & 1)
+__builtin_unreachable ();
+  x >>= 1;
+  return x << (sizeof (int) * __CHAR_BIT__ - 1);
+}
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {>= 3;
+  return x << 4;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return x << 1;
+}
+
+/* { dg-final { scan-tree-dump-times {

[gcc(refs/users/meissner/heads/work182-sha)] Add potential p-future XVRLD and XVRLDI instructions.

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d7334a2776a8498ac757eed08c6c4989c8e99c86

commit d7334a2776a8498ac757eed08c6c4989c8e99c86
Author: Michael Meissner 
Date:   Thu Oct 24 12:23:17 2024 -0400

Add potential p-future XVRLD and XVRLDI instructions.

2024-10-24  Michael Meissner  

gcc/

* config/rs6000/altivec.md (altivec_vrl): Add support for a
possible XVRLD instruction in the future.
(altivec_vrl_immediate): New insns.
* config/rs6000/predicates.md (vector_shift_immediate): New 
predicate.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
* config/rs6000/rs6000.md (isa attribute): Add xvrlw.
(enabled attribute): Add support for xvrlw.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 35 +++---
 gcc/config/rs6000/predicates.md| 26 
 gcc/config/rs6000/rs6000.h |  3 ++
 gcc/config/rs6000/rs6000.md|  6 +++-
 .../gcc.target/powerpc/vector-rotate-left.c| 34 +
 5 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 00dad4b91f1c..d4ee50322ca1 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1983,12 +1983,39 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+;; However for testing, allow other xvrl variants.  In particular, XVRLD for
+;; the sha3 tests for multibuf/singlebuf.
 (define_insn "altivec_vrl"
-  [(set (match_operand:VI2 0 "register_operand" "=v")
-(rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
-   (match_operand:VI2 2 "register_operand" "v")))]
+  [(set (match_operand:VI2 0 "register_operand" "=v,wa")
+(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa")
+   (match_operand:VI2 2 "register_operand" "v,wa")))]
   ""
-  "vrl %0,%1,%2"
+  "@
+   vrl %0,%1,%2
+   xvrl %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "isa" "*,xvrlw")])
+
+(define_insn "*altivec_vrl_immediate"
+  [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa")
+   (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa")
+   (match_operand:VI2 2 "vector_shift_immediate" 
"j,wM,wE,wS")))]
+  "TARGET_XVRLW && "
+{
+  rtx op2 = operands[2];
+  int value = 256;
+  int num_insns = -1;
+
+  if (!xxspltib_constant_p (op2, mode, &num_insns, &value))
+gcc_unreachable ();
+
+  operands[3] = GEN_INT (value & 0xff);
+  return "xvrli %x0,%x1,%3";
+}
   [(set_attr "type" "vecsimple")])
 
 (define_insn "altivec_vrlq"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 1d95e34557e5..fccfbd7e4904 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -728,6 +728,32 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is a CONST_VECTOR whose elements are all the
+;; same and the elements can be an immediate shift or rotate factor
+(define_predicate "vector_shift_immediate"
+  (match_code "const_vector,vec_duplicate,const_int")
+{
+  int value = 256;
+  int num_insns = -1;
+
+  if (zero_constant (op, mode) || all_ones_constant (op, mode))
+return true;
+
+  if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+return false;
+
+  switch (mode)
+{
+case V16QImode: return IN_RANGE (value, 0, 7);
+case V8HImode:  return IN_RANGE (value, 0, 15);
+case V4SImode:  return IN_RANGE (value, 0, 31);
+case V2DImode:  return IN_RANGE (value, 0, 63);
+default:break;
+}
+
+  return false;
+})
+  
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 8cfd9faf77dc..1a168c2c9596 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -581,6 +581,9 @@ extern int rs6000_vector_align[];
below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 68fbfec95546..420f20d4524b 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -369,7 +369,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we impl

[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d658729b2a69cf7e63d7afbdba4d60e66db844f9

commit d658729b2a69cf7e63d7afbdba4d60e66db844f9
Author: Michael Meissner 
Date:   Thu Oct 24 12:27:43 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index dc35b2de5a28..c6151fce09a4 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -74,21 +74,21 @@ this patch that were lost with GCC 14 and the current trunk:
   ---   -   -   --
 Benchmark time in seconds   5.53 6.156.265.575.61 9.56
 
-Fuse VANDC -> VXOR   209 600  600 600 600   600
-Fuse VXOR -> VXOR  0 240  240 120 120   120
-XXEVAL to fuse ANDC -> XOR   391   00   0   0 0
-XXEVAL to fuse XOR -> XOR240   00   0   0 0
+Fuse VANDC -> VXOR   209 600  600 600 600  600
+Fuse VXOR -> VXOR  0 240  240 120 120  120
+XXEVAL to fuse ANDC -> XOR   391   00   0   00
+XXEVAL to fuse XOR -> XOR240   00   0   00
 
-Spill vector to stack 78 364  364 172 184   110
-Load spilled vector from stack   431 962  962 713 723   166
-Vector moves  10 100  100  70  72 3,055
+Spill vector to stack 78 364  364 172 184  110
+Load spilled vector from stack   431 962  962 713 723  166
+Vector moves  10 100  100  70  723,055
 
-Vector rotate right  696 696  696 696 696   696
-XXLANDC or VANDC 209 600  600 600 600   600
-XXLXOR or VXOR   953   1,8241,824   1,824   1,824 1,825
-XXEVAL   631   00   0   0 0
+Vector rotate right  696 696  696 696 696  696
+XXLANDC or VANDC 209 600  600 600 600  600
+XXLXOR or VXOR   953   1,8241,824   1,824   1,8241,825
+XXEVAL   631   00   0   00
 
-Load vector rotate constants  24  24   24  24  2424
+Load vector rotate constants  24  24   24  24  24   24
 
 
 Here are the results for adding support for XXEVAL for the singlebuff.c


[gcc r15-4611] libstdc++: Simplify std::__throw_bad_variant_access

2024-10-24 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:0dbc588acaa27a3a56bc9173bd577e1293f10046

commit r15-4611-g0dbc588acaa27a3a56bc9173bd577e1293f10046
Author: Jonathan Wakely 
Date:   Tue Oct 22 16:06:12 2024 +0100

libstdc++: Simplify std::__throw_bad_variant_access

This removes the overload of __throw_bad_variant_access that must be
called with a string literal. This avoids a potential source of
undefined behaviour if that function got misused. The other overload
that takes a bool parameter can be adjusted to take an integer index
selecting one of the four possible string literals to use, ensuring
that the std::bad_variant_access constructor is only called with those
literals.

Passing an index outside the range [0,3] is bogus, but will still select
a valid string literal and avoid undefined behaviour.
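
As a minimal standalone sketch of the clamping idea (illustrative only, not
the libstdc++ implementation; the actual change is in the diff below), any
unsigned index selects one of the four static literals, so the string handed
to the exception constructor is always valid:

    #include <cassert>

    const char*
    reason_for (unsigned n)
    {
      // Index-selected message table with a modulo clamp.
      static constexpr const char* reasons[] = {
        "std::get: wrong index for variant",
        "std::get: variant is valueless",
        "std::visit: variant is valueless",
        "std::visit: variant is valueless",
      };
      return reasons[n % 4u];
    }

    int main () { assert (reason_for (42) != nullptr); }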

libstdc++-v3/ChangeLog:

* include/std/variant (__throw_bad_variant_access(unsigned)):
Define new function as inline friend, with namespace-scope
declaration using noreturn attribute.
(__throw_bad_variant_access(const char*)): Remove.
(__throw_bad_variant_access(bool)): Remove.
(visit, visit): Adjust calls to __throw_bad_variant_access.

Reviewed-by: Patrick Palka 

Diff:
---
 libstdc++-v3/include/std/variant | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index cf532126d798..bd0f9c3252a5 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1402,6 +1402,8 @@ namespace __detail::__variant
   && (is_swappable_v<_Types> && ...))>
 swap(variant<_Types...>&, variant<_Types...>&) = delete;
 
+  [[noreturn]] void __throw_bad_variant_access(unsigned);
+
   class bad_variant_access : public exception
   {
   public:
@@ -1411,28 +1413,24 @@ namespace __detail::__variant
 { return _M_reason; }
 
   private:
+// Must only be called with a string literal
 bad_variant_access(const char* __reason) noexcept : _M_reason(__reason) { }
 
 // Must point to a string with static storage duration:
 const char* _M_reason = "bad variant access";
 
-friend void __throw_bad_variant_access(const char* __what);
+friend void __throw_bad_variant_access([[maybe_unused]] unsigned __n)
+{
+  [[maybe_unused]] static constexpr const char* __reasons[] = {
+   "std::get: wrong index for variant",
+   "std::get: variant is valueless",
+   "std::visit: variant is valueless",
+   "std::visit: variant is valueless",
+  };
+  _GLIBCXX_THROW_OR_ABORT(bad_variant_access(__reasons[__n % 4u]));
+}
   };
 
-  // Must only be called with a string literal
-  inline void
-  __throw_bad_variant_access(const char* __what)
-  { _GLIBCXX_THROW_OR_ABORT(bad_variant_access(__what)); }
-
-  inline void
-  __throw_bad_variant_access(bool __valueless)
-  {
-if (__valueless) [[__unlikely__]]
-  __throw_bad_variant_access("std::get: variant is valueless");
-else
-  __throw_bad_variant_access("std::get: wrong index for variant");
-  }
-
   template
 class variant
 : private __detail::__variant::_Variant_base<_Types...>,
@@ -1941,7 +1939,7 @@ namespace __detail::__variant
   namespace __variant = std::__detail::__variant;
 
   if ((__variant::__as(__variants).valueless_by_exception() || ...))
-   __throw_bad_variant_access("std::visit: variant is valueless");
+   __throw_bad_variant_access(2);
 
   using _Result_type
= __detail::__variant::__visit_result_t<_Visitor, _Variants...>;
@@ -1981,7 +1979,7 @@ namespace __detail::__variant
   namespace __variant = std::__detail::__variant;
 
   if ((__variant::__as(__variants).valueless_by_exception() || ...))
-   __throw_bad_variant_access("std::visit: variant is valueless");
+   __throw_bad_variant_access(3);
 
   return std::__do_visit<_Res>(std::forward<_Visitor>(__visitor),
  __variant::__as(std::forward<_Variants>(__variants))...);


[gcc(refs/users/meissner/heads/work182-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:754997fd268a5e798da4af34ace7db9f6d30a720

commit 754997fd268a5e798da4af34ace7db9f6d30a720
Author: Michael Meissner 
Date:   Thu Oct 24 12:21:09 2024 -0400

PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

The multibuff.c benchmark attached to PR target/117251, compiled for Power10
PowerPC and implementing SHA3, has a slowdown in the current trunk and GCC 14
compared to GCC 11 - GCC 13, due to excessive amounts of spilling.

The main function of the multibuf.c file has 3,747 lines, all of which use
vector unsigned long long.  There are 696 vector rotates (all rotates are
constant), 1,824 vector xor's and 600 vector andc's.

In looking at it, the main thing that stands out is that the reason for either
spilling or moving variables is the support in fusion.md (generated by
genfusion.pl) that tries to fuse a vec_andc feeding into a vec_xor, and other
vec_xor's feeding into a vec_xor.

On the powerpc for power10, there is a special fusion mode that happens if the
machine has a VANDC or VXOR instruction that is adjacent to a VXOR instruction
and the VANDC/VXOR feeds into the 2nd VXOR instruction.

While the Power10 has 64 vector registers (which use the XXL prefix for
logical operations), the fusion only works with the older Altivec instruction
set (which uses the V prefix).  The Altivec instruction set only has 32 vector
registers (which are overlaid over the VSX vector registers 32-63).

By having the combiner patterns fuse_vandc_vxor and fuse_vxor_vxor do this
fusion, the register allocator sees more register pressure on the traditional
Altivec registers instead of the VSX registers.

In addition, the vector rotates only work on the traditional Altivec
registers, which adds to the Altivec register pressure.

Finally, in addition to doing the explicit xor, andc, and rotates using the
Altivec registers, we also have to load vector constants for the rotate
amounts, and these registers are also allocated as Altivec registers.

Current trunk and GCC 12-14 have more vector spills than GCC 11, but GCC 11
has many more vector moves than the later compilers.  Thus even though it has
far fewer spills, the vector moves are why GCC 11 has the slowest results.

There is an instruction added in power10 (XXEVAL) that does provide fusion
between VSX vectors, including ANDC->XOR and XOR->XOR fusion.

The latency of XXEVAL is slightly higher than the fused VANDC/VXOR or
VXOR/VXOR, so I have written the patch to prefer the Altivec instructions if
they don't need a temporary register.
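
The source shape being fused is easy to see in isolation.  As a hedged
illustration (not code taken from the benchmark, and assuming a Power target
with VSX enabled), the SHA-3 chi step is exactly a vec_andc feeding a
vec_xor:

    #include <altivec.h>

    /* Computes a ^ (c & ~b), the andc-into-xor pattern discussed above.  */
    vector unsigned long long
    chi (vector unsigned long long a, vector unsigned long long b,
         vector unsigned long long c)
    {
      return vec_xor (a, vec_andc (c, b));
    }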

Here are the results of adding support for XXEVAL for the multibuff.c
benchmark attached to the PR.  Note that with this patch we essentially
recover the speed that was lost with GCC 14 and the current trunk:

                                 XXEVAL   Trunk   GCC14   GCC13   GCC12   GCC11
                                 ------   -----   -----   -----   -----   -----
Benchmark time in seconds          5.53    6.15    6.26    5.57    5.61    9.56

Fuse VANDC -> VXOR                  209     600     600     600     600     600
Fuse VXOR -> VXOR                     0     240     240     120     120     120
XXEVAL to fuse ANDC -> XOR          391       0       0       0       0       0
XXEVAL to fuse XOR -> XOR           240       0       0       0       0       0

Spill vector to stack                78     364     364     172     184     110
Load spilled vector from stack      431     962     962     713     723     166
Vector moves                         10     100     100      70      72   3,055

Vector rotate right                 696     696     696     696     696     696
XXLANDC or VANDC                    209     600     600     600     600     600
XXLXOR or VXOR                      953   1,824   1,824   1,824   1,824   1,825
XXEVAL                              631       0       0       0       0       0

Load vector rotate constants         24      24      24      24      24      24

Here are the results of adding support for XXEVAL for the singlebuff.c
benchmark attached to the PR.  Note that adding XXEVAL greatly speeds up this
particular benchmark:

                                 XXEVAL   Trunk   GCC14   GCC13   GCC12   GCC11
                                 ------   -----   -----   -----   -----   -----
Benchmark time in seconds          4.46    5.40    5.40    5.35    5.36    7.54

Fuse VANDC -> VXOR                  210     600     600     600     600     600
Fuse VXOR -> VXOR                     0     240     240     120     120     120
XXEVAL to fuse ANDC -> XOR   3900 

[gcc(refs/users/meissner/heads/work182-sha)] Revert changes

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:06e655c67c6f696cfedda0e5519c874a1d25a3da

commit 06e655c67c6f696cfedda0e5519c874a1d25a3da
Author: Michael Meissner 
Date:   Thu Oct 24 12:15:54 2024 -0400

Revert changes

Diff:
---
 gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c | 0
 gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c | 0
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-1.c
deleted file mode 100644
index e69de29bb2d1..
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vector-fused-2.c
deleted file mode 100644
index e69de29bb2d1..


[gcc/aoliva/heads/testme] (4 commits) fold fold_truth_andor field merging into ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 4f8e1ea7f2e1... fold fold_truth_andor field merging into ifcombine

It previously pointed to:

 53a0460c1f49... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  53a0460... ifcombine across noncontiguous blocks
  aee55fb... extend ifcombine_replace_cond to handle noncontiguous ifcom


Summary of changes (added commits):
---

  4f8e1ea... fold fold_truth_andor field merging into ifcombine
  88b6065... preserve TRUTH_ANDIF handling in ifcombine_replace_cond
  bb30d95... ifcombine across noncontiguous blocks
  46fa49f... extend ifcombine_replace_cond to handle noncontiguous ifcom


[gcc(refs/users/aoliva/heads/testme)] fold fold_truth_andor field merging into ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:4f8e1ea7f2e14bbe3b687f9e2911acd46eec0bfe

commit 4f8e1ea7f2e14bbe3b687f9e2911acd46eec0bfe
Author: Alexandre Oliva 
Date:   Sun Oct 20 21:02:05 2024 -0300

fold fold_truth_andor field merging into ifcombine

This patch introduces various improvements to the logic that merges
field compares, moving it into ifcombine.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.
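
As an illustrative sketch (field names made up, not taken from the new
testcases), the kind of source this merges is:

    struct s { unsigned x1 : 4; unsigned y1 : 4; };

    int
    same_fields (struct s a, struct s b)
    {
      /* Both fields live in the same alignment unit of each object, so the
         two compares can become a single masked word compare.  */
      return a.x1 == b.x1 && a.y1 == b.y1;
    }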

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.

The merging of multiple field accesses into wider bitfield-like
accesses is undesirable to do too early in compilation, so we move it
from folding to ifcombine, and extend ifcombine to merge noncontiguous
compares, absent intervening side effects.  VUSEs used to prevent
ifcombine; that seemed excessively conservative, since relevant side
effects were already tested, including the possibility of trapping
loads, so that's removed.

Unlike earlier ifcombine, when merging noncontiguous compares the
merged compare must replace the earliest compare, which may require
moving up the DEFs that contributed to the latter compare.

When it is the second of a noncontiguous pair of compares that first
accesses a word, we may merge the first compare with part of the
second compare that refers to the same word, keeping the compare of
the remaining bits at the spot where the second compare used to be.

Handling compares with non-constant fields was somewhat generalized
from what fold used to do, now handling non-adjacent fields, even if a
field of one object crosses an alignment boundary but the other
doesn't.


The -Wno-error for toplev.o on rs6000 is because of toplev.c's:

  if ((flag_sanitize & SANITIZE_ADDRESS)
  && !FRAME_GROWS_DOWNWARD)

and rs6000.h's:

#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0   \
  || (flag_sanitize & SANITIZE_ADDRESS) != 0)

The mutually exclusive conditions involving flag_sanitize are now
noticed and reported by ifcombine's warning on mutually exclusive
compares.  i386's needs -Wno-error for insn-attrtab.o for similar
reasons.


for  gcc/ChangeLog

* fold-const.cc (make_bit_field): Export.
(all_ones_mask_p): Drop.
(unextend, decode_field_reference, fold_truth_andor_1): Move
field compare merging logic...
* gimple-fold.cc: ... here.
(ssa_is_substitutable_p, is_cast_p, is_binop_p): New.
(prepare_xor, follow_load): New.
(compute_split_boundary_from_align): New.
(make_bit_field_load, build_split_load): New.
(reuse_split_load, mergeable_loads_p): New.
(fold_truth_andor_maybe_separate): New.
* tree-ssa-ifcombine.cc: Include bitmap.h.
(constant_condition_p): New.
(recognize_if_then_else_nc, recognize_if_succs): New.
(bb_no_side_effects_p): Don't reject VUSEs.
(update_profile_after_ifcombine): Adjust for noncontiguous
merges.
(ifcombine_mark_ssa_name): New.
(struct ifcombine_mark_ssa_name_t): New.
(ifcombine_mark_ssa_name_walk): New.
(ifcombine_replace_cond): Extended for noncontiguous merges
after factoring out of...
(ifcombine_ifandif): ... this.  Drop result_inv arg.  Try
fold_truth_andor_maybe_separate.
(tree_ssa_ifcombine_bb_1): Add outer_succ_bb arg.  Call
recognize_if_then_else_nc.  Adjust ifcombine_ifandif calls.
(tree_ssa_ifcombine_bb): Return the earliest affected block.
Call recognize_if_then_else_nc.  Try noncontiguous blocks.
(pass_tree_ifcombine::execute): Retry affected blocks.
* config/i386/t-i386 (insn-attrtab.o-warn): Disable errors.
* config/rs6000/t-rs6000 (toplev.o-warn): Likewise.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-1.c: New.
* gcc.dg/field-merge-2.c: New.
 

[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:bb30d957fbed1e33f862cf49109e221dacb36fb0

commit bb30d957fbed1e33f862cf49109e221dacb36fb0
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Rework ifcombine to support merging conditions from noncontiguous
blocks.  This depends on earlier preparation changes.

The function that attempted to ifcombine a block with its immediate
predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks
eligible for ifcombine, attempting to combine with them.

The function that actually drives the combination of a pair of blocks,
tree_ssa_ifcombine_bb_1, now takes an additional parameter: the
successor of outer that leads to inner.

The function that recognizes if_then_else patterns is modified to
enable testing without distinguishing between then and else, or to
require nondegenerate conditions, that aren't worth combining with.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (recognize_if_then_else): Support
relaxed then/else testing; require nondegenerate condition
otherwise.
(tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it
instead of inner_cond_bb.  Adjust callers.
(tree_ssa_ifcombine_bb): Loop over dominating outer blocks
eligible for ifcombine.
(pass_tree_ifcombine::execute): Noted potential need for
changes to the post-combine logic.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 152 +-
 1 file changed, 123 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 71c7c9074e94..817c95b20252 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb)
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -886,19 +895,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-effects.  */
   if (phi_pred_bb != else_bb
-  && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb)
+  && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb)
   && same_phi_args_p (outer_cond_bb, phi_pred_bb, el

[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:46fa49fec2574a95dad05c297eebcb41f8dd8dc1

commit 46fa49fec2574a95dad05c297eebcb41f8dd8dc1
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:30 2024 -0300

extend ifcombine_replace_cond to handle noncontiguous ifcombine

Prepare to handle noncontiguous ifcombine, introducing logic to modify
the outer condition when needed.  There are two cases worth
mentioning:

- when blocks are noncontiguous, we have to place the combined
  condition in the outer block to avoid pessimizing carefully crafted
  short-circuited tests;

- even when blocks are contiguous, we prepare for situations in which
  the combined condition has two tests, one to be placed in outer and
  the other in inner.  This circumstance will not come up when
  noncontiguous ifcombine is first enabled, but it will when
  an improved fold_truth_andor is integrated with ifcombine.

Combining the condition from inner into outer may require moving SSA
DEFs used in the inner condition, and the changes implement this as
well.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc: Include bitmap.h.
(ifcombine_mark_ssa_name): New.
(struct ifcombine_mark_ssa_name_t): New.
(ifcombine_mark_ssa_name_walk): New.
(ifcombine_replace_cond): Prepare to handle noncontiguous and
split-condition ifcombine.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 173 --
 1 file changed, 168 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index b5b72be29bbf..71c7c9074e94 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "attribs.h"
 #include "asan.h"
+#include "bitmap.h"
 
 #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
 #define LOGICAL_OP_NON_SHORT_CIRCUIT \
@@ -460,17 +461,57 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 }
 }
 
-/* Replace the conditions in INNER_COND with COND.
-   Replace OUTER_COND with a constant.  */
+/* Set NAME's bit in USED if OUTER dominates it.  */
+
+static void
+ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
+{
+  if (SSA_NAME_IS_DEFAULT_DEF (name))
+return;
+
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  basic_block bb = gimple_bb (def);
+  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
+return;
+
+  bitmap_set_bit (used, SSA_NAME_VERSION (name));
+}
+
+/* Data structure passed to ifcombine_mark_ssa_name.  */
+struct ifcombine_mark_ssa_name_t
+{
+  /* SSA_NAMEs that have been referenced.  */
+  bitmap used;
+  /* Dominating block of DEFs that might need moving.  */
+  basic_block outer;
+};
+
+/* Mark in DATA->used any SSA_NAMEs used in *t.  */
+
+static tree
+ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
+{
+  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
+
+  if (*t && TREE_CODE (*t) == SSA_NAME)
+ifcombine_mark_ssa_name (data->used, *t, data->outer);
+
+  return NULL;
+}
+
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
 
 static bool
 ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
gcond *outer_cond, bool outer_inv,
tree cond, bool must_canon, tree cond2)
 {
-  bool result_inv = inner_inv;
-
-  gcc_checking_assert (!cond2);
+  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
+  != gimple_bb (outer_cond));
+  bool result_inv = outer_p ? outer_inv : inner_inv;
 
   if (result_inv)
 cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
@@ -480,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
   else if (must_canon)
 return false;
 
+  if (outer_p)
+{
+  {
+   auto_bitmap used;
+   basic_block outer_bb = gimple_bb (outer_cond);
+
+   /* Mark SSA DEFs that are referenced by cond and may thus need to be
+  moved to outer.  */
+   {
+ ifcombine_mark_ssa_name_t data = { used, outer_bb };
+ walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
+   }
+
+   if (!bitmap_empty_p (used))
+ {
+   /* Iterate up from inner_cond, moving DEFs identified as used by
+  cond, and marking USEs in the DEFs for moving as well.  */
+   gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond);
+   for (basic_block bb = gimple_bb (inner_cond);
+bb != outer_bb; bb = single_pred (bb))
+ {
+   for (gimple_stmt_iterator gsitr = gsi_last_bb (bb);
+!gsi_end_p (gs

[gcc r15-4614] libstdc++: Fix test broken when using COW std::string

2024-10-24 Thread Francois Dumont via Gcc-cvs
https://gcc.gnu.org/g:d01dc97a26d2f5034ca135f46094aa52c44cc90a

commit r15-4614-gd01dc97a26d2f5034ca135f46094aa52c44cc90a
Author: François Dumont 
Date:   Thu Oct 24 20:30:16 2024 +0200

libstdc++: Fix test broken when using COW std::string

libstdc++-v3/ChangeLog:

* testsuite/23_containers/unordered_map/96088.cc (test03): Fix
increments value when _GLIBCXX_USE_CXX11_ABI is equal to 0.

Diff:
---
 libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
index b5be7d06aa03..ee41675a16ba 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
@@ -233,9 +233,8 @@ test03()
 um.insert(v.begin(), v.end());
 VERIFY( um.size() == 1 );
 
-// Allocate array of buckets, a node, the std::string value and the
-// std::string key (unless COW).
-constexpr std::size_t increments = _GLIBCXX_USE_CXX11_ABI ? 4 : 3;
+// Allocate array of buckets, a node, and the 2 std::string (unless COW).
+constexpr std::size_t increments = _GLIBCXX_USE_CXX11_ABI ? 4 : 2;
 
 VERIFY( __gnu_test::counter::count() == origin + increments );
 VERIFY( __gnu_test::counter::get()._M_increments == increments );


[gcc(refs/users/meissner/heads/work182-sha)] Revert changes

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0f27c445e6267e0a544ae030a2fb9e7058fc8ec0

commit 0f27c445e6267e0a544ae030a2fb9e7058fc8ec0
Author: Michael Meissner 
Date:   Thu Oct 24 12:11:15 2024 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/altivec.md   |  35 +-
 gcc/config/rs6000/predicates.md|  26 -
 gcc/config/rs6000/rs6000.h |   3 -
 gcc/config/rs6000/rs6000.md|   6 +-
 .../gcc.target/powerpc/p10-vector-fused-1.c| 409 -
 .../gcc.target/powerpc/p10-vector-fused-2.c| 936 -
 .../gcc.target/powerpc/vector-rotate-left.c|  34 -
 7 files changed, 5 insertions(+), 1444 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index d4ee50322ca1..00dad4b91f1c 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1983,39 +1983,12 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; -mcpu=future adds a vector rotate left word variant.  There is no vector
-;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
-;; altivec_vrl and will match for -mcpu=future, while other cpus will
-;; match the generic insn.
-;; However for testing, allow other xvrl variants.  In particular, XVRLD for
-;; the sha3 tests for multibuf/singlebuf.
 (define_insn "altivec_vrl"
-  [(set (match_operand:VI2 0 "register_operand" "=v,wa")
-(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa")
-   (match_operand:VI2 2 "register_operand" "v,wa")))]
+  [(set (match_operand:VI2 0 "register_operand" "=v")
+(rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
+   (match_operand:VI2 2 "register_operand" "v")))]
   ""
-  "@
-   vrl %0,%1,%2
-   xvrl %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")
-   (set_attr "isa" "*,xvrlw")])
-
-(define_insn "*altivec_vrl_immediate"
-  [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa")
-   (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa")
-   (match_operand:VI2 2 "vector_shift_immediate" "j,wM,wE,wS")))]
-  "TARGET_XVRLW && "
-{
-  rtx op2 = operands[2];
-  int value = 256;
-  int num_insns = -1;
-
-  if (!xxspltib_constant_p (op2, mode, &num_insns, &value))
-gcc_unreachable ();
-
-  operands[3] = GEN_INT (value & 0xff);
-  return "xvrli %x0,%x1,%3";
-}
+  "vrl %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
 (define_insn "altivec_vrlq"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index fccfbd7e4904..1d95e34557e5 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -728,32 +728,6 @@
   return num_insns == 1;
 })
 
-;; Return 1 if the operand is a CONST_VECTOR whose elements are all the
-;; same and the elements can be an immediate shift or rotate factor
-(define_predicate "vector_shift_immediate"
-  (match_code "const_vector,vec_duplicate,const_int")
-{
-  int value = 256;
-  int num_insns = -1;
-
-  if (zero_constant (op, mode) || all_ones_constant (op, mode))
-return true;
-
-  if (!xxspltib_constant_p (op, mode, &num_insns, &value))
-return false;
-
-  switch (mode)
-{
-case V16QImode: return IN_RANGE (value, 0, 7);
-case V8HImode:  return IN_RANGE (value, 0, 15);
-case V4SImode:  return IN_RANGE (value, 0, 31);
-case V2DImode:  return IN_RANGE (value, 0, 63);
-default:break;
-}
-
-  return false;
-})
-  
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 1a168c2c9596..8cfd9faf77dc 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -581,9 +581,6 @@ extern int rs6000_vector_align[];
below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
-/* Whether we have XVRLW support.  */
-#define TARGET_XVRLW   TARGET_FUTURE
-
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 420f20d4524b..68fbfec95546 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -369,7 +369,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval,xvrlw"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -426,10 +426,6 @@
  (match_test "TARGET_PREFIXED && TARGET_XXEVAL"))
  (const_int 1)
 
- (and (eq_attr "isa" "xvrlw")
- (match_test "TARGET_XVRLW"))
- (const_int 1)
-
 ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
diff -

[gcc r15-4619] c++: Handle ABI for non-polymorphic dynamic classes

2024-10-24 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:6713f05a2aeb852c3f4d738c8c5dbad816624323

commit r15-4619-g6713f05a2aeb852c3f4d738c8c5dbad816624323
Author: Nathaniel Shead 
Date:   Wed Aug 21 00:42:42 2024 +1000

c++: Handle ABI for non-polymorphic dynamic classes

The Itanium ABI has specific rules for when virtual tables for dynamic
classes should be emitted.  However we didn't consider structures with
virtual inheritance but no virtual members as dynamic classes for ABI
purposes; this patch fixes this.

gcc/cp/ChangeLog:

* decl2.cc (import_export_class): Use TYPE_CONTAINS_VPTR_P
instead of TYPE_POLYMORPHIC_P.
(import_export_decl): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/modules/virt-5_a.C: New test.
* g++.dg/modules/virt-5_b.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/decl2.cc |  4 ++--
 gcc/testsuite/g++.dg/modules/virt-5_a.C | 16 
 gcc/testsuite/g++.dg/modules/virt-5_b.C | 11 +++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index a455eb481b1a..fa32ce35c8c4 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -2434,7 +2434,7 @@ import_export_class (tree ctype)
translation unit, then export the class; otherwise, import
it.  */
   import_export = -1;
-  else if (TYPE_POLYMORPHIC_P (ctype))
+  else if (TYPE_CONTAINS_VPTR_P (ctype))
 {
   tree cdecl = TYPE_NAME (ctype);
   if (DECL_LANG_SPECIFIC (cdecl) && DECL_MODULE_ATTACH_P (cdecl))
@@ -3530,7 +3530,7 @@ import_export_decl (tree decl)
  class_type = type;
  import_export_class (type);
  if (CLASSTYPE_INTERFACE_KNOWN (type)
- && TYPE_POLYMORPHIC_P (type)
+ && TYPE_CONTAINS_VPTR_P (type)
  && CLASSTYPE_INTERFACE_ONLY (type)
  /* If -fno-rtti was specified, then we cannot be sure
 that RTTI information will be emitted with the
diff --git a/gcc/testsuite/g++.dg/modules/virt-5_a.C 
b/gcc/testsuite/g++.dg/modules/virt-5_a.C
new file mode 100644
index ..f4c6abe85ef6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/virt-5_a.C
@@ -0,0 +1,16 @@
+// { dg-additional-options "-fmodules-ts" }
+// { dg-module-cmi M }
+
+export module M;
+
+struct C {};
+struct B : virtual C {};
+
+// Despite no non-inline key function, this is still a dynamic class
+// and so by the Itanium ABI 5.2.3 should be uniquely emitted in this TU
+export struct A : B {
+  inline A (int) {}
+};
+
+// { dg-final { scan-assembler {_ZTTW1M1A:} } }
+// { dg-final { scan-assembler {_ZTVW1M1A:} } }
diff --git a/gcc/testsuite/g++.dg/modules/virt-5_b.C 
b/gcc/testsuite/g++.dg/modules/virt-5_b.C
new file mode 100644
index ..785dd92ac1ee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/virt-5_b.C
@@ -0,0 +1,11 @@
+// { dg-module-do link }
+// { dg-additional-options "-fmodules-ts" }
+
+import M;
+
+int main() {
+  A a(0);
+}
+
+// { dg-final { scan-assembler-not {_ZTTW1M1A:} } }
+// { dg-final { scan-assembler-not {_ZTVW1M1A:} } }


[gcc r15-4622] c++/modules: Propagate some missing flags on type definitions

2024-10-24 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:6aba48a8cc128e54ee243d451ac9a843ff41c4f9

commit r15-4622-g6aba48a8cc128e54ee243d451ac9a843ff41c4f9
Author: Nathaniel Shead 
Date:   Thu Oct 24 18:10:52 2024 +1100

c++/modules: Propagate some missing flags on type definitions

Noticed while testing my fix for PR c++/113814.  Not all of these are
easily testable but I've tested a couple that were straight-forward.

For consistency also adds a new TYPE_WARN_IF_NOT_ALIGN_RAW flag to match
the decl version Nathan added.

gcc/cp/ChangeLog:

* module.cc (trees_in::read_class_def): Propagate some missing
flags from the streamed-in definition.

gcc/ChangeLog:

* tree.h (TYPE_WARN_IF_NOT_ALIGN_RAW): New accessor.
(TYPE_WARN_IF_NOT_ALIGN): Use it.
(SET_TYPE_WARN_IF_NOT_ALIGN): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/modules/class-10_a.H: New test.
* g++.dg/modules/class-10_b.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/module.cc  | 20 +++-
 gcc/testsuite/g++.dg/modules/class-10_a.H |  6 ++
 gcc/testsuite/g++.dg/modules/class-10_b.C | 19 +++
 gcc/tree.h|  8 +---
 4 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 90ad67daf72b..297ef85bb1e9 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -12379,8 +12379,12 @@ trees_in::read_class_def (tree defn, tree 
maybe_template)
 
  /* Core pieces.  */
  TYPE_MODE_RAW (type) = TYPE_MODE_RAW (type_dup);
+ TYPE_ALIGN_RAW (type) = TYPE_ALIGN_RAW (type_dup);
+ TYPE_WARN_IF_NOT_ALIGN_RAW (type)
+   = TYPE_WARN_IF_NOT_ALIGN_RAW (type_dup);
+ TYPE_USER_ALIGN (type) = TYPE_USER_ALIGN (type_dup);
+
  SET_DECL_MODE (defn, DECL_MODE (maybe_dup));
- TREE_ADDRESSABLE (type) = TREE_ADDRESSABLE (type_dup);
  DECL_SIZE (defn) = DECL_SIZE (maybe_dup);
  DECL_SIZE_UNIT (defn) = DECL_SIZE_UNIT (maybe_dup);
  DECL_ALIGN_RAW (defn) = DECL_ALIGN_RAW (maybe_dup);
@@ -12388,12 +12392,26 @@ trees_in::read_class_def (tree defn, tree 
maybe_template)
= DECL_WARN_IF_NOT_ALIGN_RAW (maybe_dup);
  DECL_USER_ALIGN (defn) = DECL_USER_ALIGN (maybe_dup);
 
+ TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (type_dup);
+ TYPE_CXX_ODR_P (type) = TYPE_CXX_ODR_P (type_dup);
+ TYPE_NO_FORCE_BLK (type) = TYPE_NO_FORCE_BLK (type_dup);
+ TYPE_TRANSPARENT_AGGR (type) = TYPE_TRANSPARENT_AGGR (type_dup);
+ TYPE_CONTAINS_PLACEHOLDER_INTERNAL (type)
+   = TYPE_CONTAINS_PLACEHOLDER_INTERNAL (type_dup);
+
+ TYPE_EMPTY_P (type) = TYPE_EMPTY_P (type_dup);
+ TREE_ADDRESSABLE (type) = TREE_ADDRESSABLE (type_dup);
+
  /* C++ pieces.  */
  TYPE_POLYMORPHIC_P (type) = TYPE_POLYMORPHIC_P (type_dup);
+ CLASSTYPE_FINAL (type) = CLASSTYPE_FINAL (type_dup);
+
  TYPE_HAS_USER_CONSTRUCTOR (type)
= TYPE_HAS_USER_CONSTRUCTOR (type_dup);
  TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type)
= TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type_dup);
+ TYPE_NEEDS_CONSTRUCTING (type)
+   = TYPE_NEEDS_CONSTRUCTING (type_dup);
 
  if (auto ls = TYPE_LANG_SPECIFIC (type_dup))
{
diff --git a/gcc/testsuite/g++.dg/modules/class-10_a.H 
b/gcc/testsuite/g++.dg/modules/class-10_a.H
new file mode 100644
index ..177cf57fec13
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/class-10_a.H
@@ -0,0 +1,6 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+
+struct alignas(16) Align {};
+struct Final final {};
+struct NeedsConstructing { NeedsConstructing(); };
diff --git a/gcc/testsuite/g++.dg/modules/class-10_b.C 
b/gcc/testsuite/g++.dg/modules/class-10_b.C
new file mode 100644
index ..2f982124f3e7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/class-10_b.C
@@ -0,0 +1,19 @@
+// { dg-additional-options "-fmodules-ts -Wno-pedantic" }
+// Test bits and pieces of merging information
+// from class defs into forward declarations
+
+struct Align;
+struct Final;
+struct NeedsConstructing;
+
+import "class-10_a.H";
+
+static_assert(alignof(Align) == 16);
+
+struct TestFinal : Final {};  // { dg-error "cannot derive" }
+
+struct TestNeedsConstructing {
+  struct {
+NeedsConstructing a;  // { dg-error "with constructor not allowed in 
anonymous aggregate" }
+  };
+};
diff --git a/gcc/tree.h b/gcc/tree.h
index efda032a220c..66e08793c2ec 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -2357,13 +2357,15 @@ extern tree vector_element_bits_tree (const_tree);
 
 /* The minimum alignment necessary for objects of this type without
warning.  The value is an int, measured in bits.  */
+#define TYPE_WARN_IF_NOT_ALIGN_RAW(NODE) \
+(TYPE_CHECK (NODE)->type_common.warn_if_not_ali

[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:cb75ab71333fe6fb59bd73288baf5b7170d80cfa

commit cb75ab71333fe6fb59bd73288baf5b7170d80cfa
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:30 2024 -0300

extend ifcombine_replace_cond to handle noncontiguous ifcombine

Prepare to handle noncontiguous ifcombine, introducing logic to modify
the outer condition when needed.  There are two cases worth
mentioning:

- when blocks are noncontiguous, we have to place the combined
  condition in the outer block to avoid pessimizing carefully crafted
  short-circuited tests;

- even when blocks are contiguous, we prepare for situations in which
  the combined condition has two tests, one to be placed in outer and
  the other in inner.  This circumstance will not come up when
  noncontiguous ifcombine is first enabled, but it will when
  an improved fold_truth_andor is integrated with ifcombine.

Combining the condition from inner into outer may require moving SSA
DEFs used in the inner condition, and the changes implement this as
well.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc: Include bitmap.h.
(ifcombine_mark_ssa_name): New.
(struct ifcombine_mark_ssa_name_t): New.
(ifcombine_mark_ssa_name_walk): New.
(ifcombine_replace_cond): Prepare to handle noncontiguous and
split-condition ifcombine.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 184 +-
 1 file changed, 181 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index b5b72be29bbf..8c6eaaaf5e5c 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "attribs.h"
 #include "asan.h"
+#include "bitmap.h"
 
 #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
 #define LOGICAL_OP_NON_SHORT_CIRCUIT \
@@ -460,8 +461,48 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 }
 }
 
-/* Replace the conditions in INNER_COND with COND.
-   Replace OUTER_COND with a constant.  */
+/* Set NAME's bit in USED if OUTER dominates it.  */
+
+static void
+ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
+{
+  if (SSA_NAME_IS_DEFAULT_DEF (name))
+return;
+
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  basic_block bb = gimple_bb (def);
+  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
+return;
+
+  bitmap_set_bit (used, SSA_NAME_VERSION (name));
+}
+
+/* Data structure passed to ifcombine_mark_ssa_name.  */
+struct ifcombine_mark_ssa_name_t
+{
+  /* SSA_NAMEs that have been referenced.  */
+  bitmap used;
+  /* Dominating block of DEFs that might need moving.  */
+  basic_block outer;
+};
+
+/* Mark in DATA->used any SSA_NAMEs used in *t.  */
+
+static tree
+ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
+{
+  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
+
+  if (*t && TREE_CODE (*t) == SSA_NAME)
+ifcombine_mark_ssa_name (data->used, *t, data->outer);
+
+  return NULL;
+}
+
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
 
 static bool
 ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
@@ -470,7 +511,22 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
 {
   bool result_inv = inner_inv;
 
-  gcc_checking_assert (!cond2);
+  /* Split cond into cond2 if they're contiguous.  ??? We might be able to
+ handle ORIF as well, inverting both conditions, but it's not clear that
+ this would be enough, and it never comes up.  */
+  if (!cond2
+  && TREE_CODE (cond) == TRUTH_ANDIF_EXPR
+  && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond))
+{
+  /* ??? Does this ever hit?  */
+  gcc_unreachable ();
+  cond2 = TREE_OPERAND (cond, 1);
+  cond = TREE_OPERAND (cond, 0);
+}
+
+  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
+  != gimple_bb (outer_cond));
+  bool result_inv = outer_p ? outer_inv : inner_inv;
 
   if (result_inv)
 cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
@@ -480,6 +536,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
   else if (must_canon)
 return false;
 
+  if (outer_p)
+{
+  {
+   auto_bitmap used;
+   basic_block outer_bb = gimple_bb (outer_cond);
+
+   /* Mark SSA DEFs that are referenced by cond and may thus need to be
+  moved to outer.  */
+   {
+ ifcombine_mark_ssa_name_t data = { used, outer_bb };
+ walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
+   }
+
+   if (!bitmap_empty_p (used))
+ 

[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:1070f7874f55ec00c345a3b5c77c8fa0bc0e1612

commit 1070f7874f55ec00c345a3b5c77c8fa0bc0e1612
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Rework ifcombine to support merging conditions from noncontiguous
blocks.  This depends on earlier preparation changes.

The function that attempted to ifcombine a block with its immediate
predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks
eligible for ifcombine, attempting to combine with them.

The function that actually drives the combination of a pair of blocks,
tree_ssa_ifcombine_bb_1, now takes an additional parameter: the
successor of outer that leads to inner.

The function that recognizes if_then_else patterns is modified to
enable testing without distinguishing between then and else, or to
require nondegenerate conditions, that aren't worth combining with.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (recognize_if_then_else): Support
relaxed then/else testing; require nondegenerate condition
otherwise.
(tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it
instead of inner_cond_bb.  Adjust callers.
(tree_ssa_ifcombine_bb): Loop over dominating outer blocks
eligible for ifcombine.
(pass_tree_ifcombine::execute): Noted potential need for
changes to the post-combine logic.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 140 --
 1 file changed, 111 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 8c6eaaaf5e5c..d84c450db059 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb)
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -901,19 +910,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-effects.  */
   if (phi_pred_bb != else_bb
-  && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb)
+  && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb)
   && same_phi_args_p (outer_cond_bb, phi_pred_bb, el

[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:5b4419273e4384451828861958027a82d5c38806

commit 5b4419273e4384451828861958027a82d5c38806
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:26 2024 -0300

introduce ifcombine_replace_cond

Refactor ifcombine_ifandif, moving the common code from the various
paths that apply the combined condition to a new function.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (ifcombine_replace_cond): Factor out
of...
(ifcombine_ifandif): ... this.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 137 ++
 1 file changed, 65 insertions(+), 72 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 0a2ba970548c..6dcf5e6efe1d 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -399,6 +399,51 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer2->probability = profile_probability::never ();
 }
 
+/* Replace the conditions in INNER_COND with COND.
+   Replace OUTER_COND with a constant.  */
+
+static bool
+ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
+   gcond *outer_cond, bool outer_inv,
+   tree cond, bool must_canon, tree cond2)
+{
+  bool result_inv = inner_inv;
+
+  gcc_checking_assert (!cond2);
+
+  if (result_inv)
+cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
+
+  if (tree tcanon = canonicalize_cond_expr_cond (cond))
+cond = tcanon;
+  else if (must_canon)
+return false;
+
+{
+  if (!is_gimple_condexpr_for_cond (cond))
+   {
+ gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
+ cond = force_gimple_operand_gsi_1 (&gsi, cond,
+is_gimple_condexpr_for_cond,
+NULL, true, GSI_SAME_STMT);
+   }
+  gimple_cond_set_condition_from_tree (inner_cond, cond);
+  update_stmt (inner_cond);
+
+  /* Leave CFG optimization to cfg_cleanup.  */
+  gimple_cond_set_condition_from_tree (outer_cond,
+  outer_inv
+  ? boolean_false_node
+  : boolean_true_node);
+  update_stmt (outer_cond);
+}
+
+  update_profile_after_ifcombine (gimple_bb (inner_cond),
+ gimple_bb (outer_cond));
+
+  return true;
+}
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv indicate whether the conditions are inverted.
@@ -408,7 +453,6 @@ static bool
 ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
   basic_block outer_cond_bb, bool outer_inv)
 {
-  bool result_inv = inner_inv;
   gimple_stmt_iterator gsi;
   tree name1, name2, bit1, bit2, bits1, bits2;
 
@@ -446,26 +490,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
   t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t);
   t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
 true, GSI_SAME_STMT);
-  t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR,
-  boolean_type_node, t2, t);
-  t = canonicalize_cond_expr_cond (t);
-  if (!t)
-   return false;
-  if (!is_gimple_condexpr_for_cond (t))
-   {
- gsi = gsi_for_stmt (inner_cond);
- t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond,
- NULL, true, GSI_SAME_STMT);
-   }
-  gimple_cond_set_condition_from_tree (inner_cond, t);
-  update_stmt (inner_cond);
 
-  /* Leave CFG optimization to cfg_cleanup.  */
-  gimple_cond_set_condition_from_tree (outer_cond,
-   outer_inv ? boolean_false_node : boolean_true_node);
-  update_stmt (outer_cond);
+  t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t);
 
-  update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
+  if (!ifcombine_replace_cond (inner_cond, inner_inv,
+  outer_cond, outer_inv,
+  t, true, NULL_TREE))
+   return false;
 
   if (dump_file)
{
@@ -485,9 +516,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
  In that case remove the outer test and change the inner one to
  test for name & (bits1 | bits2) != 0.  */
   else if (recognize_bits_test (inner_cond, &name1, &bits1, !inner_inv)
-  && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv))
+  && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv))
 {
-  gimple_stmt_iterator gsi;
   tree t;
 
   if ((TREE_CODE (name1) == SSA_NAME
@@ -530,33 +560,14 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
  bits1 = fold_convert (TREE_TYPE (bits2), bits1);
 

[gcc(refs/users/aoliva/heads/testme)] allow vuses in ifcombine blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:683dabf0a585970652c3a217231f6897922ec1b7

commit 683dabf0a585970652c3a217231f6897922ec1b7
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:21 2024 -0300

allow vuses in ifcombine blocks

Disallowing vuses in blocks for ifcombine is too strict, and it
prevents usefully moving fold_truth_andor into ifcombine.  That
tree-level folder has long combined loads, absent other relevant
side effects.
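
A hedged example of what the relaxed check now allows (not from the
testsuite): the inner condition below loads from memory, which is a VUSE but
not a VDEF, so the block no longer disqualifies ifcombine:

    int
    g (int a, const int *p)
    {
      if (a > 0)
        if (*p != 0)
          return 1;
      return 0;
    }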


for  gcc/ChangeLog

* tree-ssa-ifcombine.c (bb_no_side_effects_p): Allow vuses,
but not vdefs.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 6a3bc99190d9..ed20a231951a 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb)
   enum tree_code rhs_code;
   if (gimple_has_side_effects (stmt)
  || gimple_could_trap_p (stmt)
- || gimple_vuse (stmt)
+ || gimple_vdef (stmt)
  /* We need to rewrite stmts with undefined overflow to use
 unsigned arithmetic but cannot do so for signed division.  */
  || ((ass = dyn_cast  (stmt))


[gcc r15-4615] Use unique_ptr in more places in pretty_printer/diagnostics: 'gcc/config/gcn/mkoffload.cc' [PR116613

2024-10-24 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:b3aa301db1b09b533b3635791a98d6bf906e9a15

commit r15-4615-gb3aa301db1b09b533b3635791a98d6bf906e9a15
Author: Thomas Schwinge 
Date:   Thu Oct 24 20:56:21 2024 +0200

Use unique_ptr in more places in pretty_printer/diagnostics: 
'gcc/config/gcn/mkoffload.cc' [PR116613]

After recent commit bf43fe6aa966eaf397ea3b8ebd6408d3d124e285
"Use unique_ptr in more places in pretty_printer/diagnostics [PR116613]":

[...]
In file included from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:0:
../../source-gcc/gcc/diagnostic.h:29:3: error: #error "You must define 
INCLUDE_MEMORY before including system.h to use diagnostic.h"
 # error "You must define INCLUDE_MEMORY before including system.h to 
use diagnostic.h"
   ^
In file included from ../../source-gcc/gcc/diagnostic.h:34:0,
 from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:
../../source-gcc/gcc/pretty-print.h:29:3: error: #error "You must 
define INCLUDE_MEMORY before including system.h to use pretty-print.h"
 # error "You must define INCLUDE_MEMORY before including system.h to 
use pretty-print.h"
   ^
In file included from ../../source-gcc/gcc/diagnostic.h:34:0,
 from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:
../../source-gcc/gcc/pretty-print.h:280:16: error: 'unique_ptr' in 
namespace 'std' does not name a template type
   virtual std::unique_ptr clone () const;
^
In file included from ../../source-gcc/gcc/config/gcn/mkoffload.cc:31:0:
../../source-gcc/gcc/diagnostic.h:585:32: error: 'std::unique_ptr' has 
not been declared
   void set_output_format (std::unique_ptr 
output_format);
^
[...]

PR other/116613
gcc/
* config/gcn/mkoffload.cc: Add '#define INCLUDE_MEMORY'.

Diff:
---
 gcc/config/gcn/mkoffload.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index c1d80aae59c7..17a334211347 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -24,6 +24,7 @@
This is not a complete assembler.  We presume the source is well
formed from the compiler and can die horribly if it is not.  */
 
+#define INCLUDE_MEMORY
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"


[gcc/devel/nothrow-detection] Fixed extract_types_for_resx function

2024-10-24 Thread Pranil Dey via Gcc-cvs
https://gcc.gnu.org/g:c756ee328c82211698ddcaf9e3475c763884d7bb

commit c756ee328c82211698ddcaf9e3475c763884d7bb
Author: Pranil Dey 
Date:   Fri Oct 25 00:56:59 2024 +0530

Fixed extract_types_for_resx function

The function was recursive in nature and there was a chance of running out
of stack, so an iterative approach is now used to get the types for resx.

Diff:
---
 gcc/tree-eh.cc | 89 +-
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc
index df90d1bc2049..e62fed90c6dd 100644
--- a/gcc/tree-eh.cc
+++ b/gcc/tree-eh.cc
@@ -3183,60 +3183,59 @@ stmt_throw_types (function *, gimple *stmt, vec 
*ret_vector)
 }
 }
 
-// To get the all exception types from a resx stmt
-static bool
-extract_types_for_resx (basic_block bb, vec *ret_vector)
+// To get the all exception types from a resx stmt (iterative version)
+bool
+extract_types_for_resx (gimple *resx_stmt, vec *ret_vector)
 {
-  edge e;
-  edge_iterator ei;
+  basic_block start_bb = gimple_bb (resx_stmt);
+  hash_set visited_blocks;
+  vec block_stack;
 
-  // Iterate over edges to walk up the basic blocks
-  FOR_EACH_EDGE (e, ei, bb->preds)
-  {
-// Get the last stmt of the basic block as it is an EH stmt
-bb = e->src;
-gimple_stmt_iterator gsi = gsi_last_bb (bb);
-gimple *last_stmt = gsi_stmt (gsi);
+  block_stack.safe_push(start_bb);
 
-if (bb->aux)
+  while (!block_stack.is_empty())
+  {
+basic_block bb = block_stack.pop();
+if (visited_blocks.contains(bb))
   continue;
-bb->aux = (void *)1;
 
-if (last_stmt && (e->flags & EDGE_EH))
+visited_blocks.add(bb);
+
+edge e;
+edge_iterator ei;
+gimple_stmt_iterator gsi = gsi_last_bb(bb);
+gimple *last_stmt = gsi_stmt(gsi);
+
+
+FOR_EACH_EDGE(e, ei, bb->preds)
+{
+  basic_block pred_bb = e->src;
+
+  if (e->flags & EDGE_EH)
   {
-if (gimple_code (last_stmt) == GIMPLE_CALL)
-  {
-// check if its a throw
-if (!extract_types_for_call (as_a (last_stmt),
- ret_vector))
-  return false;
-continue;
-  }
-else if (gimple_code (last_stmt) == GIMPLE_RESX)
-  {
-// Recursively processing resx
-// FIXME: to get this linear, we should cache results.
-if (!extract_types_for_resx (last_stmt, ret_vector))
-  return false;
-continue;
-  }
+gimple_stmt_iterator pred_gsi = gsi_last_bb(pred_bb);
+gimple *pred_last_stmt = gsi_stmt(pred_gsi);
+
+if (gimple_code(pred_last_stmt) == GIMPLE_CALL)
+{
+  if (!extract_types_for_call(as_a(pred_last_stmt), 
ret_vector))
+return false; 
+}
+else if (gimple_code(pred_last_stmt) == GIMPLE_RESX)
+{
+  // Add the predecessor block to the stack for further exploration
+  block_stack.safe_push(pred_bb);
+}
   }
-/* FIXME: remove recursion here, so we do not run out of stack.  */
-else if (!extract_types_for_resx (e->src, ret_vector))
-  return false;
+  else
+  {
+block_stack.safe_push(pred_bb);
+  }
+}
   }
-  return true;
-}
 
-// To get the all exception types from a resx stmt
-bool
-extract_types_for_resx (gimple *resx_stmt, vec *ret_vector)
-{
-  basic_block bb = gimple_bb (resx_stmt);
-  bool ret = extract_types_for_resx (bb, ret_vector);
-  /* FIXME: this is non-linear.  */
-  clear_aux_for_blocks ();
-  return ret;
+  clear_aux_for_blocks();
+  return true;
 }
 
 // To get the types being thrown outside of a function


[gcc/aoliva/heads/testme] (2 commits) ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 a050f30dcecc... ifcombine across noncontiguous blocks

It previously pointed to:

 1070f7874f55... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  1070f78... ifcombine across noncontiguous blocks
  cb75ab7... extend ifcombine_replace_cond to handle noncontiguous ifcom


Summary of changes (added commits):
---

  a050f30... ifcombine across noncontiguous blocks
  aee55fb... extend ifcombine_replace_cond to handle noncontiguous ifcom


[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:f7a9315f62c2d2bcafb854b6abaaec722907

commit f7a9315f62c2d2bcafb854b6abaaec722907
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 159 +-
 1 file changed, 128 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 6be5d969de88..bd46a5242154 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -50,6 +50,21 @@ along with GCC; see the file COPYING3.  If not see
 false) >= 2)
 #endif
 
+/* Return FALSE iff the COND_BB ends with a conditional whose result is not a
+   known constant.  */
+
+static bool
+known_succ_p (basic_block cond_bb)
+{
+  gcond *cond = safe_dyn_cast  (*gsi_last_bb (cond_bb));
+
+  if (!cond)
+return true;
+
+  return (CONSTANT_CLASS_P (gimple_cond_lhs (cond))
+ && CONSTANT_CLASS_P (gimple_cond_rhs (cond)));
+}
+
 /* This pass combines COND_EXPRs to simplify control flow.  It
currently recognizes bit tests and comparisons in chains that
represent logical and or logical or of two COND_EXPRs.
@@ -70,25 +85,34 @@ along with GCC; see the file COPYING3.  If not see
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   gcc_assert (inner_taken->dest == outer2->dest);
 
   if (outer_to_inner_bb == inner_cond_bb
-  && constant_condition_p (outer_cond_bb))
+  && known_succ_p (outer_cond_bb))
 {
   /* Path outer_cond_bb->(outer2) needs to be merged into path
 outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
@@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer_to_inner->probability = profile_probability::always ();
   outer2->probability = profile_probability::never ();
 }
-  else if (constant_condition_p (inner_cond_bb))
+  else if (known_succ_p (inner_cond_bb))
 {
   /* Path inner_cond_bb->(inner_taken) needs to be merged into path
 outer_cond_bb->(outer2).  We've accumulated the probabilities from
@@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-ef

[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 f7a9315f62c2... ifcombine across noncontiguous blocks

It previously pointed to:

 3eb8edeb0ee0... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  3eb8ede... ifcombine across noncontiguous blocks


Summary of changes (added commits):
---

  f7a9315... ifcombine across noncontiguous blocks


[gcc r15-4594] asan: Fix up build_check_stmt gsi handling [PR117209]

2024-10-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:885143fa77599c44bfdd4e8e6b6987b7824db6ba

commit r15-4594-g885143fa77599c44bfdd4e8e6b6987b7824db6ba
Author: Jakub Jelinek 
Date:   Thu Oct 24 12:45:34 2024 +0200

asan: Fix up build_check_stmt gsi handling [PR117209]

gsi_safe_insert_before properly updates gsi_bb in gimple_stmt_iterator
in case it splits blocks, but unfortunately build_check_stmt was in
some places (but not others) using a copy of the iterator rather than
the iterator passed from callers and so didn't propagate that to callers.
I guess it didn't matter much before when it was just using
gsi_insert_before as that really didn't change the iterator.
The !before_p case is apparently dead code, nothing is calling it with
before_p=false since around 4.9.
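
A minimal sketch (not GCC code) of the pitfall being fixed: a helper that
works on a local copy of an iterator-like object loses any update the callee
makes to the underlying position unless the copy is written back.

struct iter { int pos; };

static void safe_insert_before (struct iter *it)
{
  it->pos++;   /* the callee may move the position, like gsi_safe_insert_before */
}

static void buggy (struct iter *caller_it)
{
  struct iter copy = *caller_it;   /* like the old "gsi = *iter" */
  safe_insert_before (&copy);      /* the update lands only on the copy */
  /* caller_it is now stale; the fix is to operate on caller_it directly.  */
}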

2024-10-24  Jakub Jelinek  

PR sanitizer/117209
* asan.cc (maybe_cast_to_ptrmode): Formatting fix.
(build_check_stmt): Don't copy *iter into gsi, perform all
the updates on iter directly.

* gcc.dg/asan/pr117209.c: New test.

Diff:
---
 gcc/asan.cc  | 14 +-
 gcc/testsuite/gcc.dg/asan/pr117209.c | 15 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index 5f262d54a3ac..bc92d9c7a792 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -2610,7 +2610,7 @@ maybe_cast_to_ptrmode (location_t loc, tree len, 
gimple_stmt_iterator *iter,
   if (ptrofftype_p (len))
 return len;
   gimple *g = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
- NOP_EXPR, len);
+  NOP_EXPR, len);
   gimple_set_location (g, loc);
   if (before_p)
 gsi_safe_insert_before (iter, g);
@@ -2644,16 +2644,13 @@ build_check_stmt (location_t loc, tree base, tree len,
  bool is_non_zero_len, bool before_p, bool is_store,
  bool is_scalar_access, unsigned int align = 0)
 {
-  gimple_stmt_iterator gsi = *iter;
   gimple *g;
 
   gcc_assert (!(size_in_bytes > 0 && !is_non_zero_len));
   gcc_assert (size_in_bytes == -1 || size_in_bytes >= 1);
 
-  gsi = *iter;
-
   base = unshare_expr (base);
-  base = maybe_create_ssa_name (loc, base, &gsi, before_p);
+  base = maybe_create_ssa_name (loc, base, iter, before_p);
 
   if (len)
 {
@@ -2704,12 +2701,11 @@ build_check_stmt (location_t loc, tree base, tree len,
 align / BITS_PER_UNIT));
   gimple_set_location (g, loc);
   if (before_p)
-gsi_safe_insert_before (&gsi, g);
+gsi_safe_insert_before (iter, g);
   else
 {
-  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-  gsi_next (&gsi);
-  *iter = gsi;
+  gsi_insert_after (iter, g, GSI_NEW_STMT);
+  gsi_next (iter);
 }
 }
 
diff --git a/gcc/testsuite/gcc.dg/asan/pr117209.c 
b/gcc/testsuite/gcc.dg/asan/pr117209.c
new file mode 100644
index ..34c71ba260b3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr117209.c
@@ -0,0 +1,15 @@
+/* PR sanitizer/117209 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=address" } */
+
+struct A { char a; };
+void foo (void);
+__attribute__((returns_twice, const)) int bar (struct A);
+
+void
+baz (struct A *x, int *y, int z)
+{
+  if (z)
+foo (); 
+  *y = bar (*x);
+}


[gcc r15-4595] c++: Further fix for get_member_function_from_ptrfunc [PR117259]

2024-10-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:b25d3201b6338d9f71c64f524ca2974d9a1f38e8

commit r15-4595-gb25d3201b6338d9f71c64f524ca2974d9a1f38e8
Author: Jakub Jelinek 
Date:   Thu Oct 24 12:56:19 2024 +0200

c++: Further fix for get_member_function_from_ptrfunc [PR117259]

The following testcase shows that the previous 
get_member_function_from_ptrfunc
changes weren't sufficient and we still have cases where
-fsanitize=undefined with pointers to member functions can cause wrong code
being generated and related false positive warnings.

The problem is that save_expr doesn't always create SAVE_EXPR, it can skip
some invariant arithmetics and in the end it could be really large
expressions which would be evaluated several times (and what is worse, with
-fsanitize=undefined those expressions then can have SAVE_EXPRs added to
their subparts for -fsanitize=bounds or -fsanitize=null or
-fsanitize=alignment instrumentation).  Tried to just build1 a SAVE_EXPR
+ add TREE_SIDE_EFFECTS instead of save_expr, but that doesn't work either,
because cp_fold happily optimizes those SAVE_EXPRs away when it sees
SAVE_EXPR operand is tree_invariant_p.

So, the following patch instead of using save_expr or building SAVE_EXPR
manually builds a TARGET_EXPR.  Both types are pointers, so it doesn't need
to be destroyed in any way, but TARGET_EXPR is what doesn't get optimized
away immediately.

2024-10-24  Jakub Jelinek  

PR c++/117259
* typeck.cc (get_member_function_from_ptrfunc): Use 
force_target_expr
rather than save_expr for instance_ptr and function.  Don't call it
for TREE_CONSTANT.

* g++.dg/ubsan/pr117259.C: New test.

Diff:
---
 gcc/cp/typeck.cc  | 31 +--
 gcc/testsuite/g++.dg/ubsan/pr117259.C | 13 +
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 71d879abef12..bfc0c560c106 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -4193,24 +4193,27 @@ get_member_function_from_ptrfunc (tree 
*instance_ptrptr, tree function,
   if (!nonvirtual && is_dummy_object (instance_ptr))
nonvirtual = true;
 
-  /* Use save_expr even when instance_ptr doesn't have side-effects,
-unless it is a simple decl (save_expr won't do anything on
-constants), so that we don't ubsan instrument the expression
-multiple times.  See PR116449.  */
+  /* Use force_target_expr even when instance_ptr doesn't have
+side-effects, unless it is a simple decl or constant, so
+that we don't ubsan instrument the expression multiple times.
+Don't use save_expr, as save_expr can avoid building a SAVE_EXPR
+and building a SAVE_EXPR manually can be optimized away during
+cp_fold.  See PR116449 and PR117259.  */
   if (TREE_SIDE_EFFECTS (instance_ptr)
- || (!nonvirtual && !DECL_P (instance_ptr)))
-   {
- instance_save_expr = save_expr (instance_ptr);
- if (instance_save_expr == instance_ptr)
-   instance_save_expr = NULL_TREE;
- else
-   instance_ptr = instance_save_expr;
-   }
+ || (!nonvirtual
+ && !DECL_P (instance_ptr)
+ && !TREE_CONSTANT (instance_ptr)))
+   instance_ptr = instance_save_expr
+ = force_target_expr (TREE_TYPE (instance_ptr), instance_ptr,
+  complain);
 
   /* See above comment.  */
   if (TREE_SIDE_EFFECTS (function)
- || (!nonvirtual && !DECL_P (function)))
-   function = save_expr (function);
+ || (!nonvirtual
+ && !DECL_P (function)
+ && !TREE_CONSTANT (function)))
+   function
+ = force_target_expr (TREE_TYPE (function), function, complain);
 
   /* Start by extracting all the information from the PMF itself.  */
   e3 = pfn_from_ptrmemfunc (function);
diff --git a/gcc/testsuite/g++.dg/ubsan/pr117259.C 
b/gcc/testsuite/g++.dg/ubsan/pr117259.C
new file mode 100644
index ..2b7ba56c2a36
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/pr117259.C
@@ -0,0 +1,13 @@
+// PR c++/117259
+// { dg-do compile }
+// { dg-options "-Wuninitialized -fsanitize=undefined" }
+
+struct A { void foo () {} };
+struct B { void (A::*b) (); B (void (A::*x) ()) : b(x) {}; };
+const B c[1] = { &A::foo };
+
+void
+foo (A *x, int y)
+{
+  (x->*c[y].b) ();
+}


[gcc(refs/users/meissner/heads/work182-sha)] Update ChangeLog.*

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c5a9703abe8ddb40629fce9e098ef5820fdff8e2

commit c5a9703abe8ddb40629fce9e098ef5820fdff8e2
Author: Michael Meissner 
Date:   Thu Oct 24 14:08:35 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 80f083698d2a..a48e8bcf5071 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,3 +1,15 @@
+ Branch work182-sha, patch #412 
+
+Add p-future target-supports.exp
+
+2024-10-24  Michael Meissner  
+
+gcc/testsuite/
+
+   * lib/target-supports.exp (check_effective_target_powerpc_future_ok):
+   New target.
+   (check_effective_target_powerpc_dense_math_ok): Likewise.
+
  Branch work182-sha, patch #411 
 
 Add potential p-future XVRLD and XVRLDI instructions.


[gcc(refs/users/meissner/heads/work182-sha)] Add p-future target-supports.exp

2024-10-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:58f00644caf178cd0b4b0d50ba151b4f156ea03d

commit 58f00644caf178cd0b4b0d50ba151b4f156ea03d
Author: Michael Meissner 
Date:   Thu Oct 24 14:07:22 2024 -0400

Add p-future target-supports.exp

2024-10-24  Michael Meissner  

gcc/testsuite/

* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New target.
(check_effective_target_powerpc_dense_math_ok): Likewise.
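
Assuming the usual DejaGnu convention that the check_effective_target_
prefix is dropped in directives, a test could gate on the new procs roughly
like this (usage example, not part of the commit):

/* { dg-require-effective-target powerpc_future_ok } */
/* { dg-require-effective-target powerpc_dense_math_ok } */
/* { dg-options "-mcpu=future" } */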

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index d113a08dff7b..f104f4295d9f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7366,6 +7366,41 @@ proc check_effective_target_power10_ok { } {
 }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# some potential new instructions.
+proc check_effective_target_powerpc_future_ok { } {
+   return [check_no_compiler_messages powerpc_future_ok object {
+   #ifndef _ARCH_PWR_FUTURE
+   #error "-mcpu=future is not supported"
+   #else
+   int dummy;
+   #endif
+   } "-mcpu=future"]
+}
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future which enables
+# the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+if { ([istarget powerpc*-*-*]) } {
+   return [check_no_compiler_messages powerpc_dense_math_ok object {
+   __vector_quad vq;
+   int main (void) {
+   #ifndef __DENSE_MATH__
+   #error "target does not have dense math support."
+   #else
+   /* Make sure we have dense math support.  */
+ __vector_quad dmr;
+ __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+ vq = dmr;
+   #endif
+   return 0;
+   }
+   } "-mcpu=future"]
+} else {
+   return 0;
+}
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.


[gcc(refs/users/aoliva/heads/testme)] extend ifcombine_replace_cond to handle noncontiguous ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:aee55fb4fdf230ddb0e96564f60e5db527b1a8c4

commit aee55fb4fdf230ddb0e96564f60e5db527b1a8c4
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:30 2024 -0300

extend ifcombine_replace_cond to handle noncontiguous ifcombine

Prepare to handle noncontiguous ifcombine, introducing logic to modify
the outer condition when needed.  There are two cases worth
mentioning:

- when blocks are noncontiguous, we have to place the combined
  condition in the outer block to avoid pessimizing carefully crafted
  short-circuited tests;

- even when blocks are contiguous, we prepare for situations in which
  the combined condition has two tests, one to be placed in outer and
  the other in inner.  This circumstance will not come up when
  noncontiguous ifcombine is first enabled, but it will when
  an improved fold_truth_andor is integrated with ifcombine.

Combining the condition from inner into outer may require moving SSA
DEFs used in the inner condition, and the changes implement this as
well.
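
A rough illustration (mine, not from the patch) of the noncontiguous shape:
the outer and inner conditions are separated by an intervening test, so the
combined condition is placed in the outer block to preserve the
short-circuit the programmer wrote.

int f (int *p, int a, int b)
{
  if (a & 1)        /* outer condition */
    if (p != 0)     /* intervening block, not part of the combination */
      if (b & 1)    /* inner condition, a candidate to combine with the outer */
        return 1;
  return 0;
}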


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc: Include bitmap.h.
(ifcombine_mark_ssa_name): New.
(struct ifcombine_mark_ssa_name_t): New.
(ifcombine_mark_ssa_name_walk): New.
(ifcombine_replace_cond): Prepare to handle noncontiguous and
split-condition ifcombine.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 184 +-
 1 file changed, 180 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index b5b72be29bbf..c271d1e86a9b 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "attribs.h"
 #include "asan.h"
+#include "bitmap.h"
 
 #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
 #define LOGICAL_OP_NON_SHORT_CIRCUIT \
@@ -460,17 +461,70 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 }
 }
 
-/* Replace the conditions in INNER_COND with COND.
-   Replace OUTER_COND with a constant.  */
+/* Set NAME's bit in USED if OUTER dominates it.  */
+
+static void
+ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
+{
+  if (SSA_NAME_IS_DEFAULT_DEF (name))
+return;
+
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  basic_block bb = gimple_bb (def);
+  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
+return;
+
+  bitmap_set_bit (used, SSA_NAME_VERSION (name));
+}
+
+/* Data structure passed to ifcombine_mark_ssa_name.  */
+struct ifcombine_mark_ssa_name_t
+{
+  /* SSA_NAMEs that have been referenced.  */
+  bitmap used;
+  /* Dominating block of DEFs that might need moving.  */
+  basic_block outer;
+};
+
+/* Mark in DATA->used any SSA_NAMEs used in *t.  */
+
+static tree
+ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
+{
+  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
+
+  if (*t && TREE_CODE (*t) == SSA_NAME)
+ifcombine_mark_ssa_name (data->used, *t, data->outer);
+
+  return NULL;
+}
+
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
 
 static bool
 ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
gcond *outer_cond, bool outer_inv,
tree cond, bool must_canon, tree cond2)
 {
-  bool result_inv = inner_inv;
+  /* Split cond into cond2 if they're contiguous.  ??? We might be able to
+ handle ORIF as well, inverting both conditions, but it's not clear that
+ this would be enough, and it never comes up.  */
+  if (!cond2
+  && TREE_CODE (cond) == TRUTH_ANDIF_EXPR
+  && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond))
+{
+  /* ??? Does this ever hit?  */
+  gcc_unreachable ();
+  cond2 = TREE_OPERAND (cond, 1);
+  cond = TREE_OPERAND (cond, 0);
+}
 
-  gcc_checking_assert (!cond2);
+  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
+  != gimple_bb (outer_cond));
+  bool result_inv = outer_p ? outer_inv : inner_inv;
 
   if (result_inv)
 cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
@@ -480,6 +534,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
   else if (must_canon)
 return false;
 
+  if (outer_p)
+{
+  {
+   auto_bitmap used;
+   basic_block outer_bb = gimple_bb (outer_cond);
+
+   /* Mark SSA DEFs that are referenced by cond and may thus need to be
+  moved to outer.  */
+   {
+ ifcombine_mark_ssa_name_t data = { used, outer_bb };
+ walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
+   }
+

[gcc r15-4603] Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:8d01bc7dc453a44cd5b1913fb8a4104ee56fa410

commit r15-4603-g8d01bc7dc453a44cd5b1913fb8a4104ee56fa410
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:32 2024 +0100

Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule

match.pd had a rule to simplify ((X /[ex] A) +- B) * A -> X +- A * B
when A and B are INTEGER_CSTs.  This patch extends it to handle the
case where the outer multiplication is by a factor of A, not just
A itself.  It also handles addition and multiplication of poly_ints.
(Exact division by a poly_int seems unlikely.)
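
For a source-level feel of the generalised rule (my example, though it
mirrors the new mulexactdiv tests): once x is known to be a multiple of 8,
the exact division by 8 cancels against a multiplier that is itself a
multiple of 8.

int f (int x)
{
  if (x & 7)
    __builtin_unreachable ();   /* x is a multiple of 8 */
  return (x / 8 + 3) * 16;      /* folds to x * 2 + 48 */
}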

gcc/
* match.pd: Generalise ((X /[ex] A) +- B) * A -> X +- A * B rule
to ((X /[ex] C1) +- C2) * (C1 * C3) -> (X * C3) +- (C1 * C2 * C3).

gcc/testsuite/
* gcc.dg/tree-ssa/mulexactdiv-5.c: New test.
* gcc.dg/tree-ssa/mulexactdiv-6.c: Likewise.
* gcc.dg/tree-ssa/mulexactdiv-7.c: Likewise.
* gcc.dg/tree-ssa/mulexactdiv-8.c: Likewise.
* gcc.target/aarch64/sve/cnt_fold_3.c: Likewise.

Diff:
---
 gcc/match.pd  | 38 +--
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c | 29 +++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-6.c | 59 +++
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-7.c | 22 +
 gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-8.c | 20 
 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_3.c | 40 +++
 6 files changed, 194 insertions(+), 14 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index e3dabdc15d5f..b9621a47cdf1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5497,24 +5497,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
optab_vector)))
(eq (trunc_mod @0 @1) { build_zero_cst (TREE_TYPE (@0)); })))
 
-/* ((X /[ex] A) +- B) * A  -->  X +- A * B.  */
+/* ((X /[ex] C1) +- C2) * (C1 * C3)  -->  (X * C3) +- (C1 * C2 * C3).  */
 (for op (plus minus)
  (simplify
-  (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@@1)) 
INTEGER_CST@2)) @1)
-  (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
-   && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2)))
-   (with
- {
-   wi::overflow_type overflow;
-   wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),
-  TYPE_SIGN (type), &overflow);
- }
+  (mult (convert1? (op (convert2? (exact_div @0 INTEGER_CST@1))
+  poly_int_tree_p@2))
+   poly_int_tree_p@3)
+  (with { poly_widest_int factor; }
+   (if (tree_nop_conversion_p (type, TREE_TYPE (@2))
+   && tree_nop_conversion_p (TREE_TYPE (@0), TREE_TYPE (@2))
+   && multiple_p (wi::to_poly_widest (@3), wi::to_widest (@1), &factor))
+(with
+  {
+   wi::overflow_type overflow;
+wide_int mul;
+  }
  (if (types_match (type, TREE_TYPE (@2))
-&& types_match (TREE_TYPE (@0), TREE_TYPE (@2)) && !overflow)
-  (op @0 { wide_int_to_tree (type, mul); })
+ && types_match (TREE_TYPE (@0), TREE_TYPE (@2))
+ && TREE_CODE (@2) == INTEGER_CST
+ && TREE_CODE (@3) == INTEGER_CST
+ && (mul = wi::mul (wi::to_wide (@2), wi::to_wide (@3),
+TYPE_SIGN (type), &overflow),
+ !overflow))
+  (op (mult @0 { wide_int_to_tree (type, factor); })
+ { wide_int_to_tree (type, mul); })
   (with { tree utype = unsigned_type_for (type); }
-   (convert (op (convert:utype @0)
-   (mult (convert:utype @1) (convert:utype @2))
+   (convert (op (mult (convert:utype @0)
+ { wide_int_to_tree (utype, factor); })
+   (mult (convert:utype @3) (convert:utype @2)))
 
 /* Canonicalization of binary operations.  */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
new file mode 100644
index ..37cd676fff69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mulexactdiv-5.c
@@ -0,0 +1,29 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+#define TEST_CMP(FN, DIV, ADD, MUL)\
+  int  \
+  FN (int x)   \
+  {\
+if (x & 7) \
+  __builtin_unreachable ();\
+x /= DIV;  \
+x += ADD;  \
+return x * MUL;\
+  }
+
+TEST_CMP (f1, 2, 1, 6)
+TEST_CMP (f2, 2, 2, 10)
+TEST_CMP (f3, 4, 3, 80)
+TEST_CMP (f4, 8, 4, 200)
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {> 1, 6)
+TEST_CMP (f2, 2, ~(~0U >> 2), 10)
+
+void
+cmp1 (int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+
+  int y = x / 4;
+  y += (int) (~0U / 3U);
+  y *= 8

[gcc r15-4606] Try to simplify (X >> C1) * (C2 << C1) -> X * C2

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:af19e46c88fd75e31127cde239b8f28d8f9c4040

commit r15-4606-gaf19e46c88fd75e31127cde239b8f28d8f9c4040
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:33 2024 +0100

Try to simplify (X >> C1) * (C2 << C1) -> X * C2

This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2
when the low C1 bits of X are known to be zero.  As with the earlier
X >> C1 << (C2 + C1) patch, any single conversion is allowed between
the shift and the multiplication.
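
A source-level sketch (mine, in the spirit of the new shifts-3.c test): with
the low bits known to be zero, the right shift cancels against a multiplier
that carries those bits.

unsigned int f (unsigned int x)
{
  if (x & 3)
    __builtin_unreachable ();   /* low 2 bits of x are zero */
  x >>= 2;
  return x * 20;                /* 20 == 5 << 2, so this becomes (original x) * 5 */
}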

gcc/
* match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the
low C1 bits of X are zero.

gcc/testsuite/
* gcc.dg/tree-ssa/shifts-3.c: New test.
* gcc.dg/tree-ssa/shifts-4.c: Likewise.
* gcc.target/aarch64/sve/cnt_fold_5.c: Likewise.

Diff:
---
 gcc/match.pd  | 13 +
 gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c  | 65 +++
 gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c  | 23 
 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c | 38 +
 4 files changed, 139 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 391c60bdfb32..148d0bc65d03 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4919,6 +4919,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && wi::to_widest (@2) >= wi::to_widest (@1)
   && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
   (lshift (convert @0) (minus @2 @1
+
+/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero.  */
+(simplify
+ (mult (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
+   poly_int_tree_p@2)
+ (with { poly_widest_int factor; }
+  (if (INTEGRAL_TYPE_P (type)
+   && wi::ltu_p (wi::to_wide (@1), element_precision (type))
+   && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))
+   && multiple_p (wi::to_poly_widest (@2),
+ widest_int (1) << tree_to_uhwi (@1),
+ &factor))
+   (mult (convert @0) { wide_int_to_tree (type, factor); }
 #endif
 
 /* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
new file mode 100644
index ..dcff518e630d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
@@ -0,0 +1,65 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return x * 20;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  unsigned char y = x;
+  y >>= 2;
+  return y * 36;
+}
+
+unsigned long
+f3 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return (unsigned long) x * 88;
+}
+
+int
+f4 (int x)
+{
+  if (x & 15)
+__builtin_unreachable ();
+  x >>= 4;
+  return x * 48;
+}
+
+unsigned int
+f5 (int x)
+{
+  if (x & 31)
+__builtin_unreachable ();
+  x >>= 5;
+  return x * 3200;
+}
+
+unsigned int
+f6 (unsigned int x)
+{
+  if (x & 1)
+__builtin_unreachable ();
+  x >>= 1;
+  return x * (~0U / 3 & -2);
+}
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {>= 2;
+  return x * 10;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 3;
+  return x * 24;
+}
+
+/* { dg-final { scan-tree-dump-times {
+
+/*
+** f1:
+** ...
+** cntd[^\n]+
+** ...
+** mul [^\n]+
+** ret
+*/
+uint64_t
+f1 (int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return (uint64_t) x * svcnth ();
+}
+
+/*
+** f2:
+** ...
+** asr [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f2 (int x)
+{
+  if (x & 3)
+__builtin_unreachable ();
+  x >>= 2;
+  return (uint64_t) x * svcntw ();
+}


[gcc r15-4601] Use get_nonzero_bits to simplify trunc_div to exact_div

2024-10-24 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:e2e798b86074010a8d5da16ce0b199fcec70a50e

commit r15-4601-ge2e798b86074010a8d5da16ce0b199fcec70a50e
Author: Richard Sandiford 
Date:   Thu Oct 24 14:22:31 2024 +0100

Use get_nonzero_bits to simplify trunc_div to exact_div

There are a limited number of existing rules that benefit from
knowing that a division is exact.  Later patches will add more.
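
A small illustration (mine, echoing the new cmpexactdiv-6.c test): once the
low bit of each operand is known to be zero, the divisions become exact and
the existing exact-division rules can eliminate them from the comparison.

int f (int x, int y)
{
  if ((x & 1) || (y & 1))
    __builtin_unreachable ();
  x /= 2;          /* becomes x /[ex] 2 */
  y /= 2;
  return x < y;    /* then folds to comparing x and y directly */
}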

gcc/
* match.pd: Simplify X / (1 << C) to X /[ex] (1 << C) if the
low C bits of X are clear

gcc/testsuite/
* gcc.dg/tree-ssa/cmpexactdiv-6.c: New test.

Diff:
---
 gcc/match.pd  |  9 +
 gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c | 29 +++
 2 files changed, 38 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 9024277e5d34..2e7f06ecbe45 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5456,6 +5456,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
TYPE_PRECISION (type)), 0))
(convert @0)))
 
+#if GIMPLE
+/* X / (1 << C) -> X /[ex] (1 << C) if the low C bits of X are clear.  */
+(simplify
+ (trunc_div (with_possible_nonzero_bits2 @0) integer_pow2p@1)
+ (if (INTEGRAL_TYPE_P (type)
+  && !TYPE_UNSIGNED (type)
+  && wi::multiple_of_p (get_nonzero_bits (@0), wi::to_wide (@1), SIGNED))
+  (exact_div @0 @1)))
+#endif
 
 /* (X /[ex] A) * A -> X.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c
new file mode 100644
index ..82d517b05abd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpexactdiv-6.c
@@ -0,0 +1,29 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+int
+f1 (int x, int y)
+{
+  if ((x & 1) || (y & 1))
+__builtin_unreachable ();
+  x /= 2;
+  y /= 2;
+  return x < y;
+}
+
+int
+f2 (void *ptr1, void *ptr2, void *ptr3)
+{
+  ptr1 = __builtin_assume_aligned (ptr1, 4);
+  ptr2 = __builtin_assume_aligned (ptr2, 4);
+  ptr3 = __builtin_assume_aligned (ptr3, 4);
+  intptr_t diff1 = (intptr_t) ptr1 - (intptr_t) ptr2;
+  intptr_t diff2 = (intptr_t) ptr1 - (intptr_t) ptr3;
+  diff1 /= 2;
+  diff2 /= 2;
+  return diff1 < diff2;
+}
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {

[gcc r14-10833] c++: remove dg-warning [PR117274]

2024-10-24 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:2d47645cd47c9a84a69343b641a6ee741a85dc75

commit r14-10833-g2d47645cd47c9a84a69343b641a6ee741a85dc75
Author: Jason Merrill 
Date:   Thu Oct 24 11:13:30 2024 -0400

c++: remove dg-warning [PR117274]

This warning was added for GCC 15, don't expect it.

PR c++/117274
PR c++/117107

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/decomp10.C: Remove captured binding warning.

Diff:
---
 gcc/testsuite/g++.dg/cpp2a/decomp10.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp10.C 
b/gcc/testsuite/g++.dg/cpp2a/decomp10.C
index 8fe425b5d01e..cadeee347b4c 100644
--- a/gcc/testsuite/g++.dg/cpp2a/decomp10.C
+++ b/gcc/testsuite/g++.dg/cpp2a/decomp10.C
@@ -12,7 +12,7 @@ struct tuple {
   void check_tuple_like() {
 tuple t;
 auto [v, r] = t; // { dg-warning "structured bindings" "" { target 
c++14_down } }
-(void)[v, r] {   // { dg-warning "captured structured" "" { target 
c++17_down } }
+(void)[v, r] {
 decltype(v) x;
 };
   }


[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 53a0460c1f49... ifcombine across noncontiguous blocks

It previously pointed to:

 a050f30dcecc... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  a050f30... ifcombine across noncontiguous blocks


Summary of changes (added commits):
---

  53a0460... ifcombine across noncontiguous blocks


[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:53a0460c1f498ae9bd2e4da34bf2c22bd74f1181

commit 53a0460c1f498ae9bd2e4da34bf2c22bd74f1181
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Rework ifcombine to support merging conditions from noncontiguous
blocks.  This depends on earlier preparation changes.

The function that attempted to ifcombine a block with its immediate
predecessor, tree_ssa_ifcombine_bb, now loops over dominating blocks
eligible for ifcombine, attempting to combine with them.

The function that actually drives the combination of a pair of blocks,
tree_ssa_ifcombine_bb_1, now takes an additional parameter: the
successor of outer that leads to inner.

The function that recognizes if_then_else patterns is modified to
enable testing without distinguishing between then and else, or to
require nondegenerate conditions, that aren't worth combining with.


for  gcc/ChangeLog

* tree-ssa-ifcombine.cc (recognize_if_then_else): Support
relaxed then/else testing; require nondegenerate condition
otherwise.
(tree_ssa_ifcombine_bb_1): Add outer_succ_bb parm, use it
instead of inner_cond_bb.  Adjust callers.
(tree_ssa_ifcombine_bb): Loop over dominating outer blocks
eligible for ifcombine.
(pass_tree_ifcombine::execute): Noted potential need for
changes to the post-combine logic.

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 152 +-
 1 file changed, 123 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index c271d1e86a9b..a21cc22cf589 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -85,25 +85,34 @@ known_succ_p (basic_block cond_bb)
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -899,19 +908,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-effects.  */
   if (phi_pred_bb != else_bb
-  && recognize_if_then_else (outer_cond_bb, &inner_cond_bb, &else_bb)
+  && recognize_if_then_else (outer_cond_bb, &outer_succ_bb, &else_bb)
   && same_phi_args_p (outer_cond_bb, phi_pred_bb, el

[gcc r15-4612] testsuite: Require effective target pie for pr113197

2024-10-24 Thread Dimitar Dimitrov via Gcc-cvs
https://gcc.gnu.org/g:bcd56224d74cdd8dc3c77097de51e97bc7b6d181

commit r15-4612-gbcd56224d74cdd8dc3c77097de51e97bc7b6d181
Author: Dimitar Dimitrov 
Date:   Thu Oct 24 19:59:42 2024 +0300

testsuite: Require effective target pie for pr113197

The test for PR113197 explicitly enables PIE.  But targets without PIE
emit warnings when -fpie is passed (e.g. pru and avr), which causes the
test to fail.

Fix by adding an effective target requirement for PIE.

With this patch, the test now is marked as unsupported for
pru-unknown-elf.  Testing for x86_64-pc-linux-gnu passes with current
mainline, and fails if the fix from r15-4018-g02f4efe3c12cf7 is
reverted.

PR ipa/113197

gcc/testsuite/ChangeLog:

* gcc.dg/lto/pr113197_0.c: Require effective target pie.

Signed-off-by: Dimitar Dimitrov 

Diff:
---
 gcc/testsuite/gcc.dg/lto/pr113197_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/lto/pr113197_0.c 
b/gcc/testsuite/gcc.dg/lto/pr113197_0.c
index 293c8207dee0..6fd86245d30a 100644
--- a/gcc/testsuite/gcc.dg/lto/pr113197_0.c
+++ b/gcc/testsuite/gcc.dg/lto/pr113197_0.c
@@ -1,4 +1,5 @@
 /* { dg-lto-do link } */
+/* { dg-require-effective-target pie } */
 /* { dg-lto-options { { -O -flto -fpie } } } */
 /* { dg-extra-ld-options { -r -nostdlib -flinker-output=nolto-rel } } */


[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:3eb8edeb0ee01ed4549c7ca3e5705b32688c1cbe

commit 3eb8edeb0ee01ed4549c7ca3e5705b32688c1cbe
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 144 --
 1 file changed, 115 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 6be5d969de88..d0d1889f2511 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -50,6 +50,21 @@ along with GCC; see the file COPYING3.  If not see
 false) >= 2)
 #endif
 
+/* Return FALSE iff the COND_BB ends with a conditional whose result is not a
+   known constant.  */
+
+static bool
+known_succ_p (basic_block cond_bb)
+{
+  gcond *cond = safe_dyn_cast  (*gsi_last_bb (cond_bb));
+
+  if (!cond)
+return true;
+
+  return (CONSTANT_CLASS_P (gimple_cond_lhs (cond))
+ && CONSTANT_CLASS_P (gimple_cond_rhs (cond)));
+}
+
 /* This pass combines COND_EXPRs to simplify control flow.  It
currently recognizes bit tests and comparisons in chains that
represent logical and or logical or of two COND_EXPRs.
@@ -70,25 +85,34 @@ along with GCC; see the file COPYING3.  If not see
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t->dest == *then_bb && e->dest == *else_bb)
+   || (t->dest == *else_bb && e->dest == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   gcc_assert (inner_taken->dest == outer2->dest);
 
   if (outer_to_inner_bb == inner_cond_bb
-  && constant_condition_p (outer_cond_bb))
+  && known_succ_p (outer_cond_bb))
 {
   /* Path outer_cond_bb->(outer2) needs to be merged into path
 outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
@@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer_to_inner->probability = profile_probability::always ();
   outer2->probability = profile_probability::never ();
 }
-  else if (constant_condition_p (inner_cond_bb))
+  else if (known_succ_p (inner_cond_bb))
 {
   /* Path inner_cond_bb->(inner_taken) needs to be merged into path
 outer_cond_bb->(outer2).  We've accumulated the probabilities from
@@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-ef

[gcc r15-4593] SVE intrinsics: Fold svsra with op1 all zeros to svlsr/svasr.

2024-10-24 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:f6fbc0d2422ce9bea6a23226f4a13a76ffd1784b

commit r15-4593-gf6fbc0d2422ce9bea6a23226f4a13a76ffd1784b
Author: Jennifer Schmitz 
Date:   Thu Oct 17 02:31:47 2024 -0700

SVE intrinsics: Fold svsra with op1 all zeros to svlsr/svasr.

A common idiom in intrinsics loops is to have accumulator intrinsics
in an unrolled loop with an accumulator initialized to zero at the 
beginning.
Propagating the initial zero accumulator into the first iteration
of the loop and simplifying the first accumulate instruction is a
desirable transformation that we should teach GCC.
Therefore, this patch folds svsra to svlsr/svasr if op1 is all zeros,
producing the lower latency instructions LSR/ASR instead of USRA/SSRA.
We implemented this optimization in svsra_impl::fold.
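
The idiom reads roughly like the following ACLE sketch (my own, assuming
SVE2 and <arm_sve.h>); with this fold, the first svsra on the zero
accumulator degrades to a plain shift:

#include <arm_sve.h>

svuint32_t accumulate (svuint32_t a, svuint32_t b)
{
  svuint32_t acc = svdup_u32 (0);   /* zero-initialised accumulator */
  acc = svsra (acc, a, 2);          /* first accumulate: now folds to svlsr (LSR) */
  acc = svsra (acc, b, 2);          /* later accumulates stay as USRA */
  return acc;
}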

Tests were added to check the produced assembly for use of LSR/ASR.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
* config/aarch64/aarch64-sve-builtins-sve2.cc
(svsra_impl::fold): Fold svsra to svlsr/svasr if op1 is all zeros.

gcc/testsuite/
* gcc.target/aarch64/sve2/acle/asm/sra_s32.c: New test.
* gcc.target/aarch64/sve2/acle/asm/sra_s64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/sra_u32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/sra_u64.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 28 ++
 .../gcc.target/aarch64/sve2/acle/asm/sra_s32.c |  9 +++
 .../gcc.target/aarch64/sve2/acle/asm/sra_s64.c |  9 +++
 .../gcc.target/aarch64/sve2/acle/asm/sra_u32.c |  9 +++
 .../gcc.target/aarch64/sve2/acle/asm/sra_u64.c |  9 +++
 5 files changed, 64 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 6a20a613f832..ddd6e466ee3a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -417,6 +417,34 @@ public:
 
 class svsra_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+/* Fold to svlsr/svasr if op1 is all zeros.  */
+tree op1 = gimple_call_arg (f.call, 0);
+if (!integer_zerop (op1))
+  return NULL;
+function_instance instance ("svlsr", functions::svlsr,
+   shapes::binary_uint_opt_n, MODE_n,
+   f.type_suffix_ids, GROUP_none, PRED_x);
+if (!f.type_suffix (0).unsigned_p)
+  {
+   instance.base_name = "svasr";
+   instance.base = functions::svasr;
+  }
+gcall *call = f.redirect_call (instance);
+/* Add a ptrue as predicate, because unlike svsra, svlsr/svasr are
+   predicated intrinsics.  */
+gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+/* For svsra, the shift amount (imm3) is uint64_t for all function types,
+   but for svlsr/svasr, imm3 has the same width as the function type.  */
+tree imm3 = gimple_call_arg (f.call, 2);
+tree imm3_prec = wide_int_to_tree (f.scalar_type (0),
+  wi::to_widest (imm3));
+gimple_call_set_arg (call, 2, imm3_prec);
+return call;
+  }
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
index ac992dc7b1c6..86cf4bd8137f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s32.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_32_s32_tied2, svint32_t,
 TEST_UNIFORM_Z (sra_32_s32_untied, svint32_t,
z0 = svsra_n_s32 (z1, z2, 32),
z0 = svsra (z1, z2, 32))
+
+/*
+** sra_2_s32_zeroop1:
+** asr z0\.s, z1\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (sra_2_s32_zeroop1, svint32_t,
+   z0 = svsra_n_s32 (svdup_s32 (0), z1, 2),
+   z0 = svsra (svdup_s32 (0), z1, 2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
index 9ea5657ab88d..7b39798ba1d5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_s64.c
@@ -91,3 +91,12 @@ TEST_UNIFORM_Z (sra_64_s64_tied2, svint64_t,
 TEST_UNIFORM_Z (sra_64_s64_untied, svint64_t,
z0 = svsra_n_s64 (z1, z2, 64),
z0 = svsra (z1, z2, 64))
+
+/*
+** sra_2_s64_zeroop1:
+** asr z0\.d, z1\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (sra_2_s64_zeroop1, svint64_t,
+   z0 = svsra_n_s64 (svdup_s64 (0), z1, 2),
+   z0 = svsra (svdup_s64 (0), z1, 2))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sra_u32.c 
b/gcc/testsuite/gcc.

[gcc/aoliva/heads/testme] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 3eb8edeb0ee0... ifcombine across noncontiguous blocks

It previously pointed to:

 c0ef27f42446... ifcombine across noncontiguous blocks

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  c0ef27f... ifcombine across noncontiguous blocks


Summary of changes (added commits):
---

  3eb8ede... ifcombine across noncontiguous blocks


[gcc/aoliva/heads/testbase] (705 commits) SVE intrinsics: Fold division and multiplication by -1 to n

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 fc40202c1ac5... SVE intrinsics: Fold division and multiplication by -1 to n

It previously pointed to:

 85910e650a61... x86: Extend AVX512 Vectorization for Popcount in Various Mo

Diff:

Summary of changes (added commits):
---

  fc40202... SVE intrinsics: Fold division and multiplication by -1 to n (*)
  90e38c4... SVE intrinsics: Add constant folding for svindex. (*)
  078f7c4... [PATCH] RISC-V: override alignment of function/jump/loop (*)
  a616b7e... libffi: LoongArch: Fix soft-float builds of libffi (*)
  8df549f... testsuite: Fix up pr116488.c and pr117226.c tests [PR116488 (*)
  03b469e... RISC-V: Add testcases for form 4 of signed vector SAT_ADD (*)
  fa546b6... Daily bump. (*)
  820464e... aarch64: Fix warning in aarch64_ptrue_reg (*)
  36e3e68... match: Reject non-const internal functions [PR117260] (*)
  5467f5b... ginclude: stdalign.h should define __xxx_is_defined macros  (*)
  f342d66... top-level: Add pull request template for Forgejo (*)
  779c039... jit: reset state in varasm.cc [PR117275] (*)
  9ffcf1f... aarch64: Improve scalar mode popcount expansion by using SV (*)
  774ad67... Implement operator_pointer_diff::fold_range (*)
  4b0f238... libstdc++: Add -D_GLIBCXX_ASSERTIONS default for -O0 to API (*)
  5a661ec... libstdc++: Add GLIBCXX_TESTSUITE_STDS example to docs (*)
  f565063... diagnostics: implement buffering for non-textual formats [P (*)
  de2dc62... libstdc++: Replace std::__to_address in C++20 branch in  array_type_nelts_minus_one (*)
  f8687bc... libbacktrace: don't get confused by overlapping address ran (*)
  aaa855f... hppa: Fix up pa.opt.urls (*)
  1f07dea... Handle GFC_STD_UNSIGNED like a standard in error messages. (*)
  44a81aa... hppa: Add LRA support (*)
  b039d06... [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generati (*)
  212d868... [PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy (*)
  f244492... [PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand (*)
  3a12ac4... i386: Fix the order of operands in andn3 [PR (*)
  d0a9ae1... libstdc++: Reuse std::__assign_one in  ( (*)
  4d8373f... RISC-V: Add testcases for form 4 of vector signed SAT_SUB (*)
  b976292... RISC-V: Add testcases for form 3 of vector signed SAT_SUB (*)
  5920bc8... Match: Support form 3 for vector signed integer SAT_SUB (*)
  72d24d2... RISC-V: Add testcases for form 2 of vector signed SAT_SUB (*)
  5667400... tree-optimization/116290 - fix compare-debug issue in ldist (*)
  b717c46... SH: Fix cost estimation of mem load/store (*)
  7ec8b4b... SH: Add -fno-math-errno to fsca,fsrra tests. (*)
  c21402e... Daily bump. (*)
  90a4851... libstdc++: testsuite: adjust name_fortify test for pre-defi (*)
  27f6b37... libstdc++: Fix ranges::copy_backward for a single memcpyabl (*)
  2ef62aa... MAINTAINERS: Add myself to write after approval (*)
  a4eec6c... Revert "c++: Fix overeager Woverloaded-virtual with convers (*)
  f0f1155... m68k: replace reload_in_progress by reload_in_progress || l (*)
  1506027... tree-optimization/116481 - avoid building function_type[] (*)
  3269a72... Fortran: Use OpenACC's acc_on_device builtin, fix OpenMP' _ (*)
  c38385d... [RISC-V] Avoid unnecessary extensions when value is already (*)
  f08af08... Daily bump. (*)
  d09131e... Unsigned constants for ISO_FORTRAN_ENV and ISO_C_BINDING. (*)
  a9173a5... vect: Fix inconsistency in fully-masked lane-reducing op ge (*)
  f54d42e... tree-optimization/117104 - add missed guards to max(a,b) != (*)
  ba773a8... RISC-V] Slightly improve broadcasting small constants into  (*)
  34b77d1... Fortran/OpenMP: Warn when mapping polymorphic variables (*)
  5cf85a2... bootstrap: Fix genmatch build where system gcc defaults to  (*)
  c1034d7... gcc.target/i386/pr55583.c: Use long long for 64-bit integer (*)
  80d0e10... gcc.target/i386/pr115749.c: Use word_mode integer (*)
  a4ce868... gcc.target/i386/invariant-ternlog-1.c: Also scan (%edx) (*)
  c397a8c... libcpp, genmatch: Use gcc_diag instead of printf for libcpp (*)
  c20c9d8... Fortran: Unify gfc_get_location handling; fix expr->ts bug (*)
  a564261... testsuite/i386: Add vector sat_sub testcases [PR112600] (*)
  2a865ad... MAINTAINERS: Add myself to write after approval (*)
  60163c8... c++: Fix overeager Woverloaded-virtual with conversion oper (*)
  c4af4fe... RISC-V: Add testcases for form 1 of vector signed SAT_SUB (*)
  2a7f490... RISC-V: Implement vector SAT_SUB for signed integer (*)
  d339dbe... Vect: Try the pattern of vector signed integer SAT_SUB (*)
  b4f2fcc... Match: Support form 1 for vector signed integer SAT_SUB (*)
  b45e3ad... Daily bump. (*)
  54b3f8e... Introduce GFC_STD_UNSIGNED. (*)
  c92477e... gcc.target/i386: Replace long with long long (*)
  69f91fb... g++.target/i386/pr105953.C: Skip for x32 (*)
  3ba65a7... gcc.target/i386/pr115407.c: Only run for lp64 (*)
  3b095ac... Fix thinko in previous change (*)
  01c4d1f... libstdc++: Rearrange std::move_it

[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:3e47b273d7c0ebe66a739e2e73c98d99b14cff2a

commit 3e47b273d7c0ebe66a739e2e73c98d99b14cff2a
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:26 2024 -0300

introduce ifcombine_replace_cond

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 130 +-
 1 file changed, 59 insertions(+), 71 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index eae32e1e1999..d2be5e81c671 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -399,6 +399,50 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer2->probability = profile_probability::never ();
 }
 
+/* Replace the conditions in INNER_COND with COND.
+   Replace OUTER_COND with a constant.  */
+
+static tree
+ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
+   gcond *outer_cond, bool outer_inv,
+   tree cond, bool must_canon, tree)
+{
+  tree ret = cond;
+  bool result_inv = inner_inv;
+
+  if (result_inv)
+cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
+
+  if (tree tcanon = canonicalize_cond_expr_cond (cond))
+cond = tcanon;
+  else if (must_canon)
+return NULL_TREE;
+
+{
+  if (!is_gimple_condexpr_for_cond (cond))
+   {
+ gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
+ cond = force_gimple_operand_gsi_1 (&gsi, cond,
+is_gimple_condexpr_for_cond,
+NULL, true, GSI_SAME_STMT);
+   }
+  gimple_cond_set_condition_from_tree (inner_cond, cond);
+  update_stmt (inner_cond);
+
+  /* Leave CFG optimization to cfg_cleanup.  */
+  gimple_cond_set_condition_from_tree (outer_cond,
+  outer_inv
+  ? boolean_false_node
+  : boolean_true_node);
+  update_stmt (outer_cond);
+}
+
+  update_profile_after_ifcombine (gimple_bb (inner_cond),
+ gimple_bb (outer_cond));
+
+  return ret;
+}
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv indicate whether the conditions are inverted.
@@ -445,26 +489,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
   t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t);
   t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
 true, GSI_SAME_STMT);
-  t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR,
-  boolean_type_node, t2, t);
-  t = canonicalize_cond_expr_cond (t);
-  if (!t)
-   return false;
-  if (!is_gimple_condexpr_for_cond (t))
-   {
- gsi = gsi_for_stmt (inner_cond);
- t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond,
- NULL, true, GSI_SAME_STMT);
-   }
-  gimple_cond_set_condition_from_tree (inner_cond, t);
-  update_stmt (inner_cond);
 
-  /* Leave CFG optimization to cfg_cleanup.  */
-  gimple_cond_set_condition_from_tree (outer_cond,
-   outer_inv ? boolean_false_node : boolean_true_node);
-  update_stmt (outer_cond);
+  t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t);
 
-  update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
+  if (!ifcombine_replace_cond (inner_cond, inner_inv,
+  outer_cond, outer_inv,
+  t, true, NULL_TREE))
+   return false;
 
   if (dump_file)
{
@@ -484,9 +515,8 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
  In that case remove the outer test and change the inner one to
  test for name & (bits1 | bits2) != 0.  */
   else if (recognize_bits_test (inner_cond, &name1, &bits1, !inner_inv)
-  && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv))
+  && recognize_bits_test (outer_cond, &name2, &bits2, !outer_inv))
 {
-  gimple_stmt_iterator gsi;
   tree t;
 
   if ((TREE_CODE (name1) == SSA_NAME
@@ -529,33 +559,14 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
  bits1 = fold_convert (TREE_TYPE (bits2), bits1);
}
 
-  /* Do it.  */
-  gsi = gsi_for_stmt (inner_cond);
   t = fold_build2 (BIT_IOR_EXPR, TREE_TYPE (name1), bits1, bits2);
-  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-   true, GSI_SAME_STMT);
   t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t);
-  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-   true, GSI_SAME_STMT);
-  t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR, boolean_type_node, t,
+  t = fold_build2 (EQ_EXPR, boolean_type_node, t,
  

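As a hedged illustration (editor's addition, not taken from the patch itself) of the transformation that the new ifcombine_replace_cond helper centralizes: the combined test is installed on the inner gcond, the outer gcond is rewritten to a constant, and removal of the then-trivial outer branch is left to CFG cleanup.  For plain C input along these lines (names are illustrative only):

/* Editor's sketch, not from the patch.  */
int f (unsigned x)
{
  if (x & 1)        /* outer condition: rewritten to a constant */
    if (x & 4)      /* inner condition: receives the combined test */
      return 1;
  return 0;
}

the pass effectively replaces the two bit tests with a single check roughly equivalent to (x & 5) == 5 on the inner condition, with the outer condition degenerated to true.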
[gcc(refs/users/aoliva/heads/testme)] drop redundant ifcombine_ifandif parm

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:ebc7c0f5cc561a8dd26ab36dec48c2c13b6a9c24

commit ebc7c0f5cc561a8dd26ab36dec48c2c13b6a9c24
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:24 2024 -0300

drop redundant ifcombine_ifandif parm

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index ed20a231951a..eae32e1e1999 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -401,13 +401,12 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
-   inner_inv, outer_inv and result_inv indicate whether the conditions
-   are inverted.
+   inner_inv, outer_inv indicate whether the conditions are inverted.
Returns true if the edges to the common else basic-block were merged.  */
 
 static bool
 ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
-  basic_block outer_cond_bb, bool outer_inv, bool result_inv)
+  basic_block outer_cond_bb, bool outer_inv)
 {
   gimple_stmt_iterator gsi;
   tree name1, name2, bit1, bit2, bits1, bits2;
@@ -693,8 +692,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false,
-   false);
+  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, false);
 }
 
   /* And a version where the outer condition is negated.  */
@@ -711,8 +709,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true,
-   false);
+  return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb, true);
 }
 
   /* The || form is characterized by a common then_bb with the
@@ -731,8 +728,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true,
-   true);
+  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, true);
 }
 
   /* And a version where the outer condition is negated.  */
@@ -748,8 +744,7 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
   
 ...
*/
-  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false,
-   true);
+  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false);
 }
 
   return false;


[gcc(refs/users/aoliva/heads/testme)] ifcombine across noncontiguous blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:c0ef27f424469ce14023e5e65e434ade2ffc1dc0

commit c0ef27f424469ce14023e5e65e434ade2ffc1dc0
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:33 2024 -0300

ifcombine across noncontiguous blocks

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 144 --
 1 file changed, 115 insertions(+), 29 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 6be5d969de88..970556a7801c 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -50,6 +50,21 @@ along with GCC; see the file COPYING3.  If not see
 false) >= 2)
 #endif
 
+/* Return FALSE iff the COND_BB ends with a conditional whose result is not a
+   known constant.  */
+
+static bool
+known_succ_p (basic_block cond_bb)
+{
+  gcond *cond = safe_dyn_cast  (*gsi_last_bb (cond_bb));
+
+  if (!cond)
+return true;
+
+  return (CONSTANT_CLASS_P (gimple_cond_lhs (cond))
+ && CONSTANT_CLASS_P (gimple_cond_rhs (cond)));
+}
+
 /* This pass combines COND_EXPRs to simplify control flow.  It
currently recognizes bit tests and comparisons in chains that
represent logical and or logical or of two COND_EXPRs.
@@ -70,25 +85,34 @@ along with GCC; see the file COPYING3.  If not see
is left to CFG cleanup and DCE.  */
 
 
-/* Recognize a if-then-else CFG pattern starting to match with the
-   COND_BB basic-block containing the COND_EXPR.  The recognized
-   then end else blocks are stored to *THEN_BB and *ELSE_BB.  If
-   *THEN_BB and/or *ELSE_BB are already set, they are required to
-   match the then and else basic-blocks to make the pattern match.
-   Returns true if the pattern matched, false otherwise.  */
+/* Recognize a if-then-else CFG pattern starting to match with the COND_BB
+   basic-block containing the COND_EXPR.  If !SUCCS_ANY, the condition must not
+   resolve to a constant for a match.  Returns true if the pattern matched,
+   false otherwise.  In case of a !SUCCS_ANY match, the recognized then end
+   else blocks are stored to *THEN_BB and *ELSE_BB.  If *THEN_BB and/or
+   *ELSE_BB are already set, they are required to match the then and else
+   basic-blocks to make the pattern match.  If SUCCS_ANY, *THEN_BB and *ELSE_BB
+   will not be filled in, and they will be found to match even if reversed.  */
 
 static bool
 recognize_if_then_else (basic_block cond_bb,
-   basic_block *then_bb, basic_block *else_bb)
+   basic_block *then_bb, basic_block *else_bb,
+   bool succs_any = false)
 {
   edge t, e;
 
-  if (EDGE_COUNT (cond_bb->succs) != 2)
+  if (EDGE_COUNT (cond_bb->succs) != 2
+  || (!succs_any && known_succ_p (cond_bb)))
 return false;
 
   /* Find the then/else edges.  */
   t = EDGE_SUCC (cond_bb, 0);
   e = EDGE_SUCC (cond_bb, 1);
+
+  if (succs_any)
+return ((t == *then_bb && e == *else_bb)
+   || (t == *else_bb && e == *then_bb));
+
   if (!(t->flags & EDGE_TRUE_VALUE))
 std::swap (t, e);
   if (!(t->flags & EDGE_TRUE_VALUE)
@@ -390,7 +414,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   gcc_assert (inner_taken->dest == outer2->dest);
 
   if (outer_to_inner_bb == inner_cond_bb
-  && constant_condition_p (outer_cond_bb))
+  && known_succ_p (outer_cond_bb))
 {
   /* Path outer_cond_bb->(outer2) needs to be merged into path
 outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
@@ -414,7 +438,7 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer_to_inner->probability = profile_probability::always ();
   outer2->probability = profile_probability::never ();
 }
-  else if (constant_condition_p (inner_cond_bb))
+  else if (known_succ_p (inner_cond_bb))
 {
   /* Path inner_cond_bb->(inner_taken) needs to be merged into path
 outer_cond_bb->(outer2).  We've accumulated the probabilities from
@@ -881,19 +905,21 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
 /* Helper function for tree_ssa_ifcombine_bb.  Recognize a CFG pattern and
dispatch to the appropriate if-conversion helper for a particular
set of INNER_COND_BB, OUTER_COND_BB, THEN_BB and ELSE_BB.
-   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.  */
+   PHI_PRED_BB should be one of INNER_COND_BB, THEN_BB or ELSE_BB.
+   OUTER_SUCC_BB is the successor of OUTER_COND_BB on the path towards
+   INNER_COND_BB.  */
 
 static bool
 tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
 basic_block then_bb, basic_block else_bb,
-basic_block phi_pred_bb)
+basic_block phi_pred_bb, basic_block outer_succ_bb)
 {
   /* The && form is characterized by a common else_bb with
  the two edges leading to it mergable.  The latter is
  guaranteed by matching PHI arguments in the else_bb and
  the inner cond_bb having no side-effects.  */
   if (phi_pr
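
As a hedged illustration (editor's addition, not from the patch) of what "noncontiguous" means here: the outer and inner conditions may now be separated by intervening blocks, instead of the inner test having to sit in the immediate successor of the outer one.  A minimal C sketch, with illustrative names:

int f (int a, int b, int c)
{
  if (a > 0)
    {
      int t = b * c + 1;   /* intervening, side-effect-free computation */
      if (t > 10)
        return 1;
    }
  return 0;
}

Whether the two tests are actually combined still depends on the pass's other checks (no stores or side effects in the intervening block, and the conditions being representable as a combined test).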

[gcc(refs/users/aoliva/heads/testme)] introduce ifcombine_replace_cond

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:11bf09a22577a9ed775a3a47a70afe5ee063d072

commit 11bf09a22577a9ed775a3a47a70afe5ee063d072
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:30 2024 -0300

introduce ifcombine_replace_cond

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 187 +-
 1 file changed, 183 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index d9595132512f..6be5d969de88 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "attribs.h"
 #include "asan.h"
+#include "bitmap.h"
 
 #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
 #define LOGICAL_OP_NON_SHORT_CIRCUIT \
@@ -445,16 +446,72 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 }
 }
 
-/* Replace the conditions in INNER_COND with COND.
-   Replace OUTER_COND with a constant.  */
+/* Set NAME's bit in USED if OUTER dominates it.  */
+
+static void
+ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
+{
+  if (SSA_NAME_IS_DEFAULT_DEF (name))
+return;
+
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  basic_block bb = gimple_bb (def);
+  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
+return;
+
+  bitmap_set_bit (used, SSA_NAME_VERSION (name));
+}
+
+/* Data structure passed to ifcombine_mark_ssa_name.  */
+struct ifcombine_mark_ssa_name_t
+{
+  /* SSA_NAMEs that have been referenced.  */
+  bitmap used;
+  /* Dominating block of DEFs that might need moving.  */
+  basic_block outer;
+};
+
+/* Mark in DATA->used any SSA_NAMEs used in *t.  */
+
+static tree
+ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
+{
+  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
+
+  if (*t && TREE_CODE (*t) == SSA_NAME)
+ifcombine_mark_ssa_name (data->used, *t, data->outer);
+
+  return NULL;
+}
+
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
 
 static tree
 ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
gcond *outer_cond, bool outer_inv,
-   tree cond, bool must_canon, tree)
+   tree cond, bool must_canon, tree cond2)
 {
   tree ret = cond;
-  bool result_inv = inner_inv;
+  if (cond2)
+ret = fold_build2 (TRUTH_AND_EXPR, TREE_TYPE (ret), ret, cond2);
+
+  /* Split cond into cond2 if they're contiguous.  ??? We might be able to
+ handle ORIF as well, inverting both conditions, but it's not clear that
+ this would be enough, and it never comes up.  */
+  if (!cond2
+  && TREE_CODE (cond) == TRUTH_ANDIF_EXPR
+  && single_pred (gimple_bb (inner_cond)) == gimple_bb (outer_cond))
+{
+  cond2 = TREE_OPERAND (cond, 1);
+  cond = TREE_OPERAND (cond, 0);
+}
+
+  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
+  != gimple_bb (outer_cond));
+  bool result_inv = outer_p ? outer_inv : inner_inv;
 
   if (result_inv)
 cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
@@ -464,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
   else if (must_canon)
 return NULL_TREE;
 
+  if (outer_p)
+{
+  {
+   auto_bitmap used;
+   basic_block outer_bb = gimple_bb (outer_cond);
+
+   /* Mark SSA DEFs that are referenced by cond and may thus need to be
+  moved to outer.  */
+   {
+ ifcombine_mark_ssa_name_t data = { used, outer_bb };
+ walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
+   }
+
+   if (!bitmap_empty_p (used))
+ {
+   /* Iterate up from inner_cond, moving DEFs identified as used by
+  cond, and marking USEs in the DEFs for moving as well.  */
+   gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond);
+   for (basic_block bb = gimple_bb (inner_cond);
+bb != outer_bb; bb = single_pred (bb))
+ {
+   for (gimple_stmt_iterator gsitr = gsi_last_bb (bb);
+!gsi_end_p (gsitr); gsi_prev (&gsitr))
+ {
+   gimple *stmt = gsi_stmt (gsitr);
+   bool move = false;
+   tree t;
+   ssa_op_iter it;
+
+   FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_DEF)
+ if (bitmap_bit_p (used, SSA_NAME_VERSION (t)))
+   {
+ move = true;
+ break;
+   }
+
+   if (!move)
+ continue;
+
+   /* Mark uses in STMT before moving it.  */
+   FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, 

[gcc(refs/users/aoliva/heads/testme)] allow vuses in ifcombine blocks

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:6c0e7c5e3db8ae23e01f17fbe37282cac27cc5b7

commit 6c0e7c5e3db8ae23e01f17fbe37282cac27cc5b7
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:21 2024 -0300

allow vuses in ifcombine blocks

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 6a3bc99190d9..ed20a231951a 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb)
   enum tree_code rhs_code;
   if (gimple_has_side_effects (stmt)
  || gimple_could_trap_p (stmt)
- || gimple_vuse (stmt)
+ || gimple_vdef (stmt)
  /* We need to rewrite stmts with undefined overflow to use
 unsigned arithmetic but cannot do so for signed division.  */
  || ((ass = dyn_cast  (stmt))
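
A hedged note on the one-line change above: bb_no_side_effects_p previously rejected any block containing a statement with a virtual use (i.e. any memory load), while after this change only statements with a virtual definition (memory-modifying statements) disqualify a block.  A minimal sketch, not from the patch, of the kind of case this admits:

extern int g;

/* The load of the global g is a VUSE but not a VDEF, so the inner
   block no longer blocks if-combining on that account (the remaining
   checks, such as gimple_could_trap_p, still apply).  */
int f (int a)
{
  if (a > 0)
    if (g > 10)
      return 1;
  return 0;
}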


[gcc(refs/users/aoliva/heads/testme)] adjust update_profile_after_ifcombine for noncontiguous ifcombine

2024-10-24 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:d586ea36dabe213c3d378fc8ed699ecba7b01e66

commit d586ea36dabe213c3d378fc8ed699ecba7b01e66
Author: Alexandre Oliva 
Date:   Thu Oct 24 05:25:28 2024 -0300

adjust update_profile_after_ifcombine for noncontiguous ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 94 +++
 1 file changed, 70 insertions(+), 24 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index d2be5e81c671..d9595132512f 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -356,14 +356,28 @@ recognize_bits_test (gcond *cond, tree *name, tree *bits, bool inv)
 }
 
 
-/* Update profile after code in outer_cond_bb was adjusted so
-   outer_cond_bb has no condition.  */
+/* Update profile after code in either outer_cond_bb or inner_cond_bb was
+   adjusted so that it has no condition.  */
 
 static void
 update_profile_after_ifcombine (basic_block inner_cond_bb,
basic_block outer_cond_bb)
 {
-  edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb);
+  /* In the following we assume that inner_cond_bb has single predecessor.  */
+  gcc_assert (single_pred_p (inner_cond_bb));
+
+  basic_block outer_to_inner_bb = inner_cond_bb;
+  profile_probability prob = profile_probability::always ();
+  for (;;)
+{
+  basic_block parent = single_pred (outer_to_inner_bb);
+  prob *= find_edge (parent, outer_to_inner_bb)->probability;
+  if (parent == outer_cond_bb)
+   break;
+  outer_to_inner_bb = parent;
+}
+
+  edge outer_to_inner = find_edge (outer_cond_bb, outer_to_inner_bb);
   edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner
 ? EDGE_SUCC (outer_cond_bb, 1)
 : EDGE_SUCC (outer_cond_bb, 0));
@@ -374,29 +388,61 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
 std::swap (inner_taken, inner_not_taken);
   gcc_assert (inner_taken->dest == outer2->dest);
 
-  /* In the following we assume that inner_cond_bb has single predecessor.  */
-  gcc_assert (single_pred_p (inner_cond_bb));
-
-  /* Path outer_cond_bb->(outer2) needs to be merged into path
- outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
- and probability of inner_not_taken updated.  */
-
-  inner_cond_bb->count = outer_cond_bb->count;
+  if (outer_to_inner_bb == inner_cond_bb
+  && constant_condition_p (outer_cond_bb))
+{
+  /* Path outer_cond_bb->(outer2) needs to be merged into path
+outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
+and probability of inner_not_taken updated.  */
+
+  inner_cond_bb->count = outer_cond_bb->count;
+
+  /* Handle special case where inner_taken probability is always. In this
+case we know that the overall outcome will be always as well, but
+combining probabilities will be conservative because it does not know
+that outer2->probability is inverse of
+outer_to_inner->probability.  */
+  if (inner_taken->probability == profile_probability::always ())
+   ;
+  else
+   inner_taken->probability = outer2->probability
+ + outer_to_inner->probability * inner_taken->probability;
+  inner_not_taken->probability = profile_probability::always ()
+   - inner_taken->probability;
 
-  /* Handle special case where inner_taken probability is always. In this case
- we know that the overall outcome will be always as well, but combining
- probabilities will be conservative because it does not know that
- outer2->probability is inverse of outer_to_inner->probability.  */
-  if (inner_taken->probability == profile_probability::always ())
-;
+  outer_to_inner->probability = profile_probability::always ();
+  outer2->probability = profile_probability::never ();
+}
+  else if (constant_condition_p (inner_cond_bb))
+{
+  /* Path inner_cond_bb->(inner_taken) needs to be merged into path
+outer_cond_bb->(outer2).  We've accumulated the probabilities from
+outer_cond_bb->(outer)->...->inner_cond_bb in prob, so we have to
+adjust that by inner_taken, and make inner unconditional.  */
+
+  prob *= inner_taken->probability;
+  outer2->probability += prob;
+  outer_to_inner->probability = profile_probability::always ()
+   - outer2->probability;
+
+  inner_taken->probability = profile_probability::never ();
+  inner_not_taken->probability = profile_probability::always ();
+}
   else
-inner_taken->probability = outer2->probability + outer_to_inner->probability
-  * inner_taken->probability;
-  inner_not_taken->probability = profile_probability::always ()
-- inner_taken->probability;
-
-  outer_to_inner->probability = profile_probability::always ();
-  outer2->probability = profile_probability::never ();
+{
+  /* We've moved part of the inner cond to outer, but we d
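
As a worked instance (editor's addition, illustrative numbers only) of the probability update kept above: if the outer test branches towards the inner block with probability 60% and to outer2 with 40%, and the inner test was previously taken 50% of the time, the merged inner_taken probability becomes 40% + 60% * 50% = 70%, leaving inner_not_taken with the remaining 30%.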

[gcc r15-4590] SVE intrinsics: Add constant folding for svindex.

2024-10-24 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7

commit r15-4590-g90e38c4ffad086a82635e8ea9bf0e7e9e02f1ff7
Author: Jennifer Schmitz 
Date:   Tue Oct 15 07:58:14 2024 -0700

SVE intrinsics: Add constant folding for svindex.

This patch folds svindex with constant arguments into a vector series.
We implemented this in svindex_impl::fold using the function 
build_vec_series.
For example,
svuint64_t f1 ()
{
  return svindex_u64 (10, 3);
}
compiled with -O2 -march=armv8.2-a+sve, is folded to {10, 13, 16, ...}
in the gimple pass lower.
This optimization benefits cases where svindex is used in combination with
other gimple-level optimizations.
For example,
svuint64_t f2 ()
{
return svmul_x (svptrue_b64 (), svindex_u64 (10, 3), 5);
}
has previously been compiled to
f2:
index   z0.d, #10, #3
mul z0.d, z0.d, #5
ret
Now, it is compiled to
f2:
mov x0, 50
index   z0.d, x0, #15
ret

We added test cases checking
- the application of the transform during gimple for constant arguments,
- the interaction with another gimple-level optimization.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
* config/aarch64/aarch64-sve-builtins-base.cc
(svindex_impl::fold): Add constant folding.

gcc/testsuite/
* gcc.target/aarch64/sve/index_const_fold.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc| 14 +
 .../gcc.target/aarch64/sve/index_const_fold.c  | 35 ++
 2 files changed, 49 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 70bd83005d7c..e47acb67aeea 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1301,6 +1301,20 @@ public:
 
 class svindex_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+/* Apply constant folding if base and step are integer constants.  */
+tree vec_type = TREE_TYPE (f.lhs);
+tree base = gimple_call_arg (f.call, 0);
+tree step = gimple_call_arg (f.call, 1);
+if (TREE_CODE (base) != INTEGER_CST || TREE_CODE (step) != INTEGER_CST)
+  return NULL;
+return gimple_build_assign (f.lhs,
+   build_vec_series (vec_type, base, step));
+  }
+
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
new file mode 100644
index ..7abb803f58ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include 
+#include 
+
+#define INDEX_CONST(TYPE, TY)  \
+  sv##TYPE f_##TY##_index_const () \
+  {\
+return svindex_##TY (10, 3);   \
+  }
+
+#define MULT_INDEX(TYPE, TY)   \
+  sv##TYPE f_##TY##_mult_index ()  \
+  {\
+return svmul_x (svptrue_b8 (), \
+   svindex_##TY (10, 3),   \
+   5); \
+  }
+
+#define ALL_TESTS(TYPE, TY)\
+  INDEX_CONST (TYPE, TY)   \
+  MULT_INDEX (TYPE, TY)
+
+ALL_TESTS (uint8_t, u8)
+ALL_TESTS (uint16_t, u16)
+ALL_TESTS (uint32_t, u32)
+ALL_TESTS (uint64_t, u64)
+ALL_TESTS (int8_t, s8)
+ALL_TESTS (int16_t, s16)
+ALL_TESTS (int32_t, s32)
+ALL_TESTS (int64_t, s64)
+
+/* { dg-final { scan-tree-dump-times "return \\{ 10, 13, 16, ... \\}" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "return \\{ 50, 65, 80, ... \\}" 8 "optimized" } } */


[gcc r15-4591] SVE intrinsics: Fold division and multiplication by -1 to neg

2024-10-24 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:fc40202c1ac5d585bb236cdaf3a3968927e970a0

commit r15-4591-gfc40202c1ac5d585bb236cdaf3a3968927e970a0
Author: Jennifer Schmitz 
Date:   Tue Oct 1 08:01:13 2024 -0700

SVE intrinsics: Fold division and multiplication by -1 to neg

Because a neg instruction has lower latency and higher throughput than
sdiv and mul, svdiv and svmul by -1 can be folded to svneg. For svdiv,
this is already implemented on the RTL level; for svmul, the
optimization was still missing.
This patch implements folding to svneg for both operations using the
gimple_folder. For svdiv, the transform is applied if the divisor is -1.
Svmul is folded if either of the operands is -1. A case distinction of
the predication is made to account for the fact that svneg_m has 3 arguments
(argument 0 holds the values for the inactive lanes), while svneg_x and
svneg_z have only 2 arguments.
Tests were added or adjusted to check the produced assembly and runtime
tests were added to check correctness.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
Fold division by -1 to svneg.
(svmul_impl::fold): Fold multiplication by -1 to svneg.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/asm/div_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/mul_s16.c: Adjust expected outcome.
* gcc.target/aarch64/sve/acle/asm/mul_s32.c: New test.
* gcc.target/aarch64/sve/acle/asm/mul_s64.c: Adjust expected outcome.
* gcc.target/aarch64/sve/acle/asm/mul_s8.c: Likewise.
* gcc.target/aarch64/sve/div_const_run.c: New test.
* gcc.target/aarch64/sve/mul_const_run.c: Likewise.
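
As a hedged illustration (editor's addition, not part of the patch), both functions below should now fold to svneg at gimple level and compile to a single NEG at -O2 -march=armv8.2-a+sve, instead of an SDIV or MUL:

#include <arm_sve.h>

/* Division by -1: previously only simplified at the RTL level.  */
svint64_t div_by_m1 (svbool_t pg, svint64_t x)
{
  return svdiv_x (pg, x, -1);
}

/* Multiplication by -1: newly folded to svneg by this patch.  */
svint64_t mul_by_m1 (svbool_t pg, svint64_t x)
{
  return svmul_x (pg, x, -1);
}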

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc| 73 ++
 .../gcc.target/aarch64/sve/acle/asm/div_s32.c  | 59 +
 .../gcc.target/aarch64/sve/acle/asm/mul_s16.c  |  5 +-
 .../gcc.target/aarch64/sve/acle/asm/mul_s32.c  | 46 +-
 .../gcc.target/aarch64/sve/acle/asm/mul_s64.c  |  5 +-
 .../gcc.target/aarch64/sve/acle/asm/mul_s8.c   |  7 +--
 .../gcc.target/aarch64/sve/div_const_run.c | 10 ++-
 .../gcc.target/aarch64/sve/mul_const_run.c | 10 ++-
 8 files changed, 187 insertions(+), 28 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index e47acb67aeea..327688756d1b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -768,6 +768,27 @@ public:
 if (integer_zerop (op1) || integer_zerop (op2))
   return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
+/* If the divisor is all integer -1, fold to svneg.  */
+tree pg = gimple_call_arg (f.call, 0);
+if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2))
+  {
+   function_instance instance ("svneg", functions::svneg,
+   shapes::unary, MODE_none,
+   f.type_suffix_ids, GROUP_none, f.pred);
+   gcall *call = f.redirect_call (instance);
+   unsigned offset_index = 0;
+   if (f.pred == PRED_m)
+ {
+   offset_index = 1;
+   gimple_call_set_arg (call, 0, op1);
+ }
+   else
+ gimple_set_num_ops (call, 5);
+   gimple_call_set_arg (call, offset_index, pg);
+   gimple_call_set_arg (call, offset_index + 1, op1);
+   return call;
+  }
+
 /* If the divisor is a uniform power of 2, fold to a shift
instruction.  */
 tree op2_cst = uniform_integer_cst_p (op2);
@@ -2047,12 +2068,37 @@ public:
 if (integer_zerop (op1) || integer_zerop (op2))
   return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
+/* If one of the operands is all integer -1, fold to svneg.  */
+tree pg = gimple_call_arg (f.call, 0);
+tree negated_op = NULL;
+if (integer_minus_onep (op2))
+  negated_op = op1;
+else if (integer_minus_onep (op1))
+  negated_op = op2;
+if (!f.type_suffix (0).unsigned_p && negated_op)
+  {
+   function_instance instance ("svneg", functions::svneg,
+   shapes::unary, MODE_none,
+   f.type_suffix_ids, GROUP_none, f.pred);
+   gcall *call = f.redirect_call (instance);
+   unsigned offset_index = 0;
+   if (f.pred == PRED_m)
+ {
+   offset_index = 1;
+   gimple_call_set_arg (call, 0, op1);
+ }
+   else
+ gimple_set_num_ops (call, 5);
+   gimple_call_set_arg (call, offset_index, pg);
+   gimple_call_set_arg (call, offset_index + 1, negated_op);
+   return call;
+  }
+
 /* If one of