[gcc r15-3137] ifcvt: disallow call instructions in noce_convert_multiple_sets [PR116358]

2024-08-23 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:c9e2d0ec6eabc2a6b8c00984b2b2bc48565bb99b

commit r15-3137-gc9e2d0ec6eabc2a6b8c00984b2b2bc48565bb99b
Author: Manolis Tsamis 
Date:   Thu Aug 22 02:59:11 2024 -0700

ifcvt: disallow call instructions in noce_convert_multiple_sets [PR116358]

Similar to not allowing jump instructions in the generated code, we
also shouldn't allow call instructions in noce_convert_multiple_sets.
In the case of PR116358 a libcall was generated from force_operand.

PR middle-end/116358

gcc/ChangeLog:

* ifcvt.cc (noce_convert_multiple_sets): Disallow call insns.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr116358.c: New test.

Diff:
---
 gcc/ifcvt.cc|  2 +-
 gcc/testsuite/gcc.target/aarch64/pr116358.c | 15 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index da59c9078919..b136d7dbbba3 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3550,7 +3550,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 return false;
 
   for (insn = seq; insn; insn = NEXT_INSN (insn))
-if (JUMP_P (insn)
+if (JUMP_P (insn) || CALL_P (insn)
|| recog_memoized (insn) == -1)
   return false;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116358.c 
b/gcc/testsuite/gcc.target/aarch64/pr116358.c
new file mode 100644
index ..0a5fd9e02b92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116358.c
@@ -0,0 +1,15 @@
+/* PR middle-end/116358 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long f(int b, int c, long long d)
+{
+  if (c) {
+long long bb = b;
+long long t2 = (bb < 16 ? bb : 16);
+d =  t2 - 16;
+  }
+  return d;
+}
+
+/* { dg-final { scan-assembler-not "bl" } } */


[gcc r15-3138] ifcvt: Do not overwrite results in noce_convert_multiple_sets [PR116372, PR116405]

2024-08-23 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:a9f5e23aba1a6f4ec32f1147b340a8145d827da9

commit r15-3138-ga9f5e23aba1a6f4ec32f1147b340a8145d827da9
Author: Manolis Tsamis 
Date:   Tue Aug 20 09:16:29 2024 +0200

ifcvt: Do not overwrite results in noce_convert_multiple_sets [PR116372, 
PR116405]

Now that more operations are allowed for noce_convert_multiple_sets,
it is possible that the same register appears multiple times as target
in a basic block.  After noce_convert_multiple_sets_1 is called we
potentially also emit register moves from temporaries back to the
original targets.  In some cases where the target registers overlap
with the block's condition, these register moves may overwrite
intermediate variables because they're emitted after the if-converted
code.  To address this issue we now iterate backwards and keep track
of seen registers when emitting these final register moves.

PR rtl-optimization/116372
PR rtl-optimization/116405

gcc/ChangeLog:

* ifcvt.cc (noce_convert_multiple_sets): Iterate backwards and track
target registers.

gcc/testsuite/ChangeLog:

* gcc.dg/pr116372.c: New test.
* gcc.dg/pr116405.c: New test.

Diff:
---
 gcc/ifcvt.cc| 22 ++
 gcc/testsuite/gcc.dg/pr116372.c | 13 +
 gcc/testsuite/gcc.dg/pr116405.c | 17 +
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index b136d7dbbba3..6487574c5149 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3515,10 +3515,24 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
  given an empty BB to convert, and we can't handle that.  */
   gcc_assert (!insn_info.is_empty ());
 
-  /* Now fixup the assignments.  */
-  for (unsigned i = 0; i < insn_info.length (); i++)
-if (insn_info[i]->target != insn_info[i]->temporary)
-  noce_emit_move_insn (insn_info[i]->target, insn_info[i]->temporary);
+  /* Now fixup the assignments.
+ PR116405: Iterate in reverse order and keep track of the targets so that
+ a move does not overwrite a subsequent value when multiple instructions
+ have the same target.  */
+  unsigned i;
+  noce_multiple_sets_info *info;
+  bitmap set_targets = BITMAP_ALLOC (&reg_obstack);
+  FOR_EACH_VEC_ELT_REVERSE (insn_info, i, info)
+{
+  gcc_checking_assert (REG_P (info->target));
+
+  if (info->target != info->temporary
+ && !bitmap_bit_p (set_targets, REGNO (info->target)))
+   noce_emit_move_insn (info->target, info->temporary);
+
+  bitmap_set_bit (set_targets, REGNO (info->target));
+}
+  BITMAP_FREE (set_targets);
 
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
diff --git a/gcc/testsuite/gcc.dg/pr116372.c b/gcc/testsuite/gcc.dg/pr116372.c
new file mode 100644
index ..e9878ac5042b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116372.c
@@ -0,0 +1,13 @@
+/* PR rtl-optimization/116372 */
+/* { dg-do run } */
+/* { dg-options "-O1" } */ 
+/* { dg-additional-options "-march=z13" { target s390x-*-* } } */
+
+long x = -0x7fff - 1;
+int main (void)
+{
+  long y = x % (-0xf - 1);
+  if (-0x7fff - 1 + y == x == 0)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr116405.c b/gcc/testsuite/gcc.dg/pr116405.c
new file mode 100644
index ..9223f15a2987
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116405.c
@@ -0,0 +1,17 @@
+/* PR rtl-optimization/116405 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-ssa-phiopt -fno-tree-dce" } */ 
+
+int printf(const char *, ...);
+int a, b = 2, c = 1;
+unsigned d, e;
+int main() {
+ L:
+  a = -1 / c;
+  d = ~(b && (c && ~e) & b);
+  printf("0\n");
+  c = 0;
+  if (d != -1)
+goto L;
+  return 0;
+}


[gcc r15-2889] ifcvt: handle sequences that clobber flags in noce_convert_multiple_sets

2024-08-12 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:28b3812c9d81203ae3d6a5350d8f828f4e659e50

commit r15-2889-g28b3812c9d81203ae3d6a5350d8f828f4e659e50
Author: Manolis Tsamis 
Date:   Fri Jun 30 13:06:42 2023 +0200

ifcvt: handle sequences that clobber flags in noce_convert_multiple_sets

This is an extension of what was done in PR106590.

Currently if a sequence generated in noce_convert_multiple_sets clobbers the
condition rtx (cc_cmp or rev_cc_cmp) then only seq1 is used afterwards
(sequences that emit the comparison itself). Since this applies only from the
next iteration it assumes that the sequences generated (in particular seq2)
don't clobber the condition rtx itself before using it in the if_then_else,
which is only true in specific cases (currently only register/subregister moves
are allowed).

This patch changes this so it also tests if seq2 clobbers cc_cmp/rev_cc_cmp in
the current iteration. It also checks whether the resulting sequence clobbers
the condition attached to the jump. This makes it possible to include arithmetic
operations in noce_convert_multiple_sets.

It also makes the code that checks whether the condition is used outside of the
emitted if_then_else more robust.

gcc/ChangeLog:

* ifcvt.cc (check_for_cc_cmp_clobbers): Use modified_in_p instead.
(noce_convert_multiple_sets_1): Don't use seq2 if it clobbers 
cc_cmp.
Punt if seq clobbers cond. Refactor the code that sets 
read_comparison.

Diff:
---
 gcc/ifcvt.cc | 127 +--
 1 file changed, 79 insertions(+), 48 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 58ed42673e5b..58c34aaf1ee4 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3592,20 +3592,6 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   return true;
 }
 
-/* Helper function for noce_convert_multiple_sets_1.  If store to
-   DEST can affect P[0] or P[1], clear P[0].  Called via note_stores.  */
-
-static void
-check_for_cc_cmp_clobbers (rtx dest, const_rtx, void *p0)
-{
-  rtx *p = (rtx *) p0;
-  if (p[0] == NULL_RTX)
-return;
-  if (reg_overlap_mentioned_p (dest, p[0])
-  || (p[1] && reg_overlap_mentioned_p (dest, p[1])))
-p[0] = NULL_RTX;
-}
-
 /* This goes through all relevant insns of IF_INFO->then_bb and tries to
create conditional moves.  In case a simple move sufficis the insn
should be listed in NEED_NO_CMOV.  The rewired-src cases should be
@@ -3731,36 +3717,71 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
*if_info,
 creating an additional compare for each.  If successful, costing
 is easier and this sequence is usually preferred.  */
   if (cc_cmp)
-   seq2 = try_emit_cmove_seq (if_info, temp, cond,
-  new_val, old_val, need_cmov,
-  &cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
+   {
+ seq2 = try_emit_cmove_seq (if_info, temp, cond,
+new_val, old_val, need_cmov,
+&cost2, &temp_dest2, cc_cmp, rev_cc_cmp);
+
+ /* The if_then_else in SEQ2 may be affected when cc_cmp/rev_cc_cmp is
+clobbered.  We can't safely use the sequence in this case.  */
+ for (rtx_insn *iter = seq2; iter; iter = NEXT_INSN (iter))
+   if (modified_in_p (cc_cmp, iter)
+ || (rev_cc_cmp && modified_in_p (rev_cc_cmp, iter)))
+ {
+   seq2 = NULL;
+   break;
+ }
+   }
 
   /* The backend might have created a sequence that uses the
-condition.  Check this.  */
+condition as a value.  Check this.  */
+
+  /* We cannot handle anything more complex than a reg or constant.  */
+  if (!REG_P (XEXP (cond, 0)) && !CONSTANT_P (XEXP (cond, 0)))
+   read_comparison = true;
+
+  if (!REG_P (XEXP (cond, 1)) && !CONSTANT_P (XEXP (cond, 1)))
+   read_comparison = true;
+
   rtx_insn *walk = seq2;
-  while (walk)
+  int if_then_else_count = 0;
+  while (walk && !read_comparison)
{
- rtx set = single_set (walk);
+ rtx exprs_to_check[2];
+ unsigned int exprs_count = 0;
 
- if (!set || !SET_SRC (set))
+ rtx set = single_set (walk);
+ if (set && XEXP (set, 1)
+ && GET_CODE (XEXP (set, 1)) == IF_THEN_ELSE)
{
- walk = NEXT_INSN (walk);
- continue;
+ /* We assume that this is the cmove created by the backend that
+naturally uses the condition.  */
+ exprs_to_check[exprs_count++] = XEXP (XEXP (set, 1), 1);
+ exprs_to_check[exprs_count++] = XEXP (XEXP (set, 1), 2);
+ if_then_else_count++;
}
+ else if (NONDEBUG_INSN_P (walk))
+   exprs_to_check[exprs_count++] = PATTERN (walk);
 
- rtx src = SET_S

[gcc r15-2890] ifcvt: Allow more operations in multiple set if conversion

2024-08-12 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:72c9b5f438f22cca493b4e2a8a2a31ff61bf1477

commit r15-2890-g72c9b5f438f22cca493b4e2a8a2a31ff61bf1477
Author: Manolis Tsamis 
Date:   Fri Jun 30 14:05:15 2023 +0200

ifcvt: Allow more operations in multiple set if conversion

Currently the operations allowed for if conversion of a basic block
with multiple sets are few, namely REG, SUBREG and CONST_INT (as
controlled by bb_ok_for_noce_convert_multiple_sets).

This commit allows more operations (arithmetic, compare, etc) to
participate in if conversion. The target's profitability hook and
ifcvt's costing is expected to reject sequences that are unprofitable.

This is especially useful for targets which provide a rich selection
of conditional instructions (like aarch64 which has cinc, csneg,
csinv, ccmp, ...)  which are currently not used in basic blocks with
more than a single set.

For targets that have a rich selection of conditional instructions,
like aarch64, we have seen an ~5x increase of profitable if
conversions for multiple set blocks in SPEC CPU 2017 benchmarks.

gcc/ChangeLog:

* ifcvt.cc (try_emit_cmove_seq): Modify comments.
(noce_convert_multiple_sets_1): Modify comments.
(bb_ok_for_noce_convert_multiple_sets): Allow more operations.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/ifcvt_multiple_sets_arithm.c: New test.

Diff:
---
 gcc/ifcvt.cc   | 34 --
 .../aarch64/ifcvt_multiple_sets_arithm.c   | 79 ++
 2 files changed, 92 insertions(+), 21 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 58c34aaf1ee4..f496a46e600d 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3432,13 +3432,13 @@ try_emit_cmove_seq (struct noce_if_info *if_info, rtx 
temp,
 /* We have something like:
 
  if (x > y)
-   { i = a; j = b; k = c; }
+   { i = EXPR_A; j = EXPR_B; k = EXPR_C; }
 
Make it:
 
- tmp_i = (x > y) ? a : i;
- tmp_j = (x > y) ? b : j;
- tmp_k = (x > y) ? c : k;
+ tmp_i = (x > y) ? EXPR_A : i;
+ tmp_j = (x > y) ? EXPR_B : j;
+ tmp_k = (x > y) ? EXPR_C : k;
  i = tmp_i;
  j = tmp_j;
  k = tmp_k;
@@ -3858,11 +3858,10 @@ noce_convert_multiple_sets_1 (struct noce_if_info 
*if_info,
 
 
 
-/* Return true iff basic block TEST_BB is comprised of only
-   (SET (REG) (REG)) insns suitable for conversion to a series
-   of conditional moves.  Also check that we have more than one set
-   (other routines can handle a single set better than we would), and
-   fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
+/* Return true iff basic block TEST_BB is suitable for conversion to a
+   series of conditional moves.  Also check that we have more than one
+   set (other routines can handle a single set better than we would),
+   and fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  While going
through the insns store the sum of their potential costs in COST.  */
 
 static bool
@@ -3888,20 +3887,13 @@ bb_ok_for_noce_convert_multiple_sets (basic_block 
test_bb, unsigned *cost)
   rtx dest = SET_DEST (set);
   rtx src = SET_SRC (set);
 
-  /* We can possibly relax this, but for now only handle REG to REG
-(including subreg) moves.  This avoids any issues that might come
-from introducing loads/stores that might violate data-race-freedom
-guarantees.  */
-  if (!REG_P (dest))
-   return false;
-
-  if (!((REG_P (src) || CONSTANT_P (src))
-   || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
- && subreg_lowpart_p (src
+  /* Do not handle anything involving memory loads/stores since it might
+violate data-race-freedom guarantees.  */
+  if (!REG_P (dest) || contains_mem_rtx_p (src))
return false;
 
-  /* Destination must be appropriate for a conditional write.  */
-  if (!noce_operand_ok (dest))
+  /* Destination and source must be appropriate.  */
+  if (!noce_operand_ok (dest) || !noce_operand_ok (src))
return false;
 
   /* We must be able to conditionally move in this mode.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c 
b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
new file mode 100644
index ..ba7f948aba57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_arithm.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ce1" } */
+
+void sink2(int, int);
+void sink3(int, int, int);
+
+void cond1(int cond, int x, int y)
+{
+  if (cond)
+{
+  x = x << 4;
+  y = 1;
+}
+
+  sink2(x, y);
+}
+
+void cond2(int cond, int x, int y)
+{
+  if (cond)
+{
+  x++;
+  y++;
+}
+
+  sink2(x, y);
+}
+
+void cond3(int cond, int x1, int x2, int x3)
+{
+  if (cond)
+{
+  x1++;
+  x2++;
+  x3++;
+}
+

[gcc r15-2891] ifcvt: Handle multiple rewired regs and refactor noce_convert_multiple_sets

2024-08-12 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:b219cbeda72d23b7ad6ff12cd159784b7ef00667

commit r15-2891-gb219cbeda72d23b7ad6ff12cd159784b7ef00667
Author: Manolis Tsamis 
Date:   Tue Aug 29 11:47:39 2023 +0200

ifcvt: Handle multiple rewired regs and refactor noce_convert_multiple_sets

The existing implementation of need_cmov_or_rewire and
noce_convert_multiple_sets_1 assumes that sets are either REG or SUBREG.
This commit enhances them so they can handle/rewire arbitrary set statements.

To do that a new helper struct noce_multiple_sets_info is introduced which is
used by noce_convert_multiple_sets and its helper functions. This results in
cleaner function signatures, improved efficiency (a number of vecs and hash
set/map are replaced with a single vec of struct) and simplicity.

gcc/ChangeLog:

* ifcvt.cc (need_cmov_or_rewire): Renamed 
init_noce_multiple_sets_info.
(init_noce_multiple_sets_info): Initialize noce_multiple_sets_info.
(noce_convert_multiple_sets_1): Use noce_multiple_sets_info and 
handle
rewiring of multiple registers.
(noce_convert_multiple_sets): Updated to use 
noce_multiple_sets_info.
* ifcvt.h (struct noce_multiple_sets_info): Introduce new struct
noce_multiple_sets_info to store info for 
noce_convert_multiple_sets.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/ifcvt_multiple_sets_rewire.c: New test.

Diff:
---
 gcc/ifcvt.cc   | 243 +
 gcc/ifcvt.h|  16 ++
 .../aarch64/ifcvt_multiple_sets_rewire.c   |  20 ++
 3 files changed, 141 insertions(+), 138 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index f496a46e600d..3e25f30b67ee 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -98,14 +98,10 @@ static bool dead_or_predicable (basic_block, basic_block, 
basic_block,
edge, bool);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
-static void need_cmov_or_rewire (basic_block, hash_set *,
-hash_map *);
+static void init_noce_multiple_sets_info (basic_block,
+  auto_delete_vec &);
 static bool noce_convert_multiple_sets_1 (struct noce_if_info *,
- hash_set *,
- hash_map *,
- auto_vec *,
- auto_vec *,
- auto_vec *, int *);
+  auto_delete_vec &, int *);
 
 /* Count the number of non-jump active insns in BB.  */
 
@@ -3487,24 +3483,13 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
   rtx x = XEXP (cond, 0);
   rtx y = XEXP (cond, 1);
 
-  /* The true targets for a conditional move.  */
-  auto_vec targets;
-  /* The temporaries introduced to allow us to not consider register
- overlap.  */
-  auto_vec temporaries;
-  /* The insns we've emitted.  */
-  auto_vec unmodified_insns;
-
-  hash_set need_no_cmov;
-  hash_map rewired_src;
-
-  need_cmov_or_rewire (then_bb, &need_no_cmov, &rewired_src);
+  auto_delete_vec insn_info;
+  init_noce_multiple_sets_info (then_bb, insn_info);
 
   int last_needs_comparison = -1;
 
   bool ok = noce_convert_multiple_sets_1
-(if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
- &unmodified_insns, &last_needs_comparison);
+(if_info, insn_info, &last_needs_comparison);
   if (!ok)
   return false;
 
@@ -3519,8 +3504,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   end_sequence ();
   start_sequence ();
   ok = noce_convert_multiple_sets_1
-   (if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
-&unmodified_insns, &last_needs_comparison);
+   (if_info, insn_info, &last_needs_comparison);
   /* Actually we should not fail anymore if we reached here,
 but better still check.  */
   if (!ok)
@@ -3529,12 +3513,12 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
 
   /* We must have seen some sort of insn to insert, otherwise we were
  given an empty BB to convert, and we can't handle that.  */
-  gcc_assert (!unmodified_insns.is_empty ());
+  gcc_assert (!insn_info.is_empty ());
 
   /* Now fixup the assignments.  */
-  for (unsigned i = 0; i < targets.length (); i++)
-if (targets[i] != temporaries[i])
-  noce_emit_move_insn (targets[i], temporaries[i]);
+  for (unsigned i = 0; i < insn_info.length (); i++)
+if (insn_info[i]->target != insn_info[i]->temporary)
+  noce_emit_move_insn (insn_info[i]->target, insn_info[i]->temporary);
 
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
@@ -3549,10 +3533,10 @@ noce_convert_multiple_sets (struct noce_if_info 
*if_info)
 set_used_flags (insn);
 
   /* Mark all our temporari

[gcc r15-2903] ifcvt: Fix force_operand ICE in noce_convert_multiple_sets [PR116353]

2024-08-13 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:cc00a737e840986f4916918759ba214a95ae6888

commit r15-2903-gcc00a737e840986f4916918759ba214a95ae6888
Author: Manolis Tsamis 
Date:   Tue Aug 13 10:17:49 2024 +0200

ifcvt: Fix force_operand ICE in noce_convert_multiple_sets [PR116353]

Now that more operations are allowed for noce_convert_multiple_sets,
we need to check noce_can_force_operand on the sequence before calling
try_emit_cmove_seq.  Otherwise an inappropriate argument may be given
to copy_to_mode_reg and result in an ICE.

PR tree-optimization/116353

gcc/ChangeLog:

* ifcvt.cc (bb_ok_for_noce_convert_multiple_sets): Check
noce_can_force_operand.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr116353.c: New test.

Diff:
---
 gcc/ifcvt.cc |  6 ++--
 gcc/testsuite/gcc.target/i386/pr116353.c | 55 
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 3e25f30b67ee..da59c9078919 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -3938,8 +3938,10 @@ bb_ok_for_noce_convert_multiple_sets (basic_block 
test_bb, unsigned *cost)
   rtx src = SET_SRC (set);
 
   /* Do not handle anything involving memory loads/stores since it might
-violate data-race-freedom guarantees.  */
-  if (!REG_P (dest) || contains_mem_rtx_p (src))
+violate data-race-freedom guarantees.  Make sure we can force SRC
+to a register as that may be needed in try_emit_cmove_seq.  */
+  if (!REG_P (dest) || contains_mem_rtx_p (src)
+ || !noce_can_force_operand (src))
return false;
 
   /* Destination and source must be appropriate.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr116353.c 
b/gcc/testsuite/gcc.target/i386/pr116353.c
new file mode 100644
index ..8e254653d5d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116353.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/116353 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+enum desmode { C };
+struct {
+  unsigned char des_ivec[];
+} _des_crypt_desp;
+int des_SPtrans_6_0, des_SPtrans_4_0, des_encrypt_encrypt, des_encrypt_i;
+long des_encrypt_s_0, _des_crypt_tin1, _des_crypt_tout0, _des_crypt_tout1,
+_des_crypt_tin0;
+enum desmode _des_crypt_desp_0;
+unsigned long _des_crypt_tbuf[2];
+char _des_crypt_out;
+void des_encrypt(unsigned long *buf) {
+  long l, r, t;
+  l = buf[0];
+  r = buf[1];
+  t = r;
+  r ^= l ^= t < 6;
+  if (des_encrypt_encrypt)
+for (;; des_encrypt_i += 4)
+  des_encrypt_s_0 ^= des_SPtrans_4_0 | des_SPtrans_6_0;
+  buf[1] = r;
+}
+void _des_crypt() {
+  long xor0, xor1;
+  unsigned char *in;
+  int cbc_mode = _des_crypt_desp_0;
+  in = _des_crypt_desp.des_ivec;
+  xor0 = xor1 = 0;
+  for (;;) {
+_des_crypt_tin0 = *in++;
+_des_crypt_tin0 |= *in++ << 8;
+_des_crypt_tin0 |= *in++ << 16;
+_des_crypt_tin0 |= (long)*in << 24;
+_des_crypt_tin1 = *in++;
+_des_crypt_tin1 |= *in++ << 8;
+_des_crypt_tin1 |= *in++ << 16;
+_des_crypt_tin1 |= (long)*in << 24;
+_des_crypt_tbuf[0] = _des_crypt_tin0;
+_des_crypt_tbuf[1] = _des_crypt_tin1;
+des_encrypt(_des_crypt_tbuf);
+if (cbc_mode) {
+  _des_crypt_tout0 = xor0;
+  _des_crypt_tout1 = _des_crypt_tbuf[1] ^ xor1;
+  xor0 = _des_crypt_tin0;
+  xor1 = _des_crypt_tin1;
+} else {
+  _des_crypt_tout0 = _des_crypt_tbuf[0];
+  _des_crypt_tout1 = _des_crypt_tbuf[1];
+}
+_des_crypt_out = _des_crypt_tout0 * _des_crypt_tout1;
+  }
+}


[gcc r15-818] MATCH: Look through VIEW_CONVERT when folding VEC_PERM_EXPRs.

2024-05-24 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:6d6f324bda1ccb51cd43ff9d4d017eb71bb2d690

commit r15-818-g6d6f324bda1ccb51cd43ff9d4d017eb71bb2d690
Author: Manolis Tsamis 
Date:   Wed Nov 1 12:27:28 2023 +0100

MATCH: Look through VIEW_CONVERT when folding VEC_PERM_EXPRs.

The match.pd patterns to merge two vector permutes into one fail when a
potentially no-op view-convert expression is between the two permutes.
This change lifts this restriction.

gcc/ChangeLog:

* match.pd: Allow no-op view_convert between permutes.

gcc/testsuite/ChangeLog:

* gcc.dg/fold-perm-2.c: New test.

Diff:
---
 gcc/match.pd   | 14 --
 gcc/testsuite/gcc.dg/fold-perm-2.c | 16 
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 7081d76d56a..024e3350465 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10082,19 +10082,21 @@ and,
  d = VEC_PERM_EXPR ;  */
 
 (simplify
- (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
+ (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
  (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
   (with
{
  machine_mode result_mode = TYPE_MODE (type);
- machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
  int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
  vec_perm_builder builder0;
  vec_perm_builder builder1;
  vec_perm_builder builder2 (nelts, nelts, 1);
}
-   (if (tree_to_vec_perm_builder (&builder0, @3)
-   && tree_to_vec_perm_builder (&builder1, @4))
+   (if (tree_to_vec_perm_builder (&builder0, @4)
+   && tree_to_vec_perm_builder (&builder1, @5)
+   && TYPE_SIZE (TREE_TYPE (TREE_TYPE (@0)))
+  == TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1))))
 (with
  {
vec_perm_indices sel0 (builder0, 2, nelts);
@@ -10116,10 +10118,10 @@ and,
   ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
  || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
   : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
-op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
  }
  (if (op0)
-  (vec_perm @1 @2 { op0; })))
+  (view_convert (vec_perm @2 @3 { op0; }
 
 /* Merge
  c = VEC_PERM_EXPR ;
diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c 
b/gcc/testsuite/gcc.dg/fold-perm-2.c
new file mode 100644
index 000..1a4ab4065de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
+typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
+
+void fun (veci *a, veci *b, veci *c)
+{
+  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
+  vecu r2 = __builtin_convertvector (r1, vecu);
+  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
+  *c = __builtin_convertvector (r3, veci);
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */


[gcc r15-3866] match: Fix A || B not optimized to true when !B implies A [PR114326]

2024-09-25 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:a88d6c6d777ad7c9235e2e620318f26e5347e50a

commit r15-3866-ga88d6c6d777ad7c9235e2e620318f26e5347e50a
Author: Konstantinos Eleftheriou 
Date:   Wed Aug 7 17:54:30 2024 +0200

match: Fix A || B not optimized to true when !B implies A [PR114326]

In expressions like (a != b || ((a ^ b) & c) == d) and
(a != b || (a ^ b) == c), (a ^ b) is folded to false.
In the equivalent expressions (((a ^ b) & c) == d || a != b) and
((a ^ b) == c || a != b) this is not happening.

This patch adds the following simplifications in match.pd:
((a ^ b) & c) cmp d || a != b --> 0 cmp d || a != b
(a ^ b) cmp c || a != b --> 0 cmp c || a != b

PR tree-optimization/114326

gcc/ChangeLog:

* match.pd: Add two patterns to fold a ^ b to 0, when a == b.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/fold-xor-and-or.c: New test.
* gcc.dg/tree-ssa/fold-xor-or.c: New test.

Tested-by: Christoph Müllner 
Signed-off-by: Philipp Tomsich 
Signed-off-by: Konstantinos Eleftheriou 

Diff:
---
 gcc/match.pd| 30 ++
 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c | 55 +
 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c | 55 +
 3 files changed, 140 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 7150b8e78cab..d10ac7d6cd20 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3787,6 +3787,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (if (types_match (type, TREE_TYPE (@0)))
  (bit_xor @0 { build_one_cst (type); } ))
 
+/* ((a ^ b) & c) cmp d || a != b --> (0 cmp d || a != b). */
+(for cmp (simple_comparison)
+  (simplify
+(bit_ior
+  (cmp:c
+   (bit_and:c
+ (bit_xor:c @0 @1)
+ tree_expr_nonzero_p@2)
+   @3)
+  (ne:c@4 @0 @1))
+(bit_ior
+  (cmp
+   { build_zero_cst (TREE_TYPE (@0)); }
+   @3)
+  @4)))
+
+/* (a ^ b) cmp c || a != b --> (0 cmp c || a != b). */
+(for cmp (simple_comparison)
+  (simplify
+(bit_ior
+  (cmp:c
+   (bit_xor:c @0 @1)
+   @2)
+  (ne:c@3 @0 @1))
+(bit_ior
+  (cmp
+   { build_zero_cst (TREE_TYPE (@0)); }
+   @2)
+  @3)))
+
 /* We can't reassociate at all for saturating types.  */
 (if (!TYPE_SATURATING (type))
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
new file mode 100644
index ..e5dc98e7541d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+typedef unsigned long int uint64_t;
+
+int cmp1(int d1, int d2) {
+  if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
+return 0;
+  return 1;
+}
+
+int cmp2(int d1, int d2) {
+  if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
+return 0;
+  return 1;
+}
+
+int cmp3(int d1, int d2) {
+  if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
+return 0;
+  return 1;
+}
+
+int cmp4(int d1, int d2) {
+  if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
+return 0;
+  return 1;
+}
+
+int cmp1_64(uint64_t d1, uint64_t d2) {
+  if (((d1 ^ d2) & 0xabcd) == 0 || d1 != d2)
+return 0;
+  return 1;
+}
+
+int cmp2_64(uint64_t d1, uint64_t d2) {
+  if (d1 != d2 || ((d1 ^ d2) & 0xabcd) == 0)
+return 0;
+  return 1;
+}
+
+int cmp3_64(uint64_t d1, uint64_t d2) {
+  if (10 > (0xabcd & (d2 ^ d1)) || d2 != d1)
+return 0;
+  return 1;
+}
+
+int cmp4_64(uint64_t d1, uint64_t d2) {
+  if (d2 != d1 || 10 > (0xabcd & (d2 ^ d1)))
+return 0;
+  return 1;
+}
+
+/* The if should be removed, so the condition should not exist */
+/* { dg-final { scan-tree-dump-not "d1_\[0-9\]+.D. \\^ d2_\[0-9\]+.D." 
"optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
new file mode 100644
index ..c55cfbcc84c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+typedef unsigned long int uint64_t;
+
+int cmp1(int d1, int d2) {
+  if ((d1 ^ d2) == 0xabcd || d1 != d2)
+return 0;
+  return 1;
+}
+
+int cmp2(int d1, int d2) {
+  if (d1 != d2 || (d1 ^ d2) == 0xabcd)
+return 0;
+  return 1;
+}
+
+int cmp3(int d1, int d2) {
+  if (0xabcd > (d2 ^ d1) || d2 != d1)
+return 0;
+  return 1;
+}
+
+int cmp4(int d1, int d2) {
+  if (d2 != d1 || 0xabcd > (d2 ^ d1))
+return 0;
+  return 1;
+}
+
+int cmp1_64(uint64_t d1, uint64_t d2) {
+  if ((d1 ^ d2) == 0xabcd || d1 != d2)
+return 0;
+  return 1;
+}
+
+int cmp2_64(uint64_t d1, uint64_t d2) {
+  if (d1 != d2 || (d1 ^ d2) == 0xabcd)
+return 0;
+  return 1;
+}
+
+int cmp3_64(uint64_t d1, uint64_t d2) {
+  if (0xabcd > (d2 ^ d1) || d2 != d1)
+return 0;
+  return 1;
+}
+
+int cmp4_64(uint64_t d1, uint64_t d2) {
+  if (d2 != d1 || 0xabcd > (d2 ^ d1))
+  

[gcc r15-3862] match: Change (A * B) + (-C) to (B - C/A) * A, if C multiple of A [PR109393]

2024-09-25 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:08b8341f209be7c7e301853bdbbcad4f8e1695f5

commit r15-3862-g08b8341f209be7c7e301853bdbbcad4f8e1695f5
Author: Konstantinos Eleftheriou 
Date:   Thu Sep 5 15:59:59 2024 +0200

match: Change (A * B) + (-C) to (B - C/A) * A, if C multiple of A [PR109393]

The following function:

int foo(int *a, int j)
{
  int k = j - 1;
  return a[j - 1] == a[k];
}

does not fold to `return 1;` using -O2 or higher. The cause of this is that
the expression `4 * j + (-4)` for the index computation is not folded to
`4 * (j - 1)`. Existing simplifications that handle similar cases are 
applied
when A == C, which is not the case in this instance.

A previous attempt to address this issue is
https://gcc.gnu.org/pipermail/gcc-patches/2024-April/649896.html

This patch adds the following simplification in match.pd:
(A * B) + (-C) -> (B - C/A) * A, if C is a multiple of A

which also handles cases where the index is j - 2, j - 3, etc.

Bootstrapped for all languages and regression tested on x86-64 and aarch64.

PR tree-optimization/109393

gcc/ChangeLog:

* match.pd: (A * B) + (-C) -> (B - C/A) * A, if C a multiple of A.

gcc/testsuite/ChangeLog:

* gcc.dg/pr109393.c: New test.

Tested-by: Christoph Müllner 
Signed-off-by: Philipp Tomsich 
Signed-off-by: Konstantinos Eleftheriou 

Diff:
---
 gcc/match.pd| 21 -
 gcc/testsuite/gcc.dg/pr109393.c | 23 +++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 940292d0d497..7150b8e78cab 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4276,7 +4276,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 ? wi::max_value (TYPE_PRECISION (type), SIGNED)
 : wi::min_value (TYPE_PRECISION (type), SIGNED))
 && single_use (@3))
- (mult (plusminus @2 { build_one_cst (type); }) @0))
+  (mult (plusminus @2 { build_one_cst (type); }) @0)
+ /* (A * B) + (-C) -> (B - C/A) * A, if C is a multiple of A.  */
+ (if (!ALL_FRACT_MODE_P (TYPE_MODE (type)))
+  (simplify
+(plus (mult:cs integer_nonzerop@0 @1) INTEGER_CST@2)
+/* Exclude the case that @2 == min to prevent UB when calculating abs
+   and (B - C/A).  */
+(if (TREE_CODE (type) == INTEGER_TYPE
+   && wi::neg_p (wi::to_wide (@2))
+   && wi::to_wide (@2) != wi::min_value (TYPE_PRECISION (type), SIGNED))
+  (with {
+   wide_int c0 = wi::to_wide (@0);
+   wide_int c2 = wi::to_wide (@2);
+   wide_int c2_abs = wi::abs (c2); }
+  (if (wi::multiple_of_p (c2_abs, c0, TYPE_SIGN (type)))
+   (with {
+ /* Calculate @2 / @0 in order to factorize the expression.  */
+ wide_int div_res = wi::sdiv_trunc (c2, c0);
+ tree div_cst = wide_int_to_tree (type, div_res); }
+   (mult (plus @1 { div_cst; }) @0
 
 #if GIMPLE
 /* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
diff --git a/gcc/testsuite/gcc.dg/pr109393.c b/gcc/testsuite/gcc.dg/pr109393.c
new file mode 100644
index ..17bf93307964
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr109393.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/109393 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(int *a, int j)
+{
+  int k = j - 1;
+  return a[j - 1] == a[k];
+}
+
+int foo2(int *a, int j)
+{
+  int k = j - 5;
+  return a[j - 5] == a[k];
+}
+
+int bar(int *a, int j)
+{
+  int k = j - 1;
+  return (&a[j + 1] - 2) == &a[k];
+}
+
+/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" } } */
\ No newline at end of file


[gcc r15-5640] Add target-independent store forwarding avoidance pass

2024-11-24 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:1d8de1e93ea00f7797f61cf8e05c47ca86f21f8c

commit r15-5640-g1d8de1e93ea00f7797f61cf8e05c47ca86f21f8c
Author: Konstantinos Eleftheriou 
Date:   Wed Oct 16 10:31:39 2024 +0200

Add target-independent store forwarding avoidance pass

This pass detects cases of expensive store forwarding and tries to
avoid them by reordering the stores and using suitable bit insertion
sequences.  For example it can transform this:

 strb    w2, [x1, 1]
 ldr     x0, [x1]      # Expensive store forwarding to larger load.

To:

 ldr     x0, [x1]
 strb    w2, [x1]
 bfi     x0, x2, 0, 8

Assembly like this can appear with bitfields or type punning / unions.
On stress-ng when running the cpu-union microbenchmark the following
speedups have been observed.

  Neoverse-N1:      +29.4%
  Intel Coffeelake: +13.1%
  AMD 5950X:        +17.5%

The transformation is rejected on cases that cause store_bit_field to
generate subreg expressions on different register classes.  Files
avoid-store-forwarding-4.c and avoid-store-forwarding-5.c contain such
cases and have been marked as XFAIL.

Due to biasing of its operands in store_bit_field, there is a special
handling for machines with BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN. The
need for this was exposed by an issue found on the H8 architecture,
which uses big-endian ordering, but BITS_BIG_ENDIAN is false. In that
case, the START parameter of store_bit_field needs to be calculated
from the end of the destination register.

gcc/ChangeLog:

* Makefile.in (OBJS): Add avoid-store-forwarding.o.
* common.opt (favoid-store-forwarding): New option.
* common.opt.urls: Regenerate.
* doc/invoke.texi: New param store-forwarding-max-distance.
* doc/passes.texi: Document new pass.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in: Document new pass.
* params.opt (store-forwarding-max-distance): New param.
* passes.def: Add pass_rtl_avoid_store_forwarding before
pass_early_remat.
* target.def (avoid_store_forwarding_p): New DEFHOOK.
* target.h (struct store_fwd_info): Declare.
* targhooks.cc (default_avoid_store_forwarding_p): New function.
* targhooks.h (default_avoid_store_forwarding_p): Declare.
* tree-pass.h (make_pass_rtl_avoid_store_forwarding): Declare.
* avoid-store-forwarding.cc: New file.
* avoid-store-forwarding.h: New file.
* timevar.def (TV_AVOID_STORE_FORWARDING): New timevar.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/avoid-store-forwarding-1.c: New test.
* gcc.target/aarch64/avoid-store-forwarding-2.c: New test.
* gcc.target/aarch64/avoid-store-forwarding-3.c: New test.
* gcc.target/aarch64/avoid-store-forwarding-4.c: New test.
* gcc.target/aarch64/avoid-store-forwarding-5.c: New test.
* gcc.target/x86_64/abi/callabi/avoid-store-forwarding-1.c: New test.
* gcc.target/x86_64/abi/callabi/avoid-store-forwarding-2.c: New test.

Co-authored-by: Philipp Tomsich 
Signed-off-by: Philipp Tomsich 
Signed-off-by: Konstantinos Eleftheriou 

Diff:
---
 gcc/Makefile.in|   1 +
 gcc/avoid-store-forwarding.cc  | 651 +
 gcc/avoid-store-forwarding.h   |  56 ++
 gcc/common.opt |   4 +
 gcc/common.opt.urls|   3 +
 gcc/doc/invoke.texi|   9 +
 gcc/doc/passes.texi|   8 +
 gcc/doc/tm.texi|   8 +
 gcc/doc/tm.texi.in |   2 +
 gcc/params.opt |   4 +
 gcc/passes.def |   1 +
 gcc/target.def |  10 +
 gcc/target.h   |   3 +
 gcc/targhooks.cc   |  27 +
 gcc/targhooks.h|   3 +
 .../gcc.target/aarch64/avoid-store-forwarding-1.c  |  27 +
 .../gcc.target/aarch64/avoid-store-forwarding-2.c  |  39 ++
 .../gcc.target/aarch64/avoid-store-forwarding-3.c  |  30 +
 .../gcc.target/aarch64/avoid-store-forwarding-4.c  |  26 +
 .../gcc.target/aarch64/avoid-store-forwarding-5.c  |  41 ++
 .../x86_64/abi/callabi/avoid-store-forwarding-1.c  |  28 +
 .../x86_64/abi/callabi/avoid-store-forwarding-2.c  |  39 ++
 gcc/timevar.def|   1 +
 gcc/tree-pass.h|   1 +
 24 files changed, 1022 insertions(+)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 0cbb3633e01e..ead8d2eb094c

[gcc r15-5983] avoid-store-forwarding: bail when an instruction may throw [PR117816]

2024-12-06 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:b317dca04e3ffb31144f79cb804ff6835c2a9af8

commit r15-5983-gb317dca04e3ffb31144f79cb804ff6835c2a9af8
Author: kelefth 
Date:   Thu Dec 5 11:11:27 2024 +0100

avoid-store-forwarding: bail when an instruction may throw [PR117816]

Avoid-store-forwarding doesn't handle the case where an instruction in
the store-load sequence contains a REG_EH_REGION note, leading to the
insertion of instructions after it, while it should be the last
instruction in the basic block. This causes an ICE when compiling
using `-O -fnon-call-exceptions -favoid-store-forwarding
-fno-forward-propagate -finstrument-functions`.

This patch rejects the transformation when there are instructions in
the sequence that may throw an exception.

PR rtl-optimization/117816

gcc/ChangeLog:

* avoid-store-forwarding.cc 
(store_forwarding_analyzer::avoid_store_forwarding):
Reject the transformation when having instructions that may
throw exceptions in the sequence.

gcc/testsuite/ChangeLog:

* gcc.dg/pr117816.c: New test.

Diff:
---
 gcc/avoid-store-forwarding.cc   |  2 +-
 gcc/testsuite/gcc.dg/pr117816.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index b1fa1678dc30..1b8c35bc6cb7 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -429,7 +429,7 @@ store_forwarding_analyzer::avoid_store_forwarding 
(basic_block bb)
 
   rtx set = single_set (insn);
 
-  if (!set)
+  if (!set || insn_could_throw_p (insn))
{
  store_exprs.truncate (0);
  continue;
diff --git a/gcc/testsuite/gcc.dg/pr117816.c b/gcc/testsuite/gcc.dg/pr117816.c
new file mode 100644
index ..6a9fc5fa1415
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr117816.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fnon-call-exceptions -favoid-store-forwarding -fno-forward-propagate -finstrument-functions" } */
+
+char *p;
+int y;
+long x;
+
+void foo()
+{
+  x /= *(int *)__builtin_memmove(&y, 4 + p, 3);
+}


[gcc r15-6464] avoid-store-forwarding: fix reg init on load-eliminiation [PR117835]

2024-12-30 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:c86e1c54c6f8771d08a8c070717b80607f990f8a

commit r15-6464-gc86e1c54c6f8771d08a8c070717b80607f990f8a
Author: kelefth 
Date:   Mon Dec 16 14:36:59 2024 +0100

avoid-store-forwarding: fix reg init on load-eliminiation [PR117835]

During the initialization of the base register for the zero-offset
store, in the case that we are eliminating the load, we used a
paradoxical subreg assuming that we don't care about the higher bits
of the register. This led to writing wrong values when we were not
updating the whole register.

This patch fixes the issue by zero-extending the value stored in the
base register instead of using a paradoxical subreg.

Bootstrapped/regtested on x86 and AArch64.

PR rtl-optimization/117835
PR rtl-optimization/117872

gcc/ChangeLog:

* avoid-store-forwarding.cc
(store_forwarding_analyzer::process_store_forwarding):
Zero-extend the value stored in the base register instead of
using a paradoxical subreg.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr117835.c: New test.

Diff:
---
 gcc/avoid-store-forwarding.cc|  6 +-
 gcc/testsuite/gcc.target/i386/pr117835.c | 20 
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 1b8c35bc6cb7..fa83e10fedca 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -238,11 +238,7 @@ process_store_forwarding (vec &stores, 
rtx_insn *load_insn,
{
  start_sequence ();
 
- /* We can use a paradoxical subreg to force this to a wider mode, as
-the only use will be inserting the bits (i.e., we don't care about
-the value of the higher bits).  */
- rtx ext0 = lowpart_subreg (GET_MODE (dest), it->mov_reg,
-GET_MODE (it->mov_reg));
+ rtx ext0 = gen_rtx_ZERO_EXTEND (GET_MODE (dest), it->mov_reg);
  if (ext0)
{
  rtx_insn *move0 = emit_move_insn (dest, ext0);
diff --git a/gcc/testsuite/gcc.target/i386/pr117835.c 
b/gcc/testsuite/gcc.target/i386/pr117835.c
new file mode 100644
index ..eac71aac916b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117835.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-O -favoid-store-forwarding -mno-push-args --param=store-forwarding-max-distance=0 -Wno-psabi" } */
+
+typedef __attribute__((__vector_size__ (64))) unsigned short V;
+
+__attribute__((__noipa__)) V
+foo (V v, V)
+{
+  return v;
+}
+
+int main ()
+{
+  V a = (V){3, 5, 0, 8, 9, 3, 5, 1, 3, 4, 2, 5, 5, 0, 5, 3, 61886};
+  V b = (V){6, 80, 15, 2, 2, 1, 1, 3, 5};
+  V x = foo (a, b);
+  for (unsigned i = 0; i < sizeof(x)/sizeof(x[0]); i++)
+if (x[i] != a[i])
+  __builtin_abort();
+}
\ No newline at end of file


[gcc r15-7353] testsuite: XFAIL test in pr109393.c for ilp32 targets [PR116845]

2025-02-04 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:adf1da77593f8851c6b78d22ebbc1124bbaf1de5

commit r15-7353-gadf1da77593f8851c6b78d22ebbc1124bbaf1de5
Author: kelefth 
Date:   Tue Feb 4 11:49:03 2025 +0100

testsuite: XFAIL test in pr109393.c for ilp32 targets [PR116845]

The match.pd canonicalization that this testcase checks for,
is not applied on ilp32 targets.

This XFAILs the test on ilp32 targets.

PR testsuite/116845

gcc/testsuite/ChangeLog:

* gcc.dg/pr109393.c: XFAIL on ilp32 targets.

Diff:
---
 gcc/testsuite/gcc.dg/pr109393.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr109393.c b/gcc/testsuite/gcc.dg/pr109393.c
index b2dd5a0b645c..108d30913894 100644
--- a/gcc/testsuite/gcc.dg/pr109393.c
+++ b/gcc/testsuite/gcc.dg/pr109393.c
@@ -20,4 +20,5 @@ int bar(int *a, int j)
   return (&a[j + 1] - 2) == &a[k];
 }
 
-/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" } } */
+/* The pattern is not applied on ilp32 targets (PR116845).  */
+/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" { xfail { ilp32 } } } } */


[gcc r16-14] doc: Clarify REG_EH_REGION note usage

2025-04-18 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:19af15ba7ad041b58b7926775cce81be9f5ec013

commit r16-14-g19af15ba7ad041b58b7926775cce81be9f5ec013
Author: kelefth 
Date:   Thu Mar 13 15:42:48 2025 +0100

doc: Clarify REG_EH_REGION note usage

The documentation for the REG_EH_REGION could easily be read
(especially by non-native speakers) to indicate that it should be
attached to the insn at the destination of an exception edge.  Despite the
original text saying that the note "specifies the destination," it is
actually always attached to the source instruction.

This updates the documentation to make it clear that the REG_EH_REGION
note is always attached to instructions originating an exception edge
and that the value of the note specifies where the exception edge
leads to.

Co-Developed-by: Philipp Tomsich 

gcc/ChangeLog:

* doc/cfg.texi: Update the exception handling section for the
REG_EH_REGION notes to make it clear that the note is attached
to the instruction throwing the exception.

Diff:
---
 gcc/doc/cfg.texi | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/cfg.texi b/gcc/doc/cfg.texi
index b8c6427c0765..bbd66946b759 100644
--- a/gcc/doc/cfg.texi
+++ b/gcc/doc/cfg.texi
@@ -297,10 +297,12 @@ edge.  The opposite conversion is difficult, but should 
not happen
 anyway.  The edges can be eliminated via @code{purge_dead_edges} call.
 
 @findex REG_EH_REGION, EDGE_ABNORMAL_CALL
-In the RTL representation, the destination of an exception edge is
-specified by @code{REG_EH_REGION} note attached to the insn.
-In case of a trapping call the @code{EDGE_ABNORMAL_CALL} flag is set
-too.  In the @code{GIMPLE} representation, this extra flag is not set.
+In the RTL representation, a @code{REG_EH_REGION} note is attached to
+an instruction that can throw an exception.  The destination of the
+exception edge originating at such an instruction is specified by the
+value of the @code{REG_EH_REGION} note.  In case of a trapping call
+the @code{EDGE_ABNORMAL_CALL} flag is set too.  In the @code{GIMPLE}
+representation, this extra flag is not set.
 
 @findex may_trap_p, tree_could_trap_p
 In the RTL representation, the predicate @code{may_trap_p} may be used


[gcc r16-15] avoid-store-forwarding: Fix reg init on load-elimination [PR119160]

2025-04-18 Thread Philipp Tomsich via Gcc-cvs
https://gcc.gnu.org/g:7e628ff49f7f890d5337369d7b4f8e21a1f17029

commit r16-15-g7e628ff49f7f890d5337369d7b4f8e21a1f17029
Author: kelefth 
Date:   Thu Mar 13 11:49:39 2025 +0100

avoid-store-forwarding: Fix reg init on load-elimination [PR119160]

In the case that we are eliminating the load instruction, we use zero_extend
for the initialization of the base register for the zero-offset store.
This causes issues when the store and the load use the same mode,
as we are trying to generate a zero_extend with the same inner and
outer modes.

This patch fixes the issue by zero-extending the value stored in the
base register only when the load's mode is wider than the store's mode.

PR rtl-optimization/119160

gcc/ChangeLog:

* avoid-store-forwarding.cc (process_store_forwarding):
Zero-extend the value stored in the base register, in case
of load-elimination, only when the mode of the destination
is wider.

gcc/testsuite/ChangeLog:

* gcc.dg/pr119160.c: New test.

Diff:
---
 gcc/avoid-store-forwarding.cc   | 11 ---
 gcc/testsuite/gcc.dg/pr119160.c | 26 ++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 34a7bba40439..ded8d7e596e0 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -238,10 +238,15 @@ process_store_forwarding (vec &stores, 
rtx_insn *load_insn,
{
  start_sequence ();
 
- rtx ext0 = gen_rtx_ZERO_EXTEND (GET_MODE (dest), it->mov_reg);
- if (ext0)
+ machine_mode dest_mode = GET_MODE (dest);
+ rtx base_reg = it->mov_reg;
+ if (known_gt (GET_MODE_BITSIZE (dest_mode),
+   GET_MODE_BITSIZE (GET_MODE (it->mov_reg))))
+   base_reg = gen_rtx_ZERO_EXTEND (dest_mode, it->mov_reg);
+
+ if (base_reg)
{
- rtx_insn *move0 = emit_move_insn (dest, ext0);
+ rtx_insn *move0 = emit_move_insn (dest, base_reg);
  if (recog_memoized (move0) >= 0)
{
  insns = get_insns ();
diff --git a/gcc/testsuite/gcc.dg/pr119160.c b/gcc/testsuite/gcc.dg/pr119160.c
new file mode 100644
index ..b4629a11d9d2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr119160.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -finstrument-functions-once -favoid-store-forwarding -fnon-call-exceptions -fschedule-insns -mgeneral-regs-only -Wno-psabi" } */
+
+typedef __attribute__((__vector_size__ (32))) int V;
+
+void
+foo (V v, V, V, V *r)
+{
+  V u = (V){} + v[0];
+  *r = u;
+}
+
+__attribute__((__noipa__)) void
+bar(int x)
+{
+ if (x != 2) __builtin_abort();
+}
+
+int
+main ()
+{
+  V x;
+  foo ((V){ 2, 3 }, (V){ }, (V){ }, &x);
+  for (unsigned i = 0; i < sizeof(x)/sizeof(x[0]); i++)
+bar(x[i]);
+}
\ No newline at end of file