[gcc r15-3697] RISC-V: Fix signed SAT_ADD test case for int64_t

2024-09-18 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1d1687513400c1b805bf5924f732c21dbf166ae8

commit r15-3697-g1d1687513400c1b805bf5924f732c21dbf166ae8
Author: Pan Li 
Date:   Fri Sep 13 09:16:48 2024 +0800

RISC-V: Fix signed SAT_ADD test case for int64_t

The int8_t test for signed SAT_ADD is sat_s_add-1.c; sat_s_add-4.c
should be for int64_t.  Thus, update sat_s_add-4.c for the int64_t type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_s_add-4.c: Update test for int64_t
instead of int8_t.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
index f85675c1a053..12c9540eaeca 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
@@ -5,26 +5,25 @@
 #include "sat_arith.h"
 
 /*
-** sat_s_add_int8_t_fmt_1:
+** sat_s_add_int64_t_fmt_1:
 ** add\s+[atx][0-9]+,\s*a0,\s*a1
 ** xor\s+[atx][0-9]+,\s*a0,\s*a1
 ** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
-** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
-** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
 ** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
-** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
 ** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
-** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** li\s+[atx][0-9]+,\s*-1
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** neg\s+[atx][0-9]+,\s*[atx][0-9]+
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
 ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
-** slliw\s+a0,\s*a0,\s*24
-** sraiw\s+a0,\s*a0,\s*24
 ** ret
 */
-DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
 
 /* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */


[gcc r15-3694] c++: alias of decltype(lambda) is opaque [PR116714, PR107390]

2024-09-18 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:82c2acd0bc4411524a8248fcdce219927d921a71

commit r15-3694-g82c2acd0bc4411524a8248fcdce219927d921a71
Author: Patrick Palka 
Date:   Wed Sep 18 13:50:43 2024 -0400

c++: alias of decltype(lambda) is opaque [PR116714, PR107390]

Here for

  using type = decltype([]{});
  static_assert(is_same_v<type, type>);

we strip the alias ahead of time during template argument coercion
which effectively transforms the template-id into

  is_same_v<decltype([]{}), decltype([]{})>

which is wrong because later substitution into the template-id will
produce two new lambdas with distinct types and cause is_same_v to
return false.

This demonstrates that such aliases should be considered opaque (a
notion that we recently introduced in r15-2331-g523836716137d0).
(An alternative solution might be to consider memoizing lambda-expr
substitution rather than always producing a new lambda, but this is
much simpler.)

PR c++/116714
PR c++/107390

gcc/cp/ChangeLog:

* pt.cc (dependent_opaque_alias_p): Also return true for a
decltype(lambda) alias.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/lambda-uneval18.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/pt.cc | 11 ++--
 gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C | 39 
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 769e7999dac1..e826206be164 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -6759,8 +6759,15 @@ dependent_opaque_alias_p (const_tree t)
 {
   return (TYPE_P (t)
  && typedef_variant_p (t)
- && any_dependent_type_attributes_p (DECL_ATTRIBUTES
- (TYPE_NAME (t;
+ && (any_dependent_type_attributes_p (DECL_ATTRIBUTES
+  (TYPE_NAME (t)))
+ /* Treat a dependent decltype(lambda) alias as opaque so that we
+don't prematurely strip it when used as a template argument.
+Otherwise substitution into each occurrence of the (stripped)
+alias would incorrectly yield a distinct lambda type.  */
+ || (TREE_CODE (t) == DECLTYPE_TYPE
+ && TREE_CODE (DECLTYPE_TYPE_EXPR (t)) == LAMBDA_EXPR
+ && !typedef_variant_p (DECL_ORIGINAL_TYPE (TYPE_NAME (t));
 }
 
 /* Return the number of innermost template parameters in TMPL.  */
diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C 
b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C
new file mode 100644
index ..b7d864c62453
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C
@@ -0,0 +1,39 @@
+// PR c++/116714
+// PR c++/107390
+// { dg-do compile { target c++20 } }
+
+template
+inline constexpr bool is_same_v = __is_same(T, U);
+
+template
+struct is_same { static constexpr bool value = false; };
+
+template
+struct is_same { static constexpr bool value = true; };
+
+template
+void f() {
+  using type = decltype([]{});
+  static_assert(is_same_v);
+  static_assert(is_same::value);
+};
+
+template
+void g() {
+  using ty1 = decltype([]{});
+  using ty2 = ty1;
+  static_assert(is_same_v);
+  static_assert(is_same::value);
+};
+
+template
+void h() {
+  using ty1 = decltype([]{});
+  using ty2 = decltype([]{});
+  static_assert(!is_same_v);
+  static_assert(!is_same::value);
+};
+
+template void f();
+template void g();
+template void h();


[gcc r15-3695] [PATCH] configure: fix typos

2024-09-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:cc62b2c3da118f08f71d2ae9c08bafb55b35767a

commit r15-3695-gcc62b2c3da118f08f71d2ae9c08bafb55b35767a
Author: Andrew Kreimer 
Date:   Wed Sep 18 11:50:58 2024 -0600

[PATCH] configure: fix typos

/
* configure.ac: Fix typos.
* configure: Rebuilt.

Diff:
---
 configure| 2 +-
 configure.ac | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index e9583f2ba0c7..6a77d454fd53 100755
--- a/configure
+++ b/configure
@@ -9086,7 +9086,7 @@ if test -d ${srcdir}/gcc; then
 lang_requires_boot_languages=
 # set srcdir during sourcing lang_frag to the gcc dir.
 # Sadly overriding srcdir on the . line doesn't work in plain sh as it
-# polutes this shell
+# pollutes this shell
 saved_srcdir=${srcdir}
 srcdir=${srcdir}/gcc . ${lang_frag}
 srcdir=${saved_srcdir}
diff --git a/configure.ac b/configure.ac
index f61dbe64a942..2567757e74df 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2136,7 +2136,7 @@ if test -d ${srcdir}/gcc; then
 lang_requires_boot_languages=
 # set srcdir during sourcing lang_frag to the gcc dir.
 # Sadly overriding srcdir on the . line doesn't work in plain sh as it
-# polutes this shell
+# pollutes this shell
 saved_srcdir=${srcdir}
 srcdir=${srcdir}/gcc . ${lang_frag}
 srcdir=${saved_srcdir}


[gcc r15-3699] testsuite/gcc.dg/pr84877.c: Add machinery to stabilize stack alignment

2024-09-18 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:b1ea710b1bcdda233f96538c5404228d2b244e01

commit r15-3699-gb1ea710b1bcdda233f96538c5404228d2b244e01
Author: Hans-Peter Nilsson 
Date:   Thu Sep 5 17:02:23 2024 +0200

testsuite/gcc.dg/pr84877.c: Add machinery to stabilize stack alignment

This test awkwardly "blinks"; xfails and xpasses apparently
randomly for cris-elf using the "gdb simulator".  On
inspection, I see that the stack address depends on the
number of environment variables, deliberately passed to the
simulator, each adding the size of a pointer.

This test is IMHO important enough that it should not be
skipped just because it blinks (fixing the actual problem is a
different task).

I guess a random non-16 stack-alignment could happen for
other targets as well, so let's try and add a generic
machinery to "stabilize" the test as failing, by allocating
a dynamic amount to make sure it's misaligned.  The most
target-dependent item here is an offset between the incoming
stack-pointer value (within main in the added framework) and
outgoing (within "xmain" as called from main when setting up
the p0 parameter).  I know there are other wonderful stack
shapes, but such targets would fall under the "complicated
situations"-label and are no worse off than before.

* gcc.dg/pr84877.c: Try to make the test result consistent by
misaligning the stack.

Diff:
---
 gcc/testsuite/gcc.dg/pr84877.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/pr84877.c b/gcc/testsuite/gcc.dg/pr84877.c
index e82991f42dd4..2f2e29578df9 100644
--- a/gcc/testsuite/gcc.dg/pr84877.c
+++ b/gcc/testsuite/gcc.dg/pr84877.c
@@ -3,6 +3,32 @@
 
 #include 
 
+#ifdef __CRIS__
+#define OUTGOING_SP_OFFSET (-sizeof (void *))
+/* Suggestion: append #elif defined() after this 
comment,
+   either defining OUTGOING_SP_OFFSET to whatever the pertinent amount is at 
-O2,
+   if that makes your target consistently fail this test, or define
+   DO_NOT_TAMPER for more complicated situations.  Either way, compile with
+   -DDO_NOT_TAMPER to avoid any meddling.  */
+#endif
+
+#if defined (OUTGOING_SP_OFFSET) && !defined (DO_NOT_TAMPER)
+extern int xmain () __attribute__ ((__noipa__));
+int main ()
+{
+  uintptr_t misalignment
+= (OUTGOING_SP_OFFSET
++ (15 & (uintptr_t) __builtin_stack_address ()));
+  /* Allocate a minimal amount if the stack was accidentally aligned.  */
+  void *q = __builtin_alloca (misalignment == 0);
+  xmain ();
+  /* Fake use to avoid the "allocation" being optimized out.  */
+  asm volatile ("" : : "rm" (q));
+  return 0;
+}
+#define main xmain
+#endif
+
 struct U {
 int M0;
 int M1;


[gcc r15-3690] contrib: Set check-params-in-docs.py to skip tables of values of a param

2024-09-18 Thread Filip Kastl via Gcc-cvs
https://gcc.gnu.org/g:4b7e6d5faa137f18a36d8c6323a8640e61ee48f1

commit r15-3690-g4b7e6d5faa137f18a36d8c6323a8640e61ee48f1
Author: Filip Kastl 
Date:   Wed Sep 18 16:38:30 2024 +0200

contrib: Set check-params-in-docs.py to skip tables of values of a param

Currently check-params-in-docs.py reports extra params being listed in
invoke.texi.  However, those aren't actual params but items in a table of
possible values of the aarch64-autovec-preference param.

This patch changes check-params-in-docs.py to ignore similar tables.

contrib/ChangeLog:

* check-params-in-docs.py: Skip tables of values of a param.
Remove code that skips items beginning with a number.

Signed-off-by: Filip Kastl 

Diff:
---
 contrib/check-params-in-docs.py | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/contrib/check-params-in-docs.py b/contrib/check-params-in-docs.py
index ccdb8d721696..102f0e64e989 100755
--- a/contrib/check-params-in-docs.py
+++ b/contrib/check-params-in-docs.py
@@ -66,14 +66,23 @@ texi = takewhile(lambda x: '@node Instrumentation Options' 
not in x, texi)
 texi = list(texi)[1:]
 
 texi_params = []
+skip = False
 for line in texi:
+# Skip @table @samp sections of manual where values of a param are usually
+# listed
+if skip:
+if line.startswith('@end table'):
+skip = False
+continue
+elif line.startswith('@table @samp'):
+skip = True
+continue
+
 for token in ('@item ', '@itemx '):
 if line.startswith(token):
 texi_params.append(line[len(token):])
 break
 
-# Skip digits
-texi_params = [x for x in texi_params if not x[0].isdigit()]
 # Skip target-specific params
 texi_params = [x for x in texi_params if not target_specific(x)]


[gcc r15-3691] [PATCH v3] RISC-V: Fixed incorrect semantic description in DF to DI pattern in the Zfa extension on

2024-09-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:85fcf740342e308da4776a45a4cd726987725a6a

commit r15-3691-g85fcf740342e308da4776a45a4cd726987725a6a
Author: Jin Ma 
Date:   Wed Sep 18 08:56:23 2024 -0600

[PATCH v3] RISC-V: Fixed incorrect semantic description in DF to DI pattern 
in the Zfa extension on rv32.

gcc/ChangeLog:

* config/riscv/riscv.md: Change "truncate" to unspec for the Zfa 
extension on rv32.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zfa-fmovh-fmovp-bug.c: New test.

Diff:
---
 gcc/config/riscv/riscv.md| 16 +---
 gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c |  9 +
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fd1cbebc435b..0410d990ec58 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -56,6 +56,8 @@
   UNSPEC_FLT_QUIET
   UNSPEC_FLE_QUIET
   UNSPEC_COPYSIGN
+  UNSPEC_FMV_X_W
+  UNSPEC_FMVH_X_D
   UNSPEC_RINT
   UNSPEC_ROUND
   UNSPEC_FLOOR
@@ -2626,8 +2628,9 @@
 
 (define_insn "movsidf2_low_rv32"
   [(set (match_operand:SI  0 "register_operand" "=  r")
-   (truncate:SI
-   (match_operand:DF 1 "register_operand"  "zmvf")))]
+   (unspec:SI
+   [(match_operand:DF 1 "register_operand" "zmvf")]
+   UNSPEC_FMV_X_W))]
   "TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA"
   "fmv.x.w\t%0,%1"
   [(set_attr "move_type" "fmove")
@@ -2636,11 +2639,10 @@
 
 
 (define_insn "movsidf2_high_rv32"
-  [(set (match_operand:SI  0 "register_operand""=  r")
-   (truncate:SI
-(lshiftrt:DF
-(match_operand:DF 1 "register_operand" "zmvf")
-(const_int 32]
+  [(set (match_operand:SI  0 "register_operand" "=  r")
+   (unspec:SI
+   [(match_operand:DF 1 "register_operand" "zmvf")]
+   UNSPEC_FMVH_X_D))]
   "TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA"
   "fmvh.x.d\t%0,%1"
   [(set_attr "move_type" "fmove")
diff --git a/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c 
b/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c
new file mode 100644
index ..e00047b09e3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c
@@ -0,0 +1,9 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfa -mabi=ilp32d -O2 -g" } */
+
+unsigned int
+foo (double a) {
+  unsigned int tt = *(unsigned long long *)&a & 0x;
+  return tt;
+}


[gcc r15-3680] tree-optimization/116585 - SSA corruption with split_constant_offset

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750

commit r15-3680-g1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750
Author: Richard Biener 
Date:   Wed Sep 18 09:52:55 2024 +0200

tree-optimization/116585 - SSA corruption with split_constant_offset

split_constant_offset when looking through SSA defs can end up
picking SSA leafs that are subject to abnormal coalescing.  This
can lead downstream consumers to insert code based on the
result (like from dataref analysis) in places that violate constraints
for abnormal coalescing.  It's best to not expand defs whose operands
are subject to abnormal coalescing - and likewise to do nothing when
a subexpression already has operands like that.

PR tree-optimization/116585
* tree-data-ref.cc (split_constant_offset_1): When either
operand is subject to abnormal coalescing do no further
processing.

* gcc.dg/torture/pr116585.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116585.c | 32 
 gcc/tree-data-ref.cc| 11 ---
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116585.c 
b/gcc/testsuite/gcc.dg/torture/pr116585.c
new file mode 100644
index ..108c481e1043
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116585.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+
+char *s1, *s2;
+extern int* my_alloc (int);
+extern int _setjmp ();
+extern void bar();
+void foo(int s1len, int s2len)
+{
+  int e;
+  e = _setjmp ();
+{
+  int l, i;
+  int *md = my_alloc(((sizeof(int)) * (s1len + 1) * (s2len)));
+  s1len++;
+  for (; s1len; l)
+   for (; s2len; l)
+ for (; s1len; i)
+   {
+ int j = 1;
+ for (; j < s2len; j++)
+   {
+ int cost;
+ if (s1[1] == s2[1])
+   cost = 0;
+ else
+   cost = 1;
+ md[j * s1len ] = ((cost));
+   }
+   }
+  bar();
+}
+}
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 48798f458b80..26e6d9a56572 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -766,6 +766,14 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
   if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
 return false;
 
+  if (TREE_CODE (op0) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
+return false;
+  if (op1
+  && TREE_CODE (op1) == SSA_NAME
+  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
+return false;
+
   switch (code)
 {
 case INTEGER_CST:
@@ -861,9 +869,6 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
 
 case SSA_NAME:
   {
-   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
- return false;
-
gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
enum tree_code subcode;


[gcc r15-3687] [PATCH] RISC-V: Fix th.extu operands exceeding range on rv32.

2024-09-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ec34a4481b63bb5028b2a8c61322a7a3d362b27c

commit r15-3687-gec34a4481b63bb5028b2a8c61322a7a3d362b27c
Author: Xianmiao Qu 
Date:   Wed Sep 18 07:28:44 2024 -0600

[PATCH] RISC-V: Fix th.extu operands exceeding range on rv32.

The Combine Pass may generate zero_extract instructions that are out of 
range.
Drawing from other architectures like AArch64, we should impose restrictions
on the "*th_extu4" pattern.

gcc/
* config/riscv/thead.md (*th_extu4): Fix th.extu
operands exceeding range on rv32.

gcc/testsuite/
* gcc.target/riscv/xtheadbb-extu-4.c: New.

Diff:
---
 gcc/config/riscv/thead.md|  4 +++-
 gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c | 17 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
index 2a3af76b55c2..7a76cc8cf4a9 100644
--- a/gcc/config/riscv/thead.md
+++ b/gcc/config/riscv/thead.md
@@ -85,7 +85,9 @@
(zero_extract:GPR (match_operand:GPR 1 "register_operand" "r")
(match_operand 2 "const_int_operand")
(match_operand 3 "const_int_operand")))]
-  "TARGET_XTHEADBB"
+  "TARGET_XTHEADBB
+   && (UINTVAL (operands[2]) + UINTVAL (operands[3])
+   <= GET_MODE_BITSIZE (mode))"
 {
   operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) - 1);
   return "th.extu\t%0,%1,%2,%3";
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c 
b/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c
new file mode 100644
index ..41d3fc1f5b40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { rv32 } } } */
+/* { dg-options "-march=rv32gc_xtheadbb" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Og" "-Oz" } } */
+
+struct c {
+  int f : 25;
+} d;
+
+int b;
+extern unsigned int e[];
+
+void g()
+{
+  d.f = e[2] >> (b << ~4194303 + 4194332) - 58096371;
+}
+
+/* { dg-final { scan-assembler-not {th.extu\t[ax][0-9]+,[ax][0-9]+,37,13} } } 
*/
\ No newline at end of file


[gcc r15-3688] [PATCH 1/2] RISC-V: Fix the outer_code when calculating the cost of SET expression.

2024-09-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ad5bfc2b7044ba962396de0dabcad1cd54234689

commit r15-3688-gad5bfc2b7044ba962396de0dabcad1cd54234689
Author: Xianmiao Qu 
Date:   Wed Sep 18 07:35:12 2024 -0600

[PATCH 1/2] RISC-V: Fix the outer_code when calculating the cost of SET 
expression.

I think it is a typo. When calculating the 'SET_SRC (x)' cost,
outer_code should be set to SET.

gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Fix the outer_code
when calculating the cost of SET expression.

Diff:
---
 gcc/config/riscv/riscv.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index fbf2da71e10d..7be3939a7f93 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3773,7 +3773,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
  *total = COSTS_N_INSNS (1);
  return true;
}
- riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
+ riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed);
  return true;
}


[gcc r15-3689] Fail vectorization when not using SLP and --param vect-force-slp == 1

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:de1389e24e8dc98b65bc8d40976172214ac4ecc0

commit r15-3689-gde1389e24e8dc98b65bc8d40976172214ac4ecc0
Author: Richard Biener 
Date:   Sun Sep 8 11:21:19 2024 +0200

Fail vectorization when not using SLP and --param vect-force-slp == 1

The following adds --param vect-force-slp to enable the transition
to full SLP.  Full SLP is enforced during stmt analysis where it
detects failed SLP discovery and at loop analysis time where it
avoids analyzing a loop with SLP disabled.  Failure to SLP results
in vectorization failing.

* params.opt (vect-force-slp): New param, default 0.
* doc/invoke.texi (--param vect-force-slp): Document.
* tree-vect-loop.cc (vect_analyze_loop_2): When analyzing
without SLP but --param vect-force-slp is 1 fail.
* tree-vect-stmts.cc (vect_analyze_stmt): Fail vectorization
for non-SLP stmts when --param vect-force-slp is 1.

Diff:
---
 gcc/doc/invoke.texi| 3 +++
 gcc/params.opt | 4 
 gcc/tree-vect-loop.cc  | 6 ++
 gcc/tree-vect-stmts.cc | 6 ++
 4 files changed, 19 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 511b8c8d3111..b91fb9c9cca6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -16955,6 +16955,9 @@ this parameter.  The default value of this parameter is 
50.
 @item vect-induction-float
 Enable loop vectorization of floating point inductions.
 
+@item vect-force-slp
+Force the use of SLP when vectorizing, fail if not possible.
+
 @item vrp-block-limit
 Maximum number of basic blocks before VRP switches to a lower memory algorithm.
 
diff --git a/gcc/params.opt b/gcc/params.opt
index c17ba17b91b0..949b47544980 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1198,6 +1198,10 @@ The maximum factor which the loop vectorizer applies to 
the cost of statements i
 Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 
1) Param Optimization
 Enable loop vectorization of floating point inductions.
 
+-param=vect-force-slp=
+Common Joined UInteger Var(param_vect_force_slp) Init(0) IntegerRange(0, 1) 
Param Optimization
+Force the use of SLP when vectorizing, fail if not possible.
+
 -param=vrp-block-limit=
 Common Joined UInteger Var(param_vrp_block_limit) Init(15) Optimization 
Param
 Maximum number of basic blocks before VRP switches to a fast model with less 
memory requirements.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 62c7f90779fa..d42694d19747 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2891,6 +2891,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal,
   /* This is the point where we can re-start analysis with SLP forced off.  */
 start_over:
 
+  /* When we arrive here with SLP disabled and we are supposed
+ to use SLP for everything fail vectorization.  */
+  if (!slp && param_vect_force_slp)
+return opt_result::failure_at (vect_location,
+  "may need non-SLP handling\n");
+
   /* Apply the suggested unrolling factor, this was determined by the backend
  during finish_cost the first time we ran the analyzis for this
  vector mode.  */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b1353c91fce1..495f45e40e63 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -13353,6 +13353,12 @@ vect_analyze_stmt (vec_info *vinfo,
   return opt_result::success ();
 }
 
+  /* When we arrive here with a non-SLP statement and we are supposed
+ to use SLP for everything fail vectorization.  */
+  if (!node && param_vect_force_slp)
+return opt_result::failure_at (stmt_info->stmt,
+  "needs non-SLP handling\n");
+
   ok = true;
   if (!bb_vinfo
   && (STMT_VINFO_RELEVANT_P (stmt_info)


[gcc r15-3692] hppa: Add peephole2 optimizations for REG+D loads and stores

2024-09-18 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:4b03750f8cda0a8745b10639a8ac7df71aced0cc

commit r15-3692-g4b03750f8cda0a8745b10639a8ac7df71aced0cc
Author: John David Anglin 
Date:   Wed Sep 18 11:02:32 2024 -0400

hppa: Add peephole2 optimizations for REG+D loads and stores

The PA 1.x architecture only supports long displacements in
integer loads and stores.  Floating-point loads and stores
only support short displacements.  As a result, we have to
wait until reload is complete before generating insns with
long displacements.

The PA 2.0 architecture supports long displacements in both
integer and floating-point loads and stores.

The peephole2 optimizations added in this change are only
enabled when 14-bit long displacements aren't supported for
floating-point loads and stores.

2024-09-18  John David Anglin  

gcc/ChangeLog:

* config/pa/pa.h (GENERAL_REGNO_P): Define.
* config/pa/pa.md: Add SImode and SFmode peephole2
patterns to generate loads and stores with long
displacements.

Diff:
---
 gcc/config/pa/pa.h  |   3 ++
 gcc/config/pa/pa.md | 100 
 2 files changed, 103 insertions(+)

diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 7e45c358895b..6fcc2fa2ac76 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -480,6 +480,9 @@ extern rtx hppa_pic_save_rtx (void);
 #define INDEX_REG_CLASS GENERAL_REGS
 #define BASE_REG_CLASS GENERAL_REGS
 
+/* True if register is a general register.  */
+#define GENERAL_REGNO_P(N) ((N) >= 1 && (N) <= 31)
+
 #define FP_REG_CLASS_P(CLASS) \
   ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS)
 
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 1e781efb66b0..f0520bb2c353 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -2280,6 +2280,58 @@
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4")])
 
+; Rewrite RTL using a REG+D store.  This will allow the insn that
+; computes the address to be deleted if the register it sets is dead.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+   (plus:SI (match_operand:SI 1 "register_operand" "")
+(match_operand:SI 2 "const_int_operand" "")))
+   (set (mem:SI (match_dup 0))
+   (match_operand:SI 3 "register_operand" ""))]
+  "!TARGET_64BIT
+   && !INT14_OK_STRICT
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[3]))
+   && REGNO (operands[0]) != REGNO (operands[3])
+   && base14_operand (operands[2], E_SImode)"
+  [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3))
+   (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+  "")
+
+; Rewrite RTL using a REG+D load.  This will allow the insn that
+; computes the address to be deleted if the register it sets is dead.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+   (plus:SI (match_operand:SI 1 "register_operand" "")
+(match_operand:SI 2 "const_int_operand" "")))
+   (set (match_operand:SI 3 "register_operand" "")
+   (mem:SI (match_dup 0)))]
+  "!TARGET_64BIT
+   && !INT14_OK_STRICT
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[3]))
+   && REGNO (operands[0]) != REGNO (operands[3])
+   && REGNO (operands[1]) != REGNO (operands[3])
+   && base14_operand (operands[2], E_SImode)"
+  [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2
+   (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+   (plus:SI (match_operand:SI 1 "register_operand" "")
+(match_operand:SI 2 "const_int_operand" "")))
+   (set (match_operand:SI 3 "register_operand" "")
+   (mem:SI (match_dup 0)))]
+  "!TARGET_64BIT
+   && !INT14_OK_STRICT
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[3]))
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && base14_operand (operands[2], E_SImode)"
+  [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2]
+  "")
+
 ; Rewrite RTL using an indexed store.  This will allow the insn that
 ; computes the address to be deleted if the register it sets is dead.
 (define_peephole2
@@ -4507,6 +4559,54 @@
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4")])
 
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+   (plus:SI (match_operand:SI 1 "register_operand" "")
+(match_operand:SI 2 "const_int_operand" "")))
+   (set (mem:SF (match_dup 0))
+   (match_operand:SF 3 "register_operand" ""))]
+  "!TARGET_64BIT
+   && !INT14_OK_STRICT
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[3]))
+   && REGNO (operands[0]) != REGNO (operands[3])
+   && base14_operand (operands[2], E_SImode)"
+  [(set (mem:SF (plus:SI (match_dup 1) (match_dup 

[gcc r15-3693] jit: Ensure ssize_t is defined

2024-09-18 Thread François-Xavier Coudert via Gcc-cvs
https://gcc.gnu.org/g:fe1ed68000d5e9d41ed48ef1202fd21c8b8c9ff8

commit r15-3693-gfe1ed68000d5e9d41ed48ef1202fd21c8b8c9ff8
Author: Francois-Xavier Coudert 
Date:   Sat May 11 17:08:05 2024 +0200

jit: Ensure ssize_t is defined

On some targets it seems that ssize_t is not defined by any of the
headers transitively included by <stdio.h>.  This leads to a bootstrap
failure when jit is enabled.

gcc/jit/ChangeLog:

* libgccjit.h: Include <sys/types.h>.

Diff:
---
 gcc/jit/libgccjit.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h
index 1d5be27374ec..03bfc0f58a53 100644
--- a/gcc/jit/libgccjit.h
+++ b/gcc/jit/libgccjit.h
@@ -21,6 +21,11 @@ along with GCC; see the file COPYING3.  If not see
 #define LIBGCCJIT_H
 
 #include <stdio.h>
+#ifdef __has_include
+#if __has_include (<sys/types.h>)
+#include <sys/types.h>
+#endif
+#endif
 
 #ifdef __cplusplus
 extern "C" {


[gcc r14-10679] tree-optimization/116258 - fix i386 testcase

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:198b13e248e7adfea2d28c4e66ac9f5006b8f825

commit r14-10679-g198b13e248e7adfea2d28c4e66ac9f5006b8f825
Author: Richard Biener 
Date:   Thu Aug 8 09:35:42 2024 +0200

tree-optimization/116258 - fix i386 testcase

With -march=cascadelake we use vpermilps instead of shufps.

PR tree-optimization/116258
* gcc.target/i386/pr116258.c: Also allow vpermilps.

(cherry picked from commit 5aa4cd913e48cfce3ca0ab58cf6f80f55dbb0f58)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr116258.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr116258.c 
b/gcc/testsuite/gcc.target/i386/pr116258.c
index bd7d3a97b2c8..cb67e4085c5d 100644
--- a/gcc/testsuite/gcc.target/i386/pr116258.c
+++ b/gcc/testsuite/gcc.target/i386/pr116258.c
@@ -10,5 +10,5 @@
   return (x + h(t));
 }
 
-/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-times "shufps|permilps" 1 } } */
 /* { dg-final { scan-assembler-not "unpck" } } */


[gcc r14-10680] aarch64/testsuite: Add testcases for recently fixed PRs

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:090926ba817bee6de7ee210efeea5d43d5335868

commit r14-10680-g090926ba817bee6de7ee210efeea5d43d5335868
Author: Andrew Pinski 
Date:   Wed Aug 7 09:36:38 2024 -0700

aarch64/testsuite: Add testcases for recently fixed PRs

The commit for PR 116258 added an x86_64-specific testcase;
I thought it would be a good idea to add an aarch64 testcase too.
Since it also fixed VLA vectors, add an SVE testcase as well.

Pushed as obvious after a test for aarch64-linux-gnu.

PR middle-end/116258
PR middle-end/116259

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr116258.c: New test.
* gcc.target/aarch64/sve/pr116259-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 2c6174402ea315ecf618cfcba741e8cb18bc5282)

Diff:
---
 gcc/testsuite/gcc.target/aarch64/pr116258.c   | 17 +
 gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c | 12 
 2 files changed, 29 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c 
b/gcc/testsuite/gcc.target/aarch64/pr116258.c
new file mode 100644
index ..e727ad4b72a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosve"
+
+#define vect16 __attribute__((vector_size(16)))
+#define h(a) __builtin_assoc_barrier((a))
+
+ vect16 float  f( vect16 float  x, vect16 float vconstants0)
+{
+  vect16 float  t = (x * (vconstants0[0]));
+  return (x + h(t));
+}
+
+/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-not "dup\t" } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c
new file mode 100644
index ..bb2eed4728c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* PR middle-end/116259 */
+
+#include <arm_sve.h>
+
+/* PAREN_EXPR lowering for VLA vectors was ICEing.
+   It should not be lowered in a similar way as moves
+   are not lowered.  */
+svfloat64_t f(svfloat64_t x)
+{
+  return __builtin_assoc_barrier(x);
+}


[gcc r14-10677] middle-end/115641 - invalid address construction

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:98dc0471d5409701ae700cd7aba8716fdc500401

commit r14-10677-g98dc0471d5409701ae700cd7aba8716fdc500401
Author: Richard Biener 
Date:   Thu Jul 18 13:35:33 2024 +0200

middle-end/115641 - invalid address construction

fold_truth_andor_1 via make_bit_field_ref builds an address of
a CALL_EXPR which isn't valid GENERIC and later causes an ICE.
The following simply avoids the folding for f ().a != 1 || f ().b != 2
as it is a premature optimization anyway.  The alternative would
have been to build a TARGET_EXPR around the call.  To get this far
f () has to be const as otherwise the two calls are not semantically
equivalent for the optimization.

PR middle-end/115641
* fold-const.cc (decode_field_reference): If the inner
reference isn't something we can take the address of, fail.

* gcc.dg/torture/pr115641.c: New testcase.

(cherry picked from commit 3670c70c561656a19f6bff36dd229f18120af127)

Diff:
---
 gcc/fold-const.cc   |  3 +++
 gcc/testsuite/gcc.dg/torture/pr115641.c | 29 +
 2 files changed, 32 insertions(+)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index f496b3436df4..644a39456890 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -4992,6 +4992,9 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   || *pbitsize < 0
   || offset != 0
   || TREE_CODE (inner) == PLACEHOLDER_EXPR
+  /* We eventually want to build a larger reference and need to take
+the address of this.  */
+  || (!REFERENCE_CLASS_P (inner) && !DECL_P (inner))
   /* Reject out-of-bound accesses (PR79731).  */
   || (! AGGREGATE_TYPE_P (TREE_TYPE (inner))
  && compare_tree_int (TYPE_SIZE (TREE_TYPE (inner)),
diff --git a/gcc/testsuite/gcc.dg/torture/pr115641.c 
b/gcc/testsuite/gcc.dg/torture/pr115641.c
new file mode 100644
index ..65fb09ca64fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115641.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+typedef struct {
+  char hours, day, month;
+  short year;
+} T;
+
+T g (void)
+{
+  T now;
+  now.hours = 1;
+  now.day = 2;
+  now.month = 3;
+  now.year = 4;
+  return now;
+}
+
+__attribute__((const)) T f (void)
+{
+  T virk = g ();
+  return virk;
+}
+
+int main ()
+{
+  if (f ().hours != 1 || f ().day != 2 || f ().month != 3 || f ().year != 4)
+__builtin_abort ();
+  return 0;
+}


[gcc r14-10678] tree-optimization/116258 - do not lower PAREN_EXPR of vectors

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:05db1bea8c1d61d8d9cdb8ede5e305766869d136

commit r14-10678-g05db1bea8c1d61d8d9cdb8ede5e305766869d136
Author: Richard Biener 
Date:   Wed Aug 7 13:54:53 2024 +0200

tree-optimization/116258 - do not lower PAREN_EXPR of vectors

The following avoids lowering of PAREN_EXPR of vectors as unsupported
to scalars.  Instead PAREN_EXPR is like a plain move or a VIEW_CONVERT.

PR tree-optimization/116258
* tree-vect-generic.cc (expand_vector_operations_1): Do not
lower PAREN_EXPR.

* gcc.target/i386/pr116258.c: New testcase.

(cherry picked from commit 5b97d1a2102dca57918947d7e40a6ca68871)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr116258.c | 14 ++
 gcc/tree-vect-generic.cc |  9 +++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr116258.c 
b/gcc/testsuite/gcc.target/i386/pr116258.c
new file mode 100644
index ..bd7d3a97b2c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116258.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#define vect16 __attribute__((vector_size(16)))
+#define h(a) __builtin_assoc_barrier((a))
+
+ vect16 float  f( vect16 float  x, vect16 float vconstants0)
+{
+  vect16 float  t = (x * (vconstants0[0]));
+  return (x + h(t));
+}
+
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "unpck" } } */
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index ab640096ca26..86d273923bb5 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -2190,10 +2190,15 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi,
}
 }
 
+  /* Plain moves do not need lowering.  */
+  if (code == SSA_NAME
+  || code == VIEW_CONVERT_EXPR
+  || code == PAREN_EXPR)
+return;
+
   if (CONVERT_EXPR_CODE_P (code)
   || code == FLOAT_EXPR
-  || code == FIX_TRUNC_EXPR
-  || code == VIEW_CONVERT_EXPR)
+  || code == FIX_TRUNC_EXPR)
 return;
 
   /* The signedness is determined from input argument.  */


[gcc r14-10683] fold: Fix `a * 1j` if a has side effects [PR116454]

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:12c00048d9f3598e57b98ec7723f7356bd255d04

commit r14-10683-g12c00048d9f3598e57b98ec7723f7356bd255d04
Author: Andrew Pinski 
Date:   Wed Aug 21 17:41:38 2024 -0700

fold: Fix `a * 1j` if a has side effects [PR116454]

The problem here was a missing save_expr around arg0 since
it is used twice, once in REALPART_EXPR and once in IMAGPART_EXPR.
This adds the save_expr and reformats the code slightly so it is a
little easier to understand.  It excludes the case when arg0 is
a COMPLEX_EXPR since in that case we'll end up with the distinct
real and imaginary parts.  This is important to retain early
optimization in some testcases.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/116454

gcc/ChangeLog:

* fold-const.cc (fold_binary_loc): Fix `a * +-1i`
by wrapping arg0 with save_expr when it is not COMPLEX_EXPR.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116454-1.c: New test.
* gcc.dg/torture/pr116454-2.c: New test.

Signed-off-by: Andrew Pinski 
Co-Authored-By: Richard Biener  
(cherry picked from commit b07f8a301158e53717b8688cc8ea430b6f02574c)

Diff:
---
 gcc/fold-const.cc | 32 +--
 gcc/testsuite/gcc.dg/torture/pr116454-1.c | 16 
 gcc/testsuite/gcc.dg/torture/pr116454-2.c | 12 
 3 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 644a39456890..869f6363560d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -12081,17 +12081,29 @@ fold_binary_loc (location_t loc, enum tree_code code, 
tree type,
{
  tree rtype = TREE_TYPE (TREE_TYPE (arg0));
  if (real_onep (TREE_IMAGPART (arg1)))
-   return
- fold_build2_loc (loc, COMPLEX_EXPR, type,
-  negate_expr (fold_build1_loc (loc, IMAGPART_EXPR,
-rtype, arg0)),
-  fold_build1_loc (loc, REALPART_EXPR, rtype, 
arg0));
+   {
+ if (TREE_CODE (arg0) != COMPLEX_EXPR)
+   arg0 = save_expr (arg0);
+ tree iarg0 = fold_build1_loc (loc, IMAGPART_EXPR,
+   rtype, arg0);
+ tree rarg0 = fold_build1_loc (loc, REALPART_EXPR,
+   rtype, arg0);
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ negate_expr (iarg0),
+ rarg0);
+   }
  else if (real_minus_onep (TREE_IMAGPART (arg1)))
-   return
- fold_build2_loc (loc, COMPLEX_EXPR, type,
-  fold_build1_loc (loc, IMAGPART_EXPR, rtype, 
arg0),
-  negate_expr (fold_build1_loc (loc, REALPART_EXPR,
-rtype, arg0)));
+   {
+ if (TREE_CODE (arg0) != COMPLEX_EXPR)
+   arg0 = save_expr (arg0);
+ tree iarg0 = fold_build1_loc (loc, IMAGPART_EXPR,
+   rtype, arg0);
+ tree rarg0 = fold_build1_loc (loc, REALPART_EXPR,
+   rtype, arg0);
+ return fold_build2_loc (loc, COMPLEX_EXPR, type,
+ iarg0,
+ negate_expr (rarg0));
+   }
}
 
  /* Optimize z * conj(z) for floating point complex numbers.
diff --git a/gcc/testsuite/gcc.dg/torture/pr116454-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116454-1.c
new file mode 100644
index ..6210dcce4a42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116454-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math" } */
+
+static int t = 0;
+_Complex float f()
+{
+t++;
+return 0;
+}
+int main() {
+   t = 0;
+   /* Would cause f() to be incorrectly invoked twice. */
+   f() * 1j;
+   if (t != 1)
+  __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116454-2.c 
b/gcc/testsuite/gcc.dg/torture/pr116454-2.c
new file mode 100644
index ..a1e1604e6169
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116454-2.c
@@ -0,0 +1,12 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ffast-math" } */
+_Complex float arr[2];
+
+int main() {
+  _Complex float *ptr;
+  ptr = arr;
+  *++ptr * 1j; 
+  /* ptr should only increment once, not twice. */
+  if (ptr != arr + 1)
+__builtin_abort ();
+}


[gcc r14-10681] tree-optimization/116274 - overzealous SLP vectorization

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d5d4f3bae5a9478dc2189e53da933175a6d7b197

commit r14-10681-gd5d4f3bae5a9478dc2189e53da933175a6d7b197
Author: Richard Biener 
Date:   Thu Aug 8 11:36:43 2024 +0200

tree-optimization/116274 - overzealous SLP vectorization

The following tries to address that the vectorizer fails to have
precise knowledge of argument and return calling conventions and
views some accesses as loads and stores that are not.
This is mainly important when doing basic-block vectorization as
otherwise loop indexing would force such arguments to memory.

On x86 the reduction in the number of apparent loads and stores
often dominates cost analysis so the following tries to mitigate
this aggressively by adjusting only the scalar load and store
cost, reducing them to the cost of a simple scalar statement,
but not touching the vector access cost which would be much
harder to estimate.  Thereby we err on the side of not performing
basic-block vectorization.

PR tree-optimization/116274
* tree-vect-slp.cc (vect_bb_slp_scalar_cost): Cost scalar loads
and stores as simple scalar stmts when they access a non-global,
not address-taken variable that doesn't have BLKmode assigned.

* gcc.target/i386/pr116274-2.c: New testcase.

(cherry picked from commit b8ea13ebf1211714503fd72f25c04376483bfa53)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr116274-2.c |  9 +
 gcc/tree-vect-slp.cc   | 12 +++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr116274-2.c 
b/gcc/testsuite/gcc.target/i386/pr116274-2.c
new file mode 100644
index ..d5811344b935
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116274-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-slp2-optimized" } */
+
+struct a { long x,y; };
+long test(struct a a) { return a.x+a.y; }
+
+/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */
+/* { dg-final { scan-assembler-times "addl|leaq" 1 } } */
+/* { dg-final { scan-assembler-not "padd" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 0795605ec527..d0635b7a146c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7102,7 +7102,17 @@ next_lane:
   vect_cost_for_stmt kind;
   if (STMT_VINFO_DATA_REF (orig_stmt_info))
{
- if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
+ data_reference_p dr = STMT_VINFO_DATA_REF (orig_stmt_info);
+ tree base = get_base_address (DR_REF (dr));
+ /* When the scalar access is to a non-global not address-taken
+decl that is not BLKmode assume we can access it with a single
+non-load/store instruction.  */
+ if (DECL_P (base)
+ && !is_global_var (base)
+ && !TREE_ADDRESSABLE (base)
+ && DECL_MODE (base) != BLKmode)
+   kind = scalar_stmt;
+ else if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
kind = scalar_load;
  else
kind = scalar_store;


[gcc r14-10682] tree-optimization/116380 - bogus SSA update with loop distribution

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:8f8a7e1f1904e389e736c0265b4f515f3ce13659

commit r14-10682-g8f8a7e1f1904e389e736c0265b4f515f3ce13659
Author: Richard Biener 
Date:   Wed Aug 21 13:56:40 2024 +0200

tree-optimization/116380 - bogus SSA update with loop distribution

When updating LC PHIs after copying loops we have to handle defs
defined outside of the loop appropriately (by not setting them to
NULL ...).  This mimics how we handle this in the SSA updating
code of the vectorizer.

PR tree-optimization/116380
* tree-loop-distribution.cc (copy_loop_before): Handle
out-of-loop defs appropriately.

* gcc.dg/torture/pr116380.c: New testcase.

(cherry picked from commit af0d2d95a5f767d92bd64f959679fb4612247b0b)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116380.c | 16 
 gcc/tree-loop-distribution.cc   |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116380.c 
b/gcc/testsuite/gcc.dg/torture/pr116380.c
new file mode 100644
index ..5ffd99459d26
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116380.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-scev-cprop" } */
+
+int a[3], d[3], c;
+int f(int e, int b)
+{
+  for (; e < 3; e++)
+{
+  a[0] = 0;
+  if (b)
+   c = b;
+  d[e] = 0;
+  a[e] = 0;
+}
+  return e;
+}
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index c5a05ee151df..cb804ba48ffe 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -980,6 +980,9 @@ copy_loop_before (class loop *loop, bool 
redirect_lc_phi_defs)
  if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME)
{
  tree new_def = get_current_def (USE_FROM_PTR (use_p));
+ if (!new_def)
+   /* Something defined outside of the loop.  */
+   continue;
  SET_USE (use_p, new_def);
}
}


[gcc r14-10684] tree-optimization/116610 - wrong SLP induction bias for mask peeling

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:cacc976a71027e7da8e3438b60da76ecdf990d38

commit r14-10684-gcacc976a71027e7da8e3438b60da76ecdf990d38
Author: Richard Biener 
Date:   Thu Sep 5 11:18:57 2024 +0200

tree-optimization/116610 - wrong SLP induction bias for mask peeling

The following fixes a mistake when applying the bias for peeling via
masking to the initial value of SLP inductions.

This resolves gcc.target/aarch64/sve/peel_ind_1.c (a scan-assembler
only unfortunately) when forcing single-lane SLP for it.

PR tree-optimization/116610
* tree-vect-loop.cc (vectorizable_induction): Use MINUS_EXPR
to apply a mask peeling adjustment.

(cherry picked from commit 6a1a856ba78589f7f5285b00ecd40ba2bbeef8b0)

Diff:
---
 gcc/tree-vect-loop.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index acc6b75fb170..dcd61292caf1 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10228,7 +10228,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
  vec_steps.safe_push (vec_step);
  tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
  if (peel_mul)
-   step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+   step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
 step_mul, peel_mul);
  if (!init_node)
vec_init = gimple_build_vector (&init_stmts, &init_elts);


[gcc r14-10685] middle-end/115426 - wrong gimplification of "rm" asm output operand

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:5b264a4b95b8f27c3c73892892d5c2030d3c8ea7

commit r14-10685-g5b264a4b95b8f27c3c73892892d5c2030d3c8ea7
Author: Richard Biener 
Date:   Tue Jun 11 13:11:08 2024 +0200

middle-end/115426 - wrong gimplification of "rm" asm output operand

When the operand is gimplified to an extract of a register or a
register we have to disallow memory as we otherwise fail to
gimplify it properly.  Instead of

  __asm__("" : "=rm" __imag <r>);

we want

  __asm__("" : "=rm" D.2772);
  _1 = REALPART_EXPR <r>;
  r = COMPLEX_EXPR <_1, D.2772>;

otherwise SSA rewrite will fail and generate wrong code with 'r'
left bare in the asm output.

PR middle-end/115426
* gimplify.cc (gimplify_asm_expr): Handle "rm" output
constraint gimplified to a register (operation).

* gcc.dg/pr115426.c: New testcase.

(cherry picked from commit a4bbdec2be1c9f8fb49276b8a54ee86024ceac17)

Diff:
---
 gcc/gimplify.cc |  8 
 gcc/testsuite/gcc.dg/pr115426.c | 14 ++
 2 files changed, 22 insertions(+)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 5753eb90ff5d..401b663591f6 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -7035,6 +7035,14 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p)
  ret = tret;
}
 
+  /* If the gimplified operand is a register we do not allow memory.  */
+  if (allows_reg
+ && allows_mem
+ && (is_gimple_reg (TREE_VALUE (link))
+ || (handled_component_p (TREE_VALUE (link))
+ && is_gimple_reg (TREE_OPERAND (TREE_VALUE (link), 0)
+   allows_mem = 0;
+
   /* If the constraint does not allow memory make sure we gimplify
  it to a register if it is not already but its base is.  This
 happens for complex and vector components.  */
diff --git a/gcc/testsuite/gcc.dg/pr115426.c b/gcc/testsuite/gcc.dg/pr115426.c
new file mode 100644
index ..02bfc3f21fa7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115426.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu11" } */
+
+_Complex int fcs (_Complex int r)
+{
+  __asm__("" : "=rm" (__imag__ r));
+  return r;
+}
+
+_Complex int fcs2 (_Complex int r)
+{
+  __asm__("" : "=m" (__imag__ r));
+  return r;
+}


[gcc r14-10686] Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a308afb455d648d54430355cb07345a338501348

commit r14-10686-ga308afb455d648d54430355cb07345a338501348
Author: Joern Rennecke 
Date:   Wed Aug 7 02:48:45 2024 +0100

Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.

gcc/testsuite/
* g++.dg/vect/pr115278.cc: Make cast's type agree with
assignment destination WRITE.

(cherry picked from commit b844775283a620b8826adf734ecfc97d820c3611)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr115278.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc 
b/gcc/testsuite/g++.dg/vect/pr115278.cc
index 331075fb2781..df521e43a97c 100644
--- a/gcc/testsuite/g++.dg/vect/pr115278.cc
+++ b/gcc/testsuite/g++.dg/vect/pr115278.cc
@@ -21,7 +21,7 @@ union BitfieldStructUnion {
 BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : 
value_low(value_low), value_high(value_high) {}
 };
 
-volatile uint32_t *WRITE = (volatile unsigned*)0x42;
+volatile uint32_t *WRITE = (volatile uint32_t *)0x42;
 
 void buggy() {
 for (int i = 0; i < runs; i++) {


[gcc r13-9044] doc: Add more alias option and reorder Intel CPU -march documentation

2024-09-18 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:de1d625849a7760da5d5a3a08601d8ac890c6100

commit r13-9044-gde1d625849a7760da5d5a3a08601d8ac890c6100
Author: Haochen Jiang 
Date:   Wed Sep 18 11:20:15 2024 +0800

doc: Add more alias option and reorder Intel CPU -march documentation

This patch is backported from GCC15 with some tweaks.

Since r15-3539, there are requests coming in to add other alias option
documentation. This patch will add all of them, including corei7, 
corei7-avx,
core-avx-i, core-avx2, atom, slm and emeraldrapids.

Also in the patch, I reordered that part of documentation, currently all
the CPUs/products are just all over the place. I regrouped them by
date-to-now products (since the very first CPU to latest Panther Lake), 
P-core
(since the clients become hybrid cores, starting from Sapphire Rapids) and
E-core (since Bonnell to latest Clearwater Forest). In GCC14 and
earlier GCC, Xeon Phi CPUs are still there, I put them after E-core
CPUs.

And in the patch, I refined the product names in documentation.

gcc/ChangeLog:

* doc/invoke.texi: Add corei7, corei7-avx, core-avx-i,
core-avx2, atom, slm and emeraldrapids. Reorder the -march
documentation by splitting them into date-to-now products, P-core,
E-core and Xeon Phi. Refine the product names in documentation.

Diff:
---
 gcc/doc/invoke.texi | 203 +++-
 1 file changed, 105 insertions(+), 98 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0f665ed6779a..28a3d0ae291b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -32372,6 +32372,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3, CX16,
 SAHF and FXSR instruction set support.
 
 @item nehalem
+@itemx corei7
 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support.
 
@@ -32380,17 +32381,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support.
 
 @item sandybridge
+@itemx corei7-avx
 Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set
 support.
 
 @item ivybridge
+@itemx core-avx-i
 Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND
 and F16C instruction set support.
 
 @item haswell
-Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
+@itemx core-avx2
+Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support.
 
@@ -32406,65 +32410,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, 
PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES,
 CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support.
 
-@item bonnell
-Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3
-instruction set support.
-
-@item silvermont
-Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, 
SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND
-instruction set support.
-
-@item goldmont
-Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction
-set support.
-
-@item goldmont-plus
-Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES,
-SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE,
-RDPID and SGX instruction set support.
-
-@item tremont
-Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID,
-SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set
-support.
-
-@item sierraforest
-Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set
-support.
-
-@item grandridge
-Intel Grand Ridge CPU with 64-bit 

[gcc(refs/users/aoliva/heads/testme)] fold truth-and only in ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:15a55a94711d51d95fb6b5ba763903d75e85324e

commit 15a55a94711d51d95fb6b5ba763903d75e85324e
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:35 2024 -0300

fold truth-and only in ifcombine

Diff:
---
 gcc/gimple-fold.cc|  2 ++
 gcc/tree-ssa-ifcombine.cc | 24 +---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 85a0ec028030..5b7d83edbea9 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8738,12 +8738,14 @@ maybe_fold_and_comparisons (tree type,
 op2b, outer_cond_bb))
 return t;
 
+#if 0
   if (tree t = fold_truth_andor_maybe_separate (UNKNOWN_LOCATION,
TRUTH_ANDIF_EXPR, type,
code2, op2a, op2b,
code1, op1a, op1b,
NULL))
 return t;
+#endif
 
   return NULL_TREE;
 }
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 79a4bdd363b9..61480e5fa894 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -399,6 +399,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer2->probability = profile_probability::never ();
 }
 
+/* FIXME: move to a header file.  */
+extern tree
+fold_truth_andor_maybe_separate (location_t loc,
+enum tree_code code, tree truth_type,
+enum tree_code lcode, tree ll_arg, tree lr_arg,
+enum tree_code rcode, tree rl_arg, tree rr_arg,
+tree *separatep);
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv and result_inv indicate whether the conditions
@@ -576,7 +584,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
   else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
   && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
 {
-  tree t;
+  tree t, ts = NULL_TREE;
   enum tree_code inner_cond_code = gimple_cond_code (inner_cond);
   enum tree_code outer_cond_code = gimple_cond_code (outer_cond);
 
@@ -599,7 +607,17 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
outer_cond_code,
gimple_cond_lhs (outer_cond),
gimple_cond_rhs (outer_cond),
-   gimple_bb (outer_cond
+   gimple_bb (outer_cond)))
+ && !(t = ts = (fold_truth_andor_maybe_separate
+(UNKNOWN_LOCATION, TRUTH_ANDIF_EXPR,
+ boolean_type_node,
+ outer_cond_code,
+ gimple_cond_lhs (outer_cond),
+ gimple_cond_rhs (outer_cond),
+ inner_cond_code,
+ gimple_cond_lhs (inner_cond),
+ gimple_cond_rhs (inner_cond),
+ NULL
{
  {
  tree t1, t2;
@@ -636,7 +654,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
  NULL, true, GSI_SAME_STMT);
 }
   /* ??? Fold should avoid this.  */
-  else if (!is_gimple_condexpr_for_cond (t))
+  else if (ts && !is_gimple_condexpr_for_cond (t))
goto gimplify_after_fold;
   if (result_inv)
t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t);


[gcc r15-3683] match.pd: Check trunc_mod vector optab before folding.

2024-09-18 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:6f3b6a451771cd54c98768e7db3c5d58aab2b6aa

commit r15-3683-g6f3b6a451771cd54c98768e7db3c5d58aab2b6aa
Author: Jennifer Schmitz 
Date:   Thu Sep 5 08:10:02 2024 -0700

match.pd: Check trunc_mod vector optab before folding.

In the pattern X - (X / Y) * Y to X % Y, this patch guards the
simplification for vector types by a check for:
1) Support of the mod optab for vectors OR
2) Application before vector lowering for non-VL vectors.
This is to prevent reverting vectorization of modulo to div/mult/sub
if the target does not support vector mod optab.

The patch was bootstrapped and tested with no regression on
aarch64-linux-gnu and x86_64-linux-gnu.
OK for mainline?

Signed-off-by: Jennifer Schmitz 

gcc/
PR tree-optimization/116569
* match.pd: Guard simplification to trunc_mod with check for
mod optab support.

gcc/testsuite/
PR tree-optimization/116569
* gcc.dg/torture/pr116569.c: New test.

Diff:
---
 gcc/match.pd|  7 ++-
 gcc/testsuite/gcc.dg/torture/pr116569.c | 18 ++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5566c0e4c41c..4aa610e22708 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -894,7 +894,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* X - (X / Y) * Y is the same as X % Y.  */
 (simplify
  (minus (convert1? @0) (convert2? (mult:c (trunc_div @@0 @@1) @1)))
- (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+ (if (INTEGRAL_TYPE_P (type)
+  || (VECTOR_INTEGER_TYPE_P (type)
+ && ((optimize_vectors_before_lowering_p ()
+  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
+ || target_supports_op_p (type, TRUNC_MOD_EXPR,
+  optab_vector
   (convert (trunc_mod @0 @1
 
 /* x * (1 + y / x) - y -> x - y % x */
diff --git a/gcc/testsuite/gcc.dg/torture/pr116569.c 
b/gcc/testsuite/gcc.dg/torture/pr116569.c
new file mode 100644
index ..b74c749721bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116569.c
@@ -0,0 +1,18 @@
+/* { dg-additional-options "-mcpu=neoverse-v2" { target aarch64*-*-* } } */
+int a;
+short b, c, e;
+long d, f;
+long g (long h)
+{
+  if (h)
+return h;
+  return d;
+}
+void i (int h[][0][0][0])
+{
+  for (short j; j; j += 3)
+{
+  a = g(h[1][2] ? 0 : h[1][1][1][1]);
+  b = e ?: f % c;
+}
+}


[gcc r15-3685] c++: -Wdangling-reference diagnostic

2024-09-18 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:5c8f9f4d4cebabf85e68c5bdbe2d4ee6646edc7c

commit r15-3685-g5c8f9f4d4cebabf85e68c5bdbe2d4ee6646edc7c
Author: Jason Merrill 
Date:   Mon Sep 16 13:29:05 2024 +0200

c++: -Wdangling-reference diagnostic

The -Wdangling-reference diagnostic talks about the full-expression, but
prints one call, while the full-expression in a declaration is the entire
initialization.  It seems more useful to point out the temporary that the
compiler thinks we might be getting a dangling reference to.

gcc/cp/ChangeLog:

* call.cc (do_warn_dangling_reference): Return temporary
instead of the call it's passed to.
(maybe_warn_dangling_reference): Adjust diagnostic.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wdangling-reference1.C: Adjust diagnostic.

Diff:
---
 gcc/cp/call.cc   | 23 +++
 gcc/testsuite/g++.dg/warn/Wdangling-reference1.C |  2 +-
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 1ecf3aac7051..3f753e2d2f98 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14253,19 +14253,18 @@ reference_like_class_p (tree ctype)
   return false;
 }
 
-/* Helper for maybe_warn_dangling_reference to find a problematic CALL_EXPR
-   that initializes the LHS (and at least one of its arguments represents
-   a temporary, as outlined in maybe_warn_dangling_reference), or NULL_TREE
+/* Helper for maybe_warn_dangling_reference to find a problematic temporary
+   in EXPR (as outlined in maybe_warn_dangling_reference), or NULL_TREE
if none found.  For instance:
 
- const S& s = S().self(); // S::self (&TARGET_EXPR <...>)
- const int& r = (42, f(1)); // f(1)
- const int& t = b ? f(1) : f(2); // f(1)
- const int& u = b ? f(1) : f(g); // f(1)
- const int& v = b ? f(g) : f(2); // f(2)
+ const S& s = S().self(); // S()
+ const int& r = (42, f(1)); // temporary for passing 1 to f
+ const int& t = b ? f(1) : f(2); // temporary for 1
+ const int& u = b ? f(1) : f(g); // temporary for 1
+ const int& v = b ? f(g) : f(2); // temporary for 2
  const int& w = b ? f(g) : f(g); // NULL_TREE
  const int& y = (f(1), 42); // NULL_TREE
- const int& z = f(f(1)); // f(f(1))
+ const int& z = f(f(1)); // temporary for 1
 
EXPR is the initializer.  If ARG_P is true, we're processing an argument
to a function; the point is to distinguish between, for example,
@@ -14365,7 +14364,7 @@ do_warn_dangling_reference (tree expr, bool arg_p)
&& !reference_related_p (TREE_TYPE (rettype),
 TREE_TYPE (arg)))
  continue;
-   return expr;
+   return arg;
  }
  /* Don't warn about member functions like:
  std::any a(...);
@@ -14438,8 +14437,8 @@ maybe_warn_dangling_reference (const_tree decl, tree 
init)
   auto_diagnostic_group d;
   if (warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wdangling_reference,
  "possibly dangling reference to a temporary"))
-   inform (EXPR_LOCATION (call), "the temporary was destroyed at "
-   "the end of the full expression %qE", call);
+   inform (EXPR_LOCATION (call), "%qT temporary created here",
+   TREE_TYPE (call));
 }
 }
 
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C 
b/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C
index a184317dd5c3..5e60a4158367 100644
--- a/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C
@@ -117,7 +117,7 @@ const B& b10 = lox (H{}); // { dg-warning "dangling 
reference" }
 
 struct S {
   const int &r; // { dg-warning "dangling reference" }
-  S() : r(f(10)) { } // { dg-message "destroyed" }
+  S() : r(f(10)) { } // { dg-message "created" }
 };
 
 // From cppreference.


[gcc r15-3684] c++: -Wdangling-reference and empty class [PR115361]

2024-09-18 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:8733d5d3873977d6ca82d71b28728650f988e9c8

commit r15-3684-g8733d5d3873977d6ca82d71b28728650f988e9c8
Author: Jason Merrill 
Date:   Sun Sep 15 13:50:04 2024 +0200

c++: -Wdangling-reference and empty class [PR115361]

We can't have a dangling reference to an empty class unless it's
specifically to that class or one of its bases.  This was giving a
false positive on the _ExtractKey pattern in libstdc++ hashtable.h.

This also adjusts the order of arguments to reference_related_p, which
is relevant for empty classes (unlike scalars).

Several of the classes in the testsuite needed to gain data members to
continue to warn.

PR c++/115361

gcc/cp/ChangeLog:

* call.cc (do_warn_dangling_reference): Check is_empty_class.

gcc/testsuite/ChangeLog:

* g++.dg/ext/attr-no-dangling6.C
* g++.dg/ext/attr-no-dangling7.C
* g++.dg/ext/attr-no-dangling8.C
* g++.dg/ext/attr-no-dangling9.C
* g++.dg/warn/Wdangling-reference1.C
* g++.dg/warn/Wdangling-reference2.C
* g++.dg/warn/Wdangling-reference3.C: Make classes non-empty.
* g++.dg/warn/Wdangling-reference23.C: New test.

Diff:
---
 gcc/cp/call.cc| 12 +++-
 gcc/testsuite/g++.dg/ext/attr-no-dangling6.C  |  6 +++---
 gcc/testsuite/g++.dg/ext/attr-no-dangling7.C  |  6 +++---
 gcc/testsuite/g++.dg/ext/attr-no-dangling8.C  |  2 ++
 gcc/testsuite/g++.dg/ext/attr-no-dangling9.C  |  1 +
 gcc/testsuite/g++.dg/warn/Wdangling-reference1.C  |  1 +
 gcc/testsuite/g++.dg/warn/Wdangling-reference2.C  |  2 +-
 gcc/testsuite/g++.dg/warn/Wdangling-reference23.C | 14 ++
 gcc/testsuite/g++.dg/warn/Wdangling-reference3.C  |  1 +
 9 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 664088eed9c7..1ecf3aac7051 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14356,12 +14356,14 @@ do_warn_dangling_reference (tree expr, bool arg_p)
if ((arg = do_warn_dangling_reference (arg, /*arg_p=*/true)))
  {
/* If we know the temporary could not bind to the return type,
-  don't warn.  This is for scalars only because for classes
-  we can't be sure we are not returning its sub-object.  */
-   if (SCALAR_TYPE_P (TREE_TYPE (arg))
+  don't warn.  This is for scalars and empty classes only
+  because for other classes we can't be sure we are not
+  returning its sub-object.  */
+   if ((SCALAR_TYPE_P (TREE_TYPE (arg))
+|| is_empty_class (TREE_TYPE (arg)))
&& TYPE_REF_P (rettype)
-   && !reference_related_p (TREE_TYPE (arg),
-TREE_TYPE (rettype)))
+   && !reference_related_p (TREE_TYPE (rettype),
+TREE_TYPE (arg)))
  continue;
return expr;
  }
diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C 
b/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C
index 5b349e8e6827..1fc426d20d3d 100644
--- a/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C
+++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C
@@ -2,9 +2,9 @@
 // { dg-do compile { target c++20 } }
 // { dg-options "-Wdangling-reference" }
 
-class X { };
-const X x1;
-const X x2;
+class X { int i; };
+const X x1 {};
+const X x2 {};
 
 constexpr bool val () { return true; }
 struct ST { static constexpr bool value = true; };
diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C 
b/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C
index a5fb809e6bdb..04c6badf0b6f 100644
--- a/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C
+++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C
@@ -2,9 +2,9 @@
 // { dg-do compile { target c++20 } }
 // { dg-options "-Wdangling-reference" }
 
-class X { };
-const X x1;
-const X x2;
+class X { int i; };
+const X x1 {};
+const X x2 {};
 
 template
 [[gnu::no_dangling(N)]] const X& get(const int& i); // { dg-error "parameter 
packs not expanded" }
diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C 
b/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C
index 8208d751a4bb..aa196315a38a 100644
--- a/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C
+++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C
@@ -8,6 +8,7 @@ template constexpr bool is_reference_v = true;
 
 template 
 struct [[gnu::no_dangling(is_reference_v)]] S {
+  int i;
   int &foo (const int &);
 };
 
@@ -15,6 +16,7 @@ template 
 struct X {
   template 
   struct [[gnu::no_dangling(is_reference_v && is_reference_v)]] Y {
+int i;
 int &foo (const int &);
   };
 };
diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling9.C 
b/gcc/testsuite/g++.dg/ext/attr-no-dangling9.C
index 65b4f7145a92..d7fd897de539 100644
--- a/gcc/t

[gcc r13-9033] tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop distribution

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:062168c8bd4dbca659a5c6cc581f40e409f7d2ad

commit r13-9033-g062168c8bd4dbca659a5c6cc581f40e409f7d2ad
Author: Richard Biener 
Date:   Thu May 23 14:36:39 2024 +0200

tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop 
distribution

Forgot a check for an SSA name before trying to replace a PHI arg with
its current definition.

PR tree-optimization/115197
* tree-loop-distribution.cc (copy_loop_before): Constant PHI
args remain the same.

* gcc.dg/pr115197.c: New testcase.

(cherry picked from commit 2b2476d4d18c92b8aba3567ebccd2100c2f7c258)

Diff:
---
 gcc/testsuite/gcc.dg/pr115197.c | 14 ++
 gcc/tree-loop-distribution.cc   |  7 +--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr115197.c b/gcc/testsuite/gcc.dg/pr115197.c
new file mode 100644
index ..00d674b3bd9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115197.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-scev-cprop -ftree-pre 
-ftree-loop-distribute-patterns" } */
+
+int a, b[2], c, d, e, f[2];
+int main() {
+  while (a)
+if (d) {
+  if (e)
+return 0;
+  for (; c; c++)
+f[c] = 0 < (b[c] = ~(f[c + 1] < a));
+}
+  return 0;
+}
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index 3d92d1c73b5f..907610d56704 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -963,8 +963,11 @@ copy_loop_before (class loop *loop, bool 
redirect_lc_phi_defs)
  if (virtual_operand_p (gimple_phi_result (phi)))
continue;
  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
- tree new_def = get_current_def (USE_FROM_PTR (use_p));
- SET_USE (use_p, new_def);
+ if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME)
+   {
+ tree new_def = get_current_def (USE_FROM_PTR (use_p));
+ SET_USE (use_p, new_def);
+   }
}
 }


[gcc r13-9034] tree-optimization/115278 - fix DSE in if-conversion wrt volatiles

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:22c9080c88cd133e048cd9dcacd2fa13d8fd267f

commit r13-9034-g22c9080c88cd133e048cd9dcacd2fa13d8fd267f
Author: Richard Biener 
Date:   Fri May 31 10:14:25 2024 +0200

tree-optimization/115278 - fix DSE in if-conversion wrt volatiles

The following adds the missing guard for volatile stores to the
embedded DSE in the loop if-conversion pass.

PR tree-optimization/115278
* tree-if-conv.cc (ifcvt_local_dce): Do not DSE volatile stores.

* g++.dg/vect/pr115278.cc: New testcase.

(cherry picked from commit 65dbe0ab7cdaf2aa84b09a74e594f0faacf1945c)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr115278.cc | 38 +++
 gcc/tree-if-conv.cc   |  4 +++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc 
b/gcc/testsuite/g++.dg/vect/pr115278.cc
new file mode 100644
index ..331075fb2781
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr115278.cc
@@ -0,0 +1,38 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-additional-options "-fdump-tree-optimized" }
+
+#include <cstdint>
+
+const int runs = 92;
+
+union BitfieldStructUnion {
+struct {
+uint64_t a : 17;
+uint64_t padding: 39;
+uint64_t b : 8;
+} __attribute__((packed));
+
+struct {
+uint32_t value_low;
+uint32_t value_high;
+} __attribute__((packed));
+
+BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : 
value_low(value_low), value_high(value_high) {}
+};
+
+volatile uint32_t *WRITE = (volatile unsigned*)0x42;
+
+void buggy() {
+for (int i = 0; i < runs; i++) {
+BitfieldStructUnion rt{*WRITE, *WRITE};
+
+rt.a = 99;
+rt.b = 1;
+
+*WRITE = rt.value_low;
+*WRITE = rt.value_high;
+}
+}
+
+// { dg-final { scan-tree-dump-times "\\\*WRITE\[^\r\n\]* ={v} " 2 "optimized" 
} }
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index b1dbb8706ed6..71f5d98c2129 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3194,7 +3194,9 @@ ifcvt_local_dce (class loop *loop)
   gimple_stmt_iterator gsiprev = gsi;
   gsi_prev (&gsiprev);
   stmt = gsi_stmt (gsi);
-  if (gimple_store_p (stmt) && gimple_vdef (stmt))
+  if (!gimple_has_volatile_ops (stmt)
+ && gimple_store_p (stmt)
+ && gimple_vdef (stmt))
{
  tree lhs = gimple_get_lhs (stmt);
  ao_ref write;


[gcc r13-9035] Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c4525852c20f10c2027dd8aa4ed9f28df1a9a4be

commit r13-9035-gc4525852c20f10c2027dd8aa4ed9f28df1a9a4be
Author: Joern Rennecke 
Date:   Wed Aug 7 02:48:45 2024 +0100

Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.

gcc/testsuite/
* g++.dg/vect/pr115278.cc: Make cast's type agree with
assignment destination WRITE.

(cherry picked from commit b844775283a620b8826adf734ecfc97d820c3611)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr115278.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc 
b/gcc/testsuite/g++.dg/vect/pr115278.cc
index 331075fb2781..df521e43a97c 100644
--- a/gcc/testsuite/g++.dg/vect/pr115278.cc
+++ b/gcc/testsuite/g++.dg/vect/pr115278.cc
@@ -21,7 +21,7 @@ union BitfieldStructUnion {
 BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : 
value_low(value_low), value_high(value_high) {}
 };
 
-volatile uint32_t *WRITE = (volatile unsigned*)0x42;
+volatile uint32_t *WRITE = (volatile uint32_t *)0x42;
 
 void buggy() {
 for (int i = 0; i < runs; i++) {


[gcc r13-9037] tree-optimization/115669 - fix SLP reduction association

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e630a20d8367eb2e1929edcaaa03ffe2951b0851

commit r13-9037-ge630a20d8367eb2e1929edcaaa03ffe2951b0851
Author: Richard Biener 
Date:   Thu Jun 27 11:26:08 2024 +0200

tree-optimization/115669 - fix SLP reduction association

The following avoids associating a reduction path as that might
get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order.
This is a latent issue with SLP reductions but now easily exposed
as we're doing single-lane SLP reductions.

Once we have achieved SLP-only vectorization we can move and update this meta-data.

PR tree-optimization/115669
* tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate
chains that participate in a reduction.

* gcc.dg/vect/pr115669.c: New testcase.

(cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++
 gcc/tree-vect-slp.cc |  3 +++
 2 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c 
b/gcc/testsuite/gcc.dg/vect/pr115669.c
new file mode 100644
index ..361a17a64e68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115669.c
@@ -0,0 +1,22 @@
+/* { dg-additional-options "-fwrapv" } */
+
+#include "tree-vect.h"
+
+int a = 10;
+unsigned b;
+long long c[100];
+int foo()
+{
+  long long *d = c;
+  for (short e = 0; e < a; e++)
+b += ~(d ? d[e] : 0);
+  return b;
+}
+
+int main()
+{
+  check_vect ();
+  if (foo () != -10)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c01dc02afff6..c228087df734 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1897,6 +1897,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   else if (is_a <bb_vec_info> (vinfo)
   /* ???  We don't handle !vect_internal_def defs below.  */
   && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+  /* ???  Do not associate a reduction, this will wreck REDUC_IDX
+ mapping as long as that exists on the stmt_info level.  */
+  && STMT_VINFO_REDUC_IDX (stmt_info) == -1
   && is_gimple_assign (stmt_info->stmt)
   && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt))
   || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)


[gcc r13-9036] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:120f25440da533b4c16268a21feb1d864bb1a843

commit r13-9036-g120f25440da533b4c16268a21feb1d864bb1a843
Author: Richard Biener 
Date:   Tue Jun 25 16:13:02 2024 +0200

tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield

The following makes analysis and transform agree on constraints.

PR tree-optimization/115646
* tree-call-cdce.cc (check_pow): Check for bit_sz values
as allowed by transform.

* gcc.dg/pr115646.c: New testcase.

(cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3)

Diff:
---
 gcc/testsuite/gcc.dg/pr115646.c | 13 +
 gcc/tree-call-cdce.cc   |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c
new file mode 100644
index ..24bc1e45
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115646.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern double pow(double x, double y);
+
+struct S {
+unsigned int a : 3, b : 8, c : 21;
+};
+
+void foo (struct S *p)
+{
+  pow (p->c, 42);
+}
diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc
index 143975dd112f..f8148093058c 100644
--- a/gcc/tree-call-cdce.cc
+++ b/gcc/tree-call-cdce.cc
@@ -260,7 +260,7 @@ check_pow (gcall *pow_call)
   /* If the type of the base is too wide,
  the resulting shrink wrapping condition
 will be too conservative.  */
-  if (bit_sz > MAX_BASE_INT_BIT_SIZE)
+  if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE)
 return false;
 
   return true;


[gcc r13-9039] tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:0dee54fabeb1d7bdf74cdeee080c7e5201008d6e

commit r13-9039-g0dee54fabeb1d7bdf74cdeee080c7e5201008d6e
Author: Richard Biener 
Date:   Sun Jun 30 11:28:11 2024 +0200

tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy

The following factors out the code that preserves SSA info of the LHS
of a SSA copy LHS = RHS when LHS is about to be eliminated to RHS.

PR tree-optimization/115701
* tree-ssanames.h (maybe_duplicate_ssa_info_at_copy): Declare.
* tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy): New
function, split out from ...
* tree-ssa-copy.cc (fini_copy_prop): ... here.
* tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt): ...
and here.

(cherry picked from commit b5c64b413fd5bc03a1a8ef86d005892071e42cbe)

Diff:
---
 gcc/tree-ssa-copy.cc  | 32 ++--
 gcc/tree-ssa-sccvn.cc | 21 ++---
 gcc/tree-ssanames.cc  | 28 
 gcc/tree-ssanames.h   |  3 ++-
 4 files changed, 34 insertions(+), 50 deletions(-)

diff --git a/gcc/tree-ssa-copy.cc b/gcc/tree-ssa-copy.cc
index 4cfa116326f3..8f6954995e0c 100644
--- a/gcc/tree-ssa-copy.cc
+++ b/gcc/tree-ssa-copy.cc
@@ -527,38 +527,10 @@ fini_copy_prop (void)
  || copy_of[i].value == var)
continue;
 
-  /* In theory the points-to solution of all members of the
- copy chain is their intersection.  For now we do not bother
-to compute this but only make sure we do not lose points-to
-information completely by setting the points-to solution
-of the representative to the first solution we find if
-it doesn't have one already.  */
+  /* Duplicate points-to and range info appropriately.  */
   if (copy_of[i].value != var
  && TREE_CODE (copy_of[i].value) == SSA_NAME)
-   {
- basic_block copy_of_bb
-   = gimple_bb (SSA_NAME_DEF_STMT (copy_of[i].value));
- basic_block var_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
- if (POINTER_TYPE_P (TREE_TYPE (var))
- && SSA_NAME_PTR_INFO (var)
- && !SSA_NAME_PTR_INFO (copy_of[i].value))
-   {
- duplicate_ssa_name_ptr_info (copy_of[i].value,
-  SSA_NAME_PTR_INFO (var));
- /* Points-to information is cfg insensitive,
-but [E]VRP might record context sensitive alignment
-info, non-nullness, etc.  So reset context sensitive
-info if the two SSA_NAMEs aren't defined in the same
-basic block.  */
- if (var_bb != copy_of_bb)
-   reset_flow_sensitive_info (copy_of[i].value);
-   }
- else if (!POINTER_TYPE_P (TREE_TYPE (var))
-  && SSA_NAME_RANGE_INFO (var)
-  && !SSA_NAME_RANGE_INFO (copy_of[i].value)
-  && var_bb == copy_of_bb)
-   duplicate_ssa_name_range_info (copy_of[i].value, var);
-   }
+   maybe_duplicate_ssa_info_at_copy (var, copy_of[i].value);
 }
 
   class copy_folder copy_folder;
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index fa1d8d9214ee..55ae05dc4cd2 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -6771,27 +6771,10 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, 
gimple_stmt_iterator *gsi)
 
   /* If this now constitutes a copy duplicate points-to
 and range info appropriately.  This is especially
-important for inserted code.  See tree-ssa-copy.cc
-for similar code.  */
+important for inserted code.  */
   if (sprime
  && TREE_CODE (sprime) == SSA_NAME)
-   {
- basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime));
- if (POINTER_TYPE_P (TREE_TYPE (lhs))
- && SSA_NAME_PTR_INFO (lhs)
- && ! SSA_NAME_PTR_INFO (sprime))
-   {
- duplicate_ssa_name_ptr_info (sprime,
-  SSA_NAME_PTR_INFO (lhs));
- if (b != sprime_b)
-   reset_flow_sensitive_info (sprime);
-   }
- else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  && SSA_NAME_RANGE_INFO (lhs)
-  && ! SSA_NAME_RANGE_INFO (sprime)
-  && b == sprime_b)
-   duplicate_ssa_name_range_info (sprime, lhs);
-   }
+   maybe_duplicate_ssa_info_at_copy (lhs, sprime);
 
   /* Inhibit the use of an inserted PHI on a loop header when
 the address of the memory reference is a simple induction
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 08aa166ef176..0181737b8a30 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -752,6 +752,34 @@ duplicate_ssa_name_range_info (tree name, tree src)
 }
 }
 
+/* For a SSA copy DEST = SRC duplicate SSA info present on DEST to SRC
+   to preser

[gcc r13-9040] tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:29c236c57f272944b1f9fffbe248689fb86e91f4

commit r13-9040-g29c236c57f272944b1f9fffbe248689fb86e91f4
Author: Richard Biener 
Date:   Sun Jun 30 11:34:43 2024 +0200

tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy

The following restricts copying of points-to info from defs that
might be in regions invoking UB and are never executed.

PR tree-optimization/115701
* tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy):
Only copy info from within the same BB.

* gcc.dg/torture/pr115701.c: New testcase.

(cherry picked from commit b77f17c5feec9614568bf2dee7f7d811465ee4a5)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr115701.c | 22 ++
 gcc/tree-ssanames.cc| 22 --
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115701.c 
b/gcc/testsuite/gcc.dg/torture/pr115701.c
new file mode 100644
index ..9b7c34b23d78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115701.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* IPA PTA disables local PTA recompute after IPA.  */
+/* { dg-additional-options "-fipa-pta" } */
+
+int a, c, d;
+static int b;
+int main()
+{
+  int *e = &a, **f = &e;
+  while (1) {
+int **g, ***h = &f;
+if (c)
+  *g = e;
+else if (!b)
+  break;
+*e = **g;
+e = &d;
+  }
+  if (e != &a)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 0181737b8a30..de1897a33dff 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -758,25 +758,19 @@ duplicate_ssa_name_range_info (tree name, tree src)
 void
 maybe_duplicate_ssa_info_at_copy (tree dest, tree src)
 {
+  /* While points-to info is flow-insensitive we have to avoid copying
+ info from not executed regions invoking UB to dominating defs.  */
+  if (gimple_bb (SSA_NAME_DEF_STMT (src))
+  != gimple_bb (SSA_NAME_DEF_STMT (dest)))
+return;
+
   if (POINTER_TYPE_P (TREE_TYPE (dest))
   && SSA_NAME_PTR_INFO (dest)
   && ! SSA_NAME_PTR_INFO (src))
-{
-  duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest));
-  /* Points-to information is cfg insensitive,
-but VRP might record context sensitive alignment
-info, non-nullness, etc.  So reset context sensitive
-info if the two SSA_NAMEs aren't defined in the same
-basic block.  */
-  if (gimple_bb (SSA_NAME_DEF_STMT (src))
- != gimple_bb (SSA_NAME_DEF_STMT (dest)))
-   reset_flow_sensitive_info (src);
-}
+duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest));
   else if (INTEGRAL_TYPE_P (TREE_TYPE (dest))
   && SSA_NAME_RANGE_INFO (dest)
-  && ! SSA_NAME_RANGE_INFO (src)
-  && (gimple_bb (SSA_NAME_DEF_STMT (src))
-  == gimple_bb (SSA_NAME_DEF_STMT (dest
+  && ! SSA_NAME_RANGE_INFO (src))
 duplicate_ssa_name_range_info (src, dest);
 }


[gcc r13-9041] tree-optimization/115841 - reduction epilogue placement issue

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e87c0c7f7ab1e7acd0ffbac0b15e020275f97ca8

commit r13-9041-ge87c0c7f7ab1e7acd0ffbac0b15e020275f97ca8
Author: Richard Biener 
Date:   Tue Jul 16 11:53:17 2024 +0200

tree-optimization/115841 - reduction epilogue placement issue

When emitting the compensation to the vectorized main loop for
a vector reduction value to be re-used in the vectorized epilogue
we fail to place it in the correct block when the main loop is
known to be entered (no loop_vinfo->main_loop_edge) but the
epilogue is not (a loop_vinfo->skip_this_loop_edge).  The code
currently disregards this situation.

With the recent znver4 cost fix I couldn't trigger this situation
with the testcase but I adjusted it so it could eventually trigger
on other targets.

PR tree-optimization/115841
* tree-vect-loop.cc (vect_transform_cycle_phi): Correctly
place the partial vector reduction for the accumulator
re-use when the main loop cannot be skipped but the
epilogue can.

* gcc.dg/vect/pr115841.c: New testcase.

(cherry picked from commit 016c947b02e79a5c0c0c2d4ad5cb71aa04db3efd)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115841.c | 42 
 gcc/tree-vect-loop.cc|  7 +++---
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115841.c 
b/gcc/testsuite/gcc.dg/vect/pr115841.c
new file mode 100644
index ..aa5c66004a03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115841.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param 
vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+
+/* To trigger the bug costing needs to determine that aligning the A170
+   accesses with a prologue is good and there should be a vectorized
+   epilogue with a smaller vector size, re-using the vector accumulator
+   from the vectorized main loop that's statically known to execute
+   but the epilogue loop is not.  */
+
+static unsigned char xl[192];
+unsigned char A170[192*3];
+
+void jerate (unsigned char *, unsigned char *);
+float foo (unsigned n)
+{
+  jerate (xl, A170);
+
+  unsigned i = 32;
+  int kr = 1;
+  float sfn11s = 0.f;
+  float sfn12s = 0.f;
+  do
+{
+  int krm1 = kr - 1;
+  long j = krm1;
+  float a = (*(float(*)[n])A170)[j];
+  float b = (*(float(*)[n])xl)[j];
+  float c = a * b;
+  float d = c * 6.93149983882904052734375e-1f;
+  float e = (*(float(*)[n])A170)[j+48];
+  float f = (*(float(*)[n])A170)[j+96];
+  float g = d * e;
+  sfn11s = sfn11s + g;
+  float h = f * d;
+  sfn12s = sfn12s + h;
+  kr++;
+}
+  while (--i != 0);
+  float tem = sfn11s + sfn12s;
+  return tem;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8bb2e3ff1c82..7a319e7f98ef 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -8148,14 +8148,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
  /* And the reduction could be carried out using a different sign.  */
  if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
def = gimple_convert (&stmts, vectype_out, def);
- if (loop_vinfo->main_loop_edge)
+ edge e;
+ if ((e = loop_vinfo->main_loop_edge)
+ || (e = loop_vinfo->skip_this_loop_edge))
{
  /* While we'd like to insert on the edge this will split
 blocks and disturb bookkeeping, we also will eventually
 need this on the skip edge.  Rely on sinking to
 fixup optimal placement and insert in the pred.  */
- gimple_stmt_iterator gsi
-   = gsi_last_bb (loop_vinfo->main_loop_edge->src);
+ gimple_stmt_iterator gsi = gsi_last_bb (e->src);
  /* Insert before a cond that eventually skips the
 epilogue.  */
  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))


[gcc r13-9038] tree-optimization/115694 - ICE with complex store rewrite

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:07c12b394dfb424404019b745b5e4a9e938f6693

commit r13-9038-g07c12b394dfb424404019b745b5e4a9e938f6693
Author: Richard Biener 
Date:   Sun Jun 30 13:07:14 2024 +0200

tree-optimization/115694 - ICE with complex store rewrite

The following adds a missed check when forwprop attempts to rewrite
a complex store.

PR tree-optimization/115694
* tree-ssa-forwprop.cc (pass_forwprop::execute): Check the
store is complex before rewriting it.

* g++.dg/torture/pr115694.C: New testcase.

(cherry picked from commit 543a5b9da964f821b9e723ed9c93d6cdca464d47)

Diff:
---
 gcc/testsuite/g++.dg/torture/pr115694.C | 13 +
 gcc/tree-ssa-forwprop.cc|  2 ++
 2 files changed, 15 insertions(+)

diff --git a/gcc/testsuite/g++.dg/torture/pr115694.C 
b/gcc/testsuite/g++.dg/torture/pr115694.C
new file mode 100644
index ..bbce47decf83
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr115694.C
@@ -0,0 +1,13 @@
+// { dg-do compile }
+
+_Complex a;
+typedef struct {
+  double a[2];
+} b;
+void c(b);
+void d()
+{
+  _Complex b1 = a;
+  b t = __builtin_bit_cast (b, b1);
+  c(t);
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 862a7f2b92a7..f05016763c08 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3722,6 +3722,8 @@ pass_forwprop::execute (function *fun)
  && gimple_store_p (use_stmt)
  && !gimple_has_volatile_ops (use_stmt)
  && is_gimple_assign (use_stmt)
+ && (TREE_CODE (TREE_TYPE (gimple_assign_lhs (use_stmt)))
+ == COMPLEX_TYPE)
  && (TREE_CODE (gimple_assign_lhs (use_stmt))
  != TARGET_MEM_REF))
{


[gcc r13-9042] tree-optimization/116057 - wrong code with CCP and vector CTORs

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:ef25f1dd600cc9351c80e3e018d7170e16a2c6ff

commit r13-9042-gef25f1dd600cc9351c80e3e018d7170e16a2c6ff
Author: Richard Biener 
Date:   Wed Jul 24 13:16:35 2024 +0200

tree-optimization/116057 - wrong code with CCP and vector CTORs

The following fixes an issue with CCPs likely_value when faced with
a vector CTOR containing undef SSA names and constants.  This should
be classified as CONSTANT and not UNDEFINED.

PR tree-optimization/116057
* tree-ssa-ccp.cc (likely_value): Also walk CTORs in stmt
operands to look for constants.

* gcc.dg/torture/pr116057.c: New testcase.

(cherry picked from commit 1ea551514b9c285d801ac5ab8d78b22483ff65af)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116057.c | 20 
 gcc/tree-ssa-ccp.cc | 11 +++
 2 files changed, 31 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116057.c 
b/gcc/testsuite/gcc.dg/torture/pr116057.c
new file mode 100644
index ..a7021c8e746e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116057.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-additional-options "-Wno-psabi" } */
+
+#define vect8 __attribute__((vector_size(8)))
+
+vect8 int __attribute__((noipa))
+f(int a)
+{
+  int b;
+  vect8 int t={1,1};
+  if(a) return t;
+  return (vect8 int){0, b};
+}
+
+int main ()
+{
+  if (f(0)[0] != 0)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 6c9da603ef95..074edb68679f 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -759,6 +759,17 @@ likely_value (gimple *stmt)
continue;
   if (is_gimple_min_invariant (op))
has_constant_operand = true;
+  else if (TREE_CODE (op) == CONSTRUCTOR)
+   {
+ unsigned j;
+ tree val;
+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (op), j, val)
+   if (CONSTANT_CLASS_P (val))
+ {
+   has_constant_operand = true;
+   break;
+ }
+   }
 }
 
   if (has_constant_operand)


[gcc r15-3682] reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.

2024-09-18 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:5bfb91c14f98f6750281217f737b3d95c4e73584

commit r15-3682-g5bfb91c14f98f6750281217f737b3d95c4e73584
Author: Georg-Johann Lay 
Date:   Fri Sep 6 11:23:06 2024 +0200

reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.

The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer.  As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.

PR rtl-optimization/116326
gcc/
* reload1.cc (reg_eliminate_1): Initialize from
RELOAD_ELIMINABLE_REGS if defined.
* config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from 
ELIMINABLE_REGS.
(ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
* doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
: Add documentation.
* doc/tm.texi: Rebuild.
gcc/testsuite/
* gcc.target/avr/torture/lra-pr116324.c: New test.
* gcc.target/avr/torture/lra-pr116325.c: New test.

Diff:
---
 gcc/config/avr/avr.h   |   9 +-
 gcc/doc/tm.texi|   8 ++
 gcc/doc/tm.texi.in |   8 ++
 gcc/reload1.cc |   6 ++
 .../gcc.target/avr/torture/lra-pr116324.c  |  86 +++
 .../gcc.target/avr/torture/lra-pr116325.c  | 117 +
 6 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 1cf4180e5343..3fa2ee76c435 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -308,12 +308,19 @@ enum reg_class {
 
 #define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
 
-#define ELIMINABLE_REGS {  \
+#define RELOAD_ELIMINABLE_REGS {   \
 { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },   \
 { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },   \
 { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
 { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
 
+#define ELIMINABLE_REGS\
+  {\
+{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },  \
+{ ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },  \
+{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM } \
+  }
+
 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)   \
   OFFSET = avr_initial_elimination_offset (FROM, TO)
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cc33084ed322..9e520429ba91 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4005,6 +4005,14 @@ Note that the elimination of the argument pointer with 
the stack pointer is
 specified first since that is the preferred elimination.
 @end defmac
 
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}.  This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
 @deftypefn {Target Hook} bool TARGET_CAN_ELIMINATE (const int @var{from_reg}, 
const int @var{to_reg})
 This target hook should return @code{true} if the compiler is allowed to
 try to replace register number @var{from_reg} with register number
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 8af3f4145058..a34674e33c99 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3179,6 +3179,14 @@ Note that the elimination of the argument pointer with 
the stack pointer is
 specified first since that is the preferred elimination.
 @end defmac
 
+@defmac RELOAD_ELIMINABLE_REGS
+Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where
+it takes precedence over @code{ELIMINABLE_REGS}.  This macro can be useful
+during the transition to LRA because there are cases where reload and LRA
+disagree on how eliminable registers should be represented. For an example,
+see @file{avr.h}.
+@end defmac
+
 @hook TARGET_CAN_ELIMINATE
 
 @defmac INITIAL_ELIMINATION_OFFSET (@var{from-reg}, @var{to-reg}, 
@var{offset-var})
diff --git a/gcc/reload1.cc b/gcc/reload1.cc
index 2e059b099703..120328e0f2f7 100644
--- a/gcc/reload1.cc
+++ b/gcc/reload1.cc
@@ -283,7 +283,13 @@ static const struct elim_table_1
   const int to;
 } reg_eliminate_1[] =
 
+  /* Reload and LRA don't agree on how a multi-register frame pointer
+ is represented for elimination.  See avr.h for a use case.  */
+#ifdef RELOAD_ELIMINABLE_REGS
+  RELOAD_ELIMINABLE_REGS;
+#else
   ELIMINABLE_REGS;
+#endif
 
 #define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1)
 
diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c 
b/gcc/testsuite/g

[gcc r12-10716] doc: Add more alias option and reorder Intel CPU -march documentation

2024-09-18 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:8483527158024d200b3a9e4edecbe188fa22fdaa

commit r12-10716-g8483527158024d200b3a9e4edecbe188fa22fdaa
Author: Haochen Jiang 
Date:   Wed Sep 18 11:20:15 2024 +0800

doc: Add more alias option and reorder Intel CPU -march documentation

This patch is backported from GCC15 with some tweaks.

Since r15-3539, there are requests coming in to add other alias option
documentation. This patch will add all of them, including corei7, 
corei7-avx,
core-avx-i, core-avx2, atom and slm.

Also in the patch, I reordered that part of documentation, currently all
the CPUs/products are just all over the place. I regrouped them by
date-to-now products (since the very first CPU to latest Panther Lake), 
P-core
(since the clients become hybrid cores, starting from Sapphire Rapids) and
E-core (since Bonnell). In GCC14 and earlier GCC, Xeon Phi CPUs are still
there, I put them after E-core CPUs.

And in the patch, I refined the product names in documentation.

gcc/ChangeLog:

* doc/invoke.texi: Add corei7, corei7-avx, core-avx-i,
core-avx2, atom, and slm. Reorder the -march documentation by
splitting them into date-to-now products, P-core, E-core and
Xeon Phi. Refine the product names in documentation.

Diff:
---
 gcc/doc/invoke.texi | 162 +++-
 1 file changed, 84 insertions(+), 78 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fbfa3241e7f6..5db66718d10b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31449,6 +31449,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3, CX16,
 SAHF and FXSR instruction set support.
 
 @item nehalem
+@itemx corei7
 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support.
 
@@ -31457,17 +31458,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support.
 
 @item sandybridge
+@itemx corei7-avx
 Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set
 support.
 
 @item ivybridge
+@itemx core-avx-i
 Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND
 and F16C instruction set support.
 
 @item haswell
-Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
+@itemx core-avx2
+Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support.
 
@@ -31483,47 +31487,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, 
PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES,
 CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support.
 
-@item bonnell
-Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3
-instruction set support.
-
-@item silvermont
-Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, 
SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND
-instruction set support.
-
-@item goldmont
-Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction
-set support.
-
-@item goldmont-plus
-Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES,
-SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE,
-RDPID and SGX instruction set support.
-
-@item tremont
-Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID,
-SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set
-support.
-
-@item knl
-Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
-RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
-AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support.
-
-@item knm
-Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
-RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
-AVX512PF, AVX512ER, AVX512F,

[gcc r15-3700] i386: Add missing avx512f-mask-type.h include

2024-09-18 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:2b7b8d3bb52a23aa8b1d6e9a2d57c83db2078f73

commit r15-3700-g2b7b8d3bb52a23aa8b1d6e9a2d57c83db2078f73
Author: Haochen Jiang 
Date:   Sat Sep 14 15:55:53 2024 +0800

i386: Add missing avx512f-mask-type.h include

Since commit r15-3594, we fixed the bugs in MASK_TYPE for AVX10.2
testcases, but we missed the following four.

The tests do not FAIL because the binutils part hasn't been merged
yet, so they are reported as UNSUPPORTED. But avx512f-mask-type.h
still needs to be included; otherwise there will be a compile error.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-vpdpbssd-2.c: Include
avx512f-mask-type.h.
* gcc.target/i386/avx10_2-vminmaxsd-2.c: Ditto.
* gcc.target/i386/avx10_2-vminmaxsh-2.c: Ditto.
* gcc.target/i386/avx10_2-vminmaxss-2.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c | 2 ++
 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c| 1 +
 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c| 1 +
 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c| 1 +
 4 files changed, 5 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c
index add9de893511..624a1a8e50ea 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c
@@ -13,6 +13,8 @@
 #define SRC_SIZE (AVX512F_LEN / 8)
 #define SIZE (AVX512F_LEN / 32)
 
+#include "avx512f-mask-type.h"
+
 static void
 CALC (int *r, int *dst, char *s1, char *s2)
 {
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c
index 1e2d78c4068d..f550e09be6c9 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c
@@ -8,6 +8,7 @@
 #include "avx10-helper.h"
 #include 
 #include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
 
 void static
 CALC (double *r, double *s1, double *s2, int R)
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c
index e6a93c403b50..dbf1087d9c3b 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c
@@ -8,6 +8,7 @@
 #include "avx10-helper.h"
 #include 
 #include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
 
 void static
 CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c
index 47177e696409..7baa396a2d3f 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c
@@ -8,6 +8,7 @@
 #include "avx10-helper.h"
 #include 
 #include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
 
 void static
 CALC (float *r, float *s1, float *s2, int R)


[gcc r15-3701] i386: Enhance AVX10.2 convert tests

2024-09-18 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:89e62d42f366cd835022f0ba00ba1d10305ae0ce

commit r15-3701-g89e62d42f366cd835022f0ba00ba1d10305ae0ce
Author: Haochen Jiang 
Date:   Thu Sep 5 11:27:33 2024 +0800

i386: Enhance AVX10.2 convert tests

All of the AVX10.2 convert tests were previously missing mask tests;
this patch adds them.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c: Enhance mask test.
* gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvthf82ph-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c: Ditto.
* gcc.target/i386/avx512f-helper.h: Fix a typo in macro define.

Diff:
---
 .../gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c| 35 ++
 .../gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c | 25 +---
 .../i386/avx10_2-512-vcvtbiasph2bf8s-2.c   | 28 +
 .../gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c | 25 +---
 .../i386/avx10_2-512-vcvtbiasph2hf8s-2.c   | 25 +---
 .../gcc.target/i386/avx10_2-512-vcvthf82ph-2.c | 27 +
 .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c  | 25 
 .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c | 25 
 .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c  | 25 
 .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c | 25 
 .../gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c   | 29 +-
 .../gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c  | 27 +
 .../gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c   | 27 +
 .../gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c  | 27 +
 gcc/testsuite/gcc.target/i386/avx512f-helper.h |  2 +-
 15 files changed, 295 insertions(+), 82 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c
index 40dbe18abbe8..5e355ae53d41 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c
@@ -10,24 +10,25 @@
 #include "avx10-helper.h"
 #include 
 
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
 
 static void
 CALC (_Float16 *res_ref, float *src1, float *src2)
 {
   float fp32;
   int i;
-  for (i = 0; i < SIZE_RES / 2; i++)
+  for (i = 0; i < SIZE / 2; i++)
 {
   fp32 = (float) 2 * i + 7 + i * 0.5;
   res_ref[i] = fp32;
   src2[i] = fp32;
 }
-  for (i = SIZE_RES / 2; i < SIZE_RES; i++)
+  for (i = SIZE / 2; i < SIZE; i++)
 {
   fp32 = (float)2 * i + 7 + i * 0.5;
   res_ref[i] = fp32;
-  src1[i - (SIZE_RES / 2)] = fp32;
+  src1[i - (SIZE / 2)] = fp32;
 }
 }
 
@@ -35,17 +36,27 @@ void
 TEST (void)
 {
   int i;
-  UNION_TYPE (AVX512F_LEN, h) res1;
+  UNION_TYPE (AVX512F_LEN, h) res1, res2, res3;
   UNION_TYPE (AVX512F_LEN, ) src1, src2;
-  _Float16 res_ref[SIZE_RES];
-  float fp32;
-  
-  for (i = 0; i < SIZE_RES; i++)
-res1.a[i] = 5;
-  
+  MASK_TYPE mask = MASK_VALUE;
+  _Float16 res_ref[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+res2.a[i] = DEFAULT_VALUE;
+
   CALC (res_ref, src1.a, src2.a);
-  
+
   res1.x = INTRINSIC (_cvtx2ps_ph) (src1.x, src2.x);
   if (UNION_CHECK (AVX512F_LEN, h) (res1, res_ref))
 abort ();
+
+  res2.x = INTRINSIC (_mask_cvtx2ps_ph) (res2.x, mask, src1.x, src2.x);
+  MASK_MERGE (h) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, h) (res2, res_ref))
+abort ();
+
+  res3.x = INTRINSIC (_maskz_cvtx2ps_ph) (mask, src1.x, src2.x);
+  MASK_ZERO (h) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, h) (res3, res_ref))
+abort ();
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c
index 9ce3c9059f1f..08450418daed 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c
@@ -15,6 +15,9 @@
 #define SRC_F16 (AVX512F_LEN / 16)
 #define DST_F8_I8 (AVX512F_LEN_HALF / 8)
 #define DST_F16 (AVX512F_LEN_HALF / 16)
+#define SIZE SRC_F16 
+
+#

[gcc r15-3681] AVR: doc/install.texi - Update avr specific installation notes.

2024-09-18 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:cdeebc71c48db922b14d34c361e15660c1e31fc1

commit r15-3681-gcdeebc71c48db922b14d34c361e15660c1e31fc1
Author: Georg-Johann Lay 
Date:   Tue Sep 17 11:26:19 2024 +0200

AVR: doc/install.texi - Update avr specific installation notes.

gcc/
* doc/install.texi (Host/Target specific installation notes for GCC)
[avr]: Update web links to AVR-LibC and AVR Options.
Remove outdated note about Binutils.

Diff:
---
 gcc/doc/install.texi | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1ca0f14b44d1..e339d736969a 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -2695,7 +2695,7 @@ functions like @code{__addsf3} to be omitted from 
@file{libgcc.a} on
 the assumption that it will be provided by @file{libm.a}.  For more
 technical details, cf. @uref{https://gcc.gnu.org/PR54461,,PR54461}.
 It is not supported for
-RTEMS configurations, which currently use newlib.  The option is
+RTEMS configurations, which currently use Newlib.  The option is
 supported since version 4.7.2 and is the default in 4.8.0 and newer.
 
 @item --with-double=@{32|64|32,64|64,32@}
@@ -4007,27 +4007,23 @@ applications.  There are no standard Unix 
configurations.
 Collection (GCC)},
 @end ifnothtml
 @ifhtml
-See ``AVR Options'' in the main manual
+See @uref{https://gcc.gnu.org/onlinedocs/gcc/AVR-Options.html,,AVR Options}
+in the main manual
 @end ifhtml
 for the list of supported MCU types.
 
-Use @samp{configure --target=avr --enable-languages="c"} to configure GCC@.
+Use @samp{configure --target=avr --enable-languages="c,c++"} to configure GCC@.
 
 Further installation notes and other useful information about AVR tools
 can also be obtained from:
 
 @itemize @bullet
 @item
-@uref{http://www.nongnu.org/avr/,,http://www.nongnu.org/avr/}
+@uref{https://avrdudes.github.io/avr-libc/avr-libc-user-manual/install_tools.html,,AVR-LibC:
 Building and Installing the GNU Tool Chain}
+@item
+@uref{https://github.com/sprintersb/atest?tab=readme-ov-file#running-the-avr-gcc-testsuite-using-the-avrtest-simulator,,AVRtest:
 Running the avr-gcc Testsuite}
 @end itemize
 
-The following error:
-@smallexample
-Error: register required
-@end smallexample
-
-indicates that you should upgrade to a newer version of the binutils.
-
 @html
 
 @end html


[gcc r15-3686] [PATCH] RISC-V: Allow zero operand for DI variants of vssubu.vx

2024-09-18 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0756f335fb6e455641850a76e68f892f1f82ada2

commit r15-3686-g0756f335fb6e455641850a76e68f892f1f82ada2
Author: Bohan Lei 
Date:   Wed Sep 18 07:20:23 2024 -0600

[PATCH] RISC-V: Allow zero operand for DI variants of vssubu.vx

The RISC-V vector machine description relies on the helper function
`sew64_scalar_helper` to emit actual insns for the DI variants of
vssub.vx and vssubu.vx.  This works with vssub.vx, but can cause
problems with vssubu.vx with the scalar operand being constant zero,
because `has_vi_variant_p` returns false, and the operand will be taken
without being loaded into a reg.  The attached testcases can cause an
internal compiler error as a result.

Allowing a constant zero operand in those insns seems to be a simple
solution that affects only a minimal amount of existing code.

gcc/ChangeLog:

* config/riscv/vector.md: Allow zero operand for DI variants of
vssubu.vx

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vssubu-1.c: New test.
* gcc.target/riscv/rvv/base/vssubu-2.c: New test.

Diff:
---
 gcc/config/riscv/vector.md |  8 
 gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c | 11 +++
 gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c | 11 +++
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index d0677325ba1d..92e3061c7f85 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -4400,10 +4400,10 @@
  (sat_int_minus_binop:VI_D
(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
(vec_duplicate:VI_D
- (match_operand: 4 "register_operand"  "  r,  r,  r,  r")))
+ (match_operand: 4 "reg_or_0_operand"  "  rJ, rJ, rJ, rJ")))
  (match_operand:VI_D 2 "vector_merge_operand"   " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
-  "v.vx\t%0,%3,%4%p1"
+  "v.vx\t%0,%3,%z4%p1"
   [(set_attr "type" "")
(set_attr "mode" "")])
 
@@ -4422,10 +4422,10 @@
(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
(vec_duplicate:VI_D
  (sign_extend:
-   (match_operand: 4 "register_operand" "  r,  r,  r,  
r"
+   (match_operand: 4 "reg_or_0_operand" "  rJ, rJ, rJ, 
rJ"
  (match_operand:VI_D 2 "vector_merge_operand"   " vu,  0, vu,  
0")))]
   "TARGET_VECTOR && !TARGET_64BIT"
-  "v.vx\t%0,%3,%4%p1"
+  "v.vx\t%0,%3,%z4%p1"
   [(set_attr "type" "")
(set_attr "mode" "")])
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c
new file mode 100644
index ..f19b42aed04c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } */
+
+#include 
+
+vuint64m1_t test_vssubu_vx_u64m1(vuint64m1_t op1)
+{
+  return __riscv_vssubu_vx_u64m1(op1,0,0);
+}
+
+/* { dg-final { scan-assembler-not {\tvssubu} } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c
new file mode 100644
index ..cb4e4f48a9b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gcv -mabi=ilp32d" } */
+
+#include 
+
+vuint64m1_t test_vssubu_vx_u64m1(vuint64m1_t op1)
+{
+  return __riscv_vssubu_vx_u64m1(op1,0,0);
+}
+
+/* { dg-final { scan-assembler-not {\tvssubu} } } */
\ No newline at end of file


[gcc r14-10687] tree-optimization/116460 - ICE with DCE in forwprop

2024-09-18 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:bdc5937df68a197032e800fc27361037d016cbf1

commit r14-10687-gbdc5937df68a197032e800fc27361037d016cbf1
Author: Richard Biener 
Date:   Mon Aug 26 13:50:00 2024 +0200

tree-optimization/116460 - ICE with DCE in forwprop

The following avoids removing stmts with defs that might still have
uses in the IL before calling simple_dce_from_worklist which might
remove those as that will wreck debug stmt generation.  Instead first
perform use-based DCE and then remove stmts which may have uses in
code that CFG cleanup will remove.  This requires tracking stmts
in to_remove by their SSA def so we can check whether it was removed
before without running into the issue that PHIs can be ggc_free()d
upon removal.  So this adds to_remove_defs in addition to to_remove
which has to stay to track GIMPLE_NOPs we want to elide.

PR tree-optimization/116460
* tree-ssa-forwprop.cc (pass_forwprop::execute): First do
simple_dce_from_worklist and then remove stmts in to_remove.
Track defs to be removed in to_remove_defs.

* g++.dg/torture/pr116460.C: New testcase.

(cherry picked from commit 172637cf0d9b7b2798f83b9c5f9598b449675cb0)

Diff:
---
 gcc/testsuite/g++.dg/torture/pr116460.C | 609 
 gcc/tree-ssa-forwprop.cc|  38 +-
 2 files changed, 637 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/g++.dg/torture/pr116460.C 
b/gcc/testsuite/g++.dg/torture/pr116460.C
new file mode 100644
index ..3c7d6372fba2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116460.C
@@ -0,0 +1,609 @@
+// { dg-do compile }
+// { dg-additional-options "-g" }
+
+namespace std {
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+void __throw_length_error(const char *) __attribute__((__noreturn__, 
__cold__));
+}
+extern "C++" {
+namespace std __attribute__((__visibility__("default"))) {
+  template  struct __is_integer {
+enum { __value = 1 };
+  };
+  template  struct __is_nonvolatile_trivially_copyable {
+enum { __value = __is_trivially_copyable(_Tp) };
+  };
+  template  struct __memcpyable {};
+  template 
+  struct __memcpyable<_Tp *, _Tp *> : __is_nonvolatile_trivially_copyable<_Tp> 
{
+  };
+  template 
+  struct __memcpyable<_Tp *, const _Tp *>
+  : __is_nonvolatile_trivially_copyable<_Tp> {};
+  template  struct __is_move_iterator {
+enum { __value = 0 };
+  };
+  template  inline _Iterator __miter_base(_Iterator __it) {
+return __it;
+  }
+} // namespace )
+}
+namespace __gnu_cxx __attribute__((__visibility__("default"))) {
+  template 
+  struct __is_integer_nonstrict : public std::__is_integer<_Tp> {
+using std::__is_integer<_Tp>::__value;
+enum { __width = __value ? sizeof(_Tp) * 8 : 0 };
+  };
+  template  struct __numeric_traits_integer {
+static const bool __is_signed = (_Value)(-1) < 0;
+static const int __digits =
+__is_integer_nonstrict<_Value>::__width - __is_signed;
+static const _Value __max =
+__is_signed ? (_Value)1 << (__digits - 1)) - 1) << 1) + 1)
+: ~(_Value)0;
+  };
+  template 
+  struct __numeric_traits : public __numeric_traits_integer<_Value> {};
+} // namespace )
+namespace std __attribute__((__visibility__("default"))) {
+  template  struct integral_constant {
+static constexpr _Tp value = __v;
+using type = integral_constant<_Tp, __v>;
+  };
+  template  using __bool_constant = integral_constant;
+  using true_type = __bool_constant;
+  using false_type = __bool_constant;
+  template  struct enable_if {};
+  template  struct enable_if { using type = _Tp; };
+  template 
+  using __enable_if_t = typename enable_if<_Cond, _Tp>::type;
+  template  struct __conditional {
+template  using type = _Tp;
+  };
+  template 
+  using __conditional_t =
+  typename __conditional<_Cond>::template type<_If, _Else>;
+  namespace __detail {
+  template  auto __and_fn(...) -> false_type;
+  }
+  template 
+  struct __and_ : decltype(__detail::__and_fn<_Bn...>(0)) {};
+  template  struct __not_ : __bool_constant 
{};
+  template  using __void_t = void;
+  template 
+  struct is_trivial : public __bool_constant<__is_trivial(_Tp)> {};
+  template  _Up __declval(int);
+  template  auto declval() noexcept->decltype(__declval<_Tp>(0));
+  template 
+  using __is_constructible_impl =
+  __bool_constant<__is_constructible(_Tp, _Args...)>;
+  template 
+  struct __add_lvalue_reference_helper {
+using type = _Tp &;
+  };
+  template 
+  using __add_lval_ref_t = typename __add_lvalue_reference_helper<_Tp>::type;
+  template 
+  struct is_copy_constructible
+  : public __is_constructible_impl<_Tp, __add_lval_ref_t> {};
+  template 
+  struct __add_rvalue_reference_helper {
+using type = _Tp;
+  };
+  template 
+  using __add_rval_ref_t = typename __add_rvalue_reference_helper<_Tp>::type;
+  template 
+  struct is_move_

[gcc r15-3696] libstdc++: add braces

2024-09-18 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:aa338bdd46a4946e9d5ac0923ce9bf9bc621c852

commit r15-3696-gaa338bdd46a4946e9d5ac0923ce9bf9bc621c852
Author: Jason Merrill 
Date:   Sun Sep 15 11:48:46 2024 +0200

libstdc++: add braces

GCC compiles with -fno-exceptions, so __throw_exception_again is a no-op,
and compilation gives a -Wempty-body warning here, so let's wrap it as is
already done in a few other files.

libstdc++-v3/ChangeLog:

* include/bits/basic_ios.h: Add braces.

Diff:
---
 libstdc++-v3/include/bits/basic_ios.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/basic_ios.h 
b/libstdc++-v3/include/bits/basic_ios.h
index bc3be4d2e371..2c2334d0fe3c 100644
--- a/libstdc++-v3/include/bits/basic_ios.h
+++ b/libstdc++-v3/include/bits/basic_ios.h
@@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Turn this on without causing an ios::failure to be thrown.
_M_streambuf_state |= __state;
if (this->exceptions() & __state)
- __throw_exception_again;
+ { __throw_exception_again; }
   }
 
   /**


[gcc r15-3702] doc: Add more alias option and reorder Intel CPU -march documentation

2024-09-18 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:877fb9bdb06d18df51c6043f74dde66fe6f46b78

commit r15-3702-g877fb9bdb06d18df51c6043f74dde66fe6f46b78
Author: Haochen Jiang 
Date:   Wed Sep 18 11:20:15 2024 +0800

doc: Add more alias option and reorder Intel CPU -march documentation

Since r15-3539, there are requests coming in to add other alias option
documentation. This patch will add all of them, including corei7, 
corei7-avx,
core-avx-i, core-avx2, atom, slm, gracemont and emeraldrapids.

Also in the patch, I reordered that part of documentation, currently all
the CPUs/products are just all over the place. I regrouped them by
date-to-now products (since the very first CPU to latest Panther Lake), 
P-core
(since the clients become hybrid cores, starting from Sapphire Rapids) and
E-core (since Bonnell to latest Clearwater Forest).

And in the patch, I refined the product names in documentation.

gcc/ChangeLog:

* doc/invoke.texi: Add corei7, corei7-avx, core-avx-i,
core-avx2, atom, slm, gracemont and emeraldrapids. Reorder
the -march documentation by splitting them into date-to-now
products, P-core and E-core. Refine the product names in
documentation.

Diff:
---
 gcc/doc/invoke.texi | 234 +++-
 1 file changed, 121 insertions(+), 113 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b91fb9c9cca6..2bcf71c35f7b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -34601,6 +34601,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3, CX16,
 SAHF and FXSR instruction set support.
 
 @item nehalem
+@itemx corei7
 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support.
 
@@ -34609,16 +34610,19 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, 
SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support.
 
 @item sandybridge
+@itemx corei7-avx
 Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set
 support.
 
 @item ivybridge
+@itemx core-avx-i
 Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND
 and F16C instruction set support.
 
 @item haswell
+@itemx core-avx2
 Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3,
 SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support.
@@ -34635,61 +34639,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, 
PCLMUL, FSGSBASE, RDRND,
 F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES,
 CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support.
 
-@item bonnell
-Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3
-instruction set support.
-
-@item silvermont
-Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, 
SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND
-instruction set support.
-
-@item goldmont
-Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction
-set support.
-
-@item goldmont-plus
-Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES,
-SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE,
-RDPID and SGX instruction set support.
-
-@item tremont
-Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3,
-SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA,
-RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID,
-SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set
-support.
-
-@item sierraforest
-Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT,
-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI,
-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set
-support.
-
-@item grandridge
-Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC,
-XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI,
-MOVDIR64B, CLDEMOTE

[gcc r15-3704] i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2

2024-09-18 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:1cf1bf7899985df31e1ebccb5d6f1ca762991dcf

commit r15-3704-g1cf1bf7899985df31e1ebccb5d6f1ca762991dcf
Author: Hu, Lin1 
Date:   Wed Sep 11 10:10:40 2024 +0800

i386: Add ssemov2, sseicvt2 for some load instructions that use memory on 
operand2

The memory attr of some instructions should be 'load', but these are
'none', currently.

gcc/ChangeLog:

* config/i386/i386.md: Add ssemov2, sseicvt2.
* config/i386/sse.md (sse2_cvtsi2sd): Apply sseicvt2.
(sse2_cvtsi2sdq): Ditto.
(vec_set_0): Apply ssemov2 for 4, 6.

Diff:
---
 gcc/config/i386/i386.md | 11 +++
 gcc/config/i386/sse.md  |  6 --
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c04415149490..9c2a0aa61126 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -539,10 +539,10 @@
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
fxch,fistp,fisttp,frndint,
-   sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+   sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
-   ssecvt,ssecvt1,sseicvt,sseins,
+   ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
@@ -560,10 +560,10 @@
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
  fxch,fistp,fisttp,frndint")
   (const_string "i387")
-(eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+(eq_attr "type" "sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
  ssemul,sseimul,ssediv,sselog,sselog1,
  sseishft,sseishft1,ssecmp,ssecomi,
- ssecvt,ssecvt1,sseicvt,sseins,
+ ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
  sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
   (const_string "sse")
 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
@@ -858,6 +858,9 @@
   mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
  (match_operand 2 "memory_operand"))
   (const_string "load")
+(and (eq_attr "type" "ssemov2,sseicvt2")
+ (match_operand 2 "memory_operand"))
+  (const_string "load")
 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
  (match_operand 3 "memory_operand"))
   (const_string "load")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ae61182d0cc..ff4f33b7b637 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8876,7 +8876,7 @@
cvtsi2sd{l}\t{%2, %0|%0, %2}
vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,noavx,avx")
-   (set_attr "type" "sseicvt")
+   (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -8898,7 +8898,7 @@
cvtsi2sd{q}\t{%2, %0|%0, %2}
vcvtsi2sd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,noavx,avx")
-   (set_attr "type" "sseicvt")
+   (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -11808,6 +11808,8 @@
  (const_string "imov")
(eq_attr "alternative" "14")
  (const_string "fmov")
+   (eq_attr "alternative" "4,6")
+ (const_string "ssemov2")
   ]
   (const_string "ssemov")))
(set (attr "addr")


[gcc(refs/users/aoliva/heads/testme)] rework truth_andor folding into tree-ssa-ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:d041471d649c47763535d673ad689654d3630223

commit d041471d649c47763535d673ad689654d3630223
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:22 2024 -0300

rework truth_andor folding into tree-ssa-ifcombine

Diff:
---
 gcc/fold-const.cc | 1048 +
 gcc/gimple-fold.cc| 1149 +
 gcc/tree-ssa-ifcombine.cc |7 +-
 3 files changed, 1170 insertions(+), 1034 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 6dbb9208dc29..552a706ab6de 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -137,7 +137,6 @@ static tree range_successor (tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code,
tree, tree, tree, tree);
-static tree unextend (tree, int, int, tree);
 static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *);
 static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *);
 static tree fold_binary_op_with_conditional_arg (location_t,
@@ -4701,7 +4700,7 @@ invert_truthvalue_loc (location_t loc, tree arg)
is the original memory reference used to preserve the alias set of
the access.  */
 
-static tree
+tree
 make_bit_field_ref (location_t loc, tree inner, tree orig_inner, tree type,
HOST_WIDE_INT bitsize, poly_int64 bitpos,
int unsignedp, int reversep)
@@ -4951,212 +4950,6 @@ optimize_bit_field_compare (location_t loc, enum tree_code code,
   return lhs;
 }
 
-/* If *R_ARG is a constant zero, and L_ARG is a possibly masked
-   BIT_XOR_EXPR, return 1 and set *r_arg to l_arg.
-   Otherwise, return 0.
-
-   The returned value should be passed to decode_field_reference for it
-   to handle l_arg, and then doubled for r_arg.  */
-static int
-prepare_xor (tree l_arg, tree *r_arg)
-{
-  int ret = 0;
-
-  if (!integer_zerop (*r_arg))
-return ret;
-
-  tree exp = l_arg;
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  tree and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return ret;
-}
-
-  if (TREE_CODE (exp) == BIT_XOR_EXPR)
-{
-  *r_arg = l_arg;
-  return 1;
-}
-
-  return ret;
-}
-
-/* Subroutine for fold_truth_andor_1: decode a field reference.
-
-   If EXP is a comparison reference, we return the innermost reference.
-
-   *PBITSIZE is set to the number of bits in the reference, *PBITPOS is
-   set to the starting bit number.
-
-   If the innermost field can be completely contained in a mode-sized
-   unit, *PMODE is set to that mode.  Otherwise, it is set to VOIDmode.
-
-   *PVOLATILEP is set to 1 if the any expression encountered is volatile;
-   otherwise it is not changed.
-
-   *PUNSIGNEDP is set to the signedness of the field.
-
-   *PREVERSEP is set to the storage order of the field.
-
-   *PMASK is set to the mask used.  This is either contained in a
-   BIT_AND_EXPR or derived from the width of the field.
-
-   *PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any.
-
-   XOR_WHICH is 1 or 2 if EXP was found to be a (possibly masked)
-   BIT_XOR_EXPR compared with zero.  We're to take the first or second
-   operand thereof if so.  It should be zero otherwise.
-
-   Return 0 if this is not a component reference or is one that we can't
-   do anything with.  */
-
-static tree
-decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
-   HOST_WIDE_INT *pbitpos, machine_mode *pmode,
-   int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
-{
-  tree exp = *exp_;
-  tree outer_type = 0;
-  tree and_mask = 0;
-  tree mask, inner, offset;
-  tree unsigned_type;
-  unsigned int precision;
-  HOST_WIDE_INT shiftrt = 0;
-
-  /* All the optimizations using this function assume integer fields.
- There are problems with FP fields since the type_for_size call
- below can fail for, e.g., XFmode.  */
-  if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return NULL_TREE;
-
-  /* We are interested in the bare arrangement of bits, so strip everything
- that doesn't affect the machine mode.  However, record the type of the
- outermost expression if it may matter below.  */
-  if (CONVERT_EXPR_P (exp)
-  || TREE_CODE (exp) == NON_LVALUE_EXPR)
-outer_type = TREE_TYPE (exp);
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return NULL_TREE;
-}
-
-  if (xor_which)
-{
-  gcc_checking_assert (TREE_CODE (exp) == BIT_XOR_EXPR);
-  

[gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:6ce741d00f03f73e1fb3e797e85707aef9cfd832

commit 6ce741d00f03f73e1fb3e797e85707aef9cfd832
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:28 2024 -0300

check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 253 ++---
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 64426bd76977..85a0ec028030 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7395,7 +7396,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7476,9 +7477,10 @@ is_cast_p (tree *name)
   if (gimple_num_ops (def) != 2)
break;
 
-  if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+  if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+   break;
  *name = gimple_assign_rhs1 (def);
  continue;
}
@@ -7515,8 +7517,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
  *name = gimple_assign_rhs1 (def);
  continue;
@@ -7524,7 +7525,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 3:
- ;
+ break;
}
 
   if (gimple_assign_rhs_code (def) != code)
@@ -7569,6 +7570,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+  && !SSA_NAME_IS_DEFAULT_DEF (exp))
+{
+  gimple *def = SSA_NAME_DEF_STMT (exp);
+  if (gimple_assign_load_p (def))
+   {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+   }
+}
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
If EXP is a comparison reference, we return the innermost reference.
@@ -7595,6 +7616,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero.  We're to take the first or second
operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with.  */
 
@@ -7602,7 +7626,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
+   tree *pmask, tree *pand_mask, int xor_which,
+   gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7612,11 +7637,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
  There are problems with FP fields since the type_for_size call
  below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return 0;
+return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
  that doesn't affect the machine mode.  However, record the type of the
@@ -7626,7 +7653,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
 {
   if (TREE_CODE (and_mask) != INTEGER_CST)
-   return 0;
+   return NULL_TREE;
 }
 
   if (xor_which)
@@ -7644,16 +7671,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
 {
   if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-   return 0;
+   return NULL_TREE;
   shiftrt = tree_to_shwi (shift);
   if (shiftrt <= 0)
-   return 0;
+   return NULL_TREE;
 }
 
   if (tree t = is_cast_p (&exp))
 if (!outer_type)
   outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = ge

[gcc/aoliva/heads/testme] (46 commits) support noncontiguous ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 a29037a8f9c7... support noncontiguous ifcombine

It previously pointed to:

 8a7e9581280c... support noncontiguous ifcombine

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  8a7e958... support noncontiguous ifcombine
  4f6753d... support noncontiguous ifcombine
  7b7dfff... relax ifcombine to accept vuses
  e731ae8... fold truth-and only in ifcombine
  fbf1f80... check for mergeable loads, choose insertion points accordin
  b4b872b... rework truth_andor folding into tree-ssa-ifcombine
  8aa412b... assorted improvements for fold_truth_andor_1


Summary of changes (added commits):
---

  a29037a... support noncontiguous ifcombine
  3ed1ed8... refactor ifcombine
  b0b68cb... support noncontiguous ifcombine
  575a4da... relax ifcombine to accept vuses
  15a55a9... fold truth-and only in ifcombine
  6ce741d... check for mergeable loads, choose insertion points accordin
  d041471... rework truth_andor folding into tree-ssa-ifcombine
  d675d49... assorted improvements for fold_truth_andor_1
  d6d8445... c++: fix constexpr cast from void* diag issue [PR116741] (*)
  7ca4868... c++: ICE with -Wtautological-compare in template [PR116534] (*)
  dfe0d43... c++: crash with anon VAR_DECL [PR116676] (*)
  e311dd1... SVE intrinsics: Fold svdiv with all-zero operands to zero v (*)
  008f451... Daily bump. (*)
  a92f54f... aarch64: Improve vector constant generation using SVE INDEX (*)
  58bc39c... modula2: gcc/m2/Make-lang.in fix includes during bootstrap  (*)
  f544838... AVR: Update weblinks to AVR-LibC. (*)
  4af196b... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for SVE i (*)
  f6e629a... PR modula2/116181 Use GCC tree location_t and separate poin (*)
  7fb1117... AVR: Tweak >= and < compares with consts that are 0 mod 256 (*)
  952df9c... riscv: Fix duplicate assmbler label in @tlsdesc insn (*)
  eb67e23... libstdc++: Add .editorconfig files (*)
  48a0f69... vect: Set pattern_stmt_p on the newly created stmt_vec_info (*)
  8d402c3... AVR: Tidy up enum and struct tags. (*)
  9f8e182... AVR: Partially revert r15-3623. (*)
  719edcb... libstdc++: Update link to installation docs (*)
  4f2cd25... Daily bump. (*)
  d204bee... fortran: Remove useless nested end of scalarization chain h (*)
  a9f9391... c++: __extension__ and -Wconditionally-supported (*)
  5ef73ba... c++: conversion location (*)
  2af87d9... libstdc++: Adjust std::span::iterator to be ADL-proof (*)
  1dde83f... libstdc++: Enable most of  for freestanding (*)
  f91fe35... libstdc++: Add assertion for valid facet type arguments (*)
  c5fd1a4... libstdc++: Make PSTL algorithms accept C++20 iterators [PR1 (*)
  368ba7a... c++, coroutines: Fix handling of bool await_suspend() [PR11 (*)
  6e4244e... phi-opt: Improve heuristics for factoring out with constant (*)
  0b31335... vect: release defs of removed statement (*)
  d2f10fc... Mark the copy/move constructor/operator= of auto_bitmap as  (*)
  e07fbc9... Daily bump. (*)
  1dd6dd1... testsuite; Fix execute/pr52286.c for 16bit (*)
  8b5e547... c++: avoid init_priority warning in system header (*)
  005f717... c++: Don't mix timevar_start and auto_cond_timevar for TV_N (*)
  a900349... AVR: Use rtx code copysign. (*)
  99b8be4... libstdc++: Tweak localized formatting for floating-point ty (*)
  01670a4... libstdc++: Refactor loops in std::__platform_semaphore (*)
  49cb715... testsuite: adjust pragma-diag-17.c diagnostics (*)
  bec1f2c... c++: Fix g++.dg/ext/sve-sizeless-1.C regression (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testme' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc/aoliva/heads/testbase] (38 commits) c++: fix constexpr cast from void* diag issue [PR116741]

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 d6d8445c8550... c++: fix constexpr cast from void* diag issue [PR116741]

It previously pointed to:

 b56bd542942b... testsuite: a few more hostedlib adjustments

Diff:

Summary of changes (added commits):
---

  d6d8445... c++: fix constexpr cast from void* diag issue [PR116741] (*)
  7ca4868... c++: ICE with -Wtautological-compare in template [PR116534] (*)
  dfe0d43... c++: crash with anon VAR_DECL [PR116676] (*)
  e311dd1... SVE intrinsics: Fold svdiv with all-zero operands to zero v (*)
  008f451... Daily bump. (*)
  a92f54f... aarch64: Improve vector constant generation using SVE INDEX (*)
  58bc39c... modula2: gcc/m2/Make-lang.in fix includes during bootstrap  (*)
  f544838... AVR: Update weblinks to AVR-LibC. (*)
  4af196b... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for SVE i (*)
  f6e629a... PR modula2/116181 Use GCC tree location_t and separate poin (*)
  7fb1117... AVR: Tweak >= and < compares with consts that are 0 mod 256 (*)
  952df9c... riscv: Fix duplicate assmbler label in @tlsdesc insn (*)
  eb67e23... libstdc++: Add .editorconfig files (*)
  48a0f69... vect: Set pattern_stmt_p on the newly created stmt_vec_info (*)
  8d402c3... AVR: Tidy up enum and struct tags. (*)
  9f8e182... AVR: Partially revert r15-3623. (*)
  719edcb... libstdc++: Update link to installation docs (*)
  4f2cd25... Daily bump. (*)
  d204bee... fortran: Remove useless nested end of scalarization chain h (*)
  a9f9391... c++: __extension__ and -Wconditionally-supported (*)
  5ef73ba... c++: conversion location (*)
  2af87d9... libstdc++: Adjust std::span::iterator to be ADL-proof (*)
  1dde83f... libstdc++: Enable most of  for freestanding (*)
  f91fe35... libstdc++: Add assertion for valid facet type arguments (*)
  c5fd1a4... libstdc++: Make PSTL algorithms accept C++20 iterators [PR1 (*)
  368ba7a... c++, coroutines: Fix handling of bool await_suspend() [PR11 (*)
  6e4244e... phi-opt: Improve heuristics for factoring out with constant (*)
  0b31335... vect: release defs of removed statement (*)
  d2f10fc... Mark the copy/move constructor/operator= of auto_bitmap as  (*)
  e07fbc9... Daily bump. (*)
  1dd6dd1... testsuite; Fix execute/pr52286.c for 16bit (*)
  8b5e547... c++: avoid init_priority warning in system header (*)
  005f717... c++: Don't mix timevar_start and auto_cond_timevar for TV_N (*)
  a900349... AVR: Use rtx code copysign. (*)
  99b8be4... libstdc++: Tweak localized formatting for floating-point ty (*)
  01670a4... libstdc++: Refactor loops in std::__platform_semaphore (*)
  49cb715... testsuite: adjust pragma-diag-17.c diagnostics (*)
  bec1f2c... c++: Fix g++.dg/ext/sve-sizeless-1.C regression (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testbase' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/users/aoliva/heads/testme)] relax ifcombine to accept vuses

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:575a4da1213668119e0e60326a7b18f7c1a342d6

commit 575a4da1213668119e0e60326a7b18f7c1a342d6
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:46 2024 -0300

relax ifcombine to accept vuses

Diff:
---
 gcc/config/i386/t-i386   |  2 ++
 gcc/testsuite/gcc.dg/field-merge-6.c | 26 ++
 gcc/tree-ssa-ifcombine.cc|  2 +-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386
index bf4ae109af98..1b904787ec62 100644
--- a/gcc/config/i386/t-i386
+++ b/gcc/config/i386/t-i386
@@ -79,3 +79,5 @@ s-i386-bt: $(srcdir)/config/i386/i386-builtin-types.awk \
$(AWK) -f $^ > tmp-bt.inc
$(SHELL) $(srcdir)/../move-if-change tmp-bt.inc i386-builtin-types.inc
$(STAMP) $@
+
+insn-attrtab.o-warn = -Wno-error
diff --git a/gcc/testsuite/gcc.dg/field-merge-6.c b/gcc/testsuite/gcc.dg/field-merge-6.c
new file mode 100644
index ..7fd48a138d14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-6.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+/* { dg-shouldfail } */
+
+/* Check that the third compare won't be pulled ahead of the second one,
+   which would prevent the NULL pointer dereference that should cause
+   the execution to fail.  */
+
+struct s {
+  char a, b;
+  int *p;
+};
+
+struct s a = { 0, 1, 0 };
+struct s b = { 0, 0, 0 };
+
+int f () {
+  return (a.a != b.a
+ || *b.p != *a.p
+ || a.b != b.b);
+}
+
+int main() {
+  f ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 61480e5fa894..7678c87e0170 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb)
   enum tree_code rhs_code;
   if (gimple_has_side_effects (stmt)
  || gimple_could_trap_p (stmt)
- || gimple_vuse (stmt)
+ /* || gimple_vuse (stmt) */
  /* We need to rewrite stmts with undefined overflow to use
 unsigned arithmetic but cannot do so for signed division.  */
  || ((ass = dyn_cast <gassign *> (stmt))


[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:a29037a8f9c752e41a906f0eac66ff3792e98bcc

commit a29037a8f9c752e41a906f0eac66ff3792e98bcc
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:55 2024 -0300

support noncontiguous ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 3d57c615d827..79ccc70b2678 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -779,13 +779,13 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb,
if-conversion helper.  We start with BB as the innermost
worker basic-block.  Returns true if a transformation was done.  */
 
-static bool
+static basic_block
 tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 {
   basic_block then_bb = NULL, else_bb = NULL;
 
   if (!recognize_if_then_else (inner_cond_bb, &then_bb, &else_bb))
-return false;
+return NULL;
 
   /* Recognize && and || of two conditions with a common
  then/else block which entry edges we can merge.  That is:
@@ -802,7 +802,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 
   if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb,
   then_bb, else_bb, inner_cond_bb))
-   return true;
+   return bb;
 
   if (forwarder_block_to (else_bb, then_bb))
{
@@ -814,7 +814,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 edge from outer_cond_bb and the forwarder block.  */
  if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb,
   then_bb, else_bb))
-   return true;
+   return bb;
}
   else if (forwarder_block_to (then_bb, else_bb))
{
@@ -826,11 +826,11 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 edge from outer_cond_bb and the forwarder block.  */
  if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb,
   then_bb, then_bb))
-   return true;
+   return bb;
}
 }
 
-  return false;
+  return NULL;
 }
 
 /* Main entry for the tree if-conversion pass.  */
@@ -881,12 +881,14 @@ pass_tree_ifcombine::execute (function *fun)
  inner ones, and also that we do not try to visit a removed
  block.  This is opposite of PHI-OPT, because we cascade the
  combining rather than cascading PHIs. */
+  basic_block seen = NULL;
+  bool changed = false;
   for (i = n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS - 1; i >= 0; i--)
 {
   basic_block bb = bbs[i];
 
   if (safe_is_a <gcond *> (*gsi_last_bb (bb)))
-   if (tree_ssa_ifcombine_bb (bb))
+   if (basic_block outer_bb = tree_ssa_ifcombine_bb (bb))
  {
/* Clear range info from all stmts in BB which is now executed
   conditional on a always true/false condition.  */
@@ -905,7 +907,24 @@ pass_tree_ifcombine::execute (function *fun)
  rewrite_to_defined_overflow (&gsi);
  }
cfg_changed |= true;
+   if (seen)
+ changed |= true;
+   else
+ seen = bb;
+   /* Go back and check whether the modified outer_bb can be further
+  optimized.  ??? How could it?  */
+   do
+ i++;
+   while (bbs[i] != outer_bb);
+   continue;
  }
+
+  if (bb == seen)
+   {
+ gcc_assert (!changed);
+ seen = NULL;
+ changed = false;
+   }
 }
 
   free (bbs);


[gcc(refs/users/aoliva/heads/testme)] refactor ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:3ed1ed8f0533f3f3f4372a2280c4e1c29304cd78

commit 3ed1ed8f0533f3f3f4372a2280c4e1c29304cd78
Author: Alexandre Oliva 
Date:   Thu Sep 19 02:43:51 2024 -0300

refactor ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 181 +++---
 1 file changed, 89 insertions(+), 92 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index eb4317bebdfb..3d57c615d827 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -107,6 +107,14 @@ recognize_if_then_else (basic_block cond_bb,
   if (!*else_bb)
 *else_bb = e->dest;
 
+  gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (cond_bb));
+  if (!cond)
+return false;
+
+  if (CONSTANT_CLASS_P (gimple_cond_lhs (cond))
+  && CONSTANT_CLASS_P (gimple_cond_rhs (cond)))
+return false;
+
   return true;
 }
 
@@ -407,15 +415,67 @@ fold_truth_andor_maybe_separate (location_t loc,
 enum tree_code rcode, tree rl_arg, tree rr_arg,
 tree *separatep);
 
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
+
+static tree
+ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
+   gcond *outer_cond, bool outer_inv,
+   tree cond, bool must_canon,
+   tree cond2)
+{
+  tree t = cond;
+  bool result_inv = inner_inv;
+
+  /* ??? Support intervening blocks.  */
+  if (single_pred (gimple_bb (inner_cond)) != gimple_bb (outer_cond))
+return NULL_TREE;
+
+  /* ??? Use both conditions.  */
+  if (cond2)
+t = fold_build2 (TRUTH_AND_EXPR, TREE_TYPE (t), cond, cond2);
+
+  /* ??? Insert at outer_cond.  */
+  if (result_inv)
+t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t);
+  tree ret = t;
+
+  if (tree tcanon = canonicalize_cond_expr_cond (t))
+ret = t = tcanon;
+  else if (must_canon)
+return NULL_TREE;
+  if (!is_gimple_condexpr_for_cond (t))
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
+  t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond,
+ NULL, true, GSI_SAME_STMT);
+}
+  gimple_cond_set_condition_from_tree (inner_cond, t);
+  update_stmt (inner_cond);
+
+  /* Leave CFG optimization to cfg_cleanup.  */
+  gimple_cond_set_condition_from_tree (outer_cond,
+  outer_inv
+  ? boolean_false_node
+  : boolean_true_node);
+  update_stmt (outer_cond);
+
+  update_profile_after_ifcombine (gimple_bb (inner_cond),
+ gimple_bb (outer_cond));
+
+  return ret;
+}
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
-   inner_inv, outer_inv and result_inv indicate whether the conditions
-   are inverted.
+   inner_inv, outer_inv indicate whether the conditions are inverted.
Returns true if the edges to the common else basic-block were merged.  */
 
 static bool
 ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
-  basic_block outer_cond_bb, bool outer_inv, bool result_inv)
+  basic_block outer_cond_bb, bool outer_inv)
 {
   gimple_stmt_iterator gsi;
   tree name1, name2, bit1, bit2, bits1, bits2;
@@ -454,26 +514,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
   t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t);
   t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
 true, GSI_SAME_STMT);
-  t = fold_build2 (result_inv ? NE_EXPR : EQ_EXPR,
-  boolean_type_node, t2, t);
-  t = canonicalize_cond_expr_cond (t);
-  if (!t)
-   return false;
-  if (!is_gimple_condexpr_for_cond (t))
-   {
- gsi = gsi_for_stmt (inner_cond);
- t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond,
- NULL, true, GSI_SAME_STMT);
-   }
-  gimple_cond_set_condition_from_tree (inner_cond, t);
-  update_stmt (inner_cond);
 
-  /* Leave CFG optimization to cfg_cleanup.  */
-  gimple_cond_set_condition_from_tree (outer_cond,
-   outer_inv ? boolean_false_node : boolean_true_node);
-  update_stmt (outer_cond);
+  t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t);
 
-  update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb);
+  if (!ifcombine_replace_cond (inner_cond, inner_inv,
+  outer_cond, outer_inv,
+  t, true, NULL_TREE))
+   return false;
 
   if (dump_f

[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine

2024-09-18 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:b0b68cbc1ed13ee0c61e0e2d768d997e8a1dfaa8

commit b0b68cbc1ed13ee0c61e0e2d768d997e8a1dfaa8
Author: Alexandre Oliva 
Date:   Tue Sep 17 20:15:50 2024 -0300

support noncontiguous ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 7678c87e0170..eb4317bebdfb 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -798,10 +798,10 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
if (a && b)
 ;
  This requires a single predecessor of the inner cond_bb.  */
-  if (single_pred_p (inner_cond_bb)
-  && bb_no_side_effects_p (inner_cond_bb))
+  for (basic_block bb = inner_cond_bb;
+   single_pred_p (bb) && bb_no_side_effects_p (bb); )
 {
-  basic_block outer_cond_bb = single_pred (inner_cond_bb);
+  basic_block outer_cond_bb = bb = single_pred (bb);
 
   if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb,
   then_bb, else_bb, inner_cond_bb))