[gcc r15-2053] PR tree-optimization/114661: Generalize MULT_EXPR recognition in match.pd.

2024-07-16 Thread Roger Sayle via Gcc-cvs
https://gcc.gnu.org/g:df9451936c6c9e4faea371e3f188e1fc6b6d39e3

commit r15-2053-gdf9451936c6c9e4faea371e3f188e1fc6b6d39e3
Author: Roger Sayle 
Date:   Tue Jul 16 07:58:28 2024 +0100

PR tree-optimization/114661: Generalize MULT_EXPR recognition in match.pd.

This patch resolves PR tree-optimization/114661, by generalizing the set
of expressions that we canonicalize to multiplication.  This extends the
optimization(s) contributed (by me) back in July 2021.
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575999.html

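The existing transformation folds (X*C1)^(X<<C2) into a single multiplication X*C3.
[The remainder of this paragraph was lost when the message was archived.]

For the example given in the PR:

unsigned mul(unsigned char c) {
if (c > 3) __builtin_unreachable();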
return c << 18 | c << 15 |
   c << 12 | c << 9 |
   c << 6 | c << 3 | c;
}

GCC on x86_64 with -O2 previously generated:

mul:movzbl  %dil, %edi
leal(%rdi,%rdi,8), %edx
leal0(,%rdx,8), %eax
movl%edx, %ecx
sall$15, %edx
orl %edi, %eax
sall$9, %ecx
orl %ecx, %eax
orl %edx, %eax
ret

with this patch we now generate:

mul:movzbl  %dil, %eax
imull   $299593, %eax, %eax
ret

2024-07-16  Roger Sayle  
Richard Biener  

gcc/ChangeLog
PR tree-optimization/114661
* match.pd ((X*C1)|(X*C2) to X*(C1+C2)): Allow optional useless
type conversions around multiplications, such as those inserted
by this transformation.

gcc/testsuite/ChangeLog
PR tree-optimization/114661
* gcc.dg/pr114661.c: New test case.

Diff:
---
 gcc/match.pd| 43 +
 gcc/testsuite/gcc.dg/pr114661.c | 10 ++
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3759c64d461f..24a0bbead3e7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4171,30 +4171,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
Likewise, handle (X< 0
-   && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
-   (with { wide_int wone = wi::one (TYPE_PRECISION (type));
+   && (tree_nonzero_bits (@5) & tree_nonzero_bits (@3)) == 0)
+   (with { tree t = type;
+  if (!TYPE_OVERFLOW_WRAPS (t))
+t = unsigned_type_for (t);
+  wide_int wone = wi::one (TYPE_PRECISION (type));
   wide_int c = wi::add (wi::to_wide (@2),
 wi::lshift (wone, wi::to_wide (@4))); }
-(mult @1 { wide_int_to_tree (type, c); }
+(convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })
  (simplify
-  (op:c (mult:s@0 @1 INTEGER_CST@2)
+  (op:c (nop_convert?:s@3 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
@1)
-  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
-   && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
-   (mult @1
-{ wide_int_to_tree (type,
-wi::add (wi::to_wide (@2), 1)); })))
+  (if (INTEGRAL_TYPE_P (type)
+   && (tree_nonzero_bits (@3) & tree_nonzero_bits (@1)) == 0)
+   (with { tree t = type;
+  if (!TYPE_OVERFLOW_WRAPS (t))
+t = unsigned_type_for (t);
+  wide_int c = wi::add (wi::to_wide (@2), 1); }
+(convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })
  (simplify
   (op (lshift:s@0 @1 INTEGER_CST@2)
   (lshift:s@3 @1 INTEGER_CST@4))
diff --git a/gcc/testsuite/gcc.dg/pr114661.c b/gcc/testsuite/gcc.dg/pr114661.c
new file mode 100644
index 000000000000..e6b5c69dba86
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114661.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+unsigned mul(unsigned char c) {
+if (c > 3) __builtin_unreachable();
+return c << 18 | c << 15 |
+c << 12 | c << 9 |
+c << 6 | c << 3 | c;
+}
+/* { dg-final { scan-tree-dump-times " \\* 299593" 1 "evrp" } } */


[gcc(refs/users/meissner/heads/work171-bugs)] Fix last change.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d16ee83caaff471d6f202443a66b13780da55f23

commit d16ee83caaff471d6f202443a66b13780da55f23
Author: Michael Meissner 
Date:   Tue Jul 16 03:14:35 2024 -0400

Fix last change.

2024-07-16  Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652

* gcc.target/powerpc/abs128-1.c: Fix typos.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c  | 3 ++-
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c | 3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c  | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/abs128-1.c b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
index 3449c9ca94d8..e8702ec3127a 100644
--- a/gcc/testsuite/gcc.target/powerpc/abs128-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
@@ -1,5 +1,6 @@
-/* { dg-do run { target { powerpc64*-*-* && vsx_hw && ppc_float128_sw } } } */
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
 /* { dg-options "-mvsx" } */
+/* { dg-require-effective-target ppc_float128_sw } */
 
 void abort ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/copysign128-1.c b/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
index 1e8ae5fa7533..ac8528b53273 100644
--- a/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
@@ -1,5 +1,6 @@
-/* { dg-do run { target { powerpc64*-*-* && vsx_hw && ppc_float128_sw } } } */
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
 /* { dg-options "-mvsx" } */
+/* { dg-require-effective-target ppc_float128_sw } */
 
 void abort ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/divkc3-1.c b/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
index 2b4f08ecef51..cb7335f2a755 100644
--- a/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
@@ -1,5 +1,6 @@
-/* { dg-do run { target { powerpc64*-*-* && p8vector_hw && ppc_float128_sw } } } */
+/* { dg-do run { target { powerpc64*-*-* && p8vector_hw } } } */
 /* { dg-options "-mvsx" } */
+/* { dg-require-effective-target ppc_float128_sw } */
 
 void abort ();


[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:33e7ebfe00ba322e5272571682c63aece6276c98

commit 33e7ebfe00ba322e5272571682c63aece6276c98
Author: Michael Meissner 
Date:   Tue Jul 16 03:15:43 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 99b0a6a4ec40..dc2944b781bc 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,18 @@
+ Branch work171-bugs, patch #324 
+
+Fix last change.
+
+2024-07-16  Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+
+   * gcc.target/powerpc/abs128-1.c: Fix typos.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+
  Branch work171-bugs, patch #323 
 
 Remove -mfloat128 option.


[gcc r15-2054] Fixup unaligned load/store cost for znver4

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:1e3aa9c9278db69d4bdb661a750a7268789188d6

commit r15-2054-g1e3aa9c9278db69d4bdb661a750a7268789188d6
Author: Richard Biener 
Date:   Mon Jul 15 13:01:24 2024 +0200

Fixup unaligned load/store cost for znver4

Currently unaligned YMM and ZMM load and store costs are cheaper than
aligned which causes the vectorizer to purposely mis-align accesses
by adding an alignment prologue.  It looks like the unaligned costs
were simply left untouched from znver3 where they equate the aligned
costs when tweaking aligned costs for znver4.  The following makes
the unaligned costs equal to the aligned costs.

This avoids the miscompile seen in PR115843 but it's of course not
a real fix for the issue uncovered there.  But it makes it qualify
as a regression fix.

PR tree-optimization/115843
* config/i386/x86-tune-costs.h (znver4_cost): Update unaligned
load and store cost from the aligned costs.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index a933794ed505..2ac75c35aee6 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1924,8 +1924,8 @@ struct processor_costs znver4_cost = {
   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {8, 8, 8, 12, 12},   /* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {6, 6, 6, 6, 6}, /* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8}, /* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},  /* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},   /* cost of unaligned stores.  */
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to integer.  */


[gcc r15-2055] tree-optimization/115843 - fix wrong-code with fully-masked loop and peeling

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a177be05f6952c3f7e62186d2e138d96c475b81a

commit r15-2055-ga177be05f6952c3f7e62186d2e138d96c475b81a
Author: Richard Biener 
Date:   Mon Jul 15 13:50:58 2024 +0200

tree-optimization/115843 - fix wrong-code with fully-masked loop and peeling

When AVX512 uses a fully masked loop and peeling we fail to create the
correct initial loop mask when the mask is composed of multiple
components in some cases.  The following fixes this by properly applying
the bias for the component to the shift amount.

PR tree-optimization/115843
* tree-vect-loop-manip.cc
(vect_set_loop_condition_partial_vectors_avx512): Properly
bias the shift of the initial mask for alignment peeling.

* gcc.dg/vect/pr115843.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115843.c | 41 
 gcc/tree-vect-loop-manip.cc  |  8 +--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115843.c b/gcc/testsuite/gcc.dg/vect/pr115843.c
new file mode 100644
index 000000000000..3dbb6c792788
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115843.c
@@ -0,0 +1,41 @@
+/* { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f_runtime } } */
+
+#include "tree-vect.h"
+
+typedef __UINT64_TYPE__ BITBOARD;
+BITBOARD KingPressureMask1[64], KingSafetyMask1[64];
+
+void __attribute__((noinline))
+foo()
+{
+  for (int i = 0; i < 64; i++)
+{
+  if ((i & 7) == 0)
+   KingPressureMask1[i] = KingSafetyMask1[i + 1];
+  else if ((i & 7) == 7)
+   KingPressureMask1[i] = KingSafetyMask1[i - 1];
+  else
+   KingPressureMask1[i] = KingSafetyMask1[i];
+}
+}
+
+BITBOARD verify[64]
+  = {1, 1, 2, 3, 4, 5, 6, 6, 9, 9, 10, 11, 12, 13, 14, 14, 17, 17, 18, 19,
+20, 21, 22, 22, 25, 25, 26, 27, 28, 29, 30, 30, 33, 33, 34, 35, 36, 37, 38,
+38, 41, 41, 42, 43, 44, 45, 46, 46, 49, 49, 50, 51, 52, 53, 54, 54, 57, 57,
+58, 59, 60, 61, 62, 62};
+
+int main()
+{
+  check_vect ();
+
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+KingSafetyMask1[i] = i;
+  foo ();
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+if (KingPressureMask1[i] != verify[i])
+  __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ac13873cd88d..57dbcbe862cd 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1149,10 +1149,14 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
  /* ???  But when the shift amount isn't constant this requires
 a round-trip to GRPs.  We could apply the bias to either
 side of the compare instead.  */
- tree shift = gimple_build (&preheader_seq, MULT_EXPR,
+ tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
 TREE_TYPE (niters_skip), niters_skip,
 build_int_cst (TREE_TYPE (niters_skip),
-   rgc.max_nscalars_per_iter));
+   bias));
+ shift = gimple_build (&preheader_seq, MULT_EXPR,
+   TREE_TYPE (niters_skip), shift,
+   build_int_cst (TREE_TYPE (niters_skip),
+  rgc.max_nscalars_per_iter));
  init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
TREE_TYPE (init_ctrl),
init_ctrl, shift);


[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:9a87fdccc1209cfd436aaad7e3fa3b27049990e6

commit 9a87fdccc1209cfd436aaad7e3fa3b27049990e6
Author: Michael Meissner 
Date:   Tue Jul 16 04:41:11 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index dc2944b781bc..0b0b1bf63356 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,20 @@
+ Branch work171-bugs, patch #325 
+
+Fix last change.
+
+2024-07-16  Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Require float128
+   support.
+   * lib/target-supports.exp
+   (check_effective_target_base_quadfloat_support): Add check for explicit
+   float128.
+
  Branch work171-bugs, patch #324 
 
 Fix last change.


[gcc(refs/users/meissner/heads/work171-bugs)] Fix last change.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:23b6b57519e181ae6a10a609061a922bb5694aa7

commit 23b6b57519e181ae6a10a609061a922bb5694aa7
Author: Michael Meissner 
Date:   Tue Jul 16 04:40:06 2024 -0400

Fix last change.

2024-07-16  Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652

* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Require float128
support.
* lib/target-supports.exp
(check_effective_target_base_quadfloat_support): Add check for explicit
float128.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c | 1 +
 gcc/testsuite/lib/target-supports.exp   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
index f0e03c5173d2..081fb2e2995f 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
@@ -2,6 +2,7 @@
 /* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9 -save-temps" } */
+/* { dg-require-effective-target ppc_float128_sw } */
 
 #include 
 #include 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6b460f24cc3a..9e94fd78d925 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3953,7 +3953,7 @@ proc add_options_for___float128 { flags } {
 
 proc check_effective_target_base_quadfloat_support { } {
 if { [istarget powerpc*-*-*] } {
-   return [check_vsx_hw_available]
+   return [check_effective_target_powerpc_float128_sw_ok]
 }
 return 1
 }


[gcc r15-2056] s390: Emulate vec_cmp{eq,gt,gtu} for 128-bit integers

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:1b575bb24a7a3d2b00197dd5deb4c26b313f442b

commit r15-2056-g1b575bb24a7a3d2b00197dd5deb4c26b313f442b
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 10:41:41 2024 +0200

s390: Emulate vec_cmp{eq,gt,gtu} for 128-bit integers

Mode iterator V_HW enables V1TI for target VXE which means
vec_cmpv1tiv1ti becomes available which leads to an ICE since there is
no corresponding insn.

Fixed by emulating comparisons and enabling mode V1TI unconditionally
for V_HW.  For the sake of symmetry, I also added TI mode to V_HW since
TF mode is already included.  As a consequence the consumers of V_HW
vec_{splat,slb,sld,sldw,sldb,srdb,srab,srb,test_mask_int,test_mask}
also become available for 128-bit integers.

This fixes gcc.c-torture/execute/pr105613.c and gcc.dg/pr106063.c.

gcc/ChangeLog:

* config/s390/vector.md (V_HW): Enable V1TI unconditionally and
add TI.
(vec_cmpu): Add 128-bit integer
variants.
(*vec_cmpeq_nocc_emu): Emulate operation.
(*vec_cmpgt_nocc_emu): Emulate operation.
(*vec_cmpgtu_nocc_emu): Emulate operation.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/vec-cmp-emu-1.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-2.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-3.c: New test.

Diff:
---
 gcc/config/s390/vector.md  | 113 ++---
 .../gcc.target/s390/vector/vec-cmp-emu-1.c |  35 +++
 .../gcc.target/s390/vector/vec-cmp-emu-2.c |  18 
 .../gcc.target/s390/vector/vec-cmp-emu-3.c |  17 
 4 files changed, 171 insertions(+), 12 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 636788596574..756011728938 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -30,7 +30,7 @@
 ; V_HW2 is for having two iterators expanding independently e.g. vcond.
 ; It's similar to V_HW, but not fully identical: V1TI is not included, because
 ; there are no 128-bit compares.
-(define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI (V1TI "TARGET_VXE") V2DF
+(define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V1TI TI V2DF
 (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
 (TF "TARGET_VXE")])
 (define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
@@ -50,6 +50,7 @@
 (define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
 (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
 (define_mode_iterator VI_HW_QH  [V16QI V8HI])
+(define_mode_iterator VI_HW_T   [V1TI TI])
 
 ; Directly supported vector modes with a certain number of elements
 (define_mode_iterator V_HW_2   [V2DI V2DF])
@@ -151,7 +152,7 @@
(V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI 
"V8HI")
(V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
(V1DI "V1DI") (V2DI "V2DI")
-   (V1TI "V1TI")
+   (V1TI "V1TI") (TI "V1TI")
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
(V1DF "V1DI") (V2DF "V2DI")
(V1TF "V1TI") (TF "V1TI")])
@@ -160,7 +161,7 @@
(V1HI "v1hi") (V2HI "v2hi") (V4HI "v4hi") (V8HI 
"v8hi")
(V1SI "v1si") (V2SI "v2si") (V4SI "v4si")
(V1DI "v1di") (V2DI "v2di")
-   (V1TI "v1ti")
+   (V1TI "v1ti") (TI "v1ti")
(V1SF "v1si") (V2SF "v2si") (V4SF "v4si")
(V1DF "v1di") (V2DF "v2di")
(V1TF "v1ti") (TF   "v1ti")])
@@ -1960,11 +1961,11 @@
   DONE;
 })
 
-(define_expand "vec_cmpu"
-  [(set (match_operand:VI_HW0 "register_operand" "")
-   (match_operator:VI_HW   1 ""
- [(match_operand:VI_HW 2 "register_operand" "")
-  (match_operand:VI_HW 3 "register_operand" "")]))]
+(define_expand "vec_cmpu"
+  [(set (match_operand:VIT_HW0 "register_operand" "")
+   (match_operator:VIT_HW   1 ""
+ [(match_operand:VIT_HW 2 "register_operand" "")
+  (match_operand:VIT_HW 3 "register_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], 
operands[3]);
@@ -1979,6 +1980,94 @@
   "vc\t%v2,%v0,%v1"
   [(set_attr "op_type" "VRR")])
 
+(define_insn_and_split "*vec_cmpeq_nocc_emu"
+  [(set (match_operand:VI_HW_T 0 "register_operand" "=v")
+   (eq:VI_HW_T (match_operand:VI_HW_T 1 "register_operand"  "v")
+   (match_operand:VI_HW_T 2 "register_operand"  "v")))]
+  "TARGET_VX"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 3)
+   (eq:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+   (vec_select:V2DI (match_dup 3) (parall

[gcc r15-2058] s390: Drop vcond{,u} expanders

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:75c0bf997d2808561451e62aa6b7ae7c8e32b9e9

commit r15-2058-g75c0bf997d2808561451e62aa6b7ae7c8e32b9e9
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 10:41:52 2024 +0200

s390: Drop vcond{,u} expanders

Optabs vcond{,u} will be removed for GCC 15.  Since regtest shows no
fallout, dropping the expanders, now.

gcc/ChangeLog:

PR target/114189
* config/s390/vector.md (V_HW2): Remove.
(vcond): Remove.
(vcondu): Remove.

Diff:
---
 gcc/config/s390/vector.md | 35 ---
 1 file changed, 35 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c8e8029167d3..69efbbb61acd 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -27,14 +27,9 @@
V2SF V4SF V1DF V2DF V1TF V1TI TI])
 
 ; All modes directly supported by the hardware having full vector reg size
-; V_HW2 is for having two iterators expanding independently e.g. vcond.
-; It's similar to V_HW, but not fully identical: V1TI is not included, because
-; there are no 128-bit compares.
 (define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V1TI TI V2DF
 (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
 (TF "TARGET_VXE")])
-(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
-(V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 
 (define_mode_iterator VT_HW_HSDT [V8HI V4SI V4SF V2DI V2DF V1TI V1TF TI TF])
 (define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF])
@@ -729,36 +724,6 @@
 }
 })
 
-(define_expand "vcond"
-  [(set (match_operand:V_HW 0 "register_operand" "")
-   (if_then_else:V_HW
-(match_operator 3 "vcond_comparison_operator"
-[(match_operand:V_HW2 4 "register_operand" "")
- (match_operand:V_HW2 5 "nonmemory_operand" "")])
-(match_operand:V_HW 1 "nonmemory_operand" "")
-(match_operand:V_HW 2 "nonmemory_operand" "")))]
-  "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS 
(mode)"
-{
-  s390_expand_vcond (operands[0], operands[1], operands[2],
-GET_CODE (operands[3]), operands[4], operands[5]);
-  DONE;
-})
-
-(define_expand "vcondu"
-  [(set (match_operand:V_HW 0 "register_operand" "")
-   (if_then_else:V_HW
-(match_operator 3 "comparison_operator"
-[(match_operand:V_HW2 4 "register_operand" "")
- (match_operand:V_HW2 5 "nonmemory_operand" "")])
-(match_operand:V_HW 1 "nonmemory_operand" "")
-(match_operand:V_HW 2 "nonmemory_operand" "")))]
-  "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS 
(mode)"
-{
-  s390_expand_vcond (operands[0], operands[1], operands[2],
-GET_CODE (operands[3]), operands[4], operands[5]);
-  DONE;
-})
-
 (define_expand "vcond_mask_"
   [(set (match_operand:VT 0 "register_operand" "")
(if_then_else:VT


[gcc r15-2057] s390: Enable vcond_mask for 128-bit ops

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:6d1095788e23c27061421c7d180209264ebb32f7

commit r15-2057-g6d1095788e23c27061421c7d180209264ebb32f7
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 10:41:46 2024 +0200

s390: Enable vcond_mask for 128-bit ops

In preparation of dropping vcond{,u,eq} optabs
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654690.html
enable 128-bit operands for vcond_mask---including integer as well as
floating point.

This fixes partially PR115519 w.r.t. autovec-long-double-signaling-*.c
tests.

gcc/ChangeLog:

* config/s390/vector.md: Enable vcond_mask for 128-bit ops.

Diff:
---
 gcc/config/s390/vector.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 756011728938..c8e8029167d3 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -760,12 +760,12 @@
 })
 
 (define_expand "vcond_mask_"
-  [(set (match_operand:V 0 "register_operand" "")
-   (if_then_else:V
+  [(set (match_operand:VT 0 "register_operand" "")
+   (if_then_else:VT
 (eq (match_operand: 3 "register_operand" "")
 (match_dup 4))
-(match_operand:V 2 "register_operand" "")
-(match_operand:V 1 "register_operand" "")))]
+(match_operand:VT 2 "register_operand" "")
+(match_operand:VT 1 "register_operand" "")))]
   "TARGET_VX"
   "operands[4] = CONST0_RTX (mode);")


[gcc/aoliva/heads/testme] (82 commits) [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 529d91ce3711... [strub] adjust all at-calls type variants at once

It previously pointed to:

 3b1df5149519... [analyzer] [testsuite] avoid unexpected null dereference wa

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  3b1df51... [analyzer] [testsuite] avoid unexpected null dereference wa
  2f1f926... [libstdc++] [testsuite] avoid arbitrary errno codes
  35b8b9a... [libstdc++] [testsuite] require dfprt on some tests
  e7140d9... [libstdc++] [testsuite] xfail 128bit from_chars on all aarc
  ef1d9ef... [i386] adjust flag_omit_frame_pointer in a single function 


Summary of changes (added commits):
---

  529d91c... [strub] adjust all at-calls type variants at once
  bf8e80f... [i386] adjust flag_omit_frame_pointer in a single function  (*)
  4306f76... RISC-V: Fix testcase for vector .SAT_SUB in zip benchmark (*)
  8f87b3c... AVR: avr-md - Simplify GET_MODE and GET_MODE_BITSIZE. (*)
  9964edf... varasm: Add support for emitting binary data with the new g (*)
  f27bf48... Fix SSA_NAME leak due to def_stmt is removed before use_stm (*)
  681ff5c... [APX NF] Add a pass to convert legacy insn to NF insns (*)
  60ba989... arm: Fix the expected output of the test pr111235.c  [PR115 (*)
  bf26413... RISC-V: Implement locality for __builtin_prefetch (*)
  8b14920... aarch64: Fix the expected output of the test cpy_1.c [PR115 (*)
  da37a27... CRIS: Adjust gcc.dg/tree-ssa/loop-1.c (*)
  9f52163... RISC-V: Add md files for vector BFloat16 (*)
  281f021... RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic (*)
  02a3bf5... AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [P (*)
  666f167... RISC-V: Add vector type of BFloat16 format (*)
  8ccfa57... Daily bump. (*)
  74e6dfb... i386: Tweak i386-expand.cc to restore bootstrap on RHEL. (*)
  44c9403... c, objc: Add -Wunterminated-string-initialization (*)
  74595c7... CRIS: Fix up last comment. (*)
  a01b40c... CRIS: Disable late-combine by default, related PR115883 (*)
  944e425... Daily bump. (*)
  677ef11... Document return value in write_cv_integer (*)
  800e06c... Make sure CodeView symbols are aligned (*)
  45a4f34... Avoid magic numbers when writing CodeView padding (*)
  f53087a... Add CodeView enum cv_sym_type (*)
  131fcb5... Add CodeView enum cv_leaf_type (*)
  d211100... fortran: Correctly evaluate scalar MASK arguments of MINLOC (*)
  6fc24a0... Add gcc.gnu.org account names to MAINTAINERS (*)
  7d73c01... diagnostics: add highlight-a vs highlight-b in colorization (*)
  abf3964... tree-optimization/115868 - ICE with .MASK_CALL in simdclone (*)
  2ee5b58... Daily bump. (*)
  dd2840a... doc: Update GNU Modula 2 mailing list links (*)
  a6f551d... [PR rtl-optimization/115876] Fix one of two ubsan reported  (*)
  b3d4a02... doc: remove @opindex for fconcepts-ts (*)
  08776be... Fix Xcode 16 build break with NULL != nullptr (*)
  6e7053a... rtl-ssa: Fix prev_any_insn [PR115785] (*)
  f4047a8... modula2: bootstrap fix for string and vector headers. (*)
  ae829a2... [RISC-V] Avoid unnecessary sign extension after memcmp (*)
  13757e5... c++/modules: Add testcase for fixed issue with usings [PR11 (*)
  1f7a21c... c++/modules: Handle redefinitions of using-decls (*)
  d6bf4b1... c++: Introduce USING_DECLs for non-function usings [PR11468 (*)
  61715e9... s390: Fully exploit vgm, vgbm, vrepi (*)
  e6680d3... s390: Fix output template for movv1qi (*)
  6b5d263... i386: Some AVX512 ternlog expansion refinements. (*)
  56de68a... s390: Align *cjump_64 and *icjump_64 (*)
  7bcef75... aarch64: Avoid alloca in target attribute parsing (*)
  26dfb3f... [libstdc++] [testsuite] require dfprt on some tests (*)
  ccfe715... [alpha] adjust MEM alignment for block move [PR115459] (*)
  c6f38e5... RISC-V: NO_WARNING preferred else value for RVV (*)
  a55d24b... fortran: Factor the evaluation of MINLOC/MAXLOC's BACK argu (*)
  63d7d59... RISC-V: Disable misaligned vector access in hook riscv_slow (*)
  3ea47ea... RISC-V: Add SiFive extensions, xsfvcp and xsfcease (*)
  f7e4000... rs6000: Remove vcond{,u} expanders (*)
  4f4478f... tree-optimization/115867 - ICE with simdcall vectorization  (*)
  a91c51c... [committed] Fix m68k bootstrap segfault with late-combine (*)
  bf406a5... libbacktrace: avoid infinite recursion (*)
  3bc1a86... LoongArch: Remove unreachable codes. (*)
  abeb6c8... LoongArch: TFmode is not allowed to be stored in the float  (*)
  d7318f4... libbacktrace: don't fail if symbol size is unknown (*)
  b870086... libbacktrace: correctly gather Mach-O symbol table (*)
  88ff050... Daily bump. (*)
  8f7c06d... libbacktrace: fix testsuite for clang (*)
  43763bd... libstdc++: Test that std::atomic_ref uses the primary (*)
  79d3f17... libstdc++: the specialization atomic_ref should use t (*)
  b96789a... libbacktrace: suggest how to fix missing debug info (*)
  02f7525... libbacktrace: remove trailing whitespace (*)

[gcc/aoliva/heads/testbase] (81 commits) [i386] adjust flag_omit_frame_pointer in a single function

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 bf8e80f9d164... [i386] adjust flag_omit_frame_pointer in a single function 

It previously pointed to:

 36e5e409190e... RISC-V: c implies zca, and conditionally zcf & zcd

Diff:

Summary of changes (added commits):
---

  bf8e80f... [i386] adjust flag_omit_frame_pointer in a single function  (*)
  4306f76... RISC-V: Fix testcase for vector .SAT_SUB in zip benchmark (*)
  8f87b3c... AVR: avr-md - Simplify GET_MODE and GET_MODE_BITSIZE. (*)
  9964edf... varasm: Add support for emitting binary data with the new g (*)
  f27bf48... Fix SSA_NAME leak due to def_stmt is removed before use_stm (*)
  681ff5c... [APX NF] Add a pass to convert legacy insn to NF insns (*)
  60ba989... arm: Fix the expected output of the test pr111235.c  [PR115 (*)
  bf26413... RISC-V: Implement locality for __builtin_prefetch (*)
  8b14920... aarch64: Fix the expected output of the test cpy_1.c [PR115 (*)
  da37a27... CRIS: Adjust gcc.dg/tree-ssa/loop-1.c (*)
  9f52163... RISC-V: Add md files for vector BFloat16 (*)
  281f021... RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic (*)
  02a3bf5... AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [P (*)
  666f167... RISC-V: Add vector type of BFloat16 format (*)
  8ccfa57... Daily bump. (*)
  74e6dfb... i386: Tweak i386-expand.cc to restore bootstrap on RHEL. (*)
  44c9403... c, objc: Add -Wunterminated-string-initialization (*)
  74595c7... CRIS: Fix up last comment. (*)
  a01b40c... CRIS: Disable late-combine by default, related PR115883 (*)
  944e425... Daily bump. (*)
  677ef11... Document return value in write_cv_integer (*)
  800e06c... Make sure CodeView symbols are aligned (*)
  45a4f34... Avoid magic numbers when writing CodeView padding (*)
  f53087a... Add CodeView enum cv_sym_type (*)
  131fcb5... Add CodeView enum cv_leaf_type (*)
  d211100... fortran: Correctly evaluate scalar MASK arguments of MINLOC (*)
  6fc24a0... Add gcc.gnu.org account names to MAINTAINERS (*)
  7d73c01... diagnostics: add highlight-a vs highlight-b in colorization (*)
  abf3964... tree-optimization/115868 - ICE with .MASK_CALL in simdclone (*)
  2ee5b58... Daily bump. (*)
  dd2840a... doc: Update GNU Modula 2 mailing list links (*)
  a6f551d... [PR rtl-optimization/115876] Fix one of two ubsan reported  (*)
  b3d4a02... doc: remove @opindex for fconcepts-ts (*)
  08776be... Fix Xcode 16 build break with NULL != nullptr (*)
  6e7053a... rtl-ssa: Fix prev_any_insn [PR115785] (*)
  f4047a8... modula2: bootstrap fix for string and vector headers. (*)
  ae829a2... [RISC-V] Avoid unnecessary sign extension after memcmp (*)
  13757e5... c++/modules: Add testcase for fixed issue with usings [PR11 (*)
  1f7a21c... c++/modules: Handle redefinitions of using-decls (*)
  d6bf4b1... c++: Introduce USING_DECLs for non-function usings [PR11468 (*)
  61715e9... s390: Fully exploit vgm, vgbm, vrepi (*)
  e6680d3... s390: Fix output template for movv1qi (*)
  6b5d263... i386: Some AVX512 ternlog expansion refinements. (*)
  56de68a... s390: Align *cjump_64 and *icjump_64 (*)
  7bcef75... aarch64: Avoid alloca in target attribute parsing (*)
  26dfb3f... [libstdc++] [testsuite] require dfprt on some tests (*)
  ccfe715... [alpha] adjust MEM alignment for block move [PR115459] (*)
  c6f38e5... RISC-V: NO_WARNING preferred else value for RVV (*)
  a55d24b... fortran: Factor the evaluation of MINLOC/MAXLOC's BACK argu (*)
  63d7d59... RISC-V: Disable misaligned vector access in hook riscv_slow (*)
  3ea47ea... RISC-V: Add SiFive extensions, xsfvcp and xsfcease (*)
  f7e4000... rs6000: Remove vcond{,u} expanders (*)
  4f4478f... tree-optimization/115867 - ICE with simdcall vectorization  (*)
  a91c51c... [committed] Fix m68k bootstrap segfault with late-combine (*)
  bf406a5... libbacktrace: avoid infinite recursion (*)
  3bc1a86... LoongArch: Remove unreachable codes. (*)
  abeb6c8... LoongArch: TFmode is not allowed to be stored in the float  (*)
  d7318f4... libbacktrace: don't fail if symbol size is unknown (*)
  b870086... libbacktrace: correctly gather Mach-O symbol table (*)
  88ff050... Daily bump. (*)
  8f7c06d... libbacktrace: fix testsuite for clang (*)
  43763bd... libstdc++: Test that std::atomic_ref uses the primary (*)
  79d3f17... libstdc++: the specialization atomic_ref should use t (*)
  b96789a... libbacktrace: suggest how to fix missing debug info (*)
  02f7525... libbacktrace: remove trailing whitespace (*)
  26c9b09... libstdc++: Switch gcc.gnu.org links to https (*)
  74d8acc... [to-be-committed,RISC-V] Eliminate unnecessary sign extensi (*)
  4e0aa05... Ranger: Mark a few classes as final (*)
  8dbc02b... libstdc++: Disable expensive test for debug mode [PR108636] (*)
  7c11fdd... mve: Fix vsetq_lane for 64-bit elements with lane 1 [PR 115 (*)
  44fc801... recog: Avoid validate_change shortcut for groups [PR115782] (*)
  b9513c6... Fix bootstrap broken by gcc-15-1965-ge4f2f46e015 (*)
  7387117... Fix gimp

[gcc(refs/users/aoliva/heads/testme)] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:529d91ce3711739c56a502cdf7d6b7b657776c3d

commit 529d91ce3711739c56a502cdf7d6b7b657776c3d
Author: Alexandre Oliva 
Date:   Tue Jul 16 05:33:07 2024 -0300

[strub] adjust all at-calls type variants at once

TYPE_ARG_TYPES of type variants must compare equal, according to
verify_type, but adjust_at_calls_type didn't preserve this invariant.

Adjust the main type variant and propagate TYPE_ARG_TYPES to all
variants.  While at that, also adjust the canonical type and its
variants, and then verify_type.


for  gcc/ChangeLog

PR c/115848
* ipa-strub.cc (pass_ipa_strub::adjust_at_calls_type_main):
Rename from...
(pass_ipa_strub::adjust_at_calls_type): ... this.  Preserve
TYPE_ARG_TYPES across all variants.  Adjust TYPE_CANONICAL and
verify_type.

for  gcc/testsuite/ChangeLog

PR c/115848
* c-c++-common/strub-pr115848.c: New.

Diff:
---
 gcc/ipa-strub.cc| 41 +++--
 gcc/testsuite/c-c++-common/strub-pr115848.c |  6 +
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc
index 8fa7bdf53002..15d91c994bf8 100644
--- a/gcc/ipa-strub.cc
+++ b/gcc/ipa-strub.cc
@@ -1891,6 +1891,7 @@ public:
 
 #undef DEF_IDENT
 
+  static inline int adjust_at_calls_type_main (tree);
   static inline int adjust_at_calls_type (tree);
   static inline void adjust_at_calls_call (cgraph_edge *, int, tree);
   static inline void adjust_at_calls_calls (cgraph_node *);
@@ -2348,15 +2349,51 @@ strub_watermark_parm (tree fndecl)
   gcc_unreachable ();
 }
 
+/* Adjust a STRUB_AT_CALLS function TYPE and all its variants,
+   preserving TYPE_ARG_TYPES identity, adding a watermark pointer if
+   it hasn't been added yet.  Return the named argument count.  */
+
+int
+pass_ipa_strub::adjust_at_calls_type (tree type)
+{
+  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+
+  tree tmain = TYPE_MAIN_VARIANT (type);
+  tree orig_types = TYPE_ARG_TYPES (tmain);
+  gcc_checking_assert (TYPE_ARG_TYPES (type) == orig_types);
+  int named_args = adjust_at_calls_type_main (tmain);
+  tree mod_types = TYPE_ARG_TYPES (tmain);
+
+  if (mod_types != orig_types)
+for (tree other = TYPE_NEXT_VARIANT (tmain);
+other != NULL_TREE; other = TYPE_NEXT_VARIANT (other))
+  {
+   gcc_checking_assert (TYPE_ARG_TYPES (other) == orig_types);
+   TYPE_ARG_TYPES (other) = mod_types;
+  }
+
+  if (TYPE_CANONICAL (type)
+  && TYPE_MAIN_VARIANT (TYPE_CANONICAL (type)) != tmain)
+{
+  int ret = adjust_at_calls_type (TYPE_CANONICAL (type));
+  gcc_checking_assert (named_args == ret);
+}
+
+  if (flag_checking)
+verify_type (type);
+
+  return named_args;
+}
+
 /* Adjust a STRUB_AT_CALLS function TYPE, adding a watermark pointer if it
hasn't been added yet.  Return the named argument count.  */
 
 int
-pass_ipa_strub::adjust_at_calls_type (tree type)
+pass_ipa_strub::adjust_at_calls_type_main (tree type)
 {
   int named_args = 0;
 
-  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
 
   if (!TYPE_ARG_TYPES (type))
 return named_args;
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848.c b/gcc/testsuite/c-c++-common/strub-pr115848.c
new file mode 100644
index 000000000000..658dbaafe01d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848.c
@@ -0,0 +1,6 @@
+/* { dg-do link } */
+/* { dg-options "-flto" } */
+
+typedef void __attribute__((__strub__)) a(int, int);
+a(b);
+void c() { b(0, 0); }


[gcc r15-2059] Fixup unaligned load/store cost for znver5

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:896393791ee34ffc176c87d232dfee735db3aaab

commit r15-2059-g896393791ee34ffc176c87d232dfee735db3aaab
Author: Richard Biener 
Date:   Tue Jul 16 10:45:27 2024 +0200

Fixup unaligned load/store cost for znver5

Currently unaligned YMM and ZMM load and store costs are cheaper than
aligned which causes the vectorizer to purposely mis-align accesses
by adding an alignment prologue.  It looks like the unaligned costs
were simply copied from the bogus znver4 costs.  The following makes
the unaligned costs equal to the aligned costs like in the fixed znver4
version.

* config/i386/x86-tune-costs.h (znver5_cost): Update unaligned
load and store cost from the aligned costs.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 2ac75c35aee6..769f334e5318 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2060,8 +2060,8 @@ struct processor_costs znver5_cost = {
   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {8, 8, 8, 12, 12},   /* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {6, 6, 6, 6, 6}, /* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8}, /* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},  /* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},   /* cost of unaligned stores.  */
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to integer.  */


[gcc r15-2060] s390: Fix unresolved iterators bhfgq and xdee

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:a4abda934aa426137f059934629d3241f008e113

commit r15-2060-ga4abda934aa426137f059934629d3241f008e113
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 11:23:10 2024 +0200

s390: Fix unresolved iterators bhfgq and xdee

Code attribute bhfgq is missing a mapping for TF.  This results in
unresolved iterators in assembler templates for *bswaptf.

With the TF mapping added the base mnemonics vlbr and vstbr are not
"used" anymore but only the extended mnemonics (vlbr<bhfgq> was
interpreted as vlbr; likewise for vstbr).  Therefore, remove the base
mnemonics from the scheduling description, otherwise, genattrtab would
error about unknown mnemonics.

Similarly, we end up with unresolved iterators in assembler templates
for mulfprx23 since code attribute xdee is missing a mapping for FPRX2.

gcc/ChangeLog:

* config/s390/3931.md (vlbr, vstbr): Remove.
* config/s390/s390.md (xdee): Add FPRX2 mapping.
* config/s390/vector.md (bhfgq): Add TF mapping.

Diff:
---
 gcc/config/s390/3931.md   | 5 -
 gcc/config/s390/s390.md   | 2 +-
 gcc/config/s390/vector.md | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/gcc/config/s390/3931.md b/gcc/config/s390/3931.md
index 632c2456b6a3..9f7a4c58755c 100644
--- a/gcc/config/s390/3931.md
+++ b/gcc/config/s390/3931.md
@@ -404,7 +404,6 @@ vlvgg,
 vlvgh,
 vlvgp,
 vst,
-vstbr,
 vstbrf,
 vstbrg,
 vstbrh,
@@ -627,7 +626,6 @@ tm,
 tmy,
 vl,
 vlbb,
-vlbr,
 vlbrf,
 vlbrg,
 vlbrh,
@@ -661,7 +659,6 @@ vlreph,
 vlrl,
 vlrlr,
 vst,
-vstbr,
 vstbrf,
 vstbrg,
 vstbrh,
@@ -2148,7 +2145,6 @@ vistrfs,
 vistrhs,
 vl,
 vlbb,
-vlbr,
 vlbrf,
 vlbrg,
 vlbrh,
@@ -2240,7 +2236,6 @@ tbegin,
 tbeginc,
 tend,
 vst,
-vstbr,
 vstbrf,
 vstbrg,
 vstbrh,
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 303026f6af7c..3d5759d62521 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -745,7 +745,7 @@
 ;; In FP templates, a <xdee> in "m<xdee>r" will expand to "mxr" in
 ;; TF/TDmode, "mdr" in DF/DDmode, "meer" in SFmode and "mer in
 ;; SDmode.
-(define_mode_attr xdee [(TF "x") (DF "d") (SF "ee") (TD "x") (DD "d") (SD "e")])
+(define_mode_attr xdee [(TF "x") (FPRX2 "x") (DF "d") (SF "ee") (TD "x") (DD "d") (SD "e")])
 
 ;; The decimal floating point variants of add, sub, div and mul support 3
 ;; fp register operands.  The following attributes allow to merge the bfp and
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 69efbbb61acd..a75b7cb58257 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -132,7 +132,7 @@
(V1TI "q") (TI "q")
(V1SF "f") (V2SF "f") (V4SF "f")
(V1DF "g") (V2DF "g")
-   (V1TF "q")])
+   (V1TF "q") (TF "q")])
 
 ; This is for vmalhw. It gets an 'w' attached to avoid confusion with
 ; multiply and add logical high vmalh.


[gcc r14-10426] [i386] restore recompute to override opts after change [PR113719]

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:102bcf147892855463c5854119aacda752ed033c

commit r14-10426-g102bcf147892855463c5854119aacda752ed033c
Author: Alexandre Oliva 
Date:   Tue Jul 16 06:27:06 2024 -0300

[i386] restore recompute to override opts after change [PR113719]

The first patch for PR113719 regressed gcc.dg/ipa/iinline-attr.c on
toolchains configured to --enable-frame-pointer, because the
optimization node created within handle_optimize_attribute had
flag_omit_frame_pointer incorrectly set, whereas
default_optimization_node didn't.  With this difference,
can_inline_edge_by_limits_p flagged an optimization mismatch and we
refused to inline the function that had a redundant optimization flag
into one that didn't, which is exactly what is tested for there.

This patch restores the calls to ix86_default_align and
ix86_recompute_optlev_based_flags that used to be, and ought to be,
issued during TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE, but preserves the
intent of the original change, of having those functions called at
different spots within ix86_option_override_internal.  To that end,
the remaining bits were refactored into a separate function, that was
in turn adjusted to operate on explicitly-passed opts and opts_set,
rather than going for their global counterparts.


for  gcc/ChangeLog

PR target/113719
* config/i386/i386-options.cc
(ix86_override_options_after_change_1): Add opts and opts_set
parms, operate on them, after factoring out of...
(ix86_override_options_after_change): ... this.  Restore calls
of ix86_default_align and ix86_recompute_optlev_based_flags.
(ix86_option_override_internal): Call the factored-out bits.

(cherry picked from commit bf2fc0a27b35de039c3d45e6d7ea9ad0a8a305ba)

Diff:
---
 gcc/config/i386/i386-options.cc | 59 -
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index d97464f2c74b..6e8fcbdaa28a 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1925,37 +1925,58 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
 }
 }
 
-/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
+/* Implement part of TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
 
-void
-ix86_override_options_after_change (void)
+static void
+ix86_override_options_after_change_1 (struct gcc_options *opts,
+ struct gcc_options *opts_set)
 {
+#define OPTS_SET_P(OPTION) opts_set->x_ ## OPTION
+#define OPTS(OPTION) opts->x_ ## OPTION
+
   /* Disable unrolling small loops when there's explicit
  -f{,no}unroll-loop.  */
-  if ((OPTION_SET_P (flag_unroll_loops))
- || (OPTION_SET_P (flag_unroll_all_loops)
-&& flag_unroll_all_loops))
+  if ((OPTS_SET_P (flag_unroll_loops))
+ || (OPTS_SET_P (flag_unroll_all_loops)
+&& OPTS (flag_unroll_all_loops)))
 {
-  if (!OPTION_SET_P (ix86_unroll_only_small_loops))
-   ix86_unroll_only_small_loops = 0;
+  if (!OPTS_SET_P (ix86_unroll_only_small_loops))
+   OPTS (ix86_unroll_only_small_loops) = 0;
   /* Re-enable -frename-registers and -fweb if funroll-loops
 enabled.  */
-  if (!OPTION_SET_P (flag_web))
-   flag_web = flag_unroll_loops;
-  if (!OPTION_SET_P (flag_rename_registers))
-   flag_rename_registers = flag_unroll_loops;
+  if (!OPTS_SET_P (flag_web))
+   OPTS (flag_web) = OPTS (flag_unroll_loops);
+  if (!OPTS_SET_P (flag_rename_registers))
+   OPTS (flag_rename_registers) = OPTS (flag_unroll_loops);
   /* -fcunroll-grow-size default follws -f[no]-unroll-loops.  */
-  if (!OPTION_SET_P (flag_cunroll_grow_size))
-   flag_cunroll_grow_size = flag_unroll_loops
-|| flag_peel_loops
-|| optimize >= 3;
+  if (!OPTS_SET_P (flag_cunroll_grow_size))
+   OPTS (flag_cunroll_grow_size)
+ = (OPTS (flag_unroll_loops)
+|| OPTS (flag_peel_loops)
+|| OPTS (optimize) >= 3);
 }
   else
 {
-  if (!OPTION_SET_P (flag_cunroll_grow_size))
-   flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
+  if (!OPTS_SET_P (flag_cunroll_grow_size))
+   OPTS (flag_cunroll_grow_size)
+ = (OPTS (flag_peel_loops)
+|| OPTS (optimize) >= 3);
 }
 
+#undef OPTS
+#undef OPTS_SET_P
+}
+
+/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
+
+void
+ix86_override_options_after_change (void)
+{
+  ix86_default_align (&global_options);
+
+  ix86_recompute_optlev_based_flags (&global_options, &global_options_set);
+
+  ix86_override_options_after_change_1 (&global_options, &global_options_set);
 }
 
 /* Clear stack slot assignments remembered from previous functions.
@@ -2527,7 +25

[gcc r14-10427] [i386] adjust flag_omit_frame_pointer in a single function [PR113719]

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:7bc63f1c70331763989d72b7df051e0ce67ff84c

commit r14-10427-g7bc63f1c70331763989d72b7df051e0ce67ff84c
Author: Alexandre Oliva 
Date:   Tue Jul 16 06:27:09 2024 -0300

[i386] adjust flag_omit_frame_pointer in a single function [PR113719]

The first two patches for PR113719 have each regressed
gcc.dg/ipa/iinline-attr.c on a different target.  The reason for this
instability is that there are competing flag_omit_frame_pointer
overriders on x86:

- ix86_recompute_optlev_based_flags computes and sets a
  -f[no-]omit-frame-pointer default depending on
  USE_IX86_FRAME_POINTER and, in 32-bit mode, optimize_size

- ix86_option_override_internal enables flag_omit_frame_pointer for
  -momit-leaf-frame-pointer to take effect

ix86_option_override[_internal] calls
ix86_recompute_optlev_based_flags before setting
flag_omit_frame_pointer.  It is called during global process_options.

But ix86_recompute_optlev_based_flags is also called by
parse_optimize_options, during attribute processing, and at that
point, ix86_option_override is not called, so the final overrider for
global options is not applied to the optimize attributes.  If they
differ, the testcase fails.

In order to fix this, we need to process all overriders of this option
whenever we process any of them.  Since this setting is affected by
optimization options, it makes sense to compute it in
parse_optimize_options, rather than in process_options.


for  gcc/ChangeLog

PR target/113719
* config/i386/i386-options.cc (ix86_option_override_internal):
Move flag_omit_frame_pointer final overrider...
(ix86_recompute_optlev_based_flags): ... here.

(cherry picked from commit bf8e80f9d164f8778d86a3dc50e501cf19a9eff1)

Diff:
---
 gcc/config/i386/i386-options.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 6e8fcbdaa28a..af450dba73dd 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1923,6 +1923,12 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
}
 }
+
+  /* Keep nonleaf frame pointers.  */
+  if (opts->x_flag_omit_frame_pointer)
+opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
+opts->x_flag_omit_frame_pointer = 1;
 }
 
 /* Implement part of TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
@@ -2623,12 +2629,6 @@ ix86_option_override_internal (bool main_args_p,
 opts->x_target_flags |= MASK_NO_RED_ZONE;
 }
 
-  /* Keep nonleaf frame pointers.  */
-  if (opts->x_flag_omit_frame_pointer)
-opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
-  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
-opts->x_flag_omit_frame_pointer = 1;
-
   /* If we're doing fast math, we don't care about comparison order
  wrt NaNs.  This lets us use a shorter comparison sequence.  */
   if (opts->x_flag_finite_math_only)


[gcc r13-8915] [i386] restore recompute to override opts after change [PR113719]

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:0b9d6829b503cfc72c4271ead2948d8100cce25c

commit r13-8915-g0b9d6829b503cfc72c4271ead2948d8100cce25c
Author: Alexandre Oliva 
Date:   Tue Jul 16 06:48:18 2024 -0300

[i386] restore recompute to override opts after change [PR113719]

The first patch for PR113719 regressed gcc.dg/ipa/iinline-attr.c on
toolchains configured to --enable-frame-pointer, because the
optimization node created within handle_optimize_attribute had
flag_omit_frame_pointer incorrectly set, whereas
default_optimization_node didn't.  With this difference,
can_inline_edge_by_limits_p flagged an optimization mismatch and we
refused to inline the function that had a redundant optimization flag
into one that didn't, which is exactly what is tested for there.

This patch restores the calls to ix86_default_align and
ix86_recompute_optlev_based_flags that used to be, and ought to be,
issued during TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE, but preserves the
intent of the original change, of having those functions called at
different spots within ix86_option_override_internal.  To that end,
the remaining bits were refactored into a separate function, that was
in turn adjusted to operate on explicitly-passed opts and opts_set,
rather than going for their global counterparts.


for  gcc/ChangeLog

PR target/113719
* config/i386/i386-options.cc
(ix86_override_options_after_change_1): Add opts and opts_set
parms, operate on them, after factoring out of...
(ix86_override_options_after_change): ... this.  Restore calls
of ix86_default_align and ix86_recompute_optlev_based_flags.
(ix86_option_override_internal): Call the factored-out bits.

(cherry picked from commit bf2fc0a27b35de039c3d45e6d7ea9ad0a8a305ba)

Diff:
---
 gcc/config/i386/i386-options.cc | 59 -
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index cdbe2dc62010..4c27e4e01957 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1870,37 +1870,58 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
 }
 }
 
-/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
+/* Implement part of TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
 
-void
-ix86_override_options_after_change (void)
+static void
+ix86_override_options_after_change_1 (struct gcc_options *opts,
+ struct gcc_options *opts_set)
 {
+#define OPTS_SET_P(OPTION) opts_set->x_ ## OPTION
+#define OPTS(OPTION) opts->x_ ## OPTION
+
   /* Disable unrolling small loops when there's explicit
  -f{,no}unroll-loop.  */
-  if ((OPTION_SET_P (flag_unroll_loops))
- || (OPTION_SET_P (flag_unroll_all_loops)
-&& flag_unroll_all_loops))
+  if ((OPTS_SET_P (flag_unroll_loops))
+ || (OPTS_SET_P (flag_unroll_all_loops)
+&& OPTS (flag_unroll_all_loops)))
 {
-  if (!OPTION_SET_P (ix86_unroll_only_small_loops))
-   ix86_unroll_only_small_loops = 0;
+  if (!OPTS_SET_P (ix86_unroll_only_small_loops))
+   OPTS (ix86_unroll_only_small_loops) = 0;
   /* Re-enable -frename-registers and -fweb if funroll-loops
 enabled.  */
-  if (!OPTION_SET_P (flag_web))
-   flag_web = flag_unroll_loops;
-  if (!OPTION_SET_P (flag_rename_registers))
-   flag_rename_registers = flag_unroll_loops;
+  if (!OPTS_SET_P (flag_web))
+   OPTS (flag_web) = OPTS (flag_unroll_loops);
+  if (!OPTS_SET_P (flag_rename_registers))
+   OPTS (flag_rename_registers) = OPTS (flag_unroll_loops);
   /* -fcunroll-grow-size default follws -f[no]-unroll-loops.  */
-  if (!OPTION_SET_P (flag_cunroll_grow_size))
-   flag_cunroll_grow_size = flag_unroll_loops
-|| flag_peel_loops
-|| optimize >= 3;
+  if (!OPTS_SET_P (flag_cunroll_grow_size))
+   OPTS (flag_cunroll_grow_size)
+ = (OPTS (flag_unroll_loops)
+|| OPTS (flag_peel_loops)
+|| OPTS (optimize) >= 3);
 }
   else
 {
-  if (!OPTION_SET_P (flag_cunroll_grow_size))
-   flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
+  if (!OPTS_SET_P (flag_cunroll_grow_size))
+   OPTS (flag_cunroll_grow_size)
+ = (OPTS (flag_peel_loops)
+|| OPTS (optimize) >= 3);
 }
 
+#undef OPTS
+#undef OPTS_SET_P
+}
+
+/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
+
+void
+ix86_override_options_after_change (void)
+{
+  ix86_default_align (&global_options);
+
+  ix86_recompute_optlev_based_flags (&global_options, &global_options_set);
+
+  ix86_override_options_after_change_1 (&global_options, &global_options_set);
 }
 
 /* Clear stack slot assignments remembered from previous functions.
@@ -2417,7 +243

[gcc r13-8916] [i386] adjust flag_omit_frame_pointer in a single function [PR113719]

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:52959e34c8a7a0473784ca044487d05e791286c1

commit r13-8916-g52959e34c8a7a0473784ca044487d05e791286c1
Author: Alexandre Oliva 
Date:   Tue Jul 16 06:48:36 2024 -0300

[i386] adjust flag_omit_frame_pointer in a single function [PR113719]

The first two patches for PR113719 have each regressed
gcc.dg/ipa/iinline-attr.c on a different target.  The reason for this
instability is that there are competing flag_omit_frame_pointer
overriders on x86:

- ix86_recompute_optlev_based_flags computes and sets a
  -f[no-]omit-frame-pointer default depending on
  USE_IX86_FRAME_POINTER and, in 32-bit mode, optimize_size

- ix86_option_override_internal enables flag_omit_frame_pointer for
  -momit-leaf-frame-pointer to take effect

ix86_option_override[_internal] calls
ix86_recompute_optlev_based_flags before setting
flag_omit_frame_pointer.  It is called during global process_options.

But ix86_recompute_optlev_based_flags is also called by
parse_optimize_options, during attribute processing, and at that
point, ix86_option_override is not called, so the final overrider for
global options is not applied to the optimize attributes.  If they
differ, the testcase fails.

In order to fix this, we need to process all overriders of this option
whenever we process any of them.  Since this setting is affected by
optimization options, it makes sense to compute it in
parse_optimize_options, rather than in process_options.


for  gcc/ChangeLog

PR target/113719
* config/i386/i386-options.cc (ix86_option_override_internal):
Move flag_omit_frame_pointer final overrider...
(ix86_recompute_optlev_based_flags): ... here.

(cherry picked from commit bf8e80f9d164f8778d86a3dc50e501cf19a9eff1)

Diff:
---
 gcc/config/i386/i386-options.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 4c27e4e01957..a4cff4e615f0 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1868,6 +1868,12 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts,
opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
}
 }
+
+  /* Keep nonleaf frame pointers.  */
+  if (opts->x_flag_omit_frame_pointer)
+opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
+opts->x_flag_omit_frame_pointer = 1;
 }
 
 /* Implement part of TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
@@ -2509,12 +2515,6 @@ ix86_option_override_internal (bool main_args_p,
 opts->x_target_flags |= MASK_NO_RED_ZONE;
 }
 
-  /* Keep nonleaf frame pointers.  */
-  if (opts->x_flag_omit_frame_pointer)
-opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
-  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
-opts->x_flag_omit_frame_pointer = 1;
-
   /* If we're doing fast math, we don't care about comparison order
  wrt NaNs.  This lets us use a shorter comparison sequence.  */
   if (opts->x_flag_finite_math_only)


[gcc/aoliva/heads/testme] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 98c3e8b915af... [strub] adjust all at-calls type variants at once

It previously pointed to:

 529d91ce3711... [strub] adjust all at-calls type variants at once

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  529d91c... [strub] adjust all at-calls type variants at once


Summary of changes (added commits):
---

  98c3e8b... [strub] adjust all at-calls type variants at once


[gcc(refs/users/aoliva/heads/testme)] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:98c3e8b915af9b76960457a811ce5e3a744029f3

commit 98c3e8b915af9b76960457a811ce5e3a744029f3
Author: Alexandre Oliva 
Date:   Tue Jul 16 05:33:07 2024 -0300

[strub] adjust all at-calls type variants at once

TYPE_ARG_TYPES of type variants must compare equal, according to
verify_type, but adjust_at_calls_type didn't preserve this invariant.

Adjust the main type variant and propagate TYPE_ARG_TYPES to all
variants.  While at that, also adjust the canonical type and its
variants, and then verify_type.


for  gcc/ChangeLog

PR c/115848
* ipa-strub.cc (pass_ipa_strub::adjust_at_calls_type_main):
Rename from...
(pass_ipa_strub::adjust_at_calls_type): ... this.  Preserve
TYPE_ARG_TYPES across all variants.  Adjust TYPE_CANONICAL and
verify_type.

for  gcc/testsuite/ChangeLog

PR c/115848
* c-c++-common/strub-pr115848.c: New.
* c-c++-common/strub-pr115848-b.c: New.

Diff:
---
 gcc/ipa-strub.cc  | 41 +--
 gcc/testsuite/c-c++-common/strub-pr115848-b.c |  2 ++
 gcc/testsuite/c-c++-common/strub-pr115848.c   |  7 +
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc
index 8fa7bdf53002..15d91c994bf8 100644
--- a/gcc/ipa-strub.cc
+++ b/gcc/ipa-strub.cc
@@ -1891,6 +1891,7 @@ public:
 
 #undef DEF_IDENT
 
+  static inline int adjust_at_calls_type_main (tree);
   static inline int adjust_at_calls_type (tree);
   static inline void adjust_at_calls_call (cgraph_edge *, int, tree);
   static inline void adjust_at_calls_calls (cgraph_node *);
@@ -2348,15 +2349,51 @@ strub_watermark_parm (tree fndecl)
   gcc_unreachable ();
 }
 
+/* Adjust a STRUB_AT_CALLS function TYPE and all its variants,
+   preserving TYPE_ARG_TYPES identity, adding a watermark pointer if
+   it hasn't been added yet.  Return the named argument count.  */
+
+int
+pass_ipa_strub::adjust_at_calls_type (tree type)
+{
+  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+
+  tree tmain = TYPE_MAIN_VARIANT (type);
+  tree orig_types = TYPE_ARG_TYPES (tmain);
+  gcc_checking_assert (TYPE_ARG_TYPES (type) == orig_types);
+  int named_args = adjust_at_calls_type_main (tmain);
+  tree mod_types = TYPE_ARG_TYPES (tmain);
+
+  if (mod_types != orig_types)
+for (tree other = TYPE_NEXT_VARIANT (tmain);
+other != NULL_TREE; other = TYPE_NEXT_VARIANT (other))
+  {
+   gcc_checking_assert (TYPE_ARG_TYPES (other) == orig_types);
+   TYPE_ARG_TYPES (other) = mod_types;
+  }
+
+  if (TYPE_CANONICAL (type)
+  && TYPE_MAIN_VARIANT (TYPE_CANONICAL (type)) != tmain)
+{
+  int ret = adjust_at_calls_type (TYPE_CANONICAL (type));
+  gcc_checking_assert (named_args == ret);
+}
+
+  if (flag_checking)
+verify_type (type);
+
+  return named_args;
+}
+
 /* Adjust a STRUB_AT_CALLS function TYPE, adding a watermark pointer if it
hasn't been added yet.  Return the named argument count.  */
 
 int
-pass_ipa_strub::adjust_at_calls_type (tree type)
+pass_ipa_strub::adjust_at_calls_type_main (tree type)
 {
   int named_args = 0;
 
-  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
 
   if (!TYPE_ARG_TYPES (type))
 return named_args;
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848-b.c b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
new file mode 100644
index 000000000000..b88297d4e91b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
@@ -0,0 +1,2 @@
+/* { dg-skip-if part { *-*-* } } */
+void __attribute__((__strub__)) b(int, int) {}
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848.c b/gcc/testsuite/c-c++-common/strub-pr115848.c
new file mode 100644
index 000000000000..97fdecfb0058
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848.c
@@ -0,0 +1,7 @@
+/* { dg-do link } */
+/* { dg-options "-flto" } */
+/* { dg-additional-sources "strub-pr115848-b.c" } */
+
+typedef void __attribute__((__strub__)) a(int, int);
+a(b);
+void c() { b(0, 0); }


[gcc/aoliva/heads/testme] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 0b2b739ca5bb... [strub] adjust all at-calls type variants at once

It previously pointed to:

 98c3e8b915af... [strub] adjust all at-calls type variants at once

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  98c3e8b... [strub] adjust all at-calls type variants at once


Summary of changes (added commits):
---

  0b2b739... [strub] adjust all at-calls type variants at once


[gcc(refs/users/aoliva/heads/testme)] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:0b2b739ca5bbdc9342b4c67d3de816f456877926

commit 0b2b739ca5bbdc9342b4c67d3de816f456877926
Author: Alexandre Oliva 
Date:   Tue Jul 16 05:33:07 2024 -0300

[strub] adjust all at-calls type variants at once

TYPE_ARG_TYPES of type variants must compare equal, according to
verify_type, but adjust_at_calls_type didn't preserve this invariant.

Adjust the main type variant and propagate TYPE_ARG_TYPES to all
variants.  While at that, also adjust the canonical type and its
variants, and then verify_type.


for  gcc/ChangeLog

PR c/115848
* ipa-strub.cc (pass_ipa_strub::adjust_at_calls_type_main):
Rename from...
(pass_ipa_strub::adjust_at_calls_type): ... this.  Preserve
TYPE_ARG_TYPES across all variants.  Adjust TYPE_CANONICAL and
verify_type.

for  gcc/testsuite/ChangeLog

PR c/115848
* c-c++-common/strub-pr115848.c: New.

Diff:
---
 gcc/ipa-strub.cc| 41 +++--
 gcc/testsuite/c-c++-common/strub-pr115848.c |  6 +
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc
index 8fa7bdf53002..15d91c994bf8 100644
--- a/gcc/ipa-strub.cc
+++ b/gcc/ipa-strub.cc
@@ -1891,6 +1891,7 @@ public:
 
 #undef DEF_IDENT
 
+  static inline int adjust_at_calls_type_main (tree);
   static inline int adjust_at_calls_type (tree);
   static inline void adjust_at_calls_call (cgraph_edge *, int, tree);
   static inline void adjust_at_calls_calls (cgraph_node *);
@@ -2348,15 +2349,51 @@ strub_watermark_parm (tree fndecl)
   gcc_unreachable ();
 }
 
+/* Adjust a STRUB_AT_CALLS function TYPE and all its variants,
+   preserving TYPE_ARG_TYPES identity, adding a watermark pointer if
+   it hasn't been added yet.  Return the named argument count.  */
+
+int
+pass_ipa_strub::adjust_at_calls_type (tree type)
+{
+  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+
+  tree tmain = TYPE_MAIN_VARIANT (type);
+  tree orig_types = TYPE_ARG_TYPES (tmain);
+  gcc_checking_assert (TYPE_ARG_TYPES (type) == orig_types);
+  int named_args = adjust_at_calls_type_main (tmain);
+  tree mod_types = TYPE_ARG_TYPES (tmain);
+
+  if (mod_types != orig_types)
+for (tree other = TYPE_NEXT_VARIANT (tmain);
+other != NULL_TREE; other = TYPE_NEXT_VARIANT (other))
+  {
+   gcc_checking_assert (TYPE_ARG_TYPES (other) == orig_types);
+   TYPE_ARG_TYPES (other) = mod_types;
+  }
+
+  if (TYPE_CANONICAL (type)
+  && TYPE_MAIN_VARIANT (TYPE_CANONICAL (type)) != tmain)
+{
+  int ret = adjust_at_calls_type (TYPE_CANONICAL (type));
+  gcc_checking_assert (named_args == ret);
+}
+
+  if (flag_checking)
+verify_type (type);
+
+  return named_args;
+}
+
 /* Adjust a STRUB_AT_CALLS function TYPE, adding a watermark pointer if it
hasn't been added yet.  Return the named argument count.  */
 
 int
-pass_ipa_strub::adjust_at_calls_type (tree type)
+pass_ipa_strub::adjust_at_calls_type_main (tree type)
 {
   int named_args = 0;
 
-  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
 
   if (!TYPE_ARG_TYPES (type))
 return named_args;
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848.c b/gcc/testsuite/c-c++-common/strub-pr115848.c
new file mode 100644
index 000000000000..6c8618dad88a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848.c
@@ -0,0 +1,6 @@
+/* { dg-do link } */
+/* { dg-options "-flto -r" } */
+
+typedef void __attribute__((__strub__)) a(int, int);
+a(b);
+void c() { b(0, 0); }


[gcc/aoliva/heads/testme] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 e59e3433b03c... [strub] adjust all at-calls type variants at once

It previously pointed to:

 0b2b739ca5bb... [strub] adjust all at-calls type variants at once

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  0b2b739... [strub] adjust all at-calls type variants at once


Summary of changes (added commits):
---

  e59e343... [strub] adjust all at-calls type variants at once


[gcc(refs/users/aoliva/heads/testme)] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:e59e3433b03cdd8ad9c4670873f5f9749d8a080b

commit e59e3433b03cdd8ad9c4670873f5f9749d8a080b
Author: Alexandre Oliva 
Date:   Tue Jul 16 05:33:07 2024 -0300

[strub] adjust all at-calls type variants at once

TYPE_ARG_TYPES of type variants must compare equal, according to
verify_type, but adjust_at_calls_type didn't preserve this invariant.

Adjust the main type variant and propagate TYPE_ARG_TYPES to all
variants.  While at that, also adjust the canonical type and its
variants, and then verify_type.


for  gcc/ChangeLog

PR c/115848
* ipa-strub.cc (pass_ipa_strub::adjust_at_calls_type_main):
Rename from...
(pass_ipa_strub::adjust_at_calls_type): ... this.  Preserve
TYPE_ARG_TYPES across all variants.  Adjust TYPE_CANONICAL and
verify_type.

for  gcc/testsuite/ChangeLog

PR c/115848
* c-c++-common/strub-pr115848.c: New.
* c-c++-common/strub-pr115848-b.c: New.

Diff:
---
 gcc/ipa-strub.cc  | 41 +--
 gcc/testsuite/c-c++-common/strub-pr115848-b.c |  6 
 gcc/testsuite/c-c++-common/strub-pr115848.c   |  7 +
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc
index 8fa7bdf53002..15d91c994bf8 100644
--- a/gcc/ipa-strub.cc
+++ b/gcc/ipa-strub.cc
@@ -1891,6 +1891,7 @@ public:
 
 #undef DEF_IDENT
 
+  static inline int adjust_at_calls_type_main (tree);
   static inline int adjust_at_calls_type (tree);
   static inline void adjust_at_calls_call (cgraph_edge *, int, tree);
   static inline void adjust_at_calls_calls (cgraph_node *);
@@ -2348,15 +2349,51 @@ strub_watermark_parm (tree fndecl)
   gcc_unreachable ();
 }
 
+/* Adjust a STRUB_AT_CALLS function TYPE and all its variants,
+   preserving TYPE_ARG_TYPES identity, adding a watermark pointer if
+   it hasn't been added yet.  Return the named argument count.  */
+
+int
+pass_ipa_strub::adjust_at_calls_type (tree type)
+{
+  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+
+  tree tmain = TYPE_MAIN_VARIANT (type);
+  tree orig_types = TYPE_ARG_TYPES (tmain);
+  gcc_checking_assert (TYPE_ARG_TYPES (type) == orig_types);
+  int named_args = adjust_at_calls_type_main (tmain);
+  tree mod_types = TYPE_ARG_TYPES (tmain);
+
+  if (mod_types != orig_types)
+for (tree other = TYPE_NEXT_VARIANT (tmain);
+other != NULL_TREE; other = TYPE_NEXT_VARIANT (other))
+  {
+   gcc_checking_assert (TYPE_ARG_TYPES (other) == orig_types);
+   TYPE_ARG_TYPES (other) = mod_types;
+  }
+
+  if (TYPE_CANONICAL (type)
+  && TYPE_MAIN_VARIANT (TYPE_CANONICAL (type)) != tmain)
+{
+  int ret = adjust_at_calls_type (TYPE_CANONICAL (type));
+  gcc_checking_assert (named_args == ret);
+}
+
+  if (flag_checking)
+verify_type (type);
+
+  return named_args;
+}
+
 /* Adjust a STRUB_AT_CALLS function TYPE, adding a watermark pointer if it
hasn't been added yet.  Return the named argument count.  */
 
 int
-pass_ipa_strub::adjust_at_calls_type (tree type)
+pass_ipa_strub::adjust_at_calls_type_main (tree type)
 {
   int named_args = 0;
 
-  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
 
   if (!TYPE_ARG_TYPES (type))
 return named_args;
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848-b.c 
b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
new file mode 100644
index 000000000000..9b9e134b3f41
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
@@ -0,0 +1,6 @@
+/* { dg-skip-if part { *-*-* } } */
+void __attribute__((__strub__)) b(int, int) {}
+void c(void);
+int main() {
+  c();
+}
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848.c 
b/gcc/testsuite/c-c++-common/strub-pr115848.c
new file mode 100644
index 000000000000..97fdecfb0058
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848.c
@@ -0,0 +1,7 @@
+/* { dg-do link } */
+/* { dg-options "-flto" } */
+/* { dg-additional-sources "strub-pr115848-b.c" } */
+
+typedef void __attribute__((__strub__)) a(int, int);
+a(b);
+void c() { b(0, 0); }


[gcc/aoliva/heads/testme] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 110c93a4411d... [strub] adjust all at-calls type variants at once

It previously pointed to:

 e59e3433b03c... [strub] adjust all at-calls type variants at once

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  e59e343... [strub] adjust all at-calls type variants at once


Summary of changes (added commits):
---

  110c93a... [strub] adjust all at-calls type variants at once


[gcc(refs/users/aoliva/heads/testme)] [strub] adjust all at-calls type variants at once

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:110c93a4411dbdaf3581364996a7d9760d1247bd

commit 110c93a4411dbdaf3581364996a7d9760d1247bd
Author: Alexandre Oliva 
Date:   Tue Jul 16 05:33:07 2024 -0300

[strub] adjust all at-calls type variants at once

TYPE_ARG_TYPES of type variants must compare equal, according to
verify_type, but adjust_at_calls_type didn't preserve this invariant.

Adjust the main type variant and propagate TYPE_ARG_TYPES to all
variants.  While at that, also adjust the canonical type and its
variants, and then verify_type.


for  gcc/ChangeLog

PR c/115848
* ipa-strub.cc (pass_ipa_strub::adjust_at_calls_type_main):
Rename from...
(pass_ipa_strub::adjust_at_calls_type): ... this.  Preserve
TYPE_ARG_TYPES across all variants.  Adjust TYPE_CANONICAL and
verify_type.

for  gcc/testsuite/ChangeLog

PR c/115848
* c-c++-common/strub-pr115848.c: New.
* c-c++-common/strub-pr115848-b.c: New.

Diff:
---
 gcc/ipa-strub.cc  | 41 +--
 gcc/testsuite/c-c++-common/strub-pr115848-b.c |  6 
 gcc/testsuite/c-c++-common/strub-pr115848.c   |  8 ++
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc
index 8fa7bdf53002..15d91c994bf8 100644
--- a/gcc/ipa-strub.cc
+++ b/gcc/ipa-strub.cc
@@ -1891,6 +1891,7 @@ public:
 
 #undef DEF_IDENT
 
+  static inline int adjust_at_calls_type_main (tree);
   static inline int adjust_at_calls_type (tree);
   static inline void adjust_at_calls_call (cgraph_edge *, int, tree);
   static inline void adjust_at_calls_calls (cgraph_node *);
@@ -2348,15 +2349,51 @@ strub_watermark_parm (tree fndecl)
   gcc_unreachable ();
 }
 
+/* Adjust a STRUB_AT_CALLS function TYPE and all its variants,
+   preserving TYPE_ARG_TYPES identity, adding a watermark pointer if
+   it hasn't been added yet.  Return the named argument count.  */
+
+int
+pass_ipa_strub::adjust_at_calls_type (tree type)
+{
+  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+
+  tree tmain = TYPE_MAIN_VARIANT (type);
+  tree orig_types = TYPE_ARG_TYPES (tmain);
+  gcc_checking_assert (TYPE_ARG_TYPES (type) == orig_types);
+  int named_args = adjust_at_calls_type_main (tmain);
+  tree mod_types = TYPE_ARG_TYPES (tmain);
+
+  if (mod_types != orig_types)
+for (tree other = TYPE_NEXT_VARIANT (tmain);
+other != NULL_TREE; other = TYPE_NEXT_VARIANT (other))
+  {
+   gcc_checking_assert (TYPE_ARG_TYPES (other) == orig_types);
+   TYPE_ARG_TYPES (other) = mod_types;
+  }
+
+  if (TYPE_CANONICAL (type)
+  && TYPE_MAIN_VARIANT (TYPE_CANONICAL (type)) != tmain)
+{
+  int ret = adjust_at_calls_type (TYPE_CANONICAL (type));
+  gcc_checking_assert (named_args == ret);
+}
+
+  if (flag_checking)
+verify_type (type);
+
+  return named_args;
+}
+
 /* Adjust a STRUB_AT_CALLS function TYPE, adding a watermark pointer if it
hasn't been added yet.  Return the named argument count.  */
 
 int
-pass_ipa_strub::adjust_at_calls_type (tree type)
+pass_ipa_strub::adjust_at_calls_type_main (tree type)
 {
   int named_args = 0;
 
-  gcc_checking_assert (same_strub_mode_in_variants_p (type));
+  gcc_checking_assert (TYPE_MAIN_VARIANT (type) == type);
 
   if (!TYPE_ARG_TYPES (type))
 return named_args;
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848-b.c 
b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
new file mode 100644
index 000000000000..9b9e134b3f41
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848-b.c
@@ -0,0 +1,6 @@
+/* { dg-skip-if part { *-*-* } } */
+void __attribute__((__strub__)) b(int, int) {}
+void c(void);
+int main() {
+  c();
+}
diff --git a/gcc/testsuite/c-c++-common/strub-pr115848.c 
b/gcc/testsuite/c-c++-common/strub-pr115848.c
new file mode 100644
index 000000000000..158654090721
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/strub-pr115848.c
@@ -0,0 +1,8 @@
+/* { dg-do link } */
+/* { dg-require-effective-target lto } */
+/* { dg-options "-flto" } */
+/* { dg-additional-sources "strub-pr115848-b.c" } */
+
+typedef void __attribute__((__strub__)) a(int, int);
+a(b);
+void c() { b(0, 0); }


[gcc r11-11578] Fixup unaligned load/store cost for znver4

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:bcb2a35a0c04417c407a97d9ff05c2af1d6d1b8d

commit r11-11578-gbcb2a35a0c04417c407a97d9ff05c2af1d6d1b8d
Author: Richard Biener 
Date:   Mon Jul 15 13:01:24 2024 +0200

Fixup unaligned load/store cost for znver4

Currently unaligned YMM and ZMM load and store costs are cheaper than
aligned which causes the vectorizer to purposely mis-align accesses
by adding an alignment prologue.  It looks like the unaligned costs
were simply left untouched from znver3 where they equate the aligned
costs when tweaking aligned costs for znver4.  The following makes
the unaligned costs equal to the aligned costs.

This avoids the miscompile seen in PR115843 but it's of course not
a real fix for the issue uncovered there.  But it makes it qualify
as a regression fix.

PR tree-optimization/115843
* config/i386/x86-tune-costs.h (znver4_cost): Update unaligned
load and store cost from the aligned costs.

(cherry picked from commit 1e3aa9c9278db69d4bdb661a750a7268789188d6)

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 48100d104156..58dd711864c8 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1894,8 +1894,8 @@ struct processor_costs znver4_cost = {
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {8, 8, 8, 12, 12},   /* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {6, 6, 6, 6, 6}, /* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8}, /* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},  /* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},   /* cost of unaligned stores.  */
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to 
integer.  */


[gcc r14-10428] RISC-V: testsuite: Properly gate LTO tests

2024-07-16 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:ea5907d6d458b1c9318814c96ebb277c7c8505f5

commit r14-10428-gea5907d6d458b1c9318814c96ebb277c7c8505f5
Author: Christoph Müllner 
Date:   Fri Jul 5 09:53:34 2024 +0200

RISC-V: testsuite: Properly gate LTO tests

There are two test cases with the following skip directive:
  dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" }
This reads as: skip if both '-flto' and '-fno-fat-lto-objects'
are present.  This is not the case if only '-flto' is present.

Since both tests depend on instruction sequences (one does
check-function-bodies the other tests for an assembler error
message), they won't work reliably with fat LTO objects.

Let's change the skip line to gate the test on '-flto'
to avoid failing tests like this:

FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto   
check-function-bodies interrupt
FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto 
-flto-partition=none   check-function-bodies interrupt
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 9)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 9)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-misaligned.c: Remove
"-fno-fat-lto-objects" from skip condition.
* gcc.target/riscv/pr93202.c: Likewise.

(cherry picked from commit 0717d50fc4ff983b79093bdef43b04e4584cc3cd)
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c | 2 +-
 gcc/testsuite/gcc.target/riscv/pr93202.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c 
b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
index b5f8e6c2bbef..912f180e4d65 100644
--- a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
+++ b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fno-schedule-insns 
-fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 /*  Make sure no stack offset are misaligned.
 **  interrupt:
diff --git a/gcc/testsuite/gcc.target/riscv/pr93202.c 
b/gcc/testsuite/gcc.target/riscv/pr93202.c
index 5501191ea52c..5de003fac421 100644
--- a/gcc/testsuite/gcc.target/riscv/pr93202.c
+++ b/gcc/testsuite/gcc.target/riscv/pr93202.c
@@ -1,7 +1,7 @@
 /* PR inline-asm/93202 */
 /* { dg-do compile { target fpic } } */
 /* { dg-options "-fpic" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 void
 foo (void)


[gcc r14-10433] [alpha] adjust MEM alignment for block move [PR115459]

2024-07-16 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:c8fdef7fc25dafc8c7a12727c1046b3c7f2b89bb

commit r14-10433-gc8fdef7fc25dafc8c7a12727c1046b3c7f2b89bb
Author: Alexandre Oliva 
Date:   Tue Jul 16 08:54:20 2024 -0300

[alpha] adjust MEM alignment for block move [PR115459]

Before issuing loads or stores for a block move, adjust the MEM
alignments if analysis of the addresses enabled the inference of
stricter alignment.  This ensures that the MEMs are sufficiently
aligned for the corresponding insns, which avoids trouble in case of
e.g. substitutions into SUBREGs.


for  gcc/ChangeLog

PR target/115459
* config/alpha/alpha.cc (alpha_expand_block_move): Adjust
MEMs to match inferred alignment.

(cherry picked from commit ccfe7151803956d178947d0afda0bd66ce097275)

Diff:
---
 gcc/config/alpha/alpha.cc | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 1126cea1f7ba..e090e74b9d07 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3820,6 +3820,12 @@ alpha_expand_block_move (rtx operands[])
   else if (a >= 16 && c % 2 == 0)
src_align = 16;
}
+
+  if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align)
+   {
+ orig_src = shallow_copy_rtx (orig_src);
+ set_mem_align (orig_src, src_align);
+   }
 }
 
   tmp = XEXP (orig_dst, 0);
@@ -3841,6 +3847,12 @@ alpha_expand_block_move (rtx operands[])
   else if (a >= 16 && c % 2 == 0)
dst_align = 16;
}
+
+  if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align)
+   {
+ orig_dst = shallow_copy_rtx (orig_dst);
+ set_mem_align (orig_dst, dst_align);
+   }
 }
 
   ofs = 0;


[gcc r13-8917] s390: Align *cjump_64 and *icjump_64

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:544b65cddf296a63dfb91c6ffa4f474ae9d70052

commit r13-8917-g544b65cddf296a63dfb91c6ffa4f474ae9d70052
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 13:59:06 2024 +0200

s390: Align *cjump_64 and *icjump_64

During machine reorg we optimize backward jumps and transform insns as
e.g.

(jump_insn 118 117 119 (set (pc)
(if_then_else (ne (reg:CCRAW 33 %cc)
(const_int 8 [0x8]))
(label_ref 134)
(pc))) "dec_math_1.f90":204:8 discrim 1 2161 {*cjump_64}
 (expr_list:REG_DEAD (reg:CCRAW 33 %cc)
(int_list:REG_BR_PROB 719407028 (nil)))
 -> 134)

into

(jump_insn 118 117 432 (set (pc)
(if_then_else (ne (reg:CCRAW 33 %cc)
(const_int 8 [0x8]))
(pc)
(label_ref 433))) "dec_math_1.f90":204:8 discrim 1 -1
 (expr_list:REG_DEAD (reg:CCRAW 33 %cc)
(int_list:REG_BR_PROB 719407028 (nil)))
 -> 433)

The latter is not recognized anymore since *icjump_64 only matches
CC_REGNUM against zero.  Fixed by aligning *cjump_64 and *icjump_64.

gcc/ChangeLog:

* config/s390/s390.md (*icjump_64): Allow raw CC comparisons,
i.e., any constant integer between 0 and 15 for CC comparisons.

(cherry picked from commit 56de68aba6cb9cf3022d9e303eec6c6cdb49ad4d)

Diff:
---
 gcc/config/s390/s390.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 00d39608e1d7..50a828f2bbba 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9480,7 +9480,8 @@
 (define_insn "*icjump_64"
   [(set (pc)
 (if_then_else
-  (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+  (match_operator 1 "s390_comparison" [(reg CC_REGNUM)
+  (match_operand 2 
"const_int_operand" "")])
   (pc)
   (label_ref (match_operand 0 "" ""]
   ""


[gcc r13-8918] s390: Fix output template for movv1qi

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:1accf7036570cbb0fef9afa595634be03f8c14e8

commit r13-8918-g1accf7036570cbb0fef9afa595634be03f8c14e8
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 13:59:38 2024 +0200

s390: Fix output template for movv1qi

Although for instructions MVI and MVIY it does not make a difference
whether the immediate is interpreted as signed or unsigned, GAS expects
unsigned immediates for instruction format SI_URD.

gcc/ChangeLog:

* config/s390/vector.md (mov<mode>): Fix output template for
movv1qi.

(cherry picked from commit e6680d3f392f7f7cc2a1515276213e21e9eeab1c)

Diff:
---
 gcc/config/s390/vector.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 21bec729efa7..1bae1056951c 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -359,8 +359,8 @@
lr\t%0,%1
mvi\t%0,0
mviy\t%0,0
-   mvi\t%0,-1
-   mviy\t%0,-1
+   mvi\t%0,255
+   mviy\t%0,255
lhi\t%0,0
lhi\t%0,-1
llc\t%0,%1


[gcc r12-10619] s390: Align *cjump_64 and *icjump_64

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:06d825719fd4b71c8e3d34fd9756be7f847b

commit r12-10619-g06d825719fd4b71c8e3d34fd9756be7f847b
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 14:01:50 2024 +0200

s390: Align *cjump_64 and *icjump_64

During machine reorg we optimize backward jumps and transform insns as
e.g.

(jump_insn 118 117 119 (set (pc)
(if_then_else (ne (reg:CCRAW 33 %cc)
(const_int 8 [0x8]))
(label_ref 134)
(pc))) "dec_math_1.f90":204:8 discrim 1 2161 {*cjump_64}
 (expr_list:REG_DEAD (reg:CCRAW 33 %cc)
(int_list:REG_BR_PROB 719407028 (nil)))
 -> 134)

into

(jump_insn 118 117 432 (set (pc)
(if_then_else (ne (reg:CCRAW 33 %cc)
(const_int 8 [0x8]))
(pc)
(label_ref 433))) "dec_math_1.f90":204:8 discrim 1 -1
 (expr_list:REG_DEAD (reg:CCRAW 33 %cc)
(int_list:REG_BR_PROB 719407028 (nil)))
 -> 433)

The latter is not recognized anymore since *icjump_64 only matches
CC_REGNUM against zero.  Fixed by aligning *cjump_64 and *icjump_64.

gcc/ChangeLog:

* config/s390/s390.md (*icjump_64): Allow raw CC comparisons,
i.e., any constant integer between 0 and 15 for CC comparisons.

(cherry picked from commit 56de68aba6cb9cf3022d9e303eec6c6cdb49ad4d)

Diff:
---
 gcc/config/s390/s390.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index aaa247d7612f..5b174e0d866f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9472,7 +9472,8 @@
 (define_insn "*icjump_64"
   [(set (pc)
 (if_then_else
-  (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+  (match_operator 1 "s390_comparison" [(reg CC_REGNUM)
+  (match_operand 2 
"const_int_operand" "")])
   (pc)
   (label_ref (match_operand 0 "" ""]
   ""


[gcc r12-10620] s390: Fix output template for movv1qi

2024-07-16 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:9e00ae3e23eef8bff497981e00853ca092772201

commit r12-10620-g9e00ae3e23eef8bff497981e00853ca092772201
Author: Stefan Schulze Frielinghaus 
Date:   Tue Jul 16 14:01:58 2024 +0200

s390: Fix output template for movv1qi

Although for instructions MVI and MVIY it does not make a difference
whether the immediate is interpreted as signed or unsigned, GAS expects
unsigned immediates for instruction format SI_URD.

gcc/ChangeLog:

* config/s390/vector.md (mov<mode>): Fix output template for
movv1qi.

(cherry picked from commit e6680d3f392f7f7cc2a1515276213e21e9eeab1c)

Diff:
---
 gcc/config/s390/vector.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 624729814afd..e795b4ffef7f 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -357,8 +357,8 @@
lr\t%0,%1
mvi\t%0,0
mviy\t%0,0
-   mvi\t%0,-1
-   mviy\t%0,-1
+   mvi\t%0,255
+   mviy\t%0,255
lhi\t%0,0
lhi\t%0,-1
llc\t%0,%1


[gcc r15-2062] libiberty/buildargv: POSIX behaviour for backslash handling

2024-07-16 Thread Andrew Burgess via Gcc-cvs
https://gcc.gnu.org/g:a87954610f5e48a89b63a3194df9f5087bdc2f77

commit r15-2062-ga87954610f5e48a89b63a3194df9f5087bdc2f77
Author: Andrew Burgess 
Date:   Wed Dec 6 16:45:31 2023 +

libiberty/buildargv: POSIX behaviour for backslash handling

GDB makes use of the libiberty function buildargv for splitting the
inferior (program being debugged) argument string in the case where
the inferior is not being started under a shell.

I have recently been working to improve this area of GDB, and have
tracked down some of the unexpected behaviour to the libiberty
function buildargv, and how it handles backslash escapes.

For reference, I've been mostly reading:

  https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html

The issues that I would like to fix are:

  1. Backslashes within single quotes should not be treated as an
  escape, thus: '\a' should split to \a, retaining the backslash.

  2. Backslashes within double quotes should only act as an escape if
  they are immediately before one of the characters $ (dollar),
  ` (backtick), " (double quote), \ (backslash), or \n (newline).  In
  all other cases a backslash should not be treated as an escape
  character.  Thus: "\a" should split to \a, but "\$" should split to
  $.

  3. A backslash-newline sequence should be treated as a line
  continuation, both the backslash and the newline should be removed.

I've updated libiberty and also added some tests.  All the existing
libiberty tests continue to pass, but I'm not sure if there is more
testing that should be done, buildargv is used within lto-wrapper.cc,
so maybe there's some testing folk can suggest that I run?

2024-07-16  Andrew Burgess  

libiberty/

* argv.c (buildargv): Backslashes within single quotes are
literal, backslashes only escape POSIX defined special characters
within double quotes, and backslashed newlines should act as line
continuations.
* testsuite/test-expandargv.c: Add new tests 7, 8, and 9.

Diff:
---
 libiberty/argv.c  |  8 ++--
 libiberty/testsuite/test-expandargv.c | 34 ++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/libiberty/argv.c b/libiberty/argv.c
index 45f168546032..d9d32e59e720 100644
--- a/libiberty/argv.c
+++ b/libiberty/argv.c
@@ -224,9 +224,13 @@ char **buildargv (const char *input)
  if (bsquote)
{
  bsquote = 0;
- *arg++ = *input;
+ if (*input != '\n')
+   *arg++ = *input;
}
- else if (*input == '\\')
+ else if (*input == '\\'
+  && !squote
+  && (!dquote
+  || strchr ("$`\"\\\n", *(input + 1)) != NULL))
{
  bsquote = 1;
}
diff --git a/libiberty/testsuite/test-expandargv.c 
b/libiberty/testsuite/test-expandargv.c
index 1e9cb0a0d5a8..ea1aeb0eda2b 100644
--- a/libiberty/testsuite/test-expandargv.c
+++ b/libiberty/testsuite/test-expandargv.c
@@ -142,6 +142,40 @@ const char *test_data[] = {
   "b",
   0,
 
+  /* Test 7 - No backslash removal within single quotes.  */
+  "'a\\$VAR' '\\\"'",/* Test 7 data */
+  ARGV0,
+  "@test-expandargv-7.lst",
+  0,
+  ARGV0,
+  "a\\$VAR",
+  "\\\"",
+  0,
+
+  /* Test 8 - Remove backslash / newline pairs.  */
+  "\"ab\\\ncd\" ef\\\ngh",/* Test 8 data */
+  ARGV0,
+  "@test-expandargv-8.lst",
+  0,
+  ARGV0,
+  "abcd",
+  "efgh",
+  0,
+
+  /* Test 9 - Backslash within double quotes.  */
+  "\"\\$VAR\" \"\\`\" \"\\\"\" \"\" \"\\n\" \"\\t\"",/* Test 9 data */
+  ARGV0,
+  "@test-expandargv-9.lst",
+  0,
+  ARGV0,
+  "$VAR",
+  "`",
+  "\"",
+  "\\",
+  "\\n",
+  "\\t",
+  0,
+
   0 /* Test done marker, don't remove. */
 };


[gcc r15-2063] libiberty/buildargv: handle input consisting of only white space

2024-07-16 Thread Andrew Burgess via Gcc-cvs
https://gcc.gnu.org/g:5e1d530da87a6d2aa7e719744cb278e7e54a6623

commit r15-2063-g5e1d530da87a6d2aa7e719744cb278e7e54a6623
Author: Andrew Burgess 
Date:   Sat Feb 10 11:22:13 2024 +

libiberty/buildargv: handle input consisting of only white space

GDB makes use of the libiberty function buildargv for splitting the
inferior (program being debugged) argument string in the case where
the inferior is not being started under a shell.

I have recently been working to improve this area of GDB, and noticed
some unexpected behaviour to the libiberty function buildargv, when
the input is a string consisting only of white space.

What I observe is that if the input to buildargv is a string
containing only white space, then buildargv will return an argv list
containing a single empty argument, e.g.:

  char **argv = buildargv (" ");
  assert (*argv[0] == '\0');
  assert (argv[1] == NULL);

We get the same output from buildargv if the input is a single space,
or multiple spaces.  Other white space characters give the same
results.

This doesn't seem right to me, and in fact, there appears to be a work
around for this issue in expandargv where we have this code:

  /* If the file is empty or contains only whitespace, buildargv would
 return a single empty argument.  In this context we want no arguments,
 instead.  */
  if (only_whitespace (buffer))
{
  file_argv = (char **) xmalloc (sizeof (char *));
  file_argv[0] = NULL;
}
  else
/* Parse the string.  */
file_argv = buildargv (buffer);

I think that the correct behaviour in this situation is to return an
empty argv array, e.g.:

  char **argv = buildargv (" ");
  assert (argv[0] == NULL);

And it turns out that this is a trivial change to buildargv.  The diff
does look big, but this is because I've re-indented a block.  Check
with 'git diff -b' to see the minimal changes.  I've also removed the
work around from expandargv.

When testing this sort of thing I normally write the tests first, and
then fix the code.  In this case test-expandargv.c has sort-of been
used as a mechanism for testing the buildargv function (expandargv
does call buildargv most of the time), however, for this particular
issue the work around in expandargv (mentioned above) masked the
buildargv bug.

I did consider adding a new test-buildargv.c file, however, this would
have basically been a copy & paste of test-expandargv.c (with some
minor changes to call buildargv).  This would be fine now, but feels
like we would eventually end up with one file not being updated as
much as the other, and so test coverage would suffer.

Instead, I have added some explicit buildargv testing to the
test-expandargv.c file, this reuses the test input that is already
defined for expandargv.

Of course, once I removed the work around from expandargv then we now
do always call buildargv from expandargv, and so the bug I'm fixing
would impact both expandargv and buildargv, so maybe the new testing
is redundant?  I tend to think more testing is always better, so I've
left it in for now.

2024-07-16  Andrew Burgess  

libiberty/

* argv.c (buildargv): Treat input of only whitespace as an empty
argument list.
(expandargv): Remove work around for input that is only
whitespace.
* testsuite/test-expandargv.c: Add new tests 10, 11, and 12.
Extend testing to call buildargv in more cases.

Diff:
---
 libiberty/argv.c  | 108 +--
 libiberty/testsuite/test-expandargv.c | 136 --
 2 files changed, 166 insertions(+), 78 deletions(-)

diff --git a/libiberty/argv.c b/libiberty/argv.c
index d9d32e59e720..675336273f3a 100644
--- a/libiberty/argv.c
+++ b/libiberty/argv.c
@@ -212,71 +212,74 @@ char **buildargv (const char *input)
  argv[argc] = NULL;
}
  /* Begin scanning arg */
- arg = copybuf;
- while (*input != EOS)
+ if (*input != EOS)
{
- if (ISSPACE (*input) && !squote && !dquote && !bsquote)
+ arg = copybuf;
+ while (*input != EOS)
{
- break;
-   }
- else
-   {
- if (bsquote)
-   {
- bsquote = 0;
- if (*input != '\n')
-   *arg++ = *input;
-   }
- else if (*input == '\\'
-  && !squote
-  && (!dquote
-  || strchr ("$`\"\\\n", *(input + 1)) != NULL))
+ if (ISSPACE (*input) && !sq

[gcc r15-2064] AVR: Allow more combinations of XOR / IOR with byte-shifts.

2024-07-16 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:cca1229b85f2ad9422773fdb954d0924fa1cd350

commit r15-2064-gcca1229b85f2ad9422773fdb954d0924fa1cd350
Author: Georg-Johann Lay 
Date:   Tue Jul 16 15:17:23 2024 +0200

AVR: Allow more combinations of XOR / IOR with byte-shifts.

This patch takes some existing patterns that have QImode as one
input and uses a mode iterator to allow for more modes to match.
These insns are split after reload into *xorqi3 resp. *iorqi3 insn(s).

gcc/
* config/avr/avr-protos.h (avr_emit_xior_with_shift): New proto.
* config/avr/avr.cc (avr_emit_xior_with_shift): New function.
* config/avr/avr.md (any_lshift): New code iterator.
(*.): New insn-and-split.
(.0): Replaces...
(*qi.byte0): ...this one.
(*.): Replaces...
(*qi.byte1-3): ...this one.

Diff:
---
 gcc/config/avr/avr-protos.h |  1 +
 gcc/config/avr/avr.cc   | 52 ++
 gcc/config/avr/avr.md   | 78 +
 3 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 6e02161759ca..d3fa6c677232 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -88,6 +88,7 @@ extern rtx avr_to_int_mode (rtx);
 extern void avr_expand_prologue (void);
 extern void avr_expand_epilogue (bool);
 extern bool avr_emit_cpymemhi (rtx*);
+extern void avr_emit_xior_with_shift (rtx_insn*, rtx*, int);
 extern int avr_epilogue_uses (int regno);
 extern bool avr_split_tiny_move (rtx_insn *insn, rtx *operands);
 
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4a7cbd0e7bc6..d2a08c60c3ad 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9754,6 +9754,58 @@ avr_out_bitop (rtx insn, rtx *xop, int *plen)
 }
 
 
+/* Emit code for
+
+   XOP[0] = XOP[0] <xior> (XOP[1] <shift> BITOFF)
+
+   where XOP[0] and XOP[1] are hard registers with integer mode,
+   <xior> is XOR or IOR, and <shift> is LSHIFTRT or ASHIFT with a
+   non-negative shift offset BITOFF.  This function emits the operation
+   in terms of byte-wise operations in QImode.  */
+
+void
+avr_emit_xior_with_shift (rtx_insn *insn, rtx *xop, int bitoff)
+{
+  rtx src = SET_SRC (single_set (insn));
+  RTX_CODE xior = GET_CODE (src);
+  gcc_assert (xior == XOR || xior == IOR);
+  gcc_assert (bitoff % 8 == 0);
+
+  // Work out the shift offset in bytes; negative for shift right.
+  RTX_CODE shift = GET_CODE (XEXP (src, 0));
+  int byteoff = 0?0
+: shift == ASHIFT ? bitoff / 8
+: shift == LSHIFTRT ? -bitoff / 8
+// Not a shift but something like REG or ZERO_EXTEND:
+// Use xop[1] as is, without shifting it.
+: 0;
+
+  // Work out which hard REGNOs belong to the operands.
+  int size0 = GET_MODE_SIZE (GET_MODE (xop[0]));
+  int size1 = GET_MODE_SIZE (GET_MODE (xop[1]));
+  int regno0_lo = REGNO (xop[0]), regno0_hi = regno0_lo + size0 - 1;
+  int regno1_lo = REGNO (xop[1]), regno1_hi = regno1_lo + size1 - 1;
+  int regoff = regno0_lo - regno1_lo + byteoff;
+
+  // The order of insns matters in the rare case when xop[1] overlaps xop[0].
+  int beg = regoff > 0 ? regno1_hi : regno1_lo;
+  int end = regoff > 0 ? regno1_lo : regno1_hi;
+  int inc = regoff > 0 ? -1 : 1;
+
+  rtx (*gen)(rtx,rtx,rtx) = xior == XOR ? gen_xorqi3 : gen_iorqi3;
+
+  for (int i = beg; i != end + inc; i += inc)
+{
+  if (IN_RANGE (i + regoff, regno0_lo, regno0_hi))
+   {
+ rtx reg0 = all_regs_rtx[i + regoff];
+ rtx reg1 = all_regs_rtx[i];
+ emit_insn (gen (reg0, reg0, reg1));
+   }
+}
+}
+
+
 /* Output sign extension from XOP[1] to XOP[0] and return "".
If PLEN == NULL, print assembler instructions to perform the operation;
otherwise, set *PLEN to the length of the instruction sequence (in words)
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index e67284421b64..cf9541422a33 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -313,6 +313,7 @@
 (define_code_iterator any_extract [sign_extract zero_extract])
 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
 (define_code_iterator any_shift   [lshiftrt ashiftrt ashift])
+(define_code_iterator any_lshift  [lshiftrt ashift]) ; logic shift
 
 (define_code_iterator piaop [plus ior and])
 (define_code_iterator bitop [xor ior and])
@@ -9350,46 +9351,69 @@
   })
 
 
+;; "*iorsi.ashift""*iorpsi.ashift""*iorhi.ashift"
+;; "*xorsi.ashift""*xorpsi.ashift""*xorhi.ashift"
+;; "*iorsi.lshiftrt"  "*iorpsi.lshiftrt"  "*iorhi.lshiftrt"
+;; "*xorsi.lshiftrt"  "*xorpsi.lshiftrt"  "*xorhi.lshiftrt"
+(define_insn_and_split "*."
+  [(set (match_operand:HISI 0 "register_operand"   
"=r")
+(xior:HISI (any_lshift:HISI (match_operand:HISI 1 "register_operand"   
 "r")
+(match_operand:QI 3 
"const_8_16_24_operand" "n"))
+   (match_operand:HISI 2 "regis

[gcc r15-2065] tree-optimization/115841 - reduction epilogue placement issue

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:016c947b02e79a5c0c0c2d4ad5cb71aa04db3efd

commit r15-2065-g016c947b02e79a5c0c0c2d4ad5cb71aa04db3efd
Author: Richard Biener 
Date:   Tue Jul 16 11:53:17 2024 +0200

tree-optimization/115841 - reduction epilogue placement issue

When emitting the compensation to the vectorized main loop for
a vector reduction value to be re-used in the vectorized epilogue
we fail to place it in the correct block when the main loop is
known to be entered (no loop_vinfo->main_loop_edge) but the
epilogue is not (a loop_vinfo->skip_this_loop_edge).  The code
currently disregards this situation.

With the recent znver4 cost fix I couldn't trigger this situation
with the testcase but I adjusted it so it could eventually trigger
on other targets.

PR tree-optimization/115841
* tree-vect-loop.cc (vect_transform_cycle_phi): Correctly
place the partial vector reduction for the accumulator
re-use when the main loop cannot be skipped but the
epilogue can.

* gcc.dg/vect/pr115841.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115841.c | 42 
 gcc/tree-vect-loop.cc|  7 +++---
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115841.c 
b/gcc/testsuite/gcc.dg/vect/pr115841.c
new file mode 100644
index 000000000000..aa5c66004a03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115841.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param 
vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+
+/* To trigger the bug costing needs to determine that aligning the A170
+   accesses with a prologue is good and there should be a vectorized
+   epilogue with a smaller vector size, re-using the vector accumulator
+   from the vectorized main loop that's statically known to execute
+   but the epilogue loop is not.  */
+
+static unsigned char xl[192];
+unsigned char A170[192*3];
+
+void jerate (unsigned char *, unsigned char *);
+float foo (unsigned n)
+{
+  jerate (xl, A170);
+
+  unsigned i = 32;
+  int kr = 1;
+  float sfn11s = 0.f;
+  float sfn12s = 0.f;
+  do
+{
+  int krm1 = kr - 1;
+  long j = krm1;
+  float a = (*(float(*)[n])A170)[j];
+  float b = (*(float(*)[n])xl)[j];
+  float c = a * b;
+  float d = c * 6.93149983882904052734375e-1f;
+  float e = (*(float(*)[n])A170)[j+48];
+  float f = (*(float(*)[n])A170)[j+96];
+  float g = d * e;
+  sfn11s = sfn11s + g;
+  float h = f * d;
+  sfn12s = sfn12s + h;
+  kr++;
+}
+  while (--i != 0);
+  float tem = sfn11s + sfn12s;
+  return tem;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index a64b5082bd18..b8124a321280 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9026,14 +9026,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
  /* And the reduction could be carried out using a different sign.  */
  if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
def = gimple_convert (&stmts, vectype_out, def);
- if (loop_vinfo->main_loop_edge)
+ edge e;
+ if ((e = loop_vinfo->main_loop_edge)
+ || (e = loop_vinfo->skip_this_loop_edge))
{
  /* While we'd like to insert on the edge this will split
 blocks and disturb bookkeeping, we also will eventually
 need this on the skip edge.  Rely on sinking to
 fixup optimal placement and insert in the pred.  */
- gimple_stmt_iterator gsi
-   = gsi_last_bb (loop_vinfo->main_loop_edge->src);
+ gimple_stmt_iterator gsi = gsi_last_bb (e->src);
  /* Insert before a cond that eventually skips the
 epilogue.  */
  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))


[gcc r15-2066] Lower zeroing array assignment to memset for allocatable arrays.

2024-07-16 Thread Prathamesh Kulkarni via Gcc-cvs
https://gcc.gnu.org/g:616627245fb06106f7c5bc4a36784acc8ec166f0

commit r15-2066-g616627245fb06106f7c5bc4a36784acc8ec166f0
Author: Prathamesh Kulkarni 
Date:   Tue Jul 16 19:43:54 2024 +0530

Lower zeroing array assignment to memset for allocatable arrays.

gcc/fortran/ChangeLog:
* trans-expr.cc (gfc_trans_zero_assign): Handle allocatable arrays.

gcc/testsuite/ChangeLog:
* gfortran.dg/array_memset_3.f90: New test.

Signed-off-by: Prathamesh Kulkarni 

Diff:
---
 gcc/fortran/trans-expr.cc| 31 +++--
 gcc/testsuite/gfortran.dg/array_memset_3.f90 | 52 
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 3ff248549c6e..fc23fb1a7ebf 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -11499,18 +11499,24 @@ gfc_trans_zero_assign (gfc_expr * expr)
   type = TREE_TYPE (dest);
   if (POINTER_TYPE_P (type))
 type = TREE_TYPE (type);
-  if (!GFC_ARRAY_TYPE_P (type))
-return NULL_TREE;
-
-  /* Determine the length of the array.  */
-  len = GFC_TYPE_ARRAY_SIZE (type);
-  if (!len || TREE_CODE (len) != INTEGER_CST)
+  if (GFC_ARRAY_TYPE_P (type))
+{
+  /* Determine the length of the array.  */
+  len = GFC_TYPE_ARRAY_SIZE (type);
+  if (!len || TREE_CODE (len) != INTEGER_CST)
+   return NULL_TREE;
+}
+  else if (GFC_DESCRIPTOR_TYPE_P (type)
+ && gfc_is_simply_contiguous (expr, false, false))
+{
+  if (POINTER_TYPE_P (TREE_TYPE (dest)))
+   dest = build_fold_indirect_ref_loc (input_location, dest);
+  len = gfc_conv_descriptor_size (dest, GFC_TYPE_ARRAY_RANK (type));
+  dest = gfc_conv_descriptor_data_get (dest);
+}
+  else
 return NULL_TREE;
 
-  tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
-  len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, len,
-fold_convert (gfc_array_index_type, tmp));
-
   /* If we are zeroing a local array avoid taking its address by emitting
  a = {} instead.  */
   if (!POINTER_TYPE_P (TREE_TYPE (dest)))
@@ -11518,6 +11524,11 @@ gfc_trans_zero_assign (gfc_expr * expr)
   dest, build_constructor (TREE_TYPE (dest),
  NULL));
 
+  /* Multiply len by element size.  */
+  tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
+  len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+len, fold_convert (gfc_array_index_type, tmp));
+
   /* Convert arguments to the correct types.  */
   dest = fold_convert (pvoid_type_node, dest);
   len = fold_convert (size_type_node, len);
diff --git a/gcc/testsuite/gfortran.dg/array_memset_3.f90 
b/gcc/testsuite/gfortran.dg/array_memset_3.f90
new file mode 100644
index 000000000000..f3945aacbbdf
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/array_memset_3.f90
@@ -0,0 +1,52 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-tree-original" }
+
+subroutine test1(n)
+  implicit none
+integer(8) :: n
+real(4), allocatable :: z(:,:,:)
+
+allocate(z(n, 100, 200))
+z = 0
+end subroutine
+
+subroutine test2(n)
+  implicit none
+integer(8) :: n
+integer, allocatable :: z(:,:,:)
+
+allocate(z(n, 100, 200))
+z = 0
+end subroutine
+
+subroutine test3(n)
+  implicit none
+integer(8) :: n
+logical, allocatable :: z(:,:,:)
+
+allocate(z(n, 100, 200))
+z = .false. 
+end subroutine
+
+subroutine test4(n, z)
+   implicit none
+   integer :: n
+   real, pointer :: z(:,:,:) ! need not be contiguous!
+   z = 0
+end subroutine
+
+subroutine test5(n, z)
+   implicit none
+   integer :: n
+   real, contiguous, pointer :: z(:,:,:)
+   z = 0
+end subroutine
+
+subroutine test6 (n, z)
+   implicit none
+   integer :: n
+   real, contiguous, pointer :: z(:,:,:)
+   z(:,::1,:) = 0
+end subroutine
+
+! { dg-final { scan-tree-dump-times "__builtin_memset" 5 "original" } }


[gcc r14-10434] Fixup unaligned load/store cost for znver4

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d702a957753caf020cb550d143e9e9a62f79e9f5

commit r14-10434-gd702a957753caf020cb550d143e9e9a62f79e9f5
Author: Richard Biener 
Date:   Mon Jul 15 13:01:24 2024 +0200

Fixup unaligned load/store cost for znver4

Currently unaligned YMM and ZMM load and store costs are cheaper than
the aligned ones, which causes the vectorizer to purposely mis-align
accesses by adding an alignment prologue.  It looks like the unaligned
costs were simply left untouched from znver3, where they equal the
aligned costs, when the aligned costs were tweaked for znver4.  The
following makes the unaligned costs equal to the aligned costs.

This avoids the miscompile seen in PR115843 but it's of course not
a real fix for the issue uncovered there.  But it makes it qualify
as a regression fix.

PR tree-optimization/115843
* config/i386/x86-tune-costs.h (znver4_cost): Update unaligned
load and store cost from the aligned costs.

(cherry picked from commit 1e3aa9c9278db69d4bdb661a750a7268789188d6)

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index d34b5cc2..d0168eebdc15 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1924,8 +1924,8 @@ struct processor_costs znver4_cost = {
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {8, 8, 8, 12, 12},   /* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {6, 6, 6, 6, 6}, /* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8}, /* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},  /* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},   /* cost of unaligned stores.  */
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to 
integer.  */


[gcc r14-10437] tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:6f74a5f5dc12bc337068f0f6a554d72604488959

commit r14-10437-g6f74a5f5dc12bc337068f0f6a554d72604488959
Author: Richard Biener 
Date:   Sun Jun 30 11:28:11 2024 +0200

tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy

The following factors out the code that preserves SSA info of the LHS
of a SSA copy LHS = RHS when LHS is about to be eliminated to RHS.

PR tree-optimization/115701
* tree-ssanames.h (maybe_duplicate_ssa_info_at_copy): Declare.
* tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy): New
function, split out from ...
* tree-ssa-copy.cc (fini_copy_prop): ... here.
* tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt): ...
and here.

(cherry picked from commit b5c64b413fd5bc03a1a8ef86d005892071e42cbe)

Diff:
---
 gcc/tree-ssa-copy.cc  | 32 ++--
 gcc/tree-ssa-sccvn.cc | 21 ++---
 gcc/tree-ssanames.cc  | 28 
 gcc/tree-ssanames.h   |  3 ++-
 4 files changed, 34 insertions(+), 50 deletions(-)

diff --git a/gcc/tree-ssa-copy.cc b/gcc/tree-ssa-copy.cc
index bb88472304c2..9c9ec47adcaa 100644
--- a/gcc/tree-ssa-copy.cc
+++ b/gcc/tree-ssa-copy.cc
@@ -527,38 +527,10 @@ fini_copy_prop (void)
  || copy_of[i].value == var)
continue;
 
-  /* In theory the points-to solution of all members of the
- copy chain is their intersection.  For now we do not bother
-to compute this but only make sure we do not lose points-to
-information completely by setting the points-to solution
-of the representative to the first solution we find if
-it doesn't have one already.  */
+  /* Duplicate points-to and range info appropriately.  */
   if (copy_of[i].value != var
  && TREE_CODE (copy_of[i].value) == SSA_NAME)
-   {
- basic_block copy_of_bb
-   = gimple_bb (SSA_NAME_DEF_STMT (copy_of[i].value));
- basic_block var_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
- if (POINTER_TYPE_P (TREE_TYPE (var))
- && SSA_NAME_PTR_INFO (var)
- && !SSA_NAME_PTR_INFO (copy_of[i].value))
-   {
- duplicate_ssa_name_ptr_info (copy_of[i].value,
-  SSA_NAME_PTR_INFO (var));
- /* Points-to information is cfg insensitive,
-but [E]VRP might record context sensitive alignment
-info, non-nullness, etc.  So reset context sensitive
-info if the two SSA_NAMEs aren't defined in the same
-basic block.  */
- if (var_bb != copy_of_bb)
-   reset_flow_sensitive_info (copy_of[i].value);
-   }
- else if (!POINTER_TYPE_P (TREE_TYPE (var))
-  && SSA_NAME_RANGE_INFO (var)
-  && !SSA_NAME_RANGE_INFO (copy_of[i].value)
-  && var_bb == copy_of_bb)
-   duplicate_ssa_name_range_info (copy_of[i].value, var);
-   }
+   maybe_duplicate_ssa_info_at_copy (var, copy_of[i].value);
 }
 
   class copy_folder copy_folder;
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 02c3bd5f5381..0b5c638df455 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -6871,27 +6871,10 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, 
gimple_stmt_iterator *gsi)
 
   /* If this now constitutes a copy duplicate points-to
 and range info appropriately.  This is especially
-important for inserted code.  See tree-ssa-copy.cc
-for similar code.  */
+important for inserted code.  */
   if (sprime
  && TREE_CODE (sprime) == SSA_NAME)
-   {
- basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime));
- if (POINTER_TYPE_P (TREE_TYPE (lhs))
- && SSA_NAME_PTR_INFO (lhs)
- && ! SSA_NAME_PTR_INFO (sprime))
-   {
- duplicate_ssa_name_ptr_info (sprime,
-  SSA_NAME_PTR_INFO (lhs));
- if (b != sprime_b)
-   reset_flow_sensitive_info (sprime);
-   }
- else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  && SSA_NAME_RANGE_INFO (lhs)
-  && ! SSA_NAME_RANGE_INFO (sprime)
-  && b == sprime_b)
-   duplicate_ssa_name_range_info (sprime, lhs);
-   }
+   maybe_duplicate_ssa_info_at_copy (lhs, sprime);
 
   /* Inhibit the use of an inserted PHI on a loop header when
 the address of the memory reference is a simple induction
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 1753a421a0ba..5ad7d117bd33 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -757,6 +757,34 @@ duplicate_ssa_name_range_info (tree name, tree src)
 }
 }
 
+/* For a SSA copy DEST = SRC duplicate SSA info present on DEST to SRC
+   to prese

[gcc r14-10438] tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e01012c459c931ae39558b019107226c232fa4d1

commit r14-10438-ge01012c459c931ae39558b019107226c232fa4d1
Author: Richard Biener 
Date:   Sun Jun 30 11:34:43 2024 +0200

tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy

The following restricts copying of points-to info from defs that
might be in regions invoking UB and are never executed.

PR tree-optimization/115701
* tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy):
Only copy info from within the same BB.

* gcc.dg/torture/pr115701.c: New testcase.

(cherry picked from commit b77f17c5feec9614568bf2dee7f7d811465ee4a5)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr115701.c | 22 ++
 gcc/tree-ssanames.cc| 22 --
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115701.c 
b/gcc/testsuite/gcc.dg/torture/pr115701.c
new file mode 100644
index 000000000000..9b7c34b23d78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115701.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* IPA PTA disables local PTA recompute after IPA.  */
+/* { dg-additional-options "-fipa-pta" } */
+
+int a, c, d;
+static int b;
+int main()
+{
+  int *e = &a, **f = &e;
+  while (1) {
+int **g, ***h = &f;
+if (c)
+  *g = e;
+else if (!b)
+  break;
+*e = **g;
+e = &d;
+  }
+  if (e != &a)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 5ad7d117bd33..6c2525900abf 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -763,25 +763,19 @@ duplicate_ssa_name_range_info (tree name, tree src)
 void
 maybe_duplicate_ssa_info_at_copy (tree dest, tree src)
 {
+  /* While points-to info is flow-insensitive we have to avoid copying
+ info from not executed regions invoking UB to dominating defs.  */
+  if (gimple_bb (SSA_NAME_DEF_STMT (src))
+  != gimple_bb (SSA_NAME_DEF_STMT (dest)))
+return;
+
   if (POINTER_TYPE_P (TREE_TYPE (dest))
   && SSA_NAME_PTR_INFO (dest)
   && ! SSA_NAME_PTR_INFO (src))
-{
-  duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest));
-  /* Points-to information is cfg insensitive,
-but VRP might record context sensitive alignment
-info, non-nullness, etc.  So reset context sensitive
-info if the two SSA_NAMEs aren't defined in the same
-basic block.  */
-  if (gimple_bb (SSA_NAME_DEF_STMT (src))
- != gimple_bb (SSA_NAME_DEF_STMT (dest)))
-   reset_flow_sensitive_info (src);
-}
+duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest));
   else if (INTEGRAL_TYPE_P (TREE_TYPE (dest))
   && SSA_NAME_RANGE_INFO (dest)
-  && ! SSA_NAME_RANGE_INFO (src)
-  && (gimple_bb (SSA_NAME_DEF_STMT (src))
-  == gimple_bb (SSA_NAME_DEF_STMT (dest
+  && ! SSA_NAME_RANGE_INFO (src))
 duplicate_ssa_name_range_info (src, dest);
 }


[gcc r14-10435] Fixup unaligned load/store cost for znver5

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4a04110ec8388b6540380cfedbe50af1b29e3e36

commit r14-10435-g4a04110ec8388b6540380cfedbe50af1b29e3e36
Author: Richard Biener 
Date:   Tue Jul 16 10:45:27 2024 +0200

Fixup unaligned load/store cost for znver5

Currently unaligned YMM and ZMM load and store costs are cheaper than
aligned which causes the vectorizer to purposely mis-align accesses
by adding an alignment prologue.  It looks like the unaligned costs
were simply copied from the bogus znver4 costs.  The following makes
the unaligned costs equal to the aligned costs like in the fixed znver4
version.

* config/i386/x86-tune-costs.h (znver5_cost): Update unaligned
load and store cost from the aligned costs.

(cherry picked from commit 896393791ee34ffc176c87d232dfee735db3aaab)

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index d0168eebdc15..8348ab8230ad 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2060,8 +2060,8 @@ struct processor_costs znver5_cost = {
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
   {8, 8, 8, 12, 12},   /* cost of storing SSE register
   in 32bit, 64bit, 128bit, 256bit and 
512bit */
-  {6, 6, 6, 6, 6}, /* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8}, /* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},  /* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},   /* cost of unaligned stores.  */
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to 
integer.  */


[gcc r14-10436] tree-optimization/115867 - ICE with simdcall vectorization in masked loop

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:ca275b68ef11d7d70bff8d7426e45b3734b3

commit r14-10436-gca275b68ef11d7d70bff8d7426e45b3734b3
Author: Richard Biener 
Date:   Thu Jul 11 10:18:55 2024 +0200

tree-optimization/115867 - ICE with simdcall vectorization in masked loop

When only a loop mask is to be supplied for the inbranch arg to a
simd function we fail to handle integer mode masks correctly.  We
need to guess the number of elements represented by it.  This assumes
that excess arguments are all for masks; I wasn't able to create
a simdclone with more than one integer mode mask argument.

The gcc.dg/vect/vect-simd-clone-20.c testcase exercises this with
-mavx512vl.

PR tree-optimization/115867
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Properly
guess the number of mask elements for integer mode masks.

(cherry picked from commit 4f4478f0f31263997bfdc4159f90e58dd79b38f9)

Diff:
---
 gcc/tree-vect-stmts.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21e8fe98e44a..eed5c7d821cb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4716,7 +4716,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
  SIMD_CLONE_ARG_TYPE_MASK);
 
  tree masktype = bestn->simdclone->args[mask_i].vector_type;
- callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
+ if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+   /* Guess the number of lanes represented by masktype.  */
+   callee_nelements = exact_div (bestn->simdclone->simdlen,
+ bestn->simdclone->nargs - nargs);
+ else
+   callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
  o = vector_unroll_factor (nunits, callee_nelements);
  for (m = j * o; m < (j + 1) * o; m++)
{


[gcc r14-10439] tree-optimization/115843 - fix wrong-code with fully-masked loop and peeling

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:06829e593d2e5611e7924624cb8228795691e2b7

commit r14-10439-g06829e593d2e5611e7924624cb8228795691e2b7
Author: Richard Biener 
Date:   Mon Jul 15 13:50:58 2024 +0200

tree-optimization/115843 - fix wrong-code with fully-masked loop and peeling

When AVX512 uses a fully masked loop and peeling we fail to create the
correct initial loop mask when the mask is composed of multiple
components in some cases.  The following fixes this by properly applying
the bias for the component to the shift amount.

PR tree-optimization/115843
* tree-vect-loop-manip.cc
(vect_set_loop_condition_partial_vectors_avx512): Properly
bias the shift of the initial mask for alignment peeling.

* gcc.dg/vect/pr115843.c: New testcase.

(cherry picked from commit a177be05f6952c3f7e62186d2e138d96c475b81a)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115843.c | 41 
 gcc/tree-vect-loop-manip.cc  |  8 +--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115843.c 
b/gcc/testsuite/gcc.dg/vect/pr115843.c
new file mode 100644
index 000000000000..3dbb6c792788
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115843.c
@@ -0,0 +1,41 @@
+/* { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { 
target avx512f_runtime } } */
+
+#include "tree-vect.h"
+
+typedef __UINT64_TYPE__ BITBOARD;
+BITBOARD KingPressureMask1[64], KingSafetyMask1[64];
+
+void __attribute__((noinline))
+foo()
+{
+  for (int i = 0; i < 64; i++)
+{
+  if ((i & 7) == 0)
+   KingPressureMask1[i] = KingSafetyMask1[i + 1];
+  else if ((i & 7) == 7)
+   KingPressureMask1[i] = KingSafetyMask1[i - 1];
+  else
+   KingPressureMask1[i] = KingSafetyMask1[i];
+}
+}
+
+BITBOARD verify[64]
+  = {1, 1, 2, 3, 4, 5, 6, 6, 9, 9, 10, 11, 12, 13, 14, 14, 17, 17, 18, 19,
+20, 21, 22, 22, 25, 25, 26, 27, 28, 29, 30, 30, 33, 33, 34, 35, 36, 37, 38,
+38, 41, 41, 42, 43, 44, 45, 46, 46, 49, 49, 50, 51, 52, 53, 54, 54, 57, 57,
+58, 59, 60, 61, 62, 62};
+
+int main()
+{
+  check_vect ();
+
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+KingSafetyMask1[i] = i;
+  foo ();
+#pragma GCC novector
+  for (int i = 0; i < 64; ++i)
+if (KingPressureMask1[i] != verify[i])
+  __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 43c7881c640d..1ece4a58bd50 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1149,10 +1149,14 @@ vect_set_loop_condition_partial_vectors_avx512 (class 
loop *loop,
  /* ???  But when the shift amount isn't constant this requires
 a round-trip to GRPs.  We could apply the bias to either
 side of the compare instead.  */
- tree shift = gimple_build (&preheader_seq, MULT_EXPR,
+ tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
 TREE_TYPE (niters_skip), niters_skip,
 build_int_cst (TREE_TYPE (niters_skip),
-   
rgc.max_nscalars_per_iter));
+   bias));
+ shift = gimple_build (&preheader_seq, MULT_EXPR,
+   TREE_TYPE (niters_skip), shift,
+   build_int_cst (TREE_TYPE (niters_skip),
+  rgc.max_nscalars_per_iter));
  init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
TREE_TYPE (init_ctrl),
init_ctrl, shift);


[gcc r14-10440] tree-optimization/115841 - reduction epilogue placement issue

2024-07-16 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:59ed01d5e3d2b0e59163d3248bdba9f1e35de599

commit r14-10440-g59ed01d5e3d2b0e59163d3248bdba9f1e35de599
Author: Richard Biener 
Date:   Tue Jul 16 11:53:17 2024 +0200

tree-optimization/115841 - reduction epilogue placement issue

When emitting the compensation to the vectorized main loop for
a vector reduction value to be re-used in the vectorized epilogue
we fail to place it in the correct block when the main loop is
known to be entered (no loop_vinfo->main_loop_edge) but the
epilogue is not (a loop_vinfo->skip_this_loop_edge).  The code
currently disregards this situation.

With the recent znver4 cost fix I couldn't trigger this situation
with the testcase but I adjusted it so it could eventually trigger
on other targets.

PR tree-optimization/115841
* tree-vect-loop.cc (vect_transform_cycle_phi): Correctly
place the partial vector reduction for the accumulator
re-use when the main loop cannot be skipped but the
epilogue can.

* gcc.dg/vect/pr115841.c: New testcase.

(cherry picked from commit 016c947b02e79a5c0c0c2d4ad5cb71aa04db3efd)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115841.c | 42 
 gcc/tree-vect-loop.cc|  7 +++---
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115841.c 
b/gcc/testsuite/gcc.dg/vect/pr115841.c
new file mode 100644
index ..aa5c66004a03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115841.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param 
vect-partial-vector-usage=1" } */
+/* { dg-additional-options "-mavx512vl" { target avx512vl } } */
+
+/* To trigger the bug costing needs to determine that aligning the A170
+   accesses with a prologue is good and there should be a vectorized
+   epilogue with a smaller vector size, re-using the vector accumulator
+   from the vectorized main loop that's statically known to execute
+   but the epilogue loop is not.  */
+
+static unsigned char xl[192];
+unsigned char A170[192*3];
+
+void jerate (unsigned char *, unsigned char *);
+float foo (unsigned n)
+{
+  jerate (xl, A170);
+
+  unsigned i = 32;
+  int kr = 1;
+  float sfn11s = 0.f;
+  float sfn12s = 0.f;
+  do
+{
+  int krm1 = kr - 1;
+  long j = krm1;
+  float a = (*(float(*)[n])A170)[j];
+  float b = (*(float(*)[n])xl)[j];
+  float c = a * b;
+  float d = c * 6.93149983882904052734375e-1f;
+  float e = (*(float(*)[n])A170)[j+48];
+  float f = (*(float(*)[n])A170)[j+96];
+  float g = d * e;
+  sfn11s = sfn11s + g;
+  float h = f * d;
+  sfn12s = sfn12s + h;
+  kr++;
+}
+  while (--i != 0);
+  float tem = sfn11s + sfn12s;
+  return tem;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 832399f7e9d7..feed73585921 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -8880,14 +8880,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
  /* And the reduction could be carried out using a different sign.  */
  if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
def = gimple_convert (&stmts, vectype_out, def);
- if (loop_vinfo->main_loop_edge)
+ edge e;
+ if ((e = loop_vinfo->main_loop_edge)
+ || (e = loop_vinfo->skip_this_loop_edge))
{
  /* While we'd like to insert on the edge this will split
 blocks and disturb bookkeeping, we also will eventually
 need this on the skip edge.  Rely on sinking to
 fixup optimal placement and insert in the pred.  */
- gimple_stmt_iterator gsi
-   = gsi_last_bb (loop_vinfo->main_loop_edge->src);
+ gimple_stmt_iterator gsi = gsi_last_bb (e->src);
  /* Insert before a cond that eventually skips the
 epilogue.  */
  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))


[gcc r15-2067] PR modula2/115957 ICE on procedure local const declaration

2024-07-16 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:d9709fafb2c498ba2f4c920f953c9b78fa3bf114

commit r15-2067-gd9709fafb2c498ba2f4c920f953c9b78fa3bf114
Author: Gaius Mulley 
Date:   Tue Jul 16 15:27:21 2024 +0100

PR modula2/115957 ICE on procedure local const declaration

An ICE would occur if a constant was declared using a variable term.
This fix catches variable terms in constant expressions and generates
an unrecoverable error.

gcc/m2/ChangeLog:

PR modula2/115957
* gm2-compiler/M2StackAddress.mod (PopAddress): Detect tail=NIL
and generate an internal error.
* gm2-compiler/PCBuild.bnf (InConstParameter): New variable.
(InConstBlock): New variable.
(ErrorString): Rewrite using MetaErrorStringT0.
(ErrorArrayAt): Rewrite using MetaErrorStringT0.
(WarnMissingToken): Use MetaErrorStringT0.
(CompilationUnit): Set seenError FALSE.
(init): Initialize InConstParameter and InConstBlock.
(ConstantDeclaration): Set InConstBlock.
(ConstSetOrQualidentOrFunction): Call CheckNotVar if not
InConstParameter and InConstBlock.
(ConstActualParameters): Set InConstParameter TRUE and restore
value at the end.
* gm2-compiler/PCSymBuild.def (CheckNotVar): New procedure.
Remove all unnecessary export qualified list.
* gm2-compiler/PCSymBuild.mod (CheckNotVar): New procedure.

gcc/testsuite/ChangeLog:

PR modula2/115957
* gm2/errors/fail/badconst.mod: New test.
* gm2/pim/fail/tinyadr.mod: New test.

Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-compiler/M2StackAddress.mod | 11 +---
 gcc/m2/gm2-compiler/PCBuild.bnf| 40 +-
 gcc/m2/gm2-compiler/PCSymBuild.def | 38 +---
 gcc/m2/gm2-compiler/PCSymBuild.mod | 18 +-
 gcc/testsuite/gm2/errors/fail/badconst.mod | 19 ++
 gcc/testsuite/gm2/pim/fail/tinyadr.mod | 12 +
 6 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/gcc/m2/gm2-compiler/M2StackAddress.mod 
b/gcc/m2/gm2-compiler/M2StackAddress.mod
index c7262dce3b38..ff65b42059c4 100644
--- a/gcc/m2/gm2-compiler/M2StackAddress.mod
+++ b/gcc/m2/gm2-compiler/M2StackAddress.mod
@@ -157,9 +157,14 @@ BEGIN
 END ;
 DISPOSE(b)
  END ;
- WITH s^.tail^ DO
-DEC(items) ;
-RETURN( bucket[items] )
+ IF s^.tail = NIL
+ THEN
+InternalError ('stack underflow')
+ ELSE
+WITH s^.tail^ DO
+   DEC(items) ;
+   RETURN( bucket[items] )
+END
  END
   END
END
diff --git a/gcc/m2/gm2-compiler/PCBuild.bnf b/gcc/m2/gm2-compiler/PCBuild.bnf
index 46f46af73ffe..0e45b2e889cc 100644
--- a/gcc/m2/gm2-compiler/PCBuild.bnf
+++ b/gcc/m2/gm2-compiler/PCBuild.bnf
@@ -47,7 +47,7 @@ IMPLEMENTATION MODULE PCBuild ;
 FROM M2LexBuf IMPORT currentstring, currenttoken, GetToken, InsertToken,
  InsertTokenAndRewind, GetTokenNo, MakeVirtualTok ;
 
-FROM M2Error IMPORT ErrorStringAt, WriteFormat1, WriteFormat2 ;
+FROM M2MetaError IMPORT MetaErrorStringT0 ;
 FROM NameKey IMPORT NulName, Name, makekey ;
 FROM DynamicStrings IMPORT String, InitString, KillString, Mark, ConCat, 
ConCatChar ;
 FROM M2Printf IMPORT printf0 ;
@@ -102,7 +102,8 @@ FROM PCSymBuild IMPORT PCStartBuildProgModule,
PushConstType,
PushConstAttributeType,
PushConstAttributePairType,
-   PushRType ;
+   PushRType,
+   CheckNotVar ;
 
 FROM SymbolTable IMPORT MakeGnuAsm, PutGnuAsmVolatile, PutGnuAsm, 
PutGnuAsmInput,
 PutGnuAsmOutput, PutGnuAsmTrash, PutGnuAsmVolatile,
@@ -127,13 +128,15 @@ CONST
Pass1 = FALSE ;
 
 VAR
-   WasNoError  : BOOLEAN ;
+   InConstParameter,
+   InConstBlock,
+   seenError   : BOOLEAN ;
 
 
 PROCEDURE ErrorString (s: String) ;
 BEGIN
-   ErrorStringAt (s, GetTokenNo ()) ;
-   WasNoError := FALSE
+   MetaErrorStringT0 (GetTokenNo (), s) ;
+   seenError := TRUE
 END ErrorString ;
 
 
@@ -145,7 +148,7 @@ END ErrorArray ;
 
 PROCEDURE ErrorArrayAt (a: ARRAY OF CHAR; tok: CARDINAL) ;
 BEGIN
-   ErrorStringAt (InitString(a), tok)
+   MetaErrorStringT0 (tok, InitString (a))
 END ErrorArrayAt ;
 
 
@@ -220,7 +223,7 @@ BEGIN
str := DescribeStop(s0, s1, s2) ;
 
str := ConCat(InitString('syntax error,'), Mark(str)) ;
-   ErrorStringAt(str, GetTokenNo())
+   MetaErrorStringT0 (GetTokenNo (), str)
 END WarnMissingToken ;
 
 
@@ -338,9 +341,9 @@ END Expect ;
 
 PROCEDURE CompilationUnit () : BOOLEAN ;
 BEGIN
-   WasNoError := TRUE ;
+   seenError := FALSE ;
FileUnit(SetOfStop0{eoftok}, SetOfStop1{}, SetOfStop2{}) ;
-   RETURN( WasNoError )

[gcc r15-2068] AVR: Overhaul add and sub insns that extend one operand.

2024-07-16 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:a074780fce3751cbedc0307dd1967a5a7d8e5660

commit r15-2068-ga074780fce3751cbedc0307dd1967a5a7d8e5660
Author: Georg-Johann Lay 
Date:   Fri Jul 12 13:02:55 2024 +0200

AVR: Overhaul add and sub insns that extend one operand.

These are insns of the forms

  (set (regA:M)
   (plus:M (extend:M (regB:L))
   (regA:M)))
and

  (set (regA:M)
   (minus:M (regA:M)
(extend:M (regB:L))))

where "extend" may be a sign-extend or zero-extend,
and the integer modes are  SImode >= M > L >= QImode.

The existing patterns are now represented in terms of insns
with mode iterators and a code iterator over any_extend,
and these new insns support all valid combinations of M and L
(which previously was not the case).

gcc/
* config/avr/avr.cc (avr_out_minus): Assimilate into...
(avr_out_plus_ext): ...this new function.
(avr_adjust_insn_length) [ADJUST_LEN_PLUS_EXT]: Handle case.
(avr_rtx_costs_1) [PLUS, MINUS]: Adjust RTX costs.
* config/avr/avr.md (adjust_len) <plus_ext>: Add new attribute 
value.
(*addpsi3_zero_extend.hi_split): Assimilate...
(*addpsi3_zero_extend.qi_split): Assimilate...
(*addsi3_zero_extend_split): Assimilate...
(*addsi3_zero_extend.hi_split): Assimilate...
(*addpsi3_sign_extend.hi_split): Assimilate...
(*addhi3.sign_extend1_split): Assimilate...
(*add3.._split): ...into this
new insn-and-split.
(*addpsi3_zero_extend.hi): Assimilate...
(*addpsi3_zero_extend.qi): Assimilate...
(*addsi3_zero_extend): Assimilate...
(*addsi3_zero_extend.hi): Assimilate...
(*addpsi3_sign_extend.hi): Assimilate...
(*addhi3.sign_extend1): Assimilate...
(*add3..): ...into this new insn.
(*subpsi3_sign_extend.hi_split): Assimilate...
(*subhi3.sign_extend2_split): Assimilate...
(*sub3.zero_extend._split): Assimilate...
(*sub3._split): ...into this new
insn-and-split.
(*subpsi3_sign_extend.hi): Assimilate...
(*subhi3.sign_extend2): Assimilate...
(*sub3.zero_extend.): Assimilate...
(*sub3..): ...into this new insn.
(*sub3.zero_extend.): Use avr_out_plus_ext
for asm out.
* config/avr/avr-protos.h (avr_out_minus): Remove.
(avr_out_plus_ext): New proto.
gcc/testsuite/
* gcc.target/avr/torture/add-extend.c: New test.
* gcc.target/avr/torture/sub-extend.c: New test.

Diff:
---
 gcc/config/avr/avr-protos.h   |   2 +-
 gcc/config/avr/avr.cc | 107 +++--
 gcc/config/avr/avr.md | 264 ++
 gcc/testsuite/gcc.target/avr/torture/add-extend.c | 109 +
 gcc/testsuite/gcc.target/avr/torture/sub-extend.c | 109 +
 5 files changed, 377 insertions(+), 214 deletions(-)

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index d3fa6c677232..5fdb13057570 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -96,7 +96,7 @@ extern void avr_output_addr_vec (rtx_insn*, rtx);
 extern const char *avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]);
 extern const char* avr_out_bitop (rtx, rtx*, int*);
 extern const char* avr_out_plus (rtx, rtx*, int* =NULL, bool =true);
-extern const char* avr_out_minus (rtx*);
+extern const char* avr_out_plus_ext (rtx_insn*, rtx*, int*);
 extern const char* avr_out_round (rtx_insn *, rtx*, int* =NULL);
 extern const char* avr_out_addto_sp (rtx*, int*);
 extern const char* avr_out_xload (rtx_insn *, rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index d2a08c60c3ad..5fc046a310e5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -8843,30 +8843,90 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
 }
 
 
-/* Output subtraction of integer registers XOP[0] and XOP[2] and return ""
+/* Output addition of registers YOP[0] and YOP[1]
 
-  XOP[0] = XOP[0] - XOP[2]
+  YOP[0] += extend (YOP[1])
 
-   where the mode of XOP[0] is in { HI, PSI, SI }, and the mode of
-   XOP[2] is in { QI, HI, PSI }.  When the mode of XOP[0] is larger
-   than the mode of XOP[2], then the latter is zero-extended on the fly.
-   The number of instructions will be the mode size of XOP[0].  */
+   or subtraction of registers YOP[0] and YOP[2]
+
+  YOP[0] -= extend (YOP[2])
+
+   where the integer modes satisfy  SI >= YOP[0].mode > YOP[1/2].mode >= QI,
+   and the extension may be sign- or zero-extend.  Returns "".
+
+   If PLEN == NULL output the instructions.
+   If PLEN != NULL set *PLEN to the length of the sequence in words.  */
 
 const char *
-avr_out_minus (rtx *xop)
+avr_out_plus_ext (rtx_insn *ins

[gcc r15-2069] rtl-ssa: Enforce earlyclobbers on hard-coded clobbers [PR115891]

2024-07-16 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:9f9faebb8ebfc0103461641cc49ba0b21877b2b1

commit r15-2069-g9f9faebb8ebfc0103461641cc49ba0b21877b2b1
Author: Richard Sandiford 
Date:   Tue Jul 16 15:31:17 2024 +0100

rtl-ssa: Enforce earlyclobbers on hard-coded clobbers [PR115891]

The asm in the testcase has a memory operand and also clobbers ax.
The clobber means that ax cannot be used to hold inputs, which
extends to the address of the memory.

I think I had an implicit assumption that constrain_operands
would enforce this, but in hindsight, that clearly wasn't going
to be true.  constrain_operands only looks at constraints, and
these clobbers are by definition outside the constraint system.
(And that's why they have to be handled conservatively, since there's
no way to distinguish the earlyclobber and non-earlyclobber cases.)

The semantics of hard-coded clobbers are generic enough that I think
they should be handled directly by rtl-ssa, rather than by consumers.
And in the context of rtl-ssa, the easiest way to check for a clash is
to walk the list of input registers, which we already have to hand.
It therefore seemed better not to push this down to a more generic
rtl helper.

The patch detects hard-coded clobbers in the same way as regrename:
by temporarily stubbing out the operands with pc_rtx.

gcc/
PR rtl-optimization/115891
* rtl-ssa/changes.cc (find_clobbered_access): New function.
(recog_level2): Use it to check for overlap between input
registers and hard-coded clobbers.  Conditionally reset
recog_data.insn after changing the insn code.

gcc/testsuite/
PR rtl-optimization/115891
* gcc.target/i386/pr115891.c: New test.

Diff:
---
 gcc/rtl-ssa/changes.cc   | 60 +++-
 gcc/testsuite/gcc.target/i386/pr115891.c | 10 ++
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index 6b6f7cd5d3ab..43c7b8e1e605 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -944,6 +944,25 @@ add_clobber (insn_change &change, add_regno_clobber_fn 
add_regno_clobber,
   return true;
 }
 
+// See if PARALLEL pattern PAT clobbers any of the registers in ACCESSES.
+// Return one such access if so, otherwise return null.
+static access_info *
+find_clobbered_access (access_array accesses, rtx pat)
+{
+  rtx subpat;
+  for (int i = 0; i < XVECLEN (pat, 0); ++i)
+if (GET_CODE (subpat = XVECEXP (pat, 0, i)) == CLOBBER)
+  {
+   rtx x = XEXP (subpat, 0);
+   if (REG_P (x))
+ for (auto *access : accesses)
+   if (access->regno () >= REGNO (x)
+   && access->regno () < END_REGNO (x))
+ return access;
+  }
+  return nullptr;
+}
+
 // Try to recognize the new form of the insn associated with CHANGE,
 // adding any clobbers that are necessary to make the instruction match
 // an .md pattern.  Return true on success.
@@ -1035,9 +1054,48 @@ recog_level2 (insn_change &change, add_regno_clobber_fn 
add_regno_clobber)
   pat = newpat;
 }
 
+  INSN_CODE (rtl) = icode;
+  if (recog_data.insn == rtl)
+recog_data.insn = nullptr;
+
+  // See if the pattern contains any hard-coded clobbers of registers
+  // that are also inputs to the instruction.  The standard rtl semantics
+  // treat such clobbers as earlyclobbers, since there is no way of proving
+  // which clobbers conflict with the inputs and which don't.
+  //
+  // (Non-hard-coded clobbers are handled by constraint satisfaction instead.)
+  rtx subpat;
+  if (GET_CODE (pat) == PARALLEL)
+for (int i = 0; i < XVECLEN (pat, 0); ++i)
+  if (GET_CODE (subpat = XVECEXP (pat, 0, i)) == CLOBBER
+ && REG_P (XEXP (subpat, 0)))
+   {
+ // Stub out all operands, so that we can tell which registers
+ // are hard-coded.
+ extract_insn (rtl);
+ for (int j = 0; j < recog_data.n_operands; ++j)
+   *recog_data.operand_loc[j] = pc_rtx;
+
+ auto *use = find_clobbered_access (change.new_uses, pat);
+
+ // Restore the operands.
+ for (int j = 0; j < recog_data.n_operands; ++j)
+   *recog_data.operand_loc[j] = recog_data.operand[j];
+
+ if (use)
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "register %d is both clobbered"
+  " and used as an input:\n", use->regno ());
+ print_rtl_single (dump_file, pat);
+   }
+ return false;
+   }
+   }
+
   // check_asm_operands checks the constraints after RA, so we don't
   // need to do it again.
-  INSN_CODE (rtl) = icode;
   if (reload_completed && !asm_p)
 {
   extract_insn (rtl);
diff --git a/gcc/testsuite/gcc.target/i386/pr115891.c 
b/gcc/testsuite/gcc.t

[gcc r15-2070] recog: restrict paradoxical mode punning in insn_propagation [PR115901]

2024-07-16 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:851ec9960b084ad37556ec627e6931e985e41a24

commit r15-2070-g851ec9960b084ad37556ec627e6931e985e41a24
Author: Richard Sandiford 
Date:   Tue Jul 16 15:31:17 2024 +0100

recog: restrict paradoxical mode punning in insn_propagation [PR115901]

In g:44fc801e97a8dc626a4806ff4124439003420b20 I'd extended
insn_propagation to handle simple cases of hard-reg mode punning.
One of the checks was that the new use mode occupied the same
number of registers as the original definition mode.  However,
as PR115901 shows, we need to avoid increasing the size of any
registers in the punned "to" expression as well.

Specifically, the test includes a DImode move from GPR x0 to
a vector register, followed by a V2DI use of the vector register.
The simplification would then create a V2DI spanning x0 and x1,
manufacturing a new, unwanted use of x1.

Checking for that kind of thing directly seems too cumbersome,
and is not related to the original motivation (which was to improve
handling of shared vector zeros on aarch64).  This patch therefore
restricts the paradoxical case to constants.

gcc/
PR rtl-optimization/115901
* recog.cc (insn_propagation::apply_to_rvalue_1): Restrict
paradoxical mode punning to cases where "to" is constant.

gcc/testsuite/
PR rtl-optimization/115901
* gcc.dg/torture/pr115901.c: New test.

Diff:
---
 gcc/recog.cc|  8 
 gcc/testsuite/gcc.dg/torture/pr115901.c | 14 ++
 2 files changed, 22 insertions(+)

diff --git a/gcc/recog.cc b/gcc/recog.cc
index 7710c55b7452..54b317126c29 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -1082,6 +1082,14 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
  || !REG_CAN_CHANGE_MODE_P (REGNO (x), GET_MODE (from),
 GET_MODE (x)))
return false;
+ /* If the reference is paradoxical and the replacement
+value contains registers, we would need to check that the
+simplification below does not increase REG_NREGS for those
+registers either.  It seems simpler to punt on nonconstant
+values instead.  */
+ if (paradoxical_subreg_p (GET_MODE (x), GET_MODE (from))
+ && !CONSTANT_P (to))
+   return false;
  newval = simplify_subreg (GET_MODE (x), to, GET_MODE (from),
subreg_lowpart_offset (GET_MODE (x),
   GET_MODE (from)));
diff --git a/gcc/testsuite/gcc.dg/torture/pr115901.c 
b/gcc/testsuite/gcc.dg/torture/pr115901.c
new file mode 100644
index ..244af857d887
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115901.c
@@ -0,0 +1,14 @@
+/* { dg-additional-options "-ftrivial-auto-var-init=zero" } */
+
+int p;
+void g(long);
+#define vec16 __attribute__((vector_size(16)))
+
+void l(vec16 long *);
+void h()
+{
+  long inv1;
+  vec16 long  inv = {p, inv1};
+  g (p);
+  l(&inv);
+}


[gcc r15-2071] rtl-ssa: Fix removal of order_nodes [PR115929]

2024-07-16 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:fec38d7987dd6d68b234b0076b57ac66a30a3a1d

commit r15-2071-gfec38d7987dd6d68b234b0076b57ac66a30a3a1d
Author: Richard Sandiford 
Date:   Tue Jul 16 15:33:23 2024 +0100

rtl-ssa: Fix removal of order_nodes [PR115929]

order_nodes are used to implement ordered comparisons between
two insns with the same program point number.  remove_insn would
remove an order_node from its splay tree, but didn't remove it
from the insn.  This caused confusion if the insn was later
reinserted somewhere else that also needed an order_node.

gcc/
PR rtl-optimization/115929
* rtl-ssa/insns.cc (function_info::remove_insn): Remove an
order_node from the instruction as well as from the splay tree.

gcc/testsuite/
PR rtl-optimization/115929
* gcc.dg/torture/pr115929-1.c: New test.

Diff:
---
 gcc/rtl-ssa/insns.cc  |  5 +++-
 gcc/testsuite/gcc.dg/torture/pr115929-1.c | 45 +++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc
index 7e26bfd978fe..bc30734df89f 100644
--- a/gcc/rtl-ssa/insns.cc
+++ b/gcc/rtl-ssa/insns.cc
@@ -393,7 +393,10 @@ void
 function_info::remove_insn (insn_info *insn)
 {
   if (insn_info::order_node *order = insn->get_order_node ())
-insn_info::order_splay_tree::remove_node (order);
+{
+  insn_info::order_splay_tree::remove_node (order);
+  insn->remove_note (order);
+}
 
   if (auto *note = insn->find_note ())
 {
diff --git a/gcc/testsuite/gcc.dg/torture/pr115929-1.c 
b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
new file mode 100644
index ..19b831ab99ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-fno-gcse -fschedule-insns -fno-guess-branch-probability 
-fno-tree-fre -fno-tree-ch" } */
+
+int printf(const char *, ...);
+int a[6], b, c;
+char d, l;
+struct {
+  char e;
+  int f;
+  int : 8;
+  long g;
+  long h;
+} i[1][9] = {0};
+unsigned j;
+void n(char p) { b = b >> 8 ^ a[b ^ p]; }
+int main() {
+  int k, o;
+  while (b) {
+k = 0;
+for (; k < 9; k++) {
+  b = b ^ a[l];
+  n(j);
+  if (o)
+printf(&d);
+  long m = i[c][k].f;
+  b = b >> 8 ^ a[l];
+  n(m >> 32);
+  n(m);
+  if (o)
+printf("%d", d);
+  b = b >> 8 ^ l;
+  n(2);
+  n(0);
+  if (o)
+printf(&d);
+  b = b ^ a[l];
+  n(i[c][k].g >> 2);
+  n(i[c][k].g);
+  if (o)
+printf(&d);
+  printf("%d", i[c][k].f);
+}
+  }
+  return 0;
+}


[gcc/matz/heads/x86-ssw] x86: implement separate shrink wrapping

2024-07-16 Thread Michael Matz via Gcc-cvs
The branch 'matz/heads/x86-ssw' was updated to point to:

 298b1dd7fb81... x86: implement separate shrink wrapping

It previously pointed to:

 fbf3ff6bc169... x86-ssw: Deal with deallocated frame in epilogue

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  fbf3ff6... x86-ssw: Deal with deallocated frame in epilogue
  3b04b65... Revert "Add target hook shrink_wrap.cleanup_components"
  826dd85... Add target hook shrink_wrap.cleanup_components
  4e6291b... x86-ssw: tidy and commentary
  495a687... x86-ssw: Adjust testcase
  d213bc5... x86-ssw: precise using of moves
  cf6d794... x86-ssw: adjust testcase
  c5a72cc... x86-ssw: fix testcases
  f917195... x86-ssw: disable if DRAP reg is needed
  5a9a70a... x86-ssw: don't clobber flags
  eb94eb7... x86: implement separate shrink wrapping


Summary of changes (added commits):
---

  298b1dd... x86: implement separate shrink wrapping


[gcc(refs/users/matz/heads/x86-ssw)] x86: implement separate shrink wrapping

2024-07-16 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:298b1dd7fb8189eb22ae604973083ae80b135ae7

commit 298b1dd7fb8189eb22ae604973083ae80b135ae7
Author: Michael Matz 
Date:   Sun Jun 30 03:52:39 2024 +0200

x86: implement separate shrink wrapping

this adds support for the infrastructure for shrink wrapping
separate components to the x86 target.  The components we track
are individual registers to save/restore and the frame allocation
itself.

There are various limitations where we give up:
* when the frame becomes too large
* when any complicated realignment is needed (DRAP or not)
* when the calling convention requires certain forms of
  pro- or epilogues (e.g. SEH on win64)
* when the function is "special" (uses eh_return and the like);
  most of that is already avoided by the generic infrastructure
  in shrink-wrap.cc
* when we must not use moves to save/restore registers for any reasons
  (stack checking being one notable one)
and so on.

For the last point we now distinguish between not being able to use moves
(then we disable separate shrink wrapping) and merely not wanting to use
moves (e.g. because push/pop is equally fast).  In the latter case we
don't disable separate shrink wrapping, but do use moves for those
functions where it does something.

Apart from that it's fairly straightforward: for components selected
by the infrastructure to be separately shrink-wrapped, emit code to
save/restore them in the appropriate hook (for the frame-alloc
component to adjust the stack pointer), remember them, and don't emit
any code for those in the normal expand_prologue and expand_epilogue
expanders.  But as the x86 prologue and epilogue generators are quite
a twisty maze with many cases to deal with, this also adds some aborts
and asserts for things that are unexpected.

The static instruction count of functions can increase (when
separate shrink wrapping emits some component sequences into multiple
blocks) and the instructions themselves can become larger (moves vs.
push/pop), so there's a code size increase for functions where this
does something.  The dynamic insn count decreases for at least one
path through the function (and doesn't increase for others).

Two testcases need separate shrink wrapping disabled because they
check for specific generated assembly instruction counts and sequences
or specific messages in the pro_and_epilogue dump file, which turn out
different with separate shrink wrapping.

gcc/
* config/i386/i386.h (struct i86_frame.cannot_use_moves):
Add member.
(struct machine_function.ssw_min_reg,
ssw_max_reg, reg_wrapped_separately, frame_alloc_separately,
anything_separately): Add members.
* config/i386/i386.cc (ix86_compute_frame_layout): Split out
cannot_use_moves from save_regs_using_move computation.
(ix86_emit_save_regs): Ensure not using this under separate
shrink wrapping.
(ix86_emit_save_regs_using_mov, ix86_emit_save_sse_regs_using_mov,
ix86_emit_restore_reg_using_pop, ix86_emit_restore_reg_using_pop2,
ix86_emit_restore_regs_using_pop): Don't handle separately shrink
wrapped components.
(ix86_expand_prologue): Handle separate shrink wrapping.
(ix86_emit_restore_reg_using_mov): New function, split out
from ...
(ix86_emit_restore_regs_using_mov): ... here and ...
(ix86_emit_restore_sse_regs_using_mov): ... here.
(ix86_expand_epilogue): Handle separate shrink wrapping.
(NCOMPONENTS, SW_FRAME): Add new defines.
(separate_frame_alloc_p, ix86_get_separate_components,
ix86_components_for_bb, ix86_disqualify_components,
ix86_init_frame_state, ix86_alloc_frame, ix86_dealloc_frame,
ix86_process_reg_components, ix86_emit_prologue_components,
ix86_emit_epilogue_components, ix86_set_handled_components):
Add new functions.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define target hook
macros.

gcc/testsuite
* gcc.dg/stack-check-5.c: Disable separate shrink wrapping.
* gcc.target/x86_64/abi/callabi/leaf-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386.cc| 491 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.dg/stack-check-5.c   |   2 +-
 .../gcc.target/x86_64/abi/callabi/leaf-2.c |   2 +-
 4 files changed, 447 insertions(+)

[gcc/matz/heads/x86-ssw] x86: Implement separate shrink wrapping

2024-07-16 Thread Michael Matz via Gcc-cvs
The branch 'matz/heads/x86-ssw' was updated to point to:

 f0d9a4c9d44c... x86: Implement separate shrink wrapping

It previously pointed to:

 298b1dd7fb81... x86: implement separate shrink wrapping

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  298b1dd... x86: implement separate shrink wrapping


Summary of changes (added commits):
---

  f0d9a4c... x86: Implement separate shrink wrapping


[gcc(refs/users/matz/heads/x86-ssw)] x86: Implement separate shrink wrapping

2024-07-16 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:f0d9a4c9d44c463f86699d7f054722d5d0a20d09

commit f0d9a4c9d44c463f86699d7f054722d5d0a20d09
Author: Michael Matz 
Date:   Sun Jun 30 03:52:39 2024 +0200

x86: Implement separate shrink wrapping

this adds support for the infrastructure for shrink wrapping
separate components to the x86 target.  The components we track
are individual registers to save/restore and the frame allocation
itself.

There are various limitations where we give up:
* when the frame becomes too large
* when any complicated realignment is needed (DRAP or not)
* when the calling convention requires certain forms of
  pro- or epilogues (e.g. SEH on win64)
* when the function is "special" (uses eh_return and the like);
  most of that is already avoided by the generic infrastructure
  in shrink-wrap.cc
* when we must not use moves to save/restore registers for any reasons
  (stack checking being one notable one)
and so on.

For the last point we now distinguish between not being able to use moves
(then we disable separate shrink wrapping) and merely not wanting to use
moves (e.g. because push/pop is equally fast).  In the latter case we
don't disable separate shrink wrapping, but do use moves for those
functions where it does something.

Apart from that it's fairly straightforward: for components selected
by the infrastructure to be separately shrink-wrapped, emit code to
save/restore them in the appropriate hook (for the frame-alloc
component to adjust the stack pointer), remember them, and don't emit
any code for those in the normal expand_prologue and expand_epilogue
expanders.  But as the x86 prologue and epilogue generators are quite
a twisty maze with many cases to deal with, this also adds some aborts
and asserts for things that are unexpected.

The static instruction count of functions can increase (when
separate shrink wrapping emits some component sequences into multiple
blocks) and the instructions themselves can become larger (moves vs.
push/pop), so there's a code size increase for functions where this
does something.  The dynamic insn count decreases for at least one
path through the function (and doesn't increase for others).

Two testcases need separate shrink wrapping disabled because they
check for specific generated assembly instruction counts and sequences
or specific messages in the pro_and_epilogue dump file, which turn out
different with separate shrink wrapping.

gcc/
* config/i386/i386.h (struct i86_frame.cannot_use_moves):
Add member.
(struct machine_function.ssw_min_reg,
ssw_max_reg, reg_wrapped_separately, frame_alloc_separately,
anything_separately): Add members.
* config/i386/i386.cc (ix86_compute_frame_layout): Split out
cannot_use_moves from save_regs_using_move computation.
(ix86_emit_save_regs): Ensure not using this under separate
shrink wrapping.
(ix86_emit_save_regs_using_mov, ix86_emit_save_sse_regs_using_mov,
ix86_emit_restore_reg_using_pop, ix86_emit_restore_reg_using_pop2,
ix86_emit_restore_regs_using_pop): Don't handle separately shrink
wrapped components.
(ix86_expand_prologue): Handle separate shrink wrapping.
(ix86_emit_restore_reg_using_mov): New function, split out
from ...
(ix86_emit_restore_regs_using_mov): ... here and ...
(ix86_emit_restore_sse_regs_using_mov): ... here.
(ix86_expand_epilogue): Handle separate shrink wrapping.
(NCOMPONENTS, SW_FRAME): Add new defines.
(separate_frame_alloc_p, ix86_get_separate_components,
ix86_components_for_bb, ix86_disqualify_components,
ix86_init_frame_state, ix86_alloc_frame, ix86_dealloc_frame,
ix86_process_reg_components, ix86_emit_prologue_components,
ix86_emit_epilogue_components, ix86_set_handled_components):
Add new functions.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define target hook
macros.

gcc/testsuite/
* gcc.dg/stack-check-5.c: Disable separate shrink wrapping.
* gcc.target/x86_64/abi/callabi/leaf-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386.cc| 491 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.dg/stack-check-5.c   |   2 +-
 .../gcc.target/x86_64/abi/callabi/leaf-2.c |   2 +-
 4 files changed, 447 insertions(+

[gcc r15-2072] Fortran: Simplify len_trim with array ref and fix mapping bug[PR84868].

2024-07-16 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:9f966b6a8ff0244dd6f8bf36d876799d5f9bbaee

commit r15-2072-g9f966b6a8ff0244dd6f8bf36d876799d5f9bbaee
Author: Paul Thomas 
Date:   Tue Jul 16 15:56:44 2024 +0100

Fortran: Simplify len_trim with array ref and fix mapping bug[PR84868].

2024-07-16  Paul Thomas  

gcc/fortran
PR fortran/84868
* simplify.cc (gfc_simplify_len_trim): If the argument is an
element of a parameter array, simplify all the elements and
build a new parameter array to hold the result, after checking
that it doesn't already exist.
* trans-expr.cc (gfc_get_interface_mapping_array): If a string
length is available, use it for the typespec.
(gfc_add_interface_mapping): Supply the se string length.

gcc/testsuite/
PR fortran/84868
* gfortran.dg/pr84868.f90: New test.

Diff:
---
 gcc/fortran/simplify.cc   | 75 +++
 gcc/fortran/trans-expr.cc | 18 +---
 gcc/testsuite/gfortran.dg/pr84868.f90 | 84 +++
 3 files changed, 171 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index 7a5d31c01a65..60b717fea9a7 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -4637,6 +4637,81 @@ gfc_simplify_len_trim (gfc_expr *e, gfc_expr *kind)
   if (k == -1)
 return &gfc_bad_expr;
 
+  /* If the expression is either an array element or section, an array
+ parameter must be built so that the reference can be applied. Constant
+ references should have already been simplified away. All other cases
+ can proceed to translation, where kind conversion will occur silently.  */
+  if (e->expr_type == EXPR_VARIABLE
+  && e->ts.type == BT_CHARACTER
+  && e->symtree->n.sym->attr.flavor == FL_PARAMETER
+  && e->ref && e->ref->type == REF_ARRAY
+  && e->ref->u.ar.type != AR_FULL
+  && e->symtree->n.sym->value)
+{
+  char name[2*GFC_MAX_SYMBOL_LEN + 12];
+  gfc_namespace *ns = e->symtree->n.sym->ns;
+  gfc_symtree *st;
+  gfc_expr *expr;
+  gfc_expr *p;
+  gfc_constructor *c;
+  int cnt = 0;
+
+  sprintf (name, "_len_trim_%s_%s", e->symtree->n.sym->name,
+  ns->proc_name->name);
+  st = gfc_find_symtree (ns->sym_root, name);
+  if (st)
+   goto already_built;
+
+  /* Recursively call this fcn to simplify the constructor elements.  */
+  expr = gfc_copy_expr (e->symtree->n.sym->value);
+  expr->ts.type = BT_INTEGER;
+  expr->ts.kind = k;
+  expr->ts.u.cl = NULL;
+  c = gfc_constructor_first (expr->value.constructor);
+  for (; c; c = gfc_constructor_next (c))
+   {
+ if (c->iterator)
+   continue;
+
+ if (c->expr && c->expr->ts.type == BT_CHARACTER)
+   {
+ p = gfc_simplify_len_trim (c->expr, kind);
+ if (p == NULL)
+   goto clean_up;
+ gfc_replace_expr (c->expr, p);
+ cnt++;
+   }
+   }
+
+  if (cnt)
+   {
+ /* Build a new parameter to take the result.  */
+ st = gfc_new_symtree (&ns->sym_root, name);
+ st->n.sym = gfc_new_symbol (st->name, ns);
+ st->n.sym->value = expr;
+ st->n.sym->ts = expr->ts;
+ st->n.sym->attr.dimension = 1;
+ st->n.sym->attr.save = SAVE_IMPLICIT;
+ st->n.sym->attr.flavor = FL_PARAMETER;
+ st->n.sym->as = gfc_copy_array_spec (e->symtree->n.sym->as);
+ gfc_set_sym_referenced (st->n.sym);
+ st->n.sym->refs++;
+ gfc_commit_symbol (st->n.sym);
+
+already_built:
+ /* Build a return expression.  */
+ expr = gfc_copy_expr (e);
+ expr->ts = st->n.sym->ts;
+ expr->symtree = st;
+ gfc_expression_rank (expr);
+ return expr;
+   }
+
+clean_up:
+  gfc_free_expr (expr);
+  return NULL;
+}
+
   if (e->expr_type != EXPR_CONSTANT)
 return NULL;
 
diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index fc23fb1a7ebf..410256742537 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -4474,12 +4474,15 @@ gfc_get_interface_mapping_charlen 
(gfc_interface_mapping * mapping,
 
 static tree
 gfc_get_interface_mapping_array (stmtblock_t * block, gfc_symbol * sym,
-gfc_packed packed, tree data)
+gfc_packed packed, tree data, tree len)
 {
   tree type;
   tree var;
 
-  type = gfc_typenode_for_spec (&sym->ts);
+  if (len != NULL_TREE && (TREE_CONSTANT (len) || VAR_P (len)))
+type = gfc_get_character_type_len (sym->ts.kind, len);
+  else
+type = gfc_typenode_for_spec (&sym->ts);
   type = gfc_get_nodesc_array_type (type, sym->as, packed,
!sym->attr.target && !sym->attr.pointer
&& !sym->attr.pro

[gcc(refs/users/meissner/heads/work171-bugs)] Revert changes

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:abb176b702aa3e67dad6d1d724194e144cd85d8e

commit abb176b702aa3e67dad6d1d724194e144cd85d8e
Author: Michael Meissner 
Date:   Tue Jul 16 11:19:52 2024 -0400

Revert changes

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 +--
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 -
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 +--
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  3 +--
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  1 -
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  1 -
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  3 +--
 gcc/testsuite/gcc.target/powerpc/inf128-1.c|  3 +--
 gcc/testsuite/gcc.target/powerpc/mulkc3-1.c|  3 +--
 gcc/testsuite/gcc.target/powerpc/nan128-1.c|  3 +--
 gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr104253.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/pr70640.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr70669.c |  3 +--
 gcc/testsuite/gcc.target/powerpc/pr79004.c |  3 +--
 gcc/testsuite/gcc.target/powerpc/pr79038-1.c   |  3 +--
 gcc/testsuite/gcc.target/powerpc/pr81959.c |  3 +--
 gcc/testsuite/gcc.target/powerpc/pr85657-1.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr85657-2.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99708.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-1.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-2.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-3.c   |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 24 +-
 libgcc/config.host | 12 +--
 libgcc/config/rs6000/t-float128|  8 +---
 libgcc/config/rs6000/t-float128-hw |  3 ++-
 libgcc/config/rs6000/t-float128-p10-hw |  3 ++-
 libgcc/configure   |  8 +---
 libgcc/configure.ac|  8 +---
 30 files changed, 47 insertions(+), 73 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/abs128-1.c 
b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
index e8702ec3127a..fe5206daff8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/abs128-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
@@ -1,6 +1,5 @@
 /* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
-/* { dg-options "-mvsx" } */
-/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-options "-mfloat128 -mvsx" } */
 
 void abort ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
index 081fb2e2995f..f0e03c5173d2 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c
@@ -2,7 +2,6 @@
 /* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9 -save-temps" } */
-/* { dg-require-effective-target ppc_float128_sw } */
 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.target/powerpc/copysign128-1.c 
b/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
index ac8528b53273..429dfc072e3b 100644
--- a/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/copysign128-1.c
@@ -1,6 +1,5 @@
 /* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
-/* { dg-options "-mvsx" } */
-/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-options "-mfloat128 -mvsx" } */
 
 void abort ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/divkc3-1.c 
b/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
index cb7335f2a755..89bf04f12a97 100644
--- a/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/divkc3-1.c
@@ -1,6 +1,5 @@
 /* { dg-do run { target { powerpc64*-*-* && p8vector_hw } } } */
-/* { dg-options "-mvsx" } */
-/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-options "-mfloat128 -mvsx" } */
 
 void abort ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-3.c 
b/gcc/testsuite/gcc.target/powerpc/float128-3.c
index e58bccdfa159..e62ad5f5247f 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-3.c
@@ -1,7 +1,6 @@
 /* { dg-do compile { target { powerpc*-*-linux* } } } */
 /* { dg-options "-O2 -mvsx -mno-float128" } */
 /* { dg-require-effective-target powerpc_vsx } */
-/* { dg-require-effective-target ppc_float128_sw } */
 
 /* Test that we can use #pragma GCC target to enable -mfloat128.  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-5.c 
b/gcc/testsuite/gcc.target/powerpc/float128-5.c
index 8a5d8ceff216..8a9eee971fbc 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-5.c
@@ -1,7 +1,6 @@
 /* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
 /* { dg-op

[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:643ea344d77c82cadd4adcba6e8fa9ac46875f56

commit 643ea344d77c82cadd4adcba6e8fa9ac46875f56
Author: Michael Meissner 
Date:   Tue Jul 16 11:24:54 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 153 ++---
 1 file changed, 5 insertions(+), 148 deletions(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 0b0b1bf63356..e2530576840c 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,151 +1,8 @@
- Branch work171-bugs, patch #325 
-
-Fix last change.
-
-2024-07-16  Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-
-   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Require float128
-   support.
-   * lib/target-supports.exp
-   (check_effective_target_base_quadfloat_support): Add check for explicit
-   float128.
-
- Branch work171-bugs, patch #324 
-
-Fix last change.
-
-2024-07-16  Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-
-   * gcc.target/powerpc/abs128-1.c: Fix typos.
-   * gcc.target/powerpc/copysign128-1.c: Likewise.
-   * gcc.target/powerpc/divkc3-1.c: Likewise.
-
- Branch work171-bugs, patch #323 
-
-Remove -mfloat128 option.
-
-2024-07-16  Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-
-   * gcc.target/powerpc/abs128-1.c: Remove passing -mfloat128.  If needed,
-   add explicit requires for float128.
-   * gcc.target/powerpc/copysign128-1.c: Likewise.
-   * gcc.target/powerpc/divkc3-1.c: Likewise.
-   * gcc.target/powerpc/float128-3.c: Likewise.
-   * gcc.target/powerpc/float128-5.c: Likewise.
-   * gcc.target/powerpc/float128-math.c: Likewise.
-   * gcc.target/powerpc/inf128-1.c: Likewise.
-   * gcc.target/powerpc/mulkc3-1.c: Likewise.
-   * gcc.target/powerpc/nan128-1.c: Likewise.
-   * gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
-   * gcc.target/powerpc/pr104253.c: Likewise.
-   * gcc.target/powerpc/pr70640.c: Likewise.
-   * gcc.target/powerpc/pr70669.c: Likewise.
-   * gcc.target/powerpc/pr79004.c: Likewise.
-   * gcc.target/powerpc/pr79038-1.c: Likewise.
-   * gcc.target/powerpc/pr81959.c: Likewise.
-   * gcc.target/powerpc/pr85657-1.c: Likewise.
-   * gcc.target/powerpc/pr85657-2.c: Likewise.
-   * gcc.target/powerpc/pr99708.c: Likewise.
-   * gcc.target/powerpc/signbit-1.c: Likewise.
-   * gcc.target/powerpc/signbit-2.c: Likewise.
-   * gcc.target/powerpc/signbit-3.c: Likewise.
-
- Branch work171-bugs, patch #322 
-
-Fix typos.
-
-2024-07-15  Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * lib/target-supports.exp (check_ppc_float128_sw_available): Fix typo in
-   last change.
-   (check_effective_target_ppc_ieee128_ok): Likewise.
-
- Branch work171-bugs, patch #321 
-
-Do not add -mfloat128 to the tests.
-
-2024-07-15  Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * lib/target-supports.exp (check_ppc_float128_sw_available): Do not add
-   -mfloat128 on PowerPC tests.
-   (check_ppc_float128_hw_available): Likewise.
-   (check_effective_target_ppc_ieee128_ok): Likewise.
-   (add_options_for___float128): Likewise.
-   (check_effective_target_power10_ok): Likewise.
-   (check_effective_target_powerpc_float128_sw_ok): Likewise.
-   (check_effective_target_powerpc_float128_hw_ok): Likewise.
-
- Branch work171-bugs, patch #320 
-
-Do not add -mvsx when building or testing the float128 support.
-
-In the past, we would add -mvsx when building the float128 support in libgcc.
-This allowed us to build the float128 support on a big endian system where the
-default cpu is power4.  While the libgcc support can be built, given there is 
no
-glibc support for float128 available.
-
-However, adding -mvsx and building the libgcc float128 support causes problems
-if you set the default cpu to something like a 7540, which does not have VSX
-support.  The assembler complains that when the code does a ".machine 7450", 
you
-cannot use VSX instructions.
-
-With these patches, the float128 libgcc support is only built if the default
-compiler has VSX support.  If somebody wanted to enable the glibc support for
-big endian, they would need to set the base cpu to power8 to enable building 
the
-libgcc float128 libraries.
-
-In addition to the changes in libgcc, this patch also changes the GCC tests so
-that it will only test float128 if the default compiler enables the VSX
-instruction set.  Otherwise all of the float128 tests will fail because the
-libgcc support is not available.
-

[gcc r15-2073] AVR: testsuite - Add noipa function attribute to noclone functions.

2024-07-16 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:f8b302c98378b54e09c5f20cd6f6197871311da2

commit r15-2073-gf8b302c98378b54e09c5f20cd6f6197871311da2
Author: Georg-Johann Lay 
Date:   Tue Jul 16 17:33:18 2024 +0200

AVR: testsuite - Add noipa function attribute to noclone functions.

Many functions under test have the noinline and noclone function
attributes attached so that no (constant) values are propagated
into the functions, so that we actually are testing what's supposed
to be tested.  In order to enforce that, noipa may also be required
when inter-procedural analysis / optimizations are on.

gcc/testsuite/
* gcc.target/avr/isr-test.h: Add noipa function attribute
to noclone functions.
* gcc.target/avr/pr114981-powif.c: Same.
* gcc.target/avr/pr114981-powil.c: Same.
* gcc.target/avr/pr71676-1.c: Same.
* gcc.target/avr/pr71676-2.c: Same.
* gcc.target/avr/pr71676-3.c: Same.
* gcc.target/avr/pr71676.c: Same.
* gcc.target/avr/torture/fix-types.h: Same.
* gcc.target/avr/torture/fuse-add.c: Same.
* gcc.target/avr/torture/get-mem.c: Same.
* gcc.target/avr/torture/insv-anyshift-hi.c: Same.
* gcc.target/avr/torture/insv-anyshift-si.c: Same.
* gcc.target/avr/torture/isr-02-call.c: Same.
* gcc.target/avr/torture/isr-03-fixed.c: Same.
* gcc.target/avr/torture/pr109650-1.c: Same.
* gcc.target/avr/torture/pr109650-2.c: Same.
* gcc.target/avr/torture/pr109907-1.c: Same.
* gcc.target/avr/torture/pr109907-2.c: Same.
* gcc.target/avr/torture/pr114132-2.c: Same.
* gcc.target/avr/torture/pr39633.c: Same.
* gcc.target/avr/torture/pr51782-1.c: Same.
* gcc.target/avr/torture/pr61055.c: Same.
* gcc.target/avr/torture/pr61443.c: Same.
* gcc.target/avr/torture/pr64331.c: Same.
* gcc.target/avr/torture/pr77326.c: Same.
* gcc.target/avr/torture/pr83729.c: Same.
* gcc.target/avr/torture/pr83801.c: Same.
* gcc.target/avr/torture/pr87376.c: Same.
* gcc.target/avr/torture/pr88236-pr115726.c: Same.
* gcc.target/avr/torture/pr92606.c: Same.
* gcc.target/avr/torture/pr98762.c: Same.
* gcc.target/avr/torture/sat-hr-plus-minus.c: Same.
* gcc.target/avr/torture/sat-k-plus-minus.c: Same.
* gcc.target/avr/torture/sat-llk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-r-plus-minus.c: Same.
* gcc.target/avr/torture/sat-uhr-plus-minus.c: Same.
* gcc.target/avr/torture/sat-uk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-ullk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-ur-plus-minus.c: Same.
* gcc.target/avr/torture/set-mem.c: Same.
* gcc.target/avr/torture/tiny-progmem.c: Same.

Diff:
---
 gcc/testsuite/gcc.target/avr/isr-test.h|  6 ++--
 gcc/testsuite/gcc.target/avr/pr114981-powif.c  |  2 +-
 gcc/testsuite/gcc.target/avr/pr114981-powil.c  |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676-1.c   | 32 +++---
 gcc/testsuite/gcc.target/avr/pr71676-2.c   |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676-3.c   |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/fix-types.h   |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/fuse-add.c|  4 +--
 gcc/testsuite/gcc.target/avr/torture/get-mem.c |  2 +-
 .../gcc.target/avr/torture/insv-anyshift-hi.c  |  6 ++--
 .../gcc.target/avr/torture/insv-anyshift-si.c  |  4 +--
 gcc/testsuite/gcc.target/avr/torture/isr-02-call.c |  2 +-
 .../gcc.target/avr/torture/isr-03-fixed.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109650-1.c  |  4 +--
 gcc/testsuite/gcc.target/avr/torture/pr109650-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109907-1.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109907-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr114132-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr39633.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr51782-1.c   |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/pr61055.c |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/pr61443.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr64331.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr77326.c |  4 +--
 gcc/testsuite/gcc.target/avr/torture/pr83729.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr83801.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 10 +++
 .../gcc.target/avr/torture/pr88236-pr115726.c  | 10 +++
 gcc/testsuite/gcc.target/avr/torture/pr92606.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr98762.c |  2 +-
 .../gcc.target/avr/torture/sat-hr-plus-minus.c | 16 +--
 

[gcc r15-2074] c++, coroutines, contracts: Handle coroutine and void functions [PR110871, PR110872, PR115434].

2024-07-16 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:d1706235ed2b274a2d1fa3c3039b5874b4ae7a0e

commit r15-2074-gd1706235ed2b274a2d1fa3c3039b5874b4ae7a0e
Author: Iain Sandoe 
Date:   Sat Jun 15 17:47:33 2024 +0100

c++, coroutines, contracts: Handle coroutine and void functions 
[PR110871,PR110872,PR115434].

The current implementation of contracts emits the checks into function
bodies in three places; for pre-conditions at the start of the body,
for asserts in-line in the function body and for post-conditions as an
addition to return statements.

In general (at least with existing "2a" contract semantics) the in-line
contract asserts behave as expected.

However, the mechanism is not applicable to:

  * Handling pre-conditions in coroutines since, for those, the standard
    specifies a wrapping of the original function body by functionality
    implementing initial and final suspends (along with some housekeeping
    to route exceptions).  Thus, for such transformed function bodies, the
    pre-conditions are actioned after the initial suspend, which does
    not behave as intended.

  * Handling post-conditions in functions that do not have return
    statements (which applies to coroutines and void functions).

In the following, we identify a potentially transformed function body
(in the case of coroutines, this is usually called the "ramp()" function).

The patch here re-implements the code insertion in one of the two
following ways (code for exposition only):

  * For functions with no post-conditions we wrap the potentially
transformed function as follows:

  {
 handle_pre_condition_checking ();
 potentially_transformed_function_body ();
  }

  This implements the intent that the preconditions are processed after
  the function parameters are initialised but before any other actions.

  * For functions with post-conditions:

  if (preconditions_exist)
handle_pre_condition_checking ();
  try
   {
 potentially_transformed_function_body ();
   }
  finally
   {
 handle_post_condition_checking ();
   }
  else [only if the function is not marked noexcept(true) ]
   {
 ;
   }

In this, post-conditions [that might apply to the return value etc.]
are evaluated on every non-exceptional edge out of the function.

At present, the model here is that exceptions thrown by the function
propagate upwards as if there were no contracts present.  If the desired
semantics become that an exception counts as equivalent to a contract
violation, then we can add a second handler in place of the empty
statement.

This patch specifically does not address changes to code-gen and constexpr
handling that are contained in P2900.

PR c++/115434
PR c++/110871
PR c++/110872

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_constant_expression): Handle EH_ELSE_EXPR.
* contracts.cc (finish_contract_attribute): Remove excess line.
(build_contract_condition_function): Post condition handlers are
void now.
(emit_postconditions_cleanup): Remove.
(emit_postconditions): New.
(add_pre_condition_fn_call): New.
(add_post_condition_fn_call): New.
(apply_preconditions): New.
(apply_postconditions): New.
(maybe_apply_function_contracts): New.
(apply_postcondition_to_return): Remove.
* contracts.h (apply_postcondition_to_return): Remove.
(maybe_apply_function_contracts): Add.
* coroutines.cc (coro_build_actor_or_destroy_function): Do not
copy contracts to coroutine helpers.
* decl.cc (finish_function): Handle wrapping a possibly
transformed function body in contract checks.
* typeck.cc (check_return_expr): Remove handling of post
conditions on return expressions.

gcc/ChangeLog:

* gimplify.cc (struct gimplify_ctx): Add a flag to show we are
expanding a handler.
(gimplify_expr): When we are expanding a handler, and the body
transforms might have re-written DECL_RESULT into a gimple var,
ensure that handler references to DECL_RESULT are also re-written
to refer to the gimple var.  When we are processing an EH_ELSE
expression, then add it if either of the cleanup slots is in
use.

gcc/testsuite/ChangeLog:

* g++.dg/contracts/pr115434.C: New test.
* g++.dg/coroutines/pr110871.C: New test.
* g++.dg/coroutines/pr110872.C: New test.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/cp/constexpr.cc|   8 +
 gcc/cp/contracts.cc  

[gcc(refs/users/meissner/heads/work171-bugs)] Do not add -mvsx when building or testing the float128 support.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1e3d2a86601bea1763bb25bb4e5b23a761ae7d4c

commit 1e3d2a86601bea1763bb25bb4e5b23a761ae7d4c
Author: Michael Meissner 
Date:   Tue Jul 16 12:00:37 2024 -0400

Do not add -mvsx when building or testing the float128 support.

In the past, we would add -mvsx when building the float128 support in libgcc.
This allowed us to build the float128 support on a big endian system where the
default cpu is power4.  The libgcc support can be built there, even though
there is no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450",
you cannot use VSX instructions.

With these patches, the float128 libgcc support is only built if the default
compiler has VSX support.  If somebody wanted to enable the glibc support 
for
big endian, they would need to set the base cpu to power8 to enable 
building the
libgcc float128 libraries.

2024-07-16 Michael Meissner  

libgcc/

PR target/115800
PR target/113652
* config.host (powerpc*-*-linux*): Do not add t-float128-hw or
t-float128-p10-hw if the default compiler does not support float128.
* config/rs6000/t-float128 (FP128_CFLAGS_SW): Do not add -mvsx when
building the basic float128 support.
* config/rs6000/t-float128-hw (FP128_CFLAGS_HW): Likewise.
* config/rs6000/t-float128-p10-hw (FP128_3_1_CFLAGS_HW): Likewise.
* configure.ac (powerpc*-*-linux*): Do not add -mvsx when testing
whether to build the float128 support.
* configure: Regenerate.

Diff:
---
 libgcc/config.host | 12 ++--
 libgcc/config/rs6000/t-float128|  8 +++-
 libgcc/config/rs6000/t-float128-hw |  3 +--
 libgcc/config/rs6000/t-float128-p10-hw |  3 +--
 libgcc/configure   |  8 +++-
 libgcc/configure.ac|  8 +++-
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9fae51d4ce7d..261b08859a4d 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1292,14 +1292,14 @@ powerpc*-*-linux*)
 
if test $libgcc_cv_powerpc_float128 = yes; then
tmake_file="${tmake_file} rs6000/t-float128"
-   fi
 
-   if test $libgcc_cv_powerpc_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-hw"
-   fi
+   if test $libgcc_cv_powerpc_float128_hw = yes; then
+   tmake_file="${tmake_file} rs6000/t-float128-hw"
 
-   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-p10-hw"
+   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
+   tmake_file="${tmake_file} 
rs6000/t-float128-p10-hw"
+   fi
+   fi
fi
 
extra_parts="$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o"
diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128
index b09b5664af0e..93e78adcd624 100644
--- a/libgcc/config/rs6000/t-float128
+++ b/libgcc/config/rs6000/t-float128
@@ -74,7 +74,13 @@ fp128_includes   = $(srcdir)/soft-fp/double.h \
  $(srcdir)/soft-fp/soft-fp.h
 
 # Build the emulator without ISA 3.0 hardware support.
-FP128_CFLAGS_SW = -Wno-type-limits -mvsx -mfloat128 \
+#
+# In the past we added -mvsx to build the float128 specific libraries with the
+# VSX instruction set.  This allowed the big endian GCC on server platforms to
+# build the float128 support.  However, is causes problems when other default
+# cpu targets are used such as the 7450.
+
+FP128_CFLAGS_SW = -Wno-type-limits -mfloat128 \
   -mno-float128-hardware -mno-gnu-attribute \
   -I$(srcdir)/soft-fp \
   -I$(srcdir)/config/rs6000 \
diff --git a/libgcc/config/rs6000/t-float128-hw 
b/libgcc/config/rs6000/t-float128-hw
index ed67b572580f..82726c98b983 100644
--- a/libgcc/config/rs6000/t-float128-hw
+++ b/libgcc/config/rs6000/t-float128-hw
@@ -23,8 +23,7 @@ fp128_ifunc_obj   = $(fp128_ifunc_static_obj) 
$(fp128_ifunc_shared_obj)
 fp128_sed_hw   = -hw
 
 # Build the hardware support functions with appropriate hardware support
-FP128_CFLAGS_HW = -Wno-type-limits -mvsx -mfloat128 \
-  -mcpu=power9 \
+FP128_CFLAGS_HW = -Wno-type-limits -mfloat128 -mcpu=power9 \
   -mfloat128-hardware -mno-gnu-attribute \
   -I$(srcdir)/soft-fp \
   -I$(srcdir)/config/rs6000 \

[gcc(refs/users/meissner/heads/work171-bugs)] Do not add -mvsx when building or testing the float128 support.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:f36f5b39fac1029515774924f25b837b68fe2a0f

commit f36f5b39fac1029515774924f25b837b68fe2a0f
Author: Michael Meissner 
Date:   Tue Jul 16 12:01:08 2024 -0400

Do not add -mvsx when building or testing the float128 support.

In the past, we would add -mvsx when building the float128 support in libgcc.
This allowed us to build the float128 support on a big endian system where the
default cpu is power4.  The libgcc support can be built there, even though
there is no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450",
you cannot use VSX instructions.

This patch changes the GCC tests so that it will only do the IEEE 128-bit 
tests
if the default compiler enables the VSX instruction set by default.  
Otherwise
all of the float128 tests will fail because the libgcc support is not 
available.

2024-07-16 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/abs128-1.c: Remove -mvsx option.  Add explicit
check for the float128 support.
* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.
* gcc.target/powerpc/float128-3.c: Likewise.
* gcc.target/powerpc/float128-5.c: Likewise.
* gcc.target/powerpc/float128-complex-2.c: Likewise.
* gcc.target/powerpc/float128-math.c: Likewise.
* gcc.target/powerpc/inf128-1.c: Likewise.
* gcc.target/powerpc/mulkc3-1.c: Likewise.
* gcc.target/powerpc/nan128-1.c: Likewise.
* gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
* gcc.target/powerpc/pr104253.c: Likewise.
* gcc.target/powerpc/pr70669.c: Likewise.
* gcc.target/powerpc/pr79004.c: Likewise.
* gcc.target/powerpc/pr79038-1.c: Likewise.
* gcc.target/powerpc/pr81959.c: Likewise.
* gcc.target/powerpc/pr85657-1.c: Likewise.
* gcc.target/powerpc/pr85657-2.c: Likewise.
* gcc.target/powerpc/pr99708.c: Likewise.
* gcc.target/powerpc/signbit-1.c: Likewise.
* gcc.target/powerpc/signbit-2.c: Likewise.
* lib/target-supports.exp (check_ppc_float128_sw_available): 
Likewise.
(check_ppc_float128_hw_available): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target___float128): Likewise.
(check_effective_target_base_quadfloat_support): Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c |  1 +
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c|  3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-3.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-5.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-complex-2.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/inf128-1.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/mulkc3-1.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/nan128-1.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c  |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr104253.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr70669.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/pr79004.c  |  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr79038-1.c|  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr81959.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/pr85657-1.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/pr85657-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99708.c  |  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-1.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-2.c|  2 +-
 gcc/testsuite/lib/target-supports.exp   | 10 +-
 23 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/abs128-1.c 
b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
index fe5206daff8c..ee4c1aa24747 100644
--- a/gcc/testsuite/gcc.target/powerpc/abs128-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/abs128-1.c
@@ -1,5 +1,6 @@
 /* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
-/* { dg-options "-mfloat128 -mvsx" } */
+/* { dg-options "-mfloat128" } */
+/* { dg-require-effective-target ppc_float128_sw } */
 
 void ab

[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:1461a33efbfbdff7fe805581db5bf1bc9f3e00f2

commit 1461a33efbfbdff7fe805581db5bf1bc9f3e00f2
Author: Michael Meissner 
Date:   Tue Jul 16 12:07:26 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 88 ++
 1 file changed, 88 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index e2530576840c..1c112f0c0ed6 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,91 @@
+ Branch work171-bugs, patch #331 
+
+Do not add -mvsx when testing the float128 support.
+
+In the past, we would add -mvsx when building the float128 support in libgcc.
+This allowed us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is 
no
+glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7540, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", 
you
+cannot use VSX instructions.
+
+This patch changes the GCC tests so that it will only do the IEEE 128-bit tests
+if the default compiler enables the VSX instruction set by default.  Otherwise
+all of the float128 tests will fail because the libgcc support is not 
available.
+
+2024-07-16 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/abs128-1.c: Remove -mvsx option.  Add explicit
+   check for the float128 support.
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+   * gcc.target/powerpc/float128-3.c: Likewise.
+   * gcc.target/powerpc/float128-5.c: Likewise.
+   * gcc.target/powerpc/float128-complex-2.: Likewise.
+   * gcc.target/powerpc/float128-math.: Likewise.
+   * gcc.target/powerpc/inf128-1.: Likewise.
+   * gcc.target/powerpc/mulkc3-1.c: Likewise.
+   * gcc.target/powerpc/nan128-1.c: Likewise.
+   * gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
+   * gcc.target/powerpc/pr104253.: Likewise.
+   * gcc.target/powerpc/pr70669.c: Likewise.
+   * gcc.target/powerpc/pr79004.c: Likewise.
+   * gcc.target/powerpc/pr79038-1.c: Likewise.
+   * gcc.target/powerpc/pr81959.c: Likewise.
+   * gcc.target/powerpc/pr85657-1.: Likewise.
+   * gcc.target/powerpc/pr85657-2.c: Likewise.
+   * gcc.target/powerpc/pr99708.: Likewise.
+   * gcc.target/powerpc/signbit-1.c: Likewise.
+   * gcc.target/powerpc/signbit-2.c: Likewise.
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
+   (check_ppc_float128_hw_available): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target___float128): Likewise.
+   (check_effective_target_base_quadfloat_support): Likewise.
+
+ Branch work171-bugs, patch #330 
+
+Do not add -mvsx when building libgcc float128 support.
+
+In the past, we would add -mvsx when building the float128 support in libgcc.
+This allowed us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is 
no
+glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7540, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", 
you
+cannot use VSX instructions.
+
+With these patches, the float128 libgcc support is only built if the default
+compiler has VSX support.  If somebody wanted to enable the glibc support for
+big endian, they would need to set the base cpu to power8 to enable building 
the
+libgcc float128 libraries.
+
+2024-07-16 Michael Meissner  
+
+libgcc/
+
+   PR target/115800
+   PR target/113652
+   * config.host (powerpc*-*-linux*): Do not add t-float128-hw or
+   t-float128-p10-hw if the default compiler does not support float128.
+   * config/rs6000/t-float128 (FP128_CFLAGS_SW): Do not add -mvsx when
+   building the basic float128 support.
+   * config/rs6000/t-float128-hw (FP128_CFLAGS_HW): Likewise.
+   * config/rs6000/t-float128-p10-hw (FP128_3_1_CFLAGS_HW): Likewise.
+   * configure.ac (powerpc*-*-linux*): Do not add -mvsx when testing
+   whether to build the float128 support.
+   * configure: Regenerate.
+
  Branch work171-bugs, patch #325 was reverted 

  Branch work171-bugs, patch #324 was reverted 

  Branch work171-bugs, patch #323 was reverted 



[gcc(refs/users/matz/heads/x86-ssw)] x86: Implement separate shrink wrapping

2024-07-16 Thread Michael Matz via Gcc-cvs
https://gcc.gnu.org/g:86676836d6cb8289c53ff3dffcf8583505a7e0f5

commit 86676836d6cb8289c53ff3dffcf8583505a7e0f5
Author: Michael Matz 
Date:   Sun Jun 30 03:52:39 2024 +0200

x86: Implement separate shrink wrapping

This adds support for the infrastructure for shrink wrapping
separate components to the x86 target.  The components we track
are individual registers to save/restore and the frame allocation
itself.

There are various limitations where we give up:
* when the frame becomes too large
* when any complicated realignment is needed (DRAP or not)
* when the calling convention requires certain forms of
  pro- or epilogues (e.g. SEH on win64)
* when the function is "special" (uses eh_return and the like);
  most of that is already avoided by the generic infrastructure
  in shrink-wrap.cc
* when we must not use moves to save/restore registers for any reasons
  (stack checking being one notable one)
and so on.

For the last point we now distinguish between not being able to use moves
(then we disable separate shrink wrapping) and merely not wanting to use
moves (e.g. because push/pop is equally fast).  In the latter case we
don't disable separate shrink wrapping, but do use moves for those
functions where it does something.

Apart from that it's fairly straightforward: for components selected
by the infrastructure to be separately shrink-wrapped, emit code to
save/restore them in the appropriate hook (for the frame-alloc
component, to adjust the stack pointer), remember them, and don't emit
any code for those in the normal expand_prologue and expand_epilogue
expanders.  But as the x86 prologue and epilogue generators are quite
a twisty maze with many cases to deal with, this also adds some aborts
and asserts for things that are unexpected.

The static instruction count of functions can increase (when
separate shrink wrapping emits some component sequences into multiple
blocks) and the instructions themselves can become larger (moves vs.
push/pop), so there's a code size increase for functions where this
does something.  The dynamic insn count decreases for at least one
path through the function (and doesn't increase for others).

Two testcases need separate shrink wrapping disabled because they
check for specific generated assembly instruction counts and sequences
or specific messages in the pro_and_epilogue dump file, which turn out
different with separate shrink wrapping.

gcc/
* config/i386/i386.h (struct i86_frame.cannot_use_moves):
Add member.
(struct machine_function.ssw_min_reg,
ssw_max_reg, reg_wrapped_separately, frame_alloc_separately,
anything_separately): Add members.
* config/i386/i386.cc (ix86_compute_frame_layout): Split out
cannot_use_moves from save_regs_using_move computation.
(ix86_emit_save_regs): Ensure not using this under separate
shrink wrapping.
(ix86_emit_save_regs_using_mov, ix86_emit_save_sse_regs_using_mov,
ix86_emit_restore_reg_using_pop, ix86_emit_restore_reg_using_pop2,
ix86_emit_restore_regs_using_pop): Don't handle separately shrink
wrapped components.
(ix86_expand_prologue): Handle separate shrink wrapping.
(ix86_emit_restore_reg_using_mov): New function, split out
from ...
(ix86_emit_restore_regs_using_mov): ... here and ...
(ix86_emit_restore_sse_regs_using_mov): ... here.
(ix86_expand_epilogue): Handle separate shrink wrapping.
(NCOMPONENTS, SW_FRAME): Add new defines.
(separate_frame_alloc_p, ix86_get_separate_components,
ix86_components_for_bb, ix86_disqualify_components,
ix86_init_frame_state, ix86_alloc_frame, ix86_dealloc_frame,
ix86_process_reg_components, ix86_emit_prologue_components,
ix86_emit_epilogue_components, ix86_set_handled_components):
Add new functions.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define target hook
macros.

gcc/testsuite/
* gcc.dg/stack-check-5.c: Disable separate shrink wrapping.
* gcc.target/x86_64/abi/callabi/leaf-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386.cc| 491 ++---
 gcc/config/i386/i386.h |   5 +
 gcc/testsuite/gcc.dg/stack-check-5.c   |   2 +-
 .../gcc.target/x86_64/abi/callabi/leaf-2.c |   2 +-
 4 files changed, 447 insertions(+

[gcc r15-2075] AVR: testsuite - Attribute ipa implies noinline and noclone.

2024-07-16 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:a3d1469c7c7e152fa7a5dbc95dbc6d1f3792bbd8

commit r15-2075-ga3d1469c7c7e152fa7a5dbc95dbc6d1f3792bbd8
Author: Georg-Johann Lay 
Date:   Tue Jul 16 19:53:24 2024 +0200

AVR: testsuite - Attribute ipa implies noinline and noclone.

gcc/testsuite/
* gcc.target/avr/isr-test.h: Attribute ipa implies noinline and 
noclone.
* gcc.target/avr/pr114981-powif.c: Same.
* gcc.target/avr/pr114981-powil.c: Same.
* gcc.target/avr/pr71676-1.c: Same.
* gcc.target/avr/pr71676-2.c: Same.
* gcc.target/avr/pr71676-3.c: Same.
* gcc.target/avr/pr71676.c: Same.
* gcc.target/avr/torture/add-extend.c: Same.
* gcc.target/avr/torture/fix-types.h: Same.
* gcc.target/avr/torture/fuse-add.c: Same.
* gcc.target/avr/torture/get-mem.c: Same.
* gcc.target/avr/torture/insv-anyshift-hi.c: Same.
* gcc.target/avr/torture/insv-anyshift-si.c: Same.
* gcc.target/avr/torture/isr-02-call.c: Same.
* gcc.target/avr/torture/isr-03-fixed.c: Same.
* gcc.target/avr/torture/pr109650-1.c: Same.
* gcc.target/avr/torture/pr109650-2.c: Same.
* gcc.target/avr/torture/pr109907-1.c: Same.
* gcc.target/avr/torture/pr109907-2.c: Same.
* gcc.target/avr/torture/pr114132-2.c: Same.
* gcc.target/avr/torture/pr39633.c: Same.
* gcc.target/avr/torture/pr51782-1.c: Same.
* gcc.target/avr/torture/pr61055.c: Same.
* gcc.target/avr/torture/pr61443.c: Same.
* gcc.target/avr/torture/pr64331.c: Same.
* gcc.target/avr/torture/pr77326.c: Same.
* gcc.target/avr/torture/pr83729.c: Same.
* gcc.target/avr/torture/pr83801.c: Same.
* gcc.target/avr/torture/pr87376.c: Same.
* gcc.target/avr/torture/pr88236-pr115726.c: Same.
* gcc.target/avr/torture/pr92606.c: Same.
* gcc.target/avr/torture/pr98762.c: Same.
* gcc.target/avr/torture/sat-hr-plus-minus.c: Same.
* gcc.target/avr/torture/sat-k-plus-minus.c: Same.
* gcc.target/avr/torture/sat-llk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-r-plus-minus.c: Same.
* gcc.target/avr/torture/sat-uhr-plus-minus.c: Same.
* gcc.target/avr/torture/sat-uk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-ullk-plus-minus.c: Same.
* gcc.target/avr/torture/sat-ur-plus-minus.c: Same.
* gcc.target/avr/torture/set-mem.c: Same.
* gcc.target/avr/torture/sub-extend.c: Same.
* gcc.target/avr/torture/tiny-progmem.c: Same.

Diff:
---
 gcc/testsuite/gcc.target/avr/isr-test.h|  6 ++--
 gcc/testsuite/gcc.target/avr/pr114981-powif.c  |  2 +-
 gcc/testsuite/gcc.target/avr/pr114981-powil.c  |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676-1.c   | 32 +++---
 gcc/testsuite/gcc.target/avr/pr71676-2.c   |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676-3.c   |  2 +-
 gcc/testsuite/gcc.target/avr/pr71676.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/add-extend.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/fix-types.h   |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/fuse-add.c|  4 +--
 gcc/testsuite/gcc.target/avr/torture/get-mem.c |  2 +-
 .../gcc.target/avr/torture/insv-anyshift-hi.c  |  6 ++--
 .../gcc.target/avr/torture/insv-anyshift-si.c  |  4 +--
 gcc/testsuite/gcc.target/avr/torture/isr-02-call.c |  2 +-
 .../gcc.target/avr/torture/isr-03-fixed.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109650-1.c  |  4 +--
 gcc/testsuite/gcc.target/avr/torture/pr109650-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109907-1.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr109907-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr114132-2.c  |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr39633.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr51782-1.c   |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/pr61055.c |  8 +++---
 gcc/testsuite/gcc.target/avr/torture/pr61443.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr64331.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr77326.c |  4 +--
 gcc/testsuite/gcc.target/avr/torture/pr83729.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr83801.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 10 +++
 .../gcc.target/avr/torture/pr88236-pr115726.c  | 10 +++
 gcc/testsuite/gcc.target/avr/torture/pr92606.c |  2 +-
 gcc/testsuite/gcc.target/avr/torture/pr98762.c |  2 +-
 .../gcc.target/avr/torture/sat-hr-plus-minus.c | 16 +--
 .../gcc.target/avr/torture/sat-k-plus-minus.c  | 16 +--
 .../gcc.target/avr/torture/sat-llk-plus-minus.c| 16 +--
 .../gcc.target/avr/torture/sat-r-plus-minus.c  | 16 ++

[gcc r15-2076] c++/contracts: ICE in C++ Contracts with '-fno-exceptions' [PR 110159]

2024-07-16 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:40a990c8b512fd25bd7d7b45aa509e1880d77209

commit r15-2076-g40a990c8b512fd25bd7d7b45aa509e1880d77209
Author: Nina Ranns 
Date:   Thu Jul 11 17:47:34 2024 +0100

c++/contracts: ICE in C++ Contracts with '-fno-exceptions' [PR 110159]

We currently only initialise terminate_fn if exceptions are enabled.
However, contract handling requires terminate_fn when building the
contract because a contract failure may result in a std::terminate call
regardless of whether exceptions are enabled.  Refactored
init_exception_processing to extract the initialisation of
terminate_fn.  New function init_terminate_fn added that initialises
terminate_fn if it hasn't already been initialised.  Call to
init_terminate_fn added in cxx_init_decl_processing if contracts are
enabled.

PR c++/110159

gcc/cp/ChangeLog:

* cp-tree.h (init_terminate_fn): Declaration of a new function.
* decl.cc (cxx_init_decl_processing): If contracts are enabled,
call init_terminate_fn.
* except.cc (init_exception_processing): Function refactored to
call init_terminate_fn.
(init_terminate_fn): Added new function that initializes
terminate_fn if it hasn't already been initialised.

gcc/testsuite/ChangeLog:

* g++.dg/contracts/pr110159.C: New test.

Signed-off-by: Nina Ranns 

Diff:
---
 gcc/cp/cp-tree.h  |  1 +
 gcc/cp/decl.cc|  3 +++
 gcc/cp/except.cc  | 31 +++
 gcc/testsuite/g++.dg/contracts/pr110159.C | 27 +++
 4 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c1a371bc7218..c6f102564ce0 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7194,6 +7194,7 @@ extern void qualified_name_lookup_error   (tree, 
tree, tree,
 location_t);
 
 /* in except.cc */
+extern void init_terminate_fn  (void);
 extern void init_exception_processing  (void);
 extern tree expand_start_catch_block   (tree);
 extern void expand_end_catch_block (void);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index d64b993329dd..66e8a973cce5 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -5172,6 +5172,9 @@ cxx_init_decl_processing (void)
   if (flag_exceptions)
 init_exception_processing ();
 
+  if (flag_contracts)
+init_terminate_fn ();
+
   if (modules_p ())
 init_modules (parse_in);
 
diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc
index 1eb3ba53b4b5..3c69ab695028 100644
--- a/gcc/cp/except.cc
+++ b/gcc/cp/except.cc
@@ -42,6 +42,28 @@ static tree wrap_cleanups_r (tree *, int *, void *);
 static bool is_admissible_throw_operand_or_catch_parameter (tree, bool,
tsubst_flags_t);
 
+/* Initializes the node to std::terminate, which is used in exception
+  handling and contract handling.  */
+
+void
+init_terminate_fn (void)
+{
+  if (terminate_fn)
+return;
+
+  tree tmp;
+
+  push_nested_namespace (std_node);
+  tmp = build_function_type_list (void_type_node, NULL_TREE);
+  terminate_fn = build_cp_library_fn_ptr ("terminate", tmp,
+  ECF_NOTHROW | ECF_NORETURN
+  | ECF_COLD);
+  gcc_checking_assert (TREE_THIS_VOLATILE (terminate_fn)
+  && TREE_NOTHROW (terminate_fn));
+  pop_nested_namespace (std_node);
+
+}
+
 /* Sets up all the global eh stuff that needs to be initialized at the
start of compilation.  */
 
@@ -51,14 +73,7 @@ init_exception_processing (void)
   tree tmp;
 
   /* void std::terminate (); */
-  push_nested_namespace (std_node);
-  tmp = build_function_type_list (void_type_node, NULL_TREE);
-  terminate_fn = build_cp_library_fn_ptr ("terminate", tmp,
-  ECF_NOTHROW | ECF_NORETURN
-  | ECF_COLD);
-  gcc_checking_assert (TREE_THIS_VOLATILE (terminate_fn)
-  && TREE_NOTHROW (terminate_fn));
-  pop_nested_namespace (std_node);
+  init_terminate_fn ();
 
   /* void __cxa_call_unexpected(void *); */
   tmp = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
diff --git a/gcc/testsuite/g++.dg/contracts/pr110159.C 
b/gcc/testsuite/g++.dg/contracts/pr110159.C
new file mode 100644
index ..614b466b1a39
--- /dev/null
+++ b/gcc/testsuite/g++.dg/contracts/pr110159.C
@@ -0,0 +1,27 @@
+// check that contracts can be handled even when exceptions are disabled
+// { dg-do run }
+// { dg-options "-std=c++2a -fcontracts -fno-exceptions " }
+// { dg-output "contract violation in function f at .* a<5" }
+
+#include 
+#include 
+
+int terminate_called = 0;
+void my_term()
+{
+std::exit(0);
+}
+
+
+void f(int 

[gcc(refs/users/meissner/heads/work171-bugs)] Do not add -mvsx or -mfloat128 when testing the float128 support.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7b9d4c1b70a4690dcd91af5b756b1eccf06cc12d

commit 7b9d4c1b70a4690dcd91af5b756b1eccf06cc12d
Author: Michael Meissner 
Date:   Tue Jul 16 17:19:27 2024 -0400

Do not add -mvsx or -mfloat128 when testing the float128 support.

2024-07-16 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* lib/target-supports.exp (check_ppc_float128_sw_available): Do not 
add
-mfloat128 or -mfloat128-hardware.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target_powerpc_float128_sw_ok): Likewise.
(check_effective_target_powerpc_float128_hw_ok): Likewise.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index bd8416a51886..beb8e2877e57 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2979,7 +2979,6 @@ proc check_ppc_float128_sw_available { } {
 || [istarget *-*-darwin*]} {
expr 0
} else {
-   set options "-mfloat128"
check_runtime_nocache ppc_float128_sw_available {
volatile __float128 x = 1.0q;
volatile __float128 y = 2.0q;
@@ -2988,7 +2987,7 @@ proc check_ppc_float128_sw_available { } {
__float128 z = x + y;
return (z != 3.0q);
}
-   } $options
+   } ""
}
 }]
 }
@@ -3005,7 +3004,6 @@ proc check_ppc_float128_hw_available { } {
 || [istarget *-*-darwin*]} {
expr 0
} else {
-   set options "-mfloat128 -mfloat128-hardware -mcpu=power9"
check_runtime_nocache ppc_float128_hw_available {
volatile __float128 x = 1.0q;
volatile __float128 y = 2.0q;
@@ -3017,7 +3015,7 @@ proc check_ppc_float128_hw_available { } {
__asm__ ("xsaddqp %0,%1,%2" : "+v" (w) : "v" (x), "v" (y));
return ((z != 3.0q) || (z != w));
}
-   } $options
+   } ""
}
 }]
 }
@@ -3030,14 +3028,13 @@ proc check_effective_target_ppc_ieee128_ok { } {
 || [istarget *-*-vxworks*]} {
expr 0
} else {
-   set options "-mfloat128"
check_runtime_nocache ppc_ieee128_ok {
int main()
{
  __ieee128 a;
  return 0;
}
-   } $options
+   } ""
}
 }]
 }
@@ -3946,9 +3943,6 @@ proc check_effective_target___float128 { } {
 }
 
 proc add_options_for___float128 { flags } {
-if { [istarget powerpc*-*-linux*] } {
-   return "$flags -mfloat128"
-}
 return "$flags"
 }
 
@@ -7217,8 +7211,9 @@ proc check_effective_target_power10_ok { } {
 }
 }
 
-# Return 1 if this is a PowerPC target supporting -mfloat128 via either
-# software emulation on power7/power8 systems or hardware support on power9.
+# Return 1 if this is a PowerPC target supporting IEEE 128-bit floating point
+# via either software emulation on power7/power8 systems or hardware support on
+# power9.
 
 proc check_effective_target_powerpc_float128_sw_ok { } {
 if { [istarget powerpc*-*-*]
@@ -7234,14 +7229,14 @@ proc check_effective_target_powerpc_float128_sw_ok { } {
__float128 z = x + y;
return (z == 3.0q);
}
-   } "-mfloat128"]
+   } ""]
 } else {
return 0
 }
 }
 
-# Return 1 if this is a PowerPC target supporting -mfloat128 via hardware
-# support on power9.
+# Return 1 if this is a PowerPC target supporting IEEE 128-bit floating point
+# via hardware support on power9 and later systems.
 
 proc check_effective_target_powerpc_float128_hw_ok { } {
 if { [istarget powerpc*-*-*]
@@ -7258,7 +7253,7 @@ proc check_effective_target_powerpc_float128_hw_ok { } {
__asm__ ("xsaddqp %0,%1,%2" : "=v" (z) : "v" (x), "v" (y));
return (z == 3.0q);
}
-   } "-mfloat128-hardware"]
+   } ""]
 } else {
return 0
 }


[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:63a24f668df0d510b8ddfb661ca29f1226422e4e

commit 63a24f668df0d510b8ddfb661ca29f1226422e4e
Author: Michael Meissner 
Date:   Tue Jul 16 17:21:15 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 1c112f0c0ed6..4d89e1bb665b 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,21 @@
+ Branch work171-bugs, patch #332 
+
+Do not add -mvsx or -mfloat128 when testing the float128 support.
+
+2024-07-16 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Do not add
+   -mfloat128 or -mfloat128-hardware.
+   (check_ppc_float128_hw_available): Likewise.
+   (check_effective_target_ppc_ieee128_ok): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target_powerpc_float128_sw_ok): Likewise.
+   (check_effective_target_powerpc_float128_hw_ok): Likewise.
+
  Branch work171-bugs, patch #331 
 
 Do not add -mvsx when testing the float128 support.


[gcc(refs/users/meissner/heads/work171-bugs)] Remove -mfloat128 on pr99708.c

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:811742d6a0da4455ae57225a927f9f85121f0655

commit 811742d6a0da4455ae57225a927f9f85121f0655
Author: Michael Meissner 
Date:   Tue Jul 16 19:37:14 2024 -0400

Remove -mfloat128 on pr99708.c

2024-07-16 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/pr99708.c: Remove -mfloat128 option.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr99708.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr99708.c 
b/gcc/testsuite/gcc.target/powerpc/pr99708.c
index 8a3fd0aa0b51..a81dab92fd4c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr99708.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr99708.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 /* { dg-skip-if "" { powerpc*-*-darwin* powerpc-ibm-aix* } } */
 /* { require-effective-target ppc_float128_sw } */
-/* { dg-options "-O2 -mfloat128" } */
+/* { dg-options "-O2" } */
 
 /*
  * PR target/99708


[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7df008e34eff29d4a2834cfe531ec870114daee9

commit 7df008e34eff29d4a2834cfe531ec870114daee9
Author: Michael Meissner 
Date:   Tue Jul 16 19:38:34 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 4d89e1bb665b..c2ab4905fe51 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,15 @@
+ Branch work171-bugs, patch #333 
+
+Remove -mfloat128 on pr99708.c
+
+2024-07-16 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/pr99708.c: Remove -mfloat128 option.
+
  Branch work171-bugs, patch #332 
 
 Do not add -mvsx or -mfloat128 when testing the float128 support.


[gcc r15-2077] range-ops should return the requested boolean type.

2024-07-16 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:73a8286d3ae266955fa921da1fa1328a587e7bb7

commit r15-2077-g73a8286d3ae266955fa921da1fa1328a587e7bb7
Author: Andrew MacLeod 
Date:   Tue Jul 16 12:38:11 2024 -0400

range-ops should return the requested boolean type.

The pointer based relation operator's fold_range () routines should
return a boolean range with the requested type, not the default type.

PR tree-optimization/115951
* range-op-ptr.cc (operator_equal::fold_range): Return a boolean
range with the requested type.
(operator_not_equal::fold_range): Likewise.
(operator_lt::fold_range): Likewise.
(operator_le::fold_range): Likewise.
(operator_gt::fold_range): Likewise.
(operator_ge::fold_range): Likewise.

Diff:
---
 gcc/range-op-ptr.cc | 50 +-
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/gcc/range-op-ptr.cc b/gcc/range-op-ptr.cc
index 1f41236e7107..24e206c00cdd 100644
--- a/gcc/range-op-ptr.cc
+++ b/gcc/range-op-ptr.cc
@@ -977,9 +977,9 @@ operator_equal::fold_range (irange &r, tree type,
   if (op1_const && op2_const)
 {
   if (wi::eq_p (op1.lower_bound (), op2.upper_bound()))
-   r = range_true ();
+   r = range_true (type);
   else
-   r = range_false ();
+   r = range_false (type);
 }
   else
 {
@@ -988,14 +988,14 @@ operator_equal::fold_range (irange &r, tree type,
   prange tmp = op1;
   tmp.intersect (op2);
   if (tmp.undefined_p ())
-   r = range_false ();
+   r = range_false (type);
   // Check if a constant cannot satisfy the bitmask requirements.
   else if (op2_const && !op1.get_bitmask ().member_p (op2.lower_bound ()))
-r = range_false ();
+r = range_false (type);
   else if (op1_const && !op2.get_bitmask ().member_p (op1.lower_bound ()))
-r = range_false ();
+r = range_false (type);
   else
-   r = range_true_and_false ();
+   r = range_true_and_false (type);
 }
 
   //update_known_bitmask (r, EQ_EXPR, op1, op2);
@@ -1076,9 +1076,9 @@ operator_not_equal::fold_range (irange &r, tree type,
   if (op1_const && op2_const)
 {
   if (wi::ne_p (op1.lower_bound (), op2.upper_bound()))
-   r = range_true ();
+   r = range_true (type);
   else
-   r = range_false ();
+   r = range_false (type);
 }
   else
 {
@@ -1087,14 +1087,14 @@ operator_not_equal::fold_range (irange &r, tree type,
   prange tmp = op1;
   tmp.intersect (op2);
   if (tmp.undefined_p ())
-   r = range_true ();
+   r = range_true (type);
   // Check if a constant cannot satisfy the bitmask requirements.
   else if (op2_const && !op1.get_bitmask ().member_p (op2.lower_bound ()))
-r = range_true ();
+r = range_true (type);
   else if (op1_const && !op2.get_bitmask ().member_p (op1.lower_bound ()))
-r = range_true ();
+r = range_true (type);
   else
-   r = range_true_and_false ();
+   r = range_true_and_false (type);
 }
 
   //update_known_bitmask (r, NE_EXPR, op1, op2);
@@ -1173,14 +1173,14 @@ operator_lt::fold_range (irange &r, tree type,
   gcc_checking_assert (sign == TYPE_SIGN (op2.type ()));
 
   if (wi::lt_p (op1.upper_bound (), op2.lower_bound (), sign))
-r = range_true ();
+r = range_true (type);
   else if (!wi::lt_p (op1.lower_bound (), op2.upper_bound (), sign))
-r = range_false ();
+r = range_false (type);
   // Use nonzero bits to determine if < 0 is false.
   else if (op2.zero_p () && !wi::neg_p (op1.get_nonzero_bits (), sign))
-r = range_false ();
+r = range_false (type);
   else
-r = range_true_and_false ();
+r = range_true_and_false (type);
 
   //update_known_bitmask (r, LT_EXPR, op1, op2);
   return true;
@@ -1266,11 +1266,11 @@ operator_le::fold_range (irange &r, tree type,
   gcc_checking_assert (sign == TYPE_SIGN (op2.type ()));
 
   if (wi::le_p (op1.upper_bound (), op2.lower_bound (), sign))
-r = range_true ();
+r = range_true (type);
   else if (!wi::le_p (op1.lower_bound (), op2.upper_bound (), sign))
-r = range_false ();
+r = range_false (type);
   else
-r = range_true_and_false ();
+r = range_true_and_false (type);
 
   //update_known_bitmask (r, LE_EXPR, op1, op2);
   return true;
@@ -1355,11 +1355,11 @@ operator_gt::fold_range (irange &r, tree type,
   gcc_checking_assert (sign == TYPE_SIGN (op2.type ()));
 
   if (wi::gt_p (op1.lower_bound (), op2.upper_bound (), sign))
-r = range_true ();
+r = range_true (type);
   else if (!wi::gt_p (op1.upper_bound (), op2.lower_bound (), sign))
-r = range_false ();
+r = range_false (type);
   else
-r = range_true_and_false ();
+r = range_true_and_false (type);
 
   //update_known_bitmask (r, GT_EXPR, op1, op2);
   return true;
@@ -1444,11 +1444,11 @@ operator_ge::fold_range (irange &r, tree type,
   gcc

[gcc r15-2079] c++/modules: Propagate BINDING_VECTOR_*_DUPS_P on realloc [PR99242]

2024-07-16 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:1aa0f1627857c3e2d90982bdb07ca78ca10b26f3

commit r15-2079-g1aa0f1627857c3e2d90982bdb07ca78ca10b26f3
Author: Nathaniel Shead 
Date:   Mon Jul 8 22:25:17 2024 +1000

c++/modules: Propagate BINDING_VECTOR_*_DUPS_P on realloc [PR99242]

When importing modules, when a binding vector for a name runs out of
slots it gets reallocated with a larger size, and existing bindings are
copied across.  However, the flags indicating whether deduping needs to
occur were not copied: this causes ICEs, as it allows a duplicate binding
to be added which then violates assumptions later on.

PR c++/99242

gcc/cp/ChangeLog:

* name-lookup.cc (append_imported_binding_slot): Propagate dups
flags.

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr99242_a.H: New test.
* g++.dg/modules/pr99242_b.H: New test.
* g++.dg/modules/pr99242_c.H: New test.
* g++.dg/modules/pr99242_d.C: New test.

Signed-off-by: Nathaniel Shead 

Diff:
---
 gcc/cp/name-lookup.cc| 4 
 gcc/testsuite/g++.dg/modules/pr99242_a.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_b.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_c.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_d.C | 7 +++
 5 files changed, 20 insertions(+)

diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc
index 361dc3d953d1..8823ab71c600 100644
--- a/gcc/cp/name-lookup.cc
+++ b/gcc/cp/name-lookup.cc
@@ -353,6 +353,10 @@ append_imported_binding_slot (tree *slot, tree name, 
unsigned ix)
 
   tree new_vec = make_binding_vec (name, want);
   BINDING_VECTOR_NUM_CLUSTERS (new_vec) = have + 1;
+  BINDING_VECTOR_GLOBAL_DUPS_P (new_vec)
+   = BINDING_VECTOR_GLOBAL_DUPS_P (*slot);
+  BINDING_VECTOR_PARTITION_DUPS_P (new_vec)
+   = BINDING_VECTOR_PARTITION_DUPS_P (*slot);
   memcpy (BINDING_VECTOR_CLUSTER_BASE (new_vec),
  BINDING_VECTOR_CLUSTER_BASE (*slot),
  have * sizeof (binding_cluster));
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_a.H 
b/gcc/testsuite/g++.dg/modules/pr99242_a.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_a.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_b.H 
b/gcc/testsuite/g++.dg/modules/pr99242_b.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_b.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_c.H 
b/gcc/testsuite/g++.dg/modules/pr99242_c.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_c.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_d.C 
b/gcc/testsuite/g++.dg/modules/pr99242_d.C
new file mode 100644
index ..1046d1af9846
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_d.C
@@ -0,0 +1,7 @@
+// { dg-additional-options "-fmodules-ts" }
+bool __is_constant_evaluated();
+import "pr99242_a.H";
+void f() { __is_constant_evaluated(); }
+import "pr99242_b.H";
+import "pr99242_c.H";
+void g() { __is_constant_evaluated(); }


[gcc r15-2080] rs6000: ROP - Emit hashst and hashchk insns on Power8 and later [PR114759]

2024-07-16 Thread Peter Bergner via Gcc-cvs
https://gcc.gnu.org/g:a05c3d23d1e1c8d2971b123804fc7a61a3561adb

commit r15-2080-ga05c3d23d1e1c8d2971b123804fc7a61a3561adb
Author: Peter Bergner 
Date:   Wed Jun 19 16:07:29 2024 -0500

rs6000: ROP - Emit hashst and hashchk insns on Power8 and later [PR114759]

We currently only emit the ROP-protect hash* insns for Power10, where the
insns were added to the architecture.  We want to emit them for earlier
cpus (where they operate as NOPs), so that if those older binaries are
ever executed on a Power10, then they'll be protected from ROP attacks.
Binutils accepts hashst and hashchk back to Power8, so change GCC to emit
them for Power8 and later.  This matches clang's behavior.

2024-06-19  Peter Bergner  

gcc/
PR target/114759
* config/rs6000/rs6000-logue.cc (rs6000_stack_info): Use 
TARGET_POWER8.
(rs6000_emit_prologue): Likewise.
* config/rs6000/rs6000.md (hashchk): Likewise.
(hashst): Likewise.
Fix whitespace.

gcc/testsuite/
PR target/114759
* gcc.target/powerpc/pr114759-2.c: New test.
* lib/target-supports.exp (rop_ok): Use
check_effective_target_has_arch_pwr8.

Diff:
---
 gcc/config/rs6000/rs6000-logue.cc |  6 +++---
 gcc/config/rs6000/rs6000.md   |  6 +++---
 gcc/testsuite/gcc.target/powerpc/pr114759-2.c | 17 +
 gcc/testsuite/lib/target-supports.exp |  2 +-
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index 193e2122c0f9..48f406330714 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -716,7 +716,7 @@ rs6000_stack_info (void)
   info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
   info->rop_hash_size = 0;
 
-  if (TARGET_POWER10
+  if (TARGET_POWER8
   && info->calls_p
   && DEFAULT_ABI == ABI_ELFv2
   && rs6000_rop_protect)
@@ -3272,7 +3272,7 @@ rs6000_emit_prologue (void)
   /* NOTE: The hashst isn't needed if we're going to do a sibcall,
  but there's no way to know that here.  Harmless except for
  performance, of course.  */
-  if (TARGET_POWER10 && rs6000_rop_protect && info->rop_hash_size != 0)
+  if (TARGET_POWER8 && rs6000_rop_protect && info->rop_hash_size != 0)
 {
   gcc_assert (DEFAULT_ABI == ABI_ELFv2);
   rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
@@ -5051,7 +5051,7 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type)
 
   /* The ROP hash check must occur after the stack pointer is restored
  (since the hash involves r1), and is not performed for a sibcall.  */
-  if (TARGET_POWER10
+  if (TARGET_POWER8
   && rs6000_rop_protect
   && info->rop_hash_size != 0
   && epilogue_type != EPILOGUE_TYPE_SIBCALL)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 276a5c9cf2d3..679aac5a10d5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -15808,9 +15808,9 @@
 
 (define_insn "hashst"
   [(set (match_operand:DI 0 "simple_offsettable_mem_operand" "=m")
-(unspec_volatile:DI [(match_operand:DI 1 "int_reg_operand" "r")]
+   (unspec_volatile:DI [(match_operand:DI 1 "int_reg_operand" "r")]
UNSPEC_HASHST))]
-  "TARGET_POWER10 && rs6000_rop_protect"
+  "TARGET_POWER8 && rs6000_rop_protect"
 {
   static char templ[32];
   const char *p = rs6000_privileged ? "p" : "";
@@ -15823,7 +15823,7 @@
   [(unspec_volatile [(match_operand:DI 0 "int_reg_operand" "r")
 (match_operand:DI 1 "simple_offsettable_mem_operand" "m")]
UNSPEC_HASHCHK)]
-  "TARGET_POWER10 && rs6000_rop_protect"
+  "TARGET_POWER8 && rs6000_rop_protect"
 {
   static char templ[32];
   const char *p = rs6000_privileged ? "p" : "";
diff --git a/gcc/testsuite/gcc.target/powerpc/pr114759-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr114759-2.c
new file mode 100644
index ..3881ebd416e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr114759-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mrop-protect" } */
+/* { dg-require-effective-target rop_ok } Only enable on supported ABIs.  */
+
+/* Verify we generate ROP-protect hash insns when compiling for Power8.  */
+
+extern void foo (void);
+
+int
+bar (void)
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler-times {\mhashst\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mhashchk\M} 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index f001c28072f6..daa0c75d2bc3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7452,7 +7452,7 @@ proc check_effective_target_powerpc_elfv2 { } {
 # Return 1 if this is a PowerPC target supporting -mrop-protect
 
 proc check_effective_

[gcc r15-2081] rs6000: Error on CPUs and ABIs that don't support the ROP protection insns [PR114759]

2024-07-16 Thread Peter Bergner via Gcc-cvs
https://gcc.gnu.org/g:6f2bab9b5d1ce1914c748b7dcd8638dafaa98df7

commit r15-2081-g6f2bab9b5d1ce1914c748b7dcd8638dafaa98df7
Author: Peter Bergner 
Date:   Mon Jul 15 16:57:32 2024 -0500

rs6000: Error on CPUs and ABIs that don't support the ROP protection insns [PR114759]

We currently silently ignore the -mrop-protect option for old CPUs we don't
support with the ROP hash insns, but we throw an error for unsupported ABIs.
This patch treats unsupported CPUs and ABIs similarly by throwing an error
for both.  This matches clang behavior and allows us to simplify our tests
in the code that generates our prologue and epilogue code.

2024-06-26  Peter Bergner  

gcc/
PR target/114759
* config/rs6000/rs6000.cc (rs6000_option_override_internal): Disallow
CPUs and ABIs that do not support the ROP protection insns.
* config/rs6000/rs6000-logue.cc (rs6000_stack_info): Remove now
unneeded tests.
(rs6000_emit_prologue): Likewise.
Remove unneeded gcc_assert.
(rs6000_emit_epilogue): Likewise.
* config/rs6000/rs6000.md: Likewise.

gcc/testsuite/
PR target/114759
* gcc.target/powerpc/pr114759-3.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-logue.cc | 22 ++
 gcc/config/rs6000/rs6000.cc   | 12 
 gcc/config/rs6000/rs6000.md   |  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr114759-3.c | 19 +++
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index 48f406330714..edc0d6c8f520 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -716,17 +716,11 @@ rs6000_stack_info (void)
   info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
   info->rop_hash_size = 0;
 
-  if (TARGET_POWER8
-  && info->calls_p
-  && DEFAULT_ABI == ABI_ELFv2
-  && rs6000_rop_protect)
+  /* If we want ROP protection and this function makes a call, indicate
+ we need to create a stack slot to save the hashed return address in.  */
+  if (rs6000_rop_protect
+  && info->calls_p)
 info->rop_hash_size = 8;
-  else if (rs6000_rop_protect && DEFAULT_ABI != ABI_ELFv2)
-{
-  /* We can't check this in rs6000_option_override_internal since
-DEFAULT_ABI isn't established yet.  */
-  error ("%qs requires the ELFv2 ABI", "-mrop-protect");
-}
 
   /* Determine if we need to save the condition code registers.  */
   if (save_reg_p (CR2_REGNO)
@@ -3272,9 +3266,8 @@ rs6000_emit_prologue (void)
   /* NOTE: The hashst isn't needed if we're going to do a sibcall,
  but there's no way to know that here.  Harmless except for
  performance, of course.  */
-  if (TARGET_POWER8 && rs6000_rop_protect && info->rop_hash_size != 0)
+  if (info->rop_hash_size)
 {
-  gcc_assert (DEFAULT_ABI == ABI_ELFv2);
   rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   rtx addr = gen_rtx_PLUS (Pmode, stack_ptr,
   GEN_INT (info->rop_hash_save_offset));
@@ -5051,12 +5044,9 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type)
 
   /* The ROP hash check must occur after the stack pointer is restored
  (since the hash involves r1), and is not performed for a sibcall.  */
-  if (TARGET_POWER8
-  && rs6000_rop_protect
-  && info->rop_hash_size != 0
+  if (info->rop_hash_size
   && epilogue_type != EPILOGUE_TYPE_SIBCALL)
 {
-  gcc_assert (DEFAULT_ABI == ABI_ELFv2);
   rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   rtx addr = gen_rtx_PLUS (Pmode, stack_ptr,
   GEN_INT (info->rop_hash_save_offset));
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 195f2af9062e..d4eec5c472f8 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -4844,6 +4844,18 @@ rs6000_option_override_internal (bool global_init_p)
}
 }
 
+  /* We only support ROP protection on certain targets.  */
+  if (rs6000_rop_protect)
+{
+  /* Disallow CPU targets we don't support.  */
+  if (!TARGET_POWER8)
+   error ("%<-mrop-protect%> requires %<-mcpu=power8%> or later");
+
+  /* Disallow ABI targets we don't support.  */
+  if (DEFAULT_ABI != ABI_ELFv2)
+   error ("%<-mrop-protect%> requires the ELFv2 ABI");
+}
+
   /* Initialize all of the registers.  */
   rs6000_init_hard_regno_mode_ok (global_init_p);
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 679aac5a10d5..45a4a8cfb907 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -15810,7 +15810,7 @@
   [(set (match_operand:DI 0 "simple_offsettable_mem_operand" "=m")
(unspec_volatile:DI [(match_operand:DI 1 "int_reg_operand" "r")]
  

[gcc r14-10442] c++/modules: Propagate BINDING_VECTOR_*_DUPS_P on realloc [PR99242]

2024-07-16 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:5fad0b552c5851fb6ae6eb3616e50cc25af1391d

commit r14-10442-g5fad0b552c5851fb6ae6eb3616e50cc25af1391d
Author: Nathaniel Shead 
Date:   Mon Jul 8 22:25:17 2024 +1000

c++/modules: Propagate BINDING_VECTOR_*_DUPS_P on realloc [PR99242]

When importing modules, when a binding vector for a name runs out of
slots it gets reallocated with a larger size, and existing bindings are
copied across.  However, the flags to indicate whether deduping needs to
occur did not: this causes ICEs, as it allows a duplicate binding to be
added which then violates assumptions later on.

PR c++/99242

gcc/cp/ChangeLog:

* name-lookup.cc (append_imported_binding_slot): Propagate dups
flags.

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr99242_a.H: New test.
* g++.dg/modules/pr99242_b.H: New test.
* g++.dg/modules/pr99242_c.H: New test.
* g++.dg/modules/pr99242_d.C: New test.

Signed-off-by: Nathaniel Shead 
(cherry picked from commit 1aa0f1627857c3e2d90982bdb07ca78ca10b26f3)

Diff:
---
 gcc/cp/name-lookup.cc| 4 
 gcc/testsuite/g++.dg/modules/pr99242_a.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_b.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_c.H | 3 +++
 gcc/testsuite/g++.dg/modules/pr99242_d.C | 7 +++
 5 files changed, 20 insertions(+)

diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc
index 4dffc0e9acc8..b752598564a3 100644
--- a/gcc/cp/name-lookup.cc
+++ b/gcc/cp/name-lookup.cc
@@ -352,6 +352,10 @@ append_imported_binding_slot (tree *slot, tree name, 
unsigned ix)
 
   tree new_vec = make_binding_vec (name, want);
   BINDING_VECTOR_NUM_CLUSTERS (new_vec) = have + 1;
+  BINDING_VECTOR_GLOBAL_DUPS_P (new_vec)
+   = BINDING_VECTOR_GLOBAL_DUPS_P (*slot);
+  BINDING_VECTOR_PARTITION_DUPS_P (new_vec)
+   = BINDING_VECTOR_PARTITION_DUPS_P (*slot);
   memcpy (BINDING_VECTOR_CLUSTER_BASE (new_vec),
  BINDING_VECTOR_CLUSTER_BASE (*slot),
  have * sizeof (binding_cluster));
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_a.H 
b/gcc/testsuite/g++.dg/modules/pr99242_a.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_a.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_b.H 
b/gcc/testsuite/g++.dg/modules/pr99242_b.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_b.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_c.H 
b/gcc/testsuite/g++.dg/modules/pr99242_c.H
new file mode 100644
index ..2df0b4184ab3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_c.H
@@ -0,0 +1,3 @@
+// { dg-additional-options "-fmodule-header" }
+// { dg-module-cmi {} }
+bool __is_constant_evaluated();
diff --git a/gcc/testsuite/g++.dg/modules/pr99242_d.C 
b/gcc/testsuite/g++.dg/modules/pr99242_d.C
new file mode 100644
index ..1046d1af9846
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr99242_d.C
@@ -0,0 +1,7 @@
+// { dg-additional-options "-fmodules-ts" }
+bool __is_constant_evaluated();
+import "pr99242_a.H";
+void f() { __is_constant_evaluated(); }
+import "pr99242_b.H";
+import "pr99242_c.H";
+void g() { __is_constant_evaluated(); }


[gcc r15-2082] libbacktrace: update xcoff.c for base_address changes

2024-07-16 Thread Ian Lance Taylor via Gcc-cvs
https://gcc.gnu.org/g:f438299ef6860b8233ffe1c5fda7d63f2f6c56ae

commit r15-2082-gf438299ef6860b8233ffe1c5fda7d63f2f6c56ae
Author: Ian Lance Taylor 
Date:   Tue Jul 16 21:27:05 2024 -0700

libbacktrace: update xcoff.c for base_address changes

* xcoff.c (struct xcoff_fileline_data): Change base_address field
to struct libbacktrace_base_address.
(xcoff_initialize_syminfo): Change base_address to struct
libbacktrace_base_address.  Use libbacktrace_add_base.
(xcoff_initialize_fileline): Likewise.
(xcoff_lookup_pc): Use libbacktrace_add_base.
(xcoff_add): Change base_address to struct
libbacktrace_base_address.
(xcoff_armem_add, xcoff_add_shared_libs): Likewise.
(backtrace_initialize): Likewise.
* Makefile.am (xcoff.lo): Remove unused target.
(xcoff_32.lo, xcoff_64.lo): New targets.
* Makefile.in: Regenerate.

Diff:
---
 libbacktrace/Makefile.am |  3 ++-
 libbacktrace/Makefile.in |  3 ++-
 libbacktrace/xcoff.c | 46 +++---
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index 8215cfd9bd5b..4a2bc038127e 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -702,7 +702,8 @@ sort.lo: config.h backtrace.h internal.h
 stest.lo: config.h backtrace.h internal.h
 state.lo: config.h backtrace.h backtrace-supported.h internal.h
 unknown.lo: config.h backtrace.h internal.h
-xcoff.lo: config.h backtrace.h internal.h
+xcoff_32.lo: config.h backtrace.h internal.h
+xcoff_64.lo: config.h backtrace.h internal.h
 xztest.lo: config.h backtrace.h backtrace-supported.h internal.h testlib.h
 ztest.lo: config.h backtrace.h backtrace-supported.h internal.h testlib.h
 
diff --git a/libbacktrace/Makefile.in b/libbacktrace/Makefile.in
index 7241e70002dd..7b61bbffe5df 100644
--- a/libbacktrace/Makefile.in
+++ b/libbacktrace/Makefile.in
@@ -2772,7 +2772,8 @@ sort.lo: config.h backtrace.h internal.h
 stest.lo: config.h backtrace.h internal.h
 state.lo: config.h backtrace.h backtrace-supported.h internal.h
 unknown.lo: config.h backtrace.h internal.h
-xcoff.lo: config.h backtrace.h internal.h
+xcoff_32.lo: config.h backtrace.h internal.h
+xcoff_64.lo: config.h backtrace.h internal.h
 xztest.lo: config.h backtrace.h backtrace-supported.h internal.h testlib.h
 ztest.lo: config.h backtrace.h backtrace-supported.h internal.h testlib.h
 
diff --git a/libbacktrace/xcoff.c b/libbacktrace/xcoff.c
index e22f15dd5cec..01443c48401b 100644
--- a/libbacktrace/xcoff.c
+++ b/libbacktrace/xcoff.c
@@ -385,7 +385,7 @@ struct xcoff_fileline_data
   size_t linenos_size;
   uint64_t lnnoptr0;
   /* Loader address.  */
-  uintptr_t base_address;
+  struct libbacktrace_base_address base_address;
 };
 
 /* Information we gather for the DWARF sections we care about.  */
@@ -586,7 +586,7 @@ xcoff_symname (const b_xcoff_syment *asym,
 
 static int
 xcoff_initialize_syminfo (struct backtrace_state *state,
- uintptr_t base_address,
+ struct libbacktrace_base_address base_address,
  const b_xcoff_syment *syms, size_t nsyms,
  const unsigned char *strtab, size_t strtab_size,
  backtrace_error_callback error_callback, void *data,
@@ -628,7 +628,8 @@ xcoff_initialize_syminfo (struct backtrace_state *state,
{
  const b_xcoff_auxent *aux = (const b_xcoff_auxent *) (asym + 1);
  xcoff_symbols[j].name = xcoff_symname (asym, strtab, strtab_size);
- xcoff_symbols[j].address = base_address + asym->n_value;
+ xcoff_symbols[j].address =
+   libbacktrace_add_base (asym->n_value, base_address);
  /* x_fsize will be 0 if there is no debug information.  */
  xcoff_symbols[j].size = aux->x_fcn.x_fsize;
  ++j;
@@ -766,7 +767,8 @@ xcoff_lookup_pc (struct backtrace_state *state 
ATTRIBUTE_UNUSED,
   lineno = (const b_xcoff_lineno *) lineptr;
   if (lineno->l_lnno == 0)
break;
-  if (pc <= fdata->base_address + lineno->l_addr.l_paddr)
+  if (pc <= libbacktrace_add_base (lineno->l_addr.l_paddr,
+  fdata->base_address))
break;
   match = lnnoptr;
   lnno = lineno->l_lnno;
@@ -860,7 +862,7 @@ xcoff_fileline (struct backtrace_state *state, uintptr_t pc,
 
 static int
 xcoff_initialize_fileline (struct backtrace_state *state,
-  uintptr_t base_address,
+  struct libbacktrace_base_address base_address,
   const b_xcoff_scnhdr *sects,
   const b_xcoff_syment *syms, size_t nsyms,
   const unsigned char *strtab, size_t strtab_size,
@@ -1001,7 +1003,7 @@ xcoff_initialize_fileline (struct backtrace_state *state,
fn->name

[gcc r15-2083] expr: Allow same precision modes conversion between {ibm_extended, ieee_quad}_format

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:3f6e6d4b408a26f69816f18d88dde4d983677488

commit r15-2083-g3f6e6d4b408a26f69816f18d88dde4d983677488
Author: Kewen Lin 
Date:   Wed Jul 17 00:14:18 2024 -0500

expr: Allow same precision modes conversion between {ibm_extended, 
ieee_quad}_format

For historical reasons, rs6000 defines KFmode, TFmode
and IFmode to have different mode precisions, but this causes
some issues and needs some workarounds such as PR112993.
So we are going to make all rs6000 128 bit scalar FP modes
have 128 bit precision.  To prepare for that, this patch
makes function convert_mode_scalar allow same precision
FP modes conversion if their underlying formats are
ibm_extended_format and ieee_quad_format respectively, just
like the existing special treatment on arm_bfloat_half_format
<-> ieee_half_format.  It also factors out all the relevant
checks into a lambda function.  Besides, similar to ieee fp16
-> bfloat conversion, it adopts trunc_optab rather than
sext_optab for ibm128 to ieee128 conversion.

PR target/112993

gcc/ChangeLog:

* expr.cc (convert_mode_scalar): Allow same precision conversion
between scalar floating point modes if their underlying format is
ibm_extended_format or ieee_quad_format, and refactor assertion
with new lambda function acceptable_same_precision_modes.  Use
trunc_optab rather than sext_optab for ibm128 to ieee128 conversion.
* optabs-libfuncs.cc (gen_trunc_conv_libfunc): Use trunc_optab 
rather
than sext_optab for ibm128 to ieee128 conversion.

Diff:
---
 gcc/expr.cc| 39 ++-
 gcc/optabs-libfuncs.cc |  4 +++-
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ffbac5136923..2089c2b86a98 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
   enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
  : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
 
+  auto acceptable_same_precision_modes
+= [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
+{
+  if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
+   return true;
+
+  /* arm_bfloat_half_format <-> ieee_half_format */
+  if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
+  && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
+ || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+ && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
+   return true;
+
+  /* ibm_extended_format <-> ieee_quad_format */
+  if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
+  && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
+ || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
+ && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
+   return true;
+
+  return false;
+};
+
   if (to_real)
 {
   rtx value;
@@ -346,18 +369,16 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
 
   gcc_assert ((GET_MODE_PRECISION (from_mode)
   != GET_MODE_PRECISION (to_mode))
- || (DECIMAL_FLOAT_MODE_P (from_mode)
- != DECIMAL_FLOAT_MODE_P (to_mode))
- || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
- && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
- || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
- && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
+ || acceptable_same_precision_modes (from_mode, to_mode));
 
   if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
{
- if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
- && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
-   /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
+ if ((REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+  && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
+ || (REAL_MODE_FORMAT (to_mode) == &ieee_quad_format
+ && REAL_MODE_FORMAT (from_mode) == &ibm_extended_format))
+   /* libgcc implements just __trunchfbf2, not __extendhfbf2;
+  and __trunctfkf2, not __extendtfkf2.  */
tab = trunc_optab;
  else
/* Conversion between decimal float and binary float, same
diff --git a/gcc/optabs-libfuncs.cc b/gcc/optabs-libfuncs.cc
index 26729910d92b..ab97eace80e5 100644
--- a/gcc/optabs-libfuncs.cc
+++ b/gcc/optabs-libfuncs.cc
@@ -591,7 +591,9 @@ gen_trunc_conv_libfunc (convert_optab tab,
 
   if (GET_MODE_PRECISION (float_fmode) <= GET_MODE_PRECISION (float_tmode)
   && (REAL_MODE_FORMAT (float_tmode) != &arm_bfloat_half_format

[gcc r15-2085] fortran: Teach get_real_kind_from_node for Power 128 fp modes [PR112993]

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:de6969fd311307e34904fc1f85603a9d92938974

commit r15-2085-gde6969fd311307e34904fc1f85603a9d92938974
Author: Kewen Lin 
Date:   Wed Jul 17 00:16:59 2024 -0500

fortran: Teach get_real_kind_from_node for Power 128 fp modes [PR112993]

Previously effective target fortran_real_c_float128 never
passed on Power regardless of whether the default 128 bit long
double is ibmlongdouble or ieeelongdouble.  This is because TF
mode is always used for kind 16 real, which has precision
127, while the node float128_type_node for c_float128 has
128 type precision, so get_real_kind_from_node can't find a
match as it only checks gfc_real_kinds[i].mode_precision
and type precision.

With changing TFmode/IFmode/KFmode to have the same mode
precision 128, now fortran_real_c_float128 can pass with
ieeelongdouble enabled by default and test cases guarded
with it get tested accordingly.  But with ibmlongdouble
enabled by default, since TFmode has precision 128 which
is the same as type precision 128 of float128_type_node,
get_real_kind_from_node considers kind for TFmode matches
float128_type_node, but it's wrong as at this time point
TFmode is with ibm extended format.  So this patch is to
teach get_real_kind_from_node to check one more field which
differs between the underlying real formats, so that it
can avoid the unexpected matching when more than one
mode has the same precision.

PR target/112993

gcc/fortran/ChangeLog:

* trans-types.cc (get_real_kind_from_node): Consider the case where
more than one mode has the same precision.

Diff:
---
 gcc/fortran/trans-types.cc | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc
index 0ef67723fcd3..f7b80a9761c4 100644
--- a/gcc/fortran/trans-types.cc
+++ b/gcc/fortran/trans-types.cc
@@ -183,7 +183,21 @@ get_real_kind_from_node (tree type)
 
   for (i = 0; gfc_real_kinds[i].kind != 0; i++)
 if (gfc_real_kinds[i].mode_precision == TYPE_PRECISION (type))
-  return gfc_real_kinds[i].kind;
+  {
+   /* On Power, we have three 128-bit scalar floating-point modes
+  and all of their types have 128 bit type precision, so we
+  should check underlying real format details further.  */
+#if defined(HAVE_TFmode) && defined(HAVE_IFmode) && defined(HAVE_KFmode)
+   if (gfc_real_kinds[i].kind == 16)
+ {
+   machine_mode mode = TYPE_MODE (type);
+   const struct real_format *fmt = REAL_MODE_FORMAT (mode);
+   if (fmt->p != gfc_real_kinds[i].digits)
+ continue;
+ }
+#endif
+   return gfc_real_kinds[i].kind;
+  }
 
   return -4;
 }


[gcc r15-2084] rs6000: Make all 128 bit scalar FP modes have 128 bit precision [PR112993]

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:33dca0a4c1c421625cedb2d6105ef1c05f6b774e

commit r15-2084-g33dca0a4c1c421625cedb2d6105ef1c05f6b774e
Author: Kewen Lin 
Date:   Wed Jul 17 00:14:43 2024 -0500

rs6000: Make all 128 bit scalar FP modes have 128 bit precision [PR112993]

On rs6000, there are three 128 bit scalar floating point
modes TFmode, IFmode and KFmode.  For historical
reasons, we define them with different mode precisions,
that is KFmode 126, TFmode 127 and IFmode 128.  But in
fact all of them should have the same mode precision 128,
this special setting has caused some issues like some
unexpected failures mentioned in [1] and also made us have
to introduce some workarounds, such as: the workaround in
build_common_tree_nodes for KFmode 126, the workaround in
range_compatible_p for same mode but different precision
issue.

This patch is to make these three 128 bit scalar floating
point modes TFmode, IFmode and KFmode have 128 bit mode
precision, and keep the order same as previous in order
to make machine independent parts of the compiler not try
to widen IFmode to TFmode.  Besides, build_common_tree_nodes
adopts the newly added hook mode_for_floating_type so we
don't need to worry about unexpected mode for long double
type node.

In function convert_mode_scalar, with the proposed change,
it adopts sext_optab for converting ieee128 format mode to
ibm128 format mode while trunc_optab for converting ibm128
format mode to ieee128 format mode.  Thus this patch removes
useless extend and trunc optab supports, supplements new
define_expands expandkftf2 and trunctfkf2 to align with
convert_mode_scalar implementation.  It also unnames two
define_insn_and_split to avoid conflicts and make them more
clear.  Considering the current implementation that there is
no chance to have KF <-> IF conversion (since either of them
would be TF already), it adds two dummy define_expands to
assert this.

[1] https://inbox.sourceware.org/gcc-patches/
718677e7-614d-7977-312d-05a75e1fd...@linux.ibm.com/

PR target/112993

gcc/ChangeLog:

* config/rs6000/rs6000-modes.def (IFmode, KFmode, TFmode): Define
with FLOAT_MODE instead of FRACTIONAL_FLOAT_MODE, don't use special
precisions any more.
(rs6000-modes.h): Remove include.
* config/rs6000/rs6000-modes.h: Remove.
* config/rs6000/rs6000.h (rs6000-modes.h): Remove include.
* config/rs6000/t-rs6000: Remove rs6000-modes.h include.
* config/rs6000/rs6000.cc (rs6000_option_override_internal): Replace
all uses of FLOAT_PRECISION_TFmode with 128.
(rs6000_c_mode_for_floating_type): Likewise.
* config/rs6000/rs6000.md (define_expand extendiftf2): Remove.
(define_expand extendifkf2): Remove.
(define_expand extendtfkf2): Remove.
(define_expand trunckftf2): Remove.
(define_expand trunctfif2): Remove.
(define_expand extendtfif2): Add new assertion.
(define_expand expandkftf2): New.
(define_expand trunciftf2): Add new assertion.
(define_expand trunctfkf2): New.
(define_expand truncifkf2): Change with gcc_unreachable.
(define_expand expandkfif2): New.
(define_insn_and_split extendkftf2): Rename to  ...
(define_insn_and_split *extendkftf2): ... this.
(define_insn_and_split trunctfkf2): Rename to ...
(define_insn_and_split *extendtfkf2): ... this.

Diff:
---
 gcc/config/rs6000/rs6000-modes.def | 31 +++---
 gcc/config/rs6000/rs6000-modes.h   | 36 
 gcc/config/rs6000/rs6000.cc|  9 ++---
 gcc/config/rs6000/rs6000.h |  5 ---
 gcc/config/rs6000/rs6000.md| 67 +++---
 gcc/config/rs6000/t-rs6000 |  1 -
 6 files changed, 41 insertions(+), 108 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-modes.def 
b/gcc/config/rs6000/rs6000-modes.def
index 094b246c834d..b69593c40a61 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -18,12 +18,11 @@
along with GCC; see the file COPYING3.  If not see
.  */
 
-/* We order the 3 128-bit floating point types so that IFmode (IBM 128-bit
-   floating point) is the 128-bit floating point type with the highest
-   precision (128 bits).  This so that machine independent parts of the
-   compiler do not try to widen IFmode to TFmode on ISA 3.0 (power9) that has
-   hardware support for IEEE 128-bit.  We set TFmode (long double mode) in
-   between, and KFmode (explicit __float128) below it.
+/* We order the 3 128-bit floating point type modes here as KFmode, TFmode and
+   IFmode, it is the same as the previous order, to make machine in

[gcc r15-2086] ranger: Revert the workaround introduced in PR112788 [PR112993]

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:fa86f510f51e6d940a28ea997fca3a6e3f50b4d3

commit r15-2086-gfa86f510f51e6d940a28ea997fca3a6e3f50b4d3
Author: Kewen Lin 
Date:   Wed Jul 17 00:17:42 2024 -0500

ranger: Revert the workaround introduced in PR112788 [PR112993]

This reverts commit r14-6478-gfda8e2f8292a90 "range:
Workaround different type precision between _Float128 and
long double [PR112788]" as the fixes for PR112993 make
all 128 bits scalar floating point have the same 128 bit
precision, this workaround isn't needed any more.

PR target/112993

gcc/ChangeLog:

* value-range.h (range_compatible_p): Remove the workaround on
different type precision between _Float128 and long double.

Diff:
---
 gcc/value-range.h | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index 334ea1bc338c..03af758d152c 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -1764,13 +1764,7 @@ range_compatible_p (tree type1, tree type2)
   // types_compatible_p requires conversion in both directions to be useless.
   // GIMPLE only requires a cast one way in order to be compatible.
   // Ranges really only need the sign and precision to be the same.
-  return TYPE_SIGN (type1) == TYPE_SIGN (type2)
-&& (TYPE_PRECISION (type1) == TYPE_PRECISION (type2)
-// FIXME: As PR112788 shows, for now on rs6000 _Float128 has
-// type precision 128 while long double has type precision 127
-// but both have the same mode so their precision is actually
-// the same, workaround it temporarily.
-|| (SCALAR_FLOAT_TYPE_P (type1)
-&& TYPE_MODE (type1) == TYPE_MODE (type2)));
+  return (TYPE_PRECISION (type1) == TYPE_PRECISION (type2)
+ && TYPE_SIGN (type1) == TYPE_SIGN (type2));
 }
 #endif // GCC_VALUE_RANGE_H


[gcc r15-2087] tree: Remove KFmode workaround [PR112993]

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:b5c813ed6035cf6ef831927e66e184a5847afbe6

commit r15-2087-gb5c813ed6035cf6ef831927e66e184a5847afbe6
Author: Kewen Lin 
Date:   Wed Jul 17 00:19:00 2024 -0500

tree: Remove KFmode workaround [PR112993]

The fix for PR112993 makes KFmode have 128 bit mode precision,
so we don't need this workaround to fix up the type precision any
more, and can just go with mode precision.  So this patch
removes the KFmode workaround.

PR target/112993

gcc/ChangeLog:

* tree.cc (build_common_tree_nodes): Drop the workaround for rs6000
KFmode precision adjustment.

Diff:
---
 gcc/tree.cc | 9 -
 1 file changed, 9 deletions(-)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 2d2d5b6db6ed..a2d431662bd5 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9633,15 +9633,6 @@ build_common_tree_nodes (bool signed_char)
   if (!targetm.floatn_mode (n, extended).exists (&mode))
continue;
   int precision = GET_MODE_PRECISION (mode);
-  /* Work around the rs6000 KFmode having precision 113 not
-128.  */
-  const struct real_format *fmt = REAL_MODE_FORMAT (mode);
-  gcc_assert (fmt->b == 2 && fmt->emin + fmt->emax == 3);
-  int min_precision = fmt->p + ceil_log2 (fmt->emax - fmt->emin);
-  if (!extended)
-   gcc_assert (min_precision == n);
-  if (precision < min_precision)
-   precision = min_precision;
   FLOATN_NX_TYPE_NODE (i) = make_node (REAL_TYPE);
   TYPE_PRECISION (FLOATN_NX_TYPE_NODE (i)) = precision;
   layout_type (FLOATN_NX_TYPE_NODE (i));


[gcc r15-2088] rs6000: Change optab for ibm128 and ieee128 conversion

2024-07-16 Thread Kewen Lin via Gcc-cvs
https://gcc.gnu.org/g:dd4d71ca4d8d4252eb33a3202380524e6d43ba05

commit r15-2088-gdd4d71ca4d8d4252eb33a3202380524e6d43ba05
Author: Kewen Lin 
Date:   Wed Jul 17 00:19:30 2024 -0500

rs6000: Change optab for ibm128 and ieee128 conversion

Currently for 128 bit floating-point ibm128 and ieee128
formats conversion, the corresponding libcalls are:
  ibm128 -> ieee128 "__trunctfkf2"
  ieee128 -> ibm128 "__extendkftf2"
, and generic code handling (like convert_mode_scalar) also
adopts sext_optab for ieee128 -> ibm128 while trunc_optab
for ibm128 -> ieee128.  But in rs6000 port as function
rs6000_expand_float128_convert and init_float128_ieee show,
we adopt sext_optab for ibm128 -> ieee128 with "__trunctfkf2"
while trunc_optab for ieee128 -> ibm128 with "__extendkftf2".

To make them consistent and avoid some surprises, this patch
is to adjust rs6000 internal handlings by adopting trunc_optab
for ibm128 -> ieee128 with "__trunctfkf2" while sext_optab for
ieee128 -> ibm128 with "__extendkftf2".

gcc/ChangeLog:

* config/rs6000/rs6000.cc (init_float128_ieee): Use trunc_optab 
rather
than sext_optab for converting FLOAT128_IBM_P mode to 
FLOAT128_IEEE_P
mode, and use sext_optab rather than trunc_optab for converting
FLOAT128_IEEE_P mode to FLOAT128_IBM_P mode.
(rs6000_expand_float128_convert): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 905e6cb6a942..2c0a7fc8cefa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -11476,13 +11476,13 @@ init_float128_ieee (machine_mode mode)
   set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
   set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
 
-  set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
+  set_conv_libfunc (trunc_optab, mode, IFmode, "__trunctfkf2");
   if (mode != TFmode && FLOAT128_IBM_P (TFmode))
-   set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
+   set_conv_libfunc (trunc_optab, mode, TFmode, "__trunctfkf2");
 
-  set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
+  set_conv_libfunc (sext_optab, IFmode, mode, "__extendkftf2");
   if (mode != TFmode && FLOAT128_IBM_P (TFmode))
-   set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
+   set_conv_libfunc (sext_optab, TFmode, mode, "__extendkftf2");
 
   set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
   set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
@@ -15640,7 +15640,7 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool 
unsigned_p)
case E_IFmode:
case E_TFmode:
  if (FLOAT128_IBM_P (src_mode))
-   cvt = sext_optab;
+   cvt = trunc_optab;
  else
do_move = true;
  break;
@@ -15702,7 +15702,7 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool 
unsigned_p)
case E_IFmode:
case E_TFmode:
  if (FLOAT128_IBM_P (dest_mode))
-   cvt = trunc_optab;
+   cvt = sext_optab;
  else
do_move = true;
  break;


[gcc(refs/users/meissner/heads/work171-bugs)] Fix typo.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:3592d2a50b9c34384530a7fe1959485b869080a4

commit 3592d2a50b9c34384530a7fe1959485b869080a4
Author: Michael Meissner 
Date:   Wed Jul 17 01:44:24 2024 -0400

Fix typo.

2024-07-17 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/pr99708.c: Use dg-require-effective-target, not
require-effective-target.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr99708.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr99708.c 
b/gcc/testsuite/gcc.target/powerpc/pr99708.c
index a81dab92fd4c..6232ff009494 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr99708.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr99708.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-skip-if "" { powerpc*-*-darwin* powerpc-ibm-aix* } } */
-/* { require-effective-target ppc_float128_sw } */
+/* { dg-require-effective-target ppc_float128_sw } */
 /* { dg-options "-O2" } */
 
 /*


[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:d32ac7851596695441389fdceec703d2ad823206

commit d32ac7851596695441389fdceec703d2ad823206
Author: Michael Meissner 
Date:   Wed Jul 17 01:45:18 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 13 +
 1 file changed, 13 insertions(+)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index c2ab4905fe51..2ef69cc3e3d9 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,3 +1,16 @@
+ Branch work171-bugs, patch #334 
+
+Fix typo.
+
+2024-07-17 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/pr99708.c: Use dg-require-effective-target, not
+   require-effective-target.
+
  Branch work171-bugs, patch #333 
 
 Remove -mfloat128 on pr99708.c


[gcc(refs/users/meissner/heads/work171-bugs)] Revert changes

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:50e8cc25318493246e824f7acb6f112ddf55db66

commit 50e8cc25318493246e824f7acb6f112ddf55db66
Author: Michael Meissner 
Date:   Wed Jul 17 02:27:41 2024 -0400

Revert changes

Diff:
---
 gcc/ChangeLog.bugs | 99 +-
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 +-
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 -
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 +-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  3 +-
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  3 +-
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  3 +-
 .../gcc.target/powerpc/float128-complex-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/inf128-1.c|  3 +-
 gcc/testsuite/gcc.target/powerpc/mulkc3-1.c|  3 +-
 gcc/testsuite/gcc.target/powerpc/nan128-1.c|  3 +-
 gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr104253.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/pr70669.c |  3 +-
 gcc/testsuite/gcc.target/powerpc/pr79004.c |  4 +-
 gcc/testsuite/gcc.target/powerpc/pr79038-1.c   |  4 +-
 gcc/testsuite/gcc.target/powerpc/pr81959.c |  3 +-
 gcc/testsuite/gcc.target/powerpc/pr85657-1.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr85657-2.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99708.c |  4 +-
 gcc/testsuite/gcc.target/powerpc/signbit-1.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/signbit-2.c   |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 27 +++---
 24 files changed, 44 insertions(+), 141 deletions(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 2ef69cc3e3d9..8668084757cc 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,98 +1,7 @@
- Branch work171-bugs, patch #334 
-
-Fix typo.
-
-2024-07-17 Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * gcc.target/powerpc/pr99708.c: Use dg-require-effective-target, not
-   require-effective-target.
-
- Branch work171-bugs, patch #333 
-
-Remove -mfloat128 on pr99708.c
-
-2024-07-16 Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * gcc.target/powerpc/pr99708.c: Remove -mfloat128 option.
-
- Branch work171-bugs, patch #332 
-
-Do not add -mvsx or -mfloat128 when testing the float128 support.
-
-2024-07-16 Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * lib/target-supports.exp (check_ppc_float128_sw_available): Do not add
-   -mfloat128 or -mfloat128-hardware.
-   (check_ppc_float128_hw_available): Likewise.
-   (check_effective_target_ppc_ieee128_ok): Likewise.
-   (add_options_for___float128): Likewise.
-   (check_effective_target_powerpc_float128_sw_ok): Likewise.
-   (check_effective_target_powerpc_float128_hw_ok): Likewise.
-
- Branch work171-bugs, patch #331 
-
-Do not add -mvsx when testing the float128 support.
-
-In the past, we would add -mvsx when building the float128 support in libgcc.
-This allowed us to build the float128 support on a big endian system where the
-default cpu is power4.  While the libgcc support can be built, given there is no
-glibc support for float128 available.
-
-However, adding -mvsx and building the libgcc float128 support causes problems
-if you set the default cpu to something like a 7540, which does not have VSX
-support.  The assembler complains that when the code does a ".machine 7450", you
-cannot use VSX instructions.
-
-This patch changes the GCC tests so that it will only do the IEEE 128-bit tests
-if the default compiler enables the VSX instruction set by default.  Otherwise
-all of the float128 tests will fail because the libgcc support is not available.
-
-2024-07-16 Michael Meissner  
-
-gcc/testsuite/
-
-   PR target/115800
-   PR target/113652
-   * gcc.target/powerpc/abs128-1.c: Remove -mvsx option.  Add explicit
-   check for the float128 support.
-   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
-   * gcc.target/powerpc/copysign128-1.c: Likewise.
-   * gcc.target/powerpc/divkc3-1.c: Likewise.
-   * gcc.target/powerpc/float128-3.c: Likewise.
-   * gcc.target/powerpc/float128-5.c: Likewise.
-   * gcc.target/powerpc/float128-complex-2.: Likewise.
-   * gcc.target/powerpc/float128-math.: Likewise.
-   * gcc.target/powerpc/inf128-1.: Likewise.
-   * gcc.target/powerpc/mulkc3-1.c: Likewise.
-   * gcc.target/powerpc/nan128-1.c: Likewise.
-   * gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
-   * gcc.target/powerpc/pr104253.: Likewise.
-   * gcc.target/powerpc/pr70669.c: Likewise.
-   

[gcc(refs/users/meissner/heads/work171-bugs)] Do not add -mvsx when testing the float128 support.

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5c2cb471b6848d9ce36cb8200bfe5768c323068c

commit 5c2cb471b6848d9ce36cb8200bfe5768c323068c
Author: Michael Meissner 
Date:   Wed Jul 17 02:43:37 2024 -0400

Do not add -mvsx when testing the float128 support.

In the past, we would add -mvsx when building the float128 support in libgcc.
This allowed us to build the float128 support on a big endian system where the
default cpu is power4.  While the libgcc support can be built, it is of little
use, given there is no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450",
you cannot use VSX instructions.

After patching libgcc to not build the float128 support unless the host can
support float128 normally, this patch changes the GCC tests so that they will
only do the IEEE 128-bit tests if the default compiler enables the VSX
instruction set by default.  Otherwise all of the float128 tests will fail
because the libgcc support is not available.

In addition to not doing the float128 tests when the compiler does not natively
support float128, this patch also no longer adds -mvsx, -mfloat128, and
-mfloat128-hardware to enable the support if the compiler did not natively
enable it.

2024-07-17 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, and
-mfloat128-hardware options to float128 test.  Add explicit checks for
the float128 support, rather than just using VSX as a stand in, or
assuming we can silently enable VSX if the default is power4.  For
pr99708.c, also use the correct spelling to disable the float128 tests.
* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.
* gcc.target/powerpc/float128-3.c: Likewise.
* gcc.target/powerpc/float128-5.c: Likewise.
* gcc.target/powerpc/float128-complex-2.c: Likewise.
* gcc.target/powerpc/float128-math.c: Likewise.
* gcc.target/powerpc/inf128-1.c: Likewise.
* gcc.target/powerpc/mulkc3-1.c: Likewise.
* gcc.target/powerpc/nan128-1.c: Likewise.
* gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
* gcc.target/powerpc/pr104253.c: Likewise.
* gcc.target/powerpc/pr70669.c: Likewise.
* gcc.target/powerpc/pr79004.c: Likewise.
* gcc.target/powerpc/pr79038-1.c: Likewise.
* gcc.target/powerpc/pr81959.c: Likewise.
* gcc.target/powerpc/pr85657-1.c: Likewise.
* gcc.target/powerpc/pr85657-2.c: Likewise.
* gcc.target/powerpc/pr99708.c: Likewise.
* gcc.target/powerpc/signbit-1.c: Likewise.
* gcc.target/powerpc/signbit-2.c: Likewise.
* lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target___float128): Likewise.
(check_effective_target_base_quadfloat_support): Likewise.
(check_effective_target_powerpc_float128_sw_ok): Likewise.
(check_effective_target_powerpc_float128_hw_ok): Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 ++-
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 +
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  3 ++-
 .../gcc.target/powerpc/float128-complex-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/inf128-1.c|  3 ++-
 gcc/testsuite/gcc.target/powerpc/mulkc3-1.c|  3 ++-
 gcc/testsuite/gcc.target/powerpc/nan128-1.c|  3 ++-
 gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-3.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr104253.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/pr70669.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/pr79004.c |  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr79038-1.c   |  4 ++--
 gcc/testsuite/gcc.target/powerpc/pr81959.c |  3 ++-
 gcc/testsuite/gcc.target/powerpc/pr85657-1.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr85657-2.c   |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99708.c |  4 ++--
 gcc/testsuite/gcc.target/powerpc/signbit-1.c   |  2 +-
 g

[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*

2024-07-16 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:080bc022d64e40faa21e9463272b3a84f14645d2

commit 080bc022d64e40faa21e9463272b3a84f14645d2
Author: Michael Meissner 
Date:   Wed Jul 17 02:44:45 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.bugs | 67 +-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
index 8668084757cc..07ace132d6c9 100644
--- a/gcc/ChangeLog.bugs
+++ b/gcc/ChangeLog.bugs
@@ -1,4 +1,69 @@
- Branch work171-bugs, patch #334 was reverted 

+ Branch work171-bugs, patch #335 
+
+Do not add -mvsx when testing the float128 support.
+
+In the past, we would add -mvsx when building the float128 support in libgcc.
+This allowed us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is no
+glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7540, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", you
+cannot use VSX instructions.
+
+After patching libgcc to not build the float128 support unless the host can
+support float128 normally, this patch changes the GCC tests so that it will only
+do the IEEE 128-bit tests if the default compiler enables the VSX instruction
+set by default.  Otherwise all of the float128 tests will fail because the
+libgcc support is not available.
+
+In addition to not doing the float128 tests when the compiler does not natively
+support float128, this patch also removes adding -mvsx, -mfloat128, and
+-mfloat128-hardware enable the support if the compiler did not natively enable
+it.
+
+2024-07-17 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, and
+   -mfloat128-hardware options to float128 test.  Add explicit checks for
+   the float128 support, rather than just using VSX as a stand in, or
+   assuming we can silently enable VSX if the default is power4.  For
+   pr99708.c, also use the correct spelling to disable the float128 tests.
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+   * gcc.target/powerpc/float128-3.c: Likewise.
+   * gcc.target/powerpc/float128-5.c: Likewise.
+   * gcc.target/powerpc/float128-complex-2.: Likewise.
+   * gcc.target/powerpc/float128-math.: Likewise.
+   * gcc.target/powerpc/inf128-1.: Likewise.
+   * gcc.target/powerpc/mulkc3-1.c: Likewise.
+   * gcc.target/powerpc/nan128-1.c: Likewise.
+   * gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
+   * gcc.target/powerpc/pr104253.: Likewise.
+   * gcc.target/powerpc/pr70669.c: Likewise.
+   * gcc.target/powerpc/pr79004.c: Likewise.
+   * gcc.target/powerpc/pr79038-1.c: Likewise.
+   * gcc.target/powerpc/pr81959.c: Likewise.
+   * gcc.target/powerpc/pr85657-1.: Likewise.
+   * gcc.target/powerpc/pr85657-2.c: Likewise.
+   * gcc.target/powerpc/pr99708.: Likewise.
+   * gcc.target/powerpc/signbit-1.c: Likewise.
+   * gcc.target/powerpc/signbit-2.c: Likewise.
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
+   (check_ppc_float128_hw_available): Likewise.
+   (check_effective_target_ppc_ieee128_ok): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target___float128): Likewise.
+   (check_effective_target_base_quadfloat_support): Likewise.
+   (check_effective_target_powerpc_float128_sw_ok): Likewise.
+   (check_effective_target_powerpc_float128_hw_ok): Likewise.
+
  Branch work171-bugs, patch #333 was reverted 

  Branch work171-bugs, patch #332 was reverted 

  Branch work171-bugs, patch #331 was reverted 



[gcc r15-2089] rs6000: Remove redundant guard for float128 mode pattern

2024-07-16 Thread HaoChen Gui via Gcc-cvs
https://gcc.gnu.org/g:ecc2c3cb7235f2d3a75cb3909cace7f6a38a4062

commit r15-2089-gecc2c3cb7235f2d3a75cb3909cace7f6a38a4062
Author: Haochen Gui 
Date:   Wed Jul 17 14:47:36 2024 +0800

rs6000: Remove redundant guard for float128 mode pattern

gcc/
* config/rs6000/rs6000.md (movcc, *movcc_p10,
*movcc_invert_p10, *fpmask, *xxsel,
@ieee_128bit_vsx_abs2, *ieee_128bit_vsx_nabs2,
add3, sub3, mul3, div3, sqrt2,
copysign3, copysign3_hard, copysign3_soft,
@neg2_hw, @abs2_hw, *nabs2_hw, fma4_hw,
*fms4_hw, *nfma4_hw, *nfms4_hw,
extend2_hw, truncdf2_hw,
truncsf2_hw, fix_2_hw,
fix_trunc2,
*fix_trunc2_mem,
float_di2_hw, float_si2_hw,
float2, floatuns_di2_hw,
floatuns_si2_hw, floatuns2,
floor2, ceil2, btrunc2, round2,
add3_odd, sub3_odd, mul3_odd, div3_odd,
sqrt2_odd, fma4_odd, *fms4_odd, *nfma4_odd,
*nfms4_odd, truncdf2_odd, *cmp_hw for IEEE128):
Remove guard FLOAT128_IEEE_P.
(@extenddf2_fprs, @extenddf2_vsx,
truncdf2_internal1, truncdf2_internal2,
fix_trunc_helper, neg2, *cmp_internal1,
*cmp_internal2 for IBM128): Remove guard FLOAT128_IBM_P.

Diff:
---
 gcc/config/rs6000/rs6000.md | 115 ++--
 1 file changed, 57 insertions(+), 58 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6a2891c5e509..f59be5365708 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5701,7 +5701,7 @@
 (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
   (match_operand:IEEE128 2 "gpc_reg_operand")
   (match_operand:IEEE128 3 "gpc_reg_operand")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
 {
   if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
 DONE;
@@ -5718,7 +5718,7 @@
 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
(clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5750,7 +5750,7 @@
 (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
 (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
(clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5785,7 +5785,7 @@
 (match_operand:IEEE128 3 "altivec_register_operand" "v")])
 (match_operand:V2DI 4 "all_ones_constant" "")
 (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "xscmp%V1qp %0,%2,%3"
   [(set_attr "type" "fpcompare")])
 
@@ -5796,7 +5796,7 @@
 (match_operand:V2DI 2 "zero_constant" ""))
 (match_operand:IEEE128 3 "altivec_register_operand" "v")
 (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
@@ -8869,7 +8869,7 @@
 (match_operand:DF 1 "nonimmediate_operand" "d,m,d")))
(use (match_operand:DF 2 "nonimmediate_operand" "m,m,d"))]
   "!TARGET_VSX && TARGET_HARD_FLOAT
-   && TARGET_LONG_DOUBLE_128 && FLOAT128_IBM_P (mode)"
+   && TARGET_LONG_DOUBLE_128"
   "#"
   "&& reload_completed"
   [(set (match_dup 3) (match_dup 1))
@@ -8886,7 +8886,7 @@
   [(set (match_operand:IBM128 0 "gpc_reg_operand" "=d,d")
(float_extend:IBM128
 (match_operand:DF 1 "nonimmediate_operand" "wa,m")))]
-  "TARGET_LONG_DOUBLE_128 && TARGET_VSX && FLOAT128_IBM_P (mode)"
+  "TARGET_LONG_DOUBLE_128 && TARGET_VSX"
   "#"
   "&& reload_completed"
   [(set (match_dup 2) (match_dup 1))
@@ -8932,7 +8932,7 @@
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
(float_truncate:DF
 (match_operand:IBM128 1 "gpc_reg_operand" "0,d")))]
-  "FLOAT128_IBM_P (mode) && !TARGET_XL_COMPAT
+  "!TARGET_XL_COMPAT
&& TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128"
   "@
#
@@ -8948,7 +8948,7 @@
 (define_insn "truncdf2_internal2"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "d")))]
-  "FLOAT128_IBM_P (mode) && TARGET_XL_COMPAT && TARGET_HARD_FLOAT
+  "TARGET_XL_COMPAT && TARGET_HARD_FLOAT
&& TARGET_LONG_DOUBLE_128"
   "fadd %0,%1,%L1"
   [(set_attr "type" "fp")])
@@ -9001,7 +9001,7 @@
(unspec:DF [(match