[gcc r15-9633] ipa: Do not emit info about temporary clones to ipa-clones dump (PR119852)

2025-05-07 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:77780c31485eeb71e9fabf8ea9d4b1af0c3be595

commit r15-9633-g77780c31485eeb71e9fabf8ea9d4b1af0c3be595
Author: Martin Jambor 
Date:   Tue May 6 17:28:43 2025 +0200

ipa: Do not emit info about temporary clones to ipa-clones dump (PR119852)

As described in PR 119852, the output of -fdump-ipa-clones can contain
"(null)" as the suffix/reason for cloning when we need to create a
clone to hold the original function during recursive inlining.  Such a
clone is never output and so should not be part of the dump output
either.

gcc/ChangeLog:

2025-04-23  Martin Jambor  

PR ipa/119852
* cgraphclones.cc (dump_callgraph_transformation): Document the
function.  Do not dump if suffix is NULL.

gcc/testsuite/ChangeLog:

2025-04-23  Martin Jambor  

PR ipa/119852
* gcc.dg/ipa/pr119852.c: New test.

(cherry picked from commit fb5829a01651d427a63a12c44ecc8baa47dbfc83)

Diff:
---
 gcc/cgraphclones.cc | 10 +++-
 gcc/testsuite/gcc.dg/ipa/pr119852.c | 50 +
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index e6223fa1f5cc..bf5bc41cde9c 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -307,12 +307,20 @@ cgraph_node::expand_all_artificial_thunks ()
   e = e->next_caller;
 }
 
+/* Dump information about creation of a call graph node clone to the dump file
+   created by the -fdump-ipa-clones option.  ORIGINAL is the function being
+   cloned, CLONE is the new clone.  SUFFIX is a string that helps identify the
+   reason for cloning, often it is the suffix used by a particular IPA pass to
+   create unique function names.  SUFFIX can be NULL and in that case the
+   dumping will not take place, which must be the case only for helper clones
+   which will never be emitted to the output.  */
+
 void
 dump_callgraph_transformation (const cgraph_node *original,
   const cgraph_node *clone,
   const char *suffix)
 {
-  if (symtab->ipa_clones_dump_file)
+  if (suffix && symtab->ipa_clones_dump_file)
 {
   fprintf (symtab->ipa_clones_dump_file,
   "Callgraph clone;%s;%d;%s;%d;%d;%s;%d;%s;%d;%d;%s\n",
diff --git a/gcc/testsuite/gcc.dg/ipa/pr119852.c 
b/gcc/testsuite/gcc.dg/ipa/pr119852.c
new file mode 100644
index ..eab8d21293cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr119852.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-clones"  } */
+
+typedef struct rtx_def *rtx;
+enum rtx_code {
+  LAST_AND_UNUSED_RTX_CODE};
+extern const char * const rtx_format[((int) LAST_AND_UNUSED_RTX_CODE)];
+struct rtx_def {
+  enum rtx_code code;
+};
+typedef int (*rtx_function) (rtx *, void *);
+extern int for_each_rtx (rtx *, rtx_function, void *);
+int
+replace_label (rtx *x, void *data)
+{
+  rtx l = *x;
+  if (l == (rtx) 0)
+{
+ {
+   rtx new_c, new_l;
+   for_each_rtx (&new_c, replace_label, data);
+ }
+}
+}
+static int
+for_each_rtx_1 (rtx exp, int n, rtx_function f, void *data)
+{
+  int result, i, j;
+  const char *format = (rtx_format[(int) (((enum rtx_code) (exp)->code))]);
+  rtx *x;
+  for (; format[n] != '\0'; n++)
+{
+  switch (format[n])
+ {
+ case 'e':
+   result = (*f) (x, data);
+ {
+   result = for_each_rtx_1 (*x, i, f, data);
+ }
+ }
+}
+}
+int
+for_each_rtx (rtx *x, rtx_function f, void *data)
+{
+  int i;
+  return for_each_rtx_1 (*x, i, f, data);
+}
+
+/* { dg-final { scan-ipa-dump-not "(null)"  "ipa-clones"  } } */


[gcc r16-449] gimple: Add gimple_with_undefined_signed_overflow and use it [PR111276]

2025-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d884e9c5c746a41b069905f990fa72b9e550

commit r16-449-gd884e9c5c746a41b069905f990fa72b9e550
Author: Andrew Pinski 
Date:   Thu Sep 26 23:25:17 2024 +

gimple: Add gimple_with_undefined_signed_overflow and use it [PR111276]

While looking into the ifcombine, I noticed that rewrite_to_defined_overflow
was rewriting already defined code. In the previous attempt at fixing this,
the review mentioned we should not be calling rewrite_to_defined_overflow
in those cases. The places which called rewrite_to_defined_overflow didn't
always check the lhs of the assignment. This fixes the problem by
introducing a helper function which is to be used before calling
rewrite_to_defined_overflow.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111276
* gimple-fold.cc (arith_code_with_undefined_signed_overflow): Make 
static.
(gimple_with_undefined_signed_overflow): New function.
* gimple-fold.h (arith_code_with_undefined_signed_overflow): Remove.
(gimple_with_undefined_signed_overflow): Add declaration.
* tree-if-conv.cc (if_convertible_gimple_assign_stmt_p): Use
gimple_with_undefined_signed_overflow instead of manually
checking lhs and the code of the stmt.
(predicate_statements): Likewise.
* tree-ssa-ifcombine.cc (ifcombine_rewrite_to_defined_overflow): 
Likewise.
* tree-ssa-loop-im.cc (move_computations_worker): Likewise.
* tree-ssa-reassoc.cc (update_range_test): Likewise. Reformat.
* tree-scalar-evolution.cc (final_value_replacement_loop): Use
gimple_with_undefined_signed_overflow instead of
arith_code_with_undefined_signed_overflow.
* tree-ssa-loop-split.cc (split_loop): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-fold.cc   | 26 +-
 gcc/gimple-fold.h|  2 +-
 gcc/tree-if-conv.cc  | 16 +++-
 gcc/tree-scalar-evolution.cc |  5 +
 gcc/tree-ssa-ifcombine.cc| 10 ++
 gcc/tree-ssa-loop-im.cc  |  6 +-
 gcc/tree-ssa-loop-split.cc   |  5 +
 gcc/tree-ssa-reassoc.cc  | 40 
 8 files changed, 50 insertions(+), 60 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 5884b79cce5a..7721795b20df 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -10573,7 +10573,7 @@ gimple_fold_indirect_ref (tree t)
integer types involves undefined behavior on overflow and the
operation can be expressed with unsigned arithmetic.  */
 
-bool
+static bool
 arith_code_with_undefined_signed_overflow (tree_code code)
 {
   switch (code)
@@ -10590,6 +10590,30 @@ arith_code_with_undefined_signed_overflow (tree_code 
code)
 }
 }
 
+/* Return true if STMT has an operation that operates on a signed
+   integer types involves undefined behavior on overflow and the
+   operation can be expressed with unsigned arithmetic.  */
+
+bool
+gimple_with_undefined_signed_overflow (gimple *stmt)
+{
+  if (!is_gimple_assign (stmt))
+return false;
+  tree lhs = gimple_assign_lhs (stmt);
+  if (!lhs)
+return false;
+  tree lhs_type = TREE_TYPE (lhs);
+  if (!INTEGRAL_TYPE_P (lhs_type)
+  && !POINTER_TYPE_P (lhs_type))
+return false;
+  if (!TYPE_OVERFLOW_UNDEFINED (lhs_type))
+return false;
+  if (!arith_code_with_undefined_signed_overflow
+   (gimple_assign_rhs_code (stmt)))
+return false;
+  return true;
+}
+
 /* Rewrite STMT, an assignment with a signed integer or pointer arithmetic
operation that can be transformed to unsigned arithmetic by converting
its operand, carrying out the operation in the corresponding unsigned
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 2790d0ffc652..5fcfdcda81b1 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -59,7 +59,7 @@ extern tree gimple_get_virt_method_for_vtable (HOST_WIDE_INT, 
tree,
 extern tree gimple_fold_indirect_ref (tree);
 extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *);
 extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *);
-extern bool arith_code_with_undefined_signed_overflow (tree_code);
+extern bool gimple_with_undefined_signed_overflow (gimple *);
 extern void rewrite_to_defined_overflow (gimple_stmt_iterator *);
 extern gimple_seq rewrite_to_defined_overflow (gimple *);
 extern void replace_call_with_value (gimple_stmt_iterator *, tree);
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 5b63bf67fe0e..fe8aee057b34 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1066,11 +1066,7 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
fprintf (dump_file, "tree could trap...\n");
   return false;
 }
-  else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-   || POINTER_TYPE_P (TREE_TYPE (lhs)))

[gcc r16-445] libgomp.fortran/map-alloc-comp-9{, -usm}.f90: Add unified_shared_memory variant

2025-05-07 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:9565076f9b810541aeb63cb621d694326aa12216

commit r16-445-g9565076f9b810541aeb63cb621d694326aa12216
Author: Tobias Burnus 
Date:   Wed May 7 13:46:51 2025 +0200

libgomp.fortran/map-alloc-comp-9{,-usm}.f90: Add unified_shared_memory 
variant

When host memory is device accessible - independent of whether mapping is
done or not (i.e. self map) - the 'vtab' pointer, which stores the
dynamic type's type and size information, becomes accessible.

In principle, we want to test: USM available but mapping is still done, but
as there is no simple + reliable not-crashing way to test for this, those
checks are skipped in the (pre)existing test file map-alloc-comp-9.f90.

Or rather: those are only active with self-maps, which is currently only 
true
for the host.

This commit adds map-alloc-comp-9-usm.f90 which runs the same test with
'omp requires unified_shared_memory'.  While OpenMP permits both actual
mapping and self maps with this flag, it in theory covers the missing cases.
However, currently, GCC always uses self maps with USM. Still, having a
device-run self-maps check is better than nothing, even if it misses the
most interesting case.

libgomp/ChangeLog:

* testsuite/libgomp.fortran/map-alloc-comp-9.f90: Process 
differently
when USE_USM_REQUIREMENT is set.
* testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90: New test.

Diff:
---
 .../libgomp.fortran/map-alloc-comp-9-usm.f90  | 11 +++
 .../testsuite/libgomp.fortran/map-alloc-comp-9.f90| 19 +++
 2 files changed, 30 insertions(+)

diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 
b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
new file mode 100644
index ..90378c0e42a2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90
@@ -0,0 +1,11 @@
+! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" }
+!
+! We silence the warning:
+!  Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp]
+!
+! Ensure that polymorphic mapping is diagnosed as undefined behavior
+! Ensure that static access to polymorphic variables works
+
+! Run map-alloc-comp-9.f90 in unified-shared-memory mode
+
+#include "map-alloc-comp-9.f90"
diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 
b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
index 3cec39218f56..26c73d75c09b 100644
--- a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
+++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90
@@ -1,8 +1,19 @@
+! { dg-additional-options "-cpp" }
+!
 ! Ensure that polymorphic mapping is diagnosed as undefined behavior
 ! Ensure that static access to polymorphic variables works
 
+! Some extended tests are only run with shared memory
+! To enforce this (where possible) on the device side:
+!   #define USE_USM_REQUIREMENT
+! which is done in map-alloc-comp-9-usm.f90
+
 subroutine test(case)
 implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+  !$omp requires unified_shared_memory
+#endif
+
 type t
   integer :: x(4)
 end type t
@@ -73,10 +84,14 @@ var4%y2(2)%y%x%x = -7 * [,,,]
 var4%y2(2)%y%x2(1)%x = -8 * [,,,]
 var4%y2(2)%y%x2(2)%x = -9 * [,,,]
 
+#ifdef USE_USM_REQUIREMENT
+is_shared_mem = .true.
+#else
 is_shared_mem = .false.
 !$omp target map(to: is_shared_mem)
   is_shared_mem = .true.
 !$omp end target
+#endif
 
 if (case == 1) then
   ! implicit mapping
@@ -532,6 +547,10 @@ end subroutine test
 program main
   use omp_lib
   implicit none(type, external)
+#ifdef USE_USM_REQUIREMENT
+  !$omp requires unified_shared_memory
+#endif
+
   interface
 subroutine test(case)
   integer, value :: case


[gcc r15-9632] Document option -fdump-ipa-clones

2025-05-07 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:99e2f1138c61e851cfa08712aa73e2689d314fd1

commit r15-9632-g99e2f1138c61e851cfa08712aa73e2689d314fd1
Author: Martin Jambor 
Date:   Tue May 6 17:28:42 2025 +0200

Document option -fdump-ipa-clones

I have noticed that the option -fdump-ipa-clones is not documented
although there are users who depend on it.  This patch adds the
missing documentation along with the description of the information it
dumps and the format it uses.

I am never quite sure which of the texinfo mark-ups is the most
appropriate in which situation, I'll of course incorporate any
feedback on this as well as the general wording of the text.

After we settle on a version, I'd like to backport the documentation
also at least to GCC 15, 14 and 13.

Is it perhaps OK for master and the branches or what would better be
changed?

Thanks,

Martin

gcc/ChangeLog:

2025-04-23  Martin Jambor  

* doc/invoke.texi (Developer Options): Document -fdump-ipa-clones.

(cherry picked from commit 6ecc2fee06bdd60da0e9b3fe6660b553dbdca3ca)

Diff:
---
 gcc/doc/invoke.texi | 87 +
 1 file changed, 87 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c2e1bf8031b8..617a3d8ae182 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20748,6 +20748,93 @@ By default, the dump will contain messages about 
successful
 optimizations (equivalent to @option{-optimized}) together with
 low-level details about the analysis.
 
+@opindex fdump-ipa-clones
+@item -fdump-ipa-clones
+
+Create a dump file containing information about creation of call graph
+node clones and removals of call graph nodes during inter-procedural
+optimizations and transformations.  Its main intended use is that tools
+that create live-patches can determine the set of functions that need to
+be live-patched to completely replace a particular function (see
+@option{-flive-patching}).  The file name is generated by appending
+suffix @code{ipa-clones} to the source file name, and the file is
+created in the same directory as the output file.  Each entry in the
+file is on a separate line containing semicolon separated fields.
+
+In the case of call graph clone creation, the individual fields are:
+
+@enumerate
+@item
+String @code{Callgraph clone}.
+
+@item
+Name of the function being cloned as it is presented to the assembler.
+
+@item
+A number that uniquely represents the function being cloned in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file where the function is defined.
+
+@item
+The line on which the function definition is located.
+
+@item
+The column where the function definition is located.
+
+@item
+Name of the new function clone as it is presented to the assembler.
+
+@item
+A number that uniquely represents the new function clone in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file where the source code location of the
+new clone points to.
+
+@item
+The line to which the source code location of the new clone points to.
+
+@item
+The column to which the source code location of the new clone points to.
+
+@item
+A string that determines the reason for cloning.
+
+@end enumerate
+
+In the case of call graph clone removal, the individual fields are:
+
+@enumerate
+@item
+String @code{Callgraph removal}.
+
+@item
+Name of the function being removed as it would be presented to the assembler.
+
+@item
+A number that uniquely represents the function being cloned in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file where the function is defined.
+
+@item
+The line on which the function definition is located.
+
+@item
+The column where the function definition is located.
+
+@end enumerate
+
 @opindex fdump-lang
 @item -fdump-lang
 Dump language-specific information.  The file name is made by appending


[gcc r16-456] [PATCH] RISC-V: Minimal support for zama16b extension.

2025-05-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:974b079741f902fcf4323dfcecbbffdb9f56f3bf

commit r16-456-g974b079741f902fcf4323dfcecbbffdb9f56f3bf
Author: Dongyan Chen 
Date:   Wed May 7 11:33:06 2025 -0600

[PATCH] RISC-V: Minimal support for zama16b extension.

This patch adds support for the zama16b extension[1], enabling GCC to
recognize and process the extension correctly at compile time.

[1] https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: New extension.
* config/riscv/riscv.opt: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-48.c: New test.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 2 ++
 gcc/config/riscv/riscv.opt   | 2 ++
 gcc/testsuite/gcc.target/riscv/arch-48.c | 5 +
 3 files changed, 9 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0233e1a108be..ca14eb96b253 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -327,6 +327,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zalrsc", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zabha", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zacas", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zama16b", ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"zba", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbb", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1657,6 +1658,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   RISCV_EXT_FLAG_ENTRY ("zalrsc",  x_riscv_za_subext, MASK_ZALRSC),
   RISCV_EXT_FLAG_ENTRY ("zabha",   x_riscv_za_subext, MASK_ZABHA),
   RISCV_EXT_FLAG_ENTRY ("zacas",   x_riscv_za_subext, MASK_ZACAS),
+  RISCV_EXT_FLAG_ENTRY ("zama16b", x_riscv_za_subext, MASK_ZAMA16B),
 
   RISCV_EXT_FLAG_ENTRY ("zba", x_riscv_zb_subext, MASK_ZBA),
   RISCV_EXT_FLAG_ENTRY ("zbb", x_riscv_zb_subext, MASK_ZBB),
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 9e471be4055c..80593ee139c1 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -274,6 +274,8 @@ Mask(ZA64RS)  Var(riscv_za_subext)
 
 Mask(ZA128RS) Var(riscv_za_subext)
 
+Mask(ZAMA16B) Var(riscv_za_subext)
+
 TargetVariable
 int riscv_zb_subext
 
diff --git a/gcc/testsuite/gcc.target/riscv/arch-48.c 
b/gcc/testsuite/gcc.target/riscv/arch-48.c
new file mode 100644
index ..58a558ec1924
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-48.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zama16b -mabi=lp64" } */
+int foo()
+{
+}


[gcc r16-459] Canonicalize vec_merge in simplify_ternary_operation

2025-05-07 Thread Pengxuan Zheng via Gcc-cvs
https://gcc.gnu.org/g:9b13bea07706a7cae0185f8a860d67209308c050

commit r16-459-g9b13bea07706a7cae0185f8a860d67209308c050
Author: Pengxuan Zheng 
Date:   Thu Feb 6 16:16:32 2025 -0800

Canonicalize vec_merge in simplify_ternary_operation

Similar to the canonicalization done in combine, we canonicalize vec_merge
with swap_commutative_operands_p in simplify_ternary_operation too.

gcc/ChangeLog:

* config/aarch64/aarch64-protos.h (aarch64_exact_log2_inverse): New.
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set_zero):
Update pattern accordingly.
* config/aarch64/aarch64.cc (aarch64_exact_log2_inverse): New.
* simplify-rtx.cc (simplify_context::simplify_ternary_operation):
Canonicalize vec_merge.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-protos.h |  1 +
 gcc/config/aarch64/aarch64-simd.md  | 10 ++
 gcc/config/aarch64/aarch64.cc   | 10 ++
 gcc/simplify-rtx.cc |  7 +++
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index c83c35c6d71e..c935e7bcf33d 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1055,6 +1055,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
  rtx *, rtx *, rtx *);
 void aarch64_expand_subvti (rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
+int aarch64_exact_log2_inverse (unsigned int, rtx);
 
 
 /* Initialize builtins for SIMD intrinsics.  */
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index e2afe87e5130..1099e742cbf7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1193,12 +1193,14 @@
 (define_insn "aarch64_simd_vec_set_zero"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
-   (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
-   (match_operand:VALL_F16 3 "register_operand" "0")
+   (match_operand:VALL_F16 1 "register_operand" "0")
+   (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
(match_operand:SI 2 "immediate_operand" "i")))]
-  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
+  "TARGET_SIMD && aarch64_exact_log2_inverse (, operands[2]) >= 0"
   {
-int elt = ENDIAN_LANE_N (, exact_log2 (INTVAL (operands[2])));
+int elt = ENDIAN_LANE_N (,
+aarch64_exact_log2_inverse (,
+operands[2]));
 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 return "ins\\t%0.[%p2], zr";
   }
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2dc5f4c4b59d..9e3f2885bccb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23914,6 +23914,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned 
int num_operands,
   return true;
 }
 
+/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest
+   NELTS bits, if OP is a power of 2.  Otherwise, returns -1.  */
+
+int
+aarch64_exact_log2_inverse (unsigned int nelts, rtx op)
+{
+  return exact_log2 ((~INTVAL (op))
+& ((HOST_WIDE_INT_1U << nelts) - 1));
+}
+
 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
HIGH (exclusive).  */
 void
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 7bcbe11370fa..b34fd2f4b9ea 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7387,6 +7387,13 @@ simplify_context::simplify_ternary_operation (rtx_code 
code, machine_mode mode,
  return gen_rtx_CONST_VECTOR (mode, v);
}
 
+ if (swap_commutative_operands_p (op0, op1)
+ /* Two operands have same precedence, then first bit of mask
+select first operand.  */
+ || (!swap_commutative_operands_p (op1, op0) && !(sel & 1)))
+   return simplify_gen_ternary (code, mode, mode, op1, op0,
+GEN_INT (~sel & mask));
+
  /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n)
 if no element from a appears in the result.  */
  if (GET_CODE (op0) == VEC_MERGE)


[gcc r16-437] libstdc++: Remove use of undefined GLIBCXX_LANG_{PUSH, POP} [PR120147]

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:83ef989ee189902ca1d434feb0f3dd50519e92cb

commit r16-437-g83ef989ee189902ca1d434feb0f3dd50519e92cb
Author: Jonathan Wakely 
Date:   Wed May 7 10:44:49 2025 +0100

libstdc++: Remove use of undefined GLIBCXX_LANG_{PUSH,POP} [PR120147]

Commit r16-427-g86627faec10da5 was using the new GLIBCXX_LANG_PUSH and
GLIBCXX_LANG_POP macros from a change that I haven't pushed yet,
resulting in changes to CXXFLAGS not being restored after the
GLIBCXX_ENABLE_BACKTRACE checks.

libstdc++-v3/ChangeLog:

PR libstdc++/120147
* acinclude.m4 (GLIBCXX_ENABLE_BACKTRACE): Restore use of
AC_LANG_CPLUSPLUS.
* configure: Regenerate.

Diff:
---
 libstdc++-v3/acinclude.m4 |  6 --
 libstdc++-v3/configure| 20 
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 0fc74d00a98f..204bed5b27be 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -5290,7 +5290,8 @@ AC_DEFUN([GLIBCXX_ENABLE_BACKTRACE], [
 
   BACKTRACE_CPPFLAGS="-D_GNU_SOURCE"
 
-  GLIBCXX_LANG_PUSH
+  AC_LANG_CPLUSPLUS
+  old_CXXFLAGS="$CXXFLAGS"
 
   # libbacktrace's own configure.ac only tests atomics for int,
   # but the code actually uses atomics for size_t and pointers as well.
@@ -5356,7 +5357,8 @@ EOF
 rm -f conftest*
   fi
 
-  GLIBCXX_LANG_POP
+  CXXFLAGS="$old_CXXFLAGS"
+  AC_LANG_RESTORE
 
   if test "$glibcxx_cv_libbacktrace_atomics" = yes; then
 BACKTRACE_CPPFLAGS="$BACKTRACE_CPPFLAGS -DHAVE_ATOMIC_FUNCTIONS=1"
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 3fd03b8a95d6..0529ff5708f1 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -53537,7 +53537,13 @@ fi
 
   BACKTRACE_CPPFLAGS="-D_GNU_SOURCE"
 
-  GLIBCXX_LANG_PUSH
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS 
conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+  old_CXXFLAGS="$CXXFLAGS"
 
   # libbacktrace's own configure.ac only tests atomics for int,
   # but the code actually uses atomics for size_t and pointers as well.
@@ -53578,7 +53584,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
+if ac_fn_cxx_try_link "$LINENO"; then :
   glibcxx_cv_libbacktrace_atomics=yes
 else
   glibcxx_cv_libbacktrace_atomics=no
@@ -53595,7 +53601,7 @@ $as_echo "$glibcxx_cv_libbacktrace_atomics" >&6; }
 CXXFLAGS='-O0 -S'
 
 cat > conftest.$ac_ext << EOF
-#line 53598 "configure"
+#line 53604 "configure"
 #include 
 int main()
 {
@@ -53633,7 +53639,13 @@ $as_echo "$glibcxx_cv_libbacktrace_atomics" >&6; }
 rm -f conftest*
   fi
 
-  GLIBCXX_LANG_POP
+  CXXFLAGS="$old_CXXFLAGS"
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
 
   if test "$glibcxx_cv_libbacktrace_atomics" = yes; then
 BACKTRACE_CPPFLAGS="$BACKTRACE_CPPFLAGS -DHAVE_ATOMIC_FUNCTIONS=1"


[gcc r14-11747] vect: Use original LHS type for gather pattern [PR118950].

2025-05-07 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:bed0053a28e238cb9c079a3ec89341541390f933

commit r14-11747-gbed0053a28e238cb9c079a3ec89341541390f933
Author: Robin Dapp 
Date:   Fri Feb 21 07:19:40 2025 +0100

vect: Use original LHS type for gather pattern [PR118950].

In PR118950 we do not zero masked elements in a gather load.
While recognizing a gather/scatter pattern we do not use the original
type of the LHS.  This matters because the type can differ with bool
patterns (e.g. _Bool vs unsigned char) and we don't notice the need
for zeroing out the padding bytes.

This patch just uses the original LHS's type.

PR middle-end/118950

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Use
original LHS's type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118950.c: New test.

(cherry picked from commit f3d4208e798afafcba5246334004e9646e390681)

Diff:
---
 .../gcc.target/riscv/rvv/autovec/pr118950.c| 29 ++
 gcc/tree-vect-patterns.cc  |  3 ++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
new file mode 100644
index ..604d4264eac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+unsigned char a;
+long long r;
+_Bool h = 1;
+short j[23];
+_Bool k[3][23];
+
+void b(_Bool h, short j[], _Bool k[][23]) {
+  for (int m = 0; m < 23; m += 3)
+for (short n = 0; n < 22; n += 4)
+  a = ({
+unsigned char o = a;
+unsigned char p = j[n] ? h : k[m][n];
+o > p ? o : p;
+  });
+}
+
+int main() {
+  for (int m = 0; m < 23; ++m)
+j[m] = 10;
+  b(h, j, k);
+  r = a;
+  if (r != 1)
+__builtin_abort ();
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87c2acff386d..ed6cac77d2a1 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6264,7 +6264,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   else
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
   offset, scale, zero);
-  tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
+  tree lhs = gimple_get_lhs (stmt_info->stmt);
+  tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   gimple_call_set_lhs (pattern_stmt, load_lhs);
 }
   else


[gcc r16-434] Fortran: Source allocation of pure module function rejected [PR119948]

2025-05-07 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:625b805544101ae90fbe789a5eeba44cd14e89fb

commit r16-434-g625b805544101ae90fbe789a5eeba44cd14e89fb
Author: Paul Thomas 
Date:   Wed May 7 08:52:52 2025 +0100

Fortran: Source allocation of pure module function rejected [PR119948]

2025-05-07  Paul Thomas  
and Steven G. Kargl  

gcc/fortran
PR fortran/119948
* primary.cc (match_variable): Module procedures with sym the
same as result can be treated as variables, although marked
external.

gcc/testsuite/
PR fortran/119948
* gfortran.dg/pr119948.f90: Update to incorporate failing test,
where module procedure is the result. Test submodule cases.

Diff:
---
 gcc/fortran/primary.cc |  2 +-
 gcc/testsuite/gfortran.dg/pr119948.f90 | 51 --
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index 72ecc7ccf934..ec4e13548c4c 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
@@ -4396,7 +4396,7 @@ match_variable (gfc_expr **result, int equiv_flag, int 
host_flag)
 case FL_PROCEDURE:
   /* Check for a nonrecursive function result variable.  */
   if (sym->attr.function
- && !sym->attr.external
+ && (!sym->attr.external || sym->abr_modproc_decl)
  && sym->result == sym
  && (gfc_is_function_return_value (sym, gfc_current_ns)
  || (sym->attr.entry
diff --git a/gcc/testsuite/gfortran.dg/pr119948.f90 
b/gcc/testsuite/gfortran.dg/pr119948.f90
index 9ecb08095613..2e36fae5a9de 100644
--- a/gcc/testsuite/gfortran.dg/pr119948.f90
+++ b/gcc/testsuite/gfortran.dg/pr119948.f90
@@ -1,7 +1,8 @@
-! { dg-do compile }
+! { dg-do run }
 !
-! Test the fix for PR119948, which used to fail as indicated below with,
-! "Error: Bad allocate-object at (1) for a PURE procedure"
+! Test the fix for PR119948, which used to fail as indicated below with:
+! (1) "Error: Bad allocate-object at (1) for a PURE procedure"
+! (2) "Error: ‘construct_test2 at (1) is not a variable"
 !
 ! Contributed by Damian Rouson  
 !
@@ -18,33 +19,65 @@ module test_m
   type(test_t) :: test
   type(test_t), intent(in) :: arg
 end function
-pure module function construct_test_sub(arg) result(test)
+
+pure module function construct_test2(arg)
+  implicit none
+  type(test_t) construct_test2
+  type(test_t), intent(in) :: arg
+end function
+
+pure module function construct_test_3(arg) result(test)
   implicit none
   type(test_t) :: test
   type(test_t), intent(in) :: arg
 end function
+
+pure module function construct_test_4(arg)
+  implicit none
+  type(test_t) :: construct_test_4
+  type(test_t), intent(in) :: arg
+end function
   end interface
 
 contains
   module procedure construct_test
-allocate(test%i, source = arg%i) ! Used to fail here
+allocate(test%i, source = arg%i) ! Fail #1
+  end procedure
+
+  module procedure construct_test2
+allocate(construct_test2%i, source = arg%i)! Fail #2
   end procedure
 end module
 
 submodule (test_m)test_s
 contains
-  module procedure construct_test_sub
+  module procedure construct_test_3
 allocate(test%i, source = arg%i) ! This was OK.
   end procedure
+
+  module procedure construct_test_4
+allocate(construct_test_4%i, source = arg%i) ! This was OK.
+  end procedure
 end submodule
 
   use test_m
   type(test_t) :: res, dummy
-  dummy%i = 42
+!
+  dummy%i = int (rand () * 1e6)
   res = construct_test (dummy)
   if (res%i /= dummy%i) stop 1
-  dummy%i = -42
-  res = construct_test_sub (dummy)
+!
+  dummy%i = int (rand () * 1e6)
+  res = construct_test2 (dummy)
   if (res%i /= dummy%i) stop 2
+!
+  dummy%i = int (rand () * 1e6)
+  res = construct_test_3 (dummy)
+  if (res%i /= dummy%i) stop 3
+
+  dummy%i = int (rand () * 1e6)
+  res = construct_test_4 (dummy)
+  if (res%i /= dummy%i) stop 4
+
   deallocate (res%i, dummy%i)
 end


[gcc r16-435] Fix name mismatch for fortran.

2025-05-07 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:ae987baeb5fb70877fd007db80b77e374f676c76

commit r16-435-gae987baeb5fb70877fd007db80b77e374f676c76
Author: hongtao.liu 
Date:   Tue Jan 14 01:13:22 2025 +0100

Fix name mismatch for fortran.

The function name in afdo_string_table is step3d_t_tile,
but DECL_ASSEMBLER_NAME (edge->callee->decl) gets
__step3d_t_mod_MOD_step3d_t_tile.  It looks like the prefix is not in the
debug string table.
The patch uses
afdo_string_table->get_index_by_decl (edge->callee->decl) instead.

gcc/ChangeLog:

PR gcov-profile/118508
* auto-profile.cc
(autofdo_source_profile::get_callsite_total_count): Fix name
mismatch for fortran.

Diff:
---
 gcc/auto-profile.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 7e0e8c66124e..9966d9312e37 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -848,8 +848,8 @@ autofdo_source_profile::get_callsite_total_count (
 
   function_instance *s = get_function_instance_by_inline_stack (stack);
   if (s == NULL
-  || afdo_string_table->get_index (IDENTIFIER_POINTER (
- DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
+  ||(afdo_string_table->get_index_by_decl (edge->callee->decl)
+!= s->name()))
 return 0;
 
   return s->total_count ();


[gcc r16-436] x86: Insert extra move for mode size smaller than natural size

2025-05-07 Thread H.J. Lu via Gcc-cvs
https://gcc.gnu.org/g:b8c4b6aa8e0521770c6f9fd48dd13dd85e3a2fc9

commit r16-436-gb8c4b6aa8e0521770c6f9fd48dd13dd85e3a2fc9
Author: H.J. Lu 
Date:   Thu May 1 06:30:41 2025 +0800

x86: Insert extra move for mode size smaller than natural size

When generating a SUBREG from V16QI to V2HF, validate_subreg fails since
V2HF is a floating point vector and its size (4 bytes) is smaller than its
natural size (word size).  Insert an extra move with a QI vector SUBREG of
the same size to avoid validate_subreg failure.

gcc/

PR target/120036
* config/i386/i386-features.cc (ix86_get_vector_load_mode):
Handle 8/4/2 bytes.
(remove_redundant_vector_load): If the mode size is smaller than
its natural size, first insert an extra move with a QI vector
SUBREG of the same size to avoid validate_subreg failure.

gcc/testsuite/

PR target/120036
* g++.target/i386/pr120036.C: New test.
* gcc.target/i386/pr117839-3a.c: Likewise.
* gcc.target/i386/pr117839-3b.c: Likewise.

Signed-off-by: H.J. Lu 

Diff:
---
 gcc/config/i386/i386-features.cc|  39 +-
 gcc/testsuite/g++.target/i386/pr120036.C| 113 
 gcc/testsuite/gcc.target/i386/pr117839-3a.c |  22 ++
 gcc/testsuite/gcc.target/i386/pr117839-3b.c |   5 ++
 4 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 31f3ee2ef171..1ba5ac4faa4c 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size)
 mode = V64QImode;
   else if (size == 32)
 mode = V32QImode;
-  else
+  else if (size == 16)
 mode = V16QImode;
+  else if (size == 8)
+mode = V8QImode;
+  else if (size == 4)
+mode = V4QImode;
+  else if (size == 2)
+mode = V2QImode;
+  else
+gcc_unreachable ();
   return mode;
 }
 
@@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx 
vector_const,
   if (SUBREG_P (dest) || mode == vector_mode)
replace = vector_const;
   else
-   replace = gen_rtx_SUBREG (mode, vector_const, 0);
+   {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+   {
+ /* If the mode size is smaller than its natural size,
+first insert an extra move with a QI vector SUBREG
+of the same size to avoid validate_subreg failure.  */
+ machine_mode vmode = ix86_get_vector_load_mode (size);
+ rtx vreg;
+ if (mode == vmode)
+   vreg = vector_const;
+ else
+   {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ rtx_insn *vinsn = emit_insn_before (pat, insn);
+ df_insn_rescan (vinsn);
+   }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+   }
+ else
+   replace = gen_rtx_SUBREG (mode, vector_const, 0);
+   }
 
-  /* NB: Don't run recog_memoized here since vector SUBREG may not
-be valid.  Let LRA handle vector SUBREG.  */
   SET_SRC (set) = replace;
   /* Drop possible dead definitions.  */
   PATTERN (insn) = set;
+  INSN_CODE (insn) = -1;
+  recog_memoized (insn);
   df_insn_rescan (insn);
 }
 }
diff --git a/gcc/testsuite/g++.target/i386/pr120036.C 
b/gcc/testsuite/g++.target/i386/pr120036.C
new file mode 100644
index ..a2fc24f12863
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr120036.C
@@ -0,0 +1,113 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-O2 -std=c++11 -march=sapphirerapids -fPIC" } */
+
+typedef _Float16 Native;
+struct float16_t
+{
+  Native native;
+  float16_t ();
+  float16_t (Native arg) : native (arg) {}
+  operator Native ();
+  float16_t
+  operator+ (float16_t rhs)
+  {
+return native + rhs.native;
+  }
+  float16_t
+  operator* (float16_t)
+  {
+return native * native;
+  }
+};
+template  struct Simd
+{
+  static constexpr int kPrivateLanes = N;
+};
+template  struct ClampNAndPow2
+{
+  using type = Simd;
+};
+template  struct CappedTagChecker
+{
+  static constexpr int N = sizeof (int) ? kLimit : 0;
+  using type = typename ClampNAndPow2::type;
+};
+template 
+using CappedTag = typename CappedTagChecker::type;
+template 
+int
+Lanes (D)
+{
+  return D::kPrivateLanes;
+}
+template  int Zero (D);
+template  using VFromD = decltype (Zero (D ()));
+struct Vec512
+{
+  __attribute__ ((__vector_size__ (16))) _Float16 raw;
+};
+Vec512 Zero (Simd<2>);
+template  void ReduceSum (D, VFromD);
+struct Dot
+{
+  template 
+  static T
+  Compute (D d, T *pa, int num_elements)
+  {

[gcc r16-446] AArch64: Fold SVE load/store with certain ptrue patterns to LDR/STR.

2025-05-07 Thread Jennifer Schmitz via Gcc-cvs
https://gcc.gnu.org/g:210d06502f22964c7214586c54f8eb54a6965bfd

commit r16-446-g210d06502f22964c7214586c54f8eb54a6965bfd
Author: Jennifer Schmitz 
Date:   Fri Feb 14 00:46:13 2025 -0800

AArch64: Fold SVE load/store with certain ptrue patterns to LDR/STR.

SVE loads/stores using predicates that select the bottom 8, 16, 32, 64,
or 128 bits of a register can be folded to ASIMD LDR/STR, thus avoiding the
predicate.
For example,
svuint8_t foo (uint8_t *x) {
  return svld1 (svwhilelt_b8 (0, 16), x);
}
was previously compiled to:
foo:
ptrue   p3.b, vl16
ld1b    z0.b, p3/z, [x0]
ret

and is now compiled to:
foo:
ldr q0, [x0]
ret

The optimization is applied during the expand pass and was implemented
by making the following changes to maskload<mode><vpred> and
maskstore<mode><vpred>:
- the existing define_insns were renamed and new define_expands for
  maskloads and maskstores were added with nonmemory_operand as predicate
  such that the SVE predicate matches both register operands and
  constant-vector operands.
- if the SVE predicate is a constant vector and contains a pattern as
  described above, an ASIMD load/store is emitted instead of the SVE
  load/store.

The patch implements the optimization for LD1 and ST1, for 8-bit, 16-bit,
32-bit, 64-bit, and 128-bit moves, for all full SVE data vector modes.

Follow-up patches for LD2/3/4 and ST2/3/4 and potentially partial SVE vector
modes are planned.

The patch was bootstrapped and tested on aarch64-linux-gnu, no regression.

Signed-off-by: Jennifer Schmitz 

gcc/
PR target/117978
* config/aarch64/aarch64-protos.h: Declare
aarch64_emit_load_store_through_mode and aarch64_expand_maskloadstore.
* config/aarch64/aarch64-sve.md
(maskload<mode><vpred>): New define_expand folding maskloads with
certain predicate patterns to ASIMD loads.
(*aarch64_maskload<mode><vpred>): Renamed from
maskload<mode><vpred>.
(maskstore<mode><vpred>): New define_expand folding maskstores with
certain predicate patterns to ASIMD stores.
(*aarch64_maskstore<mode><vpred>): Renamed from
maskstore<mode><vpred>.
* config/aarch64/aarch64.cc
(aarch64_emit_load_store_through_mode): New function emitting a
load/store through subregs of a given mode.
(aarch64_emit_sve_pred_move): Refactor to use
aarch64_emit_load_store_through_mode.
(aarch64_expand_maskloadstore): New function to emit ASIMD 
loads/stores
for maskloads/stores with SVE predicates with VL1, VL2, VL4, VL8, or
VL16 patterns.
(aarch64_partial_ptrue_length): New function returning number of 
leading
set bits in a predicate.

gcc/testsuite/
PR target/117978
* gcc.target/aarch64/sve/acle/general/whilelt_5.c: Adjust expected
outcome.
* gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c: New test.
* gcc.target/aarch64/sve/while_7.c: Adjust expected outcome.
* gcc.target/aarch64/sve/while_9.c: Adjust expected outcome.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h|  2 +
 gcc/config/aarch64/aarch64-sve.md  | 38 -
 gcc/config/aarch64/aarch64.cc  | 98 +++---
 .../aarch64/sve/acle/general/whilelt_5.c   | 24 --
 .../aarch64/sve/ldst_ptrue_pat_128_to_neon.c   | 81 ++
 gcc/testsuite/gcc.target/aarch64/sve/while_7.c |  4 +-
 gcc/testsuite/gcc.target/aarch64/sve/while_9.c |  2 +-
 7 files changed, 227 insertions(+), 22 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 1ca86c9d175d..c83c35c6d71e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1026,6 +1026,8 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int);
 rtx aarch64_ptrue_reg (machine_mode, machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
+void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
+bool aarch64_expand_maskloadstore (rtx *, machine_mode);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
 bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 7bf12ff25ccd..f39af6e24d51 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1286,7 +1286,24 @@
 ;; -
 
 ;; Predicated LD1 (single).
-(define_insn "maskload"
+(define_expand "maskload"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+   (unspec:SVE_ALL
+ [(match_operand

[gcc r16-447] i386: implement costs for float<->int conversions in ix86_vector_costs::add_stmt_cost

2025-05-07 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:2c8d632d9ed4e3aeee2156ba17fe631ecbc90dbf

commit r16-447-g2c8d632d9ed4e3aeee2156ba17fe631ecbc90dbf
Author: Jan Hubicka 
Date:   Wed May 7 15:33:44 2025 +0200

i386: implement costs for float<->int conversions in 
ix86_vector_costs::add_stmt_cost

This patch adds pattern matching for float<->int conversions both as normal
statements and promote_demote.  While updating promote_demote I noticed that
in cleanups I turned "stmt_cost =" into "int stmt_cost = " which turned
the existing FP costing into a NOOP.  I also added a comment on how demotes
are done when turning e.g. a 32bit value into an 8bit value (which is the
case of pr119919.c).

The patch disables vectorization in pr119919.c on generic tuning, but keeps
it at both zen and skylake+.  The underlying problem is the bad cost of
open-coded scatter, which is tracked by 119902, so I simply added
-mtune=znver1 so that the testcase keeps testing vectorization.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add
FLOAT_EXPR, FIX_TRUNC_EXPR and vec_promote_demote costs.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr119919.c: Add -mtune=znver1

Diff:
---
 gcc/config/i386/i386.cc  | 50 +---
 gcc/testsuite/gcc.target/i386/pr119919.c |  2 +-
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bef95ea18c87..fd36ea802c00 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25767,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
  (ix86_tune_cost, GET_MODE_BITSIZE (mode));
  break;
 
+   case FLOAT_EXPR:
+   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtsi2ss;
+   else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87.  */
+ stmt_cost = ix86_cost->fadd;
+   else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+   break;
+
+   case FIX_TRUNC_EXPR:
+   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtss2si;
+   else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87.  */
+ stmt_cost = ix86_cost->fadd;
+   else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+   break;
+
case COND_EXPR:
  {
/* SSE2 conditinal move sequence is:
@@ -25930,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
break;
   }
 
-  if (kind == vec_promote_demote
-  && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt
+  if (kind == vec_promote_demote)
 {
   int outer_size
= tree_to_uhwi
@@ -25941,16 +25960,25 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
= tree_to_uhwi
(TYPE_SIZE
(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt;
-  int stmt_cost = vec_fp_conversion_cost
-   (ix86_tune_cost, GET_MODE_BITSIZE (mode));
-  /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will 
end
-up doing two conversions and packing them.  */
+  bool inner_fp = FLOAT_TYPE_P
+   (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+  if (fp && inner_fp)
+   stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+  else if (fp && !inner_fp)
+   stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+  else if (!fp && inner_fp)
+   stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+  else
+   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+  /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
+greater than inner size we will end up doing two conversions and
+packing them.  We always pack pairs; if the size difference is greater
+it is split into multiple demote operations.  */
   if (inner_size > outer_size)
-   {
- int n = inner_size / outer_size;
- stmt_cost = stmt_cost * n
- + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
-   }
+   stmt_cost = stmt_cost * 2
+   + ix86_vec_cost (mode, ix86_cost->sse_op);
 }
 
   /* If we do elementwise loads into a vector then we are bound by
diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c 
b/gcc/testsuite/gcc.target/i386/pr119919.c
index ed646561bd1f..e39819f682db 100644
--- a/gcc/testsuite/gcc.target/i386/pr119919.c
+++ b/gcc/testsuite/gcc.target/i386/pr119919.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -msse2 -fdump-tree-ve

[gcc r16-450] libcpp: Further fixes for incorrect line numbers in large files [PR120061]

2025-05-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:edf745dc519ddbfef127e2789bf11bfbacd300b7

commit r16-450-gedf745dc519ddbfef127e2789bf11bfbacd300b7
Author: Jakub Jelinek 
Date:   Wed May 7 17:25:42 2025 +0200

libcpp: Further fixes for incorrect line numbers in large files [PR120061]

The backport of the PR108900 fix to 14 branch broke building chromium
because static_assert (__LINE__ == expected_line_number, ""); now triggers
as the __LINE__ values are off by one.
This isn't the case on the trunk and 15 branch because we've switched
to 64-bit location_t and so one actually needs far longer header files
to trigger it.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120061#c11
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120061#c12
contain (large) testcases in patch form which show on the 14 branch
that the first one used to fail before the PR108900 backport and now
works correctly, while the second one attempts to match the chromium
behavior and it used to pass before the PR108900 backport and now it
FAILs.
The two testcases show rare problematic cases, because
do_include_common -> parse_include -> check_eol -> check_eol_1 ->
cpp_get_token_1 -> _cpp_lex_token -> _cpp_lex_direct -> linemap_line_start
triggers there
  /* Allocate the new line_map.  However, if the current map only has a
 single line we can sometimes just increase its column_bits 
instead. */
  if (line_delta < 0
  || last_line != ORDINARY_MAP_STARTING_LINE_NUMBER (map)
  || SOURCE_COLUMN (map, highest) >= (1U << (column_bits - 
range_bits))
  || ( /* We can't reuse the map if the line offset is sufficiently
  large to cause overflow when computing location_t values. 
 */
  (to_line - ORDINARY_MAP_STARTING_LINE_NUMBER (map))
  >= (((uint64_t) 1)
  << (CHAR_BIT * sizeof (linenum_type) - column_bits)))
  || range_bits < map->m_range_bits)
map = linemap_check_ordinary
(const_cast <line_map *>
  (linemap_add (set, LC_RENAME,
ORDINARY_MAP_IN_SYSTEM_HEADER_P (map),
ORDINARY_MAP_FILE_NAME (map),
to_line)));
and so creates a new ordinary map on the line right after the
(problematic) #include line.
Now, in the spot that r14-11679-g8a884140c2bcb7 patched,
pfile->line_table->highest_location in all 3 tests (also
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120061#c13
) is before the decrement the start of the line after the #include line and 
so
the decrement is really desirable in that case to put highest_location
somewhere on the line where the #include actually is.
But at the same time it is also undesirable, because if we do decrement it,
then linemap_add LC_ENTER called from _cpp_do_file_change will then
  /* Generate a start_location above the current highest_location.
 If possible, make the low range bits be zero.  */
  location_t start_location = set->highest_location + 1;
  unsigned range_bits = 0;
  if (start_location < LINE_MAP_MAX_LOCATION_WITH_COLS)
range_bits = set->default_range_bits;
  start_location += (1 << range_bits) - 1;
  start_location &=  ~((1 << range_bits) - 1);

  linemap_assert (!LINEMAPS_ORDINARY_USED (set)
  || (start_location
  >= MAP_START_LOCATION (LINEMAPS_LAST_ORDINARY_MAP (set))));
and we can end up with the new LC_ENTER ordinary map having the same
start_location as the preceding LC_RENAME one.
Next thing that happens is computation of included_from:
  if (reason == LC_ENTER)
{
  if (set->depth == 0)
map->included_from = 0;
  else
/* The location of the end of the just-closed map.  */
map->included_from
  = (((map[0].start_location - 1 - map[-1].start_location)
  & ~((1 << map[-1].m_column_and_range_bits) - 1))
 + map[-1].start_location);
The normal case (e.g. with the testcase included at the start of this 
comment) is
that map[-1] starts somewhere earlier and so map->included_from computation 
above
nicely computes location_t which expands to the start of the #include line.
With r14-11679 reverted, for #c11 as well as #c12
map[0].start_location == map[-1].start_location above, and so it is
((location_t) -1 & ~((1 << map[-1].m_column_and_range_bits) - 1)))
+ map[-1].start_location,
which happens to be start of the #include line.
For #c11 map[0].start_location is 0x53a0 and map[-1] has
m_column_and_range_bits 7 and map[-2] has m_column_and_range_bits 12 and
map[0].included_from is set to 0x5320.
For #c12 map[0].start_location is 0x606c0402 and map[-2].start_location is
0x606

[gcc r16-448] Loop-IM: Hoist (non-expensive) stmts to executed all loop when running before PRE

2025-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8335fd561fa823d32556512c09dfce44463e8eaa

commit r16-448-g8335fd561fa823d32556512c09dfce44463e8eaa
Author: Andrew Pinski 
Date:   Sun May 4 19:24:09 2025 +

Loop-IM: Hoist (non-expensive) stmts to executed all loop when running 
before PRE

While fixing up how rewrite_to_defined_overflow works,
gcc.dg/Wrestrict-22.c started to fail.  This is because `d p+ 2` would be
moved by LIM and then be rewritten without using pointer plus.  The
rewriting part is correct behavior.  It only recently started to be moved
out, due to r16-190-g6901d56fea2132.
Which has the following comment:
```
When we run before PRE and PRE is active hoist all expressions
since PRE would do so anyway and we can preserve range info
but PRE cannot.
```
This is not true if hoisting past the always executed point; so, instead of 
hoisting
these statements all the way out of the max loops, take into account the 
always executed
loop too.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-ssa-loop-im.cc (compute_invariantness): Hoist to the always
executed point if ignoring the cost.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-loop-im.cc | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index a3ca5af3e3e4..b7f9f9befa54 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -1241,12 +1241,24 @@ compute_invariantness (basic_block bb)
   lim_data->cost);
}
 
-  if (lim_data->cost >= LIM_EXPENSIVE
- /* When we run before PRE and PRE is active hoist all expressions
-since PRE would do so anyway and we can preserve range info
-but PRE cannot.  */
- || (flag_tree_pre && !in_loop_pipeline))
+  if (lim_data->cost >= LIM_EXPENSIVE)
set_profitable_level (stmt);
+  /* When we run before PRE and PRE is active hoist all expressions
+to the always executed loop since PRE would do so anyway
+and we can preserve range info while PRE cannot.  */
+  else if (flag_tree_pre && !in_loop_pipeline
+  && outermost)
+   {
+ class loop *mloop = lim_data->max_loop;
+ if (loop_depth (outermost) > loop_depth (mloop))
+   {
+ mloop = outermost;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "  constraining to loop depth %d\n\n\n",
+loop_depth (mloop));
+   }
+ set_level (stmt, bb->loop_father, mloop);
+   }
 }
 }


[gcc r13-9641] debug/101533 - ICE with variant typedef DIE generation

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:e6652764a4558fe9487f17e6c7b18fbc577e8df7

commit r13-9641-ge6652764a4558fe9487f17e6c7b18fbc577e8df7
Author: Richard Biener 
Date:   Wed Mar 5 14:24:50 2025 +0100

debug/101533 - ICE with variant typedef DIE generation

There's a sanity check in gen_type_die_with_usage that trips
unnecessarily for a case where the relevant DIE has already been
generated successfully in other ways.  The following keys the
existing TREE_ASM_WRITTEN check on the correct object, honoring
this and does nothing instead of ICEing for the testcase at hand.

PR debug/101533
* dwarf2out.cc (gen_type_die_with_usage): When we have
output the typedef already do nothing for a typedef variant.
Do not set TREE_ASM_WRITTEN on the type.

* g++.dg/debug/pr101533.C: New testcase.

(cherry picked from commit 99a3f013c3bb8bc022ca488b40aa18fd97b5224d)

Diff:
---
 gcc/dwarf2out.cc  |  6 ++
 gcc/testsuite/g++.dg/debug/pr101533.C | 11 +++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index bd82b86a829c..b1a846c55a3e 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -26187,10 +26187,10 @@ gen_type_die_with_usage (tree type, dw_die_ref 
context_die,
  for the parent typedef which TYPE is a type of.  */
   if (typedef_variant_p (type))
 {
-  if (TREE_ASM_WRITTEN (type))
+  tree name = TYPE_NAME (type);
+  if (TREE_ASM_WRITTEN (name))
return;
 
-  tree name = TYPE_NAME (type);
   tree origin = decl_ultimate_origin (name);
   if (origin != NULL && origin != name)
{
@@ -26204,8 +26204,6 @@ gen_type_die_with_usage (tree type, dw_die_ref 
context_die,
   /* Give typedefs the right scope.  */
   context_die = scope_die_for (type, context_die);
 
-  TREE_ASM_WRITTEN (type) = 1;
-
   gen_decl_die (name, NULL, NULL, context_die);
   return;
 }
diff --git a/gcc/testsuite/g++.dg/debug/pr101533.C 
b/gcc/testsuite/g++.dg/debug/pr101533.C
new file mode 100644
index ..fc1e2e742a1d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/debug/pr101533.C
@@ -0,0 +1,11 @@
+// { dg-do compile }
+// { dg-options "-g" }
+
+template  class T
+{
+  typedef struct {} a __attribute__((aligned));
+};
+void f ()
+{
+  T();
+}


[gcc r13-9640] middle-end/101478 - ICE with degenerate address during gimplification

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:068aa9a7a9cc163ee5f31bffd746a8cd2ebfef7e

commit r13-9640-g068aa9a7a9cc163ee5f31bffd746a8cd2ebfef7e
Author: Richard Biener 
Date:   Wed Jul 31 10:07:45 2024 +0200

middle-end/101478 - ICE with degenerate address during gimplification

When we gimplify &MEM[0B + 4] we are re-folding the address in case
types are not canonical which ends up with a constant address that
recompute_tree_invariant_for_addr_expr ICEs on.  Properly guard
that call.

PR middle-end/101478
* gimplify.cc (gimplify_addr_expr): Check we still have an
ADDR_EXPR before calling recompute_tree_invariant_for_addr_expr.

* gcc.dg/pr101478.c: New testcase.

(cherry picked from commit 33ead6400ad59d4b38fa0527a9a7b53a28114ab7)

Diff:
---
 gcc/gimplify.cc |  3 ++-
 gcc/testsuite/gcc.dg/pr101478.c | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 62dad63fdadd..9c22139bd990 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -6586,7 +6586,8 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p)
*expr_p = build_fold_addr_expr (op0);
 
   /* Make sure TREE_CONSTANT and TREE_SIDE_EFFECTS are set properly.  */
-  recompute_tree_invariant_for_addr_expr (*expr_p);
+  if (TREE_CODE (*expr_p) == ADDR_EXPR)
+   recompute_tree_invariant_for_addr_expr (*expr_p);
 
   /* If we re-built the ADDR_EXPR add a conversion to the original type
  if required.  */
diff --git a/gcc/testsuite/gcc.dg/pr101478.c b/gcc/testsuite/gcc.dg/pr101478.c
new file mode 100644
index ..527620ea0f11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101478.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+struct obj {
+  int n;
+  int l;
+};
+int main()
+{
+  (struct obj *)((char *)(__SIZE_TYPE__)({ 0; }) - (char *)&((struct obj 
*)0)->l);
+}


[gcc r16-438] libstdc++: Fix width computation for the chrono formatting [PR120114]

2025-05-07 Thread Tomasz Kaminski via Gcc-cvs
https://gcc.gnu.org/g:52f6ab55051ff43fd1b40ff06d9501043f8ba844

commit r16-438-g52f6ab55051ff43fd1b40ff06d9501043f8ba844
Author: Tomasz Kamiński 
Date:   Mon May 5 16:32:58 2025 +0200

libstdc++: Fix width computation for the chrono formatting [PR120114]

Use `__unicode::__field_width` to compute the field width of the output when
writing the formatted output for std::chrono types.  This applies both to
characters copied from the format string and to ones produced by localized
formatting.

We also use _Str_sink::view() instead of get(), which avoids copying the 
content of
the buffer to std::string in case of small output.

PR libstdc++/120114

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__formatter_chrono::_M_format): Use 
__field_width.
* testsuite/std/time/format/pr120114.cc: New test.

Reviewed-by: Jonathan Wakely 
Signed-off-by: Tomasz Kamiński 

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h  |   9 +-
 libstdc++-v3/testsuite/std/time/format/pr120114.cc | 125 +
 2 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index b7f6f5f49e5e..620227a9f352 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -705,8 +705,13 @@ namespace __format
if (__write_direct)
  return __out;
 
- auto __str = std::move(__sink).get();
- return __format::__write_padded_as_spec(__str, __str.size(),
+ auto __str = __sink.view();
+ size_t __width;
+ if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
+   __width = __unicode::__field_width(__str);
+ else
+   __width = __str.size();
+ return __format::__write_padded_as_spec(__str, __width,
  __fc, _M_spec);
}
 
diff --git a/libstdc++-v3/testsuite/std/time/format/pr120114.cc 
b/libstdc++-v3/testsuite/std/time/format/pr120114.cc
new file mode 100644
index ..c630bb35a9d0
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/time/format/pr120114.cc
@@ -0,0 +1,125 @@
+// { dg-do run { target c++23 } }
+// { dg-options "-fexec-charset=UTF-8" }
+// { dg-timeout-factor 2 }
+
+#include 
+#include 
+#include 
+
+#define WIDEN_(C, S) ::std::__format::_Widen(S, L##S)
+#define WIDEN(S) WIDEN_(_CharT, S)
+
+template
+void
+test_from_format_string()
+{
+  std::basic_string<_CharT> res;
+  using namespace std::chrono_literals;
+  auto date = 2025y/std::chrono::May/05d;
+
+  res = std::format(WIDEN("{:+<13%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("2025-05-05\U0001f921+") );
+
+  res = std::format(WIDEN("{:->15%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("---2025-05-05\U0001f921") );
+
+  res = std::format(WIDEN("{:=^20%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("2025-05-05\U0001f921") );
+}
+
+template
+void
+test_formatted_value()
+{
+  // Custom time_put facet which returns Ideographic Telegraph Symbol
+  // for given month for Om.
+  struct TimePut : std::time_put<_CharT>
+  {
+using iter_type = std::time_put<_CharT>::iter_type;
+using char_type = std::time_put<_CharT>::char_type;
+
+iter_type
+do_put(iter_type out, std::ios_base& io, char_type fill, const tm* t,
+  char format, char modifier) const override
+{
+  if (format != 'm' && modifier != 'm')
+   return std::time_put<_CharT>::do_put(out, io, fill, t, format, 
modifier);
+  std::basic_string_view<_CharT> str;
+  switch (t->tm_mon)
+   {
+case 0:
+  str = WIDEN("\u32C0");
+  break;
+case 1:
+  str = WIDEN("\u32C1");
+  break;
+case 2:
+  str = WIDEN("\u32C2");
+  break;
+case 3:
+  str = WIDEN("\u32C3");
+  break;
+case 4:
+  str = WIDEN("\u32C4");
+  break;
+case 5:
+  str = WIDEN("\u32C5");
+  break;
+case 6:
+  str = WIDEN("\u32C6");
+  break;
+case 7:
+  str = WIDEN("\u32C7");
+  break;
+case 8:
+  str = WIDEN("\u32C8");
+  break;
+case 9:
+  str = WIDEN("\u32C9");
+  break;
+case 10:
+  str = WIDEN("\u32CA");
+  break;
+case 11:
+  str = WIDEN("\u32CB");
+  break;
+   };
+   return std::copy(str.begin(), str.end(), out);
+}
+  };
+  const std::locale loc(std::locale::classic(), new TimePut);
+
+  std::basic_string<_CharT> res;
+
+  res = std::format(loc, WIDEN("{:<1L%Om}"), std::chrono::January);
+  VERIFY( res == WIDEN("\u32C0") );
+
+  res = std::format(loc, WIDEN("{:>2L%Om}"), std::chrono::February);
+  VERIFY( res == WIDEN("\u32C1") );
+
+  res = std::format(loc, WIDEN("{:<3L%Om}"), std::chrono::March);
+  VERIFY( res == WIDEN("\u32C2

[gcc r15-9635] c++: C++17/20 class layout divergence [PR120012]

2025-05-07 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:809c5d7486f91d59e798e0c70ac887f837039ece

commit r15-9635-g809c5d7486f91d59e798e0c70ac887f837039ece
Author: Jason Merrill 
Date:   Wed Apr 30 10:18:46 2025 -0400

c++: C++17/20 class layout divergence [PR120012]

C++20 made a class with only explicitly defaulted constructors no longer
aggregate, and this wrongly affected whether the class is considered "POD
for layout purposes" under the ABI.

Conveniently, we already have check_non_pod_aggregate to diagnose cases
where this makes a difference, due to PR103681 around a C++14 aggregate
change.

This backport is the same code change as the trunk version, but since
-fabi-version=21 cannot be selected, the fix is not available, only the
warning, so the first testcase is different.

PR c++/120012

gcc/cp/ChangeLog:

* cp-tree.h (struct lang_type): Add non_aggregate_pod.
(CLASSTYPE_NON_AGGREGATE_POD): New.
* class.cc (check_bases_and_members): Set it.
(check_non_pod_aggregate): Diagnose it.

gcc/testsuite/ChangeLog:

* g++.dg/abi/base-defaulted1.C: New test.
* g++.dg/abi/base-defaulted1a.C: New test.

(cherry picked from commit e6e3b0772ed40cc65a544bbe744ece62d8b9713e)

Diff:
---
 gcc/cp/cp-tree.h|  8 -
 gcc/cp/class.cc | 54 +++--
 gcc/testsuite/g++.dg/abi/base-defaulted1.C  | 22 
 gcc/testsuite/g++.dg/abi/base-defaulted1a.C | 23 
 4 files changed, 95 insertions(+), 12 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 7798efba3dba..eb32ec0ce4ed 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2491,6 +2491,7 @@ struct GTY(()) lang_type {
   unsigned unique_obj_representations_set : 1;
   bool erroneous : 1;
   bool non_pod_aggregate : 1;
+  bool non_aggregate_pod : 1;
 
   /* When adding a flag here, consider whether or not it ought to
  apply to a template instance if it applies to the template.  If
@@ -2499,7 +2500,7 @@ struct GTY(()) lang_type {
   /* There are some bits left to fill out a 32-bit word.  Keep track
  of this by updating the size of this bitfield whenever you add or
  remove a flag.  */
-  unsigned dummy : 3;
+  unsigned dummy : 2;
 
   tree primary_base;
   vec *vcall_indices;
@@ -2826,6 +2827,11 @@ struct GTY(()) lang_type {
with a hash_set only filled in when abi_version_crosses (17).  */
 #define CLASSTYPE_NON_POD_AGGREGATE(NODE) \
   (LANG_TYPE_CLASS_CHECK (NODE)->non_pod_aggregate)
+
+/* True if this class is layout-POD though it's not an aggregate in C++20 and
+   above (c++/120012).  This could also be a hash_set.  */
+#define CLASSTYPE_NON_AGGREGATE_POD(NODE) \
+  (LANG_TYPE_CLASS_CHECK (NODE)->non_aggregate_pod)
 
 /* Additional macros for inheritance information.  */
 
diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 2b694b98e565..6767ac10358b 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -6413,9 +6413,7 @@ check_bases_and_members (tree t)
  Again, other conditions for being an aggregate are checked
  elsewhere.  */
   CLASSTYPE_NON_AGGREGATE (t)
-|= ((cxx_dialect < cxx20
-? type_has_user_provided_or_explicit_constructor (t)
-: TYPE_HAS_USER_CONSTRUCTOR (t))
+|= (type_has_user_provided_or_explicit_constructor (t)
|| TYPE_POLYMORPHIC_P (t));
   /* This is the C++98/03 definition of POD; it changed in C++0x, but we
  retain the old definition internally for ABI reasons.  */
@@ -6437,6 +6435,20 @@ check_bases_and_members (tree t)
CLASSTYPE_NON_LAYOUT_POD_P (t) = true;
 }
 
+  /* P1008: Prohibit aggregates with user-declared constructors.  */
+  if (cxx_dialect >= cxx20 && TYPE_HAS_USER_CONSTRUCTOR (t))
+{
+  CLASSTYPE_NON_AGGREGATE (t) = true;
+  if (!CLASSTYPE_NON_LAYOUT_POD_P (t))
+   {
+ /* c++/120012: The C++20 aggregate change affected layout.  */
+ if (!abi_version_at_least (21))
+   CLASSTYPE_NON_LAYOUT_POD_P (t) = true;
+ if (abi_version_crosses (21))
+   CLASSTYPE_NON_AGGREGATE_POD (t) = true;
+   }
+}
+
   /* If the only explicitly declared default constructor is user-provided,
  set TYPE_HAS_COMPLEX_DFLT.  */
   if (!TYPE_HAS_COMPLEX_DFLT (t)
@@ -6809,7 +6821,8 @@ end_of_class (tree t, eoc_mode mode)
 static void
 check_non_pod_aggregate (tree field)
 {
-  if (!abi_version_crosses (17) || cxx_dialect < cxx14)
+  if ((!abi_version_crosses (17) || cxx_dialect < cxx14)
+  && (!abi_version_crosses (21) || cxx_dialect < cxx20))
 return;
   if (TREE_CODE (field) != FIELD_DECL
   || (!DECL_FIELD_IS_BASE (field)
@@ -6822,7 +6835,8 @@ check_non_pod_aggregate (tree field)
   tree type = TREE_TYPE (field);
   if (TYPE_IDENTIFIER (type) == as_base_identifier)
 type = TYPE_CONTEXT (type);
-  if (!CLASS_TYPE_P (type) || !CLASSTYPE_NON_POD_AGGREGATE (ty

[gcc r15-9634] c++: let plain -Wabi warn about future changes

2025-05-07 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:fa55a6c92b50543352fef16e962511984998f123

commit r15-9634-gfa55a6c92b50543352fef16e962511984998f123
Author: Jason Merrill 
Date:   Thu May 1 09:42:40 2025 -0400

c++: let plain -Wabi warn about future changes

c_common_post_options limits flag_abi_version and flag_abi_compat_version to
actual ABI version numbers, but let's not do that for warn_abi_version; we
might want to add a warning relative to a future ABI version that isn't
available in the current release, such as backporting the PR120012 warning.

Also allow plain -Wabi to include such a warning without complaining that
it's useless.

Also warn about an unsupported -fabi-version argument.

gcc/c-family/ChangeLog:

* c-opts.cc (c_common_post_options): Let plain -Wabi warn
about changes in a future version.

(cherry picked from commit 11e62bc6d9f8109a98facd1f90d4602869eb12e7)

Diff:
---
 gcc/c-family/c-opts.cc | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index d43b3aef1024..108eaabd5d47 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1085,12 +1085,21 @@ c_common_post_options (const char **pfilename)
   /* Change flag_abi_version to be the actual current ABI level, for the
  benefit of c_cpp_builtins, and to make comparison simpler.  */
   const int latest_abi_version = 20;
+  /* Possibly different for non-default ABI fixes within a release.  */
+  const int default_abi_version = latest_abi_version;
   /* Generate compatibility aliases for ABI v13 (8.2) by default.  */
   const int abi_compat_default = 13;
 
+  if (flag_abi_version > latest_abi_version)
+warning (0, "%<-fabi-version=%d%> is not supported, using =%d",
+flag_abi_version, latest_abi_version);
+
+  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+  flag_abi_version, default_abi_version);
+
 #define clamp(X) if (X == 0 || X > latest_abi_version) X = latest_abi_version
   clamp (flag_abi_version);
-  clamp (warn_abi_version);
+  /* Don't clamp warn_abi_version, let it be 0 or out of bounds.  */
   clamp (flag_abi_compat_version);
 #undef clamp
 
@@ -1101,24 +1110,17 @@ c_common_post_options (const char **pfilename)
 flag_abi_compat_version = warn_abi_version;
   else if (warn_abi_version == -1 && flag_abi_compat_version == -1)
 {
-  warn_abi_version = latest_abi_version;
-  if (flag_abi_version == latest_abi_version)
-   {
- auto_diagnostic_group d;
- if (warning (OPT_Wabi, "%<-Wabi%> won%'t warn about anything"))
-   {
- inform (input_location, "%<-Wabi%> warns about differences "
- "from the most up-to-date ABI, which is also used "
- "by default");
- inform (input_location, "use e.g. %<-Wabi=11%> to warn about "
- "changes from GCC 7");
-   }
- flag_abi_compat_version = abi_compat_default;
-   }
+  warn_abi_version = 0;
+  if (flag_abi_version == default_abi_version)
+   flag_abi_compat_version = abi_compat_default;
   else
flag_abi_compat_version = latest_abi_version;
 }
 
+  /* Allow warnings vs ABI versions beyond what we currently support.  */
+  if (warn_abi_version == 0)
+warn_abi_version = 1000;
+
   /* By default, enable the new inheriting constructor semantics along with ABI
  11.  New and old should coexist fine, but it is a change in what
  artificial symbols are generated.  */


[gcc r16-451] libstdc++: Add missing export for std::is_layout_compatible_v [PR120159]

2025-05-07 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:8a1f3615c01cdbf5b2d37448c8bb09a96d5e3330

commit r16-451-g8a1f3615c01cdbf5b2d37448c8bb09a96d5e3330
Author: Jonathan Wakely 
Date:   Wed May 7 17:06:11 2025 +0100

libstdc++: Add missing export for std::is_layout_compatible_v [PR120159]

libstdc++-v3/ChangeLog:

PR libstdc++/120159
* src/c++23/std.cc.in (is_layout_compatible_v): Export.

Diff:
---
 libstdc++-v3/src/c++23/std.cc.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in
index 73316d168c94..d45ae632ace3 100644
--- a/libstdc++-v3/src/c++23/std.cc.in
+++ b/libstdc++-v3/src/c++23/std.cc.in
@@ -3114,6 +3114,7 @@ export namespace std
 #if __cpp_lib_is_layout_compatible
   using std::is_corresponding_member;
   using std::is_layout_compatible;
+  using std::is_layout_compatible_v;
 #endif
 #if __cpp_lib_is_pointer_interconvertible
   using std::is_pointer_interconvertible_base_of;


[gcc r15-9631] libstdc++: Fix width computation for the chrono formatting [PR120114]

2025-05-07 Thread Tomasz Kaminski via Libstdc++-cvs
https://gcc.gnu.org/g:e52f71b6dd888f0d5548d9f5bd139601dbafb3be

commit r15-9631-ge52f71b6dd888f0d5548d9f5bd139601dbafb3be
Author: Tomasz Kamiński 
Date:   Mon May 5 16:32:58 2025 +0200

libstdc++: Fix width computation for the chrono formatting [PR120114]

Use `__unicode::__field_width` to compute the field width of the output when
writing the formatted output for std::chrono::types. This applies both to
characters copied from the format string, and to ones produced by localized
formatting.

We also use _Str_sink::view() instead of get(), which avoids copying the
content of the buffer to std::string in case of small output.

PR libstdc++/120114

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (__formatter_chrono::_M_format): Use 
__field_width.
* testsuite/std/time/format/pr120114.cc: New test.

Reviewed-by: Jonathan Wakely 
Signed-off-by: Tomasz Kamiński 
(cherry picked from commit 52f6ab55051ff43fd1b40ff06d9501043f8ba844)

Diff:
---
 libstdc++-v3/include/bits/chrono_io.h  |  10 +-
 libstdc++-v3/testsuite/std/time/format/pr120114.cc | 125 +
 2 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index b7f6f5f49e5e..f5aa8a3e18af 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -705,8 +705,14 @@ namespace __format
if (__write_direct)
  return __out;
 
- auto __str = std::move(__sink).get();
- return __format::__write_padded_as_spec(__str, __str.size(),
+ auto __span = __sink.view();
+ __string_view __str(__span.data(), __span.size());
+ size_t __width;
+ if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
+   __width = __unicode::__field_width(__str);
+ else
+   __width = __str.size();
+ return __format::__write_padded_as_spec(__str, __width,
  __fc, _M_spec);
}
 
diff --git a/libstdc++-v3/testsuite/std/time/format/pr120114.cc 
b/libstdc++-v3/testsuite/std/time/format/pr120114.cc
new file mode 100644
index 000000000000..c630bb35a9d0
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/time/format/pr120114.cc
@@ -0,0 +1,125 @@
+// { dg-do run { target c++23 } }
+// { dg-options "-fexec-charset=UTF-8" }
+// { dg-timeout-factor 2 }
+
+#include 
+#include 
+#include 
+
+#define WIDEN_(C, S) ::std::__format::_Widen(S, L##S)
+#define WIDEN(S) WIDEN_(_CharT, S)
+
+template
+void
+test_from_format_string()
+{
+  std::basic_string<_CharT> res;
+  using namespace std::chrono_literals;
+  auto date = 2025y/std::chrono::May/05d;
+
+  res = std::format(WIDEN("{:+<13%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("2025-05-05\U0001f921+") );
+
+  res = std::format(WIDEN("{:->15%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("---2025-05-05\U0001f921") );
+
+  res = std::format(WIDEN("{:=^20%F\U0001f921}"), date);
+  VERIFY( res == WIDEN("====2025-05-05\U0001f921====") );
+}
+
+template
+void
+test_formatted_value()
+{
+  // Custom time_put facet which returns Ideographic Telegraph Symbol
+  // for given month for Om.
+  struct TimePut : std::time_put<_CharT>
+  {
+using iter_type = std::time_put<_CharT>::iter_type;
+using char_type = std::time_put<_CharT>::char_type;
+
+iter_type
+do_put(iter_type out, std::ios_base& io, char_type fill, const tm* t,
+  char format, char modifier) const override
+{
+  if (format != 'm' && modifier != 'm')
+   return std::time_put<_CharT>::do_put(out, io, fill, t, format, 
modifier);
+  std::basic_string_view<_CharT> str;
+  switch (t->tm_mon)
+   {
+case 0:
+  str = WIDEN("\u32C0");
+  break;
+case 1:
+  str = WIDEN("\u32C1");
+  break;
+case 2:
+  str = WIDEN("\u32C2");
+  break;
+case 3:
+  str = WIDEN("\u32C3");
+  break;
+case 4:
+  str = WIDEN("\u32C4");
+  break;
+case 5:
+  str = WIDEN("\u32C5");
+  break;
+case 6:
+  str = WIDEN("\u32C6");
+  break;
+case 7:
+  str = WIDEN("\u32C7");
+  break;
+case 8:
+  str = WIDEN("\u32C8");
+  break;
+case 9:
+  str = WIDEN("\u32C9");
+  break;
+case 10:
+  str = WIDEN("\u32CA");
+  break;
+case 11:
+  str = WIDEN("\u32CB");
+  break;
+   };
+   return std::copy(str.begin(), str.end(), out);
+}
+  };
+  const std::locale loc(std::locale::classic(), new TimePut);
+
+  std::basic_string<_CharT> res;
+
+  res = std::format(loc, WIDEN("{:<1L%Om}"), std::chrono::January);
+  VERIFY( res == WIDEN("\u32C0") );
+
+  res = std::format(loc, WIDEN("{:>2L%Om}"), std::chrono::February);
+

[gcc r16-439] s390: Add cstoreti4 expander

2025-05-07 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:15edd7d60352d4cfa95ed1cf14e6ded041b469ba

commit r16-439-g15edd7d60352d4cfa95ed1cf14e6ded041b469ba
Author: Stefan Schulze Frielinghaus 
Date:   Wed May 7 12:43:42 2025 +0200

s390: Add cstoreti4 expander

For target VXE3 just emit a 128-bit comparison followed by a conditional
load.  For targets prior to VXE3, emulate the 128-bit comparison and make
use of a conditional load, too.

gcc/ChangeLog:

* config/s390/s390-protos.h (s390_expand_cstoreti4): New
function.
* config/s390/s390.cc (s390_expand_cstoreti4): New function.
* config/s390/s390.md (CC_SUZ): New mode iterator.
(l): New mode attribute.
(cc_tolower): New mode attribute.
* config/s390/vector.md (cstoreti4): New expander.
(*vec_cmpv2di_lane0_): New insn.
(*vec_cmpti_): New insn.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/cstoreti-1.c: New test.
* gcc.target/s390/vector/cstoreti-2.c: New test.

Diff:
---
 gcc/config/s390/s390-protos.h |   1 +
 gcc/config/s390/s390.cc   |  82 +-
 gcc/config/s390/s390.md   |   4 +
 gcc/config/s390/vector.md |  30 +
 gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c | 127 ++
 gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c |  25 +
 6 files changed, 266 insertions(+), 3 deletions(-)

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e8c7f8308496..d760a7e20ff7 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
 extern void s390_expand_vec_strlen (rtx, rtx, rtx);
 extern void s390_expand_vec_movstr (rtx, rtx, rtx);
 extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx);
 extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
 extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
 extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index e3edf8595131..2d44cecfeeda 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, 
rtx count)
  NULL_RTX, 1, OPTAB_DIRECT);
 }
 
+/* Expand optab cstoreti4.  */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+  rtx_code code = GET_CODE (cmp);
+
+  if (TARGET_VXE3)
+{
+  rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+  emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+  return;
+}
+
+  /* Prior to VXE3 emulate the comparison.  For an (in)equality test exploit
+ VECTOR COMPARE EQUAL.  For a relational test, first compare the high part
+ via VECTOR ELEMENT COMPARE (LOGICAL).  If the high parts are not equal,
+ then consume the CC immediately by a subsequent LOAD ON CONDITION.
+ Otherwise, if the high parts are equal, then perform a subsequent VECTOR
+ COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION.  */
+
+  op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+  op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+  if (code == EQ || code == NE)
+{
+  s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+  return;
+}
+
+  /* Normalize code into either GE(U) or GT(U).  */
+  if (code == LT || code == LE || code == LTU || code == LEU)
+{
+  std::swap (op1, op2);
+  code = swap_condition (code);
+}
+
+  /* For (un)signed comparisons
+ - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+   if the relation does _not_ hold.
+ - high(op1) >  high(op2) instruction VECG op2, op1 sets CC1
+   if the relation holds.  */
+  if (code == GT || code == GTU)
+std::swap (op1, op2);
+  machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+  rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+  emit_insn (
+gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+  rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+  rtx lab = gen_label_rtx ();
+  s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+  /* At this point we have that high(op1) == high(op2).  Thus, test the low
+ part, now.  For unsigned comparisons
+ - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+   if the relation does _not_ hold.
+ - low(op1) >  low(op2) instruction VCHLGS op1, op2 sets CC1
+   if the relation holds.  */
+  std::

[gcc r16-443] libstdc++: Add tests for std::extents.

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:0deefb9300043fc096e958154813ddf77b943fb3

commit r16-443-g0deefb9300043fc096e958154813ddf77b943fb3
Author: Luc Grosheintz 
Date:   Tue Apr 29 14:46:10 2025 +0200

libstdc++: Add tests for std::extents.

A prior commit added std::extents, this commit adds the tests. The bulk
is focussed on testing the constructors. These are split into three
groups:

1. the ctor from other extents and the copy ctor,
2. the ctor from a pack of integer-like objects,
3. the ctor from shapes, i.e. span and array.

For each group check that the ctor:
* produces an object with the expected values for extent,
* is implicit if and only if required,
* is constexpr,
* doesn't change the rank of the extent.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/mdspan/extents/class_mandates_neg.cc: New 
test.
* testsuite/23_containers/mdspan/extents/ctor_copy.cc: New test.
* testsuite/23_containers/mdspan/extents/ctor_ints.cc: New test.
* testsuite/23_containers/mdspan/extents/ctor_shape.cc: New test.
* testsuite/23_containers/mdspan/extents/custom_integer.cc: New 
test.
* testsuite/23_containers/mdspan/extents/misc.cc: New test.

Signed-off-by: Luc Grosheintz 

Diff:
---
 .../mdspan/extents/class_mandates_neg.cc   |   8 +
 .../23_containers/mdspan/extents/ctor_copy.cc  |  82 
 .../23_containers/mdspan/extents/ctor_ints.cc  |  62 ++
 .../23_containers/mdspan/extents/ctor_shape.cc | 160 +++
 .../23_containers/mdspan/extents/custom_integer.cc |  87 
 .../testsuite/23_containers/mdspan/extents/misc.cc | 224 +
 6 files changed, 623 insertions(+)

diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
new file mode 100644
index 000000000000..b654e3920a8a
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/class_mandates_neg.cc
@@ -0,0 +1,8 @@
+// { dg-do compile { target c++23 } }
+#include
+
+std::extents e1; // { dg-error "from here" }
+std::extents e2;// { dg-error "from here" }
+// { dg-prune-output "dynamic or representable as _IndexType" }
+// { dg-prune-output "must be integral" }
+// { dg-prune-output "invalid use of incomplete type" }
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_copy.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_copy.cc
new file mode 100644
index 000000000000..a7b3a169301e
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_copy.cc
@@ -0,0 +1,82 @@
+// { dg-do run { target c++23 } }
+#include 
+
+#include 
+
+// Test the copy ctor and the ctor from other extents.
+
+constexpr auto dyn = std::dynamic_extent;
+
+// Not constructible
+static_assert(!std::is_constructible_v,
+  std::extents>);
+
+static_assert(!std::is_constructible_v,
+  std::extents>);
+
+static_assert(!std::is_constructible_v,
+  std::extents>);
+
+static_assert(!std::is_constructible_v,
+  std::extents>);
+
+// Nothrow constructible
+static_assert(std::is_nothrow_constructible_v,
+ std::extents>);
+static_assert(std::is_nothrow_constructible_v,
+ std::extents>);
+
+// Implicit conversion
+static_assert(!std::is_convertible_v,
+std::extents>);
+static_assert(std::is_convertible_v,
+   std::extents>);
+
+static_assert(!std::is_convertible_v,
+std::extents>);
+static_assert(std::is_convertible_v,
+   std::extents>);
+
+static_assert(!std::is_convertible_v,
+std::extents>);
+static_assert(std::is_convertible_v,
+   std::extents>);
+
+static_assert(!std::is_convertible_v,
+std::extents>);
+static_assert(std::is_convertible_v,
+   std::extents>);
+
+template
+  constexpr void
+  test_ctor(const Other& other)
+  {
+auto e = std::extents(other);
+VERIFY(e == other);
+  }
+
+constexpr int
+test_all()
+{
+  auto e0 = std::extents();
+  test_ctor(e0);
+
+  auto e1 = std::extents();
+  test_ctor(e1);
+  test_ctor(e1);
+  test_ctor(e1);
+
+  auto e2 = std::extents{1, 2, 3};
+  test_ctor(e2);
+  test_ctor(e2);
+  test_ctor(e2);
+  return true;
+}
+
+int
+main()
+{
+  test_all();
+  static_assert(test_all());
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_ints.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_ints.cc
new file mode 100644
index ..3a70efd8

[gcc r16-442] libstdc++: Implement std::extents [PR107761].

2025-05-07 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:a33b5db97c4e87bd02f1af1a75b2d84376442fa0

commit r16-442-ga33b5db97c4e87bd02f1af1a75b2d84376442fa0
Author: Luc Grosheintz 
Date:   Tue Apr 29 14:46:09 2025 +0200

libstdc++: Implement std::extents [PR107761].

This implements std::extents from <mdspan> according to N4950 and
contains partial progress towards PR107761.

If an extent changes its type, there's a precondition in the standard,
that the value is representable in the target integer type. This
precondition is not checked at runtime.

The precondition for 'extents::{static_,}extent' is that '__r < rank()'.
For rank-zero extents (extents<T>) this precondition is always violated and results in
calling __builtin_trap. For all other specializations it's checked via
__glibcxx_assert.

PR libstdc++/107761

libstdc++-v3/ChangeLog:

* include/std/mdspan (extents): New class.
* src/c++23/std.cc.in: Add 'using std::extents'.

Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/std/mdspan  | 261 +++
 libstdc++-v3/src/c++23/std.cc.in |   6 +-
 2 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 78a00a5aa529..aee96dda7cd0 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -33,6 +33,12 @@
 #pragma GCC system_header
 #endif
 
+#include 
+#include 
+#include 
+#include 
+#include 
+
 #define __glibcxx_want_mdspan
 #include 
 
@@ -41,6 +47,261 @@
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+  namespace __mdspan
+  {
+template
+  class _ExtentsStorage
+  {
+  public:
+   static consteval bool
+   _S_is_dyn(size_t __ext) noexcept
+   { return __ext == dynamic_extent; }
+
+   template
+ static constexpr _IndexType
+ _S_int_cast(const _OIndexType& __other) noexcept
+ { return _IndexType(__other); }
+
+   static constexpr size_t _S_rank = _Extents.size();
+
+   // For __r in [0, _S_rank], _S_dynamic_index[__r] is the number
+   // of dynamic extents up to (and not including) __r.
+   //
+   // If __r is the index of a dynamic extent, then
+   // _S_dynamic_index[__r] is the index of that extent in
+   // _M_dynamic_extents.
+   static constexpr auto _S_dynamic_index = [] consteval
+   {
+ array __ret;
+ size_t __dyn = 0;
+ for(size_t __i = 0; __i < _S_rank; ++__i)
+   {
+ __ret[__i] = __dyn;
+ __dyn += _S_is_dyn(_Extents[__i]);
+   }
+ __ret[_S_rank] = __dyn;
+ return __ret;
+   }();
+
+   static constexpr size_t _S_rank_dynamic = _S_dynamic_index[_S_rank];
+
+   // For __r in [0, _S_rank_dynamic), _S_dynamic_index_inv[__r] is the
+   // index of the __r-th dynamic extent in _Extents.
+   static constexpr auto _S_dynamic_index_inv = [] consteval
+   {
+ array __ret;
+ for (size_t __i = 0, __r = 0; __i < _S_rank; ++__i)
+   if (_S_is_dyn(_Extents[__i]))
+ __ret[__r++] = __i;
+ return __ret;
+   }();
+
+   static constexpr size_t
+   _S_static_extent(size_t __r) noexcept
+   { return _Extents[__r]; }
+
+   constexpr _IndexType
+   _M_extent(size_t __r) const noexcept
+   {
+ auto __se = _Extents[__r];
+ if (__se == dynamic_extent)
+   return _M_dynamic_extents[_S_dynamic_index[__r]];
+ else
+   return __se;
+   }
+
+   template
+ constexpr void
+ _M_init_dynamic_extents(_GetOtherExtent __get_extent) noexcept
+ {
+   for(size_t __i = 0; __i < _S_rank_dynamic; ++__i)
+ {
+   size_t __di = __i;
+   if constexpr (_OtherRank != _S_rank_dynamic)
+ __di = _S_dynamic_index_inv[__i];
+   _M_dynamic_extents[__i] = _S_int_cast(__get_extent(__di));
+ }
+ }
+
+   constexpr
+   _ExtentsStorage() noexcept = default;
+
+   template
+ constexpr
+ _ExtentsStorage(const _ExtentsStorage<_OIndexType, _OExtents>&
+ __other) noexcept
+ {
+   _M_init_dynamic_extents<_S_rank>([&__other](size_t __i)
+ { return __other._M_extent(__i); });
+ }
+
+   template
+ constexpr
+ _ExtentsStorage(span __exts) noexcept
+ {
+   _M_init_dynamic_extents<_Nm>(
+ [&__exts](size_t __i) -> const _OIndexType&
+ { return __exts[__i]; });
+ }
+
+  private:
+   using _S_storage = __array_traits<_IndexType, _S_rank_dynamic>::_Type;
+   [[no_unique_address]] _S_storage _M_dynamic_extents;
+  };
+
+template
+  concept __valid_index_type =
+   is_convertible_v<_OIndexType, _SIndexType> &&
+   is_nothrow_constructib

[gcc r16-441] libstdc++: Add header mdspan to the build-system.

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:4c9eef71db5e8e693c44ac4090247541e5a43063

commit r16-441-g4c9eef71db5e8e693c44ac4090247541e5a43063
Author: Luc Grosheintz 
Date:   Tue Apr 29 14:46:08 2025 +0200

libstdc++: Add header mdspan to the build-system.

Creates a nearly empty header mdspan and adds it to the build-system and
Doxygen config file.

libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in: Add <mdspan>.
* include/Makefile.am: Ditto.
* include/Makefile.in: Ditto.
* include/precompiled/stdc++.h: Ditto.
* include/std/mdspan: New file.

Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/doc/doxygen/user.cfg.in  |  1 +
 libstdc++-v3/include/Makefile.am  |  1 +
 libstdc++-v3/include/Makefile.in  |  1 +
 libstdc++-v3/include/precompiled/stdc++.h |  1 +
 libstdc++-v3/include/std/mdspan   | 48 +++
 5 files changed, 52 insertions(+)

diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in 
b/libstdc++-v3/doc/doxygen/user.cfg.in
index 19ae67a67bac..e926c6707f67 100644
--- a/libstdc++-v3/doc/doxygen/user.cfg.in
+++ b/libstdc++-v3/doc/doxygen/user.cfg.in
@@ -880,6 +880,7 @@ INPUT  = @srcdir@/doc/doxygen/doxygroups.cc 
\
  include/list \
  include/locale \
  include/map \
+ include/mdspan \
  include/memory \
  include/memory_resource \
  include/mutex \
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 537774c26689..1140fa0dffd1 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -38,6 +38,7 @@ std_freestanding = \
${std_srcdir}/generator \
${std_srcdir}/iterator \
${std_srcdir}/limits \
+   ${std_srcdir}/mdspan \
${std_srcdir}/memory \
${std_srcdir}/numbers \
${std_srcdir}/numeric \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 7b96b2207f84..c96e981acd6e 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -396,6 +396,7 @@ std_freestanding = \
${std_srcdir}/generator \
${std_srcdir}/iterator \
${std_srcdir}/limits \
+   ${std_srcdir}/mdspan \
${std_srcdir}/memory \
${std_srcdir}/numbers \
${std_srcdir}/numeric \
diff --git a/libstdc++-v3/include/precompiled/stdc++.h 
b/libstdc++-v3/include/precompiled/stdc++.h
index f4b312d9e470..e7d89c927049 100644
--- a/libstdc++-v3/include/precompiled/stdc++.h
+++ b/libstdc++-v3/include/precompiled/stdc++.h
@@ -228,6 +228,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
new file mode 100644
index 000000000000..78a00a5aa529
--- /dev/null
+++ b/libstdc++-v3/include/std/mdspan
@@ -0,0 +1,48 @@
+//  -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file mdspan
+ *  This is a Standard C++ Library header.
+ */
+
+#ifndef _GLIBCXX_MDSPAN
+#define _GLIBCXX_MDSPAN 1
+
+#ifdef _GLIBCXX_SYSHDR
+#pragma GCC system_header
+#endif
+
+#define __glibcxx_want_mdspan
+#include 
+
+#ifdef __glibcxx_mdspan
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+_GLIBCXX_END_NAMESPACE_VERSION
+}
+#endif
+#endif


[gcc r16-440] libstdc++: Setup internal FTM for mdspan.

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:369c439ca19725712426defb441663c529b27dd3

commit r16-440-g369c439ca19725712426defb441663c529b27dd3
Author: Luc Grosheintz 
Date:   Tue Apr 29 14:46:07 2025 +0200

libstdc++: Setup internal FTM for mdspan.

Uses the FTM infrastructure to create an internal feature testing macro
for partial availability of mdspan; which is then used to hide the
contents of the header mdspan when compiling against a standard prior to
C++23.

libstdc++-v3/ChangeLog:

* include/bits/version.def: Add internal feature testing macro
__glibcxx_mdspan.
* include/bits/version.h: Regenerate.

Signed-off-by: Luc Grosheintz 

Diff:
---
 libstdc++-v3/include/bits/version.def | 9 +
 libstdc++-v3/include/bits/version.h   | 9 +
 2 files changed, 18 insertions(+)

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index 282667eabda6..f4d3de88bb2b 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -999,6 +999,15 @@ ftms = {
   };
 };
 
+ftms = {
+  name = mdspan;
+  no_stdname = true; // FIXME: remove
+  values = {
+v = 1; // FIXME: 202207
+cxxmin = 23;
+  };
+};
+
 ftms = {
   name = ssize;
   values = {
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index bb7c0479c723..d5d75cef2de1 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1114,6 +1114,15 @@
 #endif /* !defined(__cpp_lib_span) && defined(__glibcxx_want_span) */
 #undef __glibcxx_want_span
 
+#if !defined(__cpp_lib_mdspan)
+# if (__cplusplus >= 202100L)
+#  define __glibcxx_mdspan 1L
+#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#  endif
+# endif
+#endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
+#undef __glibcxx_want_mdspan
+
 #if !defined(__cpp_lib_ssize)
 # if (__cplusplus >= 202002L)
 #  define __glibcxx_ssize 201902L


[gcc r16-444] libstdc++: Fix module std export for std::extents

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:0a5855d14dbe25c0890da5c5550013c795aab90e

commit r16-444-g0a5855d14dbe25c0890da5c5550013c795aab90e
Author: Jonathan Wakely 
Date:   Wed May 7 11:54:24 2025 +0100

libstdc++: Fix module std export for std::extents

libstdc++-v3/ChangeLog:

* src/c++23/std.cc.in: Fix export for std::extents.

Diff:
---
 libstdc++-v3/src/c++23/std.cc.in | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in
index 0df27cd7e7da..73316d168c94 100644
--- a/libstdc++-v3/src/c++23/std.cc.in
+++ b/libstdc++-v3/src/c++23/std.cc.in
@@ -1834,10 +1834,13 @@ export namespace std
 }
 
 // <mdspan>
+#if __glibcxx_mdspan
+export namespace std
 {
   using std::extents;
   // FIXME layout_*, default_accessor and mdspan
 }
+#endif
 
 // 20.2 <memory>
 export namespace std


[gcc r13-9635] tree-optimization/111125 - avoid BB vectorization in novector loops

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:337d3cdfa8bb8b8404d62ea45690095f9b566cbb

commit r13-9635-g337d3cdfa8bb8b8404d62ea45690095f9b566cbb
Author: Richard Biener 
Date:   Thu Aug 24 11:10:43 2023 +0200

tree-optimization/111125 - avoid BB vectorization in novector loops

When a loop is marked with

  #pragma GCC novector

the following makes sure to also skip BB vectorization for contained
blocks.  That avoids gcc.dg/vect/bb-slp-29.c failing on aarch64
because of extra BB vectorization therein.  I'm not specifically
dealing with sub-loops of novector loops, the desired semantics
isn't documented.

PR tree-optimization/111125
* tree-vect-slp.cc (vect_slp_function): Split at novector
loop entry, do not push blocks in novector loops.

(cherry picked from commit 43da77a4f1636280c4259402c9c2c543e6ec6c0b)

Diff:
---
 gcc/tree-vect-slp.cc | 41 +
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c228087df734..d5b114dbcc9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7714,6 +7714,17 @@ vect_slp_function (function *fun)
 bbs[0]->loop_father->num, bb->index);
  split = true;
}
+  else if (!bbs.is_empty ()
+  && bb->loop_father->header == bb
+  && bb->loop_father->dont_vectorize)
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"splitting region at dont-vectorize loop %d "
+"entry at bb%d\n",
+bb->loop_father->num, bb->index);
+ split = true;
+   }
 
   if (split && !bbs.is_empty ())
{
@@ -7721,19 +7732,25 @@ vect_slp_function (function *fun)
  bbs.truncate (0);
}
 
-  /* We need to be able to insert at the head of the region which
-we cannot for region starting with a returns-twice call.  */
   if (bbs.is_empty ())
-   if (gcall *first = safe_dyn_cast <gcall *> (first_stmt (bb)))
- if (gimple_call_flags (first) & ECF_RETURNS_TWICE)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"skipping bb%d as start of region as it "
-"starts with returns-twice call\n",
-bb->index);
- continue;
-   }
+   {
+ /* We need to be able to insert at the head of the region which
+we cannot for region starting with a returns-twice call.  */
+ if (gcall *first = safe_dyn_cast <gcall *> (first_stmt (bb)))
+   if (gimple_call_flags (first) & ECF_RETURNS_TWICE)
+ {
+   if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+  "skipping bb%d as start of region as it "
+  "starts with returns-twice call\n",
+  bb->index);
+   continue;
+ }
+ /* If the loop this BB belongs to is marked as not to be vectorized
+honor that also for BB vectorization.  */
+ if (bb->loop_father->dont_vectorize)
+   continue;
+   }
 
   bbs.safe_push (bb);


[gcc r13-9638] tree-optimization/87984 - hard register assignments not preserved

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:f2d0fb238b1a6eab85924515f0c1ffc2f939e6a7

commit r13-9638-gf2d0fb238b1a6eab85924515f0c1ffc2f939e6a7
Author: Richard Biener 
Date:   Fri Feb 28 10:36:11 2025 +0100

tree-optimization/87984 - hard register assignments not preserved

The following disables redundant store elimination to hard register
variables which isn't valid.

PR tree-optimization/87984
* tree-ssa-dom.cc (dom_opt_dom_walker::optimize_stmt): Do
not perform redundant store elimination to hard register
variables.
* tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt):
Likewise.

* gcc.target/i386/pr87984.c: New testcase.

(cherry picked from commit 535115caaf97f5201fb528f67f15b4c52be5619d)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr87984.c | 23 +++
 gcc/tree-ssa-dom.cc |  4 +++-
 gcc/tree-ssa-sccvn.cc   |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr87984.c 
b/gcc/testsuite/gcc.target/i386/pr87984.c
new file mode 100644
index 000000000000..39a6a7480f9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87984.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+__attribute__((noipa))
+int f(void)
+{
+  int o = 0;
+  for (int i = 0; i < 3; i++)
+{
+  register int a asm("eax");
+  a = 1;
+  asm("add %1, %0" : "+r"(o) : "r"(a));
+  asm("xor %%eax, %%eax" ::: "eax");
+}
+  return o;
+}
+
+int main()
+{
+  if (f() != 3)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-dom.cc b/gcc/tree-ssa-dom.cc
index 32769c2068a7..8779cb934120 100644
--- a/gcc/tree-ssa-dom.cc
+++ b/gcc/tree-ssa-dom.cc
@@ -2389,7 +2389,9 @@ dom_opt_dom_walker::optimize_stmt (basic_block bb, 
gimple_stmt_iterator *si,
 
   /* Perform simple redundant store elimination.  */
   if (gimple_assign_single_p (stmt)
- && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+ && TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME
+ && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL
+ || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt))))
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 3c9db41a8b2f..a526268e0780 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -6984,6 +6984,8 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, 
gimple_stmt_iterator *gsi)
   if (gimple_assign_single_p (stmt)
   && !gimple_has_volatile_ops (stmt)
   && !is_gimple_reg (gimple_assign_lhs (stmt))
+  && (TREE_CODE (gimple_assign_lhs (stmt)) != VAR_DECL
+ || !DECL_HARD_REGISTER (gimple_assign_lhs (stmt)))
   && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME
  || is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
 {


[gcc r13-9639] lto/91299 - weak definition inlined with LTO

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:da579a833cf5a5ae463be3f4da70df782612a800

commit r13-9639-gda579a833cf5a5ae463be3f4da70df782612a800
Author: Richard Biener 
Date:   Fri Feb 28 14:09:29 2025 +0100

lto/91299 - weak definition inlined with LTO

The following fixes a thinko in the handling of interposed weak
definitions which confused the interposition check in
get_availability by setting DECL_EXTERNAL too early.

PR lto/91299
gcc/lto/
* lto-symtab.cc (lto_symtab_merge_symbols): Set DECL_EXTERNAL
only after calling get_availability.

gcc/testsuite/
* gcc.dg/lto/pr91299_0.c: New testcase.
* gcc.dg/lto/pr91299_1.c: Likewise.

(cherry picked from commit bc34db5b12e008f6ec4fdf4ebd22263c8617e5e3)

Diff:
---
 gcc/lto/lto-symtab.cc|  2 +-
 gcc/testsuite/gcc.dg/lto/pr91299_0.c | 16 
 gcc/testsuite/gcc.dg/lto/pr91299_1.c |  6 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/lto/lto-symtab.cc b/gcc/lto/lto-symtab.cc
index 2b57d0d53719..9836223632b3 100644
--- a/gcc/lto/lto-symtab.cc
+++ b/gcc/lto/lto-symtab.cc
@@ -1016,7 +1016,6 @@ lto_symtab_merge_symbols (void)
  || node->resolution == LDPR_RESOLVED_EXEC
  || node->resolution == LDPR_RESOLVED_DYN))
{
- DECL_EXTERNAL (node->decl) = 1;
  /* If alias to local symbol was preempted by external definition,
 we know it is not pointing to the local symbol.  Remove it.  */
  if (node->alias
@@ -1042,6 +1041,7 @@ lto_symtab_merge_symbols (void)
  node->remove_all_references ();
}
}
+ DECL_EXTERNAL (node->decl) = 1;
}
 
  if (!(cnode = dyn_cast <cgraph_node *> (node))
diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_0.c 
b/gcc/testsuite/gcc.dg/lto/pr91299_0.c
new file mode 100644
index ..d9a8b21d6b84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr91299_0.c
@@ -0,0 +1,16 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options { { -O2 -flto } } } */
+
+__attribute__((weak)) int get_t(void)
+{
+  return 0;
+}
+
+int a;
+int main(void)
+{
+  a = get_t();
+  if (a != 1)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr91299_1.c 
b/gcc/testsuite/gcc.dg/lto/pr91299_1.c
new file mode 100644
index ..29a28520f7b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr91299_1.c
@@ -0,0 +1,6 @@
+/* { dg-options "-fno-lto" } */
+
+int get_t(void)
+{
+return 1;
+}


[gcc r13-9636] middle-end/66279 - gimplification clobbers shared asm constraints

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a145eaf1128ef4392cf6e624f4b23648cbe645eb

commit r13-9636-ga145eaf1128ef4392cf6e624f4b23648cbe645eb
Author: Richard Biener 
Date:   Fri Feb 28 09:58:36 2025 +0100

middle-end/66279 - gimplification clobbers shared asm constraints

When the C++ frontend clones a CTOR we do not copy ASM_EXPR constraints
fully as walk_tree does not recurse to TREE_PURPOSE of TREE_LIST nodes.
At this point doing that seems too dangerous so the following instead
avoids gimplification of ASM_EXPRs to clobber the shared constraints
and unshares it there, like it also unshares TREE_VALUE when it
re-writes a "+" output constraint to separate "=" output and matching
input constraint.

PR middle-end/66279
* gimplify.cc (gimplify_asm_expr): Copy TREE_PURPOSE before
rewriting it for "+" processing.

* g++.dg/pr66279.C: New testcase.

(cherry picked from commit 95f5d6cc17e7d6b689674756c62b6b5e1284afd0)

Diff:
---
 gcc/gimplify.cc|  1 +
 gcc/testsuite/g++.dg/pr66279.C | 23 +++
 2 files changed, 24 insertions(+)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index bb9086bb213e..62dad63fdadd 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -6726,6 +6726,7 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p)
  /* Turn the in/out constraint into an output constraint.  */
  char *p = xstrdup (constraint);
  p[0] = '=';
+ TREE_PURPOSE (link) = unshare_expr (TREE_PURPOSE (link));
  TREE_VALUE (TREE_PURPOSE (link)) = build_string (constraint_len, p);
 
  /* And add a matching input constraint.  */
diff --git a/gcc/testsuite/g++.dg/pr66279.C b/gcc/testsuite/g++.dg/pr66279.C
new file mode 100644
index ..c878044a83b3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr66279.C
@@ -0,0 +1,23 @@
+// { dg-do run }
+
+struct A {};
+
+struct B : public virtual A
+{
+  B();
+};
+
+B::B()
+{
+  unsigned int x = 42;
+
+  __asm__ __volatile__ ("" : "+r"(x));
+
+  if (x != 42)
+__builtin_abort ();
+}
+
+int main()
+{
+  B b;
+}


[gcc r13-9637] c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c0e3078580f4834e4d3d1ba517016a0220b4e3b6

commit r13-9637-gc0e3078580f4834e4d3d1ba517016a0220b4e3b6
Author: Richard Biener 
Date:   Mon Feb 3 11:27:20 2025 +0100

c++/79786 - bogus invocation of DATA_ABI_ALIGNMENT macro

The first argument is supposed to be a type, not a decl.

PR c++/79786
gcc/cp/
* rtti.cc (emit_tinfo_decl): Fix DATA_ABI_ALIGNMENT invocation.

(cherry picked from commit 6ec19825b4e72611cdbd4749feed67b61392aa81)

Diff:
---
 gcc/cp/rtti.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/rtti.cc b/gcc/cp/rtti.cc
index 7878929c2467..8f9ae106d200 100644
--- a/gcc/cp/rtti.cc
+++ b/gcc/cp/rtti.cc
@@ -1739,7 +1739,8 @@ emit_tinfo_decl (tree decl)
   /* Avoid targets optionally bumping up the alignment to improve
 vector instruction accesses, tinfo are never accessed this way.  */
 #ifdef DATA_ABI_ALIGNMENT
-  SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (decl, TYPE_ALIGN (TREE_TYPE 
(decl))));
+  SET_DECL_ALIGN (decl, DATA_ABI_ALIGNMENT (TREE_TYPE (decl),
+   TYPE_ALIGN (TREE_TYPE (decl))));
   DECL_USER_ALIGN (decl) = true;
 #endif
   return true;


[gcc r15-9636] libstdc++: Add missing export for std::is_layout_compatible_v [PR120159]

2025-05-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:d0e6d797b35ae89044f967160d4abe3868190f0f

commit r15-9636-gd0e6d797b35ae89044f967160d4abe3868190f0f
Author: Jonathan Wakely 
Date:   Wed May 7 17:06:11 2025 +0100

libstdc++: Add missing export for std::is_layout_compatible_v [PR120159]

libstdc++-v3/ChangeLog:

PR libstdc++/120159
* src/c++23/std.cc.in (is_layout_compatible_v): Export.

(cherry picked from commit 8a1f3615c01cdbf5b2d37448c8bb09a96d5e3330)

Diff:
---
 libstdc++-v3/src/c++23/std.cc.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in
index 930a489ff44b..de34e649bba9 100644
--- a/libstdc++-v3/src/c++23/std.cc.in
+++ b/libstdc++-v3/src/c++23/std.cc.in
@@ -3107,6 +3107,7 @@ export namespace std
 #if __cpp_lib_is_layout_compatible
   using std::is_corresponding_member;
   using std::is_layout_compatible;
+  using std::is_layout_compatible_v;
 #endif
 #if __cpp_lib_is_pointer_interconvertible
   using std::is_pointer_interconvertible_base_of;


[gcc r16-453] libfortran: Add 5 missing UNSIGNED symbols [PR120153]

2025-05-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:e0c783f31b39a56cbfcc27d84633f9254b5b508d

commit r16-453-ge0c783f31b39a56cbfcc27d84633f9254b5b508d
Author: Jakub Jelinek 
Date:   Wed May 7 18:47:50 2025 +0200

libfortran: Add 5 missing UNSIGNED symbols [PR120153]

While looking at PR120152, I have noticed that libgfortran.so doesn't
export 5 *m16* symbols I would have expected that should be exported.
This is caused by 2 issues, one filename was forgotten to be added in 
r15-4124
to i_maxloc1_c (guess because generated/maxloc1_16_i16.c was kept in the
position after generated/maxloc1_8_m16.c and the i -> m difference wasn't
spotted), and the other was a garbage prefix on the HAVE_GFC_UINTEGER_16 macro.

The first two hunks of this patch fix that.
Though, as GCC 15.1 has been released already, we can't add these symbols
to GFORTRAN_15 symbol version as they've never been there, so the patch
adds them to a new GFORTRAN_15.2 symbol version instead.

2025-05-07  Jakub Jelinek  

PR libfortran/120153
* Makefile.am (i_maxloc1_c): Add generated/maxloc1_16_m16.c.
* intrinsics/random.c (arandom_m16): Use #ifdef HAVE_GFC_UINTEGER_16
guard rather than #ifdef GFC_HAVE_GFC_UINTEGER_16.
* gfortran.map (GFORTRAN_15): Remove _gfortran_arandom_m16,
_gfortran_maxloc1_16_m16, _gfortran_mmaxloc1_16_m16 and
_gfortran_smaxloc1_16_m16.
(GFORTRAN_15.2): New symbol version, add those 4 symbols to it.
* generated/maxloc1_16_m16.c: New file.
* Makefile.in: Regenerate.

Diff:
---
 libgfortran/Makefile.am|   1 +
 libgfortran/Makefile.in|  21 +-
 libgfortran/generated/maxloc1_16_m16.c | 591 +
 libgfortran/gfortran.map   |  12 +-
 libgfortran/intrinsics/random.c|   2 +-
 5 files changed, 614 insertions(+), 13 deletions(-)

diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am
index 073af6af0239..60aa949fb629 100644
--- a/libgfortran/Makefile.am
+++ b/libgfortran/Makefile.am
@@ -420,6 +420,7 @@ generated/maxloc1_8_m8.c \
 generated/maxloc1_16_m8.c \
 generated/maxloc1_4_m16.c \
 generated/maxloc1_8_m16.c \
+generated/maxloc1_16_m16.c \
 generated/maxloc1_4_r4.c \
 generated/maxloc1_8_r4.c \
 generated/maxloc1_16_r4.c \
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index e8e3e5af082d..c171b3d62192 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -275,14 +275,15 @@ am__objects_8 = generated/maxloc1_4_i1.lo 
generated/maxloc1_8_i1.lo \
generated/maxloc1_8_m4.lo generated/maxloc1_16_m4.lo \
generated/maxloc1_4_m8.lo generated/maxloc1_8_m8.lo \
generated/maxloc1_16_m8.lo generated/maxloc1_4_m16.lo \
-   generated/maxloc1_8_m16.lo generated/maxloc1_4_r4.lo \
-   generated/maxloc1_8_r4.lo generated/maxloc1_16_r4.lo \
-   generated/maxloc1_4_r8.lo generated/maxloc1_8_r8.lo \
-   generated/maxloc1_16_r8.lo generated/maxloc1_4_r10.lo \
-   generated/maxloc1_8_r10.lo generated/maxloc1_16_r10.lo \
-   generated/maxloc1_4_r16.lo generated/maxloc1_8_r16.lo \
-   generated/maxloc1_16_r16.lo generated/maxloc1_4_r17.lo \
-   generated/maxloc1_8_r17.lo generated/maxloc1_16_r17.lo
+   generated/maxloc1_8_m16.lo generated/maxloc1_16_m16.lo \
+   generated/maxloc1_4_r4.lo generated/maxloc1_8_r4.lo \
+   generated/maxloc1_16_r4.lo generated/maxloc1_4_r8.lo \
+   generated/maxloc1_8_r8.lo generated/maxloc1_16_r8.lo \
+   generated/maxloc1_4_r10.lo generated/maxloc1_8_r10.lo \
+   generated/maxloc1_16_r10.lo generated/maxloc1_4_r16.lo \
+   generated/maxloc1_8_r16.lo generated/maxloc1_16_r16.lo \
+   generated/maxloc1_4_r17.lo generated/maxloc1_8_r17.lo \
+   generated/maxloc1_16_r17.lo
 am__objects_9 = generated/maxval_i1.lo generated/maxval_i2.lo \
generated/maxval_i4.lo generated/maxval_i8.lo \
generated/maxval_i16.lo generated/maxval_m1.lo \
@@ -1227,6 +1228,7 @@ generated/maxloc1_8_m8.c \
 generated/maxloc1_16_m8.c \
 generated/maxloc1_4_m16.c \
 generated/maxloc1_8_m16.c \
+generated/maxloc1_16_m16.c \
 generated/maxloc1_4_r4.c \
 generated/maxloc1_8_r4.c \
 generated/maxloc1_16_r4.c \
@@ -2358,6 +2360,8 @@ generated/maxloc1_4_m16.lo: generated/$(am__dirstamp) \
generated/$(DEPDIR)/$(am__dirstamp)
 generated/maxloc1_8_m16.lo: generated/$(am__dirstamp) \
generated/$(DEPDIR)/$(am__dirstamp)
+generated/maxloc1_16_m16.lo: generated/$(am__dirstamp) \
+   generated/$(DEPDIR)/$(am__dirstamp)
 generated/maxloc1_4_r4.lo: generated/$(am__dirstamp) \
generated/$(DEPDIR)/$(am__dirstamp)
 generated/maxloc1_8_r4.lo: generated/$(am__dirstamp) \
@@ -4216,6 +4220,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ 
@am__quote@generated/$(DEPDIR)/maxloc1_16_i4.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ 
@am__quote@generated/$(DEPDIR)/maxloc1_16_i8.Plo@am__quote@
 @AMDEP

[gcc r16-454] arm: Only reverse FP inequalities when -ffinite-math-only [PR110796...]

2025-05-07 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:0a339746e7646bacf2c8aa5512268d23660f26f9

commit r16-454-g0a339746e7646bacf2c8aa5512268d23660f26f9
Author: Richard Earnshaw 
Date:   Fri Mar 28 12:59:03 2025 +0000

arm: Only reverse FP inequalities when -ffinite-math-only [PR110796...]

On Arm we have been failing to fully implement support for IEEE NaNs
in inequality comparisons because we have allowed reversing of
inequalities in a way that allows SELECT_CC_MODE to produce different
answers.  For example, the reverse of GT is UNLE, but if we pass these
two RTL codes to SELECT_CC_MODE, the former will return CCFPEmode,
while the latter CCFPmode.

It would be possible to allow fully reversible FPmodes, but to do so
would involve adding yet more RTL codes, something like NOT_GT and
NOT_UNLE, for the cases we cannot currently reverse.  NOT_GT would
then have the same condition code generation as UNLT, but the same
mode selection as GT.

In the meantime, we need to restrict REVERSIBLE_CC_MODE to
non-floating modes unless we are compiling with -ffinite-math-only.  In
that case we can continue to reverse the comparisons, but now we want
to always select CCFPmode as there's no need to consider the exception
raising cases.

PR target/110796
PR target/118446

gcc/ChangeLog:

* config/arm/arm.h (REVERSIBLE_CC_MODE): FP modes are only
reversible if flag_finite_math_only.
* config/arm/arm.cc (arm_select_cc_mode): Return CCFPmode for all
FP comparisons if flag_finite_math_only.

gcc/testsuite/ChangeLog:

* gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust due to no-longer
emitting VCMPE when -ffast-math.

Diff:
---
 gcc/config/arm/arm.cc   | 4 +++-
 gcc/config/arm/arm.h| 6 +-
 gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c | 3 +--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 670f487bcce3..fccddb0e7bc5 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -16218,7 +16218,9 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
case LE:
case GT:
case GE:
- return CCFPEmode;
+ return (flag_finite_math_only
+ ? CCFPmode
+ : CCFPEmode);
 
default:
  gcc_unreachable ();
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8472b7561272..08d3f0dae3da 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2257,7 +2257,11 @@ extern int making_const_table;
 
 #define SELECT_CC_MODE(OP, X, Y)  arm_select_cc_mode (OP, X, Y)
 
-#define REVERSIBLE_CC_MODE(MODE) 1
+/* Floating-point modes cannot be reversed unless we don't care about
+   NaNs.  */
+#define REVERSIBLE_CC_MODE(MODE)   \
+  (flag_finite_math_only   \
+   || !((MODE) == CCFPmode || (MODE) == CCFPEmode))
 
 #define REVERSE_CONDITION(CODE,MODE) \
   (((MODE) == CCFPmode || (MODE) == CCFPEmode) \
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
index 52b87376dc78..f3fea524809e 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
@@ -106,8 +106,7 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
 /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 
} }  */
 
 /* For float16_t.  */
-/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } }  */
-/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } }  */
+/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 6 } }  */
 
 /* For float16x4_t.  */
 /* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } }  */


[gcc r16-452] libfortran: Readd 15 accidentally removed libgfortran symbols [PR120152]

2025-05-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:41c8e9b61defb6c616bf35ac9693a52f848afca3

commit r16-452-g41c8e9b61defb6c616bf35ac9693a52f848afca3
Author: Jakub Jelinek 
Date:   Wed May 7 18:46:51 2025 +0200

libfortran: Readd 15 accidentally removed libgfortran symbols [PR120152]

The r15-4124-gc0002a675a92e76d change seems to have accidentally
dropped 5 sourcefiles from i_maxloc1_c, which resulted in dropping
15 GFORTRAN_8 symbols on x86_64 and 6 on i686.

The following patch adds it back, so that we export those symbols
again, fixing the ABI problem.

2025-05-07  Jakub Jelinek  

PR libfortran/120152
* Makefile.am (i_maxloc1_c): Readd generated/maxloc1_4_i8.c,
generated/maxloc1_8_i8.c, generated/maxloc1_16_i8.c,
generated/maxloc1_4_i16.c, generated/maxloc1_8_i16.c.  Move
generated/maxloc1_16_i16.c entry earlier in the list.
* Makefile.in: Regenerated.

Diff:
---
 libgfortran/Makefile.am |  7 ++-
 libgfortran/Makefile.in | 48 +++-
 2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am
index 21b35c76a06d..073af6af0239 100644
--- a/libgfortran/Makefile.am
+++ b/libgfortran/Makefile.am
@@ -400,6 +400,12 @@ generated/maxloc1_16_i2.c \
 generated/maxloc1_4_i4.c \
 generated/maxloc1_8_i4.c \
 generated/maxloc1_16_i4.c \
+generated/maxloc1_4_i8.c \
+generated/maxloc1_8_i8.c \
+generated/maxloc1_16_i8.c \
+generated/maxloc1_4_i16.c \
+generated/maxloc1_8_i16.c \
+generated/maxloc1_16_i16.c \
 generated/maxloc1_4_m1.c \
 generated/maxloc1_8_m1.c \
 generated/maxloc1_16_m1.c \
@@ -414,7 +420,6 @@ generated/maxloc1_8_m8.c \
 generated/maxloc1_16_m8.c \
 generated/maxloc1_4_m16.c \
 generated/maxloc1_8_m16.c \
-generated/maxloc1_16_i16.c \
 generated/maxloc1_4_r4.c \
 generated/maxloc1_8_r4.c \
 generated/maxloc1_16_r4.c \
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index 6a63d8876b18..e8e3e5af082d 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -265,22 +265,24 @@ am__objects_8 = generated/maxloc1_4_i1.lo 
generated/maxloc1_8_i1.lo \
generated/maxloc1_16_i1.lo generated/maxloc1_4_i2.lo \
generated/maxloc1_8_i2.lo generated/maxloc1_16_i2.lo \
generated/maxloc1_4_i4.lo generated/maxloc1_8_i4.lo \
-   generated/maxloc1_16_i4.lo generated/maxloc1_4_m1.lo \
+   generated/maxloc1_16_i4.lo generated/maxloc1_4_i8.lo \
+   generated/maxloc1_8_i8.lo generated/maxloc1_16_i8.lo \
+   generated/maxloc1_4_i16.lo generated/maxloc1_8_i16.lo \
+   generated/maxloc1_16_i16.lo generated/maxloc1_4_m1.lo \
generated/maxloc1_8_m1.lo generated/maxloc1_16_m1.lo \
generated/maxloc1_4_m2.lo generated/maxloc1_8_m2.lo \
generated/maxloc1_16_m2.lo generated/maxloc1_4_m4.lo \
generated/maxloc1_8_m4.lo generated/maxloc1_16_m4.lo \
generated/maxloc1_4_m8.lo generated/maxloc1_8_m8.lo \
generated/maxloc1_16_m8.lo generated/maxloc1_4_m16.lo \
-   generated/maxloc1_8_m16.lo generated/maxloc1_16_i16.lo \
-   generated/maxloc1_4_r4.lo generated/maxloc1_8_r4.lo \
-   generated/maxloc1_16_r4.lo generated/maxloc1_4_r8.lo \
-   generated/maxloc1_8_r8.lo generated/maxloc1_16_r8.lo \
-   generated/maxloc1_4_r10.lo generated/maxloc1_8_r10.lo \
-   generated/maxloc1_16_r10.lo generated/maxloc1_4_r16.lo \
-   generated/maxloc1_8_r16.lo generated/maxloc1_16_r16.lo \
-   generated/maxloc1_4_r17.lo generated/maxloc1_8_r17.lo \
-   generated/maxloc1_16_r17.lo
+   generated/maxloc1_8_m16.lo generated/maxloc1_4_r4.lo \
+   generated/maxloc1_8_r4.lo generated/maxloc1_16_r4.lo \
+   generated/maxloc1_4_r8.lo generated/maxloc1_8_r8.lo \
+   generated/maxloc1_16_r8.lo generated/maxloc1_4_r10.lo \
+   generated/maxloc1_8_r10.lo generated/maxloc1_16_r10.lo \
+   generated/maxloc1_4_r16.lo generated/maxloc1_8_r16.lo \
+   generated/maxloc1_16_r16.lo generated/maxloc1_4_r17.lo \
+   generated/maxloc1_8_r17.lo generated/maxloc1_16_r17.lo
 am__objects_9 = generated/maxval_i1.lo generated/maxval_i2.lo \
generated/maxval_i4.lo generated/maxval_i8.lo \
generated/maxval_i16.lo generated/maxval_m1.lo \
@@ -1205,6 +1207,12 @@ generated/maxloc1_16_i2.c \
 generated/maxloc1_4_i4.c \
 generated/maxloc1_8_i4.c \
 generated/maxloc1_16_i4.c \
+generated/maxloc1_4_i8.c \
+generated/maxloc1_8_i8.c \
+generated/maxloc1_16_i8.c \
+generated/maxloc1_4_i16.c \
+generated/maxloc1_8_i16.c \
+generated/maxloc1_16_i16.c \
 generated/maxloc1_4_m1.c \
 generated/maxloc1_8_m1.c \
 generated/maxloc1_16_m1.c \
@@ -1219,7 +1227,6 @@ generated/maxloc1_8_m8.c \
 generated/maxloc1_16_m8.c \
 generated/maxloc1_4_m16.c \
 generated/maxloc1_8_m16.c \
-generated/maxloc1_16_i16.c \
 generated/maxloc1_4_r4.c \
 generated/maxloc1_8_r4.c \
 generated/maxloc1_16_r4.c \
@@ -2311,6 +2318,18 @@ generated/maxloc1_

[gcc r16-455] arm: select CCFPEmode for LTGT [PR91323]

2025-05-07 Thread Richard Earnshaw via Gcc-cvs
https://gcc.gnu.org/g:fe10ca6e3cf583640155812b230a0153ce4dc7b7

commit r16-455-gfe10ca6e3cf583640155812b230a0153ce4dc7b7
Author: Richard Earnshaw 
Date:   Mon Mar 31 18:06:54 2025 +0100

arm: select CCFPEmode for LTGT [PR91323]

Besides Arm, there are three other ports that define both CCFPmode and
CCFPEmode.  AArch64 and Sparc return CCFPEmode for LTGT; the other,
Visium, doesn't support LTGT at all.

AArch64 was changed in r8-5286-g8332c5ee8c5f3b, and Sparc with
r10-2926-g000a5f8d23c04c.

I suspect this issue is latent on Arm because cbranch?f4 and cstore?f4
reject LTGT and UNEQ and we fall back to a generic expansion which
happens to work.  Nevertheless, this patch updates the relevant bits
of the Arm port to match the specification introduced in
r10-2926-g000a5f8d23c04c.

gcc/ChangeLog:

PR target/91323
* config/arm/arm.cc (arm_select_cc_mode): Use CCFPEmode for LTGT.

Diff:
---
 gcc/config/arm/arm.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index fccddb0e7bc5..6bdb68aa7881 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -16211,13 +16211,13 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
case UNGT:
case UNGE:
case UNEQ:
-   case LTGT:
  return CCFPmode;
 
case LT:
case LE:
case GT:
case GE:
+   case LTGT:
  return (flag_finite_math_only
  ? CCFPmode
  : CCFPEmode);


[gcc/aoliva/heads/testme] (677 commits) [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 24d496c01070... [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for

It previously pointed to:

 be268a2c399d... [riscv] vec_dup immediate constants in pred_broadcast expan

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  be268a2... [riscv] vec_dup immediate constants in pred_broadcast expan
  f802912... [testsuite] [ppc] use dg-do-if in vec-mul.c
  edcce17... [testsuite] [ppc] require ifunc for target_clones test
  4f7f491... [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for
  b7c549d... [testsuite] [ppc] require vsx for vec-cmpne tests
  cad5de7... [testsuite] [ppc] tolerate -mfloat128 warning in pr99708.c
  19953f0... [testsuite] [ppc] allow implicit fuction declarations in pr
  ae68f0f... [testsuite] [ppc] add -mfloat128 to pr67808.c
  8e55b1c... [testsuite] [ppc] add -mdejagnu-cpu=power7 to pr17381.c
  d6bd882... [testsuite] [ppc] add -mno-strict-align to pr111449-1.c
  1484e4c... [testsuite] [ppc] add -maltivec to pr111380-2.c
  99caade... [testsuite] [ppc] skip -msoft-float tests when testing with
  750f45c... [testsuite] [ppc] disable -mpowerpc64 for various ilp32 asm
  8dd47ff... [testsuite] [ppc] newlib sets FE_VXSOFT on raise FE_INVALID
  7e3b458... [testsuite] [ppc] block-cmp-8 should require powerpc64
  8af5f7b... [testsuite] [ppc] add -mfloat128 to __ieee128-using bfp tes
  7375e3e... [testsuite] [ppc] xfail pr52451.c on ppc [PR58684]
  9e6f595... [testsuite] [ppc] pr87600, pr89313: test for __PPC__ as wel
  6b8e609... [testsuite] [ppc] ipa-sra-19.c: pass -Wno-psabi on powerpc-
  ee34cca... [testsuite] [ppc] require float128 available for copysign
  ade8fda... [testsuite] [ppc] compile [PR112822] with -mvsx


Summary of changes (added commits):
---

  24d496c... [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for
  90cfdd8... [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for
  e639772... [testsuite] [ppc] add -mpowerpc-gfxopt or -mcmpb to copysig
  611953b... [testsuite] [ppc] require float128 available for copysign
  316b10c... add explicit ABI and align options to pr88233.c
  5129a6a... [testsuite] [ppc] pr110071 requires power6 for shrink-wrapp
  0f3e0b4... [testsuite] [ppc] expect vectorization in gen-vect-11c.c
  1b6961e... [testsuite] [ppc] disable strict align for block-cmp-[14].c
  6bf32c9... [vxworks] wrap base/b_NULL.h to override NULL
  655e5e8... libstdc++-v3: testsuite: increase future/members/poll timin
  50ba118... libstdc++-v3: testsuite: lengthen stop_request wait_until t
  094d994... libstdc++-v3: no -latomic on vxworks
  266afd9... [testsuite] [vxworks] skip macros from implicitly-included 
  fd7e661... [testsuite] [vxworks] netinet includes atomic, reqs c++11
  674a643... [testsuite] [vxworks] add -gno-strict-dwarf to pr111409.c
  265317a... [testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT
  4ae3add... vxworks: libstdc++: include ioLib.h for dup()
  0d50da6... vxworks: libgcc: include string.h for memset
  3bc96cf... vxworks: undefine TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL
  f14798d... [testsuite] [ppc] use dg-do-if in vec-mul.c
  952ab85... [testsuite] [ppc] require vsx for vec-cmpne tests
  f01e8cf... [testsuite] [ppc] tolerate -mfloat128 warning in pr99708.c
  b8404d6... [testsuite] [ppc] allow implicit fuction declarations in pr
  fdde782... [testsuite] [ppc] add -mfloat128 to pr67808.c
  a9ac375... [testsuite] [ppc] add -mdejagnu-cpu=power7 to pr17381.c
  504687c... [testsuite] [ppc] add -mno-strict-align to pr111449-1.c
  10cd85a... [testsuite] [ppc] add -maltivec to pr111380-2.c
  7ab274d... [testsuite] [ppc] skip -msoft-float tests when testing with
  99c2f2c... [testsuite] [ppc] disable -mpowerpc64 for various ilp32 asm
  ca80c28... [testsuite] [ppc] newlib sets FE_VXSOFT on raise FE_INVALID
  b4cdc19... [testsuite] [ppc] block-cmp-8 should require powerpc64
  e48cada... [testsuite] [ppc] add -mfloat128 to __ieee128-using bfp tes
  4354092... [testsuite] [ppc] xfail pr52451.c on ppc [PR58684]
  672acf7... [testsuite] [ppc] pr87600, pr89313: test for __PPC__ as wel
  9b13bea... Canonicalize vec_merge in simplify_ternary_operation (*)
  5b2b7bc... Daily bump. (*)
  49c4491... [RISC-V][PR target/120137][PR target/120154] Don't create o (*)
  974b079... [PATCH] RISC-V: Minimal support for zama16b extension. (*)
  fe10ca6... arm: select CCFPEmode for LTGT [PR91323] (*)
  0a33974... arm: Only reverse FP inequalities when -ffinite-math-only [ (*)
  e0c783f... libfortran: Add 5 missing UNSIGNED symbols [PR120153] (*)
  41c8e9b... ibfortran: Readd 15 accidentally removed libgfortran symbol (*)
  8a1f361... libstdc++: Add missing export for std::is_layout_compatible (*)
  edf745d... libcpp: Further fixes for incorrect line numbers in large f (*)
  d884e9c... gimple: Add gimple_with_undefined_signed_overflow and use i (*)
  8335fd5... Loop-IM: H

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] xfail pr52451.c on ppc [PR58684]

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:43540928eac60c4313b9bbe3b3e651582dfebfd5

commit 43540928eac60c4313b9bbe3b3e651582dfebfd5
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:31 2025 -0300

[testsuite] [ppc] xfail pr52451.c on ppc [PR58684]

Like pr91323.c, pr52451.c fails on all powerpc variants (except where
already skipped), because it uses fcmpu even when qNaNs should flag FP
exceptions.


for  gcc/testsuite/ChangeLog

PR target/58684
* gcc.dg/torture/pr52451.c: Expect execution fail on
powerpc*-*-*.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr52451.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr52451.c 
b/gcc/testsuite/gcc.dg/torture/pr52451.c
index aa19c1bc39a5..c611aeeb53f8 100644
--- a/gcc/testsuite/gcc.dg/torture/pr52451.c
+++ b/gcc/testsuite/gcc.dg/torture/pr52451.c
@@ -1,4 +1,5 @@
-/* { dg-do run } */
+/* { dg-do run { xfail powerpc*-*-* } } */
+/* remove the xfail for powerpc when pr58684 is fixed */
 /* { dg-add-options ieee } */
 /* { dg-require-effective-target fenv_exceptions_long_double } */
 /* { dg-skip-if "fenv" { powerpc-ibm-aix* } } */


[gcc/aoliva/heads/testbase] (643 commits) Canonicalize vec_merge in simplify_ternary_operation

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 9b13bea07706... Canonicalize vec_merge in simplify_ternary_operation

It previously pointed to:

 5957b9919c9e... c++: nested lambda capture pack [PR119345]

Diff:

Summary of changes (added commits):
---

  9b13bea... Canonicalize vec_merge in simplify_ternary_operation (*)
  5b2b7bc... Daily bump. (*)
  49c4491... [RISC-V][PR target/120137][PR target/120154] Don't create o (*)
  974b079... [PATCH] RISC-V: Minimal support for zama16b extension. (*)
  fe10ca6... arm: select CCFPEmode for LTGT [PR91323] (*)
  0a33974... arm: Only reverse FP inequalities when -ffinite-math-only [ (*)
  e0c783f... libfortran: Add 5 missing UNSIGNED symbols [PR120153] (*)
  41c8e9b... ibfortran: Readd 15 accidentally removed libgfortran symbol (*)
  8a1f361... libstdc++: Add missing export for std::is_layout_compatible (*)
  edf745d... libcpp: Further fixes for incorrect line numbers in large f (*)
  d884e9c... gimple: Add gimple_with_undefined_signed_overflow and use i (*)
  8335fd5... Loop-IM: Hoist (non-expensive) stmts to executed all loop w (*)
  2c8d632... i386: implement costs for float<->int conversions in ix86_v (*)
  210d065... AArch64: Fold SVE load/store with certain ptrue patterns to (*)
  9565076... libgomp.fortran/map-alloc-comp-9{,-usm}.f90: Add unified_sh (*)
  0a5855d... libstdc++: Fix module std export for std::extents (*)
  0deefb9... libstdc++: Add tests for std::extents. (*)
  a33b5db... libstdc++: Implement std::extents [PR107761]. (*)
  4c9eef7... libstdc++: Add header mdspan to the build-system. (*)
  369c439... libstdc++: Setup internal FTM for mdspan. (*)
  15edd7d... s390: Add cstoreti4 expander (*)
  52f6ab5... libstdc++: Fix width computation for the chrono formatting  (*)
  83ef989... libstdc++: Remove use of undefined GLIBCXX_LANG_{PUSH,POP}  (*)
  b8c4b6a... x86: Insert extra move for mode size smaller than natural s (*)
  ae987ba... Fix name mismatch for fortran. (*)
  625b805... Fortran: Source allocation of pure module function rejected (*)
  2c46a74... [RISC-V] Avoid unnecessary andi with -1 argument (*)
  d4da447... Daily bump. (*)
  82126b2... [PATCH] RISC-V: Minimal support for sdtrig and ssstrict ext (*)
  aed2a44... [PATCH] RISC-V: Recognized svadu and svade extension (*)
  62ffaef... i386: Add costs for integer<->float conversions (*)
  d057163... Fortran: Fix ICE with use of c_associated. (*)
  86627fa... libstdc++: Rewrite atomic builtin checks [PR70560] (*)
  df1d436... libstdc++: Fix  parallel algos for move-only value (*)
  a067cbc... libstdc++: Fix dangling pointer in fs::path::operator+=(*th (*)
  ccf0b93... libstdc++: Fix -Wmismatched-tags warnings for _Safe_iterato (*)
  e7a2b8b... Fix PR 119928, formal arguments used to wrongly inferred fo (*)
  76c882e... ipa: Drop the default value of suffix parameter of create_c (*)
  1eaee43... ipa: Fix create_version_clone_with_body declaration and com (*)
  fb5829a... ipa: Do not emit info about temporary clones to ipa-clones  (*)
  6ecc2fe... Document option -fdump-ipa-clones (*)
  67e79da... libgcobol: Fix bootstrap for targets without program_invoca (*)
  f4fa41c... diagnostics: use diagnostic_option_id in one more place (*)
  52fe950... json: implement JSON pointer; use it in sarif-replay [PR117 (*)
  9fb44cc... diagnostics: support XML and JSON kinds of logical location (*)
  f25e178... sarif output: capture nesting of logical locations [PR11617 (*)
  8ab6899... diagnostics: add logical_location_manager; reimplement logi (*)
  bf6d854... libgdiagnostics: add accessors for diagnostic_logical_locat (*)
  4cd741d... [RISC-V][PR middle-end/114512] Recognize more bext idioms f (*)
  b3a3280... RISC-V: Add testcases for vec_duplicate + vadd.vv combine w (*)
  c10491e... RISC-V: Add testcases for vec_duplicate + vadd.vv combine w (*)
  1276430... RISC-V: Add testcases for vec_duplicate + vadd.vv combine w (*)
  2b5baad... RISC-V: Combine vec_duplicate + vadd.vv to vadd.vx on GR2VR (*)
  9e9eb78... RISC-V: Add gr2vr cost helper function (*)
  17c1602... RISC-V: Add new option --param=gpr2vr-cost= for rvv insn (*)
  ac9fec0... libstdc++: Add noexcept to some std::counted_iterator opera (*)
  76c3310... tree-optimization/115 - STLF fails with BB vectorizatio (*)
  8147560... gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074] (*)
  673d446... Fix i386 bootstrap on non-Windows platforms (*)
  5e363ff... tree-optimization/120031 - CTZ pattern matching fails a cas (*)
  16e301a... Allow a PCH to be mapped to a different address (*)
  0aea633... Implement Windows TLS (*)
  c397934... libgomp: Update SVE test (*)
  2572d46... libphobos: enable for sparc64-unknown-linux-gnu (*)
  e04de46... Daily bump. (*)
  05d75c5... [RISC-V][PR target/119971] Avoid losing shift count masking (*)
  c182f4d... i386: Do not use explicit operands for MOVS instructions [P (*)
  bb83283... PR modula2/120117: ICE when attempting to obtain the MAX of (*)
  7f285b7.

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] pr87600, pr89313: test for __PPC__ as well

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:672acf7095f9fb60f0707289e6467e5cf995de49

commit 672acf7095f9fb60f0707289e6467e5cf995de49
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:28 2025 -0300

[testsuite] [ppc] pr87600, pr89313: test for __PPC__ as well

gcc.dg/pr87600.h and gcc.dg/pr89313.c test for __powerpc__ and
__POWERPC__ to choose ppc register names, but ppc-elf defines neither;
it defines __PPC__, so test for that as well.


for  gcc/testsuite/ChangeLog

* gcc.dg/pr87600.h (REG1, REG2): Test for __PPC__ as well.
* gcc.dg/pr89313.c (REG): Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/pr87600.h | 2 +-
 gcc/testsuite/gcc.dg/pr89313.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr87600.h b/gcc/testsuite/gcc.dg/pr87600.h
index af91f6345cd4..c89071eb7891 100644
--- a/gcc/testsuite/gcc.dg/pr87600.h
+++ b/gcc/testsuite/gcc.dg/pr87600.h
@@ -7,7 +7,7 @@
 #elif defined (__i386__)
 # define REG1 "%eax"
 # define REG2 "%edx"
-#elif defined (__powerpc__) || defined (__POWERPC__)
+#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__)
 # define REG1 "r3"
 # define REG2 "r4"
 #elif defined (__s390__)
diff --git a/gcc/testsuite/gcc.dg/pr89313.c b/gcc/testsuite/gcc.dg/pr89313.c
index 76cb0910b967..7de64da6f5b5 100644
--- a/gcc/testsuite/gcc.dg/pr89313.c
+++ b/gcc/testsuite/gcc.dg/pr89313.c
@@ -8,7 +8,7 @@
 # define REG "r0"
 #elif defined (__i386__)
 # define REG "%eax"
-#elif defined (__powerpc__) || defined (__POWERPC__)
+#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__)
 # define REG "r3"
 #elif defined (__s390__)
 # define REG "0"


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] disable -mpowerpc64 for various ilp32 asm-out checks

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:99c2f2cb897928a49c85e2486cc0c9551208b20f

commit 99c2f2cb897928a49c85e2486cc0c9551208b20f
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:44 2025 -0300

[testsuite] [ppc] disable -mpowerpc64 for various ilp32 asm-out checks

Multiple tests on ilp32 get TARGET_POWERPC64 enabled by -mdejagnu-cpu
options, but the results they expect are only attained without
enabling it, so disable it explicitly.


for  gcc/testsuite/ChangeLog

* gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c:
Compile with -mno-powerpc64 on ilp32.
* gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/builtins-1.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p7.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-insert-int-p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fold-vec-insert-short-p9.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/fusion-p10-logadd.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/loop_align.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/ppc-target-4.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/pr79251.p7.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/pr79251.p8.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/pr79251.p9.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/pr96933-2.c: Likewise.
* gcc/testsuite/gcc.target/powerpc/vsu/vec-xl-len-13.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/vsu/vec-xst-len-13.c:
Likewise.
* gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/builtins-1.c   | 2 ++
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p8.c | 3 ++-
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-char.p9.c | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p7.c   | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p8.c   | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p7.c| 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-float.p8.c| 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p7.c  | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c  | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p9.c  | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p7.c | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-longlong.p8.c | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c| 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p8.c| 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-insert-char-p9.c  | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-insert-float-p8.c | 1 +
 gcc/testsuite/gcc.target/powerpc/fold-vec-inse

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] skip -msoft-float tests when testing with -mhard-float

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:7ab274dfe7945bd4043c7801fb085e6c66d1

commit 7ab274dfe7945bd4043c7801fb085e6c66d1
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:48 2025 -0300

[testsuite] [ppc] skip -msoft-float tests when testing with -mhard-float

Testing ppc-elf with -mhard-float conflicts with explicit -msoft-float
in gcc.target/powerpc/ppc-fma-6.c and gcc.target/powerpc/pr105334.c.
Skip these tests under -mhard-float.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/ppc-fma-6.c: Skip on -mhard-float.
* gcc.target/powerpc/pr105334.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c | 1 +
 gcc/testsuite/gcc.target/powerpc/pr105334.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
index 9d6c15300970..f29d84545e87 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
@@ -4,6 +4,7 @@
 /* { dg-options "-O2 -mdejagnu-cpu=power5 -std=c99 -msoft-float" } */
 /* { dg-final { scan-assembler-not "fmadd" } } */
 /* { dg-final { scan-assembler-not "xsfmadd" } } */
+/* { dg-skip-if "" { *-*-* } { "-mhard-float" } } */
 
 /* Test whether -msoft-float turns off the macros math.h uses for
FP_FAST_FMA{,F,L}.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr105334.c b/gcc/testsuite/gcc.target/powerpc/pr105334.c
index 7664e033dd0d..2d7a1a8e50ba 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr105334.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr105334.c
@@ -2,6 +2,7 @@
incompatible and warns it.  */
 /* { dg-skip-if "aix long-double-128 soft-float" { powerpc*-*-aix* } } */
 /* { dg-options "-mlong-double-128 -msoft-float" } */
+/* { dg-skip-if "" { *-*-* } { "-mhard-float" } } */
 
 /* Verify there is no ICE.  */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] require vsx for vec-cmpne tests

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:952ab8532a24227c12455d84b9c59d6df8460ae6

commit 952ab8532a24227c12455d84b9c59d6df8460ae6
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:10 2025 -0300

[testsuite] [ppc] require vsx for vec-cmpne tests

The gcc.target/powerpc/vec-cmpne.c and .../vec-cmpne-runnable.c tests
need both vsx and vmx support, but vsx is taken for granted, which
doesn't hold on ppc-elf.  Add the appropriate requirements and
options.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/vec-cmpne.c: Require vsx compile-time
support.  Add -mvsx.
* gcc.target/powerpc/vec-cmpne-runnable.c: Require vsx runtime
support.  Add -mvsx.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vec-cmpne-runnable.c | 3 ++-
 gcc/testsuite/gcc.target/powerpc/vec-cmpne.c  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vec-cmpne-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-cmpne-runnable.c
index c7fff12c69e0..c072ef16b1c0 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-cmpne-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-cmpne-runnable.c
@@ -1,6 +1,7 @@
 /* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
 /* { dg-require-effective-target vmx_hw } */
-/* { dg-options "-maltivec -O2 " } */
+/* { dg-options "-mvsx -maltivec -O2 " } */
 
 /* Test that the vec_cmpne builtin works as expected.  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-cmpne.c b/gcc/testsuite/gcc.target/powerpc/vec-cmpne.c
index ad93abdb9c37..248981ddcea1 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-cmpne.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-cmpne.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-options "-maltivec -O2" } */
+/* { dg-options "-mvsx -maltivec -O2" } */
 /* { dg-require-effective-target powerpc_altivec } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
 
 /* Test that the vec_cmpne builtin generates the expected Altivec
instructions.  */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] allow implicit function declarations in pr92661.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:b8404d659d4d85fa52388e77083fab06a63d79dc

commit b8404d659d4d85fa52388e77083fab06a63d79dc
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:04 2025 -0300

[testsuite] [ppc] allow implicit function declarations in pr92661.c

gcc.target/powerpc/pr92661.c expects and tolerates errors about dfp
builtins when dfp is not supported, but the C front end no longer
accepts calls of undeclared functions by default, even with -w.
Adding -fpermissive would do, but I thought it would be too broad, so
I went for -Wno-error=implicit-function-declaration.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr92661.c: Allow implicit function decls.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr92661.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr92661.c b/gcc/testsuite/gcc.target/powerpc/pr92661.c
index d9500dbfdb71..0eb3c654d2d3 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92661.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92661.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-w -O2 -mdejagnu-cpu=power9" } */
+/* { dg-options "-w -O2 -mdejagnu-cpu=power9 -Wno-error=implicit-function-declaration" } */
 
 /* PR92661: The following tests should not ICE, regardless of
whether the target supports DFP or not.  */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] use dg-do-if in vec-mul.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:f14798d063b3a3390130b8777a18b1d4cba5b57d

commit f14798d063b3a3390130b8777a18b1d4cba5b57d
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:14 2025 -0300

[testsuite] [ppc] use dg-do-if in vec-mul.c

The overriding of dg-do in gcc.target/powerpc/vec-mul.c I put there
last year didn't quite work.  It needed the newly-added dg-do-if to
work the way I wished.  Fix it, and simplify it.

While at it, I found out that when target matched, dg-do-if didn't
call dg-do correctly, because it dropped the leading argument early
on.  Fix that.

Finally, I realized that the first uses of dg-do-if unintentionally
disabled non-x86 platforms, so add them back.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/vec-mul.c: Use dg-do-if to fix and
improve coverage.
* lib/target-supports-dg.exp (dg-do-if): Pass all args on to
dg-do.
* gcc.dg/vect/vect-simd-clone-16f.c: Also enable on !x86.
* gcc.dg/vect/vect-simd-clone-17f.c: Likewise.
* gcc.dg/vect/vect-simd-clone-18f.c: Likewise.
* gcc.dg/vect/vect-simd-clone-20.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c | 2 +-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c | 2 +-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c | 2 +-
 gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/vec-mul.c  | 4 ++--
 gcc/testsuite/lib/target-supports-dg.exp| 5 ++---
 6 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
index bb3b081b0e3d..5461fe241768 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
@@ -1,4 +1,4 @@
-/* { dg-do-if compile { target { sse2_runtime && { ! sse4_runtime } } } } */
+/* { dg-do-if compile { target { { ! x86 } || { sse2_runtime && { ! sse4_runtime } } } } } */
 /* { dg-require-effective-target vect_simd_clones } */
 /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-msse4" { target sse4 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
index 504465614c98..165c3cae92bf 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
@@ -1,4 +1,4 @@
-/* { dg-do-if compile { target { sse2_runtime && { ! sse4_runtime } } } } */
+/* { dg-do-if compile { target { { ! x86 } || { sse2_runtime && { ! sse4_runtime } } } } } */
 /* { dg-require-effective-target vect_simd_clones } */
 /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-msse4" { target sse4 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
index 0c418d432482..eda28856cad6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
@@ -1,4 +1,4 @@
-/* { dg-do-if compile { target { sse2_runtime && { ! sse4_runtime } } } } */
+/* { dg-do-if compile { target { { ! x86 } || { sse2_runtime && { ! sse4_runtime } } } } } */
 /* { dg-require-effective-target vect_simd_clones } */
 /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-msse4" { target sse4 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c
index 3e626fc4d4d5..bb32a1aab248 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-20.c
@@ -1,4 +1,4 @@
-/* { dg-do-if compile { target { sse2_runtime && { ! sse4_runtime } } } } */
+/* { dg-do-if compile { target { { ! x86 } || { sse2_runtime && { ! sse4_runtime } } } } } */
 /* { dg-require-effective-target vect_simd_clones } */
 /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-msse4" { target sse4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-mul.c b/gcc/testsuite/gcc.target/powerpc/vec-mul.c
index 915c92303274..0c2ffcf46f1a 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-mul.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-mul.c
@@ -1,5 +1,5 @@
-/* { dg-do compile { target { { ! vsx_hw } && powerpc_vsx } } } */
-/* { dg-do run { target vsx_hw } } */
+/* { dg-do compile { target powerpc_vsx } } */
+/* { dg-do-if run { target vsx_hw } } */
 /* { dg-options "-mvsx -O3" } */
 
 /* Test that the vec_mul builtin works as expected.  */
diff --git a/gcc/testsuite/lib/target-supports-dg.exp b/gcc/testsuite/lib/target-supports-dg.exp
index 422ea8380845..2dca8e15c429 100644
--- a/gcc/testsuite/lib/target-supports-dg.exp
+++ b/gcc/testsuite/lib/target-supports-dg.exp
@@ -422,9 +422,8 @@ proc check-flags { a

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] block-cmp-8 should require powerpc64

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:b4cdc19abcaeeba75bb71f2d705c62c34ba6bddb

commit b4cdc19abcaeeba75bb71f2d705c62c34ba6bddb
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:38 2025 -0300

[testsuite] [ppc] block-cmp-8 should require powerpc64

gcc.target/powerpc/block-cmp-8.c is an execution test on ilp32.  It
tests for support for the 64-bit ISA in the compiler, but not for the
ability to execute powerpc64 instructions, so the test fails on 32-bit
hardware.  Require powerpc64 instead.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/block-cmp-8.c: Require powerpc64
instruction execution support.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/block-cmp-8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-8.c b/gcc/testsuite/gcc.target/powerpc/block-cmp-8.c
index 22a48c8fadfc..0f353338 100644
--- a/gcc/testsuite/gcc.target/powerpc/block-cmp-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-8.c
@@ -1,6 +1,6 @@
 /* { dg-do run { target ilp32 } } */
 /* { dg-options "-O2 -mpowerpc64" } */
-/* { dg-require-effective-target has_arch_ppc64 } */
+/* { dg-require-effective-target powerpc64 } */
 /* { dg-timeout-factor 2 } */
 
 /* Verify memcmp on m32 mpowerpc64 */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] newlib sets FE_VXSOFT on raise FE_INVALID

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:ca80c28674a30108e0d79f6182fa46de43820183

commit ca80c28674a30108e0d79f6182fa46de43820183
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:41 2025 -0300

[testsuite] [ppc] newlib sets FE_VXSOFT on raise FE_INVALID

The implementation of the fe*except primitives in newlib sets the
FE_VXSOFT bit when raising FE_INVALID, and the test doesn't expect
that.  Skip it: the tested builtin expansions are for glibc only
anyway.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c:
Skip on newlib/ppc.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c b/gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c
index 28c2a00ec520..b9260a123d77 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c
@@ -1,6 +1,7 @@
 /* { dg-do run } */
 /* { dg-require-effective-target fenv_exceptions } */
 /* { dg-options "-lm -fno-builtin" } */
+/* { dg-skip-if "raise FE_INVALID sets FE_VXSOFT on newlib" { powerpc*-*-e* } } */
 
 /* This testcase ensures that the builtins are correctly expanded and match the
expected result.


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -mno-strict-align to pr111449-1.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:504687cfee4a16942386d87dc6ee7f4e285b6f93

commit 504687cfee4a16942386d87dc6ee7f4e285b6f93
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:54 2025 -0300

[testsuite] [ppc] add -mno-strict-align to pr111449-1.c

gcc.target/powerpc/pr111449-1.c's expected results only come about
without strict alignment, so disable it explicitly.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr111449-1.c: Add -mno-strict-align.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr111449-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr111449-1.c b/gcc/testsuite/gcc.target/powerpc/pr111449-1.c
index e65794c7ae76..1a81befa9689 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr111449-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr111449-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */
+/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2 -mno-strict-align" } */
 /* { dg-require-effective-target powerpc_vsx } */
 
 /* Ensure vector mode is used for 16-byte by pieces equality compare.  */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -mfloat128 to pr67808.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:fdde782c7689e8c1a2299c814fcd914cd0c40174

commit fdde782c7689e8c1a2299c814fcd914cd0c40174
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:01 2025 -0300

[testsuite] [ppc] add -mfloat128 to pr67808.c

gcc.target/powerpc/pr67808.c in some cases expects both 128-bit long
double types to be defined, but -mlong-double-128 doesn't guarantee
that without -mfloat128 on targets that would get the IEEE128 type as
long double.  Add -mfloat128 to ensure the desired IBM 128-bit
floating-point type is available as expected.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr67808.c: Add -mfloat128, and tolerate
its warning.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr67808.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr67808.c b/gcc/testsuite/gcc.target/powerpc/pr67808.c
index 4ddadb719461..a86574141d0c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr67808.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr67808.c
@@ -1,7 +1,8 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
-/* { dg-options "-O1 -mvsx -mdejagnu-cpu=power7 -mlong-double-128" } */
+/* { dg-options "-O1 -mvsx -mdejagnu-cpu=power7 -mfloat128 -mlong-double-128" } */
 /* { dg-require-effective-target powerpc_vsx } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /* PR 67808: LRA ICEs on simple double to long double conversion test case */
 
@@ -9,7 +10,7 @@
 /* If long double is IEEE 128-bit, we need to use the __ibm128 type instead of
long double.  We can't use __ibm128 on systems that don't support IEEE
128-bit floating point, because the type is not enabled on those
-   systems.  */
+   systems.  Without -mfloat128, the __ibm128 type may be undefined.  */
 #define LDOUBLE __ibm128
 
 #elif defined(__LONG_DOUBLE_IBM128__)


[gcc(refs/users/aoliva/heads/testme)] vxworks: undefine TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:3bc96cf6b3ba4579c2bdfe6a8c4404052c0c0353

commit 3bc96cf6b3ba4579c2bdfe6a8c4404052c0c0353
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:16 2025 -0300

vxworks: undefine TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL

config.gcc arranges for vxworks 7r2+ targets to include linux.h,
because of the similarity, but linux.h defines
TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL to a function declared in
linux-protos.h, and defined in linux.cc, neither of which vxworks
targets include.  Undefine it in vxworks.h.


for  gcc/ChangeLog

* config/vxworks.h (TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL):
Undefine.

Diff:
---
 gcc/config/vxworks.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
index 204a8e000d40..1ad4c1553ba9 100644
--- a/gcc/config/vxworks.h
+++ b/gcc/config/vxworks.h
@@ -433,3 +433,6 @@ extern void vxworks_emit_call_builtin___clear_cache (rtx begin, rtx end);
so silence the warning (instead of passing -flinker-output=nolto-rel).  */
 #undef LTO_PLUGIN_SPEC
 #define LTO_PLUGIN_SPEC "%{!mrtp:-plugin-opt=-linker-output-auto-nolto-rel}"
+
+/* Undo the linux.h definition.  */
+#undef TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -maltivec to pr111380-2.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:10cd85adbe9c6cacc1d4a9c2d6ff5ae982a3379e

commit 10cd85adbe9c6cacc1d4a9c2d6ff5ae982a3379e
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:51 2025 -0300

[testsuite] [ppc] add -maltivec to pr111380-2.c

gcc.target/powerpc/pr111380-2.c requires altivec to be enabled to hit
the expected option mismatch and inline error, so enable it after
checking for compiler support.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr111380-2.c: Add -maltivec, require it.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr111380-2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr111380-2.c b/gcc/testsuite/gcc.target/powerpc/pr111380-2.c
index 7b363940643b..bf7cb3a5c8bf 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr111380-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr111380-2.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */
-/* { dg-options "-O2 -mno-vsx" } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -mno-vsx -maltivec" } */
 
 /* Verify it emits error message on inlining even without LTO.  */


[gcc(refs/users/aoliva/heads/testme)] [vxworks] wrap base/b_NULL.h to override NULL

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:6bf32c902cab08f10c17782dcd207c4463339f02

commit 6bf32c902cab08f10c17782dcd207c4463339f02
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:47 2025 -0300

[vxworks] wrap base/b_NULL.h to override NULL

Some versions of vxworks define NULL to __nullptr in C++, assuming
C++11, which breaks at least a number of analyzer tests that get
exercised in C++98 mode.

Wrap the header that defines NULL so that, after including it, we
override the NULL definition with the one provided by stddef.h.

That required some infrastructure to enable subdirectories in extra
headers.  Since USER_H filenames appear as dependencies, that limits
the possibilities for markup, so I went for a filesystem-transparent
sequence that doesn't appear in any extra_headers whatsoever, namely
/././, to mark the beginning of the desired install name.


for  gcc/ChangeLog

* config/vxworks/base/b_NULL.h: New.
* config.gcc (extra_headers) <*-*-vxworks*>: Add it.
* Makefile.in (stmp-int-hdrs): Support /././ markers in USER_H
to mark the beginning of the install name.  Document.
* doc/sourcebuild.texi (Headers): Document /././ marker.

Diff:
---
 gcc/Makefile.in  | 21 -
 gcc/config.gcc   |  3 +++
 gcc/config/vxworks/base/b_NULL.h | 28 
 gcc/doc/sourcebuild.texi |  5 -
 4 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 55b4cd7dbed3..251c1ed3f006 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3475,9 +3475,10 @@ gcov-tool$(exeext): $(GCOV_TOOL_OBJS) $(LIBDEPS)
 stmp-int-hdrs: $(STMP_FIXINC) $(T_GLIMITS_H) $(T_STDINT_GCC_H) $(USER_H) 
fixinc_list
 # Copy in the headers provided with gcc.
 #
-# The sed command gets just the last file name component;
-# this is necessary because VPATH could add a dirname.
-# Using basename would be simpler, but some systems don't have it.
+# If the sequence /././ appears somewhere after srcdir in a USER_H
+# name, install the header with the name after the marker, even if
+# that name involves subdirectories.  Otherwise, install it as the
+# basename (but some systems don't have the basename program).
 #
 # The touch command is here to workaround an AIX/Linux NFS bug.
 #
@@ -3488,10 +3489,20 @@ stmp-int-hdrs: $(STMP_FIXINC) $(T_GLIMITS_H) 
$(T_STDINT_GCC_H) $(USER_H) fixinc_
-if [ -d include-fixed ] ; then true; else mkdir include-fixed; chmod 
a+rx include-fixed; fi
for file in .. $(USER_H); do \
  if [ X$$file != X.. ]; then \
-   realfile=`echo $$file | sed -e 's|.*/\([^/]*\)$$|\1|'`; \
+   case $$file in \
+ "$(srcdir)"*/././*) \
+   realfile=`echo $$file | sed -e 's|^.*/\./\./||'`; \
+   case $$realfile in \
+ */*) \
+   realdir=`echo $$realfile | sed -e 's|/[^/]*$$||'`; \
+   $(install_sh) -d include/$$realdir;; \
+   esac;; \
+ *) \
+   realfile=`echo $$file | sed -e 's|.*/\([^/]*\)$$|\1|'`;; \
+   esac; \
$(STAMP) include/$$realfile; \
rm -f include/$$realfile; \
-   cp $$file include; \
+   cp $$file include/$$realfile; \
chmod a+r include/$$realfile; \
  fi; \
done
diff --git a/gcc/config.gcc b/gcc/config.gcc
index afbf82fd2b8f..6c453192de0c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1126,6 +1126,9 @@ case ${target} in
   extra_headers="${extra_headers} ../vxworks/math.h ../vxworks/complex.h"
   extra_headers="${extra_headers} ../vxworks/inttypes.h ../vxworks/setjmp.h"
 
+  # The /././ sequence marks the beginning of the install name.
+  extra_headers="${extra_headers} ../vxworks/././base/b_NULL.h"
+
   # We provide (a tailored version of) stdint.h
   tm_file="${tm_file} vxworks-stdint.h"
   use_gcc_stdint=provide
diff --git a/gcc/config/vxworks/base/b_NULL.h b/gcc/config/vxworks/base/b_NULL.h
new file mode 100644
index ..d398ff265b72
--- /dev/null
+++ b/gcc/config/vxworks/base/b_NULL.h
@@ -0,0 +1,28 @@
+/* This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime L

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [vxworks] add -gno-strict-dwarf to pr111409.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:674a643474cf8531ddb5b866fc92963acc3ef52a

commit 674a643474cf8531ddb5b866fc92963acc3ef52a
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:28 2025 -0300

[testsuite] [vxworks] add -gno-strict-dwarf to pr111409.c

The expected macro debug information is not issued with
-gstrict-dwarf, and ports such as vxworks default to that.  Allow
non-strict dwarf for the test.


for  gcc/testsuite/ChangeLog

* gcc.dg/pr111409.c: Allow non-strict dwarf.

Diff:
---
 gcc/testsuite/gcc.dg/pr111409.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr111409.c b/gcc/testsuite/gcc.dg/pr111409.c
index 7d5190506624..d685879e64b9 100644
--- a/gcc/testsuite/gcc.dg/pr111409.c
+++ b/gcc/testsuite/gcc.dg/pr111409.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "split DWARF unsupported" { hppa*-*-hpux* powerpc*-ibm-aix* 
*-*-darwin* } } */
-/* { dg-options "-gsplit-dwarf -g3 -dA" } */
+/* { dg-options "-gsplit-dwarf -g3 -dA -gno-strict-dwarf" } */
 /* { dg-final { scan-assembler-times {\.section\t"?\.debug_macro} 1 } } */
 /* { dg-final { scan-assembler-not {\.byte\s+0x7\s*#\s*Import} } } */


[gcc(refs/users/aoliva/heads/testme)] libstdc++-v3: no -latomic on vxworks

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:094d994ebc2b5d7b8672ce96c375ea00bd7ae361

commit 094d994ebc2b5d7b8672ce96c375ea00bd7ae361
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:38 2025 -0300

libstdc++-v3: no -latomic on vxworks

libatomic is disabled on vxworks because it's part of libc, and not
very granular there, so a separately-built libatomic often triggers
link errors over duplicate definitions.

So, don't link with -latomic, but keep atomic tests enabled.

Unfortunately, some fence and flag primitives that are declared as
functions and then defined as macros are not defined as functions in
libc, so the tests for non-macro calls fail.  Expect those failures.


for  gcc/testsuite/ChangeLog

* gcc.dg/atomic/stdatomic-fence-2.c: Xfail on vxworks.
* gcc.dg/atomic/stdatomic-flag-2.c: Likewise.
* lib/atomic-dg.exp (atomic_init): Don't add -latomic on
vxworks.

Diff:
---
 gcc/testsuite/gcc.dg/atomic/stdatomic-fence-2.c | 1 +
 gcc/testsuite/gcc.dg/atomic/stdatomic-flag-2.c  | 1 +
 gcc/testsuite/lib/atomic-dg.exp | 6 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/atomic/stdatomic-fence-2.c 
b/gcc/testsuite/gcc.dg/atomic/stdatomic-fence-2.c
index 6916e89576c3..44d3c33dd50b 100644
--- a/gcc/testsuite/gcc.dg/atomic/stdatomic-fence-2.c
+++ b/gcc/testsuite/gcc.dg/atomic/stdatomic-fence-2.c
@@ -1,6 +1,7 @@
 /* Test atomic_*_fence routines for existence and execution with each
valid memory model.  Out-of-line function calls.  */
 /* { dg-do run } */
+/* { dg-do-if compile { target *-*-vxworks* } } */
 /* { dg-options "-std=c11 -pedantic-errors" } */
 
 #include 
diff --git a/gcc/testsuite/gcc.dg/atomic/stdatomic-flag-2.c 
b/gcc/testsuite/gcc.dg/atomic/stdatomic-flag-2.c
index e4e3a6ef33d7..b74a54e95918 100644
--- a/gcc/testsuite/gcc.dg/atomic/stdatomic-flag-2.c
+++ b/gcc/testsuite/gcc.dg/atomic/stdatomic-flag-2.c
@@ -2,6 +2,7 @@
function calls.  */
 /* The test needs a lockless atomic implementation.  */
 /* { dg-do run } */
+/* { dg-do-if compile { target *-*-vxworks* } } */
 /* { dg-options "-std=c11 -pedantic-errors" } */
 
 #include 
diff --git a/gcc/testsuite/lib/atomic-dg.exp b/gcc/testsuite/lib/atomic-dg.exp
index 83b225a4d3cc..b47282c50587 100644
--- a/gcc/testsuite/lib/atomic-dg.exp
+++ b/gcc/testsuite/lib/atomic-dg.exp
@@ -71,7 +71,11 @@ proc atomic_init { args } {
}
 }
 
-append link_flags " -latomic "
+if { [istarget *-*-vxworks*] } {
+   # vxworks provides libatomic as part of libc.
+} else {
+   append link_flags " -latomic "
+}
 
 if [info exists TEST_ALWAYS_FLAGS] {
set atomic_saved_TEST_ALWAYS_FLAGS $TEST_ALWAYS_FLAGS


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:265317a6ba090332e390e8256727cc0f7b9129a8

commit 265317a6ba090332e390e8256727cc0f7b9129a8
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:25 2025 -0300

[testsuite] [analyzer] [vxworks] define __STDC_WANT_LIB_EXT1__ to 1

vxworks' headers use #if instead of #ifdef to test for
__STDC_WANT_LIB_EXT1__, so the definition in the analyzer test
strtok-cppreference.c catches a bug there, but not something it's
meant to catch or that we could fix in GCC, so amend the definition to
sidestep the libc bug.


for  gcc/testsuite/ChangeLog

* c-c++-common/analyzer/strtok-cppreference.c
(__STDC_WANT_LIB_EXT1__): Define to 1.

Diff:
---
 gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c 
b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
index a396c643f116..96117276ffc3 100644
--- a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
+++ b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
@@ -13,7 +13,7 @@
 
 /* { dg-additional-options " -Wno-analyzer-too-complex 
-Wno-analyzer-symbol-too-complex" } */
 
-#define __STDC_WANT_LIB_EXT1__ 0
+#define __STDC_WANT_LIB_EXT1__ 1
 #include 
 #include 


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] disable strict align for block-cmp-[14].c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:1b6961ee7e81e69324be319b2cafb8b068a70dca

commit 1b6961ee7e81e69324be319b2cafb8b068a70dca
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:50 2025 -0300

[testsuite] [ppc] disable strict align for block-cmp-[14].c

The expected memcmp inline expansion assumes -mno-strict-align, so
make it explicit in case strict-align is enabled by default.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/block-cmp-1.c: Add -mno-strict-align.
* gcc.target/powerpc/block-cmp-4.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/block-cmp-1.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/block-cmp-4.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-1.c 
b/gcc/testsuite/gcc.target/powerpc/block-cmp-1.c
index cd076cf1dce4..0a563dedcb6c 100644
--- a/gcc/testsuite/gcc.target/powerpc/block-cmp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-vsx" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-vsx -mno-strict-align" } */
 /* { dg-skip-if "" { has_arch_ppc64 && ilp32 } } */
 /* { dg-final { scan-assembler-not {\mb[l]? memcmp\M} } }  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c 
b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
index ee9a015adae3..43550fd9fb9d 100644
--- a/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target be } } */
-/* { dg-options "-O2 -mdejagnu-cpu=power7" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power7 -mno-strict-align" } */
 /* { dg-skip-if "" { has_arch_ppc64 && ilp32 } } */
 /* { dg-final { scan-assembler-not {\mb[l]? memcmp\M} } } */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] expect vectorization in gen-vect-11c.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:0f3e0b4c9c57d32a46144fa8f328d023e1cadd4b

commit 0f3e0b4c9c57d32a46144fa8f328d023e1cadd4b
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:53 2025 -0300

[testsuite] [ppc] expect vectorization in gen-vect-11c.c

The first loop in main gets stores "vectorized" on powerpc into
full-word stores, even without any vector instruction support, so the
test's expectation of no loop vectorization is not met.


for  gcc/testsuite/ChangeLog

* gcc.dg/tree-ssa/gen-vect-11c.c: xfail the test for no
vectorization on powerpc*-*-*.

(cherry picked from commit 6069f500a0b6c320a12d240a508731aa001486d0)

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c 
b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
index 22ff44cf66da..116f6af23388 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
@@ -39,4 +39,4 @@ int main ()
 }
 
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail 
amdgcn*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail 
amdgcn*-*-* powerpc*-*-* } } } */


[gcc(refs/users/aoliva/heads/testme)] libstdc++-v3: testsuite: lengthen stop_request wait_until timeout

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:50ba118637038be979b16d2c53f16ca31243ad4c

commit 50ba118637038be979b16d2c53f16ca31243ad4c
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:41 2025 -0300

libstdc++-v3: testsuite: lengthen stop_request wait_until timeout

30_threads/condition_variable_any/stop_token/wait_on.cc's
test_wait_until occasionally fails on vxworks under very high load, in
a way that suggests wait_until times out before the main thread
requests it to stop.  Extend the timeouts to make more room for the
stop request.


for  libstdc++-v3/ChangeLog

* testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc
(test_wait_until): Extend the timeout for a stop request.

Diff:
---
 .../testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/libstdc++-v3/testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc
 
b/libstdc++-v3/testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc
index e069acde2fef..1ffc2a8cd848 100644
--- 
a/libstdc++-v3/testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc
+++ 
b/libstdc++-v3/testsuite/30_threads/condition_variable_any/stop_token/wait_on.cc
@@ -61,7 +61,7 @@ void test_wait_until(bool ck = true)
 
   std::stop_source src;
 
-  auto abst = std::chrono::steady_clock::now() + 1.0s;
+  auto abst = std::chrono::steady_clock::now() + (ck ? 5.0s : 1.0s);
   auto tok = src.get_token();
   std::thread t([ck, &ready, &mtx, &cv, abst, tok]
 {


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [vxworks] netinet includes atomic, reqs c++11

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:fd7e661fe39a0aa5bcc2136967b562d9d33b1bb1

commit fd7e661fe39a0aa5bcc2136967b562d9d33b1bb1
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:32 2025 -0300

[testsuite] [vxworks] netinet includes atomic, reqs c++11

On vxworks, the included netinet/in.h header indirectly includes
<atomic>, which fails on C++ <11.  Skip the test.


for  gcc/testsuite/ChangeLog

* c-c++-common/analyzer/fd-glibc-byte-stream-socket.c: Skip on
vxworks with C++ < 11.

Diff:
---
 gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c 
b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
index fd57d3b0894a..2a44e452127c 100644
--- a/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
+++ b/gcc/testsuite/c-c++-common/analyzer/fd-glibc-byte-stream-socket.c
@@ -5,6 +5,8 @@
 /* { dg-additional-options "-fno-exceptions" } */
 
 /* { dg-skip-if "" { hppa*-*-hpux* powerpc*-*-aix* } } */
+/* On vxworks, netinet/in.h indirectly includes atomic, that requires C++11.  
*/
+/* { dg-skip-if "" { *-*-vxworks* && { c++ && { ! c++11 } } } } */
 
 #include 
 #include 


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] tolerate -mfloat128 warning in pr99708.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:f01e8cf25655ee0fefdf30fce5acada019118ad7

commit f01e8cf25655ee0fefdf30fce5acada019118ad7
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:07 2025 -0300

[testsuite] [ppc] tolerate -mfloat128 warning in pr99708.c

gcc.target/powerpc/pr99708.c uses -mfloat128, and that causes the
usual "may not be fully supported" warning that we need to prune on
such tests.  Tolerate it.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr99708.c: Prune -mfloat128 warning.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr99708.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr99708.c 
b/gcc/testsuite/gcc.target/powerpc/pr99708.c
index 5d2f435a746b..8eb36f5c938f 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr99708.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr99708.c
@@ -2,6 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* powerpc-ibm-aix* } } */
 /* { dg-require-effective-target ppc_float128_sw } */
 /* { dg-options "-O2 -mvsx -mfloat128" } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /*
  * PR target/99708


[gcc(refs/users/aoliva/heads/testme)] vxworks: libstdc++: include ioLib.h for dup()

2025-05-07 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:4ae3add0e596776d8084524c48e8d8a60aedf894

commit 4ae3add0e596776d8084524c48e8d8a60aedf894
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:22 2025 -0300

vxworks: libstdc++: include ioLib.h for dup()

vxworks's dup function is not declared in unistd.h, but c++23/print.cc
expects to be able to call it if unistd.h is available.  On vxworks,
the function is only declared in ioLib.h, so arrange to include it.


for  libstdc++-v3/ChangeLog

* src/c++23/print.cc [__VXWORKS__]: Include ioLib.h.

Diff:
---
 libstdc++-v3/src/c++23/print.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc
index 8ba714059672..f34369950096 100644
--- a/libstdc++-v3/src/c++23/print.cc
+++ b/libstdc++-v3/src/c++23/print.cc
@@ -43,6 +43,10 @@
 # include   // isatty
 #endif
 
+#ifdef __VXWORKS__
+#include 
+#endif
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION


[gcc(refs/users/aoliva/heads/testme)] libstdc++-v3: testsuite: increase future/members/poll timing tolerance

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:655e5e878ecd84d4652c56e2a165164951eac29c

commit 655e5e878ecd84d4652c56e2a165164951eac29c
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:44 2025 -0300

libstdc++-v3: testsuite: increase future/members/poll timing tolerance

In 30_threads/future/members/poll.cc, despite the calibration and the
large tolerance, wait_until_sys_min has occasionally come up to almost
320 times as long as ready.  Tolerate that much measurement noise.


for  libstdc++-v3/ChangeLog

* testsuite/30_threads/future/members/poll.cc (main): Increase
tolerance for measurement noise.

Diff:
---
 libstdc++-v3/testsuite/30_threads/future/members/poll.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/testsuite/30_threads/future/members/poll.cc 
b/libstdc++-v3/testsuite/30_threads/future/members/poll.cc
index 2113cfde70d6..cffe4a331da7 100644
--- a/libstdc++-v3/testsuite/30_threads/future/members/poll.cc
+++ b/libstdc++-v3/testsuite/30_threads/future/members/poll.cc
@@ -154,13 +154,13 @@ int main()
 
   // Polling before ready using wait_until(min) should not be terribly
   // slow.  We hope for no more than 100x slower, but a little over
-  // 100x has been observed, and since the measurements may have a lot
+  // 316x has been observed, and since the measurements may have a lot
   // of noise, and increasing the measurement precision through
   // additional iterations would make the test run for too long on
   // systems with very low clock precision (60Hz clocks are not
   // unheard of), we tolerate a lot of error.
-  VERIFY( wait_until_sys_min < (ready * 200) );
-  VERIFY( wait_until_steady_min < (ready * 200) );
+  VERIFY( wait_until_sys_min < (ready * 320) );
+  VERIFY( wait_until_steady_min < (ready * 320) );
 
   // The following two tests fail with GCC 11, see
   // https://gcc.gnu.org/pipermail/libstdc++/2020-November/051422.html


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [vxworks] skip macros from implicitly-included vxConfig.h

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:266afd9eea80fe2176be208980c3ff1f6403e57b

commit 266afd9eea80fe2176be208980c3ff1f6403e57b
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:35 2025 -0300

[testsuite] [vxworks] skip macros from implicitly-included vxConfig.h

On vxworks, vxConfig.h is implicitly included, and it defines multiple
macros in the namespace reserved for the implementation.

g++.dg/modules/macro-5_a.H tests that macros from the command-line do
not make the module output, but it can't tell them from macros from
implicitly-included headers, so the test fails.

Tighten the pattern to avoid matching reserved macro names.


for  gcc/testsuite/ChangeLog

* g++.dg/modules/macro-5_a.H: Don't match macros in the
namespace reserved for the implementation.

Diff:
---
 gcc/testsuite/g++.dg/modules/macro-5_a.H | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/modules/macro-5_a.H 
b/gcc/testsuite/g++.dg/modules/macro-5_a.H
index d0913d1c53e5..a44a82abfae5 100644
--- a/gcc/testsuite/g++.dg/modules/macro-5_a.H
+++ b/gcc/testsuite/g++.dg/modules/macro-5_a.H
@@ -2,4 +2,4 @@
 // command line macros are not exported
 // { dg-module-cmi {} }
 
-// { dg-final { scan-lang-dump-not {Writing macro #define [_a-zA-Z0-9]* at 
[0-9]*} module } }
+// { dg-final { scan-lang-dump-not {Writing macro #define [a-z][_a-zA-Z0-9]* 
at [0-9]*} module } }


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -mpowerpc-gfxopt or -mcmpb to copysign tests

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:e639772f09eba78dda05b839dcab413fc4a78bd3

commit e639772f09eba78dda05b839dcab413fc4a78bd3
Author: Alexandre Oliva 
Date:   Thu May 8 02:19:07 2025 -0300

[testsuite] [ppc] add -mpowerpc-gfxopt or -mcmpb to copysign tests

Requiring float128_sw for ifn_copysign on ppc was an error; for SFmode
and DFmode, the condition is far more elaborate.  It takes hard_float
in addition to any of -mcmpb, vsx vectors for the mode, or
-mpowerpc-gfxopt with fast-math (-ffinite-math-only and
-fno-signed-zeros).

A number of ifn_copysign tests add custom options for x86, so I'm
adding only the ppc hard_float requirement to ifn_copysign, and ppc
options that suffice to enable ifn_copysign along with other options
already present in each test.

As on s390, ppc prefers copysign over -abs, so adjust the tests.


for  gcc/testsuite/ChangeLog

* lib/target-supports.exp (check_effective_target_ifn_copysign):
Require hard float on ppc.
* gcc.dg/pr55152-2.c: Add -mpowerpc-gfxopt on ppc.
* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
* gcc.dg/fold-copysign-1.c: Add -mcmpb on ppc.
* gcc.dg/tree-ssa/abs-4.c: Likewise.  Expect 3 copysigns on ppc.
* gcc.dg/tree-ssa/backprop-6.c: Likewise.  Likewise.

Diff:
---
 gcc/testsuite/gcc.dg/pr55152-2.c| 1 +
 gcc/testsuite/gcc.dg/tree-ssa/abs-4.c   | 9 +
 gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c  | 9 +
 gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c | 1 +
 gcc/testsuite/lib/target-supports.exp   | 2 +-
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 24068cffa4a8..ed293c0cae3e 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow 
-fdump-tree-optimized" } */
 /* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-mpowerpc-gfxopt" { target { powerpc*-*-* } } } */
 
 double g (double a)
 {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 4144d1cd954a..4d2654af3bd7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-tree-optimized" } */
 /* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-mcmpb" { target { powerpc*-*-* } } } */
 /* PR tree-optimization/109829 */
 
 float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
@@ -10,9 +11,9 @@ long double abs_ld(long double x) { return 
__builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target { 
ifn_copysign && { ! { s390*-*-* } } } } } } */
-/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target { 
ifn_copysign && { ! { s390*-*-* } } } } } } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target { 
ifn_copysign && { ! { s390*-*-* } } } } } } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 3 "optimized" { target { 
ifn_copysign && s390*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target { 
ifn_copysign && { ! { powerpc*-*-* s390*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target { 
ifn_copysign && { ! { powerpc*-*-* s390*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target { 
ifn_copysign && { ! { powerpc*-*-* s390*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 3 "optimized" { target { 
ifn_copysign && { powerpc*-*-* s390*-*-* } } } } } */
 /* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! 
ifn_copysign } } } } */
 /* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! 
ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c 
b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index dbde681e3832..42db33107aea 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-backprop-details" }  */
 /* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* 
x86_64-*-* } && ilp32 } } } */
+/* { dg-additional-options "-mcmpb" { target { powerpc*-*-* } } } */
 
 void start (void *);
 void end (void *);
@@ -27,9 +28,9 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "ba

[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] pr110071 requires power6 for shrink-wrapping

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:5129a6a9ee018f045b7e49f78f9dcce4aa7b1411

commit 5129a6a9ee018f045b7e49f78f9dcce4aa7b1411
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:56 2025 -0300

[testsuite] [ppc] pr110071 requires power6 for shrink-wrapping

The test's expectation of shrink-wrapping is only met starting at
power6.  At earlier CPUs, the register allocator prefers to preserve
an incoming argument around a call in a call-saved register, rather
than in a stack slot, and that prevents shrink-wrapping.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr110071.c: Expect shrink-wrapping
starting at power6.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr110071.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr110071.c 
b/gcc/testsuite/gcc.target/powerpc/pr110071.c
index 282349c74442..0548e8bfa790 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr110071.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr110071.c
@@ -12,4 +12,6 @@ foo (long i, long cond)
   return i+1;
 }
 
-/* { dg-final { scan-rtl-dump-times "Performing shrink-wrapping" 1 
"pro_and_epilogue" } } */
+/* Before power6, i is preserved in a call-saved register rather than in
+   memory, which prevents shrink-wrapping.  */
+/* { dg-final { scan-rtl-dump-times "Performing shrink-wrapping" 1 
"pro_and_epilogue" { target has_arch_pwr6 } } } */


[gcc(refs/users/aoliva/heads/testme)] add explicit ABI and align options to pr88233.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:316b10c544965dbb913a12ad2f83bdfb2fbd66b3

commit 316b10c544965dbb913a12ad2f83bdfb2fbd66b3
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:59 2025 -0300

add explicit ABI and align options to pr88233.c

We've observed failures of this test on powerpc configurations that
default to different calling conventions and alignment requirements.
Both settings are needed for the original expectations to be met.

The test was later modified to have different expectations for big and
little endian code generation.  This patch restores the original
codegen expectations, that, with the explicit options, don't vary any
more.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr88233.c: Make some alignment strictness
and calling conventions assumptions explicit.  Restore uniform
codegen expectations.

(cherry picked from commit 91f23229cef6dc7c2af20032737b8f3b1ad31e21)
(combined with commit 18bed7a078a2f55fc39f40e4709a6f561de115fd)

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr88233.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr88233.c 
b/gcc/testsuite/gcc.target/powerpc/pr88233.c
index 27c73717a3f7..46a3ebfa2877 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr88233.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr88233.c
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-strict-align 
-fpcc-struct-return" } */
 
 typedef struct { double a[2]; } A;
 A
@@ -9,6 +9,5 @@ foo (const A *a)
 }
 
 /* { dg-final { scan-assembler-not {\mmtvsr} } } */
-/* { dg-final { scan-assembler-times {\mlxvd2x\M} 1 { target { be } } } } */
-/* { dg-final { scan-assembler-times {\mstxvd2x\M} 1 { target { be } } } } */
-/* { dg-final { scan-assembler-times {\mlfd\M} 2 { target { le } } } } */
+/* { dg-final { scan-assembler-times {\mlxvd2x\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M} 1 } } */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for ilp32

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:90cfdd861ba487ea88e94d4a4f87de910f123e8c

commit 90cfdd861ba487ea88e94d4a4f87de910f123e8c
Author: Alexandre Oliva 
Date:   Thu May 8 02:19:09 2025 -0300

[testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for ilp32

gcc.target/powerpc/vsx-builtin-7.c uses fewer xxpermdi insns than
expected on ilp32.  Adjust.


for gcc/testsuite/ChangeLog

* gcc.target/powerpc/vsx-builtin-7.c: Adjust xxpermdi count on
ilp32.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
index 4de240c6a95d..20e4483c1aa7 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
@@ -194,8 +194,9 @@ vector unsigned __int128 splat_uint128 (unsigned __int128 
x) { return vec_splats
 /* { dg-final { scan-assembler-times {\mrldic\M} 0  { target { be && ilp32 } } 
} } */
 /* { dg-final { scan-assembler-times {\mrldic\M} 65 { target { be && lp64 } } 
} } */
 /* { dg-final { scan-assembler-times {\mrldic\M} 65 { target le } } } */
-/* { dg-final { scan-assembler-times "xxpermdi" 33 { target be } } } */
-/* { dg-final { scan-assembler-times "xxpermdi" 35 { target le } } } */
+/* { dg-final { scan-assembler-times "xxpermdi" 32 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times "xxpermdi" 33 { target { be && lp64 } } } 
} */
+/* { dg-final { scan-assembler-times "xxpermdi" 35 { target { le && lp64 } } } 
} */
 /* { dg-final { scan-assembler-times "vspltisb" 2 } } */
 /* { dg-final { scan-assembler-times "vspltish" 2 } } */
 /* { dg-final { scan-assembler-times "vspltisw" 2 { target be } } } */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for lp64 as well

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:24d496c010705440f083921868e9a346671e1718

commit 24d496c010705440f083921868e9a346671e1718
Author: Alexandre Oliva 
Date:   Thu May 8 02:19:13 2025 -0300

[testsuite] [ppc] adjust vsx-builtin-7.c xxpermdi count for lp64 as well

xxpermdi (and rldic) instruction counts are slightly lower than
expected on lp64 as well.  Adjust.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/vsx-builtin-7.c: Adjust expected counts.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
index 20e4483c1aa7..832a365dd394 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
@@ -192,11 +192,11 @@ vector unsigned __int128 splat_uint128 (unsigned __int128 
x) { return vec_splats
  */
 
 /* { dg-final { scan-assembler-times {\mrldic\M} 0  { target { be && ilp32 } } 
} } */
-/* { dg-final { scan-assembler-times {\mrldic\M} 65 { target { be && lp64 } } 
} } */
-/* { dg-final { scan-assembler-times {\mrldic\M} 65 { target le } } } */
+/* { dg-final { scan-assembler-times {\mrldic\M} 64 { target { be && lp64 } } 
} } */
+/* { dg-final { scan-assembler-times {\mrldic\M} 64 { target le } } } */
 /* { dg-final { scan-assembler-times "xxpermdi" 32 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times "xxpermdi" 33 { target { be && lp64 } } } 
} */
-/* { dg-final { scan-assembler-times "xxpermdi" 35 { target { le && lp64 } } } 
} */
+/* { dg-final { scan-assembler-times "xxpermdi" 32 { target { be && lp64 } } } 
} */
+/* { dg-final { scan-assembler-times "xxpermdi" 34 { target { le && lp64 } } } 
} */
 /* { dg-final { scan-assembler-times "vspltisb" 2 } } */
 /* { dg-final { scan-assembler-times "vspltish" 2 } } */
 /* { dg-final { scan-assembler-times "vspltisw" 2 { target be } } } */


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] require float128 available for copysign

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:611953b225af91529e0d9714f9c28f0223dc97c0

commit 611953b225af91529e0d9714f9c28f0223dc97c0
Author: Alexandre Oliva 
Date:   Thu May 8 02:19:03 2025 -0300

[testsuite] [ppc] require float128 available for copysign

The rs6000.md copysign3 expander requires the mode to satisfy
FLOAT128_IEEE_P, so requiring float128 on ppc for ifn_copysign
effective target is hopefully a close-enough approximation.

gcc.dg/fold-copysign-1.c and gcc.dg/pr55152-2.c fail on ppc-elf
without this.


for  gcc/testsuite/ChangeLog

* lib/target-supports.exp (check_effective_target_ifn_copysign):
Require float128 on ppc.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 287e51bbfc66..d86ff3f9738b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8420,7 +8420,8 @@ proc check_effective_target_ifn_copysign { } {
  && [is-effective-target sse])
 || ([istarget loongarch*-*-*]
 && [check_effective_target_hard_float])
-|| [istarget powerpc*-*-*]
+|| ([istarget powerpc*-*-*]
+&& [check_ppc_float128_sw_available])
 || [istarget alpha*-*-*]
 || [istarget aarch64*-*-*]
 || [is-effective-target arm_neon]


[gcc(refs/users/aoliva/heads/testme)] vxworks: libgcc: include string.h for memset

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:0d50da6082ac6c06c148ac94736ec009bb260e0a

commit 0d50da6082ac6c06c148ac94736ec009bb260e0a
Author: Alexandre Oliva 
Date:   Thu May 8 02:18:19 2025 -0300

vxworks: libgcc: include string.h for memset

gthr-vxworks-thread.c calls memset in __gthread_cond_signal, but it
fails to include <string.h>, where this function is declared, and GCC
14 rejects calls of undeclared functions.  Include the required
header.


for  libgcc/ChangeLog

* config/gthr-vxworks-thread.c: Include string.h for memset.

Diff:
---
 libgcc/config/gthr-vxworks-thread.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libgcc/config/gthr-vxworks-thread.c 
b/libgcc/config/gthr-vxworks-thread.c
index 17c60faba48f..31f291aca67e 100644
--- a/libgcc/config/gthr-vxworks-thread.c
+++ b/libgcc/config/gthr-vxworks-thread.c
@@ -33,6 +33,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #include 
 #include 
+#include <string.h>
 
 #define __TIMESPEC_TO_NSEC(timespec) \
   ((long long)timespec.tv_sec * 10 + (long long)timespec.tv_nsec)


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -mdejagnu-cpu=power7 to pr17381.c

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:a9ac3759711d5b6c6d8bba452dd40ba2cde4284f

commit a9ac3759711d5b6c6d8bba452dd40ba2cde4284f
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:58 2025 -0300

[testsuite] [ppc] add -mdejagnu-cpu=power7 to pr17381.c

Below power7, it seems to be more profitable to compress the
floating-point constants and use an additional fp register move to
"extend" it.  Only at power7 and above do we keep the constants
separate and load them, getting to the expected 'fmr' count.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr17381.c: Compile for power7.  Justify.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/pr17381.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr17381.c 
b/gcc/testsuite/gcc.target/powerpc/pr17381.c
index e6222c130af1..b137c687776e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr17381.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr17381.c
@@ -1,6 +1,8 @@
 /* PR target/17381 - Unnecessary register move for float extend */
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-mdejagnu-cpu=power7 -O2" } */
+/* Up to power6, we compress the floating-point constant 1.0 and share it with
+   1.0f, but the float_extend comes out as a second fmr.  */
 
 double d;
 float test1(float fParm)


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [ppc] add -mfloat128 to __ieee128-using bfp tests

2025-05-07 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:e48cadae0fd2e16774d5a129804213bd70b296dd

commit e48cadae0fd2e16774d5a129804213bd70b296dd
Author: Alexandre Oliva 
Date:   Thu May 8 02:17:34 2025 -0300

[testsuite] [ppc] add -mfloat128 to __ieee128-using bfp tests

Some ppc bfp tests use __ieee128 without ensuring it's available.
Require ppc_ieee128_ok, add -mfloat128 to get the type defined,
and tolerate the warning that this option may trigger.


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/bfp/scalar-extract-exp-5.c: Require
ppc_ieee128_ok, add -mfloat128, tolerate its warning.
* gcc.target/powerpc/bfp/scalar-extract-sig-5.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-11.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-8.c: Likewise.
* gcc.target/powerpc/bfp/scalar-test-data-class-11.c: Likewise.
* gcc.target/powerpc/bfp/scalar-test-neg-5.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-5.c  | 4 +++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-5.c  | 4 +++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-11.c  | 4 +++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c   | 4 +++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-11.c | 4 +++-
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-5.c | 4 +++-
 6 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-5.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-5.c
index 672aac7ed373..b3aca034cc0c 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-5.c
@@ -1,7 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-options "-mdejagnu-cpu=power9 -mvsx -mfloat128" } */
 /* { dg-require-effective-target ilp32 } */
+/* { dg-require-effective-target ppc_ieee128_ok } */
 /* { dg-require-effective-target powerpc_vsx } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /* This test only runs on 32-bit configurations, where a compiler error
should be issued because this builtin is not available on
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-5.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-5.c
index 5b6d763a26f5..97b2b7e9a642 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-5.c
@@ -1,7 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-options "-mdejagnu-cpu=power9 -mvsx -mfloat128" } */
 /* { dg-require-effective-target ilp32 } */
+/* { dg-require-effective-target ppc_ieee128_ok } */
 /* { dg-require-effective-target powerpc_vsx } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /* This test only runs on 32-bit configurations, producing a compiler
error because the builtin requires 64 bits.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-11.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-11.c
index 3a0529ed9a1b..3a556740516f 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-11.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-11.c
@@ -1,7 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-options "-mdejagnu-cpu=power9 -mvsx -mfloat128" } */
 /* { dg-require-effective-target ilp32 } */
+/* { dg-require-effective-target ppc_ieee128_ok } */
 /* { dg-require-effective-target powerpc_vsx } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /* This test only runs on 32-bit configurations, where a compiler error
should be issued because this builtin is not available on
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c
index d9984c7678f3..9427f89d18f2 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-8.c
@@ -1,7 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-options "-mdejagnu-cpu=power9 -mvsx" } */
+/* { dg-options "-mdejagnu-cpu=power9 -mvsx -mfloat128" } */
 /* { dg-require-effective-target ilp32 } */
+/* { dg-require-effective-target ppc_ieee128_ok } */
 /* { dg-require-effective-target powerpc_vsx } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /* This test only runs on 32-bit configurations, where a compiler error
should be issued because this builtin is not available on
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-11.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-11.c
index 8da98569de8a..132058304cdd

[gcc r16-460] tree-optimization/120089 - force all PHIs live for early-break vect

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9def392a1b63a198d15d972f73b4afc888389d7c

commit r16-460-g9def392a1b63a198d15d972f73b4afc888389d7c
Author: Richard Biener 
Date:   Mon May 5 14:29:34 2025 +0200

tree-optimization/120089 - force all PHIs live for early-break vect

The following makes sure to even mark unsupported PHIs live when
doing early-break vectorization since otherwise we fail to validate
we can vectorize those and generate wrong code based on the scalar
PHIs which would only work with a vectorization factor of one.

PR tree-optimization/120089
* tree-vect-stmts.cc (vect_stmt_relevant_p): Mark all
PHIs live when not already so and doing early-break
vectorization.
(vect_mark_stmts_to_be_vectorized): Skip virtual PHIs.
* tree-vect-slp.cc (vect_analyze_slp): Robustify handling
of early-break forced IVs.

* gcc.dg/vect/vect-early-break_134-pr120089.c: New testcase.

Diff:
---
 .../gcc.dg/vect/vect-early-break_134-pr120089.c| 66 ++
 gcc/tree-vect-slp.cc   | 17 +++---
 gcc/tree-vect-stmts.cc | 19 ---
 3 files changed, 87 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
new file mode 100644
index ..4d8199ca6373
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
@@ -0,0 +1,66 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-funswitch-loops" } */
+
+#include "tree-vect.h"
+
+typedef int type;
+typedef type Vec2[2];
+
+struct BytesVec {
+type d[100];
+};
+
+__attribute__((noipa)) struct BytesVec
+buildVertexBufferData(const Vec2 *origVertices, bool needsZW,
+  unsigned paddingSize, unsigned long t) {
+const unsigned vertexCount = t;
+struct BytesVec data = (struct BytesVec){.d = {0}};
+type *nextVertexPtr = data.d;
+
+for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) {
+
+if (vertexIdx > t)
+__builtin_trap();
+__builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx],
+ 2 * sizeof(type));
+nextVertexPtr += 2;
+
+if (needsZW) {
+nextVertexPtr += 2;
+}
+
+nextVertexPtr += paddingSize;
+}
+
+return data;
+}
+Vec2 origVertices[] = {
+{0, 1}, {2, 3}, {4, 5}, {6, 7},
+{8, 9}, {10, 11}, {12, 13}, {14, 15},
+{16, 17}, {18, 19}, {20, 21}, {22, 23},
+{24, 25}, {26, 27}, {27, 28}, {29, 30},
+};
+
+int main()
+{
+  check_vect ();
+  struct BytesVec vec
+= buildVertexBufferData(origVertices, false, 0,
+   sizeof(origVertices) / sizeof(origVertices[0]));
+
+  int errors = 0;
+  for (unsigned i = 0; i < 100; i++) {
+  if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) {
+ int ii = i;
+ int e = origVertices[ii / 2][ii % 2];
+ if (vec.d[i] != e)
+   errors++;
+  } else {
+ if (vec.d[i] != 0)
+   errors++;
+  }
+  }
+  if (errors)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 562e2227c7c4..80e9c010b1a1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5042,14 +5042,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned 
max_tree_size,
vec roots = vNULL;
vec remain = vNULL;
gphi *phi = as_a (STMT_VINFO_STMT (stmt_info));
-   stmts.create (1);
tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
-   stmts.quick_push (vect_stmt_to_vectorize (lc_info));
-   vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
-stmts, roots, remain,
-max_tree_size, &limit,
-bst_map, NULL, force_single_lane);
+   if (lc_info)
+ {
+   stmts.create (1);
+   stmts.quick_push (vect_stmt_to_vectorize (lc_info));
+   vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+stmts, roots, remain,
+max_tree_size, &limit,
+bst_map, NULL, force_single_lane);
+ }
/* When the latch def is from a different cycle this can only
   be a induction.  Build a simple instance for this.
   ???  We should be able to start discovery from the PHI
@@ -5059,8 +5062,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
tem.quick_push (stmt_info);
if (!bst_map->get (tem))
  {
-   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info)
- 

[gcc r16-462] tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9e85d056cd15befffb39d2f84902d21eda4d98eb

commit r16-462-g9e85d056cd15befffb39d2f84902d21eda4d98eb
Author: Richard Biener 
Date:   Tue May 6 13:29:42 2025 +0200

tree-optimization/119589 - alignment analysis for VF > 1 and 
VMAT_STRIDED_SLP

The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
code which for the case of VF > 1 currently relies on dataref analysis
which assumes consecutive accesses.  But the code generation advances
by DR_STEP between each iteration, which requires us to assess that
each individual DR_STEP preserves the alignment rather than only VF * DR_STEP.
This allows us to use vector aligned accesses in some cases.

PR tree-optimization/119589
PR tree-optimization/119586
PR tree-optimization/119155
* tree-vect-stmts.cc (vectorizable_store): Verify
DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
VF > 1 and VMAT_STRIDED_SLP.  Use vector aligned accesses when
we can.
(vectorizable_load): Likewise.

Diff:
---
 gcc/tree-vect-stmts.cc | 47 ++-
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index af7114d41923..a8762baa076c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
  if (n == const_nunits)
{
  int mis_align = dr_misalignment (first_dr_info, vectype);
+ /* With VF > 1 we advance the DR by step, if that is constant
+and only aligned when performed VF times, DR alignment
+analysis can analyze this as aligned since it assumes
+contiguous accesses.  But that is not how we code generate
+here, so adjust for this.  */
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+   mis_align = -1;
  dr_alignment_support dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
 mis_align);
@@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
  ltype = build_vector_type (elem_type, n);
  lvectype = vectype;
  int mis_align = dr_misalignment (first_dr_info, ltype);
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+   mis_align = -1;
  dr_alignment_support dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, ltype,
 mis_align);
@@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
}
}
  unsigned align;
- /* ???  We'd want to use
-  if (alignment_support_scheme == dr_aligned)
-align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-since doing that is what we assume we can in the above checks.
-But this interferes with groups with gaps where for example
-VF == 2 makes the group in the unrolled loop aligned but the
-fact that we advance with step between the two subgroups
-makes the access to the second unaligned.  See PR119586.
-We have to anticipate that here or adjust code generation to
-avoid the misaligned loads by means of permutations.  */
- align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+ if (alignment_support_scheme == dr_aligned)
+   align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+ else
+   align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
  /* Alignment is at most the access size if we do multiple stores.  */
  if (nstores > 1)
align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
  if (n == const_nunits)
{
  int mis_align = dr_misalignment (first_dr_info, vectype);
+ /* With VF > 1 we advance the DR by step, if that is constant
+and only aligned when performed VF times, DR alignment
+analysis can analyze this as aligned since it assumes
+contiguous accesses.  But that is not how we code generate
+here, so adjust for this.  */
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+   mis_align = -1;
  dr_alignment_support dr_align
= vect_su

[gcc r16-461] tree-optimization/120143 - ICE with failed early break store move

2025-05-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:da377e7ebf84a05943fb768eaeb7d682dee865fa

commit r16-461-gda377e7ebf84a05943fb768eaeb7d682dee865fa
Author: Richard Biener 
Date:   Wed May 7 09:43:54 2025 +0200

tree-optimization/120143 - ICE with failed early break store move

The early break vectorization store moving was incorrectly trying
to move the pattern stmt instead of the original one which failed
to register and then confused virtual SSA form due to the update
triggered by a degenerate virtual PHI.

PR tree-optimization/120143
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences):
Move/update the original stmts, not the pattern stmts which
lack virtual operands and are not in the IL.

* gcc.dg/vect/vect-early-break_135-pr120143.c: New testcase.

Diff:
---
 .../gcc.dg/vect/vect-early-break_135-pr120143.c| 18 ++
 gcc/tree-vect-data-refs.cc |  1 -
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c
new file mode 100644
index ..1ee30a821e2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-O3 -fwhole-program" } */
+
+short a;
+extern _Bool b[][23];
+short g = 6;
+int v[4];
+int x[3];
+void c(short g, int v[], int x[]) {
+  for (;;)
+for (unsigned y = 0; y < 023; y++) {
+  b[y][y] = v[y];
+  for (_Bool aa = 0; aa < (_Bool)g; aa = x[y])
+a = a > 0;
+}
+}
+int main() { c(g, v, x); }
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 231a3cab4f80..9fd1ef296506 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -734,7 +734,6 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
 
  stmt_vec_info stmt_vinfo
= vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt));
- stmt = STMT_VINFO_STMT (stmt_vinfo);
  auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
  if (!dr_ref)
continue;


[gcc r16-457] [RISC-V][PR target/120137][PR target/120154] Don't create out-of-range permutation constants

2025-05-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:49c44911df72f55e2004ffa9f5eb362de29ca188

commit r16-457-g49c44911df72f55e2004ffa9f5eb362de29ca188
Author: Jeff Law 
Date:   Wed May 7 15:06:58 2025 -0600

[RISC-V][PR target/120137][PR target/120154] Don't create out-of-range 
permutation constants

To make hashing sensible we canonicalize constant vectors in the hash table
so that their first entry always has the value zero.  That normalization can
result in a value that can't be represented in the element mode.

So before entering anything into the hash table we need to verify the
normalized entries will fit into the element's mode.

This fixes both 120137 and its duplicate 120154.  This has been tested in my
tester.  I'm just waiting for the pre-commit tester to render its verdict.

PR target/120137
PR target/120154
gcc/
* config/riscv/riscv-vect-permconst.cc (process_bb): Verify each
canonicalized element fits into the vector element mode.

gcc/testsuite/

* gcc.target/riscv/pr120137.c: New test.
* gcc.target/riscv/pr120154.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vect-permconst.cc  | 20 +++-
 gcc/testsuite/gcc.target/riscv/pr120137.c | 12 
 gcc/testsuite/gcc.target/riscv/pr120154.c | 22 ++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vect-permconst.cc 
b/gcc/config/riscv/riscv-vect-permconst.cc
index feecc7ed6da0..8e13cf8d5587 100644
--- a/gcc/config/riscv/riscv-vect-permconst.cc
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb)
   if (bias < 0 || bias > 16384 / 8)
continue;
 
+  /* We need to verify that each element would be a valid value
+in the inner mode after applying the bias.  */
+  machine_mode inner = GET_MODE_INNER (GET_MODE (cvec));
+  HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant ();
+  int i;
+  for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+   {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias;
+ if (val != sext_hwi (val, precision))
+   break;
+   }
+
+  /* If the loop terminated early, then we found a case where the
+adjusted constant would not fit, so we can't record the constant
+for this case (it's unlikely to be useful anyway).  */
+  if (i != CONST_VECTOR_NUNITS (cvec).to_constant ())
+   continue;
+
   /* At this point we have a load of a constant integer vector from the
 constant pool.  That constant integer vector is hopefully a
 permutation constant.  We need to make a copy of the vector and
@@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb)
 XXX This violates structure sharing conventions.  */
   rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ());
 
-  for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+  for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
 
   rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
diff --git a/gcc/testsuite/gcc.target/riscv/pr120137.c 
b/gcc/testsuite/gcc.target/riscv/pr120137.c
new file mode 100644
index ..c55a1c1b5bf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr120137.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mrvv-vector-bits=zvl -mabi=lp64" } */
+
+char b[13][13];
+void c() {
+  for (int d = 0; d < 13; ++d)
+for (int e = 0; e < 13; ++e)
+  b[d][e] = e == 0 ? -98 : 38;
+}
+
+
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr120154.c 
b/gcc/testsuite/gcc.target/riscv/pr120154.c
new file mode 100644
index ..fd849ca154ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr120154.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gv -mabi=lp64" } */
+
+
+
+typedef __attribute__((__vector_size__(4))) char V;
+
+V g;
+
+V
+bar(V a, V b)
+{
+  V s = a + b + g;
+  return s;
+}
+
+V
+foo()
+{
+  return bar((V){20}, (V){23, 150});
+}
+