date:20240904

[gcc r15-3441] Zen5 tuning part 5: update instruction latencies in x86-tune-costs

2024-09-04 Thread Jan Hubicka via Gcc-cvs

https://gcc.gnu.org/g:4292297a0f938ffc953422fa246ff00fe345fe3d

commit r15-3441-g4292297a0f938ffc953422fa246ff00fe345fe3d
Author: Jan Hubicka 
Date:   Wed Sep 4 09:19:08 2024 +0200

Zen5 tuning part 5: update instruction latencies in x86-tune-costs

there is nothing exciting in this patch.  I measured latencies and also 
compared
them with newly released optimization guide.  There are no dramatic changes
compared to zen4.  One interesting new bit is that addss is faster and can 
be
2 cycles when fed by another addss.

I also increased the large insn bound since decoders seems no longer require
instructions to be 8 bytes or less.

gcc/ChangeLog:

* config/i386/x86-tune-costs.h (znver5_cost): Update instruction
costs.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 28 +---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index b90567fbbf2..1b3227ace16 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2034,6 +2034,7 @@ struct processor_costs znver5_cost = {
   COSTS_N_INSNS (1),   /* cost of a lea instruction.  */
   COSTS_N_INSNS (1),   /* variable shift costs.  */
   COSTS_N_INSNS (1),   /* constant shift costs.  */
+  /* mul has latency 3, executes in 3 integer units.  */
   {COSTS_N_INSNS (3),  /* cost of starting multiply for QI.  */
COSTS_N_INSNS (3),  /*   HI.  */
COSTS_N_INSNS (3),  /*   SI.  */
@@ -2041,6 +2042,8 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (3)}, /*  other.  */
   0,   /* cost of multiply per each bit
   set.  */
+  /* integer divide has latency of 8 cycles
+ plus 1 for every 9 bits of quotient.  */
   {COSTS_N_INSNS (10), /* cost of a divide/mod for QI.  */
COSTS_N_INSNS (11), /*  HI.  */
COSTS_N_INSNS (13), /*  SI.  */
@@ -2048,7 +2051,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (16)},/*  
other.  */
   COSTS_N_INSNS (1),   /* cost of movsx.  */
   COSTS_N_INSNS (1),   /* cost of movzx.  */
-  8,   /* "large" insn.  */
+  15,  /* "large" insn.  */
   9,   /* MOVE_RATIO.  */
   6,   /* CLEAR_RATIO */
   {6, 6, 6},   /* cost of loading integer registers
@@ -2065,12 +2068,13 @@ struct processor_costs znver5_cost = {
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to 
integer.  */
-  /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
- throughput 5.  Approx 7 uops do not depend on vector size and every load
- is 5 uops.  */
+
+  /* TODO: gather and scatter instructions are currently disabled in
+ x86-tune.def.  In some cases they are however a win, see PR116582
+ We however need good cost model for them.  */
   14, 10,  /* Gather load static, per_elt.  */
   14, 20,  /* Gather store static, per_elt.  */
-  32,  /* size of l1 cache.  */
+  48,  /* size of l1 cache.  */
   1024,/* size of l2 cache.  */
   64,  /* size of prefetch block.  */
   /* New AMD processors never drop prefetches; if they cannot be performed
@@ -2080,6 +2084,8 @@ struct processor_costs znver5_cost = {
  time).  */
   100, /* number of parallel prefetches.  */
   3,   /* Branch cost.  */
+  /* TODO x87 latencies are still based on znver4.
+ Probably not very important these days.  */
   COSTS_N_INSNS (7),   /* cost of FADD and FSUB insns.  */
   COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
   /* Latency of fdiv is 8-15.  */
@@ -2089,16 +2095,24 @@ struct processor_costs znver5_cost = {
   /* Latency of fsqrt is 4-10.  */
   COSTS_N_INSNS (25),  /* cost of FSQRT instruction.  */
 
+  /* SSE instructions have typical throughput 4 and latency 1.  */
   COSTS_N_INSNS (1),   /* cost of cheap SSE instruction.  */
-  COSTS_N_INSNS (3),   /* cost of ADDSS/SD SUBSS/SD insns.  */
+  /* ADDSS has throughput 2 and latency 2
+ (in some cases when source is

[gcc r15-3442] Also lower SLP grouped loads with just one consumer

2024-09-04 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:7164d982663738c255a1a71a5d4f38dc51c2a3cb

commit r15-3442-g7164d982663738c255a1a71a5d4f38dc51c2a3cb
Author: Richard Biener 
Date:   Mon Sep 2 15:00:05 2024 +0200

Also lower SLP grouped loads with just one consumer

This makes sure to produce interleaving schemes or load-lanes
for single-element interleaving and other permutes that otherwise
would use more than three vectors.

It exposes the latent issue that single-element interleaving with
large gaps can be inefficient - the mitigation in get_group_load_store_type
doesn't trigger when we clear the load permutation.

It also exposes the fact that not all permutes can be lowered in
the best way in a vector length agnostic way so I've added an
exception to keep power-of-two size contiguous aligned chunks
unlowered (unless we want load-lanes).  The optimal handling
of load/store vectorization is going to continue to be a learning
process.

* tree-vect-slp.cc (vect_lower_load_permutations): Also
process single-use grouped loads.
Avoid lowering contiguous aligned power-of-two sized
chunks, those are better handled by the vector size
specific SLP code generation.
* tree-vect-stmts.cc (get_group_load_store_type): Drop
the unrelated requirement of a load permutation for the
single-element interleaving limit.

* gcc.dg/vect/slp-46.c: Remove XFAIL.

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-46.c |  2 +-
 gcc/tree-vect-slp.cc   | 56 ++
 gcc/tree-vect-stmts.cc |  1 -
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-46.c 
b/gcc/testsuite/gcc.dg/vect/slp-46.c
index b44a673f7de..016580e7a95 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-46.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-46.c
@@ -98,4 +98,4 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { 
xfail { vect_load_lanes && vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } 
} */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 2b05032790e..d35e0609174 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4315,6 +4315,37 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
  && ld_lanes_lanes == 0)
continue;
 
+  /* Build the permute to get the original load permutation order.  */
+  bool contiguous = true;
+  lane_permutation_t final_perm;
+  final_perm.create (SLP_TREE_LANES (load));
+  for (unsigned i = 0; i < SLP_TREE_LANES (load); ++i)
+   {
+ final_perm.quick_push
+   (std::make_pair (0, SLP_TREE_LOAD_PERMUTATION (load)[i]));
+ if (i != 0
+ && (SLP_TREE_LOAD_PERMUTATION (load)[i]
+ != SLP_TREE_LOAD_PERMUTATION (load)[i-1] + 1))
+   contiguous = false;
+   }
+
+  /* When the load permutation accesses a contiguous unpermuted,
+power-of-two aligned and sized chunk leave the load alone.
+We can likely (re-)load it more efficiently rather than
+extracting it from the larger load.
+???  Long-term some of the lowering should move to where
+the vector types involved are fixed.  */
+  if (ld_lanes_lanes == 0
+ && contiguous
+ && (SLP_TREE_LANES (load) > 1 || loads.size () == 1)
+ && pow2p_hwi (SLP_TREE_LANES (load))
+ && SLP_TREE_LOAD_PERMUTATION (load)[0] % SLP_TREE_LANES (load) == 0
+ && group_lanes % SLP_TREE_LANES (load) == 0)
+   {
+ final_perm.release ();
+ continue;
+   }
+
   /* First build (and possibly re-use) a load node for the
 unpermuted group.  Gaps in the middle and on the end are
 represented with NULL stmts.  */
@@ -4338,13 +4369,6 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
 &max_nunits, matches, &limit,
 &tree_size, bst_map);
 
-  /* Build the permute to get the original load permutation order.  */
-  lane_permutation_t final_perm;
-  final_perm.create (SLP_TREE_LANES (load));
-  for (unsigned i = 0; i < SLP_TREE_LANES (load); ++i)
-   final_perm.quick_push
- (std::make_pair (0, SLP_TREE_LOAD_PERMUTATION (load)[i]));
-
   if (ld_lanes_lanes != 0)
{
  /* ???  If this is not in sync with what get_load_store_type
@@ -4503,20 +4527,16 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
  && STMT_VINFO_GROUPED_ACCESS (b0)
  && DR_GROUP_FIRST_ELEMENT (a0) == DR_GROUP_FIRST_ELEMENT (b0))
continue;
-  /* Just one SLP load of a possible group, leave those alone.  */
-  if (i == firsti + 1)
-   {
- firsti = i;
- continue;
-   }
-  /

[gcc r15-3443] rust: avoid clobbering LIBS

2024-09-04 Thread Marc Poulhi?s via Gcc-cvs

https://gcc.gnu.org/g:da3a2985fff39ee8ec1b9f48699e3b8197e439e6

commit r15-3443-gda3a2985fff39ee8ec1b9f48699e3b8197e439e6
Author: Marc Poulhiès 
Date:   Mon Aug 5 17:41:17 2024 +0200

rust: avoid clobbering LIBS

Save LIBS around calls to AC_SEARCH_LIBS to avoid clobbering $LIBS.

ChangeLog:

* configure: Regenerate.
* configure.ac: Save LIBS around calls to AC_SEARCH_LIBS.

Signed-off-by: Marc Poulhiès 
Reviewed-by: Thomas Schwinge 
Tested-by: Thomas Schwinge 

Diff:
---
 configure| 15 ---
 configure.ac | 15 ---
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/configure b/configure
index 51bf1d1add1..e9583f2ba0c 100755
--- a/configure
+++ b/configure
@@ -8878,9 +8878,12 @@ fi
 
 # Rust requires -ldl and -lpthread if you are using an old glibc that does not 
include them by
 # default, so we check for them here
-
+# We are doing the test here and not in the gcc/configure to be able to nicely 
disable the
+# build of the Rust frontend in case a dep is missing.
 missing_rust_dynlibs=none
 
+save_LIBS="$LIBS"
+LIBS=
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing 
dlopen" >&5
 $as_echo_n "checking for library containing dlopen... " >&6; }
 if ${ac_cv_search_dlopen+:} false; then :
@@ -8993,16 +8996,14 @@ if test "$ac_res" != no; then :
 
 fi
 
+CRAB1_LIBS="$LIBS"
+LIBS="$save_LIBS"
 
-if test "$ac_cv_search_dlopen" = -ldl; then
-CRAB1_LIBS="$CRAB1_LIBS -ldl"
-elif test "$ac_cv_search_dlopen" = no; then
+if test "$ac_cv_search_dlopen" = no; then
 missing_rust_dynlibs="libdl"
 fi
 
-if test "$ac_cv_search_pthread_create" = -lpthread; then
-CRAB1_LIBS="$CRAB1_LIBS -lpthread"
-elif test "$ac_cv_search_pthread_create" = no; then
+if test "$ac_cv_search_pthread_create" = no; then
 missing_rust_dynlibs="$missing_rust_dynlibs, libpthread"
 fi
 
diff --git a/configure.ac b/configure.ac
index 20457005e29..f61dbe64a94 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2039,21 +2039,22 @@ AC_SUBST(PICFLAG)
 
 # Rust requires -ldl and -lpthread if you are using an old glibc that does not 
include them by
 # default, so we check for them here
-
+# We are doing the test here and not in the gcc/configure to be able to nicely 
disable the
+# build of the Rust frontend in case a dep is missing.
 missing_rust_dynlibs=none
 
+save_LIBS="$LIBS"
+LIBS=
 AC_SEARCH_LIBS([dlopen], [dl])
 AC_SEARCH_LIBS([pthread_create], [pthread])
+CRAB1_LIBS="$LIBS"
+LIBS="$save_LIBS"
 
-if test "$ac_cv_search_dlopen" = -ldl; then
-CRAB1_LIBS="$CRAB1_LIBS -ldl"
-elif test "$ac_cv_search_dlopen" = no; then
+if test "$ac_cv_search_dlopen" = no; then
 missing_rust_dynlibs="libdl"
 fi
 
-if test "$ac_cv_search_pthread_create" = -lpthread; then
-CRAB1_LIBS="$CRAB1_LIBS -lpthread"
-elif test "$ac_cv_search_pthread_create" = no; then
+if test "$ac_cv_search_pthread_create" = no; then
 missing_rust_dynlibs="$missing_rust_dynlibs, libpthread"
 fi

[gcc r15-3444] Add 'gcc.target/nvptx/alias-weak-1.c'

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:2267d254eb6ad782cef7b462f2bb2128bc8ace30

commit r15-3444-g2267d254eb6ad782cef7b462f2bb2128bc8ace30
Author: Thomas Schwinge 
Date:   Wed Sep 4 09:58:32 2024 +0200

Add 'gcc.target/nvptx/alias-weak-1.c'

... testing for the GCC/nvptx "weak alias definitions not supported" error
diagnostic (limitation of PTX).

gcc/testsuite/
* gcc.target/nvptx/alias-weak-1.c: New.

Diff:
---
 gcc/testsuite/gcc.target/nvptx/alias-weak-1.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/testsuite/gcc.target/nvptx/alias-weak-1.c 
b/gcc/testsuite/gcc.target/nvptx/alias-weak-1.c
new file mode 100644
index 000..37d9543fc7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alias-weak-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-add-options ptx_alias } */
+
+void __f ()
+{
+}
+
+void f () __attribute__ ((weak, alias ("__f")));
+/* { dg-error {weak alias definitions not supported} {} { target *-*-* } .-1 }
+   (limitation of PTX).  */

[gcc r15-3445] Add 'gcc.target/nvptx/alias-to-alias-1.c'

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:a89321c890b96c583671b73fc802e87545e4a2b1

commit r15-3445-ga89321c890b96c583671b73fc802e87545e4a2b1
Author: Thomas Schwinge 
Date:   Wed Sep 4 09:44:33 2024 +0200

Add 'gcc.target/nvptx/alias-to-alias-1.c'

... similar to alias to alias usage in 'libgomp.c-c++-common/pr96390.c'.

PR target/104957
gcc/testsuite/
* gcc.target/nvptx/alias-to-alias-1.c: New.

Diff:
---
 gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c 
b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c
new file mode 100644
index 000..3db79d1fc0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c
@@ -0,0 +1,27 @@
+/* Alias to alias; 'libgomp.c-c++-common/pr96390.c'.  */
+
+/* { dg-do compile } */
+/* { dg-add-options ptx_alias } */
+
+int v;
+
+void foo () { v = 42; }
+void bar () __attribute__((alias ("foo")));
+void baz () __attribute__((alias ("bar")));
+
+int
+main (void)
+{
+  baz ();
+  if (v != 42)
+__builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\\.alias bar,foo;" 1 } } */
+/* { dg-final { scan-assembler-times "\\.visible \\.func foo;" 1 } } */
+/* { dg-final { scan-assembler-times "\\.visible \\.func bar;" 1 } } */
+
+/* { dg-final { scan-assembler-times "\\.alias baz,bar;" 1 } } */
+/* { dg-final { scan-assembler-times "\\.visible \\.func baz;" 1 } } */

[gcc r15-3446] nvptx: Specify '-mno-alias' for 'gcc.dg/pr60797.c' [PR60797, PR104957]

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:b9be3113a9365e8380397abb23abc71f049cf4f1

commit r15-3446-gb9be3113a9365e8380397abb23abc71f049cf4f1
Author: Thomas Schwinge 
Date:   Sun Jul 21 22:23:40 2024 +0200

nvptx: Specify '-mno-alias' for 'gcc.dg/pr60797.c' [PR60797, PR104957]

2014 Subversion r209299 (Git commit 
8330537b5b58bd0532a0a49f9cbd59bf526a7847)
"Fix PR60797" added this test case, which we now amend so that it's able to
test its thing also in '--target=nvptx-none' configurations with symbol 
alias
support enabled (..., and test nvptx '-mno-alias').

PR middle-end/60797
PR target/104957
gcc/testsuite/
* gcc.dg/pr60797.c: For nvptx, specify '-mno-alias'.

Diff:
---
 gcc/testsuite/gcc.dg/pr60797.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr60797.c b/gcc/testsuite/gcc.dg/pr60797.c
index 45090bae502..0485b2de172 100644
--- a/gcc/testsuite/gcc.dg/pr60797.c
+++ b/gcc/testsuite/gcc.dg/pr60797.c
@@ -1,5 +1,7 @@
-/* { dg-do compile } */
-/* { dg-skip-if "" { alias } } */
+/* If there's support for symbol aliases, have to 'dg-skip-if' -- unless
+   there's a way to disable this support.
+   { dg-additional-options -mno-alias { target nvptx-*-* } }
+   { dg-skip-if "" { { ! nvptx-*-* } && alias } } */
 
 extern int foo __attribute__((alias("bar"))); /* { dg-error "supported" } */
 int main()

[gcc r15-3447] Document 'pass_postreload' vs. 'pass_late_compilation'

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:438381ef759ee8b5b04c5723a8334354ba2a30e5

commit r15-3447-g438381ef759ee8b5b04c5723a8334354ba2a30e5
Author: Thomas Schwinge 
Date:   Fri Jun 28 16:04:18 2024 +0200

Document 'pass_postreload' vs. 'pass_late_compilation'

See Subversion r217124 (Git commit 433e4164339f18d0b8798968444a56b681b5232c)
"Reorganize post-ra pipeline for targets without register allocation".

gcc/
* passes.cc: Document 'pass_postreload' vs. 'pass_late_compilation'.
* passes.def: Likewise.

Diff:
---
 gcc/passes.cc  | 14 +-
 gcc/passes.def |  3 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/gcc/passes.cc b/gcc/passes.cc
index 057850f4dec..e2a07ebedf5 100644
--- a/gcc/passes.cc
+++ b/gcc/passes.cc
@@ -660,6 +660,10 @@ make_pass_rest_of_compilation (gcc::context *ctxt)
 
 namespace {
 
+/* A container pass (only) for '!targetm.no_register_allocation' targets, for
+   passes to run if reload completed (..., but not run them if it failed, for
+   example for an invalid 'asm').  See also 'pass_late_compilation'.  */
+
 const pass_data pass_data_postreload =
 {
   RTL_PASS, /* type */
@@ -681,7 +685,12 @@ public:
   {}
 
   /* opt_pass methods: */
-  bool gate (function *) final override { return reload_completed; }
+  bool gate (function *) final override
+  {
+if (reload_completed)
+  gcc_checking_assert (!targetm.no_register_allocation);
+return reload_completed;
+  }
 
 }; // class pass_postreload
 
@@ -695,6 +704,9 @@ make_pass_postreload (gcc::context *ctxt)
 
 namespace {
 
+/* A container pass like 'pass_postreload', but for passes to run also for
+   'targetm.no_register_allocation' targets.  */
+
 const pass_data pass_data_late_compilation =
 {
   RTL_PASS, /* type */
diff --git a/gcc/passes.def b/gcc/passes.def
index b06d6d45f63..6d98c3c9282 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -509,6 +509,9 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_early_remat);
   NEXT_PASS (pass_ira);
   NEXT_PASS (pass_reload);
+  /* In the following, some passes are tied to 'pass_postreload' and others
+to 'pass_late_compilation'.  The difference is that the latter also
+run for 'targetm.no_register_allocation' targets.  */
   NEXT_PASS (pass_postreload);
   PUSH_INSERT_PASSES_WITHIN (pass_postreload)
  NEXT_PASS (pass_postreload_cse);

[gcc r15-3448] Fix gimple_debug_cfg declaration

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:347a953d855c6b246b1604bdf4728f615cb471b6

commit r15-3448-g347a953d855c6b246b1604bdf4728f615cb471b6
Author: Frederik Harwath 
Date:   Tue Nov 16 16:08:40 2021 +0100

Fix gimple_debug_cfg declaration

Silence a warning. The argument type did not match the definition.

gcc/ChangeLog:

* tree-cfg.h (gimple_debug_cfg): Change argument type from int
to dump_flags_t.

Diff:
---
 gcc/tree-cfg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
index 0564b79b4ab..e55991740e8 100644
--- a/gcc/tree-cfg.h
+++ b/gcc/tree-cfg.h
@@ -45,7 +45,7 @@ extern void clear_special_calls (void);
 extern edge find_taken_edge (basic_block, tree);
 extern void gimple_debug_bb (basic_block);
 extern basic_block gimple_debug_bb_n (int);
-extern void gimple_debug_cfg (int);
+extern void gimple_debug_cfg (dump_flags_t);
 extern void gimple_dump_cfg (FILE *, dump_flags_t);
 extern void dump_cfg_stats (FILE *);
 extern void debug_cfg_stats (void);

[gcc r15-3449] Fix branch prediction dump message

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:35e4414bac06927387fb7a6fe10b373e766da1c1

commit r15-3449-g35e4414bac06927387fb7a6fe10b373e766da1c1
Author: Frederik Harwath 
Date:   Tue Nov 16 16:13:51 2021 +0100

Fix branch prediction dump message

Instead of, for instance, "Loop got predicted 1 to iterate 10 times"
the message should be "Loop 1 got predicted to iterate 10 times".

gcc/ChangeLog:

* predict.cc (pass_profile::execute): Fix dump message.

Co-authored-by: Thomas Schwinge 

Diff:
---
 gcc/predict.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/predict.cc b/gcc/predict.cc
index 43e3694cb42..f611161f4aa 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -4210,7 +4210,7 @@ pass_profile::execute (function *fun)
  sreal iterations;
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
if (expected_loop_iterations_by_profile (loop, &iterations))
-fprintf (dump_file, "Loop got predicted %d to iterate %f times.\n",
+fprintf (dump_file, "Loop %d got predicted to iterate %f times.\n",
   loop->num, iterations.to_double ());
}
   return 0;

[gcc r14-10629] Update LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook

2024-09-04 Thread H.J. Lu via Gcc-cvs

https://gcc.gnu.org/g:66eb7b752ab61c02348d6af10945af3ff92b6d77

commit r14-10629-g66eb7b752ab61c02348d6af10945af3ff92b6d77
Author: H.J. Lu 
Date:   Wed Aug 21 07:25:25 2024 -0700

Update LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook

This hook allows the BFD linker plugin to distinguish calls to
claim_file_handler that know the object is being used by the linker
(from ldmain.c:add_archive_element), from calls that don't know it's
being used by the linker (from elf_link_is_defined_archive_symbol); in
the latter case, the plugin should avoid including the unused LTO archive
members in link output.  To get the proper support for archives with LTO
common symbols, the linker fix

commit a6f8fe0a9e9cbe871652e46ba7c22d5e9fb86208
Author: H.J. Lu 
Date:   Wed Aug 14 20:50:02 2024 -0700

lto: Don't include unused LTO archive members in output

is required.

PR lto/116361
* lto-plugin.c (claim_file_handler_v2): Rename claimed to
can_be_claimed.  Include the LTO object only if it is known to
be included in link output.

Signed-off-by: H.J. Lu 
(cherry picked from commit a98dd536b1017c2b814a3465206c6c01b2890998)

Diff:
---
 lto-plugin/lto-plugin.c | 53 +
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
index 152648338b9..61b0de62f52 100644
--- a/lto-plugin/lto-plugin.c
+++ b/lto-plugin/lto-plugin.c
@@ -1191,16 +1191,19 @@ process_offload_section (void *data, const char *name, 
off_t offset, off_t len)
   return 1;
 }
 
-/* Callback used by a linker to check if the plugin will claim FILE. Writes
-   the result in CLAIMED.  If KNOWN_USED, the object is known by the linker
-   to be used, or an older API version is in use that does not provide that
-   information; otherwise, the linker is only determining whether this is
-   a plugin object and it should not be registered as having offload data if
-   not claimed by the plugin.  */
+/* Callback used by a linker to check if the plugin can claim FILE.
+   Writes the result in CAN_BE_CLAIMED.  If KNOWN_USED != 0, the object
+   is known by the linker to be included in link output, or an older API
+   version is in use that does not provide that information.  Otherwise,
+   the linker is only determining whether this is a plugin object and
+   only the symbol table is needed by the linker.  In this case, the
+   object should not be included in link output and this function will
+   be called by the linker again with KNOWN_USED != 0 after the linker
+   decides the object should be included in link output. */
 
 static enum ld_plugin_status
-claim_file_handler_v2 (const struct ld_plugin_input_file *file, int *claimed,
-  int known_used)
+claim_file_handler_v2 (const struct ld_plugin_input_file *file,
+  int *can_be_claimed, int known_used)
 {
   enum ld_plugin_status status;
   struct plugin_objfile obj;
@@ -1229,7 +1232,7 @@ claim_file_handler_v2 (const struct ld_plugin_input_file 
*file, int *claimed,
 }
   lto_file.handle = file->handle;
 
-  *claimed = 0;
+  *can_be_claimed = 0;
   obj.file = file;
   obj.found = 0;
   obj.offload = false;
@@ -1286,15 +1289,19 @@ claim_file_handler_v2 (const struct 
ld_plugin_input_file *file, int *claimed,
  lto_file.symtab.syms);
   check (status == LDPS_OK, LDPL_FATAL, "could not add symbols");
 
-  LOCK_SECTION;
-  num_claimed_files++;
-  claimed_files =
-   xrealloc (claimed_files,
- num_claimed_files * sizeof (struct plugin_file_info));
-  claimed_files[num_claimed_files - 1] = lto_file;
-  UNLOCK_SECTION;
+  /* Include it only if it is known to be used for link output.  */
+  if (known_used)
+   {
+ LOCK_SECTION;
+ num_claimed_files++;
+ claimed_files =
+   xrealloc (claimed_files,
+ num_claimed_files * sizeof (struct plugin_file_info));
+ claimed_files[num_claimed_files - 1] = lto_file;
+ UNLOCK_SECTION;
+   }
 
-  *claimed = 1;
+  *can_be_claimed = 1;
 }
 
   LOCK_SECTION;
@@ -1310,10 +1317,10 @@ claim_file_handler_v2 (const struct 
ld_plugin_input_file *file, int *claimed,
   /* If this is an LTO file without offload, and it is the first LTO file, save
  the pointer to the last offload file in the list.  Further offload LTO
  files will be inserted after it, if any.  */
-  if (*claimed && !obj.offload && offload_files_last_lto == NULL)
+  if (*can_be_claimed && !obj.offload && offload_files_last_lto == NULL)
 offload_files_last_lto = offload_files_last;
 
-  if (obj.offload && (known_used || obj.found > 0))
+  if (obj.offload && known_used && obj.found > 0)
 {
   /* Add file to the list.  The order must be exactly the same as the final
 orde

[gcc r14-10630] lto: Don't check obj.found for offload section

2024-09-04 Thread H.J. Lu via Gcc-cvs

https://gcc.gnu.org/g:0562522e1290da08bc5a89182b9ae80c3cc9d1db

commit r14-10630-g0562522e1290da08bc5a89182b9ae80c3cc9d1db
Author: H.J. Lu 
Date:   Fri Aug 23 05:36:45 2024 -0700

lto: Don't check obj.found for offload section

obj.found is the number of LTO symbols.  We should include the offload
section when it is used by linker even if there are no LTO symbols.

PR lto/116361
* lto-plugin.c (claim_file_handler_v2): Don't check obj.found
for the offload section.

Signed-off-by: H.J. Lu 
(cherry picked from commit cb51e0b236c7d492af2033582230e78d8b55290f)

Diff:
---
 lto-plugin/lto-plugin.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
index 61b0de62f52..c564b36eb92 100644
--- a/lto-plugin/lto-plugin.c
+++ b/lto-plugin/lto-plugin.c
@@ -1320,7 +1320,7 @@ claim_file_handler_v2 (const struct ld_plugin_input_file 
*file,
   if (*can_be_claimed && !obj.offload && offload_files_last_lto == NULL)
 offload_files_last_lto = offload_files_last;
 
-  if (obj.offload && known_used && obj.found > 0)
+  if (obj.offload && known_used)
 {
   /* Add file to the list.  The order must be exactly the same as the final
 order after recompilation and linking, otherwise host and target tables

[gcc r15-3450] nvptx: Use 'enum ptx_version', 'enum ptx_isa' instead of 'int'

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:fee2fbedbb43ad7a017a33ed2b820be79b75e7e5

commit r15-3450-gfee2fbedbb43ad7a017a33ed2b820be79b75e7e5
Author: Thomas Schwinge 
Date:   Mon Jul 22 10:49:16 2024 +0200

nvptx: Use 'enum ptx_version', 'enum ptx_isa' instead of 'int'

This allows getting rid of the respective type casts.  No change in behavior
intended.

gcc/
* config/nvptx/gen-opt.sh: Use 'enum ptx_isa' instead of 'int'.
* config/nvptx/nvptx-gen.opt: Regenerate.
* config/nvptx/nvptx.opt: Use 'enum ptx_version' instead of 'int'.
* config/nvptx/nvptx-opts.h (enum ptx_isa): Add 'PTX_ISA_unset'.
(enum ptx_version): Add 'PTX_VERSION_unset'.
* config/nvptx/nvptx-c.cc (nvptx_cpu_cpp_builtins): Adjust.
* config/nvptx/nvptx.cc (default_ptx_version_option)
(handle_ptx_version_option, nvptx_option_override)
(nvptx_file_start): Likewise.

Diff:
---
 gcc/config/nvptx/gen-opt.sh| 14 +-
 gcc/config/nvptx/nvptx-c.cc|  6 ++
 gcc/config/nvptx/nvptx-gen.opt |  2 +-
 gcc/config/nvptx/nvptx-opts.h  |  4 +++-
 gcc/config/nvptx/nvptx.cc  | 24 
 gcc/config/nvptx/nvptx.opt |  9 ++---
 6 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/gcc/config/nvptx/gen-opt.sh b/gcc/config/nvptx/gen-opt.sh
index 3f7838251d2..6022f51f897 100644
--- a/gcc/config/nvptx/gen-opt.sh
+++ b/gcc/config/nvptx/gen-opt.sh
@@ -38,12 +38,24 @@ echo
 
 . $gen_copyright_sh opt
 
+# Not emitting the following here (in addition to having it in 'nvptx.opt'), as
+# we'll otherwise run into:
+# 
+# gtyp-input.list:10: file [...]/gcc/config/nvptx/nvptx-opts.h specified 
more than once for language (all)
+# make[2]: *** [Makefile:2981: s-gtype] Error 1
+: ||
+cat .
 
 Enum
-Name(ptx_isa) Type(int)
+Name(ptx_isa) Type(enum ptx_isa)
 Known PTX ISA target architectures (for use with the -misa= option):
 
 EnumValue
diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
index f8975327223..fb5147c143e 100644
--- a/gcc/config/nvptx/nvptx-opts.h
+++ b/gcc/config/nvptx/nvptx-opts.h
@@ -22,6 +22,7 @@
 
 enum ptx_isa
 {
+  PTX_ISA_unset,
 #define NVPTX_SM(XX, SEP) PTX_ISA_SM ## XX SEP
 #define NVPTX_SM_SEP ,
 #include "nvptx-sm.def"
@@ -31,7 +32,8 @@ enum ptx_isa
 
 enum ptx_version
 {
-  PTX_VERSION_default,
+  PTX_VERSION_unset,
+  PTX_VERSION_default = PTX_VERSION_unset,
   PTX_VERSION_3_0,
   PTX_VERSION_3_1,
   PTX_VERSION_4_2,
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 2a8f713c680..144b8d0c874 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -231,8 +231,7 @@ first_ptx_version_supporting_sm (enum ptx_isa sm)
 static enum ptx_version
 default_ptx_version_option (void)
 {
-  enum ptx_version first
-= first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option);
+  enum ptx_version first = first_ptx_version_supporting_sm (ptx_isa_option);
 
   /* Pick a version that supports the sm.  */
   enum ptx_version res = first;
@@ -311,20 +310,21 @@ sm_version_to_string (enum ptx_isa sm)
 static void
 handle_ptx_version_option (void)
 {
-  if (!OPTION_SET_P (ptx_version_option)
-  || ptx_version_option == PTX_VERSION_default)
+  if (!OPTION_SET_P (ptx_version_option))
+gcc_checking_assert (ptx_version_option == PTX_VERSION_default);
+
+  if (ptx_version_option == PTX_VERSION_default)
 {
   ptx_version_option = default_ptx_version_option ();
   return;
 }
 
-  enum ptx_version first
-= first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option);
+  enum ptx_version first = first_ptx_version_supporting_sm (ptx_isa_option);
 
   if (ptx_version_option < first)
 error ("PTX version (%<-mptx%>) needs to be at least %s to support 
selected"
   " %<-misa%> (sm_%s)", ptx_version_to_string (first),
-  sm_version_to_string ((enum ptx_isa)ptx_isa_option));
+  sm_version_to_string (ptx_isa_option));
 }
 
 /* Implement TARGET_OPTION_OVERRIDE.  */
@@ -336,7 +336,9 @@ nvptx_option_override (void)
 
   /* Via nvptx 'OPTION_DEFAULT_SPECS', '-misa' always appears on the command
  line; but handle the case that the compiler is not run via the driver.  */
-  if (!OPTION_SET_P (ptx_isa_option))
+  gcc_checking_assert ((ptx_isa_option == PTX_ISA_unset)
+  == (!OPTION_SET_P (ptx_isa_option)));
+  if (ptx_isa_option == PTX_ISA_unset)
 fatal_error (UNKNOWN_LOCATION, "%<-march=%> must be specified");
 
   handle_ptx_version_option ();
@@ -5953,13 +5955,11 @@ nvptx_file_start (void)
   fputs ("// BEGIN PREAMBLE\n", asm_out_file);
 
   fputs ("\t.version\t", asm_out_file);
-  fputs (ptx_version_to_string ((enum ptx_version)ptx_version_option),
-asm_out_file);
+  fputs (ptx_version_to_string (ptx_version_option), asm_out_file);
   fputs ("\n", asm_out_file);
 
   fputs ("\t.target\tsm_", asm_out_file);
-  fp

[gcc r15-3451] Use dg-additional-options for gfortran.dg/vect/vect-8.f90 and RISC-V

2024-09-04 Thread Richard Biener via Gcc-cvs

https://gcc.gnu.org/g:284feaa809294995d6c133b8d002850a069c1ded

commit r15-3451-g284feaa809294995d6c133b8d002850a069c1ded
Author: Richard Biener 
Date:   Wed Sep 4 16:00:00 2024 +0200

Use dg-additional-options for gfortran.dg/vect/vect-8.f90 and RISC-V

r14-9122-g67a29f99cc8138 disabled scheduling on a lot of testcases
for RISC-V for PR113249 but using dg-options.  This makes
gfortran.dg/vect/vect-8.f90 UNRESOLVED as it relies on default
flags to enable vectorization.

The following uses dg-additional-options instead.

Tested on riscv64-linux with qemu-user, pushed.

I didn't check all the other adjusted tests for similar issues.

* gfortran.dg/vect/vect-8.f90: Use dg-additional-options.

Diff:
---
 gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 
b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
index f77ec9fb87a..557a523e2bd 100644
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
@@ -2,7 +2,7 @@
 ! { dg-require-effective-target vect_double }
 ! { dg-additional-options "-fno-tree-loop-distribute-patterns 
-finline-matmul-limit=0" }
 ! PR113249
-! { dg-options "-fno-schedule-insns -fno-schedule-insns2" { target { 
riscv*-*-* } } }
+! { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" { target 
{ riscv*-*-* } } }
 
 module lfk_prec
  integer, parameter :: dp=kind(1.d0)

[gcc r15-3452] object-size: Use simple_dce_from_worklist in object-size pass

2024-09-04 Thread Andrew Pinski via Gcc-cvs

https://gcc.gnu.org/g:97e011a472e16ddab67d7374ee9c3db040b62798

commit r15-3452-g97e011a472e16ddab67d7374ee9c3db040b62798
Author: Andrew Pinski 
Date:   Tue Sep 3 12:48:46 2024 -0700

object-size: Use simple_dce_from_worklist in object-size pass

While trying to see if there was a way to improve object-size pass
to use the ranger (for pointer plus), I noticed that it leaves around
the statement containing __builtin_object_size if it was reduced to a 
constant.
This fixes that by using simple_dce_from_worklist.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-object-size.cc (object_sizes_execute): Mark lhs for maybe 
dceing
if doing a propagate. Call simple_dce_from_worklist.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-object-size.cc | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index 4c1fa9b555f..6544730e153 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "gimplify-me.h"
 #include "gimplify.h"
+#include "tree-ssa-dce.h"
 
 struct object_size_info
 {
@@ -2187,6 +2188,7 @@ static unsigned int
 object_sizes_execute (function *fun, bool early)
 {
   todo = 0;
+  auto_bitmap sdce_worklist;
 
   basic_block bb;
   FOR_EACH_BB_FN (bb, fun)
@@ -2277,13 +2279,18 @@ object_sizes_execute (function *fun, bool early)
 
  /* Propagate into all uses and fold those stmts.  */
  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
-   replace_uses_by (lhs, result);
+   {
+ replace_uses_by (lhs, result);
+ /* Mark lhs as being possiblely DCEd. */
+ bitmap_set_bit (sdce_worklist, SSA_NAME_VERSION (lhs));
+   }
  else
replace_call_with_value (&i, result);
}
 }
 
   fini_object_sizes ();
+  simple_dce_from_worklist (sdce_worklist);
   return todo;
 }

[gcc r15-3453] coros: mark .CO_YIELD as LEAF [PR106973]

2024-09-04 Thread Arsen Arsenovic via Gcc-cvs

https://gcc.gnu.org/g:7b7ad3f4b2455072f42e7884b93fd96ebb920bc8

commit r15-3453-g7b7ad3f4b2455072f42e7884b93fd96ebb920bc8
Author: Arsen Arsenović 
Date:   Tue Sep 3 17:14:13 2024 +0200

coros: mark .CO_YIELD as LEAF [PR106973]

We rely on .CO_YIELD calls being followed by an assignment (optionally)
and then a switch/if in the same basic block.  This implies that a
.CO_YIELD can never end a block.  However, since a call to .CO_YIELD is
still a call, if the function containing it calls setjmp, GCC thinks
that the .CO_YIELD can introduce abnormal control flow, and generates an
edge for the call.

We know this is not the case; .CO_YIELD calls get removed quite early on
and have no effect, and result in no other calls, so .CO_YIELD can be
considered a leaf function, preventing generating an edge when calling
it.

PR c++/106973 - coroutine generator and setjmp

PR c++/106973

gcc/ChangeLog:

* internal-fn.def (CO_YIELD): Mark as ECF_LEAF.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr106973.C: New test.

Diff:
---
 gcc/internal-fn.def|  2 +-
 gcc/testsuite/g++.dg/coroutines/pr106973.C | 22 ++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 75b527b1ab0..23b4ab02b30 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -569,7 +569,7 @@ DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
 
 /* For coroutines.  */
 DEF_INTERNAL_FN (CO_ACTOR, ECF_NOTHROW | ECF_LEAF, NULL)
-DEF_INTERNAL_FN (CO_YIELD, ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (CO_YIELD, ECF_NOTHROW | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (CO_SUSPN, ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (CO_FRAME, ECF_PURE | ECF_NOTHROW | ECF_LEAF, NULL)
 
diff --git a/gcc/testsuite/g++.dg/coroutines/pr106973.C 
b/gcc/testsuite/g++.dg/coroutines/pr106973.C
new file mode 100644
index 000..6db6cbc7711
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr106973.C
@@ -0,0 +1,22 @@
+// https://gcc.gnu.org/PR106973
+// { dg-require-effective-target indirect_jumps }
+#include 
+#include 
+
+struct generator;
+struct generator_promise {
+  generator get_return_object();
+  std::suspend_always initial_suspend();
+  std::suspend_always final_suspend() noexcept;
+  std::suspend_always yield_value(int);
+  void unhandled_exception();
+};
+
+struct generator {
+  using promise_type = generator_promise;
+};
+jmp_buf foo_env;
+generator foo() {
+  setjmp(foo_env);
+  co_yield 1;
+}

[gcc r15-3454] c++: add a testcase for [PR 108620]

2024-09-04 Thread Arsen Arsenovic via Gcc-cvs

https://gcc.gnu.org/g:858918ef4233c837ab85819ad159bf452df3a7fb

commit r15-3454-g858918ef4233c837ab85819ad159bf452df3a7fb
Author: Arsen Arsenović 
Date:   Tue Sep 3 20:58:55 2024 +0200

c++: add a testcase for [PR 108620]

Fixed by r15-2540-g32e678b2ed7521.  Add a testcase, as the original ones
do not cover this particular failure mode.

gcc/testsuite/ChangeLog:

PR c++/108620
* g++.dg/coroutines/pr108620.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/coroutines/pr108620.C | 95 ++
 1 file changed, 95 insertions(+)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr108620.C 
b/gcc/testsuite/g++.dg/coroutines/pr108620.C
new file mode 100644
index 000..e8016b9f8a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr108620.C
@@ -0,0 +1,95 @@
+// https://gcc.gnu.org/PR108620
+#include 
+#include 
+#include 
+
+template
+struct task;
+
+template 
+struct task_private_data {
+  inline task_private_data() noexcept : data_(nullptr) {}
+  inline task_private_data(PrivateDataType* input) noexcept : data_(input) {}
+  inline task_private_data(task_private_data&& other) noexcept = default;
+  inline task_private_data& operator=(task_private_data&&) noexcept = default;
+  inline task_private_data(const task_private_data&) = delete;
+  inline task_private_data& operator=(const task_private_data&) = delete;
+  inline ~task_private_data() {}
+
+  inline bool await_ready() const noexcept { return true; }
+  inline PrivateDataType* await_resume() const noexcept { return data_; }
+  inline void await_suspend(std::coroutine_handle<>) noexcept {}
+
+  PrivateDataType* data_;
+};
+
+template
+struct task_context {
+PrivateDataType data_;
+};
+
+template
+struct task {
+using self_type = task;
+std::shared_ptr> context_;
+
+task(const std::shared_ptr>& input): 
context_(input) {}
+
+static auto yield_private_data() noexcept { return 
task_private_data{}; }
+
+struct promise_type {
+  std::shared_ptr> context_;
+
+  template
+  promise_type(Input&& input, Rest&&...) {
+context_ = std::make_shared>();
+context_->data_ = std::forward(input);
+  }
+
+  auto get_return_object() noexcept { return self_type{context_}; }
+  std::suspend_never initial_suspend() noexcept { return {}; }
+  std::suspend_never final_suspend() noexcept { return {}; }
+  void unhandled_exception() { throw; }
+
+  template
+  void return_value(ReturnType&&) {}
+
+  template 
+  inline task_private_data yield_value(
+  task_private_data&& input) noexcept {
+input.data_ = &context_->data_;
+return task_private_data(input.data_);
+  }
+};
+};
+
+template
+task call1(TArg&& arg, OutputType& output) {
+OutputType* ptr = co_yield task::yield_private_data();
+output = *ptr;
+co_return 0;
+}
+
+
+struct container {
+std::string* ptr;
+};
+
+template
+task call2(TArg&& arg, container& output) {
+output.ptr = co_yield task::yield_private_data();
+co_return 0;
+}
+
+int main() {
+  // success
+  std::string output1;
+  call1(std::string("hello1"), output1);
+  std::cout<< "output1: "<< output1<< std::endl;
+
+  // crash
+  container output2;
+  auto task2 = call2(std::string("hello2"), output2);
+  std::cout<< "output2: "<< *output2.ptr<< std::endl;
+  return 0;
+}

[gcc(refs/users/meissner/heads/work177-tar)] Add support for the TAR register.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:5c2283f478e908718024e5b3b53dc8c4db3a8fe9

commit 5c2283f478e908718024e5b3b53dc8c4db3a8fe9
Author: Michael Meissner 
Date:   Wed Sep 4 11:36:25 2024 -0400

Add support for the TAR register.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/constraints.md (h constraint): Add TAR register to 
the
documentation.
(wt constraint): New constraint.
* config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
(POWERPC_MASKS): Likewise.
* config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register 
support.
(alt_reg_names): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
hold scalar integer modes of an appropriate size.  Add TAR register
support.
(rs6000_debug_reg_global): Print the register class that wt maps 
too.
(rs6000_init_hard_regno_mode_ok): Add TAR register support.
(rs6000_conditional_register_usage): Add TAR register support.
(print_operand): Likewise.
(rs6000_debugger_regno): Likewise.
(rs6000_opt_masks): Add support for -mtar.
* config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
support.
(FIXED_REGISTERS): Likewise.
(CALL_REALLY_USED_REGISTERS): Likewise.
(REG_ALLOC_ORDER): Likewise.
(enum reg_class): Likewise.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.
(enum r6000_reg_class_enum): Add support for the wt constraint.
* config/rs6000/rs6000.md (TAR_REGNO): New constant.
(call_indirect_nonlocal_sysv): Likewise.
(call_value_indirect_nonlocal_sysv): Likewise.
(call_indirect_aix): Likewise.
(call_value_indirect_aix): Likewise.
(call_indirect_elfv2): Likewise.
(call_indirect_pcrel): Likewise.
(call_value_indirect_elfv2): Likewise.
(call_value_indirect_pcrel): Likewise.
(*sibcall_indirect_nonlocal_sysv): Likewise.
(sibcall_value_indirect_nonlocal_sysv): Likewise.
(indirect_jump): Likewise.
(@indirect_jump_nospec): Likewise.
(@tablejump_insn_normal): Likewise.
(@tablejump_insn_nospec): Likewise.
* config/rs6000/rs6000.opt (-mtar): New option.

gcc/testsuite/

* gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR 
register.
* gcc.target/powerpc/pr51513.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
* gcc.target/powerpc/tar-register.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md   |  5 +-
 gcc/config/rs6000/rs6000-cpus.def  |  4 +-
 gcc/config/rs6000/rs6000.cc| 58 +++---
 gcc/config/rs6000/rs6000.h | 31 +++-
 gcc/config/rs6000/rs6000.md| 33 ++--
 gcc/config/rs6000/rs6000.opt   |  4 ++
 gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c|  4 +-
 gcc/testsuite/gcc.target/powerpc/pr51513.c |  4 +-
 .../gcc.target/powerpc/safe-indirect-jump-2.c  |  2 +-
 .../gcc.target/powerpc/safe-indirect-jump-3.c  |  2 +-
 gcc/testsuite/gcc.target/powerpc/tar-register.c| 34 +
 11 files changed, 138 insertions(+), 43 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042..14f0465d7ae 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -57,7 +57,7 @@
   "@internal A compatibility alias for @code{wa}.")
 
 (define_register_constraint "h" "SPECIAL_REGS"
-  "@internal A special register (@code{vrsave}, @code{ctr}, or @code{lr}).")
+  "@internal A special register (@code{vrsave}, @code{ctr}, @code{lr} or 
@code{tar}).")
 
 (define_register_constraint "c" "CTR_REGS"
   "The count register, @code{ctr}.")
@@ -91,6 +91,9 @@
   "@internal Like @code{r}, if @option{-mpowerpc64} is used; otherwise,
@code{NO_REGS}.")
 
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
+  "The tar register, @code{tar}.")
+
 (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
   "@internal Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise,
@code{NO_REGS}.")
diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8..a7ecd38f8ee 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -64,7 +64,8 @@
  | OPTION_MASK_MODULO  \
  | OPTION_MASK_P9_MINMAX   \
  |

[gcc(refs/users/meissner/heads/work177-tar)] Remove SPR alternatives for move insns.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:a719aa37a9ddcea20ef9da9c2097ee48465da758

commit a719aa37a9ddcea20ef9da9c2097ee48465da758
Author: Michael Meissner 
Date:   Wed Sep 4 11:38:07 2024 -0400

Remove SPR alternatives for move insns.

2024-09-04  Michael Meissner  

* config/rs6000/rs6000.md (mov_internal): Remove alternatives 
for
moving values to/from SPR registers.
(movcc_): Likewise.
(movsf_hardfloat): Likewise.
(movsd_hardfloat): Likewise.
(mov_softfloat): Likewise.
(mov_hardfloat64): Likewise.
(mov_softfloat64): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.md | 114 +---
 1 file changed, 44 insertions(+), 70 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2c932061b93..16f3cd1ba6b 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8099,16 +8099,16 @@
 
 ;; MR  LHZ/LBZLXSI*ZXSTH/STBSTXSI*XLI
 ;; XXLOR   load 0 load -1VSPLTI*#  MFVSRWZ
-;; MTVSRWZ MF%1   MT%1   NOP
+;; MTVSRWZ
 (define_insn "*mov_internal"
   [(set (match_operand:QHI 0 "nonimmediate_operand"
"=r,r, wa,m, ?Z,r,
 wa,wa,wa,v, ?v,r,
-wa,r, *c*l,  *h")
+wa")
(match_operand:QHI 1 "input_operand"
"r, m, ?Z,r, wa,i,
 wa,O, wM,wB,wS,wa,
-r, *h,r, 0"))]
+r"))]
   "gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode)"
   "@
@@ -8124,22 +8124,19 @@
vspltis %0,%1
#
mfvsrwz %0,%x1
-   mtvsrwz %x0,%1
-   mf%1 %0
-   mt%0 %1
-   nop"
+   mtvsrwz %x0,%1"
   [(set_attr "type"
"*, load,  fpload,store, fpstore,   *,
 vecsimple, vecperm,   vecperm,   vecperm,   vecperm,   mfvsr,
-mtvsr, mfjmpr,mtjmpr,*")
+mtvsr")
(set_attr "length"
"*, *, *, *, *, *,
 *, *, *, *, 8, *,
-*, *, *, *")
+*")
(set_attr "isa"
"*, *, p9v,   *, p9v,   *,
 p9v,   p9v,   p9v,   p9v,   p9v,   p9v,
-p9v,   *, *, *")])
+p9v")])
 
 
 ;; Here is how to move condition codes around.  When we store CC data in
@@ -8155,9 +8152,9 @@
 
 (define_insn "*movcc_"
   [(set (match_operand:CC_any 0 "nonimmediate_operand"
-   "=y,x,?y,y,r,r,r,r, r,*c*l,r,m")
+   "=y,x,?y,y,r,r,r,r,r,m")
(match_operand:CC_any 1 "general_operand"
-   " y,r, r,O,x,y,r,I,*h,   r,m,r"))]
+   " y,r, r,O,x,y,r,I,m,r"))]
   "register_operand (operands[0], mode)
|| register_operand (operands[1], mode)"
   "@
@@ -8169,8 +8166,6 @@
mfcr %0%Q1\;rlwinm %0,%0,%f1,0xf000
mr %0,%1
li %0,%1
-   mf%1 %0
-   mt%0 %1
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0"
   [(set_attr_alternative "type"
@@ -8184,11 +8179,9 @@
(const_string "mfcrf") (const_string "mfcr"))
   (const_string "integer")
   (const_string "integer")
-  (const_string "mfjmpr")
-  (const_string "mtjmpr")
   (const_string "load")
   (const_string "store")])
-   (set_attr "length" "*,*,12,*,*,8,*,*,*,*,*,*")])
+   (set_attr "length" "*,*,12,*,*,8,*,*,*,*")])
 
 ;; For floating-point, we normally deal with the floating-point registers
 ;; unless -msoft-float is used.  The sole exception is that parameter passing
@@ -8239,17 +8232,17 @@
 ;;
 ;; LWZ  LFSLXSSP   LXSSPX STFS   STXSSP
 ;; STXSSPX  STWXXLXOR  LI FMRXSCPSGNDP
-;; MR   MT  MF   NOPXXSPLTIDP
+;; MR   XXSPLTIDP
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 "=!r,   f, v,  wa,m, wY,
  Z, m, wa, !r,f, wa,
- !r,*c*l,  !r, *h,wa")
+ !r,wa")
(match_operand:SF 1 "input_operand"
 "m, m, wY, Z, f, v,
  wa,r, j,  j, f, wa,
- r, r, *h, 0, eP"))]
+ r, eP"))]
   "(register_operand (operands[0], SFmode)
|| register_operand (ope

[gcc(refs/users/meissner/heads/work177-tar)] Update ChangeLog.*

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ca08fa1266ef9d4678a9ee5b094a06c4f753bda9

commit ca08fa1266ef9d4678a9ee5b094a06c4f753bda9
Author: Michael Meissner 
Date:   Wed Sep 4 11:40:35 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 83 ++-
 1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index 6fe8a38bffc..6ccfe220dfc 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,6 +1,87 @@
+ Branch work177-tar, patch #301 
+
+Remove SPR alternatives for move insns.
+
+2024-08-19  Michael Meissner  
+
+   * config/rs6000/rs6000.md (mov_internal): Remove alternatives for
+   moving values to/from SPR registers.
+   (movcc_): Likewise.
+   (movsf_hardfloat): Likewise.
+   (movsd_hardfloat): Likewise.
+   (mov_softfloat): Likewise.
+   (mov_hardfloat64): Likewise.
+   (mov_softfloat64): Likewise.
+
+ Branch work177-tar, patch #300 
+
+Add support for the TAR register.
+
+2024-08-19  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (h constraint): Add TAR register to the
+   documentation.
+   (wt constraint): New constraint.
+   * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
+   (POWERPC_MASKS): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support.
+   (alt_reg_names): Likewise.
+   (rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
+   hold scalar integer modes of an appropriate size.  Add TAR register
+   support.
+   (rs6000_debug_reg_global): Print the register class that wt maps too.
+   (rs6000_init_hard_regno_mode_ok): Add TAR register support.
+   (rs6000_conditional_register_usage): Add TAR register support.
+   (print_operand): Likewise.
+   (rs6000_debugger_regno): Likewise.
+   (rs6000_opt_masks): Add support for -mtar.
+   * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
+   support.
+   (FIXED_REGISTERS): Likewise.
+   (CALL_REALLY_USED_REGISTERS): Likewise.
+   (REG_ALLOC_ORDER): Likewise.
+   (enum reg_class): Likewise.
+   (REG_CLASS_NAMES): Likewise.
+   (REG_CLASS_CONTENTS): Likewise.
+   (enum r6000_reg_class_enum): Add support for the wt constraint.
+   * config/rs6000/rs6000.md (TAR_REGNO): New constant.
+   (call_indirect_nonlocal_sysv): Likewise.
+   (call_value_indirect_nonlocal_sysv): Likewise.
+   (call_indirect_aix): Likewise.
+   (call_value_indirect_aix): Likewise.
+   (call_indirect_elfv2): Likewise.
+   (call_indirect_pcrel): Likewise.
+   (call_value_indirect_elfv2): Likewise.
+   (call_value_indirect_pcrel): Likewise.
+   (*sibcall_indirect_nonlocal_sysv): Likewise.
+   (sibcall_value_indirect_nonlocal_sysv): Likewise.
+   (indirect_jump): Likewise.
+   (@indirect_jump_nospec): Likewise.
+   (@tablejump_insn_normal): Likewise.
+   (@tablejump_insn_nospec): Likewise.
+   * config/rs6000/rs6000.opt (-mtar): New option.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register.
+   * gcc.target/powerpc/pr51513.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
+   * gcc.target/powerpc/tar-register.c: New test.
+
  Branch work177-tar, baseline 
 
+Add ChangeLog.tar and update REVISION.
+
+2024-08-16  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.tar: New file for branch.
+   * REVISION: Update.
+
 2024-09-03   Michael Meissner  
 
Clone branch
-

[gcc(refs/users/meissner/heads/work177-libs)] Do not build IEEE 128-bit libgfortran support if VSX is not available.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b766e0eb3bd24ca74f54cddc957db0b3ba84187e

commit b766e0eb3bd24ca74f54cddc957db0b3ba84187e
Author: Michael Meissner 
Date:   Wed Sep 4 11:29:29 2024 -0400

Do not build IEEE 128-bit libgfortran support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then 
we
cannot build the IEEE 128-bit libraries.  This patch fixes the libgfortran
library so if the GCC compiler does not support IEEE 128-bit floating 
point, the
IEEE 128-bit floating point libraries are not built.  A companion patch 
will fix
the libstdc++-v3 library.

I have built these patches on a little endian system, doing both normal 
builds,
and making a build with a power5 default.  There was no regression in the 
normal
builds.  I have also built a big endian GCC compiler and there was no 
regression
there.  Can I check this patch into the trunk?

2024-09-04  Michael Meissner  

libgfortran/

PR target/115800
* configure.ac (powerpc64le*-linux*): Check to see that the compiler
uses VSX before enabling IEEE 128-bit support.
* configure: Regenerate.
* kinds-override.h (GFC_REAL_17): Add check for __VSX__.
* libgfortran.h (POWER_IEEE128): Likewise.

Diff:
---
 libgfortran/configure| 7 +--
 libgfortran/configure.ac | 3 +++
 libgfortran/kinds-override.h | 2 +-
 libgfortran/libgfortran.h| 2 +-
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index 11a1bc5f070..2708e5c7eca 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -5981,6 +5981,9 @@ if test "x$GCC" = "xyes"; then
 #if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
   #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
+  #endif
 int
 main ()
 {
@@ -12847,7 +12850,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12850 "configure"
+#line 12853 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12953,7 +12956,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12956 "configure"
+#line 12959 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index cca1ea0ea97..cfaeb9717ab 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -148,6 +148,9 @@ if test "x$GCC" = "xyes"; then
   AC_PREPROC_IFELSE(
 [AC_LANG_PROGRAM([[#if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
+  #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
   #endif]],
  [[(void) 0;]])],
 [AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble -mno-gnu-attribute";
diff --git a/libgfortran/kinds-override.h b/libgfortran/kinds-override.h
index f6b4956c5ca..51f440e5323 100644
--- a/libgfortran/kinds-override.h
+++ b/libgfortran/kinds-override.h
@@ -30,7 +30,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #endif
 
 /* Keep these conditions on one line so grep can filter it out.  */
-#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16
+#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16 && defined(__VSX__)
 typedef _Float128 GFC_REAL_17;
 typedef _Complex _Float128 GFC_COMPLEX_17;
 #define HAVE_GFC_REAL_17
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index effa3732c18..70db350ba01 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -104,7 +104,7 @@ typedef off_t gfc_offset;
 #endif
 
 #if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \
-&& defined __GLIBC_PREREQ
+&& defined __GLIBC_PREREQ && defined(__VSX__)
 #if __GLIBC_PREREQ (2, 32)
 #define POWER_IEEE128 1
 #endif

[gcc(refs/users/meissner/heads/work177-libs)] Do not build IEEE 128-bit libstdc++ support if VSX is not available.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:61b479aadbe46f39409f8f3996e3192adf60b66d

commit 61b479aadbe46f39409f8f3996e3192adf60b66d
Author: Michael Meissner 
Date:   Wed Sep 4 11:30:57 2024 -0400

Do not build IEEE 128-bit libstdc++ support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then 
we
cannot build the IEEE 128-bit libraries.  This patch fixes the libstdc++-v3
library so if the GCC compiler does not support IEEE 128-bit floating 
point, the
IEEE 128-bit floating point libraries are not built.  A companion patch 
will fix
the libgfortran library.

I have built these patches on a little endian system, doing both normal 
builds,
and making a build with a power5 default.  There was no regression in the 
normal
builds.  I have also built a big endian GCC compiler and there was no 
regression
there.  Can I check this patch into the trunk?

2024-09-04  Michael Meissner  

libstdc++-v3/

PR target/115800
* configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on 
PowerPC
systems without VSX.
* configure: Regenerate.
* numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems 
without
VSX.

Diff:
---
 libstdc++-v3/configure| 68 ++-
 libstdc++-v3/configure.ac | 58 --
 libstdc++-v3/include/ext/numeric_traits.h |  2 +-
 3 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 005c4a29fd0..ae7944beb78 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -51379,8 +51379,31 @@ $as_echo "#define _GLIBCXX_LONG_DOUBLE_COMPAT 1" 
>>confdefs.h
 case "$target" in
   powerpc*-*-linux*)
LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute"
-# Check for IEEE128 support in libm:
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
+   # Eliminate little endian systems without VSX
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+ #ifndef __VSX__
+ #error "IEEE 128-bit needs VSX"
+ #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_ieee128_possible=yes
+else
+  ac_ieee128_possible=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   if test $ac_ieee128_possible = yes; then
+  # Check for IEEE128 support in libm:
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
 $as_echo_n "checking for __frexpieee128 in -lm... " >&6; }
 if ${ac_cv_lib_m___frexpieee128+:} false; then :
   $as_echo_n "(cached) " >&6
@@ -51425,18 +51448,18 @@ else
   ac_ldbl_ieee128_in_libc=no
 fi
 
-if test $ac_ldbl_ieee128_in_libc = yes; then
-  # Determine which long double format is the compiler's default:
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  if test $ac_ldbl_ieee128_in_libc = yes; then
+# Determine which long double format is the compiler's default:
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 int
 main ()
 {
 
-#ifndef __LONG_DOUBLE_IEEE128__
-#error compiler defaults to ibm128
-#endif
+  #ifndef __LONG_DOUBLE_IEEE128__
+  #error compiler defaults to ibm128
+  #endif
 
   ;
   return 0;
@@ -51448,21 +51471,28 @@ else
   ac_ldbl_ieee128_default=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  # Library objects should use default long double format.
-  if test "$ac_ldbl_ieee128_default" = yes; then
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-# Except for the ones that explicitly use these flags:
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
-  else
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ieeelongdouble 
-mno-gnu-attribute -Wno-psabi"
-  fi
+# Library objects should use default long double format.
+if test "$ac_ldbl_ieee128_default" = yes; then
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  # Except for the ones that explicitly use these flags:
+  LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
+else
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  LONG_DOUBLE_ALT128_COMPAT_FL

[gcc(refs/users/meissner/heads/work177-libs)] Do not build IEEE 128-bit libstdc++ support if VSX is not available.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:3696abc890a0e9acbd4e93c688d0e238c011ee76

commit 3696abc890a0e9acbd4e93c688d0e238c011ee76
Author: Michael Meissner 
Date:   Wed Sep 4 11:31:56 2024 -0400

Do not build IEEE 128-bit libstdc++ support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then 
we
cannot build the IEEE 128-bit libraries.  This patch fixes the libstdc++-v3
library so if the GCC compiler does not support IEEE 128-bit floating 
point, the
IEEE 128-bit floating point libraries are not built.  A companion patch 
will fix
the libgfortran library.

I have built these patches on a little endian system, doing both normal 
builds,
and making a build with a power5 default.  There was no regression in the 
normal
builds.  I have also built a big endian GCC compiler and there was no 
regression
there.  Can I check this patch into the trunk?

2024-09-04  Michael Meissner  

libstdc++-v3/

PR target/115800
* configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on 
PowerPC
systems without VSX.
* configure: Regenerate.
* numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems 
without
VSX.

Diff:
---
 libgcc/config.host | 12 ++--
 libgcc/config/rs6000/t-float128|  8 +++-
 libgcc/config/rs6000/t-float128-hw |  3 +--
 libgcc/config/rs6000/t-float128-p10-hw |  3 +--
 libgcc/configure   |  8 +++-
 libgcc/configure.ac|  8 +++-
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9fae51d4ce7..261b08859a4 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1292,14 +1292,14 @@ powerpc*-*-linux*)
 
if test $libgcc_cv_powerpc_float128 = yes; then
tmake_file="${tmake_file} rs6000/t-float128"
-   fi
 
-   if test $libgcc_cv_powerpc_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-hw"
-   fi
+   if test $libgcc_cv_powerpc_float128_hw = yes; then
+   tmake_file="${tmake_file} rs6000/t-float128-hw"
 
-   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-p10-hw"
+   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
+   tmake_file="${tmake_file} 
rs6000/t-float128-p10-hw"
+   fi
+   fi
fi
 
extra_parts="$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o"
diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128
index b09b5664af0..93e78adcd62 100644
--- a/libgcc/config/rs6000/t-float128
+++ b/libgcc/config/rs6000/t-float128
@@ -74,7 +74,13 @@ fp128_includes   = $(srcdir)/soft-fp/double.h \
  $(srcdir)/soft-fp/soft-fp.h
 
 # Build the emulator without ISA 3.0 hardware support.
-FP128_CFLAGS_SW = -Wno-type-limits -mvsx -mfloat128 \
+#
+# In the past we added -mvsx to build the float128 specific libraries with the
+# VSX instruction set.  This allowed the big endian GCC on server platforms to
+# build the float128 support.  However, is causes problems when other default
+# cpu targets are used such as the 7450.
+
+FP128_CFLAGS_SW = -Wno-type-limits -mfloat128 \
   -mno-float128-hardware -mno-gnu-attribute \
   -I$(srcdir)/soft-fp \
   -I$(srcdir)/config/rs6000 \
diff --git a/libgcc/config/rs6000/t-float128-hw 
b/libgcc/config/rs6000/t-float128-hw
index ed67b572580..82726c98b98 100644
--- a/libgcc/config/rs6000/t-float128-hw
+++ b/libgcc/config/rs6000/t-float128-hw
@@ -23,8 +23,7 @@ fp128_ifunc_obj   = $(fp128_ifunc_static_obj) 
$(fp128_ifunc_shared_obj)
 fp128_sed_hw   = -hw
 
 # Build the hardware support functions with appropriate hardware support
-FP128_CFLAGS_HW = -Wno-type-limits -mvsx -mfloat128 \
-  -mcpu=power9 \
+FP128_CFLAGS_HW = -Wno-type-limits -mfloat128 -mcpu=power9 \
   -mfloat128-hardware -mno-gnu-attribute \
   -I$(srcdir)/soft-fp \
   -I$(srcdir)/config/rs6000 \
diff --git a/libgcc/config/rs6000/t-float128-p10-hw 
b/libgcc/config/rs6000/t-float128-p10-hw
index edaaee0e478..ee50d248ca1 100644
--- a/libgcc/config/rs6000/t-float128-p10-hw
+++ b/libgcc/config/rs6000/t-float128-p10-hw
@@ -13,8 +13,7 @@ fp128_3_1_hw_shared_obj

[gcc(refs/users/meissner/heads/work177-libs)] Do not add -mvsx when testing the float128 support.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:db95aceee0bf62edcb1fa34e5c61279fa09ed158

commit db95aceee0bf62edcb1fa34e5c61279fa09ed158
Author: Michael Meissner 
Date:   Wed Sep 4 11:33:00 2024 -0400

Do not add -mvsx when testing the float128 support.

Currently, we add -mvsx when building the float128 support in libgcc.  This
allows us to build the float128 support on a big endian system where the
default cpu is power4.  While the libgcc support can be built, given there 
is
no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes 
problems
if you set the default cpu to something like a 7540, which does not have VSX
support.  The assembler complains that when the code does a ".machine 
7450", you
cannot use VSX instructions.

After patching libgcc to not build the float128 support unless the host can
support float128 normally, this patch changes the GCC tests so that it will 
only
do the IEEE 128-bit tests if the default compiler enables the VSX 
instruction
set by default.  Otherwise all of the float128 tests will fail because the
libgcc support is not available.

In addition to not doing the float128 tests when the compiler does not 
natively
support float128, this patch also removes adding -mvsx, -mfloat128, and
-mfloat128-hardware enable the support if the compiler did not natively 
enable
it.

I built little endian compilers and there were no regressions.

I built big endian compilers with the --with-cpu=power5 configure option, 
and I
verified that none of the float128 support functions are built.

I also built big endian compilers on a power9 with the --with-cpu=native
configure option, and I verified that the float128 support functions were
built, since the default compiler used the VSX instruction set.

I verified that on both sets of big endian builds, that all of the float128
tests were skipped, since there is no support for float128 in glibc and the 
GCC
compiler does not enable float128 on those systems.

Can I check these patches into the trunk assuming the original bugzilla 
author
says they fix the problem?

2024-09-04 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, 
and
-mfloat128-hardware options to float128 test.  Add explicit checks 
for
the float128 support, rather than just using VSX as a stand in, or
assuming we can silently enable VSX if the default is power4.  For
pr99708.c, also use the correct spelling to disable the float128 
tests.
* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.
* gcc.target/powerpc/float128-3.c: Likewise.
* gcc.target/powerpc/float128-5.c: Likewise.
* gcc.target/powerpc/float128-complex-2.: Likewise.
* gcc.target/powerpc/float128-math.: Likewise.
* gcc.target/powerpc/inf128-1.: Likewise.
* gcc.target/powerpc/mulkc3-1.c: Likewise.
* gcc.target/powerpc/nan128-1.c: Likewise.
* gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
* gcc.target/powerpc/pr104253.: Likewise.
* gcc.target/powerpc/pr70669.c: Likewise.
* gcc.target/powerpc/pr79004.c: Likewise.
* gcc.target/powerpc/pr79038-1.c: Likewise.
* gcc.target/powerpc/pr81959.c: Likewise.
* gcc.target/powerpc/pr85657-1.: Likewise.
* gcc.target/powerpc/pr85657-2.c: Likewise.
* gcc.target/powerpc/pr99708.: Likewise.
* gcc.target/powerpc/signbit-1.c: Likewise.
* gcc.target/powerpc/signbit-2.c: Likewise.
* lib/target-supports.exp (check_ppc_float128_sw_available): 
Likewise.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target___float128): Likewise.
(check_effective_target_base_quadfloat_support): Likewise.
(check_effective_target_powerpc_float128_sw_ok): Likewise.
(check_effective_target_powerpc_float128_hw_ok): Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 ++-
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 +
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  3 ++-
 .../gcc.target/powerpc/float128-complex-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  2 +-

[gcc(refs/users/meissner/heads/work177-libs)] Update ChangeLog.*

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:fde55641b666e342f2de9889b930fb8bbec4d691

commit fde55641b666e342f2de9889b930fb8bbec4d691
Author: Michael Meissner 
Date:   Wed Sep 4 11:44:02 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.libs | 210 -
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
index 03ebc5600b2..a9d6a95459a 100644
--- a/gcc/ChangeLog.libs
+++ b/gcc/ChangeLog.libs
@@ -1,6 +1,214 @@
+ Branch work177-libs, patch #503 
+
+Do not add -mvsx when testing the float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is
+no glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7540, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", 
you
+cannot use VSX instructions.
+
+After patching libgcc to not build the float128 support unless the host can
+support float128 normally, this patch changes the GCC tests so that it will 
only
+do the IEEE 128-bit tests if the default compiler enables the VSX instruction
+set by default.  Otherwise all of the float128 tests will fail because the
+libgcc support is not available.
+
+In addition to not doing the float128 tests when the compiler does not natively
+support float128, this patch also removes adding -mvsx, -mfloat128, and
+-mfloat128-hardware enable the support if the compiler did not natively enable
+it.
+
+I built little endian compilers and there were no regressions.
+
+I built big endian compilers with the --with-cpu=power5 configure option, and I
+verified that none of the float128 support functions are built.
+
+I also built big endian compilers on a power9 with the --with-cpu=native
+configure option, and I verified that the float128 support functions were
+built, since the default compiler used the VSX instruction set.
+
+I verified that on both sets of big endian builds, that all of the float128
+tests were skipped, since there is no support for float128 in glibc and the GCC
+compiler does not enable float128 on those systems.
+
+Can I check these patches into the trunk assuming the original bugzilla author
+says they fix the problem?
+
+2024-09-04 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, and
+   -mfloat128-hardware options to float128 test.  Add explicit checks for
+   the float128 support, rather than just using VSX as a stand in, or
+   assuming we can silently enable VSX if the default is power4.  For
+   pr99708.c, also use the correct spelling to disable the float128 tests.
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+   * gcc.target/powerpc/float128-3.c: Likewise.
+   * gcc.target/powerpc/float128-5.c: Likewise.
+   * gcc.target/powerpc/float128-complex-2.: Likewise.
+   * gcc.target/powerpc/float128-math.: Likewise.
+   * gcc.target/powerpc/inf128-1.: Likewise.
+   * gcc.target/powerpc/mulkc3-1.c: Likewise.
+   * gcc.target/powerpc/nan128-1.c: Likewise.
+   * gcc.target/powerpc/p9-lxvx-stxvx-3.: Likewise.
+   * gcc.target/powerpc/pr104253.: Likewise.
+   * gcc.target/powerpc/pr70669.c: Likewise.
+   * gcc.target/powerpc/pr79004.c: Likewise.
+   * gcc.target/powerpc/pr79038-1.c: Likewise.
+   * gcc.target/powerpc/pr81959.c: Likewise.
+   * gcc.target/powerpc/pr85657-1.: Likewise.
+   * gcc.target/powerpc/pr85657-2.c: Likewise.
+   * gcc.target/powerpc/pr99708.: Likewise.
+   * gcc.target/powerpc/signbit-1.c: Likewise.
+   * gcc.target/powerpc/signbit-2.c: Likewise.
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
+   (check_ppc_float128_hw_available): Likewise.
+   (check_effective_target_ppc_ieee128_ok): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target___float128): Likewise.
+   (check_effective_target_base_quadfloat_support): Likewise.
+   (check_effective_target_powerpc_float128_sw_ok): Likewise.
+   (check_effective_target_powerpc_float128_hw_ok): Likewise.
+
+ Branch work177-libs, patch #502 
+
+Do not add -mvsx when building libgcc float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is
+no glibc s

[gcc(refs/users/meissner/heads/work177-dmf)] Use vector pair load/store for memcpy with -mcpu=future

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:9713c43be031a884541dc63e6c6efae74f7da3ce

commit 9713c43be031a884541dc63e6c6efae74f7da3ce
Author: Michael Meissner 
Date:   Wed Sep 4 11:48:12 2024 -0400

Use vector pair load/store for memcpy with -mcpu=future

In the development for the power10 processor, GCC did not enable using the 
load
vector pair and store vector pair instructions when optimizing things like
memory copy.  This patch enables using those instructions if -mcpu=future is
used.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable 
using
load vector pair and store vector pair instructions for memory copy
operations.
(POWERPC_MASKS): Make the bit for enabling using load vector pair 
and
store vector pair operations set and reset when the PowerPC 
processor is
changed.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8..74151be4048 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -86,7 +86,8 @@
 
 #define POWER11_MASKS_SERVER   ISA_3_1_MASKS_SERVER
 
-#define FUTURE_MASKS_SERVERPOWER11_MASKS_SERVER
+#define FUTURE_MASKS_SERVER(POWER11_MASKS_SERVER   \
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\
@@ -116,6 +117,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=.  */
 #define POWERPC_MASKS  (OPTION_MASK_ALTIVEC\
+| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\
 | OPTION_MASK_CMPB \
 | OPTION_MASK_CRYPTO   \
 | OPTION_MASK_DFP  \

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2653-Add wD constraint.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:76f0343e31fc5da729986de72551d8622aafd728

commit 76f0343e31fc5da729986de72551d8622aafd728
Author: Michael Meissner 
Date:   Wed Sep 4 11:49:07 2024 -0400

RFC2653-Add wD constraint.

This patch adds a new constraint ('wD') that matches the accumulator 
registers
that overlap with VSX registers 0..31 on power10.  Future patches will add 
the
support for a separate accumulator register class that will be used when the
support for dense math registes is added.

2024-09-03   Michael Meissner  

* config/rs6000/constraints.md (wD): New constraint.
* config/rs6000/mma.md (mma_): Prepare for alternate 
accumulator
registers.  Use wD constraint instead of 'd' constraint.  Use
accumulator_operand instead of fpr_reg_operand.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")]
MMA_ACC))]
   "TARGET_MMA"
   " %A0"
@@ -523,7 +523,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_VV))]
@@ -532,8 +532,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_AVV))]
@@ -542,7 +542,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
MMA_PV))]
@@ -551,8 +551,8 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:OO 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
MMA_APV))]
@@ -561,7 +561,7 @@
   [(set_attr "type" "mma")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -574,8 +574,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
(match_operand:SI 4 "const_0_to_15_operand" "n,n")
@@ -588,7 +588,7 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
(match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
(match_operand:SI 3 "const_0_to_15_operand" "n,n")
@@ -601,8 +601,8 @@
(set_attr "prefixed" "yes")])
 
 (define_insn "mma_"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
-   (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
+   (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2653-Add support for dense math registers.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:0ad27476f30202855d64376550757839733f533c

commit 0ad27476f30202855d64376550757839733f533c
Author: Michael Meissner 
Date:   Wed Sep 4 11:50:40 2024 -0400

RFC2653-Add support for dense math registers.

The MMA subsystem added the notion of accumulator registers as an optional
feature of ISA 3.1 (power10).  In ISA 3.1, these accumulators overlapped 
with
the VSX registers 0..31, but logically the accumulator registers were 
separate
from the FPR registers.  In ISA 3.1, it was anticipated that in future 
systems,
the accumulator registers may no overlap with the FPR registers.  This patch
adds the support for dense math registers as separate registers.

This particular patch does not change the MMA support to use the 
accumulators
within the dense math registers.  This patch just adds the basic support for
having separate DMRs.  The next patch will switch the MMA support to use the
accumulators if -mcpu=future is used.

For testing purposes, I added an undocumented option '-mdense-math' to 
enable
or disable the dense math support.

This patch adds a new constraint (wD).  If MMA is selected but dense math is
not selected (i.e. -mcpu=power10), the wD constraint will allow access to
accumulators that overlap with VSX registers 0..31.  If both MMA and dense 
math
are selected (i.e. -mcpu=future), the wD constraint will only allow dense 
math
registers.

This patch modifies the existing %A output modifier.  If MMA is selected but
dense math is not selected, then %A output modifier converts the VSX 
register
number to the accumulator number, by dividing it by 4.  If both MMA and 
dense
math are selected, then %A will map the separate DMR registers into 0..7.

The intention is that user code using extended asm can be modified to run on
both MMA without dense math and MMA with dense math:

1)  If possible, don't use extended asm, but instead use the MMA 
built-in
functions;

2)  If you do need to write extended asm, change the d constraints
targetting accumulators should now use wD;

3)  Only use the built-in zero, assemble and disassemble functions 
create
move data between vector quad types and dense math accumulators.
I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the
extended asm code.  The reason is these instructions assume there 
is a
1-to-1 correspondence between 4 adjacent FPR registers and an
accumulator that overlaps with those instructions.  With 
accumulators
now being separate registers, there no longer is a 1-to-1
correspondence.

It is possible that the mangling for DMRs and the GDB register numbers may
produce other changes in the future.

2024-09-04   Michael Meissner  

* config/rs6000/mma.md (UNSPEC_MMA_DMSETDMRZ): New unspec.
(movxo): Add comments about dense math registers.
(movxo_nodm): Rename from movxo and restrict the usage to machines
without dense math registers.
(movxo_dm): New insn for movxo support for machines with dense math
registers.
(mma_): Restrict usage to machines without dense math 
registers.
(mma_xxsetaccz): Add a define_expand wrapper, and add support for 
dense
math registers.
(mma_dmsetaccz): New insn.
* config/rs6000/predicates.md (dmr_operand): New predicate.
(accumulator_operand): Add support for dense math registers.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): 
Do
not issue a de-prime instruction when disassembling a vector quad 
on a
system with dense math registers.
* config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): 
Define
__DENSE_MATH__ if we have dense math registers.
* config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE.
(enum rs6000_reload_reg_type): Add RELOAD_REG_DMR.
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD
constraint.
(reload_reg_map): Likewise.
(rs6000_reg_names): Likewise.
(alt_reg_names): Likewise.
(rs6000_hard_regno_nregs_internal): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Likewise.
(rs6000_secondary_reload_memory): Add support for DMR registers.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(print_operand): Make %A handle both FPRs and DMRs.

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2653-PowerPC: Switch to dense math names for all MMA operations.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:e0b9b958eef1fa1bb51267d4971bb0416fbe

commit e0b9b958eef1fa1bb51267d4971bb0416fbe
Author: Michael Meissner 
Date:   Wed Sep 4 11:51:51 2024 -0400

RFC2653-PowerPC: Switch to dense math names for all MMA operations.

This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense 
math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the
instruction.  However, the prefixed instructions have a 'pm' prefix, and we 
add
the 'dm' prefix afterwards.  To prevent having two sets of parallel int
attributes, we remove the "pm" prefix from the instruction string in the
attributes, and add it later, both in the insn name and in the output 
template.

2024-09-04   Michael Meissner  

gcc/

* config/rs6000/mma.md (vvi4i4i8): Change the instruction to not 
have a
"pm" prefix.
(avvi4i4i8): Likewise.
(vvi4i4i2): Likewise.
(avvi4i4i2): Likewise.
(vvi4i4): Likewise.
(avvi4i4): Likewise.
(pvi4i2): Likewise.
(apvi4i2): Likewise.
(vvi4i4i4): Likewise.
(avvi4i4i4): Likewise.
(mma_): Add support for running on DMF systems, generating the 
dense
math instruction and using the dense math accumulators.
(mma_): Likewise.
(mma_): Likewise.
(mma_): Likewise.
(mma_pm): Add support for running on DMF systems, 
generating
the dense math instruction and using the dense math accumulators.
Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm'
prefixes based on whether we have the original MMA specification or 
if
we have dense math support.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.
(mma_pm): Likewise.

Diff:
---
 gcc/config/rs6000/mma.md | 157 +++
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index ae6e7e9695b..2e04eb653fa 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -225,44 +225,47 @@
 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
 
-(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+;; The "pm" prefix is not in these expansions, so that we can generate
+;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
+;; without dense math registers.
+(define_int_attr vvi4i4i8  [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
 
-(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   
"pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP   "xvi4ger8pp")])
 
-(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2")
-(UNSPEC_MMA_PMXVI16GER2S   "pmxvi16ger2s")
-(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2")
-(UNSPEC_MMA_PMXVBF16GER2   
"pmxvbf16ger2")])
+(define_int_attr vvi4i4i2  [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2")
+(UNSPEC_MMA_PMXVI16GER2S   "xvi16ger2s")
+(UNSPEC_MMA_PMXVF16GER2"xvf16ger2")
+(UNSPEC_MMA_PMXVBF16GER2   "xvbf16ger2")])
 
-(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "pmxvi16ger2pp")
-(UNSPEC_MMA_PMXVI16GER2SPP 
"pmxvi16ger2spp")
-(UNSPEC_MMA_PMXVF16GER2PP  "pmxvf16ger2pp")
-(UNSPEC_MMA_PMXVF16GER2PN  "pmxvf16ger2pn")
-(UNSPEC_MMA_PMXVF16GER2NP  "pmxvf16ger2np")
-(UNSPEC_MMA_PMXVF16GER2NN  "pmxvf16ger2nn")
-(UNSPEC_MMA_PMXVBF16GER2PP 
"pmxvbf16ger2pp")
-(UNSPEC_MMA_PMXVBF16GER2PN 
"pmxvbf16ger2pn")
-(UNSPEC_MMA_PMXVBF16GER2NP 
"pmxvbf16ger2np")
-(UNSPEC_MMA_PMXVBF16GER2NN 
"pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP  "xvi16ger2pp")
+(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
+(UNSPEC_MMA_PMXVF16GER2PP  "xvf16ger2pp")
+(UNSPEC_MMA_PMXVF16GER2PN  "xvf16g

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2653-PowerPC: Add support for 1, 024 bit DMR registers.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:62146c78439db4835ad918255a08e57f41de47ce

commit 62146c78439db4835ad918255a08e57f41de47ce
Author: Michael Meissner 
Date:   Wed Sep 4 11:53:39 2024 -0400

RFC2653-PowerPC: Add support for 1,024 bit DMR registers.

This patch is a prelimianry patch to add the full 1,024 bit dense math 
register
(DMRs) for -mcpu=future.  The MMA 512-bit accumulators map onto the top of 
the
DMR register.

This patch only adds the new 1,024 bit register support.  It does not add
support for any instructions that need 1,024 bit registers instead of 512 
bit
registers.

I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit
registers.  The 'wD' constraint added in previous patches is used for these
registers.  I added support to do load and store of DMRs via the VSX 
registers,
since there are no load/store dense math instructions.  I added the new 
keyword
'__dmr' to create 1,024 bit types that can be loaded into DMRs.  At 
present, I
don't have aliases for __dmr512 and __dmr1024 that we've discussed 
internally.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-04   Michael Meissner  

gcc/

* config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec.
(UNSPEC_DM_INSERT512_LOWER): Likewise.
(UNSPEC_DM_EXTRACT512): Likewise.
(UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise.
(UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise.
(movtdo): New define_expand and define_insn_and_split to implement 
1,024
bit DMR registers.
(movtdo_insert512_upper): New insn.
(movtdo_insert512_lower): Likewise.
(movtdo_extract512): Likewise.
(reload_dmr_from_memory): Likewise.
(reload_dmr_to_memory): Likewise.
* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR
support.
(rs6000_init_builtins): Add support for __dmr keyword.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add 
support
for TDOmode.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-modes.def (TDOmode): New mode.
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add
support for TDOmode.
(rs6000_hard_regno_mode_ok_uncached): Likewise.
(rs6000_hard_regno_mode_ok): Likewise.
(rs6000_modes_tieable_p): Likewise.
(rs6000_debug_reg_global): Likewise.
(rs6000_setup_reg_addr_masks): Likewise.
(rs6000_init_hard_regno_mode_ok): Add support for TDOmode.  Setup 
reload
hooks for DMR mode.
(reg_offset_addressing_ok_p): Add support for TDOmode.
(rs6000_emit_move): Likewise.
(rs6000_secondary_reload_simple_move): Likewise.
(rs6000_preferred_reload_class): Likewise.
(rs6000_secondary_reload_class): Likewise.
(rs6000_mangle_type): Add mangling for __dmr type.
(rs6000_dmr_register_move_cost): Add support for TDOmode.
(rs6000_split_multireg_move): Likewise.
(rs6000_invalid_conversion): Likewise.
* config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode.
(enum rs6000_builtin_type_index): Add DMR type nodes.
(dmr_type_node): Likewise.
(ptr_dmr_type_node): Likewise.

gcc/testsuite/

* gcc.target/powerpc/dm-1024bit.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md  | 154 ++
 gcc/config/rs6000/rs6000-builtin.cc   |  17 +++
 gcc/config/rs6000/rs6000-call.cc  |  10 +-
 gcc/config/rs6000/rs6000-modes.def|   4 +
 gcc/config/rs6000/rs6000.cc   | 101 -
 gcc/config/rs6000/rs6000.h|   6 +-
 gcc/testsuite/gcc.target/powerpc/dm-1024bit.c |  63 +++
 7 files changed, 321 insertions(+), 34 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 2e04eb653fa..8461499e1c3 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -92,6 +92,11 @@
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
UNSPEC_MMA_DMSETDMRZ
+   UNSPEC_DM_INSERT512_UPPER
+   UNSPEC_DM_INSERT512_LOWER
+   UNSPEC_DM_EXTRACT512
+   UNSPEC_DMR_RELOAD_FROM_MEMORY
+   UNSPEC_DMR_RELOAD_TO_MEMORY
   ])
 
 (define_c_enum "unspecv"
@@ -793,3 +798,152 @@
 }
   [(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+;; TDOmode (__dmr keyword for 1,024 bit registers).
+(define_expand "movtdo"
+  [(set (match_operand:TDO 0 "nonimmediate_operand")
+   (match_operand:TDO 1 "input_operand"))]
+  "TARGET_MMA_DENSE_MATH"
+{
+  rs6000_emit_move (operands[0], operands[1], TDOmode);
+  DONE;
+})
+
+(define_insn_and_split "*movtdo"
+  [(set (match_operand:TDO 0

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2653-Add dense math test for new instruction names.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:d753057babaa0596db1ae5cf9d8641311644b8da

commit d753057babaa0596db1ae5cf9d8641311644b8da
Author: Michael Meissner 
Date:   Wed Sep 4 11:52:44 2024 -0400

RFC2653-Add dense math test for new instruction names.

2024-09-04   Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/dm-double-test.c: New test.
* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
target test.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++
 gcc/testsuite/lib/target-supports.exp |  23 +++
 2 files changed, 217 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[0]; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[1]; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[2]; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ DM (rowsA, colsB, common, A, B, C

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2677-Add xvrlw support.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f75816fc6e800be9339c0337ebad3a70011b8cfa

commit f75816fc6e800be9339c0337ebad3a70011b8cfa
Author: Michael Meissner 
Date:   Wed Sep 4 12:03:06 2024 -0400

RFC2677-Add xvrlw support.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/altivec.md (xvrlw): New insn.
* config/rs6000/rs6000.h (TARGET_XVRLW): New macro.

gcc/testsuite/

* gcc.target/powerpc/vector-rotate-left.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md   | 14 +
 gcc/config/rs6000/rs6000.h |  3 ++
 .../gcc.target/powerpc/vector-rotate-left.c| 34 ++
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f..f891ccc7403 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1982,6 +1982,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+   (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+(match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl"
   [(set (match_operand:VI2 0 "register_operand" "=v")
 (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6a3fbc1e0fe..c4d8e52a28a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -590,6 +590,9 @@ extern int rs6000_vector_align[];
 /* Whether we have PADDIS support.  */
 #define TARGET_PADDIS  TARGET_FUTURE
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW   TARGET_FUTURE
+
 /* Whether the various reciprocal divide/square root estimate instructions
exist, and whether we should automatically generate code for the instruction
by default.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index 000..5a5f3775507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrl (vector word rotate left using VSX registers insead of
+   Altivec registers is generated.  */
+
+#include 
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));   /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2655-Add saturating subtract built-ins.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7a50bae26c4b3fb169869b9410d136b4395aa7d0

commit 7a50bae26c4b3fb169869b9410d136b4395aa7d0
Author: Michael Meissner 
Date:   Wed Sep 4 12:00:55 2024 -0400

RFC2655-Add saturating subtract built-ins.

This patch adds support for a saturating subtract built-in function that 
may be
added to a future PowerPC processor.  Note, if it is added, the name of the
built-in function may change before GCC 13 is released.  If the name 
changes,
we will submit a patch changing the name.

I also added support for providing dense math built-in functions, even 
though
at present, we have not added any new built-in functions for dense math.  
It is
likely we will want to add new dense math built-in functions as the dense 
math
support is fleshed out.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-04   Michael Meissner  

gcc/

* config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add 
support
for flagging invalid use of future built-in functions.
(rs6000_builtin_is_supported): Add support for future built-in
functions.
* config/rs6000/rs6000-builtins.def 
(__builtin_saturate_subtract32): New
built-in function for -mcpu=future.
(__builtin_saturate_subtract64): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add 
stanzas
for -mcpu=future built-ins.
(stanza_map): Likewise.
(enable_string): Likewise.
(struct attrinfo): Likewise.
(parse_bif_attrs): Likewise.
(write_decls): Likewise.
* config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
built-in insn declarations.
(sat_sub3_dot): Likewise.
(sat_sub3_dot2): Likewise.
* doc/extend.texi (Future PowerPC built-ins): New section.

gcc/testsuite/

* gcc.target/powerpc/subfus-1.c: New test.
* gcc.target/powerpc/subfus-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc | 17 
 gcc/config/rs6000/rs6000-builtins.def   | 10 +
 gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++---
 gcc/config/rs6000/rs6000.md | 60 +
 gcc/doc/extend.texi | 24 
 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++
 gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++
 7 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index 8e4335e9b44..a5f33eb9da1 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode)
 case ENB_MMA:
   error ("%qs requires the %qs option", name, "-mmma");
   break;
+case ENB_FUTURE:
+  error ("%qs requires the %qs option", name, "-mcpu=future");
+  break;
+case ENB_FUTURE_64:
+  error ("%qs requires the %qs option and either the %qs or %qs option",
+name, "-mcpu=future", "-m64", "-mpowerpc64");
+  break;
+case ENB_DM:
+  error ("%qs requires the %qs or %qs options", name, "-mcpu=future",
+"-mdense-math");
+  break;
 default:
 case ENB_ALWAYS:
   gcc_unreachable ();
@@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins 
fncode)
   return TARGET_HTM;
 case ENB_MMA:
   return TARGET_MMA;
+case ENB_FUTURE:
+  return TARGET_FUTURE;
+case ENB_FUTURE_64:
+  return TARGET_FUTURE && TARGET_POWERPC64;
+case ENB_DM:
+  return TARGET_DENSE_MATH;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcf..7d47dc4e402 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -137,6 +137,8 @@
 ;   endian   Needs special handling for endianness
 ;   ibmldRestrict usage to the case when TFmode is IBM-128
 ;   ibm128   Restrict usage to the case where __ibm128 is supported or if ibmld
+;   future   Restrict usage to future instructions
+;   dm   Restrict usage to dense math
 ;
 ; Each attribute corresponds to extra processing required when
 ; the built-in is expanded.  All such special processing should
@@ -3933,3 +3935,11 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+[future]
+  const signed int __builtin_saturate_subtract32 (signed int, signed int);
+  SAT_SUBSI sat_subsi3 {}
+
+[future-64]
+  const signed long __builtin_saturate_subtract64 (signed long,  signed long);
+  SAT_SUBDI sat_subdi3 {}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc 
b/gcc/config/rs

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2656-Support load/store vector with right length.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1f58c992798e8c14b5733ced7c8a284cfa6962fe

commit 1f58c992798e8c14b5733ced7c8a284cfa6962fe
Author: Michael Meissner 
Date:   Wed Sep 4 11:59:59 2024 -0400

RFC2656-Support load/store vector with right length.

This patch adds support for new instructions that may be added to the 
PowerPC
architecture in the future to enhance the load and store vector with length
instructions.

The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to 
use
since the count for the number of bytes must be in the top 8 bits of the GPR
register, instead of the bottom 8 bits.  This meant that code generating 
these
instructions typically had to do a shift left by 56 bits to get the count 
into
the right position.  In a future version of the PowerPC architecture, new
variants of these instructions might be added that expect the count to be in
the bottom 8 bits of the GPR register.  These patches add this support to 
GCC
if the user uses the -mcpu=future option.

I discovered that the code in rs6000-string.cc to generate ISA 3.1 
lxvl/stxvl
future lxvll/stxvll instructions would generate these instructions on 
32-bit.
However the patterns for these instructions is only done on 64-bit systems. 
 So
I added a check for 64-bit support before generating the instructions.

The patches have been tested on both little and big endian systems.  Can I 
check
it into the master branch?

2024-09-04   Michael Meissner  

gcc/

* config/rs6000/rs6000-string.cc (expand_block_move): Do not 
generate
lxvl and stxvl on 32-bit.
* config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl 
with
the shift count automaticaly used in the insn.
(lxvrl): New insn for -mcpu=future.
(lxvrll): Likewise.
(stxvl): If -mcpu=future, generate the stxvl with the shift count
automaticaly used in the insn.
(stxvrl): New insn for -mcpu=future.
(stxvrll): Likewise.

gcc/testsuite/

* gcc.target/powerpc/lxvrl.c: New test.
* lib/target-supports.exp 
(check_effective_target_powerpc_future_ok):
New effective target.

Diff:
---
 gcc/config/rs6000/rs6000-string.cc   |   1 +
 gcc/config/rs6000/vsx.md | 122 +--
 gcc/testsuite/gcc.target/powerpc/lxvrl.c |  32 
 gcc/testsuite/lib/target-supports.exp|  12 +++
 4 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-string.cc 
b/gcc/config/rs6000/rs6000-string.cc
index 3674c4bd984..818ff10a8ac 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
   if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
  && TARGET_BLOCK_OPS_VECTOR_PAIR
+ && TARGET_POWERPC64
  && bytes >= 32
  && (align >= 256 || !STRICT_ALIGNMENT))
{
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b2fc39acf4e..9a082ec2195 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5710,20 +5710,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-(ashift:DI (match_operand:DI 2 "register_operand")
-   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-   (unspec:V16QI
-[(match_operand:DI 1 "gpc_reg_operand")
-  (mem:V16QI (match_dup 1))
- (match_dup 3)]
-UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+len = shift_len;
+  else
+{
+  len = gen_reg_rtx (DImode);
+  emit_insn (gen_rtx_SET (len, shift_len));
+}
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5747,6 +5759,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll buitl-in function required the user to have already done
+;; the shift.  Defining lxvrll this way, will optimize cases where the user has
+;; done the shift immediately before the

[gcc(refs/users/meissner/heads/work177-dmf)] RFC2686-Add paddis support.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b41c5c62dca5c1d8e945832b2c4583a59791a795

commit b41c5c62dca5c1d8e945832b2c4583a59791a795
Author: Michael Meissner 
Date:   Wed Sep 4 12:02:12 2024 -0400

RFC2686-Add paddis support.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eU): New constraint.
(eV): Likewise.
* config/rs6000/predicates.md (paddis_operand): New predicate.
(paddis_paddi_operand): Likewise.
(add_operand): Add paddis support.
* config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis 
support.
(num_insns_constant_multi): Likewise.
(print_operand): Add %B for paddis support.
* config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
(SIGNED_INTEGER_32BIT_P): Likewise.
* config/rs6000/rs6000.md (isa attribute): Add paddis support.
(enabled attribute); Likewise.
(add3): Likewise.
(adddi3 splitter): New splitter for paddis.
(movdi_internal64): Add paddis support.
(movdi splitter): New splitter for paddis.

gcc/testsuite/

* gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md  | 10 +++
 gcc/config/rs6000/predicates.md   | 52 +++-
 gcc/config/rs6000/rs6000.cc   | 25 ++
 gcc/config/rs6000/rs6000.h|  4 +
 gcc/config/rs6000/rs6000.md   | 96 ---
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++
 6 files changed, 197 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 277a30a8245..4d8d21fd6bb 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@internal integer constant that can be loaded with paddis + paddi"
+  (and (match_code "const_int")
+   (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index c03f87c2a19..c849642bfc8 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -369,6 +369,53 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are non-zero, paddis can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) != 0)
+return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an
+;; addi/addis/paddi instruction combination.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS && TARGET_POWERPC64)
+return 0;
+
+  /* If addi, addis, or paddi can handle the number, don't return true.  */
+  HOST_WIDE_INT value = INTVAL (op);
+  if (SIGNED_INTEGER_34BIT_P (value))
+return false;
+
+  /* If the number is too large for padds, return false.  */
+  if (!SIGNED_INTEGER_32BIT_P (value >> 32))
+return false;
+
+  /* If the bottom 32-bits are zero, we can use paddis alone to handle it.  */
+  if ((value & HOST_WIDE_INT_C(0x)) == 0)
+return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -1050,7 +1097,10 @@
   (if_then_else (match_code "const_int")
 (match_test "satisfies_constraint_I (op)
 || satisfies_constraint_L (op)
-|| satisfies_constraint_eI (op)")
+|| satisfies_constraint_eI (op)
+|| satisfies_constraint_eU (op)
+|| satisfies_constraint_eV (op)")
+
 (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 0a6c517eb70..34506cb44cf 100644
--- a/gcc/config/rs6

[gcc(refs/users/meissner/heads/work177-dmf)] Update ChangeLog.*

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:9df39fe1e59bee61ab222bd5111f3e222d013c97

commit 9df39fe1e59bee61ab222bd5111f3e222d013c97
Author: Michael Meissner 
Date:   Wed Sep 4 12:06:41 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.dmf | 449 +-
 1 file changed, 448 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
index 5686facb686..35a93b15f76 100644
--- a/gcc/ChangeLog.dmf
+++ b/gcc/ChangeLog.dmf
@@ -1,6 +1,453 @@
+ Branch work177-dmf, patch #113 
+
+RFC2677-Add xvrlw support.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/altivec.md (xvrlw): New insn.
+   * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-rotate-left.c: New test.
+
+ Branch work177-dmf, patch #112 
+
+RFC2686-Add paddis support.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (eU): New constraint.
+   (eV): Likewise.
+   * config/rs6000/predicates.md (paddis_operand): New predicate.
+   (paddis_paddi_operand): Likewise.
+   (add_operand): Add paddis support.
+   * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add paddis support.
+   (num_insns_constant_multi): Likewise.
+   (print_operand): Add %B for paddis support.
+   * config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
+   (SIGNED_INTEGER_32BIT_P): Likewise.
+   * config/rs6000/rs6000.md (isa attribute): Add paddis support.
+   (enabled attribute); Likewise.
+   (add3): Likewise.
+   (adddi3 splitter): New splitter for paddis.
+   (movdi_internal64): Add paddis support.
+   (movdi splitter): New splitter for paddis.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/prefixed-addis.c: New test.
+
+ Branch work177-dmf, patch #111 
+
+RFC2655-Add saturating subtract built-ins.
+
+This patch adds support for a saturating subtract built-in function that may be
+added to a future PowerPC processor.  Note, if it is added, the name of the
+built-in function may change before GCC 13 is released.  If the name changes,
+we will submit a patch changing the name.
+
+I also added support for providing dense math built-in functions, even though
+at present, we have not added any new built-in functions for dense math.  It is
+likely we will want to add new dense math built-in functions as the dense math
+support is fleshed out.
+
+The patches have been tested on both little and big endian systems.  Can I 
check
+it into the master branch?
+
+2024-09-04   Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support
+   for flagging invalid use of future built-in functions.
+   (rs6000_builtin_is_supported): Add support for future built-in
+   functions.
+   * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New
+   built-in function for -mcpu=future.
+   (__builtin_saturate_subtract64): Likewise.
+   * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas
+   for -mcpu=future built-ins.
+   (stanza_map): Likewise.
+   (enable_string): Likewise.
+   (struct attrinfo): Likewise.
+   (parse_bif_attrs): Likewise.
+   (write_decls): Likewise.
+   * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract
+   built-in insn declarations.
+   (sat_sub3_dot): Likewise.
+   (sat_sub3_dot2): Likewise.
+   * doc/extend.texi (Future PowerPC built-ins): New section.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/subfus-1.c: New test.
+   * gcc.target/powerpc/subfus-2.c: Likewise.
+
+ Branch work177-dmf, patch #110 
+
+RFC2656-Support load/store vector with right length.
+
+This patch adds support for new instructions that may be added to the PowerPC
+architecture in the future to enhance the load and store vector with length
+instructions.
+
+The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to use
+since the count for the number of bytes must be in the top 8 bits of the GPR
+register, instead of the bottom 8 bits.  This meant that code generating these
+instructions typically had to do a shift left by 56 bits to get the count into
+the right position.  In a future version of the PowerPC architecture, new
+variants of these instructions might be added that expect the count to be in
+the bottom 8 bits of the GPR register.  These patches add this support to GCC
+if the user uses the -mcpu=future option.
+
+I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl
+future lxvll/stxvll instructions would generate these instructions on 32-bit.
+However the patterns for these instructions is only done on 64-bit systems.  So
+I added a check for 64-bit support before generating the instructions.
+
+The patches h

[gcc r15-3455] c++: noexcept and pointer to member function type [PR113108]

2024-09-04 Thread Marek Polacek via Gcc-cvs

https://gcc.gnu.org/g:c755c7a32590e2eef5a8b062b9756c1513910246

commit r15-3455-gc755c7a32590e2eef5a8b062b9756c1513910246
Author: Marek Polacek 
Date:   Tue Sep 3 13:04:09 2024 -0400

c++: noexcept and pointer to member function type [PR113108]

We ICE in nothrow_spec_p because it got a DEFERRED_NOEXCEPT.
This DEFERRED_NOEXCEPT was created in implicitly_declare_fn
when declaring

  Foo& operator=(Foo&&) = default;

in the test.  The problem is that in resolve_overloaded_unification
we call maybe_instantiate_noexcept before try_one_overload only in
the TEMPLATE_ID_EXPR case.

PR c++/113108

gcc/cp/ChangeLog:

* pt.cc (resolve_overloaded_unification): Call
maybe_instantiate_noexcept.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/noexcept-type28.C: New test.

Diff:
---
 gcc/cp/pt.cc |  2 ++
 gcc/testsuite/g++.dg/cpp1z/noexcept-type28.C | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 024fa8a5529..747e627f547 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23787,6 +23787,8 @@ resolve_overloaded_unification (tree tparms,
 for (lkp_iterator iter (arg); iter; ++iter)
   {
tree fn = *iter;
+   if (flag_noexcept_type)
+ maybe_instantiate_noexcept (fn, tf_none);
if (try_one_overload (tparms, targs, tempargs, parm, TREE_TYPE (fn),
  strict, sub_strict, addr_p, explain_p)
&& (!goodfn || !decls_match (goodfn, fn)))
diff --git a/gcc/testsuite/g++.dg/cpp1z/noexcept-type28.C 
b/gcc/testsuite/g++.dg/cpp1z/noexcept-type28.C
new file mode 100644
index 000..bf0b927b8ec
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/noexcept-type28.C
@@ -0,0 +1,18 @@
+// PR c++/113108
+// { dg-do compile { target c++17 } }
+
+template 
+struct Foo {
+Foo& operator=(Foo&&) = default;
+T data;
+};
+
+template 
+void consume(Foo& (Foo::*)(Foo&&) ) {}
+
+template 
+void consume(Foo& (Foo::*)(Foo&&) noexcept) {}
+
+int main() {
+consume(&Foo::operator=);
+}

[gcc r15-3456] c++: cleanup coerce_template_template_parm

2024-09-04 Thread Marek Polacek via Gcc-cvs

https://gcc.gnu.org/g:dedf453477aa966493541a484b2d4ee5a17e5b61

commit r15-3456-gdedf453477aa966493541a484b2d4ee5a17e5b61
Author: Marek Polacek 
Date:   Wed Sep 4 13:43:52 2024 -0400

c++: cleanup coerce_template_template_parm

This function could use some sprucing up.

gcc/cp/ChangeLog:

* pt.cc (coerce_template_template_parm): Return bool instead of int.

Diff:
---
 gcc/cp/pt.cc | 35 ---
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 747e627f547..1225c668e87 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -7887,25 +7887,22 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
   return convert_from_reference (expr);
 }
 
-/* Subroutine of coerce_template_template_parms, which returns 1 if
-   PARM_PARM and ARG_PARM match using the rule for the template
-   parameters of template template parameters. Both PARM and ARG are
-   template parameters; the rest of the arguments are the same as for
-   coerce_template_template_parms.
- */
-static int
-coerce_template_template_parm (tree parm,
-  tree arg,
-  tsubst_flags_t complain,
-  tree in_decl,
-  tree outer_args)
+/* Subroutine of coerce_template_template_parms, which returns true if
+   PARM and ARG match using the rule for the template parameters of
+   template template parameters.  Both PARM and ARG are template parameters;
+   the rest of the arguments are the same as for
+   coerce_template_template_parms.  */
+
+static bool
+coerce_template_template_parm (tree parm, tree arg, tsubst_flags_t complain,
+  tree in_decl, tree outer_args)
 {
   if (arg == NULL_TREE || error_operand_p (arg)
   || parm == NULL_TREE || error_operand_p (parm))
-return 0;
+return false;
 
   if (TREE_CODE (arg) != TREE_CODE (parm))
-return 0;
+return false;
 
   switch (TREE_CODE (parm))
 {
@@ -7916,7 +7913,7 @@ coerce_template_template_parm (tree parm,
   {
if (!coerce_template_template_parms
(parm, arg, complain, in_decl, outer_args))
- return 0;
+ return false;
   }
   /* Fall through.  */
 
@@ -7924,7 +7921,7 @@ coerce_template_template_parm (tree parm,
   if (TEMPLATE_TYPE_PARAMETER_PACK (TREE_TYPE (arg))
  && !TEMPLATE_TYPE_PARAMETER_PACK (TREE_TYPE (parm)))
/* Argument is a parameter pack but parameter is not.  */
-   return 0;
+   return false;
   break;
 
 case PARM_DECL:
@@ -7940,13 +7937,13 @@ coerce_template_template_parm (tree parm,
  tree t = tsubst (TREE_TYPE (parm), outer_args, complain, in_decl);
  if (!uses_template_parms (t)
  && !same_type_p (t, TREE_TYPE (arg)))
-   return 0;
+   return false;
}
 
   if (TEMPLATE_PARM_PARAMETER_PACK (DECL_INITIAL (arg))
  && !TEMPLATE_PARM_PARAMETER_PACK (DECL_INITIAL (parm)))
/* Argument is a parameter pack but parameter is not.  */
-   return 0;
+   return false;
 
   break;
 
@@ -7954,7 +7951,7 @@ coerce_template_template_parm (tree parm,
   gcc_unreachable ();
 }
 
-  return 1;
+  return true;
 }
 
 /* Coerce template argument list ARGLIST for use with template

[gcc r15-3457] [RISC-V] Fix scan test output after recent path-splitting changes

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:0455e85e4eda7d80bda967914d634fe5b71b7ffc

commit r15-3457-g0455e85e4eda7d80bda967914d634fe5b71b7ffc
Author: Jeff Law 
Date:   Wed Sep 4 12:07:09 2024 -0600

[RISC-V] Fix scan test output after recent path-splitting changes

The recent path splitting changes from Andrew result in identifying more
saturation idioms instead of just identifying an overflow check.  As a 
result
many of the tests in the RISC-V port started failing a scan check on the
.expand output.

As expected, identifying a saturation idiom is more helpful than 
identifying an
overflow check and the resultant code is better based on my spot checks.

So the right thing to do is to expect more saturation intrinsics in the 
.expand
output.

I've verified this fixes the regressions for riscv32-elf and riscv64-elf.
Pushing to the trunk.

gcc/testsuite
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Adjust
expected output.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c:
Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c | 2 +-
 gcc/testsuite

[gcc r14-10631] aarch64: Refactor check_required_extensions

2024-09-04 Thread Andrew Carlotti via Gcc-cvs

https://gcc.gnu.org/g:c6e04d1f92fc3b4215e7c179da5d4a51356f69a8

commit r14-10631-gc6e04d1f92fc3b4215e7c179da5d4a51356f69a8
Author: Andrew Carlotti 
Date:   Tue Aug 13 16:15:11 2024 +0100

aarch64: Refactor check_required_extensions

Replace TARGET_GENERAL_REGS_ONLY check with an explicit check that
aarch64_isa_flags enables all required extensions.  This will be more
flexible when repurposing this function for non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc
(check_required_registers): Remove target check and rename to...
(report_missing_registers): ...this.
(check_required_extensions): Refactor.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins.cc | 38 --
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index f3983a123e3..46bd9bc5a35 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1094,27 +1094,19 @@ report_missing_extension (location_t location, tree 
fndecl,
   reported_missing_extension_p = true;
 }
 
-/* Check whether the registers required by SVE function fndecl are available.
-   Report an error against LOCATION and return false if not.  */
-static bool
-check_required_registers (location_t location, tree fndecl)
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+report_missing_registers (location_t location, tree fndecl)
 {
   /* Avoid reporting a slew of messages for a single oversight.  */
   if (reported_missing_registers_p)
-return false;
-
-  if (TARGET_GENERAL_REGS_ONLY)
-{
-  /* SVE registers are not usable when -mgeneral-regs-only option
-is specified.  */
-  error_at (location,
-   "ACLE function %qD is incompatible with the use of %qs",
-   fndecl, "-mgeneral-regs-only");
-  reported_missing_registers_p = true;
-  return false;
-}
+return;
 
-  return true;
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
 }
 
 /* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
@@ -1124,9 +1116,19 @@ static bool
 check_required_extensions (location_t location, tree fndecl,
   aarch64_feature_flags required_extensions)
 {
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
   auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
   if (missing_extensions == 0)
-return check_required_registers (location, fndecl);
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  report_missing_registers (location, fndecl);
+  return false;
+}
+
 
   if (missing_extensions & AARCH64_FL_SM_OFF)
 {

[gcc r14-10632] aarch64: Move check_required_extensions

2024-09-04 Thread Andrew Carlotti via Gcc-cvs

https://gcc.gnu.org/g:422c3f1965d76a541385bdc419a4e1c46f007525

commit r14-10632-g422c3f1965d76a541385bdc419a4e1c46f007525
Author: Andrew Carlotti 
Date:   Tue Jul 18 16:40:58 2023 +0100

aarch64: Move check_required_extensions

Move SVE extension checking functionality to aarch64-builtins.cc, so
that it can be shared by non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc (check_builtin_call)
(expand_builtin): Update calls to the below.
(report_missing_extension, report_missing_registers)
(check_required_extensions): Move out of aarch64_sve namespace,
rename, and move into...
* config/aarch64/aarch64-builtins.cc 
(aarch64_report_missing_extension)
(aarch64_report_missing_registers)
(aarch64_check_required_extensions) ...here.
* config/aarch64/aarch64-protos.h 
(aarch64_check_required_extensions):
Add prototype.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 100 +++
 gcc/config/aarch64/aarch64-protos.h|   2 +
 gcc/config/aarch64/aarch64-sve-builtins.cc | 107 ++---
 3 files changed, 106 insertions(+), 103 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 75d21de1401..ad3986ce002 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2098,6 +2098,106 @@ aarch64_general_builtin_decl (unsigned code, bool)
   return aarch64_builtin_decls[code];
 }
 
+/* True if we've already complained about attempts to use functions
+   when the required extension is disabled.  */
+static bool reported_missing_extension_p;
+
+/* True if we've already complained about attempts to use functions
+   which require registers that are missing.  */
+static bool reported_missing_registers_p;
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when extension EXTENSION is disabled.  */
+static void
+aarch64_report_missing_extension (location_t location, tree fndecl,
+ const char *extension)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_extension_p)
+return;
+
+  error_at (location, "ACLE function %qD requires ISA extension %qs",
+   fndecl, extension);
+  inform (location, "you can enable %qs using the command-line"
+ " option %<-march%>, or by using the %"
+ " attribute or pragma", extension);
+  reported_missing_extension_p = true;
+}
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+aarch64_report_missing_registers (location_t location, tree fndecl)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_registers_p)
+return;
+
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
+}
+
+/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
+   enabled, given that those extensions are required for function FNDECL.
+   Report an error against LOCATION if not.  */
+bool
+aarch64_check_required_extensions (location_t location, tree fndecl,
+  aarch64_feature_flags required_extensions)
+{
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
+  auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
+  if (missing_extensions == 0)
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  aarch64_report_missing_registers (location, fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_OFF)
+{
+  error_at (location, "ACLE function %qD cannot be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_ON)
+{
+  error_at (location, "ACLE function %qD can only be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_ZA_ON)
+{
+  error_at (location, "ACLE function %qD can only be called from"
+   " a function that has %qs state", fndecl, "za");
+  return false;
+}
+
+  static const struct {
+aarch64_feature_flags flag;
+const char *name;
+  } extensions[] = {
+#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
+{ AARCH64_FL_##IDENT, EXT_NAME },
+#include "aarch64-option-extensions.def"
+  };
+
+  for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
+if (missing_extensions & extensions[i].flag)
+  {
+   aarch64_report_missing_extension (loc

[gcc r14-10633] aarch64: Fix tme intrinsic availability

2024-09-04 Thread Andrew Carlotti via Gcc-cvs

https://gcc.gnu.org/g:0a3a0d4a5e32e9b914b5c5cc283ee485adc6cd11

commit r14-10633-g0a3a0d4a5e32e9b914b5c5cc283ee485adc6cd11
Author: Andrew Carlotti 
Date:   Thu Oct 26 15:43:44 2023 +0100

aarch64: Fix tme intrinsic availability

The availability of tme intrinsics was previously gated at both
initialisation time (using global target options) and usage time
(accounting for function-specific target options).  This patch removes
the check at initialisation time, and also moves the intrinsics out of
the header file to allow for better error messages (matching the
existing error messages for SVE intrinsics).

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_tme_builtins):
Define intrinsic names directly.
(aarch64_general_init_builtins): Move tme initialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check tme intrinsics.
* config/aarch64/arm_acle.h (__tstart, __tcommit, __tcancel)
(__ttest): Remove.
(_TMFAILURE_*): Define unconditionally.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/tme_guard-1.c: New test.
* gcc.target/aarch64/acle/tme_guard-2.c: New test.
* gcc.target/aarch64/acle/tme_guard-3.c: New test.
* gcc.target/aarch64/acle/tme_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 57 +-
 gcc/config/aarch64/arm_acle.h  | 36 +-
 .../gcc.target/aarch64/acle/tme_guard-1.c  |  9 
 .../gcc.target/aarch64/acle/tme_guard-2.c  | 10 
 .../gcc.target/aarch64/acle/tme_guard-3.c  |  9 
 .../gcc.target/aarch64/acle/tme_guard-4.c  | 10 
 6 files changed, 72 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index ad3986ce002..70437641219 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1709,21 +1709,17 @@ aarch64_init_tme_builtins (void)
 = build_function_type_list (void_type_node, uint64_type_node, NULL);
 
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
-= aarch64_general_add_builtin ("__builtin_aarch64_tstart",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TSTART);
+= aarch64_general_simulate_builtin ("__tstart", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TSTART);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
-= aarch64_general_add_builtin ("__builtin_aarch64_ttest",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TTEST);
+= aarch64_general_simulate_builtin ("__ttest", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TTEST);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
-  ftype_void_void,
-  AARCH64_TME_BUILTIN_TCOMMIT);
+= aarch64_general_simulate_builtin ("__tcommit", ftype_void_void,
+   AARCH64_TME_BUILTIN_TCOMMIT);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
-  ftype_void_uint64,
-  AARCH64_TME_BUILTIN_TCANCEL);
+= aarch64_general_simulate_builtin ("__tcancel", ftype_void_uint64,
+   AARCH64_TME_BUILTIN_TCANCEL);
 }
 
 /* Add builtins for Random Number instructions.  */
@@ -1986,6 +1982,7 @@ handle_arm_acle_h (void)
 {
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
+  aarch64_init_tme_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2078,9 +2075,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_TME)
-aarch64_init_tme_builtins ();
-
   if (TARGET_MEMTAG)
 aarch64_init_memtag_builtins ();
 
@@ -2203,6 +2197,7 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
unsigned int code, tree fndecl,
unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
 {
+  tree decl = aarch64_builtin_decls[code];
   switch (code)
 {
 case AARCH64_RSR:
@@ -2215,15 +2210,29 @@ aarch64_general_check_builtin_call (location_t 
location, vec,
 case AARCH64_WSR64:
 case AARCH64_WSRF:
 case AARCH64_WSRF64:
-  tree addr = STRIP_NOPS (args[0]);
-  if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
- || TREE_CODE (addr) != ADDR_EXPR
- || TREE_CODE (TREE_OPERAND (addr, 0))

[gcc r14-10634] aarch64: Fix memtag intrinsic availability

2024-09-04 Thread Andrew Carlotti via Gcc-cvs

https://gcc.gnu.org/g:8485606056ed3ca0d0acf6e0d943975e9173e7cd

commit r14-10634-g8485606056ed3ca0d0acf6e0d943975e9173e7cd
Author: Andrew Carlotti 
Date:   Tue Jul 18 20:09:38 2023 +0100

aarch64: Fix memtag intrinsic availability

The availability of memtag intrinsics and data types were determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. It also removes the macro
indirection from the header file - this simplifies the header, and
allows the missing extension error reporting to find the user-facing
intrinsic names.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_memtag_builtins):
Define intrinsic names directly.
(aarch64_general_init_builtins): Move memtag intialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check memtag intrinsics.
* config/aarch64/arm_acle.h (__arm_mte_create_random_tag)
(__arm_mte_exclude_tag, __arm_mte_ptrdiff)
(__arm_mte_increment_tag, __arm_mte_set_tag, __arm_mte_get_tag):
Remove.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/memtag_guard-1.c: New test.
* gcc.target/aarch64/acle/memtag_guard-2.c: New test.
* gcc.target/aarch64/acle/memtag_guard-3.c: New test.
* gcc.target/aarch64/acle/memtag_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 23 --
 gcc/config/aarch64/arm_acle.h  | 23 --
 .../gcc.target/aarch64/acle/memtag_guard-1.c   |  9 +
 .../gcc.target/aarch64/acle/memtag_guard-2.c   | 10 ++
 .../gcc.target/aarch64/acle/memtag_guard-3.c   |  9 +
 .../gcc.target/aarch64/acle/memtag_guard-4.c   | 10 ++
 6 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 70437641219..ba04424645d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1850,27 +1850,27 @@ aarch64_init_memtag_builtins (void)
 
 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
   aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
-= aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
-  T, AARCH64_MEMTAG_BUILTIN_##F); \
+= aarch64_general_simulate_builtin ("__arm_mte_"#N, T, \
+   AARCH64_MEMTAG_BUILTIN_##F); \
   aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
  AARCH64_MEMTAG_BUILTIN_START - 1] = \
{T, CODE_FOR_##I};
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, create_random_tag, irg, fntype);
 
   fntype = build_function_type_list (uint64_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, exclude_tag, gmi, fntype);
 
   fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
 ptr_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, ptrdiff, subp, fntype);
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 unsigned_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, increment_tag, addg, fntype);
 
   fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
   AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
@@ -1983,6 +1983,7 @@ handle_arm_acle_h (void)
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
+  aarch64_init_memtag_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2075,9 +2076,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_MEMTAG)
-aarch64_init_memtag_builtins ();
-
   if (in_lto_p)
 handle_arm_acle_h ();
 }
@@ -2234,7 +2232,12 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
 default:
   break;
 }
-  /* Default behavior.  */
+
+  if (code >= AARCH64_MEMTAG_BUILTIN_START
+  && code <= AARCH64_MEMTAG_BUILTI

[gcc r14-10635] aarch64: Fix ls64 intrinsic availability

2024-09-04 Thread Andrew Carlotti via Gcc-cvs

https://gcc.gnu.org/g:9899be7d5380c732b56ba155d4a9e192a2c483b1

commit r14-10635-g9899be7d5380c732b56ba155d4a9e192a2c483b1
Author: Andrew Carlotti 
Date:   Thu Oct 26 15:45:15 2023 +0100

aarch64: Fix ls64 intrinsic availability

The availability of ls64 intrinsics and data types were determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. We also get better error
messages when ls64 is not available (matching the existing error
messages for SVE intrinsics).

The data512_t type is made always available; this is consistent with the
present behaviour for Neon fp16/bf16 types.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (handle_arm_acle_h): Remove
feature check at initialisation.
(aarch64_general_check_builtin_call): Check ls64 intrinsics.
* config/aarch64/arm_acle.h: (data512_t) Make always available.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/ls64_guard-1.c: New test.
* gcc.target/aarch64/acle/ls64_guard-2.c: New test.
* gcc.target/aarch64/acle/ls64_guard-3.c: New test.
* gcc.target/aarch64/acle/ls64_guard-4.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc   | 10 --
 gcc/config/aarch64/arm_acle.h|  2 --
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c |  9 +
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c | 10 ++
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c |  9 +
 gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c | 10 ++
 6 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index ba04424645d..d5b5700d25d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1980,8 +1980,7 @@ aarch64_init_data_intrinsics (void)
 void
 handle_arm_acle_h (void)
 {
-  if (TARGET_LS64)
-aarch64_init_ls64_builtins ();
+  aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
   aarch64_init_memtag_builtins ();
 }
@@ -2229,6 +2228,13 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
   return aarch64_check_required_extensions (location, decl,
AARCH64_FL_TME);
 
+case AARCH64_LS64_BUILTIN_LD64B:
+case AARCH64_LS64_BUILTIN_ST64B:
+case AARCH64_LS64_BUILTIN_ST64BV:
+case AARCH64_LS64_BUILTIN_ST64BV0:
+  return aarch64_check_required_extensions (location, decl,
+   AARCH64_FL_LS64);
+
 default:
   break;
 }
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index ab043267913..ab4e7e60e04 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -265,9 +265,7 @@ __crc32d (uint32_t __a, uint64_t __b)
 #define _TMFAILURE_INT0x0080u
 #define _TMFAILURE_TRIVIAL0x0100u
 
-#ifdef __ARM_FEATURE_LS64
 typedef __arm_data512_t data512_t;
-#endif
 
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
new file mode 100644
index 000..7dfc193a293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include 
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p); /* { dg-error {ACLE function '__arm_ld64b' requires 
ISA extension 'ls64'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
new file mode 100644
index 000..3ede05a81f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include 
+
+#pragma GCC target("arch=armv8-a+ls64")
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
new file mode 100644
index 000..e0fccdad7be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+ls64 -mgeneral-regs-only" } */
+
+#include 
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c
new file mode 100644
index 000..af1d9

[gcc r15-3458] Check DECL_NAMELESS in modified_type_die

2024-09-04 Thread Tom Tromey via Gcc-cvs

https://gcc.gnu.org/g:5326306e7d9d36eccc2c2f02e1357818625f057b

commit r15-3458-g5326306e7d9d36eccc2c2f02e1357818625f057b
Author: Tom Tromey 
Date:   Thu Aug 29 13:23:18 2024 -0600

Check DECL_NAMELESS in modified_type_die

While working on a patch to the Ada compiler, I found a spot in
dwarf2out.cc that calls add_name_attribute without respecting
DECL_NAMELESS.

gcc

* dwarf2out.cc (modified_type_die): Check DECL_NAMELESS.

Diff:
---
 gcc/dwarf2out.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 89c2fd02649..a7ec359bd0c 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -14019,6 +14019,7 @@ modified_type_die (tree type, int cv_quals, bool 
reverse,
   || (cv_quals == TYPE_UNQUALIFIED)))
  || (TREE_CODE (name) == TYPE_DECL
  && DECL_NAME (name)
+ && !DECL_NAMELESS (name)
  && (TREE_TYPE (name) == qualified_type
  || (lang_hooks.types.get_debug_type
  && (lang_hooks.types.get_debug_type (TREE_TYPE (name))

[gcc r14-10636] libstdc++: Fix std::variant to reject array types [PR116381]

2024-09-04 Thread Jonathan Wakely via Gcc-cvs

https://gcc.gnu.org/g:3b8a67b0cfc072f027eab24fb72d48d10cc890b4

commit r14-10636-g3b8a67b0cfc072f027eab24fb72d48d10cc890b4
Author: Jonathan Wakely 
Date:   Tue Aug 20 16:52:22 2024 +0100

libstdc++: Fix std::variant to reject array types [PR116381]

For the backport, rejecting array types is only done in strict modes.

libstdc++-v3/ChangeLog:

PR libstdc++/116381
* include/std/variant (variant): Fix conditions for
static_assert to match the spec.
* testsuite/20_util/variant/types_neg.cc: New test.

(cherry picked from commit 1e10b3b8825ee398f077500af6ae1f5db180983a)

Diff:
---
 libstdc++-v3/include/std/variant| 11 +++
 libstdc++-v3/testsuite/20_util/variant/types_neg.cc | 18 ++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 4e56134a6f7..834bb548f13 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1374,10 +1374,13 @@ namespace __variant
 
   static_assert(sizeof...(_Types) > 0,
"variant must have at least one alternative");
-  static_assert(!(std::is_reference_v<_Types> || ...),
-   "variant must have no reference alternative");
-  static_assert(!(std::is_void_v<_Types> || ...),
-   "variant must have no void alternative");
+#ifdef __STRICT_ANSI__
+  static_assert(((std::is_object_v<_Types> && !is_array_v<_Types>) && ...),
+   "variant alternatives must be non-array object types");
+#else
+  static_assert((std::is_object_v<_Types> && ...),
+   "variant alternatives must be object types");
+#endif
 
   using _Base = __detail::__variant::_Variant_base<_Types...>;
 
diff --git a/libstdc++-v3/testsuite/20_util/variant/types_neg.cc 
b/libstdc++-v3/testsuite/20_util/variant/types_neg.cc
new file mode 100644
index 000..7d970e961c2
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/variant/types_neg.cc
@@ -0,0 +1,18 @@
+// { dg-do compile { target c++17 } }
+// { dg-add-options strict_std }
+
+# include 
+
+std::variant<> v0; // { dg-error "here" }
+// { dg-error "must have at least one alternative" "" { target *-*-* } 0 }
+std::variant v1; // { dg-error "here" }
+std::variant v2; // { dg-error "here" }
+std::variant v3; // { dg-error "here" }
+std::variant v4; // { dg-error "here" }
+std::variant v5; // { dg-error "here" }
+std::variant v6; // { dg-error "here" }
+// { dg-error "must be non-array object types" "" { target *-*-* } 0 }
+
+// All of variant's base classes are instantiated before checking any
+// static_assert, so we get lots of errors before the expected errors above.
+// { dg-excess-errors "" }

[gcc r14-10638] libstdc++: Specialize std::disable_sized_sentinel_for for std::move_iterator [PR116549]

2024-09-04 Thread Jonathan Wakely via Gcc-cvs

https://gcc.gnu.org/g:ee37d750262579a81799c5b56fe1ab936a840120

commit r14-10638-gee37d750262579a81799c5b56fe1ab936a840120
Author: Jonathan Wakely 
Date:   Mon Sep 2 11:29:13 2024 +0100

libstdc++: Specialize std::disable_sized_sentinel_for for 
std::move_iterator [PR116549]

LWG 3736 added a partial specialization of this variable template for
two std::move_iterator types. This is needed for the case where the
types satisfy std::sentinel_for and are subtractable, but do not model
the semantics requirements of std::sized_sentinel_for.

libstdc++-v3/ChangeLog:

PR libstdc++/116549
* include/bits/stl_iterator.h (disable_sized_sentinel_for):
Define specialization for two move_iterator types, as per LWG
3736.
* testsuite/24_iterators/move_iterator/lwg3736.cc: New test.

(cherry picked from commit 819deae0a5bee079a7d5582fafaa098c26144ae8)

Diff:
---
 libstdc++-v3/include/bits/stl_iterator.h   |  8 
 .../24_iterators/move_iterator/lwg3736.cc  | 52 ++
 2 files changed, 60 insertions(+)

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index d3823057270..20c0319f3a7 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -1822,6 +1822,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { return _ReturnType(__i); }
 
 #if __cplusplus > 201703L && __glibcxx_concepts
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3736.  move_iterator missing disable_sized_sentinel_for specialization
+  template
+requires (!sized_sentinel_for<_Iterator1, _Iterator2>)
+inline constexpr bool
+disable_sized_sentinel_for,
+  move_iterator<_Iterator2>> = true;
+
   // [iterators.common] Common iterators
 
   namespace __detail
diff --git a/libstdc++-v3/testsuite/24_iterators/move_iterator/lwg3736.cc 
b/libstdc++-v3/testsuite/24_iterators/move_iterator/lwg3736.cc
new file mode 100644
index 000..eaf791b3089
--- /dev/null
+++ b/libstdc++-v3/testsuite/24_iterators/move_iterator/lwg3736.cc
@@ -0,0 +1,52 @@
+// { dg-do compile { target c++20 } }
+
+// 3736.  move_iterator missing disable_sized_sentinel_for specialization
+
+#include 
+
+template using MoveIter = std::move_iterator;
+
+using std::sized_sentinel_for;
+using std::disable_sized_sentinel_for;
+
+// These assertions always passed, even without LWG 3736:
+static_assert(sized_sentinel_for, MoveIter>);
+static_assert(sized_sentinel_for, MoveIter>);
+static_assert(not sized_sentinel_for, MoveIter>);
+static_assert(not sized_sentinel_for, std::default_sentinel_t>);
+static_assert(not disable_sized_sentinel_for, MoveIter>);
+
+// These types don't satisfy sized_sentinel_for anyway (because the subtraction
+// is ill-formed) but LWG 3736 makes the variable template explicitly false:
+static_assert(disable_sized_sentinel_for, MoveIter>);
+
+struct Iter
+{
+  using iterator_category = std::random_access_iterator_tag;
+  using value_type = int;
+  using pointer = int*;
+  using reference = int&;
+  using difference_type = long;
+
+  Iter() = default;
+  Iter& operator++();
+  Iter operator++(int);
+  Iter& operator--();
+  Iter operator--(int);
+  reference operator*() const;
+  pointer operator->() const;
+  Iter& operator+=(difference_type);
+  Iter& operator-=(difference_type);
+  friend Iter operator+(Iter, difference_type);
+  friend Iter operator+(difference_type, Iter);
+  friend Iter operator-(Iter, difference_type);
+  friend difference_type operator-(Iter, Iter);
+  bool operator==(Iter) const;
+};
+
+// Specialize the variable template so that Iter is not its own sized sentinel:
+template<> constexpr bool std::disable_sized_sentinel_for = true;
+static_assert( not sized_sentinel_for );
+
+// LWG 3736 means that affects std::move_iterator as well:
+static_assert( not sized_sentinel_for, MoveIter> );

[gcc r14-10637] libstdc++: Add missing feature-test macro in various headers

2024-09-04 Thread Jonathan Wakely via Gcc-cvs

https://gcc.gnu.org/g:469602619d32c24382c6f1c3e3d95c3db606c770

commit r14-10637-g469602619d32c24382c6f1c3e3d95c3db606c770
Author: Dhruv Chawla 
Date:   Mon Aug 26 11:09:19 2024 +0530

libstdc++: Add missing feature-test macro in various headers

version.syn#2 requires various headers to define
__cpp_lib_allocator_traits_is_always_equal. Currently, only  was
defining this macro. Implement fixes for the other headers as well.

Signed-off-by: Dhruv Chawla 

libstdc++-v3/ChangeLog:

* include/std/deque: Define macro
__glibcxx_want_allocator_traits_is_always_equal.
* include/std/forward_list: Likewise.
* include/std/list: Likewise.
* include/std/map: Likewise.
* include/std/scoped_allocator: Likewise.
* include/std/set: Likewise.
* include/std/string: Likewise.
* include/std/unordered_map: Likewise.
* include/std/unordered_set: Likewise.
* include/std/vector: Likewise.
* testsuite/20_util/headers/memory/version.cc: New test.
* testsuite/20_util/scoped_allocator/version.cc: Likewise.
* testsuite/21_strings/headers/string/version.cc: Likewise.
* testsuite/23_containers/deque/version.cc: Likewise.
* testsuite/23_containers/forward_list/version.cc: Likewise.
* testsuite/23_containers/list/version.cc: Likewise.
* testsuite/23_containers/map/version.cc: Likewise.
* testsuite/23_containers/set/version.cc: Likewise.
* testsuite/23_containers/unordered_map/version.cc: Likewise.
* testsuite/23_containers/unordered_set/version.cc: Likewise.
* testsuite/23_containers/vector/version.cc: Likewise.

(cherry picked from commit efe6efb6f315c7f97be8a850e0a84ff7f6651d85)

Diff:
---
 libstdc++-v3/include/std/deque| 1 +
 libstdc++-v3/include/std/forward_list | 1 +
 libstdc++-v3/include/std/list | 1 +
 libstdc++-v3/include/std/map  | 1 +
 libstdc++-v3/include/std/scoped_allocator | 3 +++
 libstdc++-v3/include/std/set  | 1 +
 libstdc++-v3/include/std/string   | 1 +
 libstdc++-v3/include/std/unordered_map| 1 +
 libstdc++-v3/include/std/unordered_set| 1 +
 libstdc++-v3/include/std/vector   | 1 +
 libstdc++-v3/testsuite/20_util/headers/memory/version.cc  | 8 
 libstdc++-v3/testsuite/20_util/scoped_allocator/version.cc| 8 
 libstdc++-v3/testsuite/21_strings/headers/string/version.cc   | 8 
 libstdc++-v3/testsuite/23_containers/deque/version.cc | 8 
 libstdc++-v3/testsuite/23_containers/forward_list/version.cc  | 8 
 libstdc++-v3/testsuite/23_containers/list/version.cc  | 8 
 libstdc++-v3/testsuite/23_containers/map/version.cc   | 8 
 libstdc++-v3/testsuite/23_containers/set/version.cc   | 8 
 libstdc++-v3/testsuite/23_containers/unordered_map/version.cc | 8 
 libstdc++-v3/testsuite/23_containers/unordered_set/version.cc | 8 
 libstdc++-v3/testsuite/23_containers/vector/version.cc| 8 
 21 files changed, 100 insertions(+)

diff --git a/libstdc++-v3/include/std/deque b/libstdc++-v3/include/std/deque
index 0bf8309c19a..69f8c0dcdcc 100644
--- a/libstdc++-v3/include/std/deque
+++ b/libstdc++-v3/include/std/deque
@@ -68,6 +68,7 @@
 #include 
 #include 
 
+#define __glibcxx_want_allocator_traits_is_always_equal
 #define __glibcxx_want_erase_if
 #define __glibcxx_want_nonmember_container_access
 #include 
diff --git a/libstdc++-v3/include/std/forward_list 
b/libstdc++-v3/include/std/forward_list
index 5ac74360808..dfd7d48d121 100644
--- a/libstdc++-v3/include/std/forward_list
+++ b/libstdc++-v3/include/std/forward_list
@@ -45,6 +45,7 @@
 # include 
 #endif
 
+#define __glibcxx_want_allocator_traits_is_always_equal
 #define __glibcxx_want_erase_if
 #define __glibcxx_want_incomplete_container_elements
 #define __glibcxx_want_list_remove_return_type
diff --git a/libstdc++-v3/include/std/list b/libstdc++-v3/include/std/list
index fce4e3d925b..ff632fc1ab2 100644
--- a/libstdc++-v3/include/std/list
+++ b/libstdc++-v3/include/std/list
@@ -69,6 +69,7 @@
 # include 
 #endif
 
+#define __glibcxx_want_allocator_traits_is_always_equal
 #define __glibcxx_want_erase_if
 #define __glibcxx_want_incomplete_container_elements
 #define __glibcxx_want_list_remove_return_type
diff --git a/libstdc++-v3/include/std/map b/libstdc++-v3/include/std/map
index 4a96e59a5bc..6520d9f744f 100644
--- a/libstdc++-v3/include/std/map
+++ b/libstdc++-v3/include/std/map
@@ -69,6 +69,7 @@
 # include 
 #endif
 
+#define __glibcxx_want_allocator_traits_is_always_equal
 #defin

[gcc r14-10639] libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663]

2024-09-04 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:ec1bcd176339d43fd1ab3211f65915824ba1f26b

commit r14-10639-gec1bcd176339d43fd1ab3211f65915824ba1f26b
Author: Lennox Shou Hao Ho 
Date:   Mon Jul 29 21:09:27 2024 +0100

libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663]

std::filesystem::hard_link_count() always returns 1 on
mingw-w64ucrt-11.0.1-r3 on Windows 10 19045

hard_link_count() queries _wstat64() on MinGW-w64
The MSFT documentation claims _wstat64() will always return 1 *non*-NTFS 
volumes

https://learn.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2013/14h5k7ff(v=vs.120)

My tests suggest that is not always true -
hard_link_count()/_wstat64() still returns 1 on NTFS.
GetFileInformationByHandle does return the correct result of 2.
Please see the PR for a minimal repro.

This patch changes the Windows implementation to always call
GetFileInformationByHandle.

PR libstdc++/113663

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (fs::equivalent): Moved helper class
auto_handle to anonymous namespace as auto_win_file_handle.
(fs::hard_link_count): Changed Windows implementation to use
information provided by GetFileInformationByHandle which is more
reliable.
* testsuite/27_io/filesystem/operations/hard_link_count.cc: New
test.

Signed-off-by: "Lennox" Shou Hao Ho 
Reviewed-by: Jonathan Wakely 
(cherry picked from commit 658193658f05e9a8ebf0bce8bab1f43bfee1)

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc   | 59 ++
 .../27_io/filesystem/operations/hard_link_count.cc | 37 ++
 2 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 07bc2a0fa88..81227c49dfd 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -822,6 +822,34 @@ fs::equivalent(const path& p1, const path& p2)
   return result;
 }
 
+#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
+namespace
+{
+  // An RAII type that opens a handle for an existing file.
+  struct auto_win_file_handle
+  {
+explicit
+auto_win_file_handle(const fs::path& p_)
+: handle(CreateFileW(p_.c_str(), 0,
+FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
+{ }
+
+~auto_win_file_handle()
+{ if (*this) CloseHandle(handle); }
+
+explicit operator bool() const
+{ return handle != INVALID_HANDLE_VALUE; }
+
+bool get_info()
+{ return GetFileInformationByHandle(handle, &info); }
+
+HANDLE handle;
+BY_HANDLE_FILE_INFORMATION info;
+  };
+}
+#endif
+
 bool
 fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept
 {
@@ -858,27 +886,8 @@ fs::equivalent(const path& p1, const path& p2, error_code& 
ec) noexcept
   if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
return false;
 
-  struct auto_handle {
-   explicit auto_handle(const path& p_)
-   : handle(CreateFileW(p_.c_str(), 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
-   { }
-
-   ~auto_handle()
-   { if (*this) CloseHandle(handle); }
-
-   explicit operator bool() const
-   { return handle != INVALID_HANDLE_VALUE; }
-
-   bool get_info()
-   { return GetFileInformationByHandle(handle, &info); }
-
-   HANDLE handle;
-   BY_HANDLE_FILE_INFORMATION info;
-  };
-  auto_handle h1(p1);
-  auto_handle h2(p2);
+  auto_win_file_handle h1(p1);
+  auto_win_file_handle h2(p2);
   if (!h1 || !h2)
{
  if (!h1 && !h2)
@@ -982,7 +991,13 @@ fs::hard_link_count(const path& p)
 std::uintmax_t
 fs::hard_link_count(const path& p, error_code& ec) noexcept
 {
-#ifdef _GLIBCXX_HAVE_SYS_STAT_H
+#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  auto_win_file_handle h(p);
+  if (h && h.get_info())
+return static_cast(h.info.nNumberOfLinks);
+  ec = __last_system_error();
+  return static_cast(-1);
+#elif defined _GLIBCXX_HAVE_SYS_STAT_H
   return do_stat(p, ec, std::mem_fn(&stat_type::st_nlink),
 static_cast(-1));
 #else
diff --git 
a/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc
new file mode 100644
index 000..8b2fb4f190e
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc
@@ -0,0 +1,37 @@
+// { dg-do run { target c++17 } }
+// { dg-require-filesystem-ts "" }
+
+#include 
+#include 
+#include 
+
+namespace fs = std::filesystem;
+
+void test01()
+{
+  // PR libstdc++/113663
+
+  fs::path p1 = __gnu_test::nonexistent_path();
+  VERIFY( !fs::exists(p1) );
+
+  __gnu_test::scoped_file f1(p1);
+  VERIF

[gcc r14-10641] libstdc++: Fix error handling in fs::hard_link_count for Windows

2024-09-04 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:a054ba563bde3dc834c5efac72b91c2803e4e289

commit r14-10641-ga054ba563bde3dc834c5efac72b91c2803e4e289
Author: Jonathan Wakely 
Date:   Mon Sep 2 12:16:49 2024 +0100

libstdc++: Fix error handling in fs::hard_link_count for Windows

The recent change to use auto_win_file_handle for
std::filesystem::hard_link_count caused a regression. The
std::error_code argument should be cleared if no error occurs, but this
no longer happens. Add a call to ec.clear() in fs::hard_link_count to
fix this.

Also change the auto_win_file_handle class to take a reference to the
std::error_code and set it if an error occurs, to slightly simplify the
control flow in the fs::equiv_files function.

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (auto_win_file_handle): Add error_code&
member and set it if CreateFileW or GetFileInformationByHandle
fails.
(fs::equiv_files) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Simplify
control flow.
(fs::hard_link_count) [_GLIBCXX_FILESYSTEM_IS_WINDOWS]: Clear ec
on success.
* testsuite/27_io/filesystem/operations/hard_link_count.cc:
Check error handling.

(cherry picked from commit 71b1639c67b91554420cc38eb4c82323e535c816)

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc   | 59 --
 .../27_io/filesystem/operations/hard_link_count.cc | 24 +
 2 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 9606afa9f1f..946fefd9e44 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -829,23 +829,37 @@ namespace
   struct auto_win_file_handle
   {
 explicit
-auto_win_file_handle(const wchar_t* p)
+auto_win_file_handle(const wchar_t* p, std::error_code& ec) noexcept
 : handle(CreateFileW(p, 0,
 FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
-0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
-{ }
+0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)),
+  ec(ec)
+{
+  if (handle == INVALID_HANDLE_VALUE)
+   ec = std::__last_system_error();
+}
 
 ~auto_win_file_handle()
 { if (*this) CloseHandle(handle); }
 
-explicit operator bool() const
+explicit operator bool() const noexcept
 { return handle != INVALID_HANDLE_VALUE; }
 
-bool get_info()
-{ return GetFileInformationByHandle(handle, &info); }
+bool get_info() noexcept
+{
+  if (GetFileInformationByHandle(handle, &info))
+   return true;
+  ec = std::__last_system_error();
+  return false;
+}
 
 HANDLE handle;
 BY_HANDLE_FILE_INFORMATION info;
+// Like errno, we only set this on error and never clear it.
+// This propagates an error_code to the caller when something goes wrong,
+// but the caller should not assume a non-zero ec means an error happened
+// unless they explicitly cleared it before passing it to our constructor.
+std::error_code& ec;
   };
 }
 #endif
@@ -866,23 +880,14 @@ fs::equiv_files([[maybe_unused]] const char_type* p1, 
const stat_type& st1,
   if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
 return false;
 
-  // Need to use GetFileInformationByHandle to get more info about the files.
-  auto_win_file_handle h1(p1);
-  auto_win_file_handle h2(p2);
-  if (!h1 || !h2)
-{
-  if (!h1 && !h2)
-   ec = __last_system_error();
-  return false;
-}
-  if (!h1.get_info() || !h2.get_info())
-{
-  ec = __last_system_error();
-  return false;
-}
-  return h1.info.dwVolumeSerialNumber == h2.info.dwVolumeSerialNumber
-  && h1.info.nFileIndexHigh == h2.info.nFileIndexHigh
-  && h1.info.nFileIndexLow == h2.info.nFileIndexLow;
+  // Use GetFileInformationByHandle to get more info about the files.
+  if (auto_win_file_handle h1{p1, ec})
+if (auto_win_file_handle h2{p2, ec})
+  if (h1.get_info() && h2.get_info())
+   return h1.info.dwVolumeSerialNumber == h2.info.dwVolumeSerialNumber
+&& h1.info.nFileIndexHigh == h2.info.nFileIndexHigh
+&& h1.info.nFileIndexLow == h2.info.nFileIndexLow;
+  return false;
 #endif // _GLIBCXX_FILESYSTEM_IS_WINDOWS
 }
 #endif // NEED_DO_COPY_FILE
@@ -1007,10 +1012,12 @@ std::uintmax_t
 fs::hard_link_count(const path& p, error_code& ec) noexcept
 {
 #if _GLIBCXX_FILESYSTEM_IS_WINDOWS
-  auto_win_file_handle h(p.c_str());
+  auto_win_file_handle h(p.c_str(), ec);
   if (h && h.get_info())
-return static_cast(h.info.nNumberOfLinks);
-  ec = __last_system_error();
+{
+  ec.clear();
+  return static_cast(h.info.nNumberOfLinks);
+}
   return static_cast(-1);
 #elif defined _GLIBCXX_HAVE_SYS_STAT_H
   return do_stat(p, ec, std::mem_fn(&stat_type::st_nlink),
diff --git 
a/libs

[gcc r14-10640] libstdc++: Fix overwriting files with fs::copy_file on Windows

2024-09-04 Thread Jonathan Wakely via Gcc-cvs

https://gcc.gnu.org/g:35c98149a5c7af9159fa7615d8d827b3aaa3cc97

commit r14-10640-g35c98149a5c7af9159fa7615d8d827b3aaa3cc97
Author: Jonathan Wakely 
Date:   Tue Jul 30 10:55:55 2024 +0100

libstdc++: Fix overwriting files with fs::copy_file on Windows

There are no inode numbers on Windows filesystems, so stat_type::st_ino
is always zero and the check for equivalent files in do_copy_file was
incorrectly identifying distinct files as equivalent. This caused
copy_file to incorrectly report errors when trying to overwrite existing
files.

The fs::equivalent function already does the right thing on Windows, so
factor that logic out into a new function that can be reused by
fs::copy_file.

The tests for fs::copy_file were quite inadequate, so this also adds
checks for that function's error conditions.

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (auto_win_file_handle): Change constructor
parameter from const path& to const wchar_t*.
(fs::equiv_files): New function.
(fs::equivalent): Use equiv_files.
* src/filesystem/ops-common.h (fs::equiv_files): Declare.
(do_copy_file): Use equiv_files.
* src/filesystem/ops.cc (fs::equiv_files): Define.
(fs::copy, fs::equivalent): Use equiv_files.
* testsuite/27_io/filesystem/operations/copy.cc: Test
overwriting directory contents recursively.
* testsuite/27_io/filesystem/operations/copy_file.cc: Test
overwriting existing files.

(cherry picked from commit 017e3f89b081e4828a588a3bd27b5feacea042b7)

Diff:
---
 libstdc++-v3/src/c++17/fs_ops.cc   |  71 +++-
 libstdc++-v3/src/filesystem/ops-common.h   |  12 +-
 libstdc++-v3/src/filesystem/ops.cc |  18 ++-
 .../testsuite/27_io/filesystem/operations/copy.cc  |   9 ++
 .../27_io/filesystem/operations/copy_file.cc   | 122 +
 5 files changed, 199 insertions(+), 33 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index 81227c49dfd..9606afa9f1f 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -350,7 +350,7 @@ fs::copy(const path& from, const path& to, copy_options 
options,
   f = make_file_status(from_st);
 
   if (exists(t) && !is_other(t) && !is_other(f)
-  && to_st.st_dev == from_st.st_dev && to_st.st_ino == from_st.st_ino)
+  && fs::equiv_files(from.c_str(), from_st, to.c_str(), to_st, ec))
 {
   ec = std::make_error_code(std::errc::file_exists);
   return;
@@ -829,8 +829,8 @@ namespace
   struct auto_win_file_handle
   {
 explicit
-auto_win_file_handle(const fs::path& p_)
-: handle(CreateFileW(p_.c_str(), 0,
+auto_win_file_handle(const wchar_t* p)
+: handle(CreateFileW(p, 0,
 FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0))
 { }
@@ -850,6 +850,44 @@ namespace
 }
 #endif
 
+#ifdef _GLIBCXX_HAVE_SYS_STAT_H
+#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-fs_ops.o too
+bool
+fs::equiv_files([[maybe_unused]] const char_type* p1, const stat_type& st1,
+   [[maybe_unused]] const char_type* p2, const stat_type& st2,
+   [[maybe_unused]] error_code& ec)
+{
+#if ! _GLIBCXX_FILESYSTEM_IS_WINDOWS
+  // For POSIX the device ID and inode number uniquely identify a file.
+  return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
+#else
+  // For Windows st_ino is not set, so can't be used to distinguish files.
+  // We can compare modes and device IDs as a cheap initial check:
+  if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
+return false;
+
+  // Need to use GetFileInformationByHandle to get more info about the files.
+  auto_win_file_handle h1(p1);
+  auto_win_file_handle h2(p2);
+  if (!h1 || !h2)
+{
+  if (!h1 && !h2)
+   ec = __last_system_error();
+  return false;
+}
+  if (!h1.get_info() || !h2.get_info())
+{
+  ec = __last_system_error();
+  return false;
+}
+  return h1.info.dwVolumeSerialNumber == h2.info.dwVolumeSerialNumber
+  && h1.info.nFileIndexHigh == h2.info.nFileIndexHigh
+  && h1.info.nFileIndexLow == h2.info.nFileIndexLow;
+#endif // _GLIBCXX_FILESYSTEM_IS_WINDOWS
+}
+#endif // NEED_DO_COPY_FILE
+#endif // _GLIBCXX_HAVE_SYS_STAT_H
+
 bool
 fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept
 {
@@ -881,30 +919,7 @@ fs::equivalent(const path& p1, const path& p2, error_code& 
ec) noexcept
   ec.clear();
   if (is_other(s1) || is_other(s2))
return false;
-#if _GLIBCXX_FILESYSTEM_IS_WINDOWS
-  // st_ino is not set, so can't be used to distinguish files
-  if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev)
-   return false;
-
-  auto_win_fil

[gcc r15-3459] [PATCH 1/3] RISC-V: Improve codegen for negative repeating large constants

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:cbea72b265e4c9d1a595bd3ecd11b325021925d0

commit r15-3459-gcbea72b265e4c9d1a595bd3ecd11b325021925d0
Author: Raphael Moreira Zinsly 
Date:   Wed Sep 4 17:21:24 2024 -0600

[PATCH 1/3] RISC-V: Improve codegen for negative repeating large constants

Improve handling of constants where its upper and lower 32-bit
halves are the same and have negative values.

e.g. for:

unsigned long f (void) { return 0xf0f0f0f0f0f0f0f0UL; }

Without the patch:

li  a0,-252645376
addia0,a0,240
li  a5,-252645376
addia5,a5,241
sllia5,a5,32
add a0,a5,a0

With the patch:

li  a5,252645376
addia5,a5,-241
sllia0,a5,32
add a0,a0,a5
xoria0,a0,-1

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_split_integer_cost): Adjust the
cost of negative repeating constants.
(riscv_split_integer): Handle negative repeating constants.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/synthesis-11.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc | 29 +++
 gcc/testsuite/gcc.target/riscv/synthesis-11.c | 28 ++
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f82e64a6fec..a38cb72f09f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1242,18 +1242,20 @@ static int
 riscv_split_integer_cost (HOST_WIDE_INT val)
 {
   int cost;
-  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
-  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+  unsigned HOST_WIDE_INT loval = val & 0x;
+  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
   struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
 
   /* This routine isn't used by pattern conditions, so whether or
  not to allow new pseudos can be a function of where we are in the
- RTL pipeline.  We shouldn't need scratch pseudos for this case
- anyway.  */
+ RTL pipeline.  */
   bool allow_new_pseudos = can_create_pseudo_p ();
   cost = 2 + riscv_build_integer (codes, loval, VOIDmode, allow_new_pseudos);
   if (loval != hival)
 cost += riscv_build_integer (codes, hival, VOIDmode, allow_new_pseudos);
+  else if ((loval & 0x8000) != 0)
+cost = 3 + riscv_build_integer (codes, ~loval & 0x,
+   VOIDmode, allow_new_pseudos);
 
   return cost;
 }
@@ -1276,11 +1278,16 @@ riscv_integer_cost (HOST_WIDE_INT val, bool 
allow_new_pseudos)
 static rtx
 riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
 {
-  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
-  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+  unsigned HOST_WIDE_INT loval = val & 0x;
+  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
   rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);
+  rtx x = gen_reg_rtx (mode);
+  bool eq_neg = (loval == hival) && ((loval & 0x8000) != 0);
 
-  riscv_move_integer (lo, lo, loval, mode);
+  if (eq_neg)
+riscv_move_integer (lo, lo, ~loval & 0x, mode);
+  else
+riscv_move_integer (lo, lo, loval, mode);
 
   if (loval == hival)
   hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
@@ -1291,7 +1298,13 @@ riscv_split_integer (HOST_WIDE_INT val, machine_mode 
mode)
 }
 
   hi = force_reg (mode, hi);
-  return gen_rtx_PLUS (mode, hi, lo);
+  x = gen_rtx_PLUS (mode, hi, lo);
+  if (eq_neg)
+{
+  x = force_reg (mode, x);
+  x = gen_rtx_XOR (mode, x, GEN_INT (-1));
+}
+  return x;
 }
 
 /* Return true if X is a thread-local symbol.  */
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-11.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
new file mode 100644
index 000..89e48edb2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 60 } } */
+
+
+
+unsigned long foo_0xf857f2def857f2de(void) { return 0xf857f2def857f2deUL; }
+unsigned long foo_0x99660e6399660e63(void) { return 0x99660e6399660e63UL; }
+unsigned long foo_0x937f1b75937f1b

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Fix mixed input kind permute optimization

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:ad83da212ba5a5e27484fe7a147a28294fab8829

commit ad83da212ba5a5e27484fe7a147a28294fab8829
Author: Richard Biener 
Date:   Tue May 21 19:15:33 2024 +0200

Fix mixed input kind permute optimization

When change_vec_perm_layout runs into a permute combining two
nodes where one is invariant and one internal the partition of
one input can be -1 but the other might not be.  The following
supports this case by simply ignoring inputs with input partiton -1.

I'm not sure this is correct but it avoids ICEing when accessing
that partitions layout for gcc.target/i386/pr98928.c with the
change to avoid splitting store dataref groups during SLP discovery.

* tree-vect-slp.cc (change_vec_perm_layout): Ignore an
input partition of -1.

(cherry picked from commit 3507ab1b018a68500e49fa9f1de7caa0f1b53dda)

Diff:
---
 gcc/tree-vect-slp.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f23a04560ff..2dbef5e7dc7 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4727,6 +4727,8 @@ change_vec_perm_layout (slp_tree node, lane_permutation_t 
&perm,
{
  slp_tree in_node = SLP_TREE_CHILDREN (node)[entry.first];
  unsigned int in_partition_i = m_vertices[in_node->vertex].partition;
+ if (in_partition_i == -1u)
+   continue;
  this_in_layout_i = m_partitions[in_partition_i].layout;
}
   if (this_in_layout_i > 0)

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:019b3585afb077022b1463dd7e786bec472ada31

commit 019b3585afb077022b1463dd7e786bec472ada31
Author: Pan Li 
Date:   Tue Aug 27 14:37:01 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 3.  Aka:

Form 3:
  #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
  {   \
return (T)IMM > y ? (T)IMM - y : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-9.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit cb0b8b62223b485a058a56fc5c6345974ebaa230)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  9 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c  | 21 
 .../gcc.target/riscv/sat_u_sub_imm-10_1.c  | 22 +
 .../gcc.target/riscv/sat_u_sub_imm-10_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c  | 20 
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c  | 22 +
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-12.c  | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_1.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_2.c | 20 
 .../gcc.target/riscv/sat_u_sub_imm-run-10.c| 56 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-11.c| 55 +
 .../gcc.target/riscv/sat_u_sub_imm-run-12.c| 48 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-9.c | 56 ++
 15 files changed, 432 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index b05d973c4c6..406ac1d65db 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -215,6 +215,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   return x >= (T)IMM ? x - (T)IMM : 0;  \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
+{   \
+  return (T)IMM > y ? (T)IMM - y : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -232,6 +239,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_1(y) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_2(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
new file mode 100644
index 000..db450d7cfbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm6_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*6
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+*

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:51aee40c9d5d97b45d7d96781e1f43c9100702f6

commit 51aee40c9d5d97b45d7d96781e1f43c9100702f6
Author: Pan Li 
Date:   Tue Aug 27 15:14:40 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 4.  Aka:

Form 4:
  #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
  {   \
return x > (T)IMM ? x - (T)IMM : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_4(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-16.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-16.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 3989e31d867b3505f847ecb6d870eacacfdf47bf)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  9 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-13_1.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-13_2.c  | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14.c  | 20 
 .../gcc.target/riscv/sat_u_sub_imm-14_1.c  | 21 +
 .../gcc.target/riscv/sat_u_sub_imm-14_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c  | 21 +
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-16.c  | 18 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-13.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-14.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-15.c| 54 +
 .../gcc.target/riscv/sat_u_sub_imm-run-16.c| 48 +++
 15 files changed, 421 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 406ac1d65db..4e91821a1c8 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -222,6 +222,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   return (T)IMM > y ? (T)IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
+{   \
+  return x > (T)IMM ? x - (T)IMM : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -241,6 +248,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
new file mode 100644
index 000..7dcbc3b1a12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm11_uint8_t_fmt_4:
+** addi\s+[atx][0-9]+,\s*a0,\s*-11
+** sltiu\s+a0,\s*[atx][0-9]+,\s*11
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_4(uint8_t, 11)
+
+/* { dg-final { sc

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add missing mode_idx for vrol and vror

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:1e62adc9cd6aa61948b106b1476fdf86eee7f3e0

commit 1e62adc9cd6aa61948b106b1476fdf86eee7f3e0
Author: Kito Cheng 
Date:   Tue Aug 27 21:27:02 2024 +0800

RISC-V: Add missing mode_idx for vrol and vror

We add pattern for vector rotate, but seems like we forgot adding
mode_idx which used in AVL propgation (riscv-avlprop.cc).

gcc/ChangeLog:

* config/riscv/vector.md (mode_idx): Add vrol and vror.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/rotr.c: New.

(cherry picked from commit 3cde331e9590944819621bcde41ddbffd9bbf0ba)

Diff:
---
 gcc/config/riscv/vector.md|  2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/rotr.c | 13 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 666719330c6..d0677325ba1 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -816,7 +816,7 @@
vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\

vfcvtitof,vfncvtitof,vfncvtftoi,vfncvtftof,vmalu,vmiota,vmidx,\

vimovxv,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,\
-   
vgather,vcompress,vmov,vnclip,vnshift,vandn,vcpop,vclz,vctz")
+   
vgather,vcompress,vmov,vnclip,vnshift,vandn,vcpop,vclz,vctz,vrol,vror")
   (const_int 0)
 
   (eq_attr "type" "vimovvx,vfmovvf")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/rotr.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/rotr.c
new file mode 100644
index 000..055b28d1e78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/rotr.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -fno-vect-cost-model 
-mrvv-vector-bits=zvl" } */
+
+typedef int a;
+void *b;
+a c;
+void d() {
+  a e = c, f =0;
+  short *g = b;
+  for (; f < e; f++)
+*(g + f) = (255 & (*(g + f) >> 8)) | *(g + f) << 8;
+}
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Partial: Just the testsuite bits.

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:a4b71f89a6e6090d775ab34e7ce365e8d8e8

commit a4b71f89a6e6090d775ab34e7ce365e8d8e8
Author: Pan Li 
Date:   Tue Aug 27 15:01:02 2024 +0800

Partial: Just the testsuite bits.

Vect: Reconcile the const_int operand type of unsigned .SAT_ADD

The .SAT_ADD has 2 operand, when one of the operand may be INTEGER_CST.
For example _1 = .SAT_ADD (_2, 9) comes from below sample code.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9)

It will fail to vectorize as the vectorizable_call will check the
operands is type_compatiable but the imm will be (const_int 9) with
the SImode, which is different from _2 (DImode).  Aka:

uint64_t _1;
uint64_t _2;
_1 = .SAT_ADD (_2, 9);

This patch would like to reconcile the imm operand to the operand type
mode of _2 by fold_convert to make the vectorizable_call happy.

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_sat_add_pattern): Add fold
convert for const_int to the type of operand 0.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 6dccd5710380429c7addec9fe92a1a0bcb2f3367)

Diff:
---
 .../autovec/binop/vec_sat_u_add_imm_reconcile-1.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-10.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-11.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-12.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-13.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-14.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-15.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-2.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-3.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-4.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-5.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-6.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-7.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-8.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-9.c|  9 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 20 
 16 files changed, 155 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:ef3f7e6aa699380b9925dabe7a1052a965622726

commit ef3f7e6aa699380b9925dabe7a1052a965622726
Author: Pan Li 
Date:   Fri Aug 30 14:07:12 2024 +0800

RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

In previous, we have some specially handling for both the .SAT_ADD and
.SAT_SUB for unsigned int.  There are similar to take care of SImode
in RV64 for zero extend.  Thus refactor these two helper function
into one for possible code duplication.

The below test suite are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge
the zero_extend handing from func riscv_gen_unsigned_xmode_reg.
(riscv_gen_unsigned_xmode_reg): Remove.
(riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx
instead of riscv_gen_unsigned_xmode_reg.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub-11.c: Adjust asm check.
* gcc.target/riscv/sat_u_sub-15.c: Ditto.
* gcc.target/riscv/sat_u_sub-19.c: Ditto.
* gcc.target/riscv/sat_u_sub-23.c: Ditto.
* gcc.target/riscv/sat_u_sub-27.c: Ditto.
* gcc.target/riscv/sat_u_sub-3.c: Ditto.
* gcc.target/riscv/sat_u_sub-31.c: Ditto.
* gcc.target/riscv/sat_u_sub-35.c: Ditto.
* gcc.target/riscv/sat_u_sub-39.c: Ditto.
* gcc.target/riscv/sat_u_sub-43.c: Ditto.
* gcc.target/riscv/sat_u_sub-47.c: Ditto.
* gcc.target/riscv/sat_u_sub-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto.

Signed-off-by: Pan Li 
(cherry picked from commit e96d4bf6a6e8b8a5ea1b81a79f4efa07dee77af1)

Diff:
---
 gcc/config/riscv/riscv.cc  | 99 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub-11.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-15.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-19.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-23.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-27.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-3.c   |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-31.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-35.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-39.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-43.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-47.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-7.c   |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c  |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c  |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c   |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c   |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c |  2 +
 25 files changed, 118 insertions(+), 53 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f87eab81d81..5e9870a574c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11893,19 +11893,56 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
-/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
-   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
-   the new zero extended Xmode rtx will be returned.
-   Or the gen_lowpart rtx of Xmode will be returned.  */
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG and Xmode, the RTX x will be returned directly.
+
+   If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be
+   returned.
+
+   If rtx is const_int, a new REG rtx will be created to hold the value of

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Test: Move pr116278 run test to dg/torture [NFC]

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:e9a4727900785a5a0a5fd0bf0df999ee69e3d6c9

commit e9a4727900785a5a0a5fd0bf0df999ee69e3d6c9
Author: Pan Li 
Date:   Mon Aug 19 10:02:46 2024 +0800

Test: Move pr116278 run test to dg/torture [NFC]

Move the run test of pr116278 to dg/torture and leave the risc-v the
asm check under risc-v part.

PR target/116278

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: Take compile instead of run.
* gcc.target/riscv/pr116278-run-2.c: Ditto.
* gcc.dg/torture/pr116278-run-1.c: New test.
* gcc.dg/torture/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 3178786c88761e47b3cbe700a97a0de2b6e133cb)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116278-run-1.c   | 19 +++
 gcc/testsuite/gcc.dg/torture/pr116278-run-2.c   | 19 +++
 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c |  4 ++--
 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c |  4 ++--
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
new file mode 100644
index 000..ada3ac98445
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include 
+
+int8_t b[1];
+int8_t *d = b;
+int32_t c;
+
+int main() {
+  b[0] = -40;
+  uint16_t t = (uint16_t)d[0];
+
+  c = (t < 0xFFF6 ? t : 0xFFF6) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
new file mode 100644
index 000..5e72d15957e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include 
+
+int16_t b[1];
+int16_t *d = b;
+int64_t c;
+
+int main() {
+  b[0] = -40;
+  uint32_t t = (uint32_t)d[0];
+
+  c = (t < 0xFFF6u ? t : 0xFFF6u) + 9;
+
+  if (c != 4294967265)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
index d3812bdcdfb..67cf17ebc33 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -1,7 +1,7 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
-#include 
+#include 
 
 int8_t b[1];
 int8_t *d = b;
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
index 669cd4f003f..103602df258 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
@@ -1,7 +1,7 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
-#include 
+#include 
 
 int16_t b[1];
 int16_t *d = b;

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:246b7bd9a16863ac8abb5fb55240eefbf1026006

commit 246b7bd9a16863ac8abb5fb55240eefbf1026006
Author: Pan Li 
Date:   Fri Aug 30 08:36:45 2024 +0800

RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

This patch would like to add test cases for the unsigned vector .SAT_ADD
when one of the operand is IMM.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123)

The below test are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New 
test.

Signed-off-by: Pan Li 
(cherry picked from commit 72f3e9021e55f14e90773cf2966805a318f44842)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c  | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-10.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-11.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-12.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-9.c| 28 ++
 8 files changed, 168 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
new file mode 100644
index 000..b6b605ac615
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_3:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
new file mode 100644
index 000..6da86a1abe1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm33u_uint32_t_fmt_3:
+** ...
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint32_t, 33u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
new file mode 100644
index 000..b6ff5a6d5d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm129ull_uint64_t_fmt_3:
+** ...
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:b2887fd8a312b02e23d5e6ae0191f517e4c421a3

commit b2887fd8a312b02e23d5e6ae0191f517e4c421a3
Author: Pan Li 
Date:   Fri Aug 30 11:01:37 2024 +0800

RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM

This patch would like to add test cases for the unsigned vector .SAT_ADD
when one of the operand is IMM.

Form 4:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_4(T, IMM)   
\
  T __attribute__((noinline))   
\
  vec_sat_u_add_imm##IMM##_##T##_fmt_4 (T *out, T *in, unsigned limit)  
\
  { 
\
unsigned i; 
\
T ret;  
\
for (i = 0; i < limit; i++) 
\
  { 
\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) == 0 ? ret : -1; 
\
  } 
\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint64_t, 123)

The below test are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c: 
New test.

Signed-off-by: Pan Li 
(cherry picked from commit 56ed1dfa79c436b769f3266258d34d160b4330d9)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-13.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-14.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-15.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-16.c   | 28 ++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 20 
 9 files changed, 188 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
new file mode 100644
index 000..a9439dff39f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm9u_uint8_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*9
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint8_t, 9u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
new file mode 100644
index 000..dbe47497599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
new file mode 100644
index 000..0ac2e1b2942
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-sche

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix subreg of VLS modes larger than a vector [PR116086].

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:edba00e457e7677ffe66292090d60f8d50d70ee7

commit edba00e457e7677ffe66292090d60f8d50d70ee7
Author: Robin Dapp 
Date:   Tue Aug 27 10:25:34 2024 +0200

RISC-V: Fix subreg of VLS modes larger than a vector [PR116086].

When the source mode is potentially larger than one vector (e.g. an
LMUL2 mode for VLEN=128) we don't know which vector the subreg actually
refers to.  For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI))
could actually be the a full (high) vector register of a two-register
group (at VLEN=128) or the higher part of a single register (at VLEN>128).

As the subreg is statically ambiguous we prevent such situations in
can_change_mode_class.

The culprit in PR116086 is

 _12 = BIT_FIELD_REF ;

which can be expanded with a vector-vector extract (from V4DI to V2DI).
This patch adds a VLS-mode vector-vector extract that handles "halving"
cases like this one by sliding down the source vector, thus making sure
the correct part is used.

PR target/116086

gcc/ChangeLog:

* config/riscv/autovec.md (vec_extract): Add
vector-vector extract for VLS modes.
* config/riscv/riscv.cc (riscv_can_change_mode_class): Forbid
VLS modes larger than one vector.
* config/riscv/vector-iterators.md: Add vector-vector extract
iterators.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add effective target checks for
zvl256b and zvl512b.
* gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086.c: New test.

(cherry picked from commit 4ff4875a79ccb302dc2401c32fe0af2187b61b99)

Diff:
---
 gcc/config/riscv/autovec.md|  35 
 gcc/config/riscv/riscv.cc  |  11 ++
 gcc/config/riscv/vector-iterators.md   | 202 +
 .../gcc.target/riscv/rvv/autovec/pr116086-2-run.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/pr116086-2.c  |  18 ++
 .../gcc.target/riscv/rvv/autovec/pr116086.c|  76 
 gcc/testsuite/lib/target-supports.exp  |  37 
 7 files changed, 385 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 4decaedbd82..a4e108268b4 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1453,6 +1453,41 @@
   DONE;
 })
 
+;; -
+;;  [INT,FP] Extract a vector from a vector.
+;; -
+;; TODO: This can be extended to allow basically any extract mode.
+;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16)
+;; that would otherwise need to go via memory.
+
+(define_expand "vec_extract"
+  [(set (match_operand:   0 "nonimmediate_operand")
+ (vec_select:
+   (match_operand:VLS_HAS_HALF  1 "register_operand")
+   (parallel
+[(match_operand 2 "immediate_operand")])))]
+  "TARGET_VECTOR"
+{
+  int sz = GET_MODE_NUNITS (mode).to_constant ();
+  int part = INTVAL (operands[2]);
+
+  rtx start = GEN_INT (part * sz);
+  rtx tmp = operands[1];
+
+  if (part != 0)
+{
+  tmp = gen_reg_rtx (mode);
+
+  rtx ops[] = {tmp, operands[1], start};
+  riscv_vector::emit_vlmax_insn
+   (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode),
+riscv_vector::BINARY_OP, ops);
+}
+
+  emit_move_insn (operands[0], gen_lowpart (mode, tmp));
+  DONE;
+})
+
 ;; -
 ;;  [FP] Binary operations
 ;; -
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 335ca5e6c89..f87eab81d81 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10670,6 +10670,17 @@ riscv_can_change_mode_class (machine_mode from, 
machine_mode to,
   if (reg_classes_intersect_p (V_REGS, rclass)
   && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
 return false;
+
+  /* Subregs of modes larger than one vector are ambiguous.
+ A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one
+ register group of two at VLEN = 128 or one register at VLEN >= 256 and
+ we cannot, statically, determine which part of it to extract.
+ Therefore prevent that.  */
+  if (reg_classes_intersect_p (V_REGS, rclass)
+  && riscv_v_ext_vls_mode_p (from)
+  && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from)))
+  return false;
+
   return !reg_classes_intersect_p (FP_REGS, rclass);
 }
 
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index cbbd24

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:94f4b718c181c9fe1f27a53c8babd38fa46c1640

commit 94f4b718c181c9fe1f27a53c8babd38fa46c1640
Author: Pan Li 
Date:   Sun Aug 18 12:49:47 2024 +0800

RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x > max ? (NT) max : (NT)x; \
  }

QUAD:
DEF_SAT_U_TRUC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit ea81e21d5398bdacf883533fd738fc45ea8d6dd9)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c | 16 
 6 files changed, 102 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
new file mode 100644
index 000..5ea8e613901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
new file mode 100644
index 000..3b45e2af9ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
new file mode 100644
index 000..7ea2c93a301
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
new file mode 100644
index 000..2281610f335
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_2_WRAP(T1, T2

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:d4f551d4316a38ea8ad9a1fe740dca5d4cbbd77c

commit d4f551d4316a38ea8ad9a1fe740dca5d4cbbd77c
Author: Pan Li 
Date:   Sun Aug 18 14:08:21 2024 +0800

RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x <= max ? (NT)x : (NT) max;\
  }

QUAD:
DEF_SAT_U_TRUC_FMT_3 (uint16_t, uint64_t)
DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUC_FMT_3 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-16.c: New test.
* gcc.target/riscv/sat_u_trunc-17.c: New test.
* gcc.target/riscv/sat_u_trunc-18.c: New test.
* gcc.target/riscv/sat_u_trunc-run-16.c: New test.
* gcc.target/riscv/sat_u_trunc-run-17.c: New test.
* gcc.target/riscv/sat_u_trunc-run-18.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 5239902210a16b22d59d2cf8b535d615922a5c00)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c | 16 
 6 files changed, 102 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
new file mode 100644
index 000..f91da58c0ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
new file mode 100644
index 000..9813e1f79b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
new file mode 100644
index 000..eb799849f73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
new file mode 100644
index 000..20ceda6852e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_3_WRAP(T1, T2

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed][PR rtl-optimization/116544] Fix test for promoted subregs

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:f35d49f0eb8ffc614cf233ad05b2fe86791c6719

commit f35d49f0eb8ffc614cf233ad05b2fe86791c6719
Author: Jeff Law 
Date:   Sun Sep 1 22:16:04 2024 -0600

[committed][PR rtl-optimization/116544] Fix test for promoted subregs

This is a small bug in the ext-dce code's handling of promoted subregs.

Essentially when we see a promoted subreg we need to make additional bit 
groups
live as various parts of the RTL path know that an extension of a suitably
promoted subreg can be trivially eliminated.

When I added support for dealing with this quirk I failed to account for the
larger modes properly and it ignored the case when the size of the inner 
object
was > 32 bits.  Opps.

This does _not_ fix the outstanding x86 issue.  That's caused by something
completely different and more concerning ;(

Bootstrapped and regression tested on x86.  Obviously fixes the testcase on
riscv as well.

Pushing to the trunk.

PR rtl-optimization/116544
gcc/
* ext-dce.cc (ext_dce_process_uses): Fix thinko in promoted subreg
handling.

gcc/testsuite/
* gcc.dg/torture/pr116544.c: New test.

(cherry picked from commit 0562976d62e095f3a00c799288dee4e5b20114e2)

Diff:
---
 gcc/ext-dce.cc  |  2 +-
 gcc/testsuite/gcc.dg/torture/pr116544.c | 22 ++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index fbe9719a8d5..dcd55cf1183 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -846,7 +846,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj,
bitmap_set_bit (livenow, rn + 1);
  if (size > 16)
bitmap_set_bit (livenow, rn + 2);
- if (size == 32)
+ if (size >= 32)
bitmap_set_bit (livenow, rn + 3);
  iter.skip_subrtxes ();
}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116544.c 
b/gcc/testsuite/gcc.dg/torture/pr116544.c
new file mode 100644
index 000..15f52fecb3b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116544.c
@@ -0,0 +1,22 @@
+/* { dg-options "-fno-strict-aliasing -fwrapv" }
+/* { dg-do run { target longlong64 } } */
+
+extern void abort (void);
+long long a;
+signed char b[60];
+signed char c;
+long long d[60];
+int e[30];
+long long *f = d;
+static void g(long long *j, long k) { *j = k; }
+int main() {
+  d[5] = 0x1;
+  for (int h = 2; h < 7; h += 3)
+for (int i = 0; i < (c || b[h]) + 10; i += 11)
+  e[2] = f[h];
+  g(&a, e[2]);
+  if (a != 0)
+abort ();
+  return 0;
+}
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH] RISC-V: Optimize the cost of the DFmode register move for RV32.

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:82d58b78291d4108435da4411e8ebfa840ee4dc9

commit 82d58b78291d4108435da4411e8ebfa840ee4dc9
Author: Xianmiao Qu 
Date:   Sun Sep 1 22:28:13 2024 -0600

[PATCH] RISC-V: Optimize the cost of the DFmode register move for RV32.

Currently, in RV32, even with the D extension enabled, the cost of DFmode
register moves is still set to 'COSTS_N_INSNS (2)'. This results in the
'lower-subreg' pass splitting DFmode register moves into two SImode SUBREG
register moves, leading to the generation of many redundant instructions.

As an example, consider the following test case:
  double foo (int t, double a, double b)
  {
if (t > 0)
  return a;
else
  return b;
  }

When compiling with -march=rv32imafdc -mabi=ilp32d, the following code is 
generated:
  .cfi_startproc
  addisp,sp,-32
  .cfi_def_cfa_offset 32
  fsd fa0,8(sp)
  fsd fa1,16(sp)
  lw  a4,8(sp)
  lw  a5,12(sp)
  lw  a2,16(sp)
  lw  a3,20(sp)
  bgt a0,zero,.L1
  mv  a4,a2
  mv  a5,a3
  .L1:
  sw  a4,24(sp)
  sw  a5,28(sp)
  fld fa0,24(sp)
  addisp,sp,32
  .cfi_def_cfa_offset 0
  jr  ra
  .cfi_endproc

After adjust the DFmode register move's cost to 'COSTS_N_INSNS (1)', the
generated code is as follows, with a significant reduction in the number
of instructions.
  .cfi_startproc
  ble a0,zero,.L5
  ret
  .L5:
  fmv.d   fa0,fa1
  ret
  .cfi_endproc

gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Optimize the cost of the
DFmode register move for RV32.

gcc/testsuite/
* gcc.target/riscv/rv32-movdf-cost.c: New test.

(cherry picked from commit eca320bfe340beec9267bdb6021c7b387111)

Diff:
---
 gcc/config/riscv/riscv.cc|  5 +
 gcc/testsuite/gcc.target/riscv/rv32-movdf-cost.c | 13 +
 2 files changed, 18 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5e9870a574c..e634173bd53 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3600,6 +3600,11 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
   if (outer_code == INSN
  && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x
{
+ if (REG_P (SET_SRC (x)) && TARGET_DOUBLE_FLOAT && mode == DFmode)
+   {
+ *total = COSTS_N_INSNS (1);
+ return true;
+   }
  riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
  return true;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rv32-movdf-cost.c 
b/gcc/testsuite/gcc.target/riscv/rv32-movdf-cost.c
new file mode 100644
index 000..cb679e7b95f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rv32-movdf-cost.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafdc -mabi=ilp32d" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+double foo (int t, double a, double b)
+{
+  if (t > 0)
+return a;
+  else
+return b;
+}
+
+/* { dg-final { scan-assembler-not "fsd\t" } } */

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:cb75d2d9eca78adc5a95884bdd19907c330ab5aa

commit cb75d2d9eca78adc5a95884bdd19907c330ab5aa
Author: Pan Li 
Date:   Thu Aug 29 11:25:44 2024 +0800

RISC-V: Support form 1 of integer scalar .SAT_ADD

This patch would like to support the scalar signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ mv   a5,a0
  12   │ add  a0,a0,a1
  13   │ xor  a1,a5,a1
  14   │ not  a1,a1
  15   │ xor  a4,a5,a0
  16   │ and  a1,a1,a4
  17   │ blt  a1,zero,.L5
  18   │ ret
  19   │ .L5:
  20   │ srai a5,a5,63
  21   │ li   a0,-1
  22   │ srli a0,a0,1
  23   │ xor  a0,a5,a0
  24   │ ret

After this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ add  a2,a0,a1
  12   │ xor  a1,a0,a1
  13   │ xor  a5,a0,a2
  14   │ srli a5,a5,63
  15   │ srli a1,a1,63
  16   │ xori a1,a1,1
  17   │ and  a5,a5,a1
  18   │ srai a4,a0,63
  19   │ li   a3,-1
  20   │ srli a3,a3,1
  21   │ xor  a3,a3,a4
  22   │ neg  a4,a5
  23   │ and  a3,a3,a4
  24   │ addi a5,a5,-1
  25   │ and  a0,a2,a5
  26   │ or   a0,a0,a3
  27   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
decl for expanding ssadd.
* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
impl to gen the max int rtx.
(riscv_expand_ssadd): Add new func impl to expand the ssadd.
* config/riscv/riscv.md (ssadd3): Add new pattern for
signed integer .SAT_ADD.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data.
* gcc.target/riscv/sat_s_add-1.c: New test.
* gcc.target/riscv/sat_s_add-2.c: New test.
* gcc.target/riscv/sat_s_add-3.c: New test.
* gcc.target/riscv/sat_s_add-4.c: New test.
* gcc.target/riscv/sat_s_add-run-1.c: New test.
* gcc.target/riscv/sat_s_add-run-2.c: New test.
* gcc.target/riscv/sat_s_add-run-3.c: New test.
* gcc.target/riscv/sat_s_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 539fcaae67c6cf54bd377eba6c9d5b1792a3)

Diff:
---
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv.cc  | 90 ++
 gcc/config/riscv/riscv.md  | 11 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 17 
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 85 
 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c   | 30 
 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c   | 32 
 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c   | 31 
 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c   | 30 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c   | 16 
 .../gcc.target/riscv/scalar_sat_binary_run_xxx.h   | 26 +++
 14 files changed, 417 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 926899ccad6..3358e3887b9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -134,6 +134,7 @@ extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e634173bd53..2fb444039f4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/con

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PR target/115921] Improve reassociation for rv64

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:0ff3a68e122baeafff5d58bad57ad0052dede067

commit 0ff3a68e122baeafff5d58bad57ad0052dede067
Author: Jeff Law 
Date:   Tue Sep 3 06:45:30 2024 -0600

[PR target/115921] Improve reassociation for rv64

As Jovan pointed out in pr115921, we're not reassociating expressions like 
this
on rv64:

(x & 0x3e) << 12

It generates something like this:

li  a5,258048
sllia0,a0,12
and a0,a0,a5

We have a pattern that's designed to clean this up.  Essentially 
reassociating
the operations so that we don't need to load the constant resulting in
something like this:

andia0,a0,63
sllia0,a0,12

That pattern wasn't working for certain constants due to its condition. The
condition is trying to avoid cases where this kind of reassociation would
hinder shadd generation on rv64.  That condition was just written poorly.

This patch tightens up that condition in a few ways.  First, there's no 
need to
worry about shadd cases if ZBA is not enabled.  Second we can't use shadd if
the shift value isn't 1, 2 or 3.  Finally rather than open-coding one of the
tests, we can use an existing operand predicate.

The net is we'll start performing this transformation in more cases on rv64
while still avoiding reassociation if it would spoil shadd generation.

PR target/115921
gcc/
* config/riscv/riscv.md (reassociate bitwise ops): Tighten test for
cases we do not want reassociate.

gcc/testsuite/
* gcc.target/riscv/pr115921.c: New test.

(cherry picked from commit 4371f656288f461335c47e98b8c038937a89764a)

Diff:
---
 gcc/J | 1064 +
 gcc/config/riscv/riscv.md |   10 +-
 gcc/testsuite/gcc.target/riscv/pr115921.c |   13 +
 3 files changed, 1083 insertions(+), 4 deletions(-)

diff --git a/gcc/J b/gcc/J
new file mode 100644
index 000..6b6332dd2c1
--- /dev/null
+++ b/gcc/J
@@ -0,0 +1,1064 @@
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
1) /* RTL-based forward propagation pass for GNU compiler.
+a945c346f57b gcc/fwprop.cc (Jakub Jelinek   2024-01-03 12:19:35 +0100
2)Copyright (C) 2005-2024 Free Software Foundation, Inc.
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
3)Contributed by Paolo Bonzini and Steven Bosscher.
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
4) 
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
5) This file is part of GCC.
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
6) 
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
7) GCC is free software; you can redistribute it and/or modify it under
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
8) the terms of the GNU General Public License as published by the Free
+9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +
9) Software Foundation; either version 3, or (at your option) any later
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
10) version.
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
11) 
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
12) GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
13) WARRANTY; without even the implied warranty of MERCHANTABILITY or
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
14) FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
15) for more details.
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
16) 
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
17) You should have received a copy of the GNU General Public License
+9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +   
18) along with GCC; see the file COPYING3.  If not see
+9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +   
19) .  */
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
20) 
+0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
21) #define INCLUDE_ALGORITHM
+0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
22) #define INCLUDE_FUNCTIONAL
+d6849aa92666 gcc/fwprop.cc (Richard Sandiford   2024-07-25 13:25:32 +0100   
23) #define INCLUDE_ARRAY
+a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
24) #include "config.h"

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Drop file that should not have been committed.

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:98d6e0eefeaa53d6482ca1968d95a22455996af3

commit 98d6e0eefeaa53d6482ca1968d95a22455996af3
Author: Jeff Law 
Date:   Tue Sep 3 09:30:35 2024 -0600

Drop file that should not have been committed.

* J: Drop file that should not have been committed

(cherry picked from commit 36f63000c6f869f4f5550780d77b381b1a8b1700)

Diff:
---
 gcc/J | 1064 -
 1 file changed, 1064 deletions(-)

diff --git a/gcc/J b/gcc/J
deleted file mode 100644
index 6b6332dd2c1..000
--- a/gcc/J
+++ /dev/null
@@ -1,1064 +0,0 @@
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
1) /* RTL-based forward propagation pass for GNU compiler.
-a945c346f57b gcc/fwprop.cc (Jakub Jelinek   2024-01-03 12:19:35 +0100
2)Copyright (C) 2005-2024 Free Software Foundation, Inc.
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
3)Contributed by Paolo Bonzini and Steven Bosscher.
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
4) 
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
5) This file is part of GCC.
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
6) 
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
7) GCC is free software; you can redistribute it and/or modify it under
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +
8) the terms of the GNU General Public License as published by the Free
-9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +
9) Software Foundation; either version 3, or (at your option) any later
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
10) version.
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
11) 
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
12) GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
13) WARRANTY; without even the implied warranty of MERCHANTABILITY or
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
14) FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
15) for more details.
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
16) 
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
17) You should have received a copy of the GNU General Public License
-9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +   
18) along with GCC; see the file COPYING3.  If not see
-9dcd6f09a3dd gcc/fwprop.c  (Nick Clifton2007-07-26 08:37:01 +   
19) .  */
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
20) 
-0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
21) #define INCLUDE_ALGORITHM
-0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
22) #define INCLUDE_FUNCTIONAL
-d6849aa92666 gcc/fwprop.cc (Richard Sandiford   2024-07-25 13:25:32 +0100   
23) #define INCLUDE_ARRAY
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
24) #include "config.h"
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
25) #include "system.h"
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
26) #include "coretypes.h"
-c7131fb2b58a gcc/fwprop.c  (Andrew MacLeod  2015-07-08 00:53:03 +   
27) #include "backend.h"
-c7131fb2b58a gcc/fwprop.c  (Andrew MacLeod  2015-07-08 00:53:03 +   
28) #include "rtl.h"
-1815e313a8fb gcc/fwprop.cc (Uros Bizjak 2023-07-14 11:46:22 +0200   
29) #include "rtlanal.h"
-c7131fb2b58a gcc/fwprop.c  (Andrew MacLeod  2015-07-08 00:53:03 +   
30) #include "df.h"
-0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
31) #include "rtl-ssa.h"
-957060b5c5d2 gcc/fwprop.c  (Andrew MacLeod  2015-10-29 13:57:32 +   
32) 
-0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
33) #include "predict.h"
-60393bbc613a gcc/fwprop.c  (Andrew MacLeod  2014-10-27 12:41:01 +   
34) #include "cfgrtl.h"
-60393bbc613a gcc/fwprop.c  (Andrew MacLeod  2014-10-27 12:41:01 +   
35) #include "cfgcleanup.h"
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
36) #include "cfgloop.h"
-a52b023a5f03 gcc/fwprop.c  (Paolo Bonzini   2006-11-04 08:36:45 +   
37) #include "tree-pass.h"
-aa4e2d7ef068 gcc/fwprop.c  (Richard Sandiford   2014-08-28 06:23:26 +   
38) #include "rtl-iter.h"
-0b76990a9d75 gcc/fwprop.c  (Richard Sandiford   2020-12-17 00:15:12 +   
39) #include "target.h"
-a

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:b856428ef3b05b8fd69b439ade1b0cb1bb6e185e

commit b856428ef3b05b8fd69b439ade1b0cb1bb6e185e
Author: Pan Li 
Date:   Mon Sep 2 15:54:43 2024 +0800

RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD

This patch would like to allow the IMM operand of the unsigned
scalar .SAT_ADD.  Like the operand 0, the operand 1 of .SAT_ADD
will be zero extended to Xmode before underlying code generation.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_usadd): Zero extend
the second operand of usadd as the first operand does.
* config/riscv/riscv.md (usadd3): Allow imm operand for
scalar usadd pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-11.c: Make asm check robust.
* gcc.target/riscv/sat_u_add-15.c: Ditto.
* gcc.target/riscv/sat_u_add-19.c: Ditto.
* gcc.target/riscv/sat_u_add-23.c: Ditto.
* gcc.target/riscv/sat_u_add-3.c: Ditto.
* gcc.target/riscv/sat_u_add-7.c: Ditto.

Signed-off-by: Pan Li 
(cherry picked from commit 9ea9d05908432fc5f3632f3e397e3709f95ef636)

Diff:
---
 gcc/config/riscv/riscv.cc | 2 +-
 gcc/config/riscv/riscv.md | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2fb444039f4..06a2c66ea74 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11969,7 +11969,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
   rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
-  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
   /* Step-1: sum = x + y  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f7efafb8ab..9f94b5aa023 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4360,8 +4360,8 @@
 
 (define_expand "usadd3"
   [(match_operand:ANYI 0 "register_operand")
-   (match_operand:ANYI 1 "register_operand")
-   (match_operand:ANYI 2 "register_operand")]
+   (match_operand:ANYI 1 "reg_or_int_operand")
+   (match_operand:ANYI 2 "reg_or_int_operand")]
   ""
   {
 riscv_expand_usadd (operands[0], operands[1], operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
index e248aeafa8e..bd830ececad 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_3:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
index bb8b991a84e..de615a6225e 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_4:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
index 7e4ae12f2f5..2b793e2f8fd 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_5:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
index 49bbb74a401..5de086e1138 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_6:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 1/3] RISC-V: Improve codegen for negative repeating large constants

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:2d69f0e3a2e86b3d637cf2440d12ced120db31b7

commit 2d69f0e3a2e86b3d637cf2440d12ced120db31b7
Author: Raphael Moreira Zinsly 
Date:   Wed Sep 4 17:21:24 2024 -0600

[PATCH 1/3] RISC-V: Improve codegen for negative repeating large constants

Improve handling of constants where its upper and lower 32-bit
halves are the same and have negative values.

e.g. for:

unsigned long f (void) { return 0xf0f0f0f0f0f0f0f0UL; }

Without the patch:

li  a0,-252645376
addia0,a0,240
li  a5,-252645376
addia5,a5,241
sllia5,a5,32
add a0,a5,a0

With the patch:

li  a5,252645376
addia5,a5,-241
sllia0,a5,32
add a0,a0,a5
xoria0,a0,-1

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_split_integer_cost): Adjust the
cost of negative repeating constants.
(riscv_split_integer): Handle negative repeating constants.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/synthesis-11.c: New test.

(cherry picked from commit cbea72b265e4c9d1a595bd3ecd11b325021925d0)

Diff:
---
 gcc/config/riscv/riscv.cc | 29 +++
 gcc/testsuite/gcc.target/riscv/synthesis-11.c | 28 ++
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 06a2c66ea74..74de8c7f3d7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1241,18 +1241,20 @@ static int
 riscv_split_integer_cost (HOST_WIDE_INT val)
 {
   int cost;
-  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
-  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+  unsigned HOST_WIDE_INT loval = val & 0x;
+  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
   struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
 
   /* This routine isn't used by pattern conditions, so whether or
  not to allow new pseudos can be a function of where we are in the
- RTL pipeline.  We shouldn't need scratch pseudos for this case
- anyway.  */
+ RTL pipeline.  */
   bool allow_new_pseudos = can_create_pseudo_p ();
   cost = 2 + riscv_build_integer (codes, loval, VOIDmode, allow_new_pseudos);
   if (loval != hival)
 cost += riscv_build_integer (codes, hival, VOIDmode, allow_new_pseudos);
+  else if ((loval & 0x8000) != 0)
+cost = 3 + riscv_build_integer (codes, ~loval & 0x,
+   VOIDmode, allow_new_pseudos);
 
   return cost;
 }
@@ -1275,11 +1277,16 @@ riscv_integer_cost (HOST_WIDE_INT val, bool 
allow_new_pseudos)
 static rtx
 riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
 {
-  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
-  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+  unsigned HOST_WIDE_INT loval = val & 0x;
+  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
   rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);
+  rtx x = gen_reg_rtx (mode);
+  bool eq_neg = (loval == hival) && ((loval & 0x8000) != 0);
 
-  riscv_move_integer (lo, lo, loval, mode);
+  if (eq_neg)
+riscv_move_integer (lo, lo, ~loval & 0x, mode);
+  else
+riscv_move_integer (lo, lo, loval, mode);
 
   if (loval == hival)
   hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
@@ -1290,7 +1297,13 @@ riscv_split_integer (HOST_WIDE_INT val, machine_mode 
mode)
 }
 
   hi = force_reg (mode, hi);
-  return gen_rtx_PLUS (mode, hi, lo);
+  x = gen_rtx_PLUS (mode, hi, lo);
+  if (eq_neg)
+{
+  x = force_reg (mode, x);
+  x = gen_rtx_XOR (mode, x, GEN_INT (-1));
+}
+  return x;
 }
 
 /* Return true if X is a thread-local symbol.  */
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-11.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
new file mode 100644
index 000..89e48edb2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 60 } } */
+
+
+
+unsigned long foo_0xf857f2def857f2de(void) { return 0xf857f2def857f2deUL; }
+unsigned long foo_0x99660e6399660e63(void)

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Fix scan test output after recent path-splitting changes

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:80ad2d7bf8d40f8fb384855b06b8c97ed5371015

commit 80ad2d7bf8d40f8fb384855b06b8c97ed5371015
Author: Jeff Law 
Date:   Wed Sep 4 12:07:09 2024 -0600

[RISC-V] Fix scan test output after recent path-splitting changes

The recent path splitting changes from Andrew result in identifying more
saturation idioms instead of just identifying an overflow check.  As a 
result
many of the tests in the RISC-V port started failing a scan check on the
.expand output.

As expected, identifying a saturation idiom is more helpful than 
identifying an
overflow check and the resultant code is better based on my spot checks.

So the right thing to do is to expect more saturation intrinsics in the 
.expand
output.

I've verified this fixes the regressions for riscv32-elf and riscv64-elf.
Pushing to the trunk.

gcc/testsuite
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Adjust
expected output.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c:
Likewise.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c:
Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Likewise.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Likewise.

(cherry picked from commit 0455e85e4eda7d80bda967914d634fe5b71b7ffc)

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c | 2 +-
 gcc/testsuite/gcc.target/

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Fix sign/carry bit handling in ext-dce.

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:8754b967c81742fa33f65a75ed87834feeff95b9

commit 8754b967c81742fa33f65a75ed87834feeff95b9
Author: Jeff Law 
Date:   Mon Jul 15 16:57:44 2024 -0600

Fix sign/carry bit handling in ext-dce.

My change to fix a ubsan issue broke handling propagation of the carry/sign 
bit
down through a right shift.  Thanks to Andreas for the analysis and proposed
fix and Sergei for the testcase.

PR rtl-optimization/115876
PR rtl-optimization/115916
gcc/
* ext-dce.cc (carry_backpropagate): Make return type unsigned as 
well.
Cast to signed for right shift to preserve sign bit.

gcc/testsuite/

* g++.dg/torture/pr115916.C: New test.

Co-author: Andreas Schwab 
Co-author: Sergei Trofimovich 

(cherry picked from commit 94b21f13763638f64e83e7f9959c7f1523b9eaed)

Diff:
---
 gcc/ext-dce.cc  |  4 +-
 gcc/testsuite/g++.dg/torture/pr115916.C | 90 +
 2 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index dcd55cf1183..2f3514ae797 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -480,7 +480,7 @@ binop_implies_op2_fully_live (rtx_code code)
binop_implies_op2_fully_live (e.g. shifts), the computed mask may
exclusively pertain to the first operand.  */
 
-HOST_WIDE_INT
+unsigned HOST_WIDE_INT
 carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
 {
   if (mask == 0)
@@ -511,7 +511,7 @@ carry_backpropagate (unsigned HOST_WIDE_INT mask, enum 
rtx_code code, rtx x)
 case ASHIFT:
   if (CONST_INT_P (XEXP (x, 1))
  && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode)))
-   return mask >> INTVAL (XEXP (x, 1));
+   return (HOST_WIDE_INT)mask >> INTVAL (XEXP (x, 1));
   return (2ULL << floor_log2 (mask)) - 1;
 
 /* We propagate for the shifted operand, but not the shift
diff --git a/gcc/testsuite/g++.dg/torture/pr115916.C 
b/gcc/testsuite/g++.dg/torture/pr115916.C
new file mode 100644
index 000..3d788678eaa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr115916.C
@@ -0,0 +1,90 @@
+/* { dg-do run } */
+
+#include 
+#include 
+
+struct ve {
+ve() = default;
+ve(const ve&) = default;
+ve& operator=(const ve&) = default;
+
+// note that the code usually uses the first half of this array
+uint8_t raw[16] = {};
+};
+
+static ve First8_(void) {
+ve m;
+__builtin_memset(m.raw, 0xff, 8);
+return m;
+}
+
+static ve And_(ve a, ve b) {
+ve au;
+__builtin_memcpy(au.raw, a.raw, 16);
+for (size_t i = 0; i < 8; ++i) {
+au.raw[i] &= b.raw[i];
+}
+return au;
+}
+
+__attribute__((noipa, optimize(0)))
+static void vec_assert(ve a) {
+if (a.raw[6] != 0x06 && a.raw[6] != 0x07)
+__builtin_trap();
+}
+
+static ve Reverse4_(ve v) {
+ve ret;
+for (size_t i = 0; i < 8; i += 4) {
+ret.raw[i + 0] = v.raw[i + 3];
+ret.raw[i + 1] = v.raw[i + 2];
+ret.raw[i + 2] = v.raw[i + 1];
+ret.raw[i + 3] = v.raw[i + 0];
+}
+return ret;
+}
+
+static ve DupEven_(ve v) {
+for (size_t i = 0; i < 8; i += 2) {
+v.raw[i + 1] = v.raw[i];
+}
+return v;
+}
+
+template 
+ve Per4LaneBlockShuffle_(ve v) {
+if (b) {
+return Reverse4_(v);
+} else {
+return DupEven_(v);
+}
+}
+
+template 
+static inline __attribute__((always_inline)) void 
DoTestPer4LaneBlkShuffle(const ve v) {
+ve actual = Per4LaneBlockShuffle_(v);
+const auto valid_lanes_mask = First8_();
+ve actual_masked = And_(valid_lanes_mask, actual);
+vec_assert(actual_masked);
+}
+
+static void DoTestPer4LaneBlkShuffles(const ve v) {
+alignas(128) uint8_t src_lanes[8];
+__builtin_memcpy(src_lanes, v.raw, 8);
+// need both, hm
+DoTestPer4LaneBlkShuffle(v);
+DoTestPer4LaneBlkShuffle(v);
+}
+
+__attribute__((noipa, optimize(0)))
+static void bug(void) {
+   uint8_t iv[8] = {1,2,3,4,5,6,7,8};
+   ve v;
+   __builtin_memcpy(v.raw, iv, 8);
+   DoTestPer4LaneBlkShuffles(v);
+}
+
+int main(void) {
+bug();
+}
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Just the testsuite changes...

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:5c27d6ce4cc63259d12e77eeda671adbc0d11530

commit 5c27d6ce4cc63259d12e77eeda671adbc0d11530
Author: Pan Li 
Date:   Sat Aug 24 10:16:28 2024 +0800

Just the testsuite changes...

Match: Add int type fits check for .SAT_ADD imm operand

This patch would like to add strict check for imm operand of .SAT_ADD
matching.  We have no type checking for imm operand in previous, which
may result in unexpected IL to be catched by .SAT_ADD pattern.

We leverage the int_fits_type_p here to make sure the imm operand is
a int type fits the result type of the .SAT_ADD.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, 12);
uint8_t sum = .SAT_ADD (a, 12u);
uint8_t sum = .SAT_ADD (a, 126u);
uint8_t sum = .SAT_ADD (a, 128u);
uint8_t sum = .SAT_ADD (a, 228);
uint8_t sum = .SAT_ADD (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, -1);
uint8_t sum = .SAT_ADD (a, 256u);
uint8_t sum = .SAT_ADD (a, 257);

The below test suite are passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_ADD imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm.
* gcc.target/riscv/sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-10.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-11.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-12.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-13.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-14.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-15.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-16.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-17.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-18.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-19.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-20.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-21.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-22.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-23.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-24.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-25.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-26.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-27.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-28.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-29.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-30.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-31.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-32.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-33.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-34.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-35.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-36.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-37.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-38.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-39.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-40.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-41.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-42.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-43.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-44.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-45.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-46.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-47.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-48.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-49.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-50.c: New test.
* gc

[gcc r15-3461] i386: Integrate BFmode for Enhanced Vectorization in ix86_preferred_simd_mode

2024-09-04 Thread Levy Hsu via Gcc-cvs

https://gcc.gnu.org/g:b851bce473d8e573b1b586c71d47e71067b4fa66

commit r15-3461-gb851bce473d8e573b1b586c71d47e71067b4fa66
Author: Levy Hsu 
Date:   Tue Sep 3 15:44:06 2024 +0930

i386: Integrate BFmode for Enhanced Vectorization in 
ix86_preferred_simd_mode

This change adds BFmode support to the ix86_preferred_simd_mode function
enhancing SIMD vectorization for BF16 operations. The update ensures
optimized usage of SIMD capabilities improving performance and aligning
vector sizes with processor capabilities.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_preferred_simd_mode): Add BFmode 
Support.

Diff:
---
 gcc/config/i386/i386.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e8744fa77ea..c18a2647c2a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24574,6 +24574,14 @@ ix86_preferred_simd_mode (scalar_mode mode)
}
   return word_mode;
 
+case E_BFmode:
+  if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+   return V32BFmode;
+  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
+   return V16BFmode;
+  else
+   return V8BFmode;
+
 case E_SFmode:
   if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
return V16SFmode;

[gcc r15-3462] i386: Support partial signbit/xorsign/copysign/abs/neg/and/xor/ior/andn for V2BF/V4BF

2024-09-04 Thread Levy Hsu via Gcc-cvs

https://gcc.gnu.org/g:d0c86be1ce7131aeca2cf3304a8d65a00da4f12a

commit r15-3462-gd0c86be1ce7131aeca2cf3304a8d65a00da4f12a
Author: Levy Hsu 
Date:   Mon Sep 2 13:27:46 2024 +0930

i386: Support partial signbit/xorsign/copysign/abs/neg/and/xor/ior/andn for 
V2BF/V4BF

This patch adds support for bf16 operations in V2BF and V4BF modes on i386,
handling signbit, xorsign, copysign, abs, neg, and various logical 
operations.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_build_const_vector): Add V2BF/V4BF.
(ix86_build_signbit_mask): Add V2BF/V4BF.
* config/i386/mmx.md: Modified supported logic op to use VHBF_32_64.

gcc/testsuite/ChangeLog:

* gcc.target/i386/part-vect-absnegbf.c: New test.

Diff:
---
 gcc/config/i386/i386.cc|  4 ++
 gcc/config/i386/mmx.md | 74 ++--
 gcc/testsuite/gcc.target/i386/part-vect-absnegbf.c | 81 ++
 3 files changed, 124 insertions(+), 35 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c18a2647c2a..707b75a6d5d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16187,6 +16187,8 @@ ix86_build_const_vector (machine_mode mode, bool vect, 
rtx value)
 case E_V32BFmode:
 case E_V16BFmode:
 case E_V8BFmode:
+case E_V4BFmode:
+case E_V2BFmode:
   n_elt = GET_MODE_NUNITS (mode);
   v = rtvec_alloc (n_elt);
   scalar_mode = GET_MODE_INNER (mode);
@@ -16226,6 +16228,8 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, 
bool invert)
 case E_V32BFmode:
 case E_V16BFmode:
 case E_V8BFmode:
+case E_V4BFmode:
+case E_V2BFmode:
   vec_mode = mode;
   imode = HImode;
   break;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fac90cfd4d4..0cfa9bdabc3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -121,7 +121,7 @@
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr mmxintvecmode
   [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
-   (V4HF "V4HI") (V2HF "V2HI")])
+   (V4HF "V4HI") (V2HF "V2HI") (V4BF "V4HI") (V2BF "V2HI")])
 
 (define_mode_attr mmxintvecmodelower
   [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
@@ -2147,18 +2147,22 @@
   DONE;
 })
 
+(define_mode_iterator VHBF_32_64
+ [V2BF (V4BF "TARGET_MMX_WITH_SSE")
+  V2HF (V4HF "TARGET_MMX_WITH_SSE")]) 
+
 (define_expand "2"
-  [(set (match_operand:VHF_32_64 0 "register_operand")
-   (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand")))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand")
+   (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand")))]
   "TARGET_SSE"
   "ix86_expand_fp_absneg_operator (, mode, operands); DONE;")
 
 (define_insn_and_split "*mmx_"
-  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
-   (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
-   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+   (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x,x")))
+   (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
   "TARGET_SSE"
   "#"
   "&& reload_completed"
@@ -2171,11 +2175,11 @@
   [(set_attr "isa" "noavx,noavx,avx")])
 
 (define_insn_and_split "*mmx_nabs2"
-  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
-   (neg:VHF_32_64
- (abs:VHF_32_64
-   (match_operand:VHF_32_64 1 "register_operand" "0,x,x"
-   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+   (neg:VHBF_32_64
+ (abs:VHBF_32_64
+   (match_operand:VHBF_32_64 1 "register_operand" "0,x,x"
+   (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
   "TARGET_SSE"
   "#"
   "&& reload_completed"
@@ -2466,11 +2470,11 @@
 ;
 
 (define_insn "*mmx_andnot3"
-  [(set (match_operand:VHF_32_64 0 "register_operand""=x,x")
-   (and:VHF_32_64
- (not:VHF_32_64
-   (match_operand:VHF_32_64 1 "register_operand" "0,x"))
- (match_operand:VHF_32_64 2 "register_operand"   "x,x")))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand""=x,x")
+   (and:VHBF_32_64
+ (not:VHBF_32_64
+   (match_operand:VHBF_32_64 1 "register_operand" "0,x"))
+ (match_operand:VHBF_32_64 2 "register_operand"   "x,x")))]
   "TARGET_SSE"
   "@
andnps\t{%2, %0|%0, %2}
@@ -2481,10 +2485,10 @@
(set_attr "mode" "V4SF")])
 
 (define_insn "3"
-  [(set (match_operand:VHF_32_64 0 "register_operand"   "=x,x")
-   (any_logic:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "%0,x")
- (match_o

[gcc r15-3464] i386: Support partial vectorized FMA for V2BF/V4BF

2024-09-04 Thread Levy Hsu via Gcc-cvs

https://gcc.gnu.org/g:f9ca3fd1fe30f3ee6725bfe4a612e9a1234c11ac

commit r15-3464-gf9ca3fd1fe30f3ee6725bfe4a612e9a1234c11ac
Author: Levy Hsu 
Date:   Mon Sep 2 13:52:38 2024 +0800

i386: Support partial vectorized FMA for V2BF/V4BF

This patch introduces support for vectorized FMA operations for bf16 types 
in
V2BF and V4BF modes on the i386 architecture. New mode iterators and
define_expand entries for fma, fnma, fms, and fnms operations are added in
mmx.md, enhancing the i386 backend to handle these complex arithmetic 
operations.

gcc/ChangeLog:

* config/i386/mmx.md (TARGET_MMX_WITH_SSE): New mode iterator 
VBF_32_64
(fma4): define_expand for V2BF/V4BF fma4.
(fnma4): define_expand for V2BF/V4BF fnma4.
(fms4): define_expand for V2BF/V4BF fms4.
(fnms4): define_expand for V2BF/V4BF fnms4.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c: New test.

Diff:
---
 gcc/config/i386/mmx.md | 80 ++
 .../i386/avx10_2-partial-bf-vector-fma-1.c | 57 +++
 2 files changed, 137 insertions(+)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0cfa9bdabc3..4bc191b874b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2692,6 +2692,86 @@
   DONE;
 })
 
+(define_expand "fma4"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+   (fma:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")
+ (match_operand:VBF_32_64 3 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode, force_reg (mode, operands[1]), 
mode);
+  rtx op2 = lowpart_subreg (V8BFmode, force_reg (mode, operands[2]), 
mode);
+  rtx op3 = lowpart_subreg (V8BFmode, force_reg (mode, operands[3]), 
mode);
+
+  emit_insn (gen_fmav8bf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (mode, op0, V8BFmode));
+  DONE;
+})
+
+(define_expand "fms4"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+   (fma:VBF_32_64
+ (match_operand:VBF_32_64   1 "nonimmediate_operand")
+ (match_operand:VBF_32_64   2 "nonimmediate_operand")
+ (neg:VBF_32_64
+   (match_operand:VBF_32_64 3 "nonimmediate_operand"]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode, force_reg (mode, operands[1]), 
mode);
+  rtx op2 = lowpart_subreg (V8BFmode, force_reg (mode, operands[2]), 
mode);
+  rtx op3 = lowpart_subreg (V8BFmode, force_reg (mode, operands[3]), 
mode);
+
+  emit_insn (gen_fmsv8bf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (mode, op0, V8BFmode));
+  DONE;
+})
+
+(define_expand "fnma4"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+   (fma:VBF_32_64
+ (neg:VBF_32_64
+   (match_operand:VBF_32_64 1 "nonimmediate_operand"))
+ (match_operand:VBF_32_64   2 "nonimmediate_operand")
+ (match_operand:VBF_32_64   3 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode, force_reg (mode, operands[1]), 
mode);
+  rtx op2 = lowpart_subreg (V8BFmode, force_reg (mode, operands[2]), 
mode);
+  rtx op3 = lowpart_subreg (V8BFmode, force_reg (mode, operands[3]), 
mode);
+
+  emit_insn (gen_fnmav8bf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (mode, op0, V8BFmode));
+  DONE;
+})
+
+(define_expand "fnms4"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+   (fma:VBF_32_64
+ (neg:VBF_32_64
+   (match_operand:VBF_32_64 1 "nonimmediate_operand"))
+ (match_operand:VBF_32_64   2 "nonimmediate_operand")
+ (neg:VBF_32_64
+   (match_operand:VBF_32_64 3 "nonimmediate_operand"]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode, force_reg (mode, operands[1]), 
mode);
+  rtx op2 = lowpart_subreg (V8BFmode, force_reg (mode, operands[2]), 
mode);
+  rtx op3 = lowpart_subreg (V8BFmode, force_reg (mode, operands[3]), 
mode);
+
+  emit_insn (gen_fnmsv8bf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (mode, op0, V8BFmode));
+  DONE;
+})
+
 
 ;;
 ;; Parallel half-precision floating point complex type operations
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c
new file mode 100644
index 000..72e17e99603
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ 
\\t\

[gcc(refs/users/meissner/heads/work177-vpair)] Initial vector-pair.h support

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:4362442ef3a8738a62ba36dfab2829705de71044

commit 4362442ef3a8738a62ba36dfab2829705de71044
Author: Michael Meissner 
Date:   Wed Sep 4 22:18:39 2024 -0400

Initial vector-pair.h support

2024-09-03  Michael Meissner  

gcc/

* config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers.
* config/rs6000/rs6000.cc (print_operand): Add %S output modifier.
* config/rs6000/vector-pair.h: New file.

Diff:
---
 gcc/config.gcc  |   2 +-
 gcc/config/rs6000/rs6000.cc |  46 +
 gcc/config/rs6000/vector-pair.h | 430 
 3 files changed, 477 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0b794e977f6..3627bed8b86 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -537,7 +537,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h"
extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
-   extra_headers="${extra_headers} amo.h"
+   extra_headers="${extra_headers} amo.h vector-pair.h"
case x$with_cpu in

xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f320d1762c3..fdcda403672 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -14414,6 +14414,52 @@ print_operand (FILE *file, rtx x, int code)
fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
   return;
 
+case 'S':
+  /* Like %L, but assume the second register is a VSX register.  This
+works on VSX registers and memory addresses.  */
+  if (REG_P (x))
+   {
+ int reg = REGNO (x);
+ if (!VSX_REGNO_P (reg) || (reg & 1) != 0)
+   output_operand_lossage ("invalid %%S value");
+ else
+   {
+ int vsx_reg = (FP_REGNO_P (reg)
+? reg - 32
+: reg - FIRST_ALTIVEC_REGNO + 32) + 1;
+
+#ifdef TARGET_REGNAMES  
+ if (TARGET_REGNAMES)
+   fprintf (file, "%%vs%d", vsx_reg);
+ else
+#endif
+   fprintf (file, "%d", vsx_reg);
+   }
+   }
+
+  else if (MEM_P (x))
+   {
+ machine_mode mode = GET_MODE (x);
+ /* Vectors and vector pairs can't have auto increment addreses.  */
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC
+ || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+   output_operand_lossage ("invalid auto-increment %%S value");
+ else
+   output_address (mode, XEXP (adjust_address_nv (x, SImode,
+  UNITS_PER_WORD),
+ 0));
+
+ if (small_data_operand (x, GET_MODE (x)))
+   fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+reg_names[SMALL_DATA_REG]);
+   }
+
+  else
+   output_operand_lossage ("invalid %%S value");
+
+  return;
+
 case 't':
   /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
   if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
new file mode 100644
index 000..91725ef43ec
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.h
@@ -0,0 +1,430 @@
+/* PowerPC vector pair include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   Contributed by Aldy Hernandez (al...@redhat.com).
+   Rewritten by Paolo Bonzini (bonz...@gnu.org).
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+/* Provide support for vector pairs, even on systems that do not have native
+   support for loading and storing pairs of vectors.  */
+
+#ifndef _VECTOR_PAIR_H
+#define _VECTOR_PAIR_H

[gcc(refs/users/meissner/heads/work177-vpair)] Add support for vector pair unary and binary operations.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ffd0ad5a0e7fe838b9137ebf7094e6cb8510c991

commit ffd0ad5a0e7fe838b9137ebf7094e6cb8510c991
Author: Michael Meissner 
Date:   Wed Sep 4 22:45:52 2024 -0400

Add support for vector pair unary and binary operations.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
built-in functions for vector pair support.
* config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
enumeration.
(vpair_split_unary): New declaration.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.cc (vpair_split_unary): New function to split
vector pair operations.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
* config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
* config/rs6000/vector-pair.md: New file.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
documentation for the new vector pair built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-1.c: New test.
* gcc.target/powerpc/vector-pair-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  62 +
 gcc/config/rs6000/rs6000-protos.h|  12 ++
 gcc/config/rs6000/rs6000.cc  |  67 +
 gcc/config/rs6000/rs6000.md  |   1 +
 gcc/config/rs6000/t-rs6000   |   1 +
 gcc/config/rs6000/vector-pair.h  | 140 +++
 gcc/config/rs6000/vector-pair.md | 164 +++
 gcc/doc/extend.texi  |  51 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-1.c |  87 
 gcc/testsuite/gcc.target/powerpc/vector-pair-2.c |  86 
 10 files changed, 553 insertions(+), 118 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcf..cf22389542d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3933,3 +3933,65 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+;; Vector pair built-in functions with float elements
+  v256 __builtin_vpair_f32_abs (v256);
+VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_add (v256, v256);
+VPAIR_F32_ADD vpair_add_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_div (v256, v256);
+VPAIR_F32_DIV vpair_div_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_max (v256, v256);
+VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_min (v256, v256);
+VPAIR_F32_MIN vpair_smin_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_mul (v256, v256);
+VPAIR_F32_MUL vpair_mul_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_nabs (v256);
+VPAIR_F32_NABS vpair_nabs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_neg (v256);
+VPAIR_F32_NEG vpair_neg_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_sqrt (v256);
+VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_sub (v256, v256);
+VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
+
+;; Vector pair built-in functions with double elements
+  v256 __builtin_vpair_f64_abs (v256);
+VPAIR_F64_ABS vpair_abs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_add (v256, v256);
+VPAIR_F64_ADD vpair_add_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_div (v256, v256);
+VPAIR_F64_DIV vpair_div_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_max (v256, v256);
+VPAIR_F64_MAX vpair_smax_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_min (v256, v256);
+VPAIR_F64_MIN vpair_smin_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_mul (v256, v256);
+VPAIR_F64_MUL vpair_mul_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_nabs (v256);
+VPAIR_F64_NABS vpair_nabs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_neg (v256);
+VPAIR_F64_NEG vpair_neg_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_sqrt (v256);
+VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_sub (v256, v256);
+VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index da658cd5ab2..7b8b3b0c237 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,18 @@ extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
 
+/* If we are splitting a vector pair unary operator into two separate vector
+   operations, we need to generate a NEG if this is NABS.  */
+
+enum vpair_split_unary {
+  VPAIR_SPLIT_NORMAL,  /* No extra processing is needed.  */
+  VPAIR_SPLIT_NEGATE   /* Wrap operation with a NEG.  */
+};
+
+extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code,
+  enum vpair_split

[gcc(refs/users/meissner/heads/work177-vpair)] Add vector pair init and splat.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:7f65d3e030ef489b057c8a8409a3aefc7cd62ede

commit 7f65d3e030ef489b057c8a8409a3aefc7cd62ede
Author: Michael Meissner 
Date:   Wed Sep 4 23:12:07 2024 -0400

Add vector pair init and splat.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
built-in function.
(__builtin_vpair_f32_splat): Likewise.
(__builtin_vpair_f64_splat): Likewise.
* config/rs6000/vector-pair.h: Update power10 splat patterns.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
(UNSPEC_VPAIR_SPLAT): Likewise.
(VPAIR_SPLAT_VMODE): New mode iterator.
(VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
(vpair_splat_name): Likewise.
(vpair_zero): New insn.
(vpair_splat_): New define_expand.
(vpair_splat__internal): New insns.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-5.c: New test.
* gcc.target/powerpc/vector-pair-6.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  10 +++
 gcc/config/rs6000/vector-pair.h  |  13 +--
 gcc/config/rs6000/vector-pair.md | 102 ++-
 gcc/doc/extend.texi  |   9 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-5.c |  54 
 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c |  56 +
 6 files changed, 232 insertions(+), 12 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 2bac0e58971..e0b1c744f7c 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3934,6 +3934,10 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
 
+;; Vector pair built-in functions.
+  v256 __builtin_vpair_zero ();
+VPAIR_ZERO vpair_zero {mma}
+
 ;; Vector pair built-in functions with float elements
   v256 __builtin_vpair_f32_abs (v256);
 VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -3974,6 +3978,9 @@
   v256 __builtin_vpair_f32_nfms (v256, v256, v256);
 VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
 
+  v256 __builtin_vpair_f32_splat (float);
+VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -4017,5 +4024,8 @@
   v256 __builtin_vpair_f64_nfms (v256, v256, v256);
 VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
 
+  v256 __builtin_vpair_f64_splat (double);
+VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index e399e89e8e4..3c03e44f3f4 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -38,11 +38,7 @@ typedef __vector_pair vector_pair_t;
 #define VPAIR_FP_SECOND"S" /* Access 2nd VSX 
register.  */
 
 /* vector pair double operations on power10.  */
-#define vpair_f64_splat(R, A)  \
-  __asm__ ("xxpermdi %x0,%x1,%x1,0" "\n\t" \
-   "xxpermdi %" VPAIR_FP_SECOND "0,%x1,%x1,0"  \
-   : "=" VPAIR_FP_CONSTRAINT (*R)  \
-  : "wa" ((A)))
+#define vpair_f64_splat(R, A)  (*R) = __builtin_vpair_f64_splat (A)
 
 #define vpair_f64_neg(R,A) (*R) = __builtin_vpair_f64_neg (*A)
 #define vpair_f64_abs(R,A) (*R) = __builtin_vpair_f64_abs (*A)
@@ -63,12 +59,7 @@ typedef __vector_pair vector_pair_t;
 
 
 /* vector pair float operations on power10.  */
-#define vpair_f32_splat(R, A)  \
-  __asm__ ("xscvdpspn %x0,%x1" "\n\t"  \
-   "xxspltw %x0,%x0,0" "\n\t"  \
-   "xxlor %" VPAIR_FP_SECOND "0,%x0,%x0"   \
-   : "=" VPAIR_FP_CONSTRAINT (*R)  \
-  : "wa" (((float) (A
+#define vpair_f32_splat(R, A)  (*R) = __builtin_vpair_f32_splat (A)
 
 #define vpair_f32_neg(R,A) (*R) = __builtin_vpair_f32_neg (*A)
 #define vpair_f32_abs(R,A) (*R) = __builtin_vpair_f32_abs (*A)
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index fe8004b75d5..6fbc90cf528 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -39,7 +39,9 @@
UNSPEC_VPAIR_PLUS
UNSPEC_VPAIR_SMAX
UNSPEC_VPAIR_SMIN
-   UNSPEC_VPAIR_SQRT])
+   UNSPEC_VPAIR_SPLAT
+   UNSPEC_VPAIR_SQRT
+   UNSPEC_VPAIR_ZERO])
 
 ;; Vector pair element ID that defines the scaler element within the vector 
pair.
 (define_c_enum "vpair_element"
@@ -102,6 +104,104 @@
 ;; Map the scalar element ID into the appropriate insn type for divide.
 (define_int_attr vpair

[gcc(refs/users/meissner/heads/work177-vpair)] Add support for vector pair fma operations.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8

commit bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8
Author: Michael Meissner 
Date:   Wed Sep 4 22:55:54 2024 -0400

Add support for vector pair fma operations.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
built-in.
(__builtin_vpair_f32_fms): Likewise.
(__builtin_vpair_f32_nfma): Likewise.
(__builtin_vpair_f32_nfms): Likewise.
(__builtin_vpair_f64_fma): Likewise.
(__builtin_vpair_f64_fms): Likewise.
(__builtin_vpair_f64_nfma): Likewise.
* config/rs6000/rs6000/rs6000-proto.h (enum vpair_split_fma): New
enumeration.
(vpair_split_fma): New declaration.
* config/rs6000/rs6000.cc (vpair_split_fma): New function to split
vector pair FMA operations.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
(vpair_stdname): Add UNSPEC_VPAIR_FMA.
(VPAIR_OP): Likewise.
(vpair_fma_4): New insns.
(vpair_fms_4): Likewise.
(vpair_nfma_4): Likewise.
(vpair_nfms_4): Likewise.
* config/rs6000/vector-pair.h: Update to use the power10 vector pair
built-in functions.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): 
Document new
vector pair fma built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-3.c: New test.
* gcc.target/powerpc/vector-pair-4.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def| 24 ++
 gcc/config/rs6000/rs6000-protos.h| 13 
 gcc/config/rs6000/rs6000.cc  | 71 ++
 gcc/config/rs6000/vector-pair.h  | 57 +++---
 gcc/config/rs6000/vector-pair.md | 96 
 gcc/doc/extend.texi  | 25 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-3.c | 57 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-4.c | 57 ++
 8 files changed, 354 insertions(+), 46 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index cf22389542d..2bac0e58971 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3944,6 +3944,12 @@
   v256 __builtin_vpair_f32_div (v256, v256);
 VPAIR_F32_DIV vpair_div_v8sf3 {mma}
 
+  v256 __builtin_vpair_f32_fma (v256, v256, v256);
+VPAIR_F32_FMA vpair_fma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_fms (v256, v256, v256);
+VPAIR_F32_FMS vpair_fms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_max (v256, v256);
 VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
 
@@ -3962,6 +3968,12 @@
   v256 __builtin_vpair_f32_sqrt (v256);
 VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma}
 
+  v256 __builtin_vpair_f32_nfma (v256, v256, v256);
+VPAIR_F32_NFMA vpair_nfma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_nfms (v256, v256, v256);
+VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -3975,6 +3987,12 @@
   v256 __builtin_vpair_f64_div (v256, v256);
 VPAIR_F64_DIV vpair_div_v4df3 {mma}
 
+  v256 __builtin_vpair_f64_fma (v256, v256, v256);
+VPAIR_F64_FMA vpair_fma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_fms (v256, v256, v256);
+VPAIR_F64_FMS vpair_fms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_max (v256, v256);
 VPAIR_F64_MAX vpair_smax_v4df3 {mma}
 
@@ -3993,5 +4011,11 @@
   v256 __builtin_vpair_f64_sqrt (v256);
 VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma}
 
+  v256 __builtin_vpair_f64_nfma (v256, v256, v256);
+VPAIR_F64_NFMA vpair_nfma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_nfms (v256, v256, v256);
+VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 7b8b3b0c237..bab5fb437c2 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -173,6 +173,19 @@ extern void vpair_split_unary (rtx [], machine_mode, enum 
rtx_code,
   enum vpair_split_unary);
 extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code);
 
+/* When we are splitting a vector pair FMA operation into two vector 
operations, we
+   may need to modify the code generated.  This enumeration encodes the
+   different choices.  */
+
+enum vpair_split_fma {
+  VPAIR_SPLIT_FMA, /* Fused multiply-add.  */
+  VPAIR_SPLIT_FMS, /* Fused multiply-subtract.  */
+  VPAIR_SPLIT_NFMA,/* Fused negate multiply-add.  */
+  VPAIR_SPLIT_NFMS /* Fused negate multiply-subtract.  */
+};
+
+extern void vpair_split_fma (rtx [], ma

[gcc(refs/users/meissner/heads/work177-vpair)] Add vector pair optimizations.

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:4913a857cdd00d6896b06a99b10ad7961681946a

commit 4913a857cdd00d6896b06a99b10ad7961681946a
Author: Michael Meissner 
Date:   Wed Sep 4 23:24:51 2024 -0400

Add vector pair optimizations.

2024-09-04  Michael Meissner  

gcc/

* config/rs6000/vector-pair.md (vpair_add_neg_3): 
New
combiner insn to convert vector plus/neg into a minus operation.
(vpair_fma__merge): Optimize multiply, 
add/subtract, and
negation into fma operations if the user specifies to create fmas.
(vpair_fma__merge): Likewise.
(vpair_fma__merge2): Likewise.
(vpair_nfma__merge): Likewise.
(vpair_nfms__merge): Likewise.
(vpair_nfms__merge2): Likewise.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-7.c: New test.
* gcc.target/powerpc/vector-pair-8.c: Likewise.
* gcc.target/powerpc/vector-pair-9.c: Likewise.
* gcc.target/powerpc/vector-pair-10.c: Likewise.
* gcc.target/powerpc/vector-pair-11.c: Likewise.
* gcc.target/powerpc/vector-pair-12xs.c: Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.md  | 224 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-10.c |  61 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-12.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-7.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-8.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-9.c  |  61 ++
 7 files changed, 512 insertions(+)

diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 6fbc90cf528..01d32e460f6 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -265,6 +265,31 @@
(set (attr "type") (if_then_else (match_test " == DIV")
(const_string "")
(const_string "")))])
+
+;; Optimize vector pair add of a negative value into a subtract.
+(define_insn_and_split "*vpair_add_neg_3"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+   (unspec:OO
+[(match_operand:OO 1 "vsx_register_operand" "wa")
+ (unspec:OO
+  [(match_operand:OO 2 "vsx_register_operand" "wa")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+VPAIR_FP_BINARY))]
+  "TARGET_MMA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
 
 ;; Vector pair fused-multiply (FMA) operations.  The last argument in the
 ;; UNSPEC is a CONST_INT which identifies what the scalar element is.
@@ -358,3 +383,202 @@
 }
   [(set_attr "length" "8")
(set_attr "type" "")])
+
+;; Optimize vector pair multiply and vector pair add into vector pair fma,
+;; providing the compiler would do this optimization for scalar and vectors.
+;; Unlike most of the define_insn_and_splits, this can be done before register
+;; allocation.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
+
+;; Merge multiply and subtract.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+  [(match_dup 3)
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" ""

[gcc(refs/users/meissner/heads/work177-vpair)] Update ChangeLog.*

2024-09-04 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:f78233d1130358c2faf0c2c8f9a7f0818a548bd7

commit f78233d1130358c2faf0c2c8f9a7f0818a548bd7
Author: Michael Meissner 
Date:   Wed Sep 4 23:28:28 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 117 +++-
 1 file changed, 116 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index 958a2af2e81..7717495e866 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,6 +1,121 @@
+ Branch work177-vpair, patch #403 
+
+Add vector pair init and splat.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
+   built-in function.
+   (__builtin_vpair_f32_splat): Likewise.
+   (__builtin_vpair_f64_splat): Likewise.
+   * config/rs6000/vector-pair.h: Update power10 splat patterns.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
+   (UNSPEC_VPAIR_SPLAT): Likewise.
+   (VPAIR_SPLAT_VMODE): New mode iterator.
+   (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
+   (vpair_splat_name): Likewise.
+   (vpair_zero): New insn.
+   (vpair_splat_): New define_expand.
+   (vpair_splat__internal): New insns.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-5.c: New test.
+   * gcc.target/powerpc/vector-pair-6.c: Likewise.
+
+ Branch work177-vpair, patch #402 
+
+Add support for vector pair fma operations.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
+   built-in.
+   (__builtin_vpair_f32_fms): Likewise.
+   (__builtin_vpair_f32_nfma): Likewise.
+   (__builtin_vpair_f32_nfms): Likewise.
+   (__builtin_vpair_f64_fma): Likewise.
+   (__builtin_vpair_f64_fms): Likewise.
+   (__builtin_vpair_f64_nfma): Likewise.
+   * config/rs6000/rs6000/rs6000-proto.h (enum vpair_split_fma): New
+   enumeration.
+   (vpair_split_fma): New declaration.
+   * config/rs6000/rs6000.cc (vpair_split_fma): New function to split
+   vector pair FMA operations.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
+   (vpair_stdname): Add UNSPEC_VPAIR_FMA.
+   (VPAIR_OP): Likewise.
+   (vpair_fma_4): New insns.
+   (vpair_fms_4): Likewise.
+   (vpair_nfma_4): Likewise.
+   (vpair_nfms_4): Likewise.
+   * config/rs6000/vector-pair.h: Update to use the power10 vector pair
+   built-in functions.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new
+   vector pair fma built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-3.c: New test.
+   * gcc.target/powerpc/vector-pair-4.c: Likewise.
+
+ Branch work177-vpair, patch #401 
+
+Add support for vector pair unary and binary operations.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
+   built-in functions for vector pair support.
+   * config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
+   enumeration.
+   (vpair_split_unary): New declaration.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.cc (vpair_split_unary): New function to split
+   vector pair operations.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
+   * config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
+   * config/rs6000/vector-pair.md: New file.
+   * config/rs6000/vector-pair.h: Update power10 functions to call the new
+   vector pair built-in functions.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
+   documentation for the new vector pair built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-1.c: New test.
+   * gcc.target/powerpc/vector-pair-2.c: Likewise.
+
+ Branch work177-vpair, patch #400 
+
+Initial vector-pair.h support
+
+2024-09-03  Michael Meissner  
+
+gcc/
+
+   * config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers.
+   * config/rs6000/rs6000.cc (print_operand): Add %S output modifier.
+   * config/rs6000/vector-pair.h: New file.
+
  Branch work177-vpair, baseline 
 
+Add ChangeLog.vpair and update REVISION.
+
+2024-09-03  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.vpair: New file for branch.
+   * REVISION: Update.
+
 2024-09-03   Michael Meissner  
 
Clone branch
-

[gcc r15-3465] [PATCH] RISC-V: Make the setCC/REE tests robust to instruction selection

2024-09-04 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:de3ca363811a3974e4398ecdb1db2274efd61a1c

commit r15-3465-gde3ca363811a3974e4398ecdb1db2274efd61a1c
Author: Palmer Dabbelt 
Date:   Wed Sep 4 21:34:31 2024 -0600

[PATCH] RISC-V: Make the setCC/REE tests robust to instruction selection

These tests were checking that the output of the setCC instruction was bit
flipped, but it looks like they're really designed to test that
redundant sign extension elimination fires on conditionals from function
inputs.  Jeff just posed a patch to clean this code up with trips up on
the arbitrary xori/snez instruction selection decision changing, so
let's just robustify the tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sge.c: Adjust regex to match the input.
* gcc.target/riscv/sgeu.c: Likewise.
* gcc.target/riscv/sle.c: Likewise.
* gcc.target/riscv/sleu.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/riscv/sge.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/sgeu.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sle.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/sleu.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sge.c 
b/gcc/testsuite/gcc.target/riscv/sge.c
index 5f7e7ae82db..70f934c4d0f 100644
--- a/gcc/testsuite/gcc.target/riscv/sge.c
+++ b/gcc/testsuite/gcc.target/riscv/sge.c
@@ -8,5 +8,5 @@ sge (int x, int y)
   return x >= y;
 }
 
-/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */
+/* { dg-final { scan-assembler "slt\\sa0,a0,a1" } } */
 /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sgeu.c 
b/gcc/testsuite/gcc.target/riscv/sgeu.c
index 234b9aa52bd..0ff21cfe5e0 100644
--- a/gcc/testsuite/gcc.target/riscv/sgeu.c
+++ b/gcc/testsuite/gcc.target/riscv/sgeu.c
@@ -8,5 +8,5 @@ sgeu (unsigned int x, unsigned int y)
   return x >= y;
 }
 
-/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */
+/* { dg-final { scan-assembler "sltu\\sa0,a0,a1" } } */
 /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sle.c 
b/gcc/testsuite/gcc.target/riscv/sle.c
index 3259c191598..770840d0564 100644
--- a/gcc/testsuite/gcc.target/riscv/sle.c
+++ b/gcc/testsuite/gcc.target/riscv/sle.c
@@ -8,5 +8,5 @@ sle (int x, int y)
   return x <= y;
 }
 
-/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */
+/* { dg-final { scan-assembler "sgt\\sa0,a0,a1" } } */
 /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sleu.c 
b/gcc/testsuite/gcc.target/riscv/sleu.c
index 301b8c32eb7..ae00ccc2067 100644
--- a/gcc/testsuite/gcc.target/riscv/sleu.c
+++ b/gcc/testsuite/gcc.target/riscv/sleu.c
@@ -8,5 +8,5 @@ sleu (unsigned int x, unsigned int y)
   return x <= y;
 }
 
-/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */
+/* { dg-final { scan-assembler "sgtu\\sa0,a0,a1"} } */
 /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */

[gcc r15-3466] Handle 'NUM' in 'PUSH_INSERT_PASSES_WITHIN'

2024-09-04 Thread Thomas Schwinge via Gcc-cvs

https://gcc.gnu.org/g:22af50a48d2b4371ee6d805d95f56212fbe611b1

commit r15-3466-g22af50a48d2b4371ee6d805d95f56212fbe611b1
Author: Thomas Schwinge 
Date:   Fri Jun 28 12:10:12 2024 +0200

Handle 'NUM' in 'PUSH_INSERT_PASSES_WITHIN'

..., such that also for repeated 'NEXT_PASS', 'PUSH_INSERT_PASSES_WITHIN' 
for a
given 'PASS', the 'PUSH_INSERT_PASSES_WITHIN' applies to the preceeding
'NEXT_PASS', and not unconditionally applies to the first 'NEXT_PASS'.

gcc/
* gen-pass-instances.awk: Handle 'PUSH_INSERT_PASSES_WITHIN'.
* pass_manager.h (PUSH_INSERT_PASSES_WITHIN): Adjust.
* passes.cc (PUSH_INSERT_PASSES_WITHIN): Likewise.

Diff:
---
 gcc/gen-pass-instances.awk | 28 +---
 gcc/pass_manager.h |  2 +-
 gcc/passes.cc  |  6 +++---
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk
index 449889663f7..871ac0cdb52 100644
--- a/gcc/gen-pass-instances.awk
+++ b/gcc/gen-pass-instances.awk
@@ -16,7 +16,7 @@
 
 # This Awk script takes passes.def and writes pass-instances.def,
 # counting the instances of each kind of pass, adding an instance number
-# to everywhere that NEXT_PASS is used.
+# to everywhere that NEXT_PASS or PUSH_INSERT_PASSES_WITHIN are used.
 # Also handle INSERT_PASS_AFTER, INSERT_PASS_BEFORE and REPLACE_PASS
 # directives.
 #
@@ -222,9 +222,31 @@ END {
  if (with_arg)
printf ",%s", with_arg;
  printf ")%s\n", postfix;
+
+ continue;
}
-  else
-   print lines[i];
+
+  ret = parse_line(lines[i], "PUSH_INSERT_PASSES_WITHIN");
+  if (ret)
+   {
+ pass_name = args[1];
+
+ pass_num = pass_final_counts[pass_name];
+ if (!pass_num)
+   {
+ print "ERROR: Can't locate instance of the pass mentioned in " 
pass_name;
+ exit 1;
+   }
+
+ printf "%s", prefix;
+ printf "PUSH_INSERT_PASSES_WITHIN";
+ printf " (%s, %s", pass_name, pass_num;
+ printf ")%s\n", postfix;
+
+ continue;
+   }
+
+  print lines[i];
 }
 }
 
diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h
index be324d5dff7..edd775e9a9c 100644
--- a/gcc/pass_manager.h
+++ b/gcc/pass_manager.h
@@ -126,7 +126,7 @@ private:
 opt_pass *pass_copy_prop_8;  */
 
 #define INSERT_PASSES_AFTER(PASS)
-#define PUSH_INSERT_PASSES_WITHIN(PASS)
+#define PUSH_INSERT_PASSES_WITHIN(PASS, NUM)
 #define POP_INSERT_PASSES()
 #define NEXT_PASS(PASS, NUM) opt_pass *PASS ## _ ## NUM
 #define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS (PASS, NUM)
diff --git a/gcc/passes.cc b/gcc/passes.cc
index e2a07ebedf5..775c3e46302 100644
--- a/gcc/passes.cc
+++ b/gcc/passes.cc
@@ -1585,7 +1585,7 @@ pass_manager::pass_manager (context *ctxt)
 
   /* Zero-initialize pass members.  */
 #define INSERT_PASSES_AFTER(PASS)
-#define PUSH_INSERT_PASSES_WITHIN(PASS)
+#define PUSH_INSERT_PASSES_WITHIN(PASS, NUM)
 #define POP_INSERT_PASSES()
 #define NEXT_PASS(PASS, NUM) PASS ## _ ## NUM = NULL
 #define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS (PASS, NUM)
@@ -1615,9 +1615,9 @@ pass_manager::pass_manager (context *ctxt)
 *p = NULL; \
   }
 
-#define PUSH_INSERT_PASSES_WITHIN(PASS) \
+#define PUSH_INSERT_PASSES_WITHIN(PASS, NUM) \
   { \
-opt_pass **p = &(PASS ## _1)->sub;
+opt_pass **p = &(PASS ## _ ## NUM)->sub;
 
 #define POP_INSERT_PASSES() \
   }

84 matches

Mail list logo