[gcc r15-5012] Check LOOP_VINFO_PEELING_FOR_GAPS on epilog is supported

2024-11-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:76048bd0693e30a5abc67aa6dcce9f4973ea208e

commit r15-5012-g76048bd0693e30a5abc67aa6dcce9f4973ea208e
Author: Richard Biener 
Date:   Mon Nov 4 13:03:33 2024 +0100

Check LOOP_VINFO_PEELING_FOR_GAPS on epilog is supported

We need to check that an epilogue doesn't require
LOOP_VINFO_PEELING_FOR_GAPS in case the main loop didn't (the other
way around is OK); otherwise the computation of whether the epilog is
executed or not gets out of sync.

* tree-vect-loop.cc (vect_analyze_loop_2): Move
vect_analyze_loop_costing after check whether we can do
peeling.  Add check on LOOP_VINFO_PEELING_FOR_GAPS for
epilogues.

Diff:
---
 gcc/tree-vect-loop.cc | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index e6d241482ce3..e91549a643b9 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3143,17 +3143,15 @@ start_over:
   " epilogue loop.\n");
 }
 
-  /* Check the costings of the loop make vectorizing worthwhile.  */
-  res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor);
-  if (res < 0)
-{
-  ok = opt_result::failure_at (vect_location,
-  "Loop costings may not be worthwhile.\n");
-  goto again;
-}
-  if (!res)
+  /* If the epilogue needs peeling for gaps but the main loop doesn't give
+ up on the epilogue.  */
+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+  && LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+  && (LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo)
+ != LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)))
 return opt_result::failure_at (vect_location,
-  "Loop costings not worthwhile.\n");
+  "Epilogue loop requires peeling for gaps "
+  "but main loop does not.\n");
 
   /* If an epilogue loop is required make sure we can create one.  */
   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
@@ -3174,6 +3172,18 @@ start_over:
 }
 }
 
+  /* Check the costings of the loop make vectorizing worthwhile.  */
+  res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor);
+  if (res < 0)
+{
+  ok = opt_result::failure_at (vect_location,
+  "Loop costings may not be worthwhile.\n");
+  goto again;
+}
+  if (!res)
+return opt_result::failure_at (vect_location,
+  "Loop costings not worthwhile.\n");
+
   /* During peeling, we need to check if number of loop iterations is
  enough for both peeled prolog loop and vector loop.  This check
  can be merged along with threshold check of loop versioning, so


[gcc r15-5020] VN: Factor out inserting predicates for conditional

2024-11-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:b38f8294e4f29132c8cf4c5d3f3beb64bb0c499d

commit r15-5020-gb38f8294e4f29132c8cf4c5d3f3beb64bb0c499d
Author: Andrew Pinski 
Date:   Fri Nov 1 19:28:19 2024 -0700

VN: Factor out inserting predicates for conditional

To make it easier to add more predicates in some cases,
factor out the code. Plus it makes the code slightly more
readable since it is not indented as much.

Bootstrapped and tested on x86_64.

gcc/ChangeLog:

* tree-ssa-sccvn.cc (insert_predicates_for_cond): New function, 
factored out from ...
(process_bb): Here.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-sccvn.cc | 70 +++
 1 file changed, 37 insertions(+), 33 deletions(-)

diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 1c8bc884f1f2..a11bf9686703 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -7895,6 +7895,42 @@ insert_related_predicates_on_edge (enum tree_code code, 
tree *ops, edge pred_e)
 }
 }
 
+/* Insert on the TRUE_E true and FALSE_E false predicates
+   derived from LHS CODE RHS.  */
+static void
+insert_predicates_for_cond (tree_code code, tree lhs, tree rhs,
+   edge true_e, edge false_e)
+{
+  tree_code icode = invert_tree_comparison (code, HONOR_NANS (lhs));
+  tree ops[2];
+  ops[0] = lhs;
+  ops[1] = rhs;
+  if (true_e)
+vn_nary_op_insert_pieces_predicated (2, code, boolean_type_node, ops,
+boolean_true_node, 0, true_e);
+  if (false_e)
+vn_nary_op_insert_pieces_predicated (2, code, boolean_type_node, ops,
+boolean_false_node, 0, false_e);
+  if (icode != ERROR_MARK)
+{
+  if (true_e)
+   vn_nary_op_insert_pieces_predicated (2, icode, boolean_type_node, ops,
+boolean_false_node, 0, true_e);
+  if (false_e)
+   vn_nary_op_insert_pieces_predicated (2, icode, boolean_type_node, ops,
+boolean_true_node, 0, false_e);
+}
+  /* Relax for non-integers, inverted condition handled
+ above.  */
+  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+{
+  if (true_e)
+   insert_related_predicates_on_edge (code, ops, true_e);
+  if (false_e)
+   insert_related_predicates_on_edge (icode, ops, false_e);
+  }
+}
+
 /* Main stmt worker for RPO VN, process BB.  */
 
 static unsigned
@@ -8098,45 +8134,13 @@ process_bb (rpo_elim &avail, basic_block bb,
edge true_e, false_e;
extract_true_false_edges_from_block (bb, &true_e, &false_e);
enum tree_code code = gimple_cond_code (last);
-   enum tree_code icode
- = invert_tree_comparison (code, HONOR_NANS (lhs));
-   tree ops[2];
-   ops[0] = lhs;
-   ops[1] = rhs;
if ((do_region && bitmap_bit_p (exit_bbs, true_e->dest->index))
|| !can_track_predicate_on_edge (true_e))
  true_e = NULL;
if ((do_region && bitmap_bit_p (exit_bbs, false_e->dest->index))
|| !can_track_predicate_on_edge (false_e))
  false_e = NULL;
-   if (true_e)
- vn_nary_op_insert_pieces_predicated
-   (2, code, boolean_type_node, ops,
-boolean_true_node, 0, true_e);
-   if (false_e)
- vn_nary_op_insert_pieces_predicated
-   (2, code, boolean_type_node, ops,
-boolean_false_node, 0, false_e);
-   if (icode != ERROR_MARK)
- {
-   if (true_e)
- vn_nary_op_insert_pieces_predicated
-   (2, icode, boolean_type_node, ops,
-boolean_false_node, 0, true_e);
-   if (false_e)
- vn_nary_op_insert_pieces_predicated
-   (2, icode, boolean_type_node, ops,
-boolean_true_node, 0, false_e);
- }
-   /* Relax for non-integers, inverted condition handled
-  above.  */
-   if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
- {
-   if (true_e)
- insert_related_predicates_on_edge (code, ops, true_e);
-   if (false_e)
- insert_related_predicates_on_edge (icode, ops, false_e);
- }
+   insert_predicates_for_cond (code, lhs, rhs, true_e, false_e);
  }
break;
  }


[gcc r15-5014] Add LOOP_VINFO_MAIN_LOOP_INFO

2024-11-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:42d99f63cfccabe1d19177993abf4f1219d6f967

commit r15-5014-g42d99f63cfccabe1d19177993abf4f1219d6f967
Author: Richard Biener 
Date:   Mon Nov 4 12:58:41 2024 +0100

Add LOOP_VINFO_MAIN_LOOP_INFO

The following introduces LOOP_VINFO_MAIN_LOOP_INFO alongside
LOOP_VINFO_ORIG_LOOP_INFO so one can have both access to the main
vectorized loop info and the preceding vectorized epilogue.
This is critical for correctness as we need to disallow never
executed epilogues by costing in vect_analyze_loop_costing as
we assume those do not exist when deciding to add a skip-vector
edge during peeling.  The patch also changes how multiple vector
epilogues are handled - instead of the epilogue_vinfos array in
the main loop info we now record the single epilogue_vinfo there
and further epilogues in the epilogue_vinfo member of the
epilogue info.  This simplifies code.

* tree-vectorizer.h (_loop_vec_info::main_loop_info): New.
(LOOP_VINFO_MAIN_LOOP_INFO): Likewise.
(_loop_vec_info::epilogue_vinfo): Change from epilogue_vinfos
from array to single element.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
main_loop_info and epilogue_vinfo.  Remove epilogue_vinfos
allocation.
(_loop_vec_info::~_loop_vec_info): Do not release epilogue_vinfos.
(vect_create_loop_vinfo): Rename parameter, set
LOOP_VINFO_MAIN_LOOP_INFO.
(vect_analyze_loop_1): Rename parameter.
(vect_analyze_loop_costing): Properly distinguish between
the main vector loop and the preceding epilogue.
(vect_analyze_loop): Change for epilogue_vinfos no longer
being a vector.
* tree-vect-loop-manip.cc (vect_do_peeling): Simplify and
thereby handle a vector epilogue of a vector epilogue.

Diff:
---
 gcc/tree-vect-loop-manip.cc | 22 ++-
 gcc/tree-vect-loop.cc   | 67 -
 gcc/tree-vectorizer.h   | 12 ++--
 3 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 5bbeeddd8546..c8dc71532985 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3100,12 +3100,12 @@ vect_get_main_loop_result (loop_vec_info loop_vinfo, 
tree main_loop_value,
The analysis resulting in this epilogue loop's loop_vec_info was performed
in the same vect_analyze_loop call as the main loop's.  At that time
vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
-   vectorization factors than the main loop.  This list is stored in the main
-   loop's loop_vec_info in the 'epilogue_vinfos' member.  Everytime we decide 
to
-   vectorize the epilogue loop for a lower vectorization factor,  the
-   loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
-   updated and linked to the epilogue loop.  This is later used to vectorize
-   the epilogue.  The reason the loop_vec_info needs updating is that it was
+   vectorization factors than the main loop.  This list is chained in the
+   loop's loop_vec_info in the 'epilogue_vinfo' member.  When we decide to
+   vectorize the epilogue loop for a lower vectorization factor, the
+   loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
+   This is later used to vectorize the epilogue.
+   The reason the loop_vec_info needs updating is that it was
constructed based on the original main loop, and the epilogue loop is a
copy of this loop, so all links pointing to statements in the original loop
need updating.  Furthermore, these loop_vec_infos share the
@@ -3128,7 +3128,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   profile_probability prob_prolog, prob_vector, prob_epilog;
   int estimated_vf;
   int prolog_peeling = 0;
-  bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
+  bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
   /* We currently do not support prolog peeling if the target alignment is not
  known at compile time.  'vect_gen_prolog_loop_niters' depends on the
  target alignment being constant.  */
@@ -3255,13 +3255,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   else
 niters_prolog = build_int_cst (type, 0);
 
-  loop_vec_info epilogue_vinfo = NULL;
-  if (vect_epilogues)
-{
-  epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
-  loop_vinfo->epilogue_vinfos.ordered_remove (0);
-}
-
+  loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
   tree niters_vector_mult_vf = NULL_TREE;
   /* Saving NITERs before the loop, as this may be changed by prologue.  */
   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6059ce031d14..f4f16fc07a2d

[gcc r15-5025] btf: check hash maps are non-null before emptying

2024-11-07 Thread David Faust via Gcc-cvs
https://gcc.gnu.org/g:6571e8f863736b7705f59c9ab0f17b7c4fdbcf92

commit r15-5025-g6571e8f863736b7705f59c9ab0f17b7c4fdbcf92
Author: David Faust 
Date:   Thu Nov 7 09:19:51 2024 -0800

btf: check hash maps are non-null before emptying

These maps will always be non-null in btf_finalize under normal
circumstances, but be safe and verify that before trying to empty them.

gcc/
* btfout.cc (btf_finalize): Check that hash maps are non-null before
emptying them.

Diff:
---
 gcc/btfout.cc | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 083ca48d6279..4a6b5453e08e 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -1661,13 +1661,19 @@ btf_finalize (void)
   datasecs.release ();
 
   funcs = NULL;
-  func_map->empty ();
-  func_map = NULL;
+  if (func_map)
+{
+  func_map->empty ();
+  func_map = NULL;
+}
 
   if (debug_prune_btf)
 {
-  btf_used_types->empty ();
-  btf_used_types = NULL;
+  if (btf_used_types)
+   {
+ btf_used_types->empty ();
+ btf_used_types = NULL;
+   }
 
   fixups.release ();
   forwards = NULL;


[gcc r15-5024] ifcombine: For short circuit case, allow 2 convert defining statements [PR85605]

2024-11-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:2a2e6784074e1f7b679bc09b1a66982bf60645a5

commit r15-5024-g2a2e6784074e1f7b679bc09b1a66982bf60645a5
Author: Andrew Pinski 
Date:   Mon Oct 28 16:40:34 2024 -0700

ifcombine: For short circuit case, allow 2 convert defining statements 
[PR85605]

r0-126134-g5d2a9da9a7f7c1 added support for short-circuiting and combining
the ifs using either AND or OR. But it only allowed the inner condition
basic block to contain the conditional alone. This change allows up to 2
defining statements, as long as they are just integer-to-integer
conversions for either the lhs or rhs of the conditional.

This should allow ccmp to be used on aarch64 and x86_64 (APX) slightly
more often than before.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/85605

gcc/ChangeLog:

* tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New 
function.
(ifcombine_ifandif): Use can_combine_bbs_with_short_circuit
instead of checking if iterator is one before the last statement.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-9.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C   | 27 
 .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++
 .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++
 .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-9.c | 17 ++
 gcc/tree-ssa-ifcombine.cc  | 37 --
 5 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
new file mode 100644
index ..282cec8c6287
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+#include 
+
+template
+inline bool cmp(T a, T2 b) {
+  return a<0 ? true : T2(a) < b;
+}
+
+template
+inline bool cmp2(T a, T2 b) {
+  return (a<0) | (T2(a) < b);
+}
+
+bool f(int a, int b) {
+return cmp(int64_t(a), unsigned(b));
+}
+
+bool f2(int a, int b) {
+return cmp2(int64_t(a), unsigned(b));
+}
+
+
+/* Both of these functions should be optimized to the same, and have an | in 
them. */
+/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
new file mode 100644
index ..1bdbb9358b46
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in 
the
+   inner bb which should be able to move too. */
+
+int t (int a, unsigned b)
+{
+  if (a > 0)
+  {
+signed t = b;
+if (t > 0)
+  return 0;
+  }
+  return 1;
+}
+/* { dg-final { scan-tree-dump "\&" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
new file mode 100644
index ..8d74b4932c5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in 
the
+   inner bb which should be able to move too. */
+
+int t (int a, unsigned b)
+{
+  if (a > 0)
+goto L1;
+  signed t = b;
+  if (t > 0)
+goto L1;
+  return 0;
+L1:
+  return 1;
+}
+/* { dg-final { scan-tree-dump "\|" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-9.c
new file mode 100644
index ..4e8350fad411
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-9.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+/* Like ssa-ifcombine-ccmp-1.c but with conversion from short to int in the
+   inner bb which should be able to move too. */
+
+int t (int a, short b, int c)
+{
+  if (a > 0)
+  {
+if (c == b)
+  return 0;
+  }
+  return 1;
+}
+/* { dg-final { scan-tree-dump "\&" "optimized" } } */
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index b

[gcc r15-5021] VN: Handle `(a | b) !=/== 0` for predicates [PR117414]

2024-11-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:578002846620ed04192a4832e9f20b5c32816153

commit r15-5021-g578002846620ed04192a4832e9f20b5c32816153
Author: Andrew Pinski 
Date:   Fri Nov 1 20:06:30 2024 -0700

VN: Handle `(a | b) !=/== 0` for predicates [PR117414]

For `(a | b) == 0`, we can "assert" on the true edge that
both `a == 0` and `b == 0` but nothing on the false edge.
For `(a | b) != 0`, we can "assert" on the false edge that
both `a == 0` and `b == 0` but nothing on the true edge.
This adds that predicate and allows us to optimize f0, f1,
and f2 in fre-predicated-[12].c.

Changes since v1:
* v2: Use vn_valueize. Also canonicalize the comparison
  at the beginning of insert_predicates_for_cond for
  constants to be on the rhs. Return early for
  non-ssa names on the lhs (after canonicalization).

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/117414

gcc/ChangeLog:

* tree-ssa-sccvn.cc (insert_predicates_for_cond): Canonicalize the 
comparison.
Don't insert anything if lhs is not a SSA_NAME. Handle `(a | b) 
!=/== 0`.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/fre-predicated-1.c: New test.
* gcc.dg/tree-ssa/fre-predicated-2.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-1.c | 53 
 gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-2.c | 27 
 gcc/tree-ssa-sccvn.cc| 36 
 3 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-1.c
new file mode 100644
index ..d56952f5f246
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-1.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR tree-optimization/117414 */
+
+/* Fre1 should figure out that `*aaa != 0`
+   For f0, f1, and f2. */
+
+
+void foo();
+int f0(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  int c = b != 0;
+  int d = t !=  0;
+  if (d | c)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa) foo();
+  }
+  return 0;
+}
+
+int f1(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  if (b != 0 || t != 0)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa) foo();
+  }
+  return 0;
+}
+
+
+int f2(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  if (b != 0)
+return 0;
+  if (t != 0)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa) foo();
+  }
+   return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-2.c
new file mode 100644
index ..0123a5b54f73
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR tree-optimization/117414 */
+
+/* Fre1 should figure out that `*aaa != 0`
+   For f0, f1, and f2. */
+
+
+void foo();
+int f0(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  int d = b | t;
+  if (d == 0)
+;
+  else
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa) foo();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index a11bf9686703..c60ba6d7 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -7901,6 +7901,21 @@ static void
 insert_predicates_for_cond (tree_code code, tree lhs, tree rhs,
edge true_e, edge false_e)
 {
+  /* If both edges are null, then there is nothing to be done. */
+  if (!true_e && !false_e)
+return;
+
+  /* Canonicalize the comparison so the rhs are constants.  */
+  if (CONSTANT_CLASS_P (lhs))
+{
+  std::swap (lhs, rhs);
+  code = swap_tree_comparison (code);
+}
+
+  /* If the lhs is not a ssa name, don't record anything. */
+  if (TREE_CODE (lhs) != SSA_NAME)
+return;
+
   tree_code icode = invert_tree_comparison (code, HONOR_NANS (lhs));
   tree ops[2];
   ops[0] = lhs;
@@ -7929,6 +7944,27 @@ insert_predicates_for_cond (tree_code code, tree lhs, 
tree rhs,
   if (false_e)
insert_related_predicates_on_edge (icode, ops, false_e);
   }
+  if (integer_zerop (rhs)
+  && (code == NE_EXPR || code == EQ_EXPR))
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (lhs);
+  /* (a | b) == 0 ->
+   on true edge assert: a == 0 & b == 0. */
+  /* (a | b) != 0 ->
+   on false edge assert: a == 0 & b == 0. */
+  if (is_gimple_assign (def_stmt)
+ && gimple_assign_rhs_code (def_stmt) == BIT_IOR_EXPR)
+   {
+ edge e = code == EQ_EXPR ? t

[gcc r15-5022] VN: Handle `(A CMP B) !=/== 0` for predicates [PR117414]

2024-11-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:add4bb94459d6cecae11de279b49f9c1acb14394

commit r15-5022-gadd4bb94459d6cecae11de279b49f9c1acb14394
Author: Andrew Pinski 
Date:   Fri Nov 1 23:12:52 2024 -0700

VN: Handle `(A CMP B) !=/== 0` for predicates [PR117414]

After the last patch, we also want to record `(A CMP B) != 0`
as `(A CMP B)` and `(A CMP B) == 0` as `(A CMP B)` with the
true/false edges swapped.

This shows up more due to the new handling of
`(A | B) ==/!= 0` in insert_predicates_for_cond
as now we can notice these comparisons which were not seen before.

This is enough to fix the original issue in `gcc.dg/tree-ssa/pr111456-1.c`
and make sure we don't regress it when enhancing ifcombine.

This adds that predicate and allows us to optimize f
in fre-predicated-3.c.

Changes since v1:
* v2:  Use vn_valueize.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/117414

gcc/ChangeLog:

* tree-ssa-sccvn.cc (insert_predicates_for_cond): Handle `(A CMP B) 
!=/== 0`.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/fre-predicated-3.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-3.c | 46 
 gcc/tree-ssa-sccvn.cc| 14 
 2 files changed, 60 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-3.c
new file mode 100644
index ..4a89372fd703
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-3.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR tree-optimization/117414 */
+
+/* Fre1 should figure out that `*aaa != 0`
+   For f0, f1, and f2. */
+
+void foo();
+int f(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  int c = b == 0;
+  int d = t != 1;
+  if (c | d)
+return 0;
+
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa)
+  ;
+else
+  foo();
+  }
+  return 0;
+}
+
+int f1(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  if (b == 0)
+return 0;
+  if (t != 1)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (*aaa)
+  ;
+else
+  foo();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index c60ba6d7..67ed2cd8ffe1 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -7948,6 +7948,20 @@ insert_predicates_for_cond (tree_code code, tree lhs, 
tree rhs,
   && (code == NE_EXPR || code == EQ_EXPR))
 {
   gimple *def_stmt = SSA_NAME_DEF_STMT (lhs);
+  /* (A CMP B) != 0 is the same as (A CMP B).
+(A CMP B) == 0 is just (A CMP B) with the edges swapped.  */
+  if (is_gimple_assign (def_stmt)
+ && TREE_CODE_CLASS (gimple_assign_rhs_code (def_stmt)) == 
tcc_comparison)
+ {
+   tree_code nc = gimple_assign_rhs_code (def_stmt);
+   tree nlhs = vn_valueize (gimple_assign_rhs1 (def_stmt));
+   tree nrhs = vn_valueize (gimple_assign_rhs2 (def_stmt));
+   edge nt = true_e;
+   edge nf = false_e;
+   if (code == EQ_EXPR)
+ std::swap (nt, nf);
+   insert_predicates_for_cond (nc, nlhs, nrhs, nt, nf);
+ }
   /* (a | b) == 0 ->
on true edge assert: a == 0 & b == 0. */
   /* (a | b) != 0 ->


[gcc r15-5023] VN: Lookup `val != 0` if we got back val when looking up the predicate for GIMPLE_COND [PR117414]

2024-11-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:684e5ae90b64c3481f8a5cb7b9517daf79c78ab4

commit r15-5023-g684e5ae90b64c3481f8a5cb7b9517daf79c78ab4
Author: Andrew Pinski 
Date:   Fri Nov 1 23:20:22 2024 -0700

VN: Lookup `val != 0` if we got back val when looking up the predicate for 
GIMPLE_COND [PR117414]

Sometimes we get back a full SSA name when looking up the comparison of
the GIMPLE_COND rather than a predicate. We then want to look up
`val != 0` for the predicate.

Note this might happen with other boolean assignments and COND_EXPR but I 
am not sure
if it is as important; I have not found a testcase yet.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/117414

gcc/ChangeLog:

* tree-ssa-sccvn.cc (process_bb): Look up `val != 0` if we got
back an SSA name when looking up the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/fre-predicated-4.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-4.c | 38 
 gcc/tree-ssa-sccvn.cc| 10 +++
 2 files changed, 48 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-4.c
new file mode 100644
index ..fe9d2e2fb58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fre-predicated-4.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR tree-optimization/117414 */
+
+/* Fre1 should figure out that `*aaa != 0`
+   For f0 and f1. */
+
+
+void foo();
+int f0(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  if (b == 0 || t == 1)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (!*aaa) foo();
+  }
+  return 0;
+}
+
+int f1(int *aaa, int j, int t)
+{
+  int b = *aaa;
+  if (b == 0)
+return 0;
+  if (t == 1)
+return 0;
+  for(int i = 0; i < j; i++)
+  {
+if (!*aaa) foo();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "foo " "optimized" } } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 67ed2cd8ffe1..1967bbdca84d 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -8159,6 +8159,16 @@ process_bb (rpo_elim &avail, basic_block bb,
val = vn_nary_op_lookup_pieces (2, gimple_cond_code (last),
boolean_type_node, ops,
&vnresult);
+   /* Got back a ssa name, then try looking up `val != 0`
+  as it might have been recorded that way.  */
+   if (val && TREE_CODE (val) == SSA_NAME)
+ {
+   ops[0] = val;
+   ops[1] = build_zero_cst (TREE_TYPE (val));
+   val = vn_nary_op_lookup_pieces (2, NE_EXPR,
+   boolean_type_node, ops,
+   &vnresult);
+ }
/* Did we get a predicated value?  */
if (! val && vnresult && vnresult->predicated_values)
  {


[gcc r15-5013] Add LOOP_VINFO_DRS_ADVANCED_BY

2024-11-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:2c25af0e52a631e46a1731594301e5e63bc28992

commit r15-5013-g2c25af0e52a631e46a1731594301e5e63bc28992
Author: Richard Biener 
Date:   Mon Nov 4 13:09:21 2024 +0100

Add LOOP_VINFO_DRS_ADVANCED_BY

The following remembers how we advanced DRs when vectorizing an
epilogue.  When we want to vectorize the epilogue of such an epilogue
we have to retain that advancement and add the advancement for this
vectorized epilogue.  Due to the way we copy and re-associate
stmt_vec_infos and DRs, recording this advancement and re-applying
it for the next epilogue is simplest.

* tree-vectorizer.h (_loop_vec_info::drs_advanced_by): New.
(LOOP_VINFO_DRS_ADVANCED_BY): Likewise.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
drs_advanced_by.
(update_epilogue_loop_vinfo): Remember the DR advancement made.
(vect_transform_loop): Accumulate past advancements.

Diff:
---
 gcc/tree-vect-loop.cc | 9 +
 gcc/tree-vectorizer.h | 4 
 2 files changed, 13 insertions(+)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index e91549a643b9..6059ce031d14 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1072,6 +1072,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
 scalar_loop_scaling (profile_probability::uninitialized ()),
 scalar_loop (NULL),
 orig_loop_info (NULL),
+drs_advanced_by (NULL_TREE),
 vec_loop_iv_exit (NULL),
 vec_epilogue_loop_iv_exit (NULL),
 scalar_loop_iv_exit (NULL)
@@ -12302,6 +12303,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree 
advance)
  loop and its prologue.  */
   vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);
 
+  /* Remember the advancement made.  */
+  LOOP_VINFO_DRS_ADVANCED_BY (epilogue_vinfo) = advance;
+
   epilogue_vinfo->shared->datarefs_copy.release ();
   epilogue_vinfo->shared->save_datarefs ();
 }
@@ -12849,6 +12853,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple 
*loop_vectorized_call)
 
   if (epilogue)
 {
+  /* Accumulate past advancements made.  */
+  if (LOOP_VINFO_DRS_ADVANCED_BY (loop_vinfo))
+   advance = fold_build2 (PLUS_EXPR, TREE_TYPE (advance),
+  LOOP_VINFO_DRS_ADVANCED_BY (loop_vinfo),
+  advance);
   update_epilogue_loop_vinfo (epilogue, advance);
 
   epilogue->simduid = loop->simduid;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 24227a69d4ac..5a1bd237beb6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -994,6 +994,9 @@ public:
  analysis.  */
   vec<_loop_vec_info *> epilogue_vinfos;
 
+  /* If this is an epilogue loop the DR advancement applied.  */
+  tree drs_advanced_by;
+
   /* The controlling loop IV for the current loop when vectorizing.  This IV
  controls the natural exits of the loop.  */
   edge vec_loop_iv_exit;
@@ -1097,6 +1100,7 @@ public:
 #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
 #define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L)  (L)->inv_pattern_def_seq
+#define LOOP_VINFO_DRS_ADVANCED_BY(L)  (L)->drs_advanced_by
 
 #define LOOP_VINFO_FULLY_MASKED_P(L)   \
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)  \


[gcc r15-5017] libgomp.texi: Document OpenMP's Interoperability Routines

2024-11-07 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:e52cfd4bc23de14f1e1795bdf7ec161d94b8c087

commit r15-5017-ge52cfd4bc23de14f1e1795bdf7ec161d94b8c087
Author: Tobias Burnus 
Date:   Thu Nov 7 16:13:06 2024 +0100

libgomp.texi: Document OpenMP's Interoperability Routines

libgomp/ChangeLog:

* libgomp.texi (OpenMP Technical Report 13): Remove 'iterator'
in 'map' clause of 'declare mapper' as it is already in the list above.
(Interoperability Routines): Add.
(omp_target_memcpy_async, omp_target_memcpy_rect_async):
Document that depobj_list may be omitted in C++ and Fortran.

Diff:
---
 libgomp/libgomp.texi | 333 +++
 1 file changed, 312 insertions(+), 21 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6860963f3683..6679f6da4b9b 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -443,8 +443,7 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab
   of the @code{interop} construct @tab N @tab
 @item Invoke virtual member functions of C++ objects created on the host device
   on other devices @tab N @tab
-@item @code{iterator} and @code{mapper} as map-type modifier in @code{declare 
mapper}
-  @tab N @tab
+@item @code{mapper} as map-type modifier in @code{declare mapper} @tab N @tab
 @end multitable
 
 
@@ -668,7 +667,7 @@ specification in version 5.2.
 * Lock Routines::
 * Timing Routines::
 * Event Routine::
-@c * Interoperability Routines::
+* Interoperability Routines::
 * Memory Management Routines::
 @c * Tool Control Routine::
 * Environment Display Routine::
@@ -2211,8 +2210,9 @@ to the destination device's @var{dst} address shifted by 
@var{dst_offset}.
 Task dependence is expressed by passing an array of depend objects to
 @var{depobj_list}, where the number of array elements is passed as
 @var{depobj_count}; if the count is zero, the @var{depobj_list} argument is
-ignored.  The routine returns zero if the copying process has successfully
-been started and non-zero otherwise.
+ignored.  In C++ and Fortran, the @var{depobj_list} argument can also be
+omitted in that case.   The routine returns zero if the copying process has
+successfully been started and non-zero otherwise.
 
 Running this routine in a @code{target} region except on the initial device
 is not supported.
@@ -2332,7 +2332,8 @@ respectively.  The offset per dimension to the first 
element to be copied is
 given by the @var{dst_offset} and @var{src_offset} arguments.  Task dependence
 is expressed by passing an array of depend objects to @var{depobj_list}, where
 the number of array elements is passed as @var{depobj_count}; if the count is
-zero, the @var{depobj_list} argument is ignored.  The routine
+zero, the @var{depobj_list} argument is ignored.  In C++ and Fortran, the
+@var{depobj_list} argument can also be omitted in that case.  The routine
 returns zero on success and non-zero otherwise.
 
 The OpenMP specification only requires that @var{num_dims} up to three is
@@ -2961,21 +2962,311 @@ event handle that has already been fulfilled is also 
undefined.
 
 
 
-@c @node Interoperability Routines
-@c @section Interoperability Routines
-@c
-@c Routines to obtain properties from an @code{omp_interop_t} object.
-@c They have C linkage and do not throw exceptions.
-@c
-@c @menu
-@c * omp_get_num_interop_properties:: 
-@c * omp_get_interop_int:: 
-@c * omp_get_interop_ptr:: 
-@c * omp_get_interop_str:: 
-@c * omp_get_interop_name:: 
-@c * omp_get_interop_type_desc:: 
-@c * omp_get_interop_rc_desc:: 
-@c @end menu
+@node Interoperability Routines
+@section Interoperability Routines
+
+Routines to obtain properties from an object of OpenMP interop type.
+They have C linkage and do not throw exceptions.
+
+@menu
+* omp_get_num_interop_properties:: Get the number of implementation-specific 
properties
+* omp_get_interop_int:: Obtain integer-valued interoperability property
+* omp_get_interop_ptr:: Obtain pointer-valued interoperability property
+* omp_get_interop_str:: Obtain string-valued interoperability property
+* omp_get_interop_name:: Obtain the name of an interop_property value as string
+* omp_get_interop_type_desc:: Obtain type and description to an 
interop_property
+* omp_get_interop_rc_desc:: Obtain error string to an interop_rc error code
+@end menu
+
+
+
+@node omp_get_num_interop_properties
+@subsection @code{omp_get_num_interop_properties} -- Get the number of 
implementation-specific properties
+@table @asis
+@item @emph{Description}:
+The @code{omp_get_num_interop_properties} function returns the number of
+implementation-defined interoperability properties available for the passed
+@var{interop}, extending the OpenMP-defined properties.  The available OpenMP
+interop_property-type values range from @code{omp_ipr_first} to the value
+returned by @code{omp_get_num_interop_properties} minus one.
+
+No implementation-defined properties are currently defined i

[gcc r15-5026] bpf: avoid possible null deref in btf_ext_output [PR target/117447]

2024-11-07 Thread David Faust via Gcc-cvs
https://gcc.gnu.org/g:0e1382034246a594f1da8dbaee97c4a06743f31a

commit r15-5026-g0e1382034246a594f1da8dbaee97c4a06743f31a
Author: David Faust 
Date:   Thu Nov 7 09:27:07 2024 -0800

bpf: avoid possible null deref in btf_ext_output [PR target/117447]

The BPF-specific .BTF.ext section is always generated for BPF programs
if -gbtf is specified, and generating it requires BTF information and
assumes that the BTF info has already been generated.

Compiling non-C languages to BPF is not supported, nor is generating
CTF/BTF for non-C.  But, compiling another language like C++ to BPF
with -gbtf specified meant that we would try to generate the .BTF.ext
section anyway, and then ICE because no BTF information was available.

Add a check to bail out of btf_ext_output if the TU CTFC does not exist,
meaning no BTF info is available.

gcc/
PR target/117447
* config/bpf/btfext-out.cc (btf_ext_output): Bail if TU CTFC is 
null.

Diff:
---
 gcc/config/bpf/btfext-out.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/bpf/btfext-out.cc b/gcc/config/bpf/btfext-out.cc
index ca6241aa52ee..760b2b59ff6a 100644
--- a/gcc/config/bpf/btfext-out.cc
+++ b/gcc/config/bpf/btfext-out.cc
@@ -611,6 +611,9 @@ btf_ext_init (void)
 void
 btf_ext_output (void)
 {
+  if (!ctf_get_tu_ctfc ())
+return;
+
   output_btfext_header ();
   output_btfext_func_info (btf_ext);
   if (TARGET_BPF_CORE)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement vector SAT_TRUNC for signed integer

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd

commit e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd
Author: Pan Li 
Date:   Mon Oct 14 10:14:31 2024 +0800

RISC-V: Implement vector SAT_TRUNC for signed integer

This patch implements sstrunc for vector signed integers.

Form 1:
  #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)

Before this patch:
  27   │ vsetvli a5,a2,e64,m1,ta,ma
  28   │ vle64.v v1,0(a1)
  29   │ slli a3,a5,3
  30   │ slli a4,a5,2
  31   │ sub a2,a2,a5
  32   │ add a1,a1,a3
  33   │ vadd.vv v0,v1,v5
  34   │ vsetvli zero,zero,e32,mf2,ta,ma
  35   │ vnsrl.wx v2,v1,a6
  36   │ vncvt.x.x.w v1,v1
  37   │ vsetvli zero,zero,e64,m1,ta,ma
  38   │ vmsgtu.vv   v0,v0,v4
  39   │ vsetvli zero,zero,e32,mf2,ta,mu
  40   │ vneg.v  v2,v2
  41   │ vxor.vv v1,v2,v3,v0.t
  42   │ vse32.v v1,0(a0)
  43   │ add a0,a0,a4
  44   │ bne a2,zero,.L3

After this patch:
  16   │ vsetvli a5,a2,e32,mf2,ta,ma
  17   │ vle64.v v1,0(a1)
  18   │ slli a3,a5,3
  19   │ slli a4,a5,2
  20   │ sub a2,a2,a5
  21   │ add a1,a1,a3
  22   │ vnclip.wi   v1,v1,0
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a2,zero,.L3

The following test suites pass with this patch.
* The rv64gcv full regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add
new pattern sstrunc for double trunc.
(sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc.
(sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc.
* config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add
new func decl to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.
* config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new
func to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.

Signed-off-by: Pan Li 
(cherry picked from commit b5a058154179ab16fe5f9e6aa331624363410aad)

Diff:
---
 gcc/config/riscv/autovec.md | 34 ++
 gcc/config/riscv/riscv-protos.h |  4 
 gcc/config/riscv/riscv-v.cc | 46 +
 3 files changed, 84 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c96516..774a3d337231 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2779,6 +2779,40 @@
   }
 )
 
+(define_expand "sstrunc2"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VWEXTI   1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+ mode);
+DONE;
+  }
+)
+
+(define_expand "sstrunc2"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VQEXTI 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], 
mode,
+  mode);
+DONE;
+  }
+)
+
+(define_expand "sstrunc2"
+  [(match_operand: 0 "register_operand")
+   (match_operand:VOEXTI1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], mode,
+ mode,
+ mode);
+DONE;
+  }
+)
+
 ;; =
 ;; == Early break auto-vectorization patterns
 ;; =

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 2 of vector signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4ab025e6a1d8c4ed10d364c5563b5f3e440388ec

commit 4ab025e6a1d8c4ed10d364c5563b5f3e440388ec
Author: Pan Li 
Date:   Sat Oct 12 09:13:54 2024 +0800

RISC-V: Add testcases for form 2 of vector signed SAT_SUB

Form 2:
  #define DEF_VEC_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_sub_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T minus = (UT)x - (UT)y;   \
out[i] = (x ^ y) >= 0 || (minus ^ x) >= 0  \
  ? minus : x < 0 ? MIN : MAX; \
  }\
  }

DEF_VEC_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The following tests pass with this patch.
* The rv64gcv full regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-2-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-2-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-2-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-2-i8.c: New 
test.

Signed-off-by: Pan Li 
(cherry picked from commit 72d24d2a130a54fbe1479cb85e5639a7eab6c971)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-2-i16.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-2-i32.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-2-i64.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-2-i8.c   |  9 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-2-i16.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-2-i32.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-2-i64.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-2-i8.c | 17 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 9 files changed, 126 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i16.c
new file mode 100644
index ..dec0359c5ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_2(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i32.c
new file mode 100644
index ..72b2d6778cca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i32.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_2(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i64.c
new file mode 100644
index ..3ca44589e427
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-2-i64.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_2(int64_t, uint64

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH] RISC-V: override alignment of function/jump/loop

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d51adfe84ba03e9206479518b03f01246db39505

commit d51adfe84ba03e9206479518b03f01246db39505
Author: Wang Pengcheng 
Date:   Wed Oct 23 23:11:53 2024 -0600

[PATCH] RISC-V: override alignment of function/jump/loop

Just like what AArch64 has done.

Signed-off-by: Wang Pengcheng 

gcc/ChangeLog:

* config/riscv/riscv.cc (struct riscv_tune_param): Add new
tune options.
(riscv_override_options_internal): Override the default alignment
when not optimizing for size.

(cherry picked from commit 078f7c4f1fcf4d7099d855afb02dbaf71bebddbf)

Diff:
---
 gcc/config/riscv/riscv.cc | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5b9d9b6b64be..56bd03f8ce7d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -294,6 +294,9 @@ struct riscv_tune_param
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
+  const char *function_align = nullptr;
+  const char *jump_align = nullptr;
+  const char *loop_align = nullptr;
 };
 
 
@@ -10282,6 +10285,18 @@ riscv_override_options_internal (struct gcc_options 
*opts)
 ? &optimize_size_tune_info
 : cpu->tune_param;
 
+  /* If not optimizing for size, set the default
+  alignment to what the target wants.  */
+  if (!opts->x_optimize_size)
+{
+  if (opts->x_flag_align_loops && !opts->x_str_align_loops)
+   opts->x_str_align_loops = tune_param->loop_align;
+  if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
+   opts->x_str_align_jumps = tune_param->jump_align;
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+   opts->x_str_align_functions = tune_param->function_align;
+}
+
   /* Use -mtune's setting for slow_unaligned_access, even when optimizing
  for size.  For architectures that trap and emulate unaligned accesses,
  the performance cost is too great, even for -Os.  Similarly, if


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of signed vector SAT_ADD

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:8cd0ebf8e1f184ccae8110ac274e5c893e84cfa4

commit 8cd0ebf8e1f184ccae8110ac274e5c893e84cfa4
Author: Pan Li 
Date:   Mon Sep 23 13:43:50 2024 +0800

RISC-V: Add testcases for form 4 of signed vector SAT_ADD

Form 4:
  #define DEF_VEC_SAT_S_ADD_FMT_4(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T sum; \
bool overflow = __builtin_add_overflow (x, y, &sum);   \
out[i] = !overflow ? sum : x < 0 ? MIN : MAX;  \
  }\
  }

DEF_VEC_SAT_S_ADD_FMT_4 (int8_t, uint8_t, INT8_MIN, INT8_MAX)

The following tests pass with this patch.
* The rv64gcv full regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c: New 
test.

Signed-off-by: Pan Li 
Signed-off-by: Pan Li 
(cherry picked from commit 03b469ee4768118807a3c74891c3c426b0c145ef)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_s_add-13.c |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-14.c |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-15.c |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-16.c |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-run-13.c | 17 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-run-14.c | 17 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-run-15.c | 17 +
 .../riscv/rvv/autovec/binop/vec_sat_s_add-run-16.c | 17 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 9 files changed, 126 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c
new file mode 100644
index ..ec3f8aee434f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-13.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_4(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c
new file mode 100644
index ..5542616c90ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-14.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_4(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vsadd\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c
new file mode 100644
index ..091bfd15edf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-15.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_ADD_FMT_4(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:61532fbcc1679df99727353e9e0f40366ff43644

commit 61532fbcc1679df99727353e9e0f40366ff43644
Author: xuli 
Date:   Mon Oct 21 04:10:14 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1.

form 1:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
{   \
  return (T)IMM >= y ? (T)IMM - y : 0;  \
}

Passed the rv64gcv regression test.

Change-Id: I8805225b445cdbbc685f4f54a4d66c7ee8f748e1
Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-1_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4_2.c: New test.

(cherry picked from commit adf4ece4dc48deb1d1790efe104fa0cbcc22c0b6)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c | 23 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c | 20 +++
 4 files changed, 86 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c
new file mode 100644
index ..9229f3110848
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint8_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c
new file mode 100644
index ..db3294838901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c
new file mode 100644
index ..8073ee927fc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** sext\.w\s+a0,\s*a0
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c
new file mode 100644
index ..9a1ec6edf657
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint64_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Adjust the gather-scatter testcases due to middle-end change

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b5fe5d04d22ab11186542e2bbc86642eda87937f

commit b5fe5d04d22ab11186542e2bbc86642eda87937f
Author: Pan Li 
Date:   Wed Oct 23 16:43:37 2024 +0800

RISC-V: Adjust the gather-scatter testcases due to middle-end change

After we have MASK_LEN_STRIDED_LOAD{STORE} in the middle-end, the
strided cases need to be adjusted for the IR check.

The following test suites pass with this patch:
* The riscv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:
Adjust IR for MASK_LEN_LOAD check.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c:
Ditto.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c:
Ditto but for store.
* gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c:
Ditto.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 
(cherry picked from commit 372060d78715d9a4ab756b1b95796bd04c0be2bf)

Diff:
---
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c| 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c   | 2 +-
 .../gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
index 53263d16ae24..79b39f102bf2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 66 "optimized" 
} } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_LOAD " 66 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
index 6fef474cf8e2..8a452e547a39 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 33 "optimized" 
} } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_LOAD " 33 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
index ad23ed421290..ec8c3a5c63a4 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-1.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_SCATTER_STORE" 66 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_STORE" 66 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.SCATTER_STORE" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_SCATTER_STORE" "optimized" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
index 65f3f00b8c26..b433b5b52104 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_store-2.c
@@ -40,6 +40,6 @@
 
 TEST_ALL (TEST_LOOP)
 
-/* { dg-final { scan-tree-dump-times " \.MASK_LEN_SCATTER_STORE" 44 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " \.MASK_LEN_STRIDED_STORE " 44 
"optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.SCATTER_STORE" "optimized" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_SCATTER_STORE" "optimized" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:023d03a21501da0621083c5487cb2aa7b503f627

commit 023d03a21501da0621083c5487cb2aa7b503f627
Author: xuli 
Date:   Mon Oct 21 04:01:01 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1.

form 1:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
{   \
  return (T)IMM >= y ? (T)IMM - y : 0;  \
}

Passed the rv64gcv regression test.

Change-Id: Idaa1ab41f2a5785112279ea8ee2c93236457b740
Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-1_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4_1.c: New test.

(cherry picked from commit 93b6f287814bca3d10bcf53bb64db40d77eff5d7)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c | 23 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c | 25 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c | 20 +
 4 files changed, 89 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c
new file mode 100644
index ..6f2a493eebbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm254_uint8_t_fmt_1:
+** li\s+[atx][0-9]+,\s*254
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 254)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c
new file mode 100644
index ..ed03c186046a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm65534_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 65534)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c
new file mode 100644
index ..17d8e5f0b9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm4294967294_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** sext\.w\s+a0,\s*a0
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 4294967294)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c
new file mode 100644
index ..e6492190d171
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm18446744073709551614u_uint64_t_fmt_1:
+** li\s+[atx][0-9]+,\s*-2
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V:Bugfix for vlmul_ext and vlmul_trunc with NULL return value[pr117286]

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:43601fa23e1f4ea30469a25b75628655eb1cdac6

commit 43601fa23e1f4ea30469a25b75628655eb1cdac6
Author: xuli 
Date:   Mon Oct 28 04:41:09 2024 +

RISC-V:Bugfix for vlmul_ext and vlmul_trunc with NULL return value[pr117286]

This patch fixes the following ICE:

test.c: In function 'func':
test.c:37:24: internal compiler error: Segmentation fault
   37 | vfloat16mf2_t vc = __riscv_vlmul_trunc_v_f16m1_f16mf2(vb);
  |^~

The root cause is that vlmul_trunc has a null return value.
gimple_call <__riscv_vlmul_trunc_v_f16m1_f16mf2, NULL, vb_13>
 ^^^

Passed the rv64gcv_zvfh regression test.

Signed-off-by: Li Xu

PR target/117286

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc: Do not expand NULL 
return.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr117286.c: New test.

(cherry picked from commit 75caa17f5cb4e414919baff0435300b549a76eca)

Diff:
---
 gcc/config/riscv/riscv-vector-builtins-bases.cc|  4 
 gcc/testsuite/gcc.target/riscv/rvv/base/pr117286.c | 16 
 2 files changed, 20 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 193392fbcc2a..d78d9f214ac1 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1753,6 +1753,8 @@ public:
 
   rtx expand (function_expander &e) const override
   {
+if (!e.target)
+  return NULL_RTX;
 tree arg = CALL_EXPR_ARG (e.exp, 0);
 rtx src = expand_normal (arg);
 emit_move_insn (gen_lowpart (e.vector_mode (), e.target), src);
@@ -1767,6 +1769,8 @@ public:
 
   rtx expand (function_expander &e) const override
   {
+if (!e.target)
+  return NULL_RTX;
 rtx src = expand_normal (CALL_EXPR_ARG (e.exp, 0));
 emit_move_insn (e.target, gen_lowpart (GET_MODE (e.target), src));
 return e.target;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117286.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117286.c
new file mode 100644
index ..dabb8ae0751d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117286.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O1" } */
+
+#include 
+_Float16 a[10];
+void func(){
+  int placeholder0 = 10;
+  _Float16* ptr_a = a;
+  for (size_t vl; placeholder0 > 0; placeholder0 -= vl){
+vl = __riscv_vsetvl_e16m1(placeholder0);
+vfloat16mf2_t va = __riscv_vle16_v_f16mf2(ptr_a, vl);
+vfloat16m1_t vb = __riscv_vlmul_ext_v_f16mf2_f16m1(va);
+vfloat16mf2_t vc = __riscv_vlmul_trunc_v_f16m1_f16mf2(vb);
+ptr_a += vl;
+  }
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 1 of MASK_LEN_STRIDED_LOAD{STORE}

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d8edb5cd745ffd9435d1651a6a5ae302c49fe633

commit d8edb5cd745ffd9435d1651a6a5ae302c49fe633
Author: Pan Li 
Date:   Wed Oct 23 16:52:01 2024 +0800

RISC-V: Add testcases for form 1 of MASK_LEN_STRIDED_LOAD{STORE}

Form 1:
  void __attribute__((noinline))\
  vec_strided_load_store_##T##_form_1 (T *restrict out, T *restrict in, \
   long stride, size_t size)\
  { \
for (size_t i = 0; i < size; i++)   \
  out[i * stride] = in[i * stride]; \
  }

The following test suites pass with this patch:
* The full riscv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add strided folder.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i8.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u16.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u32.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u64.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u8.c: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-f64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u16.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u32.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u64.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-run-1-u8.c: 
New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st.h: New test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st_data.h: New 
test.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st_run.h: New 
test.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 
(cherry picked from commit 072d6bb67a51ceb9d7056f479f15f4c9f3b50b20)

Diff:
---
 .../rvv/autovec/strided/strided_ld_st-1-f16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-f32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-f64.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-i64.c  |   11 +
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i8.c |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u16.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u32.c  |   11 +
 .../rvv/autovec/strided/strided_ld_st-1-u64.c  |   11 +
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u8.c |   11 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-f64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-i8.c   |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u16.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u32.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u64.c  |   15 +
 .../rvv/autovec/strided/strided_ld_st-run-1-u8.c   |   15 +
 .../riscv/rvv/autovec/strided/strided_ld_st.h  |   22 +
 .../riscv/rvv/autovec/strided/strided_ld_st_data.h | 1145 
 .../riscv/rvv/autovec/strided/strided_ld_st_run.h  |   27 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp |2 +
 26 files changed, 1482 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [target/117316] Fix initializer for riscv code alignment handling

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b07b227ad0ceade439f702356abe0a59a89b48d5

commit b07b227ad0ceade439f702356abe0a59a89b48d5
Author: Jeff Law 
Date:   Mon Oct 28 05:39:24 2024 -0600

[target/117316] Fix initializer for riscv code alignment handling

The construct used for initializing the code alignments in a recent change
is causing bootstrap problems on riscv64, as seen in the referenced bugzilla.

This patch adjusts the initializer by pushing the NULL down into each uarch
clause.  Bootstrapped on riscv64, regression test in flight, but given
bootstrap is broken it seemed advisable to move this forward now.

I'm so much looking forward to the day when we have performant hardware for
bootstrap testing...  Sigh.

Anyway, bootstrapped and installing on the trunk.

PR target/117316
gcc/
* config/riscv/riscv.cc (riscv_tune_param): Drop initializer.
(*_tune_info): Add initializers for code alignments.

(cherry picked from commit f475a31ab4c7f27f6f8c7a418412f9fddc371638)

Diff:
---
 gcc/config/riscv/riscv.cc | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 56bd03f8ce7d..f0d274653146 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -294,9 +294,9 @@ struct riscv_tune_param
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
-  const char *function_align = nullptr;
-  const char *jump_align = nullptr;
-  const char *loop_align = nullptr;
+  const char *function_align;
+  const char *jump_align;
+  const char *loop_align;
 };
 
 
@@ -456,6 +456,9 @@ static const struct riscv_tune_param rocket_tune_info = {
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
+  NULL,/* function_align */
+  NULL,/* jump_align */
+  NULL,/* loop_align */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -475,6 +478,9 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
+  NULL,/* function_align */
+  NULL,/* jump_align */
+  NULL,/* loop_align */
 };
 
 /* Costs to use when optimizing for Sifive p400 Series.  */
@@ -494,6 +500,9 @@ static const struct riscv_tune_param sifive_p400_tune_info 
= {
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,/* vector cost */
+  NULL,/* function_align */
+  NULL,/* jump_align */
+  NULL,/* loop_align */
 };
 
 /* Costs to use when optimizing for Sifive p600 Series.  */
@@ -513,6 +522,9 @@ static const struct riscv_tune_param sifive_p600_tune_info 
= {
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,/* vector cost */
+  NULL,/* function_align */
+  NULL,/* jump_align */
+  NULL,/* loop_align */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -532,6 +544,9 @@ static const struct riscv_tune_param thead_c906_tune_info = 
{
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
+  NULL,/* function_align */
+  NULL,/* jump_align */
+  NULL,/* loop_align */
 };
 
 /* Costs to use when optimizing for xiangshan nanhu.  */
@@ -551,6 +566,9 @@ static const struct riscv_tune_param 
xiangshan_nanhu_tune_info = {
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,  /* fusible_ops */
   NULL,/* vector cost */
+  NULL,  

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:eecb5e6ca11b2929bee236bcaeb561f3e232c47b

commit eecb5e6ca11b2929bee236bcaeb561f3e232c47b
Author: yulong 
Date:   Tue Oct 29 08:43:42 2024 -0600

[PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions

gcc/ChangeLog:

* config.gcc: Add riscv_cmo.h.
* config/riscv/riscv_cmo.h: New file.

(cherry picked from commit d2c8548e0ce51dac6bc51d37236c50f98fca82f0)

Diff:
---
 gcc/config.gcc   |  2 +-
 gcc/config/riscv/riscv_cmo.h | 84 
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a3566f5c77da..6cdaa1e80a60 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -553,7 +553,7 @@ riscv*)
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o riscv-target-attr.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h"
+   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h riscv_cmo.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/riscv_cmo.h b/gcc/config/riscv/riscv_cmo.h
new file mode 100644
index ..3514fd3f0fe4
--- /dev/null
+++ b/gcc/config/riscv/riscv_cmo.h
@@ -0,0 +1,84 @@
+/* RISC-V CMO Extension intrinsics include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RISCV_CMO_H
+#define __RISCV_CMO_H
+
+#if defined (__riscv_zicbom)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_clean (void *addr)
+{
+__builtin_riscv_zicbom_cbo_clean (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_flush (void *addr)
+{
+__builtin_riscv_zicbom_cbo_flush (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_inval (void *addr)
+{
+__builtin_riscv_zicbom_cbo_inval (addr);
+}
+
+#endif // __riscv_zicbom
+
+#if defined (__riscv_zicbop)
+
+# define rnum 1
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetch (void *addr, const int vs1, const int vs2)
+{
+__builtin_prefetch (addr,vs1,vs2);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetchi ()
+{
+return __builtin_riscv_zicbop_cbo_prefetchi (rnum);
+}
+
+#endif // __riscv_zicbop
+
+#if defined (__riscv_zicboz)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_zero (void *addr)
+{
+__builtin_riscv_zicboz_cbo_zero (addr);
+}
+
+#endif // __riscv_zicboz
+
+#endif // __RISCV_CMO_H


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: allow -fno-plt to disable PLT

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f8cab2ae987facf17539e3b4e8a9e26bad49130e

commit f8cab2ae987facf17539e3b4e8a9e26bad49130e
Author: Yangyu Chen 
Date:   Thu Oct 31 16:31:24 2024 +0800

RISC-V: allow -fno-plt to disable PLT

Currently, the RISC-V target uses the target specific mplt option to
control PLT generation. This patch deprecates the target specific mplt
option and uses the common fplt option instead. This allows users to
use the same option for most targets.

Co-Developed-by: Liao Shihua 
Signed-off-by: Yangyu Chen 

gcc/ChangeLog:

* config/riscv/predicates.md: Use flag_plt instead of TARGET_PLT.
* config/riscv/riscv.opt: Make mplt an alias of the common option fplt.

(cherry picked from commit 1f7b1c555c66cf55f9032ea14135f29d27d34811)

Diff:
---
 gcc/config/riscv/predicates.md | 2 +-
 gcc/config/riscv/riscv.opt | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 9971fabc5873..55bcfa4fa4f1 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -329,7 +329,7 @@
 {
   enum riscv_symbol_type type;
   return (riscv_symbolic_constant_p (op, &type)
- && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && TARGET_PLT);
+ && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && flag_plt);
 })
 
 (define_predicate "call_insn_operand"
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 6360ed3984d0..5bc5d3002934 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -34,8 +34,8 @@ Target RejectNegative Joined UInteger Var(riscv_branch_cost)
 -mbranch-cost=NSet the cost of branches to roughly N instructions.
 
 mplt
-Target Var(TARGET_PLT) Init(1)
-When generating -fpic code, allow the use of PLTs. Ignored for fno-pic.
+Target Alias(fplt)
+This option is deprecated; use -fplt or -fno-plt instead.
 
 mabi=
 Target RejectNegative Joined Enum(abi_type) Var(riscv_abi) Init(ABI_ILP32) 
Negative(mabi=)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ecb0d1b860e15371248a055ae4b2d8058bb8dd1a

commit ecb0d1b860e15371248a055ae4b2d8058bb8dd1a
Author: Pan Li 
Date:   Wed Oct 23 16:46:53 2024 +0800

RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

This patch implements MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V
backend by leveraging the vector strided load/store insns.

For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
for (int i = 0; i < n; i++)
  a[i*stride] = b[i*stride] + 100;
}

Before this patch:
  38   │ vsetvli a5,a3,e32,m1,ta,ma
  39   │ vluxei64.v  v1,(a1),v4
  40   │ mul a4,a2,a5
  41   │ sub a3,a3,a5
  42   │ vadd.vv v1,v1,v2
  43   │ vsuxei64.v  v1,(a0),v4
  44   │ add a1,a1,a4
  45   │ add a0,a0,a4

After this patch:
  33   │ vsetvli a5,a3,e32,m1,ta,ma
  34   │ vlse32.vv1,0(a1),a2
  35   │ mul a4,a2,a5
  36   │ sub a3,a3,a5
  37   │ vadd.vv v1,v1,v2
  38   │ vsse32.vv1,0(a0),a2
  39   │ add a1,a1,a4
  40   │ add a0,a0,a4

The following test suites pass with this patch:
* The full riscv regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
new pattern for MASK_LEN_STRIDED_LOAD.
(mask_len_strided_store_<mode>): Ditto but for store.
* config/riscv/riscv-protos.h (expand_strided_load): Add new
func decl to expand strided load.
(expand_strided_store): Ditto but for store.
* config/riscv/riscv-v.cc (expand_strided_load): Add new
func impl to expand strided load.
(expand_strided_store): Ditto but for store.

Signed-off-by: Pan Li 
Co-Authored-By: Juzhe-Zhong 
(cherry picked from commit 30435cc261071d389d9a210f598170ecdd5ea13c)

Diff:
---
 gcc/config/riscv/autovec.md | 29 +++
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc | 52 +
 3 files changed, 83 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 774a3d337231..1f1849d52372 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2889,3 +2889,32 @@
 DONE;
   }
 )
+
+;; =
+;; == Strided Load/Store
+;; =
+(define_expand "mask_len_strided_load_"
+  [(match_operand:V 0 "register_operand")
+   (match_operand   1 "pmode_reg_or_0_operand")
+   (match_operand   2 "pmode_reg_or_0_operand")
+   (match_operand:  3 "vector_mask_operand")
+   (match_operand   4 "autovec_length_operand")
+   (match_operand   5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_strided_load (mode, operands);
+DONE;
+  })
+
+(define_expand "mask_len_strided_store_"
+  [(match_operand   0 "pmode_reg_or_0_operand")
+   (match_operand   1 "pmode_reg_or_0_operand")
+   (match_operand:V 2 "register_operand")
+   (match_operand:  3 "vector_mask_operand")
+   (match_operand   4 "autovec_length_operand")
+   (match_operand   5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_strided_store (mode, operands);
+DONE;
+  })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 54f472afd8d0..0a6b43f0c767 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -700,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned 
HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, rtx);
 bool expand_vec_setmem (rtx, rtx, rtx);
 bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c48b87278a31..209b7ee88f18 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
 }
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[0];
+  rtx base = ops[1];
+  rtx stride = ops[2];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+  if (poly_int_rtx_p (len, &len_val)
+  && known_eq (len_val, GET_MODE_NUNITS (mode)))
+emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+  else
+{
+  len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+  emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] RISC-V: Add implication for M extension.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:273865dac950d3f1c3be88bc9d79ac05701c1bc3

commit 273865dac950d3f1c3be88bc9d79ac05701c1bc3
Author: Tsung Chun Lin 
Date:   Tue Oct 29 09:47:57 2024 -0600

[RISC-V] RISC-V: Add implication for M extension.

The M extension implies Zmmul.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: M implies Zmmul.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/attribute-15.c: Add _zmmul1p0 to arch string.
* gcc.target/riscv/attribute-16.c: Ditto.
* gcc.target/riscv/attribute-17.c: Ditto.
* gcc.target/riscv/attribute-18.c: Ditto.
* gcc.target/riscv/attribute-19.c: Ditto.
* gcc.target/riscv/pr110696.c: Ditto.
* gcc.target/riscv/target-attr-01.c: Ditto.
* gcc.target/riscv/target-attr-02.c: Ditto.
* gcc.target/riscv/target-attr-03.c: Ditto.
* gcc.target/riscv/target-attr-04.c: Ditto.
* gcc.target/riscv/target-attr-08.c: Ditto.
* gcc.target/riscv/target-attr-11.c: Ditto.
* gcc.target/riscv/target-attr-14.c: Ditto.
* gcc.target/riscv/target-attr-15.c: Ditto.
* gcc.target/riscv/target-attr-16.c: Ditto.
* gcc.target/riscv/rvv/base/pr114352-1.c: Likewise.
* gcc.target/riscv/rvv/base/pr114352-3.c: Likewise.
* gcc.dg/pr90838.c: Fix search string for rv64.

Co-Authored-By: Jeff Law  

(cherry picked from commit f003834badbfd9d0c0ad132de8b2f3d550ed120f)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 2 ++
 gcc/testsuite/gcc.dg/pr90838.c   | 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-15.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-16.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-17.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-18.c| 2 +-
 gcc/testsuite/gcc.target/riscv/attribute-19.c| 2 +-
 gcc/testsuite/gcc.target/riscv/pr110696.c| 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-1.c | 4 ++--
 gcc/testsuite/gcc.target/riscv/rvv/base/pr114352-3.c | 8 
 gcc/testsuite/gcc.target/riscv/target-attr-01.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-02.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-03.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-04.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-08.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-11.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/target-attr-14.c  | 4 ++--
 gcc/testsuite/gcc.target/riscv/target-attr-15.c  | 4 ++--
 gcc/testsuite/gcc.target/riscv/target-attr-16.c  | 4 ++--
 19 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 2adebe0b6f29..60595a3e3561 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -75,6 +75,8 @@ struct riscv_implied_info_t
 /* Implied ISA info, must end with NULL sentinel.  */
 static const riscv_implied_info_t riscv_implied_info[] =
 {
+  {"m", "zmmul"},
+
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},
diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
index 40aad70499d8..db7bcec3ea9b 100644
--- a/gcc/testsuite/gcc.dg/pr90838.c
+++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -77,7 +77,7 @@ int ctz4 (unsigned long x)
 /* { dg-final { scan-assembler-times "ctz\t"  1 { target { rv64 } } } } */
 /* { dg-final { scan-assembler-times "ctzw\t" 3 { target { rv64 } } } } */
 /* { dg-final { scan-assembler-times "andi\t" 2 { target { rv64 } } } } */
-/* { dg-final { scan-assembler-not "mul" { target { rv64 } } } } */
+/* { dg-final { scan-assembler-not "mul\t" { target { rv64 } } } } */
 
 /* { dg-final { scan-tree-dump-times {= \.CTZ} 3 "forwprop2" { target { rv32 } 
} } } */
 /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/attribute-15.c 
b/gcc/testsuite/gcc.target/riscv/attribute-15.c
index ac6caaecd4f7..d7a70e86aa1f 100644
--- a/gcc/testsuite/gcc.target/riscv/attribute-15.c
+++ b/gcc/testsuite/gcc.target/riscv/attribute-15.c
@@ -3,4 +3,4 @@
 int foo()
 {
 }
-/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0\"" 
} } */
+/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_zmmul1p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0\""
 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/attribute-16.c 
b/gcc/testsuite/gcc.target/riscv/attribute-16.c
index 539e426ca976..4818cbe90d48 100644
--- a/gcc/testsuite/gcc.target/riscv/attribute-16.c
+++ b/gcc/testsuite/gcc.target/riscv/attribute-16.c
@@ -3,4 +3,4 @@
 int foo()
 {
 }
-/* { dg-final { scan-assembler ".attribute arch, 
\"rv32i2p1_m2p0_a2p0_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zaamo

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for unsigned .SAT_SUB form 2 with IMM = 1.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7c313c4bbea75040a5c7b8c06b5a79b36374633b

commit 7c313c4bbea75040a5c7b8c06b5a79b36374633b
Author: xuli 
Date:   Wed Oct 23 01:57:51 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 2 with IMM = 1.

form2:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
{   \
  return x >= (T)IMM ? x - (T)IMM : 0;  \
}

Passed the rv64gcv regression test.

Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-run-5.c: Add run case for imm=1.
* gcc.target/riscv/sat_u_sub_imm-run-6.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-run-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-run-8.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-5_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-8_1.c: New test.

(cherry picked from commit 179a682d047500604c6612afb425acf481e1a6b2)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c   | 18 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c   | 19 +++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c   | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c   | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-6.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-7.c |  1 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-8.c |  1 +
 8 files changed, 75 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c
new file mode 100644
index ..42edfc59f8aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint8_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint8_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c
new file mode 100644
index ..5250b90418aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint16_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint16_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c
new file mode 100644
index ..99df0e4b683b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint32_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** subw\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint32_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c
new file mode 100644
index ..cbbc08339f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if  "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint64_t_fmt_2:
+** snez\s+[atx][0-9]+,\s*a0
+** sub\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_SUB" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-run-5.c
index 627e81bca4bd..fc3809590dee 100

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: fix const interleaved stepped vector with a scalar pattern

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b68a80eed8f0763f610ae67a07e12119bda3a7d7

commit b68a80eed8f0763f610ae67a07e12119bda3a7d7
Author: Vineet Gupta 
Date:   Thu Oct 24 15:15:40 2024 -0700

RISC-V: fix const interleaved stepped vector with a scalar pattern

When bisecting for ICE in PR/117353, commit 771256bcb9dd ("RISC-V: Emit
costs for bool and stepped const vectors") uncovered yet another latent
issue (first noted in [1]).

  [1] https://github.com/patrick-rivos/gcc-postcommit-ci/issues/1625

This patch fixes some of the fortran regressions from that report.

Fixes 71a5ac6703d1 ("RISC-V: Support interleave vector with different step 
sequence")

rv64imafdcv_zvl256b_zba_zbb_zbs_zicond/lp64d/medlow
| # of unexpected case / # of unique unexpected case
|  gcc |  g++ | gfortran |
|  392 /   108 |7 / 3 |   91 /24 |
|  392 /   108 |7 / 3 |   67 /12 |

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Use IOR op.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/slp-interleave-5.c: New test.

Tested-by: Edwin Lu  # Pre-commit CI #2503
Signed-off-by: Vineet Gupta 
(cherry picked from commit 1905b59fdc58ce67e508b99dff105afebaaa9bb1)

Diff:
---
 gcc/config/riscv/riscv-v.cc|  6 ++--
 .../riscv/rvv/autovec/slp-interleave-5.c   | 35 ++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 209b7ee88f18..5e728f04cf51 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1501,9 +1501,9 @@ expand_const_vector (rtx target, rtx src)
gen_int_mode (builder.inner_bits_size (), new_smode),
NULL_RTX, false, OPTAB_DIRECT);
  rtx tmp2 = gen_reg_rtx (new_mode);
- rtx and_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (AND, new_mode),
-  BINARY_OP, and_ops);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
+  BINARY_OP, ior_ops);
  emit_move_insn (result, gen_lowpart (mode, tmp2));
}
  else
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c
new file mode 100644
index ..32cfe8a8688c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl1024b -mabi=lp64d -O3 
-fdump-tree-optimized-details" } */
+
+struct S { int a, b; } s[8];
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+{
+  s[i].b = 1;
+  s[i].a = i+1;
+}
+}
+
+/* { dg-final { scan-tree-dump-times "\{ 1, 1, 2, 1, 3, 1, 4, 1 \}" 1 
"optimized" } } */
+/* { dg-final { scan-assembler {vid\.v} } } */
+/* { dg-final { scan-assembler {vadd\.v} } } */
+/* { dg-final { scan-assembler {vor\.v} } } */
+
+void
+foo2 ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+{
+  s[i].b = 0;
+  s[i].a = i+1;
+}
+}
+
+/* { dg-final { scan-tree-dump-times "\{ 1, 0, 2, 0, 3, 0, 4, 0 \}" 1 
"optimized" } } */
+/* { dg-final { scan-assembler {vid\.v} } } */
+/* { dg-final { scan-assembler {vadd\.v} } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4dd46cea79099f885fb00ca4c3a8ec7b88358728

commit 4dd46cea79099f885fb00ca4c3a8ec7b88358728
Author: yulong 
Date:   Tue Oct 29 08:44:45 2024 -0600

[PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-32.c: New test.
* gcc.target/riscv/cmo-64.c: New test.

(cherry picked from commit b22d9c8f8216d15773dee4f9677c6b26aff507fd)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmo-32.c | 58 +
 gcc/testsuite/gcc.target/riscv/cmo-64.c | 58 +
 2 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-32.c 
b/gcc/testsuite/gcc.target/riscv/cmo-32.c
new file mode 100644
index ..8e733cc05fc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmo-32.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32} */
+/* { dg-options "-march=rv32gc_zicbom_zicbop_zicboz -mabi=ilp32" } */
+
+#include "riscv_cmo.h"
+
+void foo1 (void *addr)
+{
+__riscv_cmo_clean(0);
+__riscv_cmo_clean(addr);
+__riscv_cmo_clean((void*)0x111);
+}
+
+void foo2 (void *addr)
+{
+__riscv_cmo_flush(0);
+__riscv_cmo_flush(addr);
+__riscv_cmo_flush((void*)0x111);
+}
+
+void foo3 (void *addr)
+{
+__riscv_cmo_inval(0);
+__riscv_cmo_inval(addr);
+__riscv_cmo_inval((void*)0x111);
+}
+
+void foo4 (void *addr)
+{
+__riscv_cmo_prefetch(addr,0,0);
+__riscv_cmo_prefetch(addr,0,1);
+__riscv_cmo_prefetch(addr,0,2);
+__riscv_cmo_prefetch(addr,0,3);
+__riscv_cmo_prefetch(addr,1,0);
+__riscv_cmo_prefetch(addr,1,1);
+__riscv_cmo_prefetch(addr,1,2);
+__riscv_cmo_prefetch(addr,1,3);
+}
+
+int foo5 (int num)
+{
+return __riscv_cmo_prefetchi(num);
+}
+
+void foo6 (void *addr)
+{
+__riscv_cmo_zero(0);
+__riscv_cmo_zero(addr);
+__riscv_cmo_zero((void*)0x121);
+}
+
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-64.c 
b/gcc/testsuite/gcc.target/riscv/cmo-64.c
new file mode 100644
index ..e83eddbeb6f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmo-64.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc_zicbom_zicbop_zicboz -mabi=lp64d" } */
+
+#include "riscv_cmo.h"
+
+void foo1 (void *addr)
+{
+__riscv_cmo_clean(0);
+__riscv_cmo_clean(addr);
+__riscv_cmo_clean((void*)0x111);
+}
+
+void foo2 (void *addr)
+{
+__riscv_cmo_flush(0);
+__riscv_cmo_flush(addr);
+__riscv_cmo_flush((void*)0x111);
+}
+
+void foo3 (void *addr)
+{
+__riscv_cmo_inval(0);
+__riscv_cmo_inval(addr);
+__riscv_cmo_inval((void*)0x111);
+}
+
+void foo4 (void *addr)
+{
+__riscv_cmo_prefetch(addr,0,0);
+__riscv_cmo_prefetch(addr,0,1);
+__riscv_cmo_prefetch(addr,0,2);
+__riscv_cmo_prefetch(addr,0,3);
+__riscv_cmo_prefetch(addr,1,0);
+__riscv_cmo_prefetch(addr,1,1);
+__riscv_cmo_prefetch(addr,1,2);
+__riscv_cmo_prefetch(addr,1,3);
+}
+
+int foo5 (int num)
+{
+return __riscv_cmo_prefetchi(num);
+}
+
+void foo6 (void *addr)
+{
+__riscv_cmo_zero(0);
+__riscv_cmo_zero(addr);
+__riscv_cmo_zero((void*)0x121);
+}
+
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Do not inline when callee is versioned but caller is not

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3a533e006d9c81b0b26a95729647911f3c0c2113

commit 3a533e006d9c81b0b26a95729647911f3c0c2113
Author: Yangyu Chen 
Date:   Thu Oct 24 15:12:45 2024 +0800

RISC-V: Do not inline when callee is versioned but caller is not

When the callee is versioned but the caller is not, we should not inline
the callee into the caller, to prevent the default version of the callee
from being inlined into a non-versioned caller.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_can_inline_p): Refuse to inline
when callee is versioned but caller is not.

(cherry picked from commit eb828a1e380e7bb5a708c899081541ee9130ff87)

Diff:
---
 gcc/config/riscv/riscv.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 912ae3ac0129..2ea91e1fc138 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7692,6 +7692,10 @@ riscv_compute_frame_info (void)
 static bool
 riscv_can_inline_p (tree caller, tree callee)
 {
+  /* Do not inline when callee is versioned but caller is not.  */
+  if (DECL_FUNCTION_VERSIONED (callee) && ! DECL_FUNCTION_VERSIONED (caller))
+return false;
+
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH v2 1/2] RISC-V: Make vectorized memset handle more cases

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7e5d9abbc0d4692df374c7aa46a5b13ea8826f30

commit 7e5d9abbc0d4692df374c7aa46a5b13ea8826f30
Author: Craig Blackmore 
Date:   Mon Nov 4 13:55:19 2024 -0700

[PATCH v2 1/2] RISC-V: Make vectorized memset handle more cases

`expand_vec_setmem` only generated vectorized memset if it fitted into a
single vector store of at least (TARGET_MIN_VLEN / 8) bytes.  Also,
without dynamic LMUL the operation was always TARGET_MAX_LMUL even if it
would have fitted a smaller LMUL.

Allow vectorized memset to be generated for smaller lengths and smaller
LMUL by switching to using use_vector_stringop_p.  Smaller LMUL can be
seen in setmem-3.c:f3.  Smaller lengths will be seen after the second
patch in this series which selectively disables by pieces.

gcc/ChangeLog:

* config/riscv/riscv-string.cc
(use_vector_stringop_p): Add comment.
(expand_vec_setmem): Use use_vector_stringop_p instead of
check_vectorise_memory_operation.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul.

(cherry picked from commit b30c6a5eabaf476663f1a1e41165967e782eccd3)

Diff:
---
 gcc/config/riscv/riscv-string.cc   | 37 +++---
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c |  6 ++--
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 118c02a40212..20395e19c604 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1062,6 +1062,9 @@ struct stringop_info {
 
MAX_EW is the maximum element width that the caller wants to use and
LENGTH_IN is the length of the stringop in bytes.
+
+   This is currently used for cpymem and setmem.  If expand_vec_cmpmem switches
+   to using it too then check_vectorise_memory_operation can be removed.
 */
 
 static bool
@@ -1600,41 +1603,39 @@ check_vectorise_memory_operation (rtx length_in, 
HOST_WIDE_INT &lmul_out)
 bool
 expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
 {
-  HOST_WIDE_INT lmul;
+  stringop_info info;
+
   /* Check we are able and allowed to vectorise this operation;
  bail if not.  */
-  if (!check_vectorise_memory_operation (length_in, lmul))
+  if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop)
 return false;
 
-  machine_mode vmode
-  = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
-   .require ();
   rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
-  rtx dst = change_address (dst_in, vmode, dst_addr);
+  rtx dst = change_address (dst_in, info.vmode, dst_addr);
 
-  rtx fill_value = gen_reg_rtx (vmode);
+  rtx fill_value = gen_reg_rtx (info.vmode);
   rtx broadcast_ops[] = { fill_value, fill_value_in };
 
   /* If the length is exactly vlmax for the selected mode, do that.
  Otherwise, use a predicated store.  */
-  if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+  if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
 {
-  emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
- broadcast_ops);
+  emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
+  broadcast_ops);
   emit_move_insn (dst, fill_value);
 }
   else
 {
-  if (!satisfies_constraint_K (length_in))
- length_in = force_reg (Pmode, length_in);
-  emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
- broadcast_ops, length_in);
+  if (!satisfies_constraint_K (info.avl))
+   info.avl = force_reg (Pmode, info.avl);
+  emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
+ riscv_vector::UNARY_OP, broadcast_ops, info.avl);
   machine_mode mask_mode
- = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode))
- .require ();
+   = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
+ .require ();
   rtx mask = CONSTM1_RTX (mask_mode);
-  emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in,
- get_avl_type_rtx (riscv_vector::NONVLMAX)));
+  emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl,
+get_avl_type_rtx (riscv_vector::NONVLMAX)));
 }
 
   return true;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
index 25be694d248a..52766fece76a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
@@ -21,13 +21,13 @@ f1 (void *a, int const b)
   return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1);
 }
 
-/* Vectorise+inline minimum vector register width using requested lmul.
+/* Vectorised code should use smallest lmu

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH v2 2/2] RISC-V: Disable by pieces for vector setmem length > UNITS_PER_WORD

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b4207141e5e1125f0d57d2a5d9e4fd8ae971b6a4

commit b4207141e5e1125f0d57d2a5d9e4fd8ae971b6a4
Author: Craig Blackmore 
Date:   Mon Nov 4 13:57:20 2024 -0700

[PATCH v2 2/2] RISC-V: Disable by pieces for vector setmem length > 
UNITS_PER_WORD

For fast unaligned access targets, by pieces uses up to UNITS_PER_WORD
size pieces resulting in more store instructions than needed.  For
example gcc.target/riscv/rvv/base/setmem-2.c:f1 built with
`-O3 -march=rv64gcv -mtune=thead-c906`:
```
f1:
vsetivlizero,8,e8,mf2,ta,ma
vmv.v.x v1,a1
vsetivlizero,0,e32,mf2,ta,ma
sb  a1,14(a0)
vmv.x.s a4,v1
vsetivlizero,8,e16,m1,ta,ma
vmv.x.s a5,v1
vse8.v  v1,0(a0)
sw  a4,8(a0)
sh  a5,12(a0)
ret
```

The slow unaligned access version built with `-O3 -march=rv64gcv` used
15 sb instructions:
```
f1:
sb  a1,0(a0)
sb  a1,1(a0)
sb  a1,2(a0)
sb  a1,3(a0)
sb  a1,4(a0)
sb  a1,5(a0)
sb  a1,6(a0)
sb  a1,7(a0)
sb  a1,8(a0)
sb  a1,9(a0)
sb  a1,10(a0)
sb  a1,11(a0)
sb  a1,12(a0)
sb  a1,13(a0)
sb  a1,14(a0)
ret
```

After this patch, the following is generated in both cases:
```
f1:
vsetivlizero,15,e8,m1,ta,ma
vmv.v.x v1,a1
vse8.v  v1,0(a0)
ret
```

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_use_by_pieces_infrastructure_p):
New function.
(TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Define.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113469.c: Expect mf2 setmem.
* gcc.target/riscv/rvv/base/setmem-2.c: Update f1 to expect
straight-line vector memset.
* gcc.target/riscv/rvv/base/setmem-3.c: Likewise.

(cherry picked from commit 6b315907c0353f71169a7555e653d29a981fef67)

Diff:
---
 gcc/config/riscv/riscv.cc | 19 +++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c |  3 ++-
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c| 12 +++-
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c| 12 +++-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2ea91e1fc138..6551c1a88ea7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12682,6 +12682,22 @@ riscv_stack_clash_protection_alloca_probe_range (void)
   return STACK_CLASH_CALLER_GUARD;
 }
 
+static bool
+riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned alignment,
+ enum by_pieces_operation op, bool speed_p)
+{
+  /* For set/clear with size > UNITS_PER_WORD, by pieces uses vector broadcasts
+ with UNITS_PER_WORD size pieces.  Use setmem instead which can use
+ bigger chunks.  */
+  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR
+  && (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
+  && speed_p && size > UNITS_PER_WORD)
+return false;
+
+  return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -13044,6 +13060,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
 #undef TARGET_GET_RAW_RESULT_MODE
 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 
riscv_use_by_pieces_infrastructure_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
index d1c118c02d6e..f86084bdb40f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
@@ -51,4 +51,5 @@ void p(int buf, __builtin_va_list ab, int q) {
  } while (k);
 }
 
-/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
index faea442a4bdc..838fbebadff3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
+++ b/gcc/testsuite/gcc.target

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH v2] RISC-V: Fix gcc.target/riscv/rvv/base/cpymem-1.c f3

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:dda02bf87ee9770ee6c12f0bcc0bc9b70a664bb0

commit dda02bf87ee9770ee6c12f0bcc0bc9b70a664bb0
Author: Craig Blackmore 
Date:   Thu Oct 31 09:12:10 2024 -0600

[PATCH v2] RISC-V: Fix gcc.target/riscv/rvv/base/cpymem-1.c f3

The function body checks for f3 only ran with -mcmodel explicitly set
which meant I missed a regression in my local testing of:

  commit b039d06c9a810a3fab4c5eb9d50b0c7aff94b2d8
  Author: Craig Blackmore 
  Date:   Fri Oct 18 09:17:21 2024 -0600

  [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

The failure showed up in the rivos CI and it is due to f3 now using
LMUL m1 instead of m8.

I have reworked the test to make it more robust and maintainable.  This
allowed most of the special casing of command line arguments to be
removed.  It also fixes an issue where some targets would enable
multiple versions of the function body check e.g. `-march=rv32gcv
-mcmodel=medany`.

Changes since v1: Added missing ChangeLog.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/cpymem-1.c: Fix and rework f3.

(cherry picked from commit d6868b284379ecb7deb65d60f6f17fd6c34c7d6f)

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c | 107 +
 1 file changed, 48 insertions(+), 59 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
index 6edb4c9253a4..81d14d836334 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
@@ -9,6 +9,8 @@
 extern void *memcpy(void *__restrict dest, const void *__restrict src, 
__SIZE_TYPE__ n);
 #endif
 
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
 /* memcpy should be implemented using the cpymem pattern.
 ** f1:
 XX \.L\d+: # local label is ignored
@@ -50,70 +52,57 @@ void f2 (__INT32_TYPE__* a, __INT32_TYPE__* b, int l)
Use extern here so that we get a known alignment, lest
DATA_ALIGNMENT force us to make the scan pattern accomodate
code for different alignments depending on word size.
-** f3: { target { { any-opts "-mcmodel=medlow" } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl1024b" "-mrvv-max-lmul=dynamic" 
"-mrvv-max-lmul=m2" "-mrvv-max-lmul=m4" "-mrvv-max-lmul=m8" 
"-mrvv-vector-bits=zvl" } } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vsetivli\s+zero,16,e32,m8,ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medlow -mrvv-vector-bits=zvl" 
"-mcmodel=medlow -march=rv64gcv_zvl512b -mrvv-vector-bits=zvl" } && { no-opts 
"-march=rv64gcv_zvl1024b" } } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vl(1|4|2)re32\.v\s+v\d+,0\([ta][0-7]\)
-**vs(1|4|2)r\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medlow -march=rv64gcv_zvl1024b" 
"-mcmodel=medlow -march=rv64gcv_zvl512b" } && { no-opts "-mrvv-vector-bits=zvl" 
} } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vsetivli\s+zero,16,e32,(m1|m4|mf2),ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medany" } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl256b" "-march=rv64gcv_zvl1024b" 
"-mrvv-max-lmul=dynamic" "-mrvv-max-lmul=m8" "-mrvv-max-lmul=m4" 
"-mrvv-vector-bits=zvl" } } }
-**lla\s+[ta][0-7],a_a
-**lla\s+[ta][0-7],a_b
-**vsetivli\s+zero,16,e32,m8,ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medany"  } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl256b" "-march=rv64gcv" 
"-march=rv64gc_zve64d" "-march=rv64gc_zve32f" } } }
-**lla\s+[ta][0-7],a_b
-**vsetivli\s+zero,16,e32,m(f2|1|4),ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**lla\s+[ta][0-7],a_a
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
+** f3: { target { no-opts "-mrvv-vector-bits=zvl" } }
+**  (
+**  lui\s+[ta][0-7],%hi\(a_a\)
+**  lui\s+[ta][0-7],%hi\(a_b\)
+**  addi\s+[ta][0-7],[ta][0-7],%lo\(a_b\)
+**  vsetivli\s+zero,4,e32,m1,ta,ma
+**  |
+**  lui\s+[ta][0-7],%hi\(a_a\)
+**  lui\s+[ta][0-7],%hi\(a_b\)
+**  li\s+[ta][0-7],\d+
+**  addi\s+[ta][0-7],[ta][0-7],%lo\(a_b\)
+**  vsetvli\s+zero,[ta][0-7],e32,m1,ta,ma
+**  |
+**  lla\s+[ta][0-7],a_b
+**  vsetivli\s+zero,4,e32

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Split riscv_process_target_attr with const char *args argument

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:6af6c65c19ea1801adbee1089c6f7af7730406c8

commit 6af6c65c19ea1801adbee1089c6f7af7730406c8
Author: Yangyu Chen 
Date:   Thu Oct 24 15:10:57 2024 +0800

RISC-V: Split riscv_process_target_attr with const char *args argument

This patch splits static bool riscv_process_target_attr
(tree args, location_t loc) into two functions:

- bool riscv_process_target_attr (const char *args, location_t loc)
- static bool riscv_process_target_attr (tree args, location_t loc)

Thus, we can call `riscv_process_target_attr` with a `const char *`
argument.  This is useful for implementing the `target_version`
attribute.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_process_target_attr): New.
* config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
Split into two functions with const char *args argument

(cherry picked from commit a57c16e50d478cc413e3e530db21de693e4eb2ae)

Diff:
---
 gcc/config/riscv/riscv-protos.h   |  2 ++
 gcc/config/riscv/riscv-target-attr.cc | 65 ---
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0a6b43f0c767..4ed04321d32c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -805,6 +805,8 @@ extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 extern bool
 riscv_option_valid_attribute_p (tree, tree, tree, int);
+extern bool
+riscv_process_target_attr (const char *, location_t);
 extern void
 riscv_override_options_internal (struct gcc_options *);
 extern void riscv_option_override (void);
diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index bf14ade5ce08..8ce9607b3c9b 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -304,35 +304,13 @@ num_occurrences_in_str (char c, char *str)
   return res;
 }
 
-/* Parse the tree in ARGS that contains the target attribute information
+/* Parse the string in ARGS that contains the target attribute information
and update the global target options space.  */
 
-static bool
-riscv_process_target_attr (tree args, location_t loc)
+bool
+riscv_process_target_attr (const char *args, location_t loc)
 {
-  if (TREE_CODE (args) == TREE_LIST)
-{
-  do
-   {
- tree head = TREE_VALUE (args);
- if (head)
-   {
- if (!riscv_process_target_attr (head, loc))
-   return false;
-   }
- args = TREE_CHAIN (args);
-  } while (args);
-
-  return true;
-}
-
-  if (TREE_CODE (args) != STRING_CST)
-{
-  error_at (loc, "attribute % argument not a string");
-  return false;
-}
-
-  size_t len = strlen (TREE_STRING_POINTER (args));
+  size_t len = strlen (args);
 
   /* No need to emit warning or error on empty string here, generic code 
already
  handle this case.  */
@@ -343,7 +321,7 @@ riscv_process_target_attr (tree args, location_t loc)
 
   std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
-  strcpy (str_to_check, TREE_STRING_POINTER (args));
+  strcpy (str_to_check, args);
 
   /* Used to catch empty spaces between semi-colons i.e.
  attribute ((target ("attr1;;attr2"))).  */
@@ -366,7 +344,7 @@ riscv_process_target_attr (tree args, location_t loc)
   if (num_attrs != num_semicolons + 1)
 {
   error_at (loc, "malformed % attribute",
-   TREE_STRING_POINTER (args));
+   args);
   return false;
 }
 
@@ -376,6 +354,37 @@ riscv_process_target_attr (tree args, location_t loc)
   return true;
 }
 
+/* Parse the tree in ARGS that contains the target attribute information
+   and update the global target options space.  */
+
+static bool
+riscv_process_target_attr (tree args, location_t loc)
+{
+  if (TREE_CODE (args) == TREE_LIST)
+{
+  do
+   {
+ tree head = TREE_VALUE (args);
+ if (head)
+   {
+ if (!riscv_process_target_attr (head, loc))
+   return false;
+   }
+ args = TREE_CHAIN (args);
+  } while (args);
+
+  return true;
+}
+
+  if (TREE_CODE (args) != STRING_CST)
+{
+  error_at (loc, "attribute % argument not a string");
+  return false;
+}
+
+  return riscv_process_target_attr (TREE_STRING_POINTER (args), loc);
+}
+
 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P.
This is used to process attribute ((target ("..."))).
Note, that riscv_set_current_function() has not been called before,


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Aggressively hoist VXRM assignments

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e2274cdb525545acc30055e7a420ab0b3308d34c

commit e2274cdb525545acc30055e7a420ab0b3308d34c
Author: Jeff Law 
Date:   Wed Oct 30 07:43:22 2024 -0600

[RISC-V] Aggressively hoist VXRM assignments

So a while back I was looking at pixel_avg for RISC-V where we try to
use vaaddu for the halfword-ceiling-average step.  The problem with
vaaddu is that you must set VXRM to a suitable rounding mode as it has
an undetermined state at function entry or after a function call.

It turns out some designs will fully flush their pipelines on a write to
VXRM which you can imagine is incredibly expensive.

VXRM assignments are handled by an LCM based algorithm to find "optimal"
placement points based on what insns in the stream need VXRM assignments
and the particular mode they need.

Unfortunately in pixel_avg an LCM algorithm only allows hoisting out of
the innermost loop, but not the outer loop.  The core issue is that LCM
does not allow any speculation and there are paths which would bypass
the inner loop (which don't actually trigger at runtime IIRC).

The expectation is that VXRM assignments should be exceedingly rare and
needing more than one mode even rarer.  So hoisting more aggressively
seems like a reasonable thing to do, but we don't want to burn too much
time trying to do something fancy.

So what this patch does is scan the IL once collecting any VXRM needs.
If the current function has precisely one VXRM mode needed, then we
pretend (for the sake of LCM) that the first instruction in the function
also has that need.

By doing so the VXRM assignment is essentially anticipated everywhere in
the function.  The standard LCM algorithm is run and has enough
information to hoist the VXRM assignment more aggressively, most often
to the prologue.

This helps the BPI in a measurable way (IIRC it was 2-3%).  It probably
helps some of the SiFive designs, but I've been told they still benefit
from the longer sequence of shifts & adds, hoisting just isn't enough
for those designs.  The Ventana design basically doesn't care where the
VXRM assignment is.  Point is we may want to have a tuning knob for the
patterns which need VXRM (vaadd[u], vasub[u]) at some point in the near
future.

Bootstrapped and regression tested on riscv64 and regression tested on
riscv32-elf and riscv64-elf.  We've been using this internally for a
while on spec as well.  Obviously I'll wait for the pre-commit
tester to do its thing.

gcc/
* config/riscv/riscv.cc (singleton_vxrm_need): New function.
(riscv_mode_needed): See if there is a singleton need and if so,
claim it happens on the first insn in the chain.

(cherry picked from commit a65e1487cda969e4763ae84577bf3e0d9e2b34aa)

Diff:
---
 gcc/config/riscv/riscv.cc | 69 +++
 1 file changed, 69 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f0d274653146..912ae3ac0129 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11806,6 +11806,65 @@ riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
   return mode;
 }
 
+/* If the current function needs a single VXRM mode, return it.  Else
+   return VXRM_MODE_NONE.
+
+   This is called on the first insn in the chain and scans the full function
+   once to collect VXRM mode settings.  If a single mode is needed, it will
+   often be better to set it once at the start of the function rather than
+   at an anticipation point.  */
+static int
+singleton_vxrm_need (void)
+{
+  /* Only needed for vector code.  */
+  if (!TARGET_VECTOR)
+return VXRM_MODE_NONE;
+
+  /* If ENTRY has more than one successor, then don't optimize, just to
+ keep things simple.  */
+  if (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) > 1)
+return VXRM_MODE_NONE;
+
+  /* Walk the IL noting if VXRM is needed and if there's more than one
+ mode needed.  */
+  bool found = false;
+  int saved_vxrm_mode;
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+{
+  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
+   continue;
+
+  int code = recog_memoized (insn);
+  if (code < 0)
+   continue;
+
+  int vxrm_mode = get_attr_vxrm_mode (insn);
+  if (vxrm_mode == VXRM_MODE_NONE)
+   continue;
+
+  /* If this is the first VXRM need, note it.  */
+  if (!found)
+   {
+ saved_vxrm_mode = vxrm_mode;
+ found = true;
+ continue;
+   }
+
+  /* Not the first VXRM need.  If this is different than
+the saved need, then we're not going to be able to
+optimize and we can stop scanning now.  */
+  if (saved_vxrm_mode != vxrm_mode)
+   return VXRM_MODE_NONE;
+
+  /* Same mode as we've seen, keep scanning. 

[gcc r15-5030] libstdc++: Fix grammar in comment, again

2024-11-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:c26e83918b58040ff735a61de5acb6b3b96c5ea3

commit r15-5030-gc26e83918b58040ff735a61de5acb6b3b96c5ea3
Author: Jonathan Wakely 
Date:   Thu Nov 7 21:57:52 2024 +

libstdc++: Fix grammar in comment, again

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h (_Hashtable): Fix comment grammar.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index d36b32a7e3fa..6c553fb4b08a 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -353,7 +353,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
"Functor used to map hash code to bucket index must be"
" noexcept");
 
-  // To compute bucket index we also need _ExtractKey be non-throwing.
+  // To compute bucket index we also need _ExtractKey to be non-throwing.
   static_assert(is_nothrow_default_constructible<_ExtractKey>::value,
"_ExtractKey must be nothrow default constructible");
   static_assert(noexcept(


[gcc r15-5031] libstdc++: Define __is_pair variable template for C++11

2024-11-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:dd08cdccc36d084eda0e2748c772f6bf9a7f412f

commit r15-5031-gdd08cdccc36d084eda0e2748c772f6bf9a7f412f
Author: Jonathan Wakely 
Date:   Fri Nov 1 10:09:55 2024 +

libstdc++: Define __is_pair variable template for C++11

libstdc++-v3/ChangeLog:

* include/bits/stl_pair.h (__is_pair): Define for C++11 and
C++14 as well.

Diff:
---
 libstdc++-v3/include/bits/stl_pair.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libstdc++-v3/include/bits/stl_pair.h 
b/libstdc++-v3/include/bits/stl_pair.h
index e92fcad2d660..527fb9105f0b 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -1189,12 +1189,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 inline constexpr size_t tuple_size_v> = 2;
+#endif
 
+#if __cplusplus >= 201103L
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++14-extensions" // variable templates
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // inline variables
   template
 inline constexpr bool __is_pair = false;
 
   template
 inline constexpr bool __is_pair> = true;
+#pragma GCC diagnostic pop
 #endif
 
   /// @cond undocumented


[gcc r15-5032] libstdc++: Fix conversions to key/value types for hash table insertion [PR115285]

2024-11-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:90c578654a2c96032aa6621449859243df5f641b

commit r15-5032-g90c578654a2c96032aa6621449859243df5f641b
Author: Jonathan Wakely 
Date:   Tue Nov 5 17:19:06 2024 +

libstdc++: Fix conversions to key/value types for hash table insertion 
[PR115285]

The conversions to key_type and value_type that are performed when
inserting into _Hashtable need to be fixed to do any required
conversions explicitly. The current code assumes that conversions from
the parameter to the key_type or value_type can be done implicitly,
which isn't necessarily true.

Remove the _S_forward_key function which doesn't handle all cases and
either forward the parameter if it already has type cv key_type, or
explicitly construct a temporary of type key_type.

Similarly, the _ConvertToValueType specialization for maps doesn't
handle all cases either, for std::pair arguments only some value
categories are handled. Remove _ConvertToValueType and for the _M_insert
function for unique keys, either forward the argument unchanged or
explicitly construct a temporary of type value_type.

For the _M_insert overload for non-unique keys we don't need any
conversion at all, we can just forward the argument directly to where we
construct a node.

libstdc++-v3/ChangeLog:

PR libstdc++/115285
* include/bits/hashtable.h (_Hashtable::_S_forward_key): Remove.
(_Hashtable::_M_insert_unique_aux): Replace _S_forward_key with
a static_cast to a type defined using conditional_t.
(_Hashtable::_M_insert): Replace _ConvertToValueType with a
static_cast to a type defined using conditional_t.
* include/bits/hashtable_policy.h (_ConvertToValueType): Remove.
* testsuite/23_containers/unordered_map/insert/115285.cc: New test.
* testsuite/23_containers/unordered_set/insert/115285.cc: New test.
* testsuite/23_containers/unordered_set/96088.cc: Adjust
expected number of allocations.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h  | 33 ++-
 libstdc++-v3/include/bits/hashtable_policy.h   | 34 
 .../23_containers/unordered_map/insert/115285.cc   | 47 ++
 .../testsuite/23_containers/unordered_set/96088.cc |  2 +-
 .../23_containers/unordered_set/insert/115285.cc   | 28 +
 5 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 6c553fb4b08a..bd514cab798d 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -929,25 +929,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::pair
_M_insert_unique(_Kt&&, _Arg&&, _NodeGenerator&);
 
-  template
-   key_type
-   _S_forward_key(_Kt&& __k)
-   { return std::forward<_Kt>(__k); }
-
-  static const key_type&
-  _S_forward_key(const key_type& __k)
-  { return __k; }
-
-  static key_type&&
-  _S_forward_key(key_type&& __k)
-  { return std::move(__k); }
-
   template
std::pair
_M_insert_unique_aux(_Arg&& __arg, _NodeGenerator& __node_gen)
{
+ using _Kt = decltype(_ExtractKey{}(std::forward<_Arg>(__arg)));
+ constexpr bool __is_key_type
+   = is_same<__remove_cvref_t<_Kt>, key_type>::value;
+ using _Fwd_key = __conditional_t<__is_key_type, _Kt&&, key_type>;
  return _M_insert_unique(
-   _S_forward_key(_ExtractKey{}(std::forward<_Arg>(__arg))),
+   static_cast<_Fwd_key>(_ExtractKey{}(std::forward<_Arg>(__arg))),
std::forward<_Arg>(__arg), __node_gen);
}
 
@@ -956,10 +947,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert(_Arg&& __arg, _NodeGenerator& __node_gen,
  true_type /* __uks */)
{
- using __to_value
-   = __detail::_ConvertToValueType<_ExtractKey, value_type>;
+ using __detail::_Identity;
+ using _Vt = __conditional_t::value
+   || __is_pair<__remove_cvref_t<_Arg>>,
+ _Arg&&, value_type>;
  return _M_insert_unique_aux(
-   __to_value{}(std::forward<_Arg>(__arg)), __node_gen);
+  static_cast<_Vt>(std::forward<_Arg>(__arg)), __node_gen);
}
 
   template
@@ -967,10 +960,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert(_Arg&& __arg, _NodeGenerator& __node_gen,
  false_type __uks)
{
- using __to_value
-   = __detail::_ConvertToValueType<_ExtractKey, value_type>;
- return _M_insert(cend(),
-   __to_value{}(std::forward<_Arg>(__arg)), __node_gen, __uks);
+ return _M_insert(cend(), std::forward<_Arg>(__arg),
+  __node_gen, __uks);
}
 
   // Insert with hint

[gcc r15-5033] libstdc++: Improve comment for _Hashtable::_M_insert_unique_node

2024-11-07 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:e97179bacd067ccd3ee765632e0c034df152ccb6

commit r15-5033-ge97179bacd067ccd3ee765632e0c034df152ccb6
Author: Jonathan Wakely 
Date:   Thu Nov 7 16:51:58 2024 +

libstdc++: Improve comment for _Hashtable::_M_insert_unique_node

Clarify the effects if rehashing is needed. Document the __n_elt
parameter.

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h (_M_insert_unique_node): Improve
comment.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index bd514cab798d..6bcba2de368e 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -893,9 +893,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   pair<__node_ptr, __hash_code>
   _M_compute_hash_code(__node_ptr __hint, const key_type& __k) const;
 
-  // Insert node __n with hash code __code, in bucket __bkt if no
-  // rehash (assumes no element with same key already present).
+  // Insert node __n with hash code __code, in bucket __bkt (or another
+  // bucket if rehashing is needed).
+  // Assumes no element with equivalent key is already present.
   // Takes ownership of __n if insertion succeeds, throws otherwise.
+  // __n_elt is an estimated number of elements we expect to insert,
+  // used as a hint for rehashing when inserting a range.
   iterator
   _M_insert_unique_node(size_type __bkt, __hash_code,
__node_ptr __n, size_type __n_elt = 1);


[gcc r14-10897] aarch64: Add support for FUJITSU-MONAKA (-mcpu=fujitsu-monaka) CPU

2024-11-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:6bcd3935f05056f1ed777882a70c247dc637cf6d

commit r14-10897-g6bcd3935f05056f1ed777882a70c247dc637cf6d
Author: Yuta Mukai 
Date:   Thu Nov 7 22:09:39 2024 +

aarch64: Add support for FUJITSU-MONAKA (-mcpu=fujitsu-monaka) CPU

This patch adds initial support for FUJITSU-MONAKA CPU.
The cost model will be corrected in the future.

2024-11-07  Yuta Mukai  

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add 
fujitsu-monaka.
* config/aarch64/aarch64-tune.md: Regenerate.
* config/aarch64/aarch64.cc: Include fujitsu-monaka tuning model.
* doc/invoke.texi: Document -mcpu=fujitsu-monaka.
* config/aarch64/tuning_models/fujitsu_monaka.h: New file.

Diff:
---
 gcc/config/aarch64/aarch64-cores.def  |  1 +
 gcc/config/aarch64/aarch64-tune.md|  2 +-
 gcc/config/aarch64/aarch64.cc |  1 +
 gcc/config/aarch64/tuning_models/fujitsu_monaka.h | 65 +++
 gcc/doc/invoke.texi   |  2 +-
 5 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index f5536388f611..1ab09ea5f720 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -132,6 +132,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 
V8_2A,  (CRYPTO, PROFI
 
 /* Fujitsu ('F') cores. */
 AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A,  (F16, SVE), a64fx, 0x46, 0x001, -1)
+AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, LS64, 
RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 
0x46, 0x003, -1)
 
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, tsv110, V8_2A,  (CRYPTO, F16), tsv110,   0x48, 
0xd01, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 80254836e0ef..06e8680607bd 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,grace,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,grace,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 33a46c9eabec..32adc2fa9854 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -410,6 +410,7 @@ static const struct aarch64_flag_desc 
aarch64_tuning_flags[] =
 #include "tuning_models/neoversen2.h"
 #include "tuning_models/neoversev2.h"
 #include "tuning_models/a64fx.h"
+#include "tuning_models/fujitsu_monaka.h"
 
 /* Support for fine-grained override of the tuning structures.  */
 struct aarch64_tuning_override_function
diff --git a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h 
b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
new file mode 100644
index ..c3a1e0620e2f
--- /dev/null
+++ b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
@@ -0,0 +1,65 @@
+/* Tuning model description for FUJITSU-MONAKA.
+   Copyright (C) 2009-2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free 

[gcc r15-5016] Unify registered_pp_pragmas and registered_pragmas

2024-11-07 Thread Paul Iannetta via Gcc-cvs
https://gcc.gnu.org/g:06a725a6f77da0ac28d4ddf20bfb7f191363aa5f

commit r15-5016-g06a725a6f77da0ac28d4ddf20bfb7f191363aa5f
Author: Paul Iannetta 
Date:   Wed Oct 30 11:21:09 2024 +0100

Unify registered_pp_pragmas and registered_pragmas

Until now, the structures that keep pragma information were different
when in preprocessing only mode and in normal mode.  This change unifies
both so that the space and name of a pragma are always registered and
can be queried easily at a later time.

gcc/c-family/ChangeLog:

* c-pragma.cc (struct pragma_pp_data): Use (struct 
internal_pragma_handler);
(c_register_pragma_1): Always register name and space for all 
pragmas.
(c_invoke_pragma_handler): Adapt.
(c_invoke_early_pragma_handler): Likewise.
(c_pp_invoke_early_pragma_handler): Likewise.

Diff:
---
 gcc/c-family/c-pragma.cc | 66 +---
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/gcc/c-family/c-pragma.cc b/gcc/c-family/c-pragma.cc
index de7c378965d0..c78721824e3f 100644
--- a/gcc/c-family/c-pragma.cc
+++ b/gcc/c-family/c-pragma.cc
@@ -1488,17 +1488,15 @@ handle_pragma_float_const_decimal64 (cpp_reader *)
 
 /* A vector of registered pragma callbacks, which is never freed.   */
 
-static vec<internal_pragma_handler> registered_pragmas;
 
-struct pragma_pp_data
+struct pragma_data
 {
   const char *space;
   const char *name;
-  pragma_handler_1arg early_handler;
+  struct internal_pragma_handler ihandler;
 };
 
-
-static vec<pragma_pp_data> registered_pp_pragmas;
+static vec<pragma_data> registered_pragmas;
 
 struct omp_pragma_def { const char *name; unsigned int id; };
 static const struct omp_pragma_def oacc_pragmas[] = {
@@ -1594,10 +1592,10 @@ c_pp_lookup_pragma (unsigned int id, const char 
**space, const char **name)
   }
 
   if (id >= PRAGMA_FIRST_EXTERNAL
-  && (id < PRAGMA_FIRST_EXTERNAL + registered_pp_pragmas.length ()))
+  && (id < PRAGMA_FIRST_EXTERNAL + registered_pragmas.length ()))
 {
-  *space = registered_pp_pragmas[id - PRAGMA_FIRST_EXTERNAL].space;
-  *name = registered_pp_pragmas[id - PRAGMA_FIRST_EXTERNAL].name;
+  *space = registered_pragmas[id - PRAGMA_FIRST_EXTERNAL].space;
+  *name = registered_pragmas[id - PRAGMA_FIRST_EXTERNAL].name;
   return;
 }
 
@@ -1613,31 +1611,24 @@ c_register_pragma_1 (const char *space, const char 
*name,
 {
   unsigned id;
 
-  if (flag_preprocess_only)
-{
-  if (cpp_get_options (parse_in)->directives_only
- || !(allow_expansion || ihandler.early_handler.handler_1arg))
-   return;
+  pragma_data data;
+  data.space = space;
+  data.name = name;
 
-  pragma_pp_data pp_data;
-  pp_data.space = space;
-  pp_data.name = name;
-  pp_data.early_handler = ihandler.early_handler.handler_1arg;
-  registered_pp_pragmas.safe_push (pp_data);
-  id = registered_pp_pragmas.length ();
-  id += PRAGMA_FIRST_EXTERNAL - 1;
-}
-  else
-{
-  registered_pragmas.safe_push (ihandler);
-  id = registered_pragmas.length ();
-  id += PRAGMA_FIRST_EXTERNAL - 1;
-
-  /* The C front end allocates 8 bits in c_token.  The C++ front end
-keeps the pragma kind in the form of INTEGER_CST, so no small
-limit applies.  At present this is sufficient.  */
-  gcc_assert (id < 256);
-}
+  if (flag_preprocess_only
+  && (cpp_get_options (parse_in)->directives_only
+   || !(allow_expansion || ihandler.early_handler.handler_1arg)))
+return;
+
+  data.ihandler = ihandler;
+  registered_pragmas.safe_push (data);
+  id = registered_pragmas.length ();
+  id += PRAGMA_FIRST_EXTERNAL - 1;
+
+  /* The C front end allocates 8 bits in c_token.  The C++ front end
+ keeps the pragma kind in the form of INTEGER_CST, so no small
+ limit applies.  At present this is sufficient.  */
+  gcc_assert (id < 256);
 
   cpp_register_deferred_pragma (parse_in, space, name, id,
allow_expansion, false);
@@ -1731,7 +1722,7 @@ c_invoke_pragma_handler (unsigned int id)
   pragma_handler_2arg handler_2arg;
 
   id -= PRAGMA_FIRST_EXTERNAL;
-  ihandler = &registered_pragmas[id];
+  ihandler = &registered_pragmas[id].ihandler;
   if (ihandler->extra_data)
 {
   handler_2arg = ihandler->handler.handler_2arg;
@@ -1753,7 +1744,7 @@ c_invoke_early_pragma_handler (unsigned int id)
   pragma_handler_2arg handler_2arg;
 
   id -= PRAGMA_FIRST_EXTERNAL;
-  ihandler = &registered_pragmas[id];
+  ihandler = &registered_pragmas[id].ihandler;
   if (ihandler->extra_data)
 {
   handler_2arg = ihandler->early_handler.handler_2arg;
@@ -1771,10 +1762,11 @@ c_invoke_early_pragma_handler (unsigned int id)
 void
 c_pp_invoke_early_pragma_handler (unsigned int id)
 {
-  const auto data = &registered_pp_pragmas[id - PRAGMA_FIRST_EXTERNAL];
-  if (data->early_handler)
+  const auto data = &registered_pragmas[id - PRAGMA_FIRST_EXTERNAL];
+  pragma_handler_1arg handler = da

[gcc r15-5028] aarch64: Make PSEL dependent on SME rather than SME2

2024-11-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:afd3887262edbdd5d7be5f34658432fd3046a168

commit r15-5028-gafd3887262edbdd5d7be5f34658432fd3046a168
Author: Richard Sandiford 
Date:   Thu Nov 7 20:34:49 2024 +

aarch64: Make PSEL dependent on SME rather than SME2

The svpsel_lane intrinsics were wrongly classified as SME2+ only,
rather than as base SME intrinsics.  They should always be available
in streaming mode.

gcc/
* config/aarch64/aarch64-sve2.md (@aarch64_sve_psel)
(*aarch64_sve_psel_plus): Require TARGET_STREAMING
rather than TARGET_STREAMING_SME2.

gcc/testsuite/
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_b8.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_b8.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c16.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c16.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c32.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c32.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c64.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c64.c: ...here.
* gcc.target/aarch64/sme2/acle-asm/psel_lane_c8.c: Move to...
* gcc.target/aarch64/sme/acle-asm/psel_lane_c8.c: ...here.

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md| 4 ++--
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b16.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b32.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b64.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_b8.c  | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c16.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c32.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c64.c | 2 +-
 .../gcc.target/aarch64/{sme2 => sme}/acle-asm/psel_lane_c8.c  | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 08f83fc7ca07..ac27124fb74e 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -418,7 +418,7 @@
   (match_operand:SI 3 "register_operand" "Ucj")
   (const_int BHSD_BITS)]
  UNSPEC_PSEL))]
-  "TARGET_STREAMING_SME2"
+  "TARGET_STREAMING"
   "psel\t%0, %1, %2.[%w3, 0]"
 )
 
@@ -432,7 +432,7 @@
 (match_operand:SI 4 "const_int_operand"))
   (const_int BHSD_BITS)]
  UNSPEC_PSEL))]
-  "TARGET_STREAMING_SME2
+  "TARGET_STREAMING
&& UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
   "psel\t%0, %1, %2.[%w3, %4]"
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
index 704e9e375f5e..45dda808d2a6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b16.c
@@ -1,6 +1,6 @@
 /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
 
-#include "test_sme2_acle.h"
+#include "test_sme_acle.h"
 
 /*
 ** psel_lane_p0_p2_p7_0:
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
index 7d9c7a129ea4..d3d1b7b42cac 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b32.c
@@ -1,6 +1,6 @@
 /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
 
-#include "test_sme2_acle.h"
+#include "test_sme_acle.h"
 
 /*
 ** psel_lane_p0_p2_p7_0:
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c
similarity index 98%
rename from gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c
rename to gcc/testsuite/gcc.target/aarch64/sme/acle-asm/psel_lane_b64.c
index a59032a57f61..8c1e014db650 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/psel_lane_b64.c
+++ b/

[gcc r15-5027] aarch64: Restrict FCLAMP to SME2

2024-11-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:f5962839d6e0c3115931e68d938d9a0cd7a383b1

commit r15-5027-gf5962839d6e0c3115931e68d938d9a0cd7a383b1
Author: Richard Sandiford 
Date:   Thu Nov 7 20:34:48 2024 +

aarch64: Restrict FCLAMP to SME2

There are two sets of patterns for FCLAMP: one set for single registers
and one set for multiple registers.  The multiple-register set was
correctly gated on SME2, but the single-register set only required SME.
This doesn't matter for ACLE usage, since the intrinsic definitions
are correctly gated.  But it does matter for automatic generation of
FCLAMP from separate minimum and maximum operations (either ACLE
intrinsics or autovectorised code).

gcc/
* config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp)
(*aarch64_sve_fclamp_x): Require TARGET_STREAMING_SME2
rather than TARGET_STREAMING_SME.

gcc/testsuite/
* gcc.target/aarch64/sme/clamp_3.c: Force sme2
* gcc.target/aarch64/sme/clamp_4.c: Likewise.
* gcc.target/aarch64/sme/clamp_5.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md |  4 ++--
 gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c | 24 
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 8047f405a17c..08f83fc7ca07 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1117,7 +1117,7 @@
 UNSPEC_FMAXNM)
   (match_operand:SVE_FULL_F 3 "register_operand")]
  UNSPEC_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
  [   w, %0, w, w; * ] fclamp\t%0., %2., 
%3.
  [ ?&w,  w, w, w; yes   ] movprfx\t%0, 
%1\;fclamp\t%0., %2., %3.
@@ -1137,7 +1137,7 @@
 UNSPEC_COND_FMAXNM)
   (match_operand:SVE_FULL_F 3 "register_operand")]
  UNSPEC_COND_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
  [   w, %0, w, w; * ] #
  [ ?&w,  w, w, w; yes   ] #
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
index 44959f794909..162de6224d58 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
@@ -2,6 +2,8 @@
 
 #include 
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE) \
   TYPE \
   tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
index 643b2635b90e..453c82cd8605 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
@@ -2,6 +2,8 @@
 
 #include 
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE) \
   TYPE \
   untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming
\
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644
index ..7c5464bdc366
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
@@ -0,0 +1,24 @@
+// { dg-options "-O" }
+
+#include 
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE) \
+  TYPE \
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+  {\
+return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c);  \
+  }\
+   \
+  TYPE \
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+  {\
+return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c);  \
+  }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */


[gcc r15-5029] aarch64: Fix gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c

2024-11-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:156f536d54b2f6f41de4719f9b3a8a33273a51a9

commit r15-5029-g156f536d54b2f6f41de4719f9b3a8a33273a51a9
Author: Richard Sandiford 
Date:   Thu Nov 7 20:34:50 2024 +

aarch64: Fix gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c

I missed a search-and-replace on this test, meaning that it was
duplicating bfmlalb_f32.c.

gcc/testsuite/
* gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c: Replace bfmla*
with bfmls*

Diff:
---
 .../gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c | 60 +++---
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
index f67316cd33ce..946af545141c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/bfmlslb_f32.c
@@ -3,63 +3,63 @@
 #include "test_sme2_acle.h"
 
 /*
-** bfmlalb_f32_tied1:
-** bfmlalb z0\.s, z4\.h, z5\.h
+** bfmlslb_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h
 ** ret
 */
-TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
-z0 = svbfmlalb_f32 (z0, z4, z5),
-z0 = svbfmlalb (z0, z4, z5))
+TEST_DUAL_Z (bfmlslb_f32_tied1, svfloat32_t, svbfloat16_t,
+z0 = svbfmlslb_f32 (z0, z4, z5),
+z0 = svbfmlslb (z0, z4, z5))
 
 /*
-** bfmlalb_f32_tied2:
+** bfmlslb_f32_tied2:
 ** mov (z[0-9]+)\.d, z0\.d
 ** movprfx z0, z4
-** bfmlalb z0\.s, \1\.h, z1\.h
+** bfmlslb z0\.s, \1\.h, z1\.h
 ** ret
 */
-TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
-z0_res = svbfmlalb_f32 (z4, z0, z1),
-z0_res = svbfmlalb (z4, z0, z1))
+TEST_DUAL_Z_REV (bfmlslb_f32_tied2, svfloat32_t, svbfloat16_t,
+z0_res = svbfmlslb_f32 (z4, z0, z1),
+z0_res = svbfmlslb (z4, z0, z1))
 
 /*
-** bfmlalb_f32_tied3:
+** bfmlslb_f32_tied3:
 ** mov (z[0-9]+)\.d, z0\.d
 ** movprfx z0, z4
-** bfmlalb z0\.s, z1\.h, \1\.h
+** bfmlslb z0\.s, z1\.h, \1\.h
 ** ret
 */
-TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
-z0_res = svbfmlalb_f32 (z4, z1, z0),
-z0_res = svbfmlalb (z4, z1, z0))
+TEST_DUAL_Z_REV (bfmlslb_f32_tied3, svfloat32_t, svbfloat16_t,
+z0_res = svbfmlslb_f32 (z4, z1, z0),
+z0_res = svbfmlslb (z4, z1, z0))
 
 /*
-** bfmlalb_f32_untied:
+** bfmlslb_f32_untied:
 ** movprfx z0, z1
-** bfmlalb z0\.s, z4\.h, z5\.h
+** bfmlslb z0\.s, z4\.h, z5\.h
 ** ret
 */
-TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
-z0 = svbfmlalb_f32 (z1, z4, z5),
-z0 = svbfmlalb (z1, z4, z5))
+TEST_DUAL_Z (bfmlslb_f32_untied, svfloat32_t, svbfloat16_t,
+z0 = svbfmlslb_f32 (z1, z4, z5),
+z0 = svbfmlslb (z1, z4, z5))
 
 /*
-** bfmlalb_h7_f32_tied1:
+** bfmlslb_h7_f32_tied1:
 ** mov (z[0-9]+\.h), h7
-** bfmlalb z0\.s, z4\.h, \1
+** bfmlslb z0\.s, z4\.h, \1
 ** ret
 */
-TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
- z0 = svbfmlalb_n_f32 (z0, z4, d7),
- z0 = svbfmlalb (z0, z4, d7))
+TEST_DUAL_ZD (bfmlslb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z0, z4, d7),
+ z0 = svbfmlslb (z0, z4, d7))
 
 /*
-** bfmlalb_h7_f32_untied:
+** bfmlslb_h7_f32_untied:
 ** mov (z[0-9]+\.h), h7
 ** movprfx z0, z1
-** bfmlalb z0\.s, z4\.h, \1
+** bfmlslb z0\.s, z4\.h, \1
 ** ret
 */
-TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
- z0 = svbfmlalb_n_f32 (z1, z4, d7),
- z0 = svbfmlalb (z1, z4, d7))
+TEST_DUAL_ZD (bfmlslb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z1, z4, d7),
+ z0 = svbfmlslb (z1, z4, d7))


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement scalar SAT_SUB for signed integer

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:15690407e1aadbdf8303a854c2f56e84e122c22e

commit 15690407e1aadbdf8303a854c2f56e84e122c22e
Author: Pan Li 
Date:   Wed Sep 25 09:36:05 2024 +0800

RISC-V: Implement scalar SAT_SUB for signed integer

This patch would like to implement the sssub form 1.  Aka:

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_1 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
return (x ^ y) >= 0\
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │ subwa5,a0,a1
  12   │ slliw   a5,a5,24
  13   │ sraiw   a5,a5,24
  14   │ xor a1,a0,a1
  15   │ xor a4,a0,a5
  16   │ and a1,a1,a4
  17   │ blt a1,zero,.L4
  18   │ mv  a0,a5
  19   │ ret
  20   │ .L4:
  21   │ sraia0,a0,63
  22   │ xoria5,a0,127
  23   │ mv  a0,a5
  24   │ ret

After this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │ sub a4,a0,a1
  12   │ xor a5,a0,a4
  13   │ xor a1,a0,a1
  14   │ and a5,a5,a1
  15   │ srlia5,a5,7
  16   │ andia5,a5,1
  17   │ sraia0,a0,63
  18   │ xoria3,a0,127
  19   │ neg a0,a5
  20   │ addia5,a5,-1
  21   │ and a3,a3,a0
  22   │ and a0,a4,a5
  23   │ or  a0,a0,a3
  24   │ slliw   a0,a0,24
  25   │ sraiw   a0,a0,24
  26   │ ret

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_sssub): Add new func
decl for expanding signed SAT_SUB.
* config/riscv/riscv.cc (riscv_expand_sssub): Add new func impl
for expanding signed SAT_SUB.
* config/riscv/riscv.md (sssub3): Add new pattern sssub
for scalar signed integer.

Signed-off-by: Pan Li 
(cherry picked from commit b6ea98bcaf1dad506fa643df8df50187feeb7e35)

Diff:
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc   | 69 +
 gcc/config/riscv/riscv.md   | 11 +++
 3 files changed, 81 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 07a4d42e3a52..3d8775e582dc 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -136,6 +136,7 @@ extern void riscv_legitimize_poly_move (machine_mode, rtx, 
rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 583d552901d6..13e8338bb1b0 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12328,6 +12328,75 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implements the signed saturation sub standard name sssub for int mode.
+
+   z = SAT_SUB(x, y).
+   =>
+   1.  minus = x - y
+   2.  xor_0 = x ^ y
+   3.  xor_1 = x ^ minus
+   4.  lt_0 = xor_1 < 0
+   5.  lt_1 = xor_0 < 0
+   6.  and = lt_0 & lt_1
+   7.  lt = x < 0
+   8.  neg = -lt
+   9.  max = INT_MAX
+   10. max = max ^ neg
+   11. neg = -and
+   12. max = max & neg
+   13. and = and - 1
+   14. z = minus & and
+   15. z = z | max  */
+
+void
+riscv_expand_sssub (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+  rtx shift_bits = GEN_INT (bitsize - 1);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_minus = gen_reg_rtx (Xmode);
+  rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+  rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_0 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_1 = gen_reg_rtx (Xmode);
+  rtx xmode_and = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_neg = gen_reg_rtx (Xmode);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+
+  /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus.  */
+  riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus);
+
+  /* Step-2: an

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Add splitters to restore condops generation after recent phiopt changes

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0e45f9a77a54860cf026e2d41838fff66fe83d50

commit 0e45f9a77a54860cf026e2d41838fff66fe83d50
Author: Jeff Law 
Date:   Mon Oct 7 11:49:21 2024 -0600

[RISC-V] Add splitters to restore condops generation after recent phiopt 
changes

V2:
  Fix typo in ChangeLog.
  Remove now extraneous comment in cset-sext.c.
  Throttle back branch cost to 1 in various tests

--

Andrew P's recent improvements to phiopt regressed on the riscv testsuite.

Essentially the new code presented to the RTL optimizers is straightline 
code rather than branchy for the CE pass to analyze and optimize.  In the 
absence of conditional move support or sfb, the new code would be better.

Unfortunately the presented form isn't a great fit for xventanacondops, 
zicond or xtheadcondmov.  The net is the resulting code is actually slightly 
worse than before.  Essentially sne+czero turned into sne+sne+and.

Thankfully, combine is presented with

(and (ne (op1) (const_int 0))
 (ne (op2) (const_int 0)))

As the RHS of a set.  We can use a 3->2 splitter to guide combine on how to 
profitably rewrite the sequence in a form suitable for condops.  Just splitting 
that would be enough to fix the regression, but I'm fairly confident that other 
cases need to be handled and would have regressed had the testsuite been more 
thorough.

One arm of the AND is going to turn into an sCC instruction.  We have a 
variety of those that we support.  The codes vary as do the allowed operands of 
the sCC.  That produces a set of new splitters to handle those cases.

The other arm is going to turn into a czero (or similar) instruction. That 
one can be generalized to eq/ne.  So another set for that generalization.

We can remove a couple of XFAILs in the rv32 space as it's behaving much 
more like rv64 at this point.

For SFB targets it's unclear if the new code is better or worse.  In both 
cases it's a 3 instruction sequence.   So I just adjusted the test.  If the new 
code is worse for SFB, someone with an understanding of the tradeoffs for an 
SFB target will need to make adjustments.

Tested in my tester on rv64gcv and rv32gc.  Will wait for the pre-commit 
testers to render their verdict before moving forward.

gcc/

* config/riscv/iterators.md (scc_0): New code iterator.
* config/riscv/zicond.md: New splitters to improve code generated 
for
cases like (and (scc) (scc)) for zicond, xventanacondops, 
xtheadcondmov.

gcc/testsuite/

* gcc.target/riscv/cset-sext-sfb.c: Turn off ssa-phiopt.
* gcc.target/riscv/cset-sext-thead.c: Do not check CE output 
anymore.
* gcc.target/riscv/cset-sext-ventana.c: Similarly.  Adjust branch 
cost.
* gcc.target/riscv/cset-sext-zicond.c: Similarly.
* gcc.target/riscv/cset-sext.c: Similarly.  No longer allow
"neg" in asm output.

(cherry picked from commit a2a956cf26e645bfddbc0b743b97472e298c7a8c)

Diff:
---
 gcc/config/riscv/iterators.md  |   2 +
 gcc/config/riscv/zicond.md | 112 +
 gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c |  12 +--
 gcc/testsuite/gcc.target/riscv/cset-sext-thead.c   |   3 +-
 gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c |   3 +-
 gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c  |   9 +-
 gcc/testsuite/gcc.target/riscv/cset-sext.c |  11 +-
 7 files changed, 131 insertions(+), 21 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 2844cb02ff09..872c542e9065 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -233,6 +233,8 @@
 (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
+;; Iterators for conditions we can emit a sCC against 0 or a reg directly
+(define_code_iterator scc_0  [eq ne gt gtu])
 
 ; atomics code iterator
 (define_code_iterator any_atomic [plus ior xor and])
diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md
index 3876be7f9d29..ab1a5337ee53 100644
--- a/gcc/config/riscv/zicond.md
+++ b/gcc/config/riscv/zicond.md
@@ -124,3 +124,115 @@
 {
   operands[2] = GEN_INT (1 << UINTVAL(operands[2]));
 })
+
+;; In some cases gimple can give us a sequence with a logical and
+;; of two sCC insns.  This can be implemented as an sCC feeding a
+;; conditional zero.
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+   (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0))
+  (scc_0:X (match_operand:X 2 "register_operand")
+   (match_operand:X 3 "reg_or_0_operand"
+   (clobber (match_operand:X 4 "register_operand"))]
+  "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+  [(set (match_dup 4) (scc_0:X (match_dup 2) (match_dup 3)))
+   

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 2 of scalar signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:599aacb21f10585fe5d6c9101e02238fc648c426

commit 599aacb21f10585fe5d6c9101e02238fc648c426
Author: Pan Li 
Date:   Thu Sep 26 20:21:10 2024 +0800

RISC-V: Add testcases for form 2 of scalar signed SAT_SUB

Form 2:
  #define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_2 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
if ((x ^ y) >= 0 || (minus ^ x) >= 0)  \
  return minus;\
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_SUB_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_sub-2-i16.c: New test.
* gcc.target/riscv/sat_s_sub-2-i32.c: New test.
* gcc.target/riscv/sat_s_sub-2-i64.c: New test.
* gcc.target/riscv/sat_s_sub-2-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-2-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit ed7b3e78183ffed49f197536239812fe77d7d687)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 +++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c   | 30 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i32.c   | 28 
 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i64.c   | 27 +++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i8.c| 28 
 .../gcc.target/riscv/sat_s_sub-run-2-i16.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-2-i32.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-2-i64.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-2-i8.c  | 16 
 9 files changed, 192 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 587f3f8348c2..66d393399a29 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -367,9 +367,24 @@ sat_s_sub_##T##_fmt_1 (T x, T y) \
 #define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_sub_##T##_fmt_2 (T x, T y) \
+{\
+  T minus = (UT)x - (UT)y;   \
+  if ((x ^ y) >= 0 || (minus ^ x) >= 0)  \
+return minus;\
+  return x < 0 ? MIN : MAX;  \
+}
+#define DEF_SAT_S_SUB_FMT_2_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
 
+#define RUN_SAT_S_SUB_FMT_2(T, x, y) sat_s_sub_##T##_fmt_2(x, y)
+#define RUN_SAT_S_SUB_FMT_2_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_2(T, x, y)
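+
 /******************************************************************************/
 /* Saturation Truncate (unsigned and signed)                                  */
 /******************************************************************************/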
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c
new file mode 100644
index 000000000000..6aac2c71ba44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_sub-2-i16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_sub_int16_t_fmt_2:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slliw\s+a0,\s*a0,\s*16
+** sraiw\s+a0,\s*a0,\s*16
+

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 1 of scalar signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:9ef5ea5baba5d85a41fd5c272289936055cbf877

commit 9ef5ea5baba5d85a41fd5c272289936055cbf877
Author: Pan Li 
Date:   Wed Sep 25 09:42:31 2024 +0800

RISC-V: Add testcases for form 1 of scalar signed SAT_SUB

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_1 (T x, T y) \
  {\
T minus = (UT)x - (UT)y;   \
return (x ^ y) >= 0\
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data for SAT_SUB.
* gcc.target/riscv/sat_s_sub-1-i16.c: New test.
* gcc.target/riscv/sat_s_sub-1-i32.c: New test.
* gcc.target/riscv/sat_s_sub-1-i64.c: New test.
* gcc.target/riscv/sat_s_sub-1-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-1-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit a2a78c0639dbebdab19d71f54edca99e7f9094fd)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 17 +
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 73 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i16.c   | 30 +
 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i32.c   | 28 +
 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i64.c   | 27 
 gcc/testsuite/gcc.target/riscv/sat_s_sub-1-i8.c| 28 +
 .../gcc.target/riscv/sat_s_sub-run-1-i16.c | 16 +
 .../gcc.target/riscv/sat_s_sub-run-1-i32.c | 16 +
 .../gcc.target/riscv/sat_s_sub-run-1-i64.c | 16 +
 .../gcc.target/riscv/sat_s_sub-run-1-i8.c  | 16 +
 10 files changed, 267 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a2617b6db708..587f3f8348c2 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -353,6 +353,23 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\
   return x > IMM ? x - IMM : 0;   \
 }
 
+#define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_sub_##T##_fmt_1 (T x, T y) \
+{\
+  T minus = (UT)x - (UT)y;   \
+  return (x ^ y) >= 0\
+? minus  \
+: (minus ^ x) >= 0   \
+  ? minus\
+  : x < 0 ? MIN : MAX;   \
+}
+#define DEF_SAT_S_SUB_FMT_1_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)
+
+#define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
+#define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
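+
 /******************************************************************************/
 /* Saturation Truncate (unsigned and signed)                                  */
 /******************************************************************************/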
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index 75037c5d8065..39a1e17cd3d1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -37,6 +37,11 @@ TEST_BINARY_STRUCT (int16_t, ssadd)
 TEST_BINARY_STRUCT (int32_t, ssadd)
 TEST_BINARY_STRUCT (int64_t, ssadd)
 
+TEST_BINARY_STRUCT (int8_t,  sssub)
+TEST_BINARY_STRUCT (int16_t, sssub)
+TEST_BINARY_STRUCT (int32_t, sssub)
+TEST_BINARY_STRUCT (int64_t, sssub)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -189,4 +194,72 @@ TEST_BINARY_STRUCT_DECL(int64_t, ssadd) 
TEST_BINARY_DATA(int64_t, ssadd)[] =
   { -9223372036854775803ll,   9223372036854775805ll,   2},
 };
 
+TEST_BINARY_STRUCT_DECL(int8_t, sssub) TEST_BINARY_DATA(int8_t, sssub)[] =
+{
+  {   0,0,0},
+  {   2,4,   -2},
+  { 126,   -1,  127},
+  { 127,   -1,  127},
+  { 127, -127,  127},
+  {  -7,   -4,   -3},
+  {-127,1, -128},
+  {-128,1, -128},
+  {-128,  127, 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 3 of scalar signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4598ef98bd41b9141e195e14ad3ce7e8063933cf

commit 4598ef98bd41b9141e195e14ad3ce7e8063933cf
Author: Pan Li 
Date:   Thu Oct 3 16:15:56 2024 +0800

RISC-V: Add testcases for form 3 of scalar signed SAT_SUB

Form 3:
  #define DEF_SAT_S_SUB_FMT_3(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_sub_##T##_fmt_3 (T x, T y) \
  {\
T minus;   \
bool overflow = __builtin_sub_overflow (x, y, &minus); \
return overflow ? x < 0 ? MIN : MAX : minus;   \
  }

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_sub-3-i16.c: New test.
* gcc.target/riscv/sat_s_sub-3-i32.c: New test.
* gcc.target/riscv/sat_s_sub-3-i64.c: New test.
* gcc.target/riscv/sat_s_sub-3-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-3-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-3-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-3-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-3-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit aac2bc48014dd418a5c9dc3a7c962c0f0bb48312)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i16.c   | 30 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i32.c   | 28 
 gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i64.c   | 27 +++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i8.c| 28 
 .../gcc.target/riscv/sat_s_sub-run-3-i16.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-3-i32.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-3-i64.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-3-i8.c  | 16 
 9 files changed, 191 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 66d393399a29..fd3879d31c5b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -379,12 +379,26 @@ sat_s_sub_##T##_fmt_2 (T x, T y) \
 #define DEF_SAT_S_SUB_FMT_2_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_SUB_FMT_2(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_SUB_FMT_3(T, UT, MIN, MAX) \
+T __attribute__((noinline))  \
+sat_s_sub_##T##_fmt_3 (T x, T y) \
+{\
+  T minus;   \
+  bool overflow = __builtin_sub_overflow (x, y, &minus); \
+  return overflow ? x < 0 ? MIN : MAX : minus;   \
+}
+#define DEF_SAT_S_SUB_FMT_3_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_3(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
 
 #define RUN_SAT_S_SUB_FMT_2(T, x, y) sat_s_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_S_SUB_FMT_2_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_2(T, x, y)
 
+#define RUN_SAT_S_SUB_FMT_3(T, x, y) sat_s_sub_##T##_fmt_3(x, y)
+#define RUN_SAT_S_SUB_FMT_3_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_3(T, x, y)
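+
 /******************************************************************************/
 /* Saturation Truncate (unsigned and signed)                                  */
 /******************************************************************************/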
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i16.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i16.c
new file mode 100644
index 000000000000..5a1368b11a96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_sub-3-i16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_sub_int16_t_fmt_3:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add an implicit dependency for Zawrs

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f3b7430f8db19df7acba09eb736b10de3291c283

commit f3b7430f8db19df7acba09eb736b10de3291c283
Author: Xiao Zeng 
Date:   Fri Sep 27 17:30:36 2024 +0800

RISC-V: Add an implicit dependency for Zawrs

There is a description in the Zawrs extension specification:

"The instructions in the Zawrs extension are only useful in conjunction
with the LR instruction, which is provided by the Zalrsc component
of the A extension."

It can be concluded that: zawrs -> zalrsc.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: zawrs -> zalrsc.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-38.c: New test.
* gcc.target/riscv/predef-39.c: New test.

Signed-off-by: Xiao Zeng 
(cherry picked from commit c01e3aaae79ecd439ad35063db3dee9775f3aefa)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc|  1 +
 gcc/testsuite/gcc.target/riscv/predef-38.c | 31 ++
 gcc/testsuite/gcc.target/riscv/predef-39.c | 31 ++
 3 files changed, 63 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index bd42fd01532b..a6abd903b98f 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -96,6 +96,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
 
   {"zabha", "zaamo"},
   {"zacas", "zaamo"},
+  {"zawrs", "zalrsc"},
 
   {"zcmop", "zca"},
 
diff --git a/gcc/testsuite/gcc.target/riscv/predef-38.c 
b/gcc/testsuite/gcc.target/riscv/predef-38.c
new file mode 100644
index 000000000000..986c02b451a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-38.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32i_zawrs -mabi=ilp32 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 32
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i)
+#error "__riscv_i"
+#endif
+
+#if !defined(__riscv_zawrs)
+#error "__riscv_zawrs"
+#endif
+
+#if !defined(__riscv_zalrsc)
+#error "__riscv_zalrsc"
+#endif
+
+#if defined(__riscv_a)
+#error "__riscv_a"
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/predef-39.c 
b/gcc/testsuite/gcc.target/riscv/predef-39.c
new file mode 100644
index 000000000000..558164de8c44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-39.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64i_zawrs -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i)
+#error "__riscv_i"
+#endif
+
+#if !defined(__riscv_zawrs)
+#error "__riscv_zawrs"
+#endif
+
+#if !defined(__riscv_zalrsc)
+#error "__riscv_zalrsc"
+#endif
+
+#if defined(__riscv_a)
+#error "__riscv_a"
+#endif
+
+  return 0;
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH] RISC-V/libgcc: Fix incorrect and missing .cfi_offset for __riscv_save_[0-3] on RV32.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3303b857418214599c0b47ed4413721bb822398b

commit 3303b857418214599c0b47ed4413721bb822398b
Author: Tsung Chun Lin 
Date:   Tue Oct 1 09:10:29 2024 -0600

[PATCH] RISC-V/libgcc: Fix incorrect and missing .cfi_offset for 
__riscv_save_[0-3] on RV32.

0001-RISC-V-libgcc-Fix-incorrect-and-missing-.cfi_offset-.patch

From 06a370a0a2329dd4da0ffcab7c35ea7df2353baf Mon Sep 17 00:00:00 2001
From: Jim Lin 
Date: Tue, 1 Oct 2024 14:42:56 +0800
Subject: [PATCH] RISC-V/libgcc: Fix incorrect and missing .cfi_offset for
 __riscv_save_[0-3] on RV32.

libgcc/ChangeLog:

* config/riscv/save-restore.S: Fix .cfi_offset for
__riscv_save_[0-3] on RV32.

(cherry picked from commit 97fd777248f3c22f6baa5a25f25f7dd510ca5e63)

Diff:
---
 libgcc/config/riscv/save-restore.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libgcc/config/riscv/save-restore.S 
b/libgcc/config/riscv/save-restore.S
index 8a4391e1a978..30d06cc6e5bb 100644
--- a/libgcc/config/riscv/save-restore.S
+++ b/libgcc/config/riscv/save-restore.S
@@ -421,8 +421,9 @@ FUNC_BEGIN (__riscv_save_0)
   addi sp, sp, -16
   .cfi_def_cfa_offset 16
   sw s2, 0(sp)
+  .cfi_offset 18, -16
   sw s1, 4(sp)
-  .cfi_offset 9, -16
+  .cfi_offset 9, -12
   sw s0, 8(sp)
   .cfi_offset 8, -8
   sw ra, 12(sp)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH] RISC-V/libgcc: Fix incorrect .cfi_offset for saving ra in __riscv_save_[0-3] on ilp32e.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a04990658b179392b600a5a3798492a6f2e3875f

commit a04990658b179392b600a5a3798492a6f2e3875f
Author: Tsung Chun Lin 
Date:   Fri Oct 4 08:02:07 2024 -0600

[PATCH] RISC-V/libgcc: Fix incorrect .cfi_offset for saving ra in 
__riscv_save_[0-3] on ilp32e.

From 8b3c5ebe8aacbcc4ddf1be8dea9a555e7e1bcc39 Mon Sep 17 00:00:00 2001
From: Jim Lin 
Date: Fri, 4 Oct 2024 14:48:12 +0800
Subject: [PATCH] RISC-V/libgcc: Fix incorrect .cfi_offset for saving ra in
 __riscv_save_[0-3] on ilp32e.

libgcc/ChangeLog:

* config/riscv/save-restore.S: Fix .cfi_offset for saving ra in
__riscv_save_[0-3] on ilp32e.

(cherry picked from commit 78d2af1fa53fe232ae00673f53c8b168d099c70f)

Diff:
---
 libgcc/config/riscv/save-restore.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/config/riscv/save-restore.S 
b/libgcc/config/riscv/save-restore.S
index 30d06cc6e5bb..2c5d6bcea1a3 100644
--- a/libgcc/config/riscv/save-restore.S
+++ b/libgcc/config/riscv/save-restore.S
@@ -309,7 +309,7 @@ FUNC_BEGIN(__riscv_save_0)
   sw s0, 4(sp)
   .cfi_offset 8, -8
   sw ra, 8(sp)
-  .cfi_offset 1, 0
+  .cfi_offset 1, -4
   jr t0
   .cfi_endproc
 FUNC_END(__riscv_save_2)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add implication for M extension.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:86e241cb8bdbc49ab348e7094b34320635b2b80d

commit 86e241cb8bdbc49ab348e7094b34320635b2b80d
Author: Tsung Chun Lin 
Date:   Tue Oct 8 17:40:59 2024 -0600

RISC-V: Add implication for M extension.

That M implies Zmmul.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: M implies Zmmul.

(cherry picked from commit 0a193466f2e87acef9b86e0d086bc6f6017518b0)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 2adebe0b6f29..60595a3e3561 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -75,6 +75,8 @@ struct riscv_implied_info_t
 /* Implied ISA info, must end with NULL sentinel.  */
 static const riscv_implied_info_t riscv_implied_info[] =
 {
+  {"m", "zmmul"},
+
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement TARGET_CAN_INLINE_P

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:6de223d810eac940c6375edd082d2c636d078e01

commit 6de223d810eac940c6375edd082d2c636d078e01
Author: Yangyu Chen 
Date:   Tue Oct 8 11:08:44 2024 -0600

RISC-V: Implement TARGET_CAN_INLINE_P

Currently, we lack support for TARGET_CAN_INLINE_P on the RISC-V
ISA. As a result, certain functions cannot be optimized with inlining
when specific options, such as __attribute__((target("arch=+v"))),
are used.  This can lead to potential performance issues when building
retargetable binaries for RISC-V.

To address this, I have implemented the riscv_can_inline_p function.
This addition enables inlining when the callee either has no special
options or when some options match, while also ensuring that the
callee's ISA is a subset of the caller's. I also check some other
options when always_inline is not set.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (cl_opt_var_ref_t): Add
cl_opt_var_ref_t pointer to member of cl_target_option.
(struct riscv_ext_flag_table_t): Add new cl_opt_var_ref_t field.
(RISCV_EXT_FLAG_ENTRY): New macro to simplify the definition of
riscv_ext_flag_table.
(riscv_ext_is_subset): New function to check if the callee's ISA
is a subset of the caller's.
(riscv_x_target_flags_isa_mask): New function to get the mask of
ISA extension in x_target_flags of gcc_options.
* config/riscv/riscv-subset.h (riscv_ext_is_subset): Declare
riscv_ext_is_subset function.
(riscv_x_target_flags_isa_mask): Declare
riscv_x_target_flags_isa_mask function.
* config/riscv/riscv.cc (riscv_can_inline_p): New function.
(TARGET_CAN_INLINE_P): Implement TARGET_CAN_INLINE_P.

(cherry picked from commit 517d344e416c762a942a3633b6ec73a1d018016e)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 372 ++--
 gcc/config/riscv/riscv-subset.h |   3 +
 gcc/config/riscv/riscv.cc   |  66 ++
 3 files changed, 276 insertions(+), 165 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index a6abd903b98f..2adebe0b6f29 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1568,191 +1568,196 @@ riscv_arch_str (bool version_p)
 return std::string();
 }
 
-/* Type for pointer to member of gcc_options.  */
+/* Type for pointer to member of gcc_options and cl_target_option.  */
 typedef int (gcc_options::*opt_var_ref_t);
+typedef int (cl_target_option::*cl_opt_var_ref_t);
 
 /* Types for recording extension to internal flag.  */
 struct riscv_ext_flag_table_t {
   const char *ext;
   opt_var_ref_t var_ref;
+  cl_opt_var_ref_t cl_var_ref;
   int mask;
 };
 
+#define RISCV_EXT_FLAG_ENTRY(NAME, VAR, MASK) \
+  {NAME, &gcc_options::VAR, &cl_target_option::VAR, MASK}
+
 /* Mapping table between extension to internal flag.  */
 static const riscv_ext_flag_table_t riscv_ext_flag_table[] =
 {
-  {"e", &gcc_options::x_target_flags, MASK_RVE},
-  {"m", &gcc_options::x_target_flags, MASK_MUL},
-  {"a", &gcc_options::x_target_flags, MASK_ATOMIC},
-  {"f", &gcc_options::x_target_flags, MASK_HARD_FLOAT},
-  {"d", &gcc_options::x_target_flags, MASK_DOUBLE_FLOAT},
-  {"c", &gcc_options::x_target_flags, MASK_RVC},
-  {"v", &gcc_options::x_target_flags, MASK_FULL_V},
-  {"v", &gcc_options::x_target_flags, MASK_VECTOR},
-
-  {"zicsr",&gcc_options::x_riscv_zi_subext, MASK_ZICSR},
-  {"zifencei", &gcc_options::x_riscv_zi_subext, MASK_ZIFENCEI},
-  {"zicond",   &gcc_options::x_riscv_zi_subext, MASK_ZICOND},
-
-  {"za64rs",  &gcc_options::x_riscv_za_subext, MASK_ZA64RS},
-  {"za128rs", &gcc_options::x_riscv_za_subext, MASK_ZA128RS},
-  {"zawrs",   &gcc_options::x_riscv_za_subext, MASK_ZAWRS},
-  {"zaamo",   &gcc_options::x_riscv_za_subext, MASK_ZAAMO},
-  {"zalrsc",  &gcc_options::x_riscv_za_subext, MASK_ZALRSC},
-  {"zabha",   &gcc_options::x_riscv_za_subext, MASK_ZABHA},
-  {"zacas",   &gcc_options::x_riscv_za_subext, MASK_ZACAS},
-
-  {"zba",&gcc_options::x_riscv_zb_subext, MASK_ZBA},
-  {"zbb",&gcc_options::x_riscv_zb_subext, MASK_ZBB},
-  {"zbc",&gcc_options::x_riscv_zb_subext, MASK_ZBC},
-  {"zbs",&gcc_options::x_riscv_zb_subext, MASK_ZBS},
-
-  {"zfinx",&gcc_options::x_riscv_zinx_subext, MASK_ZFINX},
-  {"zdinx",&gcc_options::x_riscv_zinx_subext, MASK_ZDINX},
-  {"zhinx",&gcc_options::x_riscv_zinx_subext, MASK_ZHINX},
-  {"zhinxmin", &gcc_options::x_riscv_zinx_subext, MASK_ZHINXMIN},
-
-  {"zbkb",   &gcc_options::x_riscv_zk_subext, MASK_ZBKB},
-  {"zbkc",   &gcc_options::x_riscv_zk_subext, MASK_ZBKC},
-  {"zbkx",   &gcc_options::x_riscv_zk_subext, MASK_ZBKX},
-  {"zknd",   &gcc_options::x_riscv_zk_subext, MASK_ZKND},
-  {"zkne",   &gcc_options::x_riscv_zk_subext, MASK_ZKNE},
-  {"zknh",   &gcc_option

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of scalar signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b59748c176c0bb6fad863ff65d9ab54bddadc20b

commit b59748c176c0bb6fad863ff65d9ab54bddadc20b
Author: Pan Li 
Date:   Thu Oct 3 16:47:52 2024 +0800

RISC-V: Add testcases for form 4 of scalar signed SAT_SUB

Form 4:
  #define DEF_SAT_S_SUB_FMT_4(T, UT, MIN, MAX)   \
  T __attribute__((noinline))\
  sat_s_sub_##T##_fmt_4 (T x, T y)   \
  {  \
T minus;   \
bool overflow = __builtin_sub_overflow (x, y, &minus); \
return !overflow ? minus : x < 0 ? MIN : MAX;  \
  }

The tests below passed for this patch.
* The full rv64gcv regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_sub-4-i16.c: New test.
* gcc.target/riscv/sat_s_sub-4-i32.c: New test.
* gcc.target/riscv/sat_s_sub-4-i64.c: New test.
* gcc.target/riscv/sat_s_sub-4-i8.c: New test.
* gcc.target/riscv/sat_s_sub-run-4-i16.c: New test.
* gcc.target/riscv/sat_s_sub-run-4-i32.c: New test.
* gcc.target/riscv/sat_s_sub-run-4-i64.c: New test.
* gcc.target/riscv/sat_s_sub-run-4-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 9252fc398c86ec0eac2c56283e2ded8ea6cfb70c)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i16.c   | 30 ++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i32.c   | 28 
 gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i64.c   | 27 +++
 gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i8.c| 28 
 .../gcc.target/riscv/sat_s_sub-run-4-i16.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-4-i32.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-4-i64.c | 16 
 .../gcc.target/riscv/sat_s_sub-run-4-i8.c  | 16 
 9 files changed, 191 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index fd3879d31c5b..7c3859cc1834 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -390,6 +390,17 @@ sat_s_sub_##T##_fmt_3 (T x, T y) \
 #define DEF_SAT_S_SUB_FMT_3_WRAP(T, UT, MIN, MAX) \
   DEF_SAT_S_SUB_FMT_3(T, UT, MIN, MAX)
 
+#define DEF_SAT_S_SUB_FMT_4(T, UT, MIN, MAX)   \
+T __attribute__((noinline))\
+sat_s_sub_##T##_fmt_4 (T x, T y)   \
+{  \
+  T minus;   \
+  bool overflow = __builtin_sub_overflow (x, y, &minus); \
+  return !overflow ? minus : x < 0 ? MIN : MAX;  \
+}
+#define DEF_SAT_S_SUB_FMT_4_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_SUB_FMT_4(T, UT, MIN, MAX)
+
 #define RUN_SAT_S_SUB_FMT_1(T, x, y) sat_s_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_S_SUB_FMT_1_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_1(T, x, y)
 
@@ -399,6 +410,9 @@ sat_s_sub_##T##_fmt_3 (T x, T y) \
 #define RUN_SAT_S_SUB_FMT_3(T, x, y) sat_s_sub_##T##_fmt_3(x, y)
 #define RUN_SAT_S_SUB_FMT_3_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_3(T, x, y)
 
+#define RUN_SAT_S_SUB_FMT_4(T, x, y) sat_s_sub_##T##_fmt_4(x, y)
+#define RUN_SAT_S_SUB_FMT_4_WRAP(T, x, y) RUN_SAT_S_SUB_FMT_4(T, x, y)
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i16.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i16.c
new file mode 100644
index ..60c22e25eb89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_sub-4-i16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_sub_int16_t_fmt_4:
+** sub\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Revert "RISC-V: Add implication for M extension."

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a67ec8f344c43a807ccd0fb1d8d74a579942700c

commit a67ec8f344c43a807ccd0fb1d8d74a579942700c
Author: Jeff Law 
Date:   Wed Oct 9 16:22:06 2024 -0600

Revert "RISC-V: Add implication for M extension."

This reverts commit 0a193466f2e87acef9b86e0d086bc6f6017518b0.

(cherry picked from commit df3bda457be49b29c09944a0d639ce9ec0b7d282)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 60595a3e3561..2adebe0b6f29 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -75,8 +75,6 @@ struct riscv_implied_info_t
 /* Implied ISA info, must end with NULL sentinel.  */
 static const riscv_implied_info_t riscv_implied_info[] =
 {
-  {"m", "zmmul"},
-
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 1 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:c268c7121cf51f5991a5900cee4aabc555e2440f

commit c268c7121cf51f5991a5900cee4aabc555e2440f
Author: Pan Li 
Date:   Tue Oct 8 11:28:44 2024 +0800

RISC-V: Add testcases for form 1 of scalar signed SAT_TRUNC

Form 1:
  #define DEF_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN <= x && x <= (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

The tests below passed for this patch.
* The full rv64gcv regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data for SAT_TRUNC.
* gcc.target/riscv/sat_s_trunc-1-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-1-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-1-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-1-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-1-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-1-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-1-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 8b407d5c6940a65d78a544f9c66850e619638171)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  15 +++
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 110 +
 .../gcc.target/riscv/sat_s_trunc-1-i16-to-i8.c |  26 +
 .../gcc.target/riscv/sat_s_trunc-1-i32-to-i16.c|  28 ++
 .../gcc.target/riscv/sat_s_trunc-1-i32-to-i8.c |  26 +
 .../gcc.target/riscv/sat_s_trunc-1-i64-to-i16.c|  28 ++
 .../gcc.target/riscv/sat_s_trunc-1-i64-to-i32.c|  26 +
 .../gcc.target/riscv/sat_s_trunc-1-i64-to-i8.c |  26 +
 .../gcc.target/riscv/sat_s_trunc-run-1-i16-to-i8.c |  16 +++
 .../riscv/sat_s_trunc-run-1-i32-to-i16.c   |  16 +++
 .../gcc.target/riscv/sat_s_trunc-run-1-i32-to-i8.c |  16 +++
 .../riscv/sat_s_trunc-run-1-i64-to-i16.c   |  16 +++
 .../riscv/sat_s_trunc-run-1-i64-to-i32.c   |  16 +++
 .../gcc.target/riscv/sat_s_trunc-run-1-i64-to-i8.c |  16 +++
 14 files changed, 381 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 7c3859cc1834..80d7a69e7d9a 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -465,4 +465,19 @@ sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x)  \
 #define RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_4 (x)
 #define RUN_SAT_U_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_4(NT, WT, x)
 
+#define DEF_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN <= x && x <= (WT)NT_MAX   \
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX)
+
+#define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
+#define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index 39a1e17cd3d1..9f9f7d0bcd19 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -42,6 +42,13 @@ TEST_BINARY_STRUCT (int16_t, sssub)
 TEST_BINARY_STRUCT (int32_t, sssub)
 TEST_BINARY_STRUCT (int64_t, sssub)
 
+TEST_UNARY_STRUCT (int8_t, int16_t)
+TEST_UNARY_STRUCT (int8_t, int32_t)
+TEST_UNARY_STRUCT (int8_t, int64_t)
+TEST_UNARY_STRUCT (int16_t, int32_t)
+TEST_UNARY_STRUCT (int16_t, int64_t)
+TEST_UNARY_STRUCT (int32_t, int64_t)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -126,6 +133,109 @@ TEST_UNARY

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement scalar SAT_TRUNC for signed integer

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:cedd41718bb3c254fbdacfa0623840df3917b40d

commit cedd41718bb3c254fbdacfa0623840df3917b40d
Author: Pan Li 
Date:   Tue Oct 8 11:22:21 2024 +0800

RISC-V: Implement scalar SAT_TRUNC for signed integer

This patch implements sstrunc (signed saturating truncation) for scalar
signed integers.

Form 1:
  #define DEF_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN <= x && x <= (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

DEF_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)

Before this patch:
  10   │ sat_s_trunc_int64_t_to_int32_t_fmt_1:
  11   │ li  a5,1
  12   │ slli    a5,a5,31
  13   │ li  a4,-1
  14   │ add a5,a0,a5
  15   │ srli    a4,a4,32
  16   │ bgtu    a5,a4,.L2
  17   │ sext.w  a0,a0
  18   │ ret
  19   │ .L2:
  20   │ srai    a5,a0,63
  21   │ li  a0,-2147483648
  22   │ xor a0,a0,a5
  23   │ not a0,a0
  24   │ ret

After this patch:
  10   │ sat_s_trunc_int64_t_to_int32_t_fmt_1:
  11   │ li  a5,-2147483648
  12   │ xori    a3,a5,-1
  13   │ slt a4,a0,a3
  14   │ slt a5,a5,a0
  15   │ and a5,a4,a5
  16   │ srai    a4,a0,63
  17   │ xor a4,a4,a3
  18   │ addi    a3,a5,-1
  19   │ neg a5,a5
  20   │ and a4,a4,a3
  21   │ and a0,a0,a5
  22   │ or  a0,a0,a4
  23   │ sext.w  a0,a0
  24   │ ret

The test suites below passed for this patch.
* The full rv64gcv regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_sstrunc): Add new
func decl to expand SAT_TRUNC.
* config/riscv/riscv.cc (riscv_expand_sstrunc): Add new func
impl to expand SAT_TRUNC.
* config/riscv/riscv.md (sstrunc<mode><anyi_double_truncated>2):
Add new pattern for double truncation.
(sstrunc<mode><anyi_quad_truncated>2): Ditto but for quad.
(sstrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

Signed-off-by: Pan Li 
(cherry picked from commit 110ccfa5c88544c5ec85d31b1ed2c2f9dac163fd)

Diff:
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc   | 61 +
 gcc/config/riscv/riscv.md   | 30 
 3 files changed, 92 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3d8775e582dc..1e6d10a14020 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -138,6 +138,7 @@ extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
+extern void riscv_expand_sstrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 13e8338bb1b0..0f93362af609 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12437,6 +12437,67 @@ riscv_expand_ustrunc (rtx dest, rtx src)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implement the signed saturation truncation for int mode.
+
+   b = SAT_TRUNC (a);
+   =>
+   1.  lt = a < max
+   2.  gt = min < a
+   3.  mask = lt & gt
+   4.  trunc_mask = -mask
+   5.  sat_mask = mask - 1
+   6.  lt = a < 0
+   7.  neg = -lt
+   8.  sat = neg ^ max
+   9.  trunc = src & trunc_mask
+   10. sat = sat & sat_mask
+   11. dest = trunc | sat  */
+
+void
+riscv_expand_sstrunc (rtx dest, rtx src)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant ();
+  HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; // 127
+  HOST_WIDE_INT narrow_min = -narrow_max - 1; // -128
+
+  rtx xmode_narrow_max = gen_reg_rtx (Xmode);
+  rtx xmode_narrow_min = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_gt = gen_reg_rtx (Xmode);
+  rtx xmode_src = gen_lowpart (Xmode, src);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_mask = gen_reg_rtx (Xmode);
+  rtx xmode_sat = gen_reg_rtx (Xmode);
+  rtx xmode_trunc = gen_reg_rtx (Xmode);
+  rtx xmode_sat_mask = gen_reg_rtx (Xmode);
+  rtx xmode_trunc_mask = gen_reg_rtx (Xmode);
+
+  /* Step-1: lt = src < max, gt = min < src, mask = lt & gt  */
+  emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode));
+  emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode));
+  riscv_emit_binary (LT, xm

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Optimize branches with shifted immediate operands

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:8c2010a09a3a63f908fcf999b3248343369185b9

commit 8c2010a09a3a63f908fcf999b3248343369185b9
Author: Jovan Vukic 
Date:   Wed Oct 9 16:53:38 2024 -0600

RISC-V: Optimize branches with shifted immediate operands

After the valuable feedback I received, it’s clear to me that the
oversight was in the tests showing the benefits of the patch. In the
test file, I added functions f5 and f6, which now generate more
efficient code with fewer instructions.

Before the patch:

f5:
li  a4,2097152
addi    a4,a4,-2048
li  a5,1167360
and a0,a0,a4
addi    a5,a5,-2048
beq a0,a5,.L4

f6:
li  a5,3407872
addi    a5,a5,-2048
and a0,a0,a5
li  a5,1114112
beq a0,a5,.L7

After the patch:

f5:
srli    a5,a0,11
andi    a5,a5,1023
li  a4,569
beq a5,a4,.L5

f6:
srli    a5,a0,11
andi    a5,a5,1663
li  a4,544
beq a5,a4,.L9

PR target/115921

gcc/ChangeLog:

* config/riscv/iterators.md (any_eq): New code iterator.
* config/riscv/riscv.h (COMMON_TRAILING_ZEROS): New macro.
(SMALL_AFTER_COMMON_TRAILING_SHIFT): Ditto.
* config/riscv/riscv.md 
(*branch_shiftedarith_<optab>_shifted):
New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/branch-1.c: Additional tests.

(cherry picked from commit c8957c8779954c3b0bade1dde0a8987b4db157b4)

Diff:
---
 gcc/config/riscv/iterators.md |  4 
 gcc/config/riscv/riscv.h  | 12 
 gcc/config/riscv/riscv.md | 32 +++
 gcc/testsuite/gcc.target/riscv/branch-1.c | 18 ++---
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 872c542e9065..081659499a99 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -233,6 +233,8 @@
 (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
+(define_code_iterator any_eq [eq ne])
+
 ;; Iterators for conditions we can emit a sCC against 0 or a reg directly
 (define_code_iterator scc_0  [eq ne gt gtu])
 
@@ -285,6 +287,8 @@
 (le "le")
 (gt "gt")
 (lt "lt")
+(eq "eq")
+(ne "ne")
 (ior "ior")
 (xor "xor")
 (and "and")
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 57e3a5e03eb1..a9211be29bb4 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -671,6 +671,18 @@ enum reg_class
 /* True if bit BIT is set in VALUE.  */
 #define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
 
+/* Returns the smaller (common) number of trailing zeros for VAL1 and VAL2.  */
+#define COMMON_TRAILING_ZEROS(VAL1, VAL2)  \
+  (ctz_hwi (VAL1) < ctz_hwi (VAL2) \
+   ? ctz_hwi (VAL1)\
+   : ctz_hwi (VAL2))
+
+/* Returns true if both VAL1 and VAL2 are SMALL_OPERANDs after shifting by
+   the common number of trailing zeros.  */
+#define SMALL_AFTER_COMMON_TRAILING_SHIFT(VAL1, VAL2)  \
+  (SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2))
\
+   && SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
+
 /* Stack layout; function entry, exit and calling.  */
 
 #define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 688c07df46c4..78112afbb261 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3129,6 +3129,38 @@
 }
 [(set_attr "type" "branch")])
 
+(define_insn_and_split "*branch_shiftedarith__shifted"
+  [(set (pc)
+   (if_then_else (any_eq
+   (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+ (match_operand 2 "shifted_const_arith_operand" "i"))
+   (match_operand 3 "shifted_const_arith_operand" "i"))
+(label_ref (match_operand 0 "" ""))
+(pc)))
+   (clobber (match_scratch:X 4 "=&r"))
+   (clobber (match_scratch:X 5 "=&r"))]
+  "!SMALL_OPERAND (INTVAL (operands[2]))
+&& !SMALL_OPERAND (INTVAL (operands[3]))
+&& SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
+INTVAL (operands[3]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (lshiftrt:X (match_dup 1) (match_dup 7)))
+   (set (match_dup 4) (and:X (match_dup 4) (match_dup 8)))
+   (set (match_dup 5

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V][PR target/116615] RISC-V: Use default LOGICAL_OP_NON_SHORT_CIRCUIT

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5f91c4a2e4dfdf5ded4655a2bbcc41b99780fe23

commit 5f91c4a2e4dfdf5ded4655a2bbcc41b99780fe23
Author: Palmer Dabbelt 
Date:   Tue Oct 8 07:28:32 2024 -0600

[RISC-V][PR target/116615] RISC-V: Use default LOGICAL_OP_NON_SHORT_CIRCUIT

> We have cheap logical ops, so let's just move this back to the default
> to take advantage of the standard branch/op heuristics.
>
> gcc/ChangeLog:
>
> PR target/116615
> * config/riscv/riscv.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove.
> ---
> There's a bunch more discussion in the bug, but it's starting to smell
> like this was just a holdover from MIPS (where maybe it also shouldn't
> be set).  I haven't tested this, but I figured I'd send the patch to get
> a little more visibility.
>
> I guess we should also kick off something like a SPEC run to make sure
> there's no regressions?
So as I noted earlier, this appears to be a nice win on the BPI. Testsuite
fallout is minimal -- just the one SFB related test tripping at -Os that was
also hit by Andrew P's work.

After looking at it more closely, the SFB codegen and the codegen after
Andrew's work should be equivalent assuming two independent ops can dispatch
together.

The test actually generates sensible code at -Os.  It's the -Os in 
combination
with the -fno-ssa-phiopt that causes problems.   I think the best thing to 
do
here is just skip at -Os.  That still keeps a degree of testing the SFB 
path.

Tested successfully in my tester.  But will wait for the pre-commit tester 
to
render a verdict before moving forward.

PR target/116615
gcc/
* config/riscv/riscv.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove.

gcc/testsuite/

* gcc.target/riscv/cset-sext-sfb.c: Skip for -Os.

Co-authored-by: Jeff Law  

(cherry picked from commit 34ae3a992a0cc3240d07d69ff12a664cbb5c8be0)

Diff:
---
 gcc/config/riscv/riscv.h   | 2 --
 gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index dc6ac6153bd0..57e3a5e03eb1 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -943,8 +943,6 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
 #define TARGET_VECTOR_MISALIGN_SUPPORTED \
riscv_vector_unaligned_access_p
 
-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
-
 /* Control the assembler format that we output.  */
 
 /* Output to assembler file text saying following lines
diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c 
b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
index 6e9f8cc61de0..1ee45b33e152 100644
--- a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
+++ b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" "-Os" } } */
 /* { dg-options "-march=rv32gc -mtune=sifive-7-series -mbranch-cost=1 
-fno-ssa-phiopt -fdump-rtl-ce1" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc -mtune=sifive-7-series -mbranch-cost=1 
-fno-ssa-phiopt -fdump-rtl-ce1" { target { rv64 } } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:771a4038fa5f57b37b8931bb739fe167dae2d7fb

commit 771a4038fa5f57b37b8931bb739fe167dae2d7fb
Author: Tsung Chun Lin 
Date:   Tue Oct 8 17:44:38 2024 -0600

RISC-V: Enable builtin __riscv_mul with Zmmul extension.

From d5b254e19d1f37fe27c7e98a0160e5c22446cfea Mon Sep 17 00:00:00 2001
From: Jim Lin 
Date: Tue, 8 Oct 2024 13:14:32 +0800
Subject: [PATCH] RISC-V: Enable builtin __riscv_mul with Zmmul extension.

gcc/ChangeLog:

* config/riscv/riscv-c.cc: (riscv_cpu_cpp_builtins):
Enable builtin __riscv_mul with Zmmul extension.

(cherry picked from commit 2990f5802a727cbd717587c3a345fa940193049f)

Diff:
---
 gcc/config/riscv/riscv-c.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9c66d7..7e9c478e97bb 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -123,7 +123,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   if (TARGET_ATOMIC)
 builtin_define ("__riscv_atomic");
 
-  if (TARGET_MUL)
+  if (TARGET_ZMMUL)
 builtin_define ("__riscv_mul");
   if (TARGET_DIV)
 builtin_define ("__riscv_div");


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Revert "RISC-V: Enable builtin __riscv_mul with Zmmul extension."

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0d753781b949522fa740d8307d30b0f8795d867c

commit 0d753781b949522fa740d8307d30b0f8795d867c
Author: Jeff Law 
Date:   Wed Oct 9 16:21:56 2024 -0600

Revert "RISC-V: Enable builtin __riscv_mul with Zmmul extension."

This reverts commit 2990f5802a727cbd717587c3a345fa940193049f.

(cherry picked from commit e889235cb004b62f3004408283ce91eb20eb521a)

Diff:
---
 gcc/config/riscv/riscv-c.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 7e9c478e97bb..71112d9c66d7 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -123,7 +123,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   if (TARGET_ATOMIC)
 builtin_define ("__riscv_atomic");
 
-  if (TARGET_ZMMUL)
+  if (TARGET_MUL)
 builtin_define ("__riscv_mul");
   if (TARGET_DIV)
 builtin_define ("__riscv_div");


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 1 of vector signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:86a667a882c82b97781673ceb8815e9c700fe56c

commit 86a667a882c82b97781673ceb8815e9c700fe56c
Author: Pan Li 
Date:   Fri Oct 11 12:12:03 2024 +0800

RISC-V: Add testcases for form 1 of vector signed SAT_SUB

Form 1:
  #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T minus = (UT)x - (UT)y;   \
out[i] = (x ^ y) >= 0  \
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }\
  }

DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The tests below passed for this patch.
* The full rv64gcv regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: Add test
data for run test.
* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-1-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-1-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-1-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-1-i8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-1-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-1-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-1-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-1-i8.c: New 
test.

Signed-off-by: Pan Li 
(cherry picked from commit c4af4fe11e71c686ee06c1eebe9e64ad5a94410a)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_data.h | 264 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-1-i16.c  |   9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-1-i32.c  |   9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-1-i64.c  |   9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-1-i8.c   |   9 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-1-i16.c|  17 ++
 .../rvv/autovec/binop/vec_sat_s_sub-run-1-i32.c|  17 ++
 .../rvv/autovec/binop/vec_sat_s_sub-run-1-i64.c|  17 ++
 .../rvv/autovec/binop/vec_sat_s_sub-run-1-i8.c |  17 ++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   |  25 ++
 10 files changed, 393 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h
index 99d618168f30..32edc358a080 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h
@@ -598,4 +598,268 @@ int64_t TEST_BINARY_DATA_NAME(int64_t, int64_t, 
ssadd)[][3][N] =
   },
 };
 
+int8_t TEST_BINARY_DATA_NAME(int8_t, int8_t, sssub)[][3][N] =
+{
+  {
+{
+ 0,0,0,0,
+ 2,2,2,2,
+   126,  126,  126,  126,
+   127,  127,  127,  127,
+},
+{
+ 0,0,0,0,
+ 4,4,4,4,
+-2,   -2,   -2,   -2,
+  -127, -127, -127, -127,
+},
+{
+ 0,0,0,0,
+-2,   -2,   -2,   -2,
+   127,  127,  127,  127,
+   127,  127,  127,  127,
+},
+  },
+
+  {
+{
+-7,   -7,   -7,   -7,
+  -128, -128, -128, -128,
+  -127, -127, -127, -127,
+  -128, -128, -128, -128,
+},
+{
+-4,   -4,   -4,   -4,
+ 1,1,1,1,
+ 1,1,1,1,
+   127,  127,  127,  127,
+},
+{
+-3,   -3,   -3,   -3,
+  -128, -128, -128, -128,
+  -128, -128, -128, -128,
+  -128, -128, -128, -128,
+},
+  },
+
+  {
+{
+  -128, -128, -128, -128,
+   127,  127,  127,  127,
+  -125, -125, -125, -125,
+   126,  126,  126,  126,
+},
+{
+   127,  127,  127,  127,
+  -

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 8 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:cbfde0fde6cdbd4c4dbf646873adad5a9ad3b45c

commit cbfde0fde6cdbd4c4dbf646873adad5a9ad3b45c
Author: Pan Li 
Date:   Thu Oct 10 16:24:08 2024 +0800

RISC-V: Add testcases for form 8 of scalar signed SAT_TRUNC

Form 8:
  #define DEF_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_8 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN > x || x >= (WT)NT_MAX\
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  }

The tests below passed for this patch.
* The full rv64gcv regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-8-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-8-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-8-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-8-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-8-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-8-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-8-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit dd2d4b3fd87241dca658b68b4f9eef533b7fad36)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-8-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-8-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-8-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-8-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-8-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-8-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-8-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-8-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-8-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-8-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-8-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-8-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 189babd22f12..2cbd1f18c8d2 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -549,6 +549,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_7 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_7_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_8 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN > x || x >= (WT)NT_MAX\
+? x < 0 ? NT_MIN : NT_MAX \
+: trunc;  \
+}
+#define DEF_SAT_S_TRUNC_FMT_8_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -570,4 +582,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_7 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_7(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_7 (x)
 #define RUN_SAT_S_TRUNC_FMT_7_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_7(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_8(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_8 (x)
+#define RUN_SAT_S_TRUNC_FMT_8_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_8(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-8-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-8-i16-to-i8.c
new file mode 100644
index ..cf1916b958c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-8-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedul

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V:Bugfix for C++ code compilation failure with rv32imafc_zve32f[pr116883]

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:39bd5c9d0383801bb7c5371b54e07a9e7196fc4d

commit 39bd5c9d0383801bb7c5371b54e07a9e7196fc4d
Author: Li Xu 
Date:   Thu Oct 10 08:51:19 2024 -0600

RISC-V:Bugfix for C++ code compilation failure with 
rv32imafc_zve32f[pr116883]

From: xuli 

Example as follows:

int main()
{
  unsigned long arraya[128], arrayb[128], arrayc[128];
  for (int i = 0; i < 128; i++)
   {
  arraya[i] = arrayb[i] + arrayc[i];
   }
  return 0;
}

Compiled with -march=rv32imafc_zve32f -mabi=ilp32f, it will cause a 
compilation issue:

riscv_vector.h:40:25: error: ambiguating new declaration of 'vint64m4_t 
__riscv_vle64(vbool16_t, const long long int*, unsigned int)'
   40 | #pragma riscv intrinsic "vector"
  | ^~~~
riscv_vector.h:40:25: note: old declaration 'vint64m1_t 
__riscv_vle64(vbool64_t, const long long int*, unsigned int)'

With zvl=32b, vbool16_t is registered in init_builtins() with
type_common.precision=0x101 (nunits=2), mode_nunits[E_RVVMF16BI]=[2,2].

Normally, vbool64_t is only valid when TARGET_MIN_VLEN > 32, so vbool64_t
is not registered in init_builtins(), meaning vbool64_t=null.

In order to implement __attribute__((target("arch=+v"))), we must register
all vector types and all RVV intrinsics. Therefore, vbool64_t will be 
registered
by default with zvl=128b in reinit_builtins(), resulting in
type_common.precision=0x101 (nunits=2) and mode_nunits[E_RVVMF64BI]=[2,2].

We then get TYPE_VECTOR_SUBPARTS(vbool16_t) == 
TYPE_VECTOR_SUBPARTS(vbool64_t),
calculated using type_common.precision, resulting in 2. Since vbool16_t and
vbool64_t have the same element type (boolean_type), the compiler treats 
them
as the same type, leading to a re-declaration conflict.

After all types and intrinsics have been registered, processing
__attribute__((target("arch=+v"))) will update the parameters option and
init_adjust_machine_modes. Therefore, to avoid conflicts, we can choose
zvl=4096b for the null type reinit_builtins().

command option zvl=32b
  type nunits
  vbool64_t => null
  vbool32_t=> [1,1]
  vbool16_t=> [2,2]
  vbool8_t=>  [4,4]
  vbool4_t=>  [8,8]
  vbool2_t=>  [16,16]
  vbool1_t=>  [32,32]

reinit zvl=128b
  vbool64_t => [2,2] conflict with zvl32b vbool16_t=> [2,2]
reinit zvl=256b
  vbool64_t => [4,4] conflict with zvl32b vbool8_t=>  [4,4]
reinit zvl=512b
  vbool64_t => [8,8] conflict with zvl32b vbool4_t=>  [8,8]
reinit zvl=1024b
  vbool64_t => [16,16] conflict with zvl32b vbool2_t=>  [16,16]
reinit zvl=2048b
  vbool64_t => [32,32] conflict with zvl32b vbool1_t=>  [32,32]
reinit zvl=4096b
  vbool64_t => [64,64] zvl=4096b is ok

Signed-off-by: xuli 

PR target/116883

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic_flags_pollute): 
Choose zvl4096b
to initialize null type.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr116883.C: New test.

(cherry picked from commit fd8e590ff11266598d8f9b3d03d72ba7a6100512)

Diff:
---
 gcc/config/riscv/riscv-c.cc|  7 ++-
 gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C | 15 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9c66d7..c59f408d3a8e 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct 
pragma_intrinsic_flags *flags)
   riscv_zvl_flags = riscv_zvl_flags
 | MASK_ZVL32B
 | MASK_ZVL64B
-| MASK_ZVL128B;
+| MASK_ZVL128B
+| MASK_ZVL256B
+| MASK_ZVL512B
+| MASK_ZVL1024B
+| MASK_ZVL2048B
+| MASK_ZVL4096B;
 
   riscv_vector_elen_flags = riscv_vector_elen_flags
 | MASK_VECTOR_ELEN_32
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
new file mode 100644
index ..15bbec40bdde
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr116883.C
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc_zve32f -mabi=ilp32f" } */
+
+#include <riscv_vector.h>
+
+int main()
+{
+  unsigned long arraya[128], arrayb[128], arrayc[128];
+  for (int i; i < 128; i++)
+   {
+  arraya[i] = arrayb[i] + arrayc[i];
+   }
+  return 0;
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 2 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:95cd2154d39d6dc2134a00033e0ac553a1c99dd3

commit 95cd2154d39d6dc2134a00033e0ac553a1c99dd3
Author: Pan Li 
Date:   Wed Oct 9 10:33:31 2024 +0800

RISC-V: Add testcases for form 2 of scalar signed SAT_TRUNC

Form 2:
  #define DEF_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN < x && x < (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-2-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-2-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-2-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-2-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-2-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-2-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-2-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 00d04a7b237fad4928bcaac19b3d0f18ddf8810e)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-2-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-2-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-2-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-2-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-2-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-2-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-2-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-2-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-2-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-2-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-2-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-2-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 80d7a69e7d9a..f88432a38173 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -477,7 +477,22 @@ sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN < x && x < (WT)NT_MAX \
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_2(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_2 (x)
+#define RUN_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_2(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-2-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-2-i16-to-i8.c
new file mode 100644
index ..9748b83e6980
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-2-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_trunc_int16_t_to_int8_t_fmt_2:
+** slti\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** li\s+[atx][0-9]+,\s*-128
+** 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 3 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f16d3f5e692668903b7e2e66a3046e043fb7

commit f16d3f5e692668903b7e2e66a3046e043fb7
Author: Pan Li 
Date:   Wed Oct 9 22:37:00 2024 +0800

RISC-V: Add testcases for form 3 of scalar signed SAT_TRUNC

Form 3:
  #define DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN < x && x <= (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-3-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-3-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 77fceccf8bd05ebf0cf95fea3b34126431827a5d)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-3-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-3-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-3-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-3-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-3-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-3-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-3-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-3-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-3-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-3-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-3-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index f88432a38173..607bc4fc82e6 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -489,10 +489,25 @@ sat_s_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN < x && x <= (WT)NT_MAX\
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
 #define RUN_SAT_S_TRUNC_FMT_2(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_2 (x)
 #define RUN_SAT_S_TRUNC_FMT_2_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_2(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_3(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_3 (x)
+#define RUN_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_3(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c
new file mode 100644
index ..7b8a663d53bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-3-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_trunc_int16_t_to_int8_t_fmt_3:
+** slti\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** li\s+[atx][0-9]+,\s*-128
+** slt\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 7 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e459ef25692afcddd50fdabaa48949c267f44cb5

commit e459ef25692afcddd50fdabaa48949c267f44cb5
Author: Pan Li 
Date:   Thu Oct 10 16:08:40 2024 +0800

RISC-V: Add testcases for form 7 of scalar signed SAT_TRUNC

Form 7:
  #define DEF_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_7 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN >= x || x >= (WT)NT_MAX   \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  }

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-7-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-7-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-7-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-7-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-7-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-7-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-7-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 43e347660f418529f104b67ebce0c5aa332687d7)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-7-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-7-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-7-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-7-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-7-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-7-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-7-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-7-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-7-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-7-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-7-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-7-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 7a5110248f48..189babd22f12 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -537,6 +537,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_6 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_6_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_7 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN >= x || x >= (WT)NT_MAX   \
+? x < 0 ? NT_MIN : NT_MAX \
+: trunc;  \
+}
+#define DEF_SAT_S_TRUNC_FMT_7_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -555,4 +567,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_6 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_6(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_6 (x)
 #define RUN_SAT_S_TRUNC_FMT_6_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_6(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_7(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_7 (x)
+#define RUN_SAT_S_TRUNC_FMT_7_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_7(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-7-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-7-i16-to-i8.c
new file mode 100644
index ..b931bec4ac6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-7-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedul

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 5 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:bda7ba63e0e170b201ab9d5539ce5b78e8854872

commit bda7ba63e0e170b201ab9d5539ce5b78e8854872
Author: Pan Li 
Date:   Thu Oct 10 15:35:33 2024 +0800

RISC-V: Add testcases for form 5 of scalar signed SAT_TRUNC

Form 5:
  #define DEF_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_5 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN > x || x > (WT)NT_MAX \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  }

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-5-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-5-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-5-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-5-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-5-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-5-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-5-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 303b3f5057cdb9acc415ff975eca3d470f0e1daf)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-5-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-5-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-5-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-5-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-5-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-5-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-5-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-5-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-5-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-5-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-5-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-5-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 0b3d0ea70737..e3c01724f07a 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -513,6 +513,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_5 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN > x || x > (WT)NT_MAX \
+? x < 0 ? NT_MIN : NT_MAX \
+: trunc;  \
+}
+#define DEF_SAT_S_TRUNC_FMT_5_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -525,4 +537,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_4(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_4 (x)
 #define RUN_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_4(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_5(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_5 (x)
+#define RUN_SAT_S_TRUNC_FMT_5_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_5(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-5-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-5-i16-to-i8.c
new file mode 100644
index ..9f48295283e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-5-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedul

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 6 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5a254091002952304a731a830932487eeaebd684

commit 5a254091002952304a731a830932487eeaebd684
Author: Pan Li 
Date:   Thu Oct 10 15:53:45 2024 +0800

RISC-V: Add testcases for form 6 of scalar signed SAT_TRUNC

Form 6:
  #define DEF_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_6 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN >= x || x > (WT)NT_MAX\
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  }

The tests below passed for this patch.
* The rv64gcv full regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-6-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-6-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-6-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-6-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-6-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-6-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-6-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit ffd351ac4968861122a4b1beae75167b1421e715)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-6-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-6-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-6-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-6-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-6-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-6-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-6-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-6-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-6-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-6-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-6-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-6-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index e3c01724f07a..7a5110248f48 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -525,6 +525,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_5 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_5_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_6 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN >= x || x > (WT)NT_MAX\
+? x < 0 ? NT_MIN : NT_MAX \
+: trunc;  \
+}
+#define DEF_SAT_S_TRUNC_FMT_6_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -540,4 +552,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_5 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_5(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_5 (x)
 #define RUN_SAT_S_TRUNC_FMT_5_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_5(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_6(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_6 (x)
+#define RUN_SAT_S_TRUNC_FMT_6_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_6(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-6-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-6-i16-to-i8.c
new file mode 100644
index ..3961eae2b9b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-6-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedul

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of scalar signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b5d3b4d45b106110cc184748c3fba01f2a463d71

commit b5d3b4d45b106110cc184748c3fba01f2a463d71
Author: Pan Li 
Date:   Thu Oct 10 14:52:04 2024 +0800

RISC-V: Add testcases for form 4 of scalar signed SAT_TRUNC

Form 4:
  #define DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))  \
  sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
  { \
NT trunc = (NT)x;   \
return (WT)NT_MIN <= x && x < (WT)NT_MAX\
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  }

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-4-i64-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i16-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i32-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i32-to-i8.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i16.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i32.c: New test.
* gcc.target/riscv/sat_s_trunc-run-4-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 6e19e09c2a8303615627aa0e8163a4a9e4fcbd12)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 15 
 .../gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-4-i32-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-4-i32-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-4-i64-to-i16.c| 28 ++
 .../gcc.target/riscv/sat_s_trunc-4-i64-to-i32.c| 26 
 .../gcc.target/riscv/sat_s_trunc-4-i64-to-i8.c | 26 
 .../gcc.target/riscv/sat_s_trunc-run-4-i16-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-4-i32-to-i16.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-4-i32-to-i8.c | 16 +
 .../riscv/sat_s_trunc-run-4-i64-to-i16.c   | 16 +
 .../riscv/sat_s_trunc-run-4-i64-to-i32.c   | 16 +
 .../gcc.target/riscv/sat_s_trunc-run-4-i64-to-i8.c | 16 +
 13 files changed, 271 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 607bc4fc82e6..0b3d0ea70737 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -501,6 +501,18 @@ sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define DEF_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, NT_MIN, NT_MAX) \
   DEF_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX)
 
+#define DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
+NT __attribute__((noinline))  \
+sat_s_trunc_##WT##_to_##NT##_fmt_4 (WT x) \
+{ \
+  NT trunc = (NT)x;   \
+  return (WT)NT_MIN <= x && x < (WT)NT_MAX\
+? trunc   \
+: x < 0 ? NT_MIN : NT_MAX;\
+}
+#define DEF_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, NT_MIN, NT_MAX) \
+  DEF_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX)
+
 #define RUN_SAT_S_TRUNC_FMT_1(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_S_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_1(NT, WT, x)
 
@@ -510,4 +522,7 @@ sat_s_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define RUN_SAT_S_TRUNC_FMT_3(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_3 (x)
 #define RUN_SAT_S_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_3(NT, WT, x)
 
+#define RUN_SAT_S_TRUNC_FMT_4(NT, WT, x) sat_s_trunc_##WT##_to_##NT##_fmt_4 (x)
+#define RUN_SAT_S_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_S_TRUNC_FMT_4(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c
new file mode 100644
index ..f2c4a297bd6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_trunc-4-i16-to-i8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_trunc_int16_t_to_int8_t_fmt_4:
+** slti\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Slightly improve broadcasting small constants into vectors

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4b3f2e950eb7f71b8445d2d268a2b73428be2ed5

commit 4b3f2e950eb7f71b8445d2d268a2b73428be2ed5
Author: Jeff Law 
Date:   Sat Oct 12 07:12:53 2024 -0600

[RISC-V] Slightly improve broadcasting small constants into vectors

I probably spent way more time on this than it's worth...

I was looking at the code we generate for vector SAD and noticed that we were
being a bit silly.  Specifically:

li  a4,0# 272   [c=4 l=4]  *movsi_internal/1

Followed shortly by:

vmv.s.x v3,a4   # 261   [c=4 l=4]  *pred_broadcastrvvm1si/6

And no other uses of a4.  We could have used x0 trivially.

First we adjust the expander so that it doesn't force the constant into a
register.  In the matching pattern we change the appropriate source
constraints from "r" to "rJ" and the output template is changed to use %z for
the operand.  The net is we drop the li completely and emit vmv.s.x v3,x0.

But wait, there's more.  If we're broadcasting a constant in the range
[-16..15] into a vector, we currently load the constant into a register and
use vmv.v.x.  We can instead use vmv.v.i, which avoids loading the constant
into a GPR.  For that case we again avoid forcing the constant into a
register in the expander and adjust the output template to emit vmv.v.x or
vmv.v.i based on whether the appropriate operand is a general purpose
register or a constant.  So again, we'll drop the load immediate into a
scalar register for this case.

Whether or not we should use vmv.v.i vs vmv.s.x for loading [-16..15] into 
the
0th element is probably uarch dependent.  The tradeoff is loading the GPR vs
the broadcast in the vector unit.  I didn't bother with this case.

Tested in my tester (which tests rv64gcv as a default codegen option). Will
wait for the pre-commit tester to render a verdict.

gcc/
* config/riscv/constraints.md (P): New constraint.
* config/riscv/vector.md (pred_broadcast expander): Do
not force small integers into GPRs so aggressively.
(pred_broadcast insn & splitter): Allow splatting small
constants across the vector register directly.  Allow splatting
(const_int 0) into element 0 directly.

(cherry picked from commit ba773a86f0377abccecd3e398dceb9408bba5a7c)

Diff:
---
 gcc/config/riscv/constraints.md |  5 +
 gcc/config/riscv/vector.md  | 22 --
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 3ab6d5426223..eb5a0bb75c72 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -70,6 +70,11 @@
   (and (match_code "const_int")
(match_test "ival == 8")))
 
+(define_constraint "P"
+  "A 5-bit signed immediate for vmv.v.i."
+  (and (match_code "const_int")
+   (match_test "IN_RANGE (ival, -16, 15)")))
+
 (define_constraint "K"
   "A 5-bit unsigned immediate for CSR access instructions."
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92e3061c7f85..a21288f7af2a 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2095,6 +2095,16 @@
   emit_move_insn (tmp, gen_int_mode (value, Pmode));
   operands[3] = gen_rtx_SIGN_EXTEND (mode, tmp);
 }
+  /* Never load (const_int 0) into a register, that's silly.  */
+  else if (operands[3] == CONST0_RTX (mode))
+;
+  /* If we're broadcasting [-16..15] across more than just
+ element 0, then we can use vmv.v.i directly, thus avoiding
+ the load of the constant into a GPR.  */
+  else if (CONST_INT_P (operands[3])
+  && IN_RANGE (INTVAL (operands[3]), -16, 15)
+  && !satisfies_constraint_Wb1 (operands[1]))
+;
   else
 operands[3] = force_reg (mode, operands[3]);
 })
@@ -2111,18 +2121,18 @@
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (vec_duplicate:V_VLSI
-   (match_operand: 3 "direct_broadcast_operand"   " r,  
r,Wdm,Wdm,Wdm,Wdm,  r,  r"))
- (match_operand:V_VLSI 2 "vector_merge_operand""vu,  0, 
vu,  0, vu,  0, vu,  0")))]
+   (match_operand: 3 "direct_broadcast_operand"   
"rP,rP,Wdm,Wdm,Wdm,Wdm, rJ, rJ"))
+ (match_operand:V_VLSI 2 "vector_merge_operand""vu, 0, vu, 
 0, vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "@
-   vmv.v.x\t%0,%3
-   vmv.v.x\t%0,%3
+   vmv.v.%o3\t%0,%3
+   vmv.v.%o3\t%0,%3
vlse.v\t%0,%3,zero,%1.t
vlse.v\t%0,%3,zero,%1.t
vlse.v\t%0,%3,zero
vlse.v\t%0,%3,zero
-   vmv.s.x\t%0,%3
-   vmv.s.x\t%0,%3"
+   vmv.s.x\t%0,%z3
+   vmv.s.x\t%0,%z3"
   "(register_operand (operands[3], mode)
   || CONST_POLY_INT_P (operands[3]))
   && GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (Pmode)"


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add detailed comments on processing implied extensions. [NFC]

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d273fb1fff879b75b9534c1d51a2c0d9897a9825

commit d273fb1fff879b75b9534c1d51a2c0d9897a9825
Author: Yangyu Chen 
Date:   Mon Oct 14 18:31:06 2024 +0800

RISC-V: Add detailed comments on processing implied extensions. [NFC]

In some cases, we don't need to handle implied extensions. Add detailed
comments to help developers understand what implied ISAs should be
considered.

libgcc/ChangeLog:

* config/riscv/feature_bits.c (__init_riscv_features_bits_linux):
Add detailed comments on processing implied extensions.

Signed-off-by: Yangyu Chen 
(cherry picked from commit 1c507a02f29c6ca735f40f4b16b341ce9d5aa1b1)

Diff:
---
 libgcc/config/riscv/feature_bits.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libgcc/config/riscv/feature_bits.c 
b/libgcc/config/riscv/feature_bits.c
index 9bdbc466feea..a90e553b83d6 100644
--- a/libgcc/config/riscv/feature_bits.c
+++ b/libgcc/config/riscv/feature_bits.c
@@ -290,9 +290,12 @@ static void __init_riscv_features_bits_linux ()
 }
 
   const struct riscv_hwprobe hwprobe_ima_ext = hwprobes[4];
-
   /* Every time we add new extensions, we should check if previous extensions
- imply the new extension and set the corresponding bit.  */
+ imply the new extension and set the corresponding bit.
+ We don't need to handle cases where:
+ 1.  The new extension implies a previous extension (e.g., Zve32f -> F).
+ 2.  The extensions imply some other extensions appear in the same release
+version of Linux Kernel (e.g., Zbc - > Zbkc).  */
 
   if (hwprobe_ima_ext.value & RISCV_HWPROBE_IMA_FD)
 {
@@ -397,7 +400,7 @@ __init_riscv_feature_bits ()
 #ifdef __linux
   __init_riscv_features_bits_linux ();
 #else
-  /* Unsupported, just initlizaed that into all zeros.  */
+  /* Unsupported, just initialize that into all zeros.  */
   __riscv_feature_bits.length = 0;
   __riscv_vendor_feature_bits.length = 0;
   __riscv_cpu_model.mvendorid = 0;


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix UNRESOLVED testcases for SAT alu vector mode

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:8bae04250642e76adb4c69fac5ca52263c189984

commit 8bae04250642e76adb4c69fac5ca52263c189984
Author: Pan Li 
Date:   Tue Oct 15 09:19:44 2024 +0800

RISC-V: Fix UNRESOLVED testcases for SAT alu vector mode

Some saturation-related ALU testcases were missing the additional option
needed for the expand check, which results in some UNRESOLVED issues.  This
patch fixes that by adding the option back, as in the other
testcases.

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Add
compile option for expanding check.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: Ditto.

Signed-off-by: Pan Li 
(cherry picked from commit 14493126c0f56dd201b27bfd28fb4575351a9725)

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c  | 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c| 1 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c| 1 +
 8 files changed, 8 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
index 236fe68123fb..1320b05e76cb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
index 2eda4197abba..e71758d9c4ea 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
index ae97fece59bc..1626e857d28f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c
index f0c5289764f7..8792bb6112b9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
index 7cde4c9d378e..4a93c7f89cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
 
 #include "../vec_sat_arith.h"
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c
index 341226838a37..bc6d441759f3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 3 of vector signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d92cec88f320dfc954d9c78497c214f62ad1e25d

commit d92cec88f320dfc954d9c78497c214f62ad1e25d
Author: Pan Li 
Date:   Sat Oct 12 10:40:30 2024 +0800

RISC-V: Add testcases for form 3 of vector signed SAT_SUB

Form 3:
  #define DEF_VEC_SAT_S_SUB_FMT_3(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_sub_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T minus;   \
bool overflow = __builtin_sub_overflow (x, y, &minus); \
out[i] = overflow ? x < 0 ? MIN : MAX : minus; \
  }\
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-3-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-3-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-3-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-3-i8.c: New 
test.

Signed-off-by: Pan Li 
(cherry picked from commit b97629226d9be496bc30bb13608ef1c2bcdceeb7)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-3-i16.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-3-i32.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-3-i64.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-3-i8.c   |  9 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-3-i16.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-3-i32.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-3-i64.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-3-i8.c | 17 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 9 files changed, 126 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i16.c
new file mode 100644
index ..c10dc0903c45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_3(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i32.c
new file mode 100644
index ..d1352ed56e4c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i32.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_3(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i64.c
new file mode 100644
index ..b86887d332bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-3-i64.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_3(int64_t, uint64_t, INT64_MIN, INT64_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement __init_riscv_feature_bits, __riscv_feature_bits, and __riscv_vendor_feature_bits

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b24a3bda663750278e690f5e3f2d50d31ca16ba4

commit b24a3bda663750278e690f5e3f2d50d31ca16ba4
Author: Kito Cheng 
Date:   Mon Oct 14 16:07:16 2024 +0800

RISC-V: Implement __init_riscv_feature_bits, __riscv_feature_bits, and 
__riscv_vendor_feature_bits

This provides a common abstraction layer to probe the available extensions 
at
run-time. These functions can be used to implement function 
multi-versioning or
to detect available extensions.

The advantages of providing this abstraction layer are:
- Easy to port to other new platforms.
- Easier to maintain in GCC for function multi-versioning.
  - For example, maintaining platform-dependent code in C code/libgcc is 
much
easier than maintaining it in GCC by creating GIMPLEs...

This API is intended to provide the capability to query minimal common 
available extensions on the system.

The API is defined in the riscv-c-api-doc:
https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc

Proposal to use unsigned long long for marchid and mimpid:
https://github.com/riscv-non-isa/riscv-c-api-doc/pull/91

Full function multi-versioning implementation will come later. We are 
posting
this first because we intend to backport it to the GCC 14 branch to unblock
LLVM 19 to use this with GCC 14.2, rather than waiting for GCC 15.

Changes since v7:
- Remove vendorID field in __riscv_vendor_feature_bits.
- Fix C implies Zcf only for RV32.
- Add more comments to kernel versions.

Changes since v6:
- Implement __riscv_cpu_model.
- Set new sub extension bits which implied from previous extensions.

Changes since v5:
- Minor fixes on indentation.

Changes since v4:
- Bump to newest riscv-c-api-doc with some new extensions like Zve*, Zc*
  Zimop, Zcmop, Zawrs.
- Rename the return variable name of hwprobe syscall.
- Minor fixes on indentation.

Changes since v3:
- Fix non-linux build.
- Let __init_riscv_feature_bits become constructor

Changes since v2:
- Prevent it from being initialized more than once.

Changes since v1:
- Fix the format.
- Prevented race conditions by introducing a local variable to avoid 
load/store
  operations during the computation of the feature bit.

Co-Developed-by: Yangyu Chen 
Signed-off-by: Yangyu Chen 

libgcc/ChangeLog:

* config/riscv/feature_bits.c: New.
* config/riscv/t-elf (LIB2ADD): Add feature_bits.c.

(cherry picked from commit ca44eb7f6a33ff3b93e7685606b4fc286ce0fe80)

Diff:
---
 libgcc/config/riscv/feature_bits.c | 409 +
 libgcc/config/riscv/t-elf  |   1 +
 2 files changed, 410 insertions(+)

diff --git a/libgcc/config/riscv/feature_bits.c 
b/libgcc/config/riscv/feature_bits.c
new file mode 100644
index ..9bdbc466feea
--- /dev/null
+++ b/libgcc/config/riscv/feature_bits.c
@@ -0,0 +1,409 @@
+/* Helper function for function multi-versioning for RISC-V.
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+.  */
+
+#define RISCV_FEATURE_BITS_LENGTH 2
+
+struct {
+  unsigned length;
+  unsigned long long features[RISCV_FEATURE_BITS_LENGTH];
+} __riscv_feature_bits __attribute__ ((visibility ("hidden"), nocommon));
+
+#define RISCV_VENDOR_FEATURE_BITS_LENGTH 1
+
+struct {
+  unsigned length;
+  unsigned long long features[RISCV_VENDOR_FEATURE_BITS_LENGTH];
+} __riscv_vendor_feature_bits __attribute__ ((visibility ("hidden"), 
nocommon));
+
+struct {
+  unsigned mvendorid;
+  unsigned long long marchid;
+  unsigned long long mimpid;
+} __riscv_cpu_model __attribute__ ((visibility ("hidden"), nocommon));
+
+#define A_GROUPID 0
+#define A_BITMASK (1ULL << 0)
+#define C_GROUPID 0
+#define C_BITMASK (1ULL << 2)
+#define D_GROUPID 0
+#define D_BITMASK (1ULL << 3)
+#define F_GROUPID 0
+#define F_BITMASK (1ULL << 5)
+#define I_GROUPID 0
+#define I_BITMASK (1ULL << 8)
+#define M_GROUPID 0
+#define M_BITMASK (

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Use biggest_mode as mode for constants.

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:8e28c8822d2985445de528b0f134a92239210fc4

commit 8e28c8822d2985445de528b0f134a92239210fc4
Author: Robin Dapp 
Date:   Tue Oct 15 12:10:48 2024 +0200

RISC-V: Use biggest_mode as mode for constants.

In compute_nregs_for_mode we expect that the current variable's mode is
at most as large as the biggest mode to be used for vectorization.

This might not be true for constants as they don't actually have a mode.
In that case, just use the biggest mode so max_number_of_live_regs
returns 1.

This fixes several test cases in the test suite.

gcc/ChangeLog:

PR target/116655

* config/riscv/riscv-vector-costs.cc (max_number_of_live_regs):
Use biggest mode instead of constant's saved mode.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr116655.c: New test.

(cherry picked from commit cc217a1ecb04c9234b2cce7ba3c27701a050e402)

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc| 14 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c | 11 +++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index a80e167597be..acd86d8f69a8 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -193,7 +193,7 @@ compute_local_program_points (
   /* Collect the stmts that is vectorized and mark their program point.  */
   for (i = 0; i < nbbs; i++)
{
- int point = 1;
+ unsigned int point = 1;
  basic_block bb = bbs[i];
  vec program_points = vNULL;
  if (dump_enabled_p ())
@@ -488,9 +488,15 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const 
basic_block bb,
   pair live_range = (*iter).second;
   for (i = live_range.first + 1; i <= live_range.second; i++)
{
- machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
-   ? BImode
-   : TYPE_MODE (TREE_TYPE (var));
+ machine_mode mode;
+ if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE)
+   mode = BImode;
+ /* Constants do not have a mode, just use the biggest so
+compute_nregs will return 1.  */
+ else if (TREE_CODE (var) == INTEGER_CST)
+   mode = biggest_mode;
+ else
+   mode = TYPE_MODE (TREE_TYPE (var));
  unsigned int nregs
= compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul);
  live_vars_vec[i] += nregs;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c
new file mode 100644
index ..36768e37d005
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64imv -mabi=lp64d -mrvv-max-lmul=dynamic" } */
+
+short a[5];
+int b() {
+  int c = 0;
+  for (; c <= 4; c++)
+if (a[c])
+  break;
+  return c;
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Avoid unnecessary extensions when value is already extended

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ed2e094882fe423f929053e362fcb9485427d34c

commit ed2e094882fe423f929053e362fcb9485427d34c
Author: Jivan Hakobyan 
Date:   Sat Oct 12 19:10:50 2024 -0600

[RISC-V] Avoid unnecessary extensions when value is already extended

This is a minor patch from Jivan from roughly a year ago.  The basic
idea here is similar to what we do when extending values for the sake of
comparisons.  Specifically if the value is already known to be properly
extended, then an extension is just a copy.

The original idea was to use a similar patch, but one which aborted to
identify cases where these unnecessary promotions were emitted.  All
that showed up when doing a testsuite run with that abort was the
promotions created by the arithmetic with overflow patterns such as addv.

Things like addv aren't *that* common so this never got high on my todo
list, even after a minor issue in this space was raised in bugzilla.

But with stage1 closing soon and no good reason not to go forward, I'm
submitting this into the pre-commit tester now.  My tester has been
using it since roughly Feb :-)  Plan would be to commit after the
pre-commit tester renders its verdict.

* config/riscv/riscv.md (zero_extendsidi2): If RHS is already
zero extended, then this is just a copy.
(extendsidi2): Similarly, but for sign extension.

(cherry picked from commit c38385ddbcce9fbeeaa788b6a4f50bd0653b0271)

Diff:
---
 gcc/config/riscv/riscv.md | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 78112afbb261..f1e9bd588500 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1813,7 +1813,15 @@
 (define_expand "zero_extendsidi2"
   [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
-  "TARGET_64BIT")
+  "TARGET_64BIT"
+{
+  if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+  && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+{
+  emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
+  DONE;
+}
+})
 
 (define_insn_and_split "*zero_extendsidi2_internal"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -1894,7 +1902,15 @@
   [(set (match_operand:DI 0 "register_operand" "=r,r")
(sign_extend:DI
(match_operand:SI 1 "nonimmediate_operand" " r,m")))]
-  "TARGET_64BIT")
+  "TARGET_64BIT"
+{
+  if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+  && SUBREG_PROMOTED_SIGNED_P (operands[1]))
+{
+  emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
+  DONE;
+}
+})
 
 (define_insn "*extendsidi2_internal"
   [(set (match_operand:DI 0 "register_operand" "=r,r")


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of vector signed SAT_SUB

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ea35428b52483a61ced69af27bb3e49a657cf8e2

commit ea35428b52483a61ced69af27bb3e49a657cf8e2
Author: Pan Li 
Date:   Sat Oct 12 11:08:21 2024 +0800

RISC-V: Add testcases for form 4 of vector signed SAT_SUB

Form 4:
  #define DEF_VEC_SAT_S_SUB_FMT_4(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_sub_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T minus;   \
bool overflow = __builtin_sub_overflow (x, y, &minus); \
out[i] = !overflow ? minus : x < 0 ? MIN : MAX;\
  }\
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-4-i16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-4-i32.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-4-i64.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-run-4-i8.c: New 
test.

Signed-off-by: Pan Li 
(cherry picked from commit 4d8373f853269cd3a6f99ad0cb774fccd68cb874)

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-4-i16.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-4-i32.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-4-i64.c  |  9 +
 .../riscv/rvv/autovec/binop/vec_sat_s_sub-4-i8.c   |  9 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-4-i16.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-4-i32.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-4-i64.c| 17 +
 .../rvv/autovec/binop/vec_sat_s_sub-run-4-i8.c | 17 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 9 files changed, 126 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i16.c
new file mode 100644
index ..4497f0c1f83c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i32.c
new file mode 100644
index ..9f06e6a76509
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i32.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vssub\.vv} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i64.c
new file mode 100644
index ..e806fd06c003
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_s_sub-4-i64.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_SUB_FMT_4(int64_t, uint64_t, INT64_MIN, INT64_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e348a878cb7b60e13c06ed526aa32265f8fdd978

commit e348a878cb7b60e13c06ed526aa32265f8fdd978
Author: Craig Blackmore 
Date:   Fri Oct 18 09:06:58 2024 -0600

[PATCH 2/7] RISC-V: Fix uninitialized reg in memcpy

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Replace
`end` with `length_rtx` in gen_rtx_NE.

(cherry picked from commit 212d8685e4590c9f1168f503a383e3ea2639b418)

Diff:
---
 gcc/config/riscv/riscv-string.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 0c5ffd7d861e..0f1353baba3b 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1078,7 +1078,6 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
   bool need_loop = true;
   bool size_p = optimize_function_for_size_p (cfun);
   rtx src, dst;
-  rtx end = gen_reg_rtx (Pmode);
   rtx vec;
   rtx length_rtx = length_in;
 
@@ -1245,7 +1244,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
   emit_insn (gen_rtx_SET (length_rtx, gen_rtx_MINUS (Pmode, length_rtx, 
cnt)));
 
   /* Emit the loop condition.  */
-  rtx test = gen_rtx_NE (VOIDmode, end, const0_rtx);
+  rtx test = gen_rtx_NE (VOIDmode, length_rtx, const0_rtx);
   emit_jump_insn (gen_cbranch4 (Pmode, test, length_rtx, const0_rtx, 
label));
   emit_insn (gen_nop ());
 }


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand_block_move [NFC]

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ee4d8b5e9ccfc3363ce769f9c2b51dbe927de50f

commit ee4d8b5e9ccfc3363ce769f9c2b51dbe927de50f
Author: Craig Blackmore 
Date:   Fri Oct 18 09:01:35 2024 -0600

[PATCH 1/7] RISC-V: Fix indentation in riscv_vector::expand_block_move [NFC]

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Fix
indentation.

(cherry picked from commit f244492ec258d84ab253bd58ad57f31c65a2312d)

Diff:
---
 gcc/config/riscv/riscv-string.cc | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 4bb8bcec4a50..0c5ffd7d861e 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1086,22 +1086,22 @@ expand_block_move (rtx dst_in, rtx src_in, rtx 
length_in)
 {
   HOST_WIDE_INT length = INTVAL (length_in);
 
-/* By using LMUL=8, we can copy as many bytes in one go as there
-   are bits in a vector register.  If the entire block thus fits,
-   we don't need a loop.  */
-if (length <= TARGET_MIN_VLEN)
-  {
-   need_loop = false;
-
-   /* If a single scalar load / store pair can do the job, leave it
-  to the scalar code to do that.  */
-   /* ??? If fast unaligned access is supported, the scalar code could
-  use suitably sized scalars irrespective of alignment.  If that
-  gets fixed, we have to adjust the test here.  */
-
-   if (pow2p_hwi (length) && length <= potential_ew)
- return false;
-  }
+  /* By using LMUL=8, we can copy as many bytes in one go as there
+are bits in a vector register.  If the entire block thus fits,
+we don't need a loop.  */
+  if (length <= TARGET_MIN_VLEN)
+   {
+ need_loop = false;
+
+ /* If a single scalar load / store pair can do the job, leave it
+to the scalar code to do that.  */
+ /* ??? If fast unaligned access is supported, the scalar code could
+use suitably sized scalars irrespective of alignment.  If that
+gets fixed, we have to adjust the test here.  */
+
+ if (pow2p_hwi (length) && length <= potential_ew)
+   return false;
+   }
 
   /* Find the vector mode to use.  Using the largest possible element
 size is likely to give smaller constants, and thus potentially


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement vector SAT_SUB for signed integer

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:eb052e6234e110b857d895f1c004baa924bb9cc4

commit eb052e6234e110b857d895f1c004baa924bb9cc4
Author: Pan Li 
Date:   Fri Oct 11 12:05:10 2024 +0800

RISC-V: Implement vector SAT_SUB for signed integer

This patch would like to implement the sssub for vector signed integer.

Form 1:
  #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  void __attribute__((noinline))   \
  vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T minus = (UT)x - (UT)y;   \
out[i] = (x ^ y) >= 0  \
  ? minus  \
  : (minus ^ x) >= 0   \
? minus\
: x < 0 ? MIN : MAX;   \
  }\
  }

DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  28   │ vle8.v  v1,0(a1)
  29   │ vle8.v  v2,0(a2)
  30   │ sub a3,a3,a5
  31   │ add a1,a1,a5
  32   │ add a2,a2,a5
  33   │ vsra.vi v4,v1,7
  34   │ vsub.vv v3,v1,v2
  35   │ vxor.vv v2,v1,v2
  36   │ vxor.vv v0,v1,v3
  37   │ vmslt.viv2,v2,0
  38   │ vmslt.viv0,v0,0
  39   │ vmand.mmv0,v0,v2
  40   │ vxor.vv v3,v4,v5,v0.t
  41   │ vse8.v  v3,0(a0)
  42   │ add a0,a0,a5

After this patch:
  25   │ vle8.v  v1,0(a1)
  26   │ vle8.v  v2,0(a2)
  27   │ sub a3,a3,a5
  28   │ add a1,a1,a5
  29   │ add a2,a2,a5
  30   │ vssub.vvv1,v1,v2
  31   │ vse8.v  v1,0(a0)
  32   │ add a0,a0,a5

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (sssub3): Add new pattern for
signed SAT_SUB.
* config/riscv/riscv-protos.h (expand_vec_sssub): Add new func
decl to expand sssub to vssub.
* config/riscv/riscv-v.cc (expand_vec_sssub): Add new func
impl to expand sssub to vssub.

Signed-off-by: Pan Li 
(cherry picked from commit 2a7f4904942fd0d988d7d29ba512ee4ee357bb13)

Diff:
---
 gcc/config/riscv/autovec.md | 11 +++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc |  9 +
 3 files changed, 21 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 836cdd4491f6..7dc78a488746 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2734,6 +2734,17 @@
   }
 )
 
+(define_expand "sssub3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], 
mode);
+DONE;
+  }
+)
+
 (define_expand "ustrunc2"
   [(match_operand: 0 "register_operand")
(match_operand:VWEXTI   1 "register_operand")]
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1e6d10a14020..b2f5d72f494d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -649,6 +649,7 @@ void expand_vec_lfloor (rtx, rtx, machine_mode, 
machine_mode);
 void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
+void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
 void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
 void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ca3a80cceb9c..fba35652cc28 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4902,6 +4902,15 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, 
machine_mode vec_mode)
   emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode);
 }
 
+/* Expand the standard name ssadd3 for vector mode,  we can leverage
+   the vector fixed point vector single-width saturatin

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:2c2eb88266adde345ccc7efe51481e03f0a0046c

commit 2c2eb88266adde345ccc7efe51481e03f0a0046c
Author: Pan Li 
Date:   Mon Oct 14 10:21:39 2024 +0800

RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC

Form 1:
  #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: Add test data 
for
signed SAT_TRUNC.
* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 1f3a9c08aff9aac53d6c12b658efc222cf91de9c)

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_data.h  | 291 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c|   9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c|   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c|   9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c |  16 ++
 .../unop/vec_sat_s_trunc-run-1-i32-to-i16.c|  16 ++
 .../autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c |  16 ++
 .../unop/vec_sat_s_trunc-run-1-i64-to-i16.c|  16 ++
 .../unop/vec_sat_s_trunc-run-1-i64-to-i32.c|  16 ++
 .../autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c |  16 ++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   |  22 ++
 14 files changed, 463 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
index 6b23ec809f6c..a3643c5e1218 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
@@ -25,6 +25,15 @@ TEST_UNARY_STRUCT(uint16_t, uint64_t)
 
 TEST_UNARY_STRUCT(uint32_t, uint64_t)
 
+TEST_UNARY_STRUCT(int8_t, int16_t)
+TEST_UNARY_STRUCT(int8_t, int32_t)
+TEST_UNARY_STRUCT(int8_t, int64_t)
+
+TEST_UNARY_STRUCT(int16_t, int32_t)
+TEST_UNARY_STRUCT(int16_t, int64_t)
+
+TEST_UNARY_STRUCT(int32_t, int64_t)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -391,4 +400,286 @@ TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   },
 };
 
+TEST_UNARY_STRUCT_DECL(int8_t, int16_t) \
+  TEST_UNARY_DATA(int8_t, int16_t)[] =
+{
+  {
+{
+  0,  0,  0,  0,
+ -1, -1, -1, -1,
+  1,  1,  1,  1,
+  2,  2,  2,  2,
+},
+{
+  0,  0,  0,  0,
+ -1, -1, -1, -1,
+  1,  1,  1,  1,
+  2,  2,  2,  2,
+},
+  },
+  {
+{
+   127,  127,  127, 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e6f47b02d26e982e5b47105432106c151f46

commit e6f47b02d26e982e5b47105432106c151f46
Author: Pan Li 
Date:   Mon Oct 14 11:41:02 2024 +0800

RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC

Form 4:
  #define DEF_VEC_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x < (WT)NT_MAX  \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit f30ca9867a77c78f3a48bc124ab3bc4ce32283fa)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-4-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-4-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-4-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c
new file mode 100644
index ..2ac96aa1a35b
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_4(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
new file mode 100644
index ..7fe8f2774767
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_4(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 4/7] RISC-V: Honour -mrvv-max-lmul in riscv_vector::expand_block_move

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:09eddbb34a38357aab6d3186c830ec7babdf6750

commit 09eddbb34a38357aab6d3186c830ec7babdf6750
Author: Craig Blackmore 
Date:   Sat Oct 19 06:57:06 2024 -0600

[PATCH 4/7] RISC-V: Honour -mrvv-max-lmul in riscv_vector::expand_block_move

Unlike the other vector string ops, expand_block_move was using max LMUL
m8 regardless of TARGET_MAX_LMUL.

The check for whether to generate inline vector code for movmem has been
moved from movmem to riscv_vector::expand_block_move to avoid
maintaining multiple versions of similar logic.  They already differed
on the minimum length for which they would generate vector code.  Now
that the expand_block_move value is used, movmem will be generated for
smaller lengths.

Limiting memcpy to m1 caused some memcpy loops to be generated in
the calling convention tests which makes it awkward to add suitable scan
assembler tests checking the return value being set, so
-mrvv-max-lmul=m8 has been added to these tests.  Other tests have been
adjusted to expect the new memcpy m1 generation where reasonably
straight-forward, otherwise -mrvv-max-lmul=m8 has been added.

pr111720-[0-9].c regressed because a memcpy loop is generated instead
of straight-line.  This reveals an existing issue where a redundant
straight-line memcpy gets eliminated but a memcpy loop does not
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117205).

For example, on pr111720-0.c after this patch:

-mrvv-max-lmul=m8:

test:
lui a5,%hi(.LANCHOR0)
li  a4,32
addisp,sp,-32
addia5,a5,%lo(.LANCHOR0)
vsetvli zero,a4,e8,m1,ta,ma
vle8.v  v8,0(a5)
addisp,sp,32
jr  ra

-mrvv-max-lmul=m1:

test:
addisp,sp,-32
lui a5,%hi(.LANCHOR0)
addia5,a5,%lo(.LANCHOR0)
mv  a2,sp
li  a3,32
.L2:
vsetvli a4,a3,e8,m1,ta,ma
vle8.v  v8,0(a5)
sub a3,a3,a4
add a5,a5,a4
vse8.v  v8,0(a2)
add a2,a2,a4
bne a3,zero,.L2
li  a5,32
vsetvli zero,a5,e8,m1,ta,ma
vle8.v  v8,0(sp)
addisp,sp,32
jr  ra

I have added -mrvv-max-lmul=m8 to pr111720-[0-9].c so that we continue
to test the elimination of straight-line memcpy.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (get_lmul_mode): New prototype.
(expand_block_move): Add bool parameter for movmem_p.
* config/riscv/riscv-string.cc (riscv_expand_block_move_scalar):
Pass movmem_p as false to riscv_vector::expand_block_move.
(expand_block_move): Add movmem_p parameter.  Return false if
loop needed and movmem_p is true.  Respect TARGET_MAX_LMUL.
* config/riscv/riscv-v.cc (get_lmul_mode): New function.
* config/riscv/riscv.md (movmem): Move checking for
whether to generate inline vector code to
riscv_vector::expand_block_move by passing movmem_p as true.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113206-1.c: Add
-mrvv-max-lmul=m8.
* gcc.target/riscv/rvv/autovec/pr113206-2.c: Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: Add
-mrvv-max-lmul=m8 and adjust assembly scans.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c:
Likewise.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Add
-mrvv-max-lmul=m8.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Likewise.
* gcc.target/riscv/rvv/base/cpymem-1.c: Expect m1 in f1 and f2.
* gcc.target/riscv/rvv/base/cpymem-2.c: Add -mrvv-max-lmul=m8.
* gcc.target/riscv/rvv/base/movmem-1.c: Adjust f1 to a length
that will not get vectorized.
* gcc.target/riscv/rvv/base/pr111720-0.c: Add -mrvv-max-lmul=m8.
* gcc.target/riscv/rvv/base/pr111720-1.c: Likewise.
* gcc.target/riscv/rvv/base/pr111720-2.c: Likewise.
* gcc.target/riscv/rvv/base/pr111720-3.c: Likewise.
* gcc.target/riscv/rvv/base/pr111720-4.c: Likewise.
* gcc.target/riscv/rvv/base/pr111720-5.c: Likewise.
* gcc.target/ris

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 5/7] RISC-V: Move vector memcpy decision making to separate function [NFC]

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:95dd943483b5f42071e12ddf8c3385f196dfb899

commit 95dd943483b5f42071e12ddf8c3385f196dfb899
Author: Craig Blackmore 
Date:   Sat Oct 19 07:00:47 2024 -0600

[PATCH 5/7] RISC-V: Move vector memcpy decision making to separate function 
[NFC]

This moves the code for deciding whether to generate a vectorized
memcpy, what vector mode to use and whether a loop is needed out of
riscv_vector::expand_block_move and into a new function
riscv_vector::use_vector_stringop_p so that it can be reused for other string
operations.

gcc/ChangeLog:

* config/riscv/riscv-string.cc (struct stringop_info): New.
(expand_block_move): Move decision making code to...
(use_vector_stringop_p): ...here.

(cherry picked from commit 12deddebf63217ffe26858c4a89f7aba5133f04c)

Diff:
---
 gcc/config/riscv/riscv-string.cc | 143 ---
 1 file changed, 87 insertions(+), 56 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 64fd6b290921..118c02a40212 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1051,35 +1051,31 @@ riscv_expand_block_clear (rtx dest, rtx length)
 
 namespace riscv_vector {
 
-/* Used by cpymemsi in riscv.md .  */
+struct stringop_info {
+  rtx avl;
+  bool need_loop;
+  machine_mode vmode;
+};
 
-bool
-expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
-{
-  /*
-memcpy:
-   mv a3, a0   # Copy destination
-loop:
-   vsetvli t0, a2, e8, m8, ta, ma  # Vectors of 8b
-   vle8.v v0, (a1) # Load bytes
-   add a1, a1, t0  # Bump pointer
-   sub a2, a2, t0  # Decrement count
-   vse8.v v0, (a3) # Store bytes
-   add a3, a3, t0  # Bump pointer
-   bnez a2, loop   # Any more?
-   ret # Return
-  */
-  gcc_assert (TARGET_VECTOR);
+/* If a vectorized stringop should be used populate INFO and return TRUE.
+   Otherwise return false and leave INFO unchanged.
 
-  HOST_WIDE_INT potential_ew
-= (MIN (MIN (MEM_ALIGN (src_in), MEM_ALIGN (dst_in)), BITS_PER_WORD)
-   / BITS_PER_UNIT);
-  machine_mode vmode = VOIDmode;
+   MAX_EW is the maximum element width that the caller wants to use and
+   LENGTH_IN is the length of the stringop in bytes.
+*/
+
+static bool
+use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
+  rtx length_in)
+{
   bool need_loop = true;
-  bool size_p = optimize_function_for_size_p (cfun);
-  rtx src, dst;
-  rtx vec;
-  rtx length_rtx = length_in;
+  machine_mode vmode = VOIDmode;
+  /* The number of elements in the stringop.  */
+  rtx avl = length_in;
+  HOST_WIDE_INT potential_ew = max_ew;
+
+  if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
+return false;
 
   if (CONST_INT_P (length_in))
 {
@@ -1113,17 +1109,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx 
length_in, bool movmem_p)
 for small element widths, we might allow larger element widths for
 loops too.  */
   if (need_loop)
-   {
- if (movmem_p)
-   /* Inlining general memmove is a pessimisation: we can't avoid
-  having to decide which direction to go at runtime, which is
-  costly in instruction count however for situations where the
-  entire move fits in one vector operation we can do all reads
-  before doing any writes so we don't have to worry so generate
-  the inline vector code in such situations.  */
-   return false;
- potential_ew = 1;
-   }
+   potential_ew = 1;
   for (; potential_ew; potential_ew >>= 1)
{
  scalar_int_mode elem_mode;
@@ -1193,7 +1179,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in, 
bool movmem_p)
  gcc_assert (potential_ew > 1);
}
   if (potential_ew > 1)
-   length_rtx = GEN_INT (length / potential_ew);
+   avl = GEN_INT (length / potential_ew);
 }
   else
 {
@@ -1203,35 +1189,80 @@ expand_block_move (rtx dst_in, rtx src_in, rtx 
length_in, bool movmem_p)
   /* A memcpy libcall in the worst case takes 3 instructions to prepare the
  arguments + 1 for the call.  When RVV should take 7 instructions and
  we're optimizing for size a libcall may be preferable.  */
-  if (size_p && need_loop)
+  if (optimize_function_for_size_p (cfun) && need_loop)
 return false;
 
-  /* length_rtx holds the (remaining) length of the required copy.
+  info.need_loop = need_loop;
+  info.vmode = vmode;
+  info.avl = avl;
+  return true;
+}
+
+/* Used by cpymemsi in riscv.md .  */
+
+bool
+expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
+{
+  /*
+memcpy:
+   mv a3, a0   # Copy destination
+l

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH][v5] RISC-V: add option -m(no-)autovec-segment

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a514689ebcb85872987e407f253b373335bc0148

commit a514689ebcb85872987e407f253b373335bc0148
Author: Greg McGary 
Date:   Sat Oct 19 08:21:56 2024 -0600

[PATCH][v5] RISC-V: add option -m(no-)autovec-segment

Add option -m(no-)autovec-segment to allow or prevent the autovectorizer
from emitting vector segment load/store instructions. This is useful for
performance experiments.

gcc/ChangeLog:
* config/riscv/autovec.md (vec_mask_len_load_lanes, 
vec_mask_len_store_lanes):
Predicate with TARGET_VECTOR_AUTOVEC_SEGMENT
* config/riscv/riscv-opts.h (TARGET_VECTOR_AUTOVEC_SEGMENT): New 
macro.
* config/riscv/riscv.opt (-m(no-)autovec-segment): New option.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-1.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-2.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-3.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-4.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-5.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-6.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg-7.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-1.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-2.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-3.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-4.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-5.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-6.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_load_noseg_run-7.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-1.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-2.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-3.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-4.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-5.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-6.c:
New test.
* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg-7.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-1.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-2.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-3.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-4.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-5.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-6.c:
New test.
* 
gcc.target/riscv/rvv/autovec/struct/mask_struct_store_noseg_run-7.c:
New test.
* gcc.target/riscv/rvv/autovec/no-segment.c: New test.

(cherry picked from commit f2989316139c3e7a99b1babe2606833c05b8a12f)

Diff:
---
 gcc/config/riscv/autovec.md|  4 +-
 gcc/config/riscv/riscv-opts.h  |  5 ++
 gcc/config/riscv/riscv.opt |  4 ++
 .../gcc.target/riscv/rvv/autovec/no-segment.c  | 61 ++
 .../rvv/autovec/struct/mask_struct_load_noseg-1.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-2.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-3.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-4.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-5.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-6.c  |  6 +++
 .../rvv/autovec/struct/mask_struct_load_noseg-7.c  |  6 +++
 .../autovec/struct/mask_struct_load_noseg_run-1.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-2.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-3.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-4.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-5.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-6.c  |  4 ++
 .../autovec/struct/mask_struct_load_noseg_run-7.c  |  4 ++
 .../rvv/autovec/struct/mask_struct_store_noseg-1.c |  6 +++
 .../rvv/au

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 7/7] RISC-V: Disable by pieces for vector setmem length > UNITS_PER_WORD

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f2009b5cb0d0c18e19c0e2badfea5777fb7cbcae

commit f2009b5cb0d0c18e19c0e2badfea5777fb7cbcae
Author: Craig Blackmore 
Date:   Sat Oct 19 07:08:31 2024 -0600

[PATCH 7/7] RISC-V: Disable by pieces for vector setmem length > 
UNITS_PER_WORD

For fast unaligned access targets, by pieces uses up to UNITS_PER_WORD
size pieces resulting in more store instructions than needed.  For
example gcc.target/riscv/rvv/base/setmem-1.c:f1 built with
`-O3 -march=rv64gcv -mtune=thead-c906`:
```
f1:
vsetivlizero,8,e8,mf2,ta,ma
vmv.v.x v1,a1
vsetivlizero,0,e32,mf2,ta,ma
sb  a1,14(a0)
vmv.x.s a4,v1
vsetivlizero,8,e16,m1,ta,ma
vmv.x.s a5,v1
vse8.v  v1,0(a0)
sw  a4,8(a0)
sh  a5,12(a0)
ret
```

The slow unaligned access version built with `-O3 -march=rv64gcv` used
15 sb instructions:
```
f1:
sb  a1,0(a0)
sb  a1,1(a0)
sb  a1,2(a0)
sb  a1,3(a0)
sb  a1,4(a0)
sb  a1,5(a0)
sb  a1,6(a0)
sb  a1,7(a0)
sb  a1,8(a0)
sb  a1,9(a0)
sb  a1,10(a0)
sb  a1,11(a0)
sb  a1,12(a0)
sb  a1,13(a0)
sb  a1,14(a0)
ret
```

After this patch, the following is generated in both cases:
```
f1:
vsetivlizero,15,e8,m1,ta,ma
vmv.v.x v1,a1
vse8.v  v1,0(a0)
ret
```

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_use_by_pieces_infrastructure_p):
New function.
(TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Define.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113469.c: Expect mf2 setmem.
* gcc.target/riscv/rvv/base/setmem-2.c: Update f1 to expect
straight-line vector memset.
* gcc.target/riscv/rvv/base/setmem-3.c: Likewise.

(cherry picked from commit 72ceddbfb78dbb95f0808c3eca1765e8cd48b023)

Diff:
---
 gcc/config/riscv/riscv.cc | 19 +++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c |  3 ++-
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c| 12 +++-
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c| 12 +++-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2f7250a0cf60..e4dcc3a0ba7f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12570,6 +12570,22 @@ riscv_stack_clash_protection_alloca_probe_range (void)
   return STACK_CLASH_CALLER_GUARD;
 }
 
+static bool
+riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned alignment,
+ enum by_pieces_operation op, bool speed_p)
+{
+  /* For set/clear with size > UNITS_PER_WORD, by pieces uses vector broadcasts
+ with UNITS_PER_WORD size pieces.  Use setmem instead which can use
+ bigger chunks.  */
+  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR
+  && (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
+  && speed_p && size > UNITS_PER_WORD)
+return false;
+
+  return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -12932,6 +12948,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
 #undef TARGET_GET_RAW_RESULT_MODE
 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 
riscv_use_by_pieces_infrastructure_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
index d1c118c02d6e..f86084bdb40f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
@@ -51,4 +51,5 @@ void p(int buf, __builtin_va_list ab, int q) {
  } while (k);
 }
 
-/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
index faea442a4bdc..838fbebadff3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
+++ b/gcc/testsuite/gcc.target/r

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:591f45227fd6ecb6c6401bb6da9ff25921934ea9

commit 591f45227fd6ecb6c6401bb6da9ff25921934ea9
Author: Pan Li 
Date:   Mon Oct 14 11:09:55 2024 +0800

RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC

Form 2:
  #define DEF_VEC_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 033900fc175bbd67fd1a8c8f7410a21f8b04eda2)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-2-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-2-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-2-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c
new file mode 100644
index ..3e26e788c083
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_2(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
new file mode 100644
index ..63797705a04a
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_2(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:db978aea92ebddbcb21ae28d2b86c003e45eeff4

commit db978aea92ebddbcb21ae28d2b86c003e45eeff4
Author: Pan Li 
Date:   Mon Oct 14 11:26:06 2024 +0800

RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC

Form 3:
  #define DEF_VEC_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit efa1617bfc095e0667df31a6f3a2c0319afbc8d0)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-3-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-3-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-3-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c
new file mode 100644
index ..392366def060
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_3(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
new file mode 100644
index ..2b16049994a5
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_3(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b0158d651af43932755e4c5799df58771d6e5c06

commit b0158d651af43932755e4c5799df58771d6e5c06
Author: Craig Blackmore 
Date:   Fri Oct 18 09:17:21 2024 -0600

[PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

If riscv_vector::expand_block_move is generating a straight-line memcpy
using a predicated store, it tries to use a smaller LMUL to reduce
register pressure if it still allows an entire transfer.

This happens in the inner loop of riscv_vector::expand_block_move,
however, the vmode chosen by this loop gets overwritten later in the
function, so I have added the missing break from the outer loop.

I have also addressed a couple of issues with the conditions of the if
statement within the inner loop.

The first condition did not make sense to me:
```
  TARGET_MIN_VLEN * lmul <= nunits * BITS_PER_UNIT
```
I think this was supposed to be checking that the length fits within the
given LMUL, so I have changed it to do that.

The second condition:
```
  /* Avoid loosing the option of using vsetivli .  */
  && (nunits <= 31 * lmul || nunits > 31 * 8)
```
seems to imply that lmul affects the range of AVL immediate that
vsetivli can take but I don't think that is correct.  Anyway, I don't
think this condition is necessary because if we find a suitable mode we
should stick with it, regardless of whether it allowed vsetivli, rather
than continuing to try larger lmul which would increase register
pressure or smaller potential_ew which would increase AVL.  I have
removed this condition.

gcc/ChangeLog:

* config/riscv/riscv-string.cc (expand_block_move): Fix
condition for using smaller LMUL.  Break outer loop if a
suitable vmode has been found.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/pr112929-1.c: Expect smaller lmul.
* gcc.target/riscv/rvv/vsetvl/pr112988-1.c: Likewise.
* gcc.target/riscv/rvv/base/cpymem-3.c: New test.

(cherry picked from commit b039d06c9a810a3fab4c5eb9d50b0c7aff94b2d8)

Diff:
---
 gcc/config/riscv/riscv-string.cc   |  8 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c | 85 ++
 .../gcc.target/riscv/rvv/vsetvl/pr112929-1.c   |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/pr112988-1.c   |  2 +-
 4 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 0f1353baba3b..b590c5163543 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1153,9 +1153,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 Still, by choosing a lower LMUL factor that still allows
 an entire transfer, we can reduce register pressure.  */
  for (unsigned lmul = 1; lmul <= 4; lmul <<= 1)
-   if (TARGET_MIN_VLEN * lmul <= nunits * BITS_PER_UNIT
-   /* Avoid loosing the option of using vsetivli .  */
-   && (nunits <= 31 * lmul || nunits > 31 * 8)
+   if (length * BITS_PER_UNIT <= TARGET_MIN_VLEN * lmul
&& multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew)
&& (riscv_vector::get_vector_mode
 (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul,
@@ -1163,6 +1161,10 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
  break;
}
 
+ /* Stop searching if a suitable vmode has been found.  */
+ if (vmode != VOIDmode)
+   break;
+
  /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes
 wide.  BYTES_PER_RISCV_VECTOR can't be evenly divided by
 the sizes of larger element types; the LMUL factor of 8 can at
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c
new file mode 100644
index ..f07078ba6a7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-3.c
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2 
-mrvv-max-lmul=m8" } */
+/* { dg-add-options riscv_v } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Check that vector memcpy with predicated store uses smaller LMUL where
+   possible.
+
+/* m1
+** f1:
+**  (
+**  vsetivli\s+zero,\d+,e8,m1,ta,ma
+**  |
+**  li\s+[ta][0-7],\d+
+**  vsetvli\s+zero,[ta][0-7],e8,m1,ta,ma
+**  )
+**  vle8.v\s+v\d+,0\(a1\)
+**  vse8.v\s+v\d+,0\(a0\)
+**  ret
+*/
+
+void f1 (char *d, char *s)
+{
+  __builtin_memcpy (d, s, MIN_VECTOR_BYTES - 1);
+}
+
+/* m2
+** f2:
+**  (
+**  vsetivli\s+zero,\d+,e8,m2,ta,ma
+**  |
+**  li\s+[ta][0-7],\d+
+**  vsetvli\s+zero,[ta][0-7],e8,m2,ta

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Revert "[PATCH 7/7] RISC-V: Disable by pieces for vector setmem length > UNITS_PER_WORD"

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:44ae4bc2958543c5bc14e1d9157550c8255832da

commit 44ae4bc2958543c5bc14e1d9157550c8255832da
Author: Jeff Law 
Date:   Sun Oct 20 10:35:18 2024 -0600

Revert "[PATCH 7/7] RISC-V: Disable by pieces for vector setmem length > 
UNITS_PER_WORD"

This reverts commit 72ceddbfb78dbb95f0808c3eca1765e8cd48b023.

(cherry picked from commit 01f50ebfd97a7bd17a4cc94c403a8e126986c02c)

Diff:
---
 gcc/config/riscv/riscv.cc | 19 ---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c |  3 +--
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c| 12 +---
 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c| 12 +---
 4 files changed, 11 insertions(+), 35 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e4dcc3a0ba7f..2f7250a0cf60 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12570,22 +12570,6 @@ riscv_stack_clash_protection_alloca_probe_range (void)
   return STACK_CLASH_CALLER_GUARD;
 }
 
-static bool
-riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
- unsigned alignment,
- enum by_pieces_operation op, bool speed_p)
-{
-  /* For set/clear with size > UNITS_PER_WORD, by pieces uses vector broadcasts
- with UNITS_PER_WORD size pieces.  Use setmem instead which can use
- bigger chunks.  */
-  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR
-  && (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
-  && speed_p && size > UNITS_PER_WORD)
-return false;
-
-  return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
-}
-
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -12948,9 +12932,6 @@ riscv_use_by_pieces_infrastructure_p (unsigned 
HOST_WIDE_INT size,
 #undef TARGET_GET_RAW_RESULT_MODE
 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
 
-#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 
riscv_use_by_pieces_infrastructure_p
-
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
index f86084bdb40f..d1c118c02d6e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113469.c
@@ -51,5 +51,4 @@ void p(int buf, __builtin_va_list ab, int q) {
  } while (k);
 }
 
-/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
index 838fbebadff3..faea442a4bdc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
@@ -5,17 +5,15 @@
 
 #define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
 
-/* Vectorise with no loop.
+/* Small memsets shouldn't be vectorised.
 ** f1:
 **  (
-**  vsetivli\s+zero,\d+,e8,m1,ta,ma
+**  sb\s+a1,0\(a0\)
+**  ...
 **  |
-**  li\s+a\d+,\d+
-**  vsetvli\s+zero,a\d+,e8,m1,ta,ma
+**  li\s+a2,\d+
+**  tail\s+memset
 **  )
-**  vmv\.v\.x\s+v\d+,a1
-**  vse8\.v\s+v\d+,0\(a0\)
-**  ret
 */
 void *
 f1 (void *a, int const b)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
index 02de9a94cc47..25be694d248a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
@@ -5,17 +5,15 @@
 
 #define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
 
-/* Vectorise with no loop.
+/* Small memsets shouldn't be vectorised.
 ** f1:
 **  (
-**  vsetivli\s+zero,\d+,e8,m1,ta,ma
+**  sb\s+a1,0\(a0\)
+**  ...
 **  |
-**  li\s+a\d+,\d+
-**  vsetvli\s+zero,a\d+,e8,m1,ta,ma
+**  li\s+a2,\d+
+**  tail\s+memset
 **  )
-**  vmv\.v\.x\s+v\d+,a1
-**  vse8\.v\s+v\d+,0\(a0\)
-**  ret
 */
 void *
 f1 (void *a, int const b)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in ext-dce

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7841513ff437829304619a750a550a41d6918cc4

commit 7841513ff437829304619a750a550a41d6918cc4
Author: Jeff Law 
Date:   Mon Oct 21 13:37:21 2024 -0600

[committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in 
ext-dce

A while back I noticed that the code to call carry_backpropagate was being
called after the optimization step.  Which seemed wrong, but at the time I
didn't have a testcase showing it as a problem.  Now I have 4 :-)

The way things used to work, the extension would be stripped away before
calling carry_backpropagate, meaning carry_backpropagate would never see a
SIGN_EXTENSION.  Thus the code trying to account for the sign extended bit 
was
never reached.

Getting that bit marked live is what's needed to fix these testcases. 
Fallout
is minor with just an adjustment needed to sensibly deal with vector modes 
in a
place where we didn't have them before.

I'm still somewhat concerned about this code.  Specifically whether or not 
we
can get in here with arbitrarily complex RTL, and if so do we need to 
recurse
down and look at those sub-expressions.

So while this patch fixes the most pressing issue, I wouldn't be terribly
surprised if we're back inside this code at some point.

Bootstrapped and regression tested on x86_64, ppc64le, riscv64, s390x, 
mips64,
loongarch, aarch64, m68k, alpha, hppa, sh4, sh4eb, perhaps something else 
that
I've forgotten...  Also tested on all the crosses in my tester.

PR rtl-optimization/116488
PR rtl-optimization/116579
PR rtl-optimization/116915
PR rtl-optimization/117226
gcc/
* ext-dce.cc (carry_backpropagate): Properly handle SIGN_EXTEND, add
ZERO_EXTEND handling as well.
(ext_dce_process_uses): Call carry_backpropagate before the 
optimization
step.

gcc/testsuite/
* gcc.dg/torture/pr116488.c: New test.
* gcc.dg/torture/pr116579.c: New test.
* gcc.dg/torture/pr116915.c: New test.
* gcc.dg/torture/pr117226.c: New test.

(cherry picked from commit 36e91df7716d34aa5694533837551593ec28f22b)

Diff:
---
 gcc/ext-dce.cc  | 34 +++--
 gcc/testsuite/gcc.dg/torture/pr116488.c | 20 +++
 gcc/testsuite/gcc.dg/torture/pr116579.c | 18 +
 gcc/testsuite/gcc.dg/torture/pr116915.c | 15 +++
 gcc/testsuite/gcc.dg/torture/pr117226.c | 17 +
 5 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 2f3514ae7976..a449b9f6b49c 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -478,7 +478,12 @@ binop_implies_op2_fully_live (rtx_code code)
holds true, and bits set in MASK are live in the result.  Compute a
mask of (potentially) live bits in the non-constant inputs.  In case of
binop_implies_op2_fully_live (e.g. shifts), the computed mask may
-   exclusively pertain to the first operand.  */
+   exclusively pertain to the first operand.
+
+   This looks wrong as we may have some important operations embedded as
+   operands of another operation.  For example, we might have an extension
+   wrapping a shift.  It really feels like this needs to be recursing down
+   into operands much more often.  */
 
 unsigned HOST_WIDE_INT
 carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
@@ -557,9 +562,26 @@ carry_backpropagate (unsigned HOST_WIDE_INT mask, enum 
rtx_code code, rtx x)
   return mmask;
 
 case SIGN_EXTEND:
-  if (mask & ~GET_MODE_MASK (GET_MODE_INNER (GET_MODE (XEXP (x, 0)))))
+  if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
+ || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
+   return -1;
+
+  /* We want the mode of the inner object.  We need to ensure its
+sign bit is on in MASK.  */
+  mode = GET_MODE (XEXP (x, 0));
+  if (mask & ~GET_MODE_MASK (GET_MODE_INNER (mode)))
mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1);
-  return mask;
+
+  /* Recurse into the operand.  */
+  return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
+
+case ZERO_EXTEND:
+  if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
+ || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
+   return -1;
+
+  /* Recurse into the operand.  */
+  return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
 
 /* We propagate for the shifted operand, but not the shift
count.  The count is handled specially.  */
@@ -670,6 +692,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj,
  if (skipped_dest)
dst_mask = -1;
 
+ dst_mask = carry_backpropagate (dst_mask, code, src);
+
  /* ??? Could also handle ZERO_E

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:efc3b1248f054bd3d3326598561991b66371a25f

commit efc3b1248f054bd3d3326598561991b66371a25f
Author: Pan Li 
Date:   Mon Oct 14 15:10:46 2024 +0800

RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC

Form 7:
  #define DEF_VEC_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_7 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN > x || x >= (WT)NT_MAX  \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit f138806811968a99bd81d7a60746279877df7ee8)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-7-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-7-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-7-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c
new file mode 100644
index ..a6eb2d5b0b2f
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_7(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
new file mode 100644
index ..fd01c74d2df9
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_7(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:8a28d75bf29166a2d73490242c3bb75ed5522548

commit 8a28d75bf29166a2d73490242c3bb75ed5522548
Author: Pan Li 
Date:   Mon Oct 14 15:23:57 2024 +0800

RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC

Form 8:
  #define DEF_VEC_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_8 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN >= x || x >= (WT)NT_MAX \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit cb131a401b7489cc17e2d70420cf9a916515b3f6)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-8-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-8-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-8-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c
new file mode 100644
index ..64f140f764e6
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_8(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
new file mode 100644
index ..9bd95a52a012
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_8(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:cbadd73662a924befd0ff614f09d0bf75d8c3bac

commit cbadd73662a924befd0ff614f09d0bf75d8c3bac
Author: Pan Li 
Date:   Mon Oct 14 14:41:22 2024 +0800

RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC

Form 5:
  #define DEF_VEC_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_5 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN > x || x > (WT)NT_MAX   \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 108c8ef03dd5dff96fd3a4aa31088e42d98a0624)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-5-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-5-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-5-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c
new file mode 100644
index ..49c076ad2779
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_5(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
new file mode 100644
index ..a2a1aa40e017
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_5(int16_t, int32_t

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:85a44e56370ad5bfe742e3ae5b406c81b5d02fb6

commit 85a44e56370ad5bfe742e3ae5b406c81b5d02fb6
Author: Pan Li 
Date:   Mon Oct 14 14:55:56 2024 +0800

RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC

Form 6:
  #define DEF_VEC_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_6 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN >= x || x > (WT)NT_MAX  \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit f411abe7935e01b7e61f966d12a7a0850ca8f1c0)

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-6-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-6-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-6-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c
new file mode 100644
index ..c97057355c40
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_6(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
new file mode 100644
index ..629c07347bb9
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_6(int16_t, int32_

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Partial cherry-pick (just risc-v bits) of:

2024-11-07 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:2bd87a408fbf6c2d04db3d228c07cdc1cfbfc1f1

commit 2bd87a408fbf6c2d04db3d228c07cdc1cfbfc1f1
Author: Jakub Jelinek 
Date:   Thu Nov 7 13:49:10 2024 -0700

Partial cherry-pick (just risc-v bits) of:

commit e48a65d3b3fcbcf6059df247d9c87a9a19b35861
Author: Jakub Jelinek 
Date:   Wed Oct 16 14:44:32 2024 +0200

Ternary operator formatting fixes

Diff:
---
 gcc/config/riscv/riscv.cc | 4 ++--
 gcc/config/riscv/zc.md| 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2f7250a0cf60..5b9d9b6b64be 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6621,8 +6621,8 @@ riscv_union_memmodels (enum memmodel model1, enum 
memmodel model2)
   model1 = memmodel_base (model1);
   model2 = memmodel_base (model2);
 
-  enum memmodel weaker = model1 <= model2 ? model1: model2;
-  enum memmodel stronger = model1 > model2 ? model1: model2;
+  enum memmodel weaker = model1 <= model2 ? model1 : model2;
+  enum memmodel stronger = model1 > model2 ? model1 : model2;
 
   switch (stronger)
 {
diff --git a/gcc/config/riscv/zc.md b/gcc/config/riscv/zc.md
index 5b948b41586f..6dc47da3acc9 100644
--- a/gcc/config/riscv/zc.md
+++ b/gcc/config/riscv/zc.md
@@ -1442,7 +1442,7 @@
 (match_operand:X 3 "zcmp_mv_sreg_operand" "r"))]
   "TARGET_ZCMP
&& (REGNO (operands[2]) != REGNO (operands[0]))"
-  { return (REGNO (operands[0]) == 
A0_REGNUM)?"cm.mva01s\t%1,%3":"cm.mva01s\t%3,%1"; }
+  { return (REGNO (operands[0]) == A0_REGNUM) ? "cm.mva01s\t%1,%3" : 
"cm.mva01s\t%3,%1"; }
   [(set_attr "mode" "")
(set_attr "type" "mvpair")])
 
@@ -1454,6 +1454,6 @@
   "TARGET_ZCMP
&& (REGNO (operands[0]) != REGNO (operands[2]))
&& (REGNO (operands[1]) != REGNO (operands[3]))"
-  { return (REGNO (operands[1]) == 
A0_REGNUM)?"cm.mvsa01\t%0,%2":"cm.mvsa01\t%2,%0"; }
+  { return (REGNO (operands[1]) == A0_REGNUM) ? "cm.mvsa01\t%0,%2" : 
"cm.mvsa01\t%2,%0"; }
   [(set_attr "mode" "")
(set_attr "type" "mvpair")])


[gcc r15-5008] rtl-optimization/117467 - 33% compile-time in rest of compilation

2024-11-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:7a07de2c60b3c513b6aef206e9b55b3ffefe8b39

commit r15-5008-g7a07de2c60b3c513b6aef206e9b55b3ffefe8b39
Author: Richard Biener 
Date:   Thu Nov 7 09:23:03 2024 +0100

rtl-optimization/117467 - 33% compile-time in rest of compilation

ext-dce uses TV_NONE, that's not OK for a pass taking 33% compile-time.
The following adds a timevar to it for proper blaming.

PR rtl-optimization/117467
* timevar.def (TV_EXT_DCE): New.
* ext-dce.cc (pass_data_ext_dce): Use TV_EXT_DCE.

Diff:
---
 gcc/ext-dce.cc  | 2 +-
 gcc/timevar.def | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index a449b9f6b49c..0ece37726c7e 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -1103,7 +1103,7 @@ const pass_data pass_data_ext_dce =
   RTL_PASS, /* type */
   "ext_dce", /* name */
   OPTGROUP_NONE, /* optinfo_flags */
-  TV_NONE, /* tv_id */
+  TV_EXT_DCE, /* tv_id */
   PROP_cfglayout, /* properties_required */
   0, /* properties_provided */
   0, /* properties_destroyed */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 0f9d2c0b0324..ae80a311a2d2 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -313,6 +313,7 @@ DEFTIMEVAR (TV_INITIALIZE_RTL, "initialize rtl")
 DEFTIMEVAR (TV_GIMPLE_LADDRESS   , "address lowering")
 DEFTIMEVAR (TV_TREE_LOOP_IFCVT   , "tree loop if-conversion")
 DEFTIMEVAR (TV_WARN_ACCESS   , "access analysis")
+DEFTIMEVAR (TV_EXT_DCE   , "ext dce")
 
 /* Everything else in rest_of_compilation not included above.  */
 DEFTIMEVAR (TV_EARLY_LOCAL  , "early local passes")


[gcc r15-5011] testsuite: Fix up pr116725.c test [PR116725]

2024-11-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:0dadf022de293c202ab21b0aeed7c9a4511f57d5

commit r15-5011-g0dadf022de293c202ab21b0aeed7c9a4511f57d5
Author: Jakub Jelinek 
Date:   Thu Nov 7 13:20:20 2024 +0100

testsuite: Fix up pr116725.c test [PR116725]

On Fri, Oct 18, 2024 at 02:05:59PM -0400, Antoni Boucher wrote:
> PR target/116725
> * gcc.target/i386/pr116725.c: Add test using those AVX 
builtins.

This test FAILs for me, as I don't have the latest gas around and the test
is dg-do assemble, so doesn't need just fixed compiler, but also assembler
which supports those instructions.

The following patch adds effective target directives to ensure assembler
supports those too.

2024-11-07  Jakub Jelinek  

PR target/116725
* gcc.target/i386/pr116725.c: Add dg-require-effective-target
avx512{dq,fp16,vl}.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr116725.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr116725.c 
b/gcc/testsuite/gcc.target/i386/pr116725.c
index 9e5070e16e71..932f77b6b3aa 100644
--- a/gcc/testsuite/gcc.target/i386/pr116725.c
+++ b/gcc/testsuite/gcc.target/i386/pr116725.c
@@ -2,6 +2,9 @@
 /* { dg-do assemble } */
 /* { dg-options "-masm=intel -mavx512dq -mavx512fp16 -mavx512vl" } */
 /* { dg-require-effective-target masm_intel } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512fp16 } */
+/* { dg-require-effective-target avx512vl } */
 
 #include 


[gcc r15-5018] libstdc++: Fix typo in comment in hashtable.h

2024-11-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:bcf40c70f8b0fbc54ec14adea31f274e1791b1ed

commit r15-5018-gbcf40c70f8b0fbc54ec14adea31f274e1791b1ed
Author: Jonathan Wakely 
Date:   Tue Nov 5 23:55:08 2024 +

libstdc++: Fix typo in comment in hashtable.h

And tweak grammar in a couple of comments.

libstdc++-v3/ChangeLog:

* include/bits/hashtable.h: Fix spelling in comment.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 47321a9cb135..8b312d25d7ae 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -344,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   struct __hash_code_base_access : __hash_code_base
   { using __hash_code_base::_M_bucket_index; };
 
-  // To get bucket index we need _RangeHash not to throw.
+  // To get bucket index we need _RangeHash to be non-throwing.
   static_assert(is_nothrow_default_constructible<_RangeHash>::value,
"Functor used to map hash code to bucket index"
" must be nothrow default constructible");
@@ -353,7 +353,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
"Functor used to map hash code to bucket index must be"
" noexcept");
 
-  // To compute bucket index we also need _ExtratKey not to throw.
+  // To compute bucket index we also need _ExtractKey to be non-throwing.
   static_assert(is_nothrow_default_constructible<_ExtractKey>::value,
"_ExtractKey must be nothrow default constructible");
   static_assert(noexcept(


  1   2   >