[gcc r15-342] RISC-V: Make full-vec-move1.c test robust for optimization

2024-05-09 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b1520d2260c5e0cfcd7a4354fab70f66e2912ff2

commit r15-342-gb1520d2260c5e0cfcd7a4354fab70f66e2912ff2
Author: Pan Li 
Date:   Thu May 9 10:56:46 2024 +0800

RISC-V: Make full-vec-move1.c test robust for optimization

While investigating support for early-break autovectorization, we
noticed that the test full-vec-move1.c is optimized to 'return 0;' in
the body of main.  The value of the V-typed variable is a compile-time
constant, so the second loop is treated as assert (true).

Thus, the ccp4 pass eliminates these statements and just returns 0.

typedef int16_t V __attribute__((vector_size (128)));

int main ()
{
  V v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
(v)[i] = i;

  V res = v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
assert (res[i] == i); // will be optimized to assert (true)
}

This patch introduces an extern function that consumes res[i], which
prevents the ccp4 pass from optimizing the loop away.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c:
Introduce extern func use to get rid of ccp4 optimization.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index d73bad4af6f7..fae2ae91572f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -2,11 +2,12 @@
 /* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model -mrvv-vector-bits=zvl" } */
 
 #include 
-#include 
 
 /* This would cause us to emit a vl1r.v for VNx4HImode even when
the hardware vector size vl > 64.  */
 
+extern int16_t test_element (int16_t);
+
 typedef int16_t V __attribute__((vector_size (128)));
 
 int main ()
@@ -14,9 +15,10 @@ int main ()
   V v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
 (v)[i] = i;
+
   V res = v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
-assert (res[i] == i);
+test_element (res[i]);
 }
 
 /* { dg-final { scan-assembler-not {vl[1248]r.v} } }  */


[gcc r15-435] RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar

2024-05-13 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:41b3cf262e61aee9d26380f1c820e0eaae740f50

commit r15-435-g41b3cf262e61aee9d26380f1c820e0eaae740f50
Author: Pan Li 
Date:   Sat May 11 15:25:28 2024 +0800

RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar

For the vfw vx format RVV intrinsic, the scalar type _Float16 also
requires the zvfh extension.  Unfortunately, we only checked the
vector tree type and missed checking the scalar _Float16 type.  For
example:

vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t 
vl)
{
  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl);
}

It should report an error message such as "zvfh extension is required"
instead of hitting an ICE for an unrecognizable insn.

This patch adds the missing validation for _Float16 in the RVV
intrinsic API.  It reports an error like the one below when zvfh is
not enabled.

error: built-in function '__riscv_vfwsub_wf_f32mf2(vs2,  rs1,  vl)'
  requires the zvfhmin or zvfh ISA extension

Passed the full rv64gcv regression tests, including C/C++/Fortran.

PR target/114988

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): New func impl to
validate the intrinisc func type ops.
(expand_builtin): Validate instance type before expand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114988-1.c: New test.
* gcc.target/riscv/rvv/base/pr114988-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc  | 51 ++
 .../gcc.target/riscv/rvv/base/pr114988-1.c |  9 
 .../gcc.target/riscv/rvv/base/pr114988-2.c |  9 
 3 files changed, 69 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 192a6c230d1c..3fdb4400d70d 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4632,6 +4632,54 @@ gimple_fold_builtin (unsigned int code, 
gimple_stmt_iterator *gsi, gcall *stmt)
   return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold ();
 }
 
+static bool
+validate_instance_type_required_extensions (const rvv_type_info type,
+   tree exp)
+{
+  uint64_t exts = type.required_extensions;
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
+!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zvfhmin or zvfh ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
+!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve32f, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
+!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_64) &&
+!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve64x, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  return true;
+}
+
 /* Expand a call to the RVV function with subcode CODE.  EXP is the call
expression and TARGET is the preferred location for the result.
Return the value of the lhs.  */
@@ -4649,6 +4697,9 @@ expand_builtin (unsigned int code, tree exp, rtx target)
   return target;
 }
 
+  if (!validate_instance_type_required_extensions (rfn.instance.type, exp))
+return target;
+
   return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
new file mode 100644
index ..b8474804c880
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl)
+{
+  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl); /* { dg-error {built-in 
function '__riscv_vfwsub_wf_f32mf2\(vs2,  rs1,  vl\)' requires the zvfhmin or 
zvfh ISA extension} } */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c
new file mode 100644
index ..49aa3141af31
--- /dev

[gcc r15-442] RISC-V: Fix format issue for trailing operator [NFC]

2024-05-13 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b6dc8464e613d1da2b28235bbd2f9c3fd4bc386b

commit r15-442-gb6dc8464e613d1da2b28235bbd2f9c3fd4bc386b
Author: Pan Li 
Date:   Tue May 14 09:38:55 2024 +0800

RISC-V: Fix format issue for trailing operator [NFC]

This patch fixes the trailing-operator format issues listed below.

=== ERROR type #1: trailing operator (4 error(s)) ===
gcc/config/riscv/riscv-vector-builtins.cc:4641:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_16) &&
gcc/config/riscv/riscv-vector-builtins.cc:4651:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_32) &&
gcc/config/riscv/riscv-vector-builtins.cc:4661:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_64) &&
gcc/config/riscv/riscv-vector-builtins.cc:4670:36:  if ((exts &
RVV_REQUIRE_ELEN_64) &&

This patch passes ./contrib/check_GNU_style.sh, and I double-checked
that the original patch has no other format issues.

Committed as format change.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): Remove the
operator from the trailing and put it to new line.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 3fdb4400d70d..c08d87a26807 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4638,8 +4638,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
 {
   uint64_t exts = type.required_extensions;
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
-!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_16)
+&& !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4648,8 +4648,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
-!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_32)
+&& !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4658,8 +4658,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
-!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_64)
+&& !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the zve64d or v ISA extension",
@@ -4667,8 +4667,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_64) &&
-!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_64)
+&& !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "


[gcc r15-576] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:52b0536710ff3f3ace72ab00ce9ef6c630cd1183

commit r15-576-g52b0536710ff3f3ace72ab00ce9ef6c630cd1183
Author: Pan Li 
Date:   Wed May 15 10:14:05 2024 +0800

Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

This patch adds the middle-end representation for saturating addition,
i.e. the result of the addition is set to the maximum value of the
type on overflow.  It matches a pattern similar to the one below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

Given below example for the unsigned scalar integer uint64_t:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

The following tests pass with this patch:
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
to the return true switch case(s).
* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match(es).
* optabs.def (OPTAB_NL): Remove fixed-point limitation for
us/ssadd.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
extern func decl generated in match.pd match.
(match_saturation_arith): New func impl to match the saturation 
arith.
(math_opts_dom_walker::after_dom_children): Try match saturation
arith when IOR expr.

Signed-off-by: Pan Li 

Diff:
---
 gcc/internal-fn.cc|  1 +
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 51 +++
 gcc/optabs.def|  4 ++--
 gcc/tree-ssa-math-opts.cc | 32 +
 5 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286c..73045ca8c8c1 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
 case IFN_UBSAN_CHECK_MUL:
 case IFN_ADD_OVERFLOW:
 case IFN_MUL_OVERFLOW:
+case IFN_SAT_ADD:
 case IFN_VEC_WIDEN_PLUS:
 case IFN_VEC_WIDEN_PLUS_LO:
 case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3f..25badbb86e56 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464b..0f9c34fa8974 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
   && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_matc

[gcc r15-577] Vect: Support new IFN SAT_ADD for unsigned vector int

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:d4dee347b3fe1982bab26485ff31cd039c9df010

commit r15-577-gd4dee347b3fe1982bab26485ff31cd039c9df010
Author: Pan Li 
Date:   Wed May 15 10:14:06 2024 +0800

Vect: Support new IFN SAT_ADD for unsigned vector int

For vectorization, we leverage the existing vect pattern recognition
to find the same pattern as in the scalar case, and let the vectorizer
handle the rest via the standard name usadd<mode>3 in vector mode.
The riscv vector backend has the insn "Vector Single-Width Saturating
Add and Subtract", which can be leveraged when expanding usadd<mode>3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615,
... }, vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, 
vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, 
vect__12.11_54);
  ...
}

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New
func decl generated by match.pd match.
(vect_recog_sat_add_pattern): New func impl to recog the pattern
for unsigned SAT_ADD.

Signed-off-by: Pan Li 

Diff:
---
 gcc/tree-vect-patterns.cc | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dfb7d8005262..a313dc64643e 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,57 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *
+ * And then simplied to
+ *   _12 = .SAT_ADD (_4, _6);
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+   tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+return NULL;
+
+  tree res_ops[2];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+{
+  tree itype = TREE_TYPE (res_ops[0]);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE
+   && direct_internal_fn_supported_p (IFN_SAT_ADD, vtype,
+  OPTIMIZE_FOR_BOTH))
+   {
+ *type_out = vtype;
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+   res_ops[1]);
+
+ gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+ gimple_call_set_nothrow (call, /* nothrow_p */ false);
+ gimple_set_location (call, gimple_location (last_stmt));
+
+ vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+ return call;
+   }
+}
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
 
@@ -6987,6 +7038,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vect_recog_mult_pattern, "mult" },
+  { vect_recog_sat_add_patter

[gcc r15-578] Vect: Support loop len in vectorizable early exit

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:57f8a2f67c1536be23231808ab00613ab69193ed

commit r15-578-g57f8a2f67c1536be23231808ab00613ab69193ed
Author: Pan Li 
Date:   Thu May 16 09:58:13 2024 +0800

Vect: Support loop len in vectorizable early exit

This patch adds early-break auto-vectorization support for targets that
use lengths for partial vectorization.  Consider the following example:

unsigned vect_a[802];
unsigned vect_b[802];

void test (unsigned x, int n)
{
  for (int i = 0; i < n; i++)
  {
vect_b[i] = x + i;

if (vect_a[i] > x)
  break;

vect_a[i] = x;
  }
}

We use VCOND_MASK_LEN to generate the equivalent of
(mask && i < len + bias).  The resulting IR for RVV looks like this:

  ...
  _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
  _55 = (int) _87;
  ...
  mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
  vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
  if (vec_len_mask_72 != { 0, ... })
goto ; [5.50%]
  else
goto ; [94.50%]

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
the loop len mask.
* tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
vect_gen_loop_len_mask for 1 or more stmt(s).
* tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
for vect_gen_loop_len_mask.

Signed-off-by: Pan Li 

Diff:
---
 gcc/tree-vect-loop.cc  | 27 +++
 gcc/tree-vect-stmts.cc | 17 +++--
 gcc/tree-vectorizer.h  |  4 
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 361aec064884..83c0544b6aa5 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
gimple_stmt_iterator *gsi,
   return loop_len;
 }
 
+/* Generate the tree for the loop len mask and return it.  Given the lens,
+   nvectors, vectype, index and factor to gen the len mask as below.
+
+   tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+*/
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+   gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+   unsigned int nvectors, tree vectype, tree stmt,
+   unsigned int index, unsigned int factor)
+{
+  tree all_one_mask = build_all_ones_cst (vectype);
+  tree all_zero_mask = build_zero_cst (vectype);
+  tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, 
index,
+   factor);
+  tree bias = build_int_cst (intQI_type_node,
+LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+  tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+  gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+   all_one_mask, all_zero_mask, len,
+   bias);
+  gimple_call_set_lhs (call, len_mask);
+  gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+  return len_mask;
+}
+
 /* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF.
If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b8a71605f1bc..672959501bb7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
 ncopies = vect_get_num_copies (loop_vinfo, vectype);
 
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 
   /* Now build the new conditional.  Pattern gimple_conds get dropped during
  codegen so we must replace the original insn.  */
@@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
{
  if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
  OPTIMIZE_FOR_SPEED))
-   return false;
+   vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
  else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
 
-
   return true;
 }
 
@@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
  stmts[i], &cond_gsi);
workset.quick_push (stmt_mas

[gcc r15-582] RISC-V: Implement vectorizable early exit with vcond_mask_len

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6c1de786e53a11150feb16ba990d0d6c6fd910db

commit r15-582-g6c1de786e53a11150feb16ba990d0d6c6fd910db
Author: Pan Li 
Date:   Thu May 16 10:02:40 2024 +0800

RISC-V: Implement vectorizable early exit with vcond_mask_len

Now that loop lens are supported in vectorizable early exit, we would
like to implement the feature for the RISC-V target.  Given the example
below:

unsigned vect_a[1923];
unsigned vect_b[1923];

void test (unsigned limit, int n)
{
  for (int i = 0; i < n; i++)
{
  vect_b[i] = limit + i;

  if (vect_a[i] > limit)
{
  ret = vect_b[i];
  return ret;
}

  vect_a[i] = limit;
}
}

Before this patch:
  ...
.L8:
  swa3,0(a5)
  addiw a0,a0,1
  addi  a4,a4,4
  addi  a5,a5,4
  beq   a1,a0,.L2
.L4:
  swa0,0(a4)
  lwa2,0(a5)
  bleu  a2,a3,.L8
  ret

After this patch:
  ...
.L5:
  vsetvli   a5,a3,e8,mf4,ta,ma
  vmv1r.v   v4,v2
  vsetvli   t4,zero,e32,m1,ta,ma
  vmv.v.x   v1,a5
  vadd.vv   v2,v2,v1
  vsetvli   zero,a5,e32,m1,ta,ma
  vadd.vv   v5,v4,v3
  slli  a6,a5,2
  vle32.v   v1,0(t1)
  vmsltu.vv v1,v3,v1
  vcpop.m   t4,v1
  beq   t4,zero,.L4
  vmv.x.s   a4,v4
.L3:
  ...

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/ChangeLog:

* 
config/riscv/autovec-opt.md(*vcond_mask_len_popcount_):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_): New pattern of
vcond_mask_len for vector bool mode.
(cbranch4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec 
UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount): Add VLS 
mode
to popcount pattern.
(@pred_popcount): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec-opt.md| 33 
 gcc/config/riscv/autovec.md| 61 ++
 gcc/config/riscv/vector-iterators.md   |  1 +
 gcc/config/riscv/vector.md | 18 +++
 .../gcc.target/riscv/rvv/autovec/early-break-1.c   | 34 
 .../gcc.target/riscv/rvv/autovec/early-break-2.c   | 37 +
 6 files changed, 175 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d8680..04f85d8e4553 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@
 DONE;
   }
   [(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_"
+  [(set (match_operand:P 0 "register_operand")
+(popcount:P
+ (unspec:VB_VLS [
+  (unspec:VB_VLS [
+   (match_operand:VB_VLS 1 "register_operand")
+   (match_operand:VB_VLS 2 "const_1_operand")
+   (match_operand:VB_VLS 3 "const_0_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+  (match_operand 6 "autovec_length_operand")
+  (const_int 1)
+  (reg:SI VL_REGNUM)
+  (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+riscv_vector::emit_nonvlmax_insn (
+   code_for_pred_popcount (mode, Pmode),
+   riscv_vector::CPOP_OP,
+   operands, operands[4]);
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075b..1ee3c8052fb4 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@
 DONE;
   }
 )
+
+;; =
+;; == Early break auto-vectorization patterns
+;; =
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_"
+  [(set (match_operand:VB 0 "register_operand")
+(unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] U

[gcc r15-583] RISC-V: Enable vectorizable early exit testsuite

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:556e777298dac8574533935000c57335c5232921

commit r15-583-g556e777298dac8574533935000c57335c5232921
Author: Pan Li 
Date:   Thu May 16 10:04:10 2024 +0800

RISC-V: Enable vectorizable early exit testsuite

Now that vectorizable early exit is supported on RISC-V, we would like
to enable the gcc vect tests for vectorizable early exit.

The test vect-early-break_124-pr114403.c fails to vectorize for now,
because the 8-byte __builtin_memcpy fails to be folded into an int64
assignment during ccp1.  We will fix that first; until then, this test
is marked as xfail for RISC-V.

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as it will
have 2 times LOOP VECTORIZED in RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c  | 2 ++
 gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
 gcc/testsuite/lib/target-supports.exp | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98a..2f80bf89e5e6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
 
   if (__builtin_memcmp (x, res, sizeof (x)) != 0)
 abort ();
+
+#pragma GCC novector
   for (int i = 0; i < 32; ++i)
 if (flag[i] != 0 && flag[i] != 1)
   abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb5..101ae1e0eaa1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-require-effective-target vect_long_long } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } 
} */
 
 #include "tree-vect.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6f5d477b1288..ec9baa4f32a3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v]
}}]
 }
 
@@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v_ok]
}}]
 }


[gcc r15-585] RISC-V: Cleanup some temporally files [NFC]

2024-05-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:d477d683d5c6db90c80d348c795709ae6444ba7a

commit r15-585-gd477d683d5c6db90c80d348c795709ae6444ba7a
Author: Pan Li 
Date:   Fri May 17 07:45:19 2024 +0800

RISC-V: Cleanup some temporally files [NFC]

Just noticed some temporary files under gcc/config/riscv;
delete them as they are useless.

* Empty file j.
* Vim swap file.

gcc/ChangeLog:

* config/riscv/.riscv.cc.swo: Removed.
* config/riscv/j: Removed.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/.riscv.cc.swo | Bin 417792 -> 0 bytes
 gcc/config/riscv/j |   0
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/config/riscv/.riscv.cc.swo b/gcc/config/riscv/.riscv.cc.swo
deleted file mode 100644
index 77ed37353bee..
Binary files a/gcc/config/riscv/.riscv.cc.swo and /dev/null differ
diff --git a/gcc/config/riscv/j b/gcc/config/riscv/j
deleted file mode 100644
index e69de29bb2d1..


[gcc r15-642] RISC-V: Implement IFN SAT_ADD for both the scalar and vector

2024-05-17 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:34ed2b4593fa98b613632d0dde30b6ba3e7ecad9

commit r15-642-g34ed2b4593fa98b613632d0dde30b6ba3e7ecad9
Author: Pan Li 
Date:   Fri May 17 18:49:46 2024 +0800

RISC-V: Implement IFN SAT_ADD for both the scalar and vector

This patch implements SAT_ADD in the riscv backend as a sample
for both scalar and vector modes.  Take the vector example
below:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  slli a4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The below test suites are passed for this patch.
* The riscv full regression tests.
* The aarch64 full regression tests.
* The x86 bootstrap tests.
* The x86 full regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd3): New pattern expand for
the unsigned SAT_ADD in vector mode.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
to expand usadd3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd3 for vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
expand usadd3 for scalar.
* config/riscv/riscv.md (usadd3): New pattern expand for
the unsigned SAT_ADD in scalar mode.
* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New 
test.
* gcc.target/riscv/sat_arith.h: New test.
* gcc.target/riscv/sat_u_add-1.c: New test.
* gcc.target/riscv/sat_u_add-2.c: New test.
* gcc.target/riscv/sat_u_add-3.c: New test.
* gcc.target/riscv/sat_u_add-4.c: New test.
* gcc.target/riscv/sat_u_add-run-1.c: New test.
* gcc.target/riscv/sat_u_add-run-2.c: New test.
* gcc.target/riscv/sat_u_add-run-3.c: New test.
* gcc.target/riscv/sat_u_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec.md| 17 +
 gcc/config/riscv/riscv-protos.h|  2 +
 gcc/config/riscv/riscv-v.cc| 19 ++
 gcc/config/riscv/riscv.cc  | 55 
 gcc/config/riscv/riscv.md  | 11 
 gcc/config/riscv/vector.md | 12 ++--
 .../riscv/rvv/autovec/binop/vec_sat_binary.h   | 33 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c  | 19 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c  | 75 ++
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 31 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c   | 19 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-2.c   | 21 ++
 gcc/testsuite/gcc.targe

[gcc r15-655] DSE: Fix ICE after allow vector type in get_stored_val

2024-05-19 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:88b3f83238087cbe2aa2c51c6054796856f2fb94

commit r15-655-g88b3f83238087cbe2aa2c51c6054796856f2fb94
Author: Pan Li 
Date:   Tue Apr 30 09:42:39 2024 +0800

DSE: Fix ICE after allow vector type in get_stored_val

We previously allowed vector types in get_stored_val when the read size
is less than or equal to the store size.  Unfortunately, validate_subreg
treats a vector type whose size is less than a vector register as
invalid, which results in an ICE here.

This patch fixes it by filtering out the invalid type sizes, making
sure the subreg is valid for both the read_mode and store_mode
before performing the real gen_lowpart.

The below test suites are passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv rv64gcv regression test.
* The riscv rv64gc regression test.
* The aarch64 regression test.

gcc/ChangeLog:

* dse.cc (get_stored_val): Make sure read_mode/write_mode
is valid subreg before gen_lowpart.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/dse.cc  |  4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c | 22 ++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf7..1596da91da08 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode 
read_mode,
 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
 && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-&& targetm.modes_tieable_p (read_mode, store_mode))
+&& targetm.modes_tieable_p (read_mode, store_mode)
+&& validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+   subreg_lowpart_offset (read_mode, store_mode)))
 read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
 read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index ..5bb00b8f587e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}


[gcc r15-2962] RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305]

2024-08-16 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a11dcaff9fc94971188d54310d3053e9f68a0d3d

commit r15-2962-ga11dcaff9fc94971188d54310d3053e9f68a0d3d
Author: 曾治金 
Date:   Wed Aug 14 14:06:23 2024 +0800

RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305]

This patch is to fix the bug (BugId:116305) introduced by the commit
bd93ef for risc-v target.

The commit bd93ef changes the chunk_num from 1 to TARGET_MIN_VLEN/128
if TARGET_MIN_VLEN is larger than 128 in riscv_convert_vector_bits. So
it changes the value of BYTES_PER_RISCV_VECTOR. For example, before
merging the commit bd93ef and if TARGET_MIN_VLEN is 256, the value
of BYTES_PER_RISCV_VECTOR should be [8, 8], but now [16, 16]. The value
of riscv_bytes_per_vector_chunk and BYTES_PER_RISCV_VECTOR are no longer
equal.

Prologue will use BYTES_PER_RISCV_VECTOR.coeffs[1] to estimate the vlenb
register value in riscv_legitimize_poly_move, and dwarf2cfi will also
get the estimated vlenb register value in 
riscv_dwarf_poly_indeterminate_value
to calculate the number of times to multiply the vlenb register value.

So we need to change the factor from riscv_bytes_per_vector_chunk to
BYTES_PER_RISCV_VECTOR, otherwise we will get incorrect dwarf
information. An incorrect example follows:

```
csrr    t0,vlenb
slli    t1,t0,1
sub     sp,sp,t1

.cfi_escape 0xf,0xb,0x72,0,0x92,0xa2,0x38,0,0x34,0x1e,0x23,0x50,0x22
```

The sequence '0x92,0xa2,0x38,0' means the vlenb register, '0x34' means
the literal 4, and '0x1e' means the multiply operation. But in fact, the
vlenb register value only needs to be multiplied by the literal 2.

PR target/116305

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_dwarf_poly_indeterminate_value): Take
BYTES_PER_RISCV_VECTOR for *factor instead of 
riscv_bytes_per_vector_chunk.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/scalable_vector_cfi.c: New test.

Signed-off-by: Zhijin Zeng 

Diff:
---
 gcc/config/riscv/riscv.cc  |  4 +--
 .../riscv/rvv/base/scalable_vector_cfi.c   | 32 ++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1f60d8f9711..8b7123e043e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11010,12 +11010,12 @@ static unsigned int
 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
  int *offset)
 {
-  /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
+  /* Polynomial invariant 1 == (VLENB / BYTES_PER_RISCV_VECTOR) - 1.
  1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
  2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
   */
   gcc_assert (i == 1);
-  *factor = riscv_bytes_per_vector_chunk;
+  *factor = BYTES_PER_RISCV_VECTOR.coeffs[1];
   *offset = 1;
   return RISCV_DWARF_VLENB;
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c
new file mode 100644
index 000..184da10caf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-g -O3 -march=rv64gcv -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } {"-O2" "-O1" "-O0" "-Og" "-Oz" "-flto"} } */
+/* { dg-final { scan-assembler {cfi_escape .*0x92,0xa2,0x38,0,0x32,0x1e} } } */
+
+#include "riscv_vector.h"
+
+#define PI_2 1.570796326795
+
+extern void func(float *result);
+
+void test(const float *ys, const float *xs, float *result, size_t length) {
+size_t gvl = __riscv_vsetvlmax_e32m2();
+vfloat32m2_t vpi2 = __riscv_vfmv_v_f_f32m2(PI_2, gvl);
+
+for(size_t i = 0; i < length;) {
+gvl = __riscv_vsetvl_e32m2(length - i);
+vfloat32m2_t y = __riscv_vle32_v_f32m2(ys, gvl);
+vfloat32m2_t x = __riscv_vle32_v_f32m2(xs, gvl);
+vbool16_t mask0  = __riscv_vmflt_vv_f32m2_b16(x, y, gvl);
+vfloat32m2_t fixpi = __riscv_vfrsub_vf_f32m2_mu(mask0, vpi2, vpi2, 0, 
gvl);
+
+__riscv_vse32_v_f32m2(result, fixpi, gvl);
+
+func(result);
+
+i += gvl;
+ys += gvl;
+xs += gvl;
+result += gvl;
+}
+}


[gcc r15-2977] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2

2024-08-17 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6fbdbad97d451cc220a5654c8b97b9911485ef4a

commit r15-2977-g6fbdbad97d451cc220a5654c8b97b9911485ef4a
Author: Pan Li 
Date:   Sat Aug 17 18:04:00 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2

This patch would like to add test cases for the unsigned scalar
.SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x > max ? (NT) max : (NT)x; \
  }

DEF_SAT_U_TRUC_FMT_2 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-7.c: New test.
* gcc.target/riscv/sat_u_trunc-8.c: New test.
* gcc.target/riscv/sat_u_trunc-9.c: New test.
* gcc.target/riscv/sat_u_trunc-run-7.c: New test.
* gcc.target/riscv/sat_u_trunc-run-8.c: New test.
* gcc.target/riscv/sat_u_trunc-run-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 12 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c | 19 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c | 16 
 7 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 37e0a60f21b..576a4926d1f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -227,7 +227,19 @@ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
 }
 #define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
 
+#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
+NT __attribute__((noinline)) \
+sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
+{\
+  WT max = (WT)(NT)-1;   \
+  return x > max ? (NT) max : (NT)x; \
+}
+#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
+
 #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
 
+#define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x)
+#define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c
new file mode 100644
index 000..95d513a15fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c
new file mode 100644
index 000..f168912293d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_2(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c
new file mode 100644
index 000..d82363d6aef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno

[gcc r15-2978] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3

2024-08-17 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:8d0efcf5581abf2560701f4143a0c2ccb261d1f7

commit r15-2978-g8d0efcf5581abf2560701f4143a0c2ccb261d1f7
Author: Pan Li 
Date:   Sat Aug 17 19:27:11 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3

This patch would like to add test cases for the unsigned scalar
.SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
  {\
WT max = (WT)(NT)-1;   \
return x <= max ? (NT)x : (NT) max;\
  }

DEF_SAT_U_TRUC_FMT_3 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-13.c: New test.
* gcc.target/riscv/sat_u_trunc-14.c: New test.
* gcc.target/riscv/sat_u_trunc-15.c: New test.
* gcc.target/riscv/sat_u_trunc-run-13.c: New test.
* gcc.target/riscv/sat_u_trunc-run-14.c: New test.
* gcc.target/riscv/sat_u_trunc-run-15.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h  | 12 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c | 19 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-13.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-14.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-15.c | 16 
 7 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 576a4926d1f..cf055410fd1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -236,10 +236,22 @@ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
 }
 #define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
 
+#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
+NT __attribute__((noinline)) \
+sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \
+{\
+  WT max = (WT)(NT)-1;   \
+  return x <= max ? (NT)x : (NT) max;\
+}
+#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT)
+
 #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
 
 #define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x)
 #define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x)
 
+#define RUN_SAT_U_TRUC_FMT_3(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_3 (x)
+#define RUN_SAT_U_TRUC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_3(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c
new file mode 100644
index 000..58910793a80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c
new file mode 100644
index 000..236ea1d45f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_3(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c
new file mode 100644
index 000..33c3686c053
--- /de

[gcc r15-2979] RISC-V: Make sure high bits of usadd operands is clean for non-Xmode [PR116278]

2024-08-17 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:e8f31f4f58f0fcf1716fc1d9ee003fbcdda600c3

commit r15-2979-ge8f31f4f58f0fcf1716fc1d9ee003fbcdda600c3
Author: Pan Li 
Date:   Fri Aug 9 10:26:32 2024 +0800

RISC-V: Make sure high bits of usadd operands is clean for non-Xmode 
[PR116278]

For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the
high bits of the Xmode register may be all 1s, which is not expected.
Take the code below as an example.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
  b[0] = -40;
  c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) 
+ 9;
  __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
(high:DI (symbol_ref:DI ("d") [flags 0x86]  )))
 (nil))
(insn 9 8 10 (set (reg/f:DI 142)
(mem/f/c:DI (lo_sum:DI (reg:DI 143)
(symbol_ref:DI ("d") [flags 0x86]  )) [1 d+0 S8 
A64]))
 (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
(sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
 (nil))

The conversion from signed char to unsigned short produces the sign_extend
rtl shown above, which finally becomes the lb insn below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu tries to compare 65505 and 0xffffffffffffffd8 here, but we
actually want to compare 65505 and 65496 (0xffd8).  Thus we need to
clean up the high bits to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

PR target/116278

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new
func impl to zero extend rtx.
(riscv_expand_usadd): Leverage above func to cleanup operands 0
and remove the special handling for SImode in RV64.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-11.c: Adjust asm check body.
* gcc.target/riscv/sat_u_add-15.c: Ditto.
* gcc.target/riscv/sat_u_add-19.c: Ditto.
* gcc.target/riscv/sat_u_add-23.c: Ditto.
* gcc.target/riscv/sat_u_add-3.c: Ditto.
* gcc.target/riscv/sat_u_add-7.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-7.c: Ditto.
* gcc.target/riscv/pr116278-run-1.c: New test.
* gcc.target/riscv/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc | 34 +++
 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c   | 20 +
 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c   | 20 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-3.c  |  6 +++-
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-7.c  |  6 +++-
 13 files changed, 112 insertions(+), 22 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index c3877008d05..f266c45ed4d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11828,12 +11828,29 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
+/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
+   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
+   the new zero extended Xmode rtx will be returned.
+   Or the gen_lowpart rtx of Xmode will be returned.  */
+
+static rtx
+riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
+{
+  if (mode == Xmode)
+return x;
+
+  rtx xmode_reg = gen_reg_rtx (Xmode);
+  riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+
+  return xmode_reg;
+}
+
 /* Implements the unsigned saturation add standard name usadd for int mode.
 
z = SAT_ADD(x, y).
=>
1. sum = x + y.
-   2. sum = truncate (sum) for QI and HI only.
+   2. sum = truncate (sum) for non-Xmode.
3. lt = sum < x.
4. lt = -lt.
5. z = sum | lt.  */
@@ -11844,22 +11861,15 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   machine_mode mode = GET_MODE (dest);
   rtx xmode_sum = gen_re

[gcc r15-2980] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

2024-08-17 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a183b255be8ec8f434c3c39f3f4e01d6bd5566f8

commit r15-2980-ga183b255be8ec8f434c3c39f3f4e01d6bd5566f8
Author: Pan Li 
Date:   Tue Jul 23 11:18:48 2024 +0800

RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

This patch would like to implement the quad and oct .SAT_TRUNC pattern
in the riscv backend. Aka:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   _Bool overflow;
   8   │   short unsigned int _1;
   9   │   short unsigned int _2;
  10   │   short unsigned int _3;
  11   │   uint16_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;pred:   ENTRY
  15   │   overflow_5 = x_4(D) > 65535;
  16   │   _1 = (short unsigned int) x_4(D);
  17   │   _2 = (short unsigned int) overflow_5;
  18   │   _3 = -_2;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;succ:   EXIT
  22   │
  23   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   uint16_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below tests suites are passed for this patch
1. The rv64gcv fully regression test.
2. The rv64gcv build with glibc

gcc/ChangeLog:

* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
quad truncation.
(ANYI_OCT_TRUNC): New iterator for oct truncation.
(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
(anyi_quad_truncated): Ditto but for lower case.
(anyi_oct_truncated): Ditto but for lower case.
* config/riscv/riscv.md (ustrunc2):
Add new pattern for quad truncation.
(ustrunc2): Ditto but for oct.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
the expand dump check times.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-4.c: New test.
* gcc.target/riscv/sat_u_trunc-5.c: New test.
* gcc.target/riscv/sat_u_trunc-6.c: New test.
* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
* gcc.target/riscv/sat_u_trunc-run-6.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/iterators.md  | 20 +
 gcc/config/riscv/riscv.md  | 20 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c |  2 +-
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 51 ++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c | 17 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c | 17 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c | 20 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c | 16 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c | 16 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c | 16 +++
 11 files changed, 195 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 0a669f560e3..2844cb02ff0 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@
 
 (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
 (define_mode_attr ANYI_DOUBLE_TRUNCATED [
   (HI "QI") (SI "HI") (DI "SI")
 ])
 
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+  (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+  (DI "QI")
+])
+
 (define_mode_attr anyi_double_truncated [
   (HI "qi") (SI "hi") (DI "si")
 ])
 
+(define_mode_attr anyi_quad_truncated [
+  (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+  (DI "qi")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterato

[gcc r15-3054] RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]

2024-08-20 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1b72e07696a062e628c35e4bd25926c11ac18297

commit r15-3054-g1b72e07696a062e628c35e4bd25926c11ac18297
Author: Pan Li 
Date:   Tue Aug 20 21:08:23 2024 +0800

RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]

Fix one typo `sat_truc` to `sat_trunc`, as well as `SAT_TRUC` to 
`SAT_TRUNC`.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Fix SAT_TRUNC typo.
* gcc.target/riscv/sat_u_trunc-1.c: Ditto.
* gcc.target/riscv/sat_u_trunc-13.c: Ditto.
* gcc.target/riscv/sat_u_trunc-14.c: Ditto.
* gcc.target/riscv/sat_u_trunc-15.c: Ditto.
* gcc.target/riscv/sat_u_trunc-2.c: Ditto.
* gcc.target/riscv/sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/sat_u_trunc-4.c: Ditto.
* gcc.target/riscv/sat_u_trunc-5.c: Ditto.
* gcc.target/riscv/sat_u_trunc-6.c: Ditto.
* gcc.target/riscv/sat_u_trunc-7.c: Ditto.
* gcc.target/riscv/sat_u_trunc-8.c: Ditto.
* gcc.target/riscv/sat_u_trunc-9.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-1.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-13.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-14.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-15.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-2.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-3.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-4.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-5.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-6.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-7.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-8.c: Ditto.
* gcc.target/riscv/sat_u_trunc-run-9.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 30 +++---
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c|  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c|  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c|  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-13.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-14.c  |  4 +--
 .../gcc.target/riscv/sat_u_trunc-run-15.c  |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c |  4 +--
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c |  4 +--
 25 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index cf055410fd1f..91853b60f592 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -218,40 +218,40 @@ sat_u_sub_##T##_fmt_12 (T x, T y)  \
 /* Saturation Truncate (unsigned and signed)                                  */
 /******************************************************************************/
 
-#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
+#define DEF_SAT_U_TRUNC_FMT_1(NT, WT)\
 NT __attribute__((noinline)) \
-sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
+sat_u_trunc_##WT##_to_##NT##_fmt_1 (WT x) \
 {\
   bool overflow = x > (WT)(NT)(-1);  \
   return ((NT)x) | (NT)-overflow;\
 }
-#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
+#define DEF_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_1(NT, WT)
 
-#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
+#define DEF_SAT_U_TRUNC_FMT_2(NT, WT)\
 NT __attribute__((noinline)) \
-sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
+sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
 {\
   WT max = (WT)(NT)-1;   \
   return x > max ? (NT) max : (NT)x; \
 }
-#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT)
+#define DEF_SAT_U_TRUNC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_2(NT, WT)
 
-#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \
+#define DEF_SAT

[gcc r15-3075] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2

2024-08-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1e99e1ba79964f47f8850871d025209dfab73693

commit r15-3075-g1e99e1ba79964f47f8850871d025209dfab73693
Author: Pan Li 
Date:   Wed Aug 21 17:43:12 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_VEC_SAT_U_TRUNC_FMT_2(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT max = (WT)(NT)-1;\
out[i] = in[i] > max ? (NT)max : (NT)in[i]; \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_2 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c| 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c | 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c | 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c | 23 ++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-10.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-11.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-12.c  | 16 +++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c | 16 +++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c | 16 +++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c | 16 +++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 18 +
 13 files changed, 236 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c
new file mode 100644
index 000000000000..f5084e503ebd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint16_t_uint32_t_fmt_2:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_2 (uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c
new file mode 100644
index 000000000000..e2ab880a1aca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { 

[gcc r15-3076] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3

2024-08-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:91f213908c0443b0249490b03b8046509f6e7e9d

commit r15-3076-g91f213908c0443b0249490b03b8046509f6e7e9d
Author: Pan Li 
Date:   Wed Aug 21 17:57:47 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_VEC_SAT_U_TRUNC_FMT_3(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT max = (WT)(NT)-1;\
out[i] = in[i] <= max ? (NT)in[i] : (NT)max;\
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_3 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c| 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c| 23 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c| 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c| 19 ++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-13.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-14.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-15.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-16.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-17.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-18.c  | 16 +++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 18 +
 13 files changed, 236 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c
new file mode 100644
index 000000000000..49bdbdc36062
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint8_t_uint16_t_fmt_3:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_3 (uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c
new file mode 100644
index 000000000000..3ff696edcfee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final 

[gcc r15-3122] Match: Support form 4 for unsigned integer .SAT_TRUNC

2024-08-23 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:07988874c340e575521473b01dc98c8a1b2886b5

commit r15-3122-g07988874c340e575521473b01dc98c8a1b2886b5
Author: Pan Li 
Date:   Tue Aug 20 15:44:38 2024 +0800

Match: Support form 4 for unsigned integer .SAT_TRUNC

This patch would like to support the form 4 of the unsigned integer
.SAT_TRUNC. Aka below example:

Form 4:
  #define DEF_SAT_U_TRUC_FMT_4(NT, WT)   \
  NT __attribute__((noinline))   \
  sat_u_truc_##WT##_to_##NT##_fmt_4 (WT x)   \
  {  \
bool not_overflow = x <= (WT)(NT)(-1);   \
return ((NT)x) | (NT)((NT)not_overflow - 1); \
  }

DEF_SAT_U_TRUC_FMT_4(uint32_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x)
   6   │ {
   7   │   _Bool not_overflow;
   8   │   unsigned char _1;
   9   │   unsigned char _2;
  10   │   unsigned char _3;
  11   │   uint8_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;pred:   ENTRY
  15   │   not_overflow_5 = x_4(D) <= 255;
  16   │   _1 = (unsigned char) x_4(D);
  17   │   _2 = (unsigned char) not_overflow_5;
  18   │   _3 = _2 + 255;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;succ:   EXIT
  22   │
  23   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x)
   6   │ {
   7   │   uint8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add form 4 for unsigned .SAT_TRUNC matching.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 65a3aae22430..78f1957e8c78 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3325,6 +3325,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
+/* Unsigned saturation truncate, case 3, sizeof (WT) > sizeof (NT).
+   SAT_U_TRUNC = (NT)X | ((NT)(X <= (WT)-1) + (NT)-1).  */
+(match (unsigned_integer_sat_trunc @0)
+ (bit_ior:c (plus:c (convert (le @0 INTEGER_CST@1)) INTEGER_CST@2)
+   (convert @0))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (with
+  {
+   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+   unsigned otype_precision = TYPE_PRECISION (type);
+   wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
+   wide_int max = wi::mask (otype_precision, false, otype_precision);
+   wide_int int_cst_1 = wi::to_wide (@1);
+   wide_int int_cst_2 = wi::to_wide (@2);
+  }
+  (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)


[gcc r15-3172] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4

2024-08-25 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5ab1e238aa23d1773429f8f28abfb6ed16f655f6

commit r15-3172-g5ab1e238aa23d1773429f8f28abfb6ed16f655f6
Author: Pan Li 
Date:   Sun Aug 25 11:02:10 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 4.  Aka:

Form 4:
  #define DEF_SAT_U_TRUNC_FMT_4(NT, WT)  \
  NT __attribute__((noinline))   \
  sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x)  \
  {  \
bool not_overflow = x <= (WT)(NT)(-1);   \
return ((NT)x) | (NT)((NT)not_overflow - 1); \
  }

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_trunc-19.c: New test.
* gcc.target/riscv/sat_u_trunc-20.c: New test.
* gcc.target/riscv/sat_u_trunc-21.c: New test.
* gcc.target/riscv/sat_u_trunc-22.c: New test.
* gcc.target/riscv/sat_u_trunc-23.c: New test.
* gcc.target/riscv/sat_u_trunc-24.c: New test.
* gcc.target/riscv/sat_u_trunc-run-19.c: New test.
* gcc.target/riscv/sat_u_trunc-run-20.c: New test.
* gcc.target/riscv/sat_u_trunc-run-21.c: New test.
* gcc.target/riscv/sat_u_trunc-run-22.c: New test.
* gcc.target/riscv/sat_u_trunc-run-23.c: New test.
* gcc.target/riscv/sat_u_trunc-run-24.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h  | 12 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-21.c | 19 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-22.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-23.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-24.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-19.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-20.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-21.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-22.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-23.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-24.c | 16 
 13 files changed, 218 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 91853b60f592..229e1f0a5cda 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -245,6 +245,15 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 }
 #define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT)
 
+#define DEF_SAT_U_TRUNC_FMT_4(NT, WT)  \
+NT __attribute__((noinline))   \
+sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x)  \
+{  \
+  bool not_overflow = x <= (WT)(NT)(-1);   \
+  return ((NT)x) | (NT)((NT)not_overflow - 1); \
+}
+#define DEF_SAT_U_TRUNC_FMT_4_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_4(NT, WT)
+
 #define RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_1 (x)
 #define RUN_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_1(NT, WT, x)
 
@@ -254,4 +263,7 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
 #define RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_3 (x)
 #define RUN_SAT_U_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_3(NT, WT, x)
 
+#define RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_4 (x)
+#define RUN_SAT_U_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_4(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c
new file mode 100644
index 000000000000..e61faffbbc6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint16_t_to_uint8_t_fmt_4:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_4(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c
new file mode 100644
index 

[gcc r15-3173] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4

2024-08-25 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:8f2f7aabcef8d801af002a26885a97ccf9889099

commit r15-3173-g8f2f7aabcef8d801af002a26885a97ccf9889099
Author: Pan Li 
Date:   Sun Aug 25 14:15:40 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4

This patch would like to add test cases for the unsigned vector
.SAT_TRUNC form 4.  Aka:

Form 4:
  #define DEF_VEC_SAT_U_TRUNC_FMT_4(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
bool not_overflow = in[i] <= (WT)(NT)(-1);  \
out[i] = ((NT)in[i]) | (NT)((NT)not_overflow - 1);  \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_4 (uint32_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c| 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c| 23 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c| 19 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c| 21 
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c| 19 ++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-19.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-20.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-21.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-22.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-23.c  | 16 +++
 .../rvv/autovec/unop/vec_sat_u_trunc-run-24.c  | 16 +++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 18 +
 13 files changed, 236 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c
new file mode 100644
index 000000000000..a80cefe46ab0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_trunc_uint8_t_uint16_t_fmt_4:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0
+** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** ...
+*/
+DEF_VEC_SAT_U_TRUNC_FMT_4 (uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c
new file mode 100644
index 000000000000..9a4d261d052d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final 

[gcc r15-3174] RISC-V: Support IMM for operand 0 of ussub pattern

2024-08-25 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:17be00916e51835dcc47e30ed32fc892ee0c581d

commit r15-3174-g17be00916e51835dcc47e30ed32fc892ee0c581d
Author: Pan Li 
Date:   Sat Aug 3 07:02:57 2024 +

RISC-V: Support IMM for operand 0 of ussub pattern

This patch would like to allow IMM for the operand 0 of ussub pattern.
Aka .SAT_SUB(1023, y) as the below example.

Form 1:
  #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
  {   \
return (T)IMM >= y ? (T)IMM - y : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023)

Before this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ bgtu a0,a5,.L3
  13   │ sub a0,a5,a0
  14   │ ret
  15   │ .L3:
  16   │ li  a0,0
  17   │ ret

After this patch:
  10   │ sat_u_sub_imm82_uint64_t_fmt_1:
  11   │ li  a5,82
  12   │ sltu a4,a5,a0
  13   │ addi a4,a4,-1
  14   │ sub a0,a5,a0
  15   │ and a0,a4,a0
  16   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new
func impl to gen xmode rtx reg from operand rtx.
(riscv_expand_ussub): Gen xmode reg for operand 1.
* config/riscv/riscv.md: Allow const_int for operand 1.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_u_sub_imm-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-1_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-4.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc  | 46 +-
 gcc/config/riscv/riscv.md  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c   | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c   | 19 
 .../gcc.target/riscv/sat_u_sub_imm-run-1.c | 56 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-2.c | 56 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-3.c | 55 +
 .../gcc.target/riscv/sat_u_sub_imm-run-4.c | 48 +++
 17 files changed, 477 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8538d405f505..90a6e936558d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11907,6 +11907,50 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG,  the gen_lowpart of Xmode will be returned.
+
+   If rtx is const_int,  a new REG rtx will be created to hold the value of
+   const_int and then returned.
+
+   According to the gccint doc, the constants generated for modes with fewer
+   bits than in HOST_WIDE_INT must be sign extended to full width.  Thus there
+   will be two cases here,  take QImode as example.
+
+   For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
+   mov from const_int to the new REG rtx is good enough here.
+
+   For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
+   Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
+   

[gcc r15-3188] Match: Add int type fits check for .SAT_ADD imm operand

2024-08-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3b78aa3e316a22b4ae477c91866d47f654f129b1

commit r15-3188-g3b78aa3e316a22b4ae477c91866d47f654f129b1
Author: Pan Li 
Date:   Sat Aug 24 10:16:28 2024 +0800

Match: Add int type fits check for .SAT_ADD imm operand

This patch adds a strict check for the imm operand of .SAT_ADD
matching.  Previously there was no type checking for the imm operand,
which could result in unexpected IL being caught by the .SAT_ADD pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_ADD.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, 12);
uint8_t sum = .SAT_ADD (a, 12u);
uint8_t sum = .SAT_ADD (a, 126u);
uint8_t sum = .SAT_ADD (a, 128u);
uint8_t sum = .SAT_ADD (a, 228);
uint8_t sum = .SAT_ADD (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_ADD (a, -1);
uint8_t sum = .SAT_ADD (a, 256u);
uint8_t sum = .SAT_ADD (a, 257);

The test suites below passed for this patch:
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_ADD imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm.
* gcc.target/riscv/sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-10.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-11.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-12.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-13.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-14.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-15.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-16.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-17.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-18.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-19.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-20.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-21.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-22.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-23.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-24.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-25.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-26.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-27.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-28.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-29.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-30.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-31.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-32.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-33.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-34.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-35.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-36.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-37.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-38.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-39.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-40.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-41.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-42.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-43.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-44.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-45.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-46.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-47.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-48.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-49.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-50.c: New test.
* gcc.target/riscv/sat_u_add_imm_

[gcc r15-3208] RISC-V: Support IMM for operand 1 of ussub pattern

2024-08-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a1062b0c07bb729cf6a1fff34929d22e5d5b633d

commit r15-3208-ga1062b0c07bb729cf6a1fff34929d22e5d5b633d
Author: Pan Li 
Date:   Mon Aug 26 15:58:52 2024 +0800

RISC-V: Support IMM for operand 1 of ussub pattern

This patch would like to allow IMM for the operand 1 of ussub pattern.
Aka .SAT_SUB(x, 22) as the below example.

Form 2:
  #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
  {   \
return x >= (T)IMM ? x - (T)IMM : 0;  \
  }

DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1022)

It is almost the same as the support for imm in operand 0 of the ussub
pattern, but allows the second operand to be imm instead of the first operand.

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_ussub): Gen xmode for the
second operand, aka y in parameter.
* config/riscv/riscv.md (ussub<mode>3): Allow const_int for operand 2.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-5.c: New test.
* gcc.target/riscv/sat_u_sub_imm-5_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-5_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-6_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-8.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-5.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-6.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-7.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc  |  2 +-
 gcc/config/riscv/riscv.md  |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  9 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5.c   | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_1.c | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_2.c | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_1.c | 21 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_2.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c   | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c | 21 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8.c   | 18 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-5.c | 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-6.c | 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-7.c | 54 +
 .../gcc.target/riscv/sat_u_sub_imm-run-8.c | 48 +++
 17 files changed, 423 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 90a6e936558d..1f544c1287ec 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11965,7 +11965,7 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
 {
   machine_mode mode = GET_MODE (dest);
   rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode);
-  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
   rtx xmode_minus = gen_reg_rtx (Xmode);
   rtx xmode_dest = gen_reg_rtx (Xmode);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index a94705a8e7cc..3289ed2155ad 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4370,7 +4370,7 @@
 (define_expand "ussub3"
   [(match_operand:ANYI 0 "register_operand")
(match_operand:ANYI 1 "reg_or_int_operand")
-   (match_operand:ANYI 2 "register_operand")]
+   (match_operand:ANYI 2 "reg_or_int_operand")]
   ""
   {
 riscv_expand_ussub (operands[0], operands[1], operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 4eca73586c87..c8ff8320d824 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -224,6 +224,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
   return (T)IMM >= y ? (T)IMM - y : 0;  \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
+{   \
+  return x >= (T)IMM ? x - (T)IMM : 0;  \
+}
+
 #d

[gcc r15-3238] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3

2024-08-27 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:cb0b8b62223b485a058a56fc5c6345974ebaa230

commit r15-3238-gcb0b8b62223b485a058a56fc5c6345974ebaa230
Author: Pan Li 
Date:   Tue Aug 27 14:37:01 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 3.  Aka:

Form 3:
  #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
  {   \
return (T)IMM > y ? (T)IMM - y : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-10_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-9_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-10.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-11.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-12.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  9 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c  | 21 
 .../gcc.target/riscv/sat_u_sub_imm-10_1.c  | 22 +
 .../gcc.target/riscv/sat_u_sub_imm-10_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c  | 20 
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c  | 22 +
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-12.c  | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9.c   | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_1.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_2.c | 20 
 .../gcc.target/riscv/sat_u_sub_imm-run-10.c| 56 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-11.c| 55 +
 .../gcc.target/riscv/sat_u_sub_imm-run-12.c| 48 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-9.c | 56 ++
 15 files changed, 432 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index c8ff8320d824..b4339eb0dff9 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -231,6 +231,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   return x >= (T)IMM ? x - (T)IMM : 0;  \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
+{   \
+  return (T)IMM > y ? (T)IMM - y : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -248,6 +255,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_1(y) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_2(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
new file mode 100644
index ..db450d7cfbf8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm6_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*6
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_3(uint16_t, 6)
+
+/* { d

[gcc r15-3239] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4

2024-08-27 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3989e31d867b3505f847ecb6d870eacacfdf47bf

commit r15-3239-g3989e31d867b3505f847ecb6d870eacacfdf47bf
Author: Pan Li 
Date:   Tue Aug 27 15:14:40 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4

This patch would like to add test cases for the unsigned scalar
.SAT_SUB IMM form 4.  Aka:

Form 4:
  #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
  T __attribute__((noinline)) \
  sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
  {   \
return x > (T)IMM ? x - (T)IMM : 0;   \
  }

DEF_SAT_U_SUB_IMM_FMT_4(uint64_t, 23)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_sub_imm-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-13_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-14_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: New test.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: New test.
* gcc.target/riscv/sat_u_sub_imm-16.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-13.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-14.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-15.c: New test.
* gcc.target/riscv/sat_u_sub_imm-run-16.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h |  9 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-13_1.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-13_2.c  | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14.c  | 20 
 .../gcc.target/riscv/sat_u_sub_imm-14_1.c  | 21 +
 .../gcc.target/riscv/sat_u_sub_imm-14_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c  | 19 
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c  | 21 +
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c  | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-16.c  | 18 +++
 .../gcc.target/riscv/sat_u_sub_imm-run-13.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-14.c| 55 ++
 .../gcc.target/riscv/sat_u_sub_imm-run-15.c| 54 +
 .../gcc.target/riscv/sat_u_sub_imm-run-16.c| 48 +++
 15 files changed, 421 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index b4339eb0dff9..a899979904b9 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -238,6 +238,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   return (T)IMM > y ? (T)IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
+{   \
+  return x > (T)IMM ? x - (T)IMM : 0;   \
+}
+
 #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y)
 #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y)
 #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y)
@@ -257,6 +264,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y)  \
   if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
 #define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort ()
+#define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
+  if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
new file mode 100644
index ..7dcbc3b1a126
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm11_uint8_t_fmt_4:
+** addi\s+[atx][0-9]+,\s*a0,\s*-11
+** sltiu\s+a0,\s*[atx][0-9]+,\s*11
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_4(uint8_t, 11)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git 

[gcc r15-3241] Match: Support form 1 for scalar signed integer .SAT_ADD

2024-08-27 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:fe5f652bab420eb372645281f7fe3e5aa1534d01

commit r15-3241-gfe5f652bab420eb372645281f7fe3e5aa1534d01
Author: Pan Li 
Date:   Mon Aug 26 10:11:38 2024 +0800

Match: Support form 1 for scalar signed integer .SAT_ADD

This patch would like to support the form 1 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

We can tell the difference before and after this patch if the backend
implements the ssadd3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t sum;
   8   │   long unsigned int x.0_1;
   9   │   long unsigned int y.1_2;
  10   │   long unsigned int _3;
  11   │   long int _4;
  12   │   long int _5;
  13   │   int64_t _6;
  14   │   _Bool _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │   long int _14;
  18   │   long int _16;
  19   │   long int _17;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;pred:   ENTRY
  23   │   x.0_1 = (long unsigned int) x_7(D);
  24   │   y.1_2 = (long unsigned int) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int64_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _17 = ~_4;
  30   │   _16 = _5 & _17;
  31   │   if (_16 < 0)
  32   │ goto ; [41.00%]
  33   │   else
  32   │ goto <bb 3>; [41.00%]
  33   │   else
  34   │ goto <bb 4>; [59.00%]
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;pred:   2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (long int) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 9223372036854775807;
  44   │ ;;succ:   4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;pred:   2
  48   │ ;;3
  49   │   # _6 = PHI 
  50   │   return _6;
  51   │ ;;succ:   EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the above renamed
func from caller.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  | 15 +++
 gcc/tree-ssa-math-opts.cc | 35 ++-
 2 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5ee60963c904..be211535a49f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3192,6 +3192,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0) && int_fits_type_p (@1, type
 
+/* Signed saturation add, case 1:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ sum) & !(X ^ Y) < 0 ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0)
+ (nop_convert @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+   @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* Unsigned saturation sub, case 1 (branch w

[gcc r15-3243] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD

2024-08-28 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6dccd5710380429c7addec9fe92a1a0bcb2f3367

commit r15-3243-g6dccd5710380429c7addec9fe92a1a0bcb2f3367
Author: Pan Li 
Date:   Tue Aug 27 15:01:02 2024 +0800

Vect: Reconcile the const_int operand type of unsigned .SAT_ADD

The .SAT_ADD has 2 operands, and one of the operands may be an INTEGER_CST.
For example, _1 = .SAT_ADD (_2, 9) comes from the sample code below.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9)

It will fail to vectorize because vectorizable_call checks that the
operands are type compatible, but the imm will be (const_int 9) with
SImode, which is different from _2 (DImode).  Aka:

uint64_t _1;
uint64_t _2;
_1 = .SAT_ADD (_2, 9);

This patch would like to reconcile the imm operand to the operand type
mode of _2 by fold_convert to make the vectorizable_call happy.

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_sat_add_pattern): Add fold
convert for const_int to the type of operand 0.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: New test.
* 
gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../autovec/binop/vec_sat_u_add_imm_reconcile-1.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-10.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-11.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-12.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-13.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-14.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-15.c   |  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-2.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-3.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-4.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-5.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-6.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-7.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-8.c|  9 +
 .../autovec/binop/vec_sat_u_add_imm_reconcile-9.c|  9 +
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 20 
 gcc/tree-vect-patterns.cc|  3 +++
 17 files changed, 158 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c
new file mode 100644
index 

[gcc r15-3244] Test: Move pr116278 run test to dg/torture [NFC]

2024-08-28 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3178786c88761e47b3cbe700a97a0de2b6e133cb

commit r15-3244-g3178786c88761e47b3cbe700a97a0de2b6e133cb
Author: Pan Li 
Date:   Mon Aug 19 10:02:46 2024 +0800

Test: Move pr116278 run test to dg/torture [NFC]

Move the run test of pr116278 to dg/torture and leave the RISC-V
asm check under the RISC-V part.

PR target/116278

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116278-run-1.c: Take compile instead of run.
* gcc.target/riscv/pr116278-run-2.c: Ditto.
* gcc.dg/torture/pr116278-run-1.c: New test.
* gcc.dg/torture/pr116278-run-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr116278-run-1.c   | 19 +++
 gcc/testsuite/gcc.dg/torture/pr116278-run-2.c   | 19 +++
 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c |  4 ++--
 gcc/testsuite/gcc.target/riscv/pr116278-run-2.c |  4 ++--
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
new file mode 100644
index ..ada3ac984451
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int8_t b[1];
+int8_t *d = b;
+int32_t c;
+
+int main() {
+  b[0] = -40;
+  uint16_t t = (uint16_t)d[0];
+
+  c = (t < 0xFFF6 ? t : 0xFFF6) + 9;
+
+  if (c != 65505)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c 
b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
new file mode 100644
index ..5e72d15957ea
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32 } */
+/* { dg-options "-O2" } */
+
+#include <stdint.h>
+
+int16_t b[1];
+int16_t *d = b;
+int64_t c;
+
+int main() {
+  b[0] = -40;
+  uint32_t t = (uint32_t)d[0];
+
+  c = (t < 0xFFFFFFF6u ? t : 0xFFFFFFF6u) + 9;
+
+  if (c != 4294967265)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
index d3812bdcdfb7..67cf17ebc33e 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c
@@ -1,7 +1,7 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
-#include 
+#include 
 
 int8_t b[1];
 int8_t *d = b;
diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
index 669cd4f003f1..103602df2584 100644
--- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c
@@ -1,7 +1,7 @@
-/* { dg-do run { target { riscv_v } } } */
+/* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand-details" } */
 
-#include 
+#include 
 
 int16_t b[1];
 int16_t *d = b;


[gcc r15-3348] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

2024-09-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:72f3e9021e55f14e90773cf2966805a318f44842

commit r15-3348-g72f3e9021e55f14e90773cf2966805a318f44842
Author: Pan Li 
Date:   Fri Aug 30 08:36:45 2024 +0800

RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM

This patch would like to add test cases for the unsigned vector .SAT_ADD
when one of the operands is IMM.

Form 3:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
T ret; \
for (i = 0; i < limit; i++)\
  {\
out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \
  }\
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123)

The below tests are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c  | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-10.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-11.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-12.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-9.c| 28 ++
 8 files changed, 168 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
new file mode 100644
index ..b6b605ac6158
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_3:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
new file mode 100644
index ..6da86a1abe17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm33u_uint32_t_fmt_3:
+** ...
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint32_t, 33u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
new file mode 100644
index ..b6ff5a6d5d68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm129ull_uint64_t_fmt_3:
+** ...
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/

[gcc r15-3349] RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM

2024-09-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:56ed1dfa79c436b769f3266258d34d160b4330d9

commit r15-3349-g56ed1dfa79c436b769f3266258d34d160b4330d9
Author: Pan Li 
Date:   Fri Aug 30 11:01:37 2024 +0800

RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM

This patch would like to add test cases for the unsigned vector .SAT_ADD
when one of the operands is IMM.

Form 4:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_4(T, IMM)                               \
  T __attribute__((noinline))                                               \
  vec_sat_u_add_imm##IMM##_##T##_fmt_4 (T *out, T *in, unsigned limit)      \
  {                                                                         \
    unsigned i;                                                             \
    T ret;                                                                  \
    for (i = 0; i < limit; i++)                                             \
      {                                                                     \
        out[i] = __builtin_add_overflow (in[i], IMM, &ret) == 0 ? ret : -1; \
      }                                                                     \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint64_t, 123)

The below test are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c: 
New test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-13.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-14.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-15.c   | 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-16.c   | 28 ++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 20 
 9 files changed, 188 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
new file mode 100644
index ..a9439dff39f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm9u_uint8_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*9
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint8_t, 9u)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
new file mode 100644
index ..dbe474975991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../vec_sat_arith.h"
+
+/*
+** vec_sat_u_add_imm15_uint16_t_fmt_4:
+** ...
+** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+** ...
+*/
+DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint16_t, 15)
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
new file mode 100644
index ..0ac2e1b2942f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"

[gcc r15-3347] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

2024-09-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:e96d4bf6a6e8b8a5ea1b81a79f4efa07dee77af1

commit r15-3347-ge96d4bf6a6e8b8a5ea1b81a79f4efa07dee77af1
Author: Pan Li 
Date:   Fri Aug 30 14:07:12 2024 +0800

RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

Previously, we had some special handling for both the .SAT_ADD and
.SAT_SUB for unsigned int.  Both are similar in that they take care of
SImode in RV64 for zero extend.  Thus refactor these two helper functions
into one to avoid possible code duplication.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge
the zero_extend handling from func riscv_gen_unsigned_xmode_reg.
(riscv_gen_unsigned_xmode_reg): Remove.
(riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx
instead of riscv_gen_unsigned_xmode_reg.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub-11.c: Adjust asm check.
* gcc.target/riscv/sat_u_sub-15.c: Ditto.
* gcc.target/riscv/sat_u_sub-19.c: Ditto.
* gcc.target/riscv/sat_u_sub-23.c: Ditto.
* gcc.target/riscv/sat_u_sub-27.c: Ditto.
* gcc.target/riscv/sat_u_sub-3.c: Ditto.
* gcc.target/riscv/sat_u_sub-31.c: Ditto.
* gcc.target/riscv/sat_u_sub-35.c: Ditto.
* gcc.target/riscv/sat_u_sub-39.c: Ditto.
* gcc.target/riscv/sat_u_sub-43.c: Ditto.
* gcc.target/riscv/sat_u_sub-47.c: Ditto.
* gcc.target/riscv/sat_u_sub-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto.
* gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc  | 99 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub-11.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-15.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-19.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-23.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-27.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-3.c   |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-31.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-35.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-39.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-43.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-47.c  |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub-7.c   |  4 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-11_2.c  |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_1.c  |  2 +
 .../gcc.target/riscv/sat_u_sub_imm-15_2.c  |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c   |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c   |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c |  2 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c |  2 +
 25 files changed, 118 insertions(+), 53 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 496dd177fe7f..75b37b532443 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno)
   return default_get_reg_raw_mode (regno);
 }
 
-/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
-   The rtx x will be zero extended to Xmode if the mode is HI/QImode,  and
-   the new zero extended Xmode rtx will be returned.
-   Or the gen_lowpart rtx of Xmode will be returned.  */
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+   The rtx x can be REG (QI/HI/SI/DI) or const_int.
+   The machine_mode mode is the original mode from define pattern.
+
+   If rtx is REG and Xmode, the RTX x will be returned directly.
+
+   If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be
+   returned.
+
+   If rtx is const_int, a new REG rtx will be created to hold the value of
+   const_int and then returned.
+
+   According to the gcci

[gcc r15-3351] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

2024-09-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5239902210a16b22d59d2cf8b535d615922a5c00

commit r15-3351-g5239902210a16b22d59d2cf8b535d615922a5c00
Author: Pan Li 
Date:   Sun Aug 18 14:08:21 2024 +0800

RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 3.  Aka:

Form 3:
  #define DEF_SAT_U_TRUNC_FMT_3(NT, WT)     \
  NT __attribute__((noinline))              \
  sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \
  {                                         \
    WT max = (WT)(NT)-1;                    \
    return x <= max ? (NT)x : (NT) max;     \
  }

QUAD:
DEF_SAT_U_TRUNC_FMT_3 (uint16_t, uint64_t)
DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-16.c: New test.
* gcc.target/riscv/sat_u_trunc-17.c: New test.
* gcc.target/riscv/sat_u_trunc-18.c: New test.
* gcc.target/riscv/sat_u_trunc-run-16.c: New test.
* gcc.target/riscv/sat_u_trunc-run-17.c: New test.
* gcc.target/riscv/sat_u_trunc-run-18.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c | 16 
 6 files changed, 102 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
new file mode 100644
index ..f91da58c0bae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
new file mode 100644
index ..9813e1f79b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_3:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
new file mode 100644
index ..eb799849f73a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_3:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_3(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
new file mode 100644
index ..20ceda6852e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_3_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2

[gcc r15-3350] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

2024-09-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ea81e21d5398bdacf883533fd738fc45ea8d6dd9

commit r15-3350-gea81e21d5398bdacf883533fd738fc45ea8d6dd9
Author: Pan Li 
Date:   Sun Aug 18 12:49:47 2024 +0800

RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2

This patch would like to add test cases for the unsigned scalar quad and
oct .SAT_TRUNC form 2.  Aka:

Form 2:
  #define DEF_SAT_U_TRUNC_FMT_2(NT, WT)     \
  NT __attribute__((noinline))              \
  sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \
  {                                         \
    WT max = (WT)(NT)-1;                    \
    return x > max ? (NT) max : (NT)x;      \
  }

QUAD:
DEF_SAT_U_TRUNC_FMT_2 (uint16_t, uint64_t)
DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint32_t)

OCT:
DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_trunc-10.c: New test.
* gcc.target/riscv/sat_u_trunc-11.c: New test.
* gcc.target/riscv/sat_u_trunc-12.c: New test.
* gcc.target/riscv/sat_u_trunc-run-10.c: New test.
* gcc.target/riscv/sat_u_trunc-run-11.c: New test.
* gcc.target/riscv/sat_u_trunc-run-12.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c | 17 +
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c | 16 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c | 16 
 6 files changed, 102 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
new file mode 100644
index ..5ea8e613901c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint32_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
new file mode 100644
index ..3b45e2af9ce3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint8_t_fmt_2:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
new file mode 100644
index ..7ea2c93a301f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_trunc_uint64_t_to_uint16_t_fmt_2:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUNC_FMT_2(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
new file mode 100644
index ..2281610f3353
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUNC_FMT_2_WRAP(T1, T2)
+
+#define DATA   TEST_UNARY_DATA_WRAP(T1, T2

[gcc r15-3390] RISC-V: Support form 1 of integer scalar .SAT_ADD

2024-09-02 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:539fcaae67c6cf54bd377eba6c9d5b1792a3

commit r15-3390-g539fcaae67c6cf54bd377eba6c9d5b1792a3
Author: Pan Li 
Date:   Thu Aug 29 11:25:44 2024 +0800

RISC-V: Support form 1 of integer scalar .SAT_ADD

This patch would like to support the scalar signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_1 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
return (x ^ y) < 0 \
  ? sum\
  : (sum ^ x) >= 0 \
? sum  \
: x < 0 ? MIN : MAX;   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

Before this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ mv   a5,a0
  12   │ add  a0,a0,a1
  13   │ xor  a1,a5,a1
  14   │ not  a1,a1
  15   │ xor  a4,a5,a0
  16   │ and  a1,a1,a4
  17   │ blt  a1,zero,.L5
  18   │ ret
  19   │ .L5:
  20   │ srai a5,a5,63
  21   │ li   a0,-1
  22   │ srli a0,a0,1
  23   │ xor  a0,a5,a0
  24   │ ret

After this patch:
  10   │ sat_s_add_int64_t_fmt_1:
  11   │ add  a2,a0,a1
  12   │ xor  a1,a0,a1
  13   │ xor  a5,a0,a2
  14   │ srli a5,a5,63
  15   │ srli a1,a1,63
  16   │ xori a1,a1,1
  17   │ and  a5,a5,a1
  18   │ srai a4,a0,63
  19   │ li   a3,-1
  20   │ srli a3,a3,1
  21   │ xor  a3,a3,a4
  22   │ neg  a4,a5
  23   │ and  a3,a3,a4
  24   │ addi a5,a5,-1
  25   │ and  a0,a2,a5
  26   │ or   a0,a0,a3
  27   │ ret

The below test suites are passed for this patch:
1. The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
decl for expanding ssadd.
* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
impl to gen the max int rtx.
(riscv_expand_ssadd): Add new func impl to expand the ssadd.
* config/riscv/riscv.md (ssadd<mode>3): Add new pattern for
signed integer .SAT_ADD.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_arith_data.h: Add test data.
* gcc.target/riscv/sat_s_add-1.c: New test.
* gcc.target/riscv/sat_s_add-2.c: New test.
* gcc.target/riscv/sat_s_add-3.c: New test.
* gcc.target/riscv/sat_s_add-4.c: New test.
* gcc.target/riscv/sat_s_add-run-1.c: New test.
* gcc.target/riscv/sat_s_add-run-2.c: New test.
* gcc.target/riscv/sat_s_add-run-3.c: New test.
* gcc.target/riscv/sat_s_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv.cc  | 90 ++
 gcc/config/riscv/riscv.md  | 11 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 17 
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 85 
 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c   | 30 
 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c   | 32 
 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c   | 31 
 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c   | 30 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c   | 16 
 gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c   | 16 
 .../gcc.target/riscv/scalar_sat_binary_run_xxx.h   | 26 +++
 14 files changed, 417 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 926899ccad64..3358e3887b95 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -134,6 +134,7 @@ extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d03e51f3a687..98720611e246 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12001,6 +12001,96 @@ riscv_expand_us

[gcc r15-3438] RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD

2024-09-03 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9ea9d05908432fc5f3632f3e397e3709f95ef636

commit r15-3438-g9ea9d05908432fc5f3632f3e397e3709f95ef636
Author: Pan Li 
Date:   Mon Sep 2 15:54:43 2024 +0800

RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD

This patch would like to allow the IMM operand of the unsigned
scalar .SAT_ADD.  Like the operand 0, the operand 1 of .SAT_ADD
will be zero extended to Xmode before underlying code generation.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_expand_usadd): Zero extend
the second operand of usadd as the first operand does.
* config/riscv/riscv.md (usadd<mode>3): Allow imm operand for
scalar usadd pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_add-11.c: Make asm check robust.
* gcc.target/riscv/sat_u_add-15.c: Ditto.
* gcc.target/riscv/sat_u_add-19.c: Ditto.
* gcc.target/riscv/sat_u_add-23.c: Ditto.
* gcc.target/riscv/sat_u_add-3.c: Ditto.
* gcc.target/riscv/sat_u_add-7.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc | 2 +-
 gcc/config/riscv/riscv.md | 4 ++--
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-3.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c  | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 98720611e246..f82e64a6fec8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11970,7 +11970,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   rtx xmode_sum = gen_reg_rtx (Xmode);
   rtx xmode_lt = gen_reg_rtx (Xmode);
   rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
-  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
   rtx xmode_dest = gen_reg_rtx (Xmode);
 
   /* Step-1: sum = x + y  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f7efafb8abe..9f94b5aa0232 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4360,8 +4360,8 @@
 
 (define_expand "usadd3"
   [(match_operand:ANYI 0 "register_operand")
-   (match_operand:ANYI 1 "register_operand")
-   (match_operand:ANYI 2 "register_operand")]
+   (match_operand:ANYI 1 "reg_or_int_operand")
+   (match_operand:ANYI 2 "reg_or_int_operand")]
   ""
   {
 riscv_expand_usadd (operands[0], operands[1], operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
index e248aeafa8ef..bd830ececad4 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_3:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
index bb8b991a84ee..de615a6225e9 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_4:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
index 7e4ae12f2f51..2b793e2f8fdb 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_5:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*a[01],\s*a[01]
 ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
index 49bbb74a401e..5de086e11384 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c
@@ -8,7 +8,7 @@
 ** sat_u_add_uint32_t_fmt_6:
 ** slli\s+[atx][0-9]+,\s*a0,\s*32
 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
-** add\s+[atx][0-9]+,\s*a0,\s*a1
+** add\s+[atx][0-9]+,\s*

[gcc r15-3502] Match: Add int type fits check for form 1 of .SAT_SUB imm operand

2024-09-05 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:019335b404c8d7fb2d234bb179745cc28693dd20

commit r15-3502-g019335b404c8d7fb2d234bb179745cc28693dd20
Author: Pan Li 
Date:   Mon Sep 2 09:48:46 2024 +0800

Match: Add int type fits check for form 1 of .SAT_SUB imm operand

This patch adds a strict check for the imm operand of .SAT_SUB
matching.  Previously there was no type checking for the imm operand,
which could result in unexpected IL being caught by the .SAT_SUB pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (12, a);
uint8_t sum = .SAT_SUB (12u, a);
uint8_t sum = .SAT_SUB (126u, a);
uint8_t sum = .SAT_SUB (128u, a);
uint8_t sum = .SAT_SUB (228, a);
uint8_t sum = .SAT_SUB (223u, a);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (-1, a);
uint8_t sum = .SAT_SUB (256u, a);
uint8_t sum = .SAT_SUB (257, a);

The test suites below passed for this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd   |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-53.c | 18 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-54.c | 27 ++
 .../gcc.target/riscv/sat_u_add_imm_type_check-55.c | 18 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-56.c | 27 ++
 6 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 621306213e4..4b86d20e165 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@0, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 86cd6bea8df..187c94795f7 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -284,6 +284,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x)  \
 #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \
   if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\
+{ \
+  return IMM >= y ? IMM - y : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
+{ \
+  return IMM > y ? IMM - y : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
new file mode 100644
index 000..c959eeb0d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not 

[gcc r15-3503] Match: Add int type fits check for form 2 of .SAT_SUB imm operand

2024-09-05 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a2e28b105cea4c44c3903d8d979c7a4afa1193f0

commit r15-3503-ga2e28b105cea4c44c3903d8d979c7a4afa1193f0
Author: Pan Li 
Date:   Mon Sep 2 11:33:08 2024 +0800

Match: Add int type fits check for form 2 of .SAT_SUB imm operand

This patch adds a strict check for the imm operand of .SAT_SUB
matching.  Previously there was no type checking for the imm operand,
which could result in unexpected IL being caught by the .SAT_SUB pattern.

We leverage int_fits_type_p here to make sure the imm operand is
an integer constant that fits the result type of the .SAT_SUB.  For example:

Fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, 12);
uint8_t sum = .SAT_SUB (a, 12u);
uint8_t sum = .SAT_SUB (a, 126u);
uint8_t sum = .SAT_SUB (a, 128u);
uint8_t sum = .SAT_SUB (a, 228);
uint8_t sum = .SAT_SUB (a, 223u);

Not fits uint8_t:
uint8_t a;
uint8_t sum = .SAT_SUB (a, -1);
uint8_t sum = .SAT_SUB (a, 256u);
uint8_t sum = .SAT_SUB (a, 257);

The test suites below passed for this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* match.pd: Add int_fits_type_p check for .SAT_SUB imm operand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macros.
* gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test.
* gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd   |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-57.c | 18 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-58.c | 27 ++
 .../gcc.target/riscv/sat_u_add_imm_type_check-59.c | 18 +++
 .../gcc.target/riscv/sat_u_add_imm_type_check-60.c | 27 ++
 6 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4b86d20e165..bc6a83b47fb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_sub @0 @1)
  (plus (max @0 INTEGER_CST@1) INTEGER_CST@2)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
- && types_match (type, @1))
+ && types_match (type, @1) && int_fits_type_p (@1, type))
  (with
   {
unsigned precision = TYPE_PRECISION (type);
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 187c94795f7..a8672f66322 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -298,6 +298,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\
   return IMM > y ? IMM - y : 0;   \
 }
 
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\
+{ \
+  return x >= IMM ? x - IMM : 0;  \
+}
+
+#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \
+T __attribute__((noinline))   \
+sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\
+{ \
+  return x > IMM ? x - IMM : 0;   \
+}
+
 
/**/
 /* Saturation Truncate (unsigned and signed)  
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
new file mode 100644
index 000..1b193bcfb26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "sat_arith.h"
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u)
+
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll)
+DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull)
+
+/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */
diff --git a/gcc/testsuite/gcc.target/ris

[gcc r15-3569] Match: Support form 2 for scalar signed integer .SAT_ADD

2024-09-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a7eaf7d5edb194bae0d7d9bc3d20bb5730be57d8

commit r15-3569-ga7eaf7d5edb194bae0d7d9bc3d20bb5730be57d8
Author: Pan Li 
Date:   Tue Sep 3 15:39:16 2024 +0800

Match: Support form 2 for scalar signed integer .SAT_ADD

This patch would like to support the form 2 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 2:
  #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
  T __attribute__((noinline))  \
  sat_s_add_##T##_fmt_2 (T x, T y) \
  {\
T sum = (UT)x + (UT)y; \
   \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
  return sum;  \
   \
return x < 0 ? MIN : MAX;  \
  }

DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

The difference before and after this patch is visible when the backend
implements the ssadd<mode>3 pattern, as shown below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t sum;
   8   │   unsigned char x.0_1;
   9   │   unsigned char y.1_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _22;
  19   │   signed char _23;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;pred:   ENTRY
  23   │   x.0_1 = (unsigned char) x_7(D);
  24   │   y.1_2 = (unsigned char) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int8_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _23 = ~_4;
  30   │   _22 = _5 & _23;
  31   │   if (_22 >= 0)
  32   │ goto <bb 4>; [42.57%]
  33   │   else
  34   │ goto <bb 3>; [57.43%]
  35   │ ;;succ:   4
  36   │ ;;3
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;pred:   2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (signed char) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 127;
  44   │ ;;succ:   4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;pred:   2
  48   │ ;;3
  49   │   # _6 = PHI <sum_9(2), _14(3)>
  50   │   return _6;
  51   │ ;;succ:   EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _6 = .SAT_ADD (x_7(D), y_8(D)); [tail call]
  12   │   return _6;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add the form 2 of signed .SAT_ADD matching.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index bc6a83b47fb0..a664be5fe238 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3207,6 +3207,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Signed saturation add, case 2:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ sum) & !(X ^ Y) >= 0 ? sum : (-(T)(X < 0) ^ MAX);
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (ge (bit_and:c (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+   (nop_convert @1
+  (bit_not (bit_xor:c @0 @1)))
+   integer_zerop)
+   @2
+   (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)


[gcc r15-3578] RISC-V: Fix asm check for Vector SAT_* due to middle-end change

2024-09-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6bd3ee7f2f2f4beed5b9d9a530736ad69d2cac42

commit r15-3578-g6bd3ee7f2f2f4beed5b9d9a530736ad69d2cac42
Author: Pan Li 
Date:   Wed Sep 11 07:00:13 2024 +0800

RISC-V: Fix asm check for Vector SAT_* due to middle-end change

A middle-end change affects the layout of the assembly generated
for vector SAT_*.  This patch adjusts the asm checks to make them robust.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust
asm check and make it robust.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto.
* gcc.target/riscv/rvv/autovec/bino

[gcc r15-3580] Vect: Support form 1 of vector signed integer .SAT_ADD

2024-09-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9b14a5823b685e3a604dc17b02c033f60ad90414

commit r15-3580-g9b14a5823b685e3a604dc17b02c033f60ad90414
Author: Pan Li 
Date:   Wed Sep 11 09:54:38 2024 +0800

Vect: Support form 1 of vector signed integer .SAT_ADD

This patch would like to support the vector signed ssadd pattern
for the RISC-V backend.  Aka

Form 1:
  #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)   \
  void __attribute__((noinline)) \
  vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n) \
  {  \
for (unsigned i = 0; i < n; i++) \
  {  \
T sum = (UT)x[i] + (UT)y[i]; \
out[i] = (x[i] ^ y[i]) < 0   \
  ? sum  \
  : (sum ^ x[i]) >= 0\
? sum\
: x[i] < 0 ? MIN : MAX;  \
  }  \
  }

DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

If the backend implemented the vector mode of ssadd, we will see IR diff
similar as below:

Before this patch:
 108   │   _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]);
 109   │   ivtmp_77 = _114 * 8;
 110   │   vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ...  }, 
_114, 0);
 111   │   vect__5.10_81 = VIEW_CONVERT_EXPR(vect__4.9_80);
 112   │   vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ...  
}, _114, 0);
 113   │   vect__8.14_86 = VIEW_CONVERT_EXPR(vect__7.13_85);
 114   │   vect__9.15_87 = vect__5.10_81 + vect__8.14_86;
 115   │   vect_sum_20.16_88 = VIEW_CONVERT_EXPR(vect__9.15_87);
 116   │   vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85;
 117   │   vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88;
 118   │   mask__46.19_92 = vect__10.17_89 >= { 0, ... };
 119   │   _36 = vect__4.9_80 >> 63;
 120   │   mask__44.26_104 = vect__11.18_90 < { 0, ... };
 121   │   mask__43.27_105 = mask__46.19_92 & mask__44.26_104;
 122   │   _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, 
... }, vect_sum_20.16_88);
 123   │   .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... }, _114, 0, 
_115);
 124   │   vectp_x.7_79 = vectp_x.7_78 + ivtmp_77;
 125   │   vectp_y.11_84 = vectp_y.11_83 + ivtmp_77;
 126   │   vectp_out.29_109 = vectp_out.29_108 + ivtmp_77;
 127   │   ivtmp_113 = ivtmp_112 - _114;

After this patch:
  94   │   # vectp_x.7_82 = PHI 
  95   │   # vectp_y.10_86 = PHI 
  96   │   # vectp_out.14_91 = PHI 
  97   │   # ivtmp_95 = PHI 
  98   │   _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]);
  99   │   ivtmp_81 = _97 * 8;
 100   │   vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ...  }, 
_97, 0);
 101   │   vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ...  
}, _97, 0);
 102   │   vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88);
 103   │   .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, 
vect_patt_40.13_89);
 104   │   vectp_x.7_83 = vectp_x.7_82 + ivtmp_81;
 105   │   vectp_y.10_87 = vectp_y.10_86 + ivtmp_81;
 106   │   vectp_out.14_92 = vectp_out.14_91 + ivtmp_81;
 107   │   ivtmp_96 = ivtmp_95 - _97;

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Add case 2 for the signed .SAT_ADD consumed by
vect pattern.
* tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new
matching func decl for signed .SAT_ADD.
(vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  | 16 
 gcc/tree-vect-patterns.cc |  5 -
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index a664be5fe238..4cef965c9c7a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3221,6 +3221,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value))
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
+/* Signed saturation add, case 3:
+   T sum = (T)((UT)X + (UT)Y)
+   SAT_S_ADD = (X ^ Y) < 0 && (X ^ sum) >= 0 ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+   (nop_convert @1
+   

[gcc r15-3593] RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass

2024-09-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:c08e493ceee47bbeb466eeef100be7c1dd01a4e5

commit r15-3593-gc08e493ceee47bbeb466eeef100be7c1dd01a4e5
Author: garthlei 
Date:   Wed Sep 11 17:09:37 2024 +0800

RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass

This patch fixes a bug in the current vsetvl pass.  The current pass uses
`m_vl` to determine whether the dest operand has been used by non-RVV
instructions.  However, `m_vl` may have been modified as a result of an
`update_avl` call, and thus would be no longer the dest operand of the
original instruction.  This can lead to incorrect vsetvl eliminations, as is
shown in the testcase.  In this patch, we create a `dest_vl` variable for
this scenario.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Use `dest_vl` for dest VL operand

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc| 16 +++-
 .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c  | 17 +
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 017efa8bc17e..ce831685439a 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1002,6 +1002,9 @@ public:
 
   void parse_insn (insn_info *insn)
   {
+/* The VL dest of the insn */
+rtx dest_vl = NULL_RTX;
+
 m_insn = insn;
 m_bb = insn->bb ();
 /* Return if it is debug insn for the consistency with optimize == 0.  */
@@ -1035,7 +1038,10 @@ public:
 if (m_avl)
   {
if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
- m_vl = ::get_vl (insn->rtl ());
+ {
+   m_vl = ::get_vl (insn->rtl ());
+   dest_vl = m_vl;
+ }
 
if (has_nonvlmax_reg_avl ())
  m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
@@ -1132,22 +1138,22 @@ public:
   }
 
 /* Determine if dest operand(vl) has been used by non-RVV instructions.  */
-if (has_vl ())
+if (dest_vl)
   {
const hash_set vl_uses
- = get_all_real_uses (get_insn (), REGNO (get_vl ()));
+ = get_all_real_uses (get_insn (), REGNO (dest_vl));
for (use_info *use : vl_uses)
  {
gcc_assert (use->insn ()->is_real ());
rtx_insn *rinsn = use->insn ()->rtl ();
if (!has_vl_op (rinsn)
-   || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
+   || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
  {
m_vl_used_by_non_rvv_insn = true;
break;
  }
rtx avl = ::get_avl (rinsn);
-   if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
+   if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
  {
m_vl_used_by_non_rvv_insn = true;
break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c
new file mode 100644
index ..c155f5613d27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2 -fdump-rtl-vsetvl-details" } 
*/
+
+#include 
+
+uint64_t a[2], b[2];
+
+void
+foo ()
+{
+  size_t vl = __riscv_vsetvl_e64m1 (2);
+  vuint64m1_t vx = __riscv_vle64_v_u64m1 (a, vl);
+  vx = __riscv_vslide1down_vx_u64m1 (vx, 0xull, vl);
+  __riscv_vse64_v_u64m1 (b, vx, vl);
+}
+
+/* { dg-final { scan-rtl-dump-not "Eliminate insn" "vsetvl" } }  */


[gcc r15-3595] RISC-V: Eliminate latter vsetvl when fused

2024-09-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3f212eabbba3edc1827d6da53cf6d5a64c6524f0

commit r15-3595-g3f212eabbba3edc1827d6da53cf6d5a64c6524f0
Author: Bohan Lei 
Date:   Thu Sep 12 10:28:03 2024 +0800

RISC-V: Eliminate latter vsetvl when fused

Hi all,

A simple assembly check has been added in this version. Previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662783.html

Thanks,
Bohan

--

The current vsetvl pass eliminates a vsetvl instruction when the previous
info is "available," but does not when "compatible."  This can lead to not
only redundancy, but also incorrect behaviors when the previous info happens
to be compatible with a later vector instruction, which ends up using the
vsetvl info that should have been eliminated, as is shown in the testcase.
This patch eliminates the vsetvl when the previous info is "compatible."

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (pre_vsetvl::fuse_local_vsetvl_info):
Delete vsetvl insn when `prev_info` is compatible

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc  |  3 +++
 .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c| 19 +++
 2 files changed, 22 insertions(+)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index ce831685439a..030ffbe2ebbc 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2796,6 +2796,9 @@ pre_vsetvl::fuse_local_vsetvl_info ()
  curr_info.dump (dump_file, "");
}
  m_dem.merge (prev_info, curr_info);
+ if (!curr_info.vl_used_by_non_rvv_insn_p ()
+ && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
+   m_delete_list.safe_push (curr_info);
  if (curr_info.get_read_vl_insn ())
prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
  if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c
new file mode 100644
index ..04a8ff2945a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O2 -fno-schedule-insns 
-fdump-rtl-vsetvl-details" } */
+
+#include 
+
+vuint16m1_t
+foo (vuint16m1_t a, vuint16m1_t b, size_t avl)
+{
+  size_t vl;
+  vuint16m1_t ret;
+  uint16_t c = __riscv_vmv_x_s_u16m1_u16(a);
+  vl = __riscv_vsetvl_e8mf2 (avl);
+  ret = __riscv_vadd_vx_u16m1 (a, c, avl);
+  ret = __riscv_vadd_vv_u16m1 (ret, a, vl);
+  return ret;
+}
+
+/* { dg-final { scan-rtl-dump "Eliminate insn" "vsetvl" } }  */
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */


[gcc r15-3620] Match: Remove unnecessary types_match for case 1 of signed SAT_ADD

2024-09-13 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:45e7cc9caf327bfddd75b3093eb855b8b64acae8

commit r15-3620-g45e7cc9caf327bfddd75b3093eb855b8b64acae8
Author: Pan Li 
Date:   Fri Sep 13 11:36:40 2024 +0800

Match: Remove unnecessary types_match for case 1 of signed SAT_ADD

All commutative binary operators require matching types
for both operands.  Therefore remove the types_match check for case 1 of
the signed SAT_ADD, because we have (bit_xor @0 @1), which ensures
the operands have the correct TREE type.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Remove the types_match check for signed SAT_ADD
case 1.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4cef965c9c7a..5566c0e4c41c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3204,8 +3204,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
integer_zerop)
(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
@2)
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
-  && types_match (type, @0, @1
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
 /* Signed saturation add, case 2:
T sum = (T)((UT)X + (UT)Y)


[gcc r15-1671] Vect: Support truncate after .SAT_SUB pattern in zip

2024-06-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f2476a2649e9975d454d179145574c21d8218aee

commit r15-1671-gf2476a2649e9975d454d179145574c21d8218aee
Author: Pan Li 
Date:   Thu Jun 27 09:28:04 2024 +0800

Vect: Support truncate after .SAT_SUB pattern in zip

The zip benchmark of coremark-pro has one SAT_SUB-like pattern, but
with a truncation, as below:

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB
  } while (--n);
}

Before the vect pass it has the gimple below, which does not hit any
SAT_SUB pattern and thus cannot be vectorized to SAT_SUB.

_2 = a_11 - b_12(D);
iftmp.0_13 = (short unsigned int) _2;
_18 = a_11 >= b_12(D);
iftmp.0_5 = _18 ? iftmp.0_13 : 0;

This patch improves the pattern matching to recognize the above
as a truncate after .SAT_SUB pattern.  We will then have gimple
similar to the below, and the first 3 (now dead) stmts can be eliminated.

_2 = a_11 - b_12(D);
iftmp.0_13 = (short unsigned int) _2;
_18 = a_11 >= b_12(D);
iftmp.0_5 = (short unsigned int).SAT_SUB (a_11, b_12(D));

The below tests are passed for this patch.
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Add convert description for minus and capture.
* tree-vect-patterns.cc (vect_recog_build_binary_gimple_call): Add
new logic to handle in_type being incompatible with out_type,  as
well as rename from.
(vect_recog_build_binary_gimple_stmt): Rename to.
(vect_recog_sat_add_pattern): Leverage above renamed func.
(vect_recog_sat_sub_pattern): Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  |  4 ++--
 gcc/tree-vect-patterns.cc | 51 ---
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index cf8a399a744..820591a36b3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3164,9 +3164,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Unsigned saturation sub, case 2 (branch with ge):
SAT_U_SUB = X >= Y ? X - Y : 0.  */
 (match (unsigned_integer_sat_sub @0 @1)
- (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (ge @0 @1) (convert? (minus (convert1? @0) (convert1? @1))) 
integer_zerop)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-  && types_match (type, @0, @1
+  && TYPE_UNSIGNED (TREE_TYPE (@0)) && types_match (@0, @1
 
 /* Unsigned saturation sub, case 3 (branchless with gt):
SAT_U_SUB = (X - Y) * (X > Y).  */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index cef901808eb..519d15f2a43 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4490,26 +4490,37 @@ vect_recog_mult_pattern (vec_info *vinfo,
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 
-static gcall *
-vect_recog_build_binary_gimple_call (vec_info *vinfo, gimple *stmt,
+static gimple *
+vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
 internal_fn fn, tree *type_out,
-tree op_0, tree op_1)
+tree lhs, tree op_0, tree op_1)
 {
   tree itype = TREE_TYPE (op_0);
-  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+  tree otype = TREE_TYPE (lhs);
+  tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
+  tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
 
-  if (vtype != NULL_TREE
-&& direct_internal_fn_supported_p (fn, vtype, OPTIMIZE_FOR_BOTH))
+  if (v_itype != NULL_TREE && v_otype != NULL_TREE
+&& direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
 {
   gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+  tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
 
-  gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+  gimple_call_set_lhs (call, in_ssa);
   gimple_call_set_nothrow (call, /* nothrow_p */ false);
-  gimple_set_location (call, gimple_location (stmt));
+  gimple_set_location (call, gimple_location (STMT_VINFO_STMT 
(stmt_info)));
+
+  *type_out = v_otype;
 
-  *type_out = vtype;
+  if (types_compatible_p (itype, otype))
+   return call;
+  else
+   {
+ append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
+ tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
 
-  return call;
+ return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
+   }
 }
 
   return NULL;
@@ -4541,13 +4552,13 @@ vect_recog_sat_add_pattern (vec_info *vinfo, 
stmt_v

[gcc r15-1672] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int

2024-06-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:212441e19d8179645efbec6dd98a74eb673734dd

commit r15-1672-g212441e19d8179645efbec6dd98a74eb673734dd
Author: Pan Li 
Date:   Wed Jun 26 09:28:05 2024 +0800

Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int

This patch would like to add the middle-end representation for the
saturation truncation, i.e. set the result of the truncation to
the max value of the narrow type on overflow.  It matches patterns
similar to the one below.

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##T##_fmt_1 (WT x)\
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

For example, when truncating uint16_t to uint8_t, we have

* SAT_TRUNC (254)   => 254
* SAT_TRUNC (255)   => 255
* SAT_TRUNC (256)   => 255
* SAT_TRUNC (65535) => 255

Given below SAT_TRUNC from uint64_t to uint32_t.

DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)

Before this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  _Bool overflow;
  unsigned int _1;
  unsigned int _2;
  unsigned int _3;
  uint32_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  overflow_5 = x_4(D) > 4294967295;
  _1 = (unsigned int) x_4(D);
  _2 = (unsigned int) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;succ:   EXIT

}

After this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
  uint32_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;succ:   EXIT

}

The below tests are passed for this patch:
*. The rv64gcv fully regression tests.
*. The rv64gcv build with glibc.
*. The x86 bootstrap tests.
*. The x86 fully regression tests.

gcc/ChangeLog:

* internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
unary_convert.
* match.pd: Add new matching pattern for unsigned int sat_trunc.
* optabs.def (OPTAB_CL): Add unsigned and signed optab.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_trunc): Add
new decl for the matching pattern generated func.
(match_unsigned_saturation_trunc): Add new func impl to match
the .SAT_TRUNC.
(math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
function under BIT_IOR_EXPR case.

Signed-off-by: Pan Li 

Diff:
---
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 16 
 gcc/optabs.def|  3 +++
 gcc/tree-ssa-math-opts.cc | 32 
 4 files changed, 53 insertions(+)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a8c83437ada..915d329c05a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | 
ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, 
unary_convert)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 820591a36b3..3fa3f2e8296 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3210,6 +3210,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT).
+   SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
+(match (unsigned_integer_sat_trunc @0)
+ (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
+   (convert @0))
+ (with {
+   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+   unsigned otype_precision = TYPE_PRECISION (type);
+   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
+   wide_int int_cst = wi::to_wide (@1, itype_precision);
+  }
+  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+   && TYPE_UNSIGNED (TREE_TYPE (@0))
+   && otype_precision < itype_precision
+   && wi::eq_p (trunc_max, int_cst)
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 2f36ed4cb42..a69af51d601 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
 OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", 
gen_satfract_conv_libfunc)
 OPTAB_CL(satfractuns_op

[gcc r15-1676] RISC-V: Add testcases for vector truncate after .SAT_SUB

2024-06-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b55798c0fc5cb02512b58502961d8425fb60588f

commit r15-1676-gb55798c0fc5cb02512b58502961d8425fb60588f
Author: Pan Li 
Date:   Mon Jun 24 22:25:57 2024 +0800

RISC-V: Add testcases for vector truncate after .SAT_SUB

This patch would like to add the test cases of the vector truncate after
.SAT_SUB.  Aka:

  #define DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T)   \
  void __attribute__((noinline))   \
  vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \
 unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
IN_T x = op_1[i];  \
out[i] = (OUT_T)(x >= y ? x - y : 0);  \
  }\
  }

The below 3 cases are included.

DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint8_t, uint16_t)
DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint16_t, uint32_t)
DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint32_t, uint64_t)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
test macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-1.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-2.c: 
New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-3.c: 
New test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h| 19 ++
 .../rvv/autovec/binop/vec_sat_binary_scalar.h  | 27 
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-1.c  | 21 ++
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-2.c  | 21 ++
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-3.c  | 21 ++
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-1.c  | 74 ++
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-2.c  | 74 ++
 .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-3.c  | 74 ++
 8 files changed, 331 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index d5c81fbe5a9..a3116033fb3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -310,4 +310,23 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_SUB_FMT_10(T, out, op_1, op_2, N) \
   vec_sat_u_sub_##T##_fmt_10(out, op_1, op_2, N)
 
+/**/
+/* Saturation Sub Truncated (Unsigned and Signed) 
*/
+/**/
+#define DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T)   \
+void __attribute__((noinline))   \
+vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \
+unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  IN_T x = op_1[i];  \
+  out[i] = (OUT_T)(x >= y ? x - y : 0);  \
+}\
+}
+
+#define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \
+  vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N)
+
 #endif
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h
new file mode 100644
index 000..c79b180054e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h
@@ -0,0 +1,27 @@
+#ifndef HAVE_DEFINED_VEC_SAT_BINARY_SCALAR
+#define HAVE_DEFINED_VEC_SAT_BINARY_SCALAR
+
+int
+main ()
+{
+  unsigned i, k;
+  OUT_T out[N];
+
+  for (i = 0; i < size

[gcc r15-1721] Match: Support imm form for unsigned scalar .SAT_ADD

2024-06-29 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:21e3565927eda5ce9907d91100623052fa8182cd

commit r15-1721-g21e3565927eda5ce9907d91100623052fa8182cd
Author: Pan Li 
Date:   Fri Jun 28 11:33:41 2024 +0800

Match: Support imm form for unsigned scalar .SAT_ADD

This patch would like to support the form of unsigned scalar .SAT_ADD
where one of the operands is an immediate.  For example, as below:

Form IMM:
  #define DEF_SAT_U_ADD_IMM_FMT_1(T)   \
  T __attribute__((noinline))  \
  sat_u_add_imm_##T##_fmt_1 (T x)  \
  {\
return (T)(x + 9) >= x ? (x + 9) : -1; \
  }

DEF_SAT_U_ADD_IMM_FMT_1(uint64_t)

Before this patch:
__attribute__((noinline))
uint64_t sat_u_add_imm_uint64_t_fmt_1 (uint64_t x)
{
  long unsigned int _1;
  uint64_t _3;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _1 = MIN_EXPR ;
  _3 = _1 + 9;
  return _3;
;;succ:   EXIT

}

After this patch:
__attribute__((noinline))
uint64_t sat_u_add_imm_uint64_t_fmt_1 (uint64_t x)
{
  uint64_t _3;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _3 = .SAT_ADD (x_2(D), 9); [tail call]
  return _3;
;;succ:   EXIT

}

The below test suites are passed for this patch:
1. The rv64gcv fully regression test with newlib.
2. The x86 bootstrap test.
3. The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Add imm form for .SAT_ADD matching.
* tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
Add .SAT_ADD matching under PLUS_EXPR.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  | 24 
 gcc/tree-ssa-math-opts.cc |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3fa3f2e8296..7fff7b5f9fe 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3154,6 +3154,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (cond^ (gt @0 (usadd_left_part_1@2 @0 @1)) integer_minus_onep @2))
 
+/* Unsigned saturation add, case 9 (one op is imm):
+   SAT_U_ADD = (X + 3) >= x ? (X + 3) : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (plus (min @0 INTEGER_CST@2) INTEGER_CST@1)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1))
+  (with
+   {
+unsigned precision = TYPE_PRECISION (type);
+wide_int cst_1 = wi::to_wide (@1);
+wide_int cst_2 = wi::to_wide (@2);
+wide_int max = wi::mask (precision, false, precision);
+wide_int sum = wi::add (cst_1, cst_2);
+   }
+   (if (wi::eq_p (max, sum))
+
+/* Unsigned saturation add, case 10 (one op is imm):
+   SAT_U_ADD = __builtin_add_overflow (X, 3, &ret) == 0 ? ret : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0
+
 /* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 3783a874699..3b5433ec000 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6195,6 +6195,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
  break;
 
case PLUS_EXPR:
+ match_unsigned_saturation_add (&gsi, as_a (stmt));
+ /* fall-through  */
case MINUS_EXPR:
  if (!convert_plusminus_to_widen (&gsi, stmt, code))
{


[gcc r15-1753] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 1

2024-07-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ed213b384fdca9375c3ec53c2a0eae134fb98612

commit r15-1753-ged213b384fdca9375c3ec53c2a0eae134fb98612
Author: Pan Li 
Date:   Sun Jun 30 16:03:41 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 1

This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 1.  Aka:

Form 1:
  #define DEF_SAT_U_ADD_IMM_FMT_1(T)   \
  T __attribute__((noinline))  \
  sat_u_add_imm_##T##_fmt_1 (T x)  \
  {\
return (T)(x + 9) >= x ? (x + 9) : -1; \
  }

DEF_SAT_U_ADD_IMM_FMT_1(uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-1.c: New test.
* gcc.target/riscv/sat_u_add_imm-2.c: New test.
* gcc.target/riscv/sat_u_add_imm-3.c: New test.
* gcc.target/riscv/sat_u_add_imm-4.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-1.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-2.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-3.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-4.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c   | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c   | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-3.c   | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-4.c   | 17 
 .../gcc.target/riscv/sat_u_add_imm-run-1.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-2.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-3.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-4.c | 46 ++
 9 files changed, 269 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 0c2e44af718..4ec4ec36cc1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -60,6 +60,16 @@ sat_u_add_##T##_fmt_6 (T x, T y)\
 #define RUN_SAT_U_ADD_FMT_5(T, x, y) sat_u_add_##T##_fmt_5(x, y)
 #define RUN_SAT_U_ADD_FMT_6(T, x, y) sat_u_add_##T##_fmt_6(x, y)
 
+#define DEF_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
+T __attribute__((noinline))  \
+sat_u_add_imm##IMM##_##T##_fmt_1 (T x)   \
+{\
+  return (T)(x + IMM) >= x ? (x + IMM) : -1; \
+}
+
+#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c
new file mode 100644
index 000..14e9b7595a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_1:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_1(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c
new file mode 100644
index 000..c1a3c6ff21d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_1:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_1(uint16_t, 3)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsui

[gcc r15-1754] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 2

2024-07-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:bff0d025aff8efaa5d991fcd13dd9876b115dc94

commit r15-1754-gbff0d025aff8efaa5d991fcd13dd9876b115dc94
Author: Pan Li 
Date:   Sun Jun 30 16:14:38 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 2

This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 2.  Aka:

Form 2:
  #define DEF_SAT_U_ADD_IMM_FMT_2(T)  \
  T __attribute__((noinline)) \
  sat_u_add_imm_##T##_fmt_2 (T x) \
  {   \
return (T)(x + 9) < x ? -1 : (x + 9); \
  }

DEF_SAT_U_ADD_IMM_FMT_2(uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-5.c: New test.
* gcc.target/riscv/sat_u_add_imm-6.c: New test.
* gcc.target/riscv/sat_u_add_imm-7.c: New test.
* gcc.target/riscv/sat_u_add_imm-8.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-5.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-6.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-7.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c   | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c   | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-7.c   | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-8.c   | 17 
 .../gcc.target/riscv/sat_u_add_imm-run-5.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-6.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-7.c | 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-8.c | 46 ++
 9 files changed, 269 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 4ec4ec36cc1..d94f0fd602c 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -67,9 +67,19 @@ sat_u_add_imm##IMM##_##T##_fmt_1 (T x)   \
   return (T)(x + IMM) >= x ? (x + IMM) : -1; \
 }
 
+#define DEF_SAT_U_ADD_IMM_FMT_2(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_add_imm##IMM##_##T##_fmt_2 (T x)  \
+{   \
+  return (T)(x + IMM) < x ? -1 : (x + IMM); \
+}
+
 #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
 
+#define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c
new file mode 100644
index 000..19b502db6c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_2:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_2(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c
new file mode 100644
index 000..0317370b67e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_2:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_2(uint16_t, 3)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD "

[gcc r15-1755] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 3

2024-07-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6d98e88f61f9b2e6864775ce390e9ce0a1359624

commit r15-1755-g6d98e88f61f9b2e6864775ce390e9ce0a1359624
Author: Pan Li 
Date:   Sun Jun 30 16:41:16 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 3

This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 3.  Aka:

Form 3:
  #define DEF_SAT_U_ADD_IMM_FMT_3(T)   \
  T __attribute__((noinline))  \
  sat_u_add_imm_##T##_fmt_3 (T x)  \
  {\
T ret; \
return __builtin_add_overflow (x, 8, &ret) ? -1 : ret; \
  }

DEF_SAT_U_ADD_IMM_FMT_3(uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-10.c: New test.
* gcc.target/riscv/sat_u_add_imm-11.c: New test.
* gcc.target/riscv/sat_u_add_imm-12.c: New test.
* gcc.target/riscv/sat_u_add_imm-9.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-10.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-11.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-12.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 11 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c  | 17 
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-9.c   | 19 +
 .../gcc.target/riscv/sat_u_add_imm-run-10.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-11.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-12.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-9.c | 46 ++
 9 files changed, 270 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index d94f0fd602c..83b294db476 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -74,12 +74,23 @@ sat_u_add_imm##IMM##_##T##_fmt_2 (T x)  \
   return (T)(x + IMM) < x ? -1 : (x + IMM); \
 }
 
+#define DEF_SAT_U_ADD_IMM_FMT_3(T, IMM)\
+T __attribute__((noinline))\
+sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
+{  \
+  T ret;   \
+  return __builtin_add_overflow (x, IMM, &ret) ? -1 : ret; \
+}
+
 #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
 
 #define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort ()
 
+#define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort ()
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c
new file mode 100644
index 000..24cdd267cca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm3_uint16_t_fmt_3:
+** addi\s+[atx][0-9]+,\s*a0,\s*3
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_3(uint16_t, 3)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
new file mode 100644
index 000..f30e2405a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "

[gcc r15-1756] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 4

2024-07-01 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:7a65ab6b5f38d3018ffd456f278a9fd885487a27

commit r15-1756-g7a65ab6b5f38d3018ffd456f278a9fd885487a27
Author: Pan Li 
Date:   Sun Jun 30 16:48:19 2024 +0800

RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 4

This patch would like to add test cases for the unsigned scalar
.SAT_ADD IMM form 4.  Aka:

Form 4:
  #define DEF_SAT_U_ADD_IMM_FMT_4(T)\
  T __attribute__((noinline))   \
  sat_u_add_imm_##T##_fmt_4 (T x)   \
  { \
T ret;  \
return __builtin_add_overflow (x, 9, &ret) == 0 ? ret : -1; \
  }

DEF_SAT_U_ADD_IMM_FMT_4(uint64_t)

The below test is passed for this patch.
* The rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add helper test macro.
* gcc.target/riscv/sat_u_add_imm-13.c: New test.
* gcc.target/riscv/sat_u_add_imm-14.c: New test.
* gcc.target/riscv/sat_u_add_imm-15.c: New test.
* gcc.target/riscv/sat_u_add_imm-16.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-13.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-14.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-15.c: New test.
* gcc.target/riscv/sat_u_add_imm-run-16.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 11 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c  | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c  | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-16.c  | 17 
 .../gcc.target/riscv/sat_u_add_imm-run-13.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-14.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-15.c| 46 ++
 .../gcc.target/riscv/sat_u_add_imm-run-16.c| 46 ++
 9 files changed, 270 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 83b294db476..75442c94dc1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -82,6 +82,14 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
   return __builtin_add_overflow (x, IMM, &ret) ? -1 : ret; \
 }
 
+#define DEF_SAT_U_ADD_IMM_FMT_4(T, IMM) \
+T __attribute__((noinline)) \
+sat_u_add_imm##IMM##_##T##_fmt_4 (T x)  \
+{   \
+  T ret;\
+  return __builtin_add_overflow (x, IMM, &ret) == 0 ? ret : -1; \
+}
+
 #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort ()
 
@@ -91,6 +99,9 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \
 #define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort ()
 
+#define RUN_SAT_U_ADD_IMM_FMT_4(T, x, IMM, expect) \
+  if (sat_u_add_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c
new file mode 100644
index 000..a3b2679233c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_imm9_uint8_t_fmt_4:
+** addi\s+[atx][0-9]+,\s*a0,\s*9
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_IMM_FMT_4(uint8_t, 9)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c
new file mode 100644
index 000..968534b74da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-e

[gcc r15-1805] RISC-V: Fix asm check failure for truncated after SAT_SUB

2024-07-03 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318

commit r15-1805-gab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318
Author: Pan Li 
Date:   Wed Jul 3 13:17:16 2024 +0800

RISC-V: Fix asm check failure for truncated after SAT_SUB

It seems that the asm check is incorrect for the truncation after SAT_SUB;
we should use the vx check for vssubu instead of the vv check.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c:
Update vssubu check from vv to vx.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c:
Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c:
Ditto.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c  | 2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c
index dd9e3999a29..1e380657d74 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c
@@ -11,7 +11,7 @@
 ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
 ** ...
 ** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
-** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+
 ** vsetvli\s+zero,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma
 ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+
 ** ...
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c
index 738d1465a01..d7b8931f0ec 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c
@@ -11,7 +11,7 @@
 ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
 ** ...
 ** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
-** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+
 ** vsetvli\s+zero,\s*zero,\s*e16,\s*mf2,\s*ta,\s*ma
 ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+
 ** ...
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c
index b008b21cf0c..edf42a1f776 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c
@@ -11,7 +11,7 @@
 ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
 ** ...
 ** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
-** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+
 ** vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma
 ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+
 ** ...


[gcc r15-1819] Vect: Support IFN SAT_TRUNC for unsigned vector int

2024-07-03 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58

commit r15-1819-g8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58
Author: Pan Li 
Date:   Tue Jul 2 21:23:43 2024 +0800

Vect: Support IFN SAT_TRUNC for unsigned vector int

This patch would like to support the .SAT_TRUNC for the unsigned
vector int.  Given we have below example code:

Form 1
  #define VEC_DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  void __attribute__((noinline))   \
  vec_sat_u_truc_##WT##_to_##NT##_fmt_1 (NT *x, WT *y, unsigned limit) \
  {\
for (unsigned i = 0; i < limit; i++)   \
  {\
bool overflow = y[i] > (WT)(NT)(-1);   \
x[i] = ((NT)y[i]) | (NT)-overflow; \
  }\
  }

VEC_DEF_SAT_U_TRUC_FMT_1 (uint32_t, uint64_t)

Before this patch:
void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, 
unsigned int limit)
{
  ...
  _51 = .SELECT_VL (ivtmp_49, POLY_INT_CST [2, 2]);
  ivtmp_35 = _51 * 8;
  vect__4.7_32 = .MASK_LEN_LOAD (vectp_y.5_34, 64B, { -1, ... }, _51, 0);
  mask_overflow_16.8_30 = vect__4.7_32 > { 4294967295, ... };
  vect__5.9_29 = (vector([2,2]) unsigned int) vect__4.7_32;
  vect__10.13_20 = .VCOND_MASK (mask_overflow_16.8_30, { 4294967295, ... }, 
vect__5.9_29);
  ivtmp_12 = _51 * 4;
  .MASK_LEN_STORE (vectp_x.14_11, 32B, { -1, ... }, _51, 0, vect__10.13_20);
  vectp_y.5_33 = vectp_y.5_34 + ivtmp_35;
  vectp_x.14_46 = vectp_x.14_11 + ivtmp_12;
  ivtmp_50 = ivtmp_49 - _51;
  if (ivtmp_50 != 0)
  ...
}

After this patch:
void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, 
unsigned int limit)
{
  ...
  _12 = .SELECT_VL (ivtmp_21, POLY_INT_CST [2, 2]);
  ivtmp_34 = _12 * 8;
  vect__4.7_31 = .MASK_LEN_LOAD (vectp_y.5_33, 64B, { -1, ... }, _12, 0);
  vect_patt_40.8_30 = .SAT_TRUNC (vect__4.7_31); // << .SAT_TRUNC
  ivtmp_29 = _12 * 4;
  .MASK_LEN_STORE (vectp_x.9_28, 32B, { -1, ... }, _12, 0, 
vect_patt_40.8_30);
  vectp_y.5_32 = vectp_y.5_33 + ivtmp_34;
  vectp_x.9_27 = vectp_x.9_28 + ivtmp_29;
  ivtmp_20 = ivtmp_21 - _12;
  if (ivtmp_20 != 0)
  ...
}

The below test suites are passed for this patch
* The x86 bootstrap test.
* The x86 fully regression test.
* The rv64gcv fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_unsigned_integer_sat_trunc): Add
new decl generated by match.
(vect_recog_sat_trunc_pattern): Add new func impl to recog the
.SAT_TRUNC pattern.

Signed-off-by: Pan Li 

Diff:
---
 gcc/tree-vect-patterns.cc | 54 +++
 1 file changed, 54 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 519d15f2a43..86e893a1c43 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4489,6 +4489,7 @@ vect_recog_mult_pattern (vec_info *vinfo,
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
 static gimple *
 vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
@@ -4603,6 +4604,58 @@ vect_recog_sat_sub_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
   return NULL;
 }
 
+/*
+ * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
+ *   overflow_5 = x_4(D) > 4294967295;
+ *   _1 = (unsigned int) x_4(D);
+ *   _2 = (unsigned int) overflow_5;
+ *   _3 = -_2;
+ *   _6 = _1 | _3;
+ *
+ * And then simplied to
+ *   _6 = .SAT_TRUNC (x_4(D));
+ */
+
+static gimple *
+vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+ tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+return NULL;
+
+  tree ops[1];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL))
+{
+  tree itype = TREE_TYPE (ops[0]);
+  tree otype = TREE_TYPE (lhs);
+  tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
+  tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
+  internal_fn fn = IFN_SAT_TRUNC;
+
+  if (v_itype != NULL_TREE && v_otype != NULL_TREE
+   && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
+  OPTIMIZE_FOR_BOTH))
+   {
+ gcall *c

[gcc r15-1820] Match: Allow more types truncation for .SAT_TRUNC

2024-07-03 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:44c767c06b6882d05fe56f4a3e03195101402fb0

commit r15-1820-g44c767c06b6882d05fe56f4a3e03195101402fb0
Author: Pan Li 
Date:   Tue Jul 2 08:57:50 2024 +0800

Match: Allow more types truncation for .SAT_TRUNC

The .SAT_TRUNC has the input and output types,  aka cvt from
itype to otype and the sizeof (otype) < sizeof (itype).  The
previous patch only allows the sizeof (otype) == sizeof (itype) / 2.
But actually we have 1/4 and 1/8 truncation.

This patch would like to support more types of truncation when
sizeof (otype) < sizeof (itype).  The below truncation will be
covered.

* uint64_t => uint8_t
* uint64_t => uint16_t
* uint64_t => uint32_t
* uint32_t => uint8_t
* uint32_t => uint16_t
* uint16_t => uint8_t

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* match.pd: Allow any otype is less than itype truncation.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index a2e205b3207..4edfa2ae2c9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3239,16 +3239,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_trunc @0)
  (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
(convert @0))
- (with {
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (with
+  {
unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
unsigned otype_precision = TYPE_PRECISION (type);
-   wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
+   wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
wide_int int_cst = wi::to_wide (@1, itype_precision);
   }
-  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-   && TYPE_UNSIGNED (TREE_TYPE (@0))
-   && otype_precision < itype_precision
-   && wi::eq_p (trunc_max, int_cst)
+  (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */


[gcc r15-1822] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

2024-07-03 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:de9254e224eb3d89303cb9b3ba50b4c479c55f7c

commit r15-1822-gde9254e224eb3d89303cb9b3ba50b4c479c55f7c
Author: Pan Li 
Date:   Wed Jul 3 22:06:48 2024 +0800

RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

According to the ISA,  the zvfhmin sub extension should only contain
conversion insns.  Thus,  the vfmv insn acting on FP16 should not be
present when only the zvfhmin option is given.

This patch would like to fix it by splitting the pred_broadcast define_insn
into zvfhmin and zvfh parts.  Given the example below:

void test (_Float16 *dest, _Float16 bias) {
  dest[0] = bias;
  dest[1] = bias;
}

When compiling with -march=rv64gcv_zfh_zvfhmin:

Before this patch:
test:
  vsetivli zero,2,e16,mf4,ta,ma
  vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin
  vse16.v v1,0(a0)
  ret

After this patch:
test:
  addi sp,sp,-16
  fsh  fa0,14(sp)
  addi a5,sp,14
  vsetivli zero,2,e16,mf4,ta,ma
  vlse16.v v1,0(a5),zero
  vse16.v  v1,0(a0)
  addi sp,sp,16
  jr   ra

PR target/115763

gcc/ChangeLog:

* config/riscv/vector.md (*pred_broadcast): Split into
zvfh and zvfhmin part.
(*pred_broadcast_zvfh): New define_insn for zvfh part.
(*pred_broadcast_zvfhmin): Ditto but for zvfhmin.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
* gcc.target/riscv/rvv/base/pr115763-1.c: New test.
* gcc.target/riscv/rvv/base/pr115763-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/vector.md | 49 +++---
 .../gcc.target/riscv/rvv/base/pr115763-1.c |  9 
 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 +
 .../gcc.target/riscv/rvv/base/scalar_move-5.c  |  4 +-
 .../gcc.target/riscv/rvv/base/scalar_move-6.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-7.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-8.c  |  6 +--
 7 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index fe18ee5b5f7..d9474262d54 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2080,31 +2080,50 @@
   [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
(set_attr "mode" "")])
 
-(define_insn "*pred_broadcast"
-  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, 
vr, vr, vr, vr, vr, vr")
-   (if_then_else:V_VLSF_ZVFHMIN
+(define_insn "*pred_broadcast_zvfh"
+  [(set (match_operand:V_VLSF0 "register_operand"  "=vr,  vr,  
vr,  vr")
+   (if_then_else:V_VLSF
  (unspec:
-   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, 
vm, vm,Wc1,Wc1,Wb1,Wb1")
-(match_operand 4 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK")
-(match_operand 5 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
+   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, 
Wb1, Wb1")
+(match_operand  4 "vector_length_operand" " rK,  rK,  
rK,  rK")
+(match_operand  5 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  6 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  7 "const_int_operand" "  i,   i,   
i,   i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
-   (match_operand: 3 "direct_broadcast_operand"   " f,  
f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu,  0, 
vu,  0, vu,  0, vu,  0")))]
+ (vec_duplicate:V_VLSF
+   (match_operand: 3 "direct_broadcast_operand"  "  f,   f,   
f,   f"))
+ (match_operand:V_VLSF  2 "vector_merge_operand"  " vu,   0,  
vu,   0")))]
   "TARGET_VECTOR"
   "@
vfmv.v.f\t%0,%3
vfmv.v.f\t%0,%3
+   vfmv.s.f\t%0,%3
+   vfmv.s.f\t%0,%3"
+  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
+   (set_attr "mode" "")])
+
+(define_insn "*pred_broadcast_zvfhmin"
+  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"  
"=vr,  vr,  vr,  vr")
+   (if_then_else:V_VLSF_ZVFHMIN
+ (unspec:
+   [(match_operand:1 

[gcc r15-1894] RISC-V: Implement .SAT_TRUNC for vector unsigned int

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:dafd63d7c5cddce1e00803606e742d75927b1a1e

commit r15-1894-gdafd63d7c5cddce1e00803606e742d75927b1a1e
Author: Pan Li 
Date:   Fri Jul 5 09:02:47 2024 +0800

RISC-V: Implement .SAT_TRUNC for vector unsigned int

This patch would like to implement the .SAT_TRUNC for the RISC-V
backend,  with the help of the RVV Vector Narrowing Fixed-Point
Clip Instructions.  The SEWs below are supported:

* e64 => e32
* e64 => e16
* e64 => e8
* e32 => e16
* e32 => e8
* e16 => e8

Take below example to see the changes to asm.
Form 1:
  #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \
  void __attribute__((noinline))\
  vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
bool overflow = x > (WT)(NT)(-1);   \
out[i] = ((NT)x) | (NT)-overflow;   \
  } \
  }

DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t)

Before this patch:
.L3:
  vsetvli  a5,a2,e64,m1,ta,ma
  vle64.v  v1,0(a1)
  vmsgtu.vv    v0,v1,v2
  vsetvli  zero,zero,e32,mf2,ta,ma
  vncvt.x.x.w  v1,v1
  vmerge.vim   v1,v1,-1,v0
  vse32.v  v1,0(a0)
  slli a4,a5,3
  add  a1,a1,a4
  slli a4,a5,2
  add  a0,a0,a4
  sub  a2,a2,a5
  bne  a2,zero,.L3

After this patch:
.L3:
  vsetvli  a5,a2,e32,mf2,ta,ma
  vle64.v  v1,0(a1)
  vnclipu.wi   v1,v1,0
  vse32.v  v1,0(a0)
  slli a4,a5,3
  add  a1,a1,a4
  slli a4,a5,2
  add  a0,a0,a4
  sub  a2,a2,a5
  bne  a2,zero,.L3

Passed the rv64gcv fully regression tests.

gcc/ChangeLog:

* config/riscv/autovec.md (ustrunc<mode><v_double_trunc>2): Add
new pattern for double truncation.
(ustrunc<mode><v_quad_trunc>2): Ditto but for quad truncation.
(ustrunc<mode><v_oct_trunc>2): Ditto but for oct truncation.
* config/riscv/riscv-protos.h (expand_vec_double_ustrunc): Add
new func decl to expand double vec ustrunc.
(expand_vec_quad_ustrunc): Ditto but for quad.
(expand_vec_oct_ustrunc): Ditto but for oct.
* config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new
func impl to expand vector double ustrunc.
(expand_vec_quad_ustrunc): Ditto but for quad.
(expand_vec_oct_ustrunc): Ditto but for oct.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
test macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec.md|  35 ++
 gcc/config/riscv/riscv-protos.h|   4 +
 gcc/config/riscv/riscv-v.cc|  46 +++
 .../riscv/rvv/autovec/binop/vec_sat_arith.h|  22 ++
 .../riscv/rvv/autovec/unop/vec_sat_data.h  | 394 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c |  19 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c |  21 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c |  23 ++
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c |  19 +
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c

[gcc r15-1903] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:35b1096896a94a90d787f5ef402ba009dd4f0393

commit r15-1903-g35b1096896a94a90d787f5ef402ba009dd4f0393
Author: Pan Li 
Date:   Mon Jul 8 20:31:31 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1

After the middle-end supported the vector mode of .SAT_ADD,  add more
testcases to ensure the correctness of RISC-V backend for form 1.  Aka:

Form 1:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_1 (uint64_t, 9)

Passed the fully rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help
test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-4.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h|  25 ++
 .../riscv/rvv/autovec/binop/vec_sat_data.h | 256 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c  |  14 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c  |  14 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-1.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-2.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-3.c|  28 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-4.c|  28 +++
 10 files changed, 449 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index b55a589e019a..3733c8fd2c15 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -4,6 +4,14 @@
 #include 
 #include 
 
+#define VALIDATE_RESULT(out, expect, N)  \
+  do \
+{\
+  for (unsigned i = 0; i < N; i++)   \
+if (out[i] != expect[i]) __builtin_abort (); \
+}\
+  while (false)
+
 
/**/
 /* Saturation Add (unsigned and signed)   
*/
 
/**/
@@ -139,6 +147,23 @@ vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N)
 
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)  \
+T __attribute__((noinline))  \
+vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \
+}
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \
+  DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)
+
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \
+  vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \
+  VALIDATE_RESULT (out, expect, N)
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \
+  RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/

[gcc r15-1904] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ecde8d50bea3573194f21277666f83463cbbe9c9

commit r15-1904-gecde8d50bea3573194f21277666f83463cbbe9c9
Author: Pan Li 
Date:   Mon Jul 8 21:58:59 2024 +0800

RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2

After the middle-end supported the vector mode of .SAT_ADD,  add more
testcases to ensure the correctness of RISC-V backend for form 2.  Aka:

Form 2:
  #define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)  \
  T __attribute__((noinline))  \
  vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM);  \
  }

DEF_VEC_SAT_U_ADD_IMM_FMT_2 (uint64_t, 9)

Passed the fully rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help
test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-5.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-6.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-7.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-8.c: New 
test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h| 17 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c  | 14 +++
 .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c  | 14 +++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-5.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-6.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-7.c| 28 ++
 .../rvv/autovec/binop/vec_sat_u_add_imm-run-8.c| 28 ++
 9 files changed, 185 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 3733c8fd2c15..10459807b2c4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -158,12 +158,29 @@ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, 
unsigned limit) \
 #define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \
   DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM)
 
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)  \
+T __attribute__((noinline))  \
+vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM);  \
+}
+#define DEF_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, IMM) \
+  DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM)
+
 #define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \
   vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \
   VALIDATE_RESULT (out, expect, N)
 #define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \
   RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N)
 
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) \
+  vec_sat_u_add_imm##IMM##_##T##_fmt_2(out, op_1, N); \
+  VALIDATE_RESULT (out, expect, N)
+#define RUN_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, out, op_1, expect, IMM, N) \
+  RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c
new file mode 100644
index ..d25fdcf78f38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mab

[gcc r14-10396] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

2024-07-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:505382ceee0b5e72dc5defa05aec77a97658feca

commit r14-10396-g505382ceee0b5e72dc5defa05aec77a97658feca
Author: Pan Li 
Date:   Wed Jul 3 22:06:48 2024 +0800

RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

According to the ISA, the zvfhmin sub-extension should only contain
conversion insns.  Thus, a vfmv insn acting on FP16 should not be
emitted when only the zvfhmin option is given.

This patch fixes it by splitting the pred_broadcast define_insn
into a zvfhmin part and a zvfh part.  Given the example below:

void test (_Float16 *dest, _Float16 bias) {
  dest[0] = bias;
  dest[1] = bias;
}

when compile with -march=rv64gcv_zfh_zvfhmin

Before this patch:
test:
  vsetivli zero,2,e16,mf4,ta,ma
  vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin
  vse16.v v1,0(a0)
  ret

After this patch:
test:
  addi sp,sp,-16
  fsh  fa0,14(sp)
  addi a5,sp,14
  vsetivli zero,2,e16,mf4,ta,ma
  vlse16.v v1,0(a5),zero
  vse16.v  v1,0(a0)
  addi sp,sp,16
  jr   ra

PR target/115763

gcc/ChangeLog:

* config/riscv/vector.md (*pred_broadcast<mode>): Split into
zvfh and zvfhmin part.
(*pred_broadcast<mode>_zvfh): New define_insn for zvfh part.
(*pred_broadcast<mode>_zvfhmin): Ditto but for zvfhmin.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
* gcc.target/riscv/rvv/base/pr115763-1.c: New test.
* gcc.target/riscv/rvv/base/pr115763-2.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit de9254e224eb3d89303cb9b3ba50b4c479c55f7c)

Diff:
---
 gcc/config/riscv/vector.md | 49 +++---
 .../gcc.target/riscv/rvv/base/pr115763-1.c |  9 
 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 +
 .../gcc.target/riscv/rvv/base/scalar_move-5.c  |  4 +-
 .../gcc.target/riscv/rvv/base/scalar_move-6.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-7.c  |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-8.c  |  6 +--
 7 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 228d0f9a7663..03012d677d79 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2080,31 +2080,50 @@
   [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
(set_attr "mode" "")])
 
-(define_insn "*pred_broadcast"
-  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, 
vr, vr, vr, vr, vr, vr")
-   (if_then_else:V_VLSF_ZVFHMIN
+(define_insn "*pred_broadcast_zvfh"
+  [(set (match_operand:V_VLSF0 "register_operand"  "=vr,  vr,  
vr,  vr")
+   (if_then_else:V_VLSF
  (unspec:
-   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, 
vm, vm,Wc1,Wc1,Wb1,Wb1")
-(match_operand 4 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK")
-(match_operand 5 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i")
+   [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, 
Wb1, Wb1")
+(match_operand  4 "vector_length_operand" " rK,  rK,  
rK,  rK")
+(match_operand  5 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  6 "const_int_operand" "  i,   i,   
i,   i")
+(match_operand  7 "const_int_operand" "  i,   i,   
i,   i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
-   (match_operand: 3 "direct_broadcast_operand"   " f,  
f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu,  0, 
vu,  0, vu,  0, vu,  0")))]
+ (vec_duplicate:V_VLSF
+   (match_operand: 3 "direct_broadcast_operand"  "  f,   f,   
f,   f"))
+ (match_operand:V_VLSF  2 "vector_merge_operand"  " vu,   0,  
vu,   0")))]
   "TARGET_VECTOR"
   "@
vfmv.v.f\t%0,%3
vfmv.v.f\t%0,%3
+   vfmv.s.f\t%0,%3
+   vfmv.s.f\t%0,%3"
+  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
+   (set_attr "mode" "")])
+
+(define_insn "*pred_broadcast_zvfhmin"
+  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"  
"=vr,  vr,  vr,  vr")
+   (if_then_e

[gcc r15-1936] Match: Support form 2 for the .SAT_TRUNC

2024-07-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:80e446e829d818dc19daa6e671b9626e93ee4949

commit r15-1936-g80e446e829d818dc19daa6e671b9626e93ee4949
Author: Pan Li 
Date:   Fri Jul 5 20:36:35 2024 +0800

Match: Support form 2 for the .SAT_TRUNC

This patch would like to add form 2 support for the .SAT_TRUNC.  Aka:

Form 2:
  #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return overflow ? (NT)-1 : (NT)x;  \
  }

DEF_SAT_U_TRUC_FMT_2(uint32, uint64)

Before this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint32_t sat_u_truc_uint64_t_to_uint32_t_fmt_2 (uint64_t x)
   6   │ {
   7   │   uint32_t _1;
   8   │   long unsigned int _3;
   9   │
  10   │ ;;   basic block 2, loop depth 0
  11   │ ;;pred:   ENTRY
  12   │   _3 = MIN_EXPR <x_2(D), 4294967295>;
  13   │   _1 = (uint32_t) _3;
  14   │   return _1;
  15   │ ;;succ:   EXIT
  16   │
  17   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint32_t sat_u_truc_uint64_t_to_uint32_t_fmt_2 (uint64_t x)
   6   │ {
   7   │   uint32_t _1;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;pred:   ENTRY
  11   │   _1 = .SAT_TRUNC (x_2(D)); [tail call]
  12   │   return _1;
  13   │ ;;succ:   EXIT
  14   │
  15   │ }

The below test suites are passed for this patch:
1. The x86 bootstrap test.
2. The x86 fully regression test.
3. The rv64gcv fully regression test.

gcc/ChangeLog:

* match.pd: Add form 2 for .SAT_TRUNC.
* tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
Add new case NOP_EXPR,  and try to match SAT_TRUNC.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  | 17 -
 gcc/tree-ssa-math-opts.cc |  4 
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4edfa2ae2c90..3759c64d461f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3234,7 +3234,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
-/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT).
+/* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)
  (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
@@ -3250,6 +3250,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
 
+/* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
+   SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
+(match (unsigned_integer_sat_trunc @0)
+ (convert (min @0 INTEGER_CST@1))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+ (with
+  {
+   unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+   unsigned otype_precision = TYPE_PRECISION (type);
+   wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
+   wide_int int_cst = wi::to_wide (@1, itype_precision);
+  }
+  (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index a35caf5f0588..ac86be8eb947 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6170,6 +6170,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
  match_unsigned_saturation_sub (&gsi, as_a (stmt));
  break;
 
+   case NOP_EXPR:
+ match_unsigned_saturation_trunc (&gsi, as_a (stmt));
+ break;
+
default:;
}
}


[gcc r15-1959] Vect: Optimize truncation for .SAT_SUB operands

2024-07-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3918bea620e826b0df68a9c8492b791a67f294b5

commit r15-1959-g3918bea620e826b0df68a9c8492b791a67f294b5
Author: Pan Li 
Date:   Sun Jun 30 10:55:50 2024 +0800

Vect: Optimize truncation for .SAT_SUB operands

To get better vectorized code of .SAT_SUB,  we would like to avoid the
truncated operation for the assignment.  For example, as below.

unsigned int _1;
unsigned int _2;
unsigned short int _4;
_9 = (unsigned short int).SAT_SUB (_1, _2);

If we make sure that the _1 is in the range of unsigned short int.  Such
as a def similar to:

_1 = (unsigned short int)_4;

Then we can distribute the truncation operation to:

_3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
_9 = .SAT_SUB (_4, _3);

Then we can get better vectorized code and avoid the unnecessary narrowing
stmt during vectorization with the stmt(s) below.

_3 = .SAT_TRUNC(_2); // SI => HI
_9 = .SAT_SUB (_4, _3);

Let's take RISC-V vector as example to tell the changes.  For below
sample code:

__attribute__((noinline))
void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0);
  } while (--n);
}

Before this patch:
  ...
  .L3:
  vle16.v   v1,0(a3)
  vrsub.vx  v5,v2,t1
  mv        t3,a4
  addw  a4,a4,t5
  vrgather.vv   v3,v1,v5
  vsetvli   zero,zero,e32,m1,ta,ma
  vzext.vf2 v1,v3
  vssubu.vx v1,v1,a1
  vsetvli   zero,zero,e16,mf2,ta,ma
  vncvt.x.x.w   v1,v1
  vrgather.vv   v3,v1,v5
  vse16.v   v3,0(a3)
  sub   a3,a3,t4
  bgtu  t6,a4,.L3
  ...

After this patch:
test:
  ...
  .L3:
  vle16.v     v3,0(a3)
  vrsub.vx    v5,v2,a6
  mv          a7,a4
  addw        a4,a4,t3
  vrgather.vv v1,v3,v5
  vssubu.vv   v1,v1,v6
  vrgather.vv v3,v1,v5
  vse16.v     v3,0(a3)
  sub         a3,a3,t1
  bgtu        t4,a4,.L3
  ...

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_sat_sub_pattern_transform):
Add new func impl to perform the truncation distribution.
(vect_recog_sat_sub_pattern): Perform above optimize before
generate .SAT_SUB call.

Signed-off-by: Pan Li 

Diff:
---
 gcc/tree-vect-patterns.cc | 65 +++
 1 file changed, 65 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 86e893a1c433..4570c25b6647 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4566,6 +4566,70 @@ vect_recog_sat_add_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
   return NULL;
 }
 
+/*
+ * Try to transform the truncation for .SAT_SUB pattern,  mostly occurs in
+ * the benchmark zip.  Aka:
+ *
+ *   unsigned int _1;
+ *   unsigned int _2;
+ *   unsigned short int _4;
+ *   _9 = (unsigned short int).SAT_SUB (_1, _2);
+ *
+ *   if _1 is known to be in the range of unsigned short int.  For example
+ *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
+ *   truncation to:
+ *
+ *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
+ *   _9 = .SAT_SUB (_4, _3);
+ *
+ *   Then,  we can better vectorized code and avoid the unnecessary narrowing
+ *   stmt during vectorization with below stmt(s).
+ *
+ *   _3 = .SAT_TRUNC(_2); // SI => HI
+ *   _9 = .SAT_SUB (_4, _3);
+ */
+static void
+vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
+ stmt_vec_info stmt_vinfo,
+ tree lhs, tree *ops)
+{
+  tree otype = TREE_TYPE (lhs);
+  tree itype = TREE_TYPE (ops[0]);
+  unsigned itype_prec = TYPE_PRECISION (itype);
+  unsigned otype_prec = TYPE_PRECISION (otype);
+
+  if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
+return;
+
+  tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
+  tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
+  tree_pair v_pair = tree_pair (v_otype, v_itype);
+
+  if (v_otype == NULL_TREE || v_itype == NULL_TREE
+|| !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
+   OPTIMIZE_FOR_BOTH))
+return;
+
+  /* 1. Find the _4 and update ops[0] as above example.  */
+  vect_unpromoted_value unprom;
+  tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
+
+  if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
+return;
+
+  ops[0] = tmp;
+
+  /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
+ 

[gcc r15-1967] RISC-V: Add testcases for vector .SAT_SUB in zip benchmark

2024-07-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b3c686416e88bf135def0e72d316713af01445a1

commit r15-1967-gb3c686416e88bf135def0e72d316713af01445a1
Author: Pan Li 
Date:   Thu Jul 11 15:54:32 2024 +0800

RISC-V: Add testcases for vector .SAT_SUB in zip benchmark

This patch would like to add the test cases for the vector .SAT_SUB in
the zip benchmark.  Aka:

Form in zip benchmark:
  #define DEF_VEC_SAT_U_SUB_ZIP(T1, T2) \
  void __attribute__((noinline))\
  vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \
  { \
T2 a;   \
T1 *p = x;  \
do {\
  a = *--p; \
  *p = (T1)(a >= b ? a - b : 0);\
} while (--limit);  \
  }

DEF_VEC_SAT_U_SUB_ZIP(uint8_t, uint16_t)

vec_sat_u_sub_uint16_t_uint32_t_fmt_zip:
  ...
  vsetvli   a4,zero,e32,m1,ta,ma
  vmv.v.x   v6,a1
  vsetvli   zero,zero,e16,mf2,ta,ma
  vid.v v2
  li        a4,-1
  vnclipu.wi v6,v6,0   // .SAT_TRUNC
.L3:
  vle16.v   v3,0(a3)
  vrsub.vx  v5,v2,a6
  mv        a7,a4
  addw  a4,a4,t3
  vrgather.vv   v1,v3,v5
  vssubu.vv v1,v1,v6  // .SAT_SUB
  vrgather.vv   v3,v1,v5
  vse16.v   v3,0(a3)
  sub   a3,a3,t1
  bgtu  t4,a4,.L3

Passed the rv64gcv tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add test
helper macros.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: Add test
data for .SAT_SUB in zip benchmark.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip-run.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h| 18 +
 .../riscv/rvv/autovec/binop/vec_sat_binary_vx.h| 22 ++
 .../riscv/rvv/autovec/binop/vec_sat_data.h | 81 ++
 .../rvv/autovec/binop/vec_sat_u_sub_zip-run.c  | 16 +
 .../riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c| 18 +
 5 files changed, 155 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 10459807b2c4..416a1e49a47b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -322,6 +322,19 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 } \
 }
 
+#define DEF_VEC_SAT_U_SUB_ZIP(T1, T2) \
+void __attribute__((noinline))\
+vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \
+{ \
+  T2 a;   \
+  T1 *p = x;  \
+  do {\
+a = *--p; \
+*p = (T1)(a >= b ? a - b : 0);\
+  } while (--limit);  \
+}
+#define DEF_VEC_SAT_U_SUB_ZIP_WRAP(T1, T2) DEF_VEC_SAT_U_SUB_ZIP(T1, T2)
+
 #define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -352,6 +365,11 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_SUB_FMT_10(T, out, op_1, op_2, N) \
   vec_sat_u_sub_##T##_fmt_10(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_SUB_FMT_ZIP(T1, T2, x, b, N) \
+  vec_sat_u_sub_##T1##_##T2##_fmt_zip(x, b, N)
+#define RUN_VEC_SAT_U_SUB_FMT_ZIP_WRAP(T1, T2, x, b, N) \
+  RUN_VEC_SAT_U_SUB_FMT_ZIP(T1, T2, x, b, N) \
+
 
/**/
 /* Saturation Sub Truncated (Unsigned and Signed) 
*/
 
/**/
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h
new file mode 100644
index ..d238c6392def
--- /dev/null
+++ b/gcc/testsuite/gcc.target/

[gcc r15-2138] Doc: Add Standard-Names ustrunc and sstrunc for integer modes

2024-07-18 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ebac11afbcb7a52536da5f04fc524b870f5d76e0

commit r15-2138-gebac11afbcb7a52536da5f04fc524b870f5d76e0
Author: Pan Li 
Date:   Thu Jul 18 11:30:38 2024 +0800

Doc: Add Standard-Names ustrunc and sstrunc for integer modes

This patch would like to add the doc for the Standard-Names
ustrunc and sstrunc,  include both the scalar and vector integer
modes.

gcc/ChangeLog:

* doc/md.texi: Add Standard-Names ustrunc and sstrunc.

Signed-off-by: Pan Li 

Diff:
---
 gcc/doc/md.texi | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7f4335e0aac1..ecb7f34f1b92 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5543,6 +5543,18 @@ means of constraints requiring operands 1 and 0 to be 
the same location.
 @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
 Similar, for other arithmetic operations.
 
+@cindex @code{ustrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ustrunc@var{m}@var{n}2}
+Truncate the operand 1, and storing the result in operand 0.  There will
+be saturation during the trunction.  The result will be saturated to the
+maximal value of operand 0 type if there is overflow when truncation.  The
+operand 1 must have mode @var{n},  and the operand 0 must have mode @var{m}.
+Both scalar and vector integer modes are allowed.
+
+@cindex @code{sstrunc@var{m}@var{n}2} instruction pattern
+@item @samp{sstrunc@var{m}@var{n}2}
+Similar but for signed.
+
 @cindex @code{andc@var{m}3} instruction pattern
 @item @samp{andc@var{m}3}
 Like @code{and@var{m}3}, but it uses bitwise-complement of operand 2


[gcc r15-2149] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]

2024-07-18 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:02cc8494745c4235890ad58e93b5acce5a89a775

commit r15-2149-g02cc8494745c4235890ad58e93b5acce5a89a775
Author: Pan Li 
Date:   Thu Jul 18 20:16:34 2024 +0800

Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]

The SAT_TRUNC form 2 has below pattern matching.
From:
  _18 = MIN_EXPR <left_8, 4294967295>;
  iftmp.0_11 = (unsigned int) _18;

To:
  _18 = MIN_EXPR <left_8, 4294967295>;
  iftmp.0_11 = .SAT_TRUNC (left_8);

But if there is another use of _18 like below, the transform to
.SAT_TRUNC may bring no benefit.  For example:

From:
  _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
  iftmp.0_11 = (unsigned int) _18; // op_0
  stream.avail_out = iftmp.0_11;
  left_37 = left_8 - _18;  // op_0 use

To:
  _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
  iftmp.0_11 = .SAT_TRUNC (left_8);
  stream.avail_out = iftmp.0_11;
  left_37 = left_8 - _18;  // op_0 use

Pattern recognition to .SAT_TRUNC cannot eliminate the MIN_EXPR above.  The
backend (for example x86/riscv) will then emit 2-3 additional insns
after pattern recognition besides the MIN_EXPR.  Thus, keeping the normal
truncation as is should be the better choice.

The below testsuites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

PR target/115863

gcc/ChangeLog:

* match.pd: Add single_use check for .SAT_TRUNC form 2.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115863-1.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd   | 15 ++--
 gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 6818856991c6..cf359b0ec0f0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
+/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
+
+ _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
+ iftmp.0_11 = (unsigned int) _18; // op_0
+ stream.avail_out = iftmp.0_11;
+ left_37 = left_8 - _18;  // op_0 use
+
+   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
+   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
+   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
+   the better choose.  */
 (match (unsigned_integer_sat_trunc @0)
- (convert (min @0 INTEGER_CST@1))
+ (convert (min@2 @0 INTEGER_CST@1))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-  && TYPE_UNSIGNED (TREE_TYPE (@0)))
+  && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))
  (with
   {
unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c 
b/gcc/testsuite/gcc.target/i386/pr115863-1.c
new file mode 100644
index ..a672f62cec54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
@@ -0,0 +1,37 @@
+/* PR target/115863 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+#include 
+
+typedef struct z_stream_s {
+uint32_t avail_out;
+} z_stream;
+
+typedef z_stream *z_streamp;
+
+extern int deflate (z_streamp strmp);
+
+int compress2 (uint64_t *destLen)
+{
+  z_stream stream;
+  int err;
+  const uint32_t max = (uint32_t)(-1);
+  uint64_t left;
+
+  left = *destLen;
+
+  stream.avail_out = 0;
+
+  do {
+if (stream.avail_out == 0) {
+stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
+left -= stream.avail_out;
+}
+err = deflate(&stream);
+} while (err == 0);
+
+  return err;
+}
+
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */


[gcc r15-2189] RISC-V: Rearrange the test helper files for vector .SAT_*

2024-07-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:4ab19e461159989b7fb43e858190adcf480762b7

commit r15-2189-g4ab19e461159989b7fb43e858190adcf480762b7
Author: Pan Li 
Date:   Sat Jul 20 10:43:44 2024 +0800

RISC-V: Rearrange the test helper files for vector .SAT_*

Rearrange the test helper header files, and align the naming
conventions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: Move to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvv_run.h: 
...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: Move 
to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvx_run.h: 
...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: Move to...
* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx_run.h: 
...here.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust
the include file names.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c: 

[gcc r15-2209] RISC-V: Implement the .SAT_TRUNC for scalar

2024-07-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5d2115b850df63b0ecdf56efb720ad848e7afe21

commit r15-2209-g5d2115b850df63b0ecdf56efb720ad848e7afe21
Author: Pan Li 
Date:   Mon Jul 1 16:36:35 2024 +0800

RISC-V: Implement the .SAT_TRUNC for scalar

This patch implements the simple .SAT_TRUNC pattern in the riscv
backend, namely:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \
  NT __attribute__((noinline)) \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {\
bool overflow = x > (WT)(NT)(-1);  \
return ((NT)x) | (NT)-overflow;\
  }

DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;succ:   EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;succ:   EXIT

}

The test suites below passed for this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

* config/riscv/iterators.md (ANYI_DOUBLE_TRUNC): Add new iterator
for int double truncation.
(ANYI_DOUBLE_TRUNCATED): Add new attr for int double truncation.
(anyi_double_truncated): Ditto but for lowercase.
* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
func decl for expanding ustrunc
* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
impl to expand ustrunc.
* config/riscv/riscv.md (ustrunc2): 
Impl
the new pattern ustrunc2 for int.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test helper macro.
* gcc.target/riscv/sat_arith_data.h: New test.
* gcc.target/riscv/sat_u_trunc-1.c: New test.
* gcc.target/riscv/sat_u_trunc-2.c: New test.
* gcc.target/riscv/sat_u_trunc-3.c: New test.
* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/iterators.md  | 10 
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv.cc  | 40 
 gcc/config/riscv/riscv.md  | 10 
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 16 +++
 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 56 ++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c | 20 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c | 19 
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c | 16 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c | 16 +++
 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c | 16 +++
 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h  | 22 +
 13 files changed, 259 insertions(+)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index d61ed53a8b1b..734da041f0cb 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_DOUBLE_TRUNCATED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_double_truncated [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 7c0ea1b445b1..ce5e38d3dbbf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern voi

[gcc r15-2241] Internal-fn: Only allow modes describe types for internal fn[PR115961]

2024-07-23 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:905973410957891fec8a3e42eeefa4618780e0ce

commit r15-2241-g905973410957891fec8a3e42eeefa4618780e0ce
Author: Pan Li 
Date:   Thu Jul 18 17:23:36 2024 +0800

Internal-fn: Only allow modes describe types for internal fn[PR115961]

The direct_internal_fn_supported_p has no restrictions on the type
modes.  For example, a bitfield like the one below will be recognized as
.SAT_TRUNC.

struct e
{
  unsigned pre : 12;
  unsigned a : 4;
};

__attribute__((noipa))
void bug (e * v, unsigned def, unsigned use) {
  e & defE = *v;
  defE.a = min_u (use + 1, 0xf);
}

This patch adds checks to direct_internal_fn_supported_p so that it
only allows tree types described by modes.

The test suites below passed for this patch:
1. The rv64gcv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

PR target/115961

gcc/ChangeLog:

* internal-fn.cc (type_strictly_matches_mode_p): Add new func
impl to check type strictly matches mode or not.
(type_pair_strictly_matches_mode_p): Ditto but for tree type
pair.
(direct_internal_fn_supported_p): Add above check for the tree
type pair.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr115961-run-1.C: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/internal-fn.cc| 32 +++
 gcc/testsuite/g++.dg/torture/pr115961-run-1.C | 32 +++
 2 files changed, 64 insertions(+)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 95946bfd6839..8a2e07f2f965 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn)
   gcc_unreachable ();
 }
 
+/* Return true if TYPE's mode has the same format as TYPE, and if there is
+   a 1:1 correspondence between the values that the mode can store and the
+   values that the type can store.  */
+
+static bool
+type_strictly_matches_mode_p (const_tree type)
+{
+  if (VECTOR_TYPE_P (type))
+return VECTOR_MODE_P (TYPE_MODE (type));
+
+  if (INTEGRAL_TYPE_P (type))
+return type_has_mode_precision_p (type);
+
+  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
+return true;
+
+  return false;
+}
+
+/* Returns true if both types of TYPE_PAIR strictly match their modes,
+   else returns false.  */
+
+static bool
+type_pair_strictly_matches_mode_p (tree_pair type_pair)
+{
+  return type_strictly_matches_mode_p (type_pair.first)
+&& type_strictly_matches_mode_p (type_pair.second);
+}
+
 /* Return true if FN is supported for the types in TYPES when the
optimization type is OPT_TYPE.  The types are those associated with
the "type0" and "type1" fields of FN's direct_internal_fn_info
@@ -4173,6 +4202,9 @@ bool
 direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
optimization_type opt_type)
 {
+  if (!type_pair_strictly_matches_mode_p (types))
+return false;
+
   switch (fn)
 {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
diff --git a/gcc/testsuite/g++.dg/torture/pr115961-run-1.C 
b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C
new file mode 100644
index ..787f7245457d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C
@@ -0,0 +1,32 @@
+/* PR target/115961 */
+/* { dg-do run } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}


[gcc r14-9418] VECT: Fix ICE for vectorizable LD/ST when both len and store are enabled

2024-03-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:993c6de642ffeb2867edbe80ff2a72c0a2eb604e

commit r14-9418-g993c6de642ffeb2867edbe80ff2a72c0a2eb604e
Author: Pan Li 
Date:   Sun Mar 10 11:02:35 2024 +0800

VECT: Fix ICE for vectorizable LD/ST when both len and store are enabled

This patch fixes an ICE in vectorizable_store when both the loop_masks
and loop_lens are enabled.  The ICE looks like below when building
with "-march=rv64gcv -O3".

during GIMPLE pass: vect
test.c: In function ‘d’:
test.c:6:6: internal compiler error: in vectorizable_store, at
tree-vect-stmts.cc:8691
6 | void d() {
  |  ^
0x37a6f2f vectorizable_store
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:8691
0x37b861c vect_analyze_stmt(vec_info*, _stmt_vec_info*, bool*,
_slp_tree*, _slp_instance*, vec*)
.../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:13242
0x1db5dca vect_analyze_loop_operations
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:2208
0x1db885b vect_analyze_loop_2
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3041
0x1dba029 vect_analyze_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3481
0x1dbabad vect_analyze_loop(loop*, vec_info_shared*)
.../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3639
0x1e389d1 try_vectorize_loop_1
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1066
0x1e38f3d try_vectorize_loop
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1182
0x1e39230 execute
.../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1298

There are two ways to reach the vectorizer LD/ST code: one is analysis and
the other is transform.  We cannot have both the lens and the masks
enabled during transform, but it is valid during analysis.  Given that
transform doesn't require cost_vec, we enable the assert only when
cost_vec is NULL.

The test suites below passed for this patch:
* The x86 bootstrap tests.
* The x86 full regression tests.
* The aarch64 full regression tests.
* The riscv full regression tests.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_store): Enable the assert
during transform process.
(vectorizable_load): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114195-1.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c | 15 +++
 gcc/tree-vect-stmts.cc   | 18 ++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
new file mode 100644
index 000..a67b847112b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c
@@ -0,0 +1,15 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+long a, b;
+extern short c[];
+
+void d() {
+  for (int e = 0; e < 35; e = 2) {
+a = ({ a < 0 ? a : 0; });
+b = ({ b < 0 ? b : 0; });
+
+c[e] = 0;
+  }
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 14a3ffb5f02..e8617439a48 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8697,8 +8697,13 @@ vectorizable_store (vec_info *vinfo,
? &LOOP_VINFO_LENS (loop_vinfo)
: NULL);
 
-  /* Shouldn't go with length-based approach if fully masked.  */
-  gcc_assert (!loop_lens || !loop_masks);
+  /* The vect_transform_stmt and vect_analyze_stmt will go here but there
+ are some difference here.  We cannot enable both the lens and masks
+ during transform but it is allowed during analysis.
+ Shouldn't go with length-based approach if fully masked.  */
+  if (cost_vec == NULL)
+/* The cost_vec is NULL during transfrom.  */
+gcc_assert ((!loop_lens || !loop_masks));
 
   /* Targets with store-lane instructions must not require explicit
  realignment.  vect_supportable_dr_alignment always returns either
@@ -10577,8 +10582,13 @@ vectorizable_load (vec_info *vinfo,
? &LOOP_VINFO_LENS (loop_vinfo)
: NULL);
 
-  /* Shouldn't go with length-based approach if fully masked.  */
-  gcc_assert (!loop_lens || !loop_masks);
+  /* The vect_transform_stmt and vect_analyze_stmt will go here but there
+ are some difference here.  We cannot enable both the lens and masks
+ during transform but it is allowed during analysis.
+ Shouldn't go with length-based approach if fully masked.  */
+  if (cost_vec == NULL)
+/* The cost_vec is NULL during transfrom.  */
+gcc_assert ((!loop_lens || !loop_masks));
 
   /* Targets with store-lane instructions must not require explicit
  realignment.  vect_supportable_dr_alignment always returns either


[gcc r14-9436] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

2024-03-12 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:cdf0c6604d03afd7f544dd8bd5d43d9ded059ada

commit r14-9436-gcdf0c6604d03afd7f544dd8bd5d43d9ded059ada
Author: Pan Li 
Date:   Tue Mar 12 15:01:57 2024 +0800

RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]

Noticed some code style issue(s) when adding __riscv_v_fixed_vlen, including:

* Meaningless empty line.
* Line longer than 80 chars.
* Indent with 3 space(s).
* Misaligned arguments.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_ext_version_value): Fix
code style greater than 80 chars.
(riscv_cpu_cpp_builtins): Fix useless empty line, indent
with 3 space(s) and argument unalignment.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-c.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3755ec0b8ef..7029ba88186 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -37,7 +37,8 @@ along with GCC; see the file COPYING3.  If not see
 static int
 riscv_ext_version_value (unsigned major, unsigned minor)
 {
-  return (major * RISCV_MAJOR_VERSION_BASE) + (minor * 
RISCV_MINOR_VERSION_BASE);
+  return (major * RISCV_MAJOR_VERSION_BASE)
++ (minor * RISCV_MINOR_VERSION_BASE);
 }
 
 /* Implement TARGET_CPU_CPP_BUILTINS.  */
@@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 case CM_MEDANY:
   builtin_define ("__riscv_cmodel_medany");
   break;
-
 }
 
   if (riscv_user_wants_strict_align)
@@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 riscv_ext_version_value (0, 12));
 }
 
-   if (TARGET_XTHEADVECTOR)
- builtin_define_with_int_value ("__riscv_th_v_intrinsic",
-riscv_ext_version_value (0, 11));
+  if (TARGET_XTHEADVECTOR)
+builtin_define_with_int_value ("__riscv_th_v_intrinsic",
+  riscv_ext_version_value (0, 11));
 
   /* Define architecture extension test macros.  */
   builtin_define_with_int_value ("__riscv_arch_test", 1);


[gcc r14-9604] RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

2024-03-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:d3c24e9e55a7cf18df313a8b32b6de4b3ba81013

commit r14-9604-gd3c24e9e55a7cf18df313a8b32b6de4b3ba81013
Author: Pan Li 
Date:   Mon Mar 18 11:21:29 2024 +0800

RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))

This patch fixes an ICE for __attribute__((target("arch=+v"))) and
similar extension(s).  Given the sample code below:

void __attribute__((target("arch=+v")))
test_2 (int *a, int *b, int *out, unsigned count)
{
  unsigned i;
  for (i = 0; i < count; i++)
   out[i] = a[i] + b[i];
}

It triggers an ICE when built with -march=rv64gc -O3.

test.c: In function ‘test_2’:
test.c:4:1: internal compiler error: Floating point exception
4 | {
  | ^
0x1a5891b crash_signal
.../__RISC-V_BUILD__/../gcc/toplev.cc:319
0x7f0a7884251f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
0x1f51ba4 riscv_hard_regno_nregs
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143
0x1967bb9 init_reg_modes_target()
.../__RISC-V_BUILD__/../gcc/reginfo.cc:471
0x13fc029 init_emit_regs()
.../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237
0x1a5b83d target_reinit()
.../__RISC-V_BUILD__/../gcc/toplev.cc:1936
0x35e374d save_target_globals()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:92
0x35e381f save_target_globals_default_opts()
.../__RISC-V_BUILD__/../gcc/target-globals.cc:122
0x1f544cc riscv_save_restore_target_globals(tree_node*)
.../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138
0x1f55c36 riscv_set_current_function
...

There are two reasons for this ICE.
1. The implied extension(s) of v are not handled properly, so
   TARGET_MIN_VLEN is not reinitialized and remains 0.  Then
   size / TARGET_MIN_VLEN divides by zero.
2. The machine modes of the vector types vary once the v extension
   is introduced.

This patch passed below testsuite:
1. The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_subset_list::parse):
Replace implied, combine and check to func finalize.
(riscv_subset_list::finalize): New func impl to take care of
implied, combine ext and related checks.
* config/riscv/riscv-subset.h: Add func decl for finalize.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Finalize the ext before return succeed.
* config/riscv/riscv.cc (riscv_set_current_function): Reinit the
machine mode before when set cur function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-1.c: New test.
* gcc.target/riscv/rvv/base/pr114352-2.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc| 31 
 gcc/config/riscv/riscv-subset.h|  2 +
 gcc/config/riscv/riscv-target-attr.cc  |  2 +
 gcc/config/riscv/riscv.cc  |  4 ++
 .../gcc.target/riscv/rvv/base/pr114352-1.c | 58 ++
 .../gcc.target/riscv/rvv/base/pr114352-2.c | 27 ++
 6 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 440127a2af0..15d44245b3c 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1428,16 +1428,7 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
   if (p == NULL)
 goto fail;
 
-  for (itr = subset_list->m_head; itr != NULL; itr = itr->next)
-{
-  subset_list->handle_implied_ext (itr->name.c_str ());
-}
-
-  /* Make sure all implied extensions are included. */
-  gcc_assert (subset_list->check_implied_ext ());
-
-  subset_list->handle_combine_ext ();
-  subset_list->check_conflict_ext ();
+  subset_list->finalize ();
 
   return subset_list;
 
@@ -1467,6 +1458,26 @@ riscv_subset_list::set_loc (location_t loc)
   m_loc = loc;
 }
 
+/* Make sure the implied or combined extension is included after add
+   a new std extension to subset list or likewise.  For exmaple as below,
+
+   void __attribute__((target("arch=+v"))) func () with -march=rv64gc.
+
+   The implied zvl128b and zve64d of the std v should be included.  */
+void
+riscv_subset_list::finalize ()
+{
+  riscv_subset_t *subset;
+
+  for (subset = m_head; subset != NULL; subset = subset->next)
+handle_implied_ext (subset->name.c_str ());
+
+  gcc_assert (check_implied_ext ());
+
+  handle_combine_ext ();
+  check_conflict_ext ();
+}
+
 /* Return the current arch string.  */
 
 std::string
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index ae849e2a302.

[gcc r14-9605] RISC-V: Bugfix function target attribute pollution

2024-03-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9941f0295a14659e25260458efd2e46a68ad0342

commit r14-9605-g9941f0295a14659e25260458efd2e46a68ad0342
Author: Pan Li 
Date:   Tue Mar 19 09:43:24 2024 +0800

RISC-V: Bugfix function target attribute pollution

This patch depends on below ICE fix.

https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647915.html

The function target attribute should be on a per-function basis.
For example, we have 3 function as below:

void test_1 () {}

void __attribute__((target("arch=+v"))) test_2 () {}

void __attribute__((target("arch=+zfh"))) test_3 () {}

void test_4 () {}

The scope of the target attribute should not extend beyond the function
body.  That is, test_3 cannot have the 'v' extension, and test_4 can have
neither the 'v' nor the 'zfh' extension.

Unfortunately, for now test_4 is able to leverage both the 'v' and the
'zfh' extensions, which is incorrect.  This patch fixes the sticky
attribute by introducing the command-line subset_list.  In parse_arch, we
always clone from the cmdline_subset_list instead of the
current_subset_list.

Meanwhile, we correct the print information about arch like below.

.option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zbb1p0

The riscv_declare_function_name hook is always invoked after the
riscv_process_target_attr hook.  Thus, we introduce a hash_map to record
the 1:1 mapping from fndecl to its subset_list in advance, so that
riscv_declare_function_name can later get the right information about
the arch.

Below test are passed for this patch
* The riscv fully regression test.

PR target/114352

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (struct 
riscv_func_target_info):
New struct for func decl and target name.
(struct riscv_func_target_hasher): New hasher for hash table mapping
from the fn_decl to fn_target_name.
(riscv_func_decl_hash): New func to compute the hash for fn_decl.
(riscv_func_target_hasher::hash): New func to impl hash interface.
(riscv_func_target_hasher::equal): New func to impl equal interface.
(riscv_cmdline_subset_list): New static var for cmdline subset list.
(riscv_func_target_table_lazy_init): New func to lazy init the func
target hash table.
(riscv_func_target_get): New func to get target name from hash 
table.
(riscv_func_target_put): New func to put target name into hash 
table.
(riscv_func_target_remove_and_destory): New func to remove target
info from the hash table and destory it.
(riscv_parse_arch_string): Set the static var cmdline_subset_list.
* config/riscv/riscv-subset.h (riscv_cmdline_subset_list): New 
static
var for cmdline subset list.
(riscv_func_target_get): New func decl.
(riscv_func_target_put): Ditto.
(riscv_func_target_remove_and_destory): Ditto.
* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Take cmdline_subset_list instead of current_subset_list when clone.
(riscv_process_target_attr): Record the func target info to hash 
table.
(riscv_option_valid_attribute_p): Add new arg tree fndel.
* config/riscv/riscv.cc (riscv_declare_function_name): Consume the
func target info and print the arch message.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114352-3.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc| 105 ++-
 gcc/config/riscv/riscv-subset.h|   4 +
 gcc/config/riscv/riscv-target-attr.cc  |  18 +++-
 gcc/config/riscv/riscv.cc  |   7 +-
 .../gcc.target/riscv/rvv/base/pr114352-3.c | 113 +
 5 files changed, 240 insertions(+), 7 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 15d44245b3c..7095f303cbb 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -426,11 +426,108 @@ bool riscv_subset_list::parse_failed = false;
 
 static riscv_subset_list *current_subset_list = NULL;
 
+static riscv_subset_list *cmdline_subset_list = NULL;
+
+struct riscv_func_target_info
+{
+  tree fn_decl;
+  std::string fn_target_name;
+
+  riscv_func_target_info (const tree &decl, const std::string &target_name)
+: fn_decl (decl), fn_target_name (target_name)
+  {
+  }
+};
+
+struct riscv_func_target_hasher : nofree_ptr_hash
+{
+  typedef tree compare_type;
+
+  static hashval_t hash (value_type);
+  static bool equal (value_type, const compare_type &);
+};
+
+static hash_table *f

[gcc r14-9616] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

2024-03-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:47de95d801c6899033c303b1fe642feb0489994f

commit r14-9616-g47de95d801c6899033c303b1fe642feb0489994f
Author: Pan Li 
Date:   Fri Mar 22 14:43:47 2024 +0800

RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV

This patch would like to introduce one new gcc attribute for RVV.
This attribute is used to define fixed-length variants of one
existing sizeless RVV types.

This attribute is valid if and only if -mrvv-vector-bits=zvl is given.  Its
only argument must be an integer constant whose value is determined by
the LMUL and the vector register bits in zvl*b.  For example:

typedef vint32m2_t fixed_vint32m2_t 
__attribute__((riscv_rvv_vector_bits(128)));

The above typedef is valid with -march=rv64gc_zve64d_zvl64b
(i.e. 2 (m2) * 64 = 128 for vint32m2_t), and reports an error with
-march=rv64gcv_zvl128b similar to the one below.

"error: invalid RVV vector size '128', expected size is '256' based on
LMUL of type and '-mrvv-vector-bits=zvl'"

Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to
represent the fixed vlen in a RVV vector register.

For the vint*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, -

The CMP will return vint*m*_t the same as aarch64 sve. For example:
typedef vint32m1_t fixed_vint32m1_t 
__attribute__((riscv_rvv_vector_bits(128)));
fixed_vint32m1_t less_than (fixed_vint32m1_t a, fixed_vint32m1_t b)
{
  return a < b;
}

For the vfloat*m*_t below operations are allowed.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.
* CMP: >, <, ==, !=, <=, >=
* ALU: +, -, *, /, -

The CMP will return vfloat*m*_t the same as aarch64 sve. For example:
typedef vfloat32m1_t fixed_vfloat32m1_t 
__attribute__((riscv_rvv_vector_bits(128)));
fixed_vfloat32m1_t less_than (fixed_vfloat32m1_t a, fixed_vfloat32m1_t b)
{
  return a < b;
}

For the vbool*_t types only the operations below are allowed; CMP and
ALU are excluded because the CMP and ALU operations on vbool*_t are not
well defined currently.
* The sizeof.
* The global variable(s).
* The element of union and struct.
* The cast to other equalities.

The vint*x*m*_t tuple types are not supported in this patch, which is
compatible with clang.

This patch passed the below testsuites.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define
macro __riscv_v_fixed_vlen when zvl.
* config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute):
New static func to take care of the RVV types decorated by
the attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-18.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test.
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-c.cc|   3 +
 gcc/config/riscv/riscv.cc  |  87 -
 .../riscv/rvv/base/riscv_rvv_vector_bits-1.c   |   6 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-10.c  |  53 ++
 .../riscv/rvv/base/riscv_rvv_vector_bits-11.c  |  76 +++
 .../riscv/rvv/base/riscv_rvv_vector_bits-12.c  |  14 +++

[gcc r14-9651] RISC-V: Allow RVV intrinsic when function target("arch=+v")

2024-03-25 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5cab64a9cfb93fb0e246a25e3fdc7b664afb774e

commit r14-9651-g5cab64a9cfb93fb0e246a25e3fdc7b664afb774e
Author: Pan Li 
Date:   Mon Mar 25 14:22:31 2024 +0800

RISC-V: Allow RVV intrinsic when function target("arch=+v")

This patch would like to allow the RVV intrinsics when a function is
attributed with target("arch=+v") and built with rv64gc.  For example:

vint32m1_t
__attribute__((target("arch=+v")))
test_1 (vint32m1_t a, vint32m1_t b, size_t vl)
{
  return __riscv_vadd_vv_i32m1 (a, b, vl);
}

build with -march=rv64gc -mabi=lp64d -O3, we will have asm like below:
test_1:
  .option push
  .option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_\

zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0
  vsetvli zero,a0,e32,m1,ta,ma
  vadd.vv v8,v8,v9
  ret

The riscv_vector.h must be included when leveraging intrinsic type(s) and
API(s).  And the scope of this attribute should not exceed the function
body.  Meanwhile, to make rvv types and API(s) available for this attribute,
including riscv_vector.h will not report an error for now if v is not present
in march.

Below test are passed for this patch:
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_pragma_intrinsic): Remove error
when V is disabled and init the RVV types and intrinsic APIs.
* config/riscv/riscv-vector-builtins.cc (expand_builtin): Report
error if V ext is disabled.
* config/riscv/riscv.cc (riscv_return_value_is_vector_type_p):
Ditto.
(riscv_arguments_is_vector_type_p): Ditto.
(riscv_vector_cc_function_p): Ditto.
* config/riscv/riscv_vector.h: Remove error if V is disabled.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pragma-1.c: Remove.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-1.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-2.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-3.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-4.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-5.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-6.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: 
New test.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: 
New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-c.cc| 18 -
 gcc/config/riscv/riscv-vector-builtins.cc  |  5 
 gcc/config/riscv/riscv.cc  | 30 +++---
 gcc/config/riscv/riscv_vector.h|  4 ---
 gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c |  4 ---
 .../rvv/base/target_attribute_v_with_intrinsic-1.c |  5 
 .../rvv/base/target_attribute_v_with_intrinsic-2.c | 18 +
 .../rvv/base/target_attribute_v_with_intrinsic-3.c | 13 ++
 .../rvv/base/target_attribute_v_with_intrinsic-4.c | 10 
 .../rvv/base/target_attribute_v_with_intrinsic-5.c | 12 +
 .../rvv/base/target_attribute_v_with_intrinsic-6.c | 12 +
 .../rvv/base/target_attribute_v_with_intrinsic-7.c |  9 +++
 .../rvv/base/target_attribute_v_with_intrinsic-8.c | 23 +
 13 files changed, 145 insertions(+), 18 deletions(-)

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index edb866d51e4..01314037461 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -201,14 +201,20 @@ riscv_pragma_intrinsic (cpp_reader *)
   if (strcmp (name, "vector") == 0
   || strcmp (name, "xtheadvector") == 0)
 {
-  if (!TARGET_VECTOR)
+  if (TARGET_VECTOR)
+   riscv_vector::handle_pragma_vector ();
+  else /* Indicates riscv_vector.h is included but v is missing in arch  */
{
- error ("%<#pragma riscv intrinsic%> option %qs needs 'V' or "
-"'XTHEADVECTOR' extension enabled",
-name);
- return;
+ /* To make the the rvv types and intrinsic API available for the
+target("arch=+v") attribute,  we need to temporally enable the
+TARGET_VECTOR, and disable it after all initialized.  */
+ target_flags |= MASK_VECTOR;
+
+ riscv_vector::init_builtins ();
+ riscv_vector::handle_pragma_vector ();
+
+ target_flags &= ~MASK_VECTOR;
}
-  riscv_vector::handle_pragma_vector ();
 }
   else
 error ("unknown %<#pragma riscv intrinsic%> option %qs", name);
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc

[gcc r14-9730] RISC-V: Fix one unused varable in riscv_subset_list::parse

2024-03-31 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:46eb34a75a9d004ce776bba382fe8af0978cace7

commit r14-9730-g46eb34a75a9d004ce776bba382fe8af0978cace7
Author: Pan Li 
Date:   Sat Mar 30 21:32:06 2024 +0800

RISC-V: Fix one unused varable in riscv_subset_list::parse

This patch would like to fix one unused variable as below:

../../gcc/common/config/riscv/riscv-common.cc: In static member function
'static riscv_subset_list* riscv_subset_list::parse(const char*, 
location_t)':
../../gcc/common/config/riscv/riscv-common.cc:1501:19: error: unused 
variable 'itr'
  [-Werror=unused-variable]
 1501 |   riscv_subset_t *itr;

The code that consumed the variable was removed previously, but the
variable declaration itself was missed.  Thus, we have an unused variable here.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_subset_list::parse):
Remove unused var decl.

Signed-off-by: Pan Li 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 7095f303cbb..43b7549e3ec 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1498,7 +1498,6 @@ riscv_subset_list::parse (const char *arch, location_t 
loc)
 return NULL;
 
   riscv_subset_list *subset_list = new riscv_subset_list (arch, loc);
-  riscv_subset_t *itr;
   const char *p = arch;
   p = subset_list->parse_base_ext (p);
   if (p == NULL)


[gcc r14-9731] RISC-V: Fix misspelled term builtin in error message

2024-03-31 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b313baba57f7e09f66b603e1e30dd4b48800693f

commit r14-9731-gb313baba57f7e09f66b603e1e30dd4b48800693f
Author: Pan Li 
Date:   Sat Mar 30 20:03:18 2024 +0800

RISC-V: Fix misspelled term builtin in error message

This patch would like to fix below misspelled term in error message.

../../gcc/config/riscv/riscv-vector-builtins.cc:4592:16: error:
misspelled term 'builtin function' in format; use 'built-in function' 
instead [-Werror=format-diag]
 4592 |   "builtin function %qE requires the V ISA extension", 
exp);

The below tests are passed for this patch.
* The riscv regression test on rvv.exp and riscv.exp.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc (expand_builtin): Take
the term built-in over builtin.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c:
Adjust test dg-error.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c:
Ditto.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc   | 2 +-
 .../gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c | 2 +-
 .../gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index e07373d8b57..db9246eed2d 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4589,7 +4589,7 @@ expand_builtin (unsigned int code, tree exp, rtx target)
 
   if (!TARGET_VECTOR)
 error_at (EXPR_LOCATION (exp),
- "builtin function %qE requires the V ISA extension", exp);
+ "built-in function %qE requires the V ISA extension", exp);
 
   return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
 }
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
index 520b2e59fae..a4cd67f4f95 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c
@@ -5,5 +5,5 @@
 
 size_t test_1 (size_t vl)
 {
-  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
+  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
 }
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
index 9032d9d0b43..06ed9a9eddc 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c
@@ -19,5 +19,5 @@ test_2 ()
 size_t
 test_3 (size_t vl)
 {
-  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
+  return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function 
'__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */
 }


[gcc r14-9828] RISC-V: Refine the error msg for RVV intrinisc required ext

2024-04-08 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:7d051f7d45789e1442d26c07bfc5e7fb77433b87

commit r14-9828-g7d051f7d45789e1442d26c07bfc5e7fb77433b87
Author: Pan Li 
Date:   Mon Apr 8 12:33:05 2024 +0800

RISC-V: Refine the error msg for RVV intrinisc required ext

The RVV intrinsic API has various required extensions, coming from either
the march or the target attribute.  It will have an error message similar
to below:

built-in function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension

However, it is not accurate as we have many additional sub-extensions
besides the v extension.  For example, zvbb, zvbk, zvbc ... etc.  This patch
would like to refine the error message with a friendly hint for the
required extension.  For example as below:

vuint64m1_t
__attribute__((target("arch=+v")))
test_1 (vuint64m1_t op_1, vuint64m1_t op_2, size_t vl)
{
  return __riscv_vclmul_vv_u64m1 (op_1, op_2, vl);
}

When compile with march=rv64gc and target arch=+v, we will have error
message as below:

error: built-in function '__riscv_vclmul_vv_u64m1(op_1,  op_2,  vl)'
  requires the 'zvbc' ISA extension

Then the end-user will easily get the point that the *zvbc* extension is
missing for the intrinsic API.

The below tests are passed for this patch.
* The riscv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-shapes.cc (build_one): Pass
required_ext arg when invoke add function.
(build_th_loadstore): Ditto.
(struct vcreate_def): Ditto.
(struct read_vl_def): Ditto.
(struct vlenb_def): Ditto.
* config/riscv/riscv-vector-builtins.cc 
(function_builder::add_function):
Introduce new arg required_ext to fill in the register func.
(function_builder::add_unique_function): Ditto.
(function_builder::add_overloaded_function): Ditto.
(expand_builtin): Leverage required_extensions_specified to
check if the required extension is provided.
* config/riscv/riscv-vector-builtins.h (reqired_ext_to_isa_name): 
New
func impl to convert the required_ext enum to the extension name.
(required_extensions_specified): New func impl to predicate if
the required extension is provided.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: 
Adjust
the error message for v extension.
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: 
Ditto.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-1.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-10.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-2.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-3.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-4.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-5.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-6.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-7.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-8.c: New test.
* gcc.target/riscv/rvv/base/intrinsic_required_ext-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-vector-builtins-shapes.cc   | 18 --
 gcc/config/riscv/riscv-vector-builtins.cc  | 23 +--
 gcc/config/riscv/riscv-vector-builtins.h   | 75 +-
 .../riscv/rvv/base/intrinsic_required_ext-1.c  | 10 +++
 .../riscv/rvv/base/intrinsic_required_ext-10.c | 11 
 .../riscv/rvv/base/intrinsic_required_ext-2.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-3.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-4.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-5.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-6.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-7.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-8.c  | 11 
 .../riscv/rvv/base/intrinsic_required_ext-9.c  | 11 
 .../rvv/base/target_attribute_v_with_intrinsic-7.c |  2 +-
 .../rvv/base/target_attribute_v_with_intrinsic-8.c |  2 +-
 15 files changed, 210 insertions(+), 19 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index c5ffcc1f2c4..7f983e82370 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -72,9 +72,10 @@ build_one (function_builder &b, const function_group_info 
&group,
   if (TARGET_XTHEADVECTOR && !check_type (return_type, argument_types))
 return;
 
-  b.add_overloaded_function (function_instance, *group.shape);
+  b.add_overload

[gcc r14-9908] RISC-V: Bugfix ICE for the vector return arg in mode switch

2024-04-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:e40a3d86511efcea71e9eadde8fb9f96be52f790

commit r14-9908-ge40a3d86511efcea71e9eadde8fb9f96be52f790
Author: Pan Li 
Date:   Thu Apr 11 09:39:44 2024 +0800

RISC-V: Bugfix ICE for the vector return arg in mode switch

This patch would like to fix an ICE in mode switching for the example code below.

during RTL pass: mode_sw
test.c: In function ‘vbool16_t j(vuint64m4_t)’:
test.c:15:1: internal compiler error: in create_pre_exit, at
mode-switching.cc:451
   15 | }
  | ^
0x3978f12 create_pre_exit
__RISCV_BUILD__/../gcc/mode-switching.cc:451
0x3979e9e optimize_mode_switching
__RISCV_BUILD__/../gcc/mode-switching.cc:849
0x397b9bc execute
__RISCV_BUILD__/../gcc/mode-switching.cc:1324

extern size_t get_vl ();

vbool16_t
test (vuint64m4_t a)
{
  unsigned long b;
  return __riscv_vmsne_vx_u64m4_b16 (a, b, get_vl ());
}

The create_pre_exit would like to find a return value copy.  If
not, there will be a reason in assert but not available for above
sample code when vector calling convention is enabled by default.
This patch would like to override the TARGET_FUNCTION_VALUE_REGNO_P
for vector register and then we will have hard_regno_nregs for copy_num,
aka there is a return value copy.

As a side-effect of allowing vector in TARGET_FUNCTION_VALUE_REGNO_P, the
TARGET_GET_RAW_RESULT_MODE will have a vector mode, which is sizeless and
cannot be converted to fixed_size_mode.  Thus override the hook
TARGET_GET_RAW_RESULT_MODE and return VOIDmode when the regno is-not-a
fixed_size_mode.

The below tests are passed for this patch.
* The fully riscv regression tests.
* The reproducing test in bugzilla PR114639.

PR target/114639

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_function_value_regno_p): New func
impl for hook TARGET_FUNCTION_VALUE_REGNO_P.
(riscv_get_raw_result_mode): New func impl for hook
TARGET_GET_RAW_RESULT_MODE.
(TARGET_FUNCTION_VALUE_REGNO_P): Impl the hook.
(TARGET_GET_RAW_RESULT_MODE): Ditto.
* config/riscv/riscv.h (V_RETURN): New macro for vector return.
(GP_RETURN_FIRST): New macro for the first GPR in return.
(GP_RETURN_LAST): New macro for the last GPR in return.
(FP_RETURN_FIRST): Ditto but for FPR.
(FP_RETURN_LAST): Ditto.
(FUNCTION_VALUE_REGNO_P): Remove as deprecated and replace by
TARGET_FUNCTION_VALUE_REGNO_P.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr114639-1.C: New test.
* gcc.target/riscv/rvv/base/pr114639-1.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc  | 34 ++
 gcc/config/riscv/riscv.h   |  8 +++--
 .../g++.target/riscv/rvv/base/pr114639-1.C | 25 
 .../gcc.target/riscv/rvv/base/pr114639-1.c | 14 +
 4 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 00defa69fd8..91f017dd52a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10997,6 +10997,34 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
   return true;
 }
 
+/* Implements hook TARGET_FUNCTION_VALUE_REGNO_P.  */
+
+static bool
+riscv_function_value_regno_p (const unsigned regno)
+{
+  if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST)
+return true;
+
+  if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
+return true;
+
+  if (regno == V_RETURN)
+return true;
+
+  return false;
+}
+
+/* Implements hook TARGET_GET_RAW_RESULT_MODE.  */
+
+static fixed_size_mode
+riscv_get_raw_result_mode (int regno)
+{
+  if (!is_a <fixed_size_mode> (reg_raw_mode[regno]))
+return as_a <fixed_size_mode> (VOIDmode);
+
+  return default_get_reg_raw_mode (regno);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11343,6 +11371,12 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P 
riscv_vector_mode_supported_any_target_p
 
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
+
+#undef TARGET_GET_RAW_RESULT_MODE
+#define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 269b8c1f076..7797e67317a 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -683,6 +683,12 @@ enum reg_class
 
 #define GP_RETURN GP_ARG_FIRST
 #define FP_RETURN (UNITS_PER_FP_ARG == 0 ? GP_RETURN : FP_ARG_FIRST)
+#def

[gcc r14-9909] RISC-V: Remove -Wno-psabi for test build option [NFC]

2024-04-10 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f3fdcf4a37a7be07f2acbf5c8ed5e3399440a0ef

commit r14-9909-gf3fdcf4a37a7be07f2acbf5c8ed5e3399440a0ef
Author: Pan Li 
Date:   Thu Apr 11 11:42:40 2024 +0800

RISC-V: Remove -Wno-psabi for test build option [NFC]

Just noticed that some test cases still have the -Wno-psabi option,
which is deprecated now.  Remove them all for riscv test cases.

The below test are passed for this patch.
* The riscv rvv regression test.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/base/pr109244.C: Remove deprecated
-Wno-psabi option.
* g++.target/riscv/rvv/base/pr109535.C: Ditto.
* gcc.target/riscv/rvv/autovec/fixed-vlmax-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
 

[gcc r14-9930] RISC-V: Bugfix ICE non-vector in TARGET_FUNCTION_VALUE_REGNO_P

2024-04-11 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:dc51a6428f6d8e5a57b8b1bf559145288e87660b

commit r14-9930-gdc51a6428f6d8e5a57b8b1bf559145288e87660b
Author: Pan Li 
Date:   Fri Apr 12 11:12:24 2024 +0800

RISC-V: Bugfix ICE non-vector in TARGET_FUNCTION_VALUE_REGNO_P

This patch would like to fix one ICE when vector is not enabled
in hook TARGET_FUNCTION_VALUE_REGNO_P implementation.  The vector
regno is available if and only if the TARGET_VECTOR is true.  The
previous implementation missed this condition, which resulted in an ICE
when building with the rv64gc option without vector.

The below test suite is passed for this patch.

* The rv64gcv fully regression tests.
* The rv64gc fully regression tests.

PR target/114639

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_function_value_regno_p): Add
TARGET_VECTOR predicate for V_RETURN regno.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr114639-1.c: New test.
* gcc.target/riscv/pr114639-2.c: New test.
* gcc.target/riscv/pr114639-3.c: New test.
* gcc.target/riscv/pr114639-4.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc   |  2 +-
 gcc/testsuite/gcc.target/riscv/pr114639-1.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-2.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-3.c | 11 +++
 gcc/testsuite/gcc.target/riscv/pr114639-4.c | 11 +++
 5 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 91f017dd52a..e5f00806bb9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11008,7 +11008,7 @@ riscv_function_value_regno_p (const unsigned regno)
   if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
 return true;
 
-  if (regno == V_RETURN)
+  if (TARGET_VECTOR && regno == V_RETURN)
 return true;
 
   return false;
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-1.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-1.c
new file mode 100644
index 000..f41723193a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-1.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-2.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-2.c
new file mode 100644
index 000..0c402c4b254
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-2.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-3.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-3.c
new file mode 100644
index 000..ffb0d6d162d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-3.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-4.c 
b/gcc/testsuite/gcc.target/riscv/pr114639-4.c
new file mode 100644
index 000..a6e229101ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr114639-4.c
@@ -0,0 +1,11 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -std=gnu89 -O3" } */
+
+g (a, b) {}
+
+f (xx)
+ void* xx;
+{
+  __builtin_apply ((void*)g, xx, 200);
+}


[gcc r14-9936] RISC-V: Fix Werror=sign-compare in riscv_validate_vector_type

2024-04-12 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:6e7e5943619a2c20d93fc7089c885483786558bc

commit r14-9936-g6e7e5943619a2c20d93fc7089c885483786558bc
Author: Pan Li 
Date:   Fri Apr 12 16:38:18 2024 +0800

RISC-V: Fix Werror=sign-compare in riscv_validate_vector_type

This patch would like to fix the Werror=sign-compare similar to below:

gcc/config/riscv/riscv.cc: In function ‘void
riscv_validate_vector_type(const_tree, const char*)’:
gcc/config/riscv/riscv.cc:5614:23: error: comparison of integer
expressions of different signedness: ‘int’ and ‘unsigned int’
[-Werror=sign-compare]
 5614 |   if (TARGET_MIN_VLEN < required_min_vlen)

The TARGET_MIN_VLEN is *int* by default but the required_min_vlen
returned from riscv_vector_required_min_vlen is **unsigned**.  Thus,
adjust the related function and reference variable(s) to int type
to avoid such kind of Werror.

The below test suite is passed for this patch.
* The rv64gcv fully regression tests.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_vector_float_type_p): Take int
as the return value instead of unsigned.
(riscv_vector_element_bitsize): Ditto.
(riscv_vector_required_min_vlen): Ditto.
(riscv_validate_vector_type): Take int type for local variable(s).

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e5f00806bb9..74445bc977c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5499,7 +5499,7 @@ riscv_vector_float_type_p (const_tree type)
   return strstr (name, "vfloat") != NULL;
 }
 
-static unsigned
+static int
 riscv_vector_element_bitsize (const_tree type)
 {
   machine_mode mode = TYPE_MODE (type);
@@ -5523,7 +5523,7 @@ riscv_vector_element_bitsize (const_tree type)
   gcc_unreachable ();
 }
 
-static unsigned
+static int
 riscv_vector_required_min_vlen (const_tree type)
 {
   machine_mode mode = TYPE_MODE (type);
@@ -5531,7 +5531,7 @@ riscv_vector_required_min_vlen (const_tree type)
   if (riscv_v_ext_mode_p (mode))
 return TARGET_MIN_VLEN;
 
-  unsigned element_bitsize = riscv_vector_element_bitsize (type);
+  int element_bitsize = riscv_vector_element_bitsize (type);
   const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
 
   if (strstr (name, "bool64") != NULL)
@@ -5569,7 +5569,7 @@ riscv_validate_vector_type (const_tree type, const char 
*hint)
   return;
 }
 
-  unsigned element_bitsize = riscv_vector_element_bitsize (type);
+  int element_bitsize = riscv_vector_element_bitsize (type);
   bool int_type_p = riscv_vector_int_type_p (type);
 
   if (int_type_p && element_bitsize == 64
@@ -5609,7 +5609,7 @@ riscv_validate_vector_type (const_tree type, const char 
*hint)
   return;
 }
 
-  unsigned required_min_vlen = riscv_vector_required_min_vlen (type);
+  int required_min_vlen = riscv_vector_required_min_vlen (type);
 
   if (TARGET_MIN_VLEN < required_min_vlen)
 {


[gcc r14-10049] Revert "RISC-V: Support one more overlap for wv instructions"

2024-04-19 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:0cbeafe26513954b0aea3293d2f82d4863f10f1d

commit r14-10049-g0cbeafe26513954b0aea3293d2f82d4863f10f1d
Author: Pan Li 
Date:   Sat Apr 20 08:29:38 2024 +0800

Revert "RISC-V: Support one more overlap for wv instructions"

This reverts commit b3b2799b872bc4c1944629af9dfc8472c8ca5fe6.

Diff:
---
 gcc/config/riscv/riscv.md  | 14 ++--
 gcc/config/riscv/vector.md | 84 +++---
 .../gcc.target/riscv/rvv/base/pr112431-42.c| 30 
 3 files changed, 46 insertions(+), 82 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c2b4323c53a..f0928398698 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -541,7 +541,7 @@
 ;; Widening instructions have group-overlap constraints.  Those are only
 ;; valid for certain register-group sizes.  This attribute marks the
 ;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87"
   (const_string "none"))
 
 (define_attr "group_overlap_valid" "no,yes"
@@ -562,9 +562,9 @@
 
  ;; According to RVV ISA:
  ;; The destination EEW is greater than the source EEW, the source 
EMUL is at least 1,
- ;; and the overlap is in the highest-numbered part of the destination 
register group
- ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
- ;; So the source operand should have LMUL >= 1.
+;; and the overlap is in the highest-numbered part of the destination 
register group
+;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
+;; So the source operand should have LMUL >= 1.
  (and (eq_attr "group_overlap" "W43")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4
   && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
@@ -574,12 +574,6 @@
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8
   && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
 (const_string "no")
-
- ;; W21 supports highest-number overlap for source LMUL = 1.
- ;; For 'wv' variant, we can also allow wide source operand overlaps 
dest operand.
- (and (eq_attr "group_overlap" "W0")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
> 1"))
-(const_string "no")
 ]
(const_string "yes")))
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8b1c24c5d79..8298a72b771 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3842,48 +3842,48 @@
(set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
 
 (define_insn "@pred_single_widen_sub"
-  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  &vr,  &vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i,i,i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i,i,i")
-(match_operand 8 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
+(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
+(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
+(match_operand 8 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (minus:VWEXTI
-   (match_operand:VWEXTI 3 "register_operand" " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,0,0,   vr,   vr")
+   (match_operand:VWEXTI 3 "register_operan

[gcc r14-10050] RISC-V: Add xfail test case for wv insn register overlap

2024-04-19 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9f10005dbc9b660465ec4a9640bcbdcc1e5171c3

commit r14-10050-g9f10005dbc9b660465ec4a9640bcbdcc1e5171c3
Author: Pan Li 
Date:   Sat Apr 20 09:02:39 2024 +0800

RISC-V: Add xfail test case for wv insn register overlap

We reverted the patch below for wv insn overlap.  Add the related wv
insn test and mark it as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

b3b2799b872 RISC-V: Support one more overlap for wv instructions

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-42.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-42.c| 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
new file mode 100644
index 000..fa5dac58a20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */
+
+#include 
+
+int64_t
+reduc_plus_int (int *__restrict a, int n)
+{
+  int64_t r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+double
+reduc_plus_float (float *__restrict a, int n)
+{
+  double r = 0;
+  for (int i = 0; i < n; ++i)
+r += a[i];
+  return r;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */


[gcc r14-10051] Revert "RISC-V: Support highest overlap for wv instructions"

2024-04-19 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f5447eae72f11d9bfbb403183fd282918c0445c6

commit r14-10051-gf5447eae72f11d9bfbb403183fd282918c0445c6
Author: Pan Li 
Date:   Sat Apr 20 09:42:57 2024 +0800

Revert "RISC-V: Support highest overlap for wv instructions"

This reverts commit 7e854b58084c131fceca9e8fa9dcc7469972e69d.

Diff:
---
 gcc/config/riscv/vector.md |  88 ++--
 .../gcc.target/riscv/rvv/base/pr112431-39.c| 158 -
 .../gcc.target/riscv/rvv/base/pr112431-40.c|  94 
 .../gcc.target/riscv/rvv/base/pr112431-41.c|  62 
 4 files changed, 42 insertions(+), 360 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8298a72b771..8a727e2ea41 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3842,48 +3842,46 @@
(set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
 
 (define_insn "@pred_single_widen_sub"
-  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr,&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 8 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,   
rK")
+(match_operand 6 "const_int_operand"  "i,
i")
+(match_operand 7 "const_int_operand"  "i,
i")
+(match_operand 8 "const_int_operand"  "i,
i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (minus:VWEXTI
-   (match_operand:VWEXTI 3 "register_operand" " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,   vr,   vr")
+   (match_operand:VWEXTI 3 "register_operand" "   vr,   
vr")
(any_extend:VWEXTI
- (match_operand: 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr")))
- (match_operand:VWEXTI 2 "vector_merge_operand"   " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,0")))]
+ (match_operand: 4 "register_operand" "   vr,   
vr")))
+ (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,
0")))]
   "TARGET_VECTOR"
   "vwsub.wv\t%0,%3,%4%p1"
   [(set_attr "type" "viwalu")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "mode" "")])
 
 (define_insn "@pred_single_widen_add"
-  [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr,&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 7 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 8 "const_int_operand"  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,   
rK")
+(match_operand 6 "const_int_operand"  "i,
i")
+(match_operand 7 "const_int_operand"  "i,
i")
+(match_operand 8 "const_int_operand"  "i,
i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (plus:VWEXTI
(any_extend:VWEXTI
- (match_operand: 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr"))
-   (match_operand:VWEXTI 3 "register_operand" " vr, vr, 

[gcc r14-10052] RISC-V: Add xfail test case for wv insn highest overlap

2024-04-20 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1690e47e101c1e273b1ee052de21d5214257c13a

commit r14-10052-g1690e47e101c1e273b1ee052de21d5214257c13a
Author: Pan Li 
Date:   Sat Apr 20 13:05:52 2024 +0800

RISC-V: Add xfail test case for wv insn highest overlap

We reverted the patch below for wv insn overlap.  Add the related wv
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

7e854b58084 RISC-V: Support highest overlap for wv instructions

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Xfail csr 
check.
* gcc.target/riscv/rvv/base/pr112431-39.c: New test.
* gcc.target/riscv/rvv/base/pr112431-40.c: New test.
* gcc.target/riscv/rvv/base/pr112431-41.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../vect/costmodel/riscv/rvv/dynamic-lmul8-11.c|   2 +-
 .../gcc.target/riscv/rvv/base/pr112431-39.c| 158 +
 .../gcc.target/riscv/rvv/base/pr112431-40.c|  94 
 .../gcc.target/riscv/rvv/base/pr112431-41.c|  62 
 4 files changed, 315 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
index c9e28251225..5a39f04b140 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c
@@ -40,7 +40,7 @@ void foo2 (int64_t *__restrict a,
 }
 
 /* { dg-final { scan-assembler {e64,m8} } } */
-/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
new file mode 100644
index 000..770b5411666
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c
@@ -0,0 +1,158 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out, int n)
+{
+  for (int i = 0; i < n; i++)
+{
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v0 = __riscv_vwsub_wv_i16m2_tu (v0, v0, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v1 = __riscv_vwsub_wv_i16m2_tu (v1, v1, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v2 = __riscv_vwsub_wv_i16m2_tu (v2, v2, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v3 = __riscv_vwsub_wv_i16m2_tu (v3, v3, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v4 = __riscv_vwsub_wv_i16m2_tu (v4, v4, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v5 = __riscv_vwsub_wv_i16m2_tu (v5, v5, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v6 = __riscv_vwsub_wv_i16m2_tu (v6, v6, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v7 = __riscv_vwsub_wv_i16m2_tu (v7, v7, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v8 = __riscv_vwsub_wv_i16m2_tu (v8, v8, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v9 = __riscv_vwsub_wv_i16m2_tu (v9, v9, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4);
+  asm volatile("nop" ::: "memory");
+  vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100;
+  v10 = __riscv_vwsub_wv_i16m2_tu (v10, v10, 
__riscv_vreinterpret_

[gcc r14-10054] Revert "RISC-V: Fix overlap group incorrect overlap on v0"

2024-04-20 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:3afcb04bd7d444b4c6547ad98668c2a6a7f37a21

commit r14-10054-g3afcb04bd7d444b4c6547ad98668c2a6a7f37a21
Author: Pan Li 
Date:   Sat Apr 20 22:37:56 2024 +0800

Revert "RISC-V: Fix overlap group incorrect overlap on v0"

This reverts commit 018ba3ac952bed4ae01344c060360f13f7cc084a.

Diff:
---
 gcc/config/riscv/vector.md | 268 ++---
 .../gcc.target/riscv/rvv/base/pr112431-34.c| 101 
 2 files changed, 134 insertions(+), 235 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8a727e2ea41..2a6ab979588 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2254,70 +2254,70 @@
 
 ;; DEST eew is greater than SOURCE eew.
 (define_insn "@pred_indexed_load_x2_greater_eew"
-  [(set (match_operand:VEEWEXT2 0 "register_operand"   "=vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VEEWEXT2 0 "register_operand" "=vr, 
  vr,   vr,   vr,   vr,   vr, ?&vr, ?&vr")
(if_then_else:VEEWEXT2
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   " 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  " rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 7 "const_int_operand"  "i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 8 "const_int_operand"  "i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,  
 rK,   rK,   rK,   rK,   rK,   rK,   rK")
+(match_operand 6 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
+(match_operand 7 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
+(match_operand 8 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
-   [(match_operand 3 "pmode_reg_or_0_operand" " rJ, 
rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ,   rJ,   rJ")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ,   rJ,   rJ,   rJ,   rJ,   rJ,   rJ")
 (mem:BLK (scratch))
-(match_operand: 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr")] ORDER)
- (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 
vu,  0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,0")))]
+(match_operand: 4 "register_operand" "  W21,  
W21,  W42,  W42,  W84,  W84,   vr,   vr")] ORDER)
+ (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,  
  0,   vu,0,   vu,0,   vu,0")))]
   "TARGET_VECTOR"
   "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
(set_attr "mode" "")
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
 
 (define_insn "@pred_indexed_load_x4_greater_eew"
-  [(set (match_operand:VEEWEXT4 0 "register_operand"   "=vd, 
vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VEEWEXT4 0 "register_operand""=vr,  
  vr,   vr,   vr, ?&vr, ?&vr")
(if_then_else:VEEWEXT4
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   " 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  " rK, 
rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "  i,  
i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 7 "const_int_operand"  "  i,  
i,  i,  i,  i,  i,  i,  i,i,i")
-(match_operand 8 "const_int_operand"  "  i,  
i,  i,  i,  i,  i,  i,  i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,  
 rK,   rK,   rK,   rK,   rK")
+(match_operand 6 "const_int_operand"  "i,  
  i,i,i,i,i")
+(match_operand 7 "const_int_operand"  "i,  
  i,i,i,i,i")
+(match_operand 8 "const_int_operand" 

[gcc r14-10056] RISC-V: Add xfail test case for incorrect overlap on v0

2024-04-20 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:d37b34fe82e6e19e80ec9c46400f63fa90ba5255

commit r14-10056-gd37b34fe82e6e19e80ec9c46400f63fa90ba5255
Author: Pan Li 
Date:   Sat Apr 20 22:43:13 2024 +0800

RISC-V: Add xfail test case for incorrect overlap on v0

We reverted the patch below for register group overlap.  Add the related
insn test and mark it as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

018ba3ac952 RISC-V: Fix overlap group incorrect overlap on v0

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-34.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-34.c| 101 +
 1 file changed, 101 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c
new file mode 100644
index 000..286185aa01e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c
@@ -0,0 +1,101 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl);
+  vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl);
+  vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl);
+  vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl);
+  vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl);
+  vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl);
+  vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+  vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+  vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+  vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+  vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+  vint16m2_t vw11 = __riscv_vluxei8_v_i16m2 ((void *) it, v11, vl);
+  vint16m2_t vw12 = __riscv_vluxei8_v_i16m2 ((void *) it, v12, vl);
+  vint16m2_t vw13 = __riscv_vluxei8_v_i16m2 ((void *) it, v13, vl);
+  vint16m2_t vw14 = __riscv_vluxei8_v_i16m2 ((void *) it, v14, vl);
+  vbool8_t mask = *(vbool8_t*)it;
+  vint16m2_t vw15 = __riscv_vluxei8_v_i16m2_m (mask, (void *) it, v15, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7);
+  size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8);
+  size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9);
+  size_t sum10 = __r

[gcc r14-10057] Revert "RISC-V: Support highpart register overlap for widen vx/vf instructions"

2024-04-20 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ef2392236ec629351496d7f299d6a0956080e4d9

commit r14-10057-gef2392236ec629351496d7f299d6a0956080e4d9
Author: Pan Li 
Date:   Sun Apr 21 09:37:00 2024 +0800

Revert "RISC-V: Support highpart register overlap for widen vx/vf 
instructions"

This reverts commit a23415d7572774701d7ec04664390260ab9a3f63.

Diff:
---
 gcc/config/riscv/vector.md |  65 ---
 .../gcc.target/riscv/rvv/base/pr112431-22.c| 188 -
 .../gcc.target/riscv/rvv/base/pr112431-23.c| 119 -
 .../gcc.target/riscv/rvv/base/pr112431-24.c|  86 --
 .../gcc.target/riscv/rvv/base/pr112431-25.c| 104 
 .../gcc.target/riscv/rvv/base/pr112431-26.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-27.c|  51 --
 7 files changed, 31 insertions(+), 650 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 2a6ab979588..f620f13682c 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3818,28 +3818,27 @@
(set_attr "mode" "")])
 
 (define_insn 
"@pred_dual_widen__scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"   "=vr,   
vr,   vr,   vr,  vr,vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr,&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,   
rK,   rK,   rK,   rK,   rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "i,
i,i,i,i,i,i,i")
-(match_operand 7 "const_int_operand"  "i,
i,i,i,i,i,i,i")
-(match_operand 8 "const_int_operand"  "i,
i,i,i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,   
rK")
+(match_operand 6 "const_int_operand"  "i,
i")
+(match_operand 7 "const_int_operand"  "i,
i")
+(match_operand 8 "const_int_operand"  "i,
i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (any_widen_binop:VWEXTI
(any_extend:VWEXTI
- (match_operand: 3 "register_operand" "  W21,  
W21,  W42,  W42,  W84,  W84,   vr,   vr"))
+ (match_operand: 3 "register_operand" "   vr,   
vr"))
(any_extend:VWEXTI
  (vec_duplicate:
-   (match_operand: 4 "reg_or_0_operand"   "   rJ,   
rJ,   rJ,   rJ,   rJ,   rJ,   rJ,   rJ"
- (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,
0,   vu,0,   vu,0,   vu,0")))]
+   (match_operand: 4 "reg_or_0_operand"   "   rJ,   
rJ"
+ (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,
0")))]
   "TARGET_VECTOR"
   "vw.vx\t%0,%3,%z4%p1"
   [(set_attr "type" "vi")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
+   (set_attr "mode" "")])
 
 (define_insn "@pred_single_widen_sub"
   [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr,&vr")
@@ -3928,28 +3927,27 @@
(set_attr "mode" "")])
 
 (define_insn "@pred_widen_mulsu_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"   "=vr,   
vr,   vr,   vr,  vr,vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"  "=&vr,&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,   
rK,   rK,   rK,   rK,   rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "i,
i,i,i,i,i,i,i")
-(match_operand 7 "const_int_operand"  "i,
i,i,i,i,i,i,i")
-(match_operand 8 "const_int_operand"  "i,
i,i,i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,   
rK")
+(match_operand 6 "const_int_operand"  "i,
i")
+(match_operand 7 "const_int_operand"  "i,
i")
+(match_operand 8 "const_int_operand"  "i,
i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (mult:VWEXTI
(sign_extend:VWEXTI
-   

[gcc r14-10061] RISC-V: Add xfail test case for highpart register overlap of vx/vf widen

2024-04-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:338640fbee2977485efb6ff0f1d3c7c8220074ad

commit r14-10061-g338640fbee2977485efb6ff0f1d3c7c8220074ad
Author: Pan Li 
Date:   Sun Apr 21 12:34:19 2024 +0800

RISC-V: Add xfail test case for highpart register overlap of vx/vf widen

We reverted the patch below for register group overlap.  Add the related
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

a23415d7572 RISC-V: Support highpart register overlap for widen vx/vf 
instructions

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-22.c: New test.
* gcc.target/riscv/rvv/base/pr112431-23.c: New test.
* gcc.target/riscv/rvv/base/pr112431-24.c: New test.
* gcc.target/riscv/rvv/base/pr112431-25.c: New test.
* gcc.target/riscv/rvv/base/pr112431-26.c: New test.
* gcc.target/riscv/rvv/base/pr112431-27.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-22.c| 188 +
 .../gcc.target/riscv/rvv/base/pr112431-23.c| 119 +
 .../gcc.target/riscv/rvv/base/pr112431-24.c|  86 ++
 .../gcc.target/riscv/rvv/base/pr112431-25.c| 104 
 .../gcc.target/riscv/rvv/base/pr112431-26.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-27.c|  51 ++
 6 files changed, 616 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
new file mode 100644
index 000..ac56703c75c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vwadd_vx_i16m2 (v0, 33, vl);
+  vint16m2_t vw1 = __riscv_vwadd_vx_i16m2 (v1, 33, vl);
+  vint16m2_t vw2 = __riscv_vwadd_vx_i16m2 (v2, 33, vl);
+  vint16m2_t vw3 = __riscv_vwadd_vx_i16m2 (v3, 33, vl);
+  vint16m2_t vw4 = __riscv_vwadd_vx_i16m2 (v4, 33, vl);
+  vint16m2_t vw5 = __riscv_vwadd_vx_i16m2 (v5, 33, vl);
+  vint16m2_t vw6 = __riscv_vwadd_vx_i16m2 (v6, 33, vl);
+  vint16m2_t vw7 = __riscv_vwadd_vx_i16m2 (v7, 33, vl);
+  vint16m2_t vw8 = __riscv_vwadd_vx_i16m2 (v8, 33, vl);
+  vint16m2_t vw9 = __riscv_vwadd_vx_i16m2 (v9, 33, vl);
+  vint16m2_t vw10 = __riscv_vwadd_vx_i16m2 (v10, 33, vl);
+  vint16m2_t vw11 = __riscv_vwadd_vx_i16m2 (v11, 33, vl);
+  vint16m2_t vw12 = __riscv_vwadd_vx_i16m2 (v12, 33, vl);
+  vint16m2_t vw13 = __riscv_vwadd_vx_i16m2 (v13, 33, vl);
+  vint16m2_t vw14 = __riscv_vwadd_vx_i16m2 (v14, 33, vl);
+  vint16m2_t vw15 = __riscv_vwadd_vx_i16m2 (v15, 33, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0);
+  size_t sum1 = __ri

[gcc r14-10062] Revert "RISC-V: Support widening register overlap for vf4/vf8"

2024-04-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:ec78916bb37bec0cd3ede5c6263387345ce16f94

commit r14-10062-gec78916bb37bec0cd3ede5c6263387345ce16f94
Author: Pan Li 
Date:   Mon Apr 22 09:26:04 2024 +0800

Revert "RISC-V: Support widening register overlap for vf4/vf8"

This reverts commit 303195e2a6b6f0e8f42e0578b61f9f37c6250beb.

Diff:
---
 gcc/config/riscv/vector.md | 38 ++--
 .../gcc.target/riscv/rvv/base/pr112431-16.c| 68 --
 .../gcc.target/riscv/rvv/base/pr112431-17.c| 51 
 .../gcc.target/riscv/rvv/base/pr112431-18.c| 51 
 4 files changed, 18 insertions(+), 190 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index f620f13682c..140b4638346 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3754,45 +3754,43 @@
 
 ;; Vector Quad-Widening Sign-extend and Zero-extend.
 (define_insn "@pred__vf4"
-  [(set (match_operand:VQEXTI 0 "register_operand"   "=vr,   vr,   
vr,   vr, ?&vr, ?&vr")
+  [(set (match_operand:VQEXTI 0 "register_operand"  "=&vr,&vr")
(if_then_else:VQEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 4 "vector_length_operand"  "   rK,   rK,   
rK,   rK,   rK,   rK")
-(match_operand 5 "const_int_operand"  "i,i,
i,i,i,i")
-(match_operand 6 "const_int_operand"  "i,i,
i,i,i,i")
-(match_operand 7 "const_int_operand"  "i,i,
i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   "vmWc1,vmWc1")
+(match_operand 4 "vector_length_operand"  "   rK,   rK")
+(match_operand 5 "const_int_operand"  "i,i")
+(match_operand 6 "const_int_operand"  "i,i")
+(match_operand 7 "const_int_operand"  "i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (any_extend:VQEXTI
-   (match_operand: 3 "register_operand" "  W43,  W43,  
W86,  W86,   vr,   vr"))
- (match_operand:VQEXTI 2 "vector_merge_operand"   "   vu,0,   
vu,0,   vu,0")))]
+   (match_operand: 3 "register_operand" "   vr,   vr"))
+ (match_operand:VQEXTI 2 "vector_merge_operand"   "   vu,0")))]
   "TARGET_VECTOR"
   "vext.vf4\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
+   (set_attr "mode" "")])
 
 ;; Vector Oct-Widening Sign-extend and Zero-extend.
 (define_insn "@pred__vf8"
-  [(set (match_operand:VOEXTI 0 "register_operand"  "=vr,   vr, 
?&vr, ?&vr")
+  [(set (match_operand:VOEXTI 0 "register_operand" "=&vr,&vr")
(if_then_else:VOEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"  
"vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 4 "vector_length_operand" "   rK,   rK,   
rK,   rK")
-(match_operand 5 "const_int_operand" "i,i,
i,i")
-(match_operand 6 "const_int_operand" "i,i,
i,i")
-(match_operand 7 "const_int_operand" "i,i,
i,i")
+   [(match_operand: 1 "vector_mask_operand"  "vmWc1,vmWc1")
+(match_operand 4 "vector_length_operand" "   rK,   rK")
+(match_operand 5 "const_int_operand" "i,i")
+(match_operand 6 "const_int_operand" "i,i")
+(match_operand 7 "const_int_operand" "i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (any_extend:VOEXTI
-   (match_operand: 3 "register_operand" "  W87,  W87,   
vr,   vr"))
- (match_operand:VOEXTI 2 "vector_merge_operand"  "   vu,0,   
vu,0")))]
+   (match_operand: 3 "register_operand" "   vr,   vr"))
+ (match_operand:VOEXTI 2 "vector_merge_operand"  "   vu,0")))]
   "TARGET_VECTOR"
   "vext.vf8\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" "W87,W87,none,none")])
+   (set_attr "mode" "")])
 
 ;; Vector Widening Add/Subtract/Multiply.
 (define_insn "@pred_dual_widen_"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
deleted file mode 100644
index 98f42458883..000
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
-
-#include "riscv_vector.h"
-
-size_t __attribute__ ((noinline))
-sumation (size_t sum0, size_t sum1, size_t sum2,

[gcc r14-10063] RISC-V: Add xfail test case for widening register overlap of vf4/vf8

2024-04-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:c4fdbdac1226787b4d33046f0be189a24dac468e

commit r14-10063-gc4fdbdac1226787b4d33046f0be189a24dac468e
Author: Pan Li 
Date:   Mon Apr 22 10:11:25 2024 +0800

RISC-V: Add xfail test case for widening register overlap of vf4/vf8

We reverted the patch below for register group overlap.  Add the related
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

303195e2a6b RISC-V: Support widening register overlap for vf4/vf8

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
* gcc.target/riscv/rvv/base/pr112431-18.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-16.c| 68 ++
 .../gcc.target/riscv/rvv/base/pr112431-17.c| 51 
 .../gcc.target/riscv/rvv/base/pr112431-18.c| 51 
 3 files changed, 170 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 000..42d11611d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+  vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+  vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+  vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+  vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+  vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+  vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+  vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+  sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+}
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 000..9ecc62e234b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it +=

[gcc r14-10064] Revert "RISC-V: Support highest-number regno overlap for widen ternary"

2024-04-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:cc46b6d4f3b4edc832a319ebf5053131dada3c8c

commit r14-10064-gcc46b6d4f3b4edc832a319ebf5053131dada3c8c
Author: Pan Li 
Date:   Mon Apr 22 14:10:02 2024 +0800

Revert "RISC-V: Support highest-number regno overlap for widen ternary"

This reverts commit 27fde325d64447a3a0d5d550c5976e5f3fb6dc16.

Diff:
---
 gcc/config/riscv/vector.md | 115 ++---
 .../gcc.target/riscv/rvv/base/pr112431-37.c| 103 --
 .../gcc.target/riscv/rvv/base/pr112431-38.c|  82 ---
 3 files changed, 55 insertions(+), 245 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 140b4638346..aef8cad20a0 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -5930,30 +5930,29 @@
(set_attr "mode" "")])
 
 (define_insn "@pred_widen_mul_plus_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"   "=vd, vr, 
vd, vr, vd, vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand""=&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand" " vm,Wc1, 
vm,Wc1, vm,Wc1,vmWc1")
-(match_operand 5 "vector_length_operand"" rK, rK, 
rK, rK, rK, rK,   rK")
-(match_operand 6 "const_int_operand""  i,  i,  
i,  i,  i,  i,i")
-(match_operand 7 "const_int_operand""  i,  i,  
i,  i,  i,  i,i")
-(match_operand 8 "const_int_operand""  i,  i,  
i,  i,  i,  i,i")
+   [(match_operand: 1 "vector_mask_operand" "vmWc1")
+(match_operand 5 "vector_length_operand""   rK")
+(match_operand 6 "const_int_operand""i")
+(match_operand 7 "const_int_operand""i")
+(match_operand 8 "const_int_operand""i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (plus:VWEXTI
(mult:VWEXTI
  (any_extend:VWEXTI
(vec_duplicate:
- (match_operand: 3 "reg_or_0_operand"   " rJ, rJ, 
rJ, rJ, rJ, rJ,   rJ")))
+ (match_operand: 3 "register_operand"   "r")))
  (any_extend:VWEXTI
-   (match_operand: 4 "register_operand" 
"W21,W21,W42,W42,W84,W84,   vr")))
-   (match_operand:VWEXTI 2 "register_operand"   "  0,  0,  
0,  0,  0,  0,0"))
+   (match_operand: 4 "register_operand" "   vr")))
+   (match_operand:VWEXTI 2 "register_operand"   "0"))
  (match_dup 2)))]
   "TARGET_VECTOR"
-  "vwmacc.vx\t%0,%z3,%4%p1"
+  "vwmacc.vx\t%0,%3,%4%p1"
   [(set_attr "type" "viwmuladd")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")])
+   (set_attr "mode" "")])
 
 (define_insn "@pred_widen_mul_plussu"
   [(set (match_operand:VWEXTI 0 "register_operand""=&vr")
@@ -5980,56 +5979,54 @@
(set_attr "mode" "")])
 
 (define_insn "@pred_widen_mul_plussu_scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand""=vd, 
vr, vd, vr, vd, vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand""=&vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"  " vm,Wc1, 
vm,Wc1, vm,Wc1,vmWc1")
-(match_operand 5 "vector_length_operand" " rK, rK, 
rK, rK, rK, rK,   rK")
-(match_operand 6 "const_int_operand" "  i,  i, 
 i,  i,  i,  i,i")
-(match_operand 7 "const_int_operand" "  i,  i, 
 i,  i,  i,  i,i")
-(match_operand 8 "const_int_operand" "  i,  i, 
 i,  i,  i,  i,i")
+   [(match_operand: 1 "vector_mask_operand" "vmWc1")
+(match_operand 5 "vector_length_operand""   rK")
+(match_operand 6 "const_int_operand""i")
+(match_operand 7 "const_int_operand""i")
+(match_operand 8 "const_int_operand""i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (plus:VWEXTI
(mult:VWEXTI
  (sign_extend:VWEXTI
(vec_duplicate:
- (match_operand: 3 "reg_or_0_operand"" rJ, rJ, 
rJ, rJ, rJ, rJ,   rJ")))
+ (match_operand: 3 "register_operand"   "r")))
  (zero_extend:VWEXTI
-   (match_operand: 4 "register_operand"  
"W21,W21,W42,W42,W84,W84,   vr")))
-   (match_operand:VWEXTI 2 "register_operand""  0,  0, 
 0,  0,  0,  0,0"))
+   (match_operand: 4 "

[gcc r14-10065] RISC-V: Add xfail test case for highest-number regno ternary overlap

2024-04-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:c7506847c020ad34eff248ab715eae238b9d1ed3

commit r14-10065-gc7506847c020ad34eff248ab715eae238b9d1ed3
Author: Pan Li 
Date:   Mon Apr 22 14:32:25 2024 +0800

RISC-V: Add xfail test case for highest-number regno ternary overlap

We reverted the patch below for register group overlap.  Add the related
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-37.c: New test.
* gcc.target/riscv/rvv/base/pr112431-38.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-37.c| 103 +
 .../gcc.target/riscv/rvv/base/pr112431-38.c|  82 
 2 files changed, 185 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
new file mode 100644
index 000..66e81ea905a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8);
+  vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv4r} { xfail riscv*-*-* } } } */
+/* { dg-fina

[gcc r14-10067] Revert "RISC-V: Support highpart overlap for indexed load with SRC EEW < DEST EEW"

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9257c7a72059aba0df1684a0722c4d1538cbb6d4

commit r14-10067-g9257c7a72059aba0df1684a0722c4d1538cbb6d4
Author: Pan Li 
Date:   Mon Apr 22 15:39:45 2024 +0800

Revert "RISC-V: Support highpart overlap for indexed load with SRC EEW < 
DEST EEW"

This reverts commit 4418d55bcd1b7e0ef823981b6a781d7de5c38cce.

Diff:
---
 gcc/config/riscv/vector.md |  63 ++---
 .../gcc.target/riscv/rvv/base/pr112431-28.c| 104 -
 .../gcc.target/riscv/rvv/base/pr112431-29.c|  68 --
 .../gcc.target/riscv/rvv/base/pr112431-30.c|  51 --
 .../gcc.target/riscv/rvv/base/pr112431-31.c|  68 --
 .../gcc.target/riscv/rvv/base/pr112431-32.c|  51 --
 .../gcc.target/riscv/rvv/base/pr112431-33.c|  51 --
 7 files changed, 30 insertions(+), 426 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index aef8cad20a0..768d23e9f1d 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2254,70 +2254,67 @@
 
 ;; DEST eew is greater than SOURCE eew.
 (define_insn "@pred_indexed_load_x2_greater_eew"
-  [(set (match_operand:VEEWEXT2 0 "register_operand" "=vr, 
  vr,   vr,   vr,   vr,   vr, ?&vr, ?&vr")
+  [(set (match_operand:VEEWEXT2 0 "register_operand""=&vr, 
 &vr")
(if_then_else:VEEWEXT2
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,  
 rK,   rK,   rK,   rK,   rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
-(match_operand 7 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
-(match_operand 8 "const_int_operand"  "i,  
  i,i,i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,  
 rK")
+(match_operand 6 "const_int_operand"  "i,  
  i")
+(match_operand 7 "const_int_operand"  "i,  
  i")
+(match_operand 8 "const_int_operand"  "i,  
  i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT2
-   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ,   rJ,   rJ,   rJ,   rJ,   rJ,   rJ")
+   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ")
 (mem:BLK (scratch))
-(match_operand: 4 "register_operand" "  W21,  
W21,  W42,  W42,  W84,  W84,   vr,   vr")] ORDER)
- (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,  
  0,   vu,0,   vu,0,   vu,0")))]
+(match_operand: 4 "register_operand" "   vr,  
 vr")] ORDER)
+ (match_operand:VEEWEXT2 2 "vector_merge_operand" "   vu,  
  0")))]
   "TARGET_VECTOR"
   "vlxei.v\t%0,(%z3),%4%p1"
   [(set_attr "type" "vldx")
-   (set_attr "mode" "")
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
+   (set_attr "mode" "")])
 
 (define_insn "@pred_indexed_load_x4_greater_eew"
-  [(set (match_operand:VEEWEXT4 0 "register_operand""=vr,  
  vr,   vr,   vr, ?&vr, ?&vr")
+  [(set (match_operand:VEEWEXT4 0 "register_operand""=&vr, 
 &vr")
(if_then_else:VEEWEXT4
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,  
 rK,   rK,   rK,   rK,   rK")
-(match_operand 6 "const_int_operand"  "i,  
  i,i,i,i,i")
-(match_operand 7 "const_int_operand"  "i,  
  i,i,i,i,i")
-(match_operand 8 "const_int_operand"  "i,  
  i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
+(match_operand 5 "vector_length_operand"  "   rK,  
 rK")
+(match_operand 6 "const_int_operand"  "i,  
  i")
+(match_operand 7 "const_int_operand"  "i,  
  i")
+(match_operand 8 "const_int_operand"  "i,  
  i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VEEWEXT4
-   [(match_operand 3 "pmode_reg_or_0_operand" "   rJ,  
 rJ,   rJ,   rJ,   rJ,   rJ")
+   [(match_operand 3 "pm

[gcc r14-10068] RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a367b99f916cb7d2d673180ace640096fd118950

commit r14-10068-ga367b99f916cb7d2d673180ace640096fd118950
Author: Pan Li 
Date:   Mon Apr 22 15:36:59 2024 +0800

RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW

Update in v2:
* Add change log to pr112431-34.c.

Original log:

We reverted the patch below for register group overlap.  Add the related
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

4418d55bcd1 RISC-V: Support highpart overlap for indexed load with SRC EEW 
< DEST EEW

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-34.c: Remove xfail for vluxei8 
check.
* gcc.target/riscv/rvv/base/pr112431-28.c: New test.
* gcc.target/riscv/rvv/base/pr112431-29.c: New test.
* gcc.target/riscv/rvv/base/pr112431-30.c: New test.
* gcc.target/riscv/rvv/base/pr112431-31.c: New test.
* gcc.target/riscv/rvv/base/pr112431-32.c: New test.
* gcc.target/riscv/rvv/base/pr112431-33.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-28.c| 104 +
 .../gcc.target/riscv/rvv/base/pr112431-29.c|  68 ++
 .../gcc.target/riscv/rvv/base/pr112431-30.c|  51 ++
 .../gcc.target/riscv/rvv/base/pr112431-31.c|  68 ++
 .../gcc.target/riscv/rvv/base/pr112431-32.c|  51 ++
 .../gcc.target/riscv/rvv/base/pr112431-33.c|  51 ++
 .../gcc.target/riscv/rvv/base/pr112431-34.c|   2 +-
 7 files changed, 394 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
new file mode 100644
index 000..c16cbdfe9f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+ size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+ size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl);
+  vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl);
+  vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl);
+  vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl);
+  vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl);
+  vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl);
+  vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl);
+  vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl);
+  vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl);
+  vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl);
+  vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl);
+  vint16m2_t vw11 = __riscv_vluxei8_v_i16

[gcc r14-10069] Revert "RISC-V: Support highpart overlap for floating-point widen instructions"

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:4df96b4ec788f2d588febf3555685f2700b932b3

commit r14-10069-g4df96b4ec788f2d588febf3555685f2700b932b3
Author: Pan Li 
Date:   Mon Apr 22 16:25:57 2024 +0800

Revert "RISC-V: Support highpart overlap for floating-point widen 
instructions"

This reverts commit 8614cbb253484e28c3eb20cde4d1067aad56de58.

Diff:
---
 gcc/config/riscv/vector.md |  78 -
 .../gcc.target/riscv/rvv/base/pr112431-10.c| 104 
 .../gcc.target/riscv/rvv/base/pr112431-11.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-12.c|  51 --
 .../gcc.target/riscv/rvv/base/pr112431-13.c| 188 -
 .../gcc.target/riscv/rvv/base/pr112431-14.c| 119 -
 .../gcc.target/riscv/rvv/base/pr112431-15.c|  86 --
 .../gcc.target/riscv/rvv/base/pr112431-7.c | 106 
 .../gcc.target/riscv/rvv/base/pr112431-8.c |  68 
 .../gcc.target/riscv/rvv/base/pr112431-9.c |  51 --
 10 files changed, 37 insertions(+), 882 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 768d23e9f1d..598aa8fba33 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7696,88 +7696,84 @@
 ;; -------------------------------------------------------------------------------
 
 (define_insn "@pred_widen_fcvt_x_f"
-  [(set (match_operand:VWCONVERTI 0 "register_operand"  "=vr,   vr,   
vr,   vr,  vr,vr, ?&vr, ?&vr")
+  [(set (match_operand:VWCONVERTI 0 "register_operand" "=&vr,  &vr")
(if_then_else:VWCONVERTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"  
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 4 "vector_length_operand" "   rK,   rK,   
rK,   rK,   rK,   rK,   rK,   rK")
-(match_operand 5 "const_int_operand" "i,i,
i,i,i,i,i,i")
-(match_operand 6 "const_int_operand" "i,i,
i,i,i,i,i,i")
-(match_operand 7 "const_int_operand" "i,i,
i,i,i,i,i,i")
-(match_operand 8 "const_int_operand" "i,i,
i,i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand"  "vmWc1,vmWc1")
+(match_operand 4 "vector_length_operand" "   rK,   rK")
+(match_operand 5 "const_int_operand" "i,i")
+(match_operand 6 "const_int_operand" "i,i")
+(match_operand 7 "const_int_operand" "i,i")
+(match_operand 8 "const_int_operand" "i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)
 (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:VWCONVERTI
-[(match_operand: 3 "register_operand" "  W21,  W21,  
W42,  W42,  W84,  W84,   vr,   vr")] VFCVTS)
- (match_operand:VWCONVERTI 2 "vector_merge_operand"  "   vu,0,   
vu,0,   vu,0,   vu,0")))]
+[(match_operand: 3 "register_operand" "   vr,   vr")] 
VFCVTS)
+ (match_operand:VWCONVERTI 2 "vector_merge_operand"  "   vu,0")))]
   "TARGET_VECTOR"
   "vfwcvt.x.f.v\t%0,%3%p1"
   [(set_attr "type" "vfwcvtftoi")
(set_attr "mode" "")
(set (attr "frm_mode")
-   (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
+   (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
 
 (define_insn "@pred_widen_"
-  [(set (match_operand:VWCONVERTI 0 "register_operand" "=vr,   vr,   
vr,   vr,  vr,vr, ?&vr, ?&vr")
+  [(set (match_operand:VWCONVERTI 0 "register_operand""=&vr,  &vr")
(if_then_else:VWCONVERTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand" 
"vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
-(match_operand 4 "vector_length_operand""   rK,   rK,   
rK,   rK,   rK,   rK,   rK,   rK")
-(match_operand 5 "const_int_operand""i,i,
i,i,i,i,i,i")
-(match_operand 6 "const_int_operand""i,i,
i,i,i,i,i,i")
-(match_operand 7 "const_int_operand""i,i,
i,i,i,i,i,i")
+   [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1")
+(match_operand 4 "vector_length_operand""   rK,   rK")
+(match_operand 5 "const_int_operand""i,i")
+(match_operand 6 "const_int_operand""i,i")
+(match_operand 7 "const_int_operand""i,i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (any_fix:VWCONVERTI
- 

[gcc r14-10070] RISC-V: Add xfail test case for highpart overlap floating-point widen insn

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b991193eb8a79ec7562f3de3df866df9f041015a

commit r14-10070-gb991193eb8a79ec7562f3de3df866df9f041015a
Author: Pan Li 
Date:   Mon Apr 22 16:07:36 2024 +0800

RISC-V: Add xfail test case for highpart overlap floating-point widen insn

We reverted the patch below for register group overlap.  Add the related
insn tests and mark them as xfail.  We will remove the xfail once
register overlap is supported in GCC-15.

8614cbb2534 RISC-V: Support highpart overlap for floating-point widen 
instructions

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-10.c: New test.
* gcc.target/riscv/rvv/base/pr112431-11.c: New test.
* gcc.target/riscv/rvv/base/pr112431-12.c: New test.
* gcc.target/riscv/rvv/base/pr112431-13.c: New test.
* gcc.target/riscv/rvv/base/pr112431-14.c: New test.
* gcc.target/riscv/rvv/base/pr112431-15.c: New test.
* gcc.target/riscv/rvv/base/pr112431-7.c: New test.
* gcc.target/riscv/rvv/base/pr112431-8.c: New test.
* gcc.target/riscv/rvv/base/pr112431-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/base/pr112431-10.c| 104 
 .../gcc.target/riscv/rvv/base/pr112431-11.c|  68 
 .../gcc.target/riscv/rvv/base/pr112431-12.c|  51 ++
 .../gcc.target/riscv/rvv/base/pr112431-13.c| 188 +
 .../gcc.target/riscv/rvv/base/pr112431-14.c| 119 +
 .../gcc.target/riscv/rvv/base/pr112431-15.c|  86 ++
 .../gcc.target/riscv/rvv/base/pr112431-7.c | 104 
 .../gcc.target/riscv/rvv/base/pr112431-8.c |  68 
 .../gcc.target/riscv/rvv/base/pr112431-9.c |  51 ++
 9 files changed, 839 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c
new file mode 100644
index 000..5d3f2fbe46d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+double __attribute__ ((noinline))
+sumation (double sum0, double sum1, double sum2, double sum3, double sum4,
+ double sum5, double sum6, double sum7, double sum8, double sum9,
+ double sum10, double sum11, double sum12, double sum13, double sum14,
+ double sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+double
+foo (char const *buf, size_t len)
+{
+  double sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint32m1_t v0 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v1 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v2 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v3 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v4 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v5 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v6 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v7 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v8 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v9 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v10 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v11 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v12 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v13 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v14 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  vint32m1_t v15 = __riscv_vle32_v_i32m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vfloat64m2_t vw0 = __riscv_vfwcvt_f_x_v_f64m2 (v0, vl);
+  vfloat64m2_t vw1 = __riscv_vfwcvt_f_x_v_f64m2 (v1, vl);
+  vfloat64m2_t vw2 = __riscv_vfwcvt_f_x_v_f64m2 (v2, vl);
+  vfloat64m2_t vw3 = __riscv_vfwcvt_f_x_v_f64m2 (v3, vl);
+  vfloat64m2_t vw4 = __riscv_vfwcvt_f_x_v_f64m2 (v4, vl);
+  vfloat64m2_t vw5 = __riscv_vfwcvt_f_x_v_f64m2 (v5, vl);
+  vfloat64m2_t vw6 = __riscv_vfwcvt_f_x_v_f64m2 (v6, vl);
+  vfloat64m2_t vw7 = __riscv_vfwcvt_f_x_v_f64m2 (v7, vl);
+  vfloat64m2_t vw8 = __riscv_vfwcvt_f_x_v_f64m2 (v8, vl);
+  vfloat64m2_t vw9 = __riscv_vfwcvt_f_x_v_f64m2 (v9, vl);
+  vfloat64m2_t vw10 = __riscv_

[gcc r14-10073] Revert "RISC-V: Robostify the W43, W86, W87 constraint enabled attribute"

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b78c88438cf3672987736edc013ffc0b20e879f7

commit r14-10073-gb78c88438cf3672987736edc013ffc0b20e879f7
Author: Pan Li 
Date:   Mon Apr 22 20:44:38 2024 +0800

Revert "RISC-V: Robostify the W43, W86, W87 constraint enabled attribute"

This reverts commit d3544cea63d0a642b6357a7be55986f5562beaa0.

Diff:
---
 gcc/config/riscv/riscv.md | 19 ++-
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f0928398698..3628e2215da 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -552,28 +552,13 @@
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 2"))
 (const_string "no")
 
- (and (eq_attr "group_overlap" "W42")
+ (and (eq_attr "group_overlap" "W42,W43")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4"))
 (const_string "no")
 
- (and (eq_attr "group_overlap" "W84")
+ (and (eq_attr "group_overlap" "W84,W86,W87")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8"))
 (const_string "no")
-
- ;; According to RVV ISA:
- ;; The destination EEW is greater than the source EEW, the source 
EMUL is at least 1,
-;; and the overlap is in the highest-numbered part of the destination 
register group
-;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
-;; So the source operand should have LMUL >= 1.
- (and (eq_attr "group_overlap" "W43")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4
-  && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
-(const_string "no")
-
- (and (eq_attr "group_overlap" "W86,W87")
- (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8
-  && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
-(const_string "no")
 ]
(const_string "yes")))


[gcc r14-10074] Revert "RISC-V: Rename vconstraint into group_overlap"

2024-04-22 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:cacc55a4c0be8d0bc7417b6a28924eadbbe428e3

commit r14-10074-gcacc55a4c0be8d0bc7417b6a28924eadbbe428e3
Author: Pan Li 
Date:   Mon Apr 22 20:45:40 2024 +0800

Revert "RISC-V: Rename vconstraint into group_overlap"

This reverts commit e65aaf8efe1900f7bbf76235a078000bf2ec8b45.

Diff:
---
 gcc/config/riscv/constraints.md | 12 ++--
 gcc/config/riscv/riscv.md   | 19 ---
 gcc/config/riscv/vector.md  |  4 ++--
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 972e8842c9f..e37c6936bfa 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -173,14 +173,14 @@
 (define_register_constraint "W84" "TARGET_VECTOR ? V_REGS : NO_REGS"
   "A vector register has register number % 8 == 4." "regno % 8 == 4")
 
-(define_register_constraint "W43" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 4 == 3." "regno % 4 == 3")
+(define_register_constraint "W41" "TARGET_VECTOR ? V_REGS : NO_REGS"
+  "A vector register has register number % 4 == 1." "regno % 4 == 1")
 
-(define_register_constraint "W86" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 8 == 6." "regno % 8 == 6")
+(define_register_constraint "W81" "TARGET_VECTOR ? V_REGS : NO_REGS"
+  "A vector register has register number % 8 == 1." "regno % 8 == 1")
 
-(define_register_constraint "W87" "TARGET_VECTOR ? V_REGS : NO_REGS"
-  "A vector register has register number % 8 == 7." "regno % 8 == 7")
+(define_register_constraint "W82" "TARGET_VECTOR ? V_REGS : NO_REGS"
+  "A vector register has register number % 8 == 2." "regno % 8 == 2")
 
 ;; This constraint is used to match instruction "csrr %0, vlenb" which is 
generated in "mov".
 ;; VLENB is a run-time constant which represent the vector register length in 
bytes.
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3628e2215da..1693d4008c6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -538,25 +538,22 @@
   ]
   (const_string "no")))
 
-;; Widening instructions have group-overlap constraints.  Those are only
-;; valid for certain register-group sizes.  This attribute marks the
-;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87"
-  (const_string "none"))
+(define_attr "vconstraint" "no,W21,W42,W84,W41,W81,W82"
+  (const_string "no"))
 
-(define_attr "group_overlap_valid" "no,yes"
-  (cond [(eq_attr "group_overlap" "none")
+(define_attr "vconstraint_enabled" "no,yes"
+  (cond [(eq_attr "vconstraint" "no")
  (const_string "yes")
 
- (and (eq_attr "group_overlap" "W21")
+ (and (eq_attr "vconstraint" "W21")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 2"))
 (const_string "no")
 
- (and (eq_attr "group_overlap" "W42,W43")
+ (and (eq_attr "vconstraint" "W42,W41")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4"))
 (const_string "no")
 
- (and (eq_attr "group_overlap" "W84,W86,W87")
+ (and (eq_attr "vconstraint" "W84,W81,W82")
  (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8"))
 (const_string "no")
 ]
@@ -590,7 +587,7 @@
 (eq_attr "fp_vector_disabled" "yes")
 (const_string "no")
 
-(eq_attr "group_overlap_valid" "no")
+(eq_attr "vconstraint_enabled" "no")
 (const_string "no")
 
 (eq_attr "spec_restriction_disabled" "yes")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 598aa8fba33..cb5174a5e91 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3747,7 +3747,7 @@
   "vext.vf2\t%0,%3%p1"
   [(set_attr "type" "vext")
(set_attr "mode" "<MODE>")
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
+   (set_attr "vconstraint" "W21,W21,W42,W42,W84,W84,no,no")])
 
 ;; Vector Quad-Widening Sign-extend and Zero-extend.
 (define_insn "@pred_<optab><mode>_vf4"
@@ -3970,7 +3970,7 @@
(set (attr "ta") (symbol_ref "riscv_vector::get_ta(operands[5])"))
(set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])"))
(set (attr "avl_type_idx") (const_int 7))
-   (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")])
+   (set_attr "vconstraint" "W21,W21,W42,W42,W84,W84,no,no")])
 
 ;; 
---
 ;;  Predicated integer Narrowing operations


  1   2   3   4   >