[gcc r15-342] RISC-V: Make full-vec-move1.c test robust for optimization
https://gcc.gnu.org/g:b1520d2260c5e0cfcd7a4354fab70f66e2912ff2 commit r15-342-gb1520d2260c5e0cfcd7a4354fab70f66e2912ff2 Author: Pan Li Date: Thu May 9 10:56:46 2024 +0800 RISC-V: Make full-vec-move1.c test robust for optimization While investigating support for early-break autovec, we noticed that the test full-vec-move1.c is optimized to 'return 0;' in the main function body. The value of the V-typed variable is a compile-time constant, so the second loop is treated as assert (true). Thus, the ccp4 pass eliminates these statements and just returns 0. typedef int16_t V __attribute__((vector_size (128))); int main () { V v; for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++) (v)[i] = i; V res = v; for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++) assert (res[i] == i); // will be optimized to assert (true) } This patch introduces an extern function that uses res[i], which prevents the ccp4 optimization from removing the loop. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: Introduce extern func use to get rid of ccp4 optimization. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c index d73bad4af6f7..fae2ae91572f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c @@ -2,11 +2,12 @@ /* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d -fno-vect-cost-model -mrvv-vector-bits=zvl" } */ #include -#include /* This would cause us to emit a vl1r.v for VNx4HImode even when the hardware vector size vl > 64. 
*/ +extern int16_t test_element (int16_t); + typedef int16_t V __attribute__((vector_size (128))); int main () @@ -14,9 +15,10 @@ int main () V v; for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++) (v)[i] = i; + V res = v; for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++) -assert (res[i] == i); +test_element (res[i]); } /* { dg-final { scan-assembler-not {vl[1248]r.v} } } */
[gcc r15-435] RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar
https://gcc.gnu.org/g:41b3cf262e61aee9d26380f1c820e0eaae740f50 commit r15-435-g41b3cf262e61aee9d26380f1c820e0eaae740f50 Author: Pan Li Date: Sat May 11 15:25:28 2024 +0800 RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar For the vfw vx format RVV intrinsic, the scalar type _Float16 also requires the zvfh extension. Unfortunately, we only check the vector tree type and miss the scalar _Float16 type checking. For example: vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl) { return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl); } It should report some error message like zvfh extension is required instead of ICE for unreg insn. This patch would like to make up such kind of validation for _Float16 in the RVV intrinsic API. It will report some error like below when there is no zvfh enabled. error: built-in function '__riscv_vfwsub_wf_f32mf2(vs2, rs1, vl)' requires the zvfhmin or zvfh ISA extension Passed the rv64gcv fully regression tests, included c/c++/fortran. PR target/114988 gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (validate_instance_type_required_extensions): New func impl to validate the intrinisc func type ops. (expand_builtin): Validate instance type before expand. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114988-1.c: New test. * gcc.target/riscv/rvv/base/pr114988-2.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-vector-builtins.cc | 51 ++ .../gcc.target/riscv/rvv/base/pr114988-1.c | 9 .../gcc.target/riscv/rvv/base/pr114988-2.c | 9 3 files changed, 69 insertions(+) diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 192a6c230d1c..3fdb4400d70d 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4632,6 +4632,54 @@ gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt) return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold (); } +static bool +validate_instance_type_required_extensions (const rvv_type_info type, + tree exp) +{ + uint64_t exts = type.required_extensions; + + if ((exts & RVV_REQUIRE_ELEN_FP_16) && +!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags)) +{ + error_at (EXPR_LOCATION (exp), + "built-in function %qE requires the " + "zvfhmin or zvfh ISA extension", + exp); + return false; +} + + if ((exts & RVV_REQUIRE_ELEN_FP_32) && +!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags)) +{ + error_at (EXPR_LOCATION (exp), + "built-in function %qE requires the " + "zve32f, zve64f, zve64d or v ISA extension", + exp); + return false; +} + + if ((exts & RVV_REQUIRE_ELEN_FP_64) && +!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags)) +{ + error_at (EXPR_LOCATION (exp), + "built-in function %qE requires the zve64d or v ISA extension", + exp); + return false; +} + + if ((exts & RVV_REQUIRE_ELEN_64) && +!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags)) +{ + error_at (EXPR_LOCATION (exp), + "built-in function %qE requires the " + "zve64x, zve64f, zve64d or v ISA extension", + exp); + return false; +} + + return true; +} + /* Expand a call to the RVV function with subcode CODE. EXP is the call expression and TARGET is the preferred location for the result. Return the value of the lhs. 
*/ @@ -4649,6 +4697,9 @@ expand_builtin (unsigned int code, tree exp, rtx target) return target; } + if (!validate_instance_type_required_extensions (rfn.instance.type, exp)) +return target; + return function_expander (rfn.instance, rfn.decl, exp, target).expand (); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c new file mode 100644 index ..b8474804c880 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl) +{ + return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl); /* { dg-error {built-in function '__riscv_vfwsub_wf_f32mf2\(vs2, rs1, vl\)' requires the zvfhmin or zvfh ISA extension} } */ +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c new file mode 100644 index ..49aa3141af31 --- /dev
[gcc r15-442] RISC-V: Fix format issue for trailing operator [NFC]
https://gcc.gnu.org/g:b6dc8464e613d1da2b28235bbd2f9c3fd4bc386b commit r15-442-gb6dc8464e613d1da2b28235bbd2f9c3fd4bc386b Author: Pan Li Date: Tue May 14 09:38:55 2024 +0800 RISC-V: Fix format issue for trailing operator [NFC] This patch would like to fix below format issue of trailing operator. === ERROR type #1: trailing operator (4 error(s)) === gcc/config/riscv/riscv-vector-builtins.cc:4641:39: if ((exts & RVV_REQUIRE_ELEN_FP_16) && gcc/config/riscv/riscv-vector-builtins.cc:4651:39: if ((exts & RVV_REQUIRE_ELEN_FP_32) && gcc/config/riscv/riscv-vector-builtins.cc:4661:39: if ((exts & RVV_REQUIRE_ELEN_FP_64) && gcc/config/riscv/riscv-vector-builtins.cc:4670:36: if ((exts & RVV_REQUIRE_ELEN_64) && Passed the ./contrib/check_GNU_style.sh for this patch, and double checked there is no other format issue of the original patch. Committed as format change. gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (validate_instance_type_required_extensions): Remove the operator from the trailing and put it to new line. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-vector-builtins.cc | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 3fdb4400d70d..c08d87a26807 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4638,8 +4638,8 @@ validate_instance_type_required_extensions (const rvv_type_info type, { uint64_t exts = type.required_extensions; - if ((exts & RVV_REQUIRE_ELEN_FP_16) && -!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags)) + if ((exts & RVV_REQUIRE_ELEN_FP_16) +&& !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags)) { error_at (EXPR_LOCATION (exp), "built-in function %qE requires the " @@ -4648,8 +4648,8 @@ validate_instance_type_required_extensions (const rvv_type_info type, return false; } - if ((exts & RVV_REQUIRE_ELEN_FP_32) && -!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags)) + if ((exts & RVV_REQUIRE_ELEN_FP_32) +&& !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags)) { error_at (EXPR_LOCATION (exp), "built-in function %qE requires the " @@ -4658,8 +4658,8 @@ validate_instance_type_required_extensions (const rvv_type_info type, return false; } - if ((exts & RVV_REQUIRE_ELEN_FP_64) && -!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags)) + if ((exts & RVV_REQUIRE_ELEN_FP_64) +&& !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags)) { error_at (EXPR_LOCATION (exp), "built-in function %qE requires the zve64d or v ISA extension", @@ -4667,8 +4667,8 @@ validate_instance_type_required_extensions (const rvv_type_info type, return false; } - if ((exts & RVV_REQUIRE_ELEN_64) && -!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags)) + if ((exts & RVV_REQUIRE_ELEN_64) +&& !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags)) { error_at (EXPR_LOCATION (exp), "built-in function %qE requires the "
[gcc r15-576] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
https://gcc.gnu.org/g:52b0536710ff3f3ace72ab00ce9ef6c630cd1183 commit r15-576-g52b0536710ff3f3ace72ab00ce9ef6c630cd1183 Author: Pan Li Date: Wed May 15 10:14:05 2024 +0800 Internal-fn: Support new IFN SAT_ADD for unsigned scalar int This patch would like to add the middle-end presentation for the saturation add. Aka set the result of add to the max when overflow. It will take the pattern similar as below. SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x)) Take uint8_t as example, we will have: * SAT_ADD (1, 254) => 255. * SAT_ADD (1, 255) => 255. * SAT_ADD (2, 255) => 255. * SAT_ADD (255, 255) => 255. Given below example for the unsigned scalar integer uint64_t: uint64_t sat_add_u64 (uint64_t x, uint64_t y) { return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x)); } Before this patch: uint64_t sat_add_uint64_t (uint64_t x, uint64_t y) { long unsigned int _1; _Bool _2; long unsigned int _3; long unsigned int _4; uint64_t _7; long unsigned int _10; __complex__ long unsigned int _11; ;; basic block 2, loop depth 0 ;;pred: ENTRY _11 = .ADD_OVERFLOW (x_5(D), y_6(D)); _1 = REALPART_EXPR <_11>; _10 = IMAGPART_EXPR <_11>; _2 = _10 != 0; _3 = (long unsigned int) _2; _4 = -_3; _7 = _1 | _4; return _7; ;;succ: EXIT } After this patch: uint64_t sat_add_uint64_t (uint64_t x, uint64_t y) { uint64_t _7; ;; basic block 2, loop depth 0 ;;pred: ENTRY _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call] return _7; ;;succ: EXIT } The below tests are passed for this patch: 1. The riscv fully regression tests. 3. The x86 bootstrap tests. 4. The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD to the return true switch case(s). * internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD. * match.pd: Add unsigned SAT_ADD match(es). * optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssadd. 
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New extern func decl generated in match.pd match. (match_saturation_arith): New func impl to match the saturation arith. (math_opts_dom_walker::after_dom_children): Try match saturation arith when IOR expr. Signed-off-by: Pan Li Diff: --- gcc/internal-fn.cc| 1 + gcc/internal-fn.def | 2 ++ gcc/match.pd | 51 +++ gcc/optabs.def| 4 ++-- gcc/tree-ssa-math-opts.cc | 32 + 5 files changed, 88 insertions(+), 2 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 0a7053c2286c..73045ca8c8c1 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn) case IFN_UBSAN_CHECK_MUL: case IFN_ADD_OVERFLOW: case IFN_MUL_OVERFLOW: +case IFN_SAT_ADD: case IFN_VEC_WIDEN_PLUS: case IFN_VEC_WIDEN_PLUS_LO: case IFN_VEC_WIDEN_PLUS_HI: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 848bb9dbff3f..25badbb86e56 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, smulhrs, umulhrs, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) + DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary) diff --git a/gcc/match.pd b/gcc/match.pd index 07e743ae464b..0f9c34fa8974 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || POINTER_TYPE_P (itype)) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)) +/* Unsigned Saturation Add */ +(match (usadd_left_part_1 @0 @1) + (plus:c @0 @1) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1) + +(match (usadd_left_part_2 @0 @1) + (realpart (IFN_ADD_OVERFLOW:c @0 @1)) + (if 
(INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1) + +(match (usadd_right_part_1 @0 @1) + (negate (convert (lt (plus:c @0 @1) @0))) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_matc
[gcc r15-577] Vect: Support new IFN SAT_ADD for unsigned vector int
https://gcc.gnu.org/g:d4dee347b3fe1982bab26485ff31cd039c9df010 commit r15-577-gd4dee347b3fe1982bab26485ff31cd039c9df010 Author: Pan Li Date: Wed May 15 10:14:06 2024 +0800 Vect: Support new IFN SAT_ADD for unsigned vector int For vectorize, we leverage the existing vect pattern recog to find the pattern similar to scalar and let the vectorizer to perform the rest part for standard name usadd3 in vector mode. The riscv vector backend have insn "Vector Single-Width Saturating Add and Subtract" which can be leveraged when expand the usadd3 in vector mode. For example: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { unsigned i; for (i = 0; i < n; i++) out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i])); } Before this patch: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { ... _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]); ivtmp_58 = _80 * 8; vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0); vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0); vect__7.11_66 = vect__4.7_61 + vect__6.10_65; mask__8.12_67 = vect__4.7_61 > vect__7.11_66; vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, vect__7.11_66); .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72); vectp_x.5_60 = vectp_x.5_59 + ivtmp_58; vectp_y.8_64 = vectp_y.8_63 + ivtmp_58; vectp_out.16_75 = vectp_out.16_74 + ivtmp_58; ivtmp_79 = ivtmp_78 - _80; ... } After this patch: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { ... _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]); ivtmp_46 = _62 * 8; vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0); vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0); vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53); .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54); ... } The below test suites are passed for this patch. 
* The riscv fully regression tests. * The x86 bootstrap tests. * The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func decl generated by match.pd match. (vect_recog_sat_add_pattern): New func impl to recog the pattern for unsigned SAT_ADD. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-patterns.cc | 52 +++ 1 file changed, 52 insertions(+) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index dfb7d8005262..a313dc64643e 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4487,6 +4487,57 @@ vect_recog_mult_pattern (vec_info *vinfo, return pattern_stmt; } +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); + +/* + * Try to detect saturation add pattern (SAT_ADD), aka below gimple: + * _7 = _4 + _6; + * _8 = _4 > _7; + * _9 = (long unsigned int) _8; + * _10 = -_9; + * _12 = _7 | _10; + * + * And then simplied to + * _12 = .SAT_ADD (_4, _6); + */ + +static gimple * +vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); + + if (!is_gimple_assign (last_stmt)) +return NULL; + + tree res_ops[2]; + tree lhs = gimple_assign_lhs (last_stmt); + + if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL)) +{ + tree itype = TREE_TYPE (res_ops[0]); + tree vtype = get_vectype_for_scalar_type (vinfo, itype); + + if (vtype != NULL_TREE + && direct_internal_fn_supported_p (IFN_SAT_ADD, vtype, + OPTIMIZE_FOR_BOTH)) + { + *type_out = vtype; + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0], + res_ops[1]); + + gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL)); + gimple_call_set_nothrow (call, /* nothrow_p */ false); + gimple_set_location (call, gimple_location (last_stmt)); + + vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt); + return call; + } +} + + return NULL; +} + /* Detect a signed 
division by a constant that wouldn't be otherwise vectorized: @@ -6987,6 +7038,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, { vect_recog_divmod_pattern, "divmod" }, { vect_recog_mult_pattern, "mult" }, + { vect_recog_sat_add_patter
[gcc r15-578] Vect: Support loop len in vectorizable early exit
https://gcc.gnu.org/g:57f8a2f67c1536be23231808ab00613ab69193ed commit r15-578-g57f8a2f67c1536be23231808ab00613ab69193ed Author: Pan Li Date: Thu May 16 09:58:13 2024 +0800 Vect: Support loop len in vectorizable early exit This patch adds early break auto-vectorization support for target which use length on partial vectorization. Consider this following example: unsigned vect_a[802]; unsigned vect_b[802]; void test (unsigned x, int n) { for (int i = 0; i < n; i++) { vect_b[i] = x + i; if (vect_a[i] > x) break; vect_a[i] = x; } } We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias). And then the IR of RVV looks like below: ... _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]); _55 = (int) _87; ... mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67; vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \ {0, ... }, _87, 0); if (vec_len_mask_72 != { 0, ... }) goto ; [5.50%] else goto ; [94.50%] The below tests are passed for this patch: 1. The riscv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen the loop len mask. * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the vect_gen_loop_len_mask for 1 or more stmt(s). * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl for vect_gen_loop_len_mask. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-loop.cc | 27 +++ gcc/tree-vect-stmts.cc | 17 +++-- gcc/tree-vectorizer.h | 4 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 361aec064884..83c0544b6aa5 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, return loop_len; } +/* Generate the tree for the loop len mask and return it. Given the lens, + nvectors, vectype, index and factor to gen the len mask as below. 
+ + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) +*/ +tree +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, + unsigned int nvectors, tree vectype, tree stmt, + unsigned int index, unsigned int factor) +{ + tree all_one_mask = build_all_ones_cst (vectype); + tree all_zero_mask = build_zero_cst (vectype); + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, + factor); + tree bias = build_int_cst (intQI_type_node, +LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)); + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask"); + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, + all_one_mask, all_zero_mask, len, + bias); + gimple_call_set_lhs (call, len_mask); + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); + + return len_mask; +} + /* Scale profiling counters by estimation for LOOP which is vectorized by factor VF. If FLAT is true, the loop we started with had unrealistically flat diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b8a71605f1bc..672959501bb7 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, ncopies = vect_get_num_copies (loop_vinfo, vectype); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); /* Now build the new conditional. Pattern gimple_conds get dropped during codegen so we must replace the original insn. 
*/ @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, { if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, OPTIMIZE_FOR_SPEED)) - return false; + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); else vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); } - return true; } @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, stmts[i], &cond_gsi); workset.quick_push (stmt_mas
[gcc r15-582] RISC-V: Implement vectorizable early exit with vcond_mask_len
https://gcc.gnu.org/g:6c1de786e53a11150feb16ba990d0d6c6fd910db commit r15-582-g6c1de786e53a11150feb16ba990d0d6c6fd910db Author: Pan Li Date: Thu May 16 10:02:40 2024 +0800 RISC-V: Implement vectorizable early exit with vcond_mask_len After we support the loop lens for the vectorizable, we would like to implement the feature for the RISC-V target. Given below example: unsigned vect_a[1923]; unsigned vect_b[1923]; void test (unsigned limit, int n) { for (int i = 0; i < n; i++) { vect_b[i] = limit + i; if (vect_a[i] > limit) { ret = vect_b[i]; return ret; } vect_a[i] = limit; } } Before this patch: ... .L8: swa3,0(a5) addiw a0,a0,1 addi a4,a4,4 addi a5,a5,4 beq a1,a0,.L2 .L4: swa0,0(a4) lwa2,0(a5) bleu a2,a3,.L8 ret After this patch: ... .L5: vsetvli a5,a3,e8,mf4,ta,ma vmv1r.v v4,v2 vsetvli t4,zero,e32,m1,ta,ma vmv.v.x v1,a5 vadd.vv v2,v2,v1 vsetvli zero,a5,e32,m1,ta,ma vadd.vv v5,v4,v3 slli a6,a5,2 vle32.v v1,0(t1) vmsltu.vv v1,v3,v1 vcpop.m t4,v1 beq t4,zero,.L4 vmv.x.s a4,v4 .L3: ... The below tests are passed for this patch: 1. The riscv fully regression tests. gcc/ChangeLog: * config/riscv/autovec-opt.md(*vcond_mask_len_popcount_): New pattern of vcond_mask_len_popcount for vector bool mode. * config/riscv/autovec.md (vcond_mask_len_): New pattern of vcond_mask_len for vector bool mode. (cbranch4): New pattern for vector bool mode. * config/riscv/vector-iterators.md: Add new unspec UNSPEC_SELECT_MASK. * config/riscv/vector.md (@pred_popcount): Add VLS mode to popcount pattern. (@pred_popcount): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/early-break-1.c: New test. * gcc.target/riscv/rvv/autovec/early-break-2.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec-opt.md| 33 gcc/config/riscv/autovec.md| 61 ++ gcc/config/riscv/vector-iterators.md | 1 + gcc/config/riscv/vector.md | 18 +++ .../gcc.target/riscv/rvv/autovec/early-break-1.c | 34 .../gcc.target/riscv/rvv/autovec/early-break-2.c | 37 + 6 files changed, 175 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 645dc53d8680..04f85d8e4553 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1436,3 +1436,36 @@ DONE; } [(set_attr "type" "vmalu")]) + +;; Optimization pattern for early break auto-vectorization +;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount +;; -> non vlmax popcount (mask, len) +(define_insn_and_split "*vcond_mask_len_popcount_" + [(set (match_operand:P 0 "register_operand") +(popcount:P + (unspec:VB_VLS [ + (unspec:VB_VLS [ + (match_operand:VB_VLS 1 "register_operand") + (match_operand:VB_VLS 2 "const_1_operand") + (match_operand:VB_VLS 3 "const_0_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK) + (match_operand 6 "autovec_length_operand") + (const_int 1) + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))] + "TARGET_VECTOR + && can_create_pseudo_p () + && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (mode)).exists ()" + "#" + "&& 1" + [(const_int 0)] + { +riscv_vector::emit_nonvlmax_insn ( + code_for_pred_popcount (mode, Pmode), + riscv_vector::CPOP_OP, + operands, operands[4]); +DONE; + } + [(set_attr "type" "vector")] +) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index aa1ae0fe075b..1ee3c8052fb4 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2612,3 +2612,64 @@ DONE; } ) + +;; = +;; == Early break auto-vectorization patterns +;; = + +;; vcond_mask_len (mask, 1s, 0s, len, bias) +;; => mask[i] = mask[i] && i < len ? 
1 : 0 +(define_insn_and_split "vcond_mask_len_" + [(set (match_operand:VB 0 "register_operand") +(unspec: VB [ + (match_operand:VB 1 "register_operand") + (match_operand:VB 2 "const_1_operand") + (match_operand:VB 3 "const_0_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] U
[gcc r15-583] RISC-V: Enable vectorizable early exit testsuite
https://gcc.gnu.org/g:556e777298dac8574533935000c57335c5232921 commit r15-583-g556e777298dac8574533935000c57335c5232921 Author: Pan Li Date: Thu May 16 10:04:10 2024 +0800 RISC-V: Enable vectorizable early exit testsuite Now that vectorizable early exit is supported in RISC-V, we would like to enable the gcc vect tests for vectorizable early exit. The test vect-early-break_124-pr114403.c fails to vectorize for now, because the 8-byte __builtin_memcpy fails to be folded into an int64 assignment during ccp1. We will improve that first, and mark this test as xfail for RISC-V for now. The below tests are passed for this patch: 1. The riscv fully regression tests. gcc/testsuite/ChangeLog: * gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as it will have 2 times LOOP VECTORIZED in RISC-V. * gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the riscv backend. * lib/target-supports.exp: Add RISC-V backend. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 2 ++ gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +- gcc/testsuite/lib/target-supports.exp | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c index fdd9032da98a..2f80bf89e5e6 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c @@ -28,6 +28,8 @@ main () if (__builtin_memcmp (x, res, sizeof (x)) != 0) abort (); + +#pragma GCC novector for (int i = 0; i < 32; ++i) if (flag[i] != 0 && flag[i] != 1) abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c index 51abf245ccb5..101ae1e0eaa1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c @@ -2,7 +2,7 @@ /* { dg-require-effective-target vect_early_break_hw } */ /* { dg-require-effective-target vect_long_long } 
*/ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } } */ #include "tree-vect.h" diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 6f5d477b1288..ec9baa4f32a3 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4099,6 +4099,7 @@ proc check_effective_target_vect_early_break { } { || [check_effective_target_arm_v8_neon_ok] || [check_effective_target_sse4] || [istarget amdgcn-*-*] + || [check_effective_target_riscv_v] }}] } @@ -4114,6 +4115,7 @@ proc check_effective_target_vect_early_break_hw { } { || [check_effective_target_arm_v8_neon_hw] || [check_sse4_hw_available] || [istarget amdgcn-*-*] + || [check_effective_target_riscv_v_ok] }}] }
[gcc r15-585] RISC-V: Cleanup some temporally files [NFC]
https://gcc.gnu.org/g:d477d683d5c6db90c80d348c795709ae6444ba7a commit r15-585-gd477d683d5c6db90c80d348c795709ae6444ba7a Author: Pan Li Date: Fri May 17 07:45:19 2024 +0800 RISC-V: Cleanup some temporally files [NFC] Just noticed some temporary files under gcc/config/riscv; deleted them as useless. * Empty file j. * Vim swap file. gcc/ChangeLog: * config/riscv/.riscv.cc.swo: Removed. * config/riscv/j: Removed. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/.riscv.cc.swo | Bin 417792 -> 0 bytes gcc/config/riscv/j | 0 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/gcc/config/riscv/.riscv.cc.swo b/gcc/config/riscv/.riscv.cc.swo deleted file mode 100644 index 77ed37353bee.. Binary files a/gcc/config/riscv/.riscv.cc.swo and /dev/null differ diff --git a/gcc/config/riscv/j b/gcc/config/riscv/j deleted file mode 100644 index e69de29bb2d1..
[gcc r15-642] RISC-V: Implement IFN SAT_ADD for both the scalar and vector
https://gcc.gnu.org/g:34ed2b4593fa98b613632d0dde30b6ba3e7ecad9 commit r15-642-g34ed2b4593fa98b613632d0dde30b6ba3e7ecad9 Author: Pan Li Date: Fri May 17 18:49:46 2024 +0800 RISC-V: Implement IFN SAT_ADD for both the scalar and vector The patch implement the SAT_ADD in the riscv backend as the sample for both the scalar and vector. Given below vector as example: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { unsigned i; for (i = 0; i < n; i++) out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i])); } Before this patch: vec_sat_add_u64: ... vsetvli a5,a3,e64,m1,ta,ma vle64.v v0,0(a1) vle64.v v1,0(a2) sllia4,a5,3 sub a3,a3,a5 add a1,a1,a4 add a2,a2,a4 vadd.vv v1,v0,v1 vmsgtu.vv v0,v0,v1 vmerge.vim v1,v1,-1,v0 vse64.v v1,0(a0) ... After this patch: vec_sat_add_u64: ... vsetvli a5,a3,e64,m1,ta,ma vle64.v v1,0(a1) vle64.v v2,0(a2) sllia4,a5,3 sub a3,a3,a5 add a1,a1,a4 add a2,a2,a4 vsaddu.vv v1,v1,v2 <= Vector Single-Width Saturating Add vse64.v v1,0(a0) ... The below test suites are passed for this patch. * The riscv fully regression tests. * The aarch64 fully regression tests. * The x86 bootstrap tests. * The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * config/riscv/autovec.md (usadd3): New pattern expand for the unsigned SAT_ADD in vector mode. * config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl to expand usadd3 pattern. (expand_vec_usadd): Ditto but for vector. * config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit the vsadd insn. (expand_vec_usadd): New func impl to expand usadd3 for vector. * config/riscv/riscv.cc (riscv_expand_usadd): New func impl to expand usadd3 for scalar. * config/riscv/riscv.md (usadd3): New pattern expand for the unsigned SAT_ADD in scalar mode. * config/riscv/vector.md: Allow VLS mode for vsaddu. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test. * gcc.target/riscv/sat_arith.h: New test. * gcc.target/riscv/sat_u_add-1.c: New test. * gcc.target/riscv/sat_u_add-2.c: New test. * gcc.target/riscv/sat_u_add-3.c: New test. * gcc.target/riscv/sat_u_add-4.c: New test. * gcc.target/riscv/sat_u_add-run-1.c: New test. * gcc.target/riscv/sat_u_add-run-2.c: New test. * gcc.target/riscv/sat_u_add-run-3.c: New test. * gcc.target/riscv/sat_u_add-run-4.c: New test. * gcc.target/riscv/scalar_sat_binary.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md| 17 + gcc/config/riscv/riscv-protos.h| 2 + gcc/config/riscv/riscv-v.cc| 19 ++ gcc/config/riscv/riscv.cc | 55 gcc/config/riscv/riscv.md | 11 gcc/config/riscv/vector.md | 12 ++-- .../riscv/rvv/autovec/binop/vec_sat_binary.h | 33 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c | 19 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c | 20 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c | 20 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c | 20 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c | 75 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c | 75 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c | 75 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c | 75 ++ gcc/testsuite/gcc.target/riscv/sat_arith.h | 31 + gcc/testsuite/gcc.target/riscv/sat_u_add-1.c | 19 ++ gcc/testsuite/gcc.target/riscv/sat_u_add-2.c | 21 ++ gcc/testsuite/gcc.targe
[gcc r15-655] DSE: Fix ICE after allow vector type in get_stored_val
https://gcc.gnu.org/g:88b3f83238087cbe2aa2c51c6054796856f2fb94 commit r15-655-g88b3f83238087cbe2aa2c51c6054796856f2fb94 Author: Pan Li Date: Tue Apr 30 09:42:39 2024 +0800 DSE: Fix ICE after allow vector type in get_stored_val A previous change allowed vector types in get_stored_val when the read size is less than or equal to the store size. Unfortunately, validate_subreg treats a vector type whose size is less than the vector register as invalid, which results in an ICE here. This patch fixes it by filtering out the invalid type sizes, making sure the subreg is valid for both the read_mode and the store_mode before performing the real gen_lowpart. The test suites below pass with this patch: * The x86 bootstrap test. * The x86 regression test. * The riscv rv64gcv regression test. * The riscv rv64gc regression test. * The aarch64 regression test. gcc/ChangeLog: * dse.cc (get_stored_val): Make sure read_mode/write_mode is valid subreg before gen_lowpart. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/bug-6.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/dse.cc | 4 +++- gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c | 22 ++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf7..1596da91da08 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) -&& targetm.modes_tieable_p (read_mode, store_mode)) +&& targetm.modes_tieable_p (read_mode, store_mode) +&& validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), + subreg_lowpart_offset (read_mode, store_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c new file mode 100644 index ..5bb00b8f587e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c @@ -0,0 +1,22 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */ + +struct A { float x, y; }; +struct B { struct A u; }; + +extern void bar (struct A *); + +float +f3 (struct B *x, int y) +{ + struct A p = {1.0f, 2.0f}; + struct A *q = &x[y].u; + + __builtin_memcpy (&q->x, &p.x, sizeof (float)); + __builtin_memcpy (&q->y, &p.y, sizeof (float)); + + bar (&p); + + return x[y].u.x + x[y].u.y; +}
[gcc r15-2962] RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305]
https://gcc.gnu.org/g:a11dcaff9fc94971188d54310d3053e9f68a0d3d commit r15-2962-ga11dcaff9fc94971188d54310d3053e9f68a0d3d Author: 曾治金 Date: Wed Aug 14 14:06:23 2024 +0800 RISC-V: Fix factor in dwarf_poly_indeterminate_value [PR116305] This patch fixes the bug (PR116305) introduced by commit bd93ef for the RISC-V target. Commit bd93ef changes chunk_num from 1 to TARGET_MIN_VLEN/128 in riscv_convert_vector_bits when TARGET_MIN_VLEN is larger than 128, which in turn changes the value of BYTES_PER_RISCV_VECTOR. For example, with TARGET_MIN_VLEN equal to 256, the value of BYTES_PER_RISCV_VECTOR was [8, 8] before commit bd93ef, but is now [16, 16]. The values of riscv_bytes_per_vector_chunk and BYTES_PER_RISCV_VECTOR are therefore no longer equal. The prologue uses BYTES_PER_RISCV_VECTOR.coeffs[1] to estimate the vlenb register value in riscv_legitimize_poly_move, and dwarf2cfi also obtains the estimated vlenb register value from riscv_dwarf_poly_indeterminate_value to calculate the number by which to multiply the vlenb register value. So the factor needs to change from riscv_bytes_per_vector_chunk to BYTES_PER_RISCV_VECTOR.coeffs[1]; otherwise we get incorrect dwarf information. An incorrect example follows: ``` csrr t0,vlenb slli t1,t0,1 sub sp,sp,t1 .cfi_escape 0xf,0xb,0x72,0,0x92,0xa2,0x38,0,0x34,0x1e,0x23,0x50,0x22 ``` The sequence '0x92,0xa2,0x38,0' means the vlenb register, '0x34' means the literal 4, and '0x1e' means the multiply operation. But in fact, the vlenb register value only needs to be multiplied by the literal 2. PR target/116305 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_dwarf_poly_indeterminate_value): Take BYTES_PER_RISCV_VECTOR for *factor instead of riscv_bytes_per_vector_chunk. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/scalable_vector_cfi.c: New test. 
Signed-off-by: Zhijin Zeng Diff: --- gcc/config/riscv/riscv.cc | 4 +-- .../riscv/rvv/base/scalable_vector_cfi.c | 32 ++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1f60d8f9711..8b7123e043e 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11010,12 +11010,12 @@ static unsigned int riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, int *offset) { - /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1. + /* Polynomial invariant 1 == (VLENB / BYTES_PER_RISCV_VECTOR) - 1. 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1. 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1. */ gcc_assert (i == 1); - *factor = riscv_bytes_per_vector_chunk; + *factor = BYTES_PER_RISCV_VECTOR.coeffs[1]; *offset = 1; return RISCV_DWARF_VLENB; } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c new file mode 100644 index 000..184da10caf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalable_vector_cfi.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-g -O3 -march=rv64gcv -mabi=lp64d" } */ +/* { dg-skip-if "" { *-*-* } {"-O2" "-O1" "-O0" "-Og" "-Oz" "-flto"} } */ +/* { dg-final { scan-assembler {cfi_escape .*0x92,0xa2,0x38,0,0x32,0x1e} } } */ + +#include "riscv_vector.h" + +#define PI_2 1.570796326795 + +extern void func(float *result); + +void test(const float *ys, const float *xs, float *result, size_t length) { +size_t gvl = __riscv_vsetvlmax_e32m2(); +vfloat32m2_t vpi2 = __riscv_vfmv_v_f_f32m2(PI_2, gvl); + +for(size_t i = 0; i < length;) { +gvl = __riscv_vsetvl_e32m2(length - i); +vfloat32m2_t y = __riscv_vle32_v_f32m2(ys, gvl); +vfloat32m2_t x = __riscv_vle32_v_f32m2(xs, gvl); +vbool16_t mask0 = __riscv_vmflt_vv_f32m2_b16(x, y, gvl); +vfloat32m2_t fixpi = __riscv_vfrsub_vf_f32m2_mu(mask0, vpi2, 
vpi2, 0, gvl); + +__riscv_vse32_v_f32m2(result, fixpi, gvl); + +func(result); + +i += gvl; +ys += gvl; +xs += gvl; +result += gvl; +} +}
[gcc r15-2977] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2
https://gcc.gnu.org/g:6fbdbad97d451cc220a5654c8b97b9911485ef4a commit r15-2977-g6fbdbad97d451cc220a5654c8b97b9911485ef4a Author: Pan Li Date: Sat Aug 17 18:04:00 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 2 This patch would like to add test cases for the unsigned scalar .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } DEF_SAT_U_TRUC_FMT_2 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-7.c: New test. * gcc.target/riscv/sat_u_trunc-8.c: New test. * gcc.target/riscv/sat_u_trunc-9.c: New test. * gcc.target/riscv/sat_u_trunc-run-7.c: New test. * gcc.target/riscv/sat_u_trunc-run-8.c: New test. * gcc.target/riscv/sat_u_trunc-run-9.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 12 gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c | 19 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c | 16 7 files changed, 116 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 37e0a60f21b..576a4926d1f 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -227,7 +227,19 @@ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ } #define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT) +#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ +NT __attribute__((noinline)) \ +sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ +{\ + WT max = (WT)(NT)-1; \ + return x > max ? 
(NT) max : (NT)x; \ +} +#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) + #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x) +#define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x) +#define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c new file mode 100644 index 000..95d513a15fb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint16_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c new file mode 100644 index 000..f168912293d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** 
srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_2(uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c new file mode 100644 index 000..d82363d6aef --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno
[gcc r15-2978] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3
https://gcc.gnu.org/g:8d0efcf5581abf2560701f4143a0c2ccb261d1f7 commit r15-2978-g8d0efcf5581abf2560701f4143a0c2ccb261d1f7 Author: Pan Li Date: Sat Aug 17 19:27:11 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 3 This patch would like to add test cases for the unsigned scalar .SAT_TRUNC form 3. Aka: Form 3: #define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } DEF_SAT_U_TRUC_FMT_3 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-13.c: New test. * gcc.target/riscv/sat_u_trunc-14.c: New test. * gcc.target/riscv/sat_u_trunc-15.c: New test. * gcc.target/riscv/sat_u_trunc-run-13.c: New test. * gcc.target/riscv/sat_u_trunc-run-14.c: New test. * gcc.target/riscv/sat_u_trunc-run-15.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 12 gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c | 19 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-13.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-14.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-15.c | 16 7 files changed, 116 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 576a4926d1f..cf055410fd1 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -236,10 +236,22 @@ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ } #define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) +#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ +NT __attribute__((noinline)) \ +sat_u_truc_##WT##_to_##NT##_fmt_3 (WT x) \ +{\ + WT max = (WT)(NT)-1; \ + return x <= max ? 
(NT)x : (NT) max;\ +} +#define DEF_SAT_U_TRUC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_3(NT, WT) + #define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x) #define RUN_SAT_U_TRUC_FMT_2(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_2 (x) #define RUN_SAT_U_TRUC_FMT_2_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_2(NT, WT, x) +#define RUN_SAT_U_TRUC_FMT_3(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_3 (x) +#define RUN_SAT_U_TRUC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_3(NT, WT, x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c new file mode 100644 index 000..58910793a80 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint16_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c new file mode 100644 index 000..236ea1d45f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_truc_uint32_t_to_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** 
sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUC_FMT_3(uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c new file mode 100644 index 000..33c3686c053 --- /de
[gcc r15-2979] RISC-V: Make sure high bits of usadd operands is clean for non-Xmode [PR116278]
https://gcc.gnu.org/g:e8f31f4f58f0fcf1716fc1d9ee003fbcdda600c3 commit r15-2979-ge8f31f4f58f0fcf1716fc1d9ee003fbcdda600c3 Author: Pan Li Date: Fri Aug 9 10:26:32 2024 +0800 RISC-V: Make sure high bits of usadd operands is clean for non-Xmode [PR116278] For QI/HImode of .SAT_ADD, the operands may be sign-extended, so the high bits of the Xmode register may be all 1s, which is not expected. Take the code below as an example. signed char b[1]; unsigned short c; signed char *d = b; int main() { b[0] = -40; c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9; __builtin_printf("%d\n", c); } After expanding we have: ;; _6 = .SAT_ADD (_3, 9); (insn 8 7 9 (set (reg:DI 143) (high:DI (symbol_ref:DI ("d") [flags 0x86] ))) (nil)) (insn 9 8 10 (set (reg/f:DI 142) (mem/f/c:DI (lo_sum:DI (reg:DI 143) (symbol_ref:DI ("d") [flags 0x86] )) [1 d+0 S8 A64])) (nil)) (insn 10 9 11 (set (reg:HI 144 [ _3 ]) (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) "test.c":7:10 -1 (nil)) The conversion from signed char to unsigned short produces the sign_extend rtl shown above, which finally becomes the lb insn below: lb a1,0(a5) // a1 is -40, aka 0xffd8 lui a0,0x1a addi a5,a1,9 slli a5,a5,0x30 srli a5,a5,0x30 // a5 is 65505 sltu a1,a5,a1 // compare 65505 and 0xffd8 => TRUE The sltu tries to compare 65505 and 0xffd8 here, but we actually want to compare 65505 and 65496 (0xffd8). Thus we need to clean up the high bits to ensure this. The test suites below pass with this patch: * The rv64gcv full regression test. PR target/116278 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Add new func impl to zero extend rtx. (riscv_expand_usadd): Leverage above func to cleanup operands 0 and remove the special handling for SImode in RV64. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add-11.c: Adjust asm check body. * gcc.target/riscv/sat_u_add-15.c: Ditto. * gcc.target/riscv/sat_u_add-19.c: Ditto. * gcc.target/riscv/sat_u_add-23.c: Ditto. * gcc.target/riscv/sat_u_add-3.c: Ditto. 
* gcc.target/riscv/sat_u_add-7.c: Ditto. * gcc.target/riscv/sat_u_add_imm-11.c: Ditto. * gcc.target/riscv/sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/sat_u_add_imm-3.c: Ditto. * gcc.target/riscv/sat_u_add_imm-7.c: Ditto. * gcc.target/riscv/pr116278-run-1.c: New test. * gcc.target/riscv/pr116278-run-2.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 34 +++ gcc/testsuite/gcc.target/riscv/pr116278-run-1.c | 20 + gcc/testsuite/gcc.target/riscv/pr116278-run-2.c | 20 + gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-3.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add-7.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-3.c | 6 +++- gcc/testsuite/gcc.target/riscv/sat_u_add_imm-7.c | 6 +++- 13 files changed, 112 insertions(+), 22 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index c3877008d05..f266c45ed4d 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11828,12 +11828,29 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. + The rtx x will be zero extended to Xmode if the mode is HI/QImode, and + the new zero extended Xmode rtx will be returned. + Or the gen_lowpart rtx of Xmode will be returned. */ + +static rtx +riscv_gen_zero_extend_rtx (rtx x, machine_mode mode) +{ + if (mode == Xmode) +return x; + + rtx xmode_reg = gen_reg_rtx (Xmode); + riscv_emit_unary (ZERO_EXTEND, xmode_reg, x); + + return xmode_reg; +} + /* Implements the unsigned saturation add standard name usadd for int mode. z = SAT_ADD(x, y). => 1. sum = x + y. - 2. 
sum = truncate (sum) for QI and HI only. + 2. sum = truncate (sum) for non-Xmode. 3. lt = sum < x. 4. lt = -lt. 5. z = sum | lt. */ @@ -11844,22 +11861,15 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) machine_mode mode = GET_MODE (dest); rtx xmode_sum = gen_re
[gcc r15-2980] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar
https://gcc.gnu.org/g:a183b255be8ec8f434c3c39f3f4e01d6bd5566f8 commit r15-2980-ga183b255be8ec8f434c3c39f3f4e01d6bd5566f8 Author: Pan Li Date: Tue Jul 23 11:18:48 2024 +0800 RISC-V: Implement the quad and oct .SAT_TRUNC for scalar This patch would like to implement the quad and oct .SAT_TRUNC pattern in the riscv backend. Aka: Form 1: #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t) Before this patch: 4 │ __attribute__((noinline)) 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x) 6 │ { 7 │ _Bool overflow; 8 │ short unsigned int _1; 9 │ short unsigned int _2; 10 │ short unsigned int _3; 11 │ uint16_t _6; 12 │ 13 │ ;; basic block 2, loop depth 0 14 │ ;;pred: ENTRY 15 │ overflow_5 = x_4(D) > 65535; 16 │ _1 = (short unsigned int) x_4(D); 17 │ _2 = (short unsigned int) overflow_5; 18 │ _3 = -_2; 19 │ _6 = _1 | _3; 20 │ return _6; 21 │ ;;succ: EXIT 22 │ 23 │ } After this patch: 3 │ 4 │ __attribute__((noinline)) 5 │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x) 6 │ { 7 │ uint16_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below tests suites are passed for this patch 1. The rv64gcv fully regression test. 2. The rv64gcv build with glibc gcc/ChangeLog: * config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for quad truncation. (ANYI_OCT_TRUNC): New iterator for oct truncation. (ANYI_QUAD_TRUNCATED): New attr for truncated quad modes. (ANYI_OCT_TRUNCATED): New attr for truncated oct modes. (anyi_quad_truncated): Ditto but for lower case. (anyi_oct_truncated): Ditto but for lower case. * config/riscv/riscv.md (ustrunc2): Add new pattern for quad truncation. (ustrunc2): Ditto but for oct. 
gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust the expand dump check times. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto. * gcc.target/riscv/sat_arith_data.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-4.c: New test. * gcc.target/riscv/sat_u_trunc-5.c: New test. * gcc.target/riscv/sat_u_trunc-6.c: New test. * gcc.target/riscv/sat_u_trunc-run-4.c: New test. * gcc.target/riscv/sat_u_trunc-run-5.c: New test. * gcc.target/riscv/sat_u_trunc-run-6.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/iterators.md | 20 + gcc/config/riscv/riscv.md | 20 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c | 2 +- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 51 ++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c | 17 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c | 17 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c | 20 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c | 16 +++ 11 files changed, 195 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 0a669f560e3..2844cb02ff0 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -67,14 +67,34 @@ (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")]) +(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")]) + +(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")]) + (define_mode_attr ANYI_DOUBLE_TRUNCATED [ (HI "QI") (SI "HI") (DI "SI") ]) +(define_mode_attr ANYI_QUAD_TRUNCATED [ + (SI "QI") (DI "HI") +]) + +(define_mode_attr ANYI_OCT_TRUNCATED [ + (DI "QI") +]) + (define_mode_attr anyi_double_truncated [ (HI "qi") (SI "hi") (DI "si") ]) +(define_mode_attr anyi_quad_truncated [ + (SI "qi") (DI "hi") +]) + +(define_mode_attr anyi_oct_truncated [ + (DI "qi") +]) + ;; 
Iterator for hardware-supported floating-point modes. (define_mode_iterato
[gcc r15-3054] RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC]
https://gcc.gnu.org/g:1b72e07696a062e628c35e4bd25926c11ac18297 commit r15-3054-g1b72e07696a062e628c35e4bd25926c11ac18297 Author: Pan Li Date: Tue Aug 20 21:08:23 2024 +0800 RISC-V: Fix one typo in .SAT_TRUNC test func name [NFC] Fix one typo `sat_truc` to `sat_trunc`, as well as `SAT_TRUC` to `SAT_TRUNC`. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Fix SAT_TRUNC typo. * gcc.target/riscv/sat_u_trunc-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-9.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-1.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-13.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-14.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-15.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-2.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-3.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-4.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-5.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-6.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-7.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-8.c: Ditto. * gcc.target/riscv/sat_u_trunc-run-9.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 30 +++--- gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-13.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-14.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-15.c| 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-7.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-8.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-9.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-13.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-14.c | 4 +-- .../gcc.target/riscv/sat_u_trunc-run-15.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-7.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-8.c | 4 +-- gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-9.c | 4 +-- 25 files changed, 63 insertions(+), 63 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index cf055410fd1f..91853b60f592 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -218,40 +218,40 @@ sat_u_sub_##T##_fmt_12 (T x, T y) \ /* Saturation Truncate (unsigned and signed) */ /**/ -#define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_1(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ 
+sat_u_trunc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } -#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_1(NT, WT) -#define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ +#define DEF_SAT_U_TRUNC_FMT_2(NT, WT)\ NT __attribute__((noinline)) \ -sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ +sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } -#define DEF_SAT_U_TRUC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_2(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_2_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_2(NT, WT) -#define DEF_SAT_U_TRUC_FMT_3(NT, WT) \ +#define DEF_SAT
[gcc r15-3075] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2
https://gcc.gnu.org/g:1e99e1ba79964f47f8850871d025209dfab73693 commit r15-3075-g1e99e1ba79964f47f8850871d025209dfab73693 Author: Pan Li Date: Wed Aug 21 17:43:12 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 2 This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 2. Aka: Form 2: #define DEF_VEC_SAT_U_TRUNC_FMT_2(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT max = (WT)(NT)-1;\ out[i] = in[i] > max ? (NT)max : (NT)in[i]; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_2 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-10.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-11.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-12.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c| 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c | 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c | 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c | 23 ++ .../rvv/autovec/unop/vec_sat_u_trunc-run-10.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-11.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-12.c | 16 +++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-7.c | 16 +++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-8.c | 16 +++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-run-9.c | 16 +++ .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 18 + 13 files changed, 236 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c new file mode 100644 index ..f5084e503ebd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint16_t_uint32_t_fmt_2: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*mf2,\s*ta,\s*ma +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_2 (uint16_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c new file mode 100644 index ..e2ab880a1aca --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final {
[gcc r15-3076] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3
https://gcc.gnu.org/g:91f213908c0443b0249490b03b8046509f6e7e9d commit r15-3076-g91f213908c0443b0249490b03b8046509f6e7e9d Author: Pan Li Date: Wed Aug 21 17:57:47 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 3 This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 3. Aka: Form 3: #define DEF_VEC_SAT_U_TRUNC_FMT_3(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT max = (WT)(NT)-1;\ out[i] = in[i] <= max ? (NT)in[i] : (NT)max;\ } \ } DEF_VEC_SAT_U_TRUNC_FMT_3 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-13.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-14.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-15.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-17.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-18.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c| 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c| 23 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c| 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c| 19 ++ .../rvv/autovec/unop/vec_sat_u_trunc-run-13.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-14.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-15.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-16.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-17.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-18.c | 16 +++ .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 18 + 13 files changed, 236 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c new file mode 100644 index ..49bdbdc36062 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_3: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_3 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c new file mode 100644 index ..3ff696edcfee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final
[gcc r15-3122] Match: Support form 4 for unsigned integer .SAT_TRUNC
https://gcc.gnu.org/g:07988874c340e575521473b01dc98c8a1b2886b5 commit r15-3122-g07988874c340e575521473b01dc98c8a1b2886b5 Author: Pan Li Date: Tue Aug 20 15:44:38 2024 +0800 Match: Support form 4 for unsigned integer .SAT_TRUNC This patch would like to support the form 4 of the unsigned integer .SAT_TRUNC. Aka below example: Form 4: #define DEF_SAT_U_TRUC_FMT_4(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_4 (WT x) \ { \ bool not_overflow = x <= (WT)(NT)(-1); \ return ((NT)x) | (NT)((NT)not_overflow - 1); \ } DEF_SAT_U_TRUC_FMT_4(uint32_t, uint64_t) Before this patch: 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x) 6 │ { 7 │ _Bool not_overflow; 8 │ unsigned char _1; 9 │ unsigned char _2; 10 │ unsigned char _3; 11 │ uint8_t _6; 12 │ 13 │ ;; basic block 2, loop depth 0 14 │ ;;pred: ENTRY 15 │ not_overflow_5 = x_4(D) <= 255; 16 │ _1 = (unsigned char) x_4(D); 17 │ _2 = (unsigned char) not_overflow_5; 18 │ _3 = _2 + 255; 19 │ _6 = _1 | _3; 20 │ return _6; 21 │ ;;succ: EXIT 22 │ 23 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ uint8_t sat_u_truc_uint32_t_to_uint8_t_fmt_4 (uint32_t x) 6 │ { 7 │ uint8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_TRUNC (x_4(D)); [tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add form 4 for unsigned .SAT_TRUNC matching. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 18 ++ 1 file changed, 18 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 65a3aae22430..78f1957e8c78 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3325,6 +3325,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) +/* Unsigned saturation truncate, case 3, sizeof (WT) > sizeof (NT). 
+ SAT_U_TRUNC = (NT)X | ((NT)(X <= (WT)-1) + (NT)-1). */ +(match (unsigned_integer_sat_trunc @0) + (bit_ior:c (plus:c (convert (le @0 INTEGER_CST@1)) INTEGER_CST@2) + (convert @0)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0))) + (with + { + unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned otype_precision = TYPE_PRECISION (type); + wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); + wide_int max = wi::mask (otype_precision, false, otype_precision); + wide_int int_cst_1 = wi::to_wide (@1); + wide_int int_cst_2 = wi::to_wide (@2); + } + (if (wi::eq_p (trunc_max, int_cst_1) && wi::eq_p (max, int_cst_2)) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne)
[gcc r15-3172] RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4
https://gcc.gnu.org/g:5ab1e238aa23d1773429f8f28abfb6ed16f655f6 commit r15-3172-g5ab1e238aa23d1773429f8f28abfb6ed16f655f6 Author: Pan Li Date: Sun Aug 25 11:02:10 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_TRUNC form 4 This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 4. Aka: Form 4: #define DEF_SAT_U_TRUNC_FMT_4(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x) \ { \ bool not_overflow = x <= (WT)(NT)(-1); \ return ((NT)x) | (NT)((NT)not_overflow - 1); \ } The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_trunc-19.c: New test. * gcc.target/riscv/sat_u_trunc-20.c: New test. * gcc.target/riscv/sat_u_trunc-21.c: New test. * gcc.target/riscv/sat_u_trunc-22.c: New test. * gcc.target/riscv/sat_u_trunc-23.c: New test. * gcc.target/riscv/sat_u_trunc-24.c: New test. * gcc.target/riscv/sat_u_trunc-run-19.c: New test. * gcc.target/riscv/sat_u_trunc-run-20.c: New test. * gcc.target/riscv/sat_u_trunc-run-21.c: New test. * gcc.target/riscv/sat_u_trunc-run-22.c: New test. * gcc.target/riscv/sat_u_trunc-run-23.c: New test. * gcc.target/riscv/sat_u_trunc-run-24.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 12 gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-21.c | 19 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-22.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-23.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-24.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-19.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-20.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-21.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-22.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-23.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-24.c | 16 13 files changed, 218 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 91853b60f592..229e1f0a5cda 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -245,6 +245,15 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ } #define DEF_SAT_U_TRUNC_FMT_3_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_3(NT, WT) +#define DEF_SAT_U_TRUNC_FMT_4(NT, WT) \ +NT __attribute__((noinline)) \ +sat_u_trunc_##WT##_to_##NT##_fmt_4 (WT x) \ +{ \ + bool not_overflow = x <= (WT)(NT)(-1); \ + return ((NT)x) | (NT)((NT)not_overflow - 1); \ +} +#define DEF_SAT_U_TRUNC_FMT_4_WRAP(NT, WT) DEF_SAT_U_TRUNC_FMT_4(NT, WT) + #define RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_1 (x) #define RUN_SAT_U_TRUNC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_1(NT, WT, x) @@ -254,4 +263,7 @@ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ #define RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_3 (x) #define RUN_SAT_U_TRUNC_FMT_3_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_3(NT, WT, x) +#define RUN_SAT_U_TRUNC_FMT_4(NT, WT, x) sat_u_trunc_##WT##_to_##NT##_fmt_4 (x) +#define RUN_SAT_U_TRUNC_FMT_4_WRAP(NT, WT, x) RUN_SAT_U_TRUNC_FMT_4(NT, WT, 
x) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c new file mode 100644 index ..e61faffbbc6c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-19.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint16_t_to_uint8_t_fmt_4: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_4(uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-20.c new file mode 100644 index
[gcc r15-3173] RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4
https://gcc.gnu.org/g:8f2f7aabcef8d801af002a26885a97ccf9889099 commit r15-3173-g8f2f7aabcef8d801af002a26885a97ccf9889099 Author: Pan Li Date: Sun Aug 25 14:15:40 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_TRUNC form 4 This patch would like to add test cases for the unsigned vector .SAT_TRUNC form 4. Aka: Form 4: #define DEF_VEC_SAT_U_TRUNC_FMT_4(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ bool not_overflow = in[i] <= (WT)(NT)(-1); \ out[i] = ((NT)in[i]) | (NT)((NT)not_overflow - 1); \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_4 (uint32_t, uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-19.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-20.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-21.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-22.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-23.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-24.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c| 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c| 23 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c| 19 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c| 21 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c| 19 ++ .../rvv/autovec/unop/vec_sat_u_trunc-run-19.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-20.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-21.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-22.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-23.c | 16 +++ .../rvv/autovec/unop/vec_sat_u_trunc-run-24.c | 16 +++ .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 18 + 13 files changed, 236 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c new file mode 100644 index ..a80cefe46ab0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_trunc_uint8_t_uint16_t_fmt_4: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*mf2,\s*ta,\s*ma +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vnclipu\.wi\s+v[0-9]+,\s*v[0-9]+,\s*0 +** vse8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** ... 
+*/ +DEF_VEC_SAT_U_TRUNC_FMT_4 (uint8_t, uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c new file mode 100644 index ..9a4d261d052d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final
[gcc r15-3174] RISC-V: Support IMM for operand 0 of ussub pattern
https://gcc.gnu.org/g:17be00916e51835dcc47e30ed32fc892ee0c581d commit r15-3174-g17be00916e51835dcc47e30ed32fc892ee0c581d Author: Pan Li Date: Sat Aug 3 07:02:57 2024 +0000 RISC-V: Support IMM for operand 0 of ussub pattern This patch would like to allow IMM for the operand 0 of ussub pattern. Aka .SAT_SUB(1023, y) as the below example. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1023) Before this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ bgtu a0,a5,.L3 13 │ sub a0,a5,a0 14 │ ret 15 │ .L3: 16 │ li a0,0 17 │ ret After this patch: 10 │ sat_u_sub_imm82_uint64_t_fmt_1: 11 │ li a5,82 12 │ sltu a4,a5,a0 13 │ addi a4,a4,-1 14 │ sub a0,a5,a0 15 │ and a0,a4,a0 16 │ ret The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_unsigned_xmode_reg): Add new func impl to gen xmode rtx reg from operand rtx. (riscv_expand_ussub): Gen xmode reg for operand 1. * config/riscv/riscv.md: Allow const_int for operand 1. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_u_sub_imm-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-1_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-4.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-1.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-3.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-4.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 46 +- gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_1.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_2.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2.c | 21 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_1.c | 21 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c | 21 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4.c | 19 .../gcc.target/riscv/sat_u_sub_imm-run-1.c | 56 ++ .../gcc.target/riscv/sat_u_sub_imm-run-2.c | 56 ++ .../gcc.target/riscv/sat_u_sub_imm-run-3.c | 55 + .../gcc.target/riscv/sat_u_sub_imm-run-4.c | 48 +++ 17 files changed, 477 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8538d405f505..90a6e936558d 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11907,6 +11907,50 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Generate a REG rtx of Xmode from the given rtx and mode. + The rtx x can be REG (QI/HI/SI/DI) or const_int. + The machine_mode mode is the original mode from define pattern. + + If rtx is REG, the gen_lowpart of Xmode will be returned. + + If rtx is const_int, a new REG rtx will be created to hold the value of + const_int and then returned. + + According to the gccint doc, the constants generated for modes with fewer + bits than in HOST_WIDE_INT must be sign extended to full width. Thus there + will be two cases here, take QImode as example. + + For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple + mov from const_int to the new REG rtx is good enough here. 
+ + For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand. + Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode +
[gcc r15-3188] Match: Add int type fits check for .SAT_ADD imm operand
https://gcc.gnu.org/g:3b78aa3e316a22b4ae477c91866d47f654f129b1 commit r15-3188-g3b78aa3e316a22b4ae477c91866d47f654f129b1 Author: Pan Li Date: Sat Aug 24 10:16:28 2024 +0800 Match: Add int type fits check for .SAT_ADD imm operand This patch would like to add a strict check for the imm operand of .SAT_ADD matching. Previously we had no type checking for the imm operand, which may result in unexpected IL being caught by the .SAT_ADD pattern. We leverage int_fits_type_p here to make sure the imm operand is an int type that fits the result type of the .SAT_ADD. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_ADD (a, 12); uint8_t sum = .SAT_ADD (a, 12u); uint8_t sum = .SAT_ADD (a, 126u); uint8_t sum = .SAT_ADD (a, 128u); uint8_t sum = .SAT_ADD (a, 228); uint8_t sum = .SAT_ADD (a, 223u); Not fits uint8_t: uint8_t a; uint8_t sum = .SAT_ADD (a, -1); uint8_t sum = .SAT_ADD (a, 256u); uint8_t sum = .SAT_ADD (a, 257); The below test suites are passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_ADD imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm-11.c: Adjust test case for imm. * gcc.target/riscv/sat_u_add_imm-12.c: Ditto. * gcc.target/riscv/sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/sat_u_add_imm-16.c: Ditto. * gcc.target/riscv/sat_u_add_imm_type_check-1.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-10.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-11.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-12.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-13.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-14.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-15.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-16.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-17.c: New test. 
* gcc.target/riscv/sat_u_add_imm_type_check-18.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-19.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-2.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-20.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-21.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-22.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-23.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-24.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-25.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-26.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-27.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-28.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-29.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-3.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-30.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-31.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-32.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-33.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-34.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-35.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-36.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-37.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-38.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-39.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-4.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-40.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-41.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-42.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-43.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-44.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-45.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-46.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-47.c: New test. 
* gcc.target/riscv/sat_u_add_imm_type_check-48.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-49.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-5.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-50.c: New test. * gcc.target/riscv/sat_u_add_imm_
[gcc r15-3208] RISC-V: Support IMM for operand 1 of ussub pattern
https://gcc.gnu.org/g:a1062b0c07bb729cf6a1fff34929d22e5d5b633d commit r15-3208-ga1062b0c07bb729cf6a1fff34929d22e5d5b633d Author: Pan Li Date: Mon Aug 26 15:58:52 2024 +0800 RISC-V: Support IMM for operand 1 of ussub pattern This patch would like to allow IMM for the operand 1 of ussub pattern. Aka .SAT_SUB(x, 22) as the below example. Form 2: #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ { \ return x >= (T)IMM ? x - (T)IMM : 0; \ } DEF_SAT_U_SUB_IMM_FMT_2(uint64_t, 1022) It is almost the same as the imm support for operand 0 of the ussub pattern, but allows the second operand to be imm instead of the first operand. The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_ussub): Gen xmode for the second operand, aka y in parameter. * config/riscv/riscv.md (ussub3): Allow const_int for operand 2. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-5.c: New test. * gcc.target/riscv/sat_u_sub_imm-5_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-5_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-6.c: New test. * gcc.target/riscv/sat_u_sub_imm-6_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-6_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-7.c: New test. * gcc.target/riscv/sat_u_sub_imm-7_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-7_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-8.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-5.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-6.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-7.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-8.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 2 +- gcc/config/riscv/riscv.md | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 9 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_1.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-5_2.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_1.c | 21 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-6_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c | 21 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-8.c | 18 +++ .../gcc.target/riscv/sat_u_sub_imm-run-5.c | 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-6.c | 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-7.c | 54 + .../gcc.target/riscv/sat_u_sub_imm-run-8.c | 48 +++ 17 files changed, 423 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 90a6e936558d..1f544c1287ec 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11965,7 +11965,7 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y) { machine_mode mode = GET_MODE (dest); rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode); - rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode); rtx xmode_lt = gen_reg_rtx (Xmode); rtx xmode_minus = gen_reg_rtx (Xmode); rtx xmode_dest = gen_reg_rtx (Xmode); diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index a94705a8e7cc..3289ed2155ad 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4370,7 +4370,7 @@ (define_expand "ussub3" [(match_operand:ANYI 0 "register_operand") (match_operand:ANYI 1 "reg_or_int_operand") - (match_operand:ANYI 2 "register_operand")] + (match_operand:ANYI 2 "reg_or_int_operand")] "" { riscv_expand_ussub (operands[0], operands[1], operands[2]); diff --git 
a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 4eca73586c87..c8ff8320d824 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -224,6 +224,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ return (T)IMM >= y ? (T)IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ +{ \ + return x >= (T)IMM ? x - (T)IMM : 0; \ +} + #d
[gcc r15-3238] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3
https://gcc.gnu.org/g:cb0b8b62223b485a058a56fc5c6345974ebaa230 commit r15-3238-gcb0b8b62223b485a058a56fc5c6345974ebaa230 Author: Pan Li Date: Tue Aug 27 14:37:01 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 3 This patch would like to add test cases for the unsigned scalar .SAT_SUB IMM form 3. Aka: Form 3: #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ { \ return (T)IMM > y ? (T)IMM - y : 0; \ } DEF_SAT_U_SUB_IMM_FMT_3(uint64_t, 23) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-10.c: New test. * gcc.target/riscv/sat_u_sub_imm-10_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-10_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-11.c: New test. * gcc.target/riscv/sat_u_sub_imm-11_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-11_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-12.c: New test. * gcc.target/riscv/sat_u_sub_imm-9.c: New test. * gcc.target/riscv/sat_u_sub_imm-9_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-9_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-10.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-11.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-12.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-9.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 9 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c | 21 .../gcc.target/riscv/sat_u_sub_imm-10_1.c | 22 + .../gcc.target/riscv/sat_u_sub_imm-10_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c | 20 .../gcc.target/riscv/sat_u_sub_imm-11_1.c | 22 + .../gcc.target/riscv/sat_u_sub_imm-11_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-12.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_1.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-9_2.c | 20 .../gcc.target/riscv/sat_u_sub_imm-run-10.c| 56 ++ .../gcc.target/riscv/sat_u_sub_imm-run-11.c| 55 + .../gcc.target/riscv/sat_u_sub_imm-run-12.c| 48 +++ .../gcc.target/riscv/sat_u_sub_imm-run-9.c | 56 ++ 15 files changed, 432 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index c8ff8320d824..b4339eb0dff9 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -231,6 +231,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ return x >= (T)IMM ? x - (T)IMM : 0; \ } +#define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ +{ \ + return (T)IMM > y ? 
(T)IMM - y : 0; \ +} + #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y) #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y) #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y) @@ -248,6 +255,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_2 (T x) \ if (sat_u_sub_imm##IMM##_##T##_fmt_1(y) != expect) __builtin_abort () #define RUN_SAT_U_SUB_IMM_FMT_2(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () +#define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \ + if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort () /**/ /* Saturation Truncate (unsigned and signed) */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c new file mode 100644 index ..db450d7cfbf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-10.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm6_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*6 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_3(uint16_t, 6) + +/* { d
[gcc r15-3239] RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4
https://gcc.gnu.org/g:3989e31d867b3505f847ecb6d870eacacfdf47bf commit r15-3239-g3989e31d867b3505f847ecb6d870eacacfdf47bf Author: Pan Li Date: Tue Aug 27 15:14:40 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_SUB IMM form 4 This patch would like to add test cases for the unsigned scalar .SAT_SUB IMM form 4. Aka: Form 4: #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ { \ return x > (T)IMM ? x - (T)IMM : 0; \ } DEF_SAT_U_SUB_IMM_FMT_4(uint64_t, 23) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_sub_imm-13.c: New test. * gcc.target/riscv/sat_u_sub_imm-13_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-13_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-14.c: New test. * gcc.target/riscv/sat_u_sub_imm-14_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-14_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-15.c: New test. * gcc.target/riscv/sat_u_sub_imm-15_1.c: New test. * gcc.target/riscv/sat_u_sub_imm-15_2.c: New test. * gcc.target/riscv/sat_u_sub_imm-16.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-13.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-14.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-15.c: New test. * gcc.target/riscv/sat_u_sub_imm-run-16.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 9 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c | 19 .../gcc.target/riscv/sat_u_sub_imm-13_1.c | 19 .../gcc.target/riscv/sat_u_sub_imm-13_2.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-14.c | 20 .../gcc.target/riscv/sat_u_sub_imm-14_1.c | 21 + .../gcc.target/riscv/sat_u_sub_imm-14_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c | 19 .../gcc.target/riscv/sat_u_sub_imm-15_1.c | 21 + .../gcc.target/riscv/sat_u_sub_imm-15_2.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-16.c | 18 +++ .../gcc.target/riscv/sat_u_sub_imm-run-13.c| 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-14.c| 55 ++ .../gcc.target/riscv/sat_u_sub_imm-run-15.c| 54 + .../gcc.target/riscv/sat_u_sub_imm-run-16.c| 48 +++ 15 files changed, 421 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index b4339eb0dff9..a899979904b9 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -238,6 +238,13 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ return (T)IMM > y ? (T)IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ +{ \ + return x > (T)IMM ? 
x - (T)IMM : 0; \ +} + #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y) #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y) #define RUN_SAT_U_SUB_FMT_3(T, x, y) sat_u_sub_##T##_fmt_3(x, y) @@ -257,6 +264,8 @@ sat_u_sub_imm##IMM##_##T##_fmt_3 (T y) \ if (sat_u_sub_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () #define RUN_SAT_U_SUB_IMM_FMT_3(T, IMM, y, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_3(y) != expect) __builtin_abort () +#define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ + if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () /**/ /* Saturation Truncate (unsigned and signed) */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c new file mode 100644 index ..7dcbc3b1a126 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm11_uint8_t_fmt_4: +** addi\s+[atx][0-9]+,\s*a0,\s*-11 +** sltiu\s+a0,\s*[atx][0-9]+,\s*11 +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_4(uint8_t, 11) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git
[gcc r15-3241] Match: Support form 1 for scalar signed integer .SAT_ADD
https://gcc.gnu.org/g:fe5f652bab420eb372645281f7fe3e5aa1534d01 commit r15-3241-gfe5f652bab420eb372645281f7fe3e5aa1534d01 Author: Pan Li Date: Mon Aug 26 10:11:38 2024 +0800 Match: Support form 1 for scalar signed integer .SAT_ADD This patch would like to support the form 1 of the scalar signed integer .SAT_ADD. Aka below example: Form 1: #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t sum; 8 │ long unsigned int x.0_1; 9 │ long unsigned int y.1_2; 10 │ long unsigned int _3; 11 │ long int _4; 12 │ long int _5; 13 │ int64_t _6; 14 │ _Bool _11; 15 │ long int _12; 16 │ long int _13; 17 │ long int _14; 18 │ long int _16; 19 │ long int _17; 20 │ 21 │ ;; basic block 2, loop depth 0 22 │ ;;pred: ENTRY 23 │ x.0_1 = (long unsigned int) x_7(D); 24 │ y.1_2 = (long unsigned int) y_8(D); 25 │ _3 = x.0_1 + y.1_2; 26 │ sum_9 = (int64_t) _3; 27 │ _4 = x_7(D) ^ y_8(D); 28 │ _5 = x_7(D) ^ sum_9; 29 │ _17 = ~_4; 30 │ _16 = _5 & _17; 31 │ if (_16 < 0) 32 │ goto ; [41.00%] 33 │ else 34 │ goto ; [59.00%] 35 │ ;;succ: 3 36 │ ;;4 37 │ 38 │ ;; basic block 3, loop depth 0 39 │ ;;pred: 2 40 │ _11 = x_7(D) < 0; 41 │ _12 = (long int) _11; 42 │ _13 = -_12; 43 │ _14 = _13 ^ 9223372036854775807; 44 │ ;;succ: 4 45 │ 46 │ ;; basic block 4, loop depth 0 47 │ ;;pred: 2 48 │ ;;3 49 │ # _6 = PHI 50 │ return _6; 51 │ ;;succ: EXIT 52 │ 53 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y) 6 │ { 7 │ int64_t _4; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _4 = 
.SAT_ADD (x_5(D), y_6(D)); [tail call] 12 │ return _4; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the matching for signed .SAT_ADD. * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new matching func decl. (match_unsigned_saturation_add): Try signed .SAT_ADD and rename to ... (match_saturation_add): ... here. (math_opts_dom_walker::after_dom_children): Update the above renamed func from caller. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 15 +++ gcc/tree-ssa-math-opts.cc | 35 ++- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 5ee60963c904..be211535a49f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3192,6 +3192,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0) && int_fits_type_p (@1, type +/* Signed saturation add, case 1: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ sum) & !(X ^ Y) < 0 ? (-(T)(X < 0) ^ MAX) : sum; + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (lt (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) + @2) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) + && types_match (type, @0, @1 + /* Unsigned saturation sub, case 1 (branch w
[gcc r15-3243] Vect: Reconcile the const_int operand type of unsigned .SAT_ADD
https://gcc.gnu.org/g:6dccd5710380429c7addec9fe92a1a0bcb2f3367 commit r15-3243-g6dccd5710380429c7addec9fe92a1a0bcb2f3367 Author: Pan Li Date: Tue Aug 27 15:01:02 2024 +0800 Vect: Reconcile the const_int operand type of unsigned .SAT_ADD The .SAT_ADD has 2 operands, and one of the operands may be an INTEGER_CST. For example _1 = .SAT_ADD (_2, 9) comes from the sample code below. Form 3: #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ T ret; \ for (i = 0; i < limit; i++)\ {\ out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \ }\ } DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 9) It will fail to vectorize because vectorizable_call checks that the operands are type-compatible, but the imm will be (const_int 9) with SImode, which is different from _2 (DImode). Aka: uint64_t _1; uint64_t _2; _1 = .SAT_ADD (_2, 9); This patch would like to reconcile the imm operand to the operand type mode of _2 by fold_convert to make the vectorizable_call happy. The test suites below pass with this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_sat_add_pattern): Add fold convert for const_int to the type of operand 0. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-14.c: New test. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-9.c: New test. Signed-off-by: Pan Li Diff: --- .../autovec/binop/vec_sat_u_add_imm_reconcile-1.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-10.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-11.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-12.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-13.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-14.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-15.c | 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-2.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-3.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-4.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-5.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-6.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-7.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-8.c| 9 + .../autovec/binop/vec_sat_u_add_imm_reconcile-9.c| 9 + .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 20 gcc/tree-vect-patterns.cc| 3 +++ 17 files changed, 158 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm_reconcile-1.c new file mode 100644 index
[gcc r15-3244] Test: Move pr116278 run test to dg/torture [NFC]
https://gcc.gnu.org/g:3178786c88761e47b3cbe700a97a0de2b6e133cb commit r15-3244-g3178786c88761e47b3cbe700a97a0de2b6e133cb Author: Pan Li Date: Mon Aug 19 10:02:46 2024 +0800 Test: Move pr116278 run test to dg/torture [NFC] Move the run test of pr116278 to dg/torture and leave the RISC-V asm check under the RISC-V part. PR target/116278 gcc/testsuite/ChangeLog: * gcc.target/riscv/pr116278-run-1.c: Take compile instead of run. * gcc.target/riscv/pr116278-run-2.c: Ditto. * gcc.dg/torture/pr116278-run-1.c: New test. * gcc.dg/torture/pr116278-run-2.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.dg/torture/pr116278-run-1.c | 19 +++ gcc/testsuite/gcc.dg/torture/pr116278-run-2.c | 19 +++ gcc/testsuite/gcc.target/riscv/pr116278-run-1.c | 4 ++-- gcc/testsuite/gcc.target/riscv/pr116278-run-2.c | 4 ++-- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c new file mode 100644 index ..ada3ac984451 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-1.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32 } */ +/* { dg-options "-O2" } */ + +#include + +int8_t b[1]; +int8_t *d = b; +int32_t c; + +int main() { + b[0] = -40; + uint16_t t = (uint16_t)d[0]; + + c = (t < 0xFFF6 ? t : 0xFFF6) + 9; + + if (c != 65505) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c new file mode 100644 index ..5e72d15957ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116278-run-2.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32 } */ +/* { dg-options "-O2" } */ + +#include + +int16_t b[1]; +int16_t *d = b; +int64_t c; + +int main() { + b[0] = -40; + uint32_t t = (uint32_t)d[0]; + + c = (t < 0xFFFFFFF6u ? 
t : 0xFFFFFFF6u) + 9; + + if (c != 4294967265) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c index d3812bdcdfb7..67cf17ebc33e 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-1.c @@ -1,7 +1,7 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ -#include +#include int8_t b[1]; int8_t *d = b; diff --git a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c index 669cd4f003f1..103602df2584 100644 --- a/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c +++ b/gcc/testsuite/gcc.target/riscv/pr116278-run-2.c @@ -1,7 +1,7 @@ -/* { dg-do run { target { riscv_v } } } */ +/* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand-details" } */ -#include +#include int16_t b[1]; int16_t *d = b;
[gcc r15-3348] RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM
https://gcc.gnu.org/g:72f3e9021e55f14e90773cf2966805a318f44842 commit r15-3348-g72f3e9021e55f14e90773cf2966805a318f44842 Author: Pan Li Date: Fri Aug 30 08:36:45 2024 +0800 RISC-V: Add testcases for form 3 of unsigned vector .SAT_ADD IMM This patch would like to add test cases for the unsigned vector .SAT_ADD when one of the operand is IMM. Form 3: #define DEF_VEC_SAT_U_ADD_IMM_FMT_3(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_3 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ T ret; \ for (i = 0; i < limit; i++)\ {\ out[i] = __builtin_add_overflow (in[i], IMM, &ret) ? -1 : ret; \ }\ } DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint64_t, 123) The below test are passed for this patch. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-10.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-11.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-12.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-9.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c | 14 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-10.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-11.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-12.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-9.c| 28 ++ 8 files changed, 168 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c new file mode 100644 index ..b6b605ac6158 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm15_uint16_t_fmt_3: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15 +** ... +*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint16_t, 15) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c new file mode 100644 index ..6da86a1abe17 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm33u_uint32_t_fmt_3: +** ... 
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_3(uint32_t, 33u) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c new file mode 100644 index ..b6ff5a6d5d68 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm129ull_uint64_t_fmt_3: +** ... +** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/
[gcc r15-3349] RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM
https://gcc.gnu.org/g:56ed1dfa79c436b769f3266258d34d160b4330d9 commit r15-3349-g56ed1dfa79c436b769f3266258d34d160b4330d9 Author: Pan Li Date: Fri Aug 30 11:01:37 2024 +0800 RISC-V: Add testcases for form 4 of unsigned vector .SAT_ADD IMM This patch would like to add test cases for the unsigned vector .SAT_ADD when one of the operand is IMM. Form 4: #define DEF_VEC_SAT_U_ADD_IMM_FMT_4(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_4 (T *out, T *in, unsigned limit) \ { \ unsigned i; \ T ret; \ for (i = 0; i < limit; i++) \ { \ out[i] = __builtin_add_overflow (in[i], IMM, &ret) == 0 ? ret : -1; \ } \ } DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint64_t, 123) The below test are passed for this patch. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-13.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-14.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-15.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-16.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c | 14 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-13.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-14.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-15.c | 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-16.c | 28 ++ .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 20 9 files changed, 188 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c new file mode 100644 index ..a9439dff39f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm9u_uint8_t_fmt_4: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*9 +** ... 
+*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint8_t, 9u) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c new file mode 100644 index ..dbe474975991 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../vec_sat_arith.h" + +/* +** vec_sat_u_add_imm15_uint16_t_fmt_4: +** ... +** vsaddu\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15 +** ... +*/ +DEF_VEC_SAT_U_ADD_IMM_FMT_4(uint16_t, 15) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c new file mode 100644 index ..0ac2e1b2942f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto"
[gcc r15-3347] RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64
https://gcc.gnu.org/g:e96d4bf6a6e8b8a5ea1b81a79f4efa07dee77af1 commit r15-3347-ge96d4bf6a6e8b8a5ea1b81a79f4efa07dee77af1 Author: Pan Li Date: Fri Aug 30 14:07:12 2024 +0800 RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64 Previously, we had some special handling for both the unsigned .SAT_ADD and .SAT_SUB. Both similarly take care of zero extending SImode in RV64. Thus refactor these two helper functions into one to avoid code duplication. The test suite below passes with this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge the zero_extend handling from func riscv_gen_unsigned_xmode_reg. (riscv_gen_unsigned_xmode_reg): Remove. (riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx instead of riscv_gen_unsigned_xmode_reg. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_sub-11.c: Adjust asm check. * gcc.target/riscv/sat_u_sub-15.c: Ditto. * gcc.target/riscv/sat_u_sub-19.c: Ditto. * gcc.target/riscv/sat_u_sub-23.c: Ditto. * gcc.target/riscv/sat_u_sub-27.c: Ditto. * gcc.target/riscv/sat_u_sub-3.c: Ditto. * gcc.target/riscv/sat_u_sub-31.c: Ditto. * gcc.target/riscv/sat_u_sub-35.c: Ditto. * gcc.target/riscv/sat_u_sub-39.c: Ditto. * gcc.target/riscv/sat_u_sub-43.c: Ditto. * gcc.target/riscv/sat_u_sub-47.c: Ditto. * gcc.target/riscv/sat_u_sub-7.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto. * gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 99 ++ gcc/testsuite/gcc.target/riscv/sat_u_sub-11.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-15.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-19.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-23.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-27.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-3.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-31.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-35.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-39.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-43.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-47.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub-7.c | 4 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-11.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-11_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-11_2.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-15.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-15_1.c | 2 + .../gcc.target/riscv/sat_u_sub_imm-15_2.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_1.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_2.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_1.c | 2 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-7_2.c | 2 + 25 files changed, 118 insertions(+), 53 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 496dd177fe7f..75b37b532443 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } -/* Generate a new rtx of Xmode based on the rtx and mode in define pattern. - The rtx x will be zero extended to Xmode if the mode is HI/QImode, and - the new zero extended Xmode rtx will be returned. - Or the gen_lowpart rtx of Xmode will be returned. */ +/* Generate a REG rtx of Xmode from the given rtx and mode. 
+ The rtx x can be REG (QI/HI/SI/DI) or const_int. + The machine_mode mode is the original mode from define pattern. + + If rtx is REG and Xmode, the RTX x will be returned directly. + + If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be + returned. + + If rtx is const_int, a new REG rtx will be created to hold the value of + const_int and then returned. + + According to the gcci
[gcc r15-3351] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3
https://gcc.gnu.org/g:5239902210a16b22d59d2cf8b535d615922a5c00 commit r15-3351-g5239902210a16b22d59d2cf8b535d615922a5c00 Author: Pan Li Date: Sun Aug 18 14:08:21 2024 +0800 RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 3 This patch would like to add test cases for the unsigned scalar quad and oct .SAT_TRUNC form 3. Aka: Form 3: #define DEF_SAT_U_TRUNC_FMT_3(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_3 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x <= max ? (NT)x : (NT) max;\ } QUAD: DEF_SAT_U_TRUNC_FMT_3 (uint16_t, uint64_t) DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUNC_FMT_3 (uint8_t, uint64_t) The test below passes with this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-16.c: New test. * gcc.target/riscv/sat_u_trunc-17.c: New test. * gcc.target/riscv/sat_u_trunc-18.c: New test. * gcc.target/riscv/sat_u_trunc-run-16.c: New test. * gcc.target/riscv/sat_u_trunc-run-17.c: New test. * gcc.target/riscv/sat_u_trunc-run-18.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-17.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-18.c | 16 6 files changed, 102 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c new file mode 100644 index ..f91da58c0bae --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-16.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint32_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c new file mode 100644 index ..9813e1f79b05 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-17.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint8_t_fmt_3: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint8_t, uint64_t) + +/* { dg-final { 
scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c new file mode 100644 index ..eb799849f73a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-18.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint16_t_fmt_3: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUNC_FMT_3(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c new file mode 100644 index ..20ceda6852e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-16.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUNC_FMT_3_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2
[gcc r15-3350] RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2
https://gcc.gnu.org/g:ea81e21d5398bdacf883533fd738fc45ea8d6dd9 commit r15-3350-gea81e21d5398bdacf883533fd738fc45ea8d6dd9 Author: Pan Li Date: Sun Aug 18 12:49:47 2024 +0800 RISC-V: Add testcases for unsigned scalar quad and oct .SAT_TRUNC form 2 This patch adds test cases for the unsigned scalar quad and oct .SAT_TRUNC form 2. Aka: Form 2: #define DEF_SAT_U_TRUNC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_trunc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ WT max = (WT)(NT)-1; \ return x > max ? (NT) max : (NT)x; \ } QUAD: DEF_SAT_U_TRUNC_FMT_2 (uint16_t, uint64_t) DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint32_t) OCT: DEF_SAT_U_TRUNC_FMT_2 (uint8_t, uint64_t) The test below passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_trunc-10.c: New test. * gcc.target/riscv/sat_u_trunc-11.c: New test. * gcc.target/riscv/sat_u_trunc-12.c: New test. * gcc.target/riscv/sat_u_trunc-run-10.c: New test. * gcc.target/riscv/sat_u_trunc-run-11.c: New test. * gcc.target/riscv/sat_u_trunc-run-12.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c | 17 + gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-11.c | 16 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-12.c | 16 6 files changed, 102 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c new file mode 100644 index ..5ea8e613901c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-10.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint32_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c new file mode 100644 index ..3b45e2af9ce3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-11.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint8_t_fmt_2: +** sltiu\s+[atx][0-9]+,\s*a0,\s*255 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint8_t, uint64_t) + +/* { dg-final { 
scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c new file mode 100644 index ..7ea2c93a301f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_trunc_uint64_t_to_uint16_t_fmt_2: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_TRUNC_FMT_2(uint16_t, uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c new file mode 100644 index ..2281610f3353 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-10.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define T1 uint8_t +#define T2 uint32_t + +DEF_SAT_U_TRUNC_FMT_2_WRAP(T1, T2) + +#define DATA TEST_UNARY_DATA_WRAP(T1, T2
[gcc r15-3390] RISC-V: Support form 1 of integer scalar .SAT_ADD
https://gcc.gnu.org/g:539fcaae67c6cf54bd377eba6c9d5b1792a3 commit r15-3390-g539fcaae67c6cf54bd377eba6c9d5b1792a3 Author: Pan Li Date: Thu Aug 29 11:25:44 2024 +0800 RISC-V: Support form 1 of integer scalar .SAT_ADD This patch would like to support the scalar signed ssadd pattern for the RISC-V backend. Aka Form 1: #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_1 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ return (x ^ y) < 0 \ ? sum\ : (sum ^ x) >= 0 \ ? sum \ : x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) Before this patch: 10 │ sat_s_add_int64_t_fmt_1: 11 │ mv a5,a0 12 │ add a0,a0,a1 13 │ xor a1,a5,a1 14 │ not a1,a1 15 │ xor a4,a5,a0 16 │ and a1,a1,a4 17 │ blt a1,zero,.L5 18 │ ret 19 │ .L5: 20 │ srai a5,a5,63 21 │ li a0,-1 22 │ srli a0,a0,1 23 │ xor a0,a5,a0 24 │ ret After this patch: 10 │ sat_s_add_int64_t_fmt_1: 11 │ add a2,a0,a1 12 │ xor a1,a0,a1 13 │ xor a5,a0,a2 14 │ srli a5,a5,63 15 │ srli a1,a1,63 16 │ xori a1,a1,1 17 │ and a5,a5,a1 18 │ srai a4,a0,63 19 │ li a3,-1 20 │ srli a3,a3,1 21 │ xor a3,a3,a4 22 │ neg a4,a5 23 │ and a3,a3,a4 24 │ addi a5,a5,-1 25 │ and a0,a2,a5 26 │ or a0,a0,a3 27 │ ret The below test suites are passed for this patch: 1. The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func decl for expanding ssadd. * config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func impl to gen the max int rtx. (riscv_expand_ssadd): Add new func impl to expand the ssadd. * config/riscv/riscv.md (ssadd3): Add new pattern for signed integer .SAT_ADD. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_arith_data.h: Add test data. * gcc.target/riscv/sat_s_add-1.c: New test. * gcc.target/riscv/sat_s_add-2.c: New test. * gcc.target/riscv/sat_s_add-3.c: New test. * gcc.target/riscv/sat_s_add-4.c: New test. * gcc.target/riscv/sat_s_add-run-1.c: New test. 
* gcc.target/riscv/sat_s_add-run-2.c: New test. * gcc.target/riscv/sat_s_add-run-3.c: New test. * gcc.target/riscv/sat_s_add-run-4.c: New test. * gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-protos.h| 1 + gcc/config/riscv/riscv.cc | 90 ++ gcc/config/riscv/riscv.md | 11 +++ gcc/testsuite/gcc.target/riscv/sat_arith.h | 17 gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 85 gcc/testsuite/gcc.target/riscv/sat_s_add-1.c | 30 gcc/testsuite/gcc.target/riscv/sat_s_add-2.c | 32 gcc/testsuite/gcc.target/riscv/sat_s_add-3.c | 31 gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 30 gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c | 16 gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c | 16 gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c | 16 gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c | 16 .../gcc.target/riscv/scalar_sat_binary_run_xxx.h | 26 +++ 14 files changed, 417 insertions(+) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 926899ccad64..3358e3887b95 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -134,6 +134,7 @@ extern bool riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int); extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); extern void riscv_expand_usadd (rtx, rtx, rtx); +extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d03e51f3a687..98720611e246 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -12001,6 +12001,96 @@ riscv_expand_us
[gcc r15-3438] RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD
https://gcc.gnu.org/g:9ea9d05908432fc5f3632f3e397e3709f95ef636 commit r15-3438-g9ea9d05908432fc5f3632f3e397e3709f95ef636 Author: Pan Li Date: Mon Sep 2 15:54:43 2024 +0800 RISC-V: Allow IMM operand for unsigned scalar .SAT_ADD This patch would like to allow the IMM operand of the unsigned scalar .SAT_ADD. Like the operand 0, the operand 1 of .SAT_ADD will be zero extended to Xmode before underlying code generation. The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_expand_usadd): Zero extend the second operand of usadd as the first operand does. * config/riscv/riscv.md (usadd3): Allow imm operand for scalar usadd pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_add-11.c: Make asm check robust. * gcc.target/riscv/sat_u_add-15.c: Ditto. * gcc.target/riscv/sat_u_add-19.c: Ditto. * gcc.target/riscv/sat_u_add-23.c: Ditto. * gcc.target/riscv/sat_u_add-3.c: Ditto. * gcc.target/riscv/sat_u_add-7.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 2 +- gcc/config/riscv/riscv.md | 4 ++-- gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-3.c | 2 +- gcc/testsuite/gcc.target/riscv/sat_u_add-7.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 98720611e246..f82e64a6fec8 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11970,7 +11970,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y) rtx xmode_sum = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode); - rtx xmode_y = gen_lowpart (Xmode, y); + rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode); rtx xmode_dest = gen_reg_rtx (Xmode); /* Step-1: sum = x + y */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 6f7efafb8abe..9f94b5aa0232 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4360,8 +4360,8 @@ (define_expand "usadd3" [(match_operand:ANYI 0 "register_operand") - (match_operand:ANYI 1 "register_operand") - (match_operand:ANYI 2 "register_operand")] + (match_operand:ANYI 1 "reg_or_int_operand") + (match_operand:ANYI 2 "reg_or_int_operand")] "" { riscv_expand_usadd (operands[0], operands[1], operands[2]); diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c index e248aeafa8ef..bd830ececad4 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_3: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** 
srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c index bb8b991a84ee..de615a6225e9 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-15.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_4: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c index 7e4ae12f2f51..2b793e2f8fdb 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-19.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_5: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*a[01],\s*a[01] ** slli\s+[atx][0-9]+,\s*[atx][0-9],\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 ** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c index 49bbb74a401e..5de086e11384 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-23.c @@ -8,7 +8,7 @@ ** sat_u_add_uint32_t_fmt_6: ** slli\s+[atx][0-9]+,\s*a0,\s*32 ** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 -** add\s+[atx][0-9]+,\s*a0,\s*a1 +** add\s+[atx][0-9]+,\s*
[gcc r15-3502] Match: Add int type fits check for form 1 of .SAT_SUB imm operand
https://gcc.gnu.org/g:019335b404c8d7fb2d234bb179745cc28693dd20 commit r15-3502-g019335b404c8d7fb2d234bb179745cc28693dd20 Author: Pan Li Date: Mon Sep 2 09:48:46 2024 +0800 Match: Add int type fits check for form 1 of .SAT_SUB imm operand This patch adds a strict check for the imm operand of the .SAT_SUB matching. Previously there was no type checking for the imm operand, which may result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an integer constant that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (12, a); uint8_t sum = .SAT_SUB (12u, a); uint8_t sum = .SAT_SUB (126u, a); uint8_t sum = .SAT_SUB (128u, a); uint8_t sum = .SAT_SUB (228, a); uint8_t sum = .SAT_SUB (223u, a); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (-1, a); uint8_t sum = .SAT_SUB (256u, a); uint8_t sum = .SAT_SUB (257, a); The test suites below passed for this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-53.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-54.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-55.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-56.c | 27 ++ 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 621306213e4..4b86d20e165 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@0, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 86cd6bea8df..187c94795f7 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -284,6 +284,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\ +{ \ + return IMM >= y ? IMM - y : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ +{ \ + return IMM > y ? 
IMM - y : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c new file mode 100644 index 000..c959eeb0d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not
[gcc r15-3503] Match: Add int type fits check for form 2 of .SAT_SUB imm operand
https://gcc.gnu.org/g:a2e28b105cea4c44c3903d8d979c7a4afa1193f0 commit r15-3503-ga2e28b105cea4c44c3903d8d979c7a4afa1193f0 Author: Pan Li Date: Mon Sep 2 11:33:08 2024 +0800 Match: Add int type fits check for form 2 of .SAT_SUB imm operand This patch adds a strict check for the imm operand of the .SAT_SUB matching. Previously there was no type checking for the imm operand, which may result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an integer constant that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, 12); uint8_t sum = .SAT_SUB (a, 12u); uint8_t sum = .SAT_SUB (a, 126u); uint8_t sum = .SAT_SUB (a, 128u); uint8_t sum = .SAT_SUB (a, 228); uint8_t sum = .SAT_SUB (a, 223u); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, -1); uint8_t sum = .SAT_SUB (a, 256u); uint8_t sum = .SAT_SUB (a, 257); The test suites below passed for this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-57.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-58.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-59.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-60.c | 27 ++ 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 4b86d20e165..bc6a83b47fb 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (plus (max @0 INTEGER_CST@1) INTEGER_CST@2) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@1, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 187c94795f7..a8672f66322 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -298,6 +298,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ return IMM > y ? IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\ +{ \ + return x >= IMM ? x - IMM : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\ +{ \ + return x > IMM ? 
x - IMM : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c new file mode 100644 index 000..1b193bcfb26 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/ris
[gcc r15-3569] Match: Support form 2 for scalar signed integer .SAT_ADD
https://gcc.gnu.org/g:a7eaf7d5edb194bae0d7d9bc3d20bb5730be57d8 commit r15-3569-ga7eaf7d5edb194bae0d7d9bc3d20bb5730be57d8 Author: Pan Li Date: Tue Sep 3 15:39:16 2024 +0800 Match: Support form 2 for scalar signed integer .SAT_ADD This patch would like to support the form 2 of the scalar signed integer .SAT_ADD. Aka below example: Form 2: #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ T __attribute__((noinline)) \ sat_s_add_##T##_fmt_2 (T x, T y) \ {\ T sum = (UT)x + (UT)y; \ \ if ((x ^ y) < 0 || (sum ^ x) >= 0) \ return sum; \ \ return x < 0 ? MIN : MAX; \ } DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) We can tell the difference before and after this patch if backend implemented the ssadd3 pattern similar as below. Before this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t sum; 8 │ unsigned char x.0_1; 9 │ unsigned char y.1_2; 10 │ unsigned char _3; 11 │ signed char _4; 12 │ signed char _5; 13 │ int8_t _6; 14 │ _Bool _11; 15 │ signed char _12; 16 │ signed char _13; 17 │ signed char _14; 18 │ signed char _22; 19 │ signed char _23; 20 │ 21 │ ;; basic block 2, loop depth 0 22 │ ;;pred: ENTRY 23 │ x.0_1 = (unsigned char) x_7(D); 24 │ y.1_2 = (unsigned char) y_8(D); 25 │ _3 = x.0_1 + y.1_2; 26 │ sum_9 = (int8_t) _3; 27 │ _4 = x_7(D) ^ y_8(D); 28 │ _5 = x_7(D) ^ sum_9; 29 │ _23 = ~_4; 30 │ _22 = _5 & _23; 31 │ if (_22 >= 0) 32 │ goto ; [42.57%] 33 │ else 34 │ goto ; [57.43%] 35 │ ;;succ: 4 36 │ ;;3 37 │ 38 │ ;; basic block 3, loop depth 0 39 │ ;;pred: 2 40 │ _11 = x_7(D) < 0; 41 │ _12 = (signed char) _11; 42 │ _13 = -_12; 43 │ _14 = _13 ^ 127; 44 │ ;;succ: 4 45 │ 46 │ ;; basic block 4, loop depth 0 47 │ ;;pred: 2 48 │ ;;3 49 │ # _6 = PHI 50 │ return _6; 51 │ ;;succ: EXIT 52 │ 53 │ } After this patch: 4 │ __attribute__((noinline)) 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) 6 │ { 7 │ int8_t _6; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _6 = .SAT_ADD (x_7(D), y_8(D)); 
[tail call] 12 │ return _6; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add the form 2 of signed .SAT_ADD matching. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index bc6a83b47fb0..a664be5fe238 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3207,6 +3207,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Signed saturation add, case 2: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ sum) & !(X ^ Y) >= 0 ? sum : (-(T)(X < 0) ^ MAX); + + The T and UT are type pair like T=int8_t, UT=uint8_t. */ +(match (signed_integer_sat_add @0 @1) + (cond^ (ge (bit_and:c (bit_xor @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 + (bit_not (bit_xor:c @0 @1))) + integer_zerop) + @2 + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1)
[gcc r15-3578] RISC-V: Fix asm check for Vector SAT_* due to middle-end change
https://gcc.gnu.org/g:6bd3ee7f2f2f4beed5b9d9a530736ad69d2cac42 commit r15-3578-g6bd3ee7f2f2f4beed5b9d9a530736ad69d2cac42 Author: Pan Li Date: Wed Sep 11 07:00:13 2024 +0800 RISC-V: Fix asm check for Vector SAT_* due to middle-end change The middle-end change makes the effect on the layout of the assembly for vector SAT_*. This patch would like to fix it and make it robust. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust asm check and make it robust. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto. * gcc.target/riscv/rvv/autovec/bino
[gcc r15-3580] Vect: Support form 1 of vector signed integer .SAT_ADD
https://gcc.gnu.org/g:9b14a5823b685e3a604dc17b02c033f60ad90414 commit r15-3580-g9b14a5823b685e3a604dc17b02c033f60ad90414 Author: Pan Li Date: Wed Sep 11 09:54:38 2024 +0800 Vect: Support form 1 of vector signed integer .SAT_ADD This patch would like to support the vector signed ssadd pattern for the RISC-V backend. Aka Form 1: #define DEF_VEC_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \ void __attribute__((noinline)) \ vec_sat_s_add_##T##_fmt_1 (T *out, T *x, T *y, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ { \ T sum = (UT)x[i] + (UT)y[i]; \ out[i] = (x[i] ^ y[i]) < 0 \ ? sum \ : (sum ^ x[i]) >= 0\ ? sum\ : x[i] < 0 ? MIN : MAX; \ } \ } DEF_VEC_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) If the backend implemented the vector mode of ssadd, we will see IR diff similar as below: Before this patch: 108 │ _114 = .SELECT_VL (ivtmp_112, POLY_INT_CST [2, 2]); 109 │ ivtmp_77 = _114 * 8; 110 │ vect__4.9_80 = .MASK_LEN_LOAD (vectp_x.7_78, 64B, { -1, ... }, _114, 0); 111 │ vect__5.10_81 = VIEW_CONVERT_EXPR(vect__4.9_80); 112 │ vect__7.13_85 = .MASK_LEN_LOAD (vectp_y.11_83, 64B, { -1, ... }, _114, 0); 113 │ vect__8.14_86 = VIEW_CONVERT_EXPR(vect__7.13_85); 114 │ vect__9.15_87 = vect__5.10_81 + vect__8.14_86; 115 │ vect_sum_20.16_88 = VIEW_CONVERT_EXPR(vect__9.15_87); 116 │ vect__10.17_89 = vect__4.9_80 ^ vect__7.13_85; 117 │ vect__11.18_90 = vect__4.9_80 ^ vect_sum_20.16_88; 118 │ mask__46.19_92 = vect__10.17_89 >= { 0, ... }; 119 │ _36 = vect__4.9_80 >> 63; 120 │ mask__44.26_104 = vect__11.18_90 < { 0, ... }; 121 │ mask__43.27_105 = mask__46.19_92 & mask__44.26_104; 122 │ _115 = .COND_XOR (mask__43.27_105, _36, { 9223372036854775807, ... }, vect_sum_20.16_88); 123 │ .MASK_LEN_STORE (vectp_out.29_108, 64B, { -1, ... 
}, _114, 0, _115); 124 │ vectp_x.7_79 = vectp_x.7_78 + ivtmp_77; 125 │ vectp_y.11_84 = vectp_y.11_83 + ivtmp_77; 126 │ vectp_out.29_109 = vectp_out.29_108 + ivtmp_77; 127 │ ivtmp_113 = ivtmp_112 - _114; After this patch: 94 │ # vectp_x.7_82 = PHI 95 │ # vectp_y.10_86 = PHI 96 │ # vectp_out.14_91 = PHI 97 │ # ivtmp_95 = PHI 98 │ _97 = .SELECT_VL (ivtmp_95, POLY_INT_CST [2, 2]); 99 │ ivtmp_81 = _97 * 8; 100 │ vect__4.9_84 = .MASK_LEN_LOAD (vectp_x.7_82, 64B, { -1, ... }, _97, 0); 101 │ vect__7.12_88 = .MASK_LEN_LOAD (vectp_y.10_86, 64B, { -1, ... }, _97, 0); 102 │ vect_patt_40.13_89 = .SAT_ADD (vect__4.9_84, vect__7.12_88); 103 │ .MASK_LEN_STORE (vectp_out.14_91, 64B, { -1, ... }, _97, 0, vect_patt_40.13_89); 104 │ vectp_x.7_83 = vectp_x.7_82 + ivtmp_81; 105 │ vectp_y.10_87 = vectp_y.10_86 + ivtmp_81; 106 │ vectp_out.14_92 = vectp_out.14_91 + ivtmp_81; 107 │ ivtmp_96 = ivtmp_95 - _97; The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. gcc/ChangeLog: * match.pd: Add case 2 for the signed .SAT_ADD consumed by vect pattern. * tree-vect-patterns.cc (gimple_signed_integer_sat_add): Add new matching func decl for signed .SAT_ADD. (vect_recog_sat_add_pattern): Add signed .SAT_ADD pattern match. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 16 gcc/tree-vect-patterns.cc | 5 - 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index a664be5fe238..4cef965c9c7a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3221,6 +3221,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type +/* Signed saturation add, case 3: + T sum = (T)((UT)X + (UT)Y) + SAT_S_ADD = (X ^ Y) < 0 && (X ^ sum) >= 0 ? (-(T)(X < 0) ^ MAX) : sum; + + The T and UT are type pair like T=int8_t, UT=uint8_t. 
*/ +(match (signed_integer_sat_add @0 @1) + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0) + (nop_convert @1 +
[gcc r15-3593] RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass
https://gcc.gnu.org/g:c08e493ceee47bbeb466eeef100be7c1dd01a4e5 commit r15-3593-gc08e493ceee47bbeb466eeef100be7c1dd01a4e5 Author: garthlei Date: Wed Sep 11 17:09:37 2024 +0800 RISC-V: Fix vl_used_by_non_rvv_insn logic of vsetvl pass This patch fixes a bug in the current vsetvl pass. The current pass uses `m_vl` to determine whether the dest operand has been used by non-RVV instructions. However, `m_vl` may have been modified as a result of an `update_avl` call, and thus would no longer be the dest operand of the original instruction. This can lead to incorrect vsetvl eliminations, as is shown in the testcase. In this patch, we create a `dest_vl` variable for this scenario. gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc: Use `dest_vl` for dest VL operand gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c: New test. Diff: --- gcc/config/riscv/riscv-vsetvl.cc| 16 +++- .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c | 17 + 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 017efa8bc17e..ce831685439a 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1002,6 +1002,9 @@ public: void parse_insn (insn_info *insn) { +/* The VL dest of the insn */ +rtx dest_vl = NULL_RTX; + m_insn = insn; m_bb = insn->bb (); /* Return if it is debug insn for the consistency with optimize == 0. */ @@ -1035,7 +1038,10 @@ public: if (m_avl) { if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ()) - m_vl = ::get_vl (insn->rtl ()); + { + m_vl = ::get_vl (insn->rtl ()); + dest_vl = m_vl; + } if (has_nonvlmax_reg_avl ()) m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def (); @@ -1132,22 +1138,22 @@ public: } /* Determine if dest operand(vl) has been used by non-RVV instructions. 
*/ -if (has_vl ()) +if (dest_vl) { const hash_set vl_uses - = get_all_real_uses (get_insn (), REGNO (get_vl ())); + = get_all_real_uses (get_insn (), REGNO (dest_vl)); for (use_info *use : vl_uses) { gcc_assert (use->insn ()->is_real ()); rtx_insn *rinsn = use->insn ()->rtl (); if (!has_vl_op (rinsn) - || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1) + || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1) { m_vl_used_by_non_rvv_insn = true; break; } rtx avl = ::get_avl (rinsn); - if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl)) + if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl)) { m_vl_used_by_non_rvv_insn = true; break; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c new file mode 100644 index ..c155f5613d27 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O2 -fdump-rtl-vsetvl-details" } */ + +#include + +uint64_t a[2], b[2]; + +void +foo () +{ + size_t vl = __riscv_vsetvl_e64m1 (2); + vuint64m1_t vx = __riscv_vle64_v_u64m1 (a, vl); + vx = __riscv_vslide1down_vx_u64m1 (vx, 0xull, vl); + __riscv_vse64_v_u64m1 (b, vx, vl); +} + +/* { dg-final { scan-rtl-dump-not "Eliminate insn" "vsetvl" } } */
[gcc r15-3595] RISC-V: Eliminate latter vsetvl when fused
https://gcc.gnu.org/g:3f212eabbba3edc1827d6da53cf6d5a64c6524f0 commit r15-3595-g3f212eabbba3edc1827d6da53cf6d5a64c6524f0 Author: Bohan Lei Date: Thu Sep 12 10:28:03 2024 +0800 RISC-V: Eliminate latter vsetvl when fused Hi all, A simple assembly check has been added in this version. Previous version: https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662783.html Thanks, Bohan -- The current vsetvl pass eliminates a vsetvl instruction when the previous info is "available," but does not when "compatible." This can lead to not only redundancy, but also incorrect behaviors when the previous info happens to be compatible with a later vector instruction, which ends up using the vsetvl info that should have been eliminated, as is shown in the testcase. This patch eliminates the vsetvl when the previous info is "compatible." gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::fuse_local_vsetvl_info): Delete vsetvl insn when `prev_info` is compatible gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c: New test. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 3 +++ .../gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c| 19 +++ 2 files changed, 22 insertions(+) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index ce831685439a..030ffbe2ebbc 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -2796,6 +2796,9 @@ pre_vsetvl::fuse_local_vsetvl_info () curr_info.dump (dump_file, ""); } m_dem.merge (prev_info, curr_info); + if (!curr_info.vl_used_by_non_rvv_insn_p () + && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) + m_delete_list.safe_push (curr_info); if (curr_info.get_read_vl_insn ()) prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); if (dump_file && (dump_flags & TDF_DETAILS)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c new file mode 100644 index ..04a8ff2945a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O2 -fno-schedule-insns -fdump-rtl-vsetvl-details" } */ + +#include + +vuint16m1_t +foo (vuint16m1_t a, vuint16m1_t b, size_t avl) +{ + size_t vl; + vuint16m1_t ret; + uint16_t c = __riscv_vmv_x_s_u16m1_u16(a); + vl = __riscv_vsetvl_e8mf2 (avl); + ret = __riscv_vadd_vx_u16m1 (a, c, avl); + ret = __riscv_vadd_vv_u16m1 (ret, a, vl); + return ret; +} + +/* { dg-final { scan-rtl-dump "Eliminate insn" "vsetvl" } } */ +/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
[gcc r15-3620] Match: Remove unnecessary types_match for case 1 of signed SAT_ADD
https://gcc.gnu.org/g:45e7cc9caf327bfddd75b3093eb855b8b64acae8 commit r15-3620-g45e7cc9caf327bfddd75b3093eb855b8b64acae8 Author: Pan Li Date: Fri Sep 13 11:36:40 2024 +0800 Match: Remove unnecessary types_match for case 1 of signed SAT_ADD Given that all commutative binary operators require matching types for both operands, remove the types_match check for case 1 of the signed SAT_ADD, because we have (bit_xor @0 @1), which ensures the operands have the correct TREE type. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Remove the types_match check for signed SAT_ADD case 1. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 4cef965c9c7a..5566c0e4c41c 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3204,8 +3204,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) integer_zerop) (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value) @2) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) - && types_match (type, @0, @1 + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type /* Signed saturation add, case 2: T sum = (T)((UT)X + (UT)Y)
[gcc r15-1671] Vect: Support truncate after .SAT_SUB pattern in zip
https://gcc.gnu.org/g:f2476a2649e9975d454d179145574c21d8218aee commit r15-1671-gf2476a2649e9975d454d179145574c21d8218aee Author: Pan Li Date: Thu Jun 27 09:28:04 2024 +0800 Vect: Support truncate after .SAT_SUB pattern in zip The zip benchmark of coremark-pro have one SAT_SUB like pattern but truncated as below: void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; register uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB } while (--n); } It will have gimple before vect pass, it cannot hit any pattern of SAT_SUB and then cannot vectorize to SAT_SUB. _2 = a_11 - b_12(D); iftmp.0_13 = (short unsigned int) _2; _18 = a_11 >= b_12(D); iftmp.0_5 = _18 ? iftmp.0_13 : 0; This patch would like to improve the pattern match to recog above as truncate after .SAT_SUB pattern. Then we will have the pattern similar to below, as well as eliminate the first 3 dead stmt. _2 = a_11 - b_12(D); iftmp.0_13 = (short unsigned int) _2; _18 = a_11 >= b_12(D); iftmp.0_5 = (short unsigned int).SAT_SUB (a_11, b_12(D)); The below tests are passed for this patch. 1. The rv64gcv fully regression tests. 2. The rv64gcv build with glibc. 3. The x86 bootstrap tests. 4. The x86 fully regression tests. gcc/ChangeLog: * match.pd: Add convert description for minus and capture. * tree-vect-patterns.cc (vect_recog_build_binary_gimple_call): Add new logic to handle in_type is incompatibile with out_type, as well as rename from. (vect_recog_build_binary_gimple_stmt): Rename to. (vect_recog_sat_add_pattern): Leverage above renamed func. (vect_recog_sat_sub_pattern): Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/match.pd | 4 ++-- gcc/tree-vect-patterns.cc | 51 --- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index cf8a399a744..820591a36b3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3164,9 +3164,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Unsigned saturation sub, case 2 (branch with ge): SAT_U_SUB = X >= Y ? X - Y : 0. */ (match (unsigned_integer_sat_sub @0 @1) - (cond^ (ge @0 @1) (minus @0 @1) integer_zerop) + (cond^ (ge @0 @1) (convert? (minus (convert1? @0) (convert1? @1))) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @0, @1 + && TYPE_UNSIGNED (TREE_TYPE (@0)) && types_match (@0, @1 /* Unsigned saturation sub, case 3 (branchless with gt): SAT_U_SUB = (X - Y) * (X > Y). */ diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index cef901808eb..519d15f2a43 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4490,26 +4490,37 @@ vect_recog_mult_pattern (vec_info *vinfo, extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); -static gcall * -vect_recog_build_binary_gimple_call (vec_info *vinfo, gimple *stmt, +static gimple * +vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info, internal_fn fn, tree *type_out, -tree op_0, tree op_1) +tree lhs, tree op_0, tree op_1) { tree itype = TREE_TYPE (op_0); - tree vtype = get_vectype_for_scalar_type (vinfo, itype); + tree otype = TREE_TYPE (lhs); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); - if (vtype != NULL_TREE -&& direct_internal_fn_supported_p (fn, vtype, OPTIMIZE_FOR_BOTH)) + if (v_itype != NULL_TREE && v_otype != NULL_TREE +&& direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH)) { gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + tree 
in_ssa = vect_recog_temp_ssa_var (itype, NULL); - gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL)); + gimple_call_set_lhs (call, in_ssa); gimple_call_set_nothrow (call, /* nothrow_p */ false); - gimple_set_location (call, gimple_location (stmt)); + gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info))); + + *type_out = v_otype; - *type_out = vtype; + if (types_compatible_p (itype, otype)) + return call; + else + { + append_pattern_def_seq (vinfo, stmt_info, call, v_itype); + tree out_ssa = vect_recog_temp_ssa_var (otype, NULL); - return call; + return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa); + } } return NULL; @@ -4541,13 +4552,13 @@ vect_recog_sat_add_pattern (vec_info *vinfo, stmt_v
[gcc r15-1672] Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int
https://gcc.gnu.org/g:212441e19d8179645efbec6dd98a74eb673734dd commit r15-1672-g212441e19d8179645efbec6dd98a74eb673734dd Author: Pan Li Date: Wed Jun 26 09:28:05 2024 +0800 Internal-fn: Support new IFN SAT_TRUNC for unsigned scalar int This patch would like to add the middle-end presentation for the saturation truncation. Aka set the result of truncated value to the max value when overflow. It will take the pattern similar as below. Form 1: #define DEF_SAT_U_TRUC_FMT_1(WT, NT) \ NT __attribute__((noinline)) \ sat_u_truc_##T##_fmt_1 (WT x)\ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } For example, truncated uint16_t to uint8_t, we have * SAT_TRUNC (254) => 254 * SAT_TRUNC (255) => 255 * SAT_TRUNC (256) => 255 * SAT_TRUNC (65536) => 255 Given below SAT_TRUNC from uint64_t to uint32_t. DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t) Before this patch: __attribute__((noinline)) uint32_t sat_u_truc_T_fmt_1 (uint64_t x) { _Bool overflow; unsigned int _1; unsigned int _2; unsigned int _3; uint32_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY overflow_5 = x_4(D) > 4294967295; _1 = (unsigned int) x_4(D); _2 = (unsigned int) overflow_5; _3 = -_2; _6 = _1 | _3; return _6; ;;succ: EXIT } After this patch: __attribute__((noinline)) uint32_t sat_u_truc_T_fmt_1 (uint64_t x) { uint32_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY _6 = .SAT_TRUNC (x_4(D)); [tail call] return _6; ;;succ: EXIT } The below tests are passed for this patch: *. The rv64gcv fully regression tests. *. The rv64gcv build with glibc. *. The x86 bootstrap tests. *. The x86 fully regression tests. gcc/ChangeLog: * internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as unary_convert. * match.pd: Add new matching pattern for unsigned int sat_trunc. * optabs.def (OPTAB_CL): Add unsigned and signed optab. * tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add new decl for the matching pattern generated func. 
(match_unsigned_saturation_trunc): Add new func impl to match the .SAT_TRUNC. (math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match function under BIT_IOR_EXPR case. Signed-off-by: Pan Li Diff: --- gcc/internal-fn.def | 2 ++ gcc/match.pd | 16 gcc/optabs.def| 3 +++ gcc/tree-ssa-math-opts.cc | 32 4 files changed, 53 insertions(+) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a8c83437ada..915d329c05a 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -278,6 +278,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert) + DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary) diff --git a/gcc/match.pd b/gcc/match.pd index 820591a36b3..3fa3f2e8296 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3210,6 +3210,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1 +/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT). + SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ +(match (unsigned_integer_sat_trunc @0) + (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1))) + (convert @0)) + (with { + unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned otype_precision = TYPE_PRECISION (type); + wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision); + wide_int int_cst = wi::to_wide (@1, itype_precision); + } + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && otype_precision < itype_precision + && wi::eq_p (trunc_max, int_cst) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . 
*/ (for eqne (eq ne) diff --git a/gcc/optabs.def b/gcc/optabs.def index 2f36ed4cb42..a69af51d601 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -63,6 +63,9 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2") OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc) OPTAB_CL(satfractuns_op
[gcc r15-1676] RISC-V: Add testcases for vector truncate after .SAT_SUB
https://gcc.gnu.org/g:b55798c0fc5cb02512b58502961d8425fb60588f commit r15-1676-gb55798c0fc5cb02512b58502961d8425fb60588f Author: Pan Li Date: Mon Jun 24 22:25:57 2024 +0800 RISC-V: Add testcases for vector truncate after .SAT_SUB This patch would like to add the test cases of the vector truncate after .SAT_SUB. Aka: #define DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T) \ void __attribute__((noinline)) \ vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ {\ IN_T x = op_1[i]; \ out[i] = (OUT_T)(x >= y ? x - y : 0); \ }\ } The below 3 cases are included. DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint8_t, uint16_t) DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint16_t, uint32_t) DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(uint32_t, uint64_t) gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-run-3.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 19 ++ .../rvv/autovec/binop/vec_sat_binary_scalar.h | 27 .../rvv/autovec/binop/vec_sat_u_sub_trunc-1.c | 21 ++ .../rvv/autovec/binop/vec_sat_u_sub_trunc-2.c | 21 ++ .../rvv/autovec/binop/vec_sat_u_sub_trunc-3.c | 21 ++ .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-1.c | 74 ++ .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-2.c | 74 ++ .../rvv/autovec/binop/vec_sat_u_sub_trunc-run-3.c | 74 ++ 8 files changed, 331 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index d5c81fbe5a9..a3116033fb3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -310,4 +310,23 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, unsigned limit) \ #define RUN_VEC_SAT_U_SUB_FMT_10(T, out, op_1, op_2, N) \ vec_sat_u_sub_##T##_fmt_10(out, op_1, op_2, N) +/**/ +/* Saturation Sub Truncated (Unsigned and Signed) */ +/**/ +#define DEF_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T) \ +void __attribute__((noinline)) \ +vec_sat_u_sub_trunc_##OUT_T##_fmt_1 (OUT_T *out, IN_T *op_1, IN_T y, \ +unsigned limit) \ +{\ + unsigned i;\ + for (i = 0; i < limit; i++)\ +{\ + IN_T x = op_1[i]; \ + out[i] = (OUT_T)(x >= y ? 
x - y : 0); \ +}\ +} + +#define RUN_VEC_SAT_U_SUB_TRUNC_FMT_1(OUT_T, IN_T, out, op_1, y, N) \ + vec_sat_u_sub_trunc_##OUT_T##_fmt_1(out, op_1, y, N) + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h new file mode 100644 index 000..c79b180054e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h @@ -0,0 +1,27 @@ +#ifndef HAVE_DEFINED_VEC_SAT_BINARY_SCALAR +#define HAVE_DEFINED_VEC_SAT_BINARY_SCALAR + +int +main () +{ + unsigned i, k; + OUT_T out[N]; + + for (i = 0; i < size
[gcc r15-1721] Match: Support imm form for unsigned scalar .SAT_ADD
https://gcc.gnu.org/g:21e3565927eda5ce9907d91100623052fa8182cd commit r15-1721-g21e3565927eda5ce9907d91100623052fa8182cd Author: Pan Li Date: Fri Jun 28 11:33:41 2024 +0800 Match: Support imm form for unsigned scalar .SAT_ADD This patch would like to support the form of unsigned scalar .SAT_ADD when one of the op is IMM. For example as below: Form IMM: #define DEF_SAT_U_ADD_IMM_FMT_1(T) \ T __attribute__((noinline)) \ sat_u_add_imm_##T##_fmt_1 (T x) \ {\ return (T)(x + 9) >= x ? (x + 9) : -1; \ } DEF_SAT_U_ADD_IMM_FMT_1(uint64_t) Before this patch: __attribute__((noinline)) uint64_t sat_u_add_imm_uint64_t_fmt_1 (uint64_t x) { long unsigned int _1; uint64_t _3; ;; basic block 2, loop depth 0 ;;pred: ENTRY _1 = MIN_EXPR ; _3 = _1 + 9; return _3; ;;succ: EXIT } After this patch: __attribute__((noinline)) uint64_t sat_u_add_imm_uint64_t_fmt_1 (uint64_t x) { uint64_t _3; ;; basic block 2, loop depth 0 ;;pred: ENTRY _3 = .SAT_ADD (x_2(D), 9); [tail call] return _3; ;;succ: EXIT } The below test suites are passed for this patch: 1. The rv64gcv fully regression test with newlib. 2. The x86 bootstrap test. 3. The x86 fully regression test. gcc/ChangeLog: * match.pd: Add imm form for .SAT_ADD matching. * tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children): Add .SAT_ADD matching under PLUS_EXPR. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 24 gcc/tree-ssa-math-opts.cc | 2 ++ 2 files changed, 26 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 3fa3f2e8296..7fff7b5f9fe 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3154,6 +3154,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_add @0 @1) (cond^ (gt @0 (usadd_left_part_1@2 @0 @1)) integer_minus_onep @2)) +/* Unsigned saturation add, case 9 (one op is imm): + SAT_U_ADD = (X + 3) >= x ? (X + 3) : -1. 
*/ +(match (unsigned_integer_sat_add @0 @1) + (plus (min @0 INTEGER_CST@2) INTEGER_CST@1) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)) + (with + { +unsigned precision = TYPE_PRECISION (type); +wide_int cst_1 = wi::to_wide (@1); +wide_int cst_2 = wi::to_wide (@2); +wide_int max = wi::mask (precision, false, precision); +wide_int sum = wi::add (cst_1, cst_2); + } + (if (wi::eq_p (max, sum)) + +/* Unsigned saturation add, case 10 (one op is imm): + SAT_U_ADD = __builtin_add_overflow (X, 3, &ret) == 0 ? ret : -1. */ +(match (unsigned_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop) + integer_minus_onep (realpart @2)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0 + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 3783a874699..3b5433ec000 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -6195,6 +6195,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; case PLUS_EXPR: + match_unsigned_saturation_add (&gsi, as_a (stmt)); + /* fall-through */ case MINUS_EXPR: if (!convert_plusminus_to_widen (&gsi, stmt, code)) {
[gcc r15-1753] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 1
https://gcc.gnu.org/g:ed213b384fdca9375c3ec53c2a0eae134fb98612 commit r15-1753-ged213b384fdca9375c3ec53c2a0eae134fb98612 Author: Pan Li Date: Sun Jun 30 16:03:41 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 1 This patch would like to add test cases for the unsigned scalar .SAT_ADD IMM form 1. Aka: Form 1: #define DEF_SAT_U_ADD_IMM_FMT_1(T) \ T __attribute__((noinline)) \ sat_u_add_imm_##T##_fmt_1 (T x) \ {\ return (T)(x + 9) >= x ? (x + 9) : -1; \ } DEF_SAT_U_ADD_IMM_FMT_1(uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add helper test macro. * gcc.target/riscv/sat_u_add_imm-1.c: New test. * gcc.target/riscv/sat_u_add_imm-2.c: New test. * gcc.target/riscv/sat_u_add_imm-3.c: New test. * gcc.target/riscv/sat_u_add_imm-4.c: New test. * gcc.target/riscv/sat_u_add_imm-run-1.c: New test. * gcc.target/riscv/sat_u_add_imm-run-2.c: New test. * gcc.target/riscv/sat_u_add_imm-run-3.c: New test. * gcc.target/riscv/sat_u_add_imm-run-4.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c | 19 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c | 21 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-3.c | 18 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-4.c | 17 .../gcc.target/riscv/sat_u_add_imm-run-1.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-2.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-3.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-4.c | 46 ++ 9 files changed, 269 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 0c2e44af718..4ec4ec36cc1 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -60,6 +60,16 @@ sat_u_add_##T##_fmt_6 (T x, T y)\ #define RUN_SAT_U_ADD_FMT_5(T, x, y) sat_u_add_##T##_fmt_5(x, y) #define RUN_SAT_U_ADD_FMT_6(T, x, y) sat_u_add_##T##_fmt_6(x, y) +#define DEF_SAT_U_ADD_IMM_FMT_1(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_add_imm##IMM##_##T##_fmt_1 (T x) \ +{\ + return (T)(x + IMM) >= x ? 
(x + IMM) : -1; \ +} + +#define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \ + if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort () + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c new file mode 100644 index 000..14e9b7595a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm9_uint8_t_fmt_1: +** addi\s+[atx][0-9]+,\s*a0,\s*9 +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_1(uint8_t, 9) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c new file mode 100644 index 000..c1a3c6ff21d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm3_uint16_t_fmt_1: +** addi\s+[atx][0-9]+,\s*a0,\s*3 +** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_1(uint16_t, 3) + +/* { dg-final { scan-rtl-dump-times 
".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsui
[gcc r15-1754] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 2
https://gcc.gnu.org/g:bff0d025aff8efaa5d991fcd13dd9876b115dc94 commit r15-1754-gbff0d025aff8efaa5d991fcd13dd9876b115dc94 Author: Pan Li Date: Sun Jun 30 16:14:38 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 2 This patch would like to add test cases for the unsigned scalar .SAT_ADD IMM form 2. Aka: Form 2: #define DEF_SAT_U_ADD_IMM_FMT_2(T) \ T __attribute__((noinline)) \ sat_u_add_imm_##T##_fmt_1 (T x) \ { \ return (T)(x + 9) < x ? -1 : (x + 9); \ } DEF_SAT_U_ADD_IMM_FMT_2(uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add helper test macro. * gcc.target/riscv/sat_u_add_imm-5.c: New test. * gcc.target/riscv/sat_u_add_imm-6.c: New test. * gcc.target/riscv/sat_u_add_imm-7.c: New test. * gcc.target/riscv/sat_u_add_imm-8.c: New test. * gcc.target/riscv/sat_u_add_imm-run-5.c: New test. * gcc.target/riscv/sat_u_add_imm-run-6.c: New test. * gcc.target/riscv/sat_u_add_imm-run-7.c: New test. * gcc.target/riscv/sat_u_add_imm-run-8.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 10 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c | 19 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c | 21 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-7.c | 18 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-8.c | 17 .../gcc.target/riscv/sat_u_add_imm-run-5.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-6.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-7.c | 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-8.c | 46 ++ 9 files changed, 269 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 4ec4ec36cc1..d94f0fd602c 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -67,9 +67,19 @@ sat_u_add_imm##IMM##_##T##_fmt_1 (T x) \ return (T)(x + IMM) >= x ? 
(x + IMM) : -1; \ } +#define DEF_SAT_U_ADD_IMM_FMT_2(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_add_imm##IMM##_##T##_fmt_2 (T x) \ +{ \ + return (T)(x + IMM) < x ? -1 : (x + IMM); \ +} + #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \ if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort () +#define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \ + if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c new file mode 100644 index 000..19b502db6c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm9_uint8_t_fmt_2: +** addi\s+[atx][0-9]+,\s*a0,\s*9 +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_2(uint8_t, 9) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c new file mode 100644 index 000..0317370b67e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-6.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm3_uint16_t_fmt_2: +** addi\s+[atx][0-9]+,\s*a0,\s*3 +** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** 
srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_2(uint16_t, 3) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD "
[gcc r15-1755] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 3
https://gcc.gnu.org/g:6d98e88f61f9b2e6864775ce390e9ce0a1359624 commit r15-1755-g6d98e88f61f9b2e6864775ce390e9ce0a1359624 Author: Pan Li Date: Sun Jun 30 16:41:16 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 3 This patch would like to add test cases for the unsigned scalar .SAT_ADD IMM form 3. Aka: Form 3: #define DEF_SAT_U_ADD_IMM_FMT_3(T) \ T __attribute__((noinline)) \ sat_u_add_imm_##T##_fmt_3 (T x) \ {\ T ret; \ return __builtin_add_overflow (x, 8, &ret) ? -1 : ret; \ } DEF_SAT_U_ADD_IMM_FMT_3(uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add helper test macro. * gcc.target/riscv/sat_u_add_imm-10.c: New test. * gcc.target/riscv/sat_u_add_imm-11.c: New test. * gcc.target/riscv/sat_u_add_imm-12.c: New test. * gcc.target/riscv/sat_u_add_imm-9.c: New test. * gcc.target/riscv/sat_u_add_imm-run-10.c: New test. * gcc.target/riscv/sat_u_add_imm-run-11.c: New test. * gcc.target/riscv/sat_u_add_imm-run-12.c: New test. * gcc.target/riscv/sat_u_add_imm-run-9.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 11 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c | 21 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c | 18 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-12.c | 17 gcc/testsuite/gcc.target/riscv/sat_u_add_imm-9.c | 19 + .../gcc.target/riscv/sat_u_add_imm-run-10.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-11.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-12.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-9.c | 46 ++ 9 files changed, 270 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index d94f0fd602c..83b294db476 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -74,12 +74,23 @@ sat_u_add_imm##IMM##_##T##_fmt_2 (T x) \ return (T)(x + IMM) < x ? 
-1 : (x + IMM); \ } +#define DEF_SAT_U_ADD_IMM_FMT_3(T, IMM)\ +T __attribute__((noinline))\ +sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \ +{ \ + T ret; \ + return __builtin_add_overflow (x, IMM, &ret) ? -1 : ret; \ +} + #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \ if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort () #define RUN_SAT_U_ADD_IMM_FMT_2(T, x, IMM, expect) \ if (sat_u_add_imm##IMM##_##T##_fmt_2(x) != expect) __builtin_abort () +#define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \ + if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort () + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c new file mode 100644 index 000..24cdd267cca --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-10.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm3_uint16_t_fmt_3: +** addi\s+[atx][0-9]+,\s*a0,\s*3 +** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_3(uint16_t, 3) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c new file mode 100644 index 000..f30e2405a0d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-11.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { 
check-function-bodies "
[gcc r15-1756] RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 4
https://gcc.gnu.org/g:7a65ab6b5f38d3018ffd456f278a9fd885487a27 commit r15-1756-g7a65ab6b5f38d3018ffd456f278a9fd885487a27 Author: Pan Li Date: Sun Jun 30 16:48:19 2024 +0800 RISC-V: Add testcases for unsigned scalar .SAT_ADD IMM form 4 This patch would like to add test cases for the unsigned scalar .SAT_ADD IMM form 4. Aka: Form 4: #define DEF_SAT_U_ADD_IMM_FMT_4(T)\ T __attribute__((noinline)) \ sat_u_add_imm_##T##_fmt_4 (T x) \ { \ T ret; \ return __builtin_add_overflow (x, 9, &ret) == 0 ? ret : -1; \ } DEF_SAT_U_ADD_IMM_FMT_4(uint64_t) The below test is passed for this patch. * The rv64gcv regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add helper test macro. * gcc.target/riscv/sat_u_add_imm-13.c: New test. * gcc.target/riscv/sat_u_add_imm-14.c: New test. * gcc.target/riscv/sat_u_add_imm-15.c: New test. * gcc.target/riscv/sat_u_add_imm-16.c: New test. * gcc.target/riscv/sat_u_add_imm-run-13.c: New test. * gcc.target/riscv/sat_u_add_imm-run-14.c: New test. * gcc.target/riscv/sat_u_add_imm-run-15.c: New test. * gcc.target/riscv/sat_u_add_imm-run-16.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 11 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c | 19 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c | 21 ++ gcc/testsuite/gcc.target/riscv/sat_u_add_imm-15.c | 18 + gcc/testsuite/gcc.target/riscv/sat_u_add_imm-16.c | 17 .../gcc.target/riscv/sat_u_add_imm-run-13.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-14.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-15.c| 46 ++ .../gcc.target/riscv/sat_u_add_imm-run-16.c| 46 ++ 9 files changed, 270 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 83b294db476..75442c94dc1 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -82,6 +82,14 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \ return __builtin_add_overflow (x, IMM, &ret) ? 
-1 : ret; \ } +#define DEF_SAT_U_ADD_IMM_FMT_4(T, IMM) \ +T __attribute__((noinline)) \ +sat_u_add_imm##IMM##_##T##_fmt_4 (T x) \ +{ \ + T ret;\ + return __builtin_add_overflow (x, IMM, &ret) == 0 ? ret : -1; \ +} + #define RUN_SAT_U_ADD_IMM_FMT_1(T, x, IMM, expect) \ if (sat_u_add_imm##IMM##_##T##_fmt_1(x) != expect) __builtin_abort () @@ -91,6 +99,9 @@ sat_u_add_imm##IMM##_##T##_fmt_3 (T x) \ #define RUN_SAT_U_ADD_IMM_FMT_3(T, x, IMM, expect) \ if (sat_u_add_imm##IMM##_##T##_fmt_3(x) != expect) __builtin_abort () +#define RUN_SAT_U_ADD_IMM_FMT_4(T, x, IMM, expect) \ + if (sat_u_add_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c new file mode 100644 index 000..a3b2679233c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-13.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_add_imm9_uint8_t_fmt_4: +** addi\s+[atx][0-9]+,\s*a0,\s*9 +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** neg\s+[atx][0-9]+,\s*[atx][0-9]+ +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ +DEF_SAT_U_ADD_IMM_FMT_4(uint8_t, 9) + +/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c new file mode 100644 index 000..968534b74da --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm-14.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-e
[gcc r15-1805] RISC-V: Fix asm check failure for truncated after SAT_SUB
https://gcc.gnu.org/g:ab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318 commit r15-1805-gab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318 Author: Pan Li Date: Wed Jul 3 13:17:16 2024 +0800 RISC-V: Fix asm check failure for truncated after SAT_SUB The asm check for truncation after SAT_SUB is incorrect: the tests should check for the vx form of vssubu instead of the vv form. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Update vssubu check from vv to vx. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c index dd9e3999a29..1e380657d74 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma ** ... ** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ... 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c index 738d1465a01..d7b8931f0ec 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma ** ... ** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e16,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ... diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c index b008b21cf0c..edf42a1f776 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma ** ... ** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ...
[gcc r15-1819] Vect: Support IFN SAT_TRUNC for unsigned vector int
https://gcc.gnu.org/g:8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58 commit r15-1819-g8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58 Author: Pan Li Date: Tue Jul 2 21:23:43 2024 +0800 Vect: Support IFN SAT_TRUNC for unsigned vector int This patch would like to support the .SAT_TRUNC for the unsigned vector int. Given we have below example code: Form 1 #define VEC_DEF_SAT_U_TRUC_FMT_1(NT, WT) \ void __attribute__((noinline)) \ vec_sat_u_truc_##WT##_to_##NT##_fmt_1 (NT *x, WT *y, unsigned limit) \ {\ for (unsigned i = 0; i < limit; i++) \ {\ bool overflow = y[i] > (WT)(NT)(-1); \ x[i] = ((NT)y[i]) | (NT)-overflow; \ }\ } VEC_DEF_SAT_U_TRUC_FMT_1 (uint32_t, uint64_t) Before this patch: void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, unsigned int limit) { ... _51 = .SELECT_VL (ivtmp_49, POLY_INT_CST [2, 2]); ivtmp_35 = _51 * 8; vect__4.7_32 = .MASK_LEN_LOAD (vectp_y.5_34, 64B, { -1, ... }, _51, 0); mask_overflow_16.8_30 = vect__4.7_32 > { 4294967295, ... }; vect__5.9_29 = (vector([2,2]) unsigned int) vect__4.7_32; vect__10.13_20 = .VCOND_MASK (mask_overflow_16.8_30, { 4294967295, ... }, vect__5.9_29); ivtmp_12 = _51 * 4; .MASK_LEN_STORE (vectp_x.14_11, 32B, { -1, ... }, _51, 0, vect__10.13_20); vectp_y.5_33 = vectp_y.5_34 + ivtmp_35; vectp_x.14_46 = vectp_x.14_11 + ivtmp_12; ivtmp_50 = ivtmp_49 - _51; if (ivtmp_50 != 0) ... } After this patch: void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, unsigned int limit) { ... _12 = .SELECT_VL (ivtmp_21, POLY_INT_CST [2, 2]); ivtmp_34 = _12 * 8; vect__4.7_31 = .MASK_LEN_LOAD (vectp_y.5_33, 64B, { -1, ... }, _12, 0); vect_patt_40.8_30 = .SAT_TRUNC (vect__4.7_31); // << .SAT_TRUNC ivtmp_29 = _12 * 4; .MASK_LEN_STORE (vectp_x.9_28, 32B, { -1, ... }, _12, 0, vect_patt_40.8_30); vectp_y.5_32 = vectp_y.5_33 + ivtmp_34; vectp_x.9_27 = vectp_x.9_28 + ivtmp_29; ivtmp_20 = ivtmp_21 - _12; if (ivtmp_20 != 0) ... } The below test suites are passed for this patch * The x86 bootstrap test. 
* The x86 fully regression test. * The rv64gcv fully regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (gimple_unsigned_integer_sat_trunc): Add new decl generated by match. (vect_recog_sat_trunc_pattern): Add new func impl to recog the .SAT_TRUNC pattern. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-patterns.cc | 54 +++ 1 file changed, 54 insertions(+) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 519d15f2a43..86e893a1c43 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4489,6 +4489,7 @@ vect_recog_mult_pattern (vec_info *vinfo, extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); +extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); static gimple * vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info, @@ -4603,6 +4604,58 @@ vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, return NULL; } +/* + * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple: + * overflow_5 = x_4(D) > 4294967295; + * _1 = (unsigned int) x_4(D); + * _2 = (unsigned int) overflow_5; + * _3 = -_2; + * _6 = _1 | _3; + * + * And then simplied to + * _6 = .SAT_TRUNC (x_4(D)); + */ + +static gimple * +vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); + + if (!is_gimple_assign (last_stmt)) +return NULL; + + tree ops[1]; + tree lhs = gimple_assign_lhs (last_stmt); + + if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)) +{ + tree itype = TREE_TYPE (ops[0]); + tree otype = TREE_TYPE (lhs); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); + internal_fn fn = IFN_SAT_TRUNC; + + if (v_itype != NULL_TREE && v_otype != NULL_TREE + && direct_internal_fn_supported_p (fn, tree_pair (v_otype, 
v_itype), + OPTIMIZE_FOR_BOTH)) + { + gcall *c
[gcc r15-1820] Match: Allow more types truncation for .SAT_TRUNC
https://gcc.gnu.org/g:44c767c06b6882d05fe56f4a3e03195101402fb0 commit r15-1820-g44c767c06b6882d05fe56f4a3e03195101402fb0 Author: Pan Li Date: Tue Jul 2 08:57:50 2024 +0800 Match: Allow more types truncation for .SAT_TRUNC The .SAT_TRUNC has an input type and an output type, i.e. it converts from itype to otype where sizeof (otype) < sizeof (itype). The previous patch only allowed sizeof (otype) == sizeof (itype) / 2, but we also have 1/4 and 1/8 truncation. This patch supports truncation for any types where sizeof (otype) < sizeof (itype). The following truncations are covered. * uint64_t => uint8_t * uint64_t => uint16_t * uint64_t => uint32_t * uint32_t => uint8_t * uint32_t => uint16_t * uint16_t => uint8_t The following test suites passed for this patch: 1. The full rv64gcv regression tests. 2. The rv64gcv build with glibc. 3. The x86 bootstrap tests. 4. The full x86 regression tests. gcc/ChangeLog: * match.pd: Allow any otype is less than itype truncation. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index a2e205b3207..4edfa2ae2c9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3239,16 +3239,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_trunc @0) (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1))) (convert @0)) - (with { + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0))) + (with + { unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); unsigned otype_precision = TYPE_PRECISION (type); - wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision); + wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); wide_int int_cst = wi::to_wide (@1, itype_precision); } - (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && TYPE_UNSIGNED (TREE_TYPE (@0)) - && otype_precision < itype_precision - && wi::eq_p (trunc_max, int_cst) + (if (otype_precision < 
itype_precision && wi::eq_p (trunc_max, int_cst)) /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */
[gcc r15-1822] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]
https://gcc.gnu.org/g:de9254e224eb3d89303cb9b3ba50b4c479c55f7c commit r15-1822-gde9254e224eb3d89303cb9b3ba50b4c479c55f7c Author: Pan Li Date: Wed Jul 3 22:06:48 2024 +0800 RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763] According to the ISA, the zvfhmin sub extension should only contain conversion insns. Thus, a vfmv insn acting on FP16 should not be present when only the zvfhmin option is given. This patch fixes it by splitting the pred_broadcast define_insn into zvfhmin and zvfh parts. Given the example below: void test (_Float16 *dest, _Float16 bias) { dest[0] = bias; dest[1] = bias; } when compiled with -march=rv64gcv_zfh_zvfhmin Before this patch: test: vsetivli zero,2,e16,mf4,ta,ma vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin vse16.v v1,0(a0) ret After this patch: test: addi sp,sp,-16 fsh fa0,14(sp) addi a5,sp,14 vsetivli zero,2,e16,mf4,ta,ma vlse16.v v1,0(a5),zero vse16.v v1,0(a0) addi sp,sp,16 jr ra PR target/115763 gcc/ChangeLog: * config/riscv/vector.md (*pred_broadcast): Split into zvfh and zvfhmin part. (*pred_broadcast_zvfh): New define_insn for zvfh part. (*pred_broadcast_zvfhmin): Ditto but for zvfhmin. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check. * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. * gcc.target/riscv/rvv/base/pr115763-1.c: New test. * gcc.target/riscv/rvv/base/pr115763-2.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/vector.md | 49 +++--- .../gcc.target/riscv/rvv/base/pr115763-1.c | 9 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 + .../gcc.target/riscv/rvv/base/scalar_move-5.c | 4 +- .../gcc.target/riscv/rvv/base/scalar_move-6.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-7.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-8.c | 6 +-- 7 files changed, 64 insertions(+), 26 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index fe18ee5b5f7..d9474262d54 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2080,31 +2080,50 @@ [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") (set_attr "mode" "")]) -(define_insn "*pred_broadcast" - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr, vr, vr, vr, vr") - (if_then_else:V_VLSF_ZVFHMIN +(define_insn "*pred_broadcast_zvfh" + [(set (match_operand:V_VLSF0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF (unspec: - [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1") +(match_operand 4 "vector_length_operand" " rK, rK, rK, rK") +(match_operand 5 "const_int_operand" " i, i, i, i") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand: 3 "direct_broadcast_operand" " f, f,Wdm,Wdm,Wdm,Wdm, f, f")) - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu, 0, vu, 0, vu, 0, vu, 0")))] + (vec_duplicate:V_VLSF + (match_operand: 3 
"direct_broadcast_operand" " f, f, f, f")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "@ vfmv.v.f\t%0,%3 vfmv.v.f\t%0,%3 + vfmv.s.f\t%0,%3 + vfmv.s.f\t%0,%3" + [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv") + (set_attr "mode" "")]) + +(define_insn "*pred_broadcast_zvfhmin" + [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF_ZVFHMIN + (unspec: + [(match_operand:1
[gcc r15-1894] RISC-V: Implement .SAT_TRUNC for vector unsigned int
https://gcc.gnu.org/g:dafd63d7c5cddce1e00803606e742d75927b1a1e commit r15-1894-gdafd63d7c5cddce1e00803606e742d75927b1a1e Author: Pan Li Date: Fri Jul 5 09:02:47 2024 +0800 RISC-V: Implement .SAT_TRUNC for vector unsigned int This patch implements .SAT_TRUNC for the RISC-V backend with the help of the RVV Vector Narrowing Fixed-Point Clip Instructions. The following SEW conversions are supported: * e64 => e32 * e64 => e16 * e64 => e8 * e32 => e16 * e32 => e8 * e16 => e8 Take the example below to see the changes to the asm. Form 1: #define DEF_VEC_SAT_U_TRUNC_FMT_1(NT, WT) \ void __attribute__((noinline))\ vec_sat_u_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ bool overflow = x > (WT)(NT)(-1); \ out[i] = ((NT)x) | (NT)-overflow; \ } \ } DEF_VEC_SAT_U_TRUNC_FMT_1 (uint32_t, uint64_t) Before this patch: .L3: vsetvli a5,a2,e64,m1,ta,ma vle64.v v1,0(a1) vmsgtu.vv v0,v1,v2 vsetvli zero,zero,e32,mf2,ta,ma vncvt.x.x.w v1,v1 vmerge.vim v1,v1,-1,v0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 After this patch: .L3: vsetvli a5,a2,e32,mf2,ta,ma vle64.v v1,0(a1) vnclipu.wi v1,v1,0 vse32.v v1,0(a0) slli a4,a5,3 add a1,a1,a4 slli a4,a5,2 add a0,a0,a4 sub a2,a2,a5 bne a2,zero,.L3 Passed the full rv64gcv regression tests. gcc/ChangeLog: * config/riscv/autovec.md (ustrunc2): Add new pattern for double truncation. (ustrunc2): Ditto but for quad truncation. (ustrunc2): Ditto but for oct truncation. * config/riscv/riscv-protos.h (expand_vec_double_ustrunc): Add new func decl to expand double vec ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. * config/riscv/riscv-v.cc (expand_vec_double_ustrunc): Add new func impl to expand vector double ustrunc. (expand_vec_quad_ustrunc): Ditto but for quad. (expand_vec_oct_ustrunc): Ditto but for oct. 
gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper test macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-4.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-5.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-run-6.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_unary_vv_run.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md| 35 ++ gcc/config/riscv/riscv-protos.h| 4 + gcc/config/riscv/riscv-v.cc| 46 +++ .../riscv/rvv/autovec/binop/vec_sat_arith.h| 22 ++ .../riscv/rvv/autovec/unop/vec_sat_data.h | 394 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c | 19 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c | 21 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c | 23 ++ .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c | 19 + .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c
[gcc r15-1903] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1
https://gcc.gnu.org/g:35b1096896a94a90d787f5ef402ba009dd4f0393 commit r15-1903-g35b1096896a94a90d787f5ef402ba009dd4f0393 Author: Pan Li Date: Mon Jul 8 20:31:31 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 1 After the middle-end supported the vector mode of .SAT_ADD, add more testcases to ensure the correctness of RISC-V backend for form 1. Aka: Form 1: #define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ out[i] = (T)(in[i] + IMM) >= in[i] ? (in[i] + IMM) : -1; \ } DEF_VEC_SAT_U_ADD_IMM_FMT_1 (uint64_t, 9) Passed the fully rv64gcv regression tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help test macro. * gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-4.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 25 ++ .../riscv/rvv/autovec/binop/vec_sat_data.h | 256 + .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c | 14 ++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c | 14 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-1.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-2.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-3.c| 28 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-4.c| 28 +++ 10 files changed, 449 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index b55a589e019a..3733c8fd2c15 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -4,6 +4,14 @@ #include #include +#define VALIDATE_RESULT(out, expect, N) \ + do \ +{\ + for (unsigned i = 0; i < N; i++) \ +if (out[i] != expect[i]) __builtin_abort (); \ +}\ + while (false) + /**/ /* Saturation Add (unsigned and signed) */ /**/ @@ -139,6 +147,23 @@ vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, unsigned limit) \ #define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \ vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N) +#define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) \ +T __attribute__((noinline)) \ +vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ +{\ + unsigned i;\ + for (i = 0; i < limit; i++)\ +out[i] = (T)(in[i] + IMM) >= in[i] ? 
(in[i] + IMM) : -1; \ +} +#define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \ + DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) + +#define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \ + vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \ + VALIDATE_RESULT (out, expect, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \ + RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) + /**/ /* Saturation Sub (Unsigned and Signed) */ /
[gcc r15-1904] RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2
https://gcc.gnu.org/g:ecde8d50bea3573194f21277666f83463cbbe9c9 commit r15-1904-gecde8d50bea3573194f21277666f83463cbbe9c9 Author: Pan Li Date: Mon Jul 8 21:58:59 2024 +0800 RISC-V: Add testcases for unsigned vector .SAT_ADD IMM form 2 After the middle-end supported the vector mode of .SAT_ADD, add more testcases to ensure the correctness of RISC-V backend for form 2. Aka: Form 2: #define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) \ T __attribute__((noinline)) \ vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \ {\ unsigned i;\ for (i = 0; i < limit; i++)\ out[i] = (T)(in[i] + IMM) < in[i] ? -1 : (in[i] + IMM); \ } DEF_VEC_SAT_U_ADD_IMM_FMT_2 (uint64_t, 9) Passed the fully rv64gcv regression tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add help test macro. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-5.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-6.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-7.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-run-8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 17 + .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c | 14 +++ .../riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c | 14 +++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-5.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-6.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-7.c| 28 ++ .../rvv/autovec/binop/vec_sat_u_add_imm-run-8.c| 28 ++ 9 files changed, 185 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index 3733c8fd2c15..10459807b2c4 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -158,12 +158,29 @@ vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \ #define DEF_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, IMM) \ DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) +#define DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) \ +T __attribute__((noinline)) \ +vec_sat_u_add_imm##IMM##_##T##_fmt_2 (T *out, T *in, unsigned limit) \ +{\ + unsigned i;\ + for (i = 0; i < limit; i++)\ +out[i] = (T)(in[i] + IMM) < in[i] ? 
-1 : (in[i] + IMM); \ +} +#define DEF_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, IMM) \ + DEF_VEC_SAT_U_ADD_IMM_FMT_2(T, IMM) + #define RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) \ vec_sat_u_add_imm##IMM##_##T##_fmt_1(out, op_1, N); \ VALIDATE_RESULT (out, expect, N) #define RUN_VEC_SAT_U_ADD_IMM_FMT_1_WRAP(T, out, op_1, expect, IMM, N) \ RUN_VEC_SAT_U_ADD_IMM_FMT_1(T, out, op_1, expect, IMM, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) \ + vec_sat_u_add_imm##IMM##_##T##_fmt_2(out, op_1, N); \ + VALIDATE_RESULT (out, expect, N) +#define RUN_VEC_SAT_U_ADD_IMM_FMT_2_WRAP(T, out, op_1, expect, IMM, N) \ + RUN_VEC_SAT_U_ADD_IMM_FMT_2(T, out, op_1, expect, IMM, N) + /**/ /* Saturation Sub (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c new file mode 100644 index ..d25fdcf78f38 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mab
[gcc r14-10396] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]
https://gcc.gnu.org/g:505382ceee0b5e72dc5defa05aec77a97658feca commit r14-10396-g505382ceee0b5e72dc5defa05aec77a97658feca Author: Pan Li Date: Wed Jul 3 22:06:48 2024 +0800 RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763] According to the ISA, the zvfhmin sub extension should only contain conversion insns. Thus, the vfmv insn acting on FP16 should not be present when only the zvfhmin option is given. This patch would like to fix it by splitting the pred_broadcast define_insn into zvfhmin and zvfh parts. Given below example: void test (_Float16 *dest, _Float16 bias) { dest[0] = bias; dest[1] = bias; } when compiled with -march=rv64gcv_zfh_zvfhmin Before this patch: test: vsetivli zero,2,e16,mf4,ta,ma vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin vse16.v v1,0(a0) ret After this patch: test: addi sp,sp,-16 fsh fa0,14(sp) addi a5,sp,14 vsetivli zero,2,e16,mf4,ta,ma vlse16.v v1,0(a5),zero vse16.v v1,0(a0) addi sp,sp,16 jr ra PR target/115763 gcc/ChangeLog: * config/riscv/vector.md (*pred_broadcast): Split into zvfh and zvfhmin part. (*pred_broadcast_zvfh): New define_insn for zvfh part. (*pred_broadcast_zvfhmin): Ditto but for zvfhmin. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check. * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. * gcc.target/riscv/rvv/base/pr115763-1.c: New test. * gcc.target/riscv/rvv/base/pr115763-2.c: New test.
Signed-off-by: Pan Li (cherry picked from commit de9254e224eb3d89303cb9b3ba50b4c479c55f7c) Diff: --- gcc/config/riscv/vector.md | 49 +++--- .../gcc.target/riscv/rvv/base/pr115763-1.c | 9 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 + .../gcc.target/riscv/rvv/base/scalar_move-5.c | 4 +- .../gcc.target/riscv/rvv/base/scalar_move-6.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-7.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-8.c | 6 +-- 7 files changed, 64 insertions(+), 26 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 228d0f9a7663..03012d677d79 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2080,31 +2080,50 @@ [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") (set_attr "mode" "")]) -(define_insn "*pred_broadcast" - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr, vr, vr, vr, vr") - (if_then_else:V_VLSF_ZVFHMIN +(define_insn "*pred_broadcast_zvfh" + [(set (match_operand:V_VLSF0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF (unspec: - [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1") +(match_operand 4 "vector_length_operand" " rK, rK, rK, rK") +(match_operand 5 "const_int_operand" " i, i, i, i") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand: 3 "direct_broadcast_operand" " f, f,Wdm,Wdm,Wdm,Wdm, f, f")) - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu, 0, vu, 0, vu, 0, vu, 
0")))] + (vec_duplicate:V_VLSF + (match_operand: 3 "direct_broadcast_operand" " f, f, f, f")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "@ vfmv.v.f\t%0,%3 vfmv.v.f\t%0,%3 + vfmv.s.f\t%0,%3 + vfmv.s.f\t%0,%3" + [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv") + (set_attr "mode" "")]) + +(define_insn "*pred_broadcast_zvfhmin" + [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr") + (if_then_e
[gcc r15-1936] Match: Support form 2 for the .SAT_TRUNC
https://gcc.gnu.org/g:80e446e829d818dc19daa6e671b9626e93ee4949 commit r15-1936-g80e446e829d818dc19daa6e671b9626e93ee4949 Author: Pan Li Date: Fri Jul 5 20:36:35 2024 +0800 Match: Support form 2 for the .SAT_TRUNC This patch would like to add form 2 support for the .SAT_TRUNC. Aka: Form 2: #define DEF_SAT_U_TRUC_FMT_2(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_2 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return overflow ? (NT)-1 : (NT)x; \ } DEF_SAT_U_TRUC_FMT_2(uint32, uint64) Before this patch: 3 │ 4 │ __attribute__((noinline)) 5 │ uint32_t sat_u_truc_uint64_t_to_uint32_t_fmt_2 (uint64_t x) 6 │ { 7 │ uint32_t _1; 8 │ long unsigned int _3; 9 │ 10 │ ;; basic block 2, loop depth 0 11 │ ;;pred: ENTRY 12 │ _3 = MIN_EXPR ; 13 │ _1 = (uint32_t) _3; 14 │ return _1; 15 │ ;;succ: EXIT 16 │ 17 │ } After this patch: 3 │ 4 │ __attribute__((noinline)) 5 │ uint32_t sat_u_truc_uint64_t_to_uint32_t_fmt_2 (uint64_t x) 6 │ { 7 │ uint32_t _1; 8 │ 9 │ ;; basic block 2, loop depth 0 10 │ ;;pred: ENTRY 11 │ _1 = .SAT_TRUNC (x_2(D)); [tail call] 12 │ return _1; 13 │ ;;succ: EXIT 14 │ 15 │ } The below test suites are passed for this patch: 1. The x86 bootstrap test. 2. The x86 fully regression test. 3. The rv64gcv fully regresssion test. gcc/ChangeLog: * match.pd: Add form 2 for .SAT_TRUNC. * tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children): Add new case NOP_EXPR, and try to match SAT_TRUNC. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 17 - gcc/tree-ssa-math-opts.cc | 4 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 4edfa2ae2c90..3759c64d461f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3234,7 +3234,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1 -/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT). +/* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). 
SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ (match (unsigned_integer_sat_trunc @0) (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1))) @@ -3250,6 +3250,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) +/* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT). + SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)). */ +(match (unsigned_integer_sat_trunc @0) + (convert (min @0 INTEGER_CST@1)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0))) + (with + { + unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned otype_precision = TYPE_PRECISION (type); + wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); + wide_int int_cst = wi::to_wide (@1, itype_precision); + } + (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst)) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index a35caf5f0588..ac86be8eb947 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -6170,6 +6170,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) match_unsigned_saturation_sub (&gsi, as_a (stmt)); break; + case NOP_EXPR: + match_unsigned_saturation_trunc (&gsi, as_a (stmt)); + break; + default:; } }
[gcc r15-1959] Vect: Optimize truncation for .SAT_SUB operands
https://gcc.gnu.org/g:3918bea620e826b0df68a9c8492b791a67f294b5 commit r15-1959-g3918bea620e826b0df68a9c8492b791a67f294b5 Author: Pan Li Date: Sun Jun 30 10:55:50 2024 +0800 Vect: Optimize truncation for .SAT_SUB operands To get better vectorized code of .SAT_SUB, we would like to avoid the truncated operation for the assignment. For example, as below. unsigned int _1; unsigned int _2; unsigned short int _4; _9 = (unsigned short int).SAT_SUB (_1, _2); If we make sure that the _1 is in the range of unsigned short int. Such as a def similar to: _1 = (unsigned short int)_4; Then we can do the distribute the truncation operation to: _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2); _9 = .SAT_SUB (_4, _3); Then, we can better vectorized code and avoid the unnecessary narrowing stmt during vectorization with below stmt(s). _3 = .SAT_TRUNC(_2); // SI => HI _9 = .SAT_SUB (_4, _3); Let's take RISC-V vector as example to tell the changes. For below sample code: __attribute__((noinline)) void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); } while (--n); } Before this patch: ... .L3: vle16.v v1,0(a3) vrsub.vx v5,v2,t1 mvt3,a4 addw a4,a4,t5 vrgather.vv v3,v1,v5 vsetvli zero,zero,e32,m1,ta,ma vzext.vf2 v1,v3 vssubu.vx v1,v1,a1 vsetvli zero,zero,e16,mf2,ta,ma vncvt.x.x.w v1,v1 vrgather.vv v3,v1,v5 vse16.v v3,0(a3) sub a3,a3,t4 bgtu t6,a4,.L3 ... After this patch: test: ... .L3: vle16.v v3,0(a3) vrsub.vxv5,v2,a6 mv a7,a4 addwa4,a4,t3 vrgather.vv v1,v3,v5 vssubu.vv v1,v1,v6 vrgather.vv v3,v1,v5 vse16.v v3,0(a3) sub a3,a3,t1 bgtut4,a4,.L3 ... The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The rv64gcv build with glibc. 3. The x86 bootstrap tests. 4. The x86 fully regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_sat_sub_pattern_transform): Add new func impl to perform the truncation distribution. 
(vect_recog_sat_sub_pattern): Perform above optimize before generate .SAT_SUB call. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-patterns.cc | 65 +++ 1 file changed, 65 insertions(+) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 86e893a1c433..4570c25b6647 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4566,6 +4566,70 @@ vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, return NULL; } +/* + * Try to transform the truncation for .SAT_SUB pattern, mostly occurs in + * the benchmark zip. Aka: + * + * unsigned int _1; + * unsigned int _2; + * unsigned short int _4; + * _9 = (unsigned short int).SAT_SUB (_1, _2); + * + * if _1 is known to be in the range of unsigned short int. For example + * there is a def _1 = (unsigned short int)_4. Then we can transform the + * truncation to: + * + * _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2); + * _9 = .SAT_SUB (_4, _3); + * + * Then, we can better vectorized code and avoid the unnecessary narrowing + * stmt during vectorization with below stmt(s). + * + * _3 = .SAT_TRUNC(_2); // SI => HI + * _9 = .SAT_SUB (_4, _3); + */ +static void +vect_recog_sat_sub_pattern_transform (vec_info *vinfo, + stmt_vec_info stmt_vinfo, + tree lhs, tree *ops) +{ + tree otype = TREE_TYPE (lhs); + tree itype = TREE_TYPE (ops[0]); + unsigned itype_prec = TYPE_PRECISION (itype); + unsigned otype_prec = TYPE_PRECISION (otype); + + if (types_compatible_p (otype, itype) || otype_prec >= itype_prec) +return; + + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree_pair v_pair = tree_pair (v_otype, v_itype); + + if (v_otype == NULL_TREE || v_itype == NULL_TREE +|| !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair, + OPTIMIZE_FOR_BOTH)) +return; + + /* 1. Find the _4 and update ops[0] as above example. 
*/ + vect_unpromoted_value unprom; + tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom); + + if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec) +return; + + ops[0] = tmp; + + /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */ +
[gcc r15-1967] RISC-V: Add testcases for vector .SAT_SUB in zip benchmark
https://gcc.gnu.org/g:b3c686416e88bf135def0e72d316713af01445a1 commit r15-1967-gb3c686416e88bf135def0e72d316713af01445a1 Author: Pan Li Date: Thu Jul 11 15:54:32 2024 +0800 RISC-V: Add testcases for vector .SAT_SUB in zip benchmark This patch would like to add the test cases for the vector .SAT_SUB in the zip benchmark. Aka: Form in zip benchmark: #define DEF_VEC_SAT_U_SUB_ZIP(T1, T2) \ void __attribute__((noinline))\ vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \ { \ T2 a; \ T1 *p = x; \ do {\ a = *--p; \ *p = (T1)(a >= b ? a - b : 0);\ } while (--limit); \ } DEF_VEC_SAT_U_SUB_ZIP(uint8_t, uint16_t) vec_sat_u_sub_uint16_t_uint32_t_fmt_zip: ... vsetvli a4,zero,e32,m1,ta,ma vmv.v.x v6,a1 vsetvli zero,zero,e16,mf2,ta,ma vid.v v2 lia4,-1 vnclipu.wiv6,v6,0 // .SAT_TRUNC .L3: vle16.v v3,0(a3) vrsub.vx v5,v2,a6 mva7,a4 addw a4,a4,t3 vrgather.vv v1,v3,v5 vssubu.vv v1,v1,v6 // .SAT_SUB vrgather.vv v3,v1,v5 vse16.v v3,0(a3) sub a3,a3,t1 bgtu t4,a4,.L3 Passed the rv64gcv tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/binop/vec_sat_data.h: Add test data for .SAT_SUB in zip benchmark. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip-run.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_arith.h| 18 + .../riscv/rvv/autovec/binop/vec_sat_binary_vx.h| 22 ++ .../riscv/rvv/autovec/binop/vec_sat_data.h | 81 ++ .../rvv/autovec/binop/vec_sat_u_sub_zip-run.c | 16 + .../riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c| 18 + 5 files changed, 155 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h index 10459807b2c4..416a1e49a47b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h @@ -322,6 +322,19 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, unsigned limit) \ } \ } +#define DEF_VEC_SAT_U_SUB_ZIP(T1, T2) \ +void __attribute__((noinline))\ +vec_sat_u_sub_##T1##_##T2##_fmt_zip (T1 *x, T2 b, unsigned limit) \ +{ \ + T2 a; \ + T1 *p = x; \ + do {\ +a = *--p; \ +*p = (T1)(a >= b ? a - b : 0);\ + } while (--limit); \ +} +#define DEF_VEC_SAT_U_SUB_ZIP_WRAP(T1, T2) DEF_VEC_SAT_U_SUB_ZIP(T1, T2) + #define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \ vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N) @@ -352,6 +365,11 @@ vec_sat_u_sub_##T##_fmt_10 (T *out, T *op_1, T *op_2, unsigned limit) \ #define RUN_VEC_SAT_U_SUB_FMT_10(T, out, op_1, op_2, N) \ vec_sat_u_sub_##T##_fmt_10(out, op_1, op_2, N) +#define RUN_VEC_SAT_U_SUB_FMT_ZIP(T1, T2, x, b, N) \ + vec_sat_u_sub_##T1##_##T2##_fmt_zip(x, b, N) +#define RUN_VEC_SAT_U_SUB_FMT_ZIP_WRAP(T1, T2, x, b, N) \ + RUN_VEC_SAT_U_SUB_FMT_ZIP(T1, T2, x, b, N) \ + /**/ /* Saturation Sub Truncated (Unsigned and Signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h new file mode 100644 index ..d238c6392def --- /dev/null +++ b/gcc/testsuite/gcc.target/
[gcc r15-2138] Doc: Add Standard-Names ustrunc and sstrunc for integer modes
https://gcc.gnu.org/g:ebac11afbcb7a52536da5f04fc524b870f5d76e0 commit r15-2138-gebac11afbcb7a52536da5f04fc524b870f5d76e0 Author: Pan Li Date: Thu Jul 18 11:30:38 2024 +0800 Doc: Add Standard-Names ustrunc and sstrunc for integer modes This patch would like to add the doc for the Standard-Names ustrunc and sstrunc, including both the scalar and vector integer modes. gcc/ChangeLog: * doc/md.texi: Add Standard-Names ustrunc and sstrunc. Signed-off-by: Pan Li Diff: --- gcc/doc/md.texi | 12 1 file changed, 12 insertions(+) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 7f4335e0aac1..ecb7f34f1b92 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5543,6 +5543,18 @@ means of constraints requiring operands 1 and 0 to be the same location. @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3} Similar, for other arithmetic operations. +@cindex @code{ustrunc@var{m}@var{n}2} instruction pattern +@item @samp{ustrunc@var{m}@var{n}2} +Truncate the operand 1, and storing the result in operand 0. There will +be saturation during the trunction. The result will be saturated to the +maximal value of operand 0 type if there is overflow when truncation. The +operand 1 must have mode @var{n}, and the operand 0 must have mode @var{m}. +Both scalar and vector integer modes are allowed. + +@cindex @code{sstrunc@var{m}@var{n}2} instruction pattern +@item @samp{sstrunc@var{m}@var{n}2} +Similar but for signed. + @cindex @code{andc@var{m}3} instruction pattern @item @samp{andc@var{m}3} Like @code{and@var{m}3}, but it uses bitwise-complement of operand 2
[gcc r15-2149] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]
https://gcc.gnu.org/g:02cc8494745c4235890ad58e93b5acce5a89a775 commit r15-2149-g02cc8494745c4235890ad58e93b5acce5a89a775 Author: Pan Li Date: Thu Jul 18 20:16:34 2024 +0800 Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863] The SAT_TRUNC form 2 has below pattern matching. From: _18 = MIN_EXPR <left_8, 4294967295>; iftmp.0_11 = (unsigned int) _18; To: _18 = MIN_EXPR <left_8, 4294967295>; iftmp.0_11 = .SAT_TRUNC (left_8); But if there is another use of _18 like below, the transform to the .SAT_TRUNC may bring no benefit. For example: From: _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def iftmp.0_11 = (unsigned int) _18; // op_0 stream.avail_out = iftmp.0_11; left_37 = left_8 - _18; // op_0 use To: _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def iftmp.0_11 = .SAT_TRUNC (left_8); stream.avail_out = iftmp.0_11; left_37 = left_8 - _18; // op_0 use Pattern recog to .SAT_TRUNC cannot eliminate MIN_EXPR as above. Then the backend (for example x86/riscv) will have 2-3 additional insns after pattern recog besides the MIN_EXPR. Thus, keeping the normal truncation as is should be the better choice. The below testsuites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. PR target/115863 gcc/ChangeLog: * match.pd: Add single_use check for .SAT_TRUNC form 2. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115863-1.c: New test. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 15 ++-- gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 6818856991c6..cf359b0ec0f0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)). */ +/* If Op_0 def is MIN_EXPR and not single_use.
Aka below pattern: + + _18 = MIN_EXPR ; // op_0 def + iftmp.0_11 = (unsigned int) _18; // op_0 + stream.avail_out = iftmp.0_11; + left_37 = left_8 - _18; // op_0 use + + Transfer to .SAT_TRUNC will have MIN_EXPR still live. Then the backend + (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC + besides the MIN_EXPR. Thus, keep the normal truncation as is should be + the better choose. */ (match (unsigned_integer_sat_trunc @0) - (convert (min @0 INTEGER_CST@1)) + (convert (min@2 @0 INTEGER_CST@1)) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && TYPE_UNSIGNED (TREE_TYPE (@0))) + && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2)) (with { unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c b/gcc/testsuite/gcc.target/i386/pr115863-1.c new file mode 100644 index ..a672f62cec54 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c @@ -0,0 +1,37 @@ +/* PR target/115863 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-rtl-expand-details" } */ + +#include + +typedef struct z_stream_s { +uint32_t avail_out; +} z_stream; + +typedef z_stream *z_streamp; + +extern int deflate (z_streamp strmp); + +int compress2 (uint64_t *destLen) +{ + z_stream stream; + int err; + const uint32_t max = (uint32_t)(-1); + uint64_t left; + + left = *destLen; + + stream.avail_out = 0; + + do { +if (stream.avail_out == 0) { +stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left; +left -= stream.avail_out; +} +err = deflate(&stream); +} while (err == 0); + + return err; +} + +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
[gcc r15-2189] RISC-V: Rearrange the test helper files for vector .SAT_*
https://gcc.gnu.org/g:4ab19e461159989b7fb43e858190adcf480762b7 commit r15-2189-g4ab19e461159989b7fb43e858190adcf480762b7 Author: Pan Li Date: Sat Jul 20 10:43:44 2024 +0800 RISC-V: Rearrange the test helper files for vector .SAT_* Rearrange the test help header files, as well as align the name conventions. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvv_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust the include file names. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c:
[gcc r15-2209] RISC-V: Implement the .SAT_TRUNC for scalar
https://gcc.gnu.org/g:5d2115b850df63b0ecdf56efb720ad848e7afe21 commit r15-2209-g5d2115b850df63b0ecdf56efb720ad848e7afe21 Author: Pan Li Date: Mon Jul 1 16:36:35 2024 +0800 RISC-V: Implement the .SAT_TRUNC for scalar This patch would like to implement the simple .SAT_TRUNC pattern in the riscv backend. Aka: Form 1: #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t) Before this patch: __attribute__((noinline)) uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x) { _Bool overflow; unsigned char _1; unsigned char _2; unsigned char _3; uint8_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY overflow_5 = x_4(D) > 255; _1 = (unsigned char) x_4(D); _2 = (unsigned char) overflow_5; _3 = -_2; _6 = _1 | _3; return _6; ;;succ: EXIT } After this patch: __attribute__((noinline)) uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x) { uint8_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY _6 = .SAT_TRUNC (x_4(D)); [tail call] return _6; ;;succ: EXIT } The below tests suites are passed for this patch 1. The rv64gcv fully regression test. 2. The rv64gcv build with glibc gcc/ChangeLog: * config/riscv/iterators.md (ANYI_DOUBLE_TRUNC): Add new iterator for int double truncation. (ANYI_DOUBLE_TRUNCATED): Add new attr for int double truncation. (anyi_double_truncated): Ditto but for lowercase. * config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new func decl for expanding ustrunc * config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func impl to expand ustrunc. * config/riscv/riscv.md (ustrunc2): Impl the new pattern ustrunc2 for int. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_arith_data.h: New test. * gcc.target/riscv/sat_u_trunc-1.c: New test. * gcc.target/riscv/sat_u_trunc-2.c: New test. 
* gcc.target/riscv/sat_u_trunc-3.c: New test. * gcc.target/riscv/sat_u_trunc-run-1.c: New test. * gcc.target/riscv/sat_u_trunc-run-2.c: New test. * gcc.target/riscv/sat_u_trunc-run-3.c: New test. * gcc.target/riscv/scalar_sat_unary.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/iterators.md | 10 gcc/config/riscv/riscv-protos.h| 1 + gcc/config/riscv/riscv.cc | 40 gcc/config/riscv/riscv.md | 10 gcc/testsuite/gcc.target/riscv/sat_arith.h | 16 +++ gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 56 ++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c | 17 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c | 16 +++ gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h | 22 + 13 files changed, 259 insertions(+) diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index d61ed53a8b1b..734da041f0cb 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -65,6 +65,16 @@ ;; Iterator for hardware-supported integer modes. (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")]) +(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")]) + +(define_mode_attr ANYI_DOUBLE_TRUNCATED [ + (HI "QI") (SI "HI") (DI "SI") +]) + +(define_mode_attr anyi_double_truncated [ + (HI "qi") (SI "hi") (DI "si") +]) + ;; Iterator for hardware-supported floating-point modes. 
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX") (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX") diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 7c0ea1b445b1..ce5e38d3dbbf 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int); extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); extern void riscv_expand_usadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); +extern voi
[gcc r15-2241] Internal-fn: Only allow modes describe types for internal fn[PR115961]
https://gcc.gnu.org/g:905973410957891fec8a3e42eeefa4618780e0ce commit r15-2241-g905973410957891fec8a3e42eeefa4618780e0ce Author: Pan Li Date: Thu Jul 18 17:23:36 2024 +0800 Internal-fn: Only allow modes describe types for internal fn[PR115961] The direct_internal_fn_supported_p has no restrictions for the type modes. For example the bitfield like below will be recognized as .SAT_TRUNC. struct e { unsigned pre : 12; unsigned a : 4; }; __attribute__((noipa)) void bug (e * v, unsigned def, unsigned use) { e & defE = *v; defE.a = min_u (use + 1, 0xf); } This patch would like to add checks for the direct_internal_fn_supported_p, and only allow the tree types described by modes. The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. PR target/115961 gcc/ChangeLog: * internal-fn.cc (type_strictly_matches_mode_p): Add new func impl to check type strictly matches mode or not. (type_pair_strictly_matches_mode_p): Ditto but for tree type pair. (direct_internal_fn_supported_p): Add above check for the tree type pair. gcc/testsuite/ChangeLog: * g++.dg/torture/pr115961-run-1.C: New test. Signed-off-by: Pan Li Diff: --- gcc/internal-fn.cc| 32 +++ gcc/testsuite/g++.dg/torture/pr115961-run-1.C | 32 +++ 2 files changed, 64 insertions(+) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 95946bfd6839..8a2e07f2f965 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn) gcc_unreachable (); } +/* Return true if TYPE's mode has the same format as TYPE, and if there is + a 1:1 correspondence between the values that the mode can store and the + values that the type can store.
*/ + +static bool +type_strictly_matches_mode_p (const_tree type) +{ + if (VECTOR_TYPE_P (type)) +return VECTOR_MODE_P (TYPE_MODE (type)); + + if (INTEGRAL_TYPE_P (type)) +return type_has_mode_precision_p (type); + + if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type)) +return true; + + return false; +} + +/* Returns true if both types of TYPE_PAIR strictly match their modes, + else returns false. */ + +static bool +type_pair_strictly_matches_mode_p (tree_pair type_pair) +{ + return type_strictly_matches_mode_p (type_pair.first) +&& type_strictly_matches_mode_p (type_pair.second); +} + /* Return true if FN is supported for the types in TYPES when the optimization type is OPT_TYPE. The types are those associated with the "type0" and "type1" fields of FN's direct_internal_fn_info @@ -4173,6 +4202,9 @@ bool direct_internal_fn_supported_p (internal_fn fn, tree_pair types, optimization_type opt_type) { + if (!type_pair_strictly_matches_mode_p (types)) +return false; + switch (fn) { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ diff --git a/gcc/testsuite/g++.dg/torture/pr115961-run-1.C b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C new file mode 100644 index ..787f7245457d --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C @@ -0,0 +1,32 @@ +/* PR target/115961 */ +/* { dg-do run } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +}
[gcc r14-9418] VECT: Fix ICE for vectorizable LD/ST when both len and store are enabled
https://gcc.gnu.org/g:993c6de642ffeb2867edbe80ff2a72c0a2eb604e commit r14-9418-g993c6de642ffeb2867edbe80ff2a72c0a2eb604e Author: Pan Li Date: Sun Mar 10 11:02:35 2024 +0800 VECT: Fix ICE for vectorizable LD/ST when both len and store are enabled This patch would like to fix one ICE in vectorizable_store when both the loop_masks and loop_lens are enabled. The ICE looks like below when build with "-march=rv64gcv -O3". during GIMPLE pass: vect test.c: In function ‘d’: test.c:6:6: internal compiler error: in vectorizable_store, at tree-vect-stmts.cc:8691 6 | void d() { | ^ 0x37a6f2f vectorizable_store .../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:8691 0x37b861c vect_analyze_stmt(vec_info*, _stmt_vec_info*, bool*, _slp_tree*, _slp_instance*, vec*) .../__RISC-V_BUILD__/../gcc/tree-vect-stmts.cc:13242 0x1db5dca vect_analyze_loop_operations .../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:2208 0x1db885b vect_analyze_loop_2 .../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3041 0x1dba029 vect_analyze_loop_1 .../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3481 0x1dbabad vect_analyze_loop(loop*, vec_info_shared*) .../__RISC-V_BUILD__/../gcc/tree-vect-loop.cc:3639 0x1e389d1 try_vectorize_loop_1 .../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1066 0x1e38f3d try_vectorize_loop .../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1182 0x1e39230 execute .../__RISC-V_BUILD__/../gcc/tree-vectorizer.cc:1298 There are two ways to reach vectorizer LD/ST, one is the analysis and the other is transform. We cannot have both the lens and the masks enabled during transform but it is valid during analysis. Given the transform doesn't required cost_vec, we can only enable the assert based on cost_vec is NULL or not. Below testsuites are passed for this patch: * The x86 bootstrap tests. * The x86 fully regression tests. * The aarch64 fully regression tests. * The riscv fully regressison tests. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_store): Enable the assert during transform process. 
(vectorizable_load): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114195-1.c: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c | 15 +++ gcc/tree-vect-stmts.cc | 18 ++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c new file mode 100644 index 000..a67b847112b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114195-1.c @@ -0,0 +1,15 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */ + +long a, b; +extern short c[]; + +void d() { + for (int e = 0; e < 35; e = 2) { +a = ({ a < 0 ? a : 0; }); +b = ({ b < 0 ? b : 0; }); + +c[e] = 0; + } +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 14a3ffb5f02..e8617439a48 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8697,8 +8697,13 @@ vectorizable_store (vec_info *vinfo, ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); - /* Shouldn't go with length-based approach if fully masked. */ - gcc_assert (!loop_lens || !loop_masks); + /* The vect_transform_stmt and vect_analyze_stmt will go here but there + are some difference here. We cannot enable both the lens and masks + during transform but it is allowed during analysis. + Shouldn't go with length-based approach if fully masked. */ + if (cost_vec == NULL) +/* The cost_vec is NULL during transfrom. */ +gcc_assert ((!loop_lens || !loop_masks)); /* Targets with store-lane instructions must not require explicit realignment. vect_supportable_dr_alignment always returns either @@ -10577,8 +10582,13 @@ vectorizable_load (vec_info *vinfo, ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); - /* Shouldn't go with length-based approach if fully masked. 
*/ - gcc_assert (!loop_lens || !loop_masks); + /* The vect_transform_stmt and vect_analyze_stmt will go here but there + are some difference here. We cannot enable both the lens and masks + during transform but it is allowed during analysis. + Shouldn't go with length-based approach if fully masked. */ + if (cost_vec == NULL) +/* The cost_vec is NULL during transfrom. */ +gcc_assert ((!loop_lens || !loop_masks)); /* Targets with store-lane instructions must not require explicit realignment. vect_supportable_dr_alignment always returns either
[gcc r14-9436] RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC]
https://gcc.gnu.org/g:cdf0c6604d03afd7f544dd8bd5d43d9ded059ada commit r14-9436-gcdf0c6604d03afd7f544dd8bd5d43d9ded059ada Author: Pan Li Date: Tue Mar 12 15:01:57 2024 +0800 RISC-V: Fix some code style issue(s) in riscv-c.cc [NFC] Notice some code style issue(s) when add __riscv_v_fixed_vlen, includes: * Meanless empty line. * Line greater than 80 chars. * Indent with 3 space(s). * Argument unalignment. gcc/ChangeLog: * config/riscv/riscv-c.cc (riscv_ext_version_value): Fix code style greater than 80 chars. (riscv_cpu_cpp_builtins): Fix useless empty line, indent with 3 space(s) and argument unalignment. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-c.cc | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index 3755ec0b8ef..7029ba88186 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -37,7 +37,8 @@ along with GCC; see the file COPYING3. If not see static int riscv_ext_version_value (unsigned major, unsigned minor) { - return (major * RISCV_MAJOR_VERSION_BASE) + (minor * RISCV_MINOR_VERSION_BASE); + return (major * RISCV_MAJOR_VERSION_BASE) ++ (minor * RISCV_MINOR_VERSION_BASE); } /* Implement TARGET_CPU_CPP_BUILTINS. */ @@ -110,7 +111,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) case CM_MEDANY: builtin_define ("__riscv_cmodel_medany"); break; - } if (riscv_user_wants_strict_align) @@ -142,9 +142,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) riscv_ext_version_value (0, 12)); } - if (TARGET_XTHEADVECTOR) - builtin_define_with_int_value ("__riscv_th_v_intrinsic", -riscv_ext_version_value (0, 11)); + if (TARGET_XTHEADVECTOR) +builtin_define_with_int_value ("__riscv_th_v_intrinsic", + riscv_ext_version_value (0, 11)); /* Define architecture extension test macros. */ builtin_define_with_int_value ("__riscv_arch_test", 1);
[gcc r14-9604] RISC-V: Bugfix ICE for __attribute__((target("arch=+v")))
https://gcc.gnu.org/g:d3c24e9e55a7cf18df313a8b32b6de4b3ba81013 commit r14-9604-gd3c24e9e55a7cf18df313a8b32b6de4b3ba81013 Author: Pan Li Date: Mon Mar 18 11:21:29 2024 +0800 RISC-V: Bugfix ICE for __attribute__((target("arch=+v")) This patch would like to fix one ICE for __attribute__((target("arch=+v")) and likewise extension(s). Given we have sample code as below: void __attribute__((target("arch=+v"))) test_2 (int *a, int *b, int *out, unsigned count) { unsigned i; for (i = 0; i < count; i++) out[i] = a[i] + b[i]; } It will have ICE when build with -march=rv64gc -O3. test.c: In function ‘test_2’: test.c:4:1: internal compiler error: Floating point exception 4 | { | ^ 0x1a5891b crash_signal .../__RISC-V_BUILD__/../gcc/toplev.cc:319 0x7f0a7884251f ??? ./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0 0x1f51ba4 riscv_hard_regno_nregs .../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143 0x1967bb9 init_reg_modes_target() .../__RISC-V_BUILD__/../gcc/reginfo.cc:471 0x13fc029 init_emit_regs() .../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237 0x1a5b83d target_reinit() .../__RISC-V_BUILD__/../gcc/toplev.cc:1936 0x35e374d save_target_globals() .../__RISC-V_BUILD__/../gcc/target-globals.cc:92 0x35e381f save_target_globals_default_opts() .../__RISC-V_BUILD__/../gcc/target-globals.cc:122 0x1f544cc riscv_save_restore_target_globals(tree_node*) .../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138 0x1f55c36 riscv_set_current_function ... There are two reasons for this ICE. 1. The implied extension(s) of v are not well handled and the TARGET_MIN_VLEN is 0 which is not reinitialized. Then the size / TARGET_MIN_VLEN will have DivideByZero. 2. The machine modes of the vector types will be vary after the v extension is introduced. This patch passed below testsuite: 1. The riscv fully regression test. PR target/114352 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::parse): Replace implied, combine and check to func finalize. 
(riscv_subset_list::finalize): New func impl to take care of implied, combine ext and related checks. * config/riscv/riscv-subset.h: Add func decl for finalize. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Finalize the ext before return succeed. * config/riscv/riscv.cc (riscv_set_current_function): Reinit the machine mode before when set cur function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114352-1.c: New test. * gcc.target/riscv/rvv/base/pr114352-2.c: New test. Signed-off-by: Pan Li Diff: --- gcc/common/config/riscv/riscv-common.cc| 31 gcc/config/riscv/riscv-subset.h| 2 + gcc/config/riscv/riscv-target-attr.cc | 2 + gcc/config/riscv/riscv.cc | 4 ++ .../gcc.target/riscv/rvv/base/pr114352-1.c | 58 ++ .../gcc.target/riscv/rvv/base/pr114352-2.c | 27 ++ 6 files changed, 114 insertions(+), 10 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 440127a2af0..15d44245b3c 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -1428,16 +1428,7 @@ riscv_subset_list::parse (const char *arch, location_t loc) if (p == NULL) goto fail; - for (itr = subset_list->m_head; itr != NULL; itr = itr->next) -{ - subset_list->handle_implied_ext (itr->name.c_str ()); -} - - /* Make sure all implied extensions are included. */ - gcc_assert (subset_list->check_implied_ext ()); - - subset_list->handle_combine_ext (); - subset_list->check_conflict_ext (); + subset_list->finalize (); return subset_list; @@ -1467,6 +1458,26 @@ riscv_subset_list::set_loc (location_t loc) m_loc = loc; } +/* Make sure the implied or combined extension is included after add + a new std extension to subset list or likewise. For exmaple as below, + + void __attribute__((target("arch=+v"))) func () with -march=rv64gc. + + The implied zvl128b and zve64d of the std v should be included. 
*/ +void +riscv_subset_list::finalize () +{ + riscv_subset_t *subset; + + for (subset = m_head; subset != NULL; subset = subset->next) +handle_implied_ext (subset->name.c_str ()); + + gcc_assert (check_implied_ext ()); + + handle_combine_ext (); + check_conflict_ext (); +} + /* Return the current arch string. */ std::string diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index ae849e2a302.
[gcc r14-9605] RISC-V: Bugfix function target attribute pollution
https://gcc.gnu.org/g:9941f0295a14659e25260458efd2e46a68ad0342 commit r14-9605-g9941f0295a14659e25260458efd2e46a68ad0342 Author: Pan Li Date: Tue Mar 19 09:43:24 2024 +0800 RISC-V: Bugfix function target attribute pollution This patch depends on below ICE fix. https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647915.html The function target attribute should be on a per-function basis. For example, we have 3 function as below: void test_1 () {} void __attribute__((target("arch=+v"))) test_2 () {} void __attribute__((target("arch=+zfh"))) test_3 () {} void test_4 () {} The scope of the target attribute should not extend the function body. Aka, test_3 cannot have the 'v' extension, as well as the test_4 cannot have both the 'v' and 'zfh' extension. Unfortunately, for now the test_4 is able to leverage the 'v' and the 'zfh' extension which is incorrect. This patch would like to fix the sticking attribute by introduce the commandline subset_list. When parse_arch, we always clone from the cmdline_subset_list instead of the current_subset_list. Meanwhile, we correct the print information about arch like below. .option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zbb1p0 The riscv_declare_function_name hook is always after the hook riscv_process_target_attr. Thus, we introduce one hash_map to record the 1:1 mapping from fndel to its' subset_list in advance. And later the riscv_declare_function_name is able to get the right information about the arch. Below test are passed for this patch * The riscv fully regression test. PR target/114352 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (struct riscv_func_target_info): New struct for func decl and target name. (struct riscv_func_target_hasher): New hasher for hash table mapping from the fn_decl to fn_target_name. (riscv_func_decl_hash): New func to compute the hash for fn_decl. (riscv_func_target_hasher::hash): New func to impl hash interface. 
(riscv_func_target_hasher::equal): New func to impl equal interface. (riscv_cmdline_subset_list): New static var for cmdline subset list. (riscv_func_target_table_lazy_init): New func to lazy init the func target hash table. (riscv_func_target_get): New func to get target name from hash table. (riscv_func_target_put): New func to put target name into hash table. (riscv_func_target_remove_and_destory): New func to remove target info from the hash table and destory it. (riscv_parse_arch_string): Set the static var cmdline_subset_list. * config/riscv/riscv-subset.h (riscv_cmdline_subset_list): New static var for cmdline subset list. (riscv_func_target_get): New func decl. (riscv_func_target_put): Ditto. (riscv_func_target_remove_and_destory): Ditto. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Take cmdline_subset_list instead of current_subset_list when clone. (riscv_process_target_attr): Record the func target info to hash table. (riscv_option_valid_attribute_p): Add new arg tree fndel. * config/riscv/riscv.cc (riscv_declare_function_name): Consume the func target info and print the arch message. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114352-3.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/common/config/riscv/riscv-common.cc| 105 ++- gcc/config/riscv/riscv-subset.h| 4 + gcc/config/riscv/riscv-target-attr.cc | 18 +++- gcc/config/riscv/riscv.cc | 7 +- .../gcc.target/riscv/rvv/base/pr114352-3.c | 113 + 5 files changed, 240 insertions(+), 7 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 15d44245b3c..7095f303cbb 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -426,11 +426,108 @@ bool riscv_subset_list::parse_failed = false; static riscv_subset_list *current_subset_list = NULL; +static riscv_subset_list *cmdline_subset_list = NULL; + +struct riscv_func_target_info +{ + tree fn_decl; + std::string fn_target_name; + + riscv_func_target_info (const tree &decl, const std::string &target_name) +: fn_decl (decl), fn_target_name (target_name) + { + } +}; + +struct riscv_func_target_hasher : nofree_ptr_hash +{ + typedef tree compare_type; + + static hashval_t hash (value_type); + static bool equal (value_type, const compare_type &); +}; + +static hash_table *f
[gcc r14-9616] RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV
https://gcc.gnu.org/g:47de95d801c6899033c303b1fe642feb0489994f commit r14-9616-g47de95d801c6899033c303b1fe642feb0489994f Author: Pan Li Date: Fri Mar 22 14:43:47 2024 +0800 RISC-V: Introduce gcc attribute riscv_rvv_vector_bits for RVV This patch would like to introduce one new gcc attribute for RVV. This attribute is used to define fixed-length variants of one existing sizeless RVV types. This attribute is valid if and only if the mrvv-vector-bits=zvl, the only one args should be the integer constant and its' value is terminated by the LMUL and the vector register bits in zvl*b. For example: typedef vint32m2_t fixed_vint32m2_t __attribute__((riscv_rvv_vector_bits(128))); The above type define is valid when -march=rv64gc_zve64d_zvl64b (aka 2(m2) * 64 = 128 for vin32m2_t), and will report error when -march=rv64gcv_zvl128b similar to below. "error: invalid RVV vector size '128', expected size is '256' based on LMUL of type and '-mrvv-vector-bits=zvl'" Meanwhile, a pre-define macro __riscv_v_fixed_vlen is introduced to represent the fixed vlen in a RVV vector register. For the vint*m*_t below operations are allowed. * The sizeof. * The global variable(s). * The element of union and struct. * The cast to other equalities. * CMP: >, <, ==, !=, <=, >= * ALU: +, -, *, /, %, &, |, ^, >>, <<, ~, - The CMP will return vint*m*_t the same as aarch64 sve. For example: typedef vint32m1_t fixed_vint32m1_t __attribute__((riscv_rvv_vector_bits(128))); fixed_vint32m1_t less_than (fixed_vint32m1_t a, fixed_vint32m1_t b) { return a < b; } For the vfloat*m*_t below operations are allowed. * The sizeof. * The global variable(s). * The element of union and struct. * The cast to other equalities. * CMP: >, <, ==, !=, <=, >= * ALU: +, -, *, /, - The CMP will return vfloat*m*_t the same as aarch64 sve. 
For example: typedef vfloat32m1_t fixed_vfloat32m1_t __attribute__((riscv_rvv_vector_bits(128))); fixed_vfloat32m1_t less_than (fixed_vfloat32m1_t a, fixed_vfloat32m1_t b) { return a < b; } For the vbool*_t types only below operations are allowed except the CMP and ALU. The CMP and ALU operations on vbool*_t is not well defined currently. * The sizeof. * The global variable(s). * The element of union and struct. * The cast to other equalities. For the vint*x*m*_t tuple types are not suppored in this patch which is compatible with clang. This patch passed the below testsuites. * The riscv fully regression tests. gcc/ChangeLog: * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Add pre-define macro __riscv_v_fixed_vlen when zvl. * config/riscv/riscv.cc (riscv_handle_rvv_vector_bits_attribute): New static func to take care of the RVV types decorated by the attributes. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-1.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-10.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-11.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-12.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-13.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-14.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-15.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-16.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-17.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-18.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-2.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-3.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-4.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-5.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-6.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-7.c: New test. 
* gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-8.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits-9.c: New test. * gcc.target/riscv/rvv/base/riscv_rvv_vector_bits.h: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-c.cc| 3 + gcc/config/riscv/riscv.cc | 87 - .../riscv/rvv/base/riscv_rvv_vector_bits-1.c | 6 ++ .../riscv/rvv/base/riscv_rvv_vector_bits-10.c | 53 ++ .../riscv/rvv/base/riscv_rvv_vector_bits-11.c | 76 +++ .../riscv/rvv/base/riscv_rvv_vector_bits-12.c | 14 +++
[gcc r14-9651] RISC-V: Allow RVV intrinsic when function target("arch=+v")
https://gcc.gnu.org/g:5cab64a9cfb93fb0e246a25e3fdc7b664afb774e commit r14-9651-g5cab64a9cfb93fb0e246a25e3fdc7b664afb774e Author: Pan Li Date: Mon Mar 25 14:22:31 2024 +0800 RISC-V: Allow RVV intrinsic when function target("arch=+v") This patch would like to allow the RVV intrinsic when function is attributed as target("arch=+v") and build with rv64gc. For example: vint32m1_t __attribute__((target("arch=+v"))) test_1 (vint32m1_t a, vint32m1_t b, size_t vl) { return __riscv_vadd_vv_i32m1 (a, b, vl); } build with -march=rv64gc -mabi=lp64d -O3, we will have asm like below: test_1: .option push .option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zicsr2p0_\ zifencei2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0 vsetvli zero,a0,e32,m1,ta,ma vadd.vv v8,v8,v9 ret The riscv_vector.h must be included when leverage intrinisc type(s) and API(s). And the scope of this attribute should not excced the function body. Meanwhile, to make rvv types and API(s) available for this attribute, include riscv_vector.h will not report error for now if v is not present in march. Below test are passed for this patch: * The riscv fully regression test. gcc/ChangeLog: * config/riscv/riscv-c.cc (riscv_pragma_intrinsic): Remove error when V is disabled and init the RVV types and intrinic APIs. * config/riscv/riscv-vector-builtins.cc (expand_builtin): Report error if V ext is disabled. * config/riscv/riscv.cc (riscv_return_value_is_vector_type_p): Ditto. (riscv_arguments_is_vector_type_p): Ditto. (riscv_vector_cc_function_p): Ditto. * config/riscv/riscv_vector.h: Remove error if V is disable. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pragma-1.c: Remove. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-1.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-2.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-3.c: New test. 
* gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-4.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-5.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-6.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: New test. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-c.cc| 18 - gcc/config/riscv/riscv-vector-builtins.cc | 5 gcc/config/riscv/riscv.cc | 30 +++--- gcc/config/riscv/riscv_vector.h| 4 --- gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c | 4 --- .../rvv/base/target_attribute_v_with_intrinsic-1.c | 5 .../rvv/base/target_attribute_v_with_intrinsic-2.c | 18 + .../rvv/base/target_attribute_v_with_intrinsic-3.c | 13 ++ .../rvv/base/target_attribute_v_with_intrinsic-4.c | 10 .../rvv/base/target_attribute_v_with_intrinsic-5.c | 12 + .../rvv/base/target_attribute_v_with_intrinsic-6.c | 12 + .../rvv/base/target_attribute_v_with_intrinsic-7.c | 9 +++ .../rvv/base/target_attribute_v_with_intrinsic-8.c | 23 + 13 files changed, 145 insertions(+), 18 deletions(-) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index edb866d51e4..01314037461 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -201,14 +201,20 @@ riscv_pragma_intrinsic (cpp_reader *) if (strcmp (name, "vector") == 0 || strcmp (name, "xtheadvector") == 0) { - if (!TARGET_VECTOR) + if (TARGET_VECTOR) + riscv_vector::handle_pragma_vector (); + else /* Indicates riscv_vector.h is included but v is missing in arch */ { - error ("%<#pragma riscv intrinsic%> option %qs needs 'V' or " -"'XTHEADVECTOR' extension enabled", -name); - return; + /* To make the the rvv types and intrinsic API available for the +target("arch=+v") attribute, we need to temporally enable the +TARGET_VECTOR, and disable it after all initialized. 
*/ + target_flags |= MASK_VECTOR; + + riscv_vector::init_builtins (); + riscv_vector::handle_pragma_vector (); + + target_flags &= ~MASK_VECTOR; } - riscv_vector::handle_pragma_vector (); } else error ("unknown %<#pragma riscv intrinsic%> option %qs", name); diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
[gcc r14-9730] RISC-V: Fix one unused variable in riscv_subset_list::parse
https://gcc.gnu.org/g:46eb34a75a9d004ce776bba382fe8af0978cace7 commit r14-9730-g46eb34a75a9d004ce776bba382fe8af0978cace7 Author: Pan Li Date: Sat Mar 30 21:32:06 2024 +0800 RISC-V: Fix one unused variable in riscv_subset_list::parse This patch would like to fix one unused variable as below: ../../gcc/common/config/riscv/riscv-common.cc: In static member function 'static riscv_subset_list* riscv_subset_list::parse(const char*, location_t)': ../../gcc/common/config/riscv/riscv-common.cc:1501:19: error: unused variable 'itr' [-Werror=unused-variable] 1501 | riscv_subset_t *itr; The code that consumed the variable was removed previously, but the declaration itself was left behind. Thus, we have an unused variable here. gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::parse): Remove unused var decl. Signed-off-by: Pan Li Diff: --- gcc/common/config/riscv/riscv-common.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 7095f303cbb..43b7549e3ec 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -1498,7 +1498,6 @@ riscv_subset_list::parse (const char *arch, location_t loc) return NULL; riscv_subset_list *subset_list = new riscv_subset_list (arch, loc); - riscv_subset_t *itr; const char *p = arch; p = subset_list->parse_base_ext (p); if (p == NULL)
[gcc r14-9731] RISC-V: Fix misspelled term builtin in error message
https://gcc.gnu.org/g:b313baba57f7e09f66b603e1e30dd4b48800693f commit r14-9731-gb313baba57f7e09f66b603e1e30dd4b48800693f Author: Pan Li Date: Sat Mar 30 20:03:18 2024 +0800 RISC-V: Fix misspelled term builtin in error message This patch would like to fix below misspelled term in error message. ../../gcc/config/riscv/riscv-vector-builtins.cc:4592:16: error: misspelled term 'builtin function' in format; use 'built-in function' instead [-Werror=format-diag] 4592 | "builtin function %qE requires the V ISA extension", exp); The below tests are passed for this patch. * The riscv regression test on rvv.exp and riscv.exp. gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (expand_builtin): Take the term built-in over builtin. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: Adjust test dg-error. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: Ditto. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-vector-builtins.cc | 2 +- .../gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c | 2 +- .../gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index e07373d8b57..db9246eed2d 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4589,7 +4589,7 @@ expand_builtin (unsigned int code, tree exp, rtx target) if (!TARGET_VECTOR) error_at (EXPR_LOCATION (exp), - "builtin function %qE requires the V ISA extension", exp); + "built-in function %qE requires the V ISA extension", exp); return function_expander (rfn.instance, rfn.decl, exp, target).expand (); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c index 520b2e59fae..a4cd67f4f95 100644 --- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c @@ -5,5 +5,5 @@ size_t test_1 (size_t vl) { - return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */ + return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */ } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c index 9032d9d0b43..06ed9a9eddc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c @@ -19,5 +19,5 @@ test_2 () size_t test_3 (size_t vl) { - return __riscv_vsetvl_e8m4 (vl); /* { dg-error {builtin function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */ + return __riscv_vsetvl_e8m4 (vl); /* { dg-error {built-in function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension} } */ }
[gcc r14-9828] RISC-V: Refine the error msg for RVV intrinsic required ext
https://gcc.gnu.org/g:7d051f7d45789e1442d26c07bfc5e7fb77433b87 commit r14-9828-g7d051f7d45789e1442d26c07bfc5e7fb77433b87 Author: Pan Li Date: Mon Apr 8 12:33:05 2024 +0800 RISC-V: Refine the error msg for RVV intrinisc required ext The RVV intrinisc API has sorts of required extension from both the march or target attribute. It will have error message similar to below: built-in function '__riscv_vsetvl_e8m4\(vl\)' requires the V ISA extension However, it is not accurate as we have many additional sub extenstion besides v extension. For example, zvbb, zvbk, zvbc ... etc. This patch would like to refine the error message with a friendly hint for the required extension. For example as below: vuint64m1_t __attribute__((target("arch=+v"))) test_1 (vuint64m1_t op_1, vuint64m1_t op_2, size_t vl) { return __riscv_vclmul_vv_u64m1 (op_1, op_2, vl); } When compile with march=rv64gc and target arch=+v, we will have error message as below: error: built-in function '__riscv_vclmul_vv_u64m1(op_1, op_2, vl)' requires the 'zvbc' ISA extension Then the end-user will get the point that the *zvbc* extension is missing for the intrinisc API easily. The below tests are passed for this patch. * The riscv fully regression tests. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (build_one): Pass required_ext arg when invoke add function. (build_th_loadstore): Ditto. (struct vcreate_def): Ditto. (struct read_vl_def): Ditto. (struct vlenb_def): Ditto. * config/riscv/riscv-vector-builtins.cc (function_builder::add_function): Introduce new arg required_ext to fill in the register func. (function_builder::add_unique_function): Ditto. (function_builder::add_overloaded_function): Ditto. (expand_builtin): Leverage required_extensions_specified to check if the required extension is provided. * config/riscv/riscv-vector-builtins.h (reqired_ext_to_isa_name): New func impl to convert the required_ext enum to the extension name. 
(required_extensions_specified): New func impl to predicate if the required extension is well feeded. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-7.c: Adjust the error message for v extension. * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-8.c: Ditto. * gcc.target/riscv/rvv/base/intrinsic_required_ext-1.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-10.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-2.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-3.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-4.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-5.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-6.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-7.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-8.c: New test. * gcc.target/riscv/rvv/base/intrinsic_required_ext-9.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-vector-builtins-shapes.cc | 18 -- gcc/config/riscv/riscv-vector-builtins.cc | 23 +-- gcc/config/riscv/riscv-vector-builtins.h | 75 +- .../riscv/rvv/base/intrinsic_required_ext-1.c | 10 +++ .../riscv/rvv/base/intrinsic_required_ext-10.c | 11 .../riscv/rvv/base/intrinsic_required_ext-2.c | 11 .../riscv/rvv/base/intrinsic_required_ext-3.c | 11 .../riscv/rvv/base/intrinsic_required_ext-4.c | 11 .../riscv/rvv/base/intrinsic_required_ext-5.c | 11 .../riscv/rvv/base/intrinsic_required_ext-6.c | 11 .../riscv/rvv/base/intrinsic_required_ext-7.c | 11 .../riscv/rvv/base/intrinsic_required_ext-8.c | 11 .../riscv/rvv/base/intrinsic_required_ext-9.c | 11 .../rvv/base/target_attribute_v_with_intrinsic-7.c | 2 +- .../rvv/base/target_attribute_v_with_intrinsic-8.c | 2 +- 15 files changed, 210 insertions(+), 19 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index c5ffcc1f2c4..7f983e82370 100644 --- 
a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -72,9 +72,10 @@ build_one (function_builder &b, const function_group_info &group, if (TARGET_XTHEADVECTOR && !check_type (return_type, argument_types)) return; - b.add_overloaded_function (function_instance, *group.shape); + b.add_overload
[gcc r14-9908] RISC-V: Bugfix ICE for the vector return arg in mode switch
https://gcc.gnu.org/g:e40a3d86511efcea71e9eadde8fb9f96be52f790 commit r14-9908-ge40a3d86511efcea71e9eadde8fb9f96be52f790 Author: Pan Li Date: Thu Apr 11 09:39:44 2024 +0800 RISC-V: Bugfix ICE for the vector return arg in mode switch This patch would like to fix an ICE in the mode switching (mode_sw) pass for the example code below. during RTL pass: mode_sw test.c: In function ‘vbool16_t j(vuint64m4_t)’: test.c:15:1: internal compiler error: in create_pre_exit, at mode-switching.cc:451 15 | } | ^ 0x3978f12 create_pre_exit __RISCV_BUILD__/../gcc/mode-switching.cc:451 0x3979e9e optimize_mode_switching __RISCV_BUILD__/../gcc/mode-switching.cc:849 0x397b9bc execute __RISCV_BUILD__/../gcc/mode-switching.cc:1324 extern size_t get_vl (); vbool16_t test (vuint64m4_t a) { unsigned long b; return __riscv_vmsne_vx_u64m4_b16 (a, b, get_vl ()); } The create_pre_exit would like to find a return value copy. If not, the assert expects a reason for that, but none is available for the above sample code when the vector calling convention is enabled by default. This patch would like to override the TARGET_FUNCTION_VALUE_REGNO_P for vector register and then we will have hard_regno_nregs for copy_num, aka there is a return value copy. As a side effect of allowing vector registers in TARGET_FUNCTION_VALUE_REGNO_P, the TARGET_GET_RAW_RESULT_MODE will see a vector mode, which is sizeless and cannot be converted to fixed_size_mode. Thus override the hook TARGET_GET_RAW_RESULT_MODE and return VOIDmode when the mode of the regno is not a fixed_size_mode. The below tests are passed for this patch. * The full riscv regression tests. * The reproducing test in bugzilla PR114639. PR target/114639 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_function_value_regno_p): New func impl for hook TARGET_FUNCTION_VALUE_REGNO_P. (riscv_get_raw_result_mode): New func impl for hook TARGET_GET_RAW_RESULT_MODE. (TARGET_FUNCTION_VALUE_REGNO_P): Impl the hook. (TARGET_GET_RAW_RESULT_MODE): Ditto. * config/riscv/riscv.h (V_RETURN): New macro for vector return.
(GP_RETURN_FIRST): New macro for the first GPR in return. (GP_RETURN_LAST): New macro for the last GPR in return. (FP_RETURN_FIRST): Ditto but for FPR. (FP_RETURN_LAST): Ditto. (FUNCTION_VALUE_REGNO_P): Remove as deprecated and replace by TARGET_FUNCTION_VALUE_REGNO_P. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/base/pr114639-1.C: New test. * gcc.target/riscv/rvv/base/pr114639-1.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 34 ++ gcc/config/riscv/riscv.h | 8 +++-- .../g++.target/riscv/rvv/base/pr114639-1.C | 25 .../gcc.target/riscv/rvv/base/pr114639-1.c | 14 + 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 00defa69fd8..91f017dd52a 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10997,6 +10997,34 @@ riscv_vector_mode_supported_any_target_p (machine_mode) return true; } +/* Implements hook TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +riscv_function_value_regno_p (const unsigned regno) +{ + if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST) +return true; + + if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST) +return true; + + if (regno == V_RETURN) +return true; + + return false; +} + +/* Implements hook TARGET_GET_RAW_RESULT_MODE. */ + +static fixed_size_mode +riscv_get_raw_result_mode (int regno) +{ + if (!is_a <fixed_size_mode> (reg_raw_mode[regno])) +return as_a <fixed_size_mode> (VOIDmode); + + return default_get_reg_raw_mode (regno); +} + /* Initialize the GCC target structure.
*/ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -11343,6 +11371,12 @@ riscv_vector_mode_supported_any_target_p (machine_mode) #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p + +#undef TARGET_GET_RAW_RESULT_MODE +#define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-riscv.h" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 269b8c1f076..7797e67317a 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -683,6 +683,12 @@ enum reg_class #define GP_RETURN GP_ARG_FIRST #define FP_RETURN (UNITS_PER_FP_ARG == 0 ? GP_RETURN : FP_ARG_FIRST) +#def
[gcc r14-9909] RISC-V: Remove -Wno-psabi for test build option [NFC]
https://gcc.gnu.org/g:f3fdcf4a37a7be07f2acbf5c8ed5e3399440a0ef commit r14-9909-gf3fdcf4a37a7be07f2acbf5c8ed5e3399440a0ef Author: Pan Li Date: Thu Apr 11 11:42:40 2024 +0800 RISC-V: Remove -Wno-psabi for test build option [NFC] Just notice there are some test case still have -Wno-psabi option, which is deprecated now. Remove them all for riscv test cases. The below test are passed for this patch. * The riscv rvv regression test. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/base/pr109244.C: Remove deprecated -Wno-psabi option. * g++.target/riscv/rvv/base/pr109535.C: Ditto. * gcc.target/riscv/rvv/autovec/fixed-vlmax-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/compress_run-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/consecutive_run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-5.c: Ditto. 
* gcc.target/riscv/rvv/autovec/vls-vlmax/merge-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge-7.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/merge_run-7.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-7.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-7.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-run.c: Ditto. 
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
[gcc r14-9930] RISC-V: Bugfix ICE non-vector in TARGET_FUNCTION_VALUE_REGNO_P
https://gcc.gnu.org/g:dc51a6428f6d8e5a57b8b1bf559145288e87660b commit r14-9930-gdc51a6428f6d8e5a57b8b1bf559145288e87660b Author: Pan Li Date: Fri Apr 12 11:12:24 2024 +0800 RISC-V: Bugfix ICE non-vector in TARGET_FUNCTION_VALUE_REGNO_P This patch would like to fix an ICE in the implementation of hook TARGET_FUNCTION_VALUE_REGNO_P when vector is not enabled. The vector regno is available if and only if the TARGET_VECTOR is true. The previous implementation missed this condition, which resulted in an ICE when building with an option such as rv64gc, i.e. without vector. The test suites below pass with this patch. * The rv64gcv full regression tests. * The rv64gc full regression tests. PR target/114639 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_function_value_regno_p): Add TARGET_VECTOR predicate for V_RETURN regno. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr114639-1.c: New test. * gcc.target/riscv/pr114639-2.c: New test. * gcc.target/riscv/pr114639-3.c: New test. * gcc.target/riscv/pr114639-4.c: New test. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 2 +- gcc/testsuite/gcc.target/riscv/pr114639-1.c | 11 +++ gcc/testsuite/gcc.target/riscv/pr114639-2.c | 11 +++ gcc/testsuite/gcc.target/riscv/pr114639-3.c | 11 +++ gcc/testsuite/gcc.target/riscv/pr114639-4.c | 11 +++ 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 91f017dd52a..e5f00806bb9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11008,7 +11008,7 @@ riscv_function_value_regno_p (const unsigned regno) if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST) return true; - if (regno == V_RETURN) + if (TARGET_VECTOR && regno == V_RETURN) return true; return false; diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-1.c b/gcc/testsuite/gcc.target/riscv/pr114639-1.c new file mode 100644 index 000..f41723193a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr114639-1.c @@ -0,0 +1,11 @@ +/* Test that we do not have ice when compile
*/ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -std=gnu89 -O3" } */ + +g (a, b) {} + +f (xx) + void* xx; +{ + __builtin_apply ((void*)g, xx, 200); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-2.c b/gcc/testsuite/gcc.target/riscv/pr114639-2.c new file mode 100644 index 000..0c402c4b254 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr114639-2.c @@ -0,0 +1,11 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64imac -mabi=lp64 -std=gnu89 -O3" } */ + +g (a, b) {} + +f (xx) + void* xx; +{ + __builtin_apply ((void*)g, xx, 200); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-3.c b/gcc/testsuite/gcc.target/riscv/pr114639-3.c new file mode 100644 index 000..ffb0d6d162d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr114639-3.c @@ -0,0 +1,11 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc -mabi=ilp32d -std=gnu89 -O3" } */ + +g (a, b) {} + +f (xx) + void* xx; +{ + __builtin_apply ((void*)g, xx, 200); +} diff --git a/gcc/testsuite/gcc.target/riscv/pr114639-4.c b/gcc/testsuite/gcc.target/riscv/pr114639-4.c new file mode 100644 index 000..a6e229101ef --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr114639-4.c @@ -0,0 +1,11 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv32imac -mabi=ilp32 -std=gnu89 -O3" } */ + +g (a, b) {} + +f (xx) + void* xx; +{ + __builtin_apply ((void*)g, xx, 200); +}
[gcc r14-9936] RISC-V: Fix Werror=sign-compare in riscv_validate_vector_type
https://gcc.gnu.org/g:6e7e5943619a2c20d93fc7089c885483786558bc commit r14-9936-g6e7e5943619a2c20d93fc7089c885483786558bc Author: Pan Li Date: Fri Apr 12 16:38:18 2024 +0800 RISC-V: Fix Werror=sign-compare in riscv_validate_vector_type This patch would like to fix the Werror=sign-compare similar to below: gcc/config/riscv/riscv.cc: In function ‘void riscv_validate_vector_type(const_tree, const char*)’: gcc/config/riscv/riscv.cc:5614:23: error: comparison of integer expressions of different signedness: ‘int’ and ‘unsigned int’ [-Werror=sign-compare] 5614 | if (TARGET_MIN_VLEN < required_min_vlen) The TARGET_MIN_VLEN is *int* by default but the required_min_vlen returned from riscv_vector_required_min_vlen is **unsigned**. Thus, adjust the related function and reference variable(s) to int type to avoid such kind of Werror. The below test suite is passed for this patch. * The rv64gcv fully regression tests. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_vector_float_type_p): Take int as the return value instead of unsigned. (riscv_vector_element_bitsize): Ditto. (riscv_vector_required_min_vlen): Ditto. (riscv_validate_vector_type): Take int type for local variable(s). 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv.cc | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e5f00806bb9..74445bc977c 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5499,7 +5499,7 @@ riscv_vector_float_type_p (const_tree type) return strstr (name, "vfloat") != NULL; } -static unsigned +static int riscv_vector_element_bitsize (const_tree type) { machine_mode mode = TYPE_MODE (type); @@ -5523,7 +5523,7 @@ riscv_vector_element_bitsize (const_tree type) gcc_unreachable (); } -static unsigned +static int riscv_vector_required_min_vlen (const_tree type) { machine_mode mode = TYPE_MODE (type); @@ -5531,7 +5531,7 @@ riscv_vector_required_min_vlen (const_tree type) if (riscv_v_ext_mode_p (mode)) return TARGET_MIN_VLEN; - unsigned element_bitsize = riscv_vector_element_bitsize (type); + int element_bitsize = riscv_vector_element_bitsize (type); const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); if (strstr (name, "bool64") != NULL) @@ -5569,7 +5569,7 @@ riscv_validate_vector_type (const_tree type, const char *hint) return; } - unsigned element_bitsize = riscv_vector_element_bitsize (type); + int element_bitsize = riscv_vector_element_bitsize (type); bool int_type_p = riscv_vector_int_type_p (type); if (int_type_p && element_bitsize == 64 @@ -5609,7 +5609,7 @@ riscv_validate_vector_type (const_tree type, const char *hint) return; } - unsigned required_min_vlen = riscv_vector_required_min_vlen (type); + int required_min_vlen = riscv_vector_required_min_vlen (type); if (TARGET_MIN_VLEN < required_min_vlen) {
[gcc r14-10049] Revert "RISC-V: Support one more overlap for wv instructions"
https://gcc.gnu.org/g:0cbeafe26513954b0aea3293d2f82d4863f10f1d commit r14-10049-g0cbeafe26513954b0aea3293d2f82d4863f10f1d Author: Pan Li Date: Sat Apr 20 08:29:38 2024 +0800 Revert "RISC-V: Support one more overlap for wv instructions" This reverts commit b3b2799b872bc4c1944629af9dfc8472c8ca5fe6. Diff: --- gcc/config/riscv/riscv.md | 14 ++-- gcc/config/riscv/vector.md | 84 +++--- .../gcc.target/riscv/rvv/base/pr112431-42.c| 30 3 files changed, 46 insertions(+), 82 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index c2b4323c53a..f0928398698 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -541,7 +541,7 @@ ;; Widening instructions have group-overlap constraints. Those are only ;; valid for certain register-group sizes. This attribute marks the ;; alternatives not matching the required register-group size as disabled. -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0" +(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87" (const_string "none")) (define_attr "group_overlap_valid" "no,yes" @@ -562,9 +562,9 @@ ;; According to RVV ISA: ;; The destination EEW is greater than the source EEW, the source EMUL is at least 1, - ;; and the overlap is in the highest-numbered part of the destination register group - ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). - ;; So the source operand should have LMUL >= 1. +;; and the overlap is in the highest-numbered part of the destination register group +;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). +;; So the source operand should have LMUL >= 1. 
(and (eq_attr "group_overlap" "W43") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 4 && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) @@ -574,12 +574,6 @@ (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 8 && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) (const_string "no") - - ;; W21 supports highest-number overlap for source LMUL = 1. - ;; For 'wv' variant, we can also allow wide source operand overlaps dest operand. - (and (eq_attr "group_overlap" "W0") - (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) > 1")) -(const_string "no") ] (const_string "yes"))) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8b1c24c5d79..8298a72b771 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3842,48 +3842,48 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_sub" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i,i,i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i,i,i") -(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") +(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, 
i, i, i, i,i,i") +(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") +(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTI - (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,0,0, vr, vr") + (match_operand:VWEXTI 3 "register_operan
[gcc r14-10050] RISC-V: Add xfail test case for wv insn register overlap
https://gcc.gnu.org/g:9f10005dbc9b660465ec4a9640bcbdcc1e5171c3 commit r14-10050-g9f10005dbc9b660465ec4a9640bcbdcc1e5171c3 Author: Pan Li Date: Sat Apr 20 09:02:39 2024 +0800 RISC-V: Add xfail test case for wv insn register overlap We reverted the patch below for wv insn overlap; add the related wv insn test and mark it as xfail. We will remove the xfail once register overlap is supported in GCC-15. b3b2799b872 RISC-V: Support one more overlap for wv instructions gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-42.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-42.c| 30 ++ 1 file changed, 30 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c new file mode 100644 index 000..fa5dac58a20 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-42.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */ + +#include + +int64_t +reduc_plus_int (int *__restrict a, int n) +{ + int64_t r = 0; + for (int i = 0; i < n; ++i) +r += a[i]; + return r; +} + +double +reduc_plus_float (float *__restrict a, int n) +{ + double r = 0; + for (int i = 0; i < n; ++i) +r += a[i]; + return r; +} + +/* { dg-final { scan-assembler-not {vmv1r} { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */ +/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */
[gcc r14-10051] Revert "RISC-V: Support highest overlap for wv instructions"
https://gcc.gnu.org/g:f5447eae72f11d9bfbb403183fd282918c0445c6 commit r14-10051-gf5447eae72f11d9bfbb403183fd282918c0445c6 Author: Pan Li Date: Sat Apr 20 09:42:57 2024 +0800 Revert "RISC-V: Support highest overlap for wv instructions" This reverts commit 7e854b58084c131fceca9e8fa9dcc7469972e69d. Diff: --- gcc/config/riscv/vector.md | 88 ++-- .../gcc.target/riscv/rvv/base/pr112431-39.c| 158 - .../gcc.target/riscv/rvv/base/pr112431-40.c| 94 .../gcc.target/riscv/rvv/base/pr112431-41.c| 62 4 files changed, 42 insertions(+), 360 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8298a72b771..8a727e2ea41 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3842,48 +3842,46 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_sub" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTI - (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr") + (match_operand:VWEXTI 3 "register_operand" " 
vr, vr") (any_extend:VWEXTI - (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))) - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu,0")))] + (match_operand: 4 "register_operand" " vr, vr"))) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] "TARGET_VECTOR" "vwsub.wv\t%0,%3,%4%p1" [(set_attr "type" "viwalu") - (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "mode" "")]) (define_insn "@pred_single_widen_add" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTI (any_extend:VWEXTI - (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")) - (match_operand:VWEXTI 3 "register_operand" " vr, vr,
[gcc r14-10052] RISC-V: Add xfail test case for wv insn highest overlap
https://gcc.gnu.org/g:1690e47e101c1e273b1ee052de21d5214257c13a commit r14-10052-g1690e47e101c1e273b1ee052de21d5214257c13a Author: Pan Li Date: Sat Apr 20 13:05:52 2024 +0800 RISC-V: Add xfail test case for wv insn highest overlap We reverted below patch for wv insn overlap, add the related wv insn test and mark it as xfail. And we will remove the xfail after we support the register overlap in GCC-15. 7e854b58084 RISC-V: Support highest overlap for wv instructions The below test suites are passed. * The rv64gcv fully regression test. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Xfail csr check. * gcc.target/riscv/rvv/base/pr112431-39.c: New test. * gcc.target/riscv/rvv/base/pr112431-40.c: New test. * gcc.target/riscv/rvv/base/pr112431-41.c: New test. Signed-off-by: Pan Li Diff: --- .../vect/costmodel/riscv/rvv/dynamic-lmul8-11.c| 2 +- .../gcc.target/riscv/rvv/base/pr112431-39.c| 158 + .../gcc.target/riscv/rvv/base/pr112431-40.c| 94 .../gcc.target/riscv/rvv/base/pr112431-41.c| 62 4 files changed, 315 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c index c9e28251225..5a39f04b140 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c @@ -40,7 +40,7 @@ void foo2 (int64_t *__restrict a, } /* { dg-final { scan-assembler {e64,m8} } } */ -/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */ /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c new file mode 100644 index 000..770b5411666 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c @@ -0,0 +1,158 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) +{ + asm volatile("nop" ::: "memory"); + vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v0 = __riscv_vwsub_wv_i16m2_tu (v0, v0, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v1 = __riscv_vwsub_wv_i16m2_tu (v1, v1, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v2 = __riscv_vwsub_wv_i16m2_tu (v2, v2, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v3 = __riscv_vwsub_wv_i16m2_tu (v3, v3, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v4 = __riscv_vwsub_wv_i16m2_tu (v4, v4, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v5 = __riscv_vwsub_wv_i16m2_tu (v5, v5, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v6 = __riscv_vwsub_wv_i16m2_tu (v6, v6, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v7 = __riscv_vwsub_wv_i16m2_tu (v7, v7, 
__riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v8 = __riscv_vwsub_wv_i16m2_tu (v8, v8, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v9 = __riscv_vwsub_wv_i16m2_tu (v9, v9, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v10 = __riscv_vwsub_wv_i16m2_tu (v10, v10, __riscv_vreinterpret_
[gcc r14-10054] Revert "RISC-V: Fix overlap group incorrect overlap on v0"
https://gcc.gnu.org/g:3afcb04bd7d444b4c6547ad98668c2a6a7f37a21 commit r14-10054-g3afcb04bd7d444b4c6547ad98668c2a6a7f37a21 Author: Pan Li Date: Sat Apr 20 22:37:56 2024 +0800 Revert "RISC-V: Fix overlap group incorrect overlap on v0" This reverts commit 018ba3ac952bed4ae01344c060360f13f7cc084a. Diff: --- gcc/config/riscv/vector.md | 268 ++--- .../gcc.target/riscv/rvv/base/pr112431-34.c| 101 2 files changed, 134 insertions(+), 235 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8a727e2ea41..2a6ab979588 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2254,70 +2254,70 @@ ;; DEST eew is greater than SOURCE eew. (define_insn "@pred_indexed_load_x2_greater_eew" - [(set (match_operand:VEEWEXT2 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VEEWEXT2 0 "register_operand" "=vr, vr, vr, vr, vr, vr, ?&vr, ?&vr") (if_then_else:VEEWEXT2 (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" "i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 7 "const_int_operand" "i, i, i, i, i, i, i, i, i, i, i, i,i,i") -(match_operand 8 "const_int_operand" "i, i, i, i, i, i, i, i, i, i, i, i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") +(match_operand 6 "const_int_operand" "i, i,i,i,i,i,i,i") +(match_operand 7 "const_int_operand" "i, i,i,i,i,i,i,i") +(match_operand 8 "const_int_operand" "i, i,i,i,i,i,i,i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT2 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ") + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, 
rJ, rJ, rJ, rJ, rJ, rJ, rJ") (mem:BLK (scratch)) -(match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")] ORDER) - (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu,0")))] +(match_operand: 4 "register_operand" " W21, W21, W42, W42, W84, W84, vr, vr")] ORDER) + (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 0, vu,0, vu,0, vu,0")))] "TARGET_VECTOR" "vlxei.v\t%0,(%z3),%4%p1" [(set_attr "type" "vldx") (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) (define_insn "@pred_indexed_load_x4_greater_eew" - [(set (match_operand:VEEWEXT4 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VEEWEXT4 0 "register_operand""=vr, vr, vr, vr, ?&vr, ?&vr") (if_then_else:VEEWEXT4 (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i,i,i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i,i,i") -(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK") +(match_operand 6 "const_int_operand" "i, i,i,i,i,i") +(match_operand 7 "const_int_operand" "i, i,i,i,i,i") +(match_operand 8 "const_int_operand"
[gcc r14-10056] RISC-V: Add xfail test case for incorrect overlap on v0
https://gcc.gnu.org/g:d37b34fe82e6e19e80ec9c46400f63fa90ba5255 commit r14-10056-gd37b34fe82e6e19e80ec9c46400f63fa90ba5255 Author: Pan Li Date: Sat Apr 20 22:43:13 2024 +0800 RISC-V: Add xfail test case for incorrect overlap on v0 We reverted the patch below for register group overlap; this commit adds the related insn test and marks it as xfail. We will remove the xfail after register overlap is supported in GCC-15. 018ba3ac952 RISC-V: Fix overlap group incorrect overlap on v0 The following test suites passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-34.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-34.c| 101 + 1 file changed, 101 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c new file mode 100644 index 000..286185aa01e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-34.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4, + size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9, + size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14, + size_t sum15) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9 ++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t 
v4 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v8 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl); + vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl); + vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl); + vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl); + vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl); + vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl); + vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl); + vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl); + vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl); + vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl); + vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl); + vint16m2_t vw11 = __riscv_vluxei8_v_i16m2 ((void *) it, v11, vl); + vint16m2_t vw12 = __riscv_vluxei8_v_i16m2 ((void *) it, v12, vl); + vint16m2_t vw13 = __riscv_vluxei8_v_i16m2 ((void *) it, v13, vl); + vint16m2_t vw14 = __riscv_vluxei8_v_i16m2 ((void *) it, v14, vl); + vbool8_t mask = *(vbool8_t*)it; + vint16m2_t vw15 = __riscv_vluxei8_v_i16m2_m (mask, (void *) it, v15, vl); + + asm volatile("nop" ::: 
"memory"); + size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0); + size_t sum1 = __riscv_vmv_x_s_i16m2_i16 (vw1); + size_t sum2 = __riscv_vmv_x_s_i16m2_i16 (vw2); + size_t sum3 = __riscv_vmv_x_s_i16m2_i16 (vw3); + size_t sum4 = __riscv_vmv_x_s_i16m2_i16 (vw4); + size_t sum5 = __riscv_vmv_x_s_i16m2_i16 (vw5); + size_t sum6 = __riscv_vmv_x_s_i16m2_i16 (vw6); + size_t sum7 = __riscv_vmv_x_s_i16m2_i16 (vw7); + size_t sum8 = __riscv_vmv_x_s_i16m2_i16 (vw8); + size_t sum9 = __riscv_vmv_x_s_i16m2_i16 (vw9); + size_t sum10 = __r
[gcc r14-10057] Revert "RISC-V: Support highpart register overlap for widen vx/vf instructions"
https://gcc.gnu.org/g:ef2392236ec629351496d7f299d6a0956080e4d9 commit r14-10057-gef2392236ec629351496d7f299d6a0956080e4d9 Author: Pan Li Date: Sun Apr 21 09:37:00 2024 +0800 Revert "RISC-V: Support highpart register overlap for widen vx/vf instructions" This reverts commit a23415d7572774701d7ec04664390260ab9a3f63. Diff: --- gcc/config/riscv/vector.md | 65 --- .../gcc.target/riscv/rvv/base/pr112431-22.c| 188 - .../gcc.target/riscv/rvv/base/pr112431-23.c| 119 - .../gcc.target/riscv/rvv/base/pr112431-24.c| 86 -- .../gcc.target/riscv/rvv/base/pr112431-25.c| 104 .../gcc.target/riscv/rvv/base/pr112431-26.c| 68 .../gcc.target/riscv/rvv/base/pr112431-27.c| 51 -- 7 files changed, 31 insertions(+), 650 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 2a6ab979588..f620f13682c 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3818,28 +3818,27 @@ (set_attr "mode" "")]) (define_insn "@pred_dual_widen__scalar" - [(set (match_operand:VWEXTI 0 "register_operand" "=vr, vr, vr, vr, vr,vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 7 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 8 "const_int_operand" "i, i,i,i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_widen_binop:VWEXTI (any_extend:VWEXTI - (match_operand: 3 "register_operand" " W21, W21, W42, W42, W84, W84, vr, vr")) + (match_operand: 3 "register_operand" " vr, vr")) 
(any_extend:VWEXTI (vec_duplicate: - (match_operand: 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ" - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0, vu,0, vu,0, vu,0")))] + (match_operand: 4 "reg_or_0_operand" " rJ, rJ" + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] "TARGET_VECTOR" "vw.vx\t%0,%3,%z4%p1" [(set_attr "type" "vi") - (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) + (set_attr "mode" "")]) (define_insn "@pred_single_widen_sub" [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") @@ -3928,28 +3927,27 @@ (set_attr "mode" "")]) (define_insn "@pred_widen_mulsu_scalar" - [(set (match_operand:VWEXTI 0 "register_operand" "=vr, vr, vr, vr, vr,vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 7 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 8 "const_int_operand" "i, i,i,i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (mult:VWEXTI (sign_extend:VWEXTI -
[gcc r14-10061] RISC-V: Add xfail test case for highpart register overlap of vx/vf widen
https://gcc.gnu.org/g:338640fbee2977485efb6ff0f1d3c7c8220074ad commit r14-10061-g338640fbee2977485efb6ff0f1d3c7c8220074ad Author: Pan Li Date: Sun Apr 21 12:34:19 2024 +0800 RISC-V: Add xfail test case for highpart register overlap of vx/vf widen We reverted the patch below for register group overlap; this commit adds the related insn tests and marks them as xfail. We will remove the xfail after register overlap is supported in GCC-15. a23415d7572 RISC-V: Support highpart register overlap for widen vx/vf instructions The following test suites passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-22.c: New test. * gcc.target/riscv/rvv/base/pr112431-23.c: New test. * gcc.target/riscv/rvv/base/pr112431-24.c: New test. * gcc.target/riscv/rvv/base/pr112431-25.c: New test. * gcc.target/riscv/rvv/base/pr112431-26.c: New test. * gcc.target/riscv/rvv/base/pr112431-27.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-22.c| 188 + .../gcc.target/riscv/rvv/base/pr112431-23.c| 119 + .../gcc.target/riscv/rvv/base/pr112431-24.c| 86 ++ .../gcc.target/riscv/rvv/base/pr112431-25.c| 104 .../gcc.target/riscv/rvv/base/pr112431-26.c| 68 .../gcc.target/riscv/rvv/base/pr112431-27.c| 51 ++ 6 files changed, 616 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c new file mode 100644 index 000..ac56703c75c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c @@ -0,0 +1,188 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4, + size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9, + size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14, + size_t sum15) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 
+ sum9 ++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vint16m2_t vw0 = __riscv_vwadd_vx_i16m2 (v0, 33, vl); + vint16m2_t vw1 = __riscv_vwadd_vx_i16m2 (v1, 33, vl); + vint16m2_t vw2 = __riscv_vwadd_vx_i16m2 (v2, 33, vl); + vint16m2_t vw3 = __riscv_vwadd_vx_i16m2 (v3, 33, vl); + vint16m2_t vw4 = __riscv_vwadd_vx_i16m2 (v4, 33, vl); + vint16m2_t vw5 = __riscv_vwadd_vx_i16m2 (v5, 33, vl); + vint16m2_t vw6 = __riscv_vwadd_vx_i16m2 (v6, 33, vl); + vint16m2_t vw7 = __riscv_vwadd_vx_i16m2 (v7, 33, vl); + vint16m2_t vw8 = __riscv_vwadd_vx_i16m2 (v8, 33, vl); + vint16m2_t vw9 = __riscv_vwadd_vx_i16m2 (v9, 33, vl); + vint16m2_t vw10 = __riscv_vwadd_vx_i16m2 
(v10, 33, vl); + vint16m2_t vw11 = __riscv_vwadd_vx_i16m2 (v11, 33, vl); + vint16m2_t vw12 = __riscv_vwadd_vx_i16m2 (v12, 33, vl); + vint16m2_t vw13 = __riscv_vwadd_vx_i16m2 (v13, 33, vl); + vint16m2_t vw14 = __riscv_vwadd_vx_i16m2 (v14, 33, vl); + vint16m2_t vw15 = __riscv_vwadd_vx_i16m2 (v15, 33, vl); + + asm volatile("nop" ::: "memory"); + size_t sum0 = __riscv_vmv_x_s_i16m2_i16 (vw0); + size_t sum1 = __ri
[gcc r14-10062] Revert "RISC-V: Support widening register overlap for vf4/vf8"
https://gcc.gnu.org/g:ec78916bb37bec0cd3ede5c6263387345ce16f94 commit r14-10062-gec78916bb37bec0cd3ede5c6263387345ce16f94 Author: Pan Li Date: Mon Apr 22 09:26:04 2024 +0800 Revert "RISC-V: Support widening register overlap for vf4/vf8" This reverts commit 303195e2a6b6f0e8f42e0578b61f9f37c6250beb. Diff: --- gcc/config/riscv/vector.md | 38 ++-- .../gcc.target/riscv/rvv/base/pr112431-16.c| 68 -- .../gcc.target/riscv/rvv/base/pr112431-17.c| 51 .../gcc.target/riscv/rvv/base/pr112431-18.c| 51 4 files changed, 18 insertions(+), 190 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index f620f13682c..140b4638346 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3754,45 +3754,43 @@ ;; Vector Quad-Widening Sign-extend and Zero-extend. (define_insn "@pred__vf4" - [(set (match_operand:VQEXTI 0 "register_operand" "=vr, vr, vr, vr, ?&vr, ?&vr") + [(set (match_operand:VQEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VQEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" "i,i, i,i,i,i") -(match_operand 6 "const_int_operand" "i,i, i,i,i,i") -(match_operand 7 "const_int_operand" "i,i, i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 4 "vector_length_operand" " rK, rK") +(match_operand 5 "const_int_operand" "i,i") +(match_operand 6 "const_int_operand" "i,i") +(match_operand 7 "const_int_operand" "i,i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_extend:VQEXTI - (match_operand: 3 "register_operand" " W43, W43, W86, W86, vr, vr")) - (match_operand:VQEXTI 2 "vector_merge_operand" " vu,0, vu,0, vu,0")))] + (match_operand: 3 "register_operand" " vr, vr")) + (match_operand:VQEXTI 2 "vector_merge_operand" " vu,0")))] "TARGET_VECTOR" "vext.vf4\t%0,%3%p1" [(set_attr "type" "vext") - (set_attr "mode" "") - (set_attr 
"group_overlap" "W43,W43,W86,W86,none,none")]) + (set_attr "mode" "")]) ;; Vector Oct-Widening Sign-extend and Zero-extend. (define_insn "@pred__vf8" - [(set (match_operand:VOEXTI 0 "register_operand" "=vr, vr, ?&vr, ?&vr") + [(set (match_operand:VOEXTI 0 "register_operand" "=&vr,&vr") (if_then_else:VOEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK") -(match_operand 5 "const_int_operand" "i,i, i,i") -(match_operand 6 "const_int_operand" "i,i, i,i") -(match_operand 7 "const_int_operand" "i,i, i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 4 "vector_length_operand" " rK, rK") +(match_operand 5 "const_int_operand" "i,i") +(match_operand 6 "const_int_operand" "i,i") +(match_operand 7 "const_int_operand" "i,i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_extend:VOEXTI - (match_operand: 3 "register_operand" " W87, W87, vr, vr")) - (match_operand:VOEXTI 2 "vector_merge_operand" " vu,0, vu,0")))] + (match_operand: 3 "register_operand" " vr, vr")) + (match_operand:VOEXTI 2 "vector_merge_operand" " vu,0")))] "TARGET_VECTOR" "vext.vf8\t%0,%3%p1" [(set_attr "type" "vext") - (set_attr "mode" "") - (set_attr "group_overlap" "W87,W87,none,none")]) + (set_attr "mode" "")]) ;; Vector Widening Add/Subtract/Multiply. (define_insn "@pred_dual_widen_" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c deleted file mode 100644 index 98f42458883..000 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c +++ /dev/null @@ -1,68 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ - -#include "riscv_vector.h" - -size_t __attribute__ ((noinline)) -sumation (size_t sum0, size_t sum1, size_t sum2,
[gcc r14-10063] RISC-V: Add xfail test case for widening register overlap of vf4/vf8
https://gcc.gnu.org/g:c4fdbdac1226787b4d33046f0be189a24dac468e commit r14-10063-gc4fdbdac1226787b4d33046f0be189a24dac468e Author: Pan Li Date: Mon Apr 22 10:11:25 2024 +0800 RISC-V: Add xfail test case for widening register overlap of vf4/vf8 We reverted the patch below for register group overlap; this commit adds the related insn tests and marks them as xfail. We will remove the xfail after register overlap is supported in GCC-15. 303195e2a6b RISC-V: Support widening register overlap for vf4/vf8 The following test suites passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-16.c: New test. * gcc.target/riscv/rvv/base/pr112431-17.c: New test. * gcc.target/riscv/rvv/base/pr112431-18.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-16.c| 68 ++ .../gcc.target/riscv/rvv/base/pr112431-17.c| 51 .../gcc.target/riscv/rvv/base/pr112431-18.c| 51 3 files changed, 170 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c new file mode 100644 index 000..42d11611d98 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4, + size_t sum5, size_t sum6, size_t sum7) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void 
*) it, vl); + it += vl; + vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl); + vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl); + vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl); + vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl); + vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl); + vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl); + vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl); + vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl); + + asm volatile("nop" ::: "memory"); + size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0); + size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1); + size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2); + size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3); + size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4); + size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5); + size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6); + size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7); + + sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7); +} + return sum; +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c new file mode 100644 index 000..9ecc62e234b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, 
size_t sum2, size_t sum3) +{ + return sum0 + sum1 + sum2 + sum3; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl); + it += vl; + vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl); + it += vl; + vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl); + it += vl; + vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl); + it +=
[gcc r14-10064] Revert "RISC-V: Support highest-number regno overlap for widen ternary"
https://gcc.gnu.org/g:cc46b6d4f3b4edc832a319ebf5053131dada3c8c commit r14-10064-gcc46b6d4f3b4edc832a319ebf5053131dada3c8c Author: Pan Li Date: Mon Apr 22 14:10:02 2024 +0800 Revert "RISC-V: Support highest-number regno overlap for widen ternary" This reverts commit 27fde325d64447a3a0d5d550c5976e5f3fb6dc16. Diff: --- gcc/config/riscv/vector.md | 115 ++--- .../gcc.target/riscv/rvv/base/pr112431-37.c| 103 -- .../gcc.target/riscv/rvv/base/pr112431-38.c| 82 --- 3 files changed, 55 insertions(+), 245 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 140b4638346..aef8cad20a0 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -5930,30 +5930,29 @@ (set_attr "mode" "")]) (define_insn "@pred_widen_mul_plus_scalar" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand""=&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1") -(match_operand 5 "vector_length_operand"" rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand"" i, i, i, i, i, i,i") -(match_operand 7 "const_int_operand"" i, i, i, i, i, i,i") -(match_operand 8 "const_int_operand"" i, i, i, i, i, i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1") +(match_operand 5 "vector_length_operand"" rK") +(match_operand 6 "const_int_operand""i") +(match_operand 7 "const_int_operand""i") +(match_operand 8 "const_int_operand""i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTI (mult:VWEXTI (any_extend:VWEXTI (vec_duplicate: - (match_operand: 3 "reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ, rJ"))) + (match_operand: 3 "register_operand" "r"))) (any_extend:VWEXTI - (match_operand: 4 "register_operand" "W21,W21,W42,W42,W84,W84, vr"))) - (match_operand:VWEXTI 2 "register_operand" " 0, 0, 0, 0, 0, 0,0")) + (match_operand: 4 "register_operand" " vr"))) + (match_operand:VWEXTI 2 "register_operand" 
"0")) (match_dup 2)))] "TARGET_VECTOR" - "vwmacc.vx\t%0,%z3,%4%p1" + "vwmacc.vx\t%0,%3,%4%p1" [(set_attr "type" "viwmuladd") - (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none")]) + (set_attr "mode" "")]) (define_insn "@pred_widen_mul_plussu" [(set (match_operand:VWEXTI 0 "register_operand""=&vr") @@ -5980,56 +5979,54 @@ (set_attr "mode" "")]) (define_insn "@pred_widen_mul_plussu_scalar" - [(set (match_operand:VWEXTI 0 "register_operand""=vd, vr, vd, vr, vd, vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand""=&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i,i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i,i") -(match_operand 8 "const_int_operand" " i, i, i, i, i, i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1") +(match_operand 5 "vector_length_operand"" rK") +(match_operand 6 "const_int_operand""i") +(match_operand 7 "const_int_operand""i") +(match_operand 8 "const_int_operand""i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTI (mult:VWEXTI (sign_extend:VWEXTI (vec_duplicate: - (match_operand: 3 "reg_or_0_operand"" rJ, rJ, rJ, rJ, rJ, rJ, rJ"))) + (match_operand: 3 "register_operand" "r"))) (zero_extend:VWEXTI - (match_operand: 4 "register_operand" "W21,W21,W42,W42,W84,W84, vr"))) - (match_operand:VWEXTI 2 "register_operand"" 0, 0, 0, 0, 0, 0,0")) + (match_operand: 4 "
[gcc r14-10065] RISC-V: Add xfail test case for highest-number regno ternary overlap
https://gcc.gnu.org/g:c7506847c020ad34eff248ab715eae238b9d1ed3 commit r14-10065-gc7506847c020ad34eff248ab715eae238b9d1ed3 Author: Pan Li Date: Mon Apr 22 14:32:25 2024 +0800 RISC-V: Add xfail test case for highest-number regno ternary overlap We reverted the patch below for register group overlap; this commit adds the related insn tests and marks them as xfail. We will remove the xfail after register overlap is supported in GCC-15. 27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary The following test suites passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-37.c: New test. * gcc.target/riscv/rvv/base/pr112431-38.c: New test. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-37.c| 103 + .../gcc.target/riscv/rvv/base/pr112431-38.c| 82 2 files changed, 185 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c new file mode 100644 index 000..66e81ea905a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c @@ -0,0 +1,103 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (void *in, void *out) +{ + vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4); + vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1); + vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16); + vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m2 (out, result, 4); +} + +void +foo2 (void *in, void *out) +{ + vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4); + vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1); + vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16); + vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m4 (out, result, 4); +} + +void +foo3 (void *in, void *out) +{ + vint16m8_t accum = 
__riscv_vle16_v_i16m8 (in, 4); + vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1); + vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16); + vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m8 (out, result, 4); +} + +void +foo4 (void *in, void *out) +{ + vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4); + vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1); + vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16); + vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m2 (out, result, 4); +} + +void +foo5 (void *in, void *out) +{ + vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4); + vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1); + vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16); + vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m4 (out, result, 4); +} + +void +foo6 (void *in, void *out) +{ + vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4); + vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1); + vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16); + vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4); + __riscv_vse16_v_i16m8 (out, result, 4); +} + +void +foo7 (void *in, void *out) +{ + vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4); + vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1); + vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16); + vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8); + vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4); + __riscv_vse16_v_i16m2 (out, result, 4); +} + +void +foo8 (void *in, void *out) +{ + vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4); + vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1); + vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16); + vuint8m2_t 
high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8); + vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4); + __riscv_vse16_v_i16m4 (out, result, 4); +} + +void +foo9 (void *in, void *out) +{ + vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4); + vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1); + vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16); + vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8); + vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4); + __riscv_vse16_v_i16m8 (out, result, 4); +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-not {vmv4r} { xfail riscv*-*-* } } } */ +/* { dg-fina
[gcc r14-10067] Revert "RISC-V: Support highpart overlap for indexed load with SRC EEW < DEST EEW"
https://gcc.gnu.org/g:9257c7a72059aba0df1684a0722c4d1538cbb6d4 commit r14-10067-g9257c7a72059aba0df1684a0722c4d1538cbb6d4 Author: Pan Li Date: Mon Apr 22 15:39:45 2024 +0800 Revert "RISC-V: Support highpart overlap for indexed load with SRC EEW < DEST EEW" This reverts commit 4418d55bcd1b7e0ef823981b6a781d7de5c38cce. Diff: --- gcc/config/riscv/vector.md | 63 ++--- .../gcc.target/riscv/rvv/base/pr112431-28.c| 104 - .../gcc.target/riscv/rvv/base/pr112431-29.c| 68 -- .../gcc.target/riscv/rvv/base/pr112431-30.c| 51 -- .../gcc.target/riscv/rvv/base/pr112431-31.c| 68 -- .../gcc.target/riscv/rvv/base/pr112431-32.c| 51 -- .../gcc.target/riscv/rvv/base/pr112431-33.c| 51 -- 7 files changed, 30 insertions(+), 426 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index aef8cad20a0..768d23e9f1d 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2254,70 +2254,67 @@ ;; DEST eew is greater than SOURCE eew. (define_insn "@pred_indexed_load_x2_greater_eew" - [(set (match_operand:VEEWEXT2 0 "register_operand" "=vr, vr, vr, vr, vr, vr, ?&vr, ?&vr") + [(set (match_operand:VEEWEXT2 0 "register_operand""=&vr, &vr") (if_then_else:VEEWEXT2 (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 7 "const_int_operand" "i, i,i,i,i,i,i,i") -(match_operand 8 "const_int_operand" "i, i,i,i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT2 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ, rJ, rJ") + [(match_operand 3 "pmode_reg_or_0_operand" " 
rJ, rJ") (mem:BLK (scratch)) -(match_operand: 4 "register_operand" " W21, W21, W42, W42, W84, W84, vr, vr")] ORDER) - (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 0, vu,0, vu,0, vu,0")))] +(match_operand: 4 "register_operand" " vr, vr")] ORDER) + (match_operand:VEEWEXT2 2 "vector_merge_operand" " vu, 0")))] "TARGET_VECTOR" "vlxei.v\t%0,(%z3),%4%p1" [(set_attr "type" "vldx") - (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) + (set_attr "mode" "")]) (define_insn "@pred_indexed_load_x4_greater_eew" - [(set (match_operand:VEEWEXT4 0 "register_operand""=vr, vr, vr, vr, ?&vr, ?&vr") + [(set (match_operand:VEEWEXT4 0 "register_operand""=&vr, &vr") (if_then_else:VEEWEXT4 (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK") -(match_operand 6 "const_int_operand" "i, i,i,i,i,i") -(match_operand 7 "const_int_operand" "i, i,i,i,i,i") -(match_operand 8 "const_int_operand" "i, i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 5 "vector_length_operand" " rK, rK") +(match_operand 6 "const_int_operand" "i, i") +(match_operand 7 "const_int_operand" "i, i") +(match_operand 8 "const_int_operand" "i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VEEWEXT4 - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ, rJ, rJ, rJ") + [(match_operand 3 "pm
[gcc r14-10068] RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW
https://gcc.gnu.org/g:a367b99f916cb7d2d673180ace640096fd118950 commit r14-10068-ga367b99f916cb7d2d673180ace640096fd118950 Author: Pan Li Date: Mon Apr 22 15:36:59 2024 +0800 RISC-V: Add xfail test case for indexed load overlap with SRC EEW < DEST EEW Update in v2: * Add change log to pr112431-34.c. Original log: We reverted the patch below for register group overlap, so add the related insn tests and mark them as xfail. We will remove the xfail once register overlap is supported in GCC-15. 4418d55bcd1 RISC-V: Support highpart overlap for indexed load with SRC EEW < DEST EEW The test suites below passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-34.c: Remove xfail for vluxei8 check. * gcc.target/riscv/rvv/base/pr112431-28.c: New test. * gcc.target/riscv/rvv/base/pr112431-29.c: New test. * gcc.target/riscv/rvv/base/pr112431-30.c: New test. * gcc.target/riscv/rvv/base/pr112431-31.c: New test. * gcc.target/riscv/rvv/base/pr112431-32.c: New test. * gcc.target/riscv/rvv/base/pr112431-33.c: New test. 
Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-28.c| 104 + .../gcc.target/riscv/rvv/base/pr112431-29.c| 68 ++ .../gcc.target/riscv/rvv/base/pr112431-30.c| 51 ++ .../gcc.target/riscv/rvv/base/pr112431-31.c| 68 ++ .../gcc.target/riscv/rvv/base/pr112431-32.c| 51 ++ .../gcc.target/riscv/rvv/base/pr112431-33.c| 51 ++ .../gcc.target/riscv/rvv/base/pr112431-34.c| 2 +- 7 files changed, 394 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c new file mode 100644 index 000..c16cbdfe9f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-28.c @@ -0,0 +1,104 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +size_t __attribute__ ((noinline)) +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4, + size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9, + size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14, + size_t sum15) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9 ++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15; +} + +size_t +foo (char const *buf, size_t len) +{ + size_t sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vuint8m1_t v0 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v1 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v2 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v3 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v4 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v5 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v6 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v7 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v8 = 
__riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v9 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v10 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v11 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v12 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v13 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v14 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + vuint8m1_t v15 = __riscv_vle8_v_u8m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vint16m2_t vw0 = __riscv_vluxei8_v_i16m2 ((void *) it, v0, vl); + vint16m2_t vw1 = __riscv_vluxei8_v_i16m2 ((void *) it, v1, vl); + vint16m2_t vw2 = __riscv_vluxei8_v_i16m2 ((void *) it, v2, vl); + vint16m2_t vw3 = __riscv_vluxei8_v_i16m2 ((void *) it, v3, vl); + vint16m2_t vw4 = __riscv_vluxei8_v_i16m2 ((void *) it, v4, vl); + vint16m2_t vw5 = __riscv_vluxei8_v_i16m2 ((void *) it, v5, vl); + vint16m2_t vw6 = __riscv_vluxei8_v_i16m2 ((void *) it, v6, vl); + vint16m2_t vw7 = __riscv_vluxei8_v_i16m2 ((void *) it, v7, vl); + vint16m2_t vw8 = __riscv_vluxei8_v_i16m2 ((void *) it, v8, vl); + vint16m2_t vw9 = __riscv_vluxei8_v_i16m2 ((void *) it, v9, vl); + vint16m2_t vw10 = __riscv_vluxei8_v_i16m2 ((void *) it, v10, vl); + vint16m2_t vw11 = __riscv_vluxei8_v_i16
[gcc r14-10069] Revert "RISC-V: Support highpart overlap for floating-point widen instructions"
https://gcc.gnu.org/g:4df96b4ec788f2d588febf3555685f2700b932b3 commit r14-10069-g4df96b4ec788f2d588febf3555685f2700b932b3 Author: Pan Li Date: Mon Apr 22 16:25:57 2024 +0800 Revert "RISC-V: Support highpart overlap for floating-point widen instructions" This reverts commit 8614cbb253484e28c3eb20cde4d1067aad56de58. Diff: --- gcc/config/riscv/vector.md | 78 - .../gcc.target/riscv/rvv/base/pr112431-10.c| 104 .../gcc.target/riscv/rvv/base/pr112431-11.c| 68 .../gcc.target/riscv/rvv/base/pr112431-12.c| 51 -- .../gcc.target/riscv/rvv/base/pr112431-13.c| 188 - .../gcc.target/riscv/rvv/base/pr112431-14.c| 119 - .../gcc.target/riscv/rvv/base/pr112431-15.c| 86 -- .../gcc.target/riscv/rvv/base/pr112431-7.c | 106 .../gcc.target/riscv/rvv/base/pr112431-8.c | 68 .../gcc.target/riscv/rvv/base/pr112431-9.c | 51 -- 10 files changed, 37 insertions(+), 882 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 768d23e9f1d..598aa8fba33 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7696,88 +7696,84 @@ ;; --- (define_insn "@pred_widen_fcvt_x_f" - [(set (match_operand:VWCONVERTI 0 "register_operand" "=vr, vr, vr, vr, vr,vr, ?&vr, ?&vr") + [(set (match_operand:VWCONVERTI 0 "register_operand" "=&vr, &vr") (if_then_else:VWCONVERTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" "i,i, i,i,i,i,i,i") -(match_operand 6 "const_int_operand" "i,i, i,i,i,i,i,i") -(match_operand 7 "const_int_operand" "i,i, i,i,i,i,i,i") -(match_operand 8 "const_int_operand" "i,i, i,i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 4 "vector_length_operand" " rK, rK") +(match_operand 5 "const_int_operand" "i,i") +(match_operand 6 "const_int_operand" "i,i") +(match_operand 7 "const_int_operand" "i,i") +(match_operand 8 "const_int_operand" "i,i") (reg:SI 
VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (unspec:VWCONVERTI -[(match_operand: 3 "register_operand" " W21, W21, W42, W42, W84, W84, vr, vr")] VFCVTS) - (match_operand:VWCONVERTI 2 "vector_merge_operand" " vu,0, vu,0, vu,0, vu,0")))] +[(match_operand: 3 "register_operand" " vr, vr")] VFCVTS) + (match_operand:VWCONVERTI 2 "vector_merge_operand" " vu,0")))] "TARGET_VECTOR" "vfwcvt.x.f.v\t%0,%3%p1" [(set_attr "type" "vfwcvtftoi") (set_attr "mode" "") (set (attr "frm_mode") - (symbol_ref "riscv_vector::get_frm_mode (operands[8])")) - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) (define_insn "@pred_widen_" - [(set (match_operand:VWCONVERTI 0 "register_operand" "=vr, vr, vr, vr, vr,vr, ?&vr, ?&vr") + [(set (match_operand:VWCONVERTI 0 "register_operand""=&vr, &vr") (if_then_else:VWCONVERTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1") -(match_operand 4 "vector_length_operand"" rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand""i,i, i,i,i,i,i,i") -(match_operand 6 "const_int_operand""i,i, i,i,i,i,i,i") -(match_operand 7 "const_int_operand""i,i, i,i,i,i,i,i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") +(match_operand 4 "vector_length_operand"" rK, rK") +(match_operand 5 "const_int_operand""i,i") +(match_operand 6 "const_int_operand""i,i") +(match_operand 7 "const_int_operand""i,i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_fix:VWCONVERTI -
[gcc r14-10070] RISC-V: Add xfail test case for highpart overlap floating-point widen insn
https://gcc.gnu.org/g:b991193eb8a79ec7562f3de3df866df9f041015a commit r14-10070-gb991193eb8a79ec7562f3de3df866df9f041015a Author: Pan Li Date: Mon Apr 22 16:07:36 2024 +0800 RISC-V: Add xfail test case for highpart overlap floating-point widen insn We reverted the patch below for register group overlap, so add the related insn tests and mark them as xfail. We will remove the xfail once register overlap is supported in GCC-15. 8614cbb2534 RISC-V: Support highpart overlap for floating-point widen instructions The test suites below passed. * The rv64gcv full regression test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-10.c: New test. * gcc.target/riscv/rvv/base/pr112431-11.c: New test. * gcc.target/riscv/rvv/base/pr112431-12.c: New test. * gcc.target/riscv/rvv/base/pr112431-13.c: New test. * gcc.target/riscv/rvv/base/pr112431-14.c: New test. * gcc.target/riscv/rvv/base/pr112431-15.c: New test. * gcc.target/riscv/rvv/base/pr112431-7.c: New test. * gcc.target/riscv/rvv/base/pr112431-8.c: New test. * gcc.target/riscv/rvv/base/pr112431-9.c: New test. 
Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/base/pr112431-10.c| 104 .../gcc.target/riscv/rvv/base/pr112431-11.c| 68 .../gcc.target/riscv/rvv/base/pr112431-12.c| 51 ++ .../gcc.target/riscv/rvv/base/pr112431-13.c| 188 + .../gcc.target/riscv/rvv/base/pr112431-14.c| 119 + .../gcc.target/riscv/rvv/base/pr112431-15.c| 86 ++ .../gcc.target/riscv/rvv/base/pr112431-7.c | 104 .../gcc.target/riscv/rvv/base/pr112431-8.c | 68 .../gcc.target/riscv/rvv/base/pr112431-9.c | 51 ++ 9 files changed, 839 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c new file mode 100644 index 000..5d3f2fbe46d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-10.c @@ -0,0 +1,104 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +double __attribute__ ((noinline)) +sumation (double sum0, double sum1, double sum2, double sum3, double sum4, + double sum5, double sum6, double sum7, double sum8, double sum9, + double sum10, double sum11, double sum12, double sum13, double sum14, + double sum15) +{ + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9 ++ sum10 + sum11 + sum12 + sum13 + sum14 + sum15; +} + +double +foo (char const *buf, size_t len) +{ + double sum = 0; + size_t vl = __riscv_vsetvlmax_e8m8 (); + size_t step = vl * 4; + const char *it = buf, *end = buf + len; + for (; it + step <= end;) +{ + vint32m1_t v0 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v1 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v2 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v3 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v4 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v5 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v6 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t 
v7 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v8 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v9 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v10 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v11 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v12 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v13 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v14 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + vint32m1_t v15 = __riscv_vle32_v_i32m1 ((void *) it, vl); + it += vl; + + asm volatile("nop" ::: "memory"); + vfloat64m2_t vw0 = __riscv_vfwcvt_f_x_v_f64m2 (v0, vl); + vfloat64m2_t vw1 = __riscv_vfwcvt_f_x_v_f64m2 (v1, vl); + vfloat64m2_t vw2 = __riscv_vfwcvt_f_x_v_f64m2 (v2, vl); + vfloat64m2_t vw3 = __riscv_vfwcvt_f_x_v_f64m2 (v3, vl); + vfloat64m2_t vw4 = __riscv_vfwcvt_f_x_v_f64m2 (v4, vl); + vfloat64m2_t vw5 = __riscv_vfwcvt_f_x_v_f64m2 (v5, vl); + vfloat64m2_t vw6 = __riscv_vfwcvt_f_x_v_f64m2 (v6, vl); + vfloat64m2_t vw7 = __riscv_vfwcvt_f_x_v_f64m2 (v7, vl); + vfloat64m2_t vw8 = __riscv_vfwcvt_f_x_v_f64m2 (v8, vl); + vfloat64m2_t vw9 = __riscv_vfwcvt_f_x_v_f64m2 (v9, vl); + vfloat64m2_t vw10 = __riscv_
[gcc r14-10073] Revert "RISC-V: Robostify the W43, W86, W87 constraint enabled attribute"
https://gcc.gnu.org/g:b78c88438cf3672987736edc013ffc0b20e879f7 commit r14-10073-gb78c88438cf3672987736edc013ffc0b20e879f7 Author: Pan Li Date: Mon Apr 22 20:44:38 2024 +0800 Revert "RISC-V: Robostify the W43, W86, W87 constraint enabled attribute" This reverts commit d3544cea63d0a642b6357a7be55986f5562beaa0. Diff: --- gcc/config/riscv/riscv.md | 19 ++- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index f0928398698..3628e2215da 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -552,28 +552,13 @@ (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 2")) (const_string "no") - (and (eq_attr "group_overlap" "W42") + (and (eq_attr "group_overlap" "W42,W43") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 4")) (const_string "no") - (and (eq_attr "group_overlap" "W84") + (and (eq_attr "group_overlap" "W84,W86,W87") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 8")) (const_string "no") - - ;; According to RVV ISA: - ;; The destination EEW is greater than the source EEW, the source EMUL is at least 1, -;; and the overlap is in the highest-numbered part of the destination register group -;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). -;; So the source operand should have LMUL >= 1. - (and (eq_attr "group_overlap" "W43") - (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 4 - && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) -(const_string "no") - - (and (eq_attr "group_overlap" "W86,W87") - (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 8 - && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) -(const_string "no") ] (const_string "yes")))
[gcc r14-10074] Revert "RISC-V: Rename vconstraint into group_overlap"
https://gcc.gnu.org/g:cacc55a4c0be8d0bc7417b6a28924eadbbe428e3 commit r14-10074-gcacc55a4c0be8d0bc7417b6a28924eadbbe428e3 Author: Pan Li Date: Mon Apr 22 20:45:40 2024 +0800 Revert "RISC-V: Rename vconstraint into group_overlap" This reverts commit e65aaf8efe1900f7bbf76235a078000bf2ec8b45. Diff: --- gcc/config/riscv/constraints.md | 12 ++-- gcc/config/riscv/riscv.md | 19 --- gcc/config/riscv/vector.md | 4 ++-- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 972e8842c9f..e37c6936bfa 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -173,14 +173,14 @@ (define_register_constraint "W84" "TARGET_VECTOR ? V_REGS : NO_REGS" "A vector register has register number % 8 == 4." "regno % 8 == 4") -(define_register_constraint "W43" "TARGET_VECTOR ? V_REGS : NO_REGS" - "A vector register has register number % 4 == 3." "regno % 4 == 3") +(define_register_constraint "W41" "TARGET_VECTOR ? V_REGS : NO_REGS" + "A vector register has register number % 4 == 1." "regno % 4 == 1") -(define_register_constraint "W86" "TARGET_VECTOR ? V_REGS : NO_REGS" - "A vector register has register number % 8 == 6." "regno % 8 == 6") +(define_register_constraint "W81" "TARGET_VECTOR ? V_REGS : NO_REGS" + "A vector register has register number % 8 == 1." "regno % 8 == 1") -(define_register_constraint "W87" "TARGET_VECTOR ? V_REGS : NO_REGS" - "A vector register has register number % 8 == 7." "regno % 8 == 7") +(define_register_constraint "W82" "TARGET_VECTOR ? V_REGS : NO_REGS" + "A vector register has register number % 8 == 2." "regno % 8 == 2") ;; This constraint is used to match instruction "csrr %0, vlenb" which is generated in "mov". ;; VLENB is a run-time constant which represent the vector register length in bytes. 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 3628e2215da..1693d4008c6 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -538,25 +538,22 @@ ] (const_string "no"))) -;; Widening instructions have group-overlap constraints. Those are only -;; valid for certain register-group sizes. This attribute marks the -;; alternatives not matching the required register-group size as disabled. -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87" - (const_string "none")) +(define_attr "vconstraint" "no,W21,W42,W84,W41,W81,W82" + (const_string "no")) -(define_attr "group_overlap_valid" "no,yes" - (cond [(eq_attr "group_overlap" "none") +(define_attr "vconstraint_enabled" "no,yes" + (cond [(eq_attr "vconstraint" "no") (const_string "yes") - (and (eq_attr "group_overlap" "W21") + (and (eq_attr "vconstraint" "W21") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 2")) (const_string "no") - (and (eq_attr "group_overlap" "W42,W43") + (and (eq_attr "vconstraint" "W42,W41") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 4")) (const_string "no") - (and (eq_attr "group_overlap" "W84,W86,W87") + (and (eq_attr "vconstraint" "W84,W81,W82") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 8")) (const_string "no") ] @@ -590,7 +587,7 @@ (eq_attr "fp_vector_disabled" "yes") (const_string "no") -(eq_attr "group_overlap_valid" "no") +(eq_attr "vconstraint_enabled" "no") (const_string "no") (eq_attr "spec_restriction_disabled" "yes") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 598aa8fba33..cb5174a5e91 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3747,7 +3747,7 @@ "vext.vf2\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "") - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) + (set_attr "vconstraint" "W21,W21,W42,W42,W84,W84,no,no")]) ;; Vector Quad-Widening Sign-extend and Zero-extend. 
(define_insn "@pred__vf4" @@ -3970,7 +3970,7 @@ (set (attr "ta") (symbol_ref "riscv_vector::get_ta(operands[5])")) (set (attr "ma") (symbol_ref "riscv_vector::get_ma(operands[6])")) (set (attr "avl_type_idx") (const_int 7)) - (set_attr "group_overlap" "W21,W21,W42,W42,W84,W84,none,none")]) + (set_attr "vconstraint" "W21,W21,W42,W42,W84,W84,no,no")]) ;; --- ;; Predicated integer Narrowing operations