https://gcc.gnu.org/g:e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd
commit e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd
Author: Pan Li <pan2...@intel.com>
Date:   Mon Oct 14 10:14:31 2024 +0800

    RISC-V: Implement vector SAT_TRUNC for signed integer

    This patch would like to implement the sstrunc for vector signed integer.

    Form 1:
      #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
      void __attribute__((noinline)) \
      vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
      { \
        unsigned i; \
        for (i = 0; i < limit; i++) \
          { \
            WT x = in[i]; \
            NT trunc = (NT)x; \
            out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
              ? trunc \
              : x < 0 ? NT_MIN : NT_MAX; \
          } \
      }

    DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)

    Before this patch:
      27 │   vsetvli a5,a2,e64,m1,ta,ma
      28 │   vle64.v v1,0(a1)
      29 │   slli a3,a5,3
      30 │   slli a4,a5,2
      31 │   sub a2,a2,a5
      32 │   add a1,a1,a3
      33 │   vadd.vv v0,v1,v5
      34 │   vsetvli zero,zero,e32,mf2,ta,ma
      35 │   vnsrl.wx v2,v1,a6
      36 │   vncvt.x.x.w v1,v1
      37 │   vsetvli zero,zero,e64,m1,ta,ma
      38 │   vmsgtu.vv v0,v0,v4
      39 │   vsetvli zero,zero,e32,mf2,ta,mu
      40 │   vneg.v v2,v2
      41 │   vxor.vv v1,v2,v3,v0.t
      42 │   vse32.v v1,0(a0)
      43 │   add a0,a0,a4
      44 │   bne a2,zero,.L3

    After this patch:
      16 │   vsetvli a5,a2,e32,mf2,ta,ma
      17 │   vle64.v v1,0(a1)
      18 │   slli a3,a5,3
      19 │   slli a4,a5,2
      20 │   sub a2,a2,a5
      21 │   add a1,a1,a3
      22 │   vnclip.wi v1,v1,0
      23 │   vse32.v v1,0(a0)
      24 │   add a0,a0,a4
      25 │   bne a2,zero,.L3

    The below test suites are passed for this patch.
    * The rv64gcv fully regression test.

    gcc/ChangeLog:

            * config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add
            new pattern sstrunc for double trunc.
            (sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc.
            (sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc.
            * config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add
            new func decl to expand double trunc.
            (expand_vec_quad_sstrunc): Ditto but for quad trunc.
            (expand_vec_oct_sstrunc): Ditto but for oct trunc.
            * config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new
            func to expand double trunc.
            (expand_vec_quad_sstrunc): Ditto but for quad trunc.
            (expand_vec_oct_sstrunc): Ditto but for oct trunc.

    Signed-off-by: Pan Li <pan2...@intel.com>
    (cherry picked from commit b5a058154179ab16fe5f9e6aa331624363410aad)

Diff:
---
 gcc/config/riscv/autovec.md     | 34 ++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  4 ++++
 gcc/config/riscv/riscv-v.cc     | 46 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c96516..774a3d337231 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2779,6 +2779,40 @@
   }
 )
 
+(define_expand "sstrunc<mode><v_double_trunc>2"
+  [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+   (match_operand:VWEXTI 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+                                             <MODE>mode);
+    DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_quad_trunc>2"
+  [(match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+   (match_operand:VQEXTI 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], <MODE>mode,
+                                           <V_DOUBLE_TRUNC>mode);
+    DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_oct_trunc>2"
+  [(match_operand:<V_OCT_TRUNC> 0 "register_operand")
+   (match_operand:VOEXTI 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode,
+                                          <V_DOUBLE_TRUNC>mode,
+                                          <V_QUAD_TRUNC>mode);
+    DONE;
+  }
+)
+
 ;; =========================================================================
 ;; == Early break auto-vectorization patterns
 ;; =========================================================================
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index d690162bb0c3..54f472afd8d0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -652,9 +652,13 @@ void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
 void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
 void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
+void expand_vec_double_sstrunc (rtx, rtx, machine_mode);
 void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_quad_sstrunc (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
                              machine_mode);
+void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
+                             machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
                           bool, void (*)(rtx *, rtx), enum avl_type);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 630fbd80e941..c48b87278a31 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4939,6 +4939,22 @@ expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
   emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for double vector mode, like
+   DI => SI. we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly. */
+
+void
+expand_vec_double_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
+{
+  insn_code icode;
+  rtx zero = CONST0_RTX (Xmode);
+  enum unspec unspec = UNSPEC_VNCLIP;
+  rtx ops[] = {op_0, op_1, zero};
+
+  icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode);
+  emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
+}
+
 /* Expand the standard name ustrunc<m><n>2 for double vector mode, like
    DI => HI. we can leverage the vector fixed point vector narrowing
    fixed-point clip directly. */
@@ -4953,6 +4969,20 @@ expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
   expand_vec_double_ustrunc (op_0, double_rtx, double_mode);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for quad vector mode, like
+   DI => HI. we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly. */
+
+void
+expand_vec_quad_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+                         machine_mode double_mode)
+{
+  rtx double_rtx = gen_reg_rtx (double_mode);
+
+  expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+  expand_vec_double_sstrunc (op_0, double_rtx, double_mode);
+}
+
 /* Expand the standard name ustrunc<m><n>2 for double vector mode, like
    DI => QI. we can leverage the vector fixed point vector narrowing
    fixed-point clip directly. */
@@ -4969,6 +4999,22 @@ expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
   expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for oct vector mode, like
+   DI => QI. we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly. */
+
+void
+expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+                        machine_mode double_mode, machine_mode quad_mode)
+{
+  rtx double_rtx = gen_reg_rtx (double_mode);
+  rtx quad_rtx = gen_reg_rtx (quad_mode);
+
+  expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+  expand_vec_double_sstrunc (quad_rtx, double_rtx, double_mode);
+  expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
+}
+
 /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
    well. */
 void