[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement vector SAT_TRUNC for signed integer

Jeff Law via Gcc-cvs Thu, 07 Nov 2024 13:05:10 -0800

https://gcc.gnu.org/g:e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd


commit e88115c93dc7b3cb2a805db3612bdcc3a66b5dcd
Author: Pan Li <pan2...@intel.com>
Date:   Mon Oct 14 10:14:31 2024 +0800

    RISC-V: Implement vector SAT_TRUNC for signed integer
    
    This patch implements the sstrunc standard name for vector signed integers.
    
    Form 1:
      #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX)             \
      void __attribute__((noinline))                                        \
      vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
      {                                                                     \
        unsigned i;                                                         \
        for (i = 0; i < limit; i++)                                         \
          {                                                                 \
            WT x = in[i];                                                   \
            NT trunc = (NT)x;                                               \
            out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX                     \
              ? trunc                                                       \
              : x < 0 ? NT_MIN : NT_MAX;                                    \
          }                                                                 \
      }
    
    DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)
    
    Before this patch:
      27   │     vsetvli a5,a2,e64,m1,ta,ma
      28   │     vle64.v v1,0(a1)
      29   │     slli    a3,a5,3
      30   │     slli    a4,a5,2
      31   │     sub a2,a2,a5
      32   │     add a1,a1,a3
      33   │     vadd.vv v0,v1,v5
      34   │     vsetvli zero,zero,e32,mf2,ta,ma
      35   │     vnsrl.wx    v2,v1,a6
      36   │     vncvt.x.x.w v1,v1
      37   │     vsetvli zero,zero,e64,m1,ta,ma
      38   │     vmsgtu.vv   v0,v0,v4
      39   │     vsetvli zero,zero,e32,mf2,ta,mu
      40   │     vneg.v  v2,v2
      41   │     vxor.vv v1,v2,v3,v0.t
      42   │     vse32.v v1,0(a0)
      43   │     add a0,a0,a4
      44   │     bne a2,zero,.L3
    
    After this patch:
      16   │     vsetvli a5,a2,e32,mf2,ta,ma
      17   │     vle64.v v1,0(a1)
      18   │     slli    a3,a5,3
      19   │     slli    a4,a5,2
      20   │     sub a2,a2,a5
      21   │     add a1,a1,a3
      22   │     vnclip.wi   v1,v1,0
      23   │     vse32.v v1,0(a0)
      24   │     add a0,a0,a4
      25   │     bne a2,zero,.L3
    
    The following test suite passed for this patch.
    * The rv64gcv full regression test.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add
            new pattern sstrunc for double trunc.
            (sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc.
            (sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc.
            * config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add
            new func decl to expand double trunc.
            (expand_vec_quad_sstrunc): Ditto but for quad trunc.
            (expand_vec_oct_sstrunc): Ditto but for oct trunc.
            * config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new
            func to expand double trunc.
            (expand_vec_quad_sstrunc): Ditto but for quad trunc.
            (expand_vec_oct_sstrunc): Ditto but for oct trunc.
    
    Signed-off-by: Pan Li <pan2...@intel.com>
    (cherry picked from commit b5a058154179ab16fe5f9e6aa331624363410aad)

Diff:
---
 gcc/config/riscv/autovec.md     | 34 ++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  4 ++++
 gcc/config/riscv/riscv-v.cc     | 46 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c96516..774a3d337231 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2779,6 +2779,40 @@
   }
 )
 
+(define_expand "sstrunc<mode><v_double_trunc>2"
+  [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+   (match_operand:VWEXTI           1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+                                         <MODE>mode);
+    DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_quad_trunc>2"
+  [(match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+   (match_operand:VQEXTI         1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], <MODE>mode,
+                                          <V_DOUBLE_TRUNC>mode);
+    DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_oct_trunc>2"
+  [(match_operand:<V_OCT_TRUNC> 0 "register_operand")
+   (match_operand:VOEXTI        1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode,
+                                         <V_DOUBLE_TRUNC>mode,
+                                         <V_QUAD_TRUNC>mode);
+    DONE;
+  }
+)
+
 ;; =========================================================================
 ;; == Early break auto-vectorization patterns
 ;; =========================================================================
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index d690162bb0c3..54f472afd8d0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -652,9 +652,13 @@ void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
 void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
 void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
+void expand_vec_double_sstrunc (rtx, rtx, machine_mode);
 void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_quad_sstrunc (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
                             machine_mode);
+void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
+                            machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
                          bool, void (*)(rtx *, rtx), enum avl_type);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 630fbd80e941..c48b87278a31 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4939,6 +4939,22 @@ expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
   emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for double vector mode,  like
+   DI => SI.  we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly.  */
+
+void
+expand_vec_double_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
+{
+  insn_code icode;
+  rtx zero = CONST0_RTX (Xmode);
+  enum unspec unspec = UNSPEC_VNCLIP;
+  rtx ops[] = {op_0, op_1, zero};
+
+  icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode);
+  emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
+}
+
 /* Expand the standard name ustrunc<m><n>2 for double vector mode,  like
    DI => HI.  we can leverage the vector fixed point vector narrowing
    fixed-point clip directly.  */
@@ -4953,6 +4969,20 @@ expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
   expand_vec_double_ustrunc (op_0, double_rtx, double_mode);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for quad vector mode,  like
+   DI => HI.  we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly.  */
+
+void
+expand_vec_quad_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+                        machine_mode double_mode)
+{
+  rtx double_rtx = gen_reg_rtx (double_mode);
+
+  expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+  expand_vec_double_sstrunc (op_0, double_rtx, double_mode);
+}
+
 /* Expand the standard name ustrunc<m><n>2 for double vector mode,  like
    DI => QI.  we can leverage the vector fixed point vector narrowing
    fixed-point clip directly.  */
@@ -4969,6 +4999,22 @@ expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
   expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode);
 }
 
+/* Expand the standard name sstrunc<m><n>2 for oct vector mode,  like
+   DI => QI.  we can leverage the vector fixed point vector narrowing
+   fixed-point clip directly.  */
+
+void
+expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+                       machine_mode double_mode, machine_mode quad_mode)
+{
+  rtx double_rtx = gen_reg_rtx (double_mode);
+  rtx quad_rtx = gen_reg_rtx (quad_mode);
+
+  expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+  expand_vec_double_sstrunc (quad_rtx, double_rtx, double_mode);
+  expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
+}
+
 /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
    well.  */
 void

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement vector SAT_TRUNC for signed integer

Reply via email to