From: Pan Li <[email protected]>
This patch implements the sstrunc standard name for vector signed integers.
Form 1:
/* Define vec_sat_s_trunc_<NT>_<WT>_fmt_1, which stores in[i] converted
   from WT to NT into out[i] for each i < limit.  Values inside
   [NT_MIN, NT_MAX] are converted unchanged; out-of-range values become
   NT_MIN when negative and NT_MAX otherwise.  */
#define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
void __attribute__((noinline)) \
vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
WT x = in[i]; \
NT trunc = (NT)x; \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
? trunc \
: x < 0 ? NT_MIN : NT_MAX; \
} \
}
DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)
Before this patch:
27 │ vsetvli a5,a2,e64,m1,ta,ma
28 │ vle64.v v1,0(a1)
29 │ slli a3,a5,3
30 │ slli a4,a5,2
31 │ sub a2,a2,a5
32 │ add a1,a1,a3
33 │ vadd.vv v0,v1,v5
34 │ vsetvli zero,zero,e32,mf2,ta,ma
35 │ vnsrl.wx v2,v1,a6
36 │ vncvt.x.x.w v1,v1
37 │ vsetvli zero,zero,e64,m1,ta,ma
38 │ vmsgtu.vv v0,v0,v4
39 │ vsetvli zero,zero,e32,mf2,ta,mu
40 │ vneg.v v2,v2
41 │ vxor.vv v1,v2,v3,v0.t
42 │ vse32.v v1,0(a0)
43 │ add a0,a0,a4
44 │ bne a2,zero,.L3
After this patch:
16 │ vsetvli a5,a2,e32,mf2,ta,ma
17 │ vle64.v v1,0(a1)
18 │ slli a3,a5,3
19 │ slli a4,a5,2
20 │ sub a2,a2,a5
21 │ add a1,a1,a3
22 │ vnclip.wi v1,v1,0
23 │ vse32.v v1,0(a0)
24 │ add a0,a0,a4
25 │ bne a2,zero,.L3
The following test suite passed for this patch:
* The rv64gcv full regression test.
gcc/ChangeLog:
* config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add
new pattern sstrunc for double trunc.
(sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc.
(sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc.
* config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add
new func decl to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.
* config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new
func to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.
Signed-off-by: Pan Li <[email protected]>
---
gcc/config/riscv/autovec.md | 34 ++++++++++++++++++++++++
gcc/config/riscv/riscv-protos.h | 4 +++
gcc/config/riscv/riscv-v.cc | 46 +++++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 7dc78a48874..82d65a95e7a 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2779,6 +2779,40 @@ (define_expand "ustrunc<mode><v_oct_trunc>2"
}
)
+(define_expand "sstrunc<mode><v_double_trunc>2"
+ [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+ (match_operand:VWEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+ <MODE>mode);
+ DONE;
+ }
+)
+
+(define_expand "sstrunc<mode><v_quad_trunc>2"
+ [(match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+ (match_operand:VQEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], <MODE>mode,
+ <V_DOUBLE_TRUNC>mode);
+ DONE;
+ }
+)
+
+(define_expand "sstrunc<mode><v_oct_trunc>2"
+ [(match_operand:<V_OCT_TRUNC> 0 "register_operand")
+ (match_operand:VOEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode,
+ <V_DOUBLE_TRUNC>mode,
+ <V_QUAD_TRUNC>mode);
+ DONE;
+ }
+)
+
;; =========================================================================
;; == Early break auto-vectorization patterns
;; =========================================================================
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index b2f5d72f494..2b2378468e2 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -651,9 +651,13 @@ void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
+void expand_vec_double_sstrunc (rtx, rtx, machine_mode);
void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_quad_sstrunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
+void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
+ machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index fba35652cc2..65d36dc31d2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4927,6 +4927,22 @@ expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
}
+/* Expand the standard name sstrunc<m><n>2 for double vector mode, like
+ DI => SI.  Narrow OP_1 (in VEC_MODE) into OP_0 with a single vector
+ narrowing fixed-point clip (vnclip) whose scalar operand is zero.  */
+
+void
+expand_vec_double_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
+{
+ insn_code icode;
+ rtx zero = CONST0_RTX (Xmode);
+ enum unspec unspec = UNSPEC_VNCLIP;
+ rtx ops[] = {op_0, op_1, zero};
+
+ icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode);
+ emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => HI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
@@ -4941,6 +4957,20 @@ expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_ustrunc (op_0, double_rtx, double_mode);
}
+/* Expand the standard name sstrunc<m><n>2 for quad vector mode, like
+ DI => HI.  Chain two double truncations: OP_1 (in VEC_MODE) to a fresh
+ DOUBLE_MODE register, then that register to OP_0.  */
+
+void
+expand_vec_quad_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+ machine_mode double_mode)
+{
+ rtx double_rtx = gen_reg_rtx (double_mode);
+
+ expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+ expand_vec_double_sstrunc (op_0, double_rtx, double_mode);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => QI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
@@ -4957,6 +4987,22 @@ expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode);
}
+/* Expand the standard name sstrunc<m><n>2 for oct vector mode, like
+ DI => QI.  Chain three double truncations through fresh DOUBLE_MODE
+ and QUAD_MODE registers: OP_1 => double => quad => OP_0.  */
+
+void
+expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+ machine_mode double_mode, machine_mode quad_mode)
+{
+ rtx double_rtx = gen_reg_rtx (double_mode);
+ rtx quad_rtx = gen_reg_rtx (quad_mode);
+
+ expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+ expand_vec_double_sstrunc (quad_rtx, double_rtx, double_mode);
+ expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
--
2.43.0