From: Yunze Zhu <yunze...@linux.alibaba.com> This commit adds support for the xtheadvector-specific strided segment load/store intrinsics with b/h/w suffixes. We also define the enums to be used in thead-vector-builtins-bases.cc. https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (struct th_seg_loadstore_def):Define new builtin shapes. * config/riscv/riscv-vector-builtins.cc:Define new operand informations. * config/riscv/thead-vector-builtins-bases.cc (class th_vlsseg):New function. (class th_vssseg):Ditto. (BASE):New base_name. * config/riscv/thead-vector-builtins-bases.h:New function_base. * config/riscv/thead-vector-builtins-functions.def (th_vlsseg):New intrinsics def. (th_vlssegu):Ditto. (th_vssseg):Ditto. * config/riscv/thead-vector.md (@pred_th_strided_load<vlmem_op_attr><mode>):New RTL mode. (@pred_th_strided_store<vlmem_op_attr><mode>):Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c: New test. * gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c: New test. --- .../riscv/riscv-vector-builtins-shapes.cc | 4 + gcc/config/riscv/riscv-vector-builtins.cc | 24 ++++ .../riscv/thead-vector-builtins-bases.cc | 78 +++++++++++ .../riscv/thead-vector-builtins-bases.h | 3 + .../riscv/thead-vector-builtins-functions.def | 3 + gcc/config/riscv/thead-vector.md | 61 +++++++++ .../riscv/rvv/xtheadvector/vlsseg-vssseg.c | 125 ++++++++++++++++++ .../riscv/rvv/xtheadvector/vlssegu-vssseg.c | 125 ++++++++++++++++++ 8 files changed, 423 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index c6a10c2e5fa..b34eb4b2ee5 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -1400,8 +1400,12 @@ struct th_seg_loadstore_def : public build_base { if (strstr (instance.base_name, "vlseg")) b.append_name ("__riscv_th_vlseg"); + else if (strstr (instance.base_name, "vlsseg")) + b.append_name ("__riscv_th_vlsseg"); else if (strstr (instance.base_name, "vsseg")) 
b.append_name ("__riscv_th_vsseg"); + else if (strstr (instance.base_name, "vssseg")) + b.append_name ("__riscv_th_vssseg"); else gcc_unreachable (); diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index a8967fcafc6..adfbe6882fa 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -3387,6 +3387,30 @@ static CONSTEXPR const rvv_op_info th_tuple_v_int_scalar_ptr_ops rvv_arg_type_info (RVV_BASE_void), /* Return type */ scalar_ptr_args /* Args */}; +/* A static operand information for vector_type func (const scalar_type *, + * ptrdiff_t) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops + = {th_tuple_sint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + scalar_const_ptr_ptrdiff_args /* Args */}; + +/* A static operand information for vector_type func (const scalar_type *, + * ptrdiff_t) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops + = {th_tuple_uint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + scalar_const_ptr_ptrdiff_args /* Args */}; + +/* A static operand information for void func (scalar_type *, ptrdiff_t, + * vector_type) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_int_scalar_ptr_ptrdiff_ops + = {th_tuple_int_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + scalar_ptr_ptrdiff_args /* Args */}; + /* A list of all RVV base function types. 
*/ static CONSTEXPR const function_type_info function_types[] = { #define DEF_RVV_TYPE_INDEX( \ diff --git a/gcc/config/riscv/thead-vector-builtins-bases.cc b/gcc/config/riscv/thead-vector-builtins-bases.cc index d1afcf80307..2c59b384d6c 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.cc +++ b/gcc/config/riscv/thead-vector-builtins-bases.cc @@ -121,10 +121,85 @@ public: } }; +/* Implements vlsseg (b/h/w)[u].v codegen. */ +template <bool IS_SIGNED = false> +class th_vlsseg : public function_base { +public: + unsigned int call_properties (const function_instance &) const override { + return CP_READ_MEMORY; + } + + bool can_be_overloaded_p (enum predication_type_index pred) const override { + return pred != PRED_TYPE_none; + } + + rtx expand (function_expander &e) const override { + gcc_assert (TARGET_XTHEADVECTOR); + unsigned sew = GET_MODE_BITSIZE (GET_MODE_INNER (e.vector_mode ())); + int UNSPEC; + switch (sew) + { + case 8: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSSEGB : UNSPEC_TH_VLSSEGBU; + break; + case 16: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSSEGH : UNSPEC_TH_VLSSEGHU; + break; + case 32: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSSEGW : UNSPEC_TH_VLSSEGWU; + break; + default: + gcc_unreachable (); + } + return e.use_exact_insn ( + code_for_pred_th_strided_load (UNSPEC, e.vector_mode ())); + } +}; + +/* Implements vssseg (b/h/w)[u].v codegen. 
*/ +class th_vssseg : public function_base { +public: + bool apply_tail_policy_p () const override { return false; } + bool apply_mask_policy_p () const override { return false; } + + unsigned int call_properties (const function_instance &) const override { + return CP_WRITE_MEMORY; + } + + bool can_be_overloaded_p (enum predication_type_index) const override { + return true; + } + + rtx expand (function_expander &e) const override { + gcc_assert (TARGET_XTHEADVECTOR); + unsigned sew = GET_MODE_BITSIZE (GET_MODE_INNER (e.vector_mode ())); + int UNSPEC; + switch (sew) + { + case 8: + UNSPEC = UNSPEC_TH_VLSSEGB; + break; + case 16: + UNSPEC = UNSPEC_TH_VLSSEGH; + break; + case 32: + UNSPEC = UNSPEC_TH_VLSSEGW; + break; + default: + gcc_unreachable (); + } + return e.use_exact_insn ( + code_for_pred_th_strided_store (UNSPEC, e.vector_mode ())); + } +}; + /* Xtheadvector */ static CONSTEXPR const th_vlseg<true> th_vlseg_obj; static CONSTEXPR const th_vlseg<false> th_vlsegu_obj; static CONSTEXPR const th_vsseg th_vsseg_obj; +static CONSTEXPR const th_vlsseg<true> th_vlsseg_obj; +static CONSTEXPR const th_vlsseg<false> th_vlssegu_obj; +static CONSTEXPR const th_vssseg th_vssseg_obj; /* Declare the function base NAME, pointing it to an instance of class <NAME>_obj. 
*/ @@ -135,4 +210,7 @@ static CONSTEXPR const th_vsseg th_vsseg_obj; BASE (th_vlseg) BASE (th_vlsegu) BASE (th_vsseg) +BASE (th_vlsseg) +BASE (th_vlssegu) +BASE (th_vssseg) } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-bases.h b/gcc/config/riscv/thead-vector-builtins-bases.h index 6614c177504..e77aed5c259 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.h +++ b/gcc/config/riscv/thead-vector-builtins-bases.h @@ -29,6 +29,9 @@ namespace bases { extern const function_base *const th_vlseg; extern const function_base *const th_vlsegu; extern const function_base *const th_vsseg; +extern const function_base *const th_vlsseg; +extern const function_base *const th_vlssegu; +extern const function_base *const th_vssseg; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def index 01eedc1571c..b985746f8bd 100644 --- a/gcc/config/riscv/thead-vector-builtins-functions.def +++ b/gcc/config/riscv/thead-vector-builtins-functions.def @@ -37,6 +37,9 @@ DEF_RVV_FUNCTION (vext_x_v, th_extract, none_preds, iu_x_s_u_ops) DEF_RVV_FUNCTION (th_vlseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vlsegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vsseg, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) +DEF_RVV_FUNCTION (th_vlsseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops) +DEF_RVV_FUNCTION (th_vlssegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops) +DEF_RVV_FUNCTION (th_vssseg, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ptrdiff_ops) #undef REQUIRED_EXTENSIONS diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md index 007682c3af5..7ba06c6ab5b 100644 --- a/gcc/config/riscv/thead-vector.md +++ b/gcc/config/riscv/thead-vector.md @@ -32,6 +32,13 
@@ (define_c_enum "unspec" [ UNSPEC_TH_VLSEGHU UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU + + UNSPEC_TH_VLSSEGB + UNSPEC_TH_VLSSEGBU + UNSPEC_TH_VLSSEGH + UNSPEC_TH_VLSSEGHU + UNSPEC_TH_VLSSEGW + UNSPEC_TH_VLSSEGWU ]) (define_int_iterator UNSPEC_TH_VLMEM_OP [ @@ -58,6 +65,12 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[ UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU ]) +(define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[ + UNSPEC_TH_VLSSEGB UNSPEC_TH_VLSSEGBU + UNSPEC_TH_VLSSEGH UNSPEC_TH_VLSSEGHU + UNSPEC_TH_VLSSEGW UNSPEC_TH_VLSSEGWU +]) + (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLB "b") (UNSPEC_TH_VLBU "bu") (UNSPEC_TH_VLH "h") (UNSPEC_TH_VLHU "hu") @@ -74,6 +87,9 @@ (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLSEGB "b") (UNSPEC_TH_VLSEGBU "bu") (UNSPEC_TH_VLSEGH "h") (UNSPEC_TH_VLSEGHU "hu") (UNSPEC_TH_VLSEGW "w") (UNSPEC_TH_VLSEGWU "wu") + (UNSPEC_TH_VLSSEGB "b") (UNSPEC_TH_VLSSEGBU "bu") + (UNSPEC_TH_VLSSEGH "h") (UNSPEC_TH_VLSSEGHU "hu") + (UNSPEC_TH_VLSSEGW "w") (UNSPEC_TH_VLSSEGWU "wu") ]) (define_int_attr vlmem_order_attr [ @@ -112,6 +128,12 @@ (define_int_iterator UNSPEC_TH_VSSEGMEM_OP[ UNSPEC_TH_VLSEGW ]) +(define_int_iterator UNSPEC_TH_VSSSEGMEM_OP[ + UNSPEC_TH_VLSSEGB + UNSPEC_TH_VLSSEGH + UNSPEC_TH_VLSSEGW +]) + (define_mode_iterator V_VLS_VT [V VLS VT]) (define_mode_iterator V_VB_VLS_VT [V VB VLS VT]) @@ -483,3 +505,42 @@ (define_insn "@pred_th_unit_seg_store<vlmem_op_attr><mode>" "vsseg<nf><vlmem_op_attr>.v\t%2,(%z1)%p0" [(set_attr "type" "vssegte") (set_attr "mode" "<MODE>")]) + +(define_insn "@pred_th_strided_load<vlmem_op_attr><mode>" + [(set (match_operand:VT 0 "register_operand" "=vr, vr, vd") + (if_then_else:VT + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm") + (match_operand 5 "vector_length_operand" " rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i") + (match_operand 7 "const_int_operand" " i, i, i") + (match_operand 8 "const_int_operand" " i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] 
UNSPEC_TH_VLSSEGMEM_OP) + (unspec:VT + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ") + (match_operand 4 "pmode_reg_or_0_operand" " rJ, rJ, rJ") + (mem:BLK (scratch))] UNSPEC_TH_VLSSEGMEM_OP) + (match_operand:VT 2 "vector_merge_operand" " 0, vu, vu")))] + "TARGET_XTHEADVECTOR" + "vlsseg<nf><vlmem_op_attr>.v\t%0,(%z3),%z4%p1" + [(set_attr "type" "vlsegds") + (set_attr "mode" "<MODE>")]) + +(define_insn "@pred_th_strided_store<vlmem_op_attr><mode>" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1") + (match_operand 4 "vector_length_operand" " rK") + (match_operand 5 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VSSSEGMEM_OP) + (match_operand 1 "pmode_reg_or_0_operand" " rJ") + (match_operand 2 "pmode_reg_or_0_operand" " rJ") + (match_operand:VT 3 "register_operand" " vr") + (mem:BLK (scratch))] UNSPEC_TH_VSSSEGMEM_OP))] + "TARGET_XTHEADVECTOR" + "vssseg<nf><vlmem_op_attr>.v\t%3,(%z1),%z2%p0" + [(set_attr "type" "vssegts") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c new file mode 100644 index 00000000000..e97ff7e74ea --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f1 (void * in, void *out, ptrdiff_t s) +{ + vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4); + vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_tu (v, in, s, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4); +} + +/* +** f2: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f2 (void * in, void *out, ptrdiff_t s) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4); + vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_m (mask, in, s, 4); + vint16m1_t v_0 = __riscv_vget_i16m1(v, 0); + vint16m1_t v_1 = 
__riscv_vget_i16m1(v, 1); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_m (mask, v_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_m (mask, v_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_m (mask, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_m (mask, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4); +} + +/* +** f3: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f3 (void * in, void *out, ptrdiff_t s) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4); + vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_tumu (mask, v, in, s, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, 
v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4); +} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c new file mode 100644 index 00000000000..b645eb3583f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f1 (void * in, void *out, ptrdiff_t s) +{ + vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4); + vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_tu (v, in, s, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1 (v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1 (v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1 (v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1 (v3_1, 
v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4); +} + +/* +** f2: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f2 (void * in, void *out, ptrdiff_t s) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4); + vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_m (mask, in, s, 4); + vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0); + vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_m (mask, v_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_m (mask, v_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_m (mask, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_m (mask, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4); +} + +/* +** f3: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** 
th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+ +** ret +*/ +void f3 (void * in, void *out, ptrdiff_t s) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4); + vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_tumu (mask, v, in, s, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu (mask, v3_1, v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4); +} \ No newline at end of file -- 2.47.1