From: Yunze Zhu <yunze...@linux.alibaba.com> This commit adds support for xtheadvector-specific unit-stride segment load/store intrinsics with b/h/w suffix. We also define enums to be used in thead-vector.md https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
V2: Change to reuse existed thead function base th_loadstore_width. Fix indentation error. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (BASE): New base_name. * config/riscv/riscv-vector-builtins-bases.h: New function_base. * config/riscv/riscv-vector-builtins-shapes.cc (build_th_loadstore): Define new builtin shapes. (struct th_seg_loadstore_def): Ditto. (build): Ditto. (SHAPE): Ditto. * config/riscv/riscv-vector-builtins-shapes.h: Ditto. * config/riscv/riscv-vector-builtins-types.def (DEF_RVV_TH_INT_TUPLE_OPS): New type. (DEF_RVV_TH_UINT_TUPLE_OPS): Ditto. (vint8m1x2_t): Ditto. (vint8m1x3_t): Ditto. (vint8m1x4_t): Ditto. (vint8m1x5_t): Ditto. (vint8m1x6_t): Ditto. (vint8m1x7_t): Ditto. (vint8m1x8_t): Ditto. (vint8m2x2_t): Ditto. (vint8m2x3_t): Ditto. (vint8m2x4_t): Ditto. (vint8m4x2_t): Ditto. (vint16m1x2_t): Ditto. (vint16m1x3_t): Ditto. (vint16m1x4_t): Ditto. (vint16m1x5_t): Ditto. (vint16m1x6_t): Ditto. (vint16m1x7_t): Ditto. (vint16m1x8_t): Ditto. (vint16m2x2_t): Ditto. (vint16m2x3_t): Ditto. (vint16m2x4_t): Ditto. (vint16m4x2_t): Ditto. (vint32m1x2_t): Ditto. (vint32m1x3_t): Ditto. (vint32m1x4_t): Ditto. (vint32m1x5_t): Ditto. (vint32m1x6_t): Ditto. (vint32m1x7_t): Ditto. (vint32m1x8_t): Ditto. (vint32m2x2_t): Ditto. (vint32m2x3_t): Ditto. (vint32m2x4_t): Ditto. (vint32m4x2_t): Ditto. (vint64m1x2_t): Ditto. (vint64m1x3_t): Ditto. (vint64m1x4_t): Ditto. (vint64m1x5_t): Ditto. (vint64m1x6_t): Ditto. (vint64m1x7_t): Ditto. (vint64m1x8_t): Ditto. (vint64m2x2_t): Ditto. (vint64m2x3_t): Ditto. (vint64m2x4_t): Ditto. (vint64m4x2_t): Ditto. (vuint8m1x2_t): Ditto. (vuint8m1x3_t): Ditto. (vuint8m1x4_t): Ditto. (vuint8m1x5_t): Ditto. (vuint8m1x6_t): Ditto. (vuint8m1x7_t): Ditto. (vuint8m1x8_t): Ditto. (vuint8m2x2_t): Ditto. (vuint8m2x3_t): Ditto. (vuint8m2x4_t): Ditto. (vuint8m4x2_t): Ditto. (vuint16m1x2_t): Ditto. (vuint16m1x3_t): Ditto. (vuint16m1x4_t): Ditto. (vuint16m1x5_t): Ditto. (vuint16m1x6_t): Ditto. (vuint16m1x7_t): Ditto. 
(vuint16m1x8_t): Ditto. (vuint16m2x2_t): Ditto. (vuint16m2x3_t): Ditto. (vuint16m2x4_t): Ditto. (vuint16m4x2_t): Ditto. (vuint32m1x2_t): Ditto. (vuint32m1x3_t): Ditto. (vuint32m1x4_t): Ditto. (vuint32m1x5_t): Ditto. (vuint32m1x6_t): Ditto. (vuint32m1x7_t): Ditto. (vuint32m1x8_t): Ditto. (vuint32m2x2_t): Ditto. (vuint32m2x3_t): Ditto. (vuint32m2x4_t): Ditto. (vuint32m4x2_t): Ditto. (vuint64m1x2_t): Ditto. (vuint64m1x3_t): Ditto. (vuint64m1x4_t): Ditto. (vuint64m1x5_t): Ditto. (vuint64m1x6_t): Ditto. (vuint64m1x7_t): Ditto. (vuint64m1x8_t): Ditto. (vuint64m2x2_t): Ditto. (vuint64m2x3_t): Ditto. (vuint64m2x4_t): Ditto. (vuint64m4x2_t): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TH_INT_TUPLE_OPS): New builtins def. (DEF_RVV_TH_UINT_TUPLE_OPS): Ditto. * config/riscv/t-riscv: Add include for Thead files. * config/riscv/thead-vector-builtins-functions.def (vlsegb): New intrinsics def. (vlsegh): Ditto. (vlsegw): Ditto. (vlsegbu): Ditto. (vlseghu): Ditto. (vlsegwu): Ditto. (vssegb): Ditto. (vssegh): Ditto. (vssegw): Ditto. * config/riscv/thead-vector.md (@pred_th_unit_seg_load<vlmem_op_attr><mode>): New RTL mode. (@pred_th_unit_seg_store<vlmem_op_attr><mode>): Ditto. * config/riscv/thead.cc (th_asm_output_opcode): Implement Thead function to add assembler insn code prefix/suffix. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/vlseg-vsseg.c: New test. * gcc.target/riscv/rvv/xtheadvector/vlsegu-vsseg.c: New test. 
--- .../riscv/riscv-vector-builtins-bases.cc | 132 ++++++++++------- .../riscv/riscv-vector-builtins-bases.h | 9 ++ .../riscv/riscv-vector-builtins-shapes.cc | 75 +++++++++- .../riscv/riscv-vector-builtins-shapes.h | 2 + .../riscv/riscv-vector-builtins-types.def | 104 ++++++++++++++ gcc/config/riscv/riscv-vector-builtins.cc | 43 ++++++ gcc/config/riscv/t-riscv | 4 + .../riscv/thead-vector-builtins-functions.def | 9 ++ gcc/config/riscv/thead-vector.md | 133 ++++++++++++++++++ gcc/config/riscv/thead.cc | 60 +++++--- .../riscv/rvv/xtheadvector/vlseg-vsseg.c | 118 ++++++++++++++++ .../riscv/rvv/xtheadvector/vlsegu-vsseg.c | 116 +++++++++++++++ 12 files changed, 733 insertions(+), 72 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseg-vsseg.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegu-vsseg.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index bf5172c6e04..4133a7846c4 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2136,7 +2136,7 @@ public: * th.vl(b/h/w)[u].v/th.vs(b/h/w)[u].v/th.vls(b/h/w)[u].v/th.vss(b/h/w)[u].v/ * th.vlx(b/h/w)[u].v/th.vs[u]x(b/h/w).v * codegen. 
*/ -template<bool STORE_P, lst_type LST_TYPE, int UNSPEC> +template<bool STORE_P, lst_type LST_TYPE, bool SEG_P, int UNSPEC> class th_loadstore_width : public function_base { public: @@ -2161,33 +2161,45 @@ public: rtx expand (function_expander &e) const override { gcc_assert (TARGET_XTHEADVECTOR); - if (LST_TYPE == LST_INDEXED) - { - if (STORE_P) - return e.use_exact_insn ( - code_for_pred_indexed_store_width (UNSPEC, UNSPEC, - e.vector_mode ())); - else - return e.use_exact_insn ( - code_for_pred_indexed_load_width (UNSPEC, e.vector_mode ())); - } - else if (LST_TYPE == LST_STRIDED) + if (!SEG_P) { - if (STORE_P) - return e.use_contiguous_store_insn ( - code_for_pred_strided_store_width (UNSPEC, e.vector_mode ())); + if (LST_TYPE == LST_INDEXED) + { + if (STORE_P) + return e.use_exact_insn ( + code_for_pred_indexed_store_width (UNSPEC, UNSPEC, + e.vector_mode ())); + else + return e.use_exact_insn ( + code_for_pred_indexed_load_width (UNSPEC, e.vector_mode ())); + } + else if (LST_TYPE == LST_STRIDED) + { + if (STORE_P) + return e.use_contiguous_store_insn ( + code_for_pred_strided_store_width (UNSPEC, e.vector_mode ())); + else + return e.use_contiguous_load_insn ( + code_for_pred_strided_load_width (UNSPEC, e.vector_mode ())); + } else - return e.use_contiguous_load_insn ( - code_for_pred_strided_load_width (UNSPEC, e.vector_mode ())); + { + if (STORE_P) + return e.use_contiguous_store_insn ( + code_for_pred_store_width (UNSPEC, e.vector_mode ())); + else + return e.use_contiguous_load_insn ( + code_for_pred_mov_width (UNSPEC, e.vector_mode ())); + } } else { if (STORE_P) - return e.use_contiguous_store_insn ( - code_for_pred_store_width (UNSPEC, e.vector_mode ())); + return e.use_exact_insn ( + code_for_pred_th_unit_seg_store (UNSPEC, e.vector_mode ())); else - return e.use_contiguous_load_insn ( - code_for_pred_mov_width (UNSPEC, e.vector_mode ())); + return e.use_exact_insn ( + code_for_pred_th_unit_seg_load (UNSPEC, e.vector_mode ())); } } }; @@ -2725,37 
+2737,46 @@ static CONSTEXPR const seg_indexed_load<UNSPEC_ORDERED> vloxseg_obj; static CONSTEXPR const seg_indexed_store<UNSPEC_UNORDERED> vsuxseg_obj; static CONSTEXPR const seg_indexed_store<UNSPEC_ORDERED> vsoxseg_obj; static CONSTEXPR const vlsegff vlsegff_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLB> vlb_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLBU> vlbu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLH> vlh_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLHU> vlhu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLW> vlw_obj; -static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, UNSPEC_TH_VLWU> vlwu_obj; -static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, UNSPEC_TH_VLB> vsb_obj; -static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, UNSPEC_TH_VLH> vsh_obj; -static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, UNSPEC_TH_VLW> vsw_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSB> vlsb_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSBU> vlsbu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSH> vlsh_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSHU> vlshu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSW> vlsw_obj; -static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, UNSPEC_TH_VLSWU> vlswu_obj; -static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, UNSPEC_TH_VLSB> vssb_obj; -static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, UNSPEC_TH_VLSH> vssh_obj; -static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, UNSPEC_TH_VLSW> vssw_obj; -static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXB> vlxb_obj; -static CONSTEXPR 
const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXBU> vlxbu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXH> vlxh_obj; -static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXHU> vlxhu_obj; -static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXW> vlxw_obj; -static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, UNSPEC_TH_VLXWU> vlxwu_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VLXB> vsxb_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VLXH> vsxh_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VLXW> vsxw_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VSUXB> vsuxb_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VSUXH> vsuxh_obj; -static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, UNSPEC_TH_VSUXW> vsuxw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLB> vlb_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLBU> vlbu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLH> vlh_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLHU> vlhu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLW> vlw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, false, UNSPEC_TH_VLWU> vlwu_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, false, UNSPEC_TH_VLB> vsb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, false, UNSPEC_TH_VLH> vsh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, false, UNSPEC_TH_VLW> vsw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, false, UNSPEC_TH_VLSB> vlsb_obj; +static CONSTEXPR const th_loadstore_width<false, 
LST_STRIDED, false, UNSPEC_TH_VLSBU> vlsbu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, false, UNSPEC_TH_VLSH> vlsh_obj; +static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, false, UNSPEC_TH_VLSHU> vlshu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, false, UNSPEC_TH_VLSW> vlsw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, false, UNSPEC_TH_VLSWU> vlswu_obj; +static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, false, UNSPEC_TH_VLSB> vssb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, false, UNSPEC_TH_VLSH> vssh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, false, UNSPEC_TH_VLSW> vssw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXB> vlxb_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXBU> vlxbu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXH> vlxh_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXHU> vlxhu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXW> vlxw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, false, UNSPEC_TH_VLXWU> vlxwu_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VLXB> vsxb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VLXH> vsxh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VLXW> vsxw_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VSUXB> vsuxb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VSUXH> vsuxh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, false, UNSPEC_TH_VSUXW> vsuxw_obj; static CONSTEXPR const th_extract vext_x_v_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, 
UNSPEC_TH_VLSEGB> vlsegb_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGH> vlsegh_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGW> vlsegw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGBU> vlsegbu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGHU> vlseghu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGWU> vlsegwu_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGB> vssegb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGH> vssegh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGW> vssegw_obj; /* Crypto Vector */ static CONSTEXPR const vandn vandn_obj; @@ -3086,6 +3107,15 @@ BASE (vsuxb) BASE (vsuxh) BASE (vsuxw) BASE (vext_x_v) +BASE (vlsegb) +BASE (vlsegh) +BASE (vlsegw) +BASE (vlsegbu) +BASE (vlseghu) +BASE (vlsegwu) +BASE (vssegb) +BASE (vssegh) +BASE (vssegw) /* Crypto vector */ BASE (vandn) BASE (vbrev) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index 4a2f6b4bdf7..be62fb1781e 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -317,6 +317,15 @@ extern const function_base *const vsuxb; extern const function_base *const vsuxh; extern const function_base *const vsuxw; extern const function_base *const vext_x_v; +extern const function_base *const vlsegb; +extern const function_base *const vlsegh; +extern const function_base *const vlsegw; +extern const function_base *const vlsegbu; +extern const function_base *const vlseghu; +extern const function_base *const vlsegwu; +extern const function_base *const vssegb; +extern const function_base *const vssegh; +extern const function_base 
*const vssegw; /* Below function_base are Vectro Crypto*/ extern const function_base *const vandn; extern const function_base *const vbrev; diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index b855d4c5fa5..4aa8824451d 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -259,6 +259,11 @@ build_th_loadstore (function_builder &b, const function_group_info &group, return; tree type = builtin_types[group.ops_infos.types[vec_type_idx].index].vector; + machine_mode mode = TYPE_MODE (type); + if (riscv_v_ext_tuple_mode_p (mode)) + type = group.ops_infos.ret.get_tuple_subpart_type + (group.ops_infos.types[vec_type_idx].index); + if (strstr (group.base_name, "l") && strstr (group.base_name, "u") && !TYPE_UNSIGNED (TREE_TYPE (type))) @@ -269,7 +274,6 @@ build_th_loadstore (function_builder &b, const function_group_info &group, && TYPE_UNSIGNED (TREE_TYPE (type))) return; - machine_mode mode = TYPE_MODE (type); int sew = GET_MODE_BITSIZE (GET_MODE_INNER (mode)); if (strstr (group.base_name, "h") && sew == 8) return; @@ -1389,6 +1393,74 @@ struct sf_vcix_def : public build_base }; +/* th_seg_loadstore_def class. */ +struct th_seg_loadstore_def : public build_base { +void build (function_builder &b, + const function_group_info &group) const override + { + for (unsigned int pred_idx = 0; group.preds[pred_idx] != NUM_PRED_TYPES; + ++pred_idx) + { + for (unsigned int vec_type_idx = 0; + group.ops_infos.types[vec_type_idx].index != NUM_VECTOR_TYPES; + ++vec_type_idx) + { + build_th_loadstore (b, group, pred_idx, vec_type_idx); + } + } + } + + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override { + /* Return nullptr if it can not be overloaded. 
*/ + if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred)) + return nullptr; + + if (strstr (instance.base_name, "vlseg")) + b.append_name ("__riscv_th_vlseg"); + else if (strstr (instance.base_name, "vsseg")) + b.append_name ("__riscv_th_vsseg"); + else + gcc_unreachable (); + + tree type = builtin_types[instance.type.index].vector; + machine_mode mode = TYPE_MODE (type); + + int nf = get_nf (mode); + /* vop --> vop<nf>. */ + b.append_nf (nf); + + /* vop<nf> --> vop<nf><b/h/w>. */ + if (strstr (instance.base_name, "segb")) + b.append_name ("b"); + else if (strstr (instance.base_name, "segh")) + b.append_name ("h"); + else if (strstr (instance.base_name, "segw")) + b.append_name ("w"); + else + gcc_unreachable (); + + if (strstr (instance.base_name, "l") + && TYPE_UNSIGNED (builtin_types[instance.type.index].scalar)) + b.append_name ("u"); + + if (!overloaded_p) + { + /* vop<nf><b/h/w> --> vop<nf><b/h/w>_v. */ + b.append_name (operand_suffixes[instance.op_info->op]); + /* vop<nf><b/h/w>_v --> vop<nf><b/h/w>_v_<type>. */ + b.append_name (type_suffixes[instance.type.index].vector); + } + + /* According to rvv-intrinsic-doc, it does not add "_m" suffix + for vop_m C++ overloaded API. 
*/ + if (overloaded_p && instance.pred == PRED_TYPE_m) + return b.finish_name (); + b.append_name (predication_suffixes[instance.pred]); + return b.finish_name (); + } +}; + SHAPE(vsetvl, vsetvl) SHAPE(vsetvl, vsetvlmax) SHAPE(loadstore, loadstore) @@ -1427,4 +1499,5 @@ SHAPE (sf_vqmacc, sf_vqmacc) SHAPE (sf_vfnrclip, sf_vfnrclip) SHAPE(sf_vcix_se, sf_vcix_se) SHAPE(sf_vcix, sf_vcix) +SHAPE (th_seg_loadstore, th_seg_loadstore) } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h b/gcc/config/riscv/riscv-vector-builtins-shapes.h index 2f2636ee386..f432d9edbff 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.h +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.h @@ -64,6 +64,8 @@ extern const function_shape *const sf_vqmacc; extern const function_shape *const sf_vfnrclip; extern const function_shape *const sf_vcix_se; extern const function_shape *const sf_vcix; +/* Xtheadvector extension. */ +extern const function_shape *const th_seg_loadstore; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def index ade6644b56e..7b6cbb118bc 100644 --- a/gcc/config/riscv/riscv-vector-builtins-types.def +++ b/gcc/config/riscv/riscv-vector-builtins-types.def @@ -381,6 +381,18 @@ along with GCC; see the file COPYING3. If not see #define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE) #endif +/* Use "DEF_RVV_TH_INT_TUPLE_OPS" macro include all signed tuple types + which will be iterated and registered as intrinsic functions. */ +#ifndef DEF_RVV_TH_INT_TUPLE_OPS +#define DEF_RVV_TH_INT_TUPLE_OPS(TYPE, REQUIRE) +#endif + +/* Use "DEF_RVV_TH_UINT_TUPLE_OPS" macro include all unsigned tuple types + which will be iterated and registered as intrinsic functions. 
*/ +#ifndef DEF_RVV_TH_UINT_TUPLE_OPS +#define DEF_RVV_TH_UINT_TUPLE_OPS(TYPE, REQUIRE) +#endif + DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint8mf4_t, 0) DEF_RVV_I_OPS (vint8mf2_t, 0) @@ -1501,6 +1513,96 @@ DEF_RVV_X2_WU_OPS (vuint32m1_t, 0) DEF_RVV_X2_WU_OPS (vuint32m2_t, 0) DEF_RVV_X2_WU_OPS (vuint32m4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x5_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x6_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x7_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m1x8_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m2x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m2x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m2x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint8m4x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x5_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x6_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x7_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m1x8_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m2x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m2x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m2x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint16m4x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x5_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x6_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x7_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m1x8_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m2x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m2x3_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m2x4_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint32m4x2_t, 0) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS 
(vint64m1x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m1x8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m2x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m2x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m2x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_INT_TUPLE_OPS (vint64m4x2_t, RVV_REQUIRE_ELEN_64) + +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x5_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x6_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x7_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m1x8_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m2x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m2x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m2x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint8m4x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x5_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x6_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x7_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m1x8_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m2x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m2x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m2x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint16m4x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x5_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x6_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x7_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m1x8_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m2x2_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m2x3_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m2x4_t, 0) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint32m4x2_t, 0) 
+DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m1x8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m2x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m2x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m2x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TH_UINT_TUPLE_OPS (vuint64m4x2_t, RVV_REQUIRE_ELEN_64) + #undef DEF_RVV_I_OPS #undef DEF_RVV_U_OPS #undef DEF_RVV_F_OPS @@ -1559,3 +1661,5 @@ DEF_RVV_X2_WU_OPS (vuint32m4_t, 0) #undef DEF_RVV_XFQF_OPS #undef DEF_RVV_X2_U_OPS #undef DEF_RVV_X2_WU_OPS +#undef DEF_RVV_TH_INT_TUPLE_OPS +#undef DEF_RVV_TH_UINT_TUPLE_OPS diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index f3c706bfba9..00105749cad 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -571,6 +571,25 @@ static const rvv_type_info xfqf_ops[] = { #include "riscv-vector-builtins-types.def" {NUM_VECTOR_TYPES, 0}}; +/* A list of Int Tuple types will be registered for intrinsic functions. */ +static const rvv_type_info th_tuple_int_ops[] = { +#define DEF_RVV_TH_INT_TUPLE_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#define DEF_RVV_TH_UINT_TUPLE_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + +/* A list of Int Tuple types will be registered for intrinsic functions. 
*/ +static const rvv_type_info th_tuple_sint_ops[] = { +#define DEF_RVV_TH_INT_TUPLE_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + +/* A list of Int Tuple types will be registered for intrinsic functions. */ +static const rvv_type_info th_tuple_uint_ops[] = { +#define DEF_RVV_TH_UINT_TUPLE_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + static CONSTEXPR const rvv_arg_type_info rvv_arg_type_info_end = rvv_arg_type_info (NUM_BASE_TYPES); @@ -3343,6 +3362,30 @@ static CONSTEXPR const rvv_op_info sf_vc_v_fvw_ops rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */ sf_vc_fvw_args /* Args */}; +/* A static operand information for vector_type func (const scalar_type *) + * function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_sint_scalar_const_ptr_ops + = {th_tuple_sint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector),/* Return type */ + scalar_const_ptr_args /* Args */}; + +/* A static operand information for vector_type func (const scalar_type *) + * function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_uint_scalar_const_ptr_ops + = {th_tuple_uint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector),/* Return type */ + scalar_const_ptr_args /* Args */}; + +/* A static operand information for void func (scalar_type *, vector_type) + * function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_int_scalar_ptr_ops + = {th_tuple_int_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + scalar_ptr_args /* Args */}; + /* A list of all RVV base function types. 
*/ static CONSTEXPR const function_type_info function_types[] = { #define DEF_RVV_TYPE_INDEX( \ diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 12e2b6e999a..90d01380d70 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -11,6 +11,7 @@ riscv-builtins.o: $(srcdir)/config/riscv/riscv-builtins.cc $(CONFIG_H) \ $(srcdir)/config/riscv/riscv-ftypes.def \ $(srcdir)/config/riscv/riscv-vector-builtins-types.def \ $(srcdir)/config/riscv/sifive-vector-builtins-functions.def \ + $(srcdir)/config/riscv/thead-vector-builtins-functions.def \ $(srcdir)/config/riscv/riscv-modes.def \ $(srcdir)/config/riscv/riscv-cmo.def \ $(srcdir)/config/riscv/riscv-scalar-crypto.def @@ -28,6 +29,7 @@ riscv-vector-builtins.o: $(srcdir)/config/riscv/riscv-vector-builtins.cc \ $(srcdir)/config/riscv/sifive-vector-builtins-bases.h \ $(srcdir)/config/riscv/riscv-vector-builtins-types.def \ $(srcdir)/config/riscv/sifive-vector-builtins-functions.def \ + $(srcdir)/config/riscv/thead-vector-builtins-functions.def \ $(RISCV_BUILTINS_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-builtins.cc @@ -178,6 +180,8 @@ $(srcdir)/config/riscv/riscv-vector-builtins.def: riscv-vector-type-indexer.gen. 
$(srcdir)/config/riscv/riscv-vector-builtins.h: $(srcdir)/config/riscv/riscv-vector-builtins.def $(srcdir)/config/riscv/sifive-vector-builtins-functions.def: riscv-vector-type-indexer.gen.def $(srcdir)/config/riscv/riscv-vector-builtins.h: $(srcdir)/config/riscv/sifive-vector-builtins-functions.def +$(srcdir)/config/riscv/thead-vector-builtins-functions.def: riscv-vector-type-indexer.gen.def +$(srcdir)/config/riscv/riscv-vector-builtins.h: $(srcdir)/config/riscv/thead-vector-builtins-functions.def riscv-vector-type-indexer.gen.def: s-riscv-vector-type-indexer.gen.defs ; @true diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def index fd3ba29bae9..c7702f4be48 100644 --- a/gcc/config/riscv/thead-vector-builtins-functions.def +++ b/gcc/config/riscv/thead-vector-builtins-functions.def @@ -34,6 +34,15 @@ DEF_RVV_FUNCTION (vsuxb, th_indexed_loadstore_width, none_m_preds, all_v_scalar_ DEF_RVV_FUNCTION (vsuxh, th_indexed_loadstore_width, none_m_preds, all_v_scalar_ptr_index_ops) DEF_RVV_FUNCTION (vsuxw, th_indexed_loadstore_width, none_m_preds, all_v_scalar_ptr_index_ops) DEF_RVV_FUNCTION (vext_x_v, th_extract, none_preds, iu_x_s_u_ops) +DEF_RVV_FUNCTION (vlsegb, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vlsegh, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vlsegw, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vlsegbu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vlseghu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vlsegwu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (vssegb, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) +DEF_RVV_FUNCTION (vssegh, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) +DEF_RVV_FUNCTION (vssegw, 
th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) #undef REQUIRED_EXTENSIONS #undef DEF_RVV_FUNCTION diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md index 5a02debdd20..007682c3af5 100644 --- a/gcc/config/riscv/thead-vector.md +++ b/gcc/config/riscv/thead-vector.md @@ -25,6 +25,13 @@ (define_c_enum "unspec" [ UNSPEC_TH_VSUXW UNSPEC_TH_VWLDST + + UNSPEC_TH_VLSEGB + UNSPEC_TH_VLSEGBU + UNSPEC_TH_VLSEGH + UNSPEC_TH_VLSEGHU + UNSPEC_TH_VLSEGW + UNSPEC_TH_VLSEGWU ]) (define_int_iterator UNSPEC_TH_VLMEM_OP [ @@ -45,6 +52,12 @@ (define_int_iterator UNSPEC_TH_VLXMEM_OP [ UNSPEC_TH_VLXW UNSPEC_TH_VLXWU ]) +(define_int_iterator UNSPEC_TH_VLSEGMEM_OP[ + UNSPEC_TH_VLSEGB UNSPEC_TH_VLSEGBU + UNSPEC_TH_VLSEGH UNSPEC_TH_VLSEGHU + UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU +]) + (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLB "b") (UNSPEC_TH_VLBU "bu") (UNSPEC_TH_VLH "h") (UNSPEC_TH_VLHU "hu") @@ -58,6 +71,9 @@ (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VSUXB "b") (UNSPEC_TH_VSUXH "h") (UNSPEC_TH_VSUXW "w") + (UNSPEC_TH_VLSEGB "b") (UNSPEC_TH_VLSEGBU "bu") + (UNSPEC_TH_VLSEGH "h") (UNSPEC_TH_VLSEGHU "hu") + (UNSPEC_TH_VLSEGW "w") (UNSPEC_TH_VLSEGWU "wu") ]) (define_int_attr vlmem_order_attr [ @@ -90,9 +106,89 @@ (define_int_iterator UNSPEC_TH_VSXMEM_OP [ UNSPEC_TH_VSUXW ]) +(define_int_iterator UNSPEC_TH_VSSEGMEM_OP[ + UNSPEC_TH_VLSEGB + UNSPEC_TH_VLSEGH + UNSPEC_TH_VLSEGW +]) + (define_mode_iterator V_VLS_VT [V VLS VT]) (define_mode_iterator V_VB_VLS_VT [V VB VLS VT]) +(define_mode_attr th_width[ + (RVVM8QI "b") (RVVM4QI "b") (RVVM2QI "b") (RVVM1QI "b") (RVVMF2QI "b") (RVVMF4QI "b") (RVVMF8QI "b") + + (RVVM8HI "h") (RVVM4HI "h") (RVVM2HI "h") (RVVM1HI "h") (RVVMF2HI "h") (RVVMF4HI "h") + + (RVVM8BF "h") (RVVM4BF "h") (RVVM2BF "h") (RVVM1BF "h") (RVVMF2BF "h") (RVVMF4BF "h") + + (RVVM8HF "h") (RVVM4HF "h") (RVVM2HF "h") (RVVM1HF "h") (RVVMF2HF "h") (RVVMF4HF "h") + + (RVVM8SI "w") (RVVM4SI "w") (RVVM2SI "w") (RVVM1SI "w") (RVVMF2SI "w") + + 
(RVVM8SF "w") (RVVM4SF "w") (RVVM2SF "w") (RVVM1SF "w") (RVVMF2SF "w") + + (RVVM1x8QI "b") (RVVMF2x8QI "b") (RVVMF4x8QI "b") (RVVMF8x8QI "b") + (RVVM1x7QI "b") (RVVMF2x7QI "b") (RVVMF4x7QI "b") (RVVMF8x7QI "b") + (RVVM1x6QI "b") (RVVMF2x6QI "b") (RVVMF4x6QI "b") (RVVMF8x6QI "b") + (RVVM1x5QI "b") (RVVMF2x5QI "b") (RVVMF4x5QI "b") (RVVMF8x5QI "b") + (RVVM2x4QI "b") (RVVM1x4QI "b") (RVVMF2x4QI "b") (RVVMF4x4QI "b") (RVVMF8x4QI "b") + (RVVM2x3QI "b") (RVVM1x3QI "b") (RVVMF2x3QI "b") (RVVMF4x3QI "b") (RVVMF8x3QI "b") + (RVVM4x2QI "b") (RVVM2x2QI "b") (RVVM1x2QI "b") (RVVMF2x2QI "b") (RVVMF4x2QI "b") (RVVMF8x2QI "b") + + (RVVM1x8HI "h") (RVVMF2x8HI "h") (RVVMF4x8HI "h") + (RVVM1x7HI "h") (RVVMF2x7HI "h") (RVVMF4x7HI "h") + (RVVM1x6HI "h") (RVVMF2x6HI "h") (RVVMF4x6HI "h") + (RVVM1x5HI "h") (RVVMF2x5HI "h") (RVVMF4x5HI "h") + (RVVM2x4HI "h") (RVVM1x4HI "h") (RVVMF2x4HI "h") (RVVMF4x4HI "h") + (RVVM2x3HI "h") (RVVM1x3HI "h") (RVVMF2x3HI "h") (RVVMF4x3HI "h") + (RVVM4x2HI "h") (RVVM2x2HI "h") (RVVM1x2HI "h") (RVVMF2x2HI "h") (RVVMF4x2HI "h") + + (RVVM1x8BF "h") (RVVMF2x8BF "h") (RVVMF4x8BF "h") + (RVVM1x7BF "h") (RVVMF2x7BF "h") (RVVMF4x7BF "h") + (RVVM1x6BF "h") (RVVMF2x6BF "h") (RVVMF4x6BF "h") + (RVVM1x5BF "h") (RVVMF2x5BF "h") (RVVMF4x5BF "h") + (RVVM2x4BF "h") (RVVM1x4BF "h") (RVVMF2x4BF "h") (RVVMF4x4BF "h") + (RVVM2x3BF "h") (RVVM1x3BF "h") (RVVMF2x3BF "h") (RVVMF4x3BF "h") + (RVVM4x2BF "h") (RVVM2x2BF "h") (RVVM1x2BF "h") (RVVMF2x2BF "h") (RVVMF4x2BF "h") + + (RVVM1x8HF "h") (RVVMF2x8HF "h") (RVVMF4x8HF "h") + (RVVM1x7HF "h") (RVVMF2x7HF "h") (RVVMF4x7HF "h") + (RVVM1x6HF "h") (RVVMF2x6HF "h") (RVVMF4x6HF "h") + (RVVM1x5HF "h") (RVVMF2x5HF "h") (RVVMF4x5HF "h") + (RVVM2x4HF "h") (RVVM1x4HF "h") (RVVMF2x4HF "h") (RVVMF4x4HF "h") + (RVVM2x3HF "h") (RVVM1x3HF "h") (RVVMF2x3HF "h") (RVVMF4x3HF "h") + (RVVM4x2HF "h") (RVVM2x2HF "h") (RVVM1x2HF "h") (RVVMF2x2HF "h") (RVVMF4x2HF "h") + + (RVVM1x8SI "w") (RVVMF2x8SI "w") + (RVVM1x7SI "w") (RVVMF2x7SI "w") + (RVVM1x6SI "w") 
(RVVMF2x6SI "w") + (RVVM1x5SI "w") (RVVMF2x5SI "w") + (RVVM2x4SI "w") (RVVM1x4SI "w") (RVVMF2x4SI "w") + (RVVM2x3SI "w") (RVVM1x3SI "w") (RVVMF2x3SI "w") + (RVVM4x2SI "w") (RVVM2x2SI "w") (RVVM1x2SI "w") (RVVMF2x2SI "w") + + (RVVM1x8SF "w") (RVVMF2x8SF "w") + (RVVM1x7SF "w") (RVVMF2x7SF "w") + (RVVM1x6SF "w") (RVVMF2x6SF "w") + (RVVM1x5SF "w") (RVVMF2x5SF "w") + (RVVM2x4SF "w") (RVVM1x4SF "w") (RVVMF2x4SF "w") + (RVVM2x3SF "w") (RVVM1x3SF "w") (RVVMF2x3SF "w") + (RVVM4x2SF "w") (RVVM2x2SF "w") (RVVM1x2SF "w") (RVVMF2x2SF "w") + + ;; VLS modes. + (V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") (V32QI "b") (V64QI "b") (V128QI "b") (V256QI "b") (V512QI "b") + (V1024QI "b") (V2048QI "b") (V4096QI "b") + (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") (V16HI "h") (V32HI "h") (V64HI "h") (V128HI "h") (V256HI "h") + (V512HI "h") (V1024HI "h") (V2048HI "h") + (V1SI "w") (V2SI "w") (V4SI "w") (V8SI "w") (V16SI "w") (V32SI "w") (V64SI "w") (V128SI "w") (V256SI "w") + (V512SI "w") (V1024SI "w") + (V1HF "h") (V2HF "h") (V4HF "h") (V8HF "h") (V16HF "h") (V32HF "h") (V64HF "h") (V128HF "h") (V256HF "h") + (V512HF "h") (V1024HF "h") (V2048HF "h") + (V1SF "w") (V2SF "w") (V4SF "w") (V8SF "w") (V16SF "w") (V32SF "w") (V64SF "w") (V128SF "w") (V256SF "w") + (V512SF "w") (V1024SF "w") +]) + (define_split [(set (match_operand:V_VB_VLS_VT 0 "reg_or_mem_operand") (match_operand:V_VB_VLS_VT 1 "reg_or_mem_operand"))] @@ -350,3 +446,40 @@ (define_insn "*pred_th_extract<mode>" "vext.x.v\t%0,%1,%2" [(set_attr "type" "vimovvx") (set_attr "mode" "<MODE>")]) + +(define_insn "@pred_th_unit_seg_load<vlmem_op_attr><mode>" + [(set (match_operand:VT 0 "register_operand" "=vr, vr, vd") + (if_then_else:VT + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm") + (match_operand 4 "vector_length_operand" " rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i") + (match_operand 6 "const_int_operand" " i, i, i") + (match_operand 7 "const_int_operand" " i, i, i") + 
(reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VLSEGMEM_OP) + (unspec:VT + [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ") + (mem:BLK (scratch))] UNSPEC_TH_VLSEGMEM_OP) + (match_operand:VT 2 "vector_merge_operand" " 0, vu, vu")))] + "TARGET_XTHEADVECTOR" + "vlseg<nf><vlmem_op_attr>.v\t%0,(%z3)%p1" + [(set_attr "type" "vlsegde") + (set_attr "mode" "<MODE>")]) + +(define_insn "@pred_th_unit_seg_store<vlmem_op_attr><mode>" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1") + (match_operand 3 "vector_length_operand" " rK") + (match_operand 4 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VSSEGMEM_OP) + (match_operand 1 "pmode_reg_or_0_operand" " rJ") + (match_operand:VT 2 "register_operand" " vr") + (mem:BLK (scratch))] UNSPEC_TH_VSSEGMEM_OP))] + "TARGET_XTHEADVECTOR" + "vsseg<nf><vlmem_op_attr>.v\t%2,(%z1)%p0" + [(set_attr "type" "vssegte") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc index de23e410d4c..628f50d012c 100644 --- a/gcc/config/riscv/thead.cc +++ b/gcc/config/riscv/thead.cc @@ -1059,12 +1059,17 @@ th_asm_output_opcode (FILE *asm_out_file, const char *p) get_attr_type (current_output_insn) == TYPE_VSSEGTE ? fputs ("th.vsseg", asm_out_file) : fputs ("th.vlseg", asm_out_file); - asm_fprintf (asm_out_file, "%c", p[5]); - fputs ("e", asm_out_file); - if (strstr (p, "e8")) - return p+8; + if (strstr (p, "b") || strstr (p, "h") || strstr (p, "w")) + return p+5; else - return p+9; + { + asm_fprintf (asm_out_file, "%c", p[5]); + fputs ("e", asm_out_file); + if (strstr (p, "e8")) + return p+8; + else + return p+9; + } } if (get_attr_type (current_output_insn) == TYPE_VLSEGDS || @@ -1073,36 +1078,51 @@ th_asm_output_opcode (FILE *asm_out_file, const char *p) get_attr_type (current_output_insn) == TYPE_VSSEGTS ? 
fputs ("th.vssseg", asm_out_file) : fputs ("th.vlsseg", asm_out_file); - asm_fprintf (asm_out_file, "%c", p[6]); - fputs ("e", asm_out_file); - if (strstr (p, "e8")) - return p+9; + if (strstr (p, "b") || strstr (p, "h") || strstr (p, "w")) + return p+6; else - return p+10; + { + asm_fprintf (asm_out_file, "%c", p[6]); + fputs ("e", asm_out_file); + if (strstr (p, "e8")) + return p+9; + else + return p+10; + } } if (get_attr_type (current_output_insn) == TYPE_VLSEGDUX || get_attr_type (current_output_insn) == TYPE_VLSEGDOX) { fputs ("th.vlxseg", asm_out_file); - asm_fprintf (asm_out_file, "%c", p[7]); - fputs ("e", asm_out_file); - if (strstr (p, "ei8")) - return p+11; + if (strstr (p, "b") || strstr (p, "h") || strstr (p, "w")) + return p+6; else - return p+12; + { + asm_fprintf (asm_out_file, "%c", p[7]); + fputs ("e", asm_out_file); + if (strstr (p, "ei8")) + return p+11; + else + return p+12; + } } if (get_attr_type (current_output_insn) == TYPE_VSSEGTUX || get_attr_type (current_output_insn) == TYPE_VSSEGTOX) { fputs ("th.vsxseg", asm_out_file); - asm_fprintf (asm_out_file, "%c", p[7]); - fputs ("e", asm_out_file); - if (strstr (p, "ei8")) - return p+11; + if (strstr (p, "b") || strstr (p, "h") || strstr (p, "w")) + return p+6; else - return p+12; + { + asm_fprintf (asm_out_file, "%c", p[7]); + fputs ("e", asm_out_file); + if (strstr (p, "ei8")) + return p+11; + else + return p+12; + } } if (get_attr_type (current_output_insn) == TYPE_VNSHIFT) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseg-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseg-vsseg.c new file mode 100644 index 00000000000..1911824bbfc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseg-vsseg.c @@ -0,0 +1,118 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" { target rv32 } } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** 
th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1 (void * in, void *out) +{ + vint16m1x2_t v = __riscv_th_vlseg2h_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2h_v_i16m1x2_tu (v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = 
__riscv_th_vlseg2h_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2h_v_i16m1x2_m (mask, in, 4); + vint16m1_t v_0 = __riscv_vget_i16m1(v, 0); + vint16m1_t v_1 = __riscv_vget_i16m1(v, 1); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = __riscv_th_vlseg2h_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2h_v_i16m1x2_tumu (mask, v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4); + vint16m1_t v4_0 = 
__riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegu-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegu-vsseg.c new file mode 100644 index 00000000000..c3791f7872f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegu-vsseg.c @@ -0,0 +1,116 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" { target rv32 } } + */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1(void *in, void *out) { + vuint16m1x2_t v = __riscv_th_vlseg2hu_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2hu_v_u16m1x2_tu(v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + 
__riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2(void *in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2hu_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2hu_v_u16m1x2_m(mask, in, 4); + vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0); + vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3(void *in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2hu_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2hu_v_u16m1x2_tumu(mask, v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu(mask, v3_0, v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu(mask, v3_1, v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu(mask, v4_0, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu(mask, v4_1, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} -- 2.47.1