From: Yunze Zhu <[email protected]>
This commit adds support for the xtheadvector-specific fault-only-first
segment load intrinsics with the b/h/w suffixes. We also define the new
unspec enums used in thead-vector.md.
https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
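For reference, a minimal usage sketch of the new intrinsics (the wrapper
function foo is hypothetical; the intrinsic names and the vl argument of 4
follow the new tests below):

    #include "riscv_th_vector.h"

    void foo (void *in, void *out)
    {
      /* Fault-only-first segment load: vl may be truncated at the first
         faulting element after element 0.  */
      vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
      /* Unit-stride segment store of the loaded tuple.  */
      __riscv_th_vsseg2h_v_i16m1x2 (out, v, 4);
    }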
V2:
Change to reuse the existing thead function base th_loadstore_width.
V3:
Define a new mode iterator and remove unnecessary insn patterns
to reduce the number of patterns introduced.
gcc/ChangeLog:
* config/riscv/riscv-vector-builtins-bases.cc (BASE): New base_name.
* config/riscv/riscv-vector-builtins-bases.h: New function_base.
* config/riscv/riscv-vector-builtins-shapes.cc (build): Append "ff"
suffix when building fault-only-first intrinsic names.
* config/riscv/thead-vector-builtins-functions.def (vlsegbff): New
intrinsic def.
(vlseghff): Ditto.
(vlsegwff): Ditto.
(vlsegbuff): Ditto.
(vlseghuff): Ditto.
(vlsegwuff): Ditto.
* config/riscv/thead-vector.md (UNSPEC_TH_VLSEGBFF, UNSPEC_TH_VLSEGBUFF)
(UNSPEC_TH_VLSEGHFF, UNSPEC_TH_VLSEGHUFF, UNSPEC_TH_VLSEGWFF)
(UNSPEC_TH_VLSEGWUFF): New.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c: New test.
* gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc | 12 ++
.../riscv/riscv-vector-builtins-bases.h | 6 +
.../riscv/riscv-vector-builtins-shapes.cc | 10 +-
.../riscv/thead-vector-builtins-functions.def | 6 +
gcc/config/riscv/thead-vector.md | 13 ++
.../riscv/rvv/xtheadvector/vlsegff-vsseg.c | 118 ++++++++++++++++++
.../riscv/rvv/xtheadvector/vlseguff-vsseg.c | 115 +++++++++++++++++
7 files changed, 277 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 3220fb4fd9c..5a46f62ce88 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2816,6 +2816,12 @@ static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VL
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGB> vsxsegb_obj;
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGH> vsxsegh_obj;
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGW> vsxsegw_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGBFF> vlsegbff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGHFF> vlseghff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGWFF> vlsegwff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGBUFF> vlsegbuff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGHUFF> vlseghuff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGWUFF> vlsegwuff_obj;
/* Crypto Vector */
static CONSTEXPR const vandn vandn_obj;
@@ -3173,6 +3179,12 @@ BASE (vlxsegwu)
BASE (vsxsegb)
BASE (vsxsegh)
BASE (vsxsegw)
+BASE (vlsegbff)
+BASE (vlseghff)
+BASE (vlsegwff)
+BASE (vlsegbuff)
+BASE (vlseghuff)
+BASE (vlsegwuff)
/* Crypto vector */
BASE (vandn)
BASE (vbrev)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 9a8d378019e..eedfff8a922 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -344,6 +344,12 @@ extern const function_base *const vlxsegwu;
extern const function_base *const vsxsegb;
extern const function_base *const vsxsegh;
extern const function_base *const vsxsegw;
+extern const function_base *const vlsegbff;
+extern const function_base *const vlseghff;
+extern const function_base *const vlsegwff;
+extern const function_base *const vlsegbuff;
+extern const function_base *const vlseghuff;
+extern const function_base *const vlsegwuff;
/* Below function_base are Vectro Crypto*/
extern const function_base *const vandn;
extern const function_base *const vbrev;
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index fec333df5ab..42b93407680 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -1452,9 +1452,13 @@ void build (function_builder &b,
else
gcc_unreachable ();
- if (strstr (instance.base_name, "l")
- && TYPE_UNSIGNED (builtin_types[instance.type.index].scalar))
- b.append_name ("u");
+ if (strstr (instance.base_name, "l"))
+ {
+ if (TYPE_UNSIGNED (builtin_types[instance.type.index].scalar))
+ b.append_name ("u");
+ if (strstr (instance.base_name, "ff"))
+ b.append_name ("ff");
+ }
if (!overloaded_p)
{
diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def
index 2d28b4eb33e..ba8172fa337 100644
--- a/gcc/config/riscv/thead-vector-builtins-functions.def
+++ b/gcc/config/riscv/thead-vector-builtins-functions.def
@@ -61,6 +61,12 @@ DEF_RVV_FUNCTION (vlxsegwu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar
 DEF_RVV_FUNCTION (vsxsegb, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
 DEF_RVV_FUNCTION (vsxsegh, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
 DEF_RVV_FUNCTION (vsxsegw, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
+DEF_RVV_FUNCTION (vlsegbff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlseghff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegwff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegbuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlseghuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegwuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
#undef REQUIRED_EXTENSIONS
#undef DEF_RVV_FUNCTION
diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
index 661b90c9c08..463b8d0ae9c 100644
--- a/gcc/config/riscv/thead-vector.md
+++ b/gcc/config/riscv/thead-vector.md
@@ -46,6 +46,13 @@ (define_c_enum "unspec" [
UNSPEC_TH_VLXSEGHU
UNSPEC_TH_VLXSEGW
UNSPEC_TH_VLXSEGWU
+
+ UNSPEC_TH_VLSEGBFF
+ UNSPEC_TH_VLSEGBUFF
+ UNSPEC_TH_VLSEGHFF
+ UNSPEC_TH_VLSEGHUFF
+ UNSPEC_TH_VLSEGWFF
+ UNSPEC_TH_VLSEGWUFF
])
(define_int_iterator UNSPEC_TH_VLMEM_OP [
@@ -70,6 +77,9 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[
UNSPEC_TH_VLSEGB UNSPEC_TH_VLSEGBU
UNSPEC_TH_VLSEGH UNSPEC_TH_VLSEGHU
UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU
+ UNSPEC_TH_VLSEGBFF UNSPEC_TH_VLSEGBUFF
+ UNSPEC_TH_VLSEGHFF UNSPEC_TH_VLSEGHUFF
+ UNSPEC_TH_VLSEGWFF UNSPEC_TH_VLSEGWUFF
])
(define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[
@@ -106,6 +116,9 @@ (define_int_attr vlmem_op_attr [
(UNSPEC_TH_VLXSEGB "b") (UNSPEC_TH_VLXSEGBU "bu")
(UNSPEC_TH_VLXSEGH "h") (UNSPEC_TH_VLXSEGHU "hu")
(UNSPEC_TH_VLXSEGW "w") (UNSPEC_TH_VLXSEGWU "wu")
+ (UNSPEC_TH_VLSEGBFF "bff") (UNSPEC_TH_VLSEGBUFF "buff")
+ (UNSPEC_TH_VLSEGHFF "hff") (UNSPEC_TH_VLSEGHUFF "huff")
+ (UNSPEC_TH_VLSEGWFF "wff") (UNSPEC_TH_VLSEGWUFF "wuff")
])
(define_int_attr vlmem_order_attr [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
new file mode 100644
index 00000000000..7aec80bbeaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
@@ -0,0 +1,118 @@
+/* { dg-do compile { target { rv32 } } } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f1 (void * in, void *out)
+{
+ vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+ vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tu (v, in, 4);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
+
+/*
+** f2:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f2 (void * in, void *out, vbool16_t mask)
+{
+ vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+ vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_m (mask, in, 4);
+ vint16m1_t v_0 = __riscv_vget_i16m1(v, 0);
+ vint16m1_t v_1 = __riscv_vget_i16m1(v, 1);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
+
+/*
+** f3:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f3 (void * in, void *out, vbool16_t mask)
+{
+ vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+ vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tumu (mask, v, in, 4);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c
new file mode 100644
index 00000000000..f6aae887b95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c
@@ -0,0 +1,115 @@
+/* { dg-do compile { target { rv32 } } } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f1(void *in, void *out) {
+ vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tu(v, in, 4);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v2_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v2_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+ v4 = __riscv_vset(v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
+
+/*
+** f2:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f2(void *in, void *out, vbool16_t mask) {
+ vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_m(mask, in, 4);
+ vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0);
+ vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+ v4 = __riscv_vset(v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
+
+/*
+** f3:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+** ret
+*/
+void f3(void *in, void *out, vbool16_t mask) {
+ vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tumu(mask, v, in, 4);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu(mask, v3_0, v2_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu(mask, v3_1, v2_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu(mask, v4_0, v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu(mask, v4_1, v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+ v4 = __riscv_vset(v4, 1, v4_1);
+ __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
--
2.47.1