From: Yunze Zhu <[email protected]>
This commit adds support for the xtheadvector-specific strided segment
load/store intrinsics with the b/h/w suffix. We also define enums to be
used in thead-vector.md.
https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
V2:
Changed to reuse the existing thead function base th_loadstore_width.
V3:
Define a new mode iterator and remove unnecessary insn patterns
to reduce the number of patterns introduced.
gcc/ChangeLog:
* config/riscv/riscv-vector-builtins-bases.cc (BASE): New base_name.
* config/riscv/riscv-vector-builtins-bases.h: New function_base.
* config/riscv/riscv-vector-builtins-shapes.cc (build): Define new
builtin shapes.
* config/riscv/riscv-vector-builtins.cc: New rvv_op_info.
* config/riscv/thead-vector-builtins-functions.def (vlssegb): New
intrinsics def.
(vlssegh): Ditto.
(vlssegw): Ditto.
(vlssegbu): Ditto.
(vlsseghu): Ditto.
(vlssegwu): Ditto.
(vsssegb): Ditto.
(vsssegh): Ditto.
(vsssegw): Ditto.
* config/riscv/thead-vector.md
(@pred_th_strided_load<vlmem_op_attr><mode>): New insn pattern.
(@pred_th_strided_store<vlmem_op_attr><mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c: New test.
* gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc | 38 +++++-
.../riscv/riscv-vector-builtins-bases.h | 9 ++
.../riscv/riscv-vector-builtins-shapes.cc | 4 +
gcc/config/riscv/riscv-vector-builtins.cc | 25 ++++
.../riscv/thead-vector-builtins-functions.def | 9 ++
gcc/config/riscv/thead-vector.md | 61 +++++++++
.../riscv/rvv/xtheadvector/vlsseg-vssseg.c | 125 ++++++++++++++++++
.../riscv/rvv/xtheadvector/vlssegu-vssseg.c | 125 ++++++++++++++++++
8 files changed, 392 insertions(+), 4 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 4133a7846c4..a52ae921639 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2194,12 +2194,24 @@ public:
}
else
{
- if (STORE_P)
- return e.use_exact_insn (
- code_for_pred_th_unit_seg_store (UNSPEC, e.vector_mode ()));
+ if (LST_TYPE == LST_STRIDED)
+ {
+ if (STORE_P)
+ return e.use_exact_insn (
+ code_for_pred_th_strided_store (UNSPEC, e.vector_mode ()));
+ else
+ return e.use_exact_insn (
+ code_for_pred_th_strided_load (UNSPEC, e.vector_mode ()));
+ }
else
- return e.use_exact_insn (
+ {
+ if (STORE_P)
+ return e.use_exact_insn (
+ code_for_pred_th_unit_seg_store (UNSPEC, e.vector_mode ()));
+ else
+ return e.use_exact_insn (
code_for_pred_th_unit_seg_load (UNSPEC, e.vector_mode ()));
+ }
}
}
};
@@ -2777,6 +2789,15 @@ static CONSTEXPR const th_loadstore_width<false,
LST_UNIT_STRIDE, true, UNSPEC_T
static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true,
UNSPEC_TH_VLSEGB> vssegb_obj;
static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true,
UNSPEC_TH_VLSEGH> vssegh_obj;
static CONSTEXPR const th_loadstore_width<true, LST_UNIT_STRIDE, true,
UNSPEC_TH_VLSEGW> vssegw_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGB> vlssegb_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGH> vlssegh_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGW> vlssegw_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGBU> vlssegbu_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGHU> vlsseghu_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true,
UNSPEC_TH_VLSSEGWU> vlssegwu_obj;
+static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true,
UNSPEC_TH_VLSSEGB> vsssegb_obj;
+static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true,
UNSPEC_TH_VLSSEGH> vsssegh_obj;
+static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true,
UNSPEC_TH_VLSSEGW> vsssegw_obj;
/* Crypto Vector */
static CONSTEXPR const vandn vandn_obj;
@@ -3116,6 +3137,15 @@ BASE (vlsegwu)
BASE (vssegb)
BASE (vssegh)
BASE (vssegw)
+BASE (vlssegb)
+BASE (vlssegh)
+BASE (vlssegw)
+BASE (vlssegbu)
+BASE (vlsseghu)
+BASE (vlssegwu)
+BASE (vsssegb)
+BASE (vsssegh)
+BASE (vsssegw)
/* Crypto vector */
BASE (vandn)
BASE (vbrev)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index be62fb1781e..5406b0271a9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -326,6 +326,15 @@ extern const function_base *const vlsegwu;
extern const function_base *const vssegb;
extern const function_base *const vssegh;
extern const function_base *const vssegw;
+extern const function_base *const vlssegb;
+extern const function_base *const vlssegh;
+extern const function_base *const vlssegw;
+extern const function_base *const vlssegbu;
+extern const function_base *const vlsseghu;
+extern const function_base *const vlssegwu;
+extern const function_base *const vsssegb;
+extern const function_base *const vsssegh;
+extern const function_base *const vsssegw;
/* Below function_base are Vectro Crypto*/
extern const function_base *const vandn;
extern const function_base *const vbrev;
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 7b01f3a7f60..301399e0b77 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -1422,8 +1422,12 @@ void build (function_builder &b,
if (strstr (instance.base_name, "vlseg"))
b.append_name ("__riscv_th_vlseg");
+ else if (strstr (instance.base_name, "vlsseg"))
+ b.append_name ("__riscv_th_vlsseg");
else if (strstr (instance.base_name, "vsseg"))
b.append_name ("__riscv_th_vsseg");
+ else if (strstr (instance.base_name, "vssseg"))
+ b.append_name ("__riscv_th_vssseg");
else
gcc_unreachable ();
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc
b/gcc/config/riscv/riscv-vector-builtins.cc
index 398c2732341..8cc1b6a9523 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3386,6 +3386,31 @@ static CONSTEXPR const rvv_op_info
th_tuple_v_int_scalar_ptr_ops
rvv_arg_type_info (RVV_BASE_void), /* Return type */
scalar_ptr_args /* Args */};
+
+/* A static operand information for vector_type func (const scalar_type *,
+ * ptrdiff_t) function registration. */
+static CONSTEXPR const rvv_op_info th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops
+ = {th_tuple_sint_ops, /* Types */
+ OP_TYPE_v, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ scalar_const_ptr_ptrdiff_args /* Args */};
+
+/* A static operand information for vector_type func (const scalar_type *,
+ * ptrdiff_t) function registration. */
+static CONSTEXPR const rvv_op_info th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops
+ = {th_tuple_uint_ops, /* Types */
+ OP_TYPE_v, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ scalar_const_ptr_ptrdiff_args /* Args */};
+
+/* A static operand information for void func (scalar_type *, ptrdiff_t,
+ * vector_type) function registration. */
+static CONSTEXPR const rvv_op_info th_tuple_v_int_scalar_ptr_ptrdiff_ops
+ = {th_tuple_int_ops, /* Types */
+ OP_TYPE_v, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ scalar_ptr_ptrdiff_args /* Args */};
+
/* A list of all RVV base function types. */
static CONSTEXPR const function_type_info function_types[] = {
#define DEF_RVV_TYPE_INDEX(
\
diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def
b/gcc/config/riscv/thead-vector-builtins-functions.def
index c7702f4be48..7d6ec519616 100644
--- a/gcc/config/riscv/thead-vector-builtins-functions.def
+++ b/gcc/config/riscv/thead-vector-builtins-functions.def
@@ -43,6 +43,15 @@ DEF_RVV_FUNCTION (vlsegwu, th_seg_loadstore, full_preds,
th_tuple_v_uint_scalar_
DEF_RVV_FUNCTION (vssegb, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ops)
DEF_RVV_FUNCTION (vssegh, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ops)
DEF_RVV_FUNCTION (vssegw, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ops)
+DEF_RVV_FUNCTION (vlssegb, th_seg_loadstore, full_preds,
th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vlssegh, th_seg_loadstore, full_preds,
th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vlssegw, th_seg_loadstore, full_preds,
th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vlssegbu, th_seg_loadstore, full_preds,
th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vlsseghu, th_seg_loadstore, full_preds,
th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vlssegwu, th_seg_loadstore, full_preds,
th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vsssegb, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vsssegh, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ptrdiff_ops)
+DEF_RVV_FUNCTION (vsssegw, th_seg_loadstore, none_m_preds,
th_tuple_v_int_scalar_ptr_ptrdiff_ops)
#undef REQUIRED_EXTENSIONS
#undef DEF_RVV_FUNCTION
diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
index 4faa803333e..aba50432566 100644
--- a/gcc/config/riscv/thead-vector.md
+++ b/gcc/config/riscv/thead-vector.md
@@ -32,6 +32,13 @@ (define_c_enum "unspec" [
UNSPEC_TH_VLSEGHU
UNSPEC_TH_VLSEGW
UNSPEC_TH_VLSEGWU
+
+ UNSPEC_TH_VLSSEGB
+ UNSPEC_TH_VLSSEGBU
+ UNSPEC_TH_VLSSEGH
+ UNSPEC_TH_VLSSEGHU
+ UNSPEC_TH_VLSSEGW
+ UNSPEC_TH_VLSSEGWU
])
(define_int_iterator UNSPEC_TH_VLMEM_OP [
@@ -58,6 +65,12 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[
UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU
])
+(define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[
+ UNSPEC_TH_VLSSEGB UNSPEC_TH_VLSSEGBU
+ UNSPEC_TH_VLSSEGH UNSPEC_TH_VLSSEGHU
+ UNSPEC_TH_VLSSEGW UNSPEC_TH_VLSSEGWU
+])
+
(define_int_attr vlmem_op_attr [
(UNSPEC_TH_VLB "b") (UNSPEC_TH_VLBU "bu")
(UNSPEC_TH_VLH "h") (UNSPEC_TH_VLHU "hu")
@@ -74,6 +87,9 @@ (define_int_attr vlmem_op_attr [
(UNSPEC_TH_VLSEGB "b") (UNSPEC_TH_VLSEGBU "bu")
(UNSPEC_TH_VLSEGH "h") (UNSPEC_TH_VLSEGHU "hu")
(UNSPEC_TH_VLSEGW "w") (UNSPEC_TH_VLSEGWU "wu")
+ (UNSPEC_TH_VLSSEGB "b") (UNSPEC_TH_VLSSEGBU "bu")
+ (UNSPEC_TH_VLSSEGH "h") (UNSPEC_TH_VLSSEGHU "hu")
+ (UNSPEC_TH_VLSSEGW "w") (UNSPEC_TH_VLSSEGWU "wu")
])
(define_int_attr vlmem_order_attr [
@@ -112,6 +128,12 @@ (define_int_iterator UNSPEC_TH_VSSEGMEM_OP[
UNSPEC_TH_VLSEGW
])
+(define_int_iterator UNSPEC_TH_VSSSEGMEM_OP[
+ UNSPEC_TH_VLSSEGB
+ UNSPEC_TH_VLSSEGH
+ UNSPEC_TH_VLSSEGW
+])
+
(define_mode_iterator V_VLS_VT [V VLS VT])
(define_mode_iterator V_VB_VLS_VT [V VB VLS VT])
@@ -587,3 +609,42 @@ (define_insn "@pred_th_unit_seg_store<vlmem_op_attr><mode>"
"vsseg<nf><vlmem_op_attr>.v\t%2,(%z1)%p0"
[(set_attr "type" "vssegte")
(set_attr "mode" "<MODE>")])
+
+(define_insn "@pred_th_strided_load<vlmem_op_attr><mode>"
+ [(set (match_operand:TH_VT 0 "register_operand" "=vr, vr,
vd")
+ (if_then_else:TH_VT
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm")
+ (match_operand 5 "vector_length_operand" " rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VLSSEGMEM_OP)
+ (unspec:TH_VT
+ [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ")
+ (match_operand 4 "pmode_reg_or_0_operand" " rJ, rJ, rJ")
+ (mem:BLK (scratch))] UNSPEC_TH_VLSSEGMEM_OP)
+ (match_operand:TH_VT 2 "vector_merge_operand" " 0, vu,
vu")))]
+ "TARGET_XTHEADVECTOR"
+ "vlsseg<nf><vlmem_op_attr>.v\t%0,(%z3),%z4%p1"
+ [(set_attr "type" "vlsegds")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@pred_th_strided_store<vlmem_op_attr><mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1")
+ (match_operand 4 "vector_length_operand" " rK")
+ (match_operand 5 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VSSSEGMEM_OP)
+ (match_operand 1 "pmode_reg_or_0_operand" " rJ")
+ (match_operand 2 "pmode_reg_or_0_operand" " rJ")
+ (match_operand:TH_VT 3 "register_operand" " vr")
+ (mem:BLK (scratch))] UNSPEC_TH_VSSSEGMEM_OP))]
+ "TARGET_XTHEADVECTOR"
+ "vssseg<nf><vlmem_op_attr>.v\t%3,(%z1),%z2%p0"
+ [(set_attr "type" "vssegts")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c
new file mode 100644
index 00000000000..d4061392f0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsseg-vssseg.c
@@ -0,0 +1,125 @@
+/* { dg-do compile { target { rv32 } } } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f1 (void * in, void *out, ptrdiff_t s)
+{
+ vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4);
+ vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_tu (v, in, s, 4);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4);
+}
+
+/*
+** f2:
+** th\.vsetvli\s+zero,zero,e8,m1
+** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\)
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f2 (void * in, void *out, ptrdiff_t s)
+{
+ vbool16_t mask = *(vbool16_t*)in;
+ asm volatile ("":::"memory");
+ vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4);
+ vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_m (mask, in, s, 4);
+ vint16m1_t v_0 = __riscv_vget_i16m1(v, 0);
+ vint16m1_t v_1 = __riscv_vget_i16m1(v, 1);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_m (mask, v_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_m (mask, v_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_m (mask, v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_m (mask, v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4);
+}
+
+/*
+** f3:
+** th\.vsetvli\s+zero,zero,e8,m1
+** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\)
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f3 (void * in, void *out, ptrdiff_t s)
+{
+ vbool16_t mask = *(vbool16_t*)in;
+ asm volatile ("":::"memory");
+ vint16m1x2_t v = __riscv_th_vlsseg2h_v_i16m1x2 (in, s, 4);
+ vint16m1x2_t v2 = __riscv_th_vlsseg2h_v_i16m1x2_tumu (mask, v, in, s, 4);
+ vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+ vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+ vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4);
+ vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4);
+ vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4);
+ vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4);
+ vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_i16m1x2 (out, s, v4, 4);
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c
new file mode 100644
index 00000000000..bf560020773
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlssegu-vssseg.c
@@ -0,0 +1,125 @@
+/* { dg-do compile { target { rv32 } } } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f1 (void * in, void *out, ptrdiff_t s)
+{
+ vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_tu (v, in, s, 4);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1 (v2_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1 (v2_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1 (v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1 (v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4);
+}
+
+/*
+** f2:
+** th\.vsetvli\s+zero,zero,e8,m1
+** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\)
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f2 (void * in, void *out, ptrdiff_t s)
+{
+ vbool16_t mask = *(vbool16_t*)in;
+ asm volatile ("":::"memory");
+ vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_m (mask, in, s, 4);
+ vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0);
+ vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_m (mask, v_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_m (mask, v_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_m (mask, v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_m (mask, v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4);
+}
+
+/*
+** f3:
+** th\.vsetvli\s+zero,zero,e8,m1
+** th\.vle\.v\s+v[0-9]+,[0-9]\([a-x0-9]+\)
+** li\s+[a-x0-9]+,4
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vlsseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+,v0\.t
+** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+** th\.vmv\.v\.i\s+v[0-9]+,0
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+** th\.vsetvli\s+zero,zero,e16,m1
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+** th\.vssseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),[a-x0-9]+
+** ret
+*/
+void f3 (void * in, void *out, ptrdiff_t s)
+{
+ vbool16_t mask = *(vbool16_t*)in;
+ asm volatile ("":::"memory");
+ vuint16m1x2_t v = __riscv_th_vlsseg2hu_v_u16m1x2 (in, s, 4);
+ vuint16m1x2_t v2 = __riscv_th_vlsseg2hu_v_u16m1x2_tumu (mask, v, in, s, 4);
+ vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0);
+ vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1);
+ vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu (mask, v3_0, v2_0, v2_0, 4);
+ vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu (mask, v3_1, v2_1, v2_1, 4);
+ vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu (mask, v4_0, v3_0, v2_0, 4);
+ vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu (mask, v4_1, v3_1, v2_1, 4);
+ vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+ v4 = __riscv_vset (v4, 1, v4_1);
+ __riscv_th_vssseg2h_v_u16m1x2 (out, s, v4, 4);
+}
\ No newline at end of file
--
2.47.1