https://github.com/TelGome updated https://github.com/llvm/llvm-project/pull/203147
>From 60509ceb6b5d804eb826e2e197deffb3b6f39424 Mon Sep 17 00:00:00 2001 From: Dongyan Chen <[email protected]> Date: Fri, 12 Jun 2026 01:33:11 +0000 Subject: [PATCH] [RISCV][P-ext] Support Packed Averaging Addition and Subtraction --- clang/include/clang/Basic/BuiltinsRISCV.td | 30 ++ clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 58 +++ clang/lib/Headers/riscv_packed_simd.h | 24 + clang/test/CodeGen/RISCV/rvp-intrinsics.c | 444 ++++++++++++++++++ .../riscv_packed_simd.c | 132 ++++++ llvm/include/llvm/IR/IntrinsicsRISCV.td | 14 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 23 + llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 27 ++ llvm/test/CodeGen/RISCV/rvp-simd-32.ll | 72 +++ llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 168 +++++++ 10 files changed, 992 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td index 2dad5ede2d64b..185269bfc6d85 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.td +++ b/clang/include/clang/Basic/BuiltinsRISCV.td @@ -153,6 +153,36 @@ def ntl_store : RISCVBuiltin<"void(...)">; let Features = "zihintpause", Attributes = [NoThrow] in def pause : RISCVBuiltin<"void()">; +//===----------------------------------------------------------------------===// +// P extension. +//===----------------------------------------------------------------------===// +let Features = "experimental-p", Attributes = [NoThrow, Const] in { +// Packed Averaging Addition and Subtraction (32-bit) +def paadd_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed char>, _Vector<4, signed char>)">; +def paadd_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def paaddu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>)">; +def paaddu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>)">; +def pasub_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed char>, _Vector<4, signed char>)">; +def pasub_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def pasubu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>)">; +def pasubu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>)">; + +// Packed Averaging Addition and Subtraction (64-bit) +def paadd_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed char>, _Vector<8, signed char>)">; +def paadd_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def paadd_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def paaddu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">; +def paaddu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; +def paaddu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">; +def pasub_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed char>, _Vector<8, signed char>)">; +def pasub_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def pasub_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">; +def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; +def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">; + +} // Features = "experimental-p" + //===----------------------------------------------------------------------===// // XCV extensions. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index 3bf7dd07d54d3..8c0684110dad7 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -1199,6 +1199,64 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, break; } + // Packed Averaging Addition and Subtraction + case RISCV::BI__builtin_riscv_paadd_i8x4: + case RISCV::BI__builtin_riscv_paadd_i16x2: + case RISCV::BI__builtin_riscv_paadd_i8x8: + case RISCV::BI__builtin_riscv_paadd_i16x4: + case RISCV::BI__builtin_riscv_paadd_i32x2: + case RISCV::BI__builtin_riscv_paaddu_u8x4: + case RISCV::BI__builtin_riscv_paaddu_u16x2: + case RISCV::BI__builtin_riscv_paaddu_u8x8: + case RISCV::BI__builtin_riscv_paaddu_u16x4: + case RISCV::BI__builtin_riscv_paaddu_u32x2: + case RISCV::BI__builtin_riscv_pasub_i8x4: + case RISCV::BI__builtin_riscv_pasub_i16x2: + case RISCV::BI__builtin_riscv_pasub_i8x8: + case RISCV::BI__builtin_riscv_pasub_i16x4: + case RISCV::BI__builtin_riscv_pasub_i32x2: + case RISCV::BI__builtin_riscv_pasubu_u8x4: + case RISCV::BI__builtin_riscv_pasubu_u16x2: + case RISCV::BI__builtin_riscv_pasubu_u8x8: + case RISCV::BI__builtin_riscv_pasubu_u16x4: + case RISCV::BI__builtin_riscv_pasubu_u32x2: { + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID"); + case RISCV::BI__builtin_riscv_paadd_i8x4: + case RISCV::BI__builtin_riscv_paadd_i16x2: + case RISCV::BI__builtin_riscv_paadd_i8x8: + case RISCV::BI__builtin_riscv_paadd_i16x4: + case RISCV::BI__builtin_riscv_paadd_i32x2: + ID = Intrinsic::riscv_paadd; + break; + case RISCV::BI__builtin_riscv_paaddu_u8x4: + case RISCV::BI__builtin_riscv_paaddu_u16x2: + case RISCV::BI__builtin_riscv_paaddu_u8x8: + case RISCV::BI__builtin_riscv_paaddu_u16x4: + case RISCV::BI__builtin_riscv_paaddu_u32x2: + ID = Intrinsic::riscv_paaddu; + break; + case RISCV::BI__builtin_riscv_pasub_i8x4: + case RISCV::BI__builtin_riscv_pasub_i16x2: + case RISCV::BI__builtin_riscv_pasub_i8x8: + case RISCV::BI__builtin_riscv_pasub_i16x4: + case RISCV::BI__builtin_riscv_pasub_i32x2: + ID = Intrinsic::riscv_pasub; + break; + case RISCV::BI__builtin_riscv_pasubu_u8x4: + case RISCV::BI__builtin_riscv_pasubu_u16x2: + case RISCV::BI__builtin_riscv_pasubu_u8x8: + case RISCV::BI__builtin_riscv_pasubu_u16x4: + case RISCV::BI__builtin_riscv_pasubu_u32x2: + ID = Intrinsic::riscv_pasubu; + break; + } + + IntrinsicTypes = {ResultType}; + break; + } + // Zk builtins // Zknh diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h index 39f2f4cd2c704..7e981c91ec3e1 100644 --- a/clang/lib/Headers/riscv_packed_simd.h +++ b/clang/lib/Headers/riscv_packed_simd.h @@ -351,6 +351,30 @@ __packed_unary_op(pnot_u16x4, uint16x4_t, ~) __packed_unary_op(pnot_i32x2, int32x2_t, ~) __packed_unary_op(pnot_u32x2, uint32x2_t, ~) +/* Packed Averaging Addition and Subtraction (32-bit) */ +__packed_binary_builtin(paadd_i8x4, int8x4_t, __builtin_riscv_paadd_i8x4) +__packed_binary_builtin(paadd_i16x2, int16x2_t, __builtin_riscv_paadd_i16x2) +__packed_binary_builtin(paaddu_u8x4, uint8x4_t, __builtin_riscv_paaddu_u8x4) +__packed_binary_builtin(paaddu_u16x2, uint16x2_t, __builtin_riscv_paaddu_u16x2) +__packed_binary_builtin(pasub_i8x4, int8x4_t, __builtin_riscv_pasub_i8x4) +__packed_binary_builtin(pasub_i16x2, int16x2_t, __builtin_riscv_pasub_i16x2) +__packed_binary_builtin(pasubu_u8x4, uint8x4_t, __builtin_riscv_pasubu_u8x4) +__packed_binary_builtin(pasubu_u16x2, uint16x2_t, __builtin_riscv_pasubu_u16x2) + +/* Packed Averaging Addition and Subtraction (64-bit) */ +__packed_binary_builtin(paadd_i8x8, int8x8_t, __builtin_riscv_paadd_i8x8) +__packed_binary_builtin(paadd_i16x4, int16x4_t, __builtin_riscv_paadd_i16x4) +__packed_binary_builtin(paadd_i32x2, int32x2_t, __builtin_riscv_paadd_i32x2) +__packed_binary_builtin(paaddu_u8x8, uint8x8_t, __builtin_riscv_paaddu_u8x8) +__packed_binary_builtin(paaddu_u16x4, uint16x4_t, __builtin_riscv_paaddu_u16x4) +__packed_binary_builtin(paaddu_u32x2, uint32x2_t, __builtin_riscv_paaddu_u32x2) +__packed_binary_builtin(pasub_i8x8, int8x8_t, __builtin_riscv_pasub_i8x8) +__packed_binary_builtin(pasub_i16x4, int16x4_t, __builtin_riscv_pasub_i16x4) +__packed_binary_builtin(pasub_i32x2, int32x2_t, __builtin_riscv_pasub_i32x2) +__packed_binary_builtin(pasubu_u8x8, uint8x8_t, __builtin_riscv_pasubu_u8x8) +__packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4) +__packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2) + // clang-format on #undef __packed_splat2 diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c index 71fb5eb1f0e25..363bfa5f5d995 100644 --- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -4791,3 +4791,447 @@ int32x2_t test_pnot_i32x2(int32x2_t a) { uint32x2_t test_pnot_u32x2(uint32x2_t a) { return __riscv_pnot_u32x2(a); } + +// RV32-LABEL: define dso_local i32 @test_paadd_i8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_paadd_i8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int8x4_t test_paadd_i8x4(int8x4_t rs1, int8x4_t rs2) { + return __riscv_paadd_i8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_paadd_i16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_paadd_i16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_paadd_i16x2(int16x2_t rs1, int16x2_t rs2) { + return __riscv_paadd_i16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_paaddu_u8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_paaddu_u8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint8x4_t test_paaddu_u8x4(uint8x4_t rs1, uint8x4_t rs2) { + return __riscv_paaddu_u8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_paaddu_u16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_paaddu_u16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint16x2_t test_paaddu_u16x2(uint16x2_t rs1, uint16x2_t rs2) { + return __riscv_paaddu_u16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pasub_i8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pasub_i8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int8x4_t test_pasub_i8x4(int8x4_t rs1, int8x4_t rs2) { + return __riscv_pasub_i8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pasub_i16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pasub_i16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_pasub_i16x2(int16x2_t rs1, int16x2_t rs2) { + return __riscv_pasub_i16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pasubu_u8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pasubu_u8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint8x4_t test_pasubu_u8x4(uint8x4_t rs1, uint8x4_t rs2) { + return __riscv_pasubu_u8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pasubu_u16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pasubu_u16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint16x2_t test_pasubu_u16x2(uint16x2_t rs1, uint16x2_t rs2) { + return __riscv_pasubu_u16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paadd_i8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paadd_i8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t test_paadd_i8x8(int8x8_t rs1, int8x8_t rs2) { + return __riscv_paadd_i8x8(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paadd_i16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paadd_i16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_paadd_i16x4(int16x4_t rs1, int16x4_t rs2) { + return __riscv_paadd_i16x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paadd_i32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paadd_i32x2( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_paadd_i32x2(int32x2_t rs1, int32x2_t rs2) { + return __riscv_paadd_i32x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paaddu_u8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paaddu_u8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t test_paaddu_u8x8(uint8x8_t rs1, uint8x8_t rs2) { + return __riscv_paaddu_u8x8(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paaddu_u16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paaddu_u16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t test_paaddu_u16x4(uint16x4_t rs1, uint16x4_t rs2) { + return __riscv_paaddu_u16x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_paaddu_u32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paaddu_u32x2( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint32x2_t test_paaddu_u32x2(uint32x2_t rs1, uint32x2_t rs2) { + return __riscv_paaddu_u32x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pasub_i8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasub_i8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t test_pasub_i8x8(int8x8_t rs1, int8x8_t rs2) { + return __riscv_pasub_i8x8(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pasub_i16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasub_i16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_pasub_i16x4(int16x4_t rs1, int16x4_t rs2) { + return __riscv_pasub_i16x4(rs1, rs2); +} + + +// RV32-LABEL: define dso_local i64 @test_pasub_i32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasub_i32x2( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_pasub_i32x2(int32x2_t rs1, int32x2_t rs2) { + return __riscv_pasub_i32x2(rs1, rs2); +} + + +// RV32-LABEL: define dso_local i64 @test_pasubu_u8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasubu_u8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t test_pasubu_u8x8(uint8x8_t rs1, uint8x8_t rs2) { + return __riscv_pasubu_u8x8(rs1, rs2); +} + + +// RV32-LABEL: define dso_local i64 @test_pasubu_u16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasubu_u16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t test_pasubu_u16x4(uint16x4_t rs1, uint16x4_t rs2) { + return __riscv_pasubu_u16x4(rs1, rs2); +} + + +// RV32-LABEL: define dso_local i64 @test_pasubu_u32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasubu_u32x2( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint32x2_t test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) { + return __riscv_pasubu_u32x2(rs1, rs2); +} diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c index 971795c877198..edbc56ce0e199 100644 --- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c +++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c @@ -1640,3 +1640,135 @@ uint32x2_t test_pmsle_u32x2(int32x2_t a, int32x2_t b) { uint32x2_t test_pmsleu_u32x2(uint32x2_t a, uint32x2_t b) { return __riscv_pmsleu_u32x2(a, b); } + +// CHECK-LABEL: test_paadd_i8x4: +// CHECK: paadd.b +int8x4_t test_paadd_i8x4(int8x4_t a, int8x4_t b) { + return __riscv_paadd_i8x4(a, b); +} + +// CHECK-LABEL: test_paadd_i16x2: +// CHECK: paadd.h +int16x2_t test_paadd_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_paadd_i16x2(a, b); +} + +// CHECK-LABEL: test_paaddu_u8x4: +// CHECK: paaddu.b +uint8x4_t test_paaddu_u8x4(uint8x4_t a, uint8x4_t b) { + return __riscv_paaddu_u8x4(a, b); +} + +// CHECK-LABEL: test_paaddu_u16x2: +// CHECK: paaddu.h +uint16x2_t test_paaddu_u16x2(uint16x2_t a, uint16x2_t b) { + return __riscv_paaddu_u16x2(a, b); +} + +// CHECK-LABEL: test_pasub_i8x4: +// CHECK: pasub.b +int8x4_t test_pasub_i8x4(int8x4_t a, int8x4_t b) { + return __riscv_pasub_i8x4(a, b); +} + +// CHECK-LABEL: test_pasub_i16x2: +// CHECK: pasub.h +int16x2_t test_pasub_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pasub_i16x2(a, b); +} + +// CHECK-LABEL: test_pasubu_u8x4: +// CHECK: pasubu.b +uint8x4_t test_pasubu_u8x4(uint8x4_t a, uint8x4_t b) { + return __riscv_pasubu_u8x4(a, b); +} + +// CHECK-LABEL: test_pasubu_u16x2: +// CHECK: pasubu.h +uint16x2_t test_pasubu_u16x2(uint16x2_t a, uint16x2_t b) { + return __riscv_pasubu_u16x2(a, b); +} + +// CHECK-LABEL: test_paadd_i8x8: +// RV32: paadd.db +// RV64: paadd.b +int8x8_t test_paadd_i8x8(int8x8_t a, int8x8_t b) { + return __riscv_paadd_i8x8(a, b); +} + +// CHECK-LABEL: test_paadd_i16x4: +// RV32: paadd.dh +// RV64: paadd.h +int16x4_t test_paadd_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_paadd_i16x4(a, b); +} + +// CHECK-LABEL: test_paadd_i32x2: +// RV32: paadd.dw +// RV64: paadd.w +int32x2_t test_paadd_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_paadd_i32x2(a, b); +} + +// CHECK-LABEL: test_paaddu_u8x8: +// RV32: paaddu.db +// RV64: paaddu.b +uint8x8_t test_paaddu_u8x8(uint8x8_t a, uint8x8_t b) { + return __riscv_paaddu_u8x8(a, b); +} + +// CHECK-LABEL: test_paaddu_u16x4: +// RV32: paaddu.dh +// RV64: paaddu.h +uint16x4_t test_paaddu_u16x4(uint16x4_t a, uint16x4_t b) { + return __riscv_paaddu_u16x4(a, b); +} + +// CHECK-LABEL: test_paaddu_u32x2: +// RV32: paaddu.dw +// RV64: paaddu.w +uint32x2_t test_paaddu_u32x2(uint32x2_t a, uint32x2_t b) { + return __riscv_paaddu_u32x2(a, b); +} + +// CHECK-LABEL: test_pasub_i8x8: +// RV32: pasub.db +// RV64: pasub.b +int8x8_t test_pasub_i8x8(int8x8_t a, int8x8_t b) { + return __riscv_pasub_i8x8(a, b); +} + +// CHECK-LABEL: test_pasub_i16x4: +// RV32: pasub.dh +// RV64: pasub.h +int16x4_t test_pasub_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pasub_i16x4(a, b); +} + +// CHECK-LABEL: test_pasub_i32x2: +// RV32: pasub.dw +// RV64: pasub.w +int32x2_t test_pasub_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pasub_i32x2(a, b); +} + +// CHECK-LABEL: test_pasubu_u8x8: +// RV32: pasubu.db +// RV64: pasubu.b +uint8x8_t test_pasubu_u8x8(uint8x8_t a, uint8x8_t b) { + return __riscv_pasubu_u8x8(a, b); +} + +// CHECK-LABEL: test_pasubu_u16x4: +// RV32: pasubu.dh +// RV64: pasubu.h +uint16x4_t test_pasubu_u16x4(uint16x4_t a, uint16x4_t b) { + return __riscv_pasubu_u16x4(a, b); +} + +// CHECK-LABEL: test_pasubu_u32x2: +// RV32: pasubu.dw +// RV64: pasubu.w +uint32x2_t test_pasubu_u32x2(uint32x2_t a, uint32x2_t b) { + return __riscv_pasubu_u32x2(a, b); +} diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index f53f752c25c30..b2add44b19a5e 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -2043,6 +2043,20 @@ let TargetPrefix = "riscv" in { let TargetPrefix = "riscv" in def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; +// Packed SIMD extensions +//===----------------------------------------------------------------------===// +let TargetPrefix = "riscv" in { +// Packed Averaging Addition and Subtraction. +class RVPBinaryIntrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + + def int_riscv_paadd : RVPBinaryIntrinsic; + def int_riscv_paaddu : RVPBinaryIntrinsic; + def int_riscv_pasub : RVPBinaryIntrinsic; + def int_riscv_pasubu : RVPBinaryIntrinsic; +} // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// // Zvfofp8min - OFP8 conversion extension diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 753901d71baca..57af056c2d3c0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -556,6 +556,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // widen for those operations that will be unrolled. setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, {MVT::v2i16, MVT::v4i8}, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, {MVT::v2i16, MVT::v4i8}, + Custom); } else { VTs = P32VecVTs; } @@ -633,6 +635,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction( {ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT, ISD::SSUBSAT}, P64VecVTs, Legal); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, P64VecVTs, Legal); setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, P64VecVTs, Legal); setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, P64VecVTs, Legal); @@ -15637,6 +15640,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; } + case Intrinsic::riscv_paadd: + case Intrinsic::riscv_paaddu: + case Intrinsic::riscv_pasub: + case Intrinsic::riscv_pasubu: { + EVT VT = N->getValueType(0); + if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16)) + return; + + EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16; + SDValue Undef = DAG.getUNDEF(VT); + SDValue Op0 = + DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef); + SDValue Op1 = + DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(2), Undef); + SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WideVT, + N->getOperand(0), Op0, Op1); + Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL))); + return; + } case Intrinsic::riscv_orc_b: case Intrinsic::riscv_brev8: case Intrinsic::riscv_sha256sig0: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 79ff90545b68a..fe1e459967e92 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1990,12 +1990,22 @@ let Predicates = [HasStdExtP] in { def : PatGprGpr<riscv_asub, PASUB_B, XLenVecI8VT>; def : PatGprGpr<riscv_asubu, PASUBU_B, XLenVecI8VT>; + def : PatGprGpr<int_riscv_paadd, PAADD_B, XLenVecI8VT>; + def : PatGprGpr<int_riscv_paaddu, PAADDU_B, XLenVecI8VT>; + def : PatGprGpr<int_riscv_pasub, PASUB_B, XLenVecI8VT>; + def : PatGprGpr<int_riscv_pasubu, PASUBU_B, XLenVecI8VT>; + // 16-bit averaging patterns def : PatGprGpr<avgfloors, PAADD_H, XLenVecI16VT>; def : PatGprGpr<avgflooru, PAADDU_H, XLenVecI16VT>; def : PatGprGpr<riscv_asub, PASUB_H, XLenVecI16VT>; def : PatGprGpr<riscv_asubu, PASUBU_H, XLenVecI16VT>; + def : PatGprGpr<int_riscv_paadd, PAADD_H, XLenVecI16VT>; + def : PatGprGpr<int_riscv_paaddu, PAADDU_H, XLenVecI16VT>; + def : PatGprGpr<int_riscv_pasub, PASUB_H, XLenVecI16VT>; + def : PatGprGpr<int_riscv_pasubu, PASUBU_H, XLenVecI16VT>; + // 8-bit absolute difference patterns def : Pat<(XLenVecI8VT (abs GPR:$rs1)), (PABD_B GPR:$rs1, (XLenVecI8VT X0))>; def : PatGprGpr<abds, PABD_B, XLenVecI8VT>; @@ -2270,6 +2280,19 @@ let append Predicates = [IsRV32] in { def : PatGprPairGprPair<riscv_asub, PASUB_DW, v2i32>; def : PatGprPairGprPair<riscv_asubu, PASUBU_DW, v2i32>; + def : PatGprPairGprPair<int_riscv_paadd, PAADD_DB, v8i8>; + def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DB, v8i8>; + def : PatGprPairGprPair<int_riscv_pasub, PASUB_DB, v8i8>; + def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DB, v8i8>; + def : PatGprPairGprPair<int_riscv_paadd, PAADD_DH, v4i16>; + def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DH, v4i16>; + def : PatGprPairGprPair<int_riscv_pasub, PASUB_DH, v4i16>; + def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DH, v4i16>; + def : PatGprPairGprPair<int_riscv_paadd, PAADD_DW, v2i32>; + def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DW, v2i32>; + def : PatGprPairGprPair<int_riscv_pasub, PASUB_DW, v2i32>; + def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DW, v2i32>; + // 8-bit absolute difference patterns def : Pat<(v8i8 (abs GPRPair:$rs1)), (PABD_DB GPRPair:$rs1, (v8i8 X0_Pair))>; def : PatGprPairGprPair<abds, PABD_DB, v8i8>; @@ -2478,10 +2501,14 @@ let append Predicates = [IsRV64] in { // 32-bit averaging patterns def : PatGprGpr<avgfloors, PAADD_W, v2i32>; def : PatGprGpr<avgflooru, PAADDU_W, v2i32>; + def : PatGprGpr<int_riscv_paadd, PAADD_W, v2i32>; + def : PatGprGpr<int_riscv_paaddu, PAADDU_W, v2i32>; // 32-bit averaging-sub patterns def : PatGprGpr<riscv_asub, PASUB_W, v2i32>; def : PatGprGpr<riscv_asubu, PASUBU_W, v2i32>; + def : PatGprGpr<int_riscv_pasub, PASUB_W, v2i32>; + def : PatGprGpr<int_riscv_pasubu, PASUBU_W, v2i32>; // 32-bit multiply high patterns def : PatGprGpr<mulhs, PMULH_W, v2i32>; diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll index 9292450d51b19..cddea9ae32d25 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll @@ -2686,3 +2686,75 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) { %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) ret <2 x i16> %res } + +define <4 x i8> @test_paadd_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_paadd_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: paadd.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <4 x i8> @test_paaddu_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_paaddu_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: paaddu.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <4 x i8> @test_pasub_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_pasub_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pasub.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <4 x i8> @test_pasubu_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_pasubu_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pasubu.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <2 x i16> @test_paadd_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_paadd_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: paadd.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_paaddu_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_paaddu_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: paaddu.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_pasub_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pasub_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pasub.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pasubu_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pasubu.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll index 79ae7f43a45d9..2fcbc7bfec7f2 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll @@ -4720,3 +4720,171 @@ define <2 x i32> @test_sext_v2i16_to_v2i32(<2 x i16> %a) { %res = sext <2 x i16> %a to <2 x i32> ret <2 x i32> %res } + +define <8 x i8> @test_paadd_v8i8(<8 x i8> %a, <8 x i8> %b) { +; RV32-LABEL: test_paadd_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: paadd.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paadd_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: paadd.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <8 x i8> @test_paaddu_v8i8(<8 x i8> %a, <8 x i8> %b) { +; RV32-LABEL: test_paaddu_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: paaddu.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paaddu_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: paaddu.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <8 x i8> @test_pasub_v8i8(<8 x i8> %a, <8 x i8> %b) { +; RV32-LABEL: test_pasub_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: pasub.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasub_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: pasub.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <8 x i8> @test_pasubu_v8i8(<8 x i8> %a, <8 x i8> %b) { +; RV32-LABEL: test_pasubu_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: pasubu.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasubu_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: pasubu.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <4 x i16> @test_paadd_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_paadd_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: paadd.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paadd_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: paadd.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_paaddu_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_paaddu_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: paaddu.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paaddu_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: paaddu.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_pasub_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pasub_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: pasub.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasub_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: pasub.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_pasubu_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pasubu_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: pasubu.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasubu_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: pasubu.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <2 x i32> @test_paadd_v2i32(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_paadd_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: paadd.dw a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paadd_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: paadd.w a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_paaddu_v2i32(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_paaddu_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: paaddu.dw a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paaddu_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: paaddu.w a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_pasub_v2i32(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_pasub_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: pasub.dw a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasub_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: pasub.w a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_pasubu_v2i32(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_pasubu_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: pasubu.dw a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasubu_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: pasubu.w a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
