llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jiahao Guo (E00N777) <details> <summary>Changes</summary> ## Summary This is part of https://github.com/llvm/llvm-project/issues/185382 and a follow-up to https://github.com/llvm/llvm-project/pull/202857. It lowers part of the widen and widening subtraction intrinsics. ### Why implement two sets of intrinsics in one PR? Widening subtraction depends on the widen intrinsics during lowering, so I implemented them in the same PR. --- Patch is 26.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/204088.diff 4 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+12-1) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (-230) - (modified) clang/test/CodeGen/AArch64/neon/subtraction.c (+199-1) - (added) clang/test/CodeGen/AArch64/neon/widen.c (+98) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 84b9bb1007763..176787b6c2402 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -882,7 +882,18 @@ static mlir::Value emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld3q_lane_v: case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: - case NEON::BI__builtin_neon_vmovl_v: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("Reached code-path for ARM builtin call ") + + ctx.BuiltinInfo.getName(builtinID) + + "(ARM builtins are not supported ATM)"); + return mlir::Value{}; + case NEON::BI__builtin_neon_vmovl_v: { + cir::VectorType dTy = + cgf.getBuilder().getExtendedOrTruncatedElementVectorType( + ty, false /* truncate */, !usgn); + ops[0] = cgf.getBuilder().createBitcast(loc, ops[0], dTy); + return cgf.getBuilder().createIntCast(ops[0], ty); + } case NEON::BI__builtin_neon_vmovn_v: case NEON::BI__builtin_neon_vmull_v: case NEON::BI__builtin_neon_vpadal_v: diff --git 
a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index bbcc8978804f7..9fde7f37fc192 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -5298,74 +5298,6 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 19); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]] -// -int16x8_t test_vmovl_s8(int8x8_t a) { - return vmovl_s8(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vmovl_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]] -// -int32x4_t test_vmovl_s16(int16x4_t a) { - return vmovl_s16(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]] -// -int64x2_t test_vmovl_s32(int32x2_t a) { - return vmovl_s32(a); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[VMOVL_I]] -// -uint16x8_t test_vmovl_u8(uint8x8_t a) { - return vmovl_u8(a); -} - -// CHECK-LABEL: define dso_local <4 x i32> 
@test_vmovl_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[VMOVL_I]] -// -uint32x4_t test_vmovl_u16(uint16x4_t a) { - return vmovl_u16(a); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vmovl_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[VMOVL_I]] -// -uint64x2_t test_vmovl_u32(uint32x2_t a) { - return vmovl_u32(a); -} - // CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_high_s8( // CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -5782,94 +5714,6 @@ uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { return vaddw_high_u32(a, b); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { - return vsubl_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 
-// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { - return vsubl_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { - return vsubl_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { - return vsubl_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { - return vsubl_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { - return vsubl_u32(a, b); -} - // CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_s8( // CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -5970,80 +5814,6 @@ uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { return vsubl_high_u32(a, b); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_s8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: 
[[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { - return vsubw_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_s16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { - return vsubw_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_s32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { - return vsubw_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_u8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { - return vsubw_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_u16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { - return vsubw_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_u32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { - return vsubw_u32(a, b); -} - // CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_s8( // CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/subtraction.c b/clang/test/CodeGen/AArch64/neon/subtraction.c index 8297e444b2bd9..7810445fa0094 100644 --- a/clang/test/CodeGen/AArch64/neon/subtraction.c +++ b/clang/test/CodeGen/AArch64/neon/subtraction.c @@ -7,7 +7,7 @@ //============================================================================= // NOTES // -// Tests for vector permutation intrinsics: Subtraction, Widening subtraction, Narrowing subtraction and Saturating subtract elements. +// Tests for vector subtraction intrinsics: Subtraction, Widening subtraction, Narrowing subtraction and Saturating subtract elements. 
// // ACLE section headings based on v2025Q2 of the ACLE specification: // * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#subtract @@ -264,3 +264,201 @@ uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { // LLVM: ret i64 [[VSUBD_I]] return vsubd_u64(a, b); } + +//===------------------------------------------------------===// +// 2.1.1.5.3. Widening subtraction +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#widening-subtraction +// TODO: Migrate the vsubl_high_* / vsubw_high_* intrinsics +//===------------------------------------------------------===// + +// LLVM-LABEL: @test_vsubl_s8( +// CIR-LABEL: @vsubl_s8( +int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[VMOVL0:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +// LLVM: [[VMOVL1:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_s8(a, b); +} + +// LLVM-LABEL: @test_vsubl_s16( +// CIR-LABEL: @vsubl_s16( +int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL0:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = 
bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_s16(a, b); +} + +// LLVM-LABEL: @test_vsubl_s32( +// CIR-LABEL: @vsubl_s32( +int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_s32(a, b); +} + +// LLVM-LABEL: @test_vsubl_u8( +// CIR-LABEL: @vsubl_u8( +uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<8 x !u16i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[VMOVL0:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +// LLVM: [[VMOVL1:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_u8(a, b); +} + +// LLVM-LABEL: @test_vsubl_u16( +// CIR-LABEL: @vsubl_u16( +uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { +// CIR: [[VMOVL0:%.*]] 
= cir.call @vmovl_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !u32i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL0:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_u16(a, b); +} + +// LLVM-LABEL: @test_vsubl_u32( +// CIR-LABEL: @vsubl_u32( +uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !u64i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_u32(a, b); +} + +// LLVM-LABEL: @test_vsubw_s8( +// CIR-LABEL: @vsubw_s8( +int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub {{%.*}}, 
[[VMOVL_I]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[VMOVL_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubw_s8(a, b); +} + +// LLVM-LABEL: @test_vsubw_s16( +// CIR-LABEL: @vsubw_s16( +int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubw_s16(a, b); +} + +// LLVM-LABEL: @test_vsubw_s32( +// CIR-LABEL: @vsubw_s32( +int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub {{%.*... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/204088 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
