https://github.com/E00N777 created https://github.com/llvm/llvm-project/pull/205508
### Summary Part of: https://github.com/llvm/llvm-project/issues/185382 This is a small patch and a follow-up to: https://github.com/llvm/llvm-project/pull/204088 The lowering logic was already implemented in https://github.com/llvm/llvm-project/pull/204285 so this PR only migrates the tests for these intrinsics. This PR was completed with the assistance of Claude Opus 4.8. >From 58035d26fb51f9a244dfd7443f6eadf6da4239d8 Mon Sep 17 00:00:00 2001 From: E00N777 <[email protected]> Date: Wed, 24 Jun 2026 16:53:47 +0800 Subject: [PATCH] [CIR][AArch64] Migrate vsubl_high/vsubw_high NEON tests to subtraction.c Move the widening-subtraction high-half intrinsic tests (vsubl_high_* and vsubw_high_*) out of neon-intrinsics.c into subtraction.c, rewriting them with the LLVM and ClangIR (CIR) check-prefix style used by the rest of that file so they exercise both the classic CodeGen and ClangIR lowering paths. Co-Authored-By: Claude Opus 4.8 <[email protected]> --- clang/test/CodeGen/AArch64/neon-intrinsics.c | 180 --------------- clang/test/CodeGen/AArch64/neon/subtraction.c | 213 +++++++++++++++++- 2 files changed, 211 insertions(+), 182 deletions(-) diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index b37ed5aa29f10..560191e43baec 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -5219,186 +5219,6 @@ uint64x2_t test_vmovl_high_u32(uint32x4_t a) { return vmovl_high_u32(a); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> 
[[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { - return vsubl_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { - return vsubl_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = 
shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { - return vsubl_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { - return vsubl_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 
5, i32 6, i32 7> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { - return vsubl_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { - return vsubl_high_u32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_s8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 
x i16> [[SUB_I]] -// -int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { - return vsubw_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_s16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { - return vsubw_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_s32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { - return vsubw_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_u8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: 
[[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { - return vsubw_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_u16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { - return vsubw_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_u32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { - return vsubw_high_u32(a, b); -} - // CHECK-LABEL: define dso_local <8 x i8> @test_vaddhn_s16( // CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/subtraction.c b/clang/test/CodeGen/AArch64/neon/subtraction.c index 7810445fa0094..d23f5907f0362 100644 --- 
a/clang/test/CodeGen/AArch64/neon/subtraction.c +++ b/clang/test/CodeGen/AArch64/neon/subtraction.c @@ -12,7 +12,7 @@ // ACLE section headings based on v2025Q2 of the ACLE specification: // * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#subtract // -// TODO: Migrate Widening subtraction, Narrowing subtraction and Saturating subtract test cases. +// TODO: Migrate Narrowing subtraction and Saturating subtract test cases. // //============================================================================= @@ -268,7 +268,6 @@ uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { //===------------------------------------------------------===// // 2.1.1.5.3. Widening subtraction // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#widening-subtraction -// TODO: Migrate the vsubl_high_* / vsubw_high_* intrinsics //===------------------------------------------------------===// // LLVM-LABEL: @test_vsubl_s8( @@ -462,3 +461,213 @@ uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { // LLVM: ret <2 x i64> [[SUB_I]] return vsubw_u32(a, b); } + +// LLVM-LABEL: @test_vsubl_high_s8( +// CIR-LABEL: @vsubl_high_s8( +int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL0:%.*]] = sext <8 x i8> [[SHUFFLE0]] to <8 x i16> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL1:%.*]] = sext <8 x i8> [[SHUFFLE1]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] 
= sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_high_s8(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_s16( +// CIR-LABEL: @vsubl_high_s16( +int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL0:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_high_s16(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_s32( +// CIR-LABEL: @vsubl_high_s32( +int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !s64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast 
<8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_high_s32(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u8( +// CIR-LABEL: @vsubl_high_u8( +uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<8 x !u16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL0:%.*]] = zext <8 x i8> [[SHUFFLE0]] to <8 x i16> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL1:%.*]] = zext <8 x i8> [[SHUFFLE1]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_high_u8(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u16( +// CIR-LABEL: @vsubl_high_u16( +uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !u32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x 
i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL0:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_high_u16(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u32( +// CIR-LABEL: @vsubl_high_u32( +uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !u64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_high_u32(a, b); +} + +// LLVM-LABEL: 
@test_vsubw_high_s8( +// CIR-LABEL: @vsubw_high_s8( +int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL_I:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubw_high_s8(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_s16( +// CIR-LABEL: @vsubw_high_s16( +int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubw_high_s16(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_s32( +// CIR-LABEL: @vsubw_high_s32( +int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> +// LLVM: 
[[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubw_high_s32(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u8( +// CIR-LABEL: @vsubw_high_u8( +uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<8 x !u16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +// LLVM: [[VMOVL_I:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubw_high_u8(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u16( +// CIR-LABEL: @vsubw_high_u16( +uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<4 x !u32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubw_high_u16(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u32( +// CIR-LABEL: @vsubw_high_u32( +uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { +// CIR: 
[[VMOVL_I:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<2 x !u64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubw_high_u32(a, b); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
