llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: None (amilendra) <details> <summary>Changes</summary> Add the following new Clang intrinsics based on the ACLE specification https://github.com/ARM-software/acle/pull/428 (Add alpha support for 9.7 data processing intrinsics). Multi-vector saturating rounding shift right narrow and interleave instructions: - SQRSHRN: - svint8_t svqrshrn_s8(svint16x2_t, uint64_t) / svint8_t svqrshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - UQRSHRN: - svuint8_t svqrshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqrshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - SQRSHRUN: - svuint8_t svqrshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqrshrun_n_u8_s16_x2(svint16x2_t, uint64_t) Multi-vector saturating shift right narrow and interleave instructions: - SQSHRN: - svint8_t svqshrn_s8(svint16x2_t, uint64_t) / svint8_t svqshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - svint16_t svqshrn_s16(svint32x2_t, uint64_t) / svint16_t svqshrn_n_s16_s32_x2(svint32x2_t, uint64_t) - UQSHRN: - svuint8_t svqshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - svuint16_t svqshrn_u16(svuint32x2_t, uint64_t) / svuint16_t svqshrn_n_u16_u32_x2(svuint32x2_t, uint64_t) - SQSHRUN: - svuint8_t svqshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqshrun_n_u8_s16_x2(svint16x2_t, uint64_t) - svuint16_t svqshrun_u16(svint32x2_t, uint64_t) / svuint16_t svqshrun_n_u16_s32_x2(svint32x2_t, uint64_t) --- Patch is 70.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/186087.diff 10 Files Affected: - (modified) clang/include/clang/Basic/arm_sve.td (+18) - (added) clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c (+148) - (added) clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c (+271) - (added) clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c (+160) - (added) 
clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c (+161) - (added) clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp (+51) - (added) clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp (+87) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+7-1) - (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+9-9) - (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+8-6) ``````````diff diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..98838af2030ad 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; } +// +// Multi-vector saturating rounding shift right narrow and interleave +// +let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in { + def SVSQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "s", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVUQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Us", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQRSHRUN_X2_2P3 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "s", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>; def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>; @@ -2300,6 +2309,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme-f16f16" in { def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; } +// +// Multi-vector saturating shift right narrow and interleave +// +let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in { + def SVSQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "h2i", "is", MergeNone, "aarch64_sve_sqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVUQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "e2i", "UiUs", MergeNone, "aarch64_sve_uqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQSHRUN_X2 : SInst<"svqshrun[_n]_{0}[_{d}_x2]", "e2i", "is", MergeNone, "aarch64_sve_sqshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + // // Multi-vector saturating extract narrow // diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c new file mode 100644 index 0000000000000..4d7bf51d33913 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c @@ -0,0 +1,148 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_s8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, 
<vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_s8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 
x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svint8_t test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_u8_u16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z25test_svqrshrn_n_u8_u16_x212svuint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrun_n_u8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { 
+// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z26test_svqrshrun_n_u8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, 
<vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2,)(zn, 8); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c new file mode 100644 index 0000000000000..60da63d609880 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c @@ -0,0 +1,271 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrn_n_s8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z24test_svqshrn_n_s8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/186087 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
