https://github.com/amilendra created https://github.com/llvm/llvm-project/pull/186087
Add the following new clang intrinsics based on the ACLE specification https://github.com/ARM-software/acle/pull/428 (Add alpha support for 9.7 data processing intrinsics) Multi-vector saturating rounding shift right narrow and interleave instructions - SQRSHRN - svint8_t svqrshrn_s8(svint16x2_t, uint64_t) / svint8_t svqrshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - UQRSHRN - svuint8_t svqrshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqrshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - SQRSHRUN - svuint8_t svqrshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqrshrun_n_u8_s16_x2(svint16x2_t, uint64_t) Multi-vector saturating shift right narrow and interleave - SQSHRN - svint8_t svqshrn_s8(svint16x2_t, uint64_t) / svint8_t svqshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - svint16_t svqshrn_s16(svint32x2_t, uint64_t) / svint16_t svqshrn_n_s16_s32_x2(svint32x2_t, uint64_t) - UQSHRN - svuint8_t svqshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - svuint16_t svqshrn_u16(svuint32x2_t, uint64_t) / svuint16_t svqshrn_n_u16_u32_x2(svuint32x2_t, uint64_t) - SQSHRUN - svuint8_t svqshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqshrun_n_u8_s16_x2(svint16x2_t, uint64_t) - svuint16_t svqshrun_u16(svint32x2_t, uint64_t) / svuint16_t svqshrun_n_u16_s32_x2(svint32x2_t, uint64_t) >From f1a68c962930cba7a6b7e4653757cc16f4280abd Mon Sep 17 00:00:00 2001 From: Amilendra Kodithuwakku <[email protected]> Date: Tue, 10 Mar 2026 12:34:35 +0000 Subject: [PATCH] [clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a shift operations Add the following new clang intrinsics based on the ACLE specification https://github.com/ARM-software/acle/pull/428 (Add alpha support for 9.7 data processing intrinsics) Multi-vector saturating rounding shift right narrow and interleave instructions - SQRSHRN - svint8_t svqrshrn_s8(svint16x2_t, uint64_t) / svint8_t svqrshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - UQRSHRN - svuint8_t svqrshrn_u8(svuint16x2_t, uint64_t) / svuint8_t 
svqrshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - SQRSHRUN - svuint8_t svqrshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqrshrun_n_u8_s16_x2(svint16x2_t, uint64_t) Multi-vector saturating shift right narrow and interleave - SQSHRN - svint8_t svqshrn_s8(svint16x2_t, uint64_t) / svint8_t svqshrn_n_s8_s16_x2(svint16x2_t, uint64_t) - svint16_t svqshrn_s16(svint32x2_t, uint64_t) / svint16_t svqshrn_n_s16_s32_x2(svint32x2_t, uint64_t) - UQSHRN - svuint8_t svqshrn_u8(svuint16x2_t, uint64_t) / svuint8_t svqshrn_n_u8_u16_x2(svuint16x2_t, uint64_t) - svuint16_t svqshrn_u16(svuint32x2_t, uint64_t) / svuint16_t svqshrn_n_u16_u32_x2(svuint32x2_t, uint64_t) - SQSHRUN - svuint8_t svqshrun_u8(svint16x2_t, uint64_t) / svuint8_t svqshrun_n_u8_s16_x2(svint16x2_t, uint64_t) - svuint16_t svqshrun_u16(svint32x2_t, uint64_t) / svuint16_t svqshrun_n_u16_s32_x2(svint32x2_t, uint64_t) --- clang/include/clang/Basic/arm_sve.td | 18 ++ .../sve2p3-intrinsics/acle_sve2p3_qrshr.c | 148 ++++++++++ .../sve2p3-intrinsics/acle_sve2p3_qshr.c | 271 ++++++++++++++++++ ...sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c | 160 +++++++++++ ...ependent_sve_AND_sve2p3___sme_AND_sme2p3.c | 161 +++++++++++ .../aarch64-sve2p3-intrinsics/acle_sve2p3.cpp | 51 ++++ .../acle_sve2p3_imm.cpp | 87 ++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 14 +- 10 files changed, 920 insertions(+), 16 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c create mode 100644 clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp create mode 100644 
clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..98838af2030ad 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2221,6 +2221,15 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; } +// +// Multi-vector saturating rounding shift right narrow and interleave +// +let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in { + def SVSQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "s", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVUQRSHRN_X2_2P3 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Us", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQRSHRUN_X2_2P3 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "s", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>; def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>; @@ -2300,6 +2309,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in { def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; } +// +// Multi-vector saturating shift right narrow and interleave +// +let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in { + def SVSQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "h2i", "is", MergeNone, "aarch64_sve_sqshrn_x2", 
[VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVUQSHRN_X2 : SInst<"svqshrn[_n]_{0}[_{d}_x2]", "e2i", "UiUs", MergeNone, "aarch64_sve_uqshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + def SVSQSHRUN_X2 : SInst<"svqshrun[_n]_{0}[_{d}_x2]", "e2i", "is", MergeNone, "aarch64_sve_sqshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + // // Multi-vector saturating extract narrow // diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c new file mode 100644 index 0000000000000..4d7bf51d33913 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qrshr.c @@ -0,0 +1,148 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// REQUIRES: aarch64-registered-target + +#include 
<arm_sve.h> + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrn_n_s8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale 
x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_s8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svint8_t test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@test_svqrshrn_n_u8_u16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqrshrn_n_u8_u16_x212svuint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, 
<vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqrshrun_n_u8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x 
i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z26test_svqrshrun_n_u8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x 
i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2,)(zn, 8); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c new file mode 100644 index 0000000000000..60da63d609880 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_qshr.c @@ -0,0 +1,271 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 
-target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrn_n_s8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { 
<vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z24test_svqshrn_n_s8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svint8_t test_svqshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_s8,_s16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svqshrn_n_s16_s32_x2( +// CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrn.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z25test_svqshrn_n_s16_s32_x211svint32x2_tm( +// CPP-CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrn.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] +// +svint16_t test_svqshrn_n_s16_s32_x2(svint32x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_s16,_s32_x2,)(zn, 16); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrn_n_u8_u16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z24test_svqshrn_n_u8_u16_x212svuint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 
8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrn.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_u8,_u16_x2,)(zn, 8); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svqshrn_n_u16_u32_x2( +// CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CHECK-NEXT: 
store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrn.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z25test_svqshrn_n_u16_u32_x212svuint32x2_tm( +// CPP-CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], 
ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrn.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] +// +svuint16_t test_svqshrn_n_u16_u32_x2(svuint32x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrn,_n,_u16,_u32_x2,)(zn, 16); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svqshrun_n_u16_s32_x2( +// CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } 
[[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrun.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z26test_svqshrun_n_u16_s32_x211svint32x2_tm( +// CPP-CHECK-SAME: <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 4 x i32>, <vscale x 4 x i32> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrun.x2.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i32 16) +// CPP-CHECK-NEXT: ret <vscale 
x 8 x i16> [[TMP5]] +// +svuint16_t test_svqshrun_n_u16_s32_x2(svint32x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrun,_n,_u16,_s32_x2,)(zn, 16); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svqshrun_n_u8_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef [[IMM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svqshrun_n_u8_s16_x211svint16x2_tm( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], i64 noundef 
[[IMM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[ZN:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[ZN_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CPP-CHECK-NEXT: [[IMM_ADDR:%.*]] = alloca i64, align 8 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[ZN_COERCE0]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[ZN_COERCE1]], 1 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: [[ZN1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN]], align 16 +// CPP-CHECK-NEXT: store { <vscale x 8 x i16>, <vscale x 8 x i16> } [[ZN1]], ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: store i64 [[IMM]], ptr [[IMM_ADDR]], align 8 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[ZN_ADDR]], align 16 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrun.x2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i32 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP5]] +// +svuint8_t test_svqshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) ATTR +{ + return SVE_ACLE_FUNC(svqshrun,_n,_u8,_s16_x2,)(zn, 8); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c new file mode 100644 index 0000000000000..9c31ebde4f7f8 --- /dev/null +++ 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c @@ -0,0 +1,160 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve-aes2 -verify=guard +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +ssve-aes -target-feature +sve -target-feature +sve-aes2 -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,sve-aes2" streaming_guard="sme,sve-aes2,ssve-aes" flags="feature-dependent" + +void test(void) { + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + 
svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + svpmull_pair(svuint64_t_val, svuint64_t_val); + svpmull_pair(svuint64_t_val, uint64_t_val); + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} + +void test_streaming(void) __arm_streaming{ + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // 
guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + 
svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a 
non-streaming function}} + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c new file mode 100644 index 0000000000000..145d47be283e2 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_sme2p3.c @@ -0,0 +1,161 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify=guard +// RUN: %clang_cc1 %s -fsyntax-only -triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -target-feature +sve2p3 -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,sve2p3" streaming_guard="sme,sme2p3" flags="feature-dependent" + +void test(void) { + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_s8(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_u8(svuint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrun_u8(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + 
svqshrn_s8(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_s16(svint32x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_u8(svuint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_u16(svuint32x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_u8(svint16x2_t_val, 2); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_u16(svint32x2_t_val, 2); +} + +void test_streaming(void) __arm_streaming{ + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrn_s8(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrn_u8(svuint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqrshrun_u8(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a 
non-streaming function}} + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_s8(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_s16(svint32x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_u8(svuint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrn_u16(svuint32x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrun_u8(svint16x2_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svqshrun_u16(svint32x2_t_val, 2); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svint16x2_t svint16x2_t_val; + svint32x2_t svint32x2_t_val; + svuint16x2_t svuint16x2_t_val; + svuint32x2_t svuint32x2_t_val; + + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // 
streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_s8(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrn_u8(svuint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqrshrun_u8(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_s8_s16_x2(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_s16_s32_x2(svint32x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_u8_u16_x2(svuint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_n_u16_u32_x2(svuint32x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_s8(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_s16(svint32x2_t_val, 2); + // 
guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_u8(svuint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrn_u16(svuint32x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_n_u8_s16_x2(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_n_u16_s32_x2(svint32x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_u8(svint16x2_t_val, 2); + // guard-error@+2 {{builtin can only be called from a non-streaming function}} + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svqshrun_u16(svint32x2_t_val, 2); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp new file mode 100644 index 0000000000000..6300b0ebd2ab5 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3.cpp @@ -0,0 +1,51 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -verify=overload -verify-ignore-unexpected=error,note -emit-llvm -o - %s +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +void test(svint32x2_t s32x2, svint16x2_t s16x2, svint8x2_t s8x2, svuint32x2_t u32x2, svuint16x2_t u16x2, svuint8x2_t u8x2) +{ + // expected-error@+2 {{'svqshrn_n_s8_s16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrn_s8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrn,_n,_s8,_s16_x2,)(s16x2, 8); + + // expected-error@+2 {{'svqshrn_n_s16_s32_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrn_s16' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrn,_n,_s16,_s32_x2,)(s32x2, 16); + + // expected-error@+2 {{'svqshrn_n_u8_u16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrn_u8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrn,_n,_u8,_u16_x2,)(u16x2, 8); + + // expected-error@+2 {{'svqshrn_n_u16_u32_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrn_u16' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrn,_n,_u16,_u32_x2,)(u32x2, 16); + + // expected-error@+2 {{'svqshrun_n_u16_s32_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrun_u16' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrun,_n,_u16,_s32_x2,)(s32x2, 16); + + // expected-error@+2 {{'svqshrun_n_u8_s16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqshrun_u8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqshrun,_n,_u8,_s16_x2,)(s16x2, 8); + + // expected-error@+2 {{'svqrshrn_n_s8_s16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqrshrn_s8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s16_x2,)(s16x2, 8); + + // expected-error@+2 {{'svqrshrn_n_u8_u16_x2' 
needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqrshrn_u8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u16_x2,)(u16x2, 8); + + // expected-error@+2 {{'svqrshrun_n_u8_s16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + // overload-error@+1 {{'svqrshrun_u8' needs target feature (sve,sve2p3)|(sme,sme2p3)}} + SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s16_x2,)(s16x2, 8); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp new file mode 100644 index 0000000000000..e0d21aed716d2 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp @@ -0,0 +1,87 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s + +#include <arm_sve.h> + + + +void test_svqshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) +{ + svqshrn_n_s8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_s8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_s8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrn_n_s8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrn_n_s16_s32_x2(svint32x2_t zn, uint64_t imm) +{ + svqshrn_n_s16_s32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_s16_s32_x2(zn, 17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_s16_s32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrn_n_s16_s32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant 
integer}} +} + +void test_svqshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) +{ + svqshrn_n_u8_u16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_u8_u16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrn_n_u8_u16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrn_n_u8_u16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrn_n_u16_u32_x2(svuint32x2_t zn, uint64_t imm) +{ + svqshrn_n_u16_u32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_u16_u32_x2(zn, 17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrn_n_u16_u32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrn_n_u16_u32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrun_n_u16_s32_x2(svint32x2_t zn, uint64_t imm) +{ + svqshrun_n_u16_s32_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrun_n_u16_s32_x2(zn, 17); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + svqshrun_n_u16_s32_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 16]}} + + svqshrun_n_u16_s32_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) +{ + svqshrun_n_u8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value 
{{[0-9]+}} is outside the valid range [1, 8]}} + + svqshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +void test_svqrshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) { + svqrshrn_n_s8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_s8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_s8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrn_n_s8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +svuint8_t test_svqrshrn_n_u8_u16_x2(svuint16x2_t zn, uint64_t imm) +{ + svqrshrn_n_u8_u16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_u8_u16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrn_n_u8_u16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrn_n_u8_u16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} + +svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) +{ + svqrshrun_n_u8_s16_x2(zn, 0); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + svqrshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} + + svqrshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 75929cbc222ad..f977acb71223b --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ 
-3778,6 +3778,13 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_uqcvtn_x4 : SVE2_CVT_VG4_SINGLE_Intrinsic; def int_aarch64_sve_sqcvtun_x4 : SVE2_CVT_VG4_SINGLE_Intrinsic; + // + // Multi-vector saturating shift right narrow and interleave + // + def int_aarch64_sve_sqshrn_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqshrn_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqshrun_x2 : SVE2_VG2_Multi_Imm_Intrinsic; + // // Multi-Single add/sub // @@ -4293,4 +4300,3 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; } - diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 4e67d067f4e10..ed00ca55ef606 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4731,15 +4731,15 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; // SVE2 saturating shift right narrow by immediate and interleave - defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>; - defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001>; - defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000>; - defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100>; - defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111>; - defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010>; - defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>; - defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>; - defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>; + defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; + defm 
SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; + defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000, int_aarch64_sve_sqshrn_x2>; + defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100, int_aarch64_sve_sqshrun_x2>; + defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; + defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010, int_aarch64_sve_uqshrn_x2>; + defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, int_aarch64_sve_sqshrun_x2>; + defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, int_aarch64_sve_sqshrn_x2>; + defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, int_aarch64_sve_uqshrn_x2>; defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">; } // End HasSME2p3orSVE2p3 diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index af1c8676a99ce..eedd445f0eb95 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -720,7 +720,7 @@ class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueTyp : Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))), (inst $Op1, $Op2, $Op3)>; -class SVE2p1_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> +class SVE_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; @@ -10104,7 +10104,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte } // SVE2 multi-vec shift narrow -class sve2p1_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, +class 
sve_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, Operand immtype, bits<3> opc, bits<2> tsz> : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn, immtype:$imm), mnemonic, "\t$Zd, $Zn, $imm", @@ -10128,17 +10128,19 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOper } multiclass sve_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { - def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR16, ZZ_s_mul_r, vecshiftR16, opc, 0b01> { + def NAME : sve_multi_vec_shift_narrow<mnemonic, ZPR16, ZZ_s_mul_r, vecshiftR16, opc, 0b01> { let Inst{19} = imm{3}; // imm4 } - def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; + def : SVE_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } -multiclass sve_multi_vec_round_shift_narrow<string mnemonic, bits<3> opc> { - def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR8, ZZ_h_mul_r, vecshiftR8, opc, 0b00> { +multiclass sve_multi_vec_round_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { + def NAME : sve_multi_vec_shift_narrow<mnemonic, ZPR8, ZZ_h_mul_r, vecshiftR8, opc, 0b00> { let Inst{19} = 0b1; // always 1 for imm3 version } + + def : SVE_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv16i8, nxv8i16, vecshiftR8>; } // SME2 multi-vec contiguous load (scalar plus scalar, two registers) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
