https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/91356
Scalable types are only available when: * The function is compiled with +sve * The function is compiled with +sme and the function is executed in Streaming-SVE mode. >From 1cc17981a612dcb31fba86c5d64c444f26a44d38 Mon Sep 17 00:00:00 2001 From: Sander de Smalen <sander.desma...@arm.com> Date: Fri, 3 May 2024 13:07:18 +0100 Subject: [PATCH] [Clang][AArch64] Require SVE or SSVE for scalable types. Scalable types are only available when: * The function is compiled with +sve * The function is compiled with +sme and the function is executed in Streaming-SVE mode. --- .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/lib/Sema/Sema.cpp | 11 ++- clang/lib/Sema/SemaDecl.cpp | 16 ++-- .../acle_sme2_reinterpret_svcount_svbool.c | 10 ++- .../aarch64-sve2-intrinsics/acle_sve2_revd.c | 78 ++++++++++--------- .../acle_sve2p1_bfadd.c | 18 +++-- .../acle_sve2p1_bfmax.c | 18 +++-- .../acle_sve2p1_bfmaxnm.c | 18 +++-- .../acle_sve2p1_bfmin.c | 18 +++-- .../acle_sve2p1_bfminnm.c | 18 +++-- .../acle_sve2p1_bfmla.c | 18 +++-- .../acle_sve2p1_bfmls.c | 18 +++-- .../acle_sve2p1_bfmul.c | 18 +++-- .../acle_sve2p1_bfsub.c | 18 +++-- .../acle_sve2p1_create2_bool.c | 20 ++--- .../acle_sve2p1_get4_bool.c | 24 +++--- .../acle_sve2p1_undef_bool.c | 10 ++- .../Sema/aarch64-sme2-sve2p1-diagnostics.c | 2 + clang/test/Sema/aarch64-sme2p1-diagnostics.c | 2 +- 19 files changed, 212 insertions(+), 125 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9a0bae9c216d..72326d4509cd 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3205,6 +3205,8 @@ def warn_attribute_arm_zt0_builtin_no_zt0_state : Warning< InGroup<DiagGroup<"undefined-arm-zt0">>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; +def err_sve_vector_in_non_streaming_function : Error< + "SVE vector type %0 cannot be used in a non-streaming function">; def err_attribute_riscv_rvv_bits_unsupported : Error< "%0 is only supported when '-mrvv-vector-bits=<bits>' is specified with a " "value of \"zvl\" or a power 2 in the range [64,65536]">; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index a1e32d391ed0..92f859b7146c 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2056,9 +2056,14 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) { llvm::StringMap<bool> CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); - if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap) && - !Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) - Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty; + if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) { + if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) + Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty; + else if (!IsArmStreamingFunction(FD, /*IncludeLocallyStreaming=*/true)) { + Diag(D->getLocation(), diag::err_sve_vector_in_non_streaming_function) + << Ty; + } + } } }; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 590f37837eb2..0d11a2acf256 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8982,11 +8982,17 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { const FunctionDecl *FD = cast<FunctionDecl>(CurContext); llvm::StringMap<bool> CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); - if (!Builtin::evaluateRequiredTargetFeatures( - "sve", CallerFeatureMap)) { - Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T; - NewVD->setInvalidDecl(); - return; + + if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) { + if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) { + Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T; + NewVD->setInvalidDecl(); + } else if (!IsArmStreamingFunction(FD, /*IncludeLocallyStreaming=*/true)) { + Diag(NewVD->getLocation(), + diag::err_sve_vector_in_non_streaming_function) + << T; + NewVD->setInvalidDecl(); + } } } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c index b3d5f4a4c4a5..c225c5c6c669 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c @@ -9,6 +9,12 @@ #include <arm_sme.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin.ยง #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[CNT:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] // -svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatible +svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret,_b,,)(cnt); } @@ -41,7 +47,7 @@ svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatib // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP0]] // -svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) __arm_streaming_compatible +svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret,_c,,)(pg); } diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c index 128a7eb102da..d2a4e1669a1c 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c @@ -12,6 +12,12 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 @@ -29,7 +35,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) { +svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s8, _z, )(pg, op); } @@ -45,7 +51,7 @@ svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) { +svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s16, _z, )(pg, op); } @@ -61,7 +67,7 @@ svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) { +svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s32, _z, )(pg, op); } @@ -77,7 +83,7 @@ svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) { +svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s64, _z, )(pg, op); } @@ -91,7 +97,7 @@ svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) { +svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u8, _z, )(pg, op); } // CHECK-LABEL: @test_svrevd_u16_z( @@ -106,7 +112,7 @@ svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) { +svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u16, _z, )(pg, op); } @@ -122,7 +128,7 @@ svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) { +svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u32, _z, )(pg, op); } @@ -138,7 +144,7 @@ svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) { +svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u64, _z, )(pg, op); } @@ -152,7 +158,7 @@ svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) { +svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s8, _m, )(inactive, pg, op); } @@ -168,7 +174,7 @@ svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) { +svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s16, _m, )(inactive, pg, op); } @@ -184,7 +190,7 @@ svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) { +svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s32, _m, )(inactive, pg, op); } @@ -200,7 +206,7 @@ svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) { +svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s64, _m, )(inactive, pg, op); } @@ -214,7 +220,7 @@ svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { +svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u8, _m, )(inactive, pg, op); } @@ -230,7 +236,7 @@ svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { +svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u16, _m, )(inactive, pg, op); } @@ -246,7 +252,7 @@ svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { +svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u32, _m, )(inactive, pg, op); } @@ -262,7 +268,7 @@ svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { +svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u64, _m, )(inactive, pg, op); } @@ -276,7 +282,7 @@ svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) { +svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s8, _x, )(pg, op); } @@ -292,7 +298,7 @@ svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) { +svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s16, _x, )(pg, op); } @@ -308,7 +314,7 @@ svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) { +svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s32, _x, )(pg, op); } @@ -324,7 +330,7 @@ svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) { +svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _s64, _x, )(pg, op); } @@ -338,7 +344,7 @@ svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // -svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) { +svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u8, _x, )(pg, op); } @@ -354,7 +360,7 @@ svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // -svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) { +svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u16, _x, )(pg, op); } @@ -370,7 +376,7 @@ svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // -svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { +svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u32, _x, )(pg, op); } @@ -386,7 +392,7 @@ svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // -svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { +svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op); } @@ -403,7 +409,7 @@ svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.revd.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) { +svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _bf16, _z, )(pg, op); } @@ -419,7 +425,7 @@ svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.revd.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] // -svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) { +svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f16, _z, )(pg, op); } @@ -435,7 +441,7 @@ svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.revd.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] // -svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) { +svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f32, _z, )(pg, op); } @@ -451,7 +457,7 @@ svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.revd.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] // -svfloat64_t test_svrevd_f64_z(svbool_t pg, svfloat64_t op) { +svfloat64_t test_svrevd_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f64, _z, )(pg, op); } @@ -467,7 +473,7 @@ svfloat64_t test_svrevd_f64_z(svbool_t pg, svfloat64_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.revd.nxv8bf16(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svrevd_bf16_m(svbfloat16_t inactive, svbool_t pg, svbfloat16_t op) { +svbfloat16_t test_svrevd_bf16_m(svbfloat16_t inactive, svbool_t pg, svbfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _bf16, _m, )(inactive, pg, op); } @@ -483,7 +489,7 @@ svbfloat16_t test_svrevd_bf16_m(svbfloat16_t inactive, svbool_t pg, svbfloat16_t // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.revd.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] // -svfloat16_t test_svrevd_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { +svfloat16_t test_svrevd_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f16, _m, )(inactive, pg, op); } @@ -499,7 +505,7 @@ svfloat16_t test_svrevd_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.revd.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] // -svfloat32_t test_svrevd_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { +svfloat32_t test_svrevd_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f32, _m, )(inactive, pg, op); } @@ -515,7 +521,7 @@ svfloat32_t test_svrevd_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.revd.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] // -svfloat64_t test_svrevd_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { +svfloat64_t test_svrevd_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f64, _m, )(inactive, pg, op); } @@ -531,7 +537,7 @@ svfloat64_t test_svrevd_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.revd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svrevd_bf16_x(svbool_t pg, svbfloat16_t op) { +svbfloat16_t test_svrevd_bf16_x(svbool_t pg, svbfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _bf16, _x, )(pg, op); } @@ -547,7 +553,7 @@ svbfloat16_t test_svrevd_bf16_x(svbool_t pg, svbfloat16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.revd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] // -svfloat16_t test_svrevd_f16_x(svbool_t pg, svfloat16_t op) { +svfloat16_t test_svrevd_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f16, _x, )(pg, op); } @@ -563,7 +569,7 @@ svfloat16_t test_svrevd_f16_x(svbool_t pg, svfloat16_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.revd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] // -svfloat32_t test_svrevd_f32_x(svbool_t pg, svfloat32_t op) { +svfloat32_t test_svrevd_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f32, _x, )(pg, op); } @@ -579,6 +585,6 @@ svfloat32_t test_svrevd_f32_x(svbool_t pg, svfloat32_t op) { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.revd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] // -svfloat64_t test_svrevd_f64_x(svbool_t pg, svfloat64_t op) { +svfloat64_t test_svrevd_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svrevd, _f64, _x, )(pg, op); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 9d01ba773694..61b088b08338 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _bf16, _x)(pg, op1, op2); } @@ -84,7 +90,7 @@ svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _n_bf16, _m)(pg, op1, op2); } @@ -107,7 +113,7 @@ svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _n_bf16, _z)(pg, op1, op2); } @@ -128,7 +134,7 @@ svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svadd, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c index ce28a0b0653d..36d20bdf745d 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmax, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c index d8fec8c5145e..d89cd777de1e 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c index 5efaa37a1464..8bc88572ebc5 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmin, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c index 98fd12b3a839..d90b38262624 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svminnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svminnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svminnm, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c index 0a70466a540d..63c7e3e52ce1 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _bf16, _m)(pg, op1, op2, op3); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _bf16, _z)(pg, op1, op2, op3); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _bf16, _x)(pg, op1, op2, op3); } @@ -84,7 +90,7 @@ svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _n_bf16, _m)(pg, op1, op2, op3); } @@ -107,7 +113,7 @@ svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _n_bf16, _z)(pg, op1, op2, op3); } @@ -128,7 +134,7 @@ svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmla_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmla_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmla, _n_bf16, _x)(pg, op1, op2, op3); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c index ed71d4e490bb..4898a8765f38 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _bf16, _m)(pg, op1, op2, op3); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _bf16, _z)(pg, op1, op2, op3); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _bf16, _x)(pg, op1, op2, op3); } @@ -84,7 +90,7 @@ svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _n_bf16, _m)(pg, op1, op2, op3); } @@ -107,7 +113,7 @@ svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _n_bf16, _z)(pg, op1, op2, op3); } @@ -128,7 +134,7 @@ svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmls_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible +svbfloat16_t test_svmls_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) MODE_ATTR { return SVE_ACLE_FUNC(svmls, _n_bf16, _x)(pg, op1, op2, op3); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c index f0a3664426de..904d74303f74 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svmul_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svmul_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svmul, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c index 6f08ea84ab82..82199f647b60 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c @@ -8,6 +8,12 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 @@ -27,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _bf16, _m)(pg, op1, op2); } @@ -46,7 +52,7 @@ svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _bf16, _z)(pg, op1, op2); } @@ -63,7 +69,7 @@ svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _bf16, _x)(pg, op1, op2); } @@ -85,7 +91,7 @@ svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _n_bf16, _m)(pg, op1, op2); } @@ -108,7 +114,7 @@ svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] // -svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _n_bf16, _z)(pg, op1, op2); } @@ -129,7 +135,7 @@ svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] // -svbfloat16_t test_svsub_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible +svbfloat16_t test_svsub_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsub, _n_bf16, _x)(pg, op1, op2); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c index d441c934bad0..b5eb220d7776 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c @@ -5,19 +5,25 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DTEST_SME -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 @@ -25,12 +31,6 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif -#ifndef TEST_SME2 -#define ATTR -#else -#define ATTR __arm_streaming -#endif - // CHECK-LABEL: @test_svcreate2_b( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[X0:%.*]], i64 0) @@ -43,7 +43,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP0]], <vscale x 16 x i1> [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP1]] // -svboolx2_t test_svcreate2_b(svbool_t x0, svbool_t x1) ATTR +svboolx2_t test_svcreate2_b(svbool_t x0, svbool_t x1) MODE_ATTR { return SVE_ACLE_FUNC(svcreate2,_b,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c index e5016d6cb3dc..e3530c142c83 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c @@ -5,19 +5,25 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 @@ -25,12 +31,6 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif -#ifndef TEST_SME -#define ATTR -#else -#define ATTR __arm_streaming -#endif - // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svget4_b_0( @@ -43,7 +43,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] // -svbool_t test_svget4_b_0(svboolx4_t tuple) ATTR +svbool_t test_svget4_b_0(svboolx4_t tuple) MODE_ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 0); } @@ -60,7 +60,7 @@ svbool_t test_svget4_b_0(svboolx4_t tuple) ATTR // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] // -svbool_t test_svget4_b_1(svboolx4_t tuple) ATTR +svbool_t test_svget4_b_1(svboolx4_t tuple) MODE_ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 1); } @@ -77,7 +77,7 @@ svbool_t test_svget4_b_1(svboolx4_t tuple) ATTR // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[TUPLE:%.*]], i64 48) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] // -svbool_t test_svget4_b_3(svboolx4_t tuple) ATTR +svbool_t test_svget4_b_3(svboolx4_t tuple) MODE_ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 3); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c index 5197e41c1ffd..01363681dad2 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c @@ -9,6 +9,12 @@ #include <arm_sve.h> +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR __arm_streaming_compatible +#endif + // CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svundef2_b( // CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z15test_svundef2_bv( // @@ -18,7 +24,7 @@ // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: ret <vscale x 32 x i1> undef // -svboolx2_t test_svundef2_b() { +svboolx2_t test_svundef2_b(void) MODE_ATTR { return svundef2_b(); } @@ -31,6 +37,6 @@ svboolx2_t test_svundef2_b() { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: ret <vscale x 64 x i1> undef // -svboolx4_t test_svundef4_b() { +svboolx4_t test_svundef4_b(void) MODE_ATTR { return svundef4_b(); } diff --git a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c index 4debc14190aa..2012221b4804 100644 --- a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c +++ b/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c @@ -6,6 +6,8 @@ //svldnt1: +// expected-error@+3 {{SVE vector type 'svcount_t' (aka '__SVCount_t') cannot be used in a non-streaming function}} +// expected-error@+2 {{SVE vector type 'svuint8x2_t' (aka '__clang_svuint8x2_t') cannot be used in a non-streaming function}} __attribute__((target("+sme2"))) svuint8x2_t sme2_or_sve2p1_intrinsic_test_sme2_invalid(svcount_t png, const uint8_t *rn) { // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} diff --git a/clang/test/Sema/aarch64-sme2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2p1-diagnostics.c index a0adb0403858..c3c8b4ede305 100644 --- a/clang/test/Sema/aarch64-sme2p1-diagnostics.c +++ b/clang/test/Sema/aarch64-sme2p1-diagnostics.c @@ -3,7 +3,7 @@ // REQUIRES: aarch64-registered-target #include "arm_sme.h" -svuint8x2_t test_sme2p1(svuint8x2_t x) { +svuint8x2_t test_sme2p1(svuint8x2_t x) __arm_streaming { // expected-no-diagnostics return x; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits