llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Sam Tebbs (SamTebbs33) <details> <summary>Changes</summary> This patch adds a warning that's emitted when a builtin call uses ZA state but the calling function doesn't provide any. Patch by David Sherwood <david.sherwood@<!-- -->arm.com>. --- Patch is 186.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75805.diff 24 Files Affected: - (modified) clang/include/clang/Basic/CMakeLists.txt (+3) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+3) - (modified) clang/lib/Sema/SemaChecking.cpp (+24) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c (+8-8) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c (+8-8) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c (+10-10) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c (+10-10) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c (+5-5) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c (+7-7) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c (+5-5) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c (+7-7) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c (+5-5) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c (+96-96) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c (+10-10) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c (+10-10) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c (+5-5) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c (+96-96) - (modified) clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c (+4-4) - (modified) clang/test/Sema/aarch64-incompat-sm-builtin-calls.c (+5) - (modified) clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp (+7-7) - (modified) clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c (+4-4) - (modified) clang/utils/TableGen/SveEmitter.cpp (+41) - (modified) clang/utils/TableGen/TableGen.cpp (+6) - (modified) clang/utils/TableGen/TableGenBackends.h (+1) ``````````diff diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 73fd521aeeec31..28baa2e45e423e 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -103,6 +103,9 @@ clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs SOURCE arm_sme.td TARGET ClangARMSmeStreamingAttrs) +clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltinsZAState) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c8e32a63684f28..0d8da213088d98 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3155,6 +3155,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error< def warn_attribute_arm_sm_incompat_builtin : Warning< "builtin call has undefined behaviour when called from a %0 function">, InGroup<DiagGroup<"undefined-arm-streaming">>; +def warn_attribute_arm_za_builtin_no_za_state : Warning< + "builtin call is not valid when calling from a function without active ZA state">, + InGroup<DiagGroup<"undefined-arm-za">>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 49be88051e6543..42e29e43093789 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3174,6 +3174,25 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, } } +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr<ArmNewZAAttr>()) + return true; + if (const auto *T = FD->getType()->getAs<FunctionProtoType>()) + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: + return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (const FunctionDecl *FD = getCurFunctionDecl()) { std::optional<ArmStreamingType> BuiltinType; @@ -3186,6 +3205,11 @@ bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinType) checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + + if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); } // Range check SME intrinsics that take immediate values. diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c index ee6c1c9dd566b5..55d2e355897f72 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming { +void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming { +void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c index 254ea89d22c501..8e9c2e7da46a3a 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming { +void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming { +void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c index 5622568c4cd76b..e17782db222b60 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -22,7 +22,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za8(0, slice_base, pg, ptr); svld1_hor_za8(0, slice_base + 15, pg, ptr); } @@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za16(0, slice_base, pg, ptr); svld1_hor_za16(1, slice_base + 7, pg, ptr); } @@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za32(0, slice_base, pg, ptr); svld1_hor_za32(3, slice_base + 3, pg, ptr); } @@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za64(0, slice_base, pg, ptr); svld1_hor_za64(7, slice_base + 1, pg, ptr); } @@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za128(0, slice_base, pg, ptr); svld1_hor_za128(15, slice_base, pg, ptr); } @@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za8(0, slice_base, pg, ptr); svld1_ver_za8(0, slice_base + 15, pg, ptr); } @@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za16(0, slice_base, pg, ptr); svld1_ver_za16(1, slice_base + 7, pg, ptr); } @@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za32(0, slice_base, pg, ptr); svld1_ver_za32(3, slice_base + 3, pg, ptr); } @@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za64(0, slice_base, pg, ptr); svld1_ver_za64(7, slice_base + 1, pg, ptr); } @@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming { +void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { svld1_ver_za128(0, slice_base, pg, ptr); svld1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c index 0fe7dcfc0a7994..0fa77e1144a7d9 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -28,7 +28,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -57,7 +57,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -86,7 +86,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming { +void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum)... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/75805 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits