https://github.com/NickGuy-Arm updated https://github.com/llvm/llvm-project/pull/120265
>From 898c30b5b97e80b8bdeb024aec30d0e530d39d42 Mon Sep 17 00:00:00 2001 From: Nick Guy <nicholas....@arm.com> Date: Fri, 13 Dec 2024 13:39:24 +0000 Subject: [PATCH 1/5] [clang][llvm][aarch64] Add aarch64_sme_in_streaming_mode intrinsic --- clang/include/clang/Basic/arm_sme.td | 2 + .../sme-intrinsics/acle_sme_state_funs.c | 38 +++++++--------- clang/utils/TableGen/SveEmitter.cpp | 8 +--- llvm/include/llvm/IR/IntrinsicsAArch64.td | 1 + .../Target/AArch64/AArch64ISelLowering.cpp | 9 ++++ .../CodeGen/AArch64/sme-intrinsics-state.ll | 44 +++++++++++++++++++ 6 files changed, 74 insertions(+), 28 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 6b31dec004a1e2..e66a023f998ed4 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -716,6 +716,8 @@ let SMETargetGuard = "sme2" in { def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>; } +def IN_STREAMING_MODE : Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>; + // // lookup table expand four contiguous registers // diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c index 9ba1527f269663..e880f7d7dbacd8 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c @@ -8,19 +8,13 @@ // CHECK-LABEL: @test_in_streaming_mode( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 -// CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1 -// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0 -// CHECK-NEXT: ret i1 [[TOBOOL_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode() +// CHECK-NEXT: ret i1 [[TMP0]] // // CPP-CHECK-LABEL: @_Z22test_in_streaming_modev( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3:[0-9]+]] -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i64 [[AND_I]], 0 -// CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode() +// CPP-CHECK-NEXT: ret i1 [[TMP0]] // bool test_in_streaming_mode(void) __arm_streaming_compatible { return __arm_in_streaming_mode(); @@ -28,12 +22,12 @@ bool test_in_streaming_mode(void) __arm_streaming_compatible { // CHECK-LABEL: @test_za_disable( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]] +// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]] // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_za_disablev( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR3]] +// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]] // CPP-CHECK-NEXT: ret void // void test_za_disable(void) __arm_streaming_compatible { @@ -42,14 +36,14 @@ void test_za_disable(void) __arm_streaming_compatible { // CHECK-LABEL: @test_has_sme( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]] // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] // // CPP-CHECK-LABEL: @_Z12test_has_smev( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR3]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 // CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -72,12 +66,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") { // CHECK-LABEL: @test_sc_memcpy( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible { @@ -86,12 +80,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp // CHECK-LABEL: @test_sc_memmove( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible { @@ -100,12 +94,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com // CHECK-LABEL: @test_sc_memset( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible { @@ -114,12 +108,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible { // CHECK-LABEL: @test_sc_memchr( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR3]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible { diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 14e5637f62517e..883eb990f7ba49 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1636,12 +1636,8 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << " return x0 & (1ULL << 63);\n"; OS << "}\n\n"; - OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible " - "{\n"; - OS << " uint64_t x0, x1;\n"; - OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; - OS << " return x0 & 1;\n"; - OS << "}\n\n"; + OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))"; + OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n"; OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n"; OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n"; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 53a66099a92bda..cc7a81e15f6609 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2974,6 +2974,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>; + def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">; class SME_OuterProduct_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 28f304100326c6..708753f5762b4c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1183,6 +1183,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setMaxDivRemBitWidthSupported(128); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget->hasSME()) + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); if (Subtarget->isNeonAvailable()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to @@ -27292,6 +27294,13 @@ void AArch64TargetLowering::ReplaceNodeResults( N->getOperand(1), N->getOperand(2)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; + } + case Intrinsic::aarch64_sme_in_streaming_mode: { + auto DL = SDLoc(N); + SDValue Chain = DAG.getEntryNode(); + auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM)); + return; } case Intrinsic::experimental_vector_match: case Intrinsic::get_active_lane_mask: { diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll new file mode 100644 index 00000000000000..1e534e746d7e38 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + + +define i1 @streaming_mode_st_compatible() #0 { +; CHECK-LABEL: streaming_mode_st_compatible: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode() + ret i1 %mode +} + +define i1 @streaming_mode_st_enabled() #1 { +; CHECK-LABEL: streaming_mode_st_enabled: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode() + ret i1 %mode +} + +define i1 @streaming_mode_st_disabled() #2 { +; CHECK-LABEL: streaming_mode_st_disabled: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and w0, w0, #0x1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode() + ret i1 %mode +} + + +attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"} +attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"} +attributes #2 = {nounwind memory(none)} >From 20701ac53cfb49a27df947c0eecb59acbb25a1dc Mon Sep 17 00:00:00 2001 From: Nick Guy <nicholas....@arm.com> Date: Wed, 18 Dec 2024 11:39:07 +0000 Subject: [PATCH 2/5] Remove redundant __arm_in_streaming_mode declaration --- clang/include/clang/Basic/arm_sme.td | 2 +- clang/utils/TableGen/SveEmitter.cpp | 3 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index e66a023f998ed4..891ed9874bb3d0 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -716,7 +716,7 @@ let SMETargetGuard = "sme2" in { def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>; } -def IN_STREAMING_MODE : Inst<"in_streaming_mode", "d", "", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>; +def IN_STREAMING_MODE : Inst<"__arm_in_streaming_mode", "sv", "Pc", MergeNone, "aarch64_sme_in_streaming_mode", [IsOverloadNone, IsStreamingCompatible], []>; // // lookup table expand four contiguous registers diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 883eb990f7ba49..e0616d679e45b9 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1636,9 +1636,6 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << " return x0 & (1ULL << 63);\n"; OS << "}\n\n"; - OS << "__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_in_streaming_mode)))"; - OS << " bool __arm_in_streaming_mode(void) __arm_streaming_compatible;\n\n"; - OS << "void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n"; OS << "void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;\n"; OS << "void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible;\n"; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 708753f5762b4c..7af38bc82aadf1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27295,11 +27295,13 @@ void AArch64TargetLowering::ReplaceNodeResults( Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); return; } - case Intrinsic::aarch64_sme_in_streaming_mode: { + case Intrinsic::aarch64_sme_in_streaming_mode: { auto DL = SDLoc(N); SDValue Chain = DAG.getEntryNode(); - auto RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0)); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM)); + auto RuntimePStateSM = + getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0)); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM)); return; } case Intrinsic::experimental_vector_match: >From 453324a87f9ba669b3c997f47c5d3ce76992a6ea Mon Sep 17 00:00:00 2001 From: Nick Guy <nicholas....@arm.com> Date: Wed, 18 Dec 2024 13:34:44 +0000 Subject: [PATCH 3/5] Replaced auto with actual types --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7af38bc82aadf1..78d6d71a7a98bb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27296,9 +27296,9 @@ void AArch64TargetLowering::ReplaceNodeResults( return; } case Intrinsic::aarch64_sme_in_streaming_mode: { - auto DL = SDLoc(N); + SDLoc DL(N); SDValue Chain = DAG.getEntryNode(); - auto RuntimePStateSM = + SDValue RuntimePStateSM = getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0)); Results.push_back( DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM)); >From 0eca79f711d4a5535ef04563ab4a79cd7ebbe8df Mon Sep 17 00:00:00 2001 From: Nick Guy <nicholas....@arm.com> Date: Wed, 18 Dec 2024 15:57:09 +0000 Subject: [PATCH 4/5] Address comments --- clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++ .../sme-intrinsics/acle_sme_state_funs.c | 55 ++++++++++++++----- .../CodeGen/AArch64/sme-intrinsics-state.ll | 28 +--------- 3 files changed, 56 insertions(+), 42 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4d4b7428abd505..fe9f0ade22f57d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11285,6 +11285,21 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, if (Builtin->LLVMIntrinsic == 0) return nullptr; + if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) { + // If we already know the streaming mode, don't bother with the intrinsic + // and emit a constant instead + auto FD = cast<FunctionDecl>(CurFuncDecl); + if (const Type *Ty = FD->getType().getTypePtrOrNull()) + if (const auto *FPT = Ty->getAs<FunctionProtoType>()) { + unsigned SMEAttrs = FPT->getAArch64SMEAttributes(); + if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) { + bool IsStreamingMode = + SMEAttrs & FunctionType::SME_PStateSMEnabledMask; + return ConstantInt::getBool(Builder.getContext(), IsStreamingMode); + } + } + } + // Predicates must match the main datatype. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c index e880f7d7dbacd8..72f2d17fc6dc11 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_state_funs.c @@ -6,28 +6,53 @@ #include <arm_sme.h> -// CHECK-LABEL: @test_in_streaming_mode( +// CHECK-LABEL: @test_in_streaming_mode_streaming_compatible( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode() // CHECK-NEXT: ret i1 [[TMP0]] // -// CPP-CHECK-LABEL: @_Z22test_in_streaming_modev( +// CPP-CHECK-LABEL: @_Z43test_in_streaming_mode_streaming_compatiblev( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sme.in.streaming.mode() // CPP-CHECK-NEXT: ret i1 [[TMP0]] // -bool test_in_streaming_mode(void) __arm_streaming_compatible { +bool test_in_streaming_mode_streaming_compatible(void) __arm_streaming_compatible { + return __arm_in_streaming_mode(); +} + +// CHECK-LABEL: @test_in_streaming_mode_streaming( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i1 true +// +// CPP-CHECK-LABEL: @_Z32test_in_streaming_mode_streamingv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret i1 true +// +bool test_in_streaming_mode_streaming(void) __arm_streaming { +// + return __arm_in_streaming_mode(); +} + +// CHECK-LABEL: @test_in_streaming_mode_non_streaming( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i1 false +// +// CPP-CHECK-LABEL: @_Z36test_in_streaming_mode_non_streamingv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret i1 false +// +bool test_in_streaming_mode_non_streaming(void) { return __arm_in_streaming_mode(); } // CHECK-LABEL: @test_za_disable( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]] +// CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR7:[0-9]+]] // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_za_disablev( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR5:[0-9]+]] +// CPP-CHECK-NEXT: tail call void @__arm_za_disable() #[[ATTR7:[0-9]+]] // CPP-CHECK-NEXT: ret void // void test_za_disable(void) __arm_streaming_compatible { @@ -36,14 +61,14 @@ void test_za_disable(void) __arm_streaming_compatible { // CHECK-LABEL: @test_has_sme( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR7]] // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 // CHECK-NEXT: ret i1 [[TOBOOL_I]] // // CPP-CHECK-LABEL: @_Z12test_has_smev( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR5]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR7]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp slt i64 [[TMP1]], 0 // CPP-CHECK-NEXT: ret i1 [[TOBOOL_I]] @@ -66,12 +91,12 @@ void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") { // CHECK-LABEL: @test_sc_memcpy( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memcpyPvPKvm( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible { @@ -80,12 +105,12 @@ void *test_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_comp // CHECK-LABEL: @test_sc_memmove( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z15test_sc_memmovePvPKvm( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible { @@ -94,12 +119,12 @@ void *test_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_com // CHECK-LABEL: @test_sc_memset( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memsetPvim( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible { @@ -108,12 +133,12 @@ void *test_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible { // CHECK-LABEL: @test_sc_memchr( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CHECK-NEXT: ret ptr [[CALL]] // // CPP-CHECK-LABEL: @_Z14test_sc_memchrPvim( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR5]] +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[S:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR7]] // CPP-CHECK-NEXT: ret ptr [[CALL]] // void *test_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible { diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll index 1e534e746d7e38..4d78ae6c564839 100644 --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s -define i1 @streaming_mode_st_compatible() #0 { +define i1 @streaming_mode_streaming_compatible() #0 { ; CHECK-LABEL: streaming_mode_st_compatible: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -14,31 +14,5 @@ define i1 @streaming_mode_st_compatible() #0 { ret i1 %mode } -define i1 @streaming_mode_st_enabled() #1 { -; CHECK-LABEL: streaming_mode_st_enabled: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: and w0, w0, #0x1 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode() - ret i1 %mode -} - -define i1 @streaming_mode_st_disabled() #2 { -; CHECK-LABEL: streaming_mode_st_disabled: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: and w0, w0, #0x1 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret - %mode = tail call noundef i1 @llvm.aarch64.sme.in.streaming.mode() - ret i1 %mode -} - attributes #0 = {nounwind memory(none) "aarch64_pstate_sm_compatible"} -attributes #1 = {nounwind memory(none) "aarch64_pstate_sm_enabled"} -attributes #2 = {nounwind memory(none)} >From afaf44786fe59e2ec1494fb13569e55bee4d44c3 Mon Sep 17 00:00:00 2001 From: Nick Guy <nicholas....@arm.com> Date: Mon, 6 Jan 2025 10:08:16 +0000 Subject: [PATCH 5/5] Address nits and update test --- clang/lib/CodeGen/CGBuiltin.cpp | 17 ++++++++--------- .../CodeGen/AArch64/sme-intrinsics-state.ll | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index fe9f0ade22f57d..fdbd5c83c6b710 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11288,16 +11288,15 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) { // If we already know the streaming mode, don't bother with the intrinsic // and emit a constant instead - auto FD = cast<FunctionDecl>(CurFuncDecl); - if (const Type *Ty = FD->getType().getTypePtrOrNull()) - if (const auto *FPT = Ty->getAs<FunctionProtoType>()) { - unsigned SMEAttrs = FPT->getAArch64SMEAttributes(); - if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) { - bool IsStreamingMode = - SMEAttrs & FunctionType::SME_PStateSMEnabledMask; - return ConstantInt::getBool(Builder.getContext(), IsStreamingMode); - } + const auto *FD = cast<FunctionDecl>(CurFuncDecl); + if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) { + unsigned SMEAttrs = FPT->getAArch64SMEAttributes(); + if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) { + bool IsStreaming = + SMEAttrs & FunctionType::SME_PStateSMEnabledMask; + return ConstantInt::getBool(Builder.getContext(), IsStreaming); } + } } // Predicates must match the main datatype. diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll index 4d78ae6c564839..5037772a51cee3 100644 --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-state.ll @@ -3,7 +3,7 @@ define i1 @streaming_mode_streaming_compatible() #0 { -; CHECK-LABEL: streaming_mode_st_compatible: +; CHECK-LABEL: streaming_mode_streaming_compatible: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits