[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
aaupov wrote: Ping @wlei-llvm https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] b0558fd - Revert "[CFI][LowerTypeTests] Fix indirect call with alias (#106185)"
Author: Igor Kudrin Date: 2024-10-28T16:12:16-07:00 New Revision: b0558fdef77d83c8609b56711aaa640ecde6c9a0 URL: https://github.com/llvm/llvm-project/commit/b0558fdef77d83c8609b56711aaa640ecde6c9a0 DIFF: https://github.com/llvm/llvm-project/commit/b0558fdef77d83c8609b56711aaa640ecde6c9a0.diff LOG: Revert "[CFI][LowerTypeTests] Fix indirect call with alias (#106185)" This reverts commit 67bcce21415c7f687c28eb727c40b27924335f5a. Added: Modified: llvm/include/llvm/IR/ModuleSummaryIndexYAML.h llvm/lib/Transforms/IPO/LowerTypeTests.cpp Removed: llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index d12bc260f5cf4e..902d1305c818ac 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -135,14 +135,10 @@ template <> struct MappingTraits { } }; -struct GlobalValueSummaryYaml { - // Commonly used fields +struct FunctionSummaryYaml { unsigned Linkage, Visibility; bool NotEligibleToImport, Live, IsLocal, CanAutoHide; unsigned ImportType; - // Fields for AliasSummary - std::optional Aliasee; - // Fields for FunctionSummary std::vector Refs; std::vector TypeTests; std::vector TypeTestAssumeVCalls, @@ -180,8 +176,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummary::ConstVCall) namespace llvm { namespace yaml { -template <> struct MappingTraits { - static void mapping(IO &io, GlobalValueSummaryYaml &summary) { +template <> struct MappingTraits { + static void mapping(IO &io, FunctionSummaryYaml& summary) { io.mapOptional("Linkage", summary.Linkage); io.mapOptional("Visibility", summary.Visibility); io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); @@ -189,7 +185,6 @@ template <> struct MappingTraits { io.mapOptional("Local", summary.IsLocal); io.mapOptional("CanAutoHide", summary.CanAutoHide); io.mapOptional("ImportType", summary.ImportType); -io.mapOptional("Aliasee", summary.Aliasee); 
io.mapOptional("Refs", summary.Refs); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); @@ -204,7 +199,7 @@ template <> struct MappingTraits { } // End yaml namespace } // End llvm namespace -LLVM_YAML_IS_SEQUENCE_VECTOR(GlobalValueSummaryYaml) +LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml) namespace llvm { namespace yaml { @@ -212,99 +207,61 @@ namespace yaml { // FIXME: Add YAML mappings for the rest of the module summary. template <> struct CustomMappingTraits { static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) { -std::vector GVSums; -io.mapRequired(Key.str().c_str(), GVSums); +std::vector FSums; +io.mapRequired(Key.str().c_str(), FSums); uint64_t KeyInt; if (Key.getAsInteger(0, KeyInt)) { io.setError("key not an integer"); return; } auto &Elem = V.try_emplace(KeyInt, /*IsAnalysis=*/false).first->second; -for (auto &GVSum : GVSums) { - GlobalValueSummary::GVFlags GVFlags( - static_cast(GVSum.Linkage), - static_cast(GVSum.Visibility), - GVSum.NotEligibleToImport, GVSum.Live, GVSum.IsLocal, - GVSum.CanAutoHide, - static_cast(GVSum.ImportType)); - if (GVSum.Aliasee) { -auto ASum = std::make_unique(GVFlags); -if (!V.count(*GVSum.Aliasee)) - V.emplace(*GVSum.Aliasee, /*IsAnalysis=*/false); -ValueInfo AliaseeVI(/*IsAnalysis=*/false, &*V.find(*GVSum.Aliasee)); -// Note: Aliasee cannot be filled until all summaries are loaded. -// This is done in fixAliaseeLinks() which is called in -// MappingTraits::mapping(). 
-ASum->setAliasee(AliaseeVI, /*Aliasee=*/nullptr); -Elem.SummaryList.push_back(std::move(ASum)); -continue; - } +for (auto &FSum : FSums) { SmallVector Refs; - Refs.reserve(GVSum.Refs.size()); - for (auto &RefGUID : GVSum.Refs) { + Refs.reserve(FSum.Refs.size()); + for (auto &RefGUID : FSum.Refs) { auto It = V.try_emplace(RefGUID, /*IsAnalysis=*/false).first; Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*It)); } Elem.SummaryList.push_back(std::make_unique( - GVFlags, /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), - SmallVector{}, std::move(GVSum.TypeTests), - std::move(GVSum.TypeTestAssumeVCalls), - std::move(GVSum.TypeCheckedLoadVCalls), - std::move(GVSum.TypeTestAssumeConstVCalls), - std::move(GVSum.TypeCheckedLoadConstVCalls), + GlobalValueSummary::GVFlags( + static_cast(FSum.Linkage), + static_cast(FSum.Visibility), + FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, +
[llvm-branch-commits] [lld] [llvm] [CGData][llvm-cgdata] Support for stable function map (PR #112664)
@@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { Error CodeGenDataReader::mergeFromObjectFile( const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, +StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash) { Triple TT = Obj->makeTriple(); auto CGOutLineName = getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); ellishg wrote: I think this should be spelled `CGOutlineName` since it's "outline", not "out line". https://github.com/llvm/llvm-project/pull/112664 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AVR][MC] Fix incorrect range of relative jumps (#109124) (PR #113969)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/113969 Backport 8c3b94f420a20a45dd07f3e12d6a6d649858f452 Requested by: @Patryk27 >From 661f649aacf844dfd89ca2af460eeeab92900d0e Mon Sep 17 00:00:00 2001 From: Ben Shi <2283975...@qq.com> Date: Fri, 20 Sep 2024 11:40:07 +0800 Subject: [PATCH] [AVR][MC] Fix incorrect range of relative jumps (#109124) 'rjmp .+4094' is legal but rejected by llvm-mc since 86a60e7f1e8f361f84ccb6e656e848dd4fbaa713, and this patch fixed that range issue. (cherry picked from commit 8c3b94f420a20a45dd07f3e12d6a6d649858f452) --- .../Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 6 +-- llvm/test/MC/AVR/inst-rjmp.s | 40 ++- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 388d58a82214d1..c0bc1276967bf0 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -88,15 +88,15 @@ static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, /// Adjusts the value of a relative branch target before fixup application. static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { + // Jumps are relative to the current instruction. + Value -= 2; + // We have one extra bit of precision because the value is rightshifted by // one. signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx); // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); - - // Jumps are relative to the current instruction. - Value -= 1; } /// 22-bit absolute fixup. 
diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s index cc843a58b55d2c..2d7aa401feacf0 100644 --- a/llvm/test/MC/AVR/inst-rjmp.s +++ b/llvm/test/MC/AVR/inst-rjmp.s @@ -19,25 +19,28 @@ end: x: rjmp x .short 0xc00f + rjmp .+4094 -; CHECK: rjmp (.Ltmp0+2)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp1-2)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel -; CHECK: rjmp foo ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp2+8)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel -; CHECK: rjmp end ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp3+0)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp4-4)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp5-6)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel -; CHECK: rjmp x ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp0+2)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp1-2)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel +; CHECK: rjmp foo ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp2+8)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel +; CHECK: rjmp end ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel +; CHECK: 
rjmp (.Ltmp3+0)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp4-4)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp5-6)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel +; CHECK: rjmp x ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp6+4094)+2 ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp6+4094)+2, kind: fixup_13_pcrel ; INST-LABEL: : ; INST-NEXT: 01 c0 rjmp .+2
[llvm-branch-commits] [llvm] release/19.x: [AVR][MC] Fix incorrect range of relative jumps (#109124) (PR #113969)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/113969 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AVR][MC] Fix incorrect range of relative jumps (#109124) (PR #113969)
llvmbot wrote: @llvm/pr-subscribers-mc Author: None (llvmbot) Changes Backport 8c3b94f420a20a45dd07f3e12d6a6d649858f452 Requested by: @Patryk27 --- Full diff: https://github.com/llvm/llvm-project/pull/113969.diff 2 Files Affected: - (modified) llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp (+3-3) - (modified) llvm/test/MC/AVR/inst-rjmp.s (+22-18) ``diff diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 388d58a82214d1..c0bc1276967bf0 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -88,15 +88,15 @@ static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, /// Adjusts the value of a relative branch target before fixup application. static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { + // Jumps are relative to the current instruction. + Value -= 2; + // We have one extra bit of precision because the value is rightshifted by // one. signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx); // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); - - // Jumps are relative to the current instruction. - Value -= 1; } /// 22-bit absolute fixup. 
diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s index cc843a58b55d2c..2d7aa401feacf0 100644 --- a/llvm/test/MC/AVR/inst-rjmp.s +++ b/llvm/test/MC/AVR/inst-rjmp.s @@ -19,25 +19,28 @@ end: x: rjmp x .short 0xc00f + rjmp .+4094 -; CHECK: rjmp (.Ltmp0+2)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp1-2)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel -; CHECK: rjmp foo ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp2+8)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel -; CHECK: rjmp end ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp3+0)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp4-4)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp5-6)+2 ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel -; CHECK: rjmp x ; encoding: [A,0b1100] -; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp0+2)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp1-2)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel +; CHECK: rjmp foo ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp2+8)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel +; CHECK: rjmp end ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel +; CHECK: 
rjmp (.Ltmp3+0)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp4-4)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp5-6)+2; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel +; CHECK: rjmp x ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp6+4094)+2 ; encoding: [A,0b1100] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp6+4094)+2, kind: fixup_13_pcrel ; INST-LABEL: : ; INST-NEXT: 01 c0 rjmp .+2 @@ -54,3 +57,4 @@ x: ; INST-LABEL: : ; INST-NEXT: ff cf rjmp .-2 ; INST-NEXT: 0f c0 rjmp .+30 +; INST-NEXT: ff c7 rjmp .+4094 `` https://github.com/llvm/llvm-project/pull/113969 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bran
[llvm-branch-commits] [llvm] release/19.x: [AVR][MC] Fix incorrect range of relative jumps (#109124) (PR #113969)
llvmbot wrote: @jacquesguan What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/113969 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add skeletons for new register bank select passes (PR #112862)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/112862 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
wlei-llvm wrote: > Ping @wlei-llvm Sorry for the delay. The new version addressed my last comment (with just minor nits). However, I didn't fully follow the new features related to `ProbeMatchSpecs`. Could you add more description to the diff summary? Or if it’s not a lot of work, could we split it into two patches? We could commit the first part, and I will review the second part separately. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -628,9 +618,75 @@ void YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( } } } -if (Cur && Decoder.getFuncDescForGUID(GUID)->FuncHash == Hash) +// Don't match nodes if the profile is stale (mismatching binary FuncHash +// and YAML Hash) +if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash) mapInlineTreeNode(InlineTreeNodeId, Cur); } + return Map.size(); +} + +// Decode index deltas and indirection through \p YamlPD. Return modified copy +// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex). +static std::vector +decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, + std::vector YamlInlineTree) { + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { +uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; +if (GUIDIdx != UINT32_MAX) + PrevGUIDIdx = GUIDIdx; +else + GUIDIdx = PrevGUIDIdx; +uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; +ParentId += InlineTreeNode.ParentIndexDelta; +InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; +InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; +InlineTreeNode.ParentIndexDelta = ParentId; + } + return YamlInlineTree; +} + +size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; + + // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe + // matching. + if (opts::StaleMatchingWithPseudoProbes) { wlei-llvm wrote: nit: early return(`if (!opts::StaleMatchingWithPseudoProbes) ...`) to save indentation. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -722,12 +722,28 @@ class BinaryContext { /// Stats for stale profile matching: /// the total number of basic blocks in the profile uint32_t NumStaleBlocks{0}; -/// the number of matched basic blocks -uint32_t NumMatchedBlocks{0}; +/// the number of exactly matched basic blocks +uint32_t NumExactMatchedBlocks{0}; +/// the number of loosely matched basic blocks +uint32_t NumLooseMatchedBlocks{0}; +/// the number of exactly pseudo probe matched basic blocks +uint32_t NumPseudoProbeExactMatchedBlocks{0}; +/// the number of loosely pseudo probe matched basic blocks +uint32_t NumPseudoProbeLooseMatchedBlocks{0}; +/// the number of call matched basic blocks +uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; -/// the count of matched samples -uint64_t MatchedSampleCount{0}; +/// the count of exactly matched samples +uint64_t ExactMatchedSampleCount{0}; +/// the count of exactly matched samples wlei-llvm wrote: Still two "the count of exactly matched samples". https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AVR][MC] Fix incorrect range of relative jumps (#109124) (PR #113969)
Patryk27 wrote: A little context - I'd like to get it merged to 19.x, so that I can pull it to rustc - it fixes a minor codegen issue within the AVR backend, which causes some firmwares not to get compiled, crashing LLVM. https://github.com/llvm/llvm-project/pull/113969 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
tru wrote: @heiher this would still break the ABI, right? So it would still create problems for downstream users like Rust? cc @nikic @arsenm @tgross35 https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From e93205a78b9f604246818dbc71c852d8ef03a4f5 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 1 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 9 files changed, 77 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index f031818e7ef3fa..3073b62f37be7e 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -53,6 +53,7 @@ class PhysicalRegisterUsageInfo { bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv); + private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector",
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From e93205a78b9f604246818dbc71c852d8ef03a4f5 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 1 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 9 files changed, 77 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index f031818e7ef3fa..3073b62f37be7e 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -53,6 +53,7 @@ class PhysicalRegisterUsageInfo { bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv); + private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector",
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
tgross35 wrote: > @heiher this would still break the ABI right? so it would still create > problem for downstream users like rust? Speaking only from a Rust perspective, don't worry too much about making breaking changes that fix `f16` or `f128` behavior. The types are nightly-only for now (probably will be that way for at least a year or so) so we don't have any public guarantees. (I would just get to un-skip `f16` tests on another arch sooner). I don't know what other frontends expose this type but the commit mentions Zig. If it is preferable to keep using float registers, it sounds like there is another fix for the precision issue rather than changing the ABI https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218678418. I don't think there is a test for the precision yet. https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Parsing support for iterator in DEPEND clause (PR #113622)
@@ -795,35 +795,41 @@ bool ClauseProcessor::processCopyprivate( bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - return findRepeatableClause( - [&](const omp::clause::Depend &clause, const parser::CharBlock &) { -using Depend = omp::clause::Depend; -assert(std::holds_alternative(clause.u) && - "Only the form with depenence type is handled at the moment"); -auto &depType = std::get(clause.u); -auto kind = std::get(depType.t); -auto &objects = std::get(depType.t); - -mlir::omp::ClauseTaskDependAttr dependTypeOperand = -genDependKindAttr(firOpBuilder, kind); -result.dependKinds.append(objects.size(), dependTypeOperand); - -for (const omp::Object &object : objects) { - assert(object.ref() && "Expecting designator"); - - if (evaluate::ExtractSubstring(*object.ref())) { -TODO(converter.getCurrentLocation(), - "substring not supported for task depend"); - } else if (evaluate::IsArrayElement(*object.ref())) { -TODO(converter.getCurrentLocation(), - "array sections not supported for task depend"); - } + auto process = [&](const omp::clause::Depend &clause, + const parser::CharBlock &) { +using Depend = omp::clause::Depend; +assert(std::holds_alternative(clause.u) && + "Only the form with depenence type is handled at the moment"); tblah wrote: Should this be a TODO? https://github.com/llvm/llvm-project/pull/113622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM] [Clang] Backport "Support for Gentoo `*t64` triples (64-bit time_t ABIs)" (PR #112364)
tru wrote: Correct me if I am wrong, but this seems like an added feature and not a regression or critical fix? It's a pretty big patch, and while it is most likely "safe", it would still fall outside our current definition of backports? https://github.com/llvm/llvm-project/pull/112364 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Make LazyOffsetPtr more portable (#112927) (PR #113052)
tru wrote: @jrtc27 @zygoloid how about merging this to the release branch? https://github.com/llvm/llvm-project/pull/113052 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Disable use of the counted_by attribute for whole struct pointers (#112636) (PR #112786)
tru wrote: Who can review this? https://github.com/llvm/llvm-project/pull/112786 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
heiher wrote: > @heiher this would still break the ABI right? so it would still create > problem for downstream users like rust? > > cc @nikic @arsenm @tgross35 After deciding that [FPU does not support fp16](https://github.com/llvm/llvm-project/pull/109368#issuecomment-2423879356), there’s no longer any ABI-breaking impact. Without this patch, LoongArch’s software emulation for fp16 operations still has [correctness issues](https://github.com/llvm/llvm-project/issues/97975). This is why downstream software depends on it, which is the reason for my request. Thanks. https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Undef _TIME_BITS along with _FILE_OFFSET_BITS (PR #112247)
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/112247 >From d8752671e825ca5c967cc58a23778ae378c8dea2 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 19 Jul 2024 22:22:55 +0200 Subject: [PATCH] Undef _TIME_BITS along with _FILE_OFFSET_BITS This change is identical to 26800a2c7e7996dc773b4e990dd5cca41c45e1a9 ("[sanitizer] Undef _TIME_BITS along with _FILE_OFFSET_BITS on Linux"), but for sanitizer_procmaps_solaris.cpp. Indeed, even though sanitizer_procmaps_solaris.cpp is Solaris specific, it also gets built on Linux platforms. It also includes sanitizer_platform.h, which also ends up including features-time64.h, causing a build failure on 32-bit Linux platforms on which 64-bit time_t is enabled by setting _TIME_BITS=64. To fix this, we do the same change: undefine _TIME_BITS, which anyway will cause no harm as the rest of this file is inside a SANITIZER_SOLARIS compile-time conditional. Fixes: In file included from /home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features.h:394, from ../../../../libsanitizer/sanitizer_common/sanitizer_platform.h:25, from ../../../../libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp:14: /home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features-time64.h:26:5: error: #error "_TIME_BITS=64 is al lowed only with _FILE_OFFSET_BITS=64" 26 | # error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64" | ^ Signed-off-by: Thomas Petazzoni Closes: https://github.com/llvm/llvm-project/pull/99699 (cherry picked from commit a1217020da219386b29c1a5a4a217904ecf07d7d) --- .../lib/sanitizer_common/sanitizer_procmaps_solaris.cpp | 4 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp index eeb49e2afe34d4..80b8158f43db9e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp @@ -11,6 +11,10 @@ // Before Solaris 11.4, <procfs.h> doesn't work in a largefile environment. #undef _FILE_OFFSET_BITS + +// Avoid conflict between `_TIME_BITS` defined vs. `_FILE_OFFSET_BITS` +// undefined in some Linux configurations. +#undef _TIME_BITS #include "sanitizer_platform.h" #if SANITIZER_SOLARIS # include ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/113874 None >From f019b9e7311678231c9c5414b61c79619833bec4 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 3 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/CodeGen/RegisterUsageInfo.cpp| 7 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 10 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index 3f18bf14615081..3073b62f37be7e 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -51,6 +51,9 @@ class PhysicalRegisterUsageInfo { void print(raw_ostream &OS, const Module *M = nullptr) const; + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv); + private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", 
RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack > [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/113874?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#113874** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/113874?utm_source=stack-comment-icon) 👈 * **#113873** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/113873?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment). Join @optimisan and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment) https://github.com/llvm/llvm-project/pull/113874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From 7f9bc0fe3612bb8b964765ab5b859bb812913231 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 8 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- 
a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass) DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass) DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass) DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index cf5c35fe81b4c7..76b74ea4e6fe0b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -113,7 +113,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRABasicPass(Registry); initializeRAGreedyPass(Registry); initializeRegAllocFastPass(Reg
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ```bash git-clang-format --diff 42096a48034185afe411226ca268b2796d1f9faa 7b2af0e256ed54102a3be69d62ff21cbccfa4e6d --extensions cpp,h -- llvm/include/llvm/CodeGen/RegUsageInfoCollector.h llvm/include/llvm/CodeGen/RegisterUsageInfo.h llvm/include/llvm/InitializePasses.h llvm/include/llvm/Passes/CodeGenPassBuilder.h llvm/lib/CodeGen/CodeGen.cpp llvm/lib/CodeGen/RegUsageInfoCollector.cpp llvm/lib/CodeGen/RegisterUsageInfo.cpp llvm/lib/Passes/PassBuilder.cpp ``` View the diff from clang-format here. ```diff diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 0457c1f319..4f5c7f6efe 100644 --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -126,9 +126,10 @@ bool RegUsageInfoCollector::run(MachineFunction &MF) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const LLVMTargetMachine &TM = MF.getTarget(); - LLVM_DEBUG(dbgs() << " Register Usage Information Collector Pass" -<< " \nFunction Name : " -<< MF.getName() << '\n'); + LLVM_DEBUG( + dbgs() + << " Register Usage Information Collector Pass" + << " \nFunction Name : " << MF.getName() << '\n'); // Analyzing the register usage may be expensive on some targets. if (!isCallableFunction(MF)) { ``` https://github.com/llvm/llvm-project/pull/113874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/113874.diff 12 Files Affected: - (added) llvm/include/llvm/CodeGen/RegUsageInfoCollector.h (+25) - (modified) llvm/include/llvm/CodeGen/RegisterUsageInfo.h (-3) - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) - (modified) llvm/lib/CodeGen/RegUsageInfoCollector.cpp (+44-21) - (modified) llvm/lib/CodeGen/RegisterUsageInfo.cpp (-7) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/test/CodeGen/AMDGPU/ipra-regmask.ll (+5) - (modified) llvm/test/CodeGen/X86/ipra-inline-asm.ll (+4) - (modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+4) ``diff diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index 3073b62f37be7e..3f18bf14615081 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -51,9 +51,6 @@ class PhysicalRegisterUsageInfo { void print(raw_ostream &OS, const Module *M = nullptr) const; - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv); - private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", 
RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pb
[llvm-branch-commits] [clang] [Serialization] Code cleanups and polish 83233 (PR #83237)
ChuanqiXu9 wrote: > > I tried to take a look at eigen and it looks like the declaration looks > > well and I had no clue how that happens. A reproducer may be necessary here > > to proceed. Thanks in advance. > > I can reproduce using the following sources and invocations outlined in > `run.sh` > [usx95@363d877](https://github.com/usx95/llvm-project/commit/363d877bd317638b197f57c3591860e1688950d5) > > ```shell > > module-reproducer/run.sh > > Building sensor_data.h > Building tensor.h > Building base.cc > In module 'sensor_data': > ../../eigen/Eigen/src/Core/../plugins/CommonCwiseBinaryOps.inc:47:29: > warning: inline function 'Eigen::operator*' is not defined > [-Wundefined-inline] >47 | EIGEN_MAKE_SCALAR_BINARY_OP(operator*, product) > | ^ > ../../eigen/Eigen/src/Geometry/AngleAxis.h:221:35: note: used here > 221 | Vector3 sin_axis = sin(m_angle) * m_axis; > | ^ > 1 warning generated. > ``` > > This warning is a new breakage and does not happen without this change > (ignore the linker failure). Let me know if you can reproduce or need help > reproducing. Reproduced. Thank you very much! https://github.com/llvm/llvm-project/pull/83237 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From 0fc5f97551d6d27e6ad3f5b0dfd859b64f37a67c Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 3 + llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/CodeGen/RegisterUsageInfo.cpp| 7 +++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 10 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index 3f18bf14615081..3073b62f37be7e 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -51,6 +51,9 @@ class PhysicalRegisterUsageInfo { void print(raw_ostream &OS, const Module *M = nullptr) const; + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv); + private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", 
RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-gr
[llvm-branch-commits] [clang] [clang] Fix C23 constexpr crashes (#112708) (PR #112855)
tru wrote: @AaronBallman @shafik ? https://github.com/llvm/llvm-project/pull/112855 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [SLP]Check that operand of abs does not overflow before making it part of minbitwidth transformation (PR #113146)
tru wrote: who can review this? @nikic? https://github.com/llvm/llvm-project/pull/113146 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] d875267 - Undef _TIME_BITS along with _FILE_OFFSET_BITS
Author: Thomas Petazzoni Date: 2024-10-28T08:40:43+01:00 New Revision: d8752671e825ca5c967cc58a23778ae378c8dea2 URL: https://github.com/llvm/llvm-project/commit/d8752671e825ca5c967cc58a23778ae378c8dea2 DIFF: https://github.com/llvm/llvm-project/commit/d8752671e825ca5c967cc58a23778ae378c8dea2.diff LOG: Undef _TIME_BITS along with _FILE_OFFSET_BITS This change is identical to 26800a2c7e7996dc773b4e990dd5cca41c45e1a9 ("[sanitizer] Undef _TIME_BITS along with _FILE_OFFSET_BITS on Linux"), but for sanitizer_procmaps_solaris.cpp. Indeed, even though sanitizer_procmaps_solaris.cpp is Solaris specific, it also gets built on Linux platforms. It also includes sanitizer_platform.h, which also ends up including features-time64.h, causing a build failure on 32-bit Linux platforms on which 64-bit time_t is enabled by setting _TIME_BITS=64. To fix this, we do the same change: undefine _TIME_BITS, which anyway will cause no harm as the rest of this file is inside a SANITIZER_SOLARIS compile-time conditional. 
Fixes: In file included from /home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features.h:394, from ../../../../libsanitizer/sanitizer_common/sanitizer_platform.h:25, from ../../../../libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp:14: /home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features-time64.h:26:5: error: #error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64" 26 | # error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64" | ^ Signed-off-by: Thomas Petazzoni Closes: https://github.com/llvm/llvm-project/pull/99699 (cherry picked from commit a1217020da219386b29c1a5a4a217904ecf07d7d) Added: Modified: compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp Removed: diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp index eeb49e2afe34d4..80b8158f43db9e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp @@ -11,6 +11,10 @@ // Before Solaris 11.4, <procfs.h> doesn't work in a largefile environment. #undef _FILE_OFFSET_BITS + +// Avoid conflict between `_TIME_BITS` defined vs. `_FILE_OFFSET_BITS` +// undefined in some Linux configurations. +#undef _TIME_BITS #include "sanitizer_platform.h" #if SANITIZER_SOLARIS # include ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Undef _TIME_BITS along with _FILE_OFFSET_BITS (PR #112247)
github-actions[bot] wrote: @mgorny (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/112247 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Undef _TIME_BITS along with _FILE_OFFSET_BITS (PR #112247)
https://github.com/tru closed https://github.com/llvm/llvm-project/pull/112247 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] 7d0bfab - Revert "Add explicit symbol visibility macros to InstrProfData.inc (#110732)"
Author: Vassil Vassilev Date: 2024-10-28T10:57:04+02:00 New Revision: 7d0bfab06494f4e744fae530ebf418368550cffe URL: https://github.com/llvm/llvm-project/commit/7d0bfab06494f4e744fae530ebf418368550cffe DIFF: https://github.com/llvm/llvm-project/commit/7d0bfab06494f4e744fae530ebf418368550cffe.diff LOG: Revert "Add explicit symbol visibility macros to InstrProfData.inc (#110732)" This reverts commit d7ca703eab7997814de425eaa4fd888563d78831. Added: Modified: compiler-rt/include/profile/InstrProfData.inc llvm/include/llvm/ProfileData/InstrProfData.inc Removed: diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 08ecaf0ed9fa5b..c66b0465a0b548 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -62,15 +62,6 @@ #define INSTR_PROF_VISIBILITY #endif -/* This include is needed for symbol visibility macros used on - * ValueProfRecord\ValueProfData so there functions are exported from the - * LLVM shared library on windows. */ -#ifdef __cplusplus -#include "llvm/Support/Compiler.h" -#else -#define LLVM_ABI -#endif - // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -382,7 +373,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct LLVM_ABI ValueProfRecord { +typedef struct ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -432,7 +423,7 @@ typedef struct LLVM_ABI ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. */ -typedef struct LLVM_ABI ValueProfData { +typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple * of sizeof(uint64_t). 
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 08ecaf0ed9fa5b..c66b0465a0b548 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -62,15 +62,6 @@ #define INSTR_PROF_VISIBILITY #endif -/* This include is needed for symbol visibility macros used on - * ValueProfRecord\ValueProfData so there functions are exported from the - * LLVM shared library on windows. */ -#ifdef __cplusplus -#include "llvm/Support/Compiler.h" -#else -#define LLVM_ABI -#endif - // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -382,7 +373,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct LLVM_ABI ValueProfRecord { +typedef struct ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -432,7 +423,7 @@ typedef struct LLVM_ABI ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. */ -typedef struct LLVM_ABI ValueProfData { +typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple * of sizeof(uint64_t). ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From 7f9bc0fe3612bb8b964765ab5b859bb812913231 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/2] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 8 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- 
a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass) DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass) DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass) DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index cf5c35fe81b4c7..76b74ea4e6fe0b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -113,7 +113,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRABasicPass(Registry); initializeRAGreedyPass(Registry); initializeRegAllocFastPass(Reg
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
llvmbot wrote: @llvm/pr-subscribers-llvm-regalloc Author: Akshat Oke (optimisan) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/113874.diff 12 Files Affected: - (added) llvm/include/llvm/CodeGen/RegUsageInfoCollector.h (+25) - (modified) llvm/include/llvm/CodeGen/RegisterUsageInfo.h (-3) - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) - (modified) llvm/lib/CodeGen/RegUsageInfoCollector.cpp (+44-21) - (modified) llvm/lib/CodeGen/RegisterUsageInfo.cpp (-7) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/test/CodeGen/AMDGPU/ipra-regmask.ll (+5) - (modified) llvm/test/CodeGen/X86/ipra-inline-asm.ll (+4) - (modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+4) ``diff diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index 3073b62f37be7e..3f18bf14615081 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -51,9 +51,6 @@ class PhysicalRegisterUsageInfo { void print(raw_ostream &OS, const Module *M = nullptr) const; - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv); - private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", 
RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbq
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: Akshat Oke (optimisan) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/113874.diff 12 Files Affected: - (added) llvm/include/llvm/CodeGen/RegUsageInfoCollector.h (+25) - (modified) llvm/include/llvm/CodeGen/RegisterUsageInfo.h (-3) - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) - (modified) llvm/lib/CodeGen/RegUsageInfoCollector.cpp (+44-21) - (modified) llvm/lib/CodeGen/RegisterUsageInfo.cpp (-7) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/test/CodeGen/AMDGPU/ipra-regmask.ll (+5) - (modified) llvm/test/CodeGen/X86/ipra-inline-asm.ll (+4) - (modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+4) ``diff diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h index 3073b62f37be7e..3f18bf14615081 100644 --- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h @@ -51,9 +51,6 @@ class PhysicalRegisterUsageInfo { void print(raw_ostream &OS, const Module *M = nullptr) const; - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv); - private: /// A Dense map from Function * to RegMask. /// In RegMask 0 means register used (clobbered) by function. 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", 
RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp"
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/113874 >From 7f9bc0fe3612bb8b964765ab5b859bb812913231 Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Mon, 28 Oct 2024 06:22:49 + Subject: [PATCH 1/3] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM --- .../llvm/CodeGen/RegUsageInfoCollector.h | 25 llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/RegUsageInfoCollector.cpp| 60 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 5 ++ 8 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/RegUsageInfoCollector.h diff --git a/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h new file mode 100644 index 00..6b88cc4f99089e --- /dev/null +++ b/llvm/include/llvm/CodeGen/RegUsageInfoCollector.h @@ -0,0 +1,25 @@ +//===- llvm/CodeGen/RegUsageInfoCollector.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H +#define LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class RegUsageInfoCollectorPass +: public AnalysisInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REGUSAGEINFOCOLLECTOR_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index f6f6797ec9f87c..c881dcd57006db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -257,7 +257,7 @@ void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); -void initializeRegUsageInfoCollectorPass(PassRegistry &); +void initializeRegUsageInfoCollectorLegacyPass(PassRegistry &); void initializeRegUsageInfoPropagationPass(PassRegistry &); void initializeRegionInfoPassPass(PassRegistry &); void initializeRegionOnlyPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index e5de62935a8e48..14fcf9d79fbc23 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -53,6 +53,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 183a777a93b9fa..36d17b713639c1 100644 --- 
a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("print", MachinePostDominatorTreePrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) +MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) @@ -249,7 +250,6 @@ DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass) DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass) DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass) DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index cf5c35fe81b4c7..76b74ea4e6fe0b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -113,7 +113,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRABasicPass(Registry); initializeRAGreedyPass(Registry); initializeRegAllocFastPass(Reg
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/113874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang][OpenMP] Access full list of entry block syms and vars (NFC) (PR #113681)
https://github.com/tblah approved this pull request. LGTM, thanks! Maybe sometime we could go a step further and provide a standard implementation of the `genRegionEntryCB`? https://github.com/llvm/llvm-project/pull/113681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM] [Clang] Backport "Support for Gentoo `*t64` triples (64-bit time_t ABIs)" (PR #112364)
mgorny wrote:
> Correct me if I am wrong, but this seems like an added feature and not a regression or critical fix? It's a pretty big patch, and while it is most likely "safe", it would still fall outside our current definition of backports?
Without this change, you can't use clang at all, because it rejects the host triplet. https://github.com/llvm/llvm-project/pull/112364 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/113938 Backport e1c36bde0551977d4b2efae032af6dfc4b2b3936 Requested by: @samitolvanen >From cd089dd3ac6e1bcef6523514763428a55d80e462 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 20 Aug 2024 16:51:16 -0700 Subject: [PATCH] Fix KCFI types for generated functions with integer normalization (#104826) With -fsanitize-cfi-icall-experimental-normalize-integers, Clang appends ".normalized" to KCFI types in CodeGenModule::CreateKCFITypeId, which also changes type hashes for functions that don't have integer types in their signatures. However, llvm::setKCFIType does not take integer normalization into account, which means LLVM-generated functions with KCFI types, e.g. sanitizer constructors, will fail KCFI checks when integer normalization is enabled in Clang. Add a cfi-normalize-integers module flag to indicate that integer normalization is used, and also append ".normalized" to KCFI types in llvm::setKCFIType to fix the type mismatch.
(cherry picked from commit e1c36bde0551977d4b2efae032af6dfc4b2b3936) --- clang/lib/CodeGen/CodeGenModule.cpp | 5 +++ clang/test/CodeGen/kcfi-normalize.c | 1 + llvm/lib/Transforms/Utils/ModuleUtils.cpp | 12 --- .../GCOVProfiling/kcfi-normalize.ll | 35 +++ llvm/test/Transforms/GCOVProfiling/kcfi.ll| 8 +++-- 5 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cf5e29e5a3db8d..49266f3e3982e1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1137,6 +1137,11 @@ void CodeGenModule::Release() { CodeGenOpts.SanitizeCfiCanonicalJumpTables); } + if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) { +getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers", + 1); + } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); // KCFI assumes patchable-function-prefix is the same for all indirectly diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index 7660c908a7bdd5..b9150e88f6ab5f 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -28,6 +28,7 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { fn(arg1, arg2, arg3); } +// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1} // CHECK: ![[TYPE1]] = !{i32 -1143117868} // CHECK: ![[TYPE2]] = !{i32 -460921415} // CHECK: ![[TYPE3]] = !{i32 -333839615} diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 122279160cc7e8..95bf9f06bc331c 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -161,11 +161,13 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { // Matches CodeGenModule::CreateKCFITypeId in Clang. 
LLVMContext &Ctx = M.getContext(); MDBuilder MDB(Ctx); - F.setMetadata( - LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), - static_cast(xxHash64(MangledType)); + std::string Type = MangledType.str(); + if (M.getModuleFlag("cfi-normalize-integers")) +Type += ".normalized"; + F.setMetadata(LLVMContext::MD_kcfi_type, +MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), + static_cast(xxHash64(Type)); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null( diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll new file mode 100644 index 00..19122b920d1ca4 --- /dev/null +++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll @@ -0,0 +1,35 @@ +;; Ensure __llvm_gcov_(writeout|reset|init) have the correct !kcfi_type +;; with integer normalization. +; RUN: mkdir -p %t && cd %t +; RUN: opt < %s -S -passes=insert-gcov-profiling | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @empty() !dbg !5 { +entry: + ret void, !dbg !8 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/113938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
llvmbot wrote: @MaskRay What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/113938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes Backport e1c36bde0551977d4b2efae032af6dfc4b2b3936 Requested by: @samitolvanen --- Full diff: https://github.com/llvm/llvm-project/pull/113938.diff 5 Files Affected: - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+5) - (modified) clang/test/CodeGen/kcfi-normalize.c (+1) - (modified) llvm/lib/Transforms/Utils/ModuleUtils.cpp (+7-5) - (added) llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll (+35) - (modified) llvm/test/Transforms/GCOVProfiling/kcfi.ll (+5-3) ``diff diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cf5e29e5a3db8d..49266f3e3982e1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1137,6 +1137,11 @@ void CodeGenModule::Release() { CodeGenOpts.SanitizeCfiCanonicalJumpTables); } + if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) { +getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers", + 1); + } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); // KCFI assumes patchable-function-prefix is the same for all indirectly diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index 7660c908a7bdd5..b9150e88f6ab5f 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -28,6 +28,7 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { fn(arg1, arg2, arg3); } +// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1} // CHECK: ![[TYPE1]] = !{i32 -1143117868} // CHECK: ![[TYPE2]] = !{i32 -460921415} // CHECK: ![[TYPE3]] = !{i32 -333839615} diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 122279160cc7e8..95bf9f06bc331c 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -161,11 +161,13 @@ void 
llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { // Matches CodeGenModule::CreateKCFITypeId in Clang. LLVMContext &Ctx = M.getContext(); MDBuilder MDB(Ctx); - F.setMetadata( - LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), - static_cast(xxHash64(MangledType)); + std::string Type = MangledType.str(); + if (M.getModuleFlag("cfi-normalize-integers")) +Type += ".normalized"; + F.setMetadata(LLVMContext::MD_kcfi_type, +MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), + static_cast(xxHash64(Type)); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null( diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll new file mode 100644 index 00..19122b920d1ca4 --- /dev/null +++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll @@ -0,0 +1,35 @@ +;; Ensure __llvm_gcov_(writeout|reset|init) have the correct !kcfi_type +;; with integer normalization. 
+; RUN: mkdir -p %t && cd %t +; RUN: opt < %s -S -passes=insert-gcov-profiling | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @empty() !dbg !5 { +entry: + ret void, !dbg !8 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !7) +!7 = !{null} +!8 = !DILocation(line: 2, column: 1, scope: !5) +!9 = !{i32 4, !"kcfi", i32 1} +!10 = !{i32 4, !"cfi-normalize-integers", i32 1} + +; CHECK: define internal void @__llvm_gcov_writeout() +; CHECK-SAME: !kcfi_type ![[#TYPE:]] +; CHECK: define internal void @__llvm_gcov_reset() +; CHECK-SAME: !kcfi_type ![[#TYPE]] +; CHECK: define internal void @__llvm_gcov_init() +; CHECK-SAME: !kcfi_type ![[#TYPE]] + +; CHECK: ![[#TYPE]] = !{i32 -440107680} diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi.ll b/llvm/test/Transforms/GCOVProfiling/kcfi.ll index b25f40f05d5bc4..1b97d25294cd65 100644 --- a/llvm/test/Transforms/GCOVProfiling/kcfi.ll +++ b/llvm/test/Transforms/GCOVProfiling/kcfi.ll @@ -24,8 +24,10 @@ entry: !9 = !{i32 4, !"kcfi", i32 1} ; CHECK: define internal void @__l
[llvm-branch-commits] [llvm] [PAC][CodeGen][ELF][AArch64] Support signed GOT with tiny code model (PR #113812)
llvmbot wrote: @llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: Daniil Kovalev (kovdan01) Changes Support the following relocations and assembly operators: - `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` (`:got_auth:` for `adr`) - `R_AARCH64_AUTH_GOT_LD_PREL19` (`:got_auth:` for `ldr`) `LOADgotAUTH` pseudo-instruction is expanded to actual instruction sequence like the following. ``` adr x16, :got_auth:sym ldr x0, [x16] autia x0, x16 ``` If FPAC bit is not set, a check+trap sequence similar to one used for `AUT` pseudo is emitted to ensure auth success. Both SelectionDAG and GlobalISel are supported. For FastISel, we fall back to SelectionDAG. Tests starting with 'ptrauth-' have corresponding variants w/o this prefix. --- Patch is 21.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113812.diff 8 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp (+30-18) - (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+7-1) - (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp (+18) - (modified) llvm/test/CodeGen/AArch64/ptrauth-extern-weak.ll (+42) - (added) llvm/test/CodeGen/AArch64/ptrauth-tiny-model-pic.ll (+182) - (added) llvm/test/CodeGen/AArch64/ptrauth-tiny-model-static.ll (+157) - (modified) llvm/test/MC/AArch64/arm64-elf-relocs.s (+13) - (modified) llvm/test/MC/AArch64/ilp32-diagnostics.s (+6) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index e79457f925db66..c2a7450ffb9132 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -2277,28 +2277,40 @@ void AArch64AsmPrinter::LowerLOADgotAUTH(const MachineInstr &MI) { const MachineOperand &GAMO = MI.getOperand(1); assert(GAMO.getOffset() == 0); - MachineOperand GAHiOp(GAMO); - MachineOperand GALoOp(GAMO); - GAHiOp.addTargetFlag(AArch64II::MO_PAGE); - 
GALoOp.addTargetFlag(AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + if (MI.getParent()->getParent()->getTarget().getCodeModel() == + CodeModel::Tiny) { +MCOperand GAMC; +MCInstLowering.lowerOperand(GAMO, GAMC); +EmitToStreamer( +MCInstBuilder(AArch64::ADR).addReg(AArch64::X17).addOperand(GAMC)); +EmitToStreamer(MCInstBuilder(AArch64::LDRXui) + .addReg(AuthResultReg) + .addReg(AArch64::X17) + .addImm(0)); + } else { +MachineOperand GAHiOp(GAMO); +MachineOperand GALoOp(GAMO); +GAHiOp.addTargetFlag(AArch64II::MO_PAGE); +GALoOp.addTargetFlag(AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - MCOperand GAMCHi, GAMCLo; - MCInstLowering.lowerOperand(GAHiOp, GAMCHi); - MCInstLowering.lowerOperand(GALoOp, GAMCLo); +MCOperand GAMCHi, GAMCLo; +MCInstLowering.lowerOperand(GAHiOp, GAMCHi); +MCInstLowering.lowerOperand(GALoOp, GAMCLo); - EmitToStreamer( - MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(GAMCHi)); +EmitToStreamer( +MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(GAMCHi)); - EmitToStreamer(MCInstBuilder(AArch64::ADDXri) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addOperand(GAMCLo) - .addImm(0)); +EmitToStreamer(MCInstBuilder(AArch64::ADDXri) + .addReg(AArch64::X17) + .addReg(AArch64::X17) + .addOperand(GAMCLo) + .addImm(0)); - EmitToStreamer(MCInstBuilder(AArch64::LDRXui) - .addReg(AuthResultReg) - .addReg(AArch64::X17) - .addImm(0)); +EmitToStreamer(MCInstBuilder(AArch64::LDRXui) + .addReg(AuthResultReg) + .addReg(AArch64::X17) + .addImm(0)); + } assert(GAMO.isGlobal()); MCSymbol *UndefWeakSym; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index b83ca3f7e52db4..de8e0a4731e419 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3353,7 +3353,13 @@ ParseStatus AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) { // No modifier was specified at all; this is the syntax for an 
ELF basic // ADR relocation (unfortunately). Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, getContext()); -} else { +} else if (ELFRefKind != AArch64MCExpr::VK_GOT_AUTH_PAGE) { + // For tiny code model, we use :got_auth: operator to fill 21-bit imm of + // adr. It's not actually GOT entry page address but the GOT address + // itself - we just share the same variant kind with :got_auth: operator + // applied for adrp. + // TODO: can we so
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Daniil Kovalev (kovdan01) Changes Depends on #113812 Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19` GOT-generating relocations. --- Full diff: https://github.com/llvm/llvm-project/pull/113816.diff 5 Files Affected: - (modified) lld/ELF/Arch/AArch64.cpp (+5) - (modified) lld/ELF/InputSection.cpp (+1) - (modified) lld/ELF/Relocations.cpp (+9-8) - (modified) lld/ELF/Relocations.h (+1) - (modified) lld/test/ELF/aarch64-got-relocations-pauth.s (+73) ``diff diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 86f509f3fd78a7..2f2e0c2a52b0ef 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_AUTH_LD64_GOT_LO12_NC: case R_AARCH64_AUTH_GOT_ADD_LO12_NC: return R_AARCH64_AUTH_GOT; + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: +return R_AARCH64_AUTH_GOT_PC; case R_AARCH64_LD64_GOTPAGE_LO15: return R_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: @@ -549,6 +552,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: checkInt(ctx, loc, val, 21, rel); write32AArch64Addr(loc, val); break; @@ -569,6 +573,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_LD_PREL19: checkAlignment(ctx, loc, val, 4, rel); checkInt(ctx, loc, val, 21, rel); writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index ccc7cf8c6e2de9..ba135afd3580bf 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -788,6 +788,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_AARCH64_GOT_PAGE: return r.sym->getGotVA(ctx) 
+ a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: + case R_AARCH64_AUTH_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return r.sym->getGotVA(ctx) + a - p; case R_GOTPLT_GOTREL: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 2d3815e58b5f67..324a97d7a55f41 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -210,11 +210,11 @@ static bool needsPlt(RelExpr expr) { } bool lld::elf::needsGot(RelExpr expr) { - return oneof( - expr); + return oneof(expr); } // True if this expression is of the form Sym - X, where X is a position in the @@ -1011,8 +1011,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, -R_AARCH64_AUTH_GOT, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, -R_LOONGARCH_GOT_PAGE_PC>(e)) +R_AARCH64_AUTH_GOT, R_AARCH64_AUTH_GOT_PC, R_LOONGARCH_PLT_PAGE_PC, +R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1126,7 +1126,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. 
bool needsGotAuth = - (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PAGE_PC); + (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PC || + expr == R_AARCH64_AUTH_GOT_PAGE_PC); uint16_t flags = sym.flags.load(std::memory_order_relaxed); if (!(flags & NEEDS_GOT)) { if (needsGotAuth) diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 20d88de402ac18..38d55d46116569 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -89,6 +89,7 @@ enum RelExpr { R_AARCH64_AUTH_GOT_PAGE_PC, R_AARCH64_GOT_PAGE, R_AARCH64_AUTH_GOT, + R_AARCH64_AUTH_GOT_PC, R_AARCH64_PAGE_PC, R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, R_AARCH64_TLSDESC_PAGE, diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s index f04e3d953388ce..c43f1ca251a9a8 100644 --- a/lld/test/ELF/aarch64-got-relocations-pauth.s +++ b/lld/test/ELF/aarch64-got-relocations-pauth.s @@ -78,6 +78,79 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s + +# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o + +# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny +# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY + +# RUN: ld.lld ok-tiny.o a.o
[llvm-branch-commits] [llvm] [PAC][CodeGen][ELF][AArch64] Support signed TLSDESC (PR #113813)
https://github.com/kovdan01 ready_for_review https://github.com/llvm/llvm-project/pull/113813 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PAC][CodeGen][ELF][AArch64] Support signed TLSDESC (PR #113813)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Daniil Kovalev (kovdan01) Changes Depends on #113716 Support the following relocations and assembly operators: - `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21` (`:tlsdesc_auth:` for `adrp`) - `R_AARCH64_AUTH_TLSDESC_LD64_LO12` (`:tlsdesc_auth_lo12:` for `ldr`) - `R_AARCH64_AUTH_TLSDESC_ADD_LO12` (`:tlsdesc_auth_lo12:` for `add`) `TLSDESC_AUTH_CALLSEQ` pseudo-instruction is introduced which is later expanded to actual instruction sequence like the following. ``` adrp x0, :tlsdesc_auth:var ldr x16, [x0, #:tlsdesc_auth_lo12:var] add x0, x0, #:tlsdesc_auth_lo12:var .tlsdesccall var blraa x16, x0 (TPIDR_EL0 offset now in x0) ``` Only SelectionDAG ISel is supported. Tests starting with 'ptrauth-' have corresponding variants w/o this prefix. --- Patch is 39.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113813.diff 12 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp (+59) - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+13-3) - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+1) - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+14) - (modified) llvm/lib/Target/AArch64/AArch64MCInstLower.cpp (+19-7) - (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+58-51) - (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp (+26) - (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp (+4) - (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h (+65-62) - (added) llvm/test/CodeGen/AArch64/ptrauth-arm64-tls-dynamics.ll (+104) - (modified) llvm/test/MC/AArch64/arm64-elf-relocs.s (+37-18) - (modified) llvm/test/MC/AArch64/ilp32-diagnostics.s (+9) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index e79457f925db66..8ec785027d23d7 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -2603,6 +2603,65 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInstSB); return; } + case AArch64::TLSDESC_AUTH_CALLSEQ: { +/// lower this to: +///adrp x0, :tlsdesc_auth:var +///ldr x16, [x0, #:tlsdesc_auth_lo12:var] +///add x0, x0, #:tlsdesc_auth_lo12:var +///.tlsdesccall var +///blraa x16, x0 +///(TPIDR_EL0 offset now in x0) +const MachineOperand &MO_Sym = MI->getOperand(0); +MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); +MCOperand Sym, SymTLSDescLo12, SymTLSDesc; +MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF); +MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); +MCInstLowering.lowerOperand(MO_Sym, Sym); +MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); +MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc); + +MCInst Adrp; +Adrp.setOpcode(AArch64::ADRP); +Adrp.addOperand(MCOperand::createReg(AArch64::X0)); +Adrp.addOperand(SymTLSDesc); +EmitToStreamer(*OutStreamer, Adrp); + +MCInst Ldr; +Ldr.setOpcode(AArch64::LDRXui); +Ldr.addOperand(MCOperand::createReg(AArch64::X16)); +Ldr.addOperand(MCOperand::createReg(AArch64::X0)); +Ldr.addOperand(SymTLSDescLo12); +Ldr.addOperand(MCOperand::createImm(0)); +EmitToStreamer(*OutStreamer, Ldr); + +MCInst Add; +Add.setOpcode(AArch64::ADDXri); +Add.addOperand(MCOperand::createReg(AArch64::X0)); +Add.addOperand(MCOperand::createReg(AArch64::X0)); +Add.addOperand(SymTLSDescLo12); +Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0))); +EmitToStreamer(*OutStreamer, Add); + +// Emit a relocation-annotation. This expands to no code, but requests +// the following instruction gets an R_AARCH64_TLSDESC_CALL. +// TODO: we probably don't need that for AUTH TLSDESC. Emit as for now for +// consistency with non-AUTH case. 
+MCInst TLSDescCall; +TLSDescCall.setOpcode(AArch64::TLSDESCCALL); +TLSDescCall.addOperand(Sym); +EmitToStreamer(*OutStreamer, TLSDescCall); +#ifndef NDEBUG +--InstsEmitted; // no code emitted +#endif + +MCInst Blraa; +Blraa.setOpcode(AArch64::BLRAA); +Blraa.addOperand(MCOperand::createReg(AArch64::X16)); +Blraa.addOperand(MCOperand::createReg(AArch64::X0)); +EmitToStreamer(*OutStreamer, Blraa); + +return; + } case AArch64::TLSDESC_CALLSEQ: { /// lower this to: ///adrp x0, :tlsdesc:var diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 884e5fe9f60040..108a01f34e2e81 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2634,6 +2634,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcod
[llvm-branch-commits] [llvm] [PAC][CodeGen][ELF][AArch64] Support signed GOT with tiny code model (PR #113812)
https://github.com/kovdan01 ready_for_review https://github.com/llvm/llvm-project/pull/113812 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Daniil Kovalev (kovdan01) Changes Depends on #113813 Support `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21`, `R_AARCH64_AUTH_TLSDESC_LD64_LO12` and `R_AARCH64_AUTH_TLSDESC_ADD_LO12` static relocations and `R_AARCH64_AUTH_TLSDESC` dynamic relocation. IE/LE optimization is not currently supported for AUTH TLSDESC. --- Full diff: https://github.com/llvm/llvm-project/pull/113817.diff 7 Files Affected: - (modified) lld/ELF/Arch/AArch64.cpp (+8) - (modified) lld/ELF/InputSection.cpp (+2) - (modified) lld/ELF/Relocations.cpp (+36-2) - (modified) lld/ELF/Relocations.h (+4) - (modified) lld/ELF/Symbols.h (+1) - (modified) lld/ELF/SyntheticSections.cpp (+5) - (added) lld/test/ELF/aarch64-tlsdesc-pauth.s (+134) ``diff diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 86f509f3fd78a7..8ad466bf49878b 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, return R_AARCH64_AUTH; case R_AARCH64_TLSDESC_ADR_PAGE21: return R_AARCH64_TLSDESC_PAGE; + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: +return R_AARCH64_AUTH_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSDESC_ADD_LO12: return R_TLSDESC; + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: +return RelExpr::R_AARCH64_AUTH_TLSDESC; case R_AARCH64_TLSDESC_CALL: return R_TLSDESC_CALL; case R_AARCH64_TLSLE_ADD_TPREL_HI12: @@ -543,6 +548,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: checkInt(ctx, loc, val, 33, rel); [[fallthrough]]; case R_AARCH64_ADR_PREL_PG_HI21_NC: @@ -593,6 +599,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12: + case 
R_AARCH64_AUTH_TLSDESC_LD64_LO12: checkAlignment(ctx, loc, val, 8, rel); write32Imm12(loc, getBits(val, 3, 11)); break; @@ -667,6 +674,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: write32Imm12(loc, val); break; case R_AARCH64_TLSDESC: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index ccc7cf8c6e2de9..b3303c59a3b4a5 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -935,12 +935,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_SIZE: return r.sym->getSize() + a; case R_TLSDESC: + case RelExpr::R_AARCH64_AUTH_TLSDESC: return ctx.in.got->getTlsDescAddr(*r.sym) + a; case R_TLSDESC_PC: return ctx.in.got->getTlsDescAddr(*r.sym) + a - p; case R_TLSDESC_GOTPLT: return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA(); case R_AARCH64_TLSDESC_PAGE: + case R_AARCH64_AUTH_TLSDESC_PAGE: return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) - getAArch64Page(p); case R_LOONGARCH_TLSDESC_PAGE_PC: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 2d3815e58b5f67..966088aca77669 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1355,6 +1355,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + auto fatalBothAuthAndNonAuth = [&sym]() { +fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() + + "' requested, but only one type of TLSDESC entry per symbol is " + "supported"); + }; + + // Do not optimize signed TLSDESC as described in pauthabielf64 to LE/IE. + // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions + // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC). 
+ if (oneof( + expr)) { +assert(ctx.arg.emachine == EM_AARCH64); +if (!sym.hasFlag(NEEDS_TLSDESC)) + sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH); +else if (!sym.hasFlag(NEEDS_TLSDESC_AUTH)) + fatalBothAuthAndNonAuth(); +sec->addReloc({expr, type, offset, addend, &sym}); +return 1; + } + + if (sym.hasFlag(NEEDS_TLSDESC_AUTH)) { +assert(ctx.arg.emachine == EM_AARCH64); +// TLSDESC_CALL hint relocation probably should not be emitted by compiler +// with signed TLSDESC enabled since it does not give any value, but leave a +// check against that just in case someone uses it. +if (expr != R_TLSDESC_CALL) + fatalBothAuthAndNonAuth(); +return 1; + } + bool isRISCV = ctx.arg.emachine == EM_RISCV; if (oneofaddTlsDescEntry(sym); + RelType tlsDescRel = ctx.target->tlsDescRel; + if (flags &
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
https://github.com/kovdan01 ready_for_review https://github.com/llvm/llvm-project/pull/113816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
llvmbot wrote: @llvm/pr-subscribers-lld-elf Author: Daniil Kovalev (kovdan01) Changes Depends on #113812 Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19` GOT-generating relocations. --- Full diff: https://github.com/llvm/llvm-project/pull/113816.diff 5 Files Affected: - (modified) lld/ELF/Arch/AArch64.cpp (+5) - (modified) lld/ELF/InputSection.cpp (+1) - (modified) lld/ELF/Relocations.cpp (+9-8) - (modified) lld/ELF/Relocations.h (+1) - (modified) lld/test/ELF/aarch64-got-relocations-pauth.s (+73) ``diff diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 86f509f3fd78a7..2f2e0c2a52b0ef 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_AUTH_LD64_GOT_LO12_NC: case R_AARCH64_AUTH_GOT_ADD_LO12_NC: return R_AARCH64_AUTH_GOT; + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: +return R_AARCH64_AUTH_GOT_PC; case R_AARCH64_LD64_GOTPAGE_LO15: return R_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: @@ -549,6 +552,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: checkInt(ctx, loc, val, 21, rel); write32AArch64Addr(loc, val); break; @@ -569,6 +573,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_LD_PREL19: checkAlignment(ctx, loc, val, 4, rel); checkInt(ctx, loc, val, 21, rel); writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index ccc7cf8c6e2de9..ba135afd3580bf 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -788,6 +788,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_AARCH64_GOT_PAGE: return 
r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: + case R_AARCH64_AUTH_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return r.sym->getGotVA(ctx) + a - p; case R_GOTPLT_GOTREL: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 2d3815e58b5f67..324a97d7a55f41 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -210,11 +210,11 @@ static bool needsPlt(RelExpr expr) { } bool lld::elf::needsGot(RelExpr expr) { - return oneof( - expr); + return oneof(expr); } // True if this expression is of the form Sym - X, where X is a position in the @@ -1011,8 +1011,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, -R_AARCH64_AUTH_GOT, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, -R_LOONGARCH_GOT_PAGE_PC>(e)) +R_AARCH64_AUTH_GOT, R_AARCH64_AUTH_GOT_PC, R_LOONGARCH_PLT_PAGE_PC, +R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1126,7 +1126,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. 
bool needsGotAuth = - (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PAGE_PC); + (expr == R_AARCH64_AUTH_GOT || expr == R_AARCH64_AUTH_GOT_PC || + expr == R_AARCH64_AUTH_GOT_PAGE_PC); uint16_t flags = sym.flags.load(std::memory_order_relaxed); if (!(flags & NEEDS_GOT)) { if (needsGotAuth) diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 20d88de402ac18..38d55d46116569 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -89,6 +89,7 @@ enum RelExpr { R_AARCH64_AUTH_GOT_PAGE_PC, R_AARCH64_GOT_PAGE, R_AARCH64_AUTH_GOT, + R_AARCH64_AUTH_GOT_PC, R_AARCH64_PAGE_PC, R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, R_AARCH64_TLSDESC_PAGE, diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s index f04e3d953388ce..c43f1ca251a9a8 100644 --- a/lld/test/ELF/aarch64-got-relocations-pauth.s +++ b/lld/test/ELF/aarch64-got-relocations-pauth.s @@ -78,6 +78,79 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s + +# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o + +# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny +# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY + +# RUN: ld.lld ok-tiny.o
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/112866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Disable use of the counted_by attribute for whole struct pointers (#112636) (PR #112786)
https://github.com/AaronBallman approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/112786 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 2f315eb - Revert "[flang] Integrate the option -flang-experimental-integer-overflow int…"
Author: Yusuke MINATO Date: 2024-10-28T21:45:39+09:00 New Revision: 2f315eb090ec08e193acbc59127b958efa93fd35 URL: https://github.com/llvm/llvm-project/commit/2f315eb090ec08e193acbc59127b958efa93fd35 DIFF: https://github.com/llvm/llvm-project/commit/2f315eb090ec08e193acbc59127b958efa93fd35.diff LOG: Revert "[flang] Integrate the option -flang-experimental-integer-overflow int…" This reverts commit 96bb375f5cedcfcc5dcd96296ba54ff933b39d4d. Added: Modified: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Flang.cpp flang/include/flang/Lower/LoweringOptions.def flang/include/flang/Optimizer/Transforms/Passes.h flang/include/flang/Optimizer/Transforms/Passes.td flang/include/flang/Tools/CrossToolHelpers.h flang/lib/Frontend/CompilerInvocation.cpp flang/lib/Frontend/FrontendActions.cpp flang/lib/Lower/Bridge.cpp flang/lib/Lower/IO.cpp flang/lib/Optimizer/Passes/Pipelines.cpp flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp flang/test/Driver/frontend-forwarding.f90 flang/test/Fir/convert-to-llvm-openmp-and-fir.fir flang/test/Fir/loop01.fir flang/test/Fir/loop02.fir flang/test/Lower/HLFIR/goto-do-body.f90 flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 flang/test/Lower/OpenMP/wsloop-variable.f90 flang/test/Lower/array-character.f90 flang/test/Lower/array-derived-assignments.f90 flang/test/Lower/array-derived.f90 flang/test/Lower/array-elemental-calls-char-byval.f90 flang/test/Lower/array-elemental-calls-char.f90 flang/test/Lower/array-expression-assumed-size.f90 flang/test/Lower/array-expression-slice-1.f90 flang/test/Lower/array-substring.f90 flang/test/Lower/array-temp.f90 flang/test/Lower/components.f90 flang/test/Lower/do_loop.f90 flang/test/Lower/do_loop_unstructured.f90 flang/test/Lower/goto-do-body.f90 flang/test/Lower/host-associated.f90 flang/test/Lower/infinite_loop.f90 flang/test/Lower/io-implied-do-fixes.f90 flang/test/Lower/loops2.f90 
flang/test/Lower/mixed_loops.f90 flang/test/Lower/vector-subscript-io.f90 flang/tools/bbc/bbc.cpp Removed: diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5df6ddd5e6a0c5..75b4607b1b2391 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6787,6 +6787,10 @@ def flang_deprecated_no_hlfir : Flag<["-"], "flang-deprecated-no-hlfir">, Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>, HelpText<"Do not use HLFIR lowering (deprecated)">; +def flang_experimental_integer_overflow : Flag<["-"], "flang-experimental-integer-overflow">, + Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>, + HelpText<"Add nsw flag to internal operations such as do-variable increment (experimental)">; + //===--===// // FLangOption + CoreOption + NoXarchOption //===--===// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 366cadc2e54775..a9d2b7a4dc48f9 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -148,6 +148,7 @@ void Flang::addCodegenOptions(const ArgList &Args, Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, options::OPT_flang_deprecated_no_hlfir, +options::OPT_flang_experimental_integer_overflow, options::OPT_fno_ppc_native_vec_elem_order, options::OPT_fppc_native_vec_elem_order}); } diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 0b22e54b648e94..231de533fbd30a 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -38,5 +38,10 @@ ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1) /// (i.e. wraps around as two's complement). Off by default. ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0) +/// If true, add nsw flags to loop variable increments. +/// Off by default. 
+/// TODO: integrate this option with the above +ENUM_LOWERINGOPT(NSWOnLoopVarInc, unsigned, 1, 0) + #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index e1453cfa374bfc..e8f0a8444a31a1 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -66,7 +66,7 @@ namespace fir { std::unique_ptr createAffineDemotionPass(); std::unique_ptr createArrayValueCopyPass(fir::ArrayValue
[llvm-branch-commits] [clang] [clang] Fix C23 constexpr crashes (#112708) (PR #112855)
https://github.com/AaronBallman approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/112855 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port RegUsageInfoCollector pass to NPM (PR #113874)
https://github.com/pravinjagtap approved this pull request. LGTM, wait for others https://github.com/llvm/llvm-project/pull/113874 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang] Disable use of the counted_by attribute for whole struct pointers (#112636) (PR #112786)
https://github.com/Cydox approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/112786 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)
petar-avramovic wrote: Rebase for new-reg-bank-select taking over AMDGPURegBankSelect. TODO: helper changes after DstOp improvements, as it accepts bank+LLT. https://github.com/llvm/llvm-project/pull/112864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [llvm] [CGData][llvm-cgdata] Support for stable function map (PR #112664)
https://github.com/kyulee-com ready_for_review https://github.com/llvm/llvm-project/pull/112664 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport e1c36bde0551977d4b2efae032af6dfc4b2b3936 Requested by: @samitolvanen --- Full diff: https://github.com/llvm/llvm-project/pull/113938.diff 5 Files Affected: - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+5) - (modified) clang/test/CodeGen/kcfi-normalize.c (+1) - (modified) llvm/lib/Transforms/Utils/ModuleUtils.cpp (+7-5) - (added) llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll (+35) - (modified) llvm/test/Transforms/GCOVProfiling/kcfi.ll (+5-3) ``diff diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cf5e29e5a3db8d..49266f3e3982e1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1137,6 +1137,11 @@ void CodeGenModule::Release() { CodeGenOpts.SanitizeCfiCanonicalJumpTables); } + if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) { +getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers", + 1); + } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); // KCFI assumes patchable-function-prefix is the same for all indirectly diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c index 7660c908a7bdd5..b9150e88f6ab5f 100644 --- a/clang/test/CodeGen/kcfi-normalize.c +++ b/clang/test/CodeGen/kcfi-normalize.c @@ -28,6 +28,7 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) { fn(arg1, arg2, arg3); } +// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1} // CHECK: ![[TYPE1]] = !{i32 -1143117868} // CHECK: ![[TYPE2]] = !{i32 -460921415} // CHECK: ![[TYPE3]] = !{i32 -333839615} diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 122279160cc7e8..95bf9f06bc331c 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -161,11 +161,13 @@ void llvm::setKCFIType(Module &M, 
Function &F, StringRef MangledType) { // Matches CodeGenModule::CreateKCFITypeId in Clang. LLVMContext &Ctx = M.getContext(); MDBuilder MDB(Ctx); - F.setMetadata( - LLVMContext::MD_kcfi_type, - MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( - Type::getInt32Ty(Ctx), - static_cast(xxHash64(MangledType)); + std::string Type = MangledType.str(); + if (M.getModuleFlag("cfi-normalize-integers")) +Type += ".normalized"; + F.setMetadata(LLVMContext::MD_kcfi_type, +MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(Ctx), + static_cast(xxHash64(Type)); // If the module was compiled with -fpatchable-function-entry, ensure // we use the same patchable-function-prefix. if (auto *MD = mdconst::extract_or_null( diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll new file mode 100644 index 00..19122b920d1ca4 --- /dev/null +++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll @@ -0,0 +1,35 @@ +;; Ensure __llvm_gcov_(writeout|reset|init) have the correct !kcfi_type +;; with integer normalization. 
+; RUN: mkdir -p %t && cd %t +; RUN: opt < %s -S -passes=insert-gcov-profiling | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @empty() !dbg !5 { +entry: + ret void, !dbg !8 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !7) +!7 = !{null} +!8 = !DILocation(line: 2, column: 1, scope: !5) +!9 = !{i32 4, !"kcfi", i32 1} +!10 = !{i32 4, !"cfi-normalize-integers", i32 1} + +; CHECK: define internal void @__llvm_gcov_writeout() +; CHECK-SAME: !kcfi_type ![[#TYPE:]] +; CHECK: define internal void @__llvm_gcov_reset() +; CHECK-SAME: !kcfi_type ![[#TYPE]] +; CHECK: define internal void @__llvm_gcov_init() +; CHECK-SAME: !kcfi_type ![[#TYPE]] + +; CHECK: ![[#TYPE]] = !{i32 -440107680} diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi.ll b/llvm/test/Transforms/GCOVProfiling/kcfi.ll index b25f40f05d5bc4..1b97d25294cd65 100644 --- a/llvm/test/Transforms/GCOVProfiling/kcfi.ll +++ b/llvm/test/Transforms/GCOVProfiling/kcfi.ll @@ -24,8 +24,10 @@ entry: !9 = !{i32 4, !"kcfi", i32 1} ; CHECK: define internal void @__llvm_gcov_w
[llvm-branch-commits] [clang] release/19.x: [clang] Make LazyOffsetPtr more portable (#112927) (PR #113052)
jrtc27 wrote: As the author of the patch, that seems sensible to me, and I’m not aware of any regressions from it in main. https://github.com/llvm/llvm-project/pull/113052 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM] [Clang] Backport "Support for Gentoo `*t64` triples (64-bit time_t ABIs)" (PR #112364)
mgorny wrote: Just to clarify my concise reply (I was in a hurry): right now (FWICS since clang 17), clang rejects the host triplet we'd like to use as invalid, i.e.: ``` clang: error: version 't32' in target triple 'i686-pc-linux-gnut32' is invalid clang: error: no input files ``` So at least *some* patch is needed to make it work at all. And since we're changing it anyway, I don't think there's a major risk in backporting the whole thing — and it would at least have the advantage of preserving consistent behavior across clang versions (i.e. not having clang 20 that applies time64 flags, and clang 19 that doesn't reject the triplet but doesn't apply the flags). https://github.com/llvm/llvm-project/pull/112364 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)
arsenm wrote: I would like to reiterate that this is a roundabout fix for the assertion in question. It sends it down a different path that happens to avoid it. It is not necessary to make this ABI change to fix the assertion. https://github.com/llvm/llvm-project/pull/109093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Do not emit warnings for `-z pac-plt` with valid PAuth core info (PR #112959)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/112959 >From 5420db8f3959f073f379466c340252a1816e1810 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 18 Oct 2024 22:07:51 +0300 Subject: [PATCH 1/2] [PAC][lld] Do not emit warnings for `-z pac-plt` with valid PAuth core info When PAuth core info is present and (platform,version) is not (0,0), treat input files as pac-enabled and do not emit a warning with `-z pac-plt` passed. --- lld/ELF/Driver.cpp | 10 - lld/test/ELF/aarch64-feature-pac.s | 2 +- lld/test/ELF/aarch64-feature-pauth.s | 58 ++-- 3 files changed, 64 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index fb77e67e9fc5ca..c436be6b24e001 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2753,6 +2753,10 @@ static void readSecurityNotes(Ctx &ctx) { referenceFileName = (*it)->getName(); } } + bool hasValidPauthAbiCoreInfo = + (!ctx.aarch64PauthAbiCoreInfo.empty() && + llvm::any_of(ctx.aarch64PauthAbiCoreInfo, +[](uint8_t c) { return c != 0; })); for (ELFFileBase *f : ctx.objectFiles) { uint32_t features = f->andFeatures; @@ -2789,9 +2793,11 @@ static void readSecurityNotes(Ctx &ctx) { "GNU_PROPERTY_X86_FEATURE_1_IBT property"); features |= GNU_PROPERTY_X86_FEATURE_1_IBT; } -if (ctx.arg.zPacPlt && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) { +if (ctx.arg.zPacPlt && !(hasValidPauthAbiCoreInfo || + (features & GNU_PROPERTY_AARCH64_FEATURE_1_PAC))) { warn(toString(f) + ": -z pac-plt: file does not have " - "GNU_PROPERTY_AARCH64_FEATURE_1_PAC property"); + "GNU_PROPERTY_AARCH64_FEATURE_1_PAC property and no " + "valid PAuth core info present for this link job"); features |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; } ctx.arg.andFeatures &= features; diff --git a/lld/test/ELF/aarch64-feature-pac.s b/lld/test/ELF/aarch64-feature-pac.s index b85a33216cb5bd..4fd1fd2acea737 100644 --- a/lld/test/ELF/aarch64-feature-pac.s +++ b/lld/test/ELF/aarch64-feature-pac.s @@ -82,7 +82,7 @@ # RUN: 
ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe 2>&1 | FileCheck -DFILE=%t2.o --check-prefix WARN %s -# WARN: warning: [[FILE]]: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC property +# WARN: warning: [[FILE]]: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC property and no valid PAuth core info present for this link job # RUN: llvm-readelf -n %tpacplt.exe | FileCheck --check-prefix=PACPROP %s # RUN: llvm-readelf --dynamic-table %tpacplt.exe | FileCheck --check-prefix PACDYN2 %s diff --git a/lld/test/ELF/aarch64-feature-pauth.s b/lld/test/ELF/aarch64-feature-pauth.s index 699a650d72295a..c11073dba86f24 100644 --- a/lld/test/ELF/aarch64-feature-pauth.s +++ b/lld/test/ELF/aarch64-feature-pauth.s @@ -33,13 +33,53 @@ # RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu no-info.s -o noinfo1.o # RUN: cp noinfo1.o noinfo2.o # RUN: not ld.lld -z pauth-report=error noinfo1.o tag1.o noinfo2.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR5 %s -# RUN: ld.lld -z pauth-report=warning noinfo1.o tag1.o noinfo2.o -o /dev/null 2>&1 | FileCheck --check-prefix WARN %s +# RUN: ld.lld -z pauth-report=warning noinfo1.o tag1.o noinfo2.o -o /dev/null 2>&1 | FileCheck --check-prefix WARN1 %s # RUN: ld.lld -z pauth-report=none noinfo1.o tag1.o noinfo2.o --fatal-warnings -o /dev/null # ERR5: error: noinfo1.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one # ERR5-NEXT: error: noinfo2.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one -# WARN: warning: noinfo1.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one -# WARN-NEXT: warning: noinfo2.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one +# WARN1: warning: noinfo1.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one +# WARN1-NEXT: warning: noinfo2.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' 
has one + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-zero.s -o tag-zero.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/aarch64-func2.s -o func2.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/aarch64-func3.s -o func3.o +# RUN: ld.lld func3.o --shared -o func3.so +# RUN: ld.lld tag1.o func2.o func3.so -z pac-plt --shared -o pacplt-nowarn --fatal-warnings +# RUN: ld.lld tag-zero.o func2.o func3.so -z pac-plt --shared -o pacplt-warn 2>&1 | FileCheck --check-prefix WARN2 %s + +# WARN2: warning: tag-zero.o: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC p
[llvm-branch-commits] [lld] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info (PR #113945)
kovdan01 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/113945?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#113945** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/113945?utm_source=stack-comment-icon) 👈 * **#112959** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/112959?utm_source=stack-comment-icon) * **#112958** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/112958?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment). Join @kovdan01 and the rest of your teammates on [![Graphite](https://static.graphite.dev/graphite-32x32-black.png) Graphite](https://graphite.dev?utm-source=stack-comment) https://github.com/llvm/llvm-project/pull/113945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info (PR #113945)
https://github.com/kovdan01 ready_for_review https://github.com/llvm/llvm-project/pull/113945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info (PR #113945)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Daniil Kovalev (kovdan01) Changes Assume PAC instructions being supported with PAuth core info different from (0,0). Given that, `autia1716; br x17` can be replaced with `braa x17, x16; nop`. --- Full diff: https://github.com/llvm/llvm-project/pull/113945.diff 2 Files Affected: - (modified) lld/ELF/Arch/AArch64.cpp (+15-4) - (modified) lld/test/ELF/aarch64-feature-pauth.s (+6-4) ``diff diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 260307ac4c3dcb..c76f226bc5511c 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -999,7 +999,9 @@ class AArch64BtiPac final : public AArch64 { private: bool btiHeader; // bti instruction needed in PLT Header and Entry - bool pacEntry; // autia1716 instruction needed in PLT Entry + bool pacEntry; // Authenticated branch needed in PLT Entry + bool pacUseHint = + true; // Use hint space instructions for authenticated branch in PLT entry }; } // namespace @@ -1016,6 +1018,10 @@ AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) { // from properties in the objects, so we use the command line flag. 
pacEntry = ctx.arg.zPacPlt; + if (llvm::any_of(ctx.aarch64PauthAbiCoreInfo, + [](uint8_t c) { return c != 0; })) +pacUseHint = false; + if (btiHeader || pacEntry) { pltEntrySize = 24; ipltEntrySize = 24; @@ -1066,9 +1072,13 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))] 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n])) }; + const uint8_t pacHintBr[] = { + 0x9f, 0x21, 0x03, 0xd5, // autia1716 + 0x20, 0x02, 0x1f, 0xd6 // br x17 + }; const uint8_t pacBr[] = { - 0x9f, 0x21, 0x03, 0xd5, // autia1716 - 0x20, 0x02, 0x1f, 0xd6 // br x17 + 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16 + 0x1f, 0x20, 0x03, 0xd5 // nop }; const uint8_t stdBr[] = { 0x20, 0x02, 0x1f, 0xd6, // br x17 @@ -1097,7 +1107,8 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) -memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); +memcpy(buf + sizeof(addrInst), (pacUseHint ? pacHintBr : pacBr), + sizeof(pacUseHint ? 
pacHintBr : pacBr)); else memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr)); if (!hasBti) diff --git a/lld/test/ELF/aarch64-feature-pauth.s b/lld/test/ELF/aarch64-feature-pauth.s index c11073dba86f24..34f2f2698a26b8 100644 --- a/lld/test/ELF/aarch64-feature-pauth.s +++ b/lld/test/ELF/aarch64-feature-pauth.s @@ -56,8 +56,8 @@ # PACPLTTAG: 0x7003 (AARCH64_PAC_PLT) -# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefix PACPLT -DA=10380 -DB=478 -DC=480 %s -# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefix PACPLT -DA=10390 -DB=488 -DC=490 %s +# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefixes=PACPLT,NOHINT -DA=10380 -DB=478 -DC=480 %s +# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefixes=PACPLT,HINT -DA=10390 -DB=488 -DC=490 %s # PACPLT: Disassembly of section .text: # PACPLT: : @@ -77,8 +77,10 @@ # PACPLT-NEXT: adrpx16, 0x3 # PACPLT-NEXT: ldr x17, [x16, #0x[[C]]] # PACPLT-NEXT: add x16, x16, #0x[[C]] -# PACPLT-NEXT: autia1716 -# PACPLT-NEXT: br x17 +# NOHINT-NEXT: braax17, x16 +# NOHINT-NEXT: nop +# HINT-NEXT: autia1716 +# HINT-NEXT: br x17 # PACPLT-NEXT: nop #--- abi-tag-short.s `` https://github.com/llvm/llvm-project/pull/113945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info (PR #113945)
llvmbot wrote: @llvm/pr-subscribers-lld-elf Author: Daniil Kovalev (kovdan01) Changes Assume PAC instructions being supported with PAuth core info different from (0,0). Given that, `autia1716; br x17` can be replaced with `braa x17, x16; nop`. --- Full diff: https://github.com/llvm/llvm-project/pull/113945.diff 2 Files Affected: - (modified) lld/ELF/Arch/AArch64.cpp (+15-4) - (modified) lld/test/ELF/aarch64-feature-pauth.s (+6-4) ``diff diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 260307ac4c3dcb..c76f226bc5511c 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -999,7 +999,9 @@ class AArch64BtiPac final : public AArch64 { private: bool btiHeader; // bti instruction needed in PLT Header and Entry - bool pacEntry; // autia1716 instruction needed in PLT Entry + bool pacEntry; // Authenticated branch needed in PLT Entry + bool pacUseHint = + true; // Use hint space instructions for authenticated branch in PLT entry }; } // namespace @@ -1016,6 +1018,10 @@ AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) { // from properties in the objects, so we use the command line flag. 
pacEntry = ctx.arg.zPacPlt; + if (llvm::any_of(ctx.aarch64PauthAbiCoreInfo, + [](uint8_t c) { return c != 0; })) +pacUseHint = false; + if (btiHeader || pacEntry) { pltEntrySize = 24; ipltEntrySize = 24; @@ -1066,9 +1072,13 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))] 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n])) }; + const uint8_t pacHintBr[] = { + 0x9f, 0x21, 0x03, 0xd5, // autia1716 + 0x20, 0x02, 0x1f, 0xd6 // br x17 + }; const uint8_t pacBr[] = { - 0x9f, 0x21, 0x03, 0xd5, // autia1716 - 0x20, 0x02, 0x1f, 0xd6 // br x17 + 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16 + 0x1f, 0x20, 0x03, 0xd5 // nop }; const uint8_t stdBr[] = { 0x20, 0x02, 0x1f, 0xd6, // br x17 @@ -1097,7 +1107,8 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) -memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); +memcpy(buf + sizeof(addrInst), (pacUseHint ? pacHintBr : pacBr), + sizeof(pacUseHint ? 
pacHintBr : pacBr)); else memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr)); if (!hasBti) diff --git a/lld/test/ELF/aarch64-feature-pauth.s b/lld/test/ELF/aarch64-feature-pauth.s index c11073dba86f24..34f2f2698a26b8 100644 --- a/lld/test/ELF/aarch64-feature-pauth.s +++ b/lld/test/ELF/aarch64-feature-pauth.s @@ -56,8 +56,8 @@ # PACPLTTAG: 0x7003 (AARCH64_PAC_PLT) -# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefix PACPLT -DA=10380 -DB=478 -DC=480 %s -# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefix PACPLT -DA=10390 -DB=488 -DC=490 %s +# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefixes=PACPLT,NOHINT -DA=10380 -DB=478 -DC=480 %s +# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefixes=PACPLT,HINT -DA=10390 -DB=488 -DC=490 %s # PACPLT: Disassembly of section .text: # PACPLT: : @@ -77,8 +77,10 @@ # PACPLT-NEXT: adrpx16, 0x3 # PACPLT-NEXT: ldr x17, [x16, #0x[[C]]] # PACPLT-NEXT: add x16, x16, #0x[[C]] -# PACPLT-NEXT: autia1716 -# PACPLT-NEXT: br x17 +# NOHINT-NEXT: braax17, x16 +# NOHINT-NEXT: nop +# HINT-NEXT: autia1716 +# HINT-NEXT: br x17 # PACPLT-NEXT: nop #--- abi-tag-short.s `` https://github.com/llvm/llvm-project/pull/113945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info (PR #113945)
https://github.com/kovdan01 created https://github.com/llvm/llvm-project/pull/113945 Assume PAC instructions being supported with PAuth core info different from (0,0). Given that, `autia1716; br x17` can be replaced with `braa x17, x16; nop`. >From f2daf75b8506e31180f2d41291c6f1a63da5138b Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Mon, 28 Oct 2024 21:23:54 +0300 Subject: [PATCH] [PAC][lld] Use braa instr in PAC PLT sequence with valid PAuth core info Assume PAC instructions being supported with PAuth core info different from (0,0). Given that, `autia1716; br x17` can be replaced with `braa x17, x16; nop`. --- lld/ELF/Arch/AArch64.cpp | 19 +++ lld/test/ELF/aarch64-feature-pauth.s | 10 ++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 260307ac4c3dcb..c76f226bc5511c 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -999,7 +999,9 @@ class AArch64BtiPac final : public AArch64 { private: bool btiHeader; // bti instruction needed in PLT Header and Entry - bool pacEntry; // autia1716 instruction needed in PLT Entry + bool pacEntry; // Authenticated branch needed in PLT Entry + bool pacUseHint = + true; // Use hint space instructions for authenticated branch in PLT entry }; } // namespace @@ -1016,6 +1018,10 @@ AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) { // from properties in the objects, so we use the command line flag. 
pacEntry = ctx.arg.zPacPlt; + if (llvm::any_of(ctx.aarch64PauthAbiCoreInfo, + [](uint8_t c) { return c != 0; })) +pacUseHint = false; + if (btiHeader || pacEntry) { pltEntrySize = 24; ipltEntrySize = 24; @@ -1066,9 +1072,13 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))] 0x10, 0x02, 0x00, 0x91 // add x16, x16, Offset(&(.got.plt[n])) }; + const uint8_t pacHintBr[] = { + 0x9f, 0x21, 0x03, 0xd5, // autia1716 + 0x20, 0x02, 0x1f, 0xd6 // br x17 + }; const uint8_t pacBr[] = { - 0x9f, 0x21, 0x03, 0xd5, // autia1716 - 0x20, 0x02, 0x1f, 0xd6 // br x17 + 0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16 + 0x1f, 0x20, 0x03, 0xd5 // nop }; const uint8_t stdBr[] = { 0x20, 0x02, 0x1f, 0xd6, // br x17 @@ -1097,7 +1107,8 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) -memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); +memcpy(buf + sizeof(addrInst), (pacUseHint ? pacHintBr : pacBr), + sizeof(pacUseHint ? 
pacHintBr : pacBr)); else memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr)); if (!hasBti) diff --git a/lld/test/ELF/aarch64-feature-pauth.s b/lld/test/ELF/aarch64-feature-pauth.s index c11073dba86f24..34f2f2698a26b8 100644 --- a/lld/test/ELF/aarch64-feature-pauth.s +++ b/lld/test/ELF/aarch64-feature-pauth.s @@ -56,8 +56,8 @@ # PACPLTTAG: 0x7003 (AARCH64_PAC_PLT) -# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefix PACPLT -DA=10380 -DB=478 -DC=480 %s -# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefix PACPLT -DA=10390 -DB=488 -DC=490 %s +# RUN: llvm-objdump -d pacplt-nowarn | FileCheck --check-prefixes=PACPLT,NOHINT -DA=10380 -DB=478 -DC=480 %s +# RUN: llvm-objdump -d pacplt-warn | FileCheck --check-prefixes=PACPLT,HINT -DA=10390 -DB=488 -DC=490 %s # PACPLT: Disassembly of section .text: # PACPLT: : @@ -77,8 +77,10 @@ # PACPLT-NEXT: adrpx16, 0x3 # PACPLT-NEXT: ldr x17, [x16, #0x[[C]]] # PACPLT-NEXT: add x16, x16, #0x[[C]] -# PACPLT-NEXT: autia1716 -# PACPLT-NEXT: br x17 +# NOHINT-NEXT: braax17, x16 +# NOHINT-NEXT: nop +# HINT-NEXT: autia1716 +# HINT-NEXT: br x17 # PACPLT-NEXT: nop #--- abi-tag-short.s ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
github-actions[bot] wrote: ⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo. Please turn off [Keep my email addresses private](https://github.com/settings/emails) setting in your account. See [LLVM Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it) for more information. https://github.com/llvm/llvm-project/pull/113938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add skeletons for new register bank select passes (PR #112862)
@@ -39,6 +39,8 @@ FunctionPass *createSIFoldOperandsLegacyPass(); FunctionPass *createSIPeepholeSDWALegacyPass(); FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); +FunctionPass *createAMDGPURBSelectPass(); petar-avramovic wrote: Reverting the default reg-bank-select to RegBankSelect; the new pass will take AMDGPURegBankSelect. https://github.com/llvm/llvm-project/pull/112862 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/19.x: Fix KCFI types for generated functions with integer normalization (#104826) (PR #113938)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/113938 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
https://github.com/kovdan01 ready_for_review https://github.com/llvm/llvm-project/pull/113817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From 610eeeacfb418948bf36a8a0b4eee40c4ada24aa Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:15:10 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 28 ++- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 70 .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 52 ++-- 9 files changed, 194 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..359d4a11a69060 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -54,9 +55,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) 
const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: 
%{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add skeletons for new register bank select passes (PR #112862)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112862 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882 >From 619288bc69a9d1290a2a5c521582b0204e0608b6 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:09:50 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit width that cover multiple LLTs: B32 B64 etc. "Predicate" wrapper class for bool predicate functions used to write pretty rules. Predicates can be combined using &&, || and !. Lowering for splitting and widening loads. Write rules for loads to not change existing mir tests from old regbankselect. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 299 +++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 309 - .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 941 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 7220773b94792d..3996167633a221 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -37,6 +37,97 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { return true; } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(BasePtrReg); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(BasePtrReg); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { 
+Register BasePtrPlusOffsetReg; +if (ByteOffset == 0) { + BasePtrPlusOffsetReg = BasePtrReg; +} else { + BasePtrPlusOffsetReg = MRI.createVirtualRegister({PtrRB, PtrTy}); + Register OffsetReg = MRI.createVirtualRegister({PtrRB, OffsetTy}); + B.buildConstant(OffsetReg, ByteOffset); + B.buildPtrAdd(BasePtrPlusOffsetReg, BasePtrReg, OffsetReg); +} +MachineMemOperand *BasePtrPlusOffsetMMO = +MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); +Register PartLoad = MRI.createVirtualRegister({DstRB, PartTy}); +B.buildLoad(PartLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); +LoadPartRegs.push_back(PartLoad); +ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { +// Loads are of same size, concat or merge them together. +B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { +// Load(s) are not all of same size, need to unmerge them to smaller pieces +// of MergeTy type, then merge them all together in Dst. +SmallVector MergeTyParts; +for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { +MergeTyParts.push_back(Reg); + } else { +auto Unmerge = B.buildUnmerge(MergeTy, Reg); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + MergeTyParts.push_back(UnmergeReg); +} + } +} +B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + + Register BasePtrPlusOffsetReg; + BasePtrPlusOffsetReg = BasePtrReg; + + MachineMemOperand *BasePtrPlusOffsetMMO = + MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + Register WideLoad = MRI.createVirtualRegister({DstRB, 
WideTy}); + B.buildLoad(WideLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); + + if (WideTy.isScalar()) { +B.buildTrunc(Dst, WideLoad); + } else { +SmallVector MergeTyParts; +unsigned NumEltsMerge = +MRI.getType(Dst).getSizeInBits() / MergeTy.getSizeInBits(); +auto Unmerge = B.buildUnmerge(MergeTy, WideLoad); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + if (i < NumEltsMerge) +MergeTyParts.push_back(UnmergeReg); +} +B.buildMergeLikeInstr(Dst, MergeTyP
[llvm-branch-commits] [lld] [llvm] [CGData][llvm-cgdata] Support for stable function map (PR #112664)
llvmbot wrote: @llvm/pr-subscribers-lld-macho @llvm/pr-subscribers-llvm-binary-utilities Author: Kyungwoo Lee (kyulee-com) Changes This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section during compile time. This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal. Depends on #112662. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- Patch is 53.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112664.diff 21 Files Affected: - (modified) lld/test/MachO/cgdata-generate.s (+3-3) - (modified) llvm/docs/CommandGuide/llvm-cgdata.rst (+7-9) - (modified) llvm/include/llvm/CGData/CodeGenData.h (+23-1) - (modified) llvm/include/llvm/CGData/CodeGenData.inc (+9-3) - (modified) llvm/include/llvm/CGData/CodeGenDataReader.h (+26-3) - (modified) llvm/include/llvm/CGData/CodeGenDataWriter.h (+16-1) - (modified) llvm/lib/CGData/CodeGenData.cpp (+18-12) - (modified) llvm/lib/CGData/CodeGenDataReader.cpp (+43-20) - (modified) llvm/lib/CGData/CodeGenDataWriter.cpp (+28-2) - (modified) llvm/test/tools/llvm-cgdata/empty.test (+5-3) - (modified) llvm/test/tools/llvm-cgdata/error.test (+8-5) - (added) llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test (+66) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test (+83) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test (+78) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-double.test (+79) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-single.test (+36) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test (+4-4) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test (+3-3) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-double.test (+4-4) - (renamed) 
llvm/test/tools/llvm-cgdata/merge-hashtree-single.test (+2-2) - (modified) llvm/tools/llvm-cgdata/llvm-cgdata.cpp (+36-12) ``diff diff --git a/lld/test/MachO/cgdata-generate.s b/lld/test/MachO/cgdata-generate.s index 174df39d666c5d..f942ae07f64e0e 100644 --- a/lld/test/MachO/cgdata-generate.s +++ b/lld/test/MachO/cgdata-generate.s @@ -3,12 +3,12 @@ # RUN: rm -rf %t; split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. # RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt # RUN: sed "s//$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s # RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt # RUN: sed "s//$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s # RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst index f592e1508844ee..0670decd087e39 100644 --- a/llvm/docs/CommandGuide/llvm-cgdata.rst +++ b/llvm/docs/CommandGuide/llvm-cgdata.rst @@ -11,15 +11,13 @@ SYNOPSIS DESCRIPTION --- -The :program:llvm-cgdata utility parses raw codegen data embedded -in compiled binary files and merges them into a single .cgdata file. -It can also inspect and manipulate .cgdata files. 
-Currently, the tool supports saving and restoring outlined hash trees, -enabling global function outlining across modules, allowing for more -efficient function outlining in subsequent compilations. -The design is extensible, allowing for the incorporation of additional -codegen summaries and optimization techniques, such as global function -merging, in the future. +The :program:llvm-cgdata utility parses raw codegen data embedded in compiled +binary files and merges them into a single .cgdata file. It can also inspect +and manipulate .cgdata files. Currently, the tool supports saving and restoring +outlined hash trees and stable function maps, allowing for more efficient +function outlining and function merging across modules in subsequent +compilations. The design is extensible, allowing for the incorporation of +additional codegen summaries and optimization techniques. COMMANDS diff --git a/llvm/inclu
[llvm-branch-commits] [clang] [PAC][Driver] Support ptrauth flags only on AArch64 Linux and ARM64 Darwin (PR #113152)
kovdan01 wrote: Would be glad to see everyone's feedback on the changes. https://github.com/llvm/llvm-project/pull/113152 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld] Do not emit warnings for `-z pac-plt` with valid PAuth core info (PR #112959)
kovdan01 wrote: Would be glad to see everyone's feedback on the changes. https://github.com/llvm/llvm-project/pull/112959 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [PAC][clang] Handle pauthtest environment and ABI in Linux-specific code (PR #113151)
kovdan01 wrote: Would be glad to see everyone's feedback on the changes. https://github.com/llvm/llvm-project/pull/113151 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [PAC][clang] Add new features to pauthtest ABI (PR #113150)
kovdan01 wrote: Would be glad to see everyone's feedback on the changes. https://github.com/llvm/llvm-project/pull/113150 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [llvm] [CGData][llvm-cgdata] Support for stable function map (PR #112664)
kyulee-com wrote: cc. @nocchijiang https://github.com/llvm/llvm-project/pull/112664 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [llvm] [CGData][llvm-cgdata] Support for stable function map (PR #112664)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Kyungwoo Lee (kyulee-com) Changes This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section during compile time. This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal. Depends on #112662. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- Patch is 53.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112664.diff 21 Files Affected: - (modified) lld/test/MachO/cgdata-generate.s (+3-3) - (modified) llvm/docs/CommandGuide/llvm-cgdata.rst (+7-9) - (modified) llvm/include/llvm/CGData/CodeGenData.h (+23-1) - (modified) llvm/include/llvm/CGData/CodeGenData.inc (+9-3) - (modified) llvm/include/llvm/CGData/CodeGenDataReader.h (+26-3) - (modified) llvm/include/llvm/CGData/CodeGenDataWriter.h (+16-1) - (modified) llvm/lib/CGData/CodeGenData.cpp (+18-12) - (modified) llvm/lib/CGData/CodeGenDataReader.cpp (+43-20) - (modified) llvm/lib/CGData/CodeGenDataWriter.cpp (+28-2) - (modified) llvm/test/tools/llvm-cgdata/empty.test (+5-3) - (modified) llvm/test/tools/llvm-cgdata/error.test (+8-5) - (added) llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test (+66) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test (+83) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test (+78) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-double.test (+79) - (added) llvm/test/tools/llvm-cgdata/merge-funcmap-single.test (+36) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test (+4-4) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test (+3-3) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-double.test (+4-4) - (renamed) llvm/test/tools/llvm-cgdata/merge-hashtree-single.test 
(+2-2) - (modified) llvm/tools/llvm-cgdata/llvm-cgdata.cpp (+36-12) ``diff diff --git a/lld/test/MachO/cgdata-generate.s b/lld/test/MachO/cgdata-generate.s index 174df39d666c5d..f942ae07f64e0e 100644 --- a/lld/test/MachO/cgdata-generate.s +++ b/lld/test/MachO/cgdata-generate.s @@ -3,12 +3,12 @@ # RUN: rm -rf %t; split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. # RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt # RUN: sed "s//$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s # RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt # RUN: sed "s//$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s # RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst index f592e1508844ee..0670decd087e39 100644 --- a/llvm/docs/CommandGuide/llvm-cgdata.rst +++ b/llvm/docs/CommandGuide/llvm-cgdata.rst @@ -11,15 +11,13 @@ SYNOPSIS DESCRIPTION --- -The :program:llvm-cgdata utility parses raw codegen data embedded -in compiled binary files and merges them into a single .cgdata file. -It can also inspect and manipulate .cgdata files. 
-Currently, the tool supports saving and restoring outlined hash trees, -enabling global function outlining across modules, allowing for more -efficient function outlining in subsequent compilations. -The design is extensible, allowing for the incorporation of additional -codegen summaries and optimization techniques, such as global function -merging, in the future. +The :program:llvm-cgdata utility parses raw codegen data embedded in compiled +binary files and merges them into a single .cgdata file. It can also inspect +and manipulate .cgdata files. Currently, the tool supports saving and restoring +outlined hash trees and stable function maps, allowing for more efficient +function outlining and function merging across modules in subsequent +compilations. The design is extensible, allowing for the incorporation of +additional codegen summaries and optimization techniques. COMMANDS diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882 >From 1f2cecde58eb1b2443b8b93d27bde2b1de0e Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:09:50 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit width that cover multiple LLTs: B32 B64 etc. "Predicate" wrapper class for bool predicate functions used to write pretty rules. Predicates can be combined using &&, || and !. Lowering for splitting and widening loads. Write rules for loads to not change existing mir tests from old regbankselect. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 299 +++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 309 - .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 941 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 7220773b94792d..3996167633a221 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -37,6 +37,97 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { return true; } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(BasePtrReg); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(BasePtrReg); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { +Register 
BasePtrPlusOffsetReg; +if (ByteOffset == 0) { + BasePtrPlusOffsetReg = BasePtrReg; +} else { + BasePtrPlusOffsetReg = MRI.createVirtualRegister({PtrRB, PtrTy}); + Register OffsetReg = MRI.createVirtualRegister({PtrRB, OffsetTy}); + B.buildConstant(OffsetReg, ByteOffset); + B.buildPtrAdd(BasePtrPlusOffsetReg, BasePtrReg, OffsetReg); +} +MachineMemOperand *BasePtrPlusOffsetMMO = +MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); +Register PartLoad = MRI.createVirtualRegister({DstRB, PartTy}); +B.buildLoad(PartLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); +LoadPartRegs.push_back(PartLoad); +ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { +// Loads are of same size, concat or merge them together. +B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { +// Load(s) are not all of same size, need to unmerge them to smaller pieces +// of MergeTy type, then merge them all together in Dst. +SmallVector MergeTyParts; +for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { +MergeTyParts.push_back(Reg); + } else { +auto Unmerge = B.buildUnmerge(MergeTy, Reg); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + MergeTyParts.push_back(UnmergeReg); +} + } +} +B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + + Register BasePtrPlusOffsetReg; + BasePtrPlusOffsetReg = BasePtrReg; + + MachineMemOperand *BasePtrPlusOffsetMMO = + MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + Register WideLoad = MRI.createVirtualRegister({DstRB, WideTy}); 
+ B.buildLoad(WideLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); + + if (WideTy.isScalar()) { +B.buildTrunc(Dst, WideLoad); + } else { +SmallVector MergeTyParts; +unsigned NumEltsMerge = +MRI.getType(Dst).getSizeInBits() / MergeTy.getSizeInBits(); +auto Unmerge = B.buildUnmerge(MergeTy, WideLoad); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + if (i < NumEltsMerge) +MergeTyParts.push_back(UnmergeReg); +} +B.buildMergeLikeInstr(Dst, MergeTyP
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From 000d17beab5b20702c92c07f9a2241ac50a63629 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:15:10 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 28 ++- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 70 .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 52 ++-- 9 files changed, 194 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..359d4a11a69060 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -54,9 +55,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) 
const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: 
%{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add skeletons for new register bank select passes (PR #112862)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112862 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
https://github.com/petar-avramovic edited https://github.com/llvm/llvm-project/pull/112863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankLegalize (PR #112864)
@@ -69,6 +81,241 @@ FunctionPass *llvm::createAMDGPURBLegalizePass() { using namespace AMDGPU; +const RegBankLegalizeRules &getRules(const GCNSubtarget &ST, + MachineRegisterInfo &MRI) { + static std::mutex GlobalMutex; + static SmallDenseMap> + CacheForRuleSet; + std::lock_guard Lock(GlobalMutex); + if (!CacheForRuleSet.contains(ST.getGeneration())) { +auto Rules = std::make_unique(ST, MRI); +CacheForRuleSet[ST.getGeneration()] = std::move(Rules); + } else { +CacheForRuleSet[ST.getGeneration()]->refreshRefs(ST, MRI); + } + return *CacheForRuleSet[ST.getGeneration()]; +} + bool AMDGPURBLegalize::runOnMachineFunction(MachineFunction &MF) { + + const GCNSubtarget &ST = MF.getSubtarget(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Setup the instruction builder with CSE. + std::unique_ptr MIRBuilder; petar-avramovic wrote: Simplified to always use CSE builder https://github.com/llvm/llvm-project/pull/112864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Handle uninitialized type constraints (PR #113182)
falbrechtskirchinger wrote: @cor3ntin @tru This should be added to the 19.X Release milestone, right? https://github.com/llvm/llvm-project/pull/113182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Serialization] Handle uninitialized type constraints (PR #113182)
https://github.com/cor3ntin milestoned https://github.com/llvm/llvm-project/pull/113182 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for load (PR #112882)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882 >From b2fd498febbdd9ed4e65cb18a541aaeee85550b1 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:09:50 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit width that cover multiple LLTs: B32 B64 etc. "Predicate" wrapper class for bool predicate functions used to write pretty rules. Predicates can be combined using &&, || and !. Lowering for splitting and widening loads. Write rules for loads to not change existing mir tests from old regbankselect. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 299 +++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 309 - .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 941 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 7220773b94792d..3996167633a221 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -37,6 +37,97 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { return true; } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(BasePtrReg); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(BasePtrReg); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { 
+Register BasePtrPlusOffsetReg; +if (ByteOffset == 0) { + BasePtrPlusOffsetReg = BasePtrReg; +} else { + BasePtrPlusOffsetReg = MRI.createVirtualRegister({PtrRB, PtrTy}); + Register OffsetReg = MRI.createVirtualRegister({PtrRB, OffsetTy}); + B.buildConstant(OffsetReg, ByteOffset); + B.buildPtrAdd(BasePtrPlusOffsetReg, BasePtrReg, OffsetReg); +} +MachineMemOperand *BasePtrPlusOffsetMMO = +MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); +Register PartLoad = MRI.createVirtualRegister({DstRB, PartTy}); +B.buildLoad(PartLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); +LoadPartRegs.push_back(PartLoad); +ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { +// Loads are of same size, concat or merge them together. +B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { +// Load(s) are not all of same size, need to unmerge them to smaller pieces +// of MergeTy type, then merge them all together in Dst. +SmallVector MergeTyParts; +for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { +MergeTyParts.push_back(Reg); + } else { +auto Unmerge = B.buildUnmerge(MergeTy, Reg); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + MergeTyParts.push_back(UnmergeReg); +} + } +} +B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register BasePtrReg = MI.getOperand(1).getReg(); + + Register BasePtrPlusOffsetReg; + BasePtrPlusOffsetReg = BasePtrReg; + + MachineMemOperand *BasePtrPlusOffsetMMO = + MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + Register WideLoad = MRI.createVirtualRegister({DstRB, 
WideTy}); + B.buildLoad(WideLoad, BasePtrPlusOffsetReg, *BasePtrPlusOffsetMMO); + + if (WideTy.isScalar()) { +B.buildTrunc(Dst, WideLoad); + } else { +SmallVector MergeTyParts; +unsigned NumEltsMerge = +MRI.getType(Dst).getSizeInBits() / MergeTy.getSizeInBits(); +auto Unmerge = B.buildUnmerge(MergeTy, WideLoad); +for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) { + Register UnmergeReg = Unmerge->getOperand(i).getReg(); + MRI.setRegBank(UnmergeReg, *DstRB); + if (i < NumEltsMerge) +MergeTyParts.push_back(UnmergeReg); +} +B.buildMergeLikeInstr(Dst, MergeTyP
[llvm-branch-commits] [llvm] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi (PR #112866)
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112866 >From f5c645d18bc3b17f92ff2ef65e1c676f0fc0dfa0 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 28 Oct 2024 15:15:10 +0100 Subject: [PATCH] MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi Change existing code for G_PHI to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. This is not safe for regular PHI since it may appear with an undef operand and getVRegDef can fail. Most notably this improves number of values that can be allocated to sgpr in AMDGPURegBankSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch. --- llvm/lib/CodeGen/MachineSSAContext.cpp| 28 ++- .../AMDGPU/MIR/hidden-diverge-gmir.mir| 28 +++ .../AMDGPU/MIR/hidden-loop-diverge.mir| 4 +- .../AMDGPU/MIR/uses-value-from-cycle.mir | 8 +- .../GlobalISel/divergence-structurizer.mir| 80 -- .../regbankselect-mui-regbanklegalize.mir | 70 .../regbankselect-mui-regbankselect.mir | 18 ++-- .../AMDGPU/GlobalISel/regbankselect-mui.ll| 84 ++- .../AMDGPU/GlobalISel/regbankselect-mui.mir | 52 ++-- 9 files changed, 194 insertions(+), 178 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp index e384187b6e8593..359d4a11a69060 100644 --- a/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -54,9 +55,34 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) 
const { return F->getRegInfo().getVRegDef(value)->getParent(); } +static bool isUndef(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + MI.getOpcode() == TargetOpcode::IMPLICIT_DEF; +} + +/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue() for G_PHI. template <> bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) { - return Phi.isConstantValuePHI(); + if (!Phi.isPHI()) +return false; + + // In later passes PHI may appear with an undef operand, getVRegDef can fail. + if (Phi.getOpcode() == TargetOpcode::PHI) +return Phi.isConstantValuePHI(); + + // For G_PHI we do equivalent of PHINode::hasConstantOrUndefValue(). + const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo(); + Register This = Phi.getOperand(0).getReg(); + Register ConstantValue; + for (unsigned i = 1, e = Phi.getNumOperands(); i < e; i += 2) { +Register Incoming = Phi.getOperand(i).getReg(); +if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) { + if (ConstantValue && ConstantValue != Incoming) +return false; + ConstantValue = Incoming; +} + } + return true; } template <> diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir index ce00edf3363f77..9694a340b5e906 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir @@ -1,24 +1,24 @@ # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s # CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge # CHECK-LABEL: BLOCK bb.0 -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ -# CHECK: DIVERGENT: 
%{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) -# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1 -# CHECK: DIVERGENT: G_BR %bb.2 +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_ +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if) +# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_
[llvm-branch-commits] [flang] [flang][cuda] Convert gpu.launch_func to CUFLaunchClusterKernel when cluster dims are present (PR #113959)
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/113959 Kernel launches in CUF are converted to `gpu.launch_func`. When the kernel has `cluster_dims` specified, these get carried over to the `gpu.launch_func` operation. This patch updates the special conversion of `gpu.launch_func` when cluster dims are present to use the newly added entry point. >From 912b3e1d5e98a5d5bb4f1fc5eaa8e6ba3a42158c Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Mon, 28 Oct 2024 09:52:27 -0700 Subject: [PATCH] [flang][cuda] Convert gpu.launch_func to CUFLaunchClusterKernel when cluster dims are present --- .../Transforms/CUFGPUToLLVMConversion.cpp | 83 --- flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir | 24 +- 2 files changed, 76 insertions(+), 31 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index 5645ce6e6858c8..c64f35542a6e59 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -76,11 +76,6 @@ struct GPULaunchKernelConversion mlir::LogicalResult matchAndRewrite(mlir::gpu::LaunchFuncOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - -if (op.hasClusterSize()) { - return mlir::failure(); -} - mlir::Location loc = op.getLoc(); auto *ctx = rewriter.getContext(); mlir::ModuleOp mod = op->getParentOfType(); @@ -107,37 +102,65 @@ struct GPULaunchKernelConversion rewriter.create(loc, ptrTy, kernel.getName()); } -auto funcOp = mod.lookupSymbol( -RTNAME_STRING(CUFLaunchKernel)); - auto llvmIntPtrType = mlir::IntegerType::get( ctx, this->getTypeConverter()->getPointerBitwidth(0)); auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx); -auto funcTy = mlir::LLVM::LLVMFunctionType::get( -voidTy, -{ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, - llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, -/*isVarArg=*/false); - -auto 
cufLaunchKernel = mlir::SymbolRefAttr::get( -mod.getContext(), RTNAME_STRING(CUFLaunchKernel)); -if (!funcOp) { - mlir::OpBuilder::InsertionGuard insertGuard(rewriter); - rewriter.setInsertionPointToStart(mod.getBody()); - auto launchKernelFuncOp = rewriter.create( - loc, RTNAME_STRING(CUFLaunchKernel), funcTy); - launchKernelFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private); -} mlir::Value nullPtr = rewriter.create(loc, ptrTy); -rewriter.replaceOpWithNewOp( -op, funcTy, cufLaunchKernel, -mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(), - adaptor.getGridSizeY(), adaptor.getGridSizeZ(), - adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), - adaptor.getBlockSizeZ(), dynamicMemorySize, kernelArgs, - nullPtr}); +if (op.hasClusterSize()) { + auto funcOp = mod.lookupSymbol( + RTNAME_STRING(CUFLaunchClusterKernel)); + auto funcTy = mlir::LLVM::LLVMFunctionType::get( + voidTy, + {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, + /*isVarArg=*/false); + auto cufLaunchClusterKernel = mlir::SymbolRefAttr::get( + mod.getContext(), RTNAME_STRING(CUFLaunchClusterKernel)); + if (!funcOp) { +mlir::OpBuilder::InsertionGuard insertGuard(rewriter); +rewriter.setInsertionPointToStart(mod.getBody()); +auto launchKernelFuncOp = rewriter.create( +loc, RTNAME_STRING(CUFLaunchClusterKernel), funcTy); +launchKernelFuncOp.setVisibility( +mlir::SymbolTable::Visibility::Private); + } + rewriter.replaceOpWithNewOp( + op, funcTy, cufLaunchClusterKernel, + mlir::ValueRange{kernelPtr, adaptor.getClusterSizeX(), + adaptor.getClusterSizeY(), adaptor.getClusterSizeZ(), + adaptor.getGridSizeX(), adaptor.getGridSizeY(), + adaptor.getGridSizeZ(), adaptor.getBlockSizeX(), + adaptor.getBlockSizeY(), adaptor.getBlockSizeZ(), + dynamicMemorySize, kernelArgs, nullPtr}); +} else { + auto funcOp = mod.lookupSymbol( + RTNAME_STRING(CUFLaunchKernel)); + auto funcTy = 
mlir::LLVM::LLVMFunctionType::get( + voidTy, + {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, + /*isVarArg=*/false); + auto cufLaunchKernel = mlir::SymbolRefAttr::get( + mod.getContext(), RTNAME_STRING(