[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/140503 None >From 61b636b6367ff9fe41eefce3430ee58551ffedf3 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 16 May 2025 23:41:29 -0700 Subject: [PATCH] [NFC][MemProf] Move IndexedMemProfData to its own header. --- .../llvm/ProfileData/IndexedMemProfData.h | 70 ++- .../llvm/ProfileData/InstrProfWriter.h| 2 +- llvm/include/llvm/ProfileData/MemProf.h | 51 -- .../llvm/ProfileData/MemProfRadixTree.h | 1 + llvm/include/llvm/ProfileData/MemProfReader.h | 1 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1 + llvm/lib/ProfileData/InstrProfWriter.cpp | 1 - llvm/lib/ProfileData/MemProf.cpp | 13 llvm/unittests/ProfileData/InstrProfTest.cpp | 1 + llvm/unittests/ProfileData/MemProfTest.cpp| 1 + .../Instrumentation/MemProfUseTest.cpp| 1 + 11 files changed, 75 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h index 3c6c329d1c49d..94a16227477cb 100644 --- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h +++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h @@ -6,18 +6,84 @@ // //===--===// // -// MemProf data is serialized in writeMemProf provided in this header file. +// This file implements IndexedMemProfData, a data structure to hold MemProf +// in a space optimized format. It also provides utility methods for writing +// MemProf data. // //===--===// +#ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H +#define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H + #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" namespace llvm { +namespace memprof { +struct IndexedMemProfData { + // A map to hold memprof data per function. The lower 64 bits obtained from + // the md5 hash of the function name is used to index into the map. + llvm::MapVector Records; + + // A map to hold frame id to frame mappings. 
The mappings are used to + // convert IndexedMemProfRecord to MemProfRecords with frame information + // inline. + llvm::MapVector Frames; + + // A map to hold call stack id to call stacks. + llvm::MapVector> CallStacks; + + FrameId addFrame(const Frame &F) { +const FrameId Id = hashFrame(F); +Frames.try_emplace(Id, F); +return Id; + } + + CallStackId addCallStack(ArrayRef CS) { +CallStackId CSId = hashCallStack(CS); +CallStacks.try_emplace(CSId, CS); +return CSId; + } + + CallStackId addCallStack(SmallVector &&CS) { +CallStackId CSId = hashCallStack(CS); +CallStacks.try_emplace(CSId, std::move(CS)); +return CSId; + } + +private: + // Return a hash value based on the contents of the frame. Here we use a + // cryptographic hash function to minimize the chance of hash collisions. We + // do persist FrameIds as part of memprof formats up to Version 2, inclusive. + // However, the deserializer never calls this function; it uses FrameIds + // merely as keys to look up Frames proper. + FrameId hashFrame(const Frame &F) const { +llvm::HashBuilder, llvm::endianness::little> +HashBuilder; +HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame); +llvm::BLAKE3Result<8> Hash = HashBuilder.final(); +FrameId Id; +std::memcpy(&Id, Hash.data(), sizeof(Hash)); +return Id; + } + + // Compute a CallStackId for a given call stack. + CallStackId hashCallStack(ArrayRef CS) const { + llvm::HashBuilder, llvm::endianness::little> + HashBuilder; + for (FrameId F : CS) +HashBuilder.add(F); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + CallStackId CSId; + std::memcpy(&CSId, Hash.data(), sizeof(Hash)); + return CSId; +} +}; +} // namespace memprof // Write the MemProf data to OS. 
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema); - } // namespace llvm +#endif diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 67d85daa81623..16d2ef3fab3e3 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -20,7 +20,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/Support/Error.h" #include #include diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 215102c131fff..ce5cd5ee4856b 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -842,57 +842
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/140505 None >From d4413c619035039133f7d3509bbe6c94a1650adf Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Mon, 19 May 2025 00:03:59 -0700 Subject: [PATCH] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header --- .../include/llvm/Analysis/MemoryProfileInfo.h | 3 +- llvm/include/llvm/IR/ModuleSummaryIndex.h | 22 +- llvm/include/llvm/ProfileData/MemProfCommon.h | 44 +++ .../Instrumentation/MemProfiler.cpp | 1 + 4 files changed, 49 insertions(+), 21 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/MemProfCommon.h diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 1d98f86f50484..33d59efe8d77e 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -14,7 +14,8 @@ #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H #include "llvm/IR/Metadata.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ProfileData/MemProfCommon.h" +#include "llvm/IR/InstrTypes.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 65e428a3adea7..77430c5cb5ea1 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -27,6 +27,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProfCommon.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InterleavedRange.h" @@ -306,13 +307,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; -// For optional hinted size reporting, holds a pair of the full stack id -// (pre-trimming, from the full context in the profile), and the associated -// total profiled size. 
-struct ContextTotalSize { - uint64_t FullStackId; - uint64_t TotalSize; -}; + /// Summary of memprof callsite metadata. struct CallsiteInfo { @@ -350,19 +345,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) { return OS; } -// Allocation type assigned to an allocation reached by a given context. -// More can be added, now this is cold, notcold and hot. -// Values should be powers of two so that they can be ORed, in particular to -// track allocations that have different behavior with different calling -// contexts. -enum class AllocationType : uint8_t { - None = 0, - NotCold = 1, - Cold = 2, - Hot = 4, - All = 7 // This should always be set to the OR of all values. -}; - /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { // The allocation type for this profiled context. diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h new file mode 100644 index 0..4097ccb651188 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -0,0 +1,44 @@ +//===- MemProfCommon.h - MemProf support *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common types used by different parts of the MemProf code. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROFCOMMON_H +#define LLVM_PROFILEDATA_MEMPROFCOMMON_H + +#include + +namespace llvm { + +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + +// Allocation type assigned to an allocation reached by a given context. +// More can be added, now this is cold, notcold and hot. 
+// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + Hot = 4, + All = 7 // This should always be set to the OR of all values. +}; + +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H + diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 5982476f3994e..6538311571529 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include +#include
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/140504 None >From 532d85a11742e5a3994e5efce357f85c0c60a6c7 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 16 May 2025 23:55:43 -0700 Subject: [PATCH] [NFC][MemProf] Add the LLVM license text and minor clean up. --- llvm/include/llvm/ProfileData/MemProf.h | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ce5cd5ee4856b..683193aa42747 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -1,5 +1,18 @@ -#ifndef LLVM_PROFILEDATA_MEMPROF_H_ -#define LLVM_PROFILEDATA_MEMPROF_H_ +//===- MemProf.h - MemProf support --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common definitions used in the reading and writing of +// memory profile data. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROF_H +#define LLVM_PROFILEDATA_MEMPROF_H #include "llvm/ADT/BitVector.h" #include "llvm/ADT/MapVector.h" @@ -844,5 +857,4 @@ struct LineLocation { using CallEdgeTy = std::pair; } // namespace memprof } // namespace llvm - -#endif // LLVM_PROFILEDATA_MEMPROF_H_ +#endif // LLVM_PROFILEDATA_MEMPROF_H ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
snehasish wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140505** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140504** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140503** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140502** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140501** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140500** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140500?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. 
Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
snehasish wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140505** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140504** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140503** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140502** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140501** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140500** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140500?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. 
Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
llvmbot wrote: @llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Snehasish Kumar (snehasish) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/140502.diff 7 Files Affected: - (modified) llvm/include/llvm/ProfileData/MemProf.h (+5-5) - (modified) llvm/include/llvm/ProfileData/MemProfYAML.h (+1-1) - (modified) llvm/lib/ProfileData/MemProf.cpp (+1-1) - (modified) llvm/lib/ProfileData/MemProfReader.cpp (+1-1) - (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+2-2) - (modified) llvm/unittests/ProfileData/MemProfTest.cpp (+10-10) - (modified) llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp (+24-24) ``diff diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0bc1432f7d198..215102c131fff 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -472,13 +472,13 @@ struct IndexedMemProfRecord { // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( llvm::function_ref(const CallStackId)> Callback) const; - - // Returns the GUID for the function name after canonicalization. For - // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are - // mapped to functions using this GUID. - static GlobalValue::GUID getGUID(const StringRef FunctionName); }; +// Returns the GUID for the function name after canonicalization. For +// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are +// mapped to functions using this GUID. +GlobalValue::GUID getGUID(const StringRef FunctionName); + // Holds call site information with frame contents inline. 
struct CallSiteInfo { // The frames in the call stack diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h index 08dee253f615a..b642e3098aa0e 100644 --- a/llvm/include/llvm/ProfileData/MemProfYAML.h +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -46,7 +46,7 @@ template <> struct ScalarTraits { Val = Num; } else { // Otherwise, treat the input as a string containing a function name. - Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + Val = memprof::getGUID(Scalar); } return StringRef(); } diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index a9c5ee09a6daf..795e97bee38f5 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -343,7 +343,7 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord( return Record; } -GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { +GlobalValue::GUID getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop // those by default. This is by design to differentiate internal linkage diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index e0f280b9eb2f6..aca534b0a4c98 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -570,7 +570,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames( I++) { const auto &DIFrame = DI.getFrame(I); const uint64_t Guid = -IndexedMemProfRecord::getGUID(DIFrame.FunctionName); +memprof::getGUID(DIFrame.FunctionName); const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. 
I != NumFrames - 1); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 375ff84f82ed2..5982476f3994e 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -865,8 +865,8 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, StringRef CallerName = DIL->getSubprogramLinkageName(); assert(!CallerName.empty() && "Be sure to enable -fdebug-info-for-profiling"); - uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); - uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); + uint64_t CallerGUID = memprof::getGUID(CallerName); + uint64_t CalleeGUID = memprof::getGUID(CalleeName); // Pretend that we are calling a function with GUID == 0 if we are // in the inline stack leading to a heap allocation function. if (IsAlloc) { diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index a072dee26d9a0..2ae9cd96f0197 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -107,7 +107,7 @@ const DILineInfoSpecifier specifi
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
llvmbot wrote: @llvm/pr-subscribers-pgo Author: Snehasish Kumar (snehasish) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/140504.diff 1 Files Affected: - (modified) llvm/include/llvm/ProfileData/MemProf.h (+16-4) ``diff diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ce5cd5ee4856b..683193aa42747 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -1,5 +1,18 @@ -#ifndef LLVM_PROFILEDATA_MEMPROF_H_ -#define LLVM_PROFILEDATA_MEMPROF_H_ +//===- MemProf.h - MemProf support --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common definitions used in the reading and writing of +// memory profile data. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROF_H +#define LLVM_PROFILEDATA_MEMPROF_H #include "llvm/ADT/BitVector.h" #include "llvm/ADT/MapVector.h" @@ -844,5 +857,4 @@ struct LineLocation { using CallEdgeTy = std::pair; } // namespace memprof } // namespace llvm - -#endif // LLVM_PROFILEDATA_MEMPROF_H_ +#endif // LLVM_PROFILEDATA_MEMPROF_H `` https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
snehasish wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140505** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140504** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140503** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140502** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140501** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140500** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140500?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. 
Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
snehasish wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140505** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140504** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140503** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140502** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140501** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#140500** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140500?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. 
Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Snehasish Kumar (snehasish) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/140505.diff 4 Files Affected: - (modified) llvm/include/llvm/Analysis/MemoryProfileInfo.h (+2-1) - (modified) llvm/include/llvm/IR/ModuleSummaryIndex.h (+2-20) - (added) llvm/include/llvm/ProfileData/MemProfCommon.h (+44) - (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+1) ``diff diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 1d98f86f50484..33d59efe8d77e 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -14,7 +14,8 @@ #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H #include "llvm/IR/Metadata.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ProfileData/MemProfCommon.h" +#include "llvm/IR/InstrTypes.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 65e428a3adea7..77430c5cb5ea1 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -27,6 +27,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProfCommon.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InterleavedRange.h" @@ -306,13 +307,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; -// For optional hinted size reporting, holds a pair of the full stack id -// (pre-trimming, from the full context in the profile), and the associated -// total profiled size. -struct ContextTotalSize { - uint64_t FullStackId; - uint64_t TotalSize; -}; + /// Summary of memprof callsite metadata. 
struct CallsiteInfo { @@ -350,19 +345,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) { return OS; } -// Allocation type assigned to an allocation reached by a given context. -// More can be added, now this is cold, notcold and hot. -// Values should be powers of two so that they can be ORed, in particular to -// track allocations that have different behavior with different calling -// contexts. -enum class AllocationType : uint8_t { - None = 0, - NotCold = 1, - Cold = 2, - Hot = 4, - All = 7 // This should always be set to the OR of all values. -}; - /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { // The allocation type for this profiled context. diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h new file mode 100644 index 0..4097ccb651188 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -0,0 +1,44 @@ +//===- MemProfCommon.h - MemProf support *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common types used by different parts of the MemProf code. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROFCOMMON_H +#define LLVM_PROFILEDATA_MEMPROFCOMMON_H + +#include + +namespace llvm { + +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + +// Allocation type assigned to an allocation reached by a given context. +// More can be added, now this is cold, notcold and hot. 
+// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + Hot = 4, + All = 7 // This should always be set to the OR of all values. +}; + +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H + diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 5982476f3994e..6538311571529 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include +#include using namespace llvm; using namespace llvm::memprof; `` https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/ProfileData/MemProf.h llvm/include/llvm/ProfileData/MemProfYAML.h llvm/lib/ProfileData/MemProf.cpp llvm/lib/ProfileData/MemProfReader.cpp llvm/lib/Transforms/Instrumentation/MemProfiler.cpp llvm/unittests/ProfileData/MemProfTest.cpp llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index aca534b0a..d6bc4fdf5 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -569,8 +569,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames( for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; I++) { const auto &DIFrame = DI.getFrame(I); -const uint64_t Guid = -memprof::getGUID(DIFrame.FunctionName); +const uint64_t Guid = memprof::getGUID(DIFrame.FunctionName); const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. I != NumFrames - 1); diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp index 95828356b..2ed32c6ea 100644 --- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp +++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp @@ -107,10 +107,9 @@ declare !dbg !19 void @_Z2f3v() // locations. 
EXPECT_THAT( CallSites, - ElementsAre( - Pair(LineLocation(1, 3), memprof::getGUID("_Z2f1v")), - Pair(LineLocation(2, 3), memprof::getGUID("_Z2f2v")), - Pair(LineLocation(2, 9), memprof::getGUID("_Z2f3v")))); + ElementsAre(Pair(LineLocation(1, 3), memprof::getGUID("_Z2f1v")), + Pair(LineLocation(2, 3), memprof::getGUID("_Z2f2v")), + Pair(LineLocation(2, 9), memprof::getGUID("_Z2f3v")))); } TEST(MemProf, ExtractDirectCallsFromIRInline) { @@ -206,25 +205,22 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr EXPECT_EQ(FooCallerGUID, memprof::getGUID("_Z3foov")); EXPECT_THAT( FooCallSites, - ElementsAre( - Pair(LineLocation(1, 3), memprof::getGUID("_ZL2f3v")), - Pair(LineLocation(2, 9), memprof::getGUID("_ZL2g3v")))); + ElementsAre(Pair(LineLocation(1, 3), memprof::getGUID("_ZL2f3v")), + Pair(LineLocation(2, 9), memprof::getGUID("_ZL2g3v")))); auto F2It = Calls.find(memprof::getGUID("_ZL2f2v")); ASSERT_NE(F2It, Calls.end()); const auto &[F2CallerGUID, F2CallSites] = *F2It; EXPECT_EQ(F2CallerGUID, memprof::getGUID("_ZL2f2v")); - EXPECT_THAT(F2CallSites, - ElementsAre(Pair(LineLocation(2, 3), - memprof::getGUID("_Z2f1v")))); + EXPECT_THAT(F2CallSites, ElementsAre(Pair(LineLocation(2, 3), +memprof::getGUID("_Z2f1v")))); auto F3It = Calls.find(memprof::getGUID("_ZL2f3v")); ASSERT_NE(F3It, Calls.end()); const auto &[F3CallerGUID, F3CallSites] = *F3It; EXPECT_EQ(F3CallerGUID, memprof::getGUID("_ZL2f3v")); - EXPECT_THAT(F3CallSites, - ElementsAre(Pair(LineLocation(1, 10), - memprof::getGUID("_ZL2f2v")))); + EXPECT_THAT(F3CallSites, ElementsAre(Pair(LineLocation(1, 10), +memprof::getGUID("_ZL2f2v")))); auto G3It = Calls.find(memprof::getGUID("_ZL2g3v")); ASSERT_NE(G3It, Calls.end()); @@ -232,9 +228,8 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr EXPECT_EQ(G3CallerGUID, memprof::getGUID("_ZL2g3v")); EXPECT_THAT( G3CallSites, - ElementsAre( - Pair(LineLocation(1, 8), memprof::getGUID("_Z2g1v")), - Pair(LineLocation(2, 3), memprof::getGUID("_Z2g2v")))); + ElementsAre(Pair(LineLocation(1, 8), 
memprof::getGUID("_Z2g1v")), + Pair(LineLocation(2, 3), memprof::getGUID("_Z2g2v")))); } TEST(MemProf, ExtractDirectCallsFromIRCallingNew) { `` https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/ProfileData/MemProfCommon.h llvm/include/llvm/Analysis/MemoryProfileInfo.h llvm/include/llvm/IR/ModuleSummaryIndex.h llvm/lib/Transforms/Instrumentation/MemProfiler.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 33d59efe8..9fcb81a0a 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -13,9 +13,9 @@ #ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Metadata.h" #include "llvm/ProfileData/MemProfCommon.h" -#include "llvm/IR/InstrTypes.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 77430c5cb..23f9504b4 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -307,8 +307,6 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; - - /// Summary of memprof callsite metadata. struct CallsiteInfo { // Actual callee function. 
diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h index 4097ccb65..a638824ec 100644 --- a/llvm/include/llvm/ProfileData/MemProfCommon.h +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -39,6 +39,5 @@ enum class AllocationType : uint8_t { }; } // namespace llvm - -#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H `` https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/140501 None >From c8e520c48fe9e64f9e2ac389498d0e27797bf362 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 16 May 2025 18:54:05 -0700 Subject: [PATCH] [NFC][MemProf] Move Radix tree methods to their own header and cpp. --- llvm/include/llvm/ProfileData/MemProf.h | 336 .../llvm/ProfileData/MemProfRadixTree.h | 358 ++ llvm/include/llvm/ProfileData/MemProfReader.h | 2 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1 + llvm/lib/ProfileData/CMakeLists.txt | 1 + llvm/lib/ProfileData/IndexedMemProfData.cpp | 1 + llvm/lib/ProfileData/InstrProfReader.cpp | 3 +- llvm/lib/ProfileData/MemProf.cpp | 235 llvm/lib/ProfileData/MemProfRadixTree.cpp | 253 + llvm/unittests/ProfileData/InstrProfTest.cpp | 1 + llvm/unittests/ProfileData/MemProfTest.cpp| 3 +- 11 files changed, 620 insertions(+), 574 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/MemProfRadixTree.h create mode 100644 llvm/lib/ProfileData/MemProfRadixTree.cpp diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index e713c3807611b..0bc1432f7d198 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -818,133 +818,6 @@ class CallStackLookupTrait { } }; -namespace detail { -// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have -// to do so in one of two different ways depending on the type of the hash -// table. -template -value_type DerefIterator(IterTy Iter) { - using deref_type = llvm::remove_cvref_t; - if constexpr (std::is_same_v) -return *Iter; - else -return Iter->second; -} -} // namespace detail - -// A function object that returns a frame for a given FrameId. 
-template struct FrameIdConverter { - std::optional LastUnmappedId; - MapTy &Map; - - FrameIdConverter() = delete; - FrameIdConverter(MapTy &Map) : Map(Map) {} - - // Delete the copy constructor and copy assignment operator to avoid a - // situation where a copy of FrameIdConverter gets an error in LastUnmappedId - // while the original instance doesn't. - FrameIdConverter(const FrameIdConverter &) = delete; - FrameIdConverter &operator=(const FrameIdConverter &) = delete; - - Frame operator()(FrameId Id) { -auto Iter = Map.find(Id); -if (Iter == Map.end()) { - LastUnmappedId = Id; - return Frame(); -} -return detail::DerefIterator(Iter); - } -}; - -// A function object that returns a call stack for a given CallStackId. -template struct CallStackIdConverter { - std::optional LastUnmappedId; - MapTy &Map; - llvm::function_ref FrameIdToFrame; - - CallStackIdConverter() = delete; - CallStackIdConverter(MapTy &Map, - llvm::function_ref FrameIdToFrame) - : Map(Map), FrameIdToFrame(FrameIdToFrame) {} - - // Delete the copy constructor and copy assignment operator to avoid a - // situation where a copy of CallStackIdConverter gets an error in - // LastUnmappedId while the original instance doesn't. - CallStackIdConverter(const CallStackIdConverter &) = delete; - CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; - - std::vector operator()(CallStackId CSId) { -std::vector Frames; -auto CSIter = Map.find(CSId); -if (CSIter == Map.end()) { - LastUnmappedId = CSId; -} else { - llvm::SmallVector CS = - detail::DerefIterator>(CSIter); - Frames.reserve(CS.size()); - for (FrameId Id : CS) -Frames.push_back(FrameIdToFrame(Id)); -} -return Frames; - } -}; - -// A function object that returns a Frame stored at a given index into the Frame -// array in the profile. 
-struct LinearFrameIdConverter { - const unsigned char *FrameBase; - - LinearFrameIdConverter() = delete; - LinearFrameIdConverter(const unsigned char *FrameBase) - : FrameBase(FrameBase) {} - - Frame operator()(LinearFrameId LinearId) { -uint64_t Offset = static_cast(LinearId) * Frame::serializedSize(); -return Frame::deserialize(FrameBase + Offset); - } -}; - -// A function object that returns a call stack stored at a given index into the -// call stack array in the profile. -struct LinearCallStackIdConverter { - const unsigned char *CallStackBase; - llvm::function_ref FrameIdToFrame; - - LinearCallStackIdConverter() = delete; - LinearCallStackIdConverter( - const unsigned char *CallStackBase, - llvm::function_ref FrameIdToFrame) - : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} - - std::vector operator()(LinearCallStackId LinearCSId) { -std::vector Frames; - -const unsigned char *Ptr = -CallStackBase + -static_cast(LinearCSId) * sizeof(LinearFrameId); -uint32_t NumFrames = -support::endian::readNext(Ptr); -Frames.reserve(NumFrames
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
llvmbot wrote: @llvm/pr-subscribers-pgo Author: Snehasish Kumar (snehasish) Changes --- Patch is 53.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140501.diff 11 Files Affected: - (modified) llvm/include/llvm/ProfileData/MemProf.h (-336) - (added) llvm/include/llvm/ProfileData/MemProfRadixTree.h (+358) - (modified) llvm/include/llvm/ProfileData/MemProfReader.h (+1-1) - (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+1) - (modified) llvm/lib/ProfileData/CMakeLists.txt (+1) - (modified) llvm/lib/ProfileData/IndexedMemProfData.cpp (+1) - (modified) llvm/lib/ProfileData/InstrProfReader.cpp (+2-1) - (modified) llvm/lib/ProfileData/MemProf.cpp (-235) - (added) llvm/lib/ProfileData/MemProfRadixTree.cpp (+253) - (modified) llvm/unittests/ProfileData/InstrProfTest.cpp (+1) - (modified) llvm/unittests/ProfileData/MemProfTest.cpp (+2-1) ``diff diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index e713c3807611b..0bc1432f7d198 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -818,133 +818,6 @@ class CallStackLookupTrait { } }; -namespace detail { -// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have -// to do so in one of two different ways depending on the type of the hash -// table. -template -value_type DerefIterator(IterTy Iter) { - using deref_type = llvm::remove_cvref_t; - if constexpr (std::is_same_v) -return *Iter; - else -return Iter->second; -} -} // namespace detail - -// A function object that returns a frame for a given FrameId. -template struct FrameIdConverter { - std::optional LastUnmappedId; - MapTy &Map; - - FrameIdConverter() = delete; - FrameIdConverter(MapTy &Map) : Map(Map) {} - - // Delete the copy constructor and copy assignment operator to avoid a - // situation where a copy of FrameIdConverter gets an error in LastUnmappedId - // while the original instance doesn't. 
- FrameIdConverter(const FrameIdConverter &) = delete; - FrameIdConverter &operator=(const FrameIdConverter &) = delete; - - Frame operator()(FrameId Id) { -auto Iter = Map.find(Id); -if (Iter == Map.end()) { - LastUnmappedId = Id; - return Frame(); -} -return detail::DerefIterator(Iter); - } -}; - -// A function object that returns a call stack for a given CallStackId. -template struct CallStackIdConverter { - std::optional LastUnmappedId; - MapTy &Map; - llvm::function_ref FrameIdToFrame; - - CallStackIdConverter() = delete; - CallStackIdConverter(MapTy &Map, - llvm::function_ref FrameIdToFrame) - : Map(Map), FrameIdToFrame(FrameIdToFrame) {} - - // Delete the copy constructor and copy assignment operator to avoid a - // situation where a copy of CallStackIdConverter gets an error in - // LastUnmappedId while the original instance doesn't. - CallStackIdConverter(const CallStackIdConverter &) = delete; - CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; - - std::vector operator()(CallStackId CSId) { -std::vector Frames; -auto CSIter = Map.find(CSId); -if (CSIter == Map.end()) { - LastUnmappedId = CSId; -} else { - llvm::SmallVector CS = - detail::DerefIterator>(CSIter); - Frames.reserve(CS.size()); - for (FrameId Id : CS) -Frames.push_back(FrameIdToFrame(Id)); -} -return Frames; - } -}; - -// A function object that returns a Frame stored at a given index into the Frame -// array in the profile. 
-struct LinearCallStackIdConverter { - const unsigned char *CallStackBase; - llvm::function_ref FrameIdToFrame; - - LinearCallStackIdConverter() = delete; - LinearCallStackIdConverter( - const unsigned char *CallStackBase, - llvm::function_ref FrameIdToFrame) - : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} - - std::vector operator()(LinearCallStackId LinearCSId) { -std::vector Frames; - -const unsigned char *Ptr = -CallStackBase + -static_cast(LinearCSId) * sizeof(LinearFrameId); -uint32_t NumFrames = -support::endian::readNext(Ptr); -Frames.reserve(NumFrames); -for (; NumFrames; --NumFrames) { - LinearFrameId Elem = - support::endian::read(Ptr); - // Follow a pointer to the parent, if any. See comments below on - // CallStackRadixTreeBuilder for the descript
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/ProfileData/IndexedMemProfData.h llvm/include/llvm/ProfileData/InstrProfWriter.h llvm/include/llvm/ProfileData/MemProf.h llvm/include/llvm/ProfileData/MemProfRadixTree.h llvm/include/llvm/ProfileData/MemProfReader.h llvm/lib/Bitcode/Writer/BitcodeWriter.cpp llvm/lib/ProfileData/InstrProfWriter.cpp llvm/lib/ProfileData/MemProf.cpp llvm/unittests/ProfileData/InstrProfTest.cpp llvm/unittests/ProfileData/MemProfTest.cpp llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h index 94a162274..f33b160e0 100644 --- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h +++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h @@ -69,15 +69,15 @@ private: // Compute a CallStackId for a given call stack. 
CallStackId hashCallStack(ArrayRef CS) const { - llvm::HashBuilder, llvm::endianness::little> - HashBuilder; - for (FrameId F : CS) -HashBuilder.add(F); - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); - CallStackId CSId; - std::memcpy(&CSId, Hash.data(), sizeof(Hash)); - return CSId; -} +llvm::HashBuilder, llvm::endianness::little> +HashBuilder; +for (FrameId F : CS) + HashBuilder.add(F); +llvm::BLAKE3Result<8> Hash = HashBuilder.final(); +CallStackId CSId; +std::memcpy(&CSId, Hash.data(), sizeof(Hash)); +return CSId; + } }; } // namespace memprof diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 16d2ef3fa..b72c901db 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -19,8 +19,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/BuildID.h" -#include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/IndexedMemProfData.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Error.h" #include #include diff --git a/llvm/include/llvm/ProfileData/MemProfRadixTree.h b/llvm/include/llvm/ProfileData/MemProfRadixTree.h index 39f150c90..710721364 100644 --- a/llvm/include/llvm/ProfileData/MemProfRadixTree.h +++ b/llvm/include/llvm/ProfileData/MemProfRadixTree.h @@ -13,8 +13,8 @@ #ifndef LLVM_PROFILEDATA_MEMPROFRADIXTREE_H #define LLVM_PROFILEDATA_MEMPROFRADIXTREE_H -#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/IndexedMemProfData.h" +#include "llvm/ProfileData/MemProf.h" namespace llvm { namespace memprof { diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 427a161bc..f8d80c1fc 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -61,8 +61,8 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Object/IRSymtab.h" 
-#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/IndexedMemProfData.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 6fc9b9074..dcdacb903 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -12,12 +12,12 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" -#include "llvm/ProfileData/IndexedMemProfData.h" -#include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/ProfileData/MemProfData.inc" +#include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/Support/Compression.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Error.h" diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 147f8813a..2113250e7 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -6,6 +6,7 @@ // //===--===// +#include "llvm/ProfileData/MemProf.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLForwardCompat.h" @@ -13,11 +14,10 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/IR/Value.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Pr
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
https://github.com/mingmingl-llvm approved this pull request. https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/140505 >From 8751ae1be821ebc4803bddd05ddb49a84c0b2773 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Mon, 19 May 2025 00:03:59 -0700 Subject: [PATCH] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header --- .../include/llvm/Analysis/MemoryProfileInfo.h | 3 +- llvm/include/llvm/IR/ModuleSummaryIndex.h | 22 +- llvm/include/llvm/ProfileData/MemProfCommon.h | 43 +++ llvm/lib/ProfileData/CMakeLists.txt | 1 - .../Instrumentation/MemProfiler.cpp | 1 + 5 files changed, 47 insertions(+), 23 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/MemProfCommon.h diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 1d98f86f50484..9fcb81a0a1b4c 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -13,8 +13,9 @@ #ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ProfileData/MemProfCommon.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 65e428a3adea7..23f9504b44fab 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -27,6 +27,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProfCommon.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InterleavedRange.h" @@ -306,14 +307,6 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; -// For optional hinted size reporting, holds a pair of the full stack id -// (pre-trimming, from the full context 
in the profile), and the associated -// total profiled size. -struct ContextTotalSize { - uint64_t FullStackId; - uint64_t TotalSize; -}; - /// Summary of memprof callsite metadata. struct CallsiteInfo { // Actual callee function. @@ -350,19 +343,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) { return OS; } -// Allocation type assigned to an allocation reached by a given context. -// More can be added, now this is cold, notcold and hot. -// Values should be powers of two so that they can be ORed, in particular to -// track allocations that have different behavior with different calling -// contexts. -enum class AllocationType : uint8_t { - None = 0, - NotCold = 1, - Cold = 2, - Hot = 4, - All = 7 // This should always be set to the OR of all values. -}; - /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { // The allocation type for this profiled context. diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h new file mode 100644 index 0..a638824ec000e --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -0,0 +1,43 @@ +//===- MemProfCommon.h - MemProf support *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common types used by different parts of the MemProf code. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROFCOMMON_H +#define LLVM_PROFILEDATA_MEMPROFCOMMON_H + +#include + +namespace llvm { + +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + +// Allocation type assigned to an allocation reached by a given context. 
+// More can be added, now this is cold, notcold and hot. +// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + Hot = 4, + All = 7 // This should always be set to the OR of all values. +}; + +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index ca9ea3205ee1d..de60a655d5bd5 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -26,7 +26,6 @@ add_llvm_component_library(LLV
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Snehasish Kumar (snehasish) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/140503.diff 11 Files Affected: - (modified) llvm/include/llvm/ProfileData/IndexedMemProfData.h (+68-2) - (modified) llvm/include/llvm/ProfileData/InstrProfWriter.h (+1-1) - (modified) llvm/include/llvm/ProfileData/MemProf.h (-51) - (modified) llvm/include/llvm/ProfileData/MemProfRadixTree.h (+1) - (modified) llvm/include/llvm/ProfileData/MemProfReader.h (+1) - (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+1) - (modified) llvm/lib/ProfileData/InstrProfWriter.cpp (-1) - (modified) llvm/lib/ProfileData/MemProf.cpp (-13) - (modified) llvm/unittests/ProfileData/InstrProfTest.cpp (+1) - (modified) llvm/unittests/ProfileData/MemProfTest.cpp (+1) - (modified) llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp (+1) ``diff diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h index 3c6c329d1c49d..94a16227477cb 100644 --- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h +++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h @@ -6,18 +6,84 @@ // //===--===// // -// MemProf data is serialized in writeMemProf provided in this header file. +// This file implements IndexedMemProfData, a data structure to hold MemProf +// in a space optimized format. It also provides utility methods for writing +// MemProf data. // //===--===// +#ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H +#define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H + #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" namespace llvm { +namespace memprof { +struct IndexedMemProfData { + // A map to hold memprof data per function. The lower 64 bits obtained from + // the md5 hash of the function name is used to index into the map. + llvm::MapVector Records; + + // A map to hold frame id to frame mappings. 
The mappings are used to + // convert IndexedMemProfRecord to MemProfRecords with frame information + // inline. + llvm::MapVector Frames; + + // A map to hold call stack id to call stacks. + llvm::MapVector> CallStacks; + + FrameId addFrame(const Frame &F) { +const FrameId Id = hashFrame(F); +Frames.try_emplace(Id, F); +return Id; + } + + CallStackId addCallStack(ArrayRef CS) { +CallStackId CSId = hashCallStack(CS); +CallStacks.try_emplace(CSId, CS); +return CSId; + } + + CallStackId addCallStack(SmallVector &&CS) { +CallStackId CSId = hashCallStack(CS); +CallStacks.try_emplace(CSId, std::move(CS)); +return CSId; + } + +private: + // Return a hash value based on the contents of the frame. Here we use a + // cryptographic hash function to minimize the chance of hash collisions. We + // do persist FrameIds as part of memprof formats up to Version 2, inclusive. + // However, the deserializer never calls this function; it uses FrameIds + // merely as keys to look up Frames proper. + FrameId hashFrame(const Frame &F) const { +llvm::HashBuilder, llvm::endianness::little> +HashBuilder; +HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame); +llvm::BLAKE3Result<8> Hash = HashBuilder.final(); +FrameId Id; +std::memcpy(&Id, Hash.data(), sizeof(Hash)); +return Id; + } + + // Compute a CallStackId for a given call stack. + CallStackId hashCallStack(ArrayRef CS) const { + llvm::HashBuilder, llvm::endianness::little> + HashBuilder; + for (FrameId F : CS) +HashBuilder.add(F); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + CallStackId CSId; + std::memcpy(&CSId, Hash.data(), sizeof(Hash)); + return CSId; +} +}; +} // namespace memprof // Write the MemProf data to OS. 
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema); - } // namespace llvm +#endif diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 67d85daa81623..16d2ef3fab3e3 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -20,7 +20,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/Object/BuildID.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/IndexedMemProfData.h" #include "llvm/Support/Error.h" #include #include diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 215102c131fff..ce5cd5ee4856b 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -842,57 +842,6 @@ struct LineLocation { // A pair of a call site locat
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Snehasish Kumar (snehasish) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/140505.diff 4 Files Affected: - (modified) llvm/include/llvm/Analysis/MemoryProfileInfo.h (+2-1) - (modified) llvm/include/llvm/IR/ModuleSummaryIndex.h (+2-20) - (added) llvm/include/llvm/ProfileData/MemProfCommon.h (+44) - (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+1) ``diff diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 1d98f86f50484..33d59efe8d77e 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -14,7 +14,8 @@ #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H #include "llvm/IR/Metadata.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ProfileData/MemProfCommon.h" +#include "llvm/IR/InstrTypes.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 65e428a3adea7..77430c5cb5ea1 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -27,6 +27,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProfCommon.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InterleavedRange.h" @@ -306,13 +307,7 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; -// For optional hinted size reporting, holds a pair of the full stack id -// (pre-trimming, from the full context in the profile), and the associated -// total profiled size. -struct ContextTotalSize { - uint64_t FullStackId; - uint64_t TotalSize; -}; + /// Summary of memprof callsite metadata. 
struct CallsiteInfo { @@ -350,19 +345,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) { return OS; } -// Allocation type assigned to an allocation reached by a given context. -// More can be added, now this is cold, notcold and hot. -// Values should be powers of two so that they can be ORed, in particular to -// track allocations that have different behavior with different calling -// contexts. -enum class AllocationType : uint8_t { - None = 0, - NotCold = 1, - Cold = 2, - Hot = 4, - All = 7 // This should always be set to the OR of all values. -}; - /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { // The allocation type for this profiled context. diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h new file mode 100644 index 0..4097ccb651188 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -0,0 +1,44 @@ +//===- MemProfCommon.h - MemProf support *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common types used by different parts of the MemProf code. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROFCOMMON_H +#define LLVM_PROFILEDATA_MEMPROFCOMMON_H + +#include + +namespace llvm { + +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + +// Allocation type assigned to an allocation reached by a given context. +// More can be added, now this is cold, notcold and hot. 
+// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + Hot = 4, + All = 7 // This should always be set to the OR of all values. +}; + +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H + diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 5982476f3994e..6538311571529 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include +#include using namespace llvm; using namespace llvm::memprof; `` https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/snehasish created https://github.com/llvm/llvm-project/pull/140502 None >From 83ff2babe9dc5e71bc01eefb0aa78e6634d25351 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 16 May 2025 20:20:00 -0700 Subject: [PATCH] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord --- llvm/include/llvm/ProfileData/MemProf.h | 10 ++-- llvm/include/llvm/ProfileData/MemProfYAML.h | 2 +- llvm/lib/ProfileData/MemProf.cpp | 2 +- llvm/lib/ProfileData/MemProfReader.cpp| 2 +- .../Instrumentation/MemProfiler.cpp | 4 +- llvm/unittests/ProfileData/MemProfTest.cpp| 20 .../Instrumentation/MemProfUseTest.cpp| 48 +-- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0bc1432f7d198..215102c131fff 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -472,13 +472,13 @@ struct IndexedMemProfRecord { // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( llvm::function_ref(const CallStackId)> Callback) const; - - // Returns the GUID for the function name after canonicalization. For - // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are - // mapped to functions using this GUID. - static GlobalValue::GUID getGUID(const StringRef FunctionName); }; +// Returns the GUID for the function name after canonicalization. For +// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are +// mapped to functions using this GUID. +GlobalValue::GUID getGUID(const StringRef FunctionName); + // Holds call site information with frame contents inline. 
struct CallSiteInfo { // The frames in the call stack diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h index 08dee253f615a..b642e3098aa0e 100644 --- a/llvm/include/llvm/ProfileData/MemProfYAML.h +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -46,7 +46,7 @@ template <> struct ScalarTraits { Val = Num; } else { // Otherwise, treat the input as a string containing a function name. - Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + Val = memprof::getGUID(Scalar); } return StringRef(); } diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index a9c5ee09a6daf..795e97bee38f5 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -343,7 +343,7 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord( return Record; } -GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { +GlobalValue::GUID getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop // those by default. This is by design to differentiate internal linkage diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index e0f280b9eb2f6..aca534b0a4c98 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -570,7 +570,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames( I++) { const auto &DIFrame = DI.getFrame(I); const uint64_t Guid = -IndexedMemProfRecord::getGUID(DIFrame.FunctionName); +memprof::getGUID(DIFrame.FunctionName); const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. 
I != NumFrames - 1); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 375ff84f82ed2..5982476f3994e 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -865,8 +865,8 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, StringRef CallerName = DIL->getSubprogramLinkageName(); assert(!CallerName.empty() && "Be sure to enable -fdebug-info-for-profiling"); - uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); - uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); + uint64_t CallerGUID = memprof::getGUID(CallerName); + uint64_t CalleeGUID = memprof::getGUID(CalleeName); // Pretend that we are calling a function with GUID == 0 if we are // in the inline stack leading to a heap allocation function. if (IsAlloc) { diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index a072dee26d9a0..2ae9cd96f0197 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/Mem
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
snehasish wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-downstack-mergeability-warning). Learn more: https://graphite.dev/docs/merge-pull-requests * **#140505** (https://app.graphite.dev/github/pr/llvm/llvm-project/140505?utm_source=stack-comment-icon) * **#140504** (https://app.graphite.dev/github/pr/llvm/llvm-project/140504?utm_source=stack-comment-icon) * **#140503** (https://app.graphite.dev/github/pr/llvm/llvm-project/140503?utm_source=stack-comment-icon) * **#140502** (https://app.graphite.dev/github/pr/llvm/llvm-project/140502?utm_source=stack-comment-icon) * **#140501** (https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/140501?utm_source=stack-comment-view-in-graphite) * **#140500** (https://app.graphite.dev/github/pr/llvm/llvm-project/140500?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment).
Learn more about stacking (https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] translate derived type array init to attribute if possible (PR #140268)
@@ -0,0 +1,204 @@ +//===-- LLVMInsertChainFolder.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "flang-insert-folder" + +#include + +namespace { +// Helper class to construct the attribute elements of an aggregate value being +// folded without creating a full mlir::Attribute representation for each step +// of the insert value chain, which would both be expensive in terms of +// compilation time and memory (since the intermediate Attribute would survive, +// unused, inside the mlir context). +class InsertChainBackwardFolder { + // Type for the current value of an element of the aggregate value being + // constructed by the insert chain. + // At any point of the insert chain, the value of an element is either: + // - nullptr: not yet known, the insert has not yet been seen. + // - an mlir::Attribute: the element is fully defined. + // - a nested InsertChainBackwardFolder: the element is itself an aggregate + //and its sub-elements have been partially defined (inserts with multiple + //indices have been seen). + + // The insertion folder assumes backward walk of the insert chain. Once an + // element or sub-element has been defined, it is not overridden by new + // insertions (last insert wins). 
+ using InFlightValue = + llvm::PointerUnion; + +public: + InsertChainBackwardFolder( + mlir::Type type, std::deque *folderStorage) + : values(getNumElements(type), mlir::Attribute{}), +folderStorage{folderStorage}, type{type} {} + + /// Push + bool pushValue(mlir::Attribute val, llvm::ArrayRef at); + + mlir::Attribute finalize(mlir::Attribute defaultFieldValue); + +private: + static int64_t getNumElements(mlir::Type type) { +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody().size(); +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getNumElements(); +return 0; + } + + static mlir::Type getSubElementType(mlir::Type type, int64_t field) { +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getElementType(); +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody()[field]; +return {}; + } + + // Current element value of the aggregate value being built. + llvm::SmallVector values; + // std::deque is used to allocate storage for nested list and guarantee the + // stability of the InsertChainBackwardFolder* used as element value. + std::deque *folderStorage; + // Type of the aggregate value being built. + mlir::Type type; +}; +} // namespace + +// Helper to fold the value being inserted by an llvm.insert_value. +// This may call tryFoldingLLVMInsertChain if the value is an aggregate and +// was itself constructed by a different insert chain. 
+static mlir::Attribute getAttrIfConstant(mlir::Value val, + mlir::OpBuilder &rewriter) { + if (auto cst = val.getDefiningOp()) +return cst.getValue(); + if (auto insert = val.getDefiningOp()) +return fir::tryFoldingLLVMInsertChain(val, rewriter); + if (val.getDefiningOp()) +return mlir::LLVM::ZeroAttr::get(val.getContext()); + if (val.getDefiningOp()) +return mlir::LLVM::UndefAttr::get(val.getContext()); + if (mlir::Operation *op = val.getDefiningOp()) { +unsigned resNum = llvm::cast(val).getResultNumber(); +llvm::SmallVector results; +if (mlir::succeeded(rewriter.tryFold(op, results)) && +results.size() > resNum) { + if (auto cst = results[resNum].getDefiningOp()) +return cst.getValue(); +} + } + if (auto trunc = val.getDefiningOp()) +if (auto attr = getAttrIfConstant(trunc.getArg(), rewriter)) + if (auto intAttr = llvm::dyn_cast(attr)) +return mlir::IntegerAttr::get(trunc.getType(), intAttr.getInt()); + LLVM_DEBUG(llvm::dbgs() << "cannot fold insert value operand: " << val + << "\n"); + return {}; +} + +mlir::Attribute +InsertChainBackwardFolder::finalize(mlir::Attribute defaultFieldValue) { + std::vector attrs; + attrs.reserve(values.size()); + for (InFlightValue &inFlight : values) { +if (!inFlight) { + attrs.push_back(defaultFieldValue); +} else if (auto attr = llvm::dyn_cast(inFlight)) { + attrs.push_back(attr); +} else { + auto *inFlightList = llvm::cast(inFlight); + attrs.push_back(inFlightList->finalize(defaul
[llvm-branch-commits] [flang] [flang] translate derived type array init to attribute if possible (PR #140268)
@@ -0,0 +1,204 @@ +//===-- LLVMInsertChainFolder.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "flang-insert-folder" + +#include + +namespace { +// Helper class to construct the attribute elements of an aggregate value being +// folded without creating a full mlir::Attribute representation for each step +// of the insert value chain, which would both be expensive in terms of +// compilation time and memory (since the intermediate Attribute would survive, +// unused, inside the mlir context). +class InsertChainBackwardFolder { + // Type for the current value of an element of the aggregate value being + // constructed by the insert chain. + // At any point of the insert chain, the value of an element is either: + // - nullptr: not yet known, the insert has not yet been seen. + // - an mlir::Attribute: the element is fully defined. + // - a nested InsertChainBackwardFolder: the element is itself an aggregate + //and its sub-elements have been partially defined (inserts with multiple + //indices have been seen). + + // The insertion folder assumes backward walk of the insert chain. Once an + // element or sub-element has been defined, it is not overridden by new + // insertions (last insert wins). 
+ using InFlightValue = + llvm::PointerUnion; + +public: + InsertChainBackwardFolder( + mlir::Type type, std::deque *folderStorage) + : values(getNumElements(type), mlir::Attribute{}), +folderStorage{folderStorage}, type{type} {} + + /// Push + bool pushValue(mlir::Attribute val, llvm::ArrayRef at); + + mlir::Attribute finalize(mlir::Attribute defaultFieldValue); + +private: + static int64_t getNumElements(mlir::Type type) { +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody().size(); +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getNumElements(); +return 0; + } + + static mlir::Type getSubElementType(mlir::Type type, int64_t field) { +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getElementType(); +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody()[field]; +return {}; + } + + // Current element value of the aggregate value being built. + llvm::SmallVector values; + // std::deque is used to allocate storage for nested list and guarantee the + // stability of the InsertChainBackwardFolder* used as element value. + std::deque *folderStorage; + // Type of the aggregate value being built. + mlir::Type type; +}; +} // namespace + +// Helper to fold the value being inserted by an llvm.insert_value. +// This may call tryFoldingLLVMInsertChain if the value is an aggregate and +// was itself constructed by a different insert chain. 
+static mlir::Attribute getAttrIfConstant(mlir::Value val, + mlir::OpBuilder &rewriter) { + if (auto cst = val.getDefiningOp()) +return cst.getValue(); + if (auto insert = val.getDefiningOp()) +return fir::tryFoldingLLVMInsertChain(val, rewriter); + if (val.getDefiningOp()) +return mlir::LLVM::ZeroAttr::get(val.getContext()); + if (val.getDefiningOp()) +return mlir::LLVM::UndefAttr::get(val.getContext()); + if (mlir::Operation *op = val.getDefiningOp()) { +unsigned resNum = llvm::cast(val).getResultNumber(); +llvm::SmallVector results; +if (mlir::succeeded(rewriter.tryFold(op, results)) && +results.size() > resNum) { + if (auto cst = results[resNum].getDefiningOp()) +return cst.getValue(); +} + } + if (auto trunc = val.getDefiningOp()) +if (auto attr = getAttrIfConstant(trunc.getArg(), rewriter)) + if (auto intAttr = llvm::dyn_cast(attr)) +return mlir::IntegerAttr::get(trunc.getType(), intAttr.getInt()); + LLVM_DEBUG(llvm::dbgs() << "cannot fold insert value operand: " << val + << "\n"); + return {}; +} + +mlir::Attribute +InsertChainBackwardFolder::finalize(mlir::Attribute defaultFieldValue) { + std::vector attrs; + attrs.reserve(values.size()); + for (InFlightValue &inFlight : values) { +if (!inFlight) { + attrs.push_back(defaultFieldValue); +} else if (auto attr = llvm::dyn_cast(inFlight)) { + attrs.push_back(attr); +} else { + auto *inFlightList = llvm::cast(inFlight); + attrs.push_back(inFlightList->finalize(defaul
[llvm-branch-commits] [flang] [flang] translate derived type array init to attribute if possible (PR #140268)
https://github.com/jeanPerier updated https://github.com/llvm/llvm-project/pull/140268 >From d71c0b7f45582ece43016eb98367251e54e75280 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Fri, 16 May 2025 08:09:37 -0700 Subject: [PATCH 1/2] [flang] translate derived type array init to attribute if possible --- .../Optimizer/CodeGen/LLVMInsertChainFolder.h | 31 +++ .../include/flang/Optimizer/Dialect/FIROps.td | 5 + flang/lib/Optimizer/CodeGen/CMakeLists.txt| 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 51 +++-- .../CodeGen/LLVMInsertChainFolder.cpp | 204 ++ flang/lib/Optimizer/Dialect/FIROps.cpp| 15 ++ .../Fir/convert-and-fold-insert-on-range.fir | 33 +++ 7 files changed, 319 insertions(+), 21 deletions(-) create mode 100644 flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h create mode 100644 flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp create mode 100644 flang/test/Fir/convert-and-fold-insert-on-range.fir diff --git a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h new file mode 100644 index 0..d577c4c0fa70b --- /dev/null +++ b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h @@ -0,0 +1,31 @@ +//===-- LLVMInsertChainFolder.h -- insertvalue chain folder *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Helper to fold LLVM dialect llvm.insertvalue chain representing constants +// into an Attribute representation. +// This sits in Flang because it is incomplete and tailored for flang needs. +// +//===--===// + +namespace mlir { +class Attribute; +class OpBuilder; +class Value; +} // namespace mlir + +namespace fir { + +/// Attempt to fold an llvm.insertvalue chain into an attribute representation +/// suitable as llvm.constant operand. 
The returned value will be a null pointer +/// if this is not an llvm.insertvalue result or if the chain is not a constant, +/// or cannot be represented as an Attribute. The operations are not deleted, +/// but some llvm.insertvalue value operands may be folded with the builder on +/// the way. +mlir::Attribute tryFoldingLLVMInsertChain(mlir::Value insertChainResult, + mlir::OpBuilder &builder); +} // namespace fir diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 458b780806144..dc66885f776f0 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2129,6 +2129,11 @@ def fir_InsertOnRangeOp : fir_OneResultOp<"insert_on_range", [NoMemoryEffect]> { $seq `,` $val custom($coor) attr-dict `:` functional-type(operands, results) }]; + let extraClassDeclaration = [{ +/// Is this insert_on_range inserting on all the values of the result type? +bool isFullRange(); + }]; + let hasVerifier = 1; } diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt index 04480bac552b7..980307db315d9 100644 --- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -3,6 +3,7 @@ add_flang_library(FIRCodeGen CodeGen.cpp CodeGenOpenMP.cpp FIROpPatterns.cpp + LLVMInsertChainFolder.cpp LowerRepackArrays.cpp PreCGRewrite.cpp TBAABuilder.cpp diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ad9119ba4a031..ed76a77ced047 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -14,6 +14,7 @@ #include "flang/Optimizer/CodeGen/CodeGenOpenMP.h" #include "flang/Optimizer/CodeGen/FIROpPatterns.h" +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRCG/CGOps.h" 
@@ -2412,15 +2413,38 @@ struct InsertOnRangeOpConversion doRewrite(fir::InsertOnRangeOp range, mlir::Type ty, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { -llvm::SmallVector dims; -auto type = adaptor.getOperands()[0].getType(); +auto arrayType = adaptor.getSeq().getType(); // Iteratively extract the array dimensions from the type. +llvm::SmallVector dims; +mlir::Type type = arrayType; while (auto t = mlir::dyn_cast(type)) { dims.push_back(t.getNumElements()); type = t.getElementType(); } +// Avoid generating long insert chain that are very slow to fold back +// (which is required in
[llvm-branch-commits] [clang] Implement src:*=sanitize for UBSan. (PR #140489)
https://github.com/qinkunbao closed https://github.com/llvm/llvm-project/pull/140489 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Clang][Backport] Demote mixed enumeration arithmetic error to a warning (#131811) (PR #139396)
@@ -7567,9 +7567,13 @@ def warn_arith_conv_mixed_enum_types_cxx20 : Warning< "%sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2 is deprecated">, InGroup; -def err_conv_mixed_enum_types_cxx26 : Error< + +def err_conv_mixed_enum_types: Error < "invalid %sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2">; +def warn_conv_mixed_enum_types_cxx26 : Warning < + err_conv_mixed_enum_types.Summary>, + InGroup, DefaultError; AaronBallman wrote: > The issue is that TableGen sorts the enums alphabetically, so adding a new > value will change the name->integer mapping for approximately half of the > enum. Maybe ABI break is the wrong term here, but it means than an app built > against 20.1.4, for example will stop working if 20.1.4 libraries are > replaced by 20.1.5 libraries, which is something we want to avoid even if > it's not technically an ABI break. Ah okay, that's good to know, thank you! https://github.com/llvm/llvm-project/pull/139396 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/kazutakahirata approved this pull request. LGTM. Thanks! https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/kazutakahirata edited https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/kazutakahirata approved this pull request. LGTM. Thanks! https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/el-ev approved this pull request. https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/kazutakahirata approved this pull request. LGTM. Thanks! https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/kazutakahirata edited https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/kazutakahirata approved this pull request. LGTM. Thanks! https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/kazutakahirata edited https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
https://github.com/teresajohnson approved this pull request. https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/kazutakahirata edited https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Add the LLVM license text and minor clean up. (PR #140504)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/140504 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/teresajohnson approved this pull request. lgtm with one question https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
@@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include +#include teresajohnson wrote: Why this change? Were we pulling it in from ModuleSummaryIndex.h via some other chain of includes? https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move IndexedMemProfData to its own header. (PR #140503)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/140503 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/140502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] IR: Remove redundant UseList check in addUse (PR #138676)
arsenm wrote: ### Merge activity * **May 19, 6:10 AM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/138676). https://github.com/llvm/llvm-project/pull/138676 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562)
https://github.com/TIFitis created https://github.com/llvm/llvm-project/pull/140562 This patch adds support to emit default declare mappers for implicit mapping of derived types when not supplied by user. This especially helps tackle mapping of allocatables of derived types. >From 5d735f1cd6d8dce7cb6dbd8cf3df41967f8911b5 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Thu, 8 May 2025 21:19:26 +0100 Subject: [PATCH] [OpenMP][Flang] Emit default declare mappers implicitly for derived types This patch adds support to emit default declare mappers for implicit mapping of derived types when not supplied by user. This especially helps tackle mapping of allocatables of derived types. This supports nested derived types as well. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp| 39 +++--- flang/lib/Lower/OpenMP/OpenMP.cpp | 132 +- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 2 +- flang/lib/Semantics/resolve-names.cpp | 95 +++-- flang/test/Lower/OpenMP/derived-type-map.f90 | 22 ++- 5 files changed, 223 insertions(+), 67 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 8dcc8be9be5bf..cf25e91a437b8 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1102,23 +1102,30 @@ void ClauseProcessor::processMapObjects( auto getDefaultMapperID = [&](const omp::Object &object, std::string &mapperIdName) { -if (!mlir::isa( -firOpBuilder.getRegion().getParentOp())) { - const semantics::DerivedTypeSpec *typeSpec = nullptr; - - if (object.sym()->owner().IsDerivedType()) -typeSpec = object.sym()->owner().derivedTypeSpec(); - else if (object.sym()->GetType() && - object.sym()->GetType()->category() == - semantics::DeclTypeSpec::TypeDerived) -typeSpec = &object.sym()->GetType()->derivedTypeSpec(); - - if (typeSpec) { -mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; -if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) - 
mapperIdName = converter.mangleName(mapperIdName, sym->owner()); - } +const semantics::DerivedTypeSpec *typeSpec = nullptr; + +if (object.sym()->GetType() && object.sym()->GetType()->category() == + semantics::DeclTypeSpec::TypeDerived) + typeSpec = &object.sym()->GetType()->derivedTypeSpec(); +else if (object.sym()->owner().IsDerivedType()) + typeSpec = object.sym()->owner().derivedTypeSpec(); + +if (typeSpec) { + mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) +mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else +mapperIdName = +converter.mangleName(mapperIdName, *typeSpec->GetScope()); } + +// Make sure we don't return a mapper to self +llvm::StringRef parentOpName; +if (auto declMapOp = mlir::dyn_cast( +firOpBuilder.getRegion().getParentOp())) + parentOpName = declMapOp.getSymName(); +if (mapperIdName == parentOpName) + mapperIdName = ""; }; // Create the mapper symbol from its name, if specified. 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index cfcba0159db8d..8d5c26a4f2d58 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2348,6 +2348,124 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, +mlir::Location loc, fir::RecordType recordType, +llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, +llvm::SmallVectorImpl &bounds) { +fir::ExtendedValue extVal = +hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, +hlfir::Entity{mapVal}, +/*conti
[llvm-branch-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Akash Banerjee (TIFitis) Changes This patch adds support to emit default declare mappers for implicit mapping of derived types when not supplied by user. This especially helps tackle mapping of allocatables of derived types. --- Patch is 33.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140562.diff 5 Files Affected: - (modified) flang/lib/Lower/OpenMP/ClauseProcessor.cpp (+23-16) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+131-1) - (modified) flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp (+1-1) - (modified) flang/lib/Semantics/resolve-names.cpp (+47-48) - (modified) flang/test/Lower/OpenMP/derived-type-map.f90 (+21-1) ``diff diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 8dcc8be9be5bf..cf25e91a437b8 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1102,23 +1102,30 @@ void ClauseProcessor::processMapObjects( auto getDefaultMapperID = [&](const omp::Object &object, std::string &mapperIdName) { -if (!mlir::isa( -firOpBuilder.getRegion().getParentOp())) { - const semantics::DerivedTypeSpec *typeSpec = nullptr; - - if (object.sym()->owner().IsDerivedType()) -typeSpec = object.sym()->owner().derivedTypeSpec(); - else if (object.sym()->GetType() && - object.sym()->GetType()->category() == - semantics::DeclTypeSpec::TypeDerived) -typeSpec = &object.sym()->GetType()->derivedTypeSpec(); - - if (typeSpec) { -mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; -if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) - mapperIdName = converter.mangleName(mapperIdName, sym->owner()); - } +const semantics::DerivedTypeSpec *typeSpec = nullptr; + +if (object.sym()->GetType() && object.sym()->GetType()->category() == + semantics::DeclTypeSpec::TypeDerived) + typeSpec = &object.sym()->GetType()->derivedTypeSpec(); +else if 
(object.sym()->owner().IsDerivedType()) + typeSpec = object.sym()->owner().derivedTypeSpec(); + +if (typeSpec) { + mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) +mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else +mapperIdName = +converter.mangleName(mapperIdName, *typeSpec->GetScope()); } + +// Make sure we don't return a mapper to self +llvm::StringRef parentOpName; +if (auto declMapOp = mlir::dyn_cast( +firOpBuilder.getRegion().getParentOp())) + parentOpName = declMapOp.getSymName(); +if (mapperIdName == parentOpName) + mapperIdName = ""; }; // Create the mapper symbol from its name, if specified. diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index cfcba0159db8d..8d5c26a4f2d58 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2348,6 +2348,124 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, +mlir::Location loc, fir::RecordType recordType, +llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, +llvm::SmallVectorImpl &bounds) { +fir::ExtendedValue
extVal = +hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, +hlfir::Entity{mapVal}, +/*contiguousHint=*/true) +.first; +fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( +firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); +bounds = fir::factory::genImplicitBoundsOps( +firOpBuilder, info, extVal, +/*dataExvIsAssumedSize=*/false, mapVal.getLoc(
[llvm-branch-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Akash Banerjee (TIFitis) Changes This patch adds support to emit default declare mappers for implicit mapping of derived types when not supplied by user. This especially helps tackle mapping of allocatables of derived types. --- Patch is 33.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140562.diff 5 Files Affected: - (modified) flang/lib/Lower/OpenMP/ClauseProcessor.cpp (+23-16) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+131-1) - (modified) flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp (+1-1) - (modified) flang/lib/Semantics/resolve-names.cpp (+47-48) - (modified) flang/test/Lower/OpenMP/derived-type-map.f90 (+21-1) ``diff diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 8dcc8be9be5bf..cf25e91a437b8 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1102,23 +1102,30 @@ void ClauseProcessor::processMapObjects( auto getDefaultMapperID = [&](const omp::Object &object, std::string &mapperIdName) { -if (!mlir::isa( -firOpBuilder.getRegion().getParentOp())) { - const semantics::DerivedTypeSpec *typeSpec = nullptr; - - if (object.sym()->owner().IsDerivedType()) -typeSpec = object.sym()->owner().derivedTypeSpec(); - else if (object.sym()->GetType() && - object.sym()->GetType()->category() == - semantics::DeclTypeSpec::TypeDerived) -typeSpec = &object.sym()->GetType()->derivedTypeSpec(); - - if (typeSpec) { -mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; -if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) - mapperIdName = converter.mangleName(mapperIdName, sym->owner()); - } +const semantics::DerivedTypeSpec *typeSpec = nullptr; + +if (object.sym()->GetType() && object.sym()->GetType()->category() == + semantics::DeclTypeSpec::TypeDerived) + typeSpec = &object.sym()->GetType()->derivedTypeSpec(); +else 
if (object.sym()->owner().IsDerivedType()) + typeSpec = object.sym()->owner().derivedTypeSpec(); + +if (typeSpec) { + mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) +mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else +mapperIdName = +converter.mangleName(mapperIdName, *typeSpec->GetScope()); } + +// Make sure we don't return a mapper to self +llvm::StringRef parentOpName; +if (auto declMapOp = mlir::dyn_cast( +firOpBuilder.getRegion().getParentOp())) + parentOpName = declMapOp.getSymName(); +if (mapperIdName == parentOpName) + mapperIdName = ""; }; // Create the mapper symbol from its name, if specified. diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index cfcba0159db8d..8d5c26a4f2d58 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2348,6 +2348,124 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, +mlir::Location loc, fir::RecordType recordType, +llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal, +llvm::SmallVectorImpl &bounds) { +fir::ExtendedValue
extVal = +hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, +hlfir::Entity{mapVal}, +/*contiguousHint=*/true) +.first; +fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( +firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); +bounds = fir::factory::genImplicitBoundsOps( +firOpBuilder, info, extVal, +/*dataExvIsAssumedSize=*/false, mapVal.getL
[llvm-branch-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp -- flang/lib/Lower/OpenMP/ClauseProcessor.cpp flang/lib/Lower/OpenMP/OpenMP.cpp flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp flang/lib/Semantics/resolve-names.cpp `` View the diff from clang-format here. ``diff diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 318af99a5..627205cc1 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -2163,7 +2163,7 @@ void AttrsVisitor::SetBindNameOn(Symbol &symbol) { } symbol.SetBindName(std::move(*label)); if (!oldBindName.empty()) { -if (const std::string *newBindName{symbol.GetBindName()}) { +if (const std::string * newBindName{symbol.GetBindName()}) { if (oldBindName != *newBindName) { Say(symbol.name(), "The entity '%s' has multiple BIND names ('%s' and '%s')"_err_en_US, @@ -2285,7 +2285,7 @@ void DeclTypeSpecVisitor::Post(const parser::TypeSpec &typeSpec) { // expression semantics if the DeclTypeSpec is a valid TypeSpec. // The grammar ensures that it's an intrinsic or derived type spec, // not TYPE(*) or CLASS(*) or CLASS(T). 
- if (const DeclTypeSpec *spec{state_.declTypeSpec}) { + if (const DeclTypeSpec * spec{state_.declTypeSpec}) { switch (spec->category()) { case DeclTypeSpec::Numeric: case DeclTypeSpec::Logical: @@ -2293,7 +2293,7 @@ void DeclTypeSpecVisitor::Post(const parser::TypeSpec &typeSpec) { typeSpec.declTypeSpec = spec; break; case DeclTypeSpec::TypeDerived: - if (const DerivedTypeSpec *derived{spec->AsDerived()}) { + if (const DerivedTypeSpec * derived{spec->AsDerived()}) { CheckForAbstractType(derived->typeSymbol()); // C703 typeSpec.declTypeSpec = spec; } @@ -2891,7 +2891,7 @@ void ScopeHandler::ApplyImplicitRules( if (context().HasError(symbol) || !NeedsType(symbol)) { return; } - if (const DeclTypeSpec *type{GetImplicitType(symbol)}) { + if (const DeclTypeSpec * type{GetImplicitType(symbol)}) { if (!skipImplicitTyping_) { symbol.set(Symbol::Flag::Implicit); symbol.SetType(*type); @@ -2991,7 +2991,7 @@ const DeclTypeSpec *ScopeHandler::GetImplicitType( const auto *type{implicitRulesMap_->at(scope).GetType( symbol.name(), respectImplicitNoneType)}; if (type) { -if (const DerivedTypeSpec *derived{type->AsDerived()}) { +if (const DerivedTypeSpec * derived{type->AsDerived()}) { // Resolve any forward-referenced derived type; a quick no-op else. 
auto &instantiatable{*const_cast(derived)}; instantiatable.Instantiate(currScope()); @@ -3969,7 +3969,7 @@ Scope *ModuleVisitor::FindModule(const parser::Name &name, if (scope) { if (DoesScopeContain(scope, currScope())) { // 14.2.2(1) std::optional submoduleName; - if (const Scope *container{FindModuleOrSubmoduleContaining(currScope())}; + if (const Scope * container{FindModuleOrSubmoduleContaining(currScope())}; container && container->IsSubmodule()) { submoduleName = container->GetName(); } @@ -4074,7 +4074,7 @@ bool InterfaceVisitor::isAbstract() const { void InterfaceVisitor::AddSpecificProcs( const std::list &names, ProcedureKind kind) { - if (Symbol *symbol{GetGenericInfo().symbol}; + if (Symbol * symbol{GetGenericInfo().symbol}; symbol && symbol->has()) { for (const auto &name : names) { specificsForGenericProcs_.emplace(symbol, std::make_pair(&name, kind)); @@ -4174,7 +4174,7 @@ void GenericHandler::DeclaredPossibleSpecificProc(Symbol &proc) { } void InterfaceVisitor::ResolveNewSpecifics() { - if (Symbol *generic{genericInfo_.top().symbol}; + if (Symbol * generic{genericInfo_.top().symbol}; generic && generic->has()) { ResolveSpecificsInGeneric(*generic, false); } @@ -4315,7 +4315,7 @@ bool SubprogramVisitor::Pre(const parser::Suffix &suffix) { } else { Message &msg{Say(*suffix.resultName, "RESULT(%s) may appear only in a function"_err_en_US)}; - if (const Symbol *subprogram{InclusiveScope().symbol()}) { + if (const Symbol * subprogram{InclusiveScope().symbol()}) { msg.Attach(subprogram->name(), "Containing subprogram"_en_US); } } @@ -4833,7 +4833,7 @@ Symbol *ScopeHandler::FindSeparateModuleProcedureInterface( symbol = generic->specific(); } } - if (const Symbol *defnIface{FindSeparateModuleSubprogramInterface(symbol)}) { + if (const Symbol * defnIface{FindSeparateModuleSubprogramInterface(symbol)}) { // Error recovery in case of multiple definitions symbol = const_cast(defnIface); } @@ -5068,7 +5068,7 @@ Symbol &SubprogramVisitor::PushSubprogramScope(con
[llvm-branch-commits] [flang] [OpenMP][Flang] Emit default declare mappers implicitly for derived types (PR #140562)
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/140562 >From 580e8625cb0431a86dd77e5c5ba72cd6f33f38ed Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Thu, 8 May 2025 21:19:26 +0100 Subject: [PATCH] [OpenMP][Flang] Emit default declare mappers implicitly for derived types This patch adds support to emit default declare mappers for implicit mapping of derived types when not supplied by user. This especially helps tackle mapping of allocatables of derived types. This supports nested derived types as well. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp| 39 +++--- flang/lib/Lower/OpenMP/OpenMP.cpp | 132 +- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 2 +- flang/test/Lower/OpenMP/derived-type-map.f90 | 22 ++- 4 files changed, 176 insertions(+), 19 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 8dcc8be9be5bf..cf25e91a437b8 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1102,23 +1102,30 @@ void ClauseProcessor::processMapObjects( auto getDefaultMapperID = [&](const omp::Object &object, std::string &mapperIdName) { -if (!mlir::isa( -firOpBuilder.getRegion().getParentOp())) { - const semantics::DerivedTypeSpec *typeSpec = nullptr; - - if (object.sym()->owner().IsDerivedType()) -typeSpec = object.sym()->owner().derivedTypeSpec(); - else if (object.sym()->GetType() && - object.sym()->GetType()->category() == - semantics::DeclTypeSpec::TypeDerived) -typeSpec = &object.sym()->GetType()->derivedTypeSpec(); - - if (typeSpec) { -mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; -if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) - mapperIdName = converter.mangleName(mapperIdName, sym->owner()); - } +const semantics::DerivedTypeSpec *typeSpec = nullptr; + +if (object.sym()->GetType() && object.sym()->GetType()->category() == + semantics::DeclTypeSpec::TypeDerived) + typeSpec = 
&object.sym()->GetType()->derivedTypeSpec(); +else if (object.sym()->owner().IsDerivedType()) + typeSpec = object.sym()->owner().derivedTypeSpec(); + +if (typeSpec) { + mapperIdName = typeSpec->name().ToString() + ".omp.default.mapper"; + if (auto *sym = converter.getCurrentScope().FindSymbol(mapperIdName)) +mapperIdName = converter.mangleName(mapperIdName, sym->owner()); + else +mapperIdName = +converter.mangleName(mapperIdName, *typeSpec->GetScope()); } + +// Make sure we don't return a mapper to self +llvm::StringRef parentOpName; +if (auto declMapOp = mlir::dyn_cast( +firOpBuilder.getRegion().getParentOp())) + parentOpName = declMapOp.getSymName(); +if (mapperIdName == parentOpName) + mapperIdName = ""; }; // Create the mapper symbol from its name, if specified. diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index cfcba0159db8d..8d5c26a4f2d58 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2348,6 +2348,124 @@ genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, queue, item, clauseOps); } +static mlir::FlatSymbolRefAttr +genImplicitDefaultDeclareMapper(lower::AbstractConverter &converter, +mlir::Location loc, fir::RecordType recordType, +llvm::StringRef mapperNameStr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + lower::StatementContext stmtCtx; + + // Save current insertion point before moving to the module scope to create + // the DeclareMapperOp + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + + firOpBuilder.setInsertionPointToStart(converter.getModuleOp().getBody()); + auto declMapperOp = firOpBuilder.create( + loc, mapperNameStr, recordType); + auto &region = declMapperOp.getRegion(); + firOpBuilder.createBlock(&region); + auto mapperArg = region.addArgument(firOpBuilder.getRefType(recordType), loc); + + auto declareOp = + firOpBuilder.create(loc, mapperArg, /*uniq_name=*/""); + + const auto genBoundsOps = [&](mlir::Value mapVal,
+llvm::SmallVectorImpl &bounds) { +fir::ExtendedValue extVal = +hlfir::translateToExtendedValue(mapVal.getLoc(), firOpBuilder, +hlfir::Entity{mapVal}, +/*contiguousHint=*/true) +.first; +fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( +firOpBuilder, mapVal, /*isOptional=*/false, mapVal.getLoc()); +bounds = fir::factory::genImplicitBoundsOps( +firOpBuild
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
https://github.com/teresajohnson edited https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SPARC] Use op-then-halve instructions when we have VIS3 (PR #135718)
koachan wrote: Ping? https://github.com/llvm/llvm-project/pull/135718 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] translate derived type array init to attribute if possible (PR #140268)
@@ -0,0 +1,204 @@ +//===-- LLVMInsertChainFolder.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "flang-insert-folder" + +#include + +namespace { +// Helper class to construct the attribute elements of an aggregate value being +// folded without creating a full mlir::Attribute representation for each step +// of the insert value chain, which would both be expensive in terms of +// compilation time and memory (since the intermediate Attribute would survive, +// unused, inside the mlir context). +class InsertChainBackwardFolder { + // Type for the current value of an element of the aggregate value being + // constructed by the insert chain. + // At any point of the insert chain, the value of an element is either: + // - nullptr: not yet known, the insert has not yet been seen. + // - an mlir::Attribute: the element is fully defined. + // - a nested InsertChainBackwardFolder: the element is itself an aggregate + //and its sub-elements have been partially defined (insert with mutliple + //indices have been seen). + + // The insertion folder assumes backward walk of the insert chain. Once an + // element or sub-element has been defined, it is not overriden by new + // insertions (last insert wins). 
+ using InFlightValue = + llvm::PointerUnion; + +public: + InsertChainBackwardFolder( + mlir::Type type, std::deque *folderStorage) + : values(getNumElements(type), mlir::Attribute{}), +folderStorage{folderStorage}, type{type} {} + + /// Push + bool pushValue(mlir::Attribute val, llvm::ArrayRef at); + + mlir::Attribute finalize(mlir::Attribute defaultFieldValue); + +private: + static int64_t getNumElements(mlir::Type type) { +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody().size(); +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getNumElements(); +return 0; + } + + static mlir::Type getSubElementType(mlir::Type type, int64_t field) { +if (auto arrayTy = +llvm::dyn_cast_if_present(type)) + return arrayTy.getElementType(); +if (auto structTy = +llvm::dyn_cast_if_present(type)) + return structTy.getBody()[field]; +return {}; + } jeanPerier wrote: That the sub element type could not be retrieved. I changed that to return `nullptr` (which is equivalent to `{}`), and to have the caller directly check the result so that it is clearer. Since mlir::Value, Attribute, Type have a null state, it quite commonly used to indicate failure in helpers (for instance, mlir::Value::getDefiningOp returns a null T if the defining op is not a T). I however changed the exposed API to return `FailureOr` to make that very clear to external users. https://github.com/llvm/llvm-project/pull/140268 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Implement src:*=sanitize for UBSan. (PR #140489)
https://github.com/qinkunbao updated https://github.com/llvm/llvm-project/pull/140489 >From d383fc3d23c0c302d134a76d39491c87547526a1 Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Mon, 19 May 2025 02:45:30 + Subject: [PATCH 1/2] fix format Created using spr 1.3.6 --- clang/include/clang/Basic/SanitizerSpecialCaseList.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/SanitizerSpecialCaseList.h b/clang/include/clang/Basic/SanitizerSpecialCaseList.h index dd01a786dee01..25d518e7128cf 100644 --- a/clang/include/clang/Basic/SanitizerSpecialCaseList.h +++ b/clang/include/clang/Basic/SanitizerSpecialCaseList.h @@ -44,9 +44,9 @@ class SanitizerSpecialCaseList : public llvm::SpecialCaseList { StringRef Category = StringRef()) const; // Query ignorelisted entries if any bit in Mask matches the entry's section. - // Return 0 if not found. If found, return the line number (starts with 1). + // Return 0 if not found. If found, return the line number (starts with 1). unsigned inSectionBlame(SanitizerMask Mask, StringRef Prefix, StringRef Query, - StringRef Category = StringRef()) const; + StringRef Category = StringRef()) const; protected: // Initialize SanitizerSections. 
@@ -54,7 +54,7 @@ class SanitizerSpecialCaseList : public llvm::SpecialCaseList { struct SanitizerSection { SanitizerSection(SanitizerMask SM, SectionEntries &E) -: Mask(SM), Entries(E){}; +: Mask(SM), Entries(E) {}; SanitizerMask Mask; SectionEntries &Entries; >From da2f95e6399b539e45ec9235f5751cb6c98acf77 Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Mon, 19 May 2025 03:20:43 + Subject: [PATCH 2/2] fix tests Created using spr 1.3.6 --- clang/lib/Basic/SanitizerSpecialCaseList.cpp | 7 ++- clang/test/CodeGen/ubsan-src-ignorelist-category.test | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp index 3bf79876235db..7da36f3801453 100644 --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -56,7 +56,12 @@ void SanitizerSpecialCaseList::createSanitizerSections() { bool SanitizerSpecialCaseList::inSection(SanitizerMask Mask, StringRef Prefix, StringRef Query, StringRef Category) const { - return inSectionBlame(Mask, Prefix, Query, Category) > 0; + for (auto &S : SanitizerSections) +if ((S.Mask & Mask) && +SpecialCaseList::inSectionBlame(S.Entries, Prefix, Query, Category)) + return true; + + return false; } unsigned SanitizerSpecialCaseList::inSectionBlame(SanitizerMask Mask, diff --git a/clang/test/CodeGen/ubsan-src-ignorelist-category.test b/clang/test/CodeGen/ubsan-src-ignorelist-category.test index f32dc5cbb9e13..e0efd65df8652 100644 --- a/clang/test/CodeGen/ubsan-src-ignorelist-category.test +++ b/clang/test/CodeGen/ubsan-src-ignorelist-category.test @@ -17,7 +17,7 @@ src:* src:*/test1.c=sanitize src:*/test1.c -//--- src.ignorelist.contradict1 +//--- src.ignorelist.contradict2 src:* src:*/test1.c src:*/test1.c=sanitize ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] Implement src:*=sanitize for UBSan. (PR #140489)
https://github.com/qinkunbao edited https://github.com/llvm/llvm-project/pull/140489 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move Radix tree methods to their own header and cpp. (PR #140501)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/ProfileData/MemProfRadixTree.h llvm/lib/ProfileData/MemProfRadixTree.cpp llvm/include/llvm/ProfileData/MemProf.h llvm/include/llvm/ProfileData/MemProfReader.h llvm/lib/Bitcode/Writer/BitcodeWriter.cpp llvm/lib/ProfileData/IndexedMemProfData.cpp llvm/lib/ProfileData/InstrProfReader.cpp llvm/lib/ProfileData/MemProf.cpp llvm/unittests/ProfileData/InstrProfTest.cpp llvm/unittests/ProfileData/MemProfTest.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/ProfileData/MemProfRadixTree.h b/llvm/include/llvm/ProfileData/MemProfRadixTree.h index 9abf9f7a8..7afa66088 100644 --- a/llvm/include/llvm/ProfileData/MemProfRadixTree.h +++ b/llvm/include/llvm/ProfileData/MemProfRadixTree.h @@ -6,7 +6,7 @@ // //===--===// // -// A custom Radix Tree builder for memprof data to optimize for space. +// A custom Radix Tree builder for memprof data to optimize for space. // //===--===// @@ -211,7 +211,6 @@ struct CallerCalleePairExtractor { } }; - // A convenience wrapper around FrameIdConverter and CallStackIdConverter for // tests. 
struct IndexedCallstackIdConverter { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 65b8eb514..a1eb08362 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -//#include "llvm/ProfileData/MemProf.h" +// #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SymbolRemappingReader.h" diff --git a/llvm/lib/ProfileData/MemProfRadixTree.cpp b/llvm/lib/ProfileData/MemProfRadixTree.cpp index 5ef357efd..ec3ff2e72 100644 --- a/llvm/lib/ProfileData/MemProfRadixTree.cpp +++ b/llvm/lib/ProfileData/MemProfRadixTree.cpp @@ -5,12 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===--===// -// This file contains logic that implements a space efficient radix tree +// This file contains logic that implements a space efficient radix tree // encoding for callstacks used by MemProf. 
// //===--===// - #include "llvm/ProfileData/MemProfRadixTree.h" namespace llvm { diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 9f1caae29..439c60267 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -15,8 +15,8 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" -#include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/ProfileData/MemProfData.inc" +#include "llvm/ProfileData/MemProfRadixTree.h" #include "llvm/Support/Compression.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Error.h" diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index a072dee26..26b09698c 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -6,6 +6,7 @@ // //===--===// +#include "llvm/ProfileData/MemProf.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLForwardCompat.h" @@ -13,10 +14,9 @@ #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/IR/Value.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfData.inc" -#include "llvm/ProfileData/MemProfReader.h" #include "llvm/ProfileData/MemProfRadixTree.h" +#include "llvm/ProfileData/MemProfReader.h" #include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" `` https://github.com/llvm/llvm-project/pull/140501 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Add apply_patterns.vector.arm_sve.lower_contraction TD Op (PR #140572)
llvmbot wrote: @llvm/pr-subscribers-mlir-vector Author: Momchil Velikov (momchil-velikov) Changes --- Patch is 73.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140572.diff 12 Files Affected: - (modified) mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h (+31) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td (+26) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+6) - (modified) mlir/include/mlir/InitAllExtensions.h (+2) - (modified) mlir/lib/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.cpp (+54) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+19) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-smmla.mlir (+143-120) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-summla.mlir (+73-50) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-ummla.mlir (+78-60) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-usmmla.mlir (+78-60) ``diff diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt index 9f57627c321fb..cb1e9d01821a2 100644 --- a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(TransformOps) diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h new file mode 100644 index 0..7f22cd1fe6435 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h @@ -0,0 +1,31 @@ +//===- ArmSVEVectorTransformOps.h - Vector transform ops *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H +#define MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H + +#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" +#include "mlir/IR/OpImplementation.h" + +//===--===// +// ArmSVE Vector Transform Operations +//===--===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h.inc" + +namespace mlir { +class DialectRegistry; + +namespace arm_sve { +void registerTransformDialectExtension(DialectRegistry &registry); + +} // namespace arm_sve +} // namespace mlir + +#endif // MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td new file mode 100644 index 0..81b59340f3b0d --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td @@ -0,0 +1,26 @@ +//===- ArmSVEVectorTransformOps.td - Arm SVE transform ops--*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef ARMSVE_VECTOR_TRANSFORM_OPS +#define ARMSVE_VECTOR_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" + +def ApplyArmSVELowerContractionPatternsOp +: Op]> { + let description = [{ +Indicates that vector contraction-like operations should be lowered to +finer-grained vector primitives using the ArmSVE dialect. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + +#endif // ARMSVE_VECTOR_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt new file mode 100644 index 0..ce8d8fea7f188 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS ArmSVEVectorTransformOps.td) +mlir_tablegen(ArmSVEVectorTransformOps.h.inc -gen-op-decls) +mlir_tablegen(ArmSVEVectorTransformOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRArmSVEVectorTransformOpsIncGen) + +add_mlir_doc(ArmSVEVectorTransformOps ArmSVEVectorTransformOps Dialects/ -gen-op-doc) diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.
[llvm-branch-commits] [mlir] [MLIR] Add apply_patterns.vector.arm_sve.lower_contraction TD Op (PR #140572)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/140572 None >From 251f93ea5b87acefac1fbcd6951c3b7870eff83c Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 16 May 2025 15:47:36 + Subject: [PATCH] [MLIR] Add apply_patterns.vector.arm_sve.lower_contraction TD Op --- .../mlir/Dialect/ArmSVE/CMakeLists.txt| 1 + .../TransformOps/ArmSVEVectorTransformOps.h | 31 +++ .../TransformOps/ArmSVEVectorTransformOps.td | 26 ++ .../ArmSVE/TransformOps/CMakeLists.txt| 6 + mlir/include/mlir/InitAllExtensions.h | 2 + mlir/lib/Dialect/ArmSVE/CMakeLists.txt| 1 + .../TransformOps/ArmSVEVectorTransformOps.cpp | 54 .../ArmSVE/TransformOps/CMakeLists.txt| 19 ++ .../Vector/CPU/ArmSVE/vector-smmla.mlir | 263 ++ .../Vector/CPU/ArmSVE/vector-summla.mlir | 123 .../Vector/CPU/ArmSVE/vector-ummla.mlir | 138 + .../Vector/CPU/ArmSVE/vector-usmmla.mlir | 138 + 12 files changed, 512 insertions(+), 290 deletions(-) create mode 100644 mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h create mode 100644 mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td create mode 100644 mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt create mode 100644 mlir/lib/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.cpp create mode 100644 mlir/lib/Dialect/ArmSVE/TransformOps/CMakeLists.txt diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt index 9f57627c321fb..cb1e9d01821a2 100644 --- a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(TransformOps) diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h new file mode 100644 index 0..7f22cd1fe6435 --- /dev/null +++ 
b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h @@ -0,0 +1,31 @@ +//===- ArmSVEVectorTransformOps.h - Vector transform ops *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H +#define MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H + +#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" +#include "mlir/IR/OpImplementation.h" + +//===--===// +// ArmSVE Vector Transform Operations +//===--===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h.inc" + +namespace mlir { +class DialectRegistry; + +namespace arm_sve { +void registerTransformDialectExtension(DialectRegistry &registry); + +} // namespace arm_sve +} // namespace mlir + +#endif // MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td new file mode 100644 index 0..81b59340f3b0d --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td @@ -0,0 +1,26 @@ +//===- ArmSVEVectorTransformOps.td - Arm SVE transform ops--*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef ARMSVE_VECTOR_TRANSFORM_OPS +#define ARMSVE_VECTOR_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" + +def ApplyArmSVELowerContractionPatternsOp +: Op]> { + let description = [{ +Indicates that vector contraction-like operations should be lowered to +finer-grained vector primitives using the ArmSVE dialect. + }]; + + let assemblyFormat = "attr-dict"; +} + +#endif // ARMSVE_VECTOR_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt new file mode 100644 index 0..ce8d8fea7f188 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS ArmSVEVectorTransformOps.td) +mlir_tablegen(ArmSVEVectorTrans
[llvm-branch-commits] [mlir] [MLIR] Add apply_patterns.vector.arm_sve.lower_contraction TD Op (PR #140572)
llvmbot wrote: @llvm/pr-subscribers-mlir-sve Author: Momchil Velikov (momchil-velikov) Changes --- Patch is 73.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140572.diff 12 Files Affected: - (modified) mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h (+31) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td (+26) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+6) - (modified) mlir/include/mlir/InitAllExtensions.h (+2) - (modified) mlir/lib/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.cpp (+54) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+19) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-smmla.mlir (+143-120) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-summla.mlir (+73-50) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-ummla.mlir (+78-60) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-usmmla.mlir (+78-60) ``diff diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt index 9f57627c321fb..cb1e9d01821a2 100644 --- a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(TransformOps) diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h new file mode 100644 index 0..7f22cd1fe6435 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h @@ -0,0 +1,31 @@ +//===- ArmSVEVectorTransformOps.h - Vector transform ops *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H +#define MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H + +#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" +#include "mlir/IR/OpImplementation.h" + +//===--===// +// ArmSVE Vector Transform Operations +//===--===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h.inc" + +namespace mlir { +class DialectRegistry; + +namespace arm_sve { +void registerTransformDialectExtension(DialectRegistry &registry); + +} // namespace arm_sve +} // namespace mlir + +#endif // MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td new file mode 100644 index 0..81b59340f3b0d --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td @@ -0,0 +1,26 @@ +//===- ArmSVEVectorTransformOps.td - Arm SVE transform ops--*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef ARMSVE_VECTOR_TRANSFORM_OPS +#define ARMSVE_VECTOR_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" + +def ApplyArmSVELowerContractionPatternsOp +: Op]> { + let description = [{ +Indicates that vector contraction-like operations should be lowered to +finer-grained vector primitives using the ArmSVE dialect. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + +#endif // ARMSVE_VECTOR_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt new file mode 100644 index 0..ce8d8fea7f188 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS ArmSVEVectorTransformOps.td) +mlir_tablegen(ArmSVEVectorTransformOps.h.inc -gen-op-decls) +mlir_tablegen(ArmSVEVectorTransformOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRArmSVEVectorTransformOpsIncGen) + +add_mlir_doc(ArmSVEVectorTransformOps ArmSVEVectorTransformOps Dialects/ -gen-op-doc) diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h i
[llvm-branch-commits] [mlir] [MLIR] Add apply_patterns.vector.arm_sve.lower_contraction TD Op (PR #140572)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Momchil Velikov (momchil-velikov) Changes --- Patch is 73.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140572.diff 12 Files Affected: - (modified) mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h (+31) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td (+26) - (added) mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+6) - (modified) mlir/include/mlir/InitAllExtensions.h (+2) - (modified) mlir/lib/Dialect/ArmSVE/CMakeLists.txt (+1) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.cpp (+54) - (added) mlir/lib/Dialect/ArmSVE/TransformOps/CMakeLists.txt (+19) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-smmla.mlir (+143-120) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-summla.mlir (+73-50) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-ummla.mlir (+78-60) - (modified) mlir/test/Dialect/Vector/CPU/ArmSVE/vector-usmmla.mlir (+78-60) ``diff diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt index 9f57627c321fb..cb1e9d01821a2 100644 --- a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(TransformOps) diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h new file mode 100644 index 0..7f22cd1fe6435 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h @@ -0,0 +1,31 @@ +//===- ArmSVEVectorTransformOps.h - Vector transform ops *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H +#define MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H + +#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" +#include "mlir/IR/OpImplementation.h" + +//===--===// +// ArmSVE Vector Transform Operations +//===--===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.h.inc" + +namespace mlir { +class DialectRegistry; + +namespace arm_sve { +void registerTransformDialectExtension(DialectRegistry &registry); + +} // namespace arm_sve +} // namespace mlir + +#endif // MLIR_DIALECT_ARM_SVE_VECTOR_TRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td new file mode 100644 index 0..81b59340f3b0d --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/ArmSVEVectorTransformOps.td @@ -0,0 +1,26 @@ +//===- ArmSVEVectorTransformOps.td - Arm SVE transform ops--*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +#ifndef ARMSVE_VECTOR_TRANSFORM_OPS +#define ARMSVE_VECTOR_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" + +def ApplyArmSVELowerContractionPatternsOp +: Op]> { + let description = [{ +Indicates that vector contraction-like operations should be lowered to +finer-grained vector primitives using the ArmSVE dialect. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + +#endif // ARMSVE_VECTOR_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt new file mode 100644 index 0..ce8d8fea7f188 --- /dev/null +++ b/mlir/include/mlir/Dialect/ArmSVE/TransformOps/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS ArmSVEVectorTransformOps.td) +mlir_tablegen(ArmSVEVectorTransformOps.h.inc -gen-op-decls) +mlir_tablegen(ArmSVEVectorTransformOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRArmSVEVectorTransformOpsIncGen) + +add_mlir_doc(ArmSVEVectorTransformOps ArmSVEVectorTransformOps Dialects/ -gen-op-doc) diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index
[llvm-branch-commits] [mlir] [MLIR] Integration tests for lowering vector.contract to SVE FEAT_I8MM (PR #140573)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/140573 None >From 194c1c7737ea7baa74971666f93312a071f5703d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 19 May 2025 14:50:45 + Subject: [PATCH] [MLIR] Integration tests for lowering vector.contract to SVE FEAT_I8MM --- .../CPU/ArmSVE/contraction-smmla-4x8x4.mlir | 117 + .../ArmSVE/contraction-smmla-8x8x8-vs2.mlir | 159 ++ .../CPU/ArmSVE/contraction-summla-4x8x4.mlir | 118 + .../CPU/ArmSVE/contraction-ummla-4x8x4.mlir | 119 + .../CPU/ArmSVE/contraction-usmmla-4x8x4.mlir | 117 + 5 files changed, 630 insertions(+) create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-8x8x8-vs2.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-summla-4x8x4.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-ummla-4x8x4.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-usmmla-4x8x4.mlir diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir new file mode 100644 index 0..88534dd2aab1e --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir @@ -0,0 +1,117 @@ +// REQUIRES: arm-emulator + +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: --convert-vector-to-scf --convert-scf-to-cf --convert-vector-to-llvm='enable-arm-sve enable-arm-i8mm' \ +// DEFINE: --expand-strided-metadata --convert-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts \ +// DEFINE: -o %t + +// DEFINE: %{entry_point} = main + +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+sve,+i8mm" \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%native_mlir_arm_runner_utils 
+ +// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s + +#packed_maps = [ + affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d1, d2)>, + affine_map<(d0, d1, d2) -> (d0, d1)> +] + +func.func private @setArmVLBits(%bits : i32) + +func.func @main() { + %c128 = arith.constant 128 : i32 + func.call @setArmVLBits(%c128) : (i32) -> () + + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %c0_i8 = arith.constant 0 : i8 + +// Accumulator test data + %acc_cst = arith.constant dense<[[-44, 20, 44, -46], + [ -8, 25, -34, 26], + [-20, -36, -3, 39], + [-48, -31, -25, -21]]> : vector<4x4xi32> + %acc_m = memref.alloca() : memref<4x4xi32> + vector.transfer_write %acc_cst, %acc_m[%c0, %c0] : vector<4x4xi32>, memref<4x4xi32> + + %acc_m1 = memref.collapse_shape %acc_m [[0, 1]] : memref<4x4xi32> into memref<16xi32> + %acc_flat = vector.transfer_read %acc_m1[%c0], %c0_i32 {in_bounds = [true]} : memref<16xi32>, vector<[16]xi32> + %acc = vector.shape_cast %acc_flat : vector<[16]xi32> to vector<4x[4]xi32> + + vector.print str "ACC:\n" + %acc0 = vector.extract %acc[0] : vector<[4]xi32> from vector<4x[4]xi32> + %acc1 = vector.extract %acc[1] : vector<[4]xi32> from vector<4x[4]xi32> + %acc2 = vector.extract %acc[2] : vector<[4]xi32> from vector<4x[4]xi32> + %acc3 = vector.extract %acc[3] : vector<[4]xi32> from vector<4x[4]xi32> + vector.print %acc0 : vector<[4]xi32> + vector.print %acc1 : vector<[4]xi32> + vector.print %acc2 : vector<[4]xi32> + vector.print %acc3 : vector<[4]xi32> + + // LHS test data + %lhs_cst = arith.constant dense<[[-35, -27, -36, -31, 23, -34, -8, -33], + [-20, 17, -32, -47, 37, 22, -7, -21], + [ -7, -35, 20, -4, 39, 46, -23, 40], + [ 40, 27, 37, 43, 38, -6, 37, 49]]> : vector<4x8xi8> + + %lhs_m = memref.alloca() : memref<4x8xi8> + vector.transfer_write %lhs_cst, %lhs_m[%c0, %c0] : vector<4x8xi8>, memref<4x8xi8> + %lhs = vector.transfer_read %lhs_m[%c0, %c0], %c0_i8 : memref<4x8xi8>, vector<4x8xi8> + + vector.print str "LHS:\n" + 
%lhs0 = vector.extract %lhs[0] : vector<8xi8> from vector<4x8xi8> + %lhs1 = vector.extract %lhs[1] : vector<8xi8> from vector<4x8xi8> + %lhs2 = vector.extract %lhs[2] : vector<8xi8> from vector<4x8xi8> + %lhs3 = vector.extract %lhs[3] : vector<8xi8> from vector<4x8xi8> + vector.print %lhs0 : vector<8xi8> + vector.print %lhs1 : vector<8xi8> + vector.print %lhs2 : vector<8xi8> + vector.print %lhs3 : vector<8xi8> + + // RHS test data + %rhs_cst = arith.constant dense<[[-17, -50, -1, 48, -13, 22, 39, 33], + [-35, -24, 37, -32, 33, 30, -11, -17], +
[llvm-branch-commits] [mlir] [MLIR] Integration tests for lowering vector.contract to SVE FEAT_I8MM (PR #140573)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Momchil Velikov (momchil-velikov) Changes --- Patch is 30.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140573.diff 5 Files Affected: - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir (+117) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-8x8x8-vs2.mlir (+159) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-summla-4x8x4.mlir (+118) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-ummla-4x8x4.mlir (+119) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-usmmla-4x8x4.mlir (+117) ``diff diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir new file mode 100644 index 0..88534dd2aab1e --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir @@ -0,0 +1,117 @@ +// REQUIRES: arm-emulator + +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: --convert-vector-to-scf --convert-scf-to-cf --convert-vector-to-llvm='enable-arm-sve enable-arm-i8mm' \ +// DEFINE: --expand-strided-metadata --convert-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts \ +// DEFINE: -o %t + +// DEFINE: %{entry_point} = main + +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+sve,+i8mm" \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%native_mlir_arm_runner_utils + +// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s + +#packed_maps = [ + affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d1, d2)>, + affine_map<(d0, d1, d2) -> (d0, d1)> +] + +func.func private @setArmVLBits(%bits : i32) + +func.func @main() { + %c128 = arith.constant 128 : i32 + func.call @setArmVLBits(%c128) : (i32) -> () + + %c0 = arith.constant 0 : 
index + %c0_i32 = arith.constant 0 : i32 + %c0_i8 = arith.constant 0 : i8 + +// Accumulator test data + %acc_cst = arith.constant dense<[[-44, 20, 44, -46], + [ -8, 25, -34, 26], + [-20, -36, -3, 39], + [-48, -31, -25, -21]]> : vector<4x4xi32> + %acc_m = memref.alloca() : memref<4x4xi32> + vector.transfer_write %acc_cst, %acc_m[%c0, %c0] : vector<4x4xi32>, memref<4x4xi32> + + %acc_m1 = memref.collapse_shape %acc_m [[0, 1]] : memref<4x4xi32> into memref<16xi32> + %acc_flat = vector.transfer_read %acc_m1[%c0], %c0_i32 {in_bounds = [true]} : memref<16xi32>, vector<[16]xi32> + %acc = vector.shape_cast %acc_flat : vector<[16]xi32> to vector<4x[4]xi32> + + vector.print str "ACC:\n" + %acc0 = vector.extract %acc[0] : vector<[4]xi32> from vector<4x[4]xi32> + %acc1 = vector.extract %acc[1] : vector<[4]xi32> from vector<4x[4]xi32> + %acc2 = vector.extract %acc[2] : vector<[4]xi32> from vector<4x[4]xi32> + %acc3 = vector.extract %acc[3] : vector<[4]xi32> from vector<4x[4]xi32> + vector.print %acc0 : vector<[4]xi32> + vector.print %acc1 : vector<[4]xi32> + vector.print %acc2 : vector<[4]xi32> + vector.print %acc3 : vector<[4]xi32> + + // LHS test data + %lhs_cst = arith.constant dense<[[-35, -27, -36, -31, 23, -34, -8, -33], + [-20, 17, -32, -47, 37, 22, -7, -21], + [ -7, -35, 20, -4, 39, 46, -23, 40], + [ 40, 27, 37, 43, 38, -6, 37, 49]]> : vector<4x8xi8> + + %lhs_m = memref.alloca() : memref<4x8xi8> + vector.transfer_write %lhs_cst, %lhs_m[%c0, %c0] : vector<4x8xi8>, memref<4x8xi8> + %lhs = vector.transfer_read %lhs_m[%c0, %c0], %c0_i8 : memref<4x8xi8>, vector<4x8xi8> + + vector.print str "LHS:\n" + %lhs0 = vector.extract %lhs[0] : vector<8xi8> from vector<4x8xi8> + %lhs1 = vector.extract %lhs[1] : vector<8xi8> from vector<4x8xi8> + %lhs2 = vector.extract %lhs[2] : vector<8xi8> from vector<4x8xi8> + %lhs3 = vector.extract %lhs[3] : vector<8xi8> from vector<4x8xi8> + vector.print %lhs0 : vector<8xi8> + vector.print %lhs1 : vector<8xi8> + vector.print %lhs2 : vector<8xi8> + 
vector.print %lhs3 : vector<8xi8> + + // RHS test data + %rhs_cst = arith.constant dense<[[-17, -50, -1, 48, -13, 22, 39, 33], + [-35, -24, 37, -32, 33, 30, -11, -17], + [-28, 31, 3, -44, -15, -27, 22, 35], + [-23, 39, 48, 26, -23, 32, -39, -38]]> : vector<4x8xi8> + + %rhs_m = memref.alloca() : memref<4x8xi8> + vector.transfer_write %rhs_cst, %rhs_m[%c0, %c0] : vector<4x8xi8>, memref<4x8xi8> + + %rhs_m1 = memref.collapse_shape %rhs_m [[0, 1]] : memref<4x8xi8> into memref<32xi8> + %rhs_flat = vector.transfer_read %rhs_m1[%c0], %c0_i
[llvm-branch-commits] [mlir] [MLIR] Integration tests for lowering vector.contract to SVE FEAT_I8MM (PR #140573)
llvmbot wrote: @llvm/pr-subscribers-mlir-sve Author: Momchil Velikov (momchil-velikov) Changes --- Patch is 30.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140573.diff 5 Files Affected: - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir (+117) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-8x8x8-vs2.mlir (+159) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-summla-4x8x4.mlir (+118) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-ummla-4x8x4.mlir (+119) - (added) mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-usmmla-4x8x4.mlir (+117) ``diff diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir new file mode 100644 index 0..88534dd2aab1e --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/contraction-smmla-4x8x4.mlir @@ -0,0 +1,117 @@ +// REQUIRES: arm-emulator + +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: --convert-vector-to-scf --convert-scf-to-cf --convert-vector-to-llvm='enable-arm-sve enable-arm-i8mm' \ +// DEFINE: --expand-strided-metadata --convert-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts \ +// DEFINE: -o %t + +// DEFINE: %{entry_point} = main + +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+sve,+i8mm" \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%native_mlir_arm_runner_utils + +// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s + +#packed_maps = [ + affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d1, d2)>, + affine_map<(d0, d1, d2) -> (d0, d1)> +] + +func.func private @setArmVLBits(%bits : i32) + +func.func @main() { + %c128 = arith.constant 128 : i32 + func.call @setArmVLBits(%c128) : (i32) -> () + + %c0 = arith.constant 0 : 
index + %c0_i32 = arith.constant 0 : i32 + %c0_i8 = arith.constant 0 : i8 + +// Accumulator test data + %acc_cst = arith.constant dense<[[-44, 20, 44, -46], + [ -8, 25, -34, 26], + [-20, -36, -3, 39], + [-48, -31, -25, -21]]> : vector<4x4xi32> + %acc_m = memref.alloca() : memref<4x4xi32> + vector.transfer_write %acc_cst, %acc_m[%c0, %c0] : vector<4x4xi32>, memref<4x4xi32> + + %acc_m1 = memref.collapse_shape %acc_m [[0, 1]] : memref<4x4xi32> into memref<16xi32> + %acc_flat = vector.transfer_read %acc_m1[%c0], %c0_i32 {in_bounds = [true]} : memref<16xi32>, vector<[16]xi32> + %acc = vector.shape_cast %acc_flat : vector<[16]xi32> to vector<4x[4]xi32> + + vector.print str "ACC:\n" + %acc0 = vector.extract %acc[0] : vector<[4]xi32> from vector<4x[4]xi32> + %acc1 = vector.extract %acc[1] : vector<[4]xi32> from vector<4x[4]xi32> + %acc2 = vector.extract %acc[2] : vector<[4]xi32> from vector<4x[4]xi32> + %acc3 = vector.extract %acc[3] : vector<[4]xi32> from vector<4x[4]xi32> + vector.print %acc0 : vector<[4]xi32> + vector.print %acc1 : vector<[4]xi32> + vector.print %acc2 : vector<[4]xi32> + vector.print %acc3 : vector<[4]xi32> + + // LHS test data + %lhs_cst = arith.constant dense<[[-35, -27, -36, -31, 23, -34, -8, -33], + [-20, 17, -32, -47, 37, 22, -7, -21], + [ -7, -35, 20, -4, 39, 46, -23, 40], + [ 40, 27, 37, 43, 38, -6, 37, 49]]> : vector<4x8xi8> + + %lhs_m = memref.alloca() : memref<4x8xi8> + vector.transfer_write %lhs_cst, %lhs_m[%c0, %c0] : vector<4x8xi8>, memref<4x8xi8> + %lhs = vector.transfer_read %lhs_m[%c0, %c0], %c0_i8 : memref<4x8xi8>, vector<4x8xi8> + + vector.print str "LHS:\n" + %lhs0 = vector.extract %lhs[0] : vector<8xi8> from vector<4x8xi8> + %lhs1 = vector.extract %lhs[1] : vector<8xi8> from vector<4x8xi8> + %lhs2 = vector.extract %lhs[2] : vector<8xi8> from vector<4x8xi8> + %lhs3 = vector.extract %lhs[3] : vector<8xi8> from vector<4x8xi8> + vector.print %lhs0 : vector<8xi8> + vector.print %lhs1 : vector<8xi8> + vector.print %lhs2 : vector<8xi8> + 
vector.print %lhs3 : vector<8xi8> + + // RHS test data + %rhs_cst = arith.constant dense<[[-17, -50, -1, 48, -13, 22, 39, 33], + [-35, -24, 37, -32, 33, 30, -11, -17], + [-28, 31, 3, -44, -15, -27, 22, 35], + [-23, 39, 48, 26, -23, 32, -39, -38]]> : vector<4x8xi8> + + %rhs_m = memref.alloca() : memref<4x8xi8> + vector.transfer_write %rhs_cst, %rhs_m[%c0, %c0] : vector<4x8xi8>, memref<4x8xi8> + + %rhs_m1 = memref.collapse_shape %rhs_m [[0, 1]] : memref<4x8xi8> into memref<32xi8> + %rhs_flat = vector.transfer_read %rhs_m1[%c0], %
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/140582 We need to consider the use instruction's interpretation of the bits, not the defined immediate without use context. This will regress some cases where we previously could match f64 inline constants. We can restore them by either using pseudo instructions to materialize f64 constants, or recognizing reg_sequence decomposed into 32-bit pieces for them (which essentially means recognizing every other input is a 0). Fixes #139908 >From 9ae69332688f4864b25449a694dc67968a1bf45b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 May 2025 17:21:39 +0200 Subject: [PATCH] AMDGPU: Check for subreg match when folding through reg_sequence We need to consider the use instruction's interpretation of the bits, not the defined immediate without use context. This will regress some cases where we previously could match f64 inline constants. We can restore them by either using pseudo instructions to materialize f64 constants, or recognizing reg_sequence decomposed into 32-bit pieces for them (which essentially means recognizing every other input is a 0). Fixes #139908 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 48 - llvm/test/CodeGen/AMDGPU/constrained-shift.ll | 6 +- llvm/test/CodeGen/AMDGPU/global-saddr-load.ll | 5 +- .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll | 14 +- llvm/test/CodeGen/AMDGPU/operand-folding.ll | 4 +- llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 198 -- .../AMDGPU/si-fold-operands-subreg-imm.mir| 2 +- 7 files changed, 242 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 92937e33fd500..d81f25c57af60 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -895,6 +895,8 @@ SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const { if (!SrcRC) return {}; + // TODO: Recognize 64-bit splats broken into 32-bit pieces (i.e. 
recognize + // every other other element is 0 for 64-bit immediates) int64_t Imm; for (unsigned I = 0, E = Defs.size(); I != E; ++I) { const MachineOperand *Op = Defs[I].first; @@ -924,10 +926,41 @@ MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat( if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return nullptr; - // FIXME: Verify SplatRC is compatible with the use operand - uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; - if (!TII->isInlineConstant(*SplatVal, OpTy) || - !TII->isOperandLegal(*UseMI, UseOpIdx, SplatVal)) + int16_t RCID = Desc.operands()[UseOpIdx].RegClass; + if (RCID == -1) +return nullptr; + + // Special case 0/-1, since when interpreted as a 64-bit element both halves + // have the same bits. Effectively this code does not handle 64-bit element + // operands correctly, as the incoming 64-bit constants are already split into + // 32-bit sequence elements. + // + // TODO: We should try to figure out how to interpret the reg_sequence as a + // split 64-bit splat constant, or use 64-bit pseudos for materializing f64 + // constants. + if (SplatVal->getImm() != 0 && SplatVal->getImm() != -1) { +const TargetRegisterClass *OpRC = TRI->getRegClass(RCID); +// We need to figure out the scalar type read by the operand. e.g. the MFMA +// operand will be AReg_128, and we want to check if it's compatible with an +// AReg_32 constant. 
+uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; +switch (OpTy) { +case AMDGPU::OPERAND_REG_INLINE_AC_INT32: +case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0); + break; +case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1); + break; +default: + return nullptr; +} + +if (!TRI->getCommonSubClass(OpRC, SplatRC)) + return nullptr; + } + + if (!TII->isOperandLegal(*UseMI, UseOpIdx, SplatVal)) return nullptr; return SplatVal; @@ -1039,14 +1072,13 @@ void SIFoldOperandsImpl::foldOperand( } } - if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList)) + if (RSUse->getSubReg() != RegSeqDstSubReg) continue; - if (RSUse->getSubReg() != RegSeqDstSubReg) + if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList)) continue; - foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList, - CopiesToReplace); + foldOperand(OpToFold, RSUseMI, OpNo, FoldList, CopiesToReplace); } return; diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll index af4ca2ad7120a..fb53e889b1158 100644 --- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll +++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll @@ -192,8 +192,10 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140582?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140582** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140582?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140582?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140581** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140581?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140582 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/140582 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes We need to consider the use instruction's interpretation of the bits, not the defined immediate without use context. This will regress some cases where we previously could match f64 inline constants. We can restore them by either using pseudo instructions to materialize f64 constants, or recognizing reg_sequence decomposed into 32-bit pieces for them (which essentially means recognizing every other input is a 0). Fixes #139908 --- Patch is 21.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140582.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+40-8) - (modified) llvm/test/CodeGen/AMDGPU/constrained-shift.ll (+4-2) - (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+4-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll (+11-3) - (modified) llvm/test/CodeGen/AMDGPU/operand-folding.ll (+3-1) - (modified) llvm/test/CodeGen/AMDGPU/packed-fp32.ll (+179-19) - (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+1-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 92937e33fd500..d81f25c57af60 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -895,6 +895,8 @@ SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const { if (!SrcRC) return {}; + // TODO: Recognize 64-bit splats broken into 32-bit pieces (i.e. 
recognize + // every other other element is 0 for 64-bit immediates) int64_t Imm; for (unsigned I = 0, E = Defs.size(); I != E; ++I) { const MachineOperand *Op = Defs[I].first; @@ -924,10 +926,41 @@ MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat( if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx)) return nullptr; - // FIXME: Verify SplatRC is compatible with the use operand - uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; - if (!TII->isInlineConstant(*SplatVal, OpTy) || - !TII->isOperandLegal(*UseMI, UseOpIdx, SplatVal)) + int16_t RCID = Desc.operands()[UseOpIdx].RegClass; + if (RCID == -1) +return nullptr; + + // Special case 0/-1, since when interpreted as a 64-bit element both halves + // have the same bits. Effectively this code does not handle 64-bit element + // operands correctly, as the incoming 64-bit constants are already split into + // 32-bit sequence elements. + // + // TODO: We should try to figure out how to interpret the reg_sequence as a + // split 64-bit splat constant, or use 64-bit pseudos for materializing f64 + // constants. + if (SplatVal->getImm() != 0 && SplatVal->getImm() != -1) { +const TargetRegisterClass *OpRC = TRI->getRegClass(RCID); +// We need to figure out the scalar type read by the operand. e.g. the MFMA +// operand will be AReg_128, and we want to check if it's compatible with an +// AReg_32 constant. 
+uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType; +switch (OpTy) { +case AMDGPU::OPERAND_REG_INLINE_AC_INT32: +case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0); + break; +case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1); + break; +default: + return nullptr; +} + +if (!TRI->getCommonSubClass(OpRC, SplatRC)) + return nullptr; + } + + if (!TII->isOperandLegal(*UseMI, UseOpIdx, SplatVal)) return nullptr; return SplatVal; @@ -1039,14 +1072,13 @@ void SIFoldOperandsImpl::foldOperand( } } - if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList)) + if (RSUse->getSubReg() != RegSeqDstSubReg) continue; - if (RSUse->getSubReg() != RegSeqDstSubReg) + if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList)) continue; - foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList, - CopiesToReplace); + foldOperand(OpToFold, RSUseMI, OpNo, FoldList, CopiesToReplace); } return; diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll index af4ca2ad7120a..fb53e889b1158 100644 --- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll +++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll @@ -192,8 +192,10 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) ; ; GISEL-LABEL: s_csh_v4i32: ; GISEL: ; %bb.0: -; GISEL-NEXT:s_and_b64 s[4:5], s[4:5], 31 -; GISEL-NEXT:s_and_b64 s[6:7], s[6:7], 31 +; GISEL-NEXT:s_mov_b32 s8, 31 +; GISEL-NEXT:s_mov_b32 s9, s8 +; GISEL-NEXT:s_and_b64 s[4:5], s[4:5], s[8:9] +; GISEL-NEXT:s_and_b64 s[6:7], s[6:7], s[8:9] ; GISEL-NEXT:s_lshl_b32 s8, s0, s4 ; GISEL-NEXT:s_lshl_b32 s9, s1, s5 ; GISEL-NEXT:s_lshl_b32 s10,
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/140587 This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. >From 1b34deb8f7a46a11723f4408773713b4ac538de0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 May 2025 20:02:54 +0200 Subject: [PATCH] AMDGPU: Remove redundant operand folding checks This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 -- 1 file changed, 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d94c2d8b03dff..3abc1be685e2e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList( return true; } - // Check the case where we might introduce a second constant operand to a - // scalar instruction - if (TII->isSALU(MI->getOpcode())) { -const MCInstrDesc &InstDesc = MI->getDesc(); -const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; - -// Fine if the operand can be encoded as an inline constant -if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) { - // Otherwise check for another constant - for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { -auto &Op = MI->getOperand(i); -if (OpNo != i && !Op.isReg() && -!TII->isInlineConstant(Op, InstDesc.operands()[i])) - return false; - } -} - } - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
https://github.com/rampitec approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/140582 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/140587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140587?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#140587** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140587?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140587?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a> * **#140580** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/140580?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>. https://github.com/llvm/llvm-project/pull/140587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. --- Full diff: https://github.com/llvm/llvm-project/pull/140587.diff 1 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (-18) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d94c2d8b03dff..3abc1be685e2e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList( return true; } - // Check the case where we might introduce a second constant operand to a - // scalar instruction - if (TII->isSALU(MI->getOpcode())) { -const MCInstrDesc &InstDesc = MI->getDesc(); -const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; - -// Fine if the operand can be encoded as an inline constant -if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) { - // Otherwise check for another constant - for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { -auto &Op = MI->getOperand(i); -if (OpNo != i && !Op.isReg() && -!TII->isInlineConstant(Op, InstDesc.operands()[i])) - return false; - } -} - } - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } `` https://github.com/llvm/llvm-project/pull/140587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Check for subreg match when folding through reg_sequence (PR #140582)
arsenm wrote: ### Merge activity * **May 19, 3:39 PM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/140582). https://github.com/llvm/llvm-project/pull/140582 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [utils][TableGen] Unify name qualifications in DirectiveEmitter (PR #140606)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/140606 Remove extraneous qualifications from names when - the name is explicitly enclosed by corresponding namespaces, and - the name is in a body of a function defined in corresponding namespaces. Otherwise add missing qualifications. This applies to individual sections of TableGen output, and makes name lookup independent of the context in which these sections are included. >From 7973dce4ffba843927cfef3a9a980999abeaa014 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 19 May 2025 14:31:19 -0500 Subject: [PATCH] [utils][TableGen] Unify name qualifications in DirectiveEmitter Remove extraneous qualifications from names when - the name is explicitly enclosed by corresponding namespaces, and - the name is in a body of a function defined in corresponding namespaces. Otherwise add missing qualifications. This applies to individual sections of TableGen output, and makes name lookup independent of the context in which these sections are included. 
--- llvm/test/TableGen/directive1.td | 64 +- llvm/test/TableGen/directive2.td | 42 +++ .../utils/TableGen/Basic/DirectiveEmitter.cpp | 116 -- 3 files changed, 106 insertions(+), 116 deletions(-) diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8270de5eb2132..3b2b4ca1b7031 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -98,7 +98,7 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Directive_enumSize = 1; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLD_dira = llvm::tdl::Directive::TDLD_dira; +// CHECK-NEXT: constexpr auto TDLD_dira = Directive::TDLD_dira; // CHECK-EMPTY: // CHECK-NEXT: enum class Clause { // CHECK-NEXT:TDLC_clausea, @@ -108,9 +108,9 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Clause_enumSize = 3; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLC_clausea = llvm::tdl::Clause::TDLC_clausea; -// CHECK-NEXT: constexpr auto TDLC_clauseb = llvm::tdl::Clause::TDLC_clauseb; -// CHECK-NEXT: constexpr auto TDLC_clausec = llvm::tdl::Clause::TDLC_clausec; +// CHECK-NEXT: constexpr auto TDLC_clausea = Clause::TDLC_clausea; +// CHECK-NEXT: constexpr auto TDLC_clauseb = Clause::TDLC_clauseb; +// CHECK-NEXT: constexpr auto TDLC_clausec = Clause::TDLC_clausec; // CHECK-EMPTY: // CHECK-NEXT: enum class AKind { // CHECK-NEXT:TDLCV_vala=1, @@ -118,18 +118,18 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT:TDLCV_valc=3, // CHECK-NEXT: }; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLCV_vala = llvm::tdl::AKind::TDLCV_vala; -// CHECK-NEXT: constexpr auto TDLCV_valb = llvm::tdl::AKind::TDLCV_valb; -// CHECK-NEXT: constexpr auto TDLCV_valc = llvm::tdl::AKind::TDLCV_valc; +// CHECK-NEXT: constexpr auto TDLCV_vala = AKind::TDLCV_vala; +// CHECK-NEXT: constexpr auto TDLCV_valb = AKind::TDLCV_valb; +// CHECK-NEXT: constexpr auto TDLCV_valc = AKind::TDLCV_valc; // CHECK-EMPTY: // 
CHECK-NEXT: // Enumeration helper functions -// CHECK-NEXT: LLVM_ABI Directive getTdlDirectiveKind(llvm::StringRef Str); +// CHECK-NEXT: LLVM_ABI Directive getTdlDirectiveKind(StringRef Str); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlDirectiveName(Directive D); +// CHECK-NEXT: LLVM_ABI StringRef getTdlDirectiveName(Directive D); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI Clause getTdlClauseKind(llvm::StringRef Str); +// CHECK-NEXT: LLVM_ABI Clause getTdlClauseKind(StringRef Str); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlClauseName(Clause C); +// CHECK-NEXT: LLVM_ABI StringRef getTdlClauseName(Clause C); // CHECK-EMPTY: // CHECK-NEXT: /// Return true if \p C is a valid clause for \p D in version \p Version. // CHECK-NEXT: LLVM_ABI bool isAllowedClauseForDirective(Directive D, Clause C, unsigned Version); @@ -138,8 +138,8 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT: LLVM_ABI Association getDirectiveAssociation(Directive D); // CHECK-NEXT: LLVM_ABI Category getDirectiveCategory(Directive D); // CHECK-NEXT: LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D); -// CHECK-NEXT: LLVM_ABI AKind getAKind(StringRef); -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlAKindName(AKind); +// CHECK-NEXT: LLVM_ABI AKind getAKind(StringRef Str); +// CHECK-NEXT: LLVM_ABI StringRef getTdlAKindName(AKind x); // CHECK-EMPTY: // CHECK-NEXT: } // namespace tdl // CHECK-NEXT: } // namespace llvm @@ -155,8 +155,8 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT:// Sets for dira // IMPL-EMPTY: // IMPL-NEXT:static allowedClauses_TDLD_dira { -// IMPL-NEXT: llvm::tdl::Clause::TDLC_clausea, -// IMPL-NEXT: llvm::tdl::Clause::TDLC_clauseb, +// IMPL-NEXT: Clause::TDLC_clausea, +// IMPL-NEXT: Clause::TDLC_clauseb, // IMPL-NEXT:
[llvm-branch-commits] [llvm] [utils][TableGen] Unify name qualifications in DirectiveEmitter (PR #140606)
llvmbot wrote: @llvm/pr-subscribers-tablegen Author: Krzysztof Parzyszek (kparzysz) Changes Remove extraneous qualifications from names when - the name is explicitly enclosed by corresponding namespaces, and - the name is in a body of a function defined in corresponding namespaces. Otherwise add missing qualifications. This applies to individual sections of TableGen output, and makes name lookup independent of the context in which these sections are included. --- Patch is 26.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140606.diff 3 Files Affected: - (modified) llvm/test/TableGen/directive1.td (+32-32) - (modified) llvm/test/TableGen/directive2.td (+21-21) - (modified) llvm/utils/TableGen/Basic/DirectiveEmitter.cpp (+53-63) ``diff diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8270de5eb2132..3b2b4ca1b7031 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -98,7 +98,7 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Directive_enumSize = 1; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLD_dira = llvm::tdl::Directive::TDLD_dira; +// CHECK-NEXT: constexpr auto TDLD_dira = Directive::TDLD_dira; // CHECK-EMPTY: // CHECK-NEXT: enum class Clause { // CHECK-NEXT:TDLC_clausea, @@ -108,9 +108,9 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Clause_enumSize = 3; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLC_clausea = llvm::tdl::Clause::TDLC_clausea; -// CHECK-NEXT: constexpr auto TDLC_clauseb = llvm::tdl::Clause::TDLC_clauseb; -// CHECK-NEXT: constexpr auto TDLC_clausec = llvm::tdl::Clause::TDLC_clausec; +// CHECK-NEXT: constexpr auto TDLC_clausea = Clause::TDLC_clausea; +// CHECK-NEXT: constexpr auto TDLC_clauseb = Clause::TDLC_clauseb; +// CHECK-NEXT: constexpr auto TDLC_clausec = Clause::TDLC_clausec; // CHECK-EMPTY: // CHECK-NEXT: enum class 
AKind { // CHECK-NEXT:TDLCV_vala=1, @@ -118,18 +118,18 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT:TDLCV_valc=3, // CHECK-NEXT: }; // CHECK-EMPTY: -// CHECK-NEXT: constexpr auto TDLCV_vala = llvm::tdl::AKind::TDLCV_vala; -// CHECK-NEXT: constexpr auto TDLCV_valb = llvm::tdl::AKind::TDLCV_valb; -// CHECK-NEXT: constexpr auto TDLCV_valc = llvm::tdl::AKind::TDLCV_valc; +// CHECK-NEXT: constexpr auto TDLCV_vala = AKind::TDLCV_vala; +// CHECK-NEXT: constexpr auto TDLCV_valb = AKind::TDLCV_valb; +// CHECK-NEXT: constexpr auto TDLCV_valc = AKind::TDLCV_valc; // CHECK-EMPTY: // CHECK-NEXT: // Enumeration helper functions -// CHECK-NEXT: LLVM_ABI Directive getTdlDirectiveKind(llvm::StringRef Str); +// CHECK-NEXT: LLVM_ABI Directive getTdlDirectiveKind(StringRef Str); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlDirectiveName(Directive D); +// CHECK-NEXT: LLVM_ABI StringRef getTdlDirectiveName(Directive D); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI Clause getTdlClauseKind(llvm::StringRef Str); +// CHECK-NEXT: LLVM_ABI Clause getTdlClauseKind(StringRef Str); // CHECK-EMPTY: -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlClauseName(Clause C); +// CHECK-NEXT: LLVM_ABI StringRef getTdlClauseName(Clause C); // CHECK-EMPTY: // CHECK-NEXT: /// Return true if \p C is a valid clause for \p D in version \p Version. 
// CHECK-NEXT: LLVM_ABI bool isAllowedClauseForDirective(Directive D, Clause C, unsigned Version); @@ -138,8 +138,8 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT: LLVM_ABI Association getDirectiveAssociation(Directive D); // CHECK-NEXT: LLVM_ABI Category getDirectiveCategory(Directive D); // CHECK-NEXT: LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D); -// CHECK-NEXT: LLVM_ABI AKind getAKind(StringRef); -// CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlAKindName(AKind); +// CHECK-NEXT: LLVM_ABI AKind getAKind(StringRef Str); +// CHECK-NEXT: LLVM_ABI StringRef getTdlAKindName(AKind x); // CHECK-EMPTY: // CHECK-NEXT: } // namespace tdl // CHECK-NEXT: } // namespace llvm @@ -155,8 +155,8 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT:// Sets for dira // IMPL-EMPTY: // IMPL-NEXT:static allowedClauses_TDLD_dira { -// IMPL-NEXT: llvm::tdl::Clause::TDLC_clausea, -// IMPL-NEXT: llvm::tdl::Clause::TDLC_clauseb, +// IMPL-NEXT: Clause::TDLC_clausea, +// IMPL-NEXT: Clause::TDLC_clauseb, // IMPL-NEXT:}; // IMPL-EMPTY: // IMPL-NEXT:static allowedOnceClauses_TDLD_dira { @@ -311,13 +311,13 @@ def TDL_DirA : Directive<"dira"> { // IMPL-EMPTY: // IMPL-NEXT: #include "llvm/Support/ErrorHandling.h" // IMPL-EMPTY: -// IMPL-NEXT: Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { -// IMPL-NEXT:return llvm::StringSwitch(Str) +// IMPL-NEXT: llvm::tdl::Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { +// IMPL-NEXT:return StringSwitch(Str) //
[llvm-branch-commits] [llvm] [utils][TableGen] Unify name qualifications in DirectiveEmitter (PR #140606)
kparzysz wrote: Previous PR: https://github.com/llvm/llvm-project/pull/140605 https://github.com/llvm/llvm-project/pull/140606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140587 >From 04c0bfd8afd6d0c22177ab74fcd563911b628de8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 May 2025 20:02:54 +0200 Subject: [PATCH] AMDGPU: Remove redundant operand folding checks This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 -- 1 file changed, 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index b62230d4dc28c..9bbc8e75fd31b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList( return true; } - // Check the case where we might introduce a second constant operand to a - // scalar instruction - if (TII->isSALU(MI->getOpcode())) { -const MCInstrDesc &InstDesc = MI->getDesc(); -const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; - -// Fine if the operand can be encoded as an inline constant -if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) { - // Otherwise check for another constant - for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { -auto &Op = MI->getOperand(i); -if (OpNo != i && !Op.isReg() && -!TII->isInlineConstant(Op, InstDesc.operands()[i])) - return false; - } -} - } - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140587 >From 04c0bfd8afd6d0c22177ab74fcd563911b628de8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 May 2025 20:02:54 +0200 Subject: [PATCH] AMDGPU: Remove redundant operand folding checks This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 -- 1 file changed, 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index b62230d4dc28c..9bbc8e75fd31b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList( return true; } - // Check the case where we might introduce a second constant operand to a - // scalar instruction - if (TII->isSALU(MI->getOpcode())) { -const MCInstrDesc &InstDesc = MI->getDesc(); -const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; - -// Fine if the operand can be encoded as an inline constant -if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) { - // Otherwise check for another constant - for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { -auto &Op = MI->getOperand(i); -if (OpNo != i && !Op.isReg() && -!TII->isInlineConstant(Op, InstDesc.operands()[i])) - return false; - } -} - } - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for #139317 (PR #140607)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/140607?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#140608** (https://app.graphite.dev/github/pr/llvm/llvm-project/140608?utm_source=stack-comment-icon) * **#140607** (https://app.graphite.dev/github/pr/llvm/llvm-project/140607?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/140607?utm_source=stack-comment-view-in-graphite) * **#140587** (https://app.graphite.dev/github/pr/llvm/llvm-project/140587?utm_source=stack-comment-icon) * **#140580** (https://app.graphite.dev/github/pr/llvm/llvm-project/140580?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/140607 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for #139317 (PR #140607)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/140607 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/140608?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#140608** (https://app.graphite.dev/github/pr/llvm/llvm-project/140608?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/140608?utm_source=stack-comment-view-in-graphite) * **#140607** (https://app.graphite.dev/github/pr/llvm/llvm-project/140607?utm_source=stack-comment-icon) * **#140587** (https://app.graphite.dev/github/pr/llvm/llvm-project/140587?utm_source=stack-comment-icon) * **#140580** (https://app.graphite.dev/github/pr/llvm/llvm-project/140580?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/140608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/140608 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for #139317 (PR #140607)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/140607 None >From 4021541f1a05550bf27348b67f551d4ba73ef9e1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 May 2025 08:50:59 +0200 Subject: [PATCH] AMDGPU: Add baseline tests for #139317 --- .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 320 ++ .../AMDGPU/fold-short-64-bit-literals.mir | 392 +- ...issue139317-bad-opsel-reg-sequence-fold.ll | 66 +++ .../si-fold-operands-subreg-imm.gfx942.mir| 202 + .../AMDGPU/si-fold-operands-subreg-imm.mir| 26 ++ 5 files changed, 1001 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir create mode 100644 llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir new file mode 100644 index 0..3021761f099fa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir @@ -0,0 +1,320 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_literal_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_literal_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 +; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec +; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:agpr_32 = COPY %0.sub0 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 +; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec +; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:agpr_32 = COPY %0.sub1 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub0 +; GCN-NEXT: $agpr0 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +%1:agpr_32 = COPY %0.sub0 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub1 +; GCN-NEXT: $agpr0 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_
[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for #139317 (PR #140607)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 43.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140607.diff 5 Files Affected: - (added) llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir (+320) - (modified) llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir (+387-5) - (added) llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll (+66) - (added) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir (+202) - (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+26) ``diff diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir new file mode 100644 index 0..3021761f099fa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir @@ -0,0 +1,320 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_literal_copy_to_areg_64 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_literal_copy_to_areg_64 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]] +; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 999, implicit $exec +%1:areg_64_align2 = COPY %0 +$agpr0_agpr1 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub0_to_agpr_32 +; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec +; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:agpr_32 = COPY %0.sub0 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_sub1_to_agpr_32 +; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec +; GCN-NEXT: $agpr0 = COPY [[V_ACCVGPR_WRITE_B32_e64_]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec +%1:agpr_32 = COPY %0.sub1 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... 
+ +--- +name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub0_to_agpr_32 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub0 +; GCN-NEXT: $agpr0 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +%1:agpr_32 = COPY %0.sub0 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... + +--- +name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 +tracksRegLiveness: true +body: | + bb.0: +; GCN-LABEL: name: v_mov_b64_pseudo_lit_copy_sub1_to_agpr_32 +; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B]].sub1 +; GCN-NEXT: $agpr0 = COPY [[COPY]] +; GCN-NEXT: S_ENDPGM 0 +%0:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329592, implicit $exec +%1:agpr_32 = COPY %0.sub1 +$agpr0 = COPY %1 +S_ENDPGM 0 + +... + + +--- +name: v_mov_b64_pseudo_imm_0_copy_to_av_64 +tracksRegLiveness: true +body: | + bb.0: +
[llvm-branch-commits] [llvm] [utils][TableGen] Unify name qualifications in DirectiveEmitter (PR #140606)
https://github.com/clementval approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/140606 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes We weren't fully respecting the type of a def of an immediate vs. the type at the use point. Refactor the folding logic to track the value to fold, as well as a subregister to apply to the underlying value. This is similar to how PeepholeOpt tracks subregisters (though only for pure copy-like instructions, no constants). Fixes #139317 --- Patch is 34.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140608.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+242-164) - (modified) llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+2-2) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9bbc8e75fd31b..eb7fb94e25f5c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -25,52 +25,151 @@ using namespace llvm; namespace { -struct FoldCandidate { - MachineInstr *UseMI; +/// Track a value we may want to fold into downstream users, applying +/// subregister extracts along the way. +struct FoldableDef { union { -MachineOperand *OpToFold; +MachineOperand *OpToFold = nullptr; uint64_t ImmToFold; int FrameIndexToFold; }; - int ShrinkOpcode; - unsigned UseOpNo; + + /// Register class of the originally defined value. + const TargetRegisterClass *DefRC = nullptr; + + /// Track the original defining instruction for the value. + const MachineInstr *DefMI = nullptr; + + /// Subregister to apply to the value at the use point. + unsigned DefSubReg = AMDGPU::NoSubRegister; + + /// Kind of value stored in the union. 
MachineOperand::MachineOperandType Kind; - bool Commuted; - FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, -bool Commuted_ = false, -int ShrinkOp = -1) : -UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), -Kind(FoldOp->getType()), -Commuted(Commuted_) { -if (FoldOp->isImm()) { - ImmToFold = FoldOp->getImm(); -} else if (FoldOp->isFI()) { - FrameIndexToFold = FoldOp->getIndex(); + FoldableDef() = delete; + FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC, + unsigned DefSubReg = AMDGPU::NoSubRegister) + : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) { + +if (FoldOp.isImm()) { + ImmToFold = FoldOp.getImm(); +} else if (FoldOp.isFI()) { + FrameIndexToFold = FoldOp.getIndex(); } else { - assert(FoldOp->isReg() || FoldOp->isGlobal()); - OpToFold = FoldOp; + assert(FoldOp.isReg() || FoldOp.isGlobal()); + OpToFold = &FoldOp; } + +DefMI = FoldOp.getParent(); } - FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm, -bool Commuted_ = false, int ShrinkOp = -1) - : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), -Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {} + FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC, + unsigned DefSubReg = AMDGPU::NoSubRegister) + : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg), +Kind(MachineOperand::MO_Immediate) {} + + /// Copy the current def and apply \p SubReg to the value. 
+ FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const { +FoldableDef Copy(*this); +Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg); +return Copy; + } + + bool isReg() const { return Kind == MachineOperand::MO_Register; } + + Register getReg() const { +assert(isReg()); +return OpToFold->getReg(); + } + + unsigned getSubReg() const { +assert(isReg()); +return OpToFold->getSubReg(); + } + + bool isImm() const { return Kind == MachineOperand::MO_Immediate; } bool isFI() const { return Kind == MachineOperand::MO_FrameIndex; } - bool isImm() const { -return Kind == MachineOperand::MO_Immediate; + int getFI() const { +assert(isFI()); +return FrameIndexToFold; } - bool isReg() const { -return Kind == MachineOperand::MO_Register; + bool isGlobal() const { return OpToFold->isGlobal(); } + + /// Return the effective immediate value defined by this instruction, after + /// application of any subregister extracts which may exist between the use + /// and def instruction. + std::optional getEffectiveImmVal() const { +assert(isImm()); +return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg); } - bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; } + /// Check if it is legal to fold this effective value into \p MI's \p OpNo +
[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/140608 We weren't fully respecting the type of a def of an immediate vs. the type at the use point. Refactor the folding logic to track the value to fold, as well as a subregister to apply to the underlying value. This is similar to how PeepholeOpt tracks subregisters (though only for pure copy-like instructions, no constants). Fixes #139317 >From 045021f4ed97861a352e9ebd43e1f1049021e987 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 May 2025 10:51:39 +0200 Subject: [PATCH] AMDGPU: Fix tracking subreg defs when folding through reg_sequence We weren't fully respecting the type of a def of an immediate vs. the type at the use point. Refactor the folding logic to track the value to fold, as well as a subregister to apply to the underlying value. This is similar to how PeepholeOpt tracks subregisters (though only for pure copy-like instructions, no constants). Fixes #139317 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 406 +++--- ...issue139317-bad-opsel-reg-sequence-fold.ll | 3 +- .../si-fold-operands-subreg-imm.gfx942.mir| 8 +- .../AMDGPU/si-fold-operands-subreg-imm.mir| 4 +- 4 files changed, 250 insertions(+), 171 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9bbc8e75fd31b..eb7fb94e25f5c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -25,52 +25,151 @@ using namespace llvm; namespace { -struct FoldCandidate { - MachineInstr *UseMI; +/// Track a value we may want to fold into downstream users, applying +/// subregister extracts along the way. +struct FoldableDef { union { -MachineOperand *OpToFold; +MachineOperand *OpToFold = nullptr; uint64_t ImmToFold; int FrameIndexToFold; }; - int ShrinkOpcode; - unsigned UseOpNo; + + /// Register class of the originally defined value. 
+ const TargetRegisterClass *DefRC = nullptr; + + /// Track the original defining instruction for the value. + const MachineInstr *DefMI = nullptr; + + /// Subregister to apply to the value at the use point. + unsigned DefSubReg = AMDGPU::NoSubRegister; + + /// Kind of value stored in the union. MachineOperand::MachineOperandType Kind; - bool Commuted; - FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, -bool Commuted_ = false, -int ShrinkOp = -1) : -UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), -Kind(FoldOp->getType()), -Commuted(Commuted_) { -if (FoldOp->isImm()) { - ImmToFold = FoldOp->getImm(); -} else if (FoldOp->isFI()) { - FrameIndexToFold = FoldOp->getIndex(); + FoldableDef() = delete; + FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC, + unsigned DefSubReg = AMDGPU::NoSubRegister) + : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) { + +if (FoldOp.isImm()) { + ImmToFold = FoldOp.getImm(); +} else if (FoldOp.isFI()) { + FrameIndexToFold = FoldOp.getIndex(); } else { - assert(FoldOp->isReg() || FoldOp->isGlobal()); - OpToFold = FoldOp; + assert(FoldOp.isReg() || FoldOp.isGlobal()); + OpToFold = &FoldOp; } + +DefMI = FoldOp.getParent(); } - FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm, -bool Commuted_ = false, int ShrinkOp = -1) - : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), -Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {} + FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC, + unsigned DefSubReg = AMDGPU::NoSubRegister) + : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg), +Kind(MachineOperand::MO_Immediate) {} + + /// Copy the current def and apply \p SubReg to the value. 
+ FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const { +FoldableDef Copy(*this); +Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg); +return Copy; + } + + bool isReg() const { return Kind == MachineOperand::MO_Register; } + + Register getReg() const { +assert(isReg()); +return OpToFold->getReg(); + } + + unsigned getSubReg() const { +assert(isReg()); +return OpToFold->getSubReg(); + } + + bool isImm() const { return Kind == MachineOperand::MO_Immediate; } bool isFI() const { return Kind == MachineOperand::MO_FrameIndex; } - bool isImm() const { -return Kind == MachineOperand::MO_Immediate; + int getFI() const { +assert(isFI()); +return FrameIndexToFold; } - bool isReg() const { -return Kind == MachineOperand::MO_Register; + bool isGlobal() const { return OpToFold->isGlobal(); } + + /// Return the effective immediate value defined by this instruction, after + /// application of any subregister extracts which
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Extract Info into JSON values (PR #138063)
@@ -162,15 +162,264 @@ Error MustacheHTMLGenerator::generateDocs( return Error::success(); } +static json::Value +extractValue(const Location &L, + std::optional<StringRef> RepositoryUrl = std::nullopt) { + Object Obj = Object(); + // Should there be Start/End line numbers? ilovepi wrote: No, I think that's still valid. This just has LineNumber, but we track Start/End. I think I actually added this when adapting/splitting up the patch. I can make it a TODO with more context. https://github.com/llvm/llvm-project/pull/138063 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
@@ -46,6 +46,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include +#include snehasish wrote: Yes, we need it for the `LocHashToCallSites` type but we were including it via ModuleSummaryIndex.h. https://github.com/llvm/llvm-project/pull/140505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [UBSan] Implement src:*=sanitize for UBSan (PR #140529)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/140529 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Gather resource names in DXIL resource analysis (PR #140633)
https://github.com/hekota created https://github.com/llvm/llvm-project/pull/140633 Gather resource names from `llvm.dx.resource.handlefrombinding` calls during DXIL resource analysis and add them to ResourceMap. Part 3/4 of llvm/llvm-project#105059 >From a73cdb9288a0f325d4c03a58daf6803e8d8d2ddf Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Mon, 19 May 2025 14:55:18 -0700 Subject: [PATCH] [DirectX] Gather resource names in resource analysis --- llvm/include/llvm/Analysis/DXILResource.h | 17 +++-- llvm/lib/Analysis/DXILResource.cpp| 35 - .../DXILResource/buffer-frombinding.ll| 39 +++--- llvm/unittests/Analysis/DXILResourceTest.cpp | 73 --- 4 files changed, 101 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index b6efd82bb308e..a274e2294561e 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -16,9 +16,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/DXILABI.h" -#include #include namespace llvm { @@ -32,6 +30,10 @@ class DXILResourceTypeMap; namespace dxil { +// Returns the resource name from dx_resource_handlefrombinding or +// dx_resource_handlefromimplicitbinding call +StringRef getResourceNameFromBindingCall(CallInst *CI); + /// The dx.RawBuffer target extension type /// /// `target("dx.RawBuffer", Type, IsWriteable, IsROV)` @@ -358,6 +360,7 @@ class ResourceInfo { private: ResourceBinding Binding; TargetExtType *HandleTy; + StringRef Name; GlobalVariable *Symbol = nullptr; public: @@ -365,10 +368,10 @@ class ResourceInfo { ResourceCounterDirection CounterDirection = ResourceCounterDirection::Unknown; ResourceInfo(uint32_t RecordID, uint32_t Space, uint32_t LowerBound, - uint32_t Size, TargetExtType *HandleTy, + uint32_t Size, TargetExtType *HandleTy, StringRef Name = "", GlobalVariable *Symbol = nullptr) : 
Binding{RecordID, Space, LowerBound, Size}, HandleTy(HandleTy), -Symbol(Symbol) {} +Name(Name), Symbol(Symbol) {} void setBindingID(unsigned ID) { Binding.RecordID = ID; } @@ -378,10 +381,12 @@ class ResourceInfo { const ResourceBinding &getBinding() const { return Binding; } TargetExtType *getHandleTy() const { return HandleTy; } - StringRef getName() const { return Symbol ? Symbol->getName() : ""; } + const StringRef getName() const { +return Name.empty() ? (Symbol ? Symbol->getName() : "") : Name; + } bool hasSymbol() const { return Symbol; } - GlobalVariable *createSymbol(Module &M, StructType *Ty, StringRef Name = ""); + GlobalVariable *createSymbol(Module &M, StructType *Ty); MDTuple *getAsMetadata(Module &M, dxil::ResourceTypeInfo &RTI) const; std::pair diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index 36b3901246285..8cc9316dfb667 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/FormatVariadic.h" -#include #include #include @@ -531,8 +530,7 @@ void ResourceTypeInfo::print(raw_ostream &OS, const DataLayout &DL) const { } } -GlobalVariable *ResourceInfo::createSymbol(Module &M, StructType *Ty, - StringRef Name) { +GlobalVariable *ResourceInfo::createSymbol(Module &M, StructType *Ty) { assert(!Symbol && "Symbol has already been created"); Symbol = new GlobalVariable(M, Ty, /*isConstant=*/true, GlobalValue::ExternalLinkage, @@ -659,6 +657,9 @@ ResourceInfo::getAnnotateProps(Module &M, dxil::ResourceTypeInfo &RTI) const { void ResourceInfo::print(raw_ostream &OS, dxil::ResourceTypeInfo &RTI, const DataLayout &DL) const { + if (!Name.empty()) +OS << " Name: " << Name << "\n"; + if (Symbol) { OS << " Symbol: "; Symbol->printAsOperand(OS); @@ -706,6 +707,29 @@ static bool isUpdateCounterIntrinsic(Function &F) { return F.getIntrinsicID() == Intrinsic::dx_resource_updatecounter; } 
+StringRef dxil::getResourceNameFromBindingCall(CallInst *CI) { + Value *Op = nullptr; + switch (CI->getCalledFunction()->getIntrinsicID()) { + default: +return ""; + case Intrinsic::dx_resource_handlefrombinding: + case Intrinsic::dx_resource_handlefromimplicitbinding: +Op = CI->getArgOperand(5); +break; + } + StringRef Name; + if (auto *GV = dyn_cast<GlobalVariable>(Op)) { +auto *CA = dyn_cast<ConstantDataArray>(GV->getInitializer()); +if (CA && CA->isString()) { + Name = CA->getAsString(); + // strip trailing 0 + if (Name.ends_with('\0')) +Name = Name.drop_back(1); +} + } + return Name; +} + void DXILResourceMap::popul
[llvm-branch-commits] [clang] [UBSan] Implement src:*=sanitize for UBSan (PR #140529)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/140529 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header (PR #140505)
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/140505 >From 305e2bdbab27828633afb3d1e2698002f7ccadda Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Mon, 19 May 2025 00:03:59 -0700 Subject: [PATCH] [NFC][MemProf] Move types shared between Analysis, ProfileData and ModuleSummary (Core) to a separate header --- .../include/llvm/Analysis/MemoryProfileInfo.h | 3 +- llvm/include/llvm/IR/ModuleSummaryIndex.h | 22 +- llvm/include/llvm/ProfileData/MemProfCommon.h | 43 +++ llvm/lib/ProfileData/CMakeLists.txt | 1 - .../Instrumentation/MemProfiler.cpp | 1 + 5 files changed, 47 insertions(+), 23 deletions(-) create mode 100644 llvm/include/llvm/ProfileData/MemProfCommon.h diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 1d98f86f50484..9fcb81a0a1b4c 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -13,8 +13,9 @@ #ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H #define LLVM_ANALYSIS_MEMORYPROFILEINFO_H +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/ProfileData/MemProfCommon.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 65e428a3adea7..23f9504b44fab 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -27,6 +27,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/MemProfCommon.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/InterleavedRange.h" @@ -306,14 +307,6 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return hash_value(I.getRef()); } }; -// For optional hinted size reporting, holds a pair of the full stack id -// (pre-trimming, from the full context 
in the profile), and the associated -// total profiled size. -struct ContextTotalSize { - uint64_t FullStackId; - uint64_t TotalSize; -}; - /// Summary of memprof callsite metadata. struct CallsiteInfo { // Actual callee function. @@ -350,19 +343,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) { return OS; } -// Allocation type assigned to an allocation reached by a given context. -// More can be added, now this is cold, notcold and hot. -// Values should be powers of two so that they can be ORed, in particular to -// track allocations that have different behavior with different calling -// contexts. -enum class AllocationType : uint8_t { - None = 0, - NotCold = 1, - Cold = 2, - Hot = 4, - All = 7 // This should always be set to the OR of all values. -}; - /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { // The allocation type for this profiled context. diff --git a/llvm/include/llvm/ProfileData/MemProfCommon.h b/llvm/include/llvm/ProfileData/MemProfCommon.h new file mode 100644 index 0..a638824ec000e --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfCommon.h @@ -0,0 +1,43 @@ +//===- MemProfCommon.h - MemProf support *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file contains common types used by different parts of the MemProf code. +// +//===--===// + +#ifndef LLVM_PROFILEDATA_MEMPROFCOMMON_H +#define LLVM_PROFILEDATA_MEMPROFCOMMON_H + +#include + +namespace llvm { + +// For optional hinted size reporting, holds a pair of the full stack id +// (pre-trimming, from the full context in the profile), and the associated +// total profiled size. +struct ContextTotalSize { + uint64_t FullStackId; + uint64_t TotalSize; +}; + +// Allocation type assigned to an allocation reached by a given context. 
+// More can be added, now this is cold, notcold and hot. +// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + Hot = 4, + All = 7 // This should always be set to the OR of all values. +}; + +} // namespace llvm + +#endif // LLVM_PROFILEDATA_MEMPROFCOMMON_H diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt index ca9ea3205ee1d..de60a655d5bd5 100644 --- a/llvm/lib/ProfileData/CMakeLists.txt +++ b/llvm/lib/ProfileData/CMakeLists.txt @@ -26,7 +26,6 @@ add_llvm_component_library(LLV
[llvm-branch-commits] [llvm] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord (PR #140502)
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/140502 >From 9028dc98ac740d72c9c6ad02e4503da5e9c02a13 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 16 May 2025 20:20:00 -0700 Subject: [PATCH 1/2] [NFC][MemProf] Move getGUID out of IndexedMemProfRecord --- llvm/include/llvm/ProfileData/MemProf.h | 10 ++-- llvm/include/llvm/ProfileData/MemProfYAML.h | 2 +- llvm/lib/ProfileData/MemProf.cpp | 2 +- llvm/lib/ProfileData/MemProfReader.cpp| 2 +- .../Instrumentation/MemProfiler.cpp | 4 +- llvm/unittests/ProfileData/MemProfTest.cpp| 20 .../Instrumentation/MemProfUseTest.cpp| 48 +-- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 0bc1432f7d198..215102c131fff 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -472,13 +472,13 @@ struct IndexedMemProfRecord { // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( llvm::function_ref(const CallStackId)> Callback) const; - - // Returns the GUID for the function name after canonicalization. For - // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are - // mapped to functions using this GUID. - static GlobalValue::GUID getGUID(const StringRef FunctionName); }; +// Returns the GUID for the function name after canonicalization. For +// memprof, we remove any .llvm suffix added by LTO. MemProfRecords are +// mapped to functions using this GUID. +GlobalValue::GUID getGUID(const StringRef FunctionName); + // Holds call site information with frame contents inline. 
struct CallSiteInfo { // The frames in the call stack diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h index 08dee253f615a..b642e3098aa0e 100644 --- a/llvm/include/llvm/ProfileData/MemProfYAML.h +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -46,7 +46,7 @@ template <> struct ScalarTraits { Val = Num; } else { // Otherwise, treat the input as a string containing a function name. - Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + Val = memprof::getGUID(Scalar); } return StringRef(); } diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index a9c5ee09a6daf..795e97bee38f5 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -343,7 +343,7 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord( return Record; } -GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { +GlobalValue::GUID getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop // those by default. This is by design to differentiate internal linkage diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index e0f280b9eb2f6..aca534b0a4c98 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -570,7 +570,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames( I++) { const auto &DIFrame = DI.getFrame(I); const uint64_t Guid = -IndexedMemProfRecord::getGUID(DIFrame.FunctionName); +memprof::getGUID(DIFrame.FunctionName); const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, // Only the last entry is not an inlined location. 
I != NumFrames - 1); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 375ff84f82ed2..5982476f3994e 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -865,8 +865,8 @@ memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, StringRef CallerName = DIL->getSubprogramLinkageName(); assert(!CallerName.empty() && "Be sure to enable -fdebug-info-for-profiling"); - uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); - uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); + uint64_t CallerGUID = memprof::getGUID(CallerName); + uint64_t CalleeGUID = memprof::getGUID(CalleeName); // Pretend that we are calling a function with GUID == 0 if we are // in the inline stack leading to a heap allocation function. if (IsAlloc) { diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 26b09698c7ea3..201ee2d7272cf 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemPr