[llvm-branch-commits] [llvm] [MIR2Vec] Add embedder for machine instructions (PR #162161)
https://github.com/svkeerthy updated
https://github.com/llvm/llvm-project/pull/162161
>From 6fa7a86b61886f8b7242aea92f0198c819b22090 Mon Sep 17 00:00:00 2001
From: svkeerthy
Date: Mon, 6 Oct 2025 21:15:14 +
Subject: [PATCH] MIR2Vec embedding
---
llvm/include/llvm/CodeGen/MIR2Vec.h | 108 ++
llvm/include/llvm/CodeGen/Passes.h| 4 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/CodeGen.cpp | 1 +
llvm/lib/CodeGen/MIR2Vec.cpp | 194 ++-
.../Inputs/mir2vec_dummy_3D_vocab.json| 22 ++
llvm/test/CodeGen/MIR2Vec/if-else.mir | 144
.../MIR2Vec/mir2vec-basic-symbolic.mir| 76
llvm/tools/llc/llc.cpp| 15 +
llvm/unittests/CodeGen/MIR2VecTest.cpp| 324 --
10 files changed, 862 insertions(+), 27 deletions(-)
create mode 100644 llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json
create mode 100644 llvm/test/CodeGen/MIR2Vec/if-else.mir
create mode 100644 llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir
diff --git a/llvm/include/llvm/CodeGen/MIR2Vec.h
b/llvm/include/llvm/CodeGen/MIR2Vec.h
index ea68b4594a2ad..ebafe4ccddff3 100644
--- a/llvm/include/llvm/CodeGen/MIR2Vec.h
+++ b/llvm/include/llvm/CodeGen/MIR2Vec.h
@@ -51,11 +51,21 @@ class LLVMContext;
class MIR2VecVocabLegacyAnalysis;
class TargetInstrInfo;
+enum class MIR2VecKind { Symbolic };
+
namespace mir2vec {
+
+// Forward declarations
+class MIREmbedder;
+class SymbolicMIREmbedder;
+
extern llvm::cl::OptionCategory MIR2VecCategory;
extern cl::opt OpcWeight;
using Embedding = ir2vec::Embedding;
+using MachineInstEmbeddingsMap = DenseMap;
+using MachineBlockEmbeddingsMap =
+DenseMap;
/// Class for storing and accessing the MIR2Vec vocabulary.
/// The MIRVocabulary class manages seed embeddings for LLVM Machine IR
@@ -132,6 +142,79 @@ class MIRVocabulary {
assert(isValid() && "Invalid vocabulary");
return Storage.size();
}
+
+ /// Create a dummy vocabulary for testing purposes.
+ static MIRVocabulary createDummyVocabForTest(const TargetInstrInfo &TII,
+ unsigned Dim = 1);
+};
+
+/// Base class for MIR embedders.
+class MIREmbedder {
+protected:
+ const MachineFunction &MF;
+ const MIRVocabulary &Vocab;
+
+ /// Dimension of the embeddings; captured from the vocabulary.
+ const unsigned Dimension;
+
+ /// Weight for opcode embeddings.
+ const float OpcWeight;
+
+ // Caches for the vector representations of instructions, basic blocks and
+ // the function; mutable, presumably so the const accessors can fill them.
+ mutable Embedding MFuncVector;
+ mutable MachineBlockEmbeddingsMap MBBVecMap;
+ mutable MachineInstEmbeddingsMap MInstVecMap;
+
+ MIREmbedder(const MachineFunction &MF, const MIRVocabulary &Vocab);
+
+ /// Function to compute embeddings. It generates embeddings for all
+ /// the instructions and basic blocks in the machine function MF.
+ void computeEmbeddings() const;
+
+ /// Function to compute the embedding for a given basic block.
+ /// Specific to the kind of embeddings being computed.
+ virtual void computeEmbeddings(const MachineBasicBlock &MBB) const = 0;
+
+public:
+ virtual ~MIREmbedder() = default;
+
+ /// Factory method to create an Embedder object of the specified kind.
+ /// Returns nullptr if the requested kind is not supported.
+ static std::unique_ptr create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab);
+
+ /// Returns a map containing machine instructions and the corresponding
+ /// embeddings for the machine function MF if it has been computed. If not,
it
+ /// computes the embeddings for MF and returns the map.
+ const MachineInstEmbeddingsMap &getMInstVecMap() const;
+
+ /// Returns a map containing machine basic blocks and the corresponding
+ /// embeddings for the machine function MF if it has been computed. If not,
it
+ /// computes the embeddings for MF and returns the map.
+ const MachineBlockEmbeddingsMap &getMBBVecMap() const;
+
+ /// Returns the embedding for a given machine basic block in the machine
+ /// function MF if it has been computed. If not, it computes the embedding
for
+ /// MBB and returns it.
+ const Embedding &getMBBVector(const MachineBasicBlock &MBB) const;
+
+ /// Computes and returns the embedding for the current machine function.
+ const Embedding &getMFunctionVector() const;
+};
+
+/// Class for computing Symbolic embeddings
+/// Symbolic embeddings are constructed based on the entity-level
+/// representations obtained from the MIR Vocabulary.
+class SymbolicMIREmbedder : public MIREmbedder {
+private:
+ void computeEmbeddings(const MachineBasicBlock &MBB) const override;
+
+public:
+ SymbolicMIREmbedder(const MachineFunction &F, const MIRVocabulary &Vocab);
+ static std::unique_ptr
+ create(const MachineFun
[llvm-branch-commits] [llvm] Refactor getConstantSectionPrefix to prepare for the next change (PR #162388)
https://github.com/mingmingl-llvm created
https://github.com/llvm/llvm-project/pull/162388
None
>From 7f1da8e97ebbf1f068a894cce5728f3a81c16e5a Mon Sep 17 00:00:00 2001
From: mingmingl
Date: Tue, 7 Oct 2025 15:12:54 -0700
Subject: [PATCH] Refactor getConstantSectionPrefix to prepare for the next
change
---
.../llvm/Analysis/StaticDataProfileInfo.h | 21 +++-
llvm/lib/Analysis/StaticDataProfileInfo.cpp | 48 +--
2 files changed, 52 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
index f06e7ceaa74ce..70199a904f320 100644
--- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV);
/// profile information and provides methods to operate on them.
class StaticDataProfileInfo {
public:
- /// Accummulate the profile count of a constant that will be lowered to
static
- /// data sections.
+ /// A constant is tracked only if the following conditions are met.
+ /// 1) It has local (i.e., private or internal) linkage.
+ /// 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}.
+ /// 3) It's eligible for section prefix annotation. See `AnnotationKind`
+ /// above for ineligible reasons.
DenseMap ConstantProfileCounts;
/// Keeps track of the constants that are seen at least once without profile
@@ -44,6 +47,20 @@ class StaticDataProfileInfo {
LLVM_ABI std::optional
getConstantProfileCount(const Constant *C) const;
+ enum class StaticDataHotness : uint8_t {
+Cold = 0, // hotnessToStr() maps this to "unlikely".
+LukewarmOrUnknown = 1, // hotnessToStr() maps this to "".
+Hot = 2, // hotnessToStr() maps this to "hot".
+ };
+
+ /// Return the hotness of the constant \p C based on its profile count \p
+ /// Count.
+ LLVM_ABI StaticDataHotness getSectionHotnessUsingProfileCount(
+ const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const;
+
+ /// Return the string representation of the hotness enum \p Hotness.
+ LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const;
+
public:
StaticDataProfileInfo() = default;
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index ff4582ca7eeb1..b954338d33e8e 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -57,6 +57,37 @@ void StaticDataProfileInfo::addConstantProfileCount(
OriginalCount = getInstrMaxCountValue();
}
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getSectionHotnessUsingProfileCount(
+const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const {
+ // The accumulated counter shows the constant is hot. Return Hot whether
+ // this variable is seen by unprofiled functions or not.
+ if (PSI->isHotCount(Count))
+return StaticDataHotness::Hot;
+ // The constant is not hot, and seen by unprofiled functions. We don't want
to
+ // assign it to unlikely sections, even if the counter says 'cold'. So return
+ // LukewarmOrUnknown before checking whether the counter is cold.
+ if (ConstantWithoutCounts.count(C))
+return StaticDataHotness::LukewarmOrUnknown;
+ // The accumulated counter shows the constant is cold. Return Cold.
+ if (PSI->isColdCount(Count))
+return StaticDataHotness::Cold;
+ // The counter is neither hot nor cold. Return LukewarmOrUnknown.
+ return StaticDataHotness::LukewarmOrUnknown;
+}
+
+StringRef StaticDataProfileInfo::hotnessToStr(
+StaticDataProfileInfo::StaticDataHotness Hotness) const {
+ switch (Hotness) {
+ case StaticDataProfileInfo::StaticDataHotness::Cold:
+return "unlikely";
+ case StaticDataProfileInfo::StaticDataHotness::Hot:
+return "hot";
+ default: // LukewarmOrUnknown (or any other value) maps to the empty string.
+return "";
+ }
+}
+
std::optional
StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
auto I = ConstantProfileCounts.find(C);
@@ -67,23 +98,10 @@ StaticDataProfileInfo::getConstantProfileCount(const
Constant *C) const {
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
const Constant *C, const ProfileSummaryInfo *PSI) const {
- auto Count = getConstantProfileCount(C);
+ std::optional Count = getConstantProfileCount(C);
if (!Count)
return "";
- // The accummulated counter shows the constant is hot. Return 'hot' whether
- // this variable is seen by unprofiled functions or not.
- if (PSI->isHotCount(*Count))
-return "hot";
- // The constant is not hot, and seen by unprofiled functions. We don't want
to
- // assign it to unlikely sections, even if the counter says 'cold'. So return
- // an empty prefix before checking whether the counter is cold.
- if (ConstantWithoutCounts.count(C))
-return "";
- // The accummulated counter shows the constant is cold. Return 'unlikely'.
- if (PSI->isColdCount(*Count))
-return "unlikely";
- // The counter says lukewarm. Return an empty prefix.
- return "";
+ return hotnessToStr(getSectionHotnessUsingProfileCount(C, PSI, *C
[llvm-branch-commits] [llvm] [StaticDataLayout] Factor out a helper function for section prefix eligibility and use it in both optimizer and codegen (PR #162348)
@@ -75,7 +75,7 @@ bool StaticDataAnnotator::runOnModule(Module &M) {
bool Changed = false;
for (auto &GV : M.globals()) {
-if (GV.isDeclarationForLinker())
+if (!llvm::memprof::IsAnnotationOK(GV))
mingmingl-llvm wrote:
yeah I agree.
I think this PR is more of an 'NFCI' (no functional change intended), i.e. no
user-visible change is intended. I removed the 'NFC' tag so that (unfamiliar)
readers won't be (unintentionally) misled.
https://github.com/llvm/llvm-project/pull/162348
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MIR2Vec] Add embedder for machine instructions (PR #162161)
https://github.com/svkeerthy updated
https://github.com/llvm/llvm-project/pull/162161
>From fa573657fc3f0ce6f3bc12d45d690003a2312a50 Mon Sep 17 00:00:00 2001
From: svkeerthy
Date: Mon, 6 Oct 2025 21:15:14 +
Subject: [PATCH] MIR2Vec embedding
---
llvm/include/llvm/CodeGen/MIR2Vec.h | 108 ++
llvm/include/llvm/CodeGen/Passes.h| 4 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/CodeGen.cpp | 1 +
llvm/lib/CodeGen/MIR2Vec.cpp | 195 ++-
.../Inputs/mir2vec_dummy_3D_vocab.json| 22 ++
llvm/test/CodeGen/MIR2Vec/if-else.mir | 144
.../MIR2Vec/mir2vec-basic-symbolic.mir| 76
llvm/tools/llc/llc.cpp| 15 +
llvm/unittests/CodeGen/MIR2VecTest.cpp| 324 --
10 files changed, 863 insertions(+), 27 deletions(-)
create mode 100644 llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_3D_vocab.json
create mode 100644 llvm/test/CodeGen/MIR2Vec/if-else.mir
create mode 100644 llvm/test/CodeGen/MIR2Vec/mir2vec-basic-symbolic.mir
diff --git a/llvm/include/llvm/CodeGen/MIR2Vec.h
b/llvm/include/llvm/CodeGen/MIR2Vec.h
index ea68b4594a2ad..ebafe4ccddff3 100644
--- a/llvm/include/llvm/CodeGen/MIR2Vec.h
+++ b/llvm/include/llvm/CodeGen/MIR2Vec.h
@@ -51,11 +51,21 @@ class LLVMContext;
class MIR2VecVocabLegacyAnalysis;
class TargetInstrInfo;
+enum class MIR2VecKind { Symbolic };
+
namespace mir2vec {
+
+// Forward declarations
+class MIREmbedder;
+class SymbolicMIREmbedder;
+
extern llvm::cl::OptionCategory MIR2VecCategory;
extern cl::opt OpcWeight;
using Embedding = ir2vec::Embedding;
+using MachineInstEmbeddingsMap = DenseMap;
+using MachineBlockEmbeddingsMap =
+DenseMap;
/// Class for storing and accessing the MIR2Vec vocabulary.
/// The MIRVocabulary class manages seed embeddings for LLVM Machine IR
@@ -132,6 +142,79 @@ class MIRVocabulary {
assert(isValid() && "Invalid vocabulary");
return Storage.size();
}
+
+ /// Create a dummy vocabulary for testing purposes.
+ static MIRVocabulary createDummyVocabForTest(const TargetInstrInfo &TII,
+ unsigned Dim = 1);
+};
+
+/// Base class for MIR embedders.
+class MIREmbedder {
+protected:
+ const MachineFunction &MF;
+ const MIRVocabulary &Vocab;
+
+ /// Dimension of the embeddings; captured from the vocabulary.
+ const unsigned Dimension;
+
+ /// Weight for opcode embeddings.
+ const float OpcWeight;
+
+ // Caches for the vector representations of instructions, basic blocks and
+ // the function; mutable, presumably so the const accessors can fill them.
+ mutable Embedding MFuncVector;
+ mutable MachineBlockEmbeddingsMap MBBVecMap;
+ mutable MachineInstEmbeddingsMap MInstVecMap;
+
+ MIREmbedder(const MachineFunction &MF, const MIRVocabulary &Vocab);
+
+ /// Function to compute embeddings. It generates embeddings for all
+ /// the instructions and basic blocks in the machine function MF.
+ void computeEmbeddings() const;
+
+ /// Function to compute the embedding for a given basic block.
+ /// Specific to the kind of embeddings being computed.
+ virtual void computeEmbeddings(const MachineBasicBlock &MBB) const = 0;
+
+public:
+ virtual ~MIREmbedder() = default;
+
+ /// Factory method to create an Embedder object of the specified kind.
+ /// Returns nullptr if the requested kind is not supported.
+ static std::unique_ptr create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab);
+
+ /// Returns a map containing machine instructions and the corresponding
+ /// embeddings for the machine function MF if it has been computed. If not,
it
+ /// computes the embeddings for MF and returns the map.
+ const MachineInstEmbeddingsMap &getMInstVecMap() const;
+
+ /// Returns a map containing machine basic blocks and the corresponding
+ /// embeddings for the machine function MF if it has been computed. If not,
it
+ /// computes the embeddings for MF and returns the map.
+ const MachineBlockEmbeddingsMap &getMBBVecMap() const;
+
+ /// Returns the embedding for a given machine basic block in the machine
+ /// function MF if it has been computed. If not, it computes the embedding
for
+ /// MBB and returns it.
+ const Embedding &getMBBVector(const MachineBasicBlock &MBB) const;
+
+ /// Computes and returns the embedding for the current machine function.
+ const Embedding &getMFunctionVector() const;
+};
+
+/// Class for computing Symbolic embeddings
+/// Symbolic embeddings are constructed based on the entity-level
+/// representations obtained from the MIR Vocabulary.
+class SymbolicMIREmbedder : public MIREmbedder {
+private:
+ void computeEmbeddings(const MachineBasicBlock &MBB) const override;
+
+public:
+ SymbolicMIREmbedder(const MachineFunction &F, const MIRVocabulary &Vocab);
+ static std::unique_ptr
+ create(const MachineFun
[llvm-branch-commits] [flang] [flang][openacc] map data operand results to symbols inside compute region (PR #162306)
https://github.com/vzakhari approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/162306 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][SpecialCaseList] Rename SpecialCaseList::inSectionBlame and move into Section (PR #162390)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/162390 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][SpecialCaseList] Rename SpecialCaseList::inSectionBlame and move into Section (PR #162390)
https://github.com/qinkunbao approved this pull request. https://github.com/llvm/llvm-project/pull/162390 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][openacc] map data operand results to symbols inside compute region (PR #162306)
https://github.com/razvanlupusoru approved this pull request. Thank you! https://github.com/llvm/llvm-project/pull/162306 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
