https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112638
>From 6225d74229d41068c57109a24b063f6fcba13985 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Wed, 16 Oct 2024 17:09:07 -0700 Subject: [PATCH 1/3] [StructuralHash] Support Differences This comutes a structural hash while allowing for selective ignoring of certain operands based on a custom function that is provided. Instead of a single hash value, it now returns FunctionHashInfo which includes a hash value, an instruction mapping, and a map to track the operand location and its corresponding hash value that is ignored. --- llvm/include/llvm/IR/StructuralHash.h | 46 ++++++ llvm/lib/IR/StructuralHash.cpp | 188 +++++++++++++++++++++-- llvm/unittests/IR/StructuralHashTest.cpp | 55 +++++++ 3 files changed, 275 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index aa292bc3446799..bc82c204c4d1f6 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,9 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StableHashing.h" +#include "llvm/IR/Instruction.h" #include <cstdint> namespace llvm { @@ -23,6 +25,7 @@ class Function; class Module; using IRHash = stable_hash; +using OpndHash = stable_hash; /// Returns a hash of the function \p F. /// \param F The function to hash. @@ -37,6 +40,49 @@ IRHash StructuralHash(const Function &F, bool DetailedHash = false); /// composed the module hash. IRHash StructuralHash(const Module &M, bool DetailedHash = false); +/// The pair of an instruction index and a operand index. +using IndexPair = std::pair<unsigned, unsigned>; + +/// A map from an instruction index to an instruction pointer. +using IndexInstrMap = MapVector<unsigned, Instruction *>; + +/// A map from an IndexPair to an OpndHash. +using IndexOperandHashMapType = DenseMap<IndexPair, OpndHash>; + +/// A function that takes an instruction and an operand index and returns true +/// if the operand should be ignored in the function hash computation. +using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>; + +struct FunctionHashInfo { + /// A hash value representing the structural content of the function + IRHash FunctionHash; + /// A mapping from instruction indices to instruction pointers + std::unique_ptr<IndexInstrMap> IndexInstruction; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. This can be used to analyze or + /// reconstruct the differences in ignored operands + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap; + + FunctionHashInfo(IRHash FuntionHash, + std::unique_ptr<IndexInstrMap> IndexInstruction, + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap) + : FunctionHash(FuntionHash), + IndexInstruction(std::move(IndexInstruction)), + IndexOperandHashMap(std::move(IndexOperandHashMap)) {} +}; + +/// Computes a structural hash of a given function, considering the structure +/// and content of the function's instructions while allowing for selective +/// ignoring of certain operands based on custom criteria. This hash can be used +/// to identify functions that are structurally similar or identical, which is +/// useful in optimizations, deduplication, or analysis tasks. +/// \param F The function to hash. +/// \param IgnoreOp A callable that takes an instruction and an operand index, +/// and returns true if the operand should be ignored in the hash computation. +/// \return A FunctionHashInfo structure +FunctionHashInfo StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp); + } // end namespace llvm #endif diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index a1fabab77d52b2..6e0af666010a05 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -28,6 +28,19 @@ class StructuralHashImpl { bool DetailedHash; + /// IgnoreOp is a function that returns true if the operand should be ignored. + IgnoreOperandFunc IgnoreOp = nullptr; + /// A mapping from instruction indices to instruction pointers. + /// The index represents the position of an instruction based on the order in + /// which it is first encountered. + std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; + + /// Assign a unique ID to each Value in the order they are first seen. + DenseMap<const Value *, int> ValueToId; + // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between @@ -47,24 +60,140 @@ class StructuralHashImpl { public: StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + explicit StructuralHashImpl(bool DetailedHash, + IgnoreOperandFunc IgnoreOp = nullptr) + : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { + if (IgnoreOp) { + IndexInstruction = std::make_unique<IndexInstrMap>(); + IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); + } + } - stable_hash hashConstant(Constant *C) { + stable_hash hashAPInt(const APInt &I) { SmallVector<stable_hash> Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast<Function>(C)) - // Hashing the name will be deterministic as LLVM's hashing infrastructure - // has explicit support for hashing strings and will not simply hash - // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + Hashes.emplace_back(I.getBitWidth()); + for (unsigned J = 0; J < I.getNumWords(); ++J) + Hashes.emplace_back((I.getRawData())[J]); + return stable_hash_combine(Hashes); + } + stable_hash hashAPFloat(const APFloat &F) { + SmallVector<stable_hash> Hashes; + const fltSemantics &S = F.getSemantics(); + Hashes.emplace_back(APFloat::semanticsPrecision(S)); + Hashes.emplace_back(APFloat::semanticsMaxExponent(S)); + Hashes.emplace_back(APFloat::semanticsMinExponent(S)); + Hashes.emplace_back(APFloat::semanticsSizeInBits(S)); + Hashes.emplace_back(hashAPInt(F.bitcastToAPInt())); return stable_hash_combine(Hashes); } + stable_hash hashGlobalValue(const GlobalValue *GV) { + if (!GV->hasName()) + return 0; + return stable_hash_name(GV->getName()); + } + + // Compute a hash for a Constant. This function is logically similar to + // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here + // we're interested in computing a hash rather than comparing two Constants. + // Some of the logic is simplified, e.g, we don't expand GEPOperator. + stable_hash hashConstant(Constant *C) { + SmallVector<stable_hash> Hashes; + + Type *Ty = C->getType(); + Hashes.emplace_back(hashType(Ty)); + + if (C->isNullValue()) { + Hashes.emplace_back(static_cast<stable_hash>('N')); + return stable_hash_combine(Hashes); + } + + auto *G = dyn_cast<GlobalValue>(C); + if (G) { + Hashes.emplace_back(hashGlobalValue(G)); + return stable_hash_combine(Hashes); + } + + if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); + } + + switch (C->getValueID()) { + case Value::UndefValueVal: + case Value::PoisonValueVal: + case Value::ConstantTokenNoneVal: { + return stable_hash_combine(Hashes); + } + case Value::ConstantIntVal: { + const APInt &Int = cast<ConstantInt>(C)->getValue(); + Hashes.emplace_back(hashAPInt(Int)); + return stable_hash_combine(Hashes); + } + case Value::ConstantFPVal: { + const APFloat &APF = cast<ConstantFP>(C)->getValueAPF(); + Hashes.emplace_back(hashAPFloat(APF)); + return stable_hash_combine(Hashes); + } + case Value::ConstantArrayVal: { + const ConstantArray *A = cast<ConstantArray>(C); + uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : A->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantStructVal: { + const ConstantStruct *S = cast<ConstantStruct>(C); + unsigned NumElements = cast<StructType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : S->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantVectorVal: { + const ConstantVector *V = cast<ConstantVector>(C); + unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : V->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantExprVal: { + const ConstantExpr *E = cast<ConstantExpr>(C); + unsigned NumOperands = E->getNumOperands(); + Hashes.emplace_back(NumOperands); + for (auto &Op : E->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::BlockAddressVal: { + const BlockAddress *BA = cast<BlockAddress>(C); + auto H = hashGlobalValue(BA->getFunction()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + case Value::DSOLocalEquivalentVal: { + const auto *Equiv = cast<DSOLocalEquivalent>(C); + auto H = hashGlobalValue(Equiv->getGlobalValue()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + default: // Unknown constant, abort. + llvm_unreachable("Constant ValueID not recognized."); + } + return Hash; + } + stable_hash hashValue(Value *V) { // Check constant and return its hash. Constant *C = dyn_cast<Constant>(V); @@ -76,6 +205,10 @@ class StructuralHashImpl { if (Argument *Arg = dyn_cast<Argument>(V)) Hashes.emplace_back(Arg->getArgNo()); + // Get an index (an insertion order) for the non-constant value. + auto I = ValueToId.insert({V, ValueToId.size()}); + Hashes.emplace_back(I.first->second); + return stable_hash_combine(Hashes); } @@ -100,8 +233,20 @@ class StructuralHashImpl { if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); - for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); + unsigned InstIdx = 0; + if (IndexInstruction) { + InstIdx = IndexInstruction->size(); + IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)}); + } + + for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { + auto OpndHash = hashOperand(Op); + if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { + assert(IndexOperandHashMap); + IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash}); + } else + Hashes.emplace_back(OpndHash); + } return stable_hash_combine(Hashes); } @@ -184,6 +329,12 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + std::unique_ptr<IndexInstrMap> getIndexInstrMap() { + return std::move(IndexInstruction); + } + std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { + return std::move(IndexOperandHashMap); + } }; } // namespace @@ -199,3 +350,12 @@ IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { H.update(M); return H.getHash(); } + +FunctionHashInfo +llvm::StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp) { + StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); + H.update(F); + return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), + H.getIndexPairOpndHashMap()); +} diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index 64e66aa5f97a6d..e9f18ed40bc65b 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) { EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } + +TEST(StructuralHashTest, Differences) { + LLVMContext Ctx; + std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f1(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + auto *F1 = M1->getFunction("f"); + std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f2(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + auto *F2 = M2->getFunction("g"); + + // They are originally different when not ignoring any operand. + EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true)); + EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash, + StructuralHashWithDifferences(*F2, nullptr).FunctionHash); + + // When we ignore the call target f1 vs f2, they have the same hash. + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && OpndIdx == 1; + }; + auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp); + auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp); + EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); + + // There are total 3 instructions. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u); + + // The only 1 operand (the call target) has been ignored. + EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u); + EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u); + + // The index pair of instruction and operand (1, 1) is a key in the map. + ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1})); + ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1})); + + // The indexed instruciton must be the call instruction as shown in the + // IgnoreOp above. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + + // The ignored operand hashes (for f1 vs. f2) are different. + EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}), + FuncHashInfo2.IndexOperandHashMap->lookup({1, 1})); +} + } // end anonymous namespace >From b877be6bfb11c7fa75c7d7bab44fe1c1755b5649 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Sat, 19 Oct 2024 00:41:14 -0700 Subject: [PATCH 2/3] Address comments from ellishg --- llvm/lib/IR/StructuralHash.cpp | 38 +++++++----------------- llvm/unittests/IR/StructuralHashTest.cpp | 18 +++++++---- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 6e0af666010a05..abf9a72b0d6fbe 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -72,20 +72,13 @@ class StructuralHashImpl { stable_hash hashAPInt(const APInt &I) { SmallVector<stable_hash> Hashes; Hashes.emplace_back(I.getBitWidth()); - for (unsigned J = 0; J < I.getNumWords(); ++J) - Hashes.emplace_back((I.getRawData())[J]); + auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); + Hashes.append(RawVals.begin(), RawVals.end()); return stable_hash_combine(Hashes); } stable_hash hashAPFloat(const APFloat &F) { - SmallVector<stable_hash> Hashes; - const fltSemantics &S = F.getSemantics(); - Hashes.emplace_back(APFloat::semanticsPrecision(S)); - Hashes.emplace_back(APFloat::semanticsMaxExponent(S)); - Hashes.emplace_back(APFloat::semanticsMinExponent(S)); - Hashes.emplace_back(APFloat::semanticsSizeInBits(S)); - Hashes.emplace_back(hashAPInt(F.bitcastToAPInt())); - return stable_hash_combine(Hashes); + return hashAPInt(F.bitcastToAPInt()); } stable_hash hashGlobalValue(const GlobalValue *GV) { @@ -109,8 +102,7 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - auto *G = dyn_cast<GlobalValue>(C); - if (G) { + if (auto *G = dyn_cast<GlobalValue>(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } @@ -138,8 +130,6 @@ class StructuralHashImpl { } case Value::ConstantArrayVal: { const ConstantArray *A = cast<ConstantArray>(C); - uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : A->operands()) { auto H = hashConstant(cast<Constant>(Op)); Hashes.emplace_back(H); @@ -148,8 +138,6 @@ class StructuralHashImpl { } case Value::ConstantStructVal: { const ConstantStruct *S = cast<ConstantStruct>(C); - unsigned NumElements = cast<StructType>(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : S->operands()) { auto H = hashConstant(cast<Constant>(Op)); Hashes.emplace_back(H); @@ -158,8 +146,6 @@ class StructuralHashImpl { } case Value::ConstantVectorVal: { const ConstantVector *V = cast<ConstantVector>(C); - unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : V->operands()) { auto H = hashConstant(cast<Constant>(Op)); Hashes.emplace_back(H); @@ -168,8 +154,6 @@ class StructuralHashImpl { } case Value::ConstantExprVal: { const ConstantExpr *E = cast<ConstantExpr>(C); - unsigned NumOperands = E->getNumOperands(); - Hashes.emplace_back(NumOperands); for (auto &Op : E->operands()) { auto H = hashConstant(cast<Constant>(Op)); Hashes.emplace_back(H); @@ -188,10 +172,10 @@ class StructuralHashImpl { Hashes.emplace_back(H); return stable_hash_combine(Hashes); } - default: // Unknown constant, abort. - llvm_unreachable("Constant ValueID not recognized."); + default: + // Skip other types of constants for simplicity. + return stable_hash_combine(Hashes); } - return Hash; } stable_hash hashValue(Value *V) { @@ -206,8 +190,8 @@ class StructuralHashImpl { Hashes.emplace_back(Arg->getArgNo()); // Get an index (an insertion order) for the non-constant value. - auto I = ValueToId.insert({V, ValueToId.size()}); - Hashes.emplace_back(I.first->second); + auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); + Hashes.emplace_back(It->second); return stable_hash_combine(Hashes); } @@ -236,14 +220,14 @@ class StructuralHashImpl { unsigned InstIdx = 0; if (IndexInstruction) { InstIdx = IndexInstruction->size(); - IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)}); + IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst)); } for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { auto OpndHash = hashOperand(Op); if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { assert(IndexOperandHashMap); - IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash}); + IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); } else Hashes.emplace_back(OpndHash); } diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index e9f18ed40bc65b..81c17120a1f6ff 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -10,6 +10,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" +#include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include <memory> @@ -18,6 +19,11 @@ using namespace llvm; namespace { +using testing::Contains; +using testing::Key; +using testing::Pair; +using testing::SizeIs; + std::unique_ptr<Module> parseIR(LLVMContext &Context, const char *IR) { SMDiagnostic Err; std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Context); @@ -271,16 +277,16 @@ TEST(StructuralHashTest, Differences) { EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); // There are total 3 instructions. - EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u); - EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u); + EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3)); + EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3)); // The only 1 operand (the call target) has been ignored. - EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u); - EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u); + EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u)); + EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u)); // The index pair of instruction and operand (1, 1) is a key in the map. - ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1})); - ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1})); + ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); // The indexed instruciton must be the call instruction as shown in the // IgnoreOp above. >From 925b8477879e7b4bc9931fbcd0977adc74aeed86 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Sat, 19 Oct 2024 09:18:45 -0700 Subject: [PATCH 3/3] Address comments from ilovepi --- llvm/include/llvm/Analysis/StructuralHash.h | 9 +++-- llvm/lib/Analysis/StructuralHash.cpp | 27 ++++++++++++-- llvm/lib/IR/StructuralHash.cpp | 37 +++---------------- llvm/lib/Passes/PassBuilder.cpp | 16 ++++++-- llvm/lib/Passes/PassRegistry.def | 7 ++-- .../StructuralHash/structural-hash-printer.ll | 24 +++++++++--- 6 files changed, 71 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 9f33c69aed345c..58846511807b6e 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -13,15 +13,18 @@ namespace llvm { +enum class StructuralHashOptions { None, Detailed, CallTargetIgnored }; + /// Printer pass for StructuralHashes class StructuralHashPrinterPass : public PassInfoMixin<StructuralHashPrinterPass> { raw_ostream &OS; - bool EnableDetailedStructuralHash; + const StructuralHashOptions Options; public: - explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed) - : OS(OS), EnableDetailedStructuralHash(Detailed) {} + explicit StructuralHashPrinterPass(raw_ostream &OS, + StructuralHashOptions Options) + : OS(OS), Options(Options) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp index 3a2341fe59ad9c..4f2e003148b606 100644 --- a/llvm/lib/Analysis/StructuralHash.cpp +++ b/llvm/lib/Analysis/StructuralHash.cpp @@ -21,14 +21,33 @@ using namespace llvm; PreservedAnalyses StructuralHashPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { OS << "Module Hash: " - << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash)) + << format("%016" PRIx64, + StructuralHash(M, Options != StructuralHashOptions::None)) << "\n"; for (Function &F : M) { if (F.isDeclaration()) continue; - OS << "Function " << F.getName() << " Hash: " - << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash)) - << "\n"; + if (Options == StructuralHashOptions::CallTargetIgnored) { + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && + isa<Constant>(I->getOperand(OpndIdx)); + }; + auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp); + OS << "Function " << F.getName() + << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash) + << "\n"; + for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = IndexPair; + OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash) + << " at (" << InstIndex << "," << OpndIndex << ")\n"; + } + } else { + OS << "Function " << F.getName() << " Hash: " + << format( + "%016" PRIx64, + StructuralHash(F, Options == StructuralHashOptions::Detailed)) + << "\n"; + } } return PreservedAnalyses::all(); } diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index abf9a72b0d6fbe..bc27a68d8b81d2 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -113,11 +113,6 @@ class StructuralHashImpl { } switch (C->getValueID()) { - case Value::UndefValueVal: - case Value::PoisonValueVal: - case Value::ConstantTokenNoneVal: { - return stable_hash_combine(Hashes); - } case Value::ConstantIntVal: { const APInt &Int = cast<ConstantInt>(C)->getValue(); Hashes.emplace_back(hashAPInt(Int)); @@ -128,33 +123,11 @@ class StructuralHashImpl { Hashes.emplace_back(hashAPFloat(APF)); return stable_hash_combine(Hashes); } - case Value::ConstantArrayVal: { - const ConstantArray *A = cast<ConstantArray>(C); - for (auto &Op : A->operands()) { - auto H = hashConstant(cast<Constant>(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } - case Value::ConstantStructVal: { - const ConstantStruct *S = cast<ConstantStruct>(C); - for (auto &Op : S->operands()) { - auto H = hashConstant(cast<Constant>(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } - case Value::ConstantVectorVal: { - const ConstantVector *V = cast<ConstantVector>(C); - for (auto &Op : V->operands()) { - auto H = hashConstant(cast<Constant>(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } + case Value::ConstantArrayVal: + case Value::ConstantStructVal: + case Value::ConstantVectorVal: case Value::ConstantExprVal: { - const ConstantExpr *E = cast<ConstantExpr>(C); - for (auto &Op : E->operands()) { + for (const auto &Op : C->operands()) { auto H = hashConstant(cast<Constant>(Op)); Hashes.emplace_back(H); } @@ -313,9 +286,11 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + std::unique_ptr<IndexInstrMap> getIndexInstrMap() { return std::move(IndexInstruction); } + std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { return std::move(IndexOperandHashMap); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f47f8ca35d7737..84a81c35d75508 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1168,9 +1168,19 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) { return Result; } -Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) { - return PassBuilder::parseSinglePassOption(Params, "detailed", - "StructuralHashPrinterPass"); +Expected<StructuralHashOptions> +parseStructuralHashPrinterPassOptions(StringRef Params) { + if (Params.empty()) + return StructuralHashOptions::None; + else if (Params == "detailed") + return StructuralHashOptions::Detailed; + else if (Params == "call-target-ignored") + return StructuralHashOptions::CallTargetIgnored; + else + return make_error<StringError>( + formatv("invalid structural hash printer parameter '{0}' ", Params) + .str(), + inconvertibleErrorCode()); } Expected<bool> parseWinEHPrepareOptions(StringRef Params) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 90859c18c4f499..b23ca3b8d00c8b 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS( parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N") MODULE_PASS_WITH_PARAMS( "print<structural-hash>", "StructuralHashPrinterPass", - [](bool EnableDetailedStructuralHash) { - return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash); + [](StructuralHashOptions Options) { + return StructuralHashPrinterPass(dbgs(), Options); }, - parseStructuralHashPrinterPassOptions, "detailed") + parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored") + #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll index 5936199bf32f43..3c23b54d297369 100644 --- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll +++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll @@ -1,17 +1,21 @@ ; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH +; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH ; Add a declaration so that we can test we skip it. -declare i64 @d1() +declare i64 @d1(i64) +declare i64 @e1(i64) define i64 @f1(i64 %a) { %b = add i64 %a, 1 - ret i64 %b + %c = call i64 @d1(i64 %b) + ret i64 %c } -define i32 @f2(i32 %a) { - %b = add i32 %a, 2 - ret i32 %b +define i64 @f2(i64 %a) { + %b = add i64 %a, 1 + %c = call i64 @e1(i64 %b) + ret i64 %c } ; CHECK: Module Hash: {{([a-f0-9]{16,})}} @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) { ; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]] ; DETAILED-HASH-NOT: [[DF1H]] ; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}} + +; When ignoring the call target, check if `f1` and `f2` produce the same function hash. +; The index for the call instruction is 1, and the index of the call target operand is 1. +; The ignored operand hashes for different call targets should be different. +; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}} +; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1) +; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]] +; CALLTARGETIGNORED-HASH-NOT: [[IO1H]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits