llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Kyungwoo Lee (kyulee-com) <details> <summary>Changes</summary> This computes a structural hash while allowing for selective ignoring of certain operands based on a custom function that is provided. Instead of a single hash value, it now returns FunctionHashInfo which includes a hash value, an instruction mapping, and a map to track the operand location and its corresponding hash value that is ignored. Depends on https://github.com/llvm/llvm-project/pull/112621. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- Full diff: https://github.com/llvm/llvm-project/pull/112638.diff 3 Files Affected: - (modified) llvm/include/llvm/IR/StructuralHash.h (+46) - (modified) llvm/lib/IR/StructuralHash.cpp (+174-14) - (modified) llvm/unittests/IR/StructuralHashTest.cpp (+55) ``````````diff diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index aa292bc3446799..bc82c204c4d1f6 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,9 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StableHashing.h" +#include "llvm/IR/Instruction.h" #include <cstdint> namespace llvm { @@ -23,6 +25,7 @@ class Function; class Module; using IRHash = stable_hash; +using OpndHash = stable_hash; /// Returns a hash of the function \p F. /// \param F The function to hash. @@ -37,6 +40,49 @@ IRHash StructuralHash(const Function &F, bool DetailedHash = false); /// composed the module hash. IRHash StructuralHash(const Module &M, bool DetailedHash = false); +/// The pair of an instruction index and a operand index. +using IndexPair = std::pair<unsigned, unsigned>; + +/// A map from an instruction index to an instruction pointer. +using IndexInstrMap = MapVector<unsigned, Instruction *>; + +/// A map from an IndexPair to an OpndHash. +using IndexOperandHashMapType = DenseMap<IndexPair, OpndHash>; + +/// A function that takes an instruction and an operand index and returns true +/// if the operand should be ignored in the function hash computation. +using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>; + +struct FunctionHashInfo { + /// A hash value representing the structural content of the function + IRHash FunctionHash; + /// A mapping from instruction indices to instruction pointers + std::unique_ptr<IndexInstrMap> IndexInstruction; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. This can be used to analyze or + /// reconstruct the differences in ignored operands + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap; + + FunctionHashInfo(IRHash FuntionHash, + std::unique_ptr<IndexInstrMap> IndexInstruction, + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap) + : FunctionHash(FuntionHash), + IndexInstruction(std::move(IndexInstruction)), + IndexOperandHashMap(std::move(IndexOperandHashMap)) {} +}; + +/// Computes a structural hash of a given function, considering the structure +/// and content of the function's instructions while allowing for selective +/// ignoring of certain operands based on custom criteria. This hash can be used +/// to identify functions that are structurally similar or identical, which is +/// useful in optimizations, deduplication, or analysis tasks. +/// \param F The function to hash. +/// \param IgnoreOp A callable that takes an instruction and an operand index, +/// and returns true if the operand should be ignored in the hash computation. +/// \return A FunctionHashInfo structure +FunctionHashInfo StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp); + } // end namespace llvm #endif diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index a1fabab77d52b2..6e0af666010a05 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -28,6 +28,19 @@ class StructuralHashImpl { bool DetailedHash; + /// IgnoreOp is a function that returns true if the operand should be ignored. + IgnoreOperandFunc IgnoreOp = nullptr; + /// A mapping from instruction indices to instruction pointers. + /// The index represents the position of an instruction based on the order in + /// which it is first encountered. + std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. + std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; + + /// Assign a unique ID to each Value in the order they are first seen. + DenseMap<const Value *, int> ValueToId; + // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between @@ -47,24 +60,140 @@ class StructuralHashImpl { public: StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + explicit StructuralHashImpl(bool DetailedHash, + IgnoreOperandFunc IgnoreOp = nullptr) + : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { + if (IgnoreOp) { + IndexInstruction = std::make_unique<IndexInstrMap>(); + IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); + } + } - stable_hash hashConstant(Constant *C) { + stable_hash hashAPInt(const APInt &I) { SmallVector<stable_hash> Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast<Function>(C)) - // Hashing the name will be deterministic as LLVM's hashing infrastructure - // has explicit support for hashing strings and will not simply hash - // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + Hashes.emplace_back(I.getBitWidth()); + for (unsigned J = 0; J < I.getNumWords(); ++J) + Hashes.emplace_back((I.getRawData())[J]); + return stable_hash_combine(Hashes); + } + stable_hash hashAPFloat(const APFloat &F) { + SmallVector<stable_hash> Hashes; + const fltSemantics &S = F.getSemantics(); + Hashes.emplace_back(APFloat::semanticsPrecision(S)); + Hashes.emplace_back(APFloat::semanticsMaxExponent(S)); + Hashes.emplace_back(APFloat::semanticsMinExponent(S)); + Hashes.emplace_back(APFloat::semanticsSizeInBits(S)); + Hashes.emplace_back(hashAPInt(F.bitcastToAPInt())); return stable_hash_combine(Hashes); } + stable_hash hashGlobalValue(const GlobalValue *GV) { + if (!GV->hasName()) + return 0; + return stable_hash_name(GV->getName()); + } + + // Compute a hash for a Constant. This function is logically similar to + // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here + // we're interested in computing a hash rather than comparing two Constants. + // Some of the logic is simplified, e.g, we don't expand GEPOperator. + stable_hash hashConstant(Constant *C) { + SmallVector<stable_hash> Hashes; + + Type *Ty = C->getType(); + Hashes.emplace_back(hashType(Ty)); + + if (C->isNullValue()) { + Hashes.emplace_back(static_cast<stable_hash>('N')); + return stable_hash_combine(Hashes); + } + + auto *G = dyn_cast<GlobalValue>(C); + if (G) { + Hashes.emplace_back(hashGlobalValue(G)); + return stable_hash_combine(Hashes); + } + + if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); + } + + switch (C->getValueID()) { + case Value::UndefValueVal: + case Value::PoisonValueVal: + case Value::ConstantTokenNoneVal: { + return stable_hash_combine(Hashes); + } + case Value::ConstantIntVal: { + const APInt &Int = cast<ConstantInt>(C)->getValue(); + Hashes.emplace_back(hashAPInt(Int)); + return stable_hash_combine(Hashes); + } + case Value::ConstantFPVal: { + const APFloat &APF = cast<ConstantFP>(C)->getValueAPF(); + Hashes.emplace_back(hashAPFloat(APF)); + return stable_hash_combine(Hashes); + } + case Value::ConstantArrayVal: { + const ConstantArray *A = cast<ConstantArray>(C); + uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : A->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantStructVal: { + const ConstantStruct *S = cast<ConstantStruct>(C); + unsigned NumElements = cast<StructType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : S->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantVectorVal: { + const ConstantVector *V = cast<ConstantVector>(C); + unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : V->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantExprVal: { + const ConstantExpr *E = cast<ConstantExpr>(C); + unsigned NumOperands = E->getNumOperands(); + Hashes.emplace_back(NumOperands); + for (auto &Op : E->operands()) { + auto H = hashConstant(cast<Constant>(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::BlockAddressVal: { + const BlockAddress *BA = cast<BlockAddress>(C); + auto H = hashGlobalValue(BA->getFunction()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + case Value::DSOLocalEquivalentVal: { + const auto *Equiv = cast<DSOLocalEquivalent>(C); + auto H = hashGlobalValue(Equiv->getGlobalValue()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + default: // Unknown constant, abort. + llvm_unreachable("Constant ValueID not recognized."); + } + return Hash; + } + stable_hash hashValue(Value *V) { // Check constant and return its hash. Constant *C = dyn_cast<Constant>(V); @@ -76,6 +205,10 @@ class StructuralHashImpl { if (Argument *Arg = dyn_cast<Argument>(V)) Hashes.emplace_back(Arg->getArgNo()); + // Get an index (an insertion order) for the non-constant value. + auto I = ValueToId.insert({V, ValueToId.size()}); + Hashes.emplace_back(I.first->second); + return stable_hash_combine(Hashes); } @@ -100,8 +233,20 @@ class StructuralHashImpl { if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); - for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); + unsigned InstIdx = 0; + if (IndexInstruction) { + InstIdx = IndexInstruction->size(); + IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)}); + } + + for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { + auto OpndHash = hashOperand(Op); + if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { + assert(IndexOperandHashMap); + IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash}); + } else + Hashes.emplace_back(OpndHash); + } return stable_hash_combine(Hashes); } @@ -184,6 +329,12 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + std::unique_ptr<IndexInstrMap> getIndexInstrMap() { + return std::move(IndexInstruction); + } + std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { + return std::move(IndexOperandHashMap); + } }; } // namespace @@ -199,3 +350,12 @@ IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { H.update(M); return H.getHash(); } + +FunctionHashInfo +llvm::StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp) { + StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); + H.update(F); + return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), + H.getIndexPairOpndHashMap()); +} diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index 64e66aa5f97a6d..e9f18ed40bc65b 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) { EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } + +TEST(StructuralHashTest, Differences) { + LLVMContext Ctx; + std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f1(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + auto *F1 = M1->getFunction("f"); + std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f2(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + auto *F2 = M2->getFunction("g"); + + // They are originally different when not ignoring any operand. + EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true)); + EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash, + StructuralHashWithDifferences(*F2, nullptr).FunctionHash); + + // When we ignore the call target f1 vs f2, they have the same hash. + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && OpndIdx == 1; + }; + auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp); + auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp); + EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); + + // There are total 3 instructions. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u); + + // The only 1 operand (the call target) has been ignored. + EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u); + EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u); + + // The index pair of instruction and operand (1, 1) is a key in the map. + ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1})); + ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1})); + + // The indexed instruciton must be the call instruction as shown in the + // IgnoreOp above. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + + // The ignored operand hashes (for f1 vs. f2) are different. + EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}), + FuncHashInfo2.IndexOperandHashMap->lookup({1, 1})); +} + } // end anonymous namespace `````````` </details> https://github.com/llvm/llvm-project/pull/112638 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits