================ @@ -0,0 +1,637 @@ +//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the IRNormalizer class which aims to transform LLVM +/// Modules into a canonical form by reordering and renaming instructions while +/// preserving the same semantics. The normalizer makes it easier to spot +/// semantic differences while diffing two modules which have undergone +/// different passes. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/IRNormalizer.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include <algorithm> +#include <vector> + +#define DEBUG_TYPE "normalize" + +using namespace llvm; + +namespace { +/// IRNormalizer aims to transform LLVM IR into canonical form. +class IRNormalizer { +public: + /// \name Normalizer flags. + /// @{ + /// Preserves original order of instructions. + static cl::opt<bool> PreserveOrder; + /// Renames all instructions (including user-named). + static cl::opt<bool> RenameAll; + /// Folds all regular instructions (including pre-outputs). + static cl::opt<bool> FoldPreoutputs; + /// Sorts and reorders operands in commutative instructions. + static cl::opt<bool> ReorderOperands; + /// @} + + bool runOnFunction(Function &F); + +private: + // Random constant for hashing, so the state isn't zero. + const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL; + DenseSet<const Instruction *> NamedInstructions; + + /// \name Naming. + /// @{ + void nameFunctionArguments(Function &F); + void nameBasicBlocks(Function &F); + void nameInstruction(Instruction *I); + void nameAsInitialInstruction(Instruction *I); + void nameAsRegularInstruction(Instruction *I); + void foldInstructionName(Instruction *I); + /// @} + + /// \name Reordering. + /// @{ + void reorderInstructions(SmallVector<Instruction *, 16> &Outputs); + void reorderInstruction(Instruction *Used, Instruction *User, + SmallPtrSet<const Instruction *, 32> &Visited); + void reorderInstructionOperandsByNames(Instruction *I); + void reorderPHIIncomingValues(PHINode *PN); + /// @} + + /// \name Utility methods. + /// @{ + SmallVector<Instruction *, 16> collectOutputInstructions(Function &F); + bool isOutput(const Instruction *I); + bool isInitialInstruction(const Instruction *I); + bool hasOnlyImmediateOperands(const Instruction *I); + SetVector<int> + getOutputFootprint(Instruction *I, + SmallPtrSet<const Instruction *, 32> &Visited); + /// @} +}; +} // namespace + +cl::opt<bool> IRNormalizer::PreserveOrder( + "preserve-order", cl::Hidden, + cl::desc("Preserves original instruction order")); +cl::opt<bool> IRNormalizer::RenameAll( + "rename-all", cl::Hidden, + cl::desc("Renames all instructions (including user-named)")); +cl::opt<bool> IRNormalizer::FoldPreoutputs( + "fold-all", cl::Hidden, + cl::desc("Folds all regular instructions (including pre-outputs)")); +cl::opt<bool> IRNormalizer::ReorderOperands( + "reorder-operands", cl::Hidden, + cl::desc("Sorts and reorders operands in commutative instructions")); + +/// Entry method to the IRNormalizer. +/// +/// \param M Module to normalize. +bool IRNormalizer::runOnFunction(Function &F) { + nameFunctionArguments(F); + nameBasicBlocks(F); + + SmallVector<Instruction *, 16> Outputs = collectOutputInstructions(F); + + if (!PreserveOrder) + reorderInstructions(Outputs); + + for (auto &I : Outputs) + nameInstruction(I); + + for (auto &I : instructions(F)) { + if (!PreserveOrder) { + if (ReorderOperands && I.isCommutative()) + reorderInstructionOperandsByNames(&I); + + if (auto *PN = dyn_cast<PHINode>(&I)) + reorderPHIIncomingValues(PN); + } + + foldInstructionName(&I); + } + + return true; +} + +/// Numbers arguments. +/// +/// \param F Function whose arguments will be renamed. +void IRNormalizer::nameFunctionArguments(Function &F) { + int ArgumentCounter = 0; + for (auto &A : F.args()) { + if (RenameAll || A.getName().empty()) { + A.setName("a" + Twine(ArgumentCounter)); + ++ArgumentCounter; + } + } +} + +/// Names basic blocks using a generated hash for each basic block in +/// a function considering the opcode and the order of output instructions. +/// +/// \param F Function containing basic blocks to rename. +void IRNormalizer::nameBasicBlocks(Function &F) { + for (auto &B : F) { + // Initialize to a magic constant, so the state isn't zero. + uint64_t Hash = MagicHashConstant; + + // Hash considering output instruction opcodes. + for (auto &I : B) + if (isOutput(&I)) + Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode()); + + if (RenameAll || B.getName().empty()) { + // Name basic block. Substring hash to make diffs more readable. + B.setName("bb" + std::to_string(Hash).substr(0, 5)); + } + } +} + +/// Names instructions graphically (recursive) in accordance with the +/// def-use tree, starting from the initial instructions (defs), finishing at +/// the output (top-most user) instructions (depth-first). +/// +/// \param I Instruction to be renamed. +void IRNormalizer::nameInstruction(Instruction *I) { + // Ensure instructions are not renamed. This is done + // to prevent situation where instructions are used + // before their definition (in phi nodes) + if (NamedInstructions.contains(I)) + return; + NamedInstructions.insert(I); + // Determine the type of instruction to name. + if (isInitialInstruction(I)) { + // This is an initial instruction. + nameAsInitialInstruction(I); + } else { + // This must be a regular instruction. + nameAsRegularInstruction(I); + } +} + +/// Names instruction following the scheme: +/// vl00000Callee(Operands) +/// +/// Where 00000 is a hash calculated considering instruction's opcode and output +/// footprint. Callee's name is only included when instruction's type is +/// CallInst. In cases where instruction is commutative, operands list is also +/// sorted. +/// +/// Renames instruction only when RenameAll flag is raised or instruction is +/// unnamed. +/// +/// \see getOutputFootprint() +/// \param I Instruction to be renamed. +void IRNormalizer::nameAsInitialInstruction(Instruction *I) { + if (I->getType()->isVoidTy() || (!I->getName().empty() && !RenameAll)) + return; + + // Instruction operands for further sorting. + SmallVector<SmallString<64>, 4> Operands; + + // Collect operands. + for (auto &OP : I->operands()) { + if (!isa<Function>(OP)) { + std::string TextRepresentation; + raw_string_ostream Stream(TextRepresentation); + OP->printAsOperand(Stream, false); + Operands.push_back(StringRef(Stream.str())); + } + } + + if (I->isCommutative()) + llvm::sort(Operands); + + // Initialize to a magic constant, so the state isn't zero. + uint64_t Hash = MagicHashConstant; + + // Consider instruction's opcode in the hash. + Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode()); + + SmallPtrSet<const Instruction *, 32> Visited; + // Get output footprint for I. + SetVector<int> OutputFootprint = getOutputFootprint(I, Visited); + + // Consider output footprint in the hash. + for (const int &Output : OutputFootprint) + Hash = hashing::detail::hash_16_bytes(Hash, Output); + + // Base instruction name. + SmallString<256> Name; + Name.append("vl" + std::to_string(Hash).substr(0, 5)); + + // In case of CallInst, consider callee in the instruction name. + if (const auto *CI = dyn_cast<CallInst>(I)) { + Function *F = CI->getCalledFunction(); + + if (F != nullptr) { ---------------- nikic wrote:
Braces https://github.com/llvm/llvm-project/pull/68176 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits