================ @@ -0,0 +1,637 @@ +//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the IRNormalizer class which aims to transform LLVM +/// Modules into a canonical form by reordering and renaming instructions while +/// preserving the same semantics. The normalizer makes it easier to spot +/// semantic differences while diffing two modules which have undergone +/// different passes. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/IRNormalizer.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils.h" +#include <algorithm> +#include <vector> + +#define DEBUG_TYPE "normalize" + +using namespace llvm; + +namespace { +/// IRNormalizer aims to transform LLVM IR into canonical form. +class IRNormalizer { +public: + /// \name Normalizer flags. + /// @{ + /// Preserves original order of instructions. + static cl::opt<bool> PreserveOrder; + /// Renames all instructions (including user-named). + static cl::opt<bool> RenameAll; + /// Folds all regular instructions (including pre-outputs). + static cl::opt<bool> FoldPreoutputs; + /// Sorts and reorders operands in commutative instructions. + static cl::opt<bool> ReorderOperands; + /// @} + + bool runOnFunction(Function &F); + +private: + // Random constant for hashing, so the state isn't zero. + const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL; + DenseSet<const Instruction *> NamedInstructions; + + /// \name Naming. + /// @{ + void nameFunctionArguments(Function &F); + void nameBasicBlocks(Function &F); + void nameInstruction(Instruction *I); + void nameAsInitialInstruction(Instruction *I); + void nameAsRegularInstruction(Instruction *I); + void foldInstructionName(Instruction *I); + /// @} + + /// \name Reordering. + /// @{ + void reorderInstructions(SmallVector<Instruction *, 16> &Outputs); + void reorderInstruction(Instruction *Used, Instruction *User, + SmallPtrSet<const Instruction *, 32> &Visited); + void reorderInstructionOperandsByNames(Instruction *I); + void reorderPHIIncomingValues(PHINode *PN); + /// @} + + /// \name Utility methods. + /// @{ + SmallVector<Instruction *, 16> collectOutputInstructions(Function &F); + bool isOutput(const Instruction *I); + bool isInitialInstruction(const Instruction *I); + bool hasOnlyImmediateOperands(const Instruction *I); + SetVector<int> + getOutputFootprint(Instruction *I, + SmallPtrSet<const Instruction *, 32> &Visited); + /// @} +}; +} // namespace + +cl::opt<bool> IRNormalizer::PreserveOrder( + "preserve-order", cl::Hidden, + cl::desc("Preserves original instruction order")); +cl::opt<bool> IRNormalizer::RenameAll( + "rename-all", cl::Hidden, + cl::desc("Renames all instructions (including user-named)")); +cl::opt<bool> IRNormalizer::FoldPreoutputs( + "fold-all", cl::Hidden, + cl::desc("Folds all regular instructions (including pre-outputs)")); +cl::opt<bool> IRNormalizer::ReorderOperands( + "reorder-operands", cl::Hidden, + cl::desc("Sorts and reorders operands in commutative instructions")); + +/// Entry method to the IRNormalizer. +/// +/// \param M Module to normalize. +bool IRNormalizer::runOnFunction(Function &F) { + nameFunctionArguments(F); + nameBasicBlocks(F); + + SmallVector<Instruction *, 16> Outputs = collectOutputInstructions(F); + + if (!PreserveOrder) + reorderInstructions(Outputs); + + for (auto &I : Outputs) + nameInstruction(I); + + for (auto &I : instructions(F)) { + if (!PreserveOrder) { + if (ReorderOperands && I.isCommutative()) + reorderInstructionOperandsByNames(&I); + + if (auto *PN = dyn_cast<PHINode>(&I)) + reorderPHIIncomingValues(PN); + } + + foldInstructionName(&I); + } + + return true; +} + +/// Numbers arguments. +/// +/// \param F Function whose arguments will be renamed. +void IRNormalizer::nameFunctionArguments(Function &F) { + int ArgumentCounter = 0; + for (auto &A : F.args()) { + if (RenameAll || A.getName().empty()) { + A.setName("a" + Twine(ArgumentCounter)); + ++ArgumentCounter; + } + } +} + +/// Names basic blocks using a generated hash for each basic block in +/// a function considering the opcode and the order of output instructions. +/// +/// \param F Function containing basic blocks to rename. +void IRNormalizer::nameBasicBlocks(Function &F) { + for (auto &B : F) { + // Initialize to a magic constant, so the state isn't zero. + uint64_t Hash = MagicHashConstant; + + // Hash considering output instruction opcodes. + for (auto &I : B) + if (isOutput(&I)) + Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode()); + + if (RenameAll || B.getName().empty()) { + // Name basic block. Substring hash to make diffs more readable. + B.setName("bb" + std::to_string(Hash).substr(0, 5)); + } + } +} + +/// Names instructions graphically (recursive) in accordance with the +/// def-use tree, starting from the initial instructions (defs), finishing at +/// the output (top-most user) instructions (depth-first). +/// +/// \param I Instruction to be renamed. +void IRNormalizer::nameInstruction(Instruction *I) { + // Ensure instructions are not renamed. This is done + // to prevent situation where instructions are used + // before their definition (in phi nodes) + if (NamedInstructions.contains(I)) + return; + NamedInstructions.insert(I); + // Determine the type of instruction to name. + if (isInitialInstruction(I)) { + // This is an initial instruction. + nameAsInitialInstruction(I); + } else { + // This must be a regular instruction. + nameAsRegularInstruction(I); + } +} + +/// Names instruction following the scheme: +/// vl00000Callee(Operands) +/// +/// Where 00000 is a hash calculated considering instruction's opcode and output +/// footprint. Callee's name is only included when instruction's type is +/// CallInst. In cases where instruction is commutative, operands list is also +/// sorted. +/// +/// Renames instruction only when RenameAll flag is raised or instruction is +/// unnamed. +/// +/// \see getOutputFootprint() +/// \param I Instruction to be renamed. +void IRNormalizer::nameAsInitialInstruction(Instruction *I) { + if (I->getType()->isVoidTy() || (!I->getName().empty() && !RenameAll)) + return; + + // Instruction operands for further sorting. + SmallVector<SmallString<64>, 4> Operands; + + // Collect operands. + for (auto &OP : I->operands()) { + if (!isa<Function>(OP)) { + std::string TextRepresentation; + raw_string_ostream Stream(TextRepresentation); + OP->printAsOperand(Stream, false); + Operands.push_back(StringRef(Stream.str())); + } + } + + if (I->isCommutative()) + llvm::sort(Operands); + + // Initialize to a magic constant, so the state isn't zero. + uint64_t Hash = MagicHashConstant; + + // Consider instruction's opcode in the hash. + Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode()); + + SmallPtrSet<const Instruction *, 32> Visited; + // Get output footprint for I. + SetVector<int> OutputFootprint = getOutputFootprint(I, Visited); + + // Consider output footprint in the hash. + for (const int &Output : OutputFootprint) + Hash = hashing::detail::hash_16_bytes(Hash, Output); + + // Base instruction name. + SmallString<256> Name; + Name.append("vl" + std::to_string(Hash).substr(0, 5)); + + // In case of CallInst, consider callee in the instruction name. + if (const auto *CI = dyn_cast<CallInst>(I)) { + Function *F = CI->getCalledFunction(); + + if (F != nullptr) { + Name.append(F->getName()); + } + } + + Name.append("("); + for (unsigned long i = 0; i < Operands.size(); ++i) { + Name.append(Operands[i]); + + if (i < Operands.size() - 1) + Name.append(", "); + } + Name.append(")"); + + I->setName(Name); +} + +/// Names instruction following the scheme: +/// op00000Callee(Operands) +/// +/// Where 00000 is a hash calculated considering instruction's opcode, its +/// operands' opcodes and order. Callee's name is only included when +/// instruction's type is CallInst. In cases where instruction is commutative, +/// operand list is also sorted. +/// +/// Names instructions recursively in accordance with the def-use tree, +/// starting from the initial instructions (defs), finishing at +/// the output (top-most user) instructions (depth-first). +/// +/// Renames instruction only when RenameAll flag is raised or instruction is +/// unnamed. +/// +/// \see getOutputFootprint() +/// \param I Instruction to be renamed. +void IRNormalizer::nameAsRegularInstruction(Instruction *I) { + // Instruction operands for further sorting. + SmallVector<SmallString<128>, 4> Operands; + + // The name of a regular instruction depends + // on the names of its operands. Hence, all + // operands must be named first in the use-def + // walk. + + // Collect operands. + for (auto &OP : I->operands()) { + if (auto *IOP = dyn_cast<Instruction>(OP)) { + // Walk down the use-def chain. + nameInstruction(IOP); + Operands.push_back(IOP->getName()); + } else if (isa<Value>(OP) && !isa<Function>(OP)) { + // This must be an immediate value. + std::string TextRepresentation; + raw_string_ostream Stream(TextRepresentation); + OP->printAsOperand(Stream, false); + Operands.push_back(StringRef(Stream.str())); + } + } + + if (I->isCommutative()) + llvm::sort(Operands.begin(), Operands.end()); + + // Initialize to a magic constant, so the state isn't zero. + uint64_t Hash = MagicHashConstant; + + // Consider instruction opcode in the hash. + Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode()); + + // Operand opcodes for further sorting (commutative). + SmallVector<int, 4> OperandsOpcodes; + + // Collect operand opcodes for hashing. + for (auto &OP : I->operands()) + if (auto *IOP = dyn_cast<Instruction>(OP)) + OperandsOpcodes.push_back(IOP->getOpcode()); + + if (I->isCommutative()) + llvm::sort(OperandsOpcodes.begin(), OperandsOpcodes.end()); + + // Consider operand opcodes in the hash. + for (const int Code : OperandsOpcodes) + Hash = hashing::detail::hash_16_bytes(Hash, Code); + + // Base instruction name. + SmallString<512> Name; + Name.append("op" + std::to_string(Hash).substr(0, 5)); + + // In case of CallInst, consider callee in the instruction name. + if (const auto *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + Name.append(F->getName()); + + Name.append("("); + for (unsigned long i = 0; i < Operands.size(); ++i) { + Name.append(Operands[i]); + + if (i < Operands.size() - 1) + Name.append(", "); + } + Name.append(")"); + + if ((I->getName().empty() || RenameAll) && !I->getType()->isVoidTy()) + I->setName(Name); +} + +/// Shortens instruction's name. This method removes called function name from +/// the instruction name and substitutes the call chain with a corresponding +/// list of operands. +/// +/// Examples: +/// op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...) -> +/// op00000(op00001, vl00000, ...) vl00000Callee(1, 2) -> vl00000(1, 2) +/// +/// This method omits output instructions and pre-output (instructions directly +/// used by an output instruction) instructions (by default). By default it also +/// does not affect user named instructions. +/// +/// \param I Instruction whose name will be folded. +void IRNormalizer::foldInstructionName(Instruction *I) { + // If this flag is raised, fold all regular + // instructions (including pre-outputs). + if (!FoldPreoutputs) { + // Don't fold if one of the users is an output instruction. + for (auto *U : I->users()) + if (auto *IU = dyn_cast<Instruction>(U)) + if (isOutput(IU)) + return; + } + + // Don't fold if it is an output instruction or has no op prefix. + if (isOutput(I) || I->getName().substr(0, 2) != "op") + return; + + // Instruction operands. + SmallVector<SmallString<64>, 4> Operands; + + for (auto &OP : I->operands()) { + if (const auto *IOP = dyn_cast<Instruction>(OP)) { + bool HasCanonicalName = I->getName().substr(0, 2) == "op" || + I->getName().substr(0, 2) == "vl"; + + Operands.push_back(HasCanonicalName ? IOP->getName().substr(0, 7) + : IOP->getName()); + } + } + + if (I->isCommutative()) + llvm::sort(Operands.begin(), Operands.end()); + + SmallString<256> Name; + Name.append(I->getName().substr(0, 7)); + + Name.append("("); + for (unsigned long i = 0; i < Operands.size(); ++i) { + Name.append(Operands[i]); + + if (i < Operands.size() - 1) + Name.append(", "); + } + Name.append(")"); + + I->setName(Name); +} + +/// Reorders instructions by walking up the tree from each operand of an output +/// instruction and reducing the def-use distance. +/// This method assumes that output instructions were collected top-down, +/// otherwise the def-use chain may be broken. +/// This method is a wrapper for recursive reorderInstruction(). +/// +/// \see reorderInstruction() +/// \param Outputs Vector of pointers to output instructions collected top-down. +void IRNormalizer::reorderInstructions( + SmallVector<Instruction *, 16> &Outputs) { + // This method assumes output instructions were collected top-down, + // otherwise the def-use chain may be broken. + + SmallPtrSet<const Instruction *, 32> Visited; + + // Walk up the tree. + for (auto &I : Outputs) + for (auto &OP : I->operands()) + if (auto *IOP = dyn_cast<Instruction>(OP)) + reorderInstruction(IOP, I, Visited); +} + +/// Reduces def-use distance or places instruction at the end of the basic +/// block. Continues to walk up the def-use tree recursively. Used by +/// reorderInstructions(). +/// +/// \see reorderInstructions() +/// \param Used Pointer to the instruction whose value is used by the \p User. +/// \param User Pointer to the instruction which uses the \p Used. +/// \param Visited Set of visited instructions. +void IRNormalizer::reorderInstruction( + Instruction *Used, Instruction *User, + SmallPtrSet<const Instruction *, 32> &Visited) { + if (isa<PHINode>(Used)) + return; + if (Visited.contains(Used)) + return; + Visited.insert(Used); + + if (Used->getParent() == User->getParent()) { + // If Used and User share the same basic block move Used just before User. + Used->moveBefore(User); + } else { + // Otherwise move Used to the very end of its basic block. + Used->moveBefore(&Used->getParent()->back()); + } + + for (auto &OP : Used->operands()) { + if (auto *IOP = dyn_cast<Instruction>(OP)) { + // Walk up the def-use tree. + reorderInstruction(IOP, Used, Visited); + } + } +} ---------------- nikic wrote:
Am I missing something, or can this reorder side-effecting instructions? https://github.com/llvm/llvm-project/pull/68176 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits