https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/107369
>From e45d7e68a371a09ea766c4accf8edc6c030fd7fd Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 12:09:50 +0100 Subject: [PATCH 1/3] Add CMake option to enable expensive line number origin tracking --- llvm/CMakeLists.txt | 4 ++++ llvm/cmake/modules/HandleLLVMOptions.cmake | 12 ++++++++++++ llvm/docs/CMake.rst | 11 +++++++++++ llvm/include/llvm/Config/config.h.cmake | 4 ++++ 4 files changed, 31 insertions(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 12618966c4adfd..3e2e90f5adad2e 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -524,6 +524,10 @@ endif() option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) +set(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING "DISABLED" CACHE STRING + "Enhance debugify's line number coverage tracking; enabling this is abi-breaking. Can be DISABLED, COVERAGE, or COVERAGE_AND_ORIGIN.") +set_property(CACHE LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING PROPERTY STRINGS DISABLED COVERAGE COVERAGE_AND_ORIGIN) + set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF) if (MINGW) # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 5ca580fbb59c59..a4b11c149da9de 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -196,6 +196,18 @@ else() message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!") endif() +string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING) + +if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" ) + set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 ) +elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" ) + message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.") +elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING ) + # The DISABLED setting is default and requires no additional defines. +else() + message(FATAL_ERROR "Unknown value for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING: \"${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}\"!") +endif() + if( LLVM_REVERSE_ITERATION ) set( LLVM_ENABLE_REVERSE_ITERATION 1 ) endif() diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 2a80813999ea1e..304e22759770d9 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -475,6 +475,17 @@ enabled sub-projects. Nearly all of these variable names begin with **LLVM_ENABLE_BINDINGS**:BOOL If disabled, do not try to build the OCaml bindings. +**LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING**:STRING + Enhances Debugify's ability to detect line number errors by storing extra + information inside Instructions, removing false positives from Debugify's + results at the cost of performance. Allowed values are `DISABLED` (default), + `COVERAGE`, and `COVERAGE_AND_ORIGIN`. `COVERAGE` tracks whether and why a + line number was intentionally dropped or not generated for an instruction, + allowing Debugify to avoid reporting these as errors. `COVERAGE_AND_ORIGIN` + additionally stores a stacktrace of the point where each DebugLoc is + unintentionally dropped, allowing for much easier bug triaging at the cost of + a ~10x performance slowdown. + **LLVM_ENABLE_DIA_SDK**:BOOL Enable building with MSVC DIA SDK for PDB debugging support. Available only with MSVC. Defaults to ON. diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index ff30741c8f360a..388ce1e8f74e3e 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -19,6 +19,10 @@ /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS +/* Define to 1 to enable expensive checks for debug location coverage checking, + and to 0 otherwise. */ +#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING + /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer backslashes. */ #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH >From abab69ae42bf5650d6a8fff5a22341ff32effe57 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 12:23:52 +0100 Subject: [PATCH 2/3] Add conditionally-enabled DebugLocKinds --- clang/lib/CodeGen/BackendUtil.cpp | 16 +++++ llvm/include/llvm/IR/DebugLoc.h | 74 +++++++++++++++++++++- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 ++ llvm/lib/IR/DebugInfo.cpp | 4 +- llvm/lib/IR/DebugLoc.cpp | 16 +++++ llvm/lib/Transforms/Utils/Debugify.cpp | 19 ++++-- 6 files changed, 124 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a661..20653daff7d4ae 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -911,6 +911,22 @@ void EmitAssemblyHelper::RunOptimizationPipeline( Debugify.setOrigDIVerifyBugsReportFilePath( CodeGenOpts.DIBugsReportFilePath); Debugify.registerCallbacks(PIC, MAM); + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // If we're using debug location coverage tracking, mark all the + // instructions coming out of the frontend without a DebugLoc as being + // intentional line-zero locations, to prevent both those instructions and + // new instructions that inherit their location from being treated as + // incorrectly empty locations. + for (Function &F : *TheModule) { + if (!F.getSubprogram()) + continue; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (!I.getDebugLoc()) + I.setDebugLoc(DebugLoc::getLineZero()); + } +#endif } // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index c22d3e9b10d27f..ae5f9d72c97e26 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_DEBUGLOC_H #define LLVM_IR_DEBUGLOC_H +#include "llvm/Config/config.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/DataTypes.h" @@ -22,6 +23,67 @@ namespace llvm { class LLVMContext; class raw_ostream; class DILocation; + class Function; + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc + // is either ordinary, containing a valid DILocation, or otherwise describing + // the reason why the DebugLoc does not contain a valid DILocation. + enum class DebugLocKind : uint8_t { + // DebugLoc is expected to contain a valid DILocation. + Normal, + // DebugLoc intentionally does not have a valid DILocation; may be for a + // compiler-generated instruction, or an explicitly dropped location. + LineZero, + // DebugLoc does not have a known or currently knowable source location, + // e.g. the attribution is ambiguous in a way that can't be represented, or + // determining the correct location is complicated and requires future + // developer effort. + Unknown, + // DebugLoc is attached to an instruction that we don't expect to be + // emitted, and so can omit a valid DILocation; we don't expect to ever try + // and emit these into the line table, and trying to do so is a sign that + // something has gone wrong (most likely a DebugLoc leaking from a transient + // compiler-generated instruction). + Temporary + }; + + // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify + // to ignore intentionally-empty DebugLocs. + class DILocAndCoverageTracking : public TrackingMDNodeRef { + public: + DebugLocKind Kind; + // Default constructor for empty DebugLocs. + DILocAndCoverageTracking() + : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {} + // Valid or nullptr MDNode*, normal DebugLocKind. + DILocAndCoverageTracking(const MDNode *Loc) + : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), + Kind(DebugLocKind::Normal) {} + DILocAndCoverageTracking(const DILocation *Loc); + // Explicit DebugLocKind, which always means a nullptr MDNode*. + DILocAndCoverageTracking(DebugLocKind Kind) + : TrackingMDNodeRef(nullptr), Kind(Kind) {} + }; + template <> struct simplify_type<DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + template <> struct simplify_type<const DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + + using DebugLocTrackingRef = DILocAndCoverageTracking; +#else + using DebugLocTrackingRef = TrackingMDNodeRef; +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING /// A debug info location. /// @@ -31,7 +93,8 @@ namespace llvm { /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a /// one based on relatively opaque \a MDNode pointers. class DebugLoc { - TrackingMDNodeRef Loc; + + DebugLocTrackingRef Loc; public: DebugLoc() = default; @@ -47,6 +110,15 @@ namespace llvm { /// IR. explicit DebugLoc(const MDNode *N); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLoc(DebugLocKind Kind) : Loc(Kind) {} + DebugLocKind getKind() const { return Loc.Kind; } +#endif + + static DebugLoc getTemporary(); + static DebugLoc getUnknown(); + static DebugLoc getLineZero(); + /// Get the underlying \a DILocation. /// /// \pre !*this or \c isa<DILocation>(getAsMDNode()). diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f88653146cc6ff..4ba8262259b112 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" @@ -2080,6 +2081,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } if (!DL) { + // FIXME: We could assert that `DL.getKind() != DebugLocKind::Temporary` + // here, or otherwise record any temporary DebugLocs seen to ensure that + // transient compiler-generated instructions aren't leaking their DLs to + // other instructions. // We have an unspecified location, which might want to be line 0. // If we have already emitted a line-0 record, don't repeat it. if (LastAsmLine == 0) diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 7fa1f9696d43b2..86ac46540c5ef9 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -979,7 +979,7 @@ void Instruction::dropLocation() { } if (!MayLowerToCall) { - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); return; } @@ -998,7 +998,7 @@ void Instruction::dropLocation() { // // One alternative is to set a line 0 location with the existing scope and // inlinedAt info. The location might be sensitive to when inlining occurs. - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index bdea52180f74ae..501eafd0175b7b 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -11,6 +11,22 @@ #include "llvm/IR/DebugInfo.h" using namespace llvm; +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) + : TrackingMDNodeRef(const_cast<DILocation *>(L)), + Kind(DebugLocKind::Normal) {} + +DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); } + +#else + +DebugLoc DebugLoc::getTemporary() { return DebugLoc(); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(); } +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING + //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index fcc82eadac36cf..f9f85d05ab45c5 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -292,6 +292,16 @@ bool llvm::stripDebugifyMetadata(Module &M) { return Changed; } +bool hasLoc(const Instruction &I) { + const DILocation *Loc = I.getDebugLoc().get(); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLocKind Kind = I.getDebugLoc().getKind(); + return Loc || Kind != DebugLocKind::Normal; +#else + return Loc; +#endif +} + bool llvm::collectDebugInfoMetadata(Module &M, iterator_range<Module::iterator> Functions, DebugInfoPerPass &DebugInfoBeforePass, @@ -364,9 +374,7 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); + DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } } @@ -609,10 +617,7 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - - DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); + DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } } >From 0d750fdbba783f000947c6ba1e09afe908044ecd Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 16:35:46 +0100 Subject: [PATCH 3/3] Add origin-tracking support for Unix in LLVM --- llvm/cmake/modules/HandleLLVMOptions.cmake | 3 +- llvm/include/llvm/Config/config.h.cmake | 4 + llvm/include/llvm/IR/DebugLoc.h | 56 ++++++++-- llvm/include/llvm/Support/Signals.h | 40 +++++++ llvm/lib/CodeGen/BranchFolding.cpp | 2 +- llvm/lib/CodeGen/BranchFolding.h | 12 +- llvm/lib/IR/DebugLoc.cpp | 22 +++- llvm/lib/IR/Instruction.cpp | 6 +- llvm/lib/Support/Signals.cpp | 116 ++++++++++++++++++++ llvm/lib/Support/Unix/Signals.inc | 15 +++ llvm/lib/Support/Windows/Signals.inc | 5 + llvm/lib/Transforms/Utils/Debugify.cpp | 77 +++++++++++-- llvm/utils/llvm-original-di-preservation.py | 22 ++-- 13 files changed, 344 insertions(+), 36 deletions(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index a4b11c149da9de..7f66e55dca13b1 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -201,7 +201,8 @@ string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" ) set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 ) elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" ) - message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.") + set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 ) + set( ENABLE_DEBUGLOC_ORIGIN_TRACKING 1 ) elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING ) # The DISABLED setting is default and requires no additional defines. else() diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index 388ce1e8f74e3e..7e8f1aa9474654 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -23,6 +23,10 @@ and to 0 otherwise. */ #cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING +/* Define to 1 to enable expensive tracking of the origin of debug location + coverage bugs, and to 0 otherwise. */ +#cmakedefine01 ENABLE_DEBUGLOC_ORIGIN_TRACKING + /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer backslashes. */ #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index ae5f9d72c97e26..a3e26eadb224d4 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -26,6 +26,22 @@ namespace llvm { class Function; #if ENABLE_DEBUGLOC_COVERAGE_TRACKING +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING + struct DbgLocOrigin { + static constexpr unsigned long MaxDepth = 16; + using StackTracesTy = + SmallVector<std::pair<int, std::array<void *, MaxDepth>>, 0>; + StackTracesTy StackTraces; + DbgLocOrigin(bool ShouldCollectTrace); + void addTrace(); + const StackTracesTy &getOriginStackTraces() const { return StackTraces; }; + }; +#else + struct DbgLocOrigin { + DbgLocOrigin(bool) {} + }; +#endif + // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc // is either ordinary, containing a valid DILocation, or otherwise describing // the reason why the DebugLoc does not contain a valid DILocation. @@ -48,22 +64,29 @@ namespace llvm { Temporary }; - // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify - // to ignore intentionally-empty DebugLocs. - class DILocAndCoverageTracking : public TrackingMDNodeRef { + // Extends TrackingMDNodeRef to also store a DebugLocKind and Origin, + // allowing Debugify to ignore intentionally-empty DebugLocs and display the + // code responsible for generating unintentionally-empty DebugLocs. + // Currently we only need to track the Origin of this DILoc when using a + // DebugLoc that is Normal and empty, so only collect the origin stacktrace in + // those cases. + class DILocAndCoverageTracking : public TrackingMDNodeRef, + public DbgLocOrigin { public: DebugLocKind Kind; // Default constructor for empty DebugLocs. DILocAndCoverageTracking() - : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {} - // Valid or nullptr MDNode*, normal DebugLocKind. + : TrackingMDNodeRef(nullptr), DbgLocOrigin(true), + Kind(DebugLocKind::Normal) {} + // Valid or nullptr MDNode*, normal DebugLocKind DILocAndCoverageTracking(const MDNode *Loc) - : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), + : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), DbgLocOrigin(!Loc), Kind(DebugLocKind::Normal) {} DILocAndCoverageTracking(const DILocation *Loc); - // Explicit DebugLocKind, which always means a nullptr MDNode*. + // Always nullptr MDNode*, any DebugLocKind DILocAndCoverageTracking(DebugLocKind Kind) - : TrackingMDNodeRef(nullptr), Kind(Kind) {} + : TrackingMDNodeRef(nullptr), + DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {} }; template <> struct simplify_type<DILocAndCoverageTracking> { using SimpleType = MDNode *; @@ -115,6 +138,23 @@ namespace llvm { DebugLocKind getKind() const { return Loc.Kind; } #endif +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +#if !ENABLE_DEBUGLOC_COVERAGE_TRACKING +#error Cannot enable DebugLoc origin-tracking without coverage-tracking! +#endif + + const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const { + return Loc.getOriginStackTraces(); + } + DebugLoc getCopied() const { + DebugLoc NewDL = *this; + NewDL.Loc.addTrace(); + return NewDL; + } +#else + DebugLoc getCopied() const { return *this; } +#endif + static DebugLoc getTemporary(); static DebugLoc getUnknown(); static DebugLoc getLineZero(); diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index 70749ce30184a7..6addb8212e20ac 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -14,6 +14,8 @@ #ifndef LLVM_SUPPORT_SIGNALS_H #define LLVM_SUPPORT_SIGNALS_H +#include "llvm/Config/config.h" +#include <array> #include <cstdint> #include <string> @@ -21,6 +23,22 @@ namespace llvm { class StringRef; class raw_ostream; +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +// Typedefs that are convenient but only used by the StackTrace-collection code +// added if DebugLoc origin-tracking is enabled. +template <typename T, typename Enable> struct DenseMapInfo; +template <typename ValueT, typename ValueInfoT> class DenseSet; +namespace detail { +template <typename KeyT, typename ValueT> struct DenseMapPair; +} +template <typename KeyT, typename ValueT, typename KeyInfoT, typename BucketT> +class DenseMap; +using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>; +using SymbolizedAddressMap = + DenseMap<void *, std::string, DenseMapInfo<void *, void>, + detail::DenseMapPair<void *, std::string>>; +#endif + namespace sys { /// This function runs all the registered interrupt handlers, including the @@ -55,6 +73,28 @@ namespace sys { /// specified, the entire frame is printed. void PrintStackTrace(raw_ostream &OS, int Depth = 0); +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +#ifdef NDEBUG +#error DebugLoc origin-tracking should not be enabled in Release builds. +#endif + /// Populates the given array with a stacktrace of the current program, up to + /// MaxDepth frames. Returns the number of frames returned, which will be + /// inserted into \p StackTrace from index 0. All entries after the returned + /// depth will be unmodified. NB: This is only intended to be used for + /// introspection of LLVM by Debugify, will not be enabled in release builds, + /// and should not be relied on for other purposes. + template <unsigned long MaxDepth> + int getStackTrace(std::array<void *, MaxDepth> &StackTrace); + + /// Takes a set of \p Addresses, symbolizes them and stores the result in the + /// provided \p SymbolizedAddresses map. + /// NB: This is only intended to be used for introspection of LLVM by + /// Debugify, will not be enabled in release builds, and should not be relied + /// on for other purposes. + void symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses); +#endif + // Run all registered signal handlers. void RunSignalHandlers(); diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 92a03eb52e35d9..edd60c5ad4a18d 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -915,7 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { unsigned CurHash = MergePotentials.back().getHash(); - const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc(); + const DebugLoc BranchDL = MergePotentials.back().getBranchDebugLoc(); // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index ff2bbe06c04887..9638cfda1239d1 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -50,11 +50,15 @@ class TargetRegisterInfo; class MergePotentialsElt { unsigned Hash; MachineBasicBlock *Block; - DebugLoc BranchDebugLoc; + // We use MDNode rather than DebugLoc here because under certain CMake + // options*, DebugLoc may contain a SmallVector used for introspection + // purposes, which causes errors when stored here. + // *LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING=COVERAGE_AND_ORIGIN + MDNode *BranchDebugLoc; public: - MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl) - : Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {} + MergePotentialsElt(unsigned h, MachineBasicBlock *b, MDNode *bdl) + : Hash(h), Block(b), BranchDebugLoc(bdl) {} unsigned getHash() const { return Hash; } MachineBasicBlock *getBlock() const { return Block; } @@ -63,7 +67,7 @@ class TargetRegisterInfo; Block = MBB; } - const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; } + const DebugLoc getBranchDebugLoc() { return DebugLoc(BranchDebugLoc); } bool operator<(const MergePotentialsElt &) const; }; diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index 501eafd0175b7b..ad02cd078df043 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -9,11 +9,31 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" + +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +#include "llvm/Support/Signals.h" + +namespace llvm { +DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) { + if (ShouldCollectTrace) { + auto &[Depth, StackTrace] = StackTraces.emplace_back(); + Depth = sys::getStackTrace(StackTrace); + } +} +void DbgLocOrigin::addTrace() { + if (StackTraces.empty()) + return; + auto &[Depth, StackTrace] = StackTraces.emplace_back(); + Depth = sys::getStackTrace(StackTrace); +} +} // namespace llvm +#endif + using namespace llvm; #if ENABLE_DEBUGLOC_COVERAGE_TRACKING DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) - : TrackingMDNodeRef(const_cast<DILocation *>(L)), + : TrackingMDNodeRef(const_cast<DILocation *>(L)), DbgLocOrigin(!L), Kind(DebugLocKind::Normal) {} DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); } diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 6f0f3f244c050c..2c0713aa886412 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -1279,6 +1279,9 @@ void Instruction::swapProfMetadata() { void Instruction::copyMetadata(const Instruction &SrcInst, ArrayRef<unsigned> WL) { + if (WL.empty() || is_contained(WL, LLVMContext::MD_dbg)) + setDebugLoc(SrcInst.getDebugLoc()); + if (!SrcInst.hasMetadata()) return; @@ -1292,8 +1295,6 @@ void Instruction::copyMetadata(const Instruction &SrcInst, if (WL.empty() || WLS.count(MD.first)) setMetadata(MD.first, MD.second); } - if (WL.empty() || WLS.count(LLVMContext::MD_dbg)) - setDebugLoc(SrcInst.getDebugLoc()); } Instruction *Instruction::clone() const { @@ -1311,5 +1312,6 @@ Instruction *Instruction::clone() const { New->SubclassOptionalData = SubclassOptionalData; New->copyMetadata(*this); + New->setDebugLoc(getDebugLoc().getCopied()); return New; } diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 9f9030e79d1040..6825720f51e96d 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -253,6 +253,122 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, return true; } +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +void sys::symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses) { + assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) && + "Debugify origin stacktraces require symbolization to be enabled."); + + // Convert Set of Addresses to ordered list. + SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end()); + if (AddressList.empty()) + return; + int NumAddresses = AddressList.size(); + llvm::sort(AddressList); + + // Use llvm-symbolizer tool to symbolize the stack traces. First look for it + // alongside our binary, then in $PATH. + ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code(); + if (const char *Path = getenv(LLVMSymbolizerPathEnv)) { + LLVMSymbolizerPathOrErr = sys::findProgramByName(Path); + } + if (!LLVMSymbolizerPathOrErr) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer"); + assert(!!LLVMSymbolizerPathOrErr && + "Debugify origin stacktraces require llvm-symbolizer."); + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + + // Try to guess the main executable name, since we don't have argv0 available + // here. + std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); + + BumpPtrAllocator Allocator; + StringSaver StrPool(Allocator); + std::vector<const char *> Modules(NumAddresses, nullptr); + std::vector<intptr_t> Offsets(NumAddresses, 0); + if (!findModulesAndOffsets(AddressList.data(), NumAddresses, Modules.data(), + Offsets.data(), MainExecutableName.c_str(), + StrPool)) + return; + int InputFD; + SmallString<32> InputFile, OutputFile; + sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); + sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); + FileRemover InputRemover(InputFile.c_str()); + FileRemover OutputRemover(OutputFile.c_str()); + + { + raw_fd_ostream Input(InputFD, true); + for (int i = 0; i < NumAddresses; i++) { + if (Modules[i]) + Input << Modules[i] << " " << (void *)Offsets[i] << "\n"; + } + } + + std::optional<StringRef> Redirects[] = {InputFile.str(), OutputFile.str(), + StringRef("")}; + StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", +#ifdef _WIN32 + // Pass --relative-address on Windows so that we don't + // have to add ImageBase from PE file. + // FIXME: Make this the default for llvm-symbolizer. + "--relative-address", +#endif + "--demangle"}; + int RunResult = + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects); + if (RunResult != 0) + return; + + // This report format is based on the sanitizer stack trace printer. See + // sanitizer_stacktrace_printer.cc in compiler-rt. + auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return; + StringRef Output = OutputBuf.get()->getBuffer(); + SmallVector<StringRef, 32> Lines; + Output.split(Lines, "\n"); + auto CurLine = Lines.begin(); + for (int i = 0; i < NumAddresses; i++) { + assert(!SymbolizedAddresses.contains(AddressList[i])); + std::string &SymbolizedAddr = SymbolizedAddresses[AddressList[i]]; + raw_string_ostream OS(SymbolizedAddr); + if (!Modules[i]) { + OS << format_ptr(AddressList[i]) << '\n'; + continue; + } + // Read pairs of lines (function name and file/line info) until we + // encounter empty line. + for (bool IsFirst = true;; IsFirst = false) { + if (CurLine == Lines.end()) + return; + StringRef FunctionName = *CurLine++; + if (FunctionName.empty()) + break; + // Add indentation for lines after the first; we use 3 spaces, because + // currently that aligns with the expected indentation that will be added + // to the first line by Debugify. + if (!IsFirst) + OS << " "; + OS << format_ptr(AddressList[i]) << ' '; + if (!FunctionName.starts_with("??")) + OS << FunctionName << ' '; + if (CurLine == Lines.end()) { + OS << '\n'; + return; + } + StringRef FileLineInfo = *CurLine++; + if (!FileLineInfo.starts_with("??")) + OS << FileLineInfo; + else + OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; + OS << '\n'; + } + } + return; +} +#endif + static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName); LLVM_ATTRIBUTE_USED diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 298fde1a387cc5..d5b11a08ead835 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -499,6 +499,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { return 0; } +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +#if !defined(HAVE_BACKTRACE) +#error DebugLoc origin-tracking currently requires `backtrace()`. +#endif +namespace llvm { +namespace sys { +template <unsigned long MaxDepth> +int getStackTrace(std::array<void *, MaxDepth> &StackTrace) { + return backtrace(StackTrace.data(), MaxDepth); +} +template int getStackTrace<16ul>(std::array<void *, 16ul> &); +} // namespace sys +} // namespace llvm +#endif + /// If this is an ELF platform, we can find all loaded modules and their virtual /// addresses with dl_iterate_phdr. static bool findModulesAndOffsets(void **StackTrace, int Depth, diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 29ebf7c696e04f..c35ed744244d51 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -9,6 +9,7 @@ // This file provides the Win32 specific implementation of the Signals class. // //===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" @@ -538,6 +539,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0, extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); #endif +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +#error DebugLoc origin-tracking currently unimplemented for Windows. +#endif + static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) { STACKFRAME64 StackFrame{}; CONTEXT Context{}; diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index f9f85d05ab45c5..3467f3482a1e62 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -15,7 +15,10 @@ #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/InstIterator.h" @@ -28,6 +31,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include <optional> +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +// We need the Signals header to operate on stacktraces if we're using DebugLoc +// origin-tracking. +#include "llvm/Support/Signals.h" +#endif #define DEBUG_TYPE "debugify" @@ -57,6 +65,49 @@ cl::opt<Level> DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. +static DenseMap<void *, std::string> SymbolizedAddrs; +static DenseSet<void *> UnsymbolizedAddrs; + +std::string symbolizeStackTrace(const Instruction *I) { + // We flush the set of unsymbolized addresses at the latest possible moment, + // i.e. now. + if (!UnsymbolizedAddrs.empty()) { + sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs); + UnsymbolizedAddrs.clear(); + } + auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces(); + std::string Result; + raw_string_ostream OS(Result); + for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) { + if (TraceIdx != 0) + OS << "========================================\n"; + auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx]; + for (int Frame = 0; Frame < Depth; ++Frame) { + assert(SymbolizedAddrs.contains(StackTrace[Frame]) && + "Expected each address to have been symbolized."); + OS << right_justify(formatv("#{0}", Frame).str(), std::log10(Depth) + 2) + << ' ' << SymbolizedAddrs[StackTrace[Frame]]; + } + } + return Result; +} +void collectStackAddresses(Instruction &I) { + auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces(); + for (auto &[Depth, StackTrace] : OriginStackTraces) { + for (int Frame = 0; Frame < Depth; ++Frame) { + void *Addr = StackTrace[Frame]; + if (!SymbolizedAddrs.contains(Addr)) + UnsymbolizedAddrs.insert(Addr); + } + } +} +#else +void collectStackAddresses(Instruction &I) {} +#endif // ENABLE_DEBUGLOC_ORIGIN_TRACKING + uint64_t getAllocSizeInBits(Module &M, Type *Ty) { return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0; } @@ -374,6 +425,8 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); + // Track the addresses to symbolize, if the feature is enabled. + collectStackAddresses(I); DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } @@ -449,14 +502,20 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, auto BBName = BB->hasName() ? BB->getName() : "no-name"; auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); + auto CreateJSONBugEntry = [&](const char *Action) { + Bugs.push_back(llvm::json::Object({ + {"metadata", "DILocation"}, {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, {"instr", InstName}, {"action", Action}, +#if ENABLE_DEBUGLOC_ORIGIN_TRACKING + {"origin", symbolizeStackTrace(Instr)}, +#endif + })); + }; + auto InstrIt = DILocsBefore.find(Instr); if (InstrIt == DILocsBefore.end()) { if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "not-generate"}})); + CreateJSONBugEntry("not-generate"); else dbg() << "WARNING: " << NameOfWrappedPass << " did not generate DILocation for " << *Instr @@ -469,11 +528,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, // If the instr had the !dbg attached before the pass, consider it as // a debug info issue. if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "drop"}})); + CreateJSONBugEntry("drop"); else dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of " << *Instr << " (BB: " << BBName << ", Fn: " << FnName @@ -617,6 +672,8 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + // Track the addresses to symbolize, if the feature is enabled. + collectStackAddresses(I); DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py index dc1fa518ca8e6d..a8c12252d972ca 100755 --- a/llvm/utils/llvm-original-di-preservation.py +++ b/llvm/utils/llvm-original-di-preservation.py @@ -13,14 +13,15 @@ class DILocBug: - def __init__(self, action, bb_name, fn_name, instr): + def __init__(self, origin, action, bb_name, fn_name, instr): + self.origin = origin self.action = action self.bb_name = bb_name self.fn_name = fn_name self.instr = instr def __str__(self): - return self.action + self.bb_name + self.fn_name + self.instr + return self.action + self.bb_name + self.fn_name + self.instr + self.origin class DISPBug: @@ -86,6 +87,7 @@ def generate_html_report( "Function Name", "Basic Block Name", "Action", + "Origin", ] for column in header_di_loc: @@ -112,6 +114,7 @@ def generate_html_report( row.append(x.fn_name) row.append(x.bb_name) row.append(x.action) + row.append(f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>") row.append(" </tr>\n") # Dump the bugs info into the table. for column in row: @@ -428,9 +431,9 @@ def Main(): sys.exit(1) # Use the defaultdict in order to make multidim dicts. - di_location_bugs = defaultdict(lambda: defaultdict(dict)) - di_subprogram_bugs = defaultdict(lambda: defaultdict(dict)) - di_variable_bugs = defaultdict(lambda: defaultdict(dict)) + di_location_bugs = defaultdict(lambda: defaultdict(list)) + di_subprogram_bugs = defaultdict(lambda: defaultdict(list)) + di_variable_bugs = defaultdict(lambda: defaultdict(list)) # Use the ordered dict to make a summary. di_location_bugs_summary = OrderedDict() @@ -470,9 +473,9 @@ def Main(): skipped_lines += 1 continue - di_loc_bugs = [] - di_sp_bugs = [] - di_var_bugs = [] + di_loc_bugs = di_location_bugs[bugs_file][bugs_pass] + di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass] + di_var_bugs = di_variable_bugs[bugs_file][bugs_pass] # Omit duplicated bugs. di_loc_set = set() @@ -487,6 +490,7 @@ def Main(): if bugs_metadata == "DILocation": try: + origin = bug["origin"] action = bug["action"] bb_name = bug["bb-name"] fn_name = bug["fn-name"] @@ -494,7 +498,7 @@ def Main(): except: skipped_bugs += 1 continue - di_loc_bug = DILocBug(action, bb_name, fn_name, instr) + di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr) if not str(di_loc_bug) in di_loc_set: di_loc_set.add(str(di_loc_bug)) if opts.compress: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits