https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/107279
>From 55115af37710a80abbc2ec2ca5d96d1eea5827b4 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 12:23:52 +0100 Subject: [PATCH 1/4] Add conditionally-enabled DebugLocKinds --- clang/lib/CodeGen/BackendUtil.cpp | 16 ++++ llvm/docs/HowToUpdateDebugInfo.rst | 36 +++++++++ llvm/include/llvm/IR/DebugLoc.h | 88 +++++++++++++++++++++- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 ++ llvm/lib/IR/DebugInfo.cpp | 8 +- llvm/lib/IR/DebugLoc.cpp | 6 ++ llvm/lib/Transforms/Utils/Debugify.cpp | 19 +++-- 7 files changed, 167 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7557cb8408921..20504b0e0868e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -961,6 +961,22 @@ void EmitAssemblyHelper::RunOptimizationPipeline( Debugify.setOrigDIVerifyBugsReportFilePath( CodeGenOpts.DIBugsReportFilePath); Debugify.registerCallbacks(PIC, MAM); + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // If we're using debug location coverage tracking, mark all the + // instructions coming out of the frontend without a DebugLoc as being + // compiler-generated, to prevent both those instructions and new + // instructions that inherit their location from being treated as + // incorrectly empty locations. + for (Function &F : *TheModule) { + if (!F.getSubprogram()) + continue; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (!I.getDebugLoc()) + I.setDebugLoc(DebugLoc::getCompilerGenerated()); + } +#endif } // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 3088f59c1066a..0e17813ea6534 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -169,6 +169,42 @@ See the discussion in the section about :ref:`merging locations<WhenToMergeLocation>` for examples of when the rule for dropping locations applies. +.. _NewInstLocations: + +Setting locations for new instructions +------------------------------------ + +Whenever a new instruction is created and there is no suitable location for that +instruction, that location should be annotated accordingly. There are a set of +special ``DebugLoc`` values that can be used to indicate the reason that a new +instruction does not have a valid location. These are as follows: + +- ``DebugLoc::getCompilerGenerated()``: This indicates that the instruction is a +compiler-generated instruction, i.e. it is not associated with any user source +code. +- ``DebugLoc::getDropped()``: This indicates that the instruction has +intentionally had its source location removed, according to the rules for +dropping locations; this is set automatically by +``Instruction::dropLocation()``. +- ``DebugLoc::getUnknown()``: This indicates that the instruction does not have +a known or currently knowable source location, e.g. the attribution is ambiguous +in a way that can't currently be represented in LLVM, or that it is otherwise +infeasible to determine or track the correct source location. +- ``DebugLoc::getTemporary()``: This is used for instructions that we don't +expect to be emitted (e.g. ``UnreachableInst``), and so should not need a valid +location; if we ever try to emit a temporary location into an object file, this +indicates that something has gone wrong. + +Where applicable, these should be used instead of leaving an instruction without +an assigned location or explicitly setting the location as ``DebugLoc()``. +Ordinarily these special locations are ignored by the compiler, but some testing +builds will track their use in order to detect missing locations; for this +reason, the most important rule is to *not* apply any of these if it isn't clear +which, if any, is appropriate - an absent location can be detected and fixed, +while an incorrectly annotated instruction is much harder to detect. On the +other hand, if any of these clearly apply, then they should be used to prevent +false positives from being flagged up. + Rules for updating debug values =============================== diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index c22d3e9b10d27..fb205aa93a443 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_DEBUGLOC_H #define LLVM_IR_DEBUGLOC_H +#include "llvm/Config/config.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/DataTypes.h" @@ -22,6 +23,73 @@ namespace llvm { class LLVMContext; class raw_ostream; class DILocation; + class Function; + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // Used to represent different "kinds" of DebugLoc, expressing that the + // instruction it is part of is either normal and should contain a valid + // DILocation, or otherwise describing the reason why the instruction does + // not contain a valid DILocation. + enum class DebugLocKind : uint8_t { + // The instruction is expected to contain a valid DILocation. + Normal, + // The instruction is compiler-generated, i.e. it is not associated with any + // line in the original source. + CompilerGenerated, + // The instruction has intentionally had its source location removed, + // typically because it was moved outside of its original control-flow and + // presenting the prior source location would be misleading for debuggers + // or profilers. + Dropped, + // The instruction does not have a known or currently knowable source + // location, e.g. the attribution is ambiguous in a way that can't be + // represented, or determining the correct location is complicated and + // requires future developer effort. + Unknown, + // DebugLoc is attached to an instruction that we don't expect to be + // emitted, and so can omit a valid DILocation; we don't expect to ever try + // and emit these into the line table, and trying to do so is a sign that + // something has gone wrong (most likely a DebugLoc leaking from a transient + // compiler-generated instruction). + Temporary + }; + + // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify + // to ignore intentionally-empty DebugLocs. + class DILocAndCoverageTracking : public TrackingMDNodeRef { + public: + DebugLocKind Kind; + // Default constructor for empty DebugLocs. + DILocAndCoverageTracking() + : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {} + // Valid or nullptr MDNode*, normal DebugLocKind. + DILocAndCoverageTracking(const MDNode *Loc) + : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), + Kind(DebugLocKind::Normal) {} + DILocAndCoverageTracking(const DILocation *Loc); + // Explicit DebugLocKind, which always means a nullptr MDNode*. + DILocAndCoverageTracking(DebugLocKind Kind) + : TrackingMDNodeRef(nullptr), Kind(Kind) {} + }; + template <> struct simplify_type<DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + template <> struct simplify_type<const DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + + using DebugLocTrackingRef = DILocAndCoverageTracking; +#else + using DebugLocTrackingRef = TrackingMDNodeRef; +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING /// A debug info location. /// @@ -31,7 +99,8 @@ namespace llvm { /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a /// one based on relatively opaque \a MDNode pointers. class DebugLoc { - TrackingMDNodeRef Loc; + + DebugLocTrackingRef Loc; public: DebugLoc() = default; @@ -47,6 +116,23 @@ namespace llvm { /// IR. explicit DebugLoc(const MDNode *N); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLoc(DebugLocKind Kind) : Loc(Kind) {} + DebugLocKind getKind() const { return Loc.Kind; } +#endif + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + static inline DebugLoc getTemporary() { return DebugLoc(DebugLocKind::Temporary); } + static inline DebugLoc getUnknown() { return DebugLoc(DebugLocKind::Unknown); } + static inline DebugLoc getCompilerGenerated() { return DebugLoc(DebugLocKind::CompilerGenerated); } + static inline DebugLoc getDropped() { return DebugLoc(DebugLocKind::Dropped); } +#else + static inline DebugLoc getTemporary() { return DebugLoc(); } + static inline DebugLoc getUnknown() { return DebugLoc(); } + static inline DebugLoc getCompilerGenerated() { return DebugLoc(); } + static inline DebugLoc getDropped() { return DebugLoc(); } +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING + /// Get the underlying \a DILocation. /// /// \pre !*this or \c isa<DILocation>(getAsMDNode()). diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 39f1299a24e81..44aeb06780f1d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" @@ -2097,6 +2098,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } if (!DL) { + // FIXME: We could assert that `DL.getKind() != DebugLocKind::Temporary` + // here, or otherwise record any temporary DebugLocs seen to ensure that + // transient compiler-generated instructions aren't leaking their DLs to + // other instructions. // We have an unspecified location, which might want to be line 0. // If we have already emitted a line-0 record, don't repeat it. if (LastAsmLine == 0) diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index cefd6fe14a3ac..404569275e34f 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -987,8 +987,10 @@ void Instruction::updateLocationAfterHoist() { dropLocation(); } void Instruction::dropLocation() { const DebugLoc &DL = getDebugLoc(); - if (!DL) + if (!DL) { + setDebugLoc(DebugLoc::getDropped()); return; + } // If this isn't a call, drop the location to allow a location from a // preceding instruction to propagate. @@ -1000,7 +1002,7 @@ void Instruction::dropLocation() { } if (!MayLowerToCall) { - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getDropped()); return; } @@ -1019,7 +1021,7 @@ void Instruction::dropLocation() { // // One alternative is to set a line 0 location with the existing scope and // inlinedAt info. The location might be sensitive to when inlining occurs. - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getDropped()); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index bdea52180f74a..b05d0f6096bda 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -11,6 +11,12 @@ #include "llvm/IR/DebugInfo.h" using namespace llvm; +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) + : TrackingMDNodeRef(const_cast<DILocation *>(L)), + Kind(DebugLocKind::Normal) {} +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING + //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index e6b5e267d192b..e5a7415431f56 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -290,6 +290,16 @@ bool llvm::stripDebugifyMetadata(Module &M) { return Changed; } +bool hasLoc(const Instruction &I) { + const DILocation *Loc = I.getDebugLoc().get(); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLocKind Kind = I.getDebugLoc().getKind(); + return Loc || Kind != DebugLocKind::Normal; +#else + return Loc; +#endif +} + bool llvm::collectDebugInfoMetadata(Module &M, iterator_range<Module::iterator> Functions, DebugInfoPerPass &DebugInfoBeforePass, @@ -362,9 +372,7 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); + DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } } @@ -607,10 +615,7 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - - DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); + DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } } >From 057bbad662e4bdc55847a776a0ac0298472ed892 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Fri, 11 Apr 2025 11:20:18 +0100 Subject: [PATCH 2/4] clang-format --- llvm/include/llvm/IR/DebugLoc.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index fb205aa93a443..9f1dafa8b71d9 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -122,10 +122,18 @@ namespace llvm { #endif #if ENABLE_DEBUGLOC_COVERAGE_TRACKING - static inline DebugLoc getTemporary() { return DebugLoc(DebugLocKind::Temporary); } - static inline DebugLoc getUnknown() { return DebugLoc(DebugLocKind::Unknown); } - static inline DebugLoc getCompilerGenerated() { return DebugLoc(DebugLocKind::CompilerGenerated); } - static inline DebugLoc getDropped() { return DebugLoc(DebugLocKind::Dropped); } + static inline DebugLoc getTemporary() { + return DebugLoc(DebugLocKind::Temporary); + } + static inline DebugLoc getUnknown() { + return DebugLoc(DebugLocKind::Unknown); + } + static inline DebugLoc getCompilerGenerated() { + return DebugLoc(DebugLocKind::CompilerGenerated); + } + static inline DebugLoc getDropped() { + return DebugLoc(DebugLocKind::Dropped); + } #else static inline DebugLoc getTemporary() { return DebugLoc(); } static inline DebugLoc getUnknown() { return DebugLoc(); } >From 6489ef292310394a8ed80de64219a8cff6dc9ff8 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Thu, 17 Apr 2025 16:49:43 +0100 Subject: [PATCH 3/4] Fix underline length --- llvm/docs/HowToUpdateDebugInfo.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 0e17813ea6534..0a77fdfd82612 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -172,7 +172,7 @@ dropping locations applies. .. _NewInstLocations: Setting locations for new instructions ------------------------------------- +-------------------------------------- Whenever a new instruction is created and there is no suitable location for that instruction, that location should be annotated accordingly. There are a set of >From 7f87542f4351dcf80d40101015a63450c5b84613 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Thu, 17 Apr 2025 17:16:47 +0100 Subject: [PATCH 4/4] Fix bullet list formatting --- llvm/docs/HowToUpdateDebugInfo.rst | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 0a77fdfd82612..75a88193a1f85 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -179,21 +179,24 @@ instruction, that location should be annotated accordingly. There are a set of special ``DebugLoc`` values that can be used to indicate the reason that a new instruction does not have a valid location. These are as follows: -- ``DebugLoc::getCompilerGenerated()``: This indicates that the instruction is a -compiler-generated instruction, i.e. it is not associated with any user source -code. -- ``DebugLoc::getDropped()``: This indicates that the instruction has -intentionally had its source location removed, according to the rules for -dropping locations; this is set automatically by -``Instruction::dropLocation()``. -- ``DebugLoc::getUnknown()``: This indicates that the instruction does not have -a known or currently knowable source location, e.g. the attribution is ambiguous -in a way that can't currently be represented in LLVM, or that it is otherwise -infeasible to determine or track the correct source location. -- ``DebugLoc::getTemporary()``: This is used for instructions that we don't -expect to be emitted (e.g. ``UnreachableInst``), and so should not need a valid -location; if we ever try to emit a temporary location into an object file, this -indicates that something has gone wrong. +* ``DebugLoc::getCompilerGenerated()``: This indicates that the instruction is a + compiler-generated instruction, i.e. it is not associated with any user source + code. + +* ``DebugLoc::getDropped()``: This indicates that the instruction has + intentionally had its source location removed, according to the rules for + dropping locations; this is set automatically by + ``Instruction::dropLocation()``. + +* ``DebugLoc::getUnknown()``: This indicates that the instruction does not have + a known or currently knowable source location, e.g. the attribution is ambiguous + in a way that can't currently be represented in LLVM, or that it is otherwise + infeasible to determine or track the correct source location. + +* ``DebugLoc::getTemporary()``: This is used for instructions that we don't + expect to be emitted (e.g. ``UnreachableInst``), and so should not need a valid + location; if we ever try to emit a temporary location into an object file, this + indicates that something has gone wrong. Where applicable, these should be used instead of leaving an instruction without an assigned location or explicitly setting the location as ``DebugLoc()``. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits