https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143594
>From e2ff01bc95a78c4372bdf538f0433dc882c070f8 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Tue, 10 Jun 2025 20:02:36 +0100 Subject: [PATCH] [DLCov] Origin-Tracking: Add debugify support --- llvm/lib/Transforms/Utils/Debugify.cpp | 83 ++++++++++++++++++--- llvm/utils/llvm-original-di-preservation.py | 24 +++--- 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index 5f70bc442d2f0..e8ed55a99546e 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -15,7 +15,10 @@ #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/InstIterator.h" @@ -28,6 +31,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include <optional> +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// We need the Signals header to operate on stacktraces if we're using DebugLoc +// origin-tracking. +#include "llvm/Support/Signals.h" +#endif #define DEBUG_TYPE "debugify" @@ -59,6 +67,52 @@ cl::opt<Level> DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. +static DenseMap<void *, SmallVector<std::string, 0>> SymbolizedAddrs; +static DenseSet<void *> UnsymbolizedAddrs; + +std::string symbolizeStackTrace(const Instruction *I) { + // We flush the set of unsymbolized addresses at the latest possible moment, + // i.e. now. + if (!UnsymbolizedAddrs.empty()) { + sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs); + UnsymbolizedAddrs.clear(); + } + auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces(); + std::string Result; + raw_string_ostream OS(Result); + for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) { + if (TraceIdx != 0) + OS << "========================================\n"; + auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx]; + unsigned VirtualFrameNo = 0; + for (int Frame = 0; Frame < Depth; ++Frame) { + assert(SymbolizedAddrs.contains(StackTrace[Frame]) && + "Expected each address to have been symbolized."); + for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) { + OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), std::log10(Depth) + 2) + << ' ' << SymbolizedFrame << '\n'; + } + } + } + return Result; +} +void collectStackAddresses(Instruction &I) { + auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces(); + for (auto &[Depth, StackTrace] : OriginStackTraces) { + for (int Frame = 0; Frame < Depth; ++Frame) { + void *Addr = StackTrace[Frame]; + if (!SymbolizedAddrs.contains(Addr)) + UnsymbolizedAddrs.insert(Addr); + } + } +} +#else +void collectStackAddresses(Instruction &I) {} +#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN + uint64_t getAllocSizeInBits(Module &M, Type *Ty) { return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0; } @@ -375,6 +429,8 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); + // Track the addresses to symbolize, if the feature is enabled. + collectStackAddresses(I); DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } @@ -450,14 +506,23 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, auto BBName = BB->hasName() ? BB->getName() : "no-name"; auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); + auto CreateJSONBugEntry = [&](const char *Action) { + Bugs.push_back(llvm::json::Object({ + {"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr", InstName}, + {"action", Action}, +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN + {"origin", symbolizeStackTrace(Instr)}, +#endif + })); + }; + auto InstrIt = DILocsBefore.find(Instr); if (InstrIt == DILocsBefore.end()) { if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "not-generate"}})); + CreateJSONBugEntry("not-generate"); else dbg() << "WARNING: " << NameOfWrappedPass << " did not generate DILocation for " << *Instr @@ -470,11 +535,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, // If the instr had the !dbg attached before the pass, consider it as // a debug info issue. if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "drop"}})); + CreateJSONBugEntry("drop"); else dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of " << *Instr << " (BB: " << BBName << ", Fn: " << FnName @@ -612,6 +673,8 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); + // Track the addresses to symbolize, if the feature is enabled. + collectStackAddresses(I); DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py index dc1fa518ca8e6..680b75691526c 100755 --- a/llvm/utils/llvm-original-di-preservation.py +++ b/llvm/utils/llvm-original-di-preservation.py @@ -13,14 +13,15 @@ class DILocBug: - def __init__(self, action, bb_name, fn_name, instr): + def __init__(self, origin, action, bb_name, fn_name, instr): + self.origin = origin self.action = action self.bb_name = bb_name self.fn_name = fn_name self.instr = instr def __str__(self): - return self.action + self.bb_name + self.fn_name + self.instr + return self.action + self.bb_name + self.fn_name + self.instr + self.origin class DISPBug: @@ -86,6 +87,7 @@ def generate_html_report( "Function Name", "Basic Block Name", "Action", + "Origin", ] for column in header_di_loc: @@ -112,6 +114,9 @@ def generate_html_report( row.append(x.fn_name) row.append(x.bb_name) row.append(x.action) + row.append( + f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>" + ) row.append(" </tr>\n") # Dump the bugs info into the table. for column in row: @@ -428,9 +433,9 @@ def Main(): sys.exit(1) # Use the defaultdict in order to make multidim dicts. - di_location_bugs = defaultdict(lambda: defaultdict(dict)) - di_subprogram_bugs = defaultdict(lambda: defaultdict(dict)) - di_variable_bugs = defaultdict(lambda: defaultdict(dict)) + di_location_bugs = defaultdict(lambda: defaultdict(list)) + di_subprogram_bugs = defaultdict(lambda: defaultdict(list)) + di_variable_bugs = defaultdict(lambda: defaultdict(list)) # Use the ordered dict to make a summary. di_location_bugs_summary = OrderedDict() @@ -470,9 +475,9 @@ def Main(): skipped_lines += 1 continue - di_loc_bugs = [] - di_sp_bugs = [] - di_var_bugs = [] + di_loc_bugs = di_location_bugs[bugs_file][bugs_pass] + di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass] + di_var_bugs = di_variable_bugs[bugs_file][bugs_pass] # Omit duplicated bugs. di_loc_set = set() @@ -487,6 +492,7 @@ def Main(): if bugs_metadata == "DILocation": try: + origin = bug["origin"] action = bug["action"] bb_name = bug["bb-name"] fn_name = bug["fn-name"] @@ -494,7 +500,7 @@ def Main(): except: skipped_bugs += 1 continue - di_loc_bug = DILocBug(action, bb_name, fn_name, instr) + di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr) if not str(di_loc_bug) in di_loc_set: di_loc_set.add(str(di_loc_bug)) if opts.compress: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits