https://github.com/VitaNuo updated https://github.com/llvm/llvm-project/pull/133910
>From 6d61256a73c64ef4351266d6ef76067f5a2568db Mon Sep 17 00:00:00 2001 From: Viktoriia Bakalova <bakal...@google.com> Date: Fri, 4 Apr 2025 13:40:55 +0200 Subject: [PATCH] [WIP] Implement -dump-deserialized-declaration-ranges flag to dump source ranges of deserialized decls. --- clang/include/clang/Driver/Options.td | 4 + .../include/clang/Frontend/FrontendOptions.h | 3 + clang/lib/Frontend/FrontendAction.cpp | 161 +++++++++++++++++- .../dump-deserialized-declaration-ranges.cpp | 80 +++++++++ 4 files changed, 243 insertions(+), 5 deletions(-) create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3af072242d039..1737e40b776e1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"], "print-dependency-directives-minimized-source">, HelpText<"Print the output of the dependency directives source minimizer">; } +def dump_deserialized_declaration_ranges : Joined<["-"], + "dump-deserialized-declaration-ranges=">, + HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">, + MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>; defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists", CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index a9c9849ff52ab..8ef9ce9db8783 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -530,6 +530,9 @@ class FrontendOptions { /// Output Path for module output file. std::string ModuleOutputPath; + /// Output path to dump ranges of deserialized declarations. 
+ std::string DumpDeserializedDeclarationRangesPath; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 2d77f06be7446..f98aa5ab1fe51 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/FileEntry.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/Sarif.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/Stack.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" @@ -35,6 +36,7 @@ #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -49,6 +51,144 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) namespace { +/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations to aid debugging and bug minimization. +/// It implements ASTConsumer and ASTDeserializationListener, so that an object of DeserializedDeclsLineRangePrinter registers +/// as its own listener. +/// The ASTDeserializationListener interface provides the DeclRead callback that we use to collect the deserialized Decls. +/// Note that printing or otherwise processing them at this point is dangerous, since that could trigger additional +/// deserialization and crash compilation. +/// Therefore, we process the collected Decls in the HandleTranslationUnit method of ASTConsumer. +/// This is a safe point, since we know that by then all the Decls needed by the compiler frontend have been +/// deserialized. In case our processing causes further deserialization, DeclRead from the listener might be called again. +/// However, at that point we don't accept any more Decls for processing. 
+class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, public ASTConsumer { +public: + explicit DeserializedDeclsLineRangePrinter(SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS) + : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {} + + void DeclRead(GlobalDeclID ID, const Decl *D) override { + if (!IsCollectingDecls) { + return; + } + if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) || + isa<NamespaceDecl>(D)) + return; + if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) + return; + PendingDecls.push_back(D); + ASTDeserializationListener::DeclRead(ID, D); + } + + using Position = std::pair<unsigned, unsigned>; + struct RequiredRanges { + StringRef Filename; + std::vector<std::pair<Position, Position>> FromTo; + }; + void HandleTranslationUnit(ASTContext &Context) override { + IsCollectingDecls = false; + std::vector<const Decl *> Decls = std::move(PendingDecls); + if (!PendingDecls.empty()) { + llvm::errs() << "Deserialized more decls while printing, total of " + << PendingDecls.size() << "\n"; + PendingDecls.clear(); + } + + // Merge overlapping ranges within each file. Positions are tracked as + // (line, column) pairs; decls whose range spans two files are skipped. 
+ struct FileData { + std::vector<std::pair<Position, Position>> FromTo; + std::vector<std::pair<unsigned, unsigned>> Columns; + OptionalFileEntryRef Ref; + }; + llvm::DenseMap<const FileEntry *, FileData> FileToLines; + for (const Decl *D : Decls) { + CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); + if (!R.isValid()) + continue; + + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) + continue; + + auto &Data = FileToLines[F]; + if (!Data.Ref) + Data.Ref = + SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back({{SM.getSpellingLineNumber(R.getBegin()), SM.getSpellingColumnNumber(R.getBegin())}, + {SM.getSpellingLineNumber(R.getEnd()), SM.getSpellingColumnNumber(R.getEnd())}}); + } + + std::vector<RequiredRanges> Result; + for (auto &[F, Data] : FileToLines) { + auto& FromTo = Data.FromTo; + assert(!FromTo.empty()); + + if (!Data.Ref) continue; + + llvm::sort(FromTo); + + std::vector<std::pair<Position, Position>> MergedLines; + MergedLines.push_back(FromTo.front()); + for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) { + if (MergedLines.back().second < It->first) { + MergedLines.push_back(*It); + continue; + } + if (MergedLines.back().second < It->second) + MergedLines.back().second = It->second; + } + Result.push_back({Data.Ref->getName(), MergedLines}); + } + printJson(Result); + } + + void printJson(const std::vector<RequiredRanges>& Result) { + *OS << "{\n"; + *OS << " \"required_ranges\": [\n"; + for (size_t i = 0; i < Result.size(); ++i) { + auto &F = Result[i].Filename; + auto &MergedLines = Result[i].FromTo; + *OS << " {\n"; + *OS << " \"file\": \"" << F << "\",\n"; + *OS << " \"range\": [\n"; + for (size_t j = 0; j < MergedLines.size(); ++j) { + auto &From = MergedLines[j].first; + auto &To = MergedLines[j].second; + *OS << " {\n"; + *OS << " \"from\": {\n"; + *OS << " \"line\": " << From.first << ",\n"; + *OS << " \"column\": " << 
From.second << "\n },\n"; + *OS << " \"to\": {\n"; + *OS << " \"line\": " << To.first << ",\n"; + *OS << " \"column\": " << To.second << "\n }\n"; + *OS << " }"; + if (j < MergedLines.size() - 1) { + *OS << ","; + } + *OS << "\n"; + } + *OS << " ]\n }"; + if (i < Result.size() - 1) { + *OS << ","; + } + *OS << "\n"; + } + *OS << " ]\n"; + *OS << "}\n"; + } + + ASTDeserializationListener *GetASTDeserializationListener() override { + return this; + } + +private: +std::vector<const Decl *> PendingDecls; +bool IsCollectingDecls = true; +const SourceManager &SM; +std::unique_ptr<llvm::raw_ostream> OS; +}; + + /// Dumps deserialized declarations. class DeserializedDeclsDumper : public DelegatingDeserializationListener { public: @@ -121,6 +261,19 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!Consumer) return nullptr; + std::vector<std::unique_ptr<ASTConsumer>> Consumers; + llvm::StringRef DumpDeserializedDeclarationRangesPath = CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath; + if (!DumpDeserializedDeclarationRangesPath.empty()) { + std::error_code ErrorCode; + auto FileStream = std::make_unique<llvm::raw_fd_ostream>(DumpDeserializedDeclarationRangesPath, ErrorCode, llvm::sys::fs::OF_None); + if (!ErrorCode) { + auto Printer = std::make_unique<DeserializedDeclsLineRangePrinter>(CI.getSourceManager(), std::move(FileStream)); + Consumers.push_back(std::move(Printer)); + } else { + llvm::errs() << "Failed to create output file for -dump-deserialized-declaration-ranges flag, file path: " << DumpDeserializedDeclarationRangesPath << ", error: " << ErrorCode.message() << "\n"; + } + } + // Validate -add-plugin args. 
bool FoundAllPlugins = true; for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) { @@ -138,17 +291,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!FoundAllPlugins) return nullptr; - // If there are no registered plugins we don't need to wrap the consumer - if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end()) - return Consumer; - // If this is a code completion run, avoid invoking the plugin consumers if (CI.hasCodeCompletionConsumer()) return Consumer; // Collect the list of plugins that go before the main action (in Consumers) // or after it (in AfterConsumers) - std::vector<std::unique_ptr<ASTConsumer>> Consumers; std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers; for (const FrontendPluginRegistry::entry &Plugin : FrontendPluginRegistry::entries()) { @@ -191,6 +339,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, Consumers.push_back(std::move(C)); } + assert(Consumers.size() >= 1 && "should have added the main consumer"); + if (Consumers.size() == 1) + return std::move(Consumers.front()); return std::make_unique<MultiplexConsumer>(std::move(Consumers)); } diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp new file mode 100644 index 0000000000000..bb43cb7c40e77 --- /dev/null +++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp @@ -0,0 +1,80 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm +// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o +// RUN: cat %t/decls +// RUN: echo '{ \ +// RUN: "required_ranges": [\ +// RUN: {\ +// RUN: "file": 
"%t/foo.h",\ +// RUN: "range": [\ +// RUN: {\ +// RUN: "from": {\ +// RUN: "line": 1,\ +// RUN: "column": 1\ +// RUN: },\ +// RUN: "to": {\ +// RUN: "line": 9,\ +// RUN: "column": 1\ +// RUN: }\ +// RUN: },\ +// RUN: {\ +// RUN: "from": {\ +// RUN: "line": 11,\ +// RUN: "column": 1\ +// RUN: },\ +// RUN: "to": {\ +// RUN: "line": 11,\ +// RUN: "column": 12\ +// RUN: }\ +// RUN: },\ +// RUN: {\ +// RUN: "from": {\ +// RUN: "line": 13,\ +// RUN: "column": 1\ +// RUN: },\ +// RUN: "to": {\ +// RUN: "line": 15,\ +// RUN: "column": 1\ +// RUN: }\ +// RUN: }\ +// RUN: ]\ +// RUN: }\ +// RUN: ]\ +// RUN:}' > %t/expected_decls +// RUN: jq '.' %t/expected_decls > %t/expected_decls_formatted +// RUN: diff %t/decls %t/expected_decls_formatted + +//--- foo.cppmap +module foo { + header "foo.h" + export * +} + +//--- foo.h +class MyData { +public: + MyData(int val): value_(val) {} + int getValue() const { + return 5; + } +private: + int value_; +}; + +extern int global_value; + +int multiply(int a, int b) { + return a * b; +} + +//--- foo.cpp +#include "foo.h" +int global_value = 5; +int main() { + MyData data(5); + int current_value = data.getValue(); + int doubled_value = multiply(current_value, 2); + int final_result = doubled_value + global_value; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits