https://github.com/VitaNuo updated https://github.com/llvm/llvm-project/pull/133910
>From cfa057b4d43ebe7f94ccd4f387a94359beaa29b2 Mon Sep 17 00:00:00 2001 From: Viktoriia Bakalova <bakal...@google.com> Date: Fri, 4 Apr 2025 17:45:24 +0200 Subject: [PATCH] This commit implements a CC1 flag `-dump-deserialized-declaration-ranges`. The flag allows specifying a file path to dump ranges of deserialized declarations in `ASTReader`. Example usage: ``` clang -Xclang=-dump-deserialized-declaration-ranges=/tmp/decls -c file.cc -o file.o ``` Example output: ``` // /tmp/decls { "required_ranges": [ { "file": "foo.h", "range": [ { "from": { "line": 26, "column": 1 }, "to": { "line": 27, "column": 77 } } ] }, { "file": "bar.h", "range": [ { "from": { "line": 30, "column": 1 }, "to": { "line": 35, "column": 1 } }, { "from": { "line": 92, "column": 1 }, "to": { "line": 95, "column": 1 } } ] } ] } ``` Specifying the flag creates an instance of `DeserializedDeclsLineRangePrinter`, which dumps ranges of deserialized declarations to aid debugging and bug minimization. Required ranges are computed from source ranges of Decls. `TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for the sake of this PR. Technical details: * `DeserializedDeclsLineRangePrinter` implements `ASTConsumer` and `ASTDeserializationListener`, so that an object of `DeserializedDeclsLineRangePrinter` registers as its own listener. * `ASTDeserializationListener` interface provides the `DeclRead` callback that we use to collect the deserialized Decls. Printing or otherwise processing them at this point is dangerous, since that could trigger additional deserialization and crash compilation. * The collected Decls are processed in the `HandleTranslationUnit` method of `ASTConsumer`. This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been deserialized. * In case our processing causes further deserialization, `DeclRead` from the listener might be called again. However, at that point we don't accept any more Decls for processing.
--- clang/include/clang/Driver/Options.td | 4 + .../include/clang/Frontend/FrontendOptions.h | 3 + clang/lib/Frontend/FrontendAction.cpp | 190 +++++++++++++++++- .../dump-deserialized-declaration-ranges.cpp | 118 +++++++++++ 4 files changed, 310 insertions(+), 5 deletions(-) create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3af072242d039..1737e40b776e1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"], "print-dependency-directives-minimized-source">, HelpText<"Print the output of the dependency directives source minimizer">; } +def dump_deserialized_declaration_ranges : Joined<["-"], + "dump-deserialized-declaration-ranges=">, + HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">, + MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>; defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists", CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index a9c9849ff52ab..8ef9ce9db8783 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -530,6 +530,9 @@ class FrontendOptions { /// Output Path for module output file. std::string ModuleOutputPath; + /// Output path to dump ranges of deserialized declarations. 
+ std::string DumpDeserializedDeclarationRangesPath; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 2d77f06be7446..1f939f7722d19 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -15,6 +15,8 @@ #include "clang/Basic/FileEntry.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/Sarif.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/Stack.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" @@ -35,6 +37,7 @@ #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -49,6 +52,166 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) namespace { +/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations +/// to aid debugging and bug minimization. It implements ASTConsumer and +/// ASTDeserializationListener, so that an object of +/// DeserializedDeclsLineRangePrinter registers as its own listener. The +/// ASTDeserializationListener interface provides the DeclRead callback that we +/// use to collect the deserialized Decls. Note that printing or otherwise +/// processing them at this point is dangerous, since that could trigger +/// additional deserialization and crash compilation. Therefore, we process the +/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a +/// safe point, since we know that by this point all the Decls needed by the +/// compiler frontend have been deserialized. In case our processing causes +/// further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing. +class DeserializedDeclsLineRangePrinter : public ASTConsumer, + ASTDeserializationListener { +public: + explicit DeserializedDeclsLineRangePrinter( + SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS) + : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {} + + ASTDeserializationListener *GetASTDeserializationListener() override { + return this; + } + + void DeclRead(GlobalDeclID ID, const Decl *D) override { + if (!IsCollectingDecls) + return; + if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) || + isa<NamespaceDecl>(D)) { + // These decls cover a lot of nested declarations that might not be used, + // reducing the granularity and making the output less useful. + return; + } + if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) { + // We choose to work at namespace level to reduce complexity and the + // number of cases we care about. + return; + } + PendingDecls.push_back(D); + } + + struct Position { + unsigned Line; + unsigned Column; + + bool operator<(const Position &other) const { + if (Line < other.Line) + return true; + if (Line > other.Line) + return false; + return Column < other.Column; + } + + static Position GetSpelling(const SourceManager &SM, + const SourceLocation &SL) { + return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)}; + } + }; + + struct RequiredRanges { + StringRef Filename; + std::vector<std::pair<Position, Position>> FromTo; + }; + void HandleTranslationUnit(ASTContext &Context) override { + assert(IsCollectingDecls && "HandleTranslationUnit called twice?"); + IsCollectingDecls = false; + + // Group the source ranges of the collected Decls by file.
+ struct FileData { + std::vector<std::pair<Position, Position>> FromTo; + OptionalFileEntryRef Ref; + }; + llvm::DenseMap<const FileEntry *, FileData> FileToLines; + for (const Decl *D : PendingDecls) { + CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); + if (!R.isValid()) + continue; + + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { + // Such cases are rare and difficult to handle. + continue; + } + + auto &Data = FileToLines[F]; + if (!Data.Ref) + Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()), + Position::GetSpelling(SM, R.getEnd())}); + } + + // To simplify output, merge consecutive and intersecting ranges. + std::vector<RequiredRanges> Result; + for (auto &[F, Data] : FileToLines) { + auto &FromTo = Data.FromTo; + assert(!FromTo.empty()); + + if (!Data.Ref) + continue; + + llvm::sort(FromTo); + + std::vector<std::pair<Position, Position>> MergedRanges; + MergedRanges.push_back(FromTo.front()); + for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) { + if (MergedRanges.back().second < It->first) { + MergedRanges.push_back(*It); + continue; + } + if (MergedRanges.back().second < It->second) + MergedRanges.back().second = It->second; + } + Result.push_back({Data.Ref->getName(), MergedRanges}); + } + printJson(Result); + } + +private: + std::vector<const Decl *> PendingDecls; + bool IsCollectingDecls = true; + const SourceManager &SM; + std::unique_ptr<llvm::raw_ostream> OS; + + void printJson(llvm::ArrayRef<RequiredRanges> Result) { + *OS << "{\n"; + *OS << R"( "required_ranges": [)" << "\n"; + for (size_t I = 0; I < Result.size(); ++I) { + auto &F = Result[I].Filename; + auto &MergedRanges = Result[I].FromTo; + *OS << R"( {)" << "\n"; + *OS << R"( "file": ")" << F << "\"," << "\n"; + *OS << R"( "range": [)" << "\n"; + for (size_t J = 0; J < MergedRanges.size(); ++J) { + auto &From = 
MergedRanges[J].first; + auto &To = MergedRanges[J].second; + *OS << R"( {)" << "\n"; + *OS << R"( "from": {)" << "\n"; + *OS << R"( "line": )" << From.Line << ",\n"; + *OS << R"( "column": )" << From.Column << "\n" + << R"( },)" << "\n"; + *OS << R"( "to": {)" << "\n"; + *OS << R"( "line": )" << To.Line << ",\n"; + *OS << R"( "column": )" << To.Column << "\n" + << R"( })" << "\n"; + *OS << R"( })"; + if (J < MergedRanges.size() - 1) { + *OS << ","; + } + *OS << "\n"; + } + *OS << " ]" << "\n" << " }"; + if (I < Result.size() - 1) + *OS << ","; + *OS << "\n"; + } + *OS << " ]\n"; + *OS << "}\n"; + } +}; + /// Dumps deserialized declarations. class DeserializedDeclsDumper : public DelegatingDeserializationListener { public: @@ -121,6 +284,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!Consumer) return nullptr; + std::vector<std::unique_ptr<ASTConsumer>> Consumers; + llvm::StringRef DumpDeserializedDeclarationRangesPath = + CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath; + if (!DumpDeserializedDeclarationRangesPath.empty()) { + std::error_code ErrorCode; + auto FileStream = std::make_unique<llvm::raw_fd_ostream>( + DumpDeserializedDeclarationRangesPath, ErrorCode, + llvm::sys::fs::OF_None); + if (!ErrorCode) { + Consumers.push_back(std::make_unique<DeserializedDeclsLineRangePrinter>( + CI.getSourceManager(), std::move(FileStream))); + } else { + llvm::errs() << "Failed to create output file for " + "-dump-deserialized-declaration-ranges flag, file path: " + << DumpDeserializedDeclarationRangesPath + << ", error: " << ErrorCode.message() << "\n"; + } + } + // Validate -add-plugin args.
bool FoundAllPlugins = true; for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) { @@ -138,17 +320,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, if (!FoundAllPlugins) return nullptr; - // If there are no registered plugins we don't need to wrap the consumer - if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end()) - return Consumer; - // If this is a code completion run, avoid invoking the plugin consumers if (CI.hasCodeCompletionConsumer()) return Consumer; // Collect the list of plugins that go before the main action (in Consumers) // or after it (in AfterConsumers) - std::vector<std::unique_ptr<ASTConsumer>> Consumers; std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers; for (const FrontendPluginRegistry::entry &Plugin : FrontendPluginRegistry::entries()) { @@ -191,6 +368,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI, Consumers.push_back(std::move(C)); } + assert(Consumers.size() >= 1 && "should have added the main consumer"); + if (Consumers.size() == 1) + return std::move(Consumers.front()); return std::make_unique<MultiplexConsumer>(std::move(Consumers)); } diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp new file mode 100644 index 0000000000000..c72fc32479b0f --- /dev/null +++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp @@ -0,0 +1,118 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm +// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o +// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s +// RANGE:{ +// RANGE-NEXT: "required_ranges": [ +// RANGE-NEXT: { +// RANGE-NEXT: "file": "{{.+}}/foo.h", +// RANGE-NEXT: "range": [ +// RANGE-NEXT: { +// 
RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 1, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 9, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 11, +// RANGE-NEXT: "column": 12 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 13, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 15, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: } +// RANGE-NEXT: } +// RANGE-NEXT: ] +// RANGE-NEXT: } +// RANGE-NEXT: ] +// RANGE-NEXT:} +// RUN: echo -e '{\n\ +// RUN: "required_ranges": [\n\ +// RUN: {\n\ +// RUN: "file": "%t/foo.h",\n\ +// RUN: "range": [\n\ +// RUN: {\n\ +// RUN: "from": {\n\ +// RUN: "line": 1,\n\ +// RUN: "column": 1\n\ +// RUN: },\n\ +// RUN: "to": {\n\ +// RUN: "line": 9,\n\ +// RUN: "column": 1\n\ +// RUN: }\n\ +// RUN: },\n\ +// RUN: {\n\ +// RUN: "from": {\n\ +// RUN: "line": 11,\n\ +// RUN: "column": 1\n\ +// RUN: },\n\ +// RUN: "to": {\n\ +// RUN: "line": 11,\n\ +// RUN: "column": 12\n\ +// RUN: }\n\ +// RUN: },\n\ +// RUN: {\n\ +// RUN: "from": {\n\ +// RUN: "line": 13,\n\ +// RUN: "column": 1\n\ +// RUN: },\n\ +// RUN: "to": {\n\ +// RUN: "line": 15,\n\ +// RUN: "column": 1\n\ +// RUN: }\n\ +// RUN: }\n\ +// RUN: ]\n\ +// RUN: }\n\ +// RUN: ]\n\ +// RUN:}' > %t/expected_decls +// RUN: diff %t/decls %t/expected_decls + +//--- foo.cppmap +module foo { + header "foo.h" + export * +} + +//--- foo.h +class MyData { +public: + MyData(int val): value_(val) {} + int getValue() const { + return 5; + } +private: + int value_; +}; + +extern int global_value; + +int multiply(int a, int b) { + return a * b; +} + +//--- foo.cpp +#include "foo.h" +int global_value = 5; +int main() { + MyData data(5); + int current_value = data.getValue(); + 
int doubled_value = multiply(current_value, 2); + int final_result = doubled_value + global_value; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits