llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Viktoriia Bakalova (VitaNuo)

<details>
<summary>Changes</summary>

This PR implements a CC1 flag `-dump-deserialized-declaration-ranges`. The flag 
allows to specify a file path to dump ranges of deserialized declarations in 
`ASTReader`. Example usage:

```
clang -Xclang=-dump-deserialized-declaration-ranges=/tmp/decls -c file.cc -o 
file.o
```

Example output:
```
// /tmp/decls
{
  "required_ranges": [
    {
      "file": "foo.h",
      "range": [
        {
          "from": {
            "line": 26,
            "column": 1
          },
          "to": {
            "line": 27,
            "column": 77
          }
        }
      ]
    },
    {
      "file": "bar.h",
      "range": [
        {
          "from": {
            "line": 30,
            "column": 1
          },
          "to": {
            "line": 35,
            "column": 1
          }
        },
        {
          "from": {
            "line": 92,
            "column": 1
          },
          "to": {
            "line": 95,
            "column": 1
          }
        }
      ]
    }
  ]
}

```
Specifying the flag creates an instance of `DeserializedDeclsLineRangePrinter`, 
which dumps ranges of deserialized declarations to aid debugging and bug 
minimization.

Required ranges are computed from source ranges of Decls. 
`TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for 
the sake of this PR.

Technical details:
* `DeserializedDeclsLineRangePrinter` implements `ASTConsumer` and 
`ASTDeserializationListener`, so that an object of 
`DeserializedDeclsLineRangePrinter` registers as its own listener.
* `ASTDeserializationListener` interface provides the `DeclRead` callback that 
we use to collect the deserialized Decls.
Printing or otherwise processing them as this point is dangerous, since that 
could trigger additional deserialization and crash compilation.
* The collected Decls are processed in `HandleTranslationUnit` method of 
`ASTConsumer`. This is a safe point, since we know that by this point all the 
Decls needed by the compiler frontend have been deserialized.
*  In case our processing causes further deserialization, `DeclRead` from the 
listener might be called again. However, at that point we don't accept any more 
Decls for processing.

---
Full diff: https://github.com/llvm/llvm-project/pull/133910.diff


4 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+4) 
- (modified) clang/include/clang/Frontend/FrontendOptions.h (+3) 
- (modified) clang/lib/Frontend/FrontendAction.cpp (+156-5) 
- (added) clang/test/Frontend/dump-deserialized-declaration-ranges.cpp (+80) 


``````````diff
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 3af072242d039..1737e40b776e1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : 
Flag<["-"],
   "print-dependency-directives-minimized-source">,
   HelpText<"Print the output of the dependency directives source minimizer">;
 }
+def dump_deserialized_declaration_ranges : Joined<["-"],
+  "dump-deserialized-declaration-ranges=">,
+  HelpText<"Dump ranges of deserialized declarations to aid debugging and 
minimization">,
+  MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
 
 defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
   CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h 
b/clang/include/clang/Frontend/FrontendOptions.h
index a9c9849ff52ab..8ef9ce9db8783 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,6 +530,9 @@ class FrontendOptions {
   /// Output Path for module output file.
   std::string ModuleOutputPath;
 
+  /// Output path to dump ranges of deserialized declarations.
+  std::string DumpDeserializedDeclarationRangesPath;
+
 public:
   FrontendOptions()
       : DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index 2d77f06be7446..f98aa5ab1fe51 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/FileEntry.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Stack.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -35,6 +36,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BuryPointer.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
@@ -49,6 +51,144 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
 
 namespace {
 
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized 
declarations to aid debugging and bug minimization.
+/// It implements ASTConsumer and ASTDeserializationListener, so that an 
object of DeserializedDeclsLineRangePrinter registers
+/// as its own listener.
+/// The ASTDeserializationListener interface provides the DeclRead callback 
that we use to collect the deserialized Decls.
+/// Note that printing or otherwise processing them as this point is 
dangerous, since that could trigger additional
+/// deserialization and crash compilation.
+/// Therefore, we process the collected Decls in HandleTranslationUnit method 
of ASTConsumer.
+/// This is a safe point, since we know that by this point all the Decls 
needed by the compiler frontend have been
+/// deserialized. In case our processing causes further deserialization, 
DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, 
public ASTConsumer {
+public:
+  explicit DeserializedDeclsLineRangePrinter(SourceManager &SM, 
std::unique_ptr<llvm::raw_fd_ostream> OS)
+      : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
+
+  void DeclRead(GlobalDeclID ID, const Decl *D) override {
+    if (!IsCollectingDecls) {
+      return;
+    }
+    if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
+          isa<NamespaceDecl>(D))
+      return;
+    if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext())
+      return;
+    PendingDecls.push_back(D);
+    ASTDeserializationListener::DeclRead(ID, D);
+  }
+
+  using Position = std::pair<unsigned, unsigned>;
+  struct RequiredRanges {
+      StringRef Filename;
+      std::vector<std::pair<Position, Position>> FromTo;
+  };
+  void HandleTranslationUnit(ASTContext &Context) override {
+    IsCollectingDecls = false;
+    std::vector<const Decl *> Decls = std::move(PendingDecls);
+    if (!PendingDecls.empty()) {
+      llvm::errs() << "Deserialized more decls while printing, total of "
+                    << PendingDecls.size() << "\n";
+      PendingDecls.clear();
+    }
+
+    // Merge ranges in each of the files. For simplicity, track lines and hope
+    // they do not break things.
+    struct FileData {
+      std::vector<std::pair<Position, Position>> FromTo;
+      std::vector<std::pair<unsigned, unsigned>> Columns;
+      OptionalFileEntryRef Ref;
+    };
+    llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+    for (const Decl *D : Decls) {
+      CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+      if (!R.isValid())
+        continue;
+
+      auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd())))
+        continue;
+
+      auto &Data = FileToLines[F];
+      if (!Data.Ref)
+        Data.Ref =
+            SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+      Data.FromTo.push_back({{SM.getSpellingLineNumber(R.getBegin()), 
SM.getSpellingColumnNumber(R.getBegin())},
+                            {SM.getSpellingLineNumber(R.getEnd()), 
SM.getSpellingColumnNumber(R.getEnd())}});
+    }
+
+    std::vector<RequiredRanges> Result;
+    for (auto &[F, Data] : FileToLines) {
+      auto& FromTo = Data.FromTo;
+      assert(!FromTo.empty());
+
+      if (!Data.Ref) continue;
+
+      llvm::sort(FromTo);
+
+      std::vector<std::pair<Position, Position>> MergedLines;
+      MergedLines.push_back(FromTo.front());
+      for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
+        if (MergedLines.back().second < It->first) {
+          MergedLines.push_back(*It);
+          continue;
+        }
+        if (MergedLines.back().second < It->second)
+          MergedLines.back().second = It->second;
+      }
+      Result.push_back({Data.Ref->getName(), MergedLines});
+    }
+    printJson(Result);
+  }
+
+  void printJson(const std::vector<RequiredRanges>& Result) {
+    *OS << "{\n";
+    *OS << "  \"required_ranges\": [\n";
+    for (size_t i = 0; i < Result.size(); ++i) {
+      auto &F = Result[i].Filename;
+      auto &MergedLines = Result[i].FromTo;
+      *OS << "    {\n";
+      *OS << "      \"file\": \"" << F << "\",\n";
+      *OS << "      \"range\": [\n";
+      for (size_t j = 0; j < MergedLines.size(); ++j) {
+        auto &From = MergedLines[j].first;
+        auto &To = MergedLines[j].second;
+        *OS << "        {\n";
+        *OS << "          \"from\": {\n";
+        *OS << "            \"line\": " << From.first << ",\n";
+        *OS << "            \"column\": " << From.second << "\n          },\n";
+        *OS << "          \"to\": {\n";
+        *OS << "            \"line\": " << To.first << ",\n";
+        *OS << "            \"column\": " << To.second << "\n          }\n";
+        *OS << "        }";
+        if (j < MergedLines.size() - 1) {
+          *OS << ",";
+        }
+        *OS << "\n";
+      }
+      *OS << "      ]\n    }";
+      if (i < Result.size() - 1) {
+        *OS << ",";
+      }
+      *OS << "\n";
+    }
+    *OS << "  ]\n";
+    *OS << "}\n";
+  }
+
+  ASTDeserializationListener *GetASTDeserializationListener() override {
+    return this;
+  }
+
+private:
+std::vector<const Decl *> PendingDecls;
+bool IsCollectingDecls = true;
+const SourceManager &SM;
+std::unique_ptr<llvm::raw_ostream> OS;
+};
+
+
 /// Dumps deserialized declarations.
 class DeserializedDeclsDumper : public DelegatingDeserializationListener {
 public:
@@ -121,6 +261,19 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
   if (!Consumer)
     return nullptr;
 
+  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+  llvm::StringRef DumpDeserializedDeclarationRangesPath = 
CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+  if (!DumpDeserializedDeclarationRangesPath.empty()) {
+    std::error_code ErrorCode;
+    auto FileStream = 
std::make_unique<llvm::raw_fd_ostream>(DumpDeserializedDeclarationRangesPath, 
ErrorCode, llvm::sys::fs::OF_None);
+    if (!ErrorCode) {
+      auto Printer = 
std::make_unique<DeserializedDeclsLineRangePrinter>(CI.getSourceManager(), 
std::move(FileStream));
+      Consumers.push_back(std::move(Printer));
+    } else {
+      llvm::errs() << "Failed to create output file for 
-dump-deserialized-declaration-ranges flag, file path: " << 
DumpDeserializedDeclarationRangesPath << ", error: " << ErrorCode.message() << 
"\n";
+    }
+  }
+
   // Validate -add-plugin args.
   bool FoundAllPlugins = true;
   for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +291,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
   if (!FoundAllPlugins)
     return nullptr;
 
-  // If there are no registered plugins we don't need to wrap the consumer
-  if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
-    return Consumer;
-
   // If this is a code completion run, avoid invoking the plugin consumers
   if (CI.hasCodeCompletionConsumer())
     return Consumer;
 
   // Collect the list of plugins that go before the main action (in Consumers)
   // or after it (in AfterConsumers)
-  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
   std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
   for (const FrontendPluginRegistry::entry &Plugin :
        FrontendPluginRegistry::entries()) {
@@ -191,6 +339,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
       Consumers.push_back(std::move(C));
   }
 
+  assert(Consumers.size() >= 1 && "should have added the main consumer");
+  if (Consumers.size() == 1) 
+    return std::move(Consumers.front());
   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
 }
 
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
new file mode 100644
index 0000000000000..bb43cb7c40e77
--- /dev/null
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -0,0 +1,80 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo 
-fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
+// RUN: %clang_cc1 -xc++ -fmodules 
-dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm 
%t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls
+// RUN: echo '{ \
+// RUN:  "required_ranges": [\
+// RUN:    {\
+// RUN:      "file": 
"/usr/local/google/home/bakalova/llvm-project/build/tools/clang/test/Frontend/Output/dump-deserialized-declaration-ranges.cpp.tmp/foo.h",\
+// RUN:      "range": [\
+// RUN:        {\
+// RUN:          "from": {\
+// RUN:            "line": 1,\
+// RUN:            "column": 1\
+// RUN:          },\
+// RUN:          "to": {\
+// RUN:            "line": 9,\
+// RUN:            "column": 1\
+// RUN:          }\
+// RUN:        },\
+// RUN:        {\
+// RUN:          "from": {\
+// RUN:            "line": 11,\
+// RUN:            "column": 1\
+// RUN:          },\
+// RUN:          "to": {\
+// RUN:            "line": 11,\
+// RUN:            "column": 12\
+// RUN:          }\
+// RUN:        },\
+// RUN:        {\
+// RUN:          "from": {\
+// RUN:            "line": 13,\
+// RUN:            "column": 1\
+// RUN:          },\
+// RUN:          "to": {\
+// RUN:            "line": 15,\
+// RUN:            "column": 1\
+// RUN:          }\
+// RUN:        }\
+// RUN:      ]\
+// RUN:    }\
+// RUN:  ]\
+// RUN:}' > %t/expected_decls
+// RUN: jq '.' %t/expected_decls > %t/expected_decls_formatted
+// RUN: diff %t/decls %t/expected_decls_formatted
+
+//--- foo.cppmap
+module foo {
+  header "foo.h"
+  export *
+}
+
+//--- foo.h
+class MyData {
+public:
+    MyData(int val): value_(val) {}
+    int getValue() const {
+        return 5;
+    }
+private:
+    int value_;
+};
+
+extern int global_value;
+
+int multiply(int a, int b) {
+    return a * b;
+}
+
+//--- foo.cpp
+#include "foo.h"
+int global_value = 5;
+int main() {
+  MyData data(5);
+  int current_value = data.getValue();
+  int doubled_value = multiply(current_value, 2);
+  int final_result = doubled_value + global_value;
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/133910
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to