https://github.com/VitaNuo updated 
https://github.com/llvm/llvm-project/pull/133910

>From cfa057b4d43ebe7f94ccd4f387a94359beaa29b2 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Fri, 4 Apr 2025 17:45:24 +0200
Subject: [PATCH 1/7] This commit implements a CC1 flag
 `-dump-deserialized-declaration-ranges`. The flag allows specifying a file
 path to dump ranges of deserialized declarations in `ASTReader`. Example
 usage:

```
clang -Xclang=-dump-deserialized-declaration-ranges=/tmp/decls -c file.cc -o file.o
```

Example output:
```
// /tmp/decls
{
  "required_ranges": [
    {
      "file": "foo.h",
      "range": [
        {
          "from": {
            "line": 26,
            "column": 1
          },
          "to": {
            "line": 27,
            "column": 77
          }
        }
      ]
    },
    {
      "file": "bar.h",
      "range": [
        {
          "from": {
            "line": 30,
            "column": 1
          },
          "to": {
            "line": 35,
            "column": 1
          }
        },
        {
          "from": {
            "line": 92,
            "column": 1
          },
          "to": {
            "line": 95,
            "column": 1
          }
        }
      ]
    }
  ]
}

```
Specifying the flag creates an instance of `DeserializedDeclsLineRangePrinter`, 
which dumps ranges of deserialized declarations to aid debugging and bug 
minimization.

Required ranges are computed from source ranges of Decls. 
`TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for 
the sake of this PR.

Technical details:
* `DeserializedDeclsLineRangePrinter` implements `ASTConsumer` and 
`ASTDeserializationListener`, so that an object of 
`DeserializedDeclsLineRangePrinter` registers as its own listener.
* `ASTDeserializationListener` interface provides the `DeclRead` callback that 
we use to collect the deserialized Decls.
Printing or otherwise processing them at this point is dangerous, since that 
could trigger additional deserialization and crash compilation.
* The collected Decls are processed in the `HandleTranslationUnit` method of 
`ASTConsumer`. This is a safe point, since we know that by this point all the 
Decls needed by the compiler frontend have been deserialized.
* In case our processing causes further deserialization, `DeclRead` from the 
listener might be called again. However, at that point we don't accept any more 
Decls for processing.
---
 clang/include/clang/Driver/Options.td         |   4 +
 .../include/clang/Frontend/FrontendOptions.h  |   3 +
 clang/lib/Frontend/FrontendAction.cpp         | 190 +++++++++++++++++-
 .../dump-deserialized-declaration-ranges.cpp  | 118 +++++++++++
 4 files changed, 310 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 3af072242d039..1737e40b776e1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : 
Flag<["-"],
   "print-dependency-directives-minimized-source">,
   HelpText<"Print the output of the dependency directives source minimizer">;
 }
+def dump_deserialized_declaration_ranges : Joined<["-"],
+  "dump-deserialized-declaration-ranges=">,
+  HelpText<"Dump ranges of deserialized declarations to aid debugging and 
minimization">,
+  MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
 
 defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
   CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h 
b/clang/include/clang/Frontend/FrontendOptions.h
index a9c9849ff52ab..8ef9ce9db8783 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,6 +530,9 @@ class FrontendOptions {
   /// Output Path for module output file.
   std::string ModuleOutputPath;
 
+  /// Output path to dump ranges of deserialized declarations.
+  std::string DumpDeserializedDeclarationRangesPath;
+
 public:
   FrontendOptions()
       : DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index 2d77f06be7446..1f939f7722d19 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -15,6 +15,8 @@
 #include "clang/Basic/FileEntry.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Stack.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -35,6 +37,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BuryPointer.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
@@ -49,6 +52,166 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
 
 namespace {
 
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
+/// to aid debugging and bug minimization. It implements ASTConsumer and
+/// ASTDeserializationListener, so that an object of
+/// DeserializedDeclsLineRangePrinter registers as its own listener. The
+/// ASTDeserializationListener interface provides the DeclRead callback that we
+/// use to collect the deserialized Decls. Note that printing or otherwise
+/// processing them as this point is dangerous, since that could trigger
+/// additional deserialization and crash compilation. Therefore, we process the
+/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
+/// safe point, since we know that by this point all the Decls needed by the
+/// compiler frontend have been deserialized. In case our processing causes
+/// further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTConsumer,
+                                          ASTDeserializationListener {
+public:
+  explicit DeserializedDeclsLineRangePrinter(
+      SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+      : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
+
+  ASTDeserializationListener *GetASTDeserializationListener() override {
+    return this;
+  }
+
+  void DeclRead(GlobalDeclID ID, const Decl *D) override {
+    if (!IsCollectingDecls)
+      return;
+    if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
+        isa<NamespaceDecl>(D)) {
+      // These decls cover a lot of nested declarations that might not be used,
+      // reducing the granularity and making the output less useful.
+      return;
+    }
+    if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
+      // We choose to work at namespace level to reduce complexity and the
+      // number of cases we care about.
+      return;
+    }
+    PendingDecls.push_back(D);
+  }
+
+  struct Position {
+    unsigned Line;
+    unsigned Column;
+
+    bool operator<(const Position &other) const {
+      if (Line < other.Line)
+        return true;
+      if (Line > other.Line)
+        return false;
+      return Column < other.Column;
+    }
+
+    static Position GetSpelling(const SourceManager &SM,
+                                const SourceLocation &SL) {
+      return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)};
+    }
+  };
+
+  struct RequiredRanges {
+    StringRef Filename;
+    std::vector<std::pair<Position, Position>> FromTo;
+  };
+  void HandleTranslationUnit(ASTContext &Context) override {
+    assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
+    IsCollectingDecls = false;
+
+    // Merge ranges in each of the files.
+    struct FileData {
+      std::vector<std::pair<Position, Position>> FromTo;
+      OptionalFileEntryRef Ref;
+    };
+    llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+    for (const Decl *D : PendingDecls) {
+      CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+      if (!R.isValid())
+        continue;
+
+      auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+        // Such cases are rare and difficult to handle.
+        continue;
+      }
+
+      auto &Data = FileToLines[F];
+      if (!Data.Ref)
+        Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+      Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
+                             Position::GetSpelling(SM, R.getEnd())});
+    }
+
+    // To simplify output, merge consecutive and intersecting ranges.
+    std::vector<RequiredRanges> Result;
+    for (auto &[F, Data] : FileToLines) {
+      auto &FromTo = Data.FromTo;
+      assert(!FromTo.empty());
+
+      if (!Data.Ref)
+        continue;
+
+      llvm::sort(FromTo);
+
+      std::vector<std::pair<Position, Position>> MergedRanges;
+      MergedRanges.push_back(FromTo.front());
+      for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
+        if (MergedRanges.back().second < It->first) {
+          MergedRanges.push_back(*It);
+          continue;
+        }
+        if (MergedRanges.back().second < It->second)
+          MergedRanges.back().second = It->second;
+      }
+      Result.push_back({Data.Ref->getName(), MergedRanges});
+    }
+    printJson(Result);
+  }
+
+private:
+  std::vector<const Decl *> PendingDecls;
+  bool IsCollectingDecls = true;
+  const SourceManager &SM;
+  std::unique_ptr<llvm::raw_ostream> OS;
+
+  void printJson(llvm::ArrayRef<RequiredRanges> Result) {
+    *OS << "{\n";
+    *OS << R"(  "required_ranges": [)" << "\n";
+    for (size_t I = 0; I < Result.size(); ++I) {
+      auto &F = Result[I].Filename;
+      auto &MergedRanges = Result[I].FromTo;
+      *OS << R"(    {)" << "\n";
+      *OS << R"(      "file": ")" << F << "\"," << "\n";
+      *OS << R"(      "range": [)" << "\n";
+      for (size_t J = 0; J < MergedRanges.size(); ++J) {
+        auto &From = MergedRanges[J].first;
+        auto &To = MergedRanges[J].second;
+        *OS << R"(        {)" << "\n";
+        *OS << R"(          "from": {)" << "\n";
+        *OS << R"(            "line": )" << From.Line << ",\n";
+        *OS << R"(            "column": )" << From.Column << "\n"
+            << R"(          },)" << "\n";
+        *OS << R"(          "to": {)" << "\n";
+        *OS << R"(            "line": )" << To.Line << ",\n";
+        *OS << R"(            "column": )" << To.Column << "\n"
+            << R"(          })" << "\n";
+        *OS << R"(        })";
+        if (J < MergedRanges.size() - 1) {
+          *OS << ",";
+        }
+        *OS << "\n";
+      }
+      *OS << "      ]" << "\n" << "    }";
+      if (I < Result.size() - 1)
+        *OS << ",";
+      *OS << "\n";
+    }
+    *OS << "  ]\n";
+    *OS << "}\n";
+  }
+};
+
 /// Dumps deserialized declarations.
 class DeserializedDeclsDumper : public DelegatingDeserializationListener {
 public:
@@ -121,6 +284,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
   if (!Consumer)
     return nullptr;
 
+  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+  llvm::StringRef DumpDeserializedDeclarationRangesPath =
+      CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+  if (!DumpDeserializedDeclarationRangesPath.empty()) {
+    std::error_code ErrorCode;
+    auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
+        DumpDeserializedDeclarationRangesPath, ErrorCode,
+        llvm::sys::fs::OF_None);
+    if (!ErrorCode) {
+      Consumers.push_back(std::make_unique<DeserializedDeclsLineRangePrinter>(
+          CI.getSourceManager(), std::move(FileStream)));
+    } else {
+      llvm::errs() << "Failed to create output file for "
+                      "-dump-deserialized-declaration-ranges flag, file path: "
+                   << DumpDeserializedDeclarationRangesPath
+                   << ", error: " << ErrorCode.message() << "\n";
+    }
+  }
+
   // Validate -add-plugin args.
   bool FoundAllPlugins = true;
   for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +320,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
   if (!FoundAllPlugins)
     return nullptr;
 
-  // If there are no registered plugins we don't need to wrap the consumer
-  if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
-    return Consumer;
-
   // If this is a code completion run, avoid invoking the plugin consumers
   if (CI.hasCodeCompletionConsumer())
     return Consumer;
 
   // Collect the list of plugins that go before the main action (in Consumers)
   // or after it (in AfterConsumers)
-  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
   std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
   for (const FrontendPluginRegistry::entry &Plugin :
        FrontendPluginRegistry::entries()) {
@@ -191,6 +368,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance 
&CI,
       Consumers.push_back(std::move(C));
   }
 
+  assert(Consumers.size() >= 1 && "should have added the main consumer");
+  if (Consumers.size() == 1)
+    return std::move(Consumers.front());
   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
 }
 
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
new file mode 100644
index 0000000000000..c72fc32479b0f
--- /dev/null
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -0,0 +1,118 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo 
-fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
+// RUN: %clang_cc1 -xc++ -fmodules 
-dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm 
%t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
+// RANGE:{
+// RANGE-NEXT:  "required_ranges": [
+// RANGE-NEXT:    {
+// RANGE-NEXT:      "file": "{{.+}}/foo.h",
+// RANGE-NEXT:      "range": [
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 1,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 9,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 11,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 11,
+// RANGE-NEXT:            "column": 12
+// RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 13,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 15,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          }
+// RANGE-NEXT:        }
+// RANGE-NEXT:      ]
+// RANGE-NEXT:    }
+// RANGE-NEXT:  ]
+// RANGE-NEXT:}
+// RUN: echo -e '{\n\
+// RUN:  "required_ranges": [\n\
+// RUN:    {\n\
+// RUN:      "file": "%t/foo.h",\n\
+// RUN:      "range": [\n\
+// RUN:        {\n\
+// RUN:          "from": {\n\
+// RUN:            "line": 1,\n\
+// RUN:            "column": 1\n\
+// RUN:          },\n\
+// RUN:          "to": {\n\
+// RUN:            "line": 9,\n\
+// RUN:            "column": 1\n\
+// RUN:          }\n\
+// RUN:        },\n\
+// RUN:        {\n\
+// RUN:          "from": {\n\
+// RUN:            "line": 11,\n\
+// RUN:            "column": 1\n\
+// RUN:          },\n\
+// RUN:          "to": {\n\
+// RUN:            "line": 11,\n\
+// RUN:            "column": 12\n\
+// RUN:          }\n\
+// RUN:        },\n\
+// RUN:        {\n\
+// RUN:          "from": {\n\
+// RUN:            "line": 13,\n\
+// RUN:            "column": 1\n\
+// RUN:          },\n\
+// RUN:          "to": {\n\
+// RUN:            "line": 15,\n\
+// RUN:            "column": 1\n\
+// RUN:          }\n\
+// RUN:        }\n\
+// RUN:      ]\n\
+// RUN:    }\n\
+// RUN:  ]\n\
+// RUN:}' > %t/expected_decls
+// RUN: diff %t/decls %t/expected_decls
+
+//--- foo.cppmap
+module foo {
+  header "foo.h"
+  export *
+}
+
+//--- foo.h
+class MyData {
+public:
+    MyData(int val): value_(val) {}
+    int getValue() const {
+        return 5;
+    }
+private:
+    int value_;
+};
+
+extern int global_value;
+
+int multiply(int a, int b) {
+    return a * b;
+}
+
+//--- foo.cpp
+#include "foo.h"
+int global_value = 5;
+int main() {
+  MyData data(5);
+  int current_value = data.getValue();
+  int doubled_value = multiply(current_value, 2);
+  int final_result = doubled_value + global_value;
+}

>From 221d1dffe9229ce6a11ac1ade9b072ed6a3e000a Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 09:26:55 +0200
Subject: [PATCH 2/7] Fix test on Windows

---
 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index c72fc32479b0f..9de2962e11880 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -7,7 +7,7 @@
 // RANGE:{
 // RANGE-NEXT:  "required_ranges": [
 // RANGE-NEXT:    {
-// RANGE-NEXT:      "file": "{{.+}}/foo.h",
+// RANGE-NEXT:      "file": "{{.+}}foo.h",
 // RANGE-NEXT:      "range": [
 // RANGE-NEXT:        {
 // RANGE-NEXT:          "from": {

>From 1525fe80da2c242caeb32f65c0398403dfab9362 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 09:32:42 +0200
Subject: [PATCH 3/7] Remove old test.

---
 .../dump-deserialized-declaration-ranges.cpp  | 40 -------------------
 1 file changed, 40 deletions(-)

diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 9de2962e11880..8fbbef49388a1 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -43,46 +43,6 @@
 // RANGE-NEXT:    }
 // RANGE-NEXT:  ]
 // RANGE-NEXT:}
-// RUN: echo -e '{\n\
-// RUN:  "required_ranges": [\n\
-// RUN:    {\n\
-// RUN:      "file": "%t/foo.h",\n\
-// RUN:      "range": [\n\
-// RUN:        {\n\
-// RUN:          "from": {\n\
-// RUN:            "line": 1,\n\
-// RUN:            "column": 1\n\
-// RUN:          },\n\
-// RUN:          "to": {\n\
-// RUN:            "line": 9,\n\
-// RUN:            "column": 1\n\
-// RUN:          }\n\
-// RUN:        },\n\
-// RUN:        {\n\
-// RUN:          "from": {\n\
-// RUN:            "line": 11,\n\
-// RUN:            "column": 1\n\
-// RUN:          },\n\
-// RUN:          "to": {\n\
-// RUN:            "line": 11,\n\
-// RUN:            "column": 12\n\
-// RUN:          }\n\
-// RUN:        },\n\
-// RUN:        {\n\
-// RUN:          "from": {\n\
-// RUN:            "line": 13,\n\
-// RUN:            "column": 1\n\
-// RUN:          },\n\
-// RUN:          "to": {\n\
-// RUN:            "line": 15,\n\
-// RUN:            "column": 1\n\
-// RUN:          }\n\
-// RUN:        }\n\
-// RUN:      ]\n\
-// RUN:    }\n\
-// RUN:  ]\n\
-// RUN:}' > %t/expected_decls
-// RUN: diff %t/decls %t/expected_decls
 
 //--- foo.cppmap
 module foo {

>From 14bd903b35833b6098ecde44d3b7f01ebc0dd816 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 10:49:31 +0200
Subject: [PATCH 4/7] Handle source ranges where `isTokenRange() == true` (the
 end of this range specifies the start of the last token). In this case,
 compute the source location just past the end of the token at this source
 location. Fix the test. The end column is not exclusive.

---
 clang/lib/Frontend/FrontendAction.cpp                     | 8 ++++++--
 .../Frontend/dump-deserialized-declaration-ranges.cpp     | 7 ++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index 1f939f7722d19..4066d41cc4e6d 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -130,8 +130,12 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       if (!R.isValid())
         continue;
 
+      SourceLocation End = R.getEnd();
+      if (R.isTokenRange())
+        End = Lexer::getLocForEndOfToken(End, 0, SM, D->getLangOpts());
+
       auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
-      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+      if (F != SM.getFileEntryForID(SM.getFileID(End))) {
         // Such cases are rare and difficult to handle.
         continue;
       }
@@ -140,7 +144,7 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       if (!Data.Ref)
         Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
       Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
-                             Position::GetSpelling(SM, R.getEnd())});
+                             Position::GetSpelling(SM, End)});
     }
 
     // To simplify output, merge consecutive and intersecting ranges.
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 8fbbef49388a1..2a54bcea32d05 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -3,6 +3,7 @@
 // RUN: split-file %s %t
 // RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo 
-fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
 // RUN: %clang_cc1 -xc++ -fmodules 
-dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm 
%t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls
 // RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
 // RANGE:{
 // RANGE-NEXT:  "required_ranges": [
@@ -16,7 +17,7 @@
 // RANGE-NEXT:          },
 // RANGE-NEXT:          "to": {
 // RANGE-NEXT:            "line": 9,
-// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:            "column": 2
 // RANGE-NEXT:          }
 // RANGE-NEXT:        },
 // RANGE-NEXT:        {
@@ -26,7 +27,7 @@
 // RANGE-NEXT:          },
 // RANGE-NEXT:          "to": {
 // RANGE-NEXT:            "line": 11,
-// RANGE-NEXT:            "column": 12
+// RANGE-NEXT:            "column": 24
 // RANGE-NEXT:          }
 // RANGE-NEXT:        },
 // RANGE-NEXT:        {
@@ -36,7 +37,7 @@
 // RANGE-NEXT:          },
 // RANGE-NEXT:          "to": {
 // RANGE-NEXT:            "line": 15,
-// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:            "column": 2
 // RANGE-NEXT:          }
 // RANGE-NEXT:        }
 // RANGE-NEXT:      ]

>From c71f0bd053a360fb3f3953392d8c010949c85aaa Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 11:51:57 +0200
Subject: [PATCH 5/7] Relex the token past the end location of the last token
 in the source range. If it's a semicolon, advance the location by one token.

---
 clang/lib/Frontend/FrontendAction.cpp         | 43 ++++++++++++++-----
 .../dump-deserialized-declaration-ranges.cpp  |  4 +-
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index 4066d41cc4e6d..ffed6febdaaba 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -18,6 +18,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Stack.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -105,9 +106,34 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       return Column < other.Column;
     }
 
-    static Position GetSpelling(const SourceManager &SM,
-                                const SourceLocation &SL) {
-      return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)};
+    static Position GetBeginSpelling(const SourceManager &SM,
+                                     const CharSourceRange &R) {
+      SourceLocation Begin = R.getBegin();
+      return {SM.getSpellingLineNumber(Begin),
+              SM.getSpellingColumnNumber(Begin)};
+    }
+    
+    static Position GetEndSpelling(const SourceManager &SM,
+                                   const CharSourceRange &R,
+                                   const LangOptions &LangOpts) {
+      SourceLocation End = R.getEnd();
+      if (R.isTokenRange()) {
+        // Compute end location for end character of the range.
+        // The returned location is exclusive.
+        End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
+      } else {
+        // If end already points at the last character in the range, advance 
one
+        // location, so that end location is exclusive.
+        End = End.getLocWithOffset(1);
+      }
+      // Relex the token past the end location of the last token in the source
+      // range. If it's a semicolon, advance the location by one token.
+      Token PossiblySemi;
+      Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
+      if (PossiblySemi.is(tok::semi))
+        End = Lexer::getLocForEndOfToken(PossiblySemi.getLocation(), 0, SM,
+                                         LangOpts);
+      return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
     }
   };
 
@@ -130,12 +156,8 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       if (!R.isValid())
         continue;
 
-      SourceLocation End = R.getEnd();
-      if (R.isTokenRange())
-        End = Lexer::getLocForEndOfToken(End, 0, SM, D->getLangOpts());
-
       auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
-      if (F != SM.getFileEntryForID(SM.getFileID(End))) {
+      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
         // Such cases are rare and difficult to handle.
         continue;
       }
@@ -143,8 +165,9 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       auto &Data = FileToLines[F];
       if (!Data.Ref)
         Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
-      Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
-                             Position::GetSpelling(SM, End)});
+      Data.FromTo.push_back(
+          {Position::GetBeginSpelling(SM, R),
+           Position::GetEndSpelling(SM, R, D->getLangOpts())});
     }
 
     // To simplify output, merge consecutive and intersecting ranges.
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp 
b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 2a54bcea32d05..433dabe5eb84e 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -17,7 +17,7 @@
 // RANGE-NEXT:          },
 // RANGE-NEXT:          "to": {
 // RANGE-NEXT:            "line": 9,
-// RANGE-NEXT:            "column": 2
+// RANGE-NEXT:            "column": 3
 // RANGE-NEXT:          }
 // RANGE-NEXT:        },
 // RANGE-NEXT:        {
@@ -27,7 +27,7 @@
 // RANGE-NEXT:          },
 // RANGE-NEXT:          "to": {
 // RANGE-NEXT:            "line": 11,
-// RANGE-NEXT:            "column": 24
+// RANGE-NEXT:            "column": 25
 // RANGE-NEXT:          }
 // RANGE-NEXT:        },
 // RANGE-NEXT:        {

>From b2a908f877a8cc0cd6ce16b0ac67b9dcd7b2ab11 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 11:56:24 +0200
Subject: [PATCH 6/7] Fix formatting.

---
 clang/lib/Frontend/FrontendAction.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index ffed6febdaaba..0d5a40891654a 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -112,7 +112,7 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
       return {SM.getSpellingLineNumber(Begin),
               SM.getSpellingColumnNumber(Begin)};
     }
-    
+
     static Position GetEndSpelling(const SourceManager &SM,
                                    const CharSourceRange &R,
                                    const LangOptions &LangOpts) {

>From 72c51cc12ab322c7be59723371326e9b140d73ff Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakal...@google.com>
Date: Tue, 8 Apr 2025 14:18:28 +0200
Subject: [PATCH 7/7] Simplify the code by using `Lexer::getAsCharRange` to
 advance to the end location of the end token in the range.

---
 clang/lib/Frontend/FrontendAction.cpp | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Frontend/FrontendAction.cpp 
b/clang/lib/Frontend/FrontendAction.cpp
index 0d5a40891654a..345128700289b 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -13,6 +13,7 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileEntry.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/Sarif.h"
 #include "clang/Basic/SourceLocation.h"
@@ -114,25 +115,18 @@ class DeserializedDeclsLineRangePrinter : public 
ASTConsumer,
     }
 
     static Position GetEndSpelling(const SourceManager &SM,
-                                   const CharSourceRange &R,
+                                   const CharSourceRange &Range,
                                    const LangOptions &LangOpts) {
+      // For token ranges, compute end location for end character of the range.
+      // The end location of returned range is exclusive.
+      CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts);
       SourceLocation End = R.getEnd();
-      if (R.isTokenRange()) {
-        // Compute end location for end character of the range.
-        // The returned location is exclusive.
-        End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
-      } else {
-        // If end already points at the last character in the range, advance 
one
-        // location, so that end location is exclusive.
-        End = End.getLocWithOffset(1);
-      }
       // Relex the token past the end location of the last token in the source
       // range. If it's a semicolon, advance the location by one token.
       Token PossiblySemi;
       Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
       if (PossiblySemi.is(tok::semi))
-        End = Lexer::getLocForEndOfToken(PossiblySemi.getLocation(), 0, SM,
-                                         LangOpts);
+        End = End.getLocWithOffset(1);
       return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
     }
   };

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to