https://github.com/bob-wilson created 
https://github.com/llvm/llvm-project/pull/137087

This adds a new filtering option to be used along with the 
-header-include-format=json option. The existing "only-direct-system" filtering 
option is missing some things:
- It does not show module imports.
- It does not show includes of non-system headers. This new "direct-per-file" 
filtering has a separate entry in the JSON output for each non-system source 
file, showing the direct includes and imports from that file. You can use this 
to see uses of non-system headers, and also find the paths through non-system 
headers that lead up to module imports and system headers. Modules are 
identified here by their modulemap files.

>From a7195bdc6d98f672344c25ea8794c434cc58e0ef Mon Sep 17 00:00:00 2001
From: Bob Wilson <bob.wil...@apple.com>
Date: Tue, 22 Apr 2025 09:24:49 -0700
Subject: [PATCH] [Clang] Add new -header-include-filtering=direct-per-file
 option

This adds a new filtering option to be used along with the
-header-include-format=json option. The existing "only-direct-system"
filtering option is missing some things:
- It does not show module imports.
- It does not show includes of non-system headers.
This new "direct-per-file" filtering has a separate entry in the JSON output
for each non-system source file, showing the direct includes and imports
from that file. You can use this to see uses of non-system headers, and also
find the paths through non-system headers that lead up to module imports and
system headers. Modules are identified here by their modulemap files.
---
 clang/include/clang/Basic/HeaderInclude.h   |  13 +-
 clang/include/clang/Driver/Options.td       |   3 +-
 clang/lib/Frontend/HeaderIncludeGen.cpp     | 140 +++++++++++++++++++-
 clang/test/Preprocessor/print-header-json.c |   5 +
 clang/tools/driver/driver.cpp               |   2 +-
 5 files changed, 153 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Basic/HeaderInclude.h 
b/clang/include/clang/Basic/HeaderInclude.h
index 83c26543bbd3b..86c21cf292882 100644
--- a/clang/include/clang/Basic/HeaderInclude.h
+++ b/clang/include/clang/Basic/HeaderInclude.h
@@ -23,8 +23,14 @@ enum HeaderIncludeFormatKind { HIFMT_None, HIFMT_Textual, 
HIFMT_JSON };
 
 /// Whether header information is filtered or not. If HIFIL_Only_Direct_System
 /// is used, only information on system headers directly included from
-/// non-system headers is emitted.
-enum HeaderIncludeFilteringKind { HIFIL_None, HIFIL_Only_Direct_System };
+/// non-system files is emitted. The HIFIL_Direct_Per_File filtering shows the
+/// direct imports and includes for each non-system source and header file
+/// separately.
+enum HeaderIncludeFilteringKind {
+  HIFIL_None,
+  HIFIL_Only_Direct_System,
+  HIFIL_Direct_Per_File
+};
 
 inline HeaderIncludeFormatKind
 stringToHeaderIncludeFormatKind(const char *Str) {
@@ -40,6 +46,7 @@ inline bool stringToHeaderIncludeFiltering(const char *Str,
       llvm::StringSwitch<std::pair<bool, HeaderIncludeFilteringKind>>(Str)
           .Case("none", {true, HIFIL_None})
           .Case("only-direct-system", {true, HIFIL_Only_Direct_System})
+          .Case("direct-per-file", {true, HIFIL_Direct_Per_File})
           .Default({false, HIFIL_None});
   Kind = P.second;
   return P.first;
@@ -64,6 +71,8 @@ headerIncludeFilteringKindToString(HeaderIncludeFilteringKind 
K) {
     return "none";
   case HIFIL_Only_Direct_System:
     return "only-direct-system";
+  case HIFIL_Direct_Per_File:
+    return "direct-per-file";
   }
   llvm_unreachable("Unknown HeaderIncludeFilteringKind enum");
 }
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index a9f4083920663..c0f469e04375c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7793,7 +7793,8 @@ def header_include_format_EQ : Joined<["-"], 
"header-include-format=">,
   MarshallingInfoEnum<DependencyOutputOpts<"HeaderIncludeFormat">, 
"HIFMT_Textual">;
 def header_include_filtering_EQ : Joined<["-"], "header-include-filtering=">,
   HelpText<"set the flag that enables filtering header information">,
-  Values<"none,only-direct-system">, NormalizedValues<["HIFIL_None", 
"HIFIL_Only_Direct_System"]>,
+  Values<"none,only-direct-system,direct-per-file">,
+  NormalizedValues<["HIFIL_None", "HIFIL_Only_Direct_System", 
"HIFIL_Direct_Per_File"]>,
   MarshallingInfoEnum<DependencyOutputOpts<"HeaderIncludeFiltering">, 
"HIFIL_None">;
 def show_includes : Flag<["--"], "show-includes">,
   HelpText<"Print cl.exe style /showIncludes to stdout">;
diff --git a/clang/lib/Frontend/HeaderIncludeGen.cpp 
b/clang/lib/Frontend/HeaderIncludeGen.cpp
index 792526083b1e6..85fc61b6f5e92 100644
--- a/clang/lib/Frontend/HeaderIncludeGen.cpp
+++ b/clang/lib/Frontend/HeaderIncludeGen.cpp
@@ -106,6 +106,50 @@ class HeaderIncludesJSONCallback : public PPCallbacks {
   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
 };
+
+/// A callback for emitting direct header and module usage information to a
+/// file in JSON. The output format is like HeaderIncludesJSONCallback but has
+/// an array of separate entries, one for each non-system source file used in
+/// the compilation showing only the direct includes and imports from that 
file.
+class HeaderIncludesDirectPerFileCallback : public PPCallbacks {
+  SourceManager &SM;
+  HeaderSearch &HSI;
+  raw_ostream *OutputFile;
+  bool OwnsOutputFile;
+  using DependencyMap =
+      llvm::DenseMap<FileEntryRef, SmallVector<FileEntryRef>>;
+  DependencyMap Dependencies;
+
+public:
+  HeaderIncludesDirectPerFileCallback(const Preprocessor *PP,
+                                      raw_ostream *OutputFile_,
+                                      bool OwnsOutputFile_)
+      : SM(PP->getSourceManager()), HSI(PP->getHeaderSearchInfo()),
+        OutputFile(OutputFile_), OwnsOutputFile(OwnsOutputFile_) {}
+
+  ~HeaderIncludesDirectPerFileCallback() override {
+    if (OwnsOutputFile)
+      delete OutputFile;
+  }
+
+  HeaderIncludesDirectPerFileCallback
+      (const HeaderIncludesDirectPerFileCallback &) = delete;
+  HeaderIncludesDirectPerFileCallback &
+  operator=(const HeaderIncludesDirectPerFileCallback &) = delete;
+
+  void EndOfMainFile() override;
+
+  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                          StringRef FileName, bool IsAngled,
+                          CharSourceRange FilenameRange,
+                          OptionalFileEntryRef File, StringRef SearchPath,
+                          StringRef RelativePath, const Module 
*SuggestedModule,
+                          bool ModuleImported,
+                          SrcMgr::CharacteristicKind FileType) override;
+
+  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
+                    const Module *Imported) override;
+};
 }
 
 static void PrintHeaderInfo(raw_ostream *OutputFile, StringRef Filename,
@@ -192,14 +236,21 @@ void clang::AttachHeaderIncludeGen(Preprocessor &PP,
         MSStyle));
     break;
   }
-  case HIFMT_JSON: {
-    assert(DepOpts.HeaderIncludeFiltering == HIFIL_Only_Direct_System &&
-           "only-direct-system is the only option for filtering");
-    PP.addPPCallbacks(std::make_unique<HeaderIncludesJSONCallback>(
-        &PP, OutputFile, OwnsOutputFile));
+  case HIFMT_JSON:
+    switch (DepOpts.HeaderIncludeFiltering) {
+    default:
+      llvm_unreachable("Unknown HeaderIncludeFilteringKind enum");
+    case HIFIL_Only_Direct_System:
+      PP.addPPCallbacks(std::make_unique<HeaderIncludesJSONCallback>(
+          &PP, OutputFile, OwnsOutputFile));
+      break;
+    case HIFIL_Direct_Per_File:
+      PP.addPPCallbacks(std::make_unique<HeaderIncludesDirectPerFileCallback>(
+          &PP, OutputFile, OwnsOutputFile));
+      break;
+    }
     break;
   }
-  }
 }
 
 void HeaderIncludesCallback::FileChanged(SourceLocation Loc,
@@ -322,3 +373,80 @@ void HeaderIncludesJSONCallback::FileSkipped(
 
   IncludedHeaders.push_back(SkippedFile.getName().str());
 }
+
+void HeaderIncludesDirectPerFileCallback::EndOfMainFile() {
+  if (Dependencies.empty())
+    return;
+
+  // Sort the files so that the output does not depend on the DenseMap order.
+  SmallVector<FileEntryRef> SourceFiles;
+  for (auto F = Dependencies.begin(), FEnd = Dependencies.end();
+       F != FEnd; ++F) {
+    SourceFiles.push_back(F->first);
+  }
+  llvm::sort(SourceFiles, [](const FileEntryRef &LHS, const FileEntryRef &RHS) 
{
+    return LHS.getUID() < RHS.getUID();
+  });
+
+  std::string Str;
+  llvm::raw_string_ostream OS(Str);
+  llvm::json::OStream JOS(OS);
+  JOS.array([&] {
+    for (auto S = SourceFiles.begin(), SE = SourceFiles.end(); S != SE; ++S) {
+      JOS.object([&] {
+        SmallVector<FileEntryRef> &Deps = Dependencies[*S];
+        JOS.attribute("source", S->getName().str());
+        JOS.attributeArray("includes", [&] {
+          for (unsigned I = 0, N = Deps.size(); I != N; ++I)
+            JOS.value(Deps[I].getName().str());
+        });
+      });
+    }
+  });
+  OS << "\n";
+
+  if (OutputFile->get_kind() == raw_ostream::OStreamKind::OK_FDStream) {
+    llvm::raw_fd_ostream *FDS = static_cast<llvm::raw_fd_ostream 
*>(OutputFile);
+    if (auto L = FDS->lock())
+      *OutputFile << Str;
+  } else
+    *OutputFile << Str;
+}
+
+void HeaderIncludesDirectPerFileCallback::InclusionDirective(
+    SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
+    bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
+    StringRef SearchPath, StringRef RelativePath, const Module 
*SuggestedModule,
+    bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
+  if (!File)
+    return;
+
+  SourceLocation Loc = SM.getExpansionLoc(HashLoc);
+  if (SM.isInSystemHeader(Loc))
+    return;
+  OptionalFileEntryRef FromFile = SM.getFileEntryRefForID(SM.getFileID(Loc));
+  if (!FromFile)
+    return;
+
+  Dependencies[*FromFile].push_back(*File);
+}
+
+void HeaderIncludesDirectPerFileCallback::moduleImport(
+    SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) {
+  if (!Imported)
+    return;
+
+  SourceLocation Loc = SM.getExpansionLoc(ImportLoc);
+  if (SM.isInSystemHeader(Loc))
+    return;
+  OptionalFileEntryRef FromFile = SM.getFileEntryRefForID(SM.getFileID(Loc));
+  if (!FromFile)
+    return;
+
+  OptionalFileEntryRef ModuleMapFile =
+      HSI.getModuleMap().getModuleMapFileForUniquing(Imported);
+  if (!ModuleMapFile)
+    return;
+
+  Dependencies[*FromFile].push_back(*ModuleMapFile);
+}
diff --git a/clang/test/Preprocessor/print-header-json.c 
b/clang/test/Preprocessor/print-header-json.c
index 1ba63ddc7a249..bb1830e5030d8 100644
--- a/clang/test/Preprocessor/print-header-json.c
+++ b/clang/test/Preprocessor/print-header-json.c
@@ -13,11 +13,16 @@
 // RUN: env CC_PRINT_HEADERS_FORMAT=json 
CC_PRINT_HEADERS_FILTERING=only-direct-system CC_PRINT_HEADERS_FILE=%t.txt 
%clang -fsyntax-only -I %S/Inputs/print-header-json -isystem 
%S/Inputs/print-header-json/system %s -o /dev/null
 // RUN: cat %t.txt | FileCheck %s --check-prefix=SUPPORTED
 
+// RUN: rm %t.txt
+// RUN: env CC_PRINT_HEADERS_FORMAT=json 
CC_PRINT_HEADERS_FILTERING=direct-per-file CC_PRINT_HEADERS_FILE=%t.txt %clang 
-fsyntax-only -I %S/Inputs/print-header-json -isystem 
%S/Inputs/print-header-json/system %s -o /dev/null
+// RUN: cat %t.txt | FileCheck %s --check-prefix=SUPPORTED_PERFILE
+
 #include "system0.h"
 #include "header0.h"
 #include "system2.h"
 
 // SUPPORTED: 
{"source":"{{[^,]*}}print-header-json.c","includes":["{{[^,]*}}system0.h","{{[^,]*}}system3.h","{{[^,]*}}system2.h"]}
+// SUPPORTED_PERFILE: 
[{"source":"{{[^,]*}}print-header-json.c","includes":["{{[^,]*}}system0.h","{{[^,]*}}header0.h","{{[^,]*}}system2.h"]},{"source":"{{[^,]*}}header0.h","includes":["{{[^,]*}}system3.h","{{[^,]*}}header1.h","{{[^,]*}}header2.h"]}]
 
 // UNSUPPORTED0: error: unsupported combination: 
-header-include-format=textual and -header-include-filtering=only-direct-system
 // UNSUPPORTED1: error: unsupported combination: -header-include-format=json 
and -header-include-filtering=none
diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp
index db72b4a4526fe..82f47ab973064 100644
--- a/clang/tools/driver/driver.cpp
+++ b/clang/tools/driver/driver.cpp
@@ -171,7 +171,7 @@ static bool SetBackdoorDriverOutputsFromEnvVars(Driver 
&TheDriver) {
       if ((TheDriver.CCPrintHeadersFormat == HIFMT_Textual &&
            Filtering != HIFIL_None) ||
           (TheDriver.CCPrintHeadersFormat == HIFMT_JSON &&
-           Filtering != HIFIL_Only_Direct_System)) {
+           Filtering == HIFIL_None)) {
         TheDriver.Diag(clang::diag::err_drv_print_header_env_var_combination)
             << EnvVar << FilteringStr;
         return false;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to