dang created this revision.
dang added reviewers: zixuw, ributzka, QuietMisdreavus, cishida.
Herald added a project: All.
dang requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

We should only process APIs declared in the command line inputs to avoid
drowning the ExtractAPI output with symbols the user doesn't care about.
This is achieved by keeping track of the provided input files and
checking that the associated Decl or Macro is declared in one of those files.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D123148

Files:
  clang/include/clang/ExtractAPI/FrontendActions.h
  clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
  clang/test/ExtractAPI/known_files_only.c

Index: clang/test/ExtractAPI/known_files_only.c
===================================================================
--- /dev/null
+++ clang/test/ExtractAPI/known_files_only.c
@@ -0,0 +1,499 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/reference.output.json.in >> \
+// RUN: %t/reference.output.json
+// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \
+// RUN: %t/input1.h %t/input2.h %t/input3.h -o %t/output.json | FileCheck -allow-empty %s
+
+// Generator version is not consistent across test runs, normalize it.
+// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \
+// RUN: %t/output.json >> %t/output-normalized.json
+// RUN: diff %t/reference.output.json %t/output-normalized.json
+
+// CHECK-NOT: error:
+// CHECK-NOT: warning:
+
+//--- input1.h
+int num;
+
+// Let's make sure we aren't pulling in symbols from complex.h
+#include <complex.h>
+double complex build_complex(double real, double imaginary);
+
+//--- input2.h
+/**
+ * \brief Add two numbers.
+ * \param [in]  x   A number.
+ * \param [in]  y   Another number.
+ * \param [out] res The result of x + y.
+ */
+void add(const int x, const int y, int *res);
+
+//--- input3.h
+char unavailable __attribute__((unavailable));
+
+//--- reference.output.json.in
+{
+  "metadata": {
+    "formatVersion": {
+      "major": 0,
+      "minor": 5,
+      "patch": 3
+    },
+    "generator": "?"
+  },
+  "module": {
+    "name": "GlobalRecord",
+    "platform": {
+      "architecture": "arm64",
+      "operatingSystem": {
+        "minimumVersion": {
+          "major": 11,
+          "minor": 0,
+          "patch": 0
+        },
+        "name": "macosx"
+      },
+      "vendor": "apple"
+    }
+  },
+  "relationhips": [],
+  "symbols": [
+    {
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "num"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@num"
+      },
+      "kind": {
+        "displayName": "Global Variable",
+        "identifier": "c.var"
+      },
+      "location": {
+        "character": 5,
+        "line": 1,
+        "uri": "file://INPUT_DIR/input1.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "num"
+          }
+        ],
+        "title": "num"
+      }
+    },
+    {
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:<d",
+          "spelling": "_Complex double"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "build_complex"
+        },
+        {
+          "kind": "text",
+          "spelling": "("
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:d",
+          "spelling": "double"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "real"
+        },
+        {
+          "kind": "text",
+          "spelling": ", "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:d",
+          "spelling": "double"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "imaginary"
+        },
+        {
+          "kind": "text",
+          "spelling": ")"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@F@build_complex"
+      },
+      "kind": {
+        "displayName": "Function",
+        "identifier": "c.func"
+      },
+      "location": {
+        "character": 16,
+        "line": 5,
+        "uri": "file://INPUT_DIR/input1.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "build_complex"
+          }
+        ],
+        "title": "build_complex"
+      },
+      "parameters": {
+        "parameters": [
+          {
+            "declarationFragments": [
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:d",
+                "spelling": "double"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "real"
+              }
+            ],
+            "name": "real"
+          },
+          {
+            "declarationFragments": [
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:d",
+                "spelling": "double"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "imaginary"
+              }
+            ],
+            "name": "imaginary"
+          }
+        ],
+        "returns": [
+          {
+            "kind": "typeIdentifier",
+            "preciseIdentifier": "c:<d",
+            "spelling": "_Complex double"
+          }
+        ]
+      }
+    },
+    {
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:v",
+          "spelling": "void"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "add"
+        },
+        {
+          "kind": "text",
+          "spelling": "("
+        },
+        {
+          "kind": "keyword",
+          "spelling": "const"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "x"
+        },
+        {
+          "kind": "text",
+          "spelling": ", "
+        },
+        {
+          "kind": "keyword",
+          "spelling": "const"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "y"
+        },
+        {
+          "kind": "text",
+          "spelling": ", "
+        },
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " *"
+        },
+        {
+          "kind": "internalParam",
+          "spelling": "res"
+        },
+        {
+          "kind": "text",
+          "spelling": ")"
+        }
+      ],
+      "docComment": {
+        "lines": [
+          {
+            "range": {
+              "end": {
+                "character": 4,
+                "line": 1
+              },
+              "start": {
+                "character": 4,
+                "line": 1
+              }
+            },
+            "text": ""
+          },
+          {
+            "range": {
+              "end": {
+                "character": 27,
+                "line": 2
+              },
+              "start": {
+                "character": 3,
+                "line": 2
+              }
+            },
+            "text": " \\brief Add two numbers."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 30,
+                "line": 3
+              },
+              "start": {
+                "character": 3,
+                "line": 3
+              }
+            },
+            "text": " \\param [in]  x   A number."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 36,
+                "line": 4
+              },
+              "start": {
+                "character": 3,
+                "line": 4
+              }
+            },
+            "text": " \\param [in]  y   Another number."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 41,
+                "line": 5
+              },
+              "start": {
+                "character": 3,
+                "line": 5
+              }
+            },
+            "text": " \\param [out] res The result of x + y."
+          },
+          {
+            "range": {
+              "end": {
+                "character": 4,
+                "line": 6
+              },
+              "start": {
+                "character": 1,
+                "line": 6
+              }
+            },
+            "text": " "
+          }
+        ]
+      },
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@F@add"
+      },
+      "kind": {
+        "displayName": "Function",
+        "identifier": "c.func"
+      },
+      "location": {
+        "character": 6,
+        "line": 7,
+        "uri": "file://INPUT_DIR/input2.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "add"
+          }
+        ],
+        "title": "add"
+      },
+      "parameters": {
+        "parameters": [
+          {
+            "declarationFragments": [
+              {
+                "kind": "keyword",
+                "spelling": "const"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "x"
+              }
+            ],
+            "name": "x"
+          },
+          {
+            "declarationFragments": [
+              {
+                "kind": "keyword",
+                "spelling": "const"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " "
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "y"
+              }
+            ],
+            "name": "y"
+          },
+          {
+            "declarationFragments": [
+              {
+                "kind": "typeIdentifier",
+                "preciseIdentifier": "c:I",
+                "spelling": "int"
+              },
+              {
+                "kind": "text",
+                "spelling": " *"
+              },
+              {
+                "kind": "internalParam",
+                "spelling": "res"
+              }
+            ],
+            "name": "res"
+          }
+        ],
+        "returns": [
+          {
+            "kind": "typeIdentifier",
+            "preciseIdentifier": "c:v",
+            "spelling": "void"
+          }
+        ]
+      }
+    }
+  ]
+}
Index: clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
===================================================================
--- clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
+++ clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
@@ -19,6 +19,7 @@
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RawCommentList.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/ExtractAPI/API.h"
 #include "clang/ExtractAPI/AvailabilityInfo.h"
@@ -30,23 +31,88 @@
 #include "clang/Frontend/FrontendOptions.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <utility>
 
 using namespace clang;
 using namespace extractapi;
 
 namespace {
 
+struct LocationFileChecker {
+  bool isLocationInKnownFile(SourceLocation Loc) {
+    // If the loc refers to a macro expansion we need to first get the file
+    // location of the expansion.
+    auto FileLoc = SM.getFileLoc(Loc);
+    FileID FID = SM.getFileID(FileLoc);
+    if (FID.isInvalid())
+      return false;
+
+    const auto *File = SM.getFileEntryForID(FID);
+    if (!File)
+      return false;
+
+    if (KnownFileEntries.count(File))
+      return true;
+
+    // Check if we have looked up this particular file entry previously instead
+    // of querying the preprocessor for external sources.
+    if (UnknownFileEntries.count(File))
+      return false;
+
+    // If file was not found, search by how the header was included. This is
+    // primarily to resolve headers found via headermaps, as they remap
+    // locations.
+    const auto *FileInfo = PP.getHeaderSearchInfo().getExistingFileInfo(File);
+    if (!FileInfo || !FileInfo->IsValid)
+      return false;
+
+    StringRef FileName = File->getName();
+
+    if (!KnownIncludes.count(FileName)) {
+      // Record that the file is not a known input to avoid future string
+      // searches for the same file.
+      UnknownFileEntries.insert(File);
+      return false;
+    }
+
+    KnownFileEntries.insert(File);
+    return true;
+  }
+
+  LocationFileChecker(const SourceManager &SM, const Preprocessor &PP,
+                      const std::vector<std::string> &KnownFiles)
+      : SM(SM), PP(PP) {
+    for (const auto &KnownFilePath : KnownFiles) {
+      if (auto FileEntry = SM.getFileManager().getFile(KnownFilePath))
+        KnownFileEntries.insert(*FileEntry);
+      KnownIncludes.insert(KnownFilePath);
+    }
+  }
+
+private:
+  const SourceManager &SM;
+  const Preprocessor &PP;
+  llvm::DenseSet<const FileEntry *> KnownFileEntries;
+  llvm::StringSet<> KnownIncludes;
+  // Memoize unknown file entries so we don't have to check for external sources
+  // all the time.
+  llvm::DenseSet<const FileEntry *> UnknownFileEntries;
+};
+
 /// The RecursiveASTVisitor to traverse symbol declarations and collect API
 /// information.
 class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
 public:
-  ExtractAPIVisitor(ASTContext &Context, APISet &API)
-      : Context(Context), API(API) {}
+  ExtractAPIVisitor(ASTContext &Context, LocationFileChecker &LCF, APISet &API)
+      : Context(Context), API(API), LCF(LCF) {}
 
   const APISet &getAPI() const { return API; }
 
@@ -68,6 +134,9 @@
         Decl->getTemplateSpecializationKind() == TSK_Undeclared)
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -125,6 +194,9 @@
       return true;
     }
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -159,6 +231,9 @@
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -194,6 +269,9 @@
     if (isa<CXXRecordDecl>(Decl))
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -225,6 +303,9 @@
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -269,6 +350,9 @@
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -494,12 +578,14 @@
 
   ASTContext &Context;
   APISet &API;
+  LocationFileChecker &LCF;
 };
 
 class ExtractAPIConsumer : public ASTConsumer {
 public:
-  ExtractAPIConsumer(ASTContext &Context, APISet &API)
-      : Visitor(Context, API) {}
+  ExtractAPIConsumer(ASTContext &Context,
+                     std::unique_ptr<LocationFileChecker> LCF, APISet &API)
+      : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {}
 
   void HandleTranslationUnit(ASTContext &Context) override {
     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
@@ -508,11 +594,13 @@
 
 private:
   ExtractAPIVisitor Visitor;
+  std::unique_ptr<LocationFileChecker> LCF;
 };
 
 class MacroCallback : public PPCallbacks {
 public:
-  MacroCallback(const SourceManager &SM, APISet &API) : SM(SM), API(API) {}
+  MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API)
+      : SM(SM), LCF(LCF), API(API) {}
 
   void MacroDefined(const Token &MacroNameToken,
                     const MacroDirective *MD) override {
@@ -552,6 +640,9 @@
       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
         continue;
 
+      if (!LCF.isLocationInKnownFile(PM.MacroNameToken.getLocation()))
+        continue;
+
       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
       StringRef USR =
@@ -576,6 +667,7 @@
   };
 
   const SourceManager &SM;
+  LocationFileChecker &LCF;
   APISet &API;
   llvm::SmallVector<PendingMacro> PendingMacros;
 };
@@ -596,11 +688,15 @@
       CI.getTarget().getTriple(),
       CI.getFrontendOpts().Inputs.back().getKind().getLanguage());
 
+  auto LCF = std::make_unique<LocationFileChecker>(
+      CI.getSourceManager(), CI.getPreprocessor(), KnownInputFiles);
+
   // Register preprocessor callbacks that will add macro definitions to API.
   CI.getPreprocessor().addPPCallbacks(
-      std::make_unique<MacroCallback>(CI.getSourceManager(), *API));
+      std::make_unique<MacroCallback>(CI.getSourceManager(), *LCF, *API));
 
-  return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), *API);
+  return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
+                                              std::move(LCF), *API);
 }
 
 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
@@ -620,6 +716,8 @@
     HeaderContents += " \"";
     HeaderContents += FIF.getFile();
     HeaderContents += "\"\n";
+
+    KnownInputFiles.emplace_back(FIF.getFile());
   }
 
   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
Index: clang/include/clang/ExtractAPI/FrontendActions.h
===================================================================
--- clang/include/clang/ExtractAPI/FrontendActions.h
+++ clang/include/clang/ExtractAPI/FrontendActions.h
@@ -39,6 +39,9 @@
   /// files.
   std::unique_ptr<llvm::MemoryBuffer> Buffer;
 
+  /// The input files originally provided on the command line.
+  std::vector<std::string> KnownInputFiles;
+
   /// Prepare to execute the action on the given CompilerInstance.
   ///
   /// This is called before executing the action on any inputs. This generates a
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to