[clang] aebe5fc - [clang][extract-api] Process only APIs declared in inputs

Daniel Grumberg via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 7 09:56:13 PDT 2022


Author: Daniel Grumberg
Date: 2022-04-07T17:49:05+01:00
New Revision: aebe5fc6e7d8ab99f3796067d430752552932d28

URL: https://github.com/llvm/llvm-project/commit/aebe5fc6e7d8ab99f3796067d430752552932d28
DIFF: https://github.com/llvm/llvm-project/commit/aebe5fc6e7d8ab99f3796067d430752552932d28.diff

LOG: [clang][extract-api] Process only APIs declared in inputs

We should only process APIs declared in the command line inputs to avoid
drowning the ExtractAPI output with symbols the user doesn't care about.
This is achieved by keeping track of the provided input files and
checking that the associated Decl or Macro is declared in one of those files.

Differential Revision: https://reviews.llvm.org/D123148

Added: 
    clang/test/ExtractAPI/known_files_only.c
    clang/test/ExtractAPI/known_files_only_hmap.c

Modified: 
    clang/include/clang/ExtractAPI/FrontendActions.h
    clang/lib/ExtractAPI/ExtractAPIConsumer.cpp

Removed: 
    


################################################################################
diff --git a/clang/include/clang/ExtractAPI/FrontendActions.h b/clang/include/clang/ExtractAPI/FrontendActions.h
index 2bdb61dc6994d..dec3b5ca93d18 100644
--- a/clang/include/clang/ExtractAPI/FrontendActions.h
+++ b/clang/include/clang/ExtractAPI/FrontendActions.h
@@ -39,6 +39,9 @@ class ExtractAPIAction : public ASTFrontendAction {
   /// files.
   std::unique_ptr<llvm::MemoryBuffer> Buffer;
 
+  /// The input file originally provided on the command line.
+  std::vector<std::string> KnownInputFiles;
+
   /// Prepare to execute the action on the given CompilerInstance.
   ///
   /// This is called before executing the action on any inputs. This generates a

diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
index 7c2914da7ea0c..949413d7d2b1d 100644
--- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
+++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
@@ -20,6 +20,8 @@
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RawCommentList.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/ExtractAPI/API.h"
 #include "clang/ExtractAPI/AvailabilityInfo.h"
@@ -31,11 +33,15 @@
 #include "clang/Frontend/FrontendOptions.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <utility>
 
 using namespace clang;
 using namespace extractapi;
@@ -49,12 +55,44 @@ StringRef getTypedefName(const TagDecl *Decl) {
   return {};
 }
 
+struct LocationFileChecker {
+  bool isLocationInKnownFile(SourceLocation Loc) {
+    // If the loc refers to a macro expansion we need to first get the file
+    // location of the expansion.
+    auto FileLoc = SM.getFileLoc(Loc);
+    FileID FID = SM.getFileID(FileLoc);
+    if (FID.isInvalid())
+      return false;
+
+    const auto *File = SM.getFileEntryForID(FID);
+    if (!File)
+      return false;
+
+    if (KnownFileEntries.count(File))
+      return true;
+
+    return false;
+  }
+
+  LocationFileChecker(const SourceManager &SM,
+                      const std::vector<std::string> &KnownFiles)
+      : SM(SM) {
+    for (const auto &KnownFilePath : KnownFiles)
+      if (auto FileEntry = SM.getFileManager().getFile(KnownFilePath))
+        KnownFileEntries.insert(*FileEntry);
+  }
+
+private:
+  const SourceManager &SM;
+  llvm::DenseSet<const FileEntry *> KnownFileEntries;
+};
+
 /// The RecursiveASTVisitor to traverse symbol declarations and collect API
 /// information.
 class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
 public:
-  ExtractAPIVisitor(ASTContext &Context, APISet &API)
-      : Context(Context), API(API) {}
+  ExtractAPIVisitor(ASTContext &Context, LocationFileChecker &LCF, APISet &API)
+      : Context(Context), API(API), LCF(LCF) {}
 
   const APISet &getAPI() const { return API; }
 
@@ -76,6 +114,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
         Decl->getTemplateSpecializationKind() == TSK_Undeclared)
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -133,6 +174,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
       return true;
     }
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -167,6 +211,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     if (Name.empty())
@@ -204,6 +251,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
     if (isa<CXXRecordDecl>(Decl))
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     if (Name.empty())
@@ -237,6 +287,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -281,6 +334,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
     if (!Decl->isThisDeclarationADefinition())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     // Collect symbol information.
     StringRef Name = Decl->getName();
     StringRef USR = API.recordUSR(Decl);
@@ -316,6 +372,9 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
     if (!Decl->isDefinedOutsideFunctionOrMethod())
       return true;
 
+    if (!LCF.isLocationInKnownFile(Decl->getLocation()))
+      return true;
+
     PresumedLoc Loc =
         Context.getSourceManager().getPresumedLoc(Decl->getLocation());
     StringRef Name = Decl->getName();
@@ -569,12 +628,14 @@ class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
 
   ASTContext &Context;
   APISet &API;
+  LocationFileChecker &LCF;
 };
 
 class ExtractAPIConsumer : public ASTConsumer {
 public:
-  ExtractAPIConsumer(ASTContext &Context, APISet &API)
-      : Visitor(Context, API) {}
+  ExtractAPIConsumer(ASTContext &Context,
+                     std::unique_ptr<LocationFileChecker> LCF, APISet &API)
+      : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {}
 
   void HandleTranslationUnit(ASTContext &Context) override {
     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
@@ -583,11 +644,13 @@ class ExtractAPIConsumer : public ASTConsumer {
 
 private:
   ExtractAPIVisitor Visitor;
+  std::unique_ptr<LocationFileChecker> LCF;
 };
 
 class MacroCallback : public PPCallbacks {
 public:
-  MacroCallback(const SourceManager &SM, APISet &API) : SM(SM), API(API) {}
+  MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API)
+      : SM(SM), LCF(LCF), API(API) {}
 
   void MacroDefined(const Token &MacroNameToken,
                     const MacroDirective *MD) override {
@@ -627,6 +690,9 @@ class MacroCallback : public PPCallbacks {
       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
         continue;
 
+      if (!LCF.isLocationInKnownFile(PM.MacroNameToken.getLocation()))
+        continue;
+
       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
       StringRef USR =
@@ -651,6 +717,7 @@ class MacroCallback : public PPCallbacks {
   };
 
   const SourceManager &SM;
+  LocationFileChecker &LCF;
   APISet &API;
   llvm::SmallVector<PendingMacro> PendingMacros;
 };
@@ -671,11 +738,15 @@ ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
       CI.getTarget().getTriple(),
       CI.getFrontendOpts().Inputs.back().getKind().getLanguage());
 
+  auto LCF = std::make_unique<LocationFileChecker>(CI.getSourceManager(),
+                                                   KnownInputFiles);
+
   // Register preprocessor callbacks that will add macro definitions to API.
   CI.getPreprocessor().addPPCallbacks(
-      std::make_unique<MacroCallback>(CI.getSourceManager(), *API));
+      std::make_unique<MacroCallback>(CI.getSourceManager(), *LCF, *API));
 
-  return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), *API);
+  return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
+                                              std::move(LCF), *API);
 }
 
 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
@@ -695,6 +766,8 @@ bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
     HeaderContents += " \"";
     HeaderContents += FIF.getFile();
     HeaderContents += "\"\n";
+
+    KnownInputFiles.emplace_back(FIF.getFile());
   }
 
   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,

diff --git a/clang/test/ExtractAPI/known_files_only.c b/clang/test/ExtractAPI/known_files_only.c
new file mode 100644
index 0000000000000..4f1d75b05fc97
--- /dev/null
+++ b/clang/test/ExtractAPI/known_files_only.c
@@ -0,0 +1,100 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/reference.output.json.in >> \
+// RUN: %t/reference.output.json
+// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \
+// RUN: %t/input1.h -o %t/output.json | FileCheck -allow-empty %s
+
+// Generator version is not consistent across test runs, normalize it.
+// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \
+// RUN: %t/output.json >> %t/output-normalized.json
+// RUN: diff %t/reference.output.json %t/output-normalized.json
+
+// CHECK-NOT: error:
+// CHECK-NOT: warning:
+
+//--- input1.h
+int num;
+#include "input2.h"
+
+//--- input2.h
+// Ensure that these symbols are not emitted in the Symbol Graph.
+#define HELLO 1
+char not_emitted;
+void foo(int);
+struct Foo { int a; };
+
+//--- reference.output.json.in
+{
+  "metadata": {
+    "formatVersion": {
+      "major": 0,
+      "minor": 5,
+      "patch": 3
+    },
+    "generator": "?"
+  },
+  "module": {
+    "name": "GlobalRecord",
+    "platform": {
+      "architecture": "arm64",
+      "operatingSystem": {
+        "minimumVersion": {
+          "major": 11,
+          "minor": 0,
+          "patch": 0
+        },
+        "name": "macosx"
+      },
+      "vendor": "apple"
+    }
+  },
+  "relationships": [],
+  "symbols": [
+    {
+      "accessLevel": "public",
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "num"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@num"
+      },
+      "kind": {
+        "displayName": "Global Variable",
+        "identifier": "c.var"
+      },
+      "location": {
+        "position": {
+          "character": 5,
+          "line": 1
+        },
+        "uri": "file://INPUT_DIR/input1.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "num"
+          }
+        ],
+        "title": "num"
+      },
+      "pathComponents": [
+        "num"
+      ]
+    }
+  ]
+}

diff --git a/clang/test/ExtractAPI/known_files_only_hmap.c b/clang/test/ExtractAPI/known_files_only_hmap.c
new file mode 100644
index 0000000000000..f213287975d06
--- /dev/null
+++ b/clang/test/ExtractAPI/known_files_only_hmap.c
@@ -0,0 +1,164 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/reference.output.json.in >> \
+// RUN: %t/reference.output.json
+// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/known_files_only.hmap.json.in >> \
+// RUN: %t/known_files_only.hmap.json
+// RUN: %hmaptool write %t/known_files_only.hmap.json %t/known_files_only.hmap
+// RUN: %clang -extract-api --product-name=KnownFilesOnlyHmap -target arm64-apple-macosx \
+// RUN: -I%t/known_files_only.hmap -I%t/subdir %t/subdir/subdir1/input.h \
+// RUN: %t/subdir/subdir2/known_file.h -o %t/output.json | FileCheck -allow-empty %s
+
+// Generator version is not consistent across test runs, normalize it.
+// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \
+// RUN: %t/output.json >> %t/output-normalized.json
+// RUN: diff %t/reference.output.json %t/output-normalized.json
+
+// CHECK-NOT: error:
+// CHECK-NOT: warning:
+//--- known_files_only.hmap.json.in
+{
+  "mappings" :
+    {
+     "subdir2/known_file.h" : "INPUT_DIR/subdir/subdir3/unknown.h"
+    }
+}
+
+//--- subdir/subdir1/input.h
+int num;
+#include "subdir2/known_file.h"
+
+//--- subdir/subdir2/known_file.h
+int known_num;
+
+//--- subdir/subdir3/unknown.h
+// Ensure that these symbols are not emitted in the Symbol Graph.
+#ifndef INPUT4_H
+#define INPUT4_H
+
+#define HELLO 1
+char not_emitted;
+void foo(int);
+struct Foo { int a; };
+
+#endif
+
+//--- reference.output.json.in
+{
+  "metadata": {
+    "formatVersion": {
+      "major": 0,
+      "minor": 5,
+      "patch": 3
+    },
+    "generator": "?"
+  },
+  "module": {
+    "name": "KnownFilesOnlyHmap",
+    "platform": {
+      "architecture": "arm64",
+      "operatingSystem": {
+        "minimumVersion": {
+          "major": 11,
+          "minor": 0,
+          "patch": 0
+        },
+        "name": "macosx"
+      },
+      "vendor": "apple"
+    }
+  },
+  "relationships": [],
+  "symbols": [
+    {
+      "accessLevel": "public",
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "num"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@num"
+      },
+      "kind": {
+        "displayName": "Global Variable",
+        "identifier": "c.var"
+      },
+      "location": {
+        "position": {
+          "character": 5,
+          "line": 1
+        },
+        "uri": "file://INPUT_DIR/subdir/subdir1/input.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "num"
+          }
+        ],
+        "title": "num"
+      },
+      "pathComponents": [
+        "num"
+      ]
+    },
+    {
+      "accessLevel": "public",
+      "declarationFragments": [
+        {
+          "kind": "typeIdentifier",
+          "preciseIdentifier": "c:I",
+          "spelling": "int"
+        },
+        {
+          "kind": "text",
+          "spelling": " "
+        },
+        {
+          "kind": "identifier",
+          "spelling": "known_num"
+        }
+      ],
+      "identifier": {
+        "interfaceLanguage": "c",
+        "precise": "c:@known_num"
+      },
+      "kind": {
+        "displayName": "Global Variable",
+        "identifier": "c.var"
+      },
+      "location": {
+        "position": {
+          "character": 5,
+          "line": 1
+        },
+        "uri": "file://INPUT_DIR/subdir/subdir2/known_file.h"
+      },
+      "names": {
+        "subHeading": [
+          {
+            "kind": "identifier",
+            "spelling": "known_num"
+          }
+        ],
+        "title": "known_num"
+      },
+      "pathComponents": [
+        "known_num"
+      ]
+    }
+  ]
+}


        


More information about the cfe-commits mailing list