[clang] aa484c9 - [Lex/DependencyDirectivesScanner] Keep track of the presence of tokens between the last scanned directive and EOF

Argyrios Kyrtzidis via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 7 10:31:39 PDT 2022


Author: Argyrios Kyrtzidis
Date: 2022-09-07T10:31:29-07:00
New Revision: aa484c90cf5902042cec0f6a4f3bf2a460eea307

URL: https://github.com/llvm/llvm-project/commit/aa484c90cf5902042cec0f6a4f3bf2a460eea307
DIFF: https://github.com/llvm/llvm-project/commit/aa484c90cf5902042cec0f6a4f3bf2a460eea307.diff

LOG: [Lex/DependencyDirectivesScanner] Keep track of the presence of tokens between the last scanned directive and EOF

Directive `dependency_directives_scan::tokens_present_before_eof` is introduced to indicate there were tokens present before
the last scanned dependency directive and EOF.
This is useful to ensure we correctly identify the macro guards when lexing using the dependency directives.

Differential Revision: https://reviews.llvm.org/D133357

Added: 
    clang/unittests/Lex/PPDependencyDirectivesTest.cpp

Modified: 
    clang/include/clang/Lex/DependencyDirectivesScanner.h
    clang/lib/Lex/DependencyDirectivesScanner.cpp
    clang/lib/Lex/Lexer.cpp
    clang/unittests/Lex/CMakeLists.txt
    clang/unittests/Lex/DependencyDirectivesScannerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index ec6ac6173e855..529b93aa0ffbf 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -82,6 +82,9 @@ enum DirectiveKind : uint8_t {
   cxx_import_decl,
   cxx_export_module_decl,
   cxx_export_import_decl,
+  /// Indicates that there are tokens present between the last scanned directive
+  /// and eof. The \p Directive::Tokens array will be empty for this kind.
+  tokens_present_before_eof,
   pp_eof,
 };
 

diff  --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 0efcc352f0883..c0b1687b2847f 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -87,6 +87,9 @@ struct Scanner {
   dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                         const char *const End);
 
+  void skipLine(const char *&First, const char *const End);
+  void skipDirective(StringRef Name, const char *&First, const char *const End);
+
   /// Lexes next token and if it is identifier returns its string, otherwise
   /// it skips the current line and returns \p None.
   ///
@@ -150,6 +153,7 @@ struct Scanner {
   DiagnosticsEngine *Diags;
   SourceLocation InputSourceLoc;
 
+  const char *LastTokenPtr = nullptr;
   /// Keeps track of the tokens for the currently lexed directive. Once a
   /// directive is fully lexed and "committed" then the tokens get appended to
   /// \p Tokens and \p CurDirToks is cleared for the next directive.
@@ -364,7 +368,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start,
   return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
 }
 
-static void skipLine(const char *&First, const char *const End) {
+void Scanner::skipLine(const char *&First, const char *const End) {
   for (;;) {
     assert(First <= End);
     if (First == End)
@@ -379,6 +383,7 @@ static void skipLine(const char *&First, const char *const End) {
       // Iterate over strings correctly to avoid comments and newlines.
       if (*First == '"' ||
           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+        LastTokenPtr = First;
         if (isRawStringLiteral(Start, First))
           skipRawString(First, End);
         else
@@ -388,6 +393,7 @@ static void skipLine(const char *&First, const char *const End) {
 
       // Iterate over comments correctly.
       if (*First != '/' || End - First < 2) {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -399,6 +405,7 @@ static void skipLine(const char *&First, const char *const End) {
       }
 
       if (First[1] != '*') {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -416,8 +423,8 @@ static void skipLine(const char *&First, const char *const End) {
   }
 }
 
-static void skipDirective(StringRef Name, const char *&First,
-                          const char *const End) {
+void Scanner::skipDirective(StringRef Name, const char *&First,
+                            const char *const End) {
   if (llvm::StringSwitch<bool>(Name)
           .Case("warning", true)
           .Case("error", true)
@@ -710,6 +717,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
     return false;
   }
 
+  LastTokenPtr = First;
+
   TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
 
   auto ScEx1 = make_scope_exit([&]() {
@@ -803,6 +812,9 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
 
   if (!Error) {
     // Add an EOF on success.
+    if (LastTokenPtr &&
+        (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
+      pushDirective(tokens_present_before_eof);
     pushDirective(pp_eof);
   }
 
@@ -851,6 +863,8 @@ void clang::printDependencyDirectivesAsSource(
   };
 
   for (const dependency_directives_scan::Directive &Directive : Directives) {
+    if (Directive.Kind == tokens_present_before_eof)
+      OS << "<TokBeforeEOF>";
     Optional<tok::TokenKind> PrevTokenKind;
     for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
       if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))

diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 83c31bc491e85..a0a0802da1736 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -4323,6 +4323,8 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {
   while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
     if (DepDirectives.front().Kind == pp_eof)
       return LexEndOfFile(Result, BufferEnd);
+    if (DepDirectives.front().Kind == tokens_present_before_eof)
+      MIOpt.ReadToken();
     NextDepDirectiveTokenIndex = 0;
     DepDirectives = DepDirectives.drop_front();
   }
@@ -4398,6 +4400,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
     case cxx_import_decl:
     case cxx_export_module_decl:
     case cxx_export_import_decl:
+    case tokens_present_before_eof:
       break;
     case pp_if:
     case pp_ifdef:

diff  --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt
index 7aa7ecf92584f..5b498f54fb0af 100644
--- a/clang/unittests/Lex/CMakeLists.txt
+++ b/clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@ add_clang_unittest(LexTests
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPDependencyDirectivesTest.cpp
   PPMemoryAllocationsTest.cpp
   )
 
@@ -19,4 +20,6 @@ clang_target_link_libraries(LexTests
   clangLex
   clangParse
   clangSema
+
+  LLVMTestingSupport
   )

diff  --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index 8c57539852830..7ff09dc5d9c5b 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -57,10 +57,11 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
                                                     Directives));
-  EXPECT_TRUE(Out.empty());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
   EXPECT_TRUE(Tokens.empty());
-  ASSERT_EQ(1u, Directives.size());
-  ASSERT_EQ(pp_eof, Directives.back().Kind);
+  ASSERT_EQ(2u, Directives.size());
+  EXPECT_EQ(tokens_present_before_eof, Directives[0].Kind);
+  EXPECT_EQ(pp_eof, Directives[1].Kind);
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
@@ -451,7 +452,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
   SmallVector<char, 128> Out;
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma push_macro(\"MACRO\")\n", Out));
@@ -470,15 +471,15 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
   EXPECT_STREQ("#pragma include_alias(<A>, <B>)\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module impor\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module import\n", Out));
@@ -663,7 +664,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
            "#error \\\n#include <t.h>\n",
        }) {
     ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-    EXPECT_STREQ("", Out.data());
+    EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
   }
 
   for (auto Source : {
@@ -767,7 +768,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
 )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n"
+      "<TokBeforeEOF>\n",
       Out.data());
 
   Source = R"(#if NEVER_ENABLED
@@ -778,7 +780,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
   )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n"
+      "<TokBeforeEOF>\n",
       Out.data());
 }
 
@@ -799,11 +802,11 @@ TEST(MinimizeSourceToDependencyDirectivesTest,
 
   StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data());
+  EXPECT_STREQ("#define X '\\ \t\nx'\n<TokBeforeEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\"\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data());
+  EXPECT_STREQ("#define X \"\\ \r\nx\"\n<TokBeforeEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\n#include <x>\n";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
@@ -848,11 +851,56 @@ ort \
                "exp\\\nort import:l[[rename]];"
                "import<<=3;import a b d e d e f e;"
                "import foo[[no_unique_address]];import foo();"
-               "import f(:sefse);import f(->a=3);\n",
+               "import f(:sefse);import f(->a=3);"
+               "<TokBeforeEOF>\n",
                Out.data());
-  ASSERT_EQ(Directives.size(), 10u);
+  ASSERT_EQ(Directives.size(), 11u);
   EXPECT_EQ(Directives[0].Kind, pp_include);
   EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl);
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
+  SmallString<128> Out;
+
+  StringRef Source = R"(
+    #define A
+    #ifdef B
+    int x;
+    #endif
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define A\n<TokBeforeEOF>\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif // some comment
+
+    // other comment
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    int x;
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBeforeEOF>\n", Out.data());
+}
+
 } // end anonymous namespace

diff  --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
new file mode 100644
index 0000000000000..b7d743132f6f9
--- /dev/null
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -0,0 +1,148 @@
+//===- unittests/Lex/PPDependencyDirectivesTest.cpp -------------------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+// The test fixture.
+class PPDependencyDirectivesTest : public ::testing::Test {
+protected:
+  PPDependencyDirectivesTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+    TargetOpts->Triple = "x86_64-apple-macos12";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+};
+
+class IncludeCollector : public PPCallbacks {
+public:
+  Preprocessor &PP;
+  SmallVectorImpl<StringRef> &IncludedFiles;
+
+  IncludeCollector(Preprocessor &PP, SmallVectorImpl<StringRef> &IncludedFiles)
+      : PP(PP), IncludedFiles(IncludedFiles) {}
+
+  void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
+                        SrcMgr::CharacteristicKind FileType, FileID PrevFID,
+                        SourceLocation Loc) override {
+    if (Reason != LexedFileChangeReason::EnterFile)
+      return;
+    if (FID == PP.getPredefinesFileID())
+      return;
+    StringRef Filename =
+        PP.getSourceManager().getSLocEntry(FID).getFile().getName();
+    IncludedFiles.push_back(Filename);
+  }
+};
+
+TEST_F(PPDependencyDirectivesTest, MacroGuard) {
+  // "head1.h" has a macro guard and should only be included once.
+  // "head2.h" and "head3.h" have tokens following the macro check, they should
+  // be included multiple times.
+
+  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  VFS->addFile(
+      "head1.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H1_H\n#define H1_H\n#endif\n"));
+  VFS->addFile(
+      "head2.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H2_H\n#define H2_H\n#endif\n\n"
+                                       "extern int foo;\n"));
+  VFS->addFile("head3.h", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#ifndef H3_H\n#define H3_H\n#endif\n\n"
+                   "#ifdef SOMEMAC\nextern int foo;\n#endif\n"));
+  VFS->addFile("main.c", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#include \"head1.h\"\n#include \"head1.h\"\n"
+                   "#include \"head2.h\"\n#include \"head2.h\"\n"
+                   "#include \"head3.h\"\n#include \"head3.h\"\n"));
+  FileMgr.setVirtualFileSystem(VFS);
+
+  Optional<FileEntryRef> FE;
+  ASSERT_THAT_ERROR(FileMgr.getFileRef("main.c").moveInto(FE),
+                    llvm::Succeeded());
+  SourceMgr.setMainFileID(
+      SourceMgr.createFileID(*FE, SourceLocation(), SrcMgr::C_User));
+
+  struct DepDirectives {
+    SmallVector<dependency_directives_scan::Token> Tokens;
+    SmallVector<dependency_directives_scan::Directive> Directives;
+  };
+  SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
+
+  auto getDependencyDirectives = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    DepDirectivesObjects.push_back(std::make_unique<DepDirectives>());
+    StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
+    bool Err = scanSourceForDependencyDirectives(
+        Input, DepDirectivesObjects.back()->Tokens,
+        DepDirectivesObjects.back()->Directives);
+    EXPECT_FALSE(Err);
+    return llvm::makeArrayRef(DepDirectivesObjects.back()->Directives);
+  };
+
+  auto PPOpts = std::make_shared<PreprocessorOptions>();
+  PPOpts->DependencyDirectivesForFile = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    return getDependencyDirectives(File);
+  };
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, Target.get());
+  Preprocessor PP(PPOpts, Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+
+  SmallVector<StringRef> IncludedFiles;
+  PP.addPPCallbacks(std::make_unique<IncludeCollector>(PP, IncludedFiles));
+  PP.EnterMainSourceFile();
+  while (true) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+  }
+
+  SmallVector<StringRef> ExpectedIncludes{
+      "main.c", "./head1.h", "./head2.h", "./head2.h", "./head3.h", "./head3.h",
+  };
+  EXPECT_EQ(IncludedFiles, ExpectedIncludes);
+}
+
+} // anonymous namespace


        


More information about the cfe-commits mailing list