[clang] Pass LangOpts from CompilerInstance to DependencyScanningWorker (PR #93753)

Nishith Kumar M Shah via cfe-commits cfe-commits at lists.llvm.org
Fri May 31 11:27:09 PDT 2024


https://github.com/nishithshah2211 updated https://github.com/llvm/llvm-project/pull/93753

>From 46a25038abbcf5ab9eebded6813b4bbb71a44925 Mon Sep 17 00:00:00 2001
From: Nishith Shah <nishithshah.2211 at gmail.com>
Date: Wed, 29 May 2024 12:34:52 -0700
Subject: [PATCH] Pass LangOpts from CompilerInstance to
 DependencyScanningWorker

This commit fixes https://github.com/llvm/llvm-project/issues/88896
by passing LangOpts from the CompilerInstance to
DependencyScanningWorker so that the original LangOpts are
preserved/respected. This makes lexing during dependency scanning more
accurate when the source relies on features that are specific to a
language version (for example, C++14 digit separators such as 123'124).
---
 .../clang/Lex/DependencyDirectivesScanner.h   |  3 +-
 .../DependencyScanningFilesystem.h            |  3 +-
 clang/lib/Frontend/FrontendActions.cpp        |  4 +-
 clang/lib/Lex/DependencyDirectivesScanner.cpp | 22 +++--
 .../DependencyScanningFilesystem.cpp          |  4 +-
 .../DependencyScanningWorker.cpp              |  5 +-
 .../Lex/DependencyDirectivesScannerTest.cpp   | 82 ++++++++++++++++---
 .../Lex/PPDependencyDirectivesTest.cpp        |  3 +-
 8 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index 0e115906fbfe5..2f8354dec939f 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
 #define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
 
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/ArrayRef.h"
 
@@ -117,7 +118,7 @@ struct Directive {
 bool scanSourceForDependencyDirectives(
     StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
     SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
-    DiagnosticsEngine *Diags = nullptr,
+    const LangOptions &LangOpts, DiagnosticsEngine *Diags = nullptr,
     SourceLocation InputSourceLoc = SourceLocation());
 
 /// Print the previously scanned dependency directives as minimized source text.
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index f7b4510d7f7be..9dc20065a09a3 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -363,7 +363,8 @@ class DependencyScanningWorkerFilesystem
   ///
   /// Returns true if the directive tokens are populated for this file entry,
   /// false if not (i.e. this entry is not a file or its scan fails).
-  bool ensureDirectiveTokensArePopulated(EntryRef Entry);
+  bool ensureDirectiveTokensArePopulated(EntryRef Entry,
+                                         const LangOptions &LangOpts);
 
   /// Check whether \p Path exists. By default checks cached result of \c
   /// status(), and falls back on FS if unable to do so.
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 454653a31534c..eddb2ac0c0834 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -1168,8 +1168,8 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
   llvm::SmallVector<dependency_directives_scan::Token, 16> Tokens;
   llvm::SmallVector<dependency_directives_scan::Directive, 32> Directives;
   if (scanSourceForDependencyDirectives(
-          FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(),
-          SM.getLocForStartOfFile(SM.getMainFileID()))) {
+          FromFile.getBuffer(), Tokens, Directives, CI.getLangOpts(),
+          &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) {
     assert(CI.getDiagnostics().hasErrorOccurred() &&
            "no errors reported for failure");
 
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 0971daa1f3666..fda54d314eef6 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -62,14 +62,17 @@ struct DirectiveWithTokens {
 struct Scanner {
   Scanner(StringRef Input,
           SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
-          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
+          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc,
+          const LangOptions &LangOpts)
       : Input(Input), Tokens(Tokens), Diags(Diags),
-        InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
-        TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
+        InputSourceLoc(InputSourceLoc),
+        LangOpts(getLangOptsForDepScanning(LangOpts)),
+        TheLexer(InputSourceLoc, this->LangOpts, Input.begin(), Input.begin(),
                  Input.end()) {}
 
-  static LangOptions getLangOptsForDepScanning() {
-    LangOptions LangOpts;
+  static LangOptions
+  getLangOptsForDepScanning(const LangOptions &invocationLangOpts) {
+    LangOptions LangOpts(invocationLangOpts);
     // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
     LangOpts.ObjC = true;
     LangOpts.LineComment = true;
@@ -700,7 +703,7 @@ bool Scanner::lex_Pragma(const char *&First, const char *const End) {
   SmallVector<dependency_directives_scan::Token> DiscardTokens;
   const char *Begin = Buffer.c_str();
   Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
-                        InputSourceLoc};
+                        InputSourceLoc, LangOptions()};
 
   PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
   if (PragmaScanner.lexPragma(Begin, Buffer.end()))
@@ -950,9 +953,10 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
 
 bool clang::scanSourceForDependencyDirectives(
     StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
-    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
-    SourceLocation InputSourceLoc) {
-  return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
+    SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts,
+    DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) {
+  return Scanner(Input, Tokens, Diags, InputSourceLoc, LangOpts)
+      .scan(Directives);
 }
 
 void clang::printDependencyDirectivesAsSource(
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 0cab17a342440..66a2f6e0acb63 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,7 +42,7 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
 }
 
 bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
-    EntryRef Ref) {
+    EntryRef Ref, const LangOptions &LangOpts) {
   auto &Entry = Ref.Entry;
 
   if (Entry.isError() || Entry.isDirectory())
@@ -66,7 +66,7 @@ bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
   // dependencies.
   if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
                                         Contents->DepDirectiveTokens,
-                                        Directives)) {
+                                        Directives, LangOpts)) {
     Contents->DepDirectiveTokens.clear();
     // FIXME: Propagate the diagnostic if desired by the client.
     Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 0c047b6c5da2f..c3d63c3f890e8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -366,11 +366,12 @@ class DependencyScanningAction : public tooling::ToolAction {
     // Use the dependency scanning optimized file system if requested to do so.
     if (DepFS)
       ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
-          [LocalDepFS = DepFS](FileEntryRef File)
+          [LocalDepFS = DepFS,
+           &LangOpts = ScanInstance.getLangOpts()](FileEntryRef File)
           -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
         if (llvm::ErrorOr<EntryRef> Entry =
                 LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
-          if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
+          if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry, LangOpts))
             return Entry->getDirectiveTokens();
         return std::nullopt;
       };
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index 59fef9ecbb9c9..044c3d65ec6fb 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/SmallString.h"
 #include "gtest/gtest.h"
 
@@ -17,11 +18,11 @@ using namespace clang::dependency_directives_scan;
 static bool minimizeSourceToDependencyDirectives(
     StringRef Input, SmallVectorImpl<char> &Out,
     SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
-    SmallVectorImpl<Directive> &Directives) {
+    SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts) {
   Out.clear();
   Tokens.clear();
   Directives.clear();
-  if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
+  if (scanSourceForDependencyDirectives(Input, Tokens, Directives, LangOpts))
     return true;
 
   raw_svector_ostream OS(Out);
@@ -38,7 +39,9 @@ static bool minimizeSourceToDependencyDirectives(StringRef Input,
                                                  SmallVectorImpl<char> &Out) {
   SmallVector<dependency_directives_scan::Token, 16> Tokens;
   SmallVector<Directive, 32> Directives;
-  return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives);
+  LangOptions LangOpts;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives,
+                                              LangOpts);
 }
 
 namespace {
@@ -47,16 +50,17 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
   SmallVector<char, 128> Out;
   SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts;
 
-  ASSERT_FALSE(
-      minimizeSourceToDependencyDirectives("", Out, Tokens, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens, Directives,
+                                                    LangOpts));
   EXPECT_TRUE(Out.empty());
   EXPECT_TRUE(Tokens.empty());
   ASSERT_EQ(1u, Directives.size());
   ASSERT_EQ(pp_eof, Directives.back().Kind);
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
-                                                    Directives));
+                                                    Directives, LangOpts));
   EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
   EXPECT_TRUE(Tokens.empty());
   ASSERT_EQ(2u, Directives.size());
@@ -68,6 +72,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
   SmallVector<char, 128> Out;
   SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts;
 
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#define A\n"
@@ -92,7 +97,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
                                            "export module m;\n"
                                            "import m;\n"
                                            "#pragma clang system_header\n",
-                                           Out, Tokens, Directives));
+                                           Out, Tokens, Directives, LangOpts));
   EXPECT_EQ(pp_define, Directives[0].Kind);
   EXPECT_EQ(pp_undef, Directives[1].Kind);
   EXPECT_EQ(pp_endif, Directives[2].Kind);
@@ -145,9 +150,10 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
   SmallVector<char, 128> Out;
   SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts;
 
-  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out,
-                                                    Tokens, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO", Out, Tokens, Directives, LangOpts));
   EXPECT_STREQ("#define MACRO\n", Out.data());
   ASSERT_EQ(4u, Tokens.size());
   ASSERT_EQ(2u, Directives.size());
@@ -838,6 +844,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
   SmallVector<char, 128> Out;
   SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts;
 
   StringRef Source = R"(// comment
 #pragma once
@@ -845,8 +852,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
 #include <test.h>
 _Pragma("once")
 )";
-  ASSERT_FALSE(
-      minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+                                                    Directives, LangOpts));
   EXPECT_STREQ("#pragma once\n#include <test.h>\n_Pragma(\"once\")\n",
                Out.data());
   ASSERT_EQ(Directives.size(), 4u);
@@ -926,6 +933,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
   SmallVector<char, 128> Out;
   SmallVector<dependency_directives_scan::Token, 4> Tokens;
   SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts;
 
   StringRef Source = R"(
     module;
@@ -954,8 +962,8 @@ ort \
       import f(->a = 3);
     }
     )";
-  ASSERT_FALSE(
-      minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+                                                    Directives, LangOpts));
   EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
                "exp\\\nort import:l[[rename]];"
                "import<<=3;import a b d e d e f e;"
@@ -1012,4 +1020,52 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
   EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBeforeEOF>\n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CPlusPlus14PPNumber) {
+  SmallVector<char, 128> Out;
+  SmallVector<dependency_directives_scan::Token, 4> Tokens;
+  SmallVector<Directive, 4> Directives;
+  LangOptions LangOpts; // CPlusPlus14 is toggled below; same Source both times
+
+  StringRef Source = R"(
+#if 123'124
+#endif
+)";
+
+  LangOpts.CPlusPlus14 = true; // expect 123'124 to lex as a single token
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+                                                    Directives, LangOpts));
+  EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
+  ASSERT_EQ(Directives.size(), 3u);
+  EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
+  EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
+  EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
+  ASSERT_EQ(Tokens.size(), 7u); // 4 tokens for the #if line, 3 for #endif
+
+  ASSERT_TRUE(Tokens[0].is(tok::hash));
+  ASSERT_TRUE(Tokens[1].is(tok::raw_identifier));   // "if"
+  ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123'124
+  ASSERT_TRUE(Tokens[3].is(tok::eod));
+  ASSERT_TRUE(Tokens[4].is(tok::hash));
+  ASSERT_TRUE(Tokens[5].is(tok::raw_identifier)); // "endif"
+  ASSERT_TRUE(Tokens[6].is(tok::eod));
+
+  LangOpts.CPlusPlus14 = false; // expect the literal to split at the quote
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+                                                    Directives, LangOpts));
+  EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
+  ASSERT_EQ(Directives.size(), 3u);
+  EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
+  EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
+  EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
+  ASSERT_EQ(Tokens.size(), 8u); // one more token than the C++14 run above
+  ASSERT_TRUE(Tokens[0].is(tok::hash));
+  ASSERT_TRUE(Tokens[1].is(tok::raw_identifier));   // "if"
+  ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123
+  ASSERT_TRUE(Tokens[3].is(tok::unknown));          // '124
+  ASSERT_TRUE(Tokens[4].is(tok::eod));
+  ASSERT_TRUE(Tokens[5].is(tok::hash));
+  ASSERT_TRUE(Tokens[6].is(tok::raw_identifier)); // "endif"
+  ASSERT_TRUE(Tokens[7].is(tok::eod));
+}
+
 } // end anonymous namespace
diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
index 6ff87f720a559..410f378f1e89d 100644
--- a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -104,6 +104,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
     SmallVector<dependency_directives_scan::Directive> Directives;
   };
   SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
+  LangOptions LangOpts;
 
   auto getDependencyDirectives = [&](FileEntryRef File)
       -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
@@ -111,7 +112,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
     StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
     bool Err = scanSourceForDependencyDirectives(
         Input, DepDirectivesObjects.back()->Tokens,
-        DepDirectivesObjects.back()->Directives);
+        DepDirectivesObjects.back()->Directives, LangOpts);
     EXPECT_FALSE(Err);
     return llvm::ArrayRef(DepDirectivesObjects.back()->Directives);
   };



More information about the cfe-commits mailing list