[clang] Pass LangOpts from CompilerInstance to DependencyScanningWorker (PR #93753)
Nishith Kumar M Shah via cfe-commits
cfe-commits at lists.llvm.org
Fri May 31 11:27:09 PDT 2024
https://github.com/nishithshah2211 updated https://github.com/llvm/llvm-project/pull/93753
>From 46a25038abbcf5ab9eebded6813b4bbb71a44925 Mon Sep 17 00:00:00 2001
From: Nishith Shah <nishithshah.2211 at gmail.com>
Date: Wed, 29 May 2024 12:34:52 -0700
Subject: [PATCH] Pass LangOpts from CompilerInstance to
DependencyScanningWorker
This commit fixes https://github.com/llvm/llvm-project/issues/88896
by passing LangOpts from the CompilerInstance to
DependencyScanningWorker so that the original LangOpts are
preserved/respected. This makes parsing/lexing more accurate when the
input relies on a particular language version or on version-specific features.
---
.../clang/Lex/DependencyDirectivesScanner.h | 3 +-
.../DependencyScanningFilesystem.h | 3 +-
clang/lib/Frontend/FrontendActions.cpp | 4 +-
clang/lib/Lex/DependencyDirectivesScanner.cpp | 22 +++--
.../DependencyScanningFilesystem.cpp | 4 +-
.../DependencyScanningWorker.cpp | 5 +-
.../Lex/DependencyDirectivesScannerTest.cpp | 82 ++++++++++++++++---
.../Lex/PPDependencyDirectivesTest.cpp | 3 +-
8 files changed, 95 insertions(+), 31 deletions(-)
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index 0e115906fbfe5..2f8354dec939f 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -17,6 +17,7 @@
#ifndef LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
#define LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
+#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
@@ -117,7 +118,7 @@ struct Directive {
bool scanSourceForDependencyDirectives(
StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
- DiagnosticsEngine *Diags = nullptr,
+ const LangOptions &LangOpts, DiagnosticsEngine *Diags = nullptr,
SourceLocation InputSourceLoc = SourceLocation());
/// Print the previously scanned dependency directives as minimized source text.
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index f7b4510d7f7be..9dc20065a09a3 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -363,7 +363,8 @@ class DependencyScanningWorkerFilesystem
///
/// Returns true if the directive tokens are populated for this file entry,
/// false if not (i.e. this entry is not a file or its scan fails).
- bool ensureDirectiveTokensArePopulated(EntryRef Entry);
+ bool ensureDirectiveTokensArePopulated(EntryRef Entry,
+ const LangOptions &LangOpts);
/// Check whether \p Path exists. By default checks cached result of \c
/// status(), and falls back on FS if unable to do so.
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 454653a31534c..eddb2ac0c0834 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -1168,8 +1168,8 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
llvm::SmallVector<dependency_directives_scan::Token, 16> Tokens;
llvm::SmallVector<dependency_directives_scan::Directive, 32> Directives;
if (scanSourceForDependencyDirectives(
- FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(),
- SM.getLocForStartOfFile(SM.getMainFileID()))) {
+ FromFile.getBuffer(), Tokens, Directives, CI.getLangOpts(),
+ &CI.getDiagnostics(), SM.getLocForStartOfFile(SM.getMainFileID()))) {
assert(CI.getDiagnostics().hasErrorOccurred() &&
"no errors reported for failure");
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 0971daa1f3666..fda54d314eef6 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -62,14 +62,17 @@ struct DirectiveWithTokens {
struct Scanner {
Scanner(StringRef Input,
SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
+ DiagnosticsEngine *Diags, SourceLocation InputSourceLoc,
+ const LangOptions &LangOpts)
: Input(Input), Tokens(Tokens), Diags(Diags),
- InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
- TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
+ InputSourceLoc(InputSourceLoc),
+ LangOpts(getLangOptsForDepScanning(LangOpts)),
+ TheLexer(InputSourceLoc, this->LangOpts, Input.begin(), Input.begin(),
Input.end()) {}
- static LangOptions getLangOptsForDepScanning() {
- LangOptions LangOpts;
+ static LangOptions
+ getLangOptsForDepScanning(const LangOptions &invocationLangOpts) {
+ LangOptions LangOpts(invocationLangOpts);
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
LangOpts.ObjC = true;
LangOpts.LineComment = true;
@@ -700,7 +703,7 @@ bool Scanner::lex_Pragma(const char *&First, const char *const End) {
SmallVector<dependency_directives_scan::Token> DiscardTokens;
const char *Begin = Buffer.c_str();
Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
- InputSourceLoc};
+ InputSourceLoc, LangOptions()};
PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
if (PragmaScanner.lexPragma(Begin, Buffer.end()))
@@ -950,9 +953,10 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
bool clang::scanSourceForDependencyDirectives(
StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
- SourceLocation InputSourceLoc) {
- return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
+ SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts,
+ DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) {
+ return Scanner(Input, Tokens, Diags, InputSourceLoc, LangOpts)
+ .scan(Directives);
}
void clang::printDependencyDirectivesAsSource(
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 0cab17a342440..66a2f6e0acb63 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,7 +42,7 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
}
bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
- EntryRef Ref) {
+ EntryRef Ref, const LangOptions &LangOpts) {
auto &Entry = Ref.Entry;
if (Entry.isError() || Entry.isDirectory())
@@ -66,7 +66,7 @@ bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
// dependencies.
if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
Contents->DepDirectiveTokens,
- Directives)) {
+ Directives, LangOpts)) {
Contents->DepDirectiveTokens.clear();
// FIXME: Propagate the diagnostic if desired by the client.
Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 0c047b6c5da2f..c3d63c3f890e8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -366,11 +366,12 @@ class DependencyScanningAction : public tooling::ToolAction {
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS)
ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
- [LocalDepFS = DepFS](FileEntryRef File)
+ [LocalDepFS = DepFS,
+ &LangOpts = ScanInstance.getLangOpts()](FileEntryRef File)
-> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
if (llvm::ErrorOr<EntryRef> Entry =
LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
- if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
+ if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry, LangOpts))
return Entry->getDirectiveTokens();
return std::nullopt;
};
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index 59fef9ecbb9c9..044c3d65ec6fb 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallString.h"
#include "gtest/gtest.h"
@@ -17,11 +18,11 @@ using namespace clang::dependency_directives_scan;
static bool minimizeSourceToDependencyDirectives(
StringRef Input, SmallVectorImpl<char> &Out,
SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
- SmallVectorImpl<Directive> &Directives) {
+ SmallVectorImpl<Directive> &Directives, const LangOptions &LangOpts) {
Out.clear();
Tokens.clear();
Directives.clear();
- if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
+ if (scanSourceForDependencyDirectives(Input, Tokens, Directives, LangOpts))
return true;
raw_svector_ostream OS(Out);
@@ -38,7 +39,9 @@ static bool minimizeSourceToDependencyDirectives(StringRef Input,
SmallVectorImpl<char> &Out) {
SmallVector<dependency_directives_scan::Token, 16> Tokens;
SmallVector<Directive, 32> Directives;
- return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives);
+ LangOptions LangOpts;
+ return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives,
+ LangOpts);
}
namespace {
@@ -47,16 +50,17 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
- ASSERT_FALSE(
- minimizeSourceToDependencyDirectives("", Out, Tokens, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens, Directives,
+ LangOpts));
EXPECT_TRUE(Out.empty());
EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(1u, Directives.size());
ASSERT_EQ(pp_eof, Directives.back().Kind);
ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
- Directives));
+ Directives, LangOpts));
EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(2u, Directives.size());
@@ -68,6 +72,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("#define A\n"
@@ -92,7 +97,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
"export module m;\n"
"import m;\n"
"#pragma clang system_header\n",
- Out, Tokens, Directives));
+ Out, Tokens, Directives, LangOpts));
EXPECT_EQ(pp_define, Directives[0].Kind);
EXPECT_EQ(pp_undef, Directives[1].Kind);
EXPECT_EQ(pp_endif, Directives[2].Kind);
@@ -145,9 +150,10 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out,
- Tokens, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+ "#define MACRO", Out, Tokens, Directives, LangOpts));
EXPECT_STREQ("#define MACRO\n", Out.data());
ASSERT_EQ(4u, Tokens.size());
ASSERT_EQ(2u, Directives.size());
@@ -838,6 +844,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
StringRef Source = R"(// comment
#pragma once
@@ -845,8 +852,8 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
#include <test.h>
_Pragma("once")
)";
- ASSERT_FALSE(
- minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+ Directives, LangOpts));
EXPECT_STREQ("#pragma once\n#include <test.h>\n_Pragma(\"once\")\n",
Out.data());
ASSERT_EQ(Directives.size(), 4u);
@@ -926,6 +933,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
StringRef Source = R"(
module;
@@ -954,8 +962,8 @@ ort \
import f(->a = 3);
}
)";
- ASSERT_FALSE(
- minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+ Directives, LangOpts));
EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
"exp\\\nort import:l[[rename]];"
"import<<=3;import a b d e d e f e;"
@@ -1012,4 +1020,52 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBeforeEOF>\n", Out.data());
}
+TEST(MinimizeSourceToDependencyDirectivesTest, CPlusPlus14PPNumber) {
+ SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
+ SmallVector<Directive, 4> Directives;
+ LangOptions LangOpts;
+
+ StringRef Source = R"(
+#if 123'124
+#endif
+)";
+
+ LangOpts.CPlusPlus14 = true; // expect 123'124 to lex as one numeric_constant
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+ Directives, LangOpts));
+ EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
+ ASSERT_EQ(Directives.size(), 3u);
+ EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
+ EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
+ EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
+ ASSERT_EQ(Tokens.size(), 7u);
+
+ ASSERT_TRUE(Tokens[0].is(tok::hash));
+ ASSERT_TRUE(Tokens[1].is(tok::raw_identifier)); // "if"
+ ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123'124
+ ASSERT_TRUE(Tokens[3].is(tok::eod));
+ ASSERT_TRUE(Tokens[4].is(tok::hash));
+ ASSERT_TRUE(Tokens[5].is(tok::raw_identifier)); // "endif"
+ ASSERT_TRUE(Tokens[6].is(tok::eod));
+
+ LangOpts.CPlusPlus14 = false; // expect 123 followed by an unknown token
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Tokens,
+ Directives, LangOpts));
+ EXPECT_STREQ("#if 123'124\n#endif\n", Out.data());
+ ASSERT_EQ(Directives.size(), 3u);
+ EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_if);
+ EXPECT_EQ(Directives[1].Kind, dependency_directives_scan::pp_endif);
+ EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_eof);
+ ASSERT_EQ(Tokens.size(), 8u);
+ ASSERT_TRUE(Tokens[0].is(tok::hash));
+ ASSERT_TRUE(Tokens[1].is(tok::raw_identifier)); // "if"
+ ASSERT_TRUE(Tokens[2].is(tok::numeric_constant)); // 123
+ ASSERT_TRUE(Tokens[3].is(tok::unknown)); // '124
+ ASSERT_TRUE(Tokens[4].is(tok::eod));
+ ASSERT_TRUE(Tokens[5].is(tok::hash));
+ ASSERT_TRUE(Tokens[6].is(tok::raw_identifier)); // "endif"
+ ASSERT_TRUE(Tokens[7].is(tok::eod));
+}
+
} // end anonymous namespace
diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
index 6ff87f720a559..410f378f1e89d 100644
--- a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -104,6 +104,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
SmallVector<dependency_directives_scan::Directive> Directives;
};
SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
+ LangOptions LangOpts;
auto getDependencyDirectives = [&](FileEntryRef File)
-> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
@@ -111,7 +112,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
bool Err = scanSourceForDependencyDirectives(
Input, DepDirectivesObjects.back()->Tokens,
- DepDirectivesObjects.back()->Directives);
+ DepDirectivesObjects.back()->Directives, LangOpts);
EXPECT_FALSE(Err);
return llvm::ArrayRef(DepDirectivesObjects.back()->Directives);
};
More information about the cfe-commits mailing list