[clang] [Clang][Parse] Fix ambiguity with nested-name-specifiers that may declarative (PR #96364)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 21 15:21:33 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Krystian Stasiowski (sdkrystian)
<details>
<summary>Changes</summary>
Consider the following:
```cpp
template<typename T>
struct A { };
template<typename T>
int A<T>::B::* f(); // error: no member named 'B' in 'A<T>'
```
Although this is clearly valid, clang rejects it because the _nested-name-specifier_ `A<T>::` is parsed as-if it was declarative, meaning, we parse it as-if it was the _nested-name-specifier_ in a redeclaration/specialization. However, we don't (and can't) know whether the _nested-name-specifier_ is declarative until we see the '`*`' token, but at that point we have already complained that `A` has no member named `B`! This patch addresses this bug by adding support for _fully_ unannotated _and_ unbounded tentative parsing, which allows for us to parse past tokens without having to cache tokens up to a point we can guarantee aren't part of the construct we are disambiguating.
I don't know where the approach taken here is ideal -- alternatives are welcome. However, the performance impact (as measured by [llvm-compile-time-tracker](https://llvm-compile-time-tracker.com/?config=Overview&stat=instructions%3Au&remote=sdkrystian)) is quite minimal (0.09%, which I plan to further improve).
---
Patch is 20.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96364.diff
8 Files Affected:
- (modified) clang/include/clang/Lex/Preprocessor.h (+11-3)
- (modified) clang/include/clang/Parse/Parser.h (+6-7)
- (modified) clang/lib/Lex/PPCaching.cpp (+35-4)
- (modified) clang/lib/Parse/ParseCXXInlineMethods.cpp (+2-39)
- (modified) clang/lib/Parse/ParseDecl.cpp (+51-32)
- (modified) clang/lib/Parse/ParseExprCXX.cpp (+9-10)
- (added) clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp (+64)
- (modified) clang/test/CXX/temp/temp.res/p3.cpp (+4-6)
``````````diff
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 9d8a1aae23df3..838857b6b8833 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1150,6 +1150,9 @@ class Preprocessor {
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;
+ std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
+ UnannotatedBacktrackPositions;
+
/// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
/// This is used to guard against calling this function recursively.
///
@@ -1712,7 +1715,7 @@ class Preprocessor {
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
/// tokens will continue indefinitely.
///
- void EnableBacktrackAtThisPos();
+ void EnableBacktrackAtThisPos(bool Unannotated = false);
/// Disable the last EnableBacktrackAtThisPos call.
void CommitBacktrackedTokens();
@@ -1723,7 +1726,11 @@ class Preprocessor {
/// True if EnableBacktrackAtThisPos() was called and
/// caching of tokens is on.
+
bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
+ bool isUnannotatedBacktrackEnabled() const {
+ return !UnannotatedBacktrackPositions.empty();
+ }
/// Lex the next token for this preprocessor.
void Lex(Token &Result);
@@ -1827,8 +1834,9 @@ class Preprocessor {
void RevertCachedTokens(unsigned N) {
assert(isBacktrackEnabled() &&
"Should only be called when tokens are cached for backtracking");
- assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
- && "Should revert tokens up to the last backtrack position, not more");
+ assert(signed(CachedLexPos) - signed(N) >=
+ signed(BacktrackPositions.back() >> 1) &&
+ "Should revert tokens up to the last backtrack position, not more");
assert(signed(CachedLexPos) - signed(N) >= 0 &&
"Corrupted backtrack positions ?");
CachedLexPos -= N;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index d054b8cf0d240..fc0bae6bdec2b 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -1033,7 +1033,7 @@ class Parser : public CodeCompletionHandler {
bool isActive;
public:
- explicit TentativeParsingAction(Parser &p)
+ explicit TentativeParsingAction(Parser &p, bool Unannotated = false)
: P(p), PrevPreferredType(P.PreferredType) {
PrevTok = P.Tok;
PrevTentativelyDeclaredIdentifierCount =
@@ -1041,7 +1041,7 @@ class Parser : public CodeCompletionHandler {
PrevParenCount = P.ParenCount;
PrevBracketCount = P.BracketCount;
PrevBraceCount = P.BraceCount;
- P.PP.EnableBacktrackAtThisPos();
+ P.PP.EnableBacktrackAtThisPos(Unannotated);
isActive = true;
}
void Commit() {
@@ -1072,13 +1072,11 @@ class Parser : public CodeCompletionHandler {
class RevertingTentativeParsingAction
: private Parser::TentativeParsingAction {
public:
- RevertingTentativeParsingAction(Parser &P)
- : Parser::TentativeParsingAction(P) {}
+ using TentativeParsingAction::TentativeParsingAction;
+
~RevertingTentativeParsingAction() { Revert(); }
};
- class UnannotatedTentativeParsingAction;
-
/// ObjCDeclContextSwitch - An object used to switch context from
/// an objective-c decl context to its enclosing decl context and
/// back.
@@ -1979,7 +1977,8 @@ class Parser : public CodeCompletionHandler {
CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHasErrors,
bool EnteringContext, bool *MayBePseudoDestructor = nullptr,
bool IsTypename = false, const IdentifierInfo **LastII = nullptr,
- bool OnlyNamespace = false, bool InUsingDeclaration = false);
+ bool OnlyNamespace = false, bool InUsingDeclaration = false,
+ bool Disambiguation = false);
//===--------------------------------------------------------------------===//
// C++11 5.1.2: Lambda expressions
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
index f38ff62ebf437..bc52bfb237e5c 100644
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -22,9 +22,12 @@ using namespace clang;
// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
// be combined with the EnableBacktrackAtThisPos calls in reverse order.
-void Preprocessor::EnableBacktrackAtThisPos() {
+void Preprocessor::EnableBacktrackAtThisPos(bool Unannotated) {
assert(LexLevel == 0 && "cannot use lookahead while lexing");
- BacktrackPositions.push_back(CachedLexPos);
+ BacktrackPositions.push_back((CachedLexPos << 1) | Unannotated);
+ if (Unannotated)
+ UnannotatedBacktrackPositions.emplace_back(CachedTokens,
+ CachedTokens.size());
EnterCachingLexMode();
}
@@ -32,7 +35,18 @@ void Preprocessor::EnableBacktrackAtThisPos() {
void Preprocessor::CommitBacktrackedTokens() {
assert(!BacktrackPositions.empty()
&& "EnableBacktrackAtThisPos was not called!");
+ auto BacktrackPos = BacktrackPositions.back();
BacktrackPositions.pop_back();
+ if (BacktrackPos & 1) {
+ assert(!UnannotatedBacktrackPositions.empty() &&
+ "missing unannotated tokens?");
+ auto [UnannotatedTokens, NumCachedToks] =
+ std::move(UnannotatedBacktrackPositions.back());
+ if (!UnannotatedBacktrackPositions.empty())
+ UnannotatedBacktrackPositions.back().first.append(
+ UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end());
+ UnannotatedBacktrackPositions.pop_back();
+ }
}
// Make Preprocessor re-lex the tokens that were lexed since
@@ -40,8 +54,20 @@ void Preprocessor::CommitBacktrackedTokens() {
void Preprocessor::Backtrack() {
assert(!BacktrackPositions.empty()
&& "EnableBacktrackAtThisPos was not called!");
- CachedLexPos = BacktrackPositions.back();
+ auto BacktrackPos = BacktrackPositions.back();
BacktrackPositions.pop_back();
+ CachedLexPos = BacktrackPos >> 1;
+ if (BacktrackPos & 1) {
+ assert(!UnannotatedBacktrackPositions.empty() &&
+ "missing unannotated tokens?");
+ auto [UnannotatedTokens, NumCachedToks] =
+ std::move(UnannotatedBacktrackPositions.back());
+ UnannotatedBacktrackPositions.pop_back();
+ if (!UnannotatedBacktrackPositions.empty())
+ UnannotatedBacktrackPositions.back().first.append(
+ UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end());
+ CachedTokens = std::move(UnannotatedTokens);
+ }
recomputeCurLexerKind();
}
@@ -67,6 +93,8 @@ void Preprocessor::CachingLex(Token &Result) {
EnterCachingLexModeUnchecked();
CachedTokens.push_back(Result);
++CachedLexPos;
+ if (isUnannotatedBacktrackEnabled())
+ UnannotatedBacktrackPositions.back().first.push_back(Result);
return;
}
@@ -108,6 +136,8 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
CachedTokens.push_back(Token());
Lex(CachedTokens.back());
+ if (isUnannotatedBacktrackEnabled())
+ UnannotatedBacktrackPositions.back().first.push_back(CachedTokens.back());
}
EnterCachingLexMode();
return CachedTokens.back();
@@ -124,7 +154,8 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
if (AnnotBegin->getLocation() == Tok.getLocation()) {
- assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) &&
+ assert((BacktrackPositions.empty() ||
+ (BacktrackPositions.back() >> 1) <= i) &&
"The backtrack pos points inside the annotated tokens!");
// Replace the cached tokens with the single annotation token.
if (i < CachedLexPos)
diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 943ce0fdde3a3..86a89650e2583 100644
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -1188,41 +1188,6 @@ bool Parser::ConsumeAndStoreConditional(CachedTokens &Toks) {
return true;
}
-/// A tentative parsing action that can also revert token annotations.
-class Parser::UnannotatedTentativeParsingAction : public TentativeParsingAction {
-public:
- explicit UnannotatedTentativeParsingAction(Parser &Self,
- tok::TokenKind EndKind)
- : TentativeParsingAction(Self), Self(Self), EndKind(EndKind) {
- // Stash away the old token stream, so we can restore it once the
- // tentative parse is complete.
- TentativeParsingAction Inner(Self);
- Self.ConsumeAndStoreUntil(EndKind, Toks, true, /*ConsumeFinalToken*/false);
- Inner.Revert();
- }
-
- void RevertAnnotations() {
- Revert();
-
- // Put back the original tokens.
- Self.SkipUntil(EndKind, StopAtSemi | StopBeforeMatch);
- if (Toks.size()) {
- auto Buffer = std::make_unique<Token[]>(Toks.size());
- std::copy(Toks.begin() + 1, Toks.end(), Buffer.get());
- Buffer[Toks.size() - 1] = Self.Tok;
- Self.PP.EnterTokenStream(std::move(Buffer), Toks.size(), true,
- /*IsReinject*/ true);
-
- Self.Tok = Toks.front();
- }
- }
-
-private:
- Parser &Self;
- CachedTokens Toks;
- tok::TokenKind EndKind;
-};
-
/// ConsumeAndStoreInitializer - Consume and store the token at the passed token
/// container until the end of the current initializer expression (either a
/// default argument or an in-class initializer for a non-static data member).
@@ -1260,9 +1225,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
// syntactically-valid init-declarator-list, then this comma ends
// the default initializer.
{
- UnannotatedTentativeParsingAction PA(*this,
- CIK == CIK_DefaultInitializer
- ? tok::semi : tok::r_paren);
+ TentativeParsingAction TPA(*this, /*Unannotated=*/true);
Sema::TentativeAnalysisScope Scope(Actions);
TPResult Result = TPResult::Error;
@@ -1290,7 +1253,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
// Put the token stream back and undo any annotations we performed
// after the comma. They may reflect a different parse than the one
// we will actually perform at the end of the class.
- PA.RevertAnnotations();
+ TPA.Revert();
// If what follows could be a declaration, it is a declaration.
if (Result != TPResult::False && Result != TPResult::Error)
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index c528917437332..cad40a120a7b4 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -6615,48 +6615,67 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
(Tok.is(tok::identifier) &&
(NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) ||
Tok.is(tok::annot_cxxscope))) {
+ TentativeParsingAction TPA(*this, /*Unannotated=*/true);
bool EnteringContext = D.getContext() == DeclaratorContext::File ||
D.getContext() == DeclaratorContext::Member;
+
CXXScopeSpec SS;
SS.setTemplateParamLists(D.getTemplateParameterLists());
- ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
- /*ObjectHasErrors=*/false, EnteringContext);
- if (SS.isNotEmpty()) {
- if (Tok.isNot(tok::star)) {
- // The scope spec really belongs to the direct-declarator.
- if (D.mayHaveIdentifier())
- D.getCXXScopeSpec() = SS;
- else
- AnnotateScopeToken(SS, true);
+ if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
+ /*ObjectHasErrors=*/false,
+ /*EnteringContext=*/false,
+ /*MayBePseudoDestructor=*/nullptr,
+ /*IsTypename=*/false, /*LastII=*/nullptr,
+ /*OnlyNamespace=*/false,
+ /*InUsingDeclaration=*/false,
+ /*Disambiguation=*/EnteringContext) ||
+
+ SS.isEmpty() || SS.isInvalid() || !EnteringContext ||
+ Tok.is(tok::star)) {
+ TPA.Commit();
+ if (SS.isNotEmpty() && Tok.is(tok::star)) {
+ if (SS.isValid()) {
+ checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
+ CompoundToken::MemberPtr);
+ }
- if (DirectDeclParser)
- (this->*DirectDeclParser)(D);
+ SourceLocation StarLoc = ConsumeToken();
+ D.SetRangeEnd(StarLoc);
+ DeclSpec DS(AttrFactory);
+ ParseTypeQualifierListOpt(DS);
+ D.ExtendWithDeclSpec(DS);
+
+ // Recurse to parse whatever is left.
+ Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
+ ParseDeclaratorInternal(D, DirectDeclParser);
+ });
+
+ // Sema will have to catch (syntactically invalid) pointers into global
+ // scope. It has to catch pointers into namespace scope anyway.
+ D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
+ SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
+ std::move(DS.getAttributes()),
+ /*EndLoc=*/SourceLocation());
return;
}
+ } else {
+ TPA.Revert();
+ SS.clear();
+ ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
+ /*ObjectHasErrors=*/false,
+ /*EnteringContext=*/true);
+ }
- if (SS.isValid()) {
- checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
- CompoundToken::MemberPtr);
- }
+ if (SS.isNotEmpty()) {
+ // The scope spec really belongs to the direct-declarator.
+ if (D.mayHaveIdentifier())
+ D.getCXXScopeSpec() = SS;
+ else
+ AnnotateScopeToken(SS, true);
- SourceLocation StarLoc = ConsumeToken();
- D.SetRangeEnd(StarLoc);
- DeclSpec DS(AttrFactory);
- ParseTypeQualifierListOpt(DS);
- D.ExtendWithDeclSpec(DS);
-
- // Recurse to parse whatever is left.
- Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
- ParseDeclaratorInternal(D, DirectDeclParser);
- });
-
- // Sema will have to catch (syntactically invalid) pointers into global
- // scope. It has to catch pointers into namespace scope anyway.
- D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
- SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
- std::move(DS.getAttributes()),
- /* Don't replace range end. */ SourceLocation());
+ if (DirectDeclParser)
+ (this->*DirectDeclParser)(D);
return;
}
}
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 1d364f77a8146..c0eae73927cdd 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -159,8 +159,8 @@ void Parser::CheckForTemplateAndDigraph(Token &Next, ParsedType ObjectType,
bool Parser::ParseOptionalCXXScopeSpecifier(
CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors,
bool EnteringContext, bool *MayBePseudoDestructor, bool IsTypename,
- const IdentifierInfo **LastII, bool OnlyNamespace,
- bool InUsingDeclaration) {
+ const IdentifierInfo **LastII, bool OnlyNamespace, bool InUsingDeclaration,
+ bool Disambiguation) {
assert(getLangOpts().CPlusPlus &&
"Call sites of this function should be guarded by checking for C++");
@@ -528,13 +528,11 @@ bool Parser::ParseOptionalCXXScopeSpecifier(
UnqualifiedId TemplateName;
TemplateName.setIdentifier(&II, Tok.getLocation());
bool MemberOfUnknownSpecialization;
- if (TemplateNameKind TNK = Actions.isTemplateName(getCurScope(), SS,
- /*hasTemplateKeyword=*/false,
- TemplateName,
- ObjectType,
- EnteringContext,
- Template,
- MemberOfUnknownSpecialization)) {
+ if (TemplateNameKind TNK = Actions.isTemplateName(
+ getCurScope(), SS,
+ /*hasTemplateKeyword=*/false, TemplateName, ObjectType,
+ EnteringContext, Template, MemberOfUnknownSpecialization,
+ Disambiguation)) {
// If lookup didn't find anything, we treat the name as a template-name
// anyway. C++20 requires this, and in prior language modes it improves
// error recovery. But before we commit to this, check that we actually
@@ -557,7 +555,8 @@ bool Parser::ParseOptionalCXXScopeSpecifier(
continue;
}
- if (MemberOfUnknownSpecialization && (ObjectType || SS.isSet()) &&
+ if (MemberOfUnknownSpecialization && !Disambiguation &&
+ (ObjectType || SS.isSet()) &&
(IsTypename || isTemplateArgumentList(1) == TPResult::True)) {
// If we had errors before, ObjectType can be dependent even without any
// templates. Do not report missing template keyword in that case.
diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp
new file mode 100644
index 0000000000000..a06b107755596
--- /dev/null
+++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+template<typename T>
+struct A0 {
+ struct B0;
+
+ template<typename U>
+ struct C0 {
+ struct D0;
+
+ template<typename V>
+ struct E0;
+ };
+};
+
+template<typename T>
+int A0<T>::B0::* f0();
+
+template<typename T>
+int A0<T>::B1::* f1();
+
+template<typename T>
+int A0<T>::C0<int>::* f2(); // expected-error {{expected unqualified-id}}
+
+template<typename T>
+int A0<T>::C1<int>::* f3(); // expected-error {{no member named 'C1' in 'A0<T>'}}
+ // expected-error at -1 {{expected ';' after top level declarator}}
+
+template<typename T>
+int A0<T>::template C2<int>::* f4();
+
+template<typename T>
+int A0<T>::template C0<int>::D0::* f5();
+
+template<typename T>
+int A0<T>::template C2<int>::D1::* f6();
+
+template<typename T>
+int A0<T>::template C0<int>::E0<int>::* f7(); // expected-error {{use 'template' keyword to treat 'E0' as a dependent template name}}
+ // expected-error at -1 {{expected unqualified-id}}
+
+template<typename T>
+int A0<T>::template C2<int>::E1<int>::* f8(); // expected-error {{no member named 'C2' in 'A0<T>'}}
+
+template<typename T>
+int A0<T>::template C0<int>::template E0<int>::* f9();
+
+template<typename T>
+int A0<T>::template C2<int>::template E1<int>::* f10();
+
+namespace TypoCorrection {
+ template<typename T>
+ struct A {
+ template<typename U>
+ struct Typo; // expected-note {{'Typo' declared here}}
+ };
+
+ template<typename T>
+ int A<T>::template typo<int>::* f();
+
+ template<typename T>
+ int A<T>::typo<int>::* g(); // expected-error {{no template named 'typo' in 'A<T>'; did you mean 'Typo'?}}
+ // expected-error at -1 {{expected unqualified-id}}
+}
diff --git a/clang/test/CXX/temp/temp.res/p3.cpp b/clang/test/CXX/temp/temp.res/p3.cpp
index 37ab93559e369..1eda967523a59 100644
--- a/clang/test/CXX/temp/temp.res/p3.cpp
+++ b/clang/test/CXX/temp/temp.res/p3.cpp
@@ -2,7 +2,7 @@
template<typename T> struct A {
template<typename U> struct B;
- template<typename U> using C = U; // expected-note {{here}}
+ template<typename U> using C = U;
};
struct X {
@@ -20,12 +20,10 @@ template<typename T> A<T>::C<T> f2(); // expected-warning {{miss...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/96364
More information about the cfe-commits
mailing list