[clang] [clang-format] Update FormatToken::isSimpleTypeSpecifier() (PR #79115)
Owen Pan via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 23 02:17:09 PST 2024
https://github.com/owenca created https://github.com/llvm/llvm-project/pull/79115
Depends on #79037.
From 61a06b77e8e6f2b5c5a9a2aa0b7f46260545f5b4 Mon Sep 17 00:00:00 2001
From: Carl Peto <CPeto at becrypt.com>
Date: Mon, 22 Jan 2024 18:52:46 +0000
Subject: [PATCH 1/2] [clang] - Sema::isSimpleTypeSpecifier return true for
_Bool in c99 (currently returns false for _Bool, regardless of C dialect).
(Fixes #72203) - move simple type decision code into shared location
(IdentifierInfo) - replace the logic with a check for simple types and a
proper check for a valid keyword in the appropriate dialect - change all call
sites to match the above new API
---
clang/include/clang/Basic/IdentifierTable.h | 4 ++
clang/include/clang/Sema/Sema.h | 2 +-
clang/lib/Basic/IdentifierTable.cpp | 39 +++++++++++++++++
clang/lib/Parse/ParseExpr.cpp | 3 +-
clang/lib/Parse/ParseObjc.cpp | 3 +-
clang/lib/Sema/SemaDecl.cpp | 48 +--------------------
6 files changed, 50 insertions(+), 49 deletions(-)
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 1ac182d4fce26f..2c979e438e81bb 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -427,6 +427,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
/// language.
bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
+ /// Return true if this token is a simple type specifier
+ /// in the specified language.
+ bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
+
/// Get and set FETokenInfo. The language front-end is allowed to associate
/// arbitrary metadata with this token.
void *getFETokenInfo() const { return FETokenInfo; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0db39333b0ee34..bc1fd19b5c6de7 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2636,7 +2636,7 @@ class Sema final {
void DiagnoseUseOfUnimplementedSelectors();
- bool isSimpleTypeSpecifier(tok::TokenKind Kind) const;
+ bool isSimpleTypeSpecifier(const IdentifierInfo &II) const;
ParsedType getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
Scope *S, CXXScopeSpec *SS = nullptr,
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d0d8316385b452..78c783cdff6b5e 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -419,6 +419,45 @@ StringRef IdentifierInfo::deuglifiedName() const {
return Name;
}
+/// Determine whether the token kind starts a simple-type-specifier.
+bool IdentifierInfo::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
+ auto Kind = getTokenID();
+
+ switch (Kind) {
+ case tok::kw_short:
+ case tok::kw_long:
+ case tok::kw___int64:
+ case tok::kw___int128:
+ case tok::kw_signed:
+ case tok::kw_unsigned:
+ case tok::kw_void:
+ case tok::kw_char:
+ case tok::kw_int:
+ case tok::kw_half:
+ case tok::kw_float:
+ case tok::kw_double:
+ case tok::kw___bf16:
+ case tok::kw__Float16:
+ case tok::kw___float128:
+ case tok::kw_wchar_t:
+ case tok::kw_bool:
+ case tok::kw___underlying_type:
+ case tok::kw___auto_type:
+ case tok::kw__Bool:
+ case tok::annot_typename:
+ case tok::kw_char16_t:
+ case tok::kw_char32_t:
+ case tok::kw_typeof:
+ case tok::annot_decltype:
+ case tok::kw_decltype:
+ case tok::kw_char8_t:
+ return isKeyword(LangOpts);
+
+ default:
+ return false;
+ }
+}
+
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
// We use a perfect hash function here involving the length of the keyword,
// the first and third character. For preprocessor ID's there are no
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index e862856a08ca11..8f9f918bf544fd 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1597,7 +1597,8 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
if (TryAnnotateTypeOrScopeToken())
return ExprError();
- if (!Actions.isSimpleTypeSpecifier(Tok.getKind()))
+ if (!Tok.getIdentifierInfo() ||
+ !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo()))
// We are trying to parse a simple-type-specifier but might not get such
// a token after error recovery.
return ExprError();
diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp
index 849fd1ac95a442..5565770610c491 100644
--- a/clang/lib/Parse/ParseObjc.cpp
+++ b/clang/lib/Parse/ParseObjc.cpp
@@ -2971,7 +2971,8 @@ bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) {
tok::annot_cxxscope))
TryAnnotateTypeOrScopeToken();
- if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) {
+ if (!Tok.getIdentifierInfo() ||
+ !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo())) {
// objc-receiver:
// expression
// Make sure any typos in the receiver are corrected or diagnosed, so that
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8dff2cdc063df3..a20894adda00f9 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -128,52 +128,8 @@ class TypeNameValidatorCCC final : public CorrectionCandidateCallback {
} // end anonymous namespace
/// Determine whether the token kind starts a simple-type-specifier.
-bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
- switch (Kind) {
- // FIXME: Take into account the current language when deciding whether a
- // token kind is a valid type specifier
- case tok::kw_short:
- case tok::kw_long:
- case tok::kw___int64:
- case tok::kw___int128:
- case tok::kw_signed:
- case tok::kw_unsigned:
- case tok::kw_void:
- case tok::kw_char:
- case tok::kw_int:
- case tok::kw_half:
- case tok::kw_float:
- case tok::kw_double:
- case tok::kw___bf16:
- case tok::kw__Float16:
- case tok::kw___float128:
- case tok::kw___ibm128:
- case tok::kw_wchar_t:
- case tok::kw_bool:
- case tok::kw__Accum:
- case tok::kw__Fract:
- case tok::kw__Sat:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
- case tok::kw___auto_type:
- return true;
-
- case tok::annot_typename:
- case tok::kw_char16_t:
- case tok::kw_char32_t:
- case tok::kw_typeof:
- case tok::annot_decltype:
- case tok::kw_decltype:
- return getLangOpts().CPlusPlus;
-
- case tok::kw_char8_t:
- return getLangOpts().Char8;
-
- default:
- break;
- }
-
- return false;
+bool Sema::isSimpleTypeSpecifier(const IdentifierInfo &II) const {
+ return II.isSimpleTypeSpecifier(getLangOpts());
}
namespace {
From 40645124c612cec71f5753fa2e97c6a01406f86a Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Tue, 23 Jan 2024 02:06:49 -0800
Subject: [PATCH 2/2] [clang-format] Update
FormatToken::isSimpleTypeSpecifier()
---
clang/include/clang/Format/Format.h | 2 ++
clang/lib/Format/FormatToken.cpp | 36 ++-------------------------
clang/lib/Format/FormatTokenLexer.cpp | 7 +++---
clang/lib/Format/FormatTokenLexer.h | 1 -
4 files changed, 8 insertions(+), 38 deletions(-)
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index bc9eecd42f9ebf..2800f6db4a9786 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -5160,6 +5160,8 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
ArrayRef<tooling::Range> Ranges,
StringRef FileName = "<stdin>");
+extern LangOptions LangOpts;
+
/// Returns the ``LangOpts`` that the formatter expects you to set.
///
/// \param Style determines specific settings for lexing mode.
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index b791c5a26bbe3a..a030d91d5589a5 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -34,41 +34,9 @@ const char *getTokenTypeName(TokenType Type) {
return nullptr;
}
-// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
-// duplication.
bool FormatToken::isSimpleTypeSpecifier() const {
- switch (Tok.getKind()) {
- case tok::kw_short:
- case tok::kw_long:
- case tok::kw___int64:
- case tok::kw___int128:
- case tok::kw_signed:
- case tok::kw_unsigned:
- case tok::kw_void:
- case tok::kw_char:
- case tok::kw_int:
- case tok::kw_half:
- case tok::kw_float:
- case tok::kw_double:
- case tok::kw___bf16:
- case tok::kw__Float16:
- case tok::kw___float128:
- case tok::kw___ibm128:
- case tok::kw_wchar_t:
- case tok::kw_bool:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
- case tok::annot_typename:
- case tok::kw_char8_t:
- case tok::kw_char16_t:
- case tok::kw_char32_t:
- case tok::kw_typeof:
- case tok::kw_decltype:
- case tok::kw__Atomic:
- return true;
- default:
- return false;
- }
+ const auto *IdentifierInfo = Tok.getIdentifierInfo();
+ return IdentifierInfo && IdentifierInfo->isSimpleTypeSpecifier(LangOpts);
}
bool FormatToken::isTypeOrIdentifier() const {
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2f..e6575dd417438a 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -22,18 +22,20 @@
namespace clang {
namespace format {
+LangOptions LangOpts;
+
FormatTokenLexer::FormatTokenLexer(
const SourceManager &SourceMgr, FileID ID, unsigned Column,
const FormatStyle &Style, encoding::Encoding Encoding,
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
IdentifierTable &IdentTable)
: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
- Column(Column), TrailingWhitespace(0),
- LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
+ Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
+ LangOpts = getFormattingLangOpts(Style);
Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
Lex->SetKeepWhitespaceMode(true);
@@ -1411,7 +1413,6 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) {
void FormatTokenLexer::resetLexer(unsigned Offset) {
StringRef Buffer = SourceMgr.getBufferData(ID);
- LangOpts = getFormattingLangOpts(Style);
Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts,
Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
Lex->SetKeepWhitespaceMode(true);
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 65dd733bd53352..52838f1d8a17f5 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -120,7 +120,6 @@ class FormatTokenLexer {
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
- LangOptions LangOpts;
const SourceManager &SourceMgr;
FileID ID;
const FormatStyle &Style;
More information about the cfe-commits mailing list