[clang] Improved is simple type specifier (PR #78903)

Carl Peto via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 10:03:41 PST 2024


https://github.com/carlos4242 updated https://github.com/llvm/llvm-project/pull/78903

>From 2622ca430d7edb1c49cda9bbbf6145b60e2e37c4 Mon Sep 17 00:00:00 2001
From: Carl Peto <carl.peto at me.com>
Date: Tue, 14 Nov 2023 03:50:14 +0000
Subject: [PATCH] [clang] Sema::isSimpleTypeSpecifier return true for _Bool in
 c99 Currently returns false for _Bool, regardless of C dialect.

Fixes #72203.

- move simple type decision code into shared location (IdentifierInfo)
- replace the logic with a check for simple types and a proper check for a valid keyword in the appropriate dialect
- change the duplicate functions from Sema and Format to stub functions that both invoke this
- attempt to reverse engineer language options in Format from the Style
- change all call sites to match the above new API
---
 clang/include/clang/Basic/IdentifierTable.h |  4 ++
 clang/include/clang/Sema/Sema.h             |  2 +-
 clang/lib/Basic/IdentifierTable.cpp         | 39 +++++++++++++++++
 clang/lib/Format/FormatToken.cpp            | 38 ++--------------
 clang/lib/Format/TokenAnnotator.cpp         | 16 +++----
 clang/lib/Format/UnwrappedLineParser.cpp    |  6 +--
 clang/lib/Parse/ParseExpr.cpp               |  2 +-
 clang/lib/Parse/ParseObjc.cpp               |  2 +-
 clang/lib/Sema/SemaDecl.cpp                 | 48 +--------------------
 9 files changed, 62 insertions(+), 95 deletions(-)

diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 1ac182d4fce26f6..2c979e438e81bb3 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -427,6 +427,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   /// language.
   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
 
+  /// Return true if this token is a simple type specifier
+  /// in the specified language.
+  bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
+
   /// Get and set FETokenInfo. The language front-end is allowed to associate
   /// arbitrary metadata with this token.
   void *getFETokenInfo() const { return FETokenInfo; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0db39333b0ee347..bc1fd19b5c6de7b 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2636,7 +2636,7 @@ class Sema final {
 
   void DiagnoseUseOfUnimplementedSelectors();
 
-  bool isSimpleTypeSpecifier(tok::TokenKind Kind) const;
+  bool isSimpleTypeSpecifier(const IdentifierInfo &II) const;
 
   ParsedType getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
                          Scope *S, CXXScopeSpec *SS = nullptr,
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d0d8316385b452f..78c783cdff6b5ea 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -419,6 +419,45 @@ StringRef IdentifierInfo::deuglifiedName() const {
   return Name;
 }
 
+/// Determine whether the token kind starts a simple-type-specifier.
+bool IdentifierInfo::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
+  auto Kind = getTokenID();
+
+  switch (Kind) {
+  case tok::kw_short:
+  case tok::kw_long:
+  case tok::kw___int64:
+  case tok::kw___int128:
+  case tok::kw_signed:
+  case tok::kw_unsigned:
+  case tok::kw_void:
+  case tok::kw_char:
+  case tok::kw_int:
+  case tok::kw_half:
+  case tok::kw_float:
+  case tok::kw_double:
+  case tok::kw___bf16:
+  case tok::kw__Float16:
+  case tok::kw___float128:
+  case tok::kw_wchar_t:
+  case tok::kw_bool:
+  case tok::kw___underlying_type:
+  case tok::kw___auto_type:
+  case tok::kw__Bool:
+  case tok::annot_typename:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
+  case tok::kw_typeof:
+  case tok::annot_decltype:
+  case tok::kw_decltype:
+  case tok::kw_char8_t:
+    return isKeyword(LangOpts);
+
+  default:
+    return false;
+  }
+}
+
 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // We use a perfect hash function here involving the length of the keyword,
   // the first and third character.  For preprocessor ID's there are no
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index b791c5a26bbe3ab..70e997e179a8ccd 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -34,41 +34,9 @@ const char *getTokenTypeName(TokenType Type) {
   return nullptr;
 }
 
-// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
-// duplication.
-bool FormatToken::isSimpleTypeSpecifier() const {
-  switch (Tok.getKind()) {
-  case tok::kw_short:
-  case tok::kw_long:
-  case tok::kw___int64:
-  case tok::kw___int128:
-  case tok::kw_signed:
-  case tok::kw_unsigned:
-  case tok::kw_void:
-  case tok::kw_char:
-  case tok::kw_int:
-  case tok::kw_half:
-  case tok::kw_float:
-  case tok::kw_double:
-  case tok::kw___bf16:
-  case tok::kw__Float16:
-  case tok::kw___float128:
-  case tok::kw___ibm128:
-  case tok::kw_wchar_t:
-  case tok::kw_bool:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
-  case tok::annot_typename:
-  case tok::kw_char8_t:
-  case tok::kw_char16_t:
-  case tok::kw_char32_t:
-  case tok::kw_typeof:
-  case tok::kw_decltype:
-  case tok::kw__Atomic:
-    return true;
-  default:
-    return false;
-  }
+bool FormatToken::isSimpleTypeSpecifier(const FormatStyle &Style) const {
+  auto LangOptions = getFormattingLangOpts(Style);
+  return Tok.getIdentifierInfo()->isSimpleTypeSpecifier(LangOptions);
 }
 
 bool FormatToken::isTypeOrIdentifier() const {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 25fcceb87864379..c365dacd498669c 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -528,7 +528,7 @@ class AnnotatingParser {
           (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
            CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
       if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
-           CurrentToken->Previous->isSimpleTypeSpecifier()) &&
+           CurrentToken->Previous->isSimpleTypeSpecifier(Style)) &&
           !(CurrentToken->is(tok::l_brace) ||
             (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
         Contexts.back().IsExpression = false;
@@ -2271,7 +2271,7 @@ class AnnotatingParser {
       return true;
 
     // MyClass a;
-    if (PreviousNotConst->isSimpleTypeSpecifier())
+    if (PreviousNotConst->isSimpleTypeSpecifier(Style))
       return true;
 
     // type[] a in Java
@@ -2406,7 +2406,7 @@ class AnnotatingParser {
     // Heuristically try to determine whether the parentheses contain a type.
     auto IsQualifiedPointerOrReference = [](FormatToken *T) {
       // This is used to handle cases such as x = (foo *const)&y;
-      assert(!T->isSimpleTypeSpecifier() && "Should have already been checked");
+      assert(!T->isSimpleTypeSpecifier(Style) && "Should have already been checked");
       // Strip trailing qualifiers such as const or volatile when checking
       // whether the parens could be a cast to a pointer/reference type.
       while (T) {
@@ -2438,7 +2438,7 @@ class AnnotatingParser {
     bool ParensAreType =
         !Tok.Previous ||
         Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
-        Tok.Previous->isSimpleTypeSpecifier() ||
+        Tok.Previous->isSimpleTypeSpecifier(Style) ||
         IsQualifiedPointerOrReference(Tok.Previous);
     bool ParensCouldEndDecl =
         Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
@@ -3311,7 +3311,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
         Next = Next->Next;
         continue;
       }
-      if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
+      if ((Next->isSimpleTypeSpecifier(Style) || Next->is(tok::identifier)) &&
           Next->Next && Next->Next->isPointerOrReference()) {
         // For operator void*(), operator char*(), operator Foo*().
         Next = Next->Next;
@@ -3409,7 +3409,7 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
       Tok = Tok->MatchingParen;
       continue;
     }
-    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
+    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier(Style) ||
         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis,
                      TT_TypeName)) {
       return true;
@@ -4155,7 +4155,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
   if (Right.isPointerOrReference()) {
     const FormatToken *Previous = &Left;
     while (Previous && Previous->isNot(tok::kw_operator)) {
-      if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
+      if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier(Style)) {
         Previous = Previous->getPreviousNonComment();
         continue;
       }
@@ -4351,7 +4351,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
   if (!Style.isVerilog() &&
       (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
                     tok::r_paren) ||
-       Left.isSimpleTypeSpecifier()) &&
+       Left.isSimpleTypeSpecifier(Style)) &&
       Right.is(tok::l_brace) && Right.getNextNonComment() &&
       Right.isNot(BK_Block)) {
     return false;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 7bc6410a78a495d..19a0cc72ace9a9c 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1851,7 +1851,7 @@ void UnwrappedLineParser::parseStructuralElement(
       nextToken();
       // Block return type.
       if (FormatTok->Tok.isAnyIdentifier() ||
-          FormatTok->isSimpleTypeSpecifier()) {
+          FormatTok->isSimpleTypeSpecifier(Style)) {
         nextToken();
         // Return types: pointers are ok too.
         while (FormatTok->is(tok::star))
@@ -2197,7 +2197,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
   bool InTemplateParameterList = false;
 
   while (FormatTok->isNot(tok::l_brace)) {
-    if (FormatTok->isSimpleTypeSpecifier()) {
+    if (FormatTok->isSimpleTypeSpecifier(Style)) {
       nextToken();
       continue;
     }
@@ -2310,7 +2310,7 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
   const FormatToken *Previous = FormatTok->Previous;
   const FormatToken *LeftSquare = FormatTok;
   nextToken();
-  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
+  if ((Previous && ((Previous->Tok.getIdentifierInfo(Style) &&
                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                         tok::kw_co_yield, tok::kw_co_return)) ||
                     Previous->closesScope())) ||
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index e862856a08ca117..7e6f59c2f66b151 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1597,7 +1597,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
       if (TryAnnotateTypeOrScopeToken())
         return ExprError();
 
-      if (!Actions.isSimpleTypeSpecifier(Tok.getKind()))
+      if (!Tok.getIdentifierInfo() || !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo()))
         // We are trying to parse a simple-type-specifier but might not get such
         // a token after error recovery.
         return ExprError();
diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp
index 849fd1ac95a442e..58e5bc4a42b5f17 100644
--- a/clang/lib/Parse/ParseObjc.cpp
+++ b/clang/lib/Parse/ParseObjc.cpp
@@ -2971,7 +2971,7 @@ bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) {
                   tok::annot_cxxscope))
     TryAnnotateTypeOrScopeToken();
 
-  if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) {
+  if (!Tok.getIdentifierInfo() || !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo())) {
     //   objc-receiver:
     //     expression
     // Make sure any typos in the receiver are corrected or diagnosed, so that
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8dff2cdc063df32..a20894adda00f94 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -128,52 +128,8 @@ class TypeNameValidatorCCC final : public CorrectionCandidateCallback {
 } // end anonymous namespace
 
 /// Determine whether the token kind starts a simple-type-specifier.
-bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
-  switch (Kind) {
-  // FIXME: Take into account the current language when deciding whether a
-  // token kind is a valid type specifier
-  case tok::kw_short:
-  case tok::kw_long:
-  case tok::kw___int64:
-  case tok::kw___int128:
-  case tok::kw_signed:
-  case tok::kw_unsigned:
-  case tok::kw_void:
-  case tok::kw_char:
-  case tok::kw_int:
-  case tok::kw_half:
-  case tok::kw_float:
-  case tok::kw_double:
-  case tok::kw___bf16:
-  case tok::kw__Float16:
-  case tok::kw___float128:
-  case tok::kw___ibm128:
-  case tok::kw_wchar_t:
-  case tok::kw_bool:
-  case tok::kw__Accum:
-  case tok::kw__Fract:
-  case tok::kw__Sat:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
-  case tok::kw___auto_type:
-    return true;
-
-  case tok::annot_typename:
-  case tok::kw_char16_t:
-  case tok::kw_char32_t:
-  case tok::kw_typeof:
-  case tok::annot_decltype:
-  case tok::kw_decltype:
-    return getLangOpts().CPlusPlus;
-
-  case tok::kw_char8_t:
-    return getLangOpts().Char8;
-
-  default:
-    break;
-  }
-
-  return false;
+bool Sema::isSimpleTypeSpecifier(const IdentifierInfo &II) const {
+  return II.isSimpleTypeSpecifier(getLangOpts());
 }
 
 namespace {



More information about the cfe-commits mailing list