[clang] 5c106f7 - [clang-format] Add TypeNames option to disambiguate types/objects

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 18 14:20:47 PDT 2023


Author: Owen Pan
Date: 2023-07-18T14:18:40-07:00
New Revision: 5c106f7b947e514852402ad5678c0ebf70ce91b1

URL: https://github.com/llvm/llvm-project/commit/5c106f7b947e514852402ad5678c0ebf70ce91b1
DIFF: https://github.com/llvm/llvm-project/commit/5c106f7b947e514852402ad5678c0ebf70ce91b1.diff

LOG: [clang-format] Add TypeNames option to disambiguate types/objects

If a non-keyword identifier is found in TypeNames, then a *, &, or && that
follows it is annotated as TT_PointerOrReference.

Differential Revision: https://reviews.llvm.org/D155273

Added: 
    

Modified: 
    clang/docs/ClangFormatStyleOptions.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Format/Format.h
    clang/lib/Format/Format.cpp
    clang/lib/Format/FormatToken.h
    clang/lib/Format/FormatTokenLexer.cpp
    clang/lib/Format/FormatTokenLexer.h
    clang/lib/Format/TokenAnnotator.cpp
    clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 8965b20e62c641..8ef58faa76837b 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -5355,6 +5355,15 @@ the configuration (without a prefix: ``Auto``).
 **TabWidth** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ <TabWidth>`
   The number of columns used for tab stops.
 
+.. _TypeNames:
+
+**TypeNames** (``List of Strings``) :versionbadge:`clang-format 17` :ref:`¶ <TypeNames>`
+  A vector of non-keyword identifiers that should be interpreted as type
+  names.
+
+  A ``*``, ``&``, or ``&&`` between a type name and another non-keyword
+  identifier is annotated as a pointer or reference, not a binary operator.
+
 .. _TypenameMacros:
 
 **TypenameMacros** (``List of Strings``) :versionbadge:`clang-format 9` :ref:`¶ <TypenameMacros>`

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index dd316a7a82e353..cad10dd090263c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -876,6 +876,7 @@ clang-format
   the indentation level of the contents of braced init lists.
 - Add ``KeepEmptyLinesAtEOF`` to keep empty lines at end of file.
 - Add ``RemoveParentheses`` to remove redundant parentheses.
+- Add ``TypeNames`` to treat listed non-keyword identifiers as type names.
 
 libclang
 --------

diff  --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 71948027fbe3ed..874f10b0c57fc2 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4265,6 +4265,15 @@ struct FormatStyle {
   /// \version 3.7
   unsigned TabWidth;
 
+  /// A vector of non-keyword identifiers that should be interpreted as type
+  /// names.
+  ///
+  /// A ``*``, ``&``, or ``&&`` between a type name and another non-keyword
+  /// identifier is annotated as a pointer or reference, not a binary operator.
+  ///
+  /// \version 17
+  std::vector<std::string> TypeNames;
+
   /// \brief A vector of macros that should be interpreted as type declarations
   /// instead of as function calls.
   ///
@@ -4492,7 +4501,8 @@ struct FormatStyle {
            Standard == R.Standard &&
            StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
            StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
-           TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
+           TypeNames == R.TypeNames && TypenameMacros == R.TypenameMacros &&
+           UseTab == R.UseTab &&
            VerilogBreakBetweenInstancePorts ==
                R.VerilogBreakBetweenInstancePorts &&
            WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;

diff  --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index c71139d26ff80d..3df1b60d2cb958 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1051,6 +1051,7 @@ template <> struct MappingTraits<FormatStyle> {
                    Style.StatementAttributeLikeMacros);
     IO.mapOptional("StatementMacros", Style.StatementMacros);
     IO.mapOptional("TabWidth", Style.TabWidth);
+    IO.mapOptional("TypeNames", Style.TypeNames);
     IO.mapOptional("TypenameMacros", Style.TypenameMacros);
     IO.mapOptional("UseTab", Style.UseTab);
     IO.mapOptional("VerilogBreakBetweenInstancePorts",

diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index a7f2a01683fb38..4e45478d7424e6 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -141,6 +141,7 @@ namespace format {
   TYPE(TrailingReturnArrow)                                                    \
   TYPE(TrailingUnaryOperator)                                                  \
   TYPE(TypeDeclarationParen)                                                   \
+  TYPE(TypeName)                                                               \
   TYPE(TypenameMacro)                                                          \
   TYPE(UnaryOperator)                                                          \
   TYPE(UnionLBrace)                                                            \

diff  --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index ae54de93daf51f..4d43796dd70e52 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -71,6 +71,9 @@ FormatTokenLexer::FormatTokenLexer(
     auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
     Macros.insert({Identifier, TT_StatementAttributeLikeMacro});
   }
+
+  for (const auto &TypeName : Style.TypeNames)
+    TypeNames.insert(&IdentTable.get(TypeName));
 }
 
 ArrayRef<FormatToken *> FormatTokenLexer::lex() {
@@ -1222,7 +1225,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
   }
 
   if (Style.isCpp()) {
-    auto it = Macros.find(FormatTok->Tok.getIdentifierInfo());
+    auto *Identifier = FormatTok->Tok.getIdentifierInfo();
+    auto it = Macros.find(Identifier);
     if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
           Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
               tok::pp_define) &&
@@ -1240,6 +1244,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
         FormatTok->setType(TT_MacroBlockBegin);
       else if (MacroBlockEndRegex.match(Text))
         FormatTok->setType(TT_MacroBlockEnd);
+      else if (TypeNames.contains(Identifier))
+        FormatTok->setFinalizedType(TT_TypeName);
     }
   }
 

diff  --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 0a8123fed29348..bb6a8ab69c1be1 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -22,6 +22,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Regex.h"
 
@@ -126,6 +127,8 @@ class FormatTokenLexer {
 
   llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
 
+  llvm::SmallPtrSet<IdentifierInfo *, 8> TypeNames;
+
   bool FormattingDisabled;
 
   llvm::Regex MacroBlockBeginRegex;

diff  --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index fbcc3a03ba56ae..4506e41d72d759 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -422,6 +422,7 @@ class AnnotatingParser {
           FormatToken *PrevPrev = Prev->getPreviousNonComment();
           FormatToken *Next = CurrentToken->Next;
           if (PrevPrev && PrevPrev->is(tok::identifier) &&
+              PrevPrev->isNot(TT_TypeName) &&
               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
             Prev->setType(TT_BinaryOperator);
@@ -2508,6 +2509,8 @@ class AnnotatingParser {
     const FormatToken *PrevToken = Tok.getPreviousNonComment();
     if (!PrevToken)
       return TT_UnaryOperator;
+    if (PrevToken->is(TT_TypeName))
+      return TT_PointerOrReference;
 
     const FormatToken *NextToken = Tok.getNextNonComment();
 

diff  --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index fd42dfded83e5f..ae2084923de00e 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -272,6 +272,19 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
   Tokens = annotate("template <enable_if_t<foo && !bar>* = nullptr> void f();");
   ASSERT_EQ(Tokens.size(), 19u) << Tokens;
   EXPECT_TOKEN(Tokens[5], tok::ampamp, TT_BinaryOperator);
+
+  FormatStyle Style = getLLVMStyle();
+  Style.TypeNames.push_back("MYI");
+  Tokens = annotate("if (MYI *p{nullptr})", Style);
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::identifier, TT_TypeName);
+  EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference);
+
+  Style.TypeNames.push_back("Class");
+  Tokens = annotate("if (Class *obj {getObj()})", Style);
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::identifier, TT_TypeName);
+  EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) {


        


More information about the cfe-commits mailing list