[clang] [clang-format] Correctly annotate user-defined conversion functions (PR #131434)

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 16 00:10:38 PDT 2025


https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/131434

>From 46cde1b7667f36115d746326b78d011511cac738 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Sat, 15 Mar 2025 00:37:53 -0700
Subject: [PATCH 1/2] [clang-format] Correctly annotate user-defined conversion
 functions

Also fix/delete existing invalid/redundant test cases.

Fix #130894
---
 clang/lib/Format/TokenAnnotator.cpp           | 27 ++++++++--
 clang/unittests/Format/FormatTest.cpp         | 28 +++++-----
 clang/unittests/Format/TokenAnnotatorTest.cpp | 53 +++++++++++++++++++
 3 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 08539de405c67..d618eab1692b3 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1639,6 +1639,25 @@ class AnnotatingParser {
     case tok::kw_operator:
       if (Style.isProto())
         break;
+      // C++ user-defined conversion function.
+      if (IsCpp && CurrentToken &&
+          (CurrentToken->is(tok::kw_auto) ||
+           CurrentToken->isTypeName(LangOpts))) {
+        FormatToken *LParen;
+        if (CurrentToken->startsSequence(tok::kw_decltype, tok::l_paren,
+                                         tok::kw_auto, tok::r_paren)) {
+          LParen = CurrentToken->Next->Next->Next->Next;
+        } else {
+          for (LParen = CurrentToken->Next;
+               LParen && LParen->isNot(tok::l_paren); LParen = LParen->Next) {
+          }
+        }
+        if (LParen && LParen->startsSequence(tok::l_paren, tok::r_paren)) {
+          Tok->setFinalizedType(TT_FunctionDeclarationName);
+          LParen->setFinalizedType(TT_FunctionDeclarationLParen);
+          break;
+        }
+      }
       while (CurrentToken &&
              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
         if (CurrentToken->isOneOf(tok::star, tok::amp))
@@ -3071,12 +3090,10 @@ class AnnotatingParser {
     if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
       return TT_BinaryOperator;
 
-    // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
-    // unary "&".
-    if (Tok.is(tok::ampamp) &&
-        NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
+    // "&&" followed by "*" or "&" is quite unlikely to be two successive unary
+    // "&".
+    if (Tok.is(tok::ampamp) && NextToken->isOneOf(tok::star, tok::amp))
       return TT_BinaryOperator;
-    }
 
     // This catches some cases where evaluation order is used as control flow:
     //   aaa && aaa->f();
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 9864e7ec1b2ec..5df7865f5a629 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -10443,27 +10443,17 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) {
                "void\n"
                "A::operator->() {}\n"
                "void\n"
-               "A::operator void *() {}\n"
+               "A::operator&() {}\n"
                "void\n"
-               "A::operator void &() {}\n"
-               "void\n"
-               "A::operator void &&() {}\n"
-               "void\n"
-               "A::operator char *() {}\n"
+               "A::operator&&() {}\n"
                "void\n"
                "A::operator[]() {}\n"
                "void\n"
                "A::operator!() {}\n"
                "void\n"
-               "A::operator**() {}\n"
-               "void\n"
                "A::operator<Foo> *() {}\n"
                "void\n"
-               "A::operator<Foo> **() {}\n"
-               "void\n"
-               "A::operator<Foo> &() {}\n"
-               "void\n"
-               "A::operator void **() {}",
+               "A::operator<Foo> &() {}\n",
                Style);
   verifyFormat("constexpr auto\n"
                "operator()() const -> reference {}\n"
@@ -10486,7 +10476,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) {
                "constexpr auto\n"
                "operator void &() const -> reference {}\n"
                "constexpr auto\n"
-               "operator void &&() const -> reference {}\n"
+               "operator&&() const -> reference {}\n"
                "constexpr auto\n"
                "operator char *() const -> reference {}\n"
                "constexpr auto\n"
@@ -28032,6 +28022,16 @@ TEST_F(FormatTest, BreakAfterAttributes) {
                "  --d;",
                CtrlStmtCode, Style);
 
+  verifyFormat("[[nodiscard]]\n"
+               "operator bool();\n"
+               "[[nodiscard]]\n"
+               "operator bool() {\n"
+               "  return true;\n"
+               "}",
+               "[[nodiscard]] operator bool();\n"
+               "[[nodiscard]] operator bool() { return true; }",
+               Style);
+
   constexpr StringRef CtorDtorCode("struct Foo {\n"
                                    "  [[deprecated]] Foo();\n"
                                    "  [[deprecated]] Foo() {}\n"
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 5e2d301c5d1f3..48927813af317 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3856,6 +3856,59 @@ TEST_F(TokenAnnotatorTest, AfterPPDirective) {
   EXPECT_TOKEN(Tokens[2], tok::minusminus, TT_AfterPPDirective);
 }
 
+TEST_F(TokenAnnotatorTest, UserDefinedConversionFunction) {
+  auto Tokens = annotate("operator int();");
+  ASSERT_EQ(Tokens.size(), 6u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
+
+  Tokens = annotate("explicit operator int *();");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_FunctionDeclarationLParen);
+
+  Tokens = annotate("operator int &();");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[2], tok::amp, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen);
+
+  Tokens = annotate("operator auto() const { return 2; }");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[4], tok::kw_const, TT_TrailingAnnotation);
+  EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_FunctionLBrace);
+
+  Tokens = annotate("operator decltype(auto)() const;");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[4], tok::r_paren, TT_TypeDeclarationParen);
+  EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[7], tok::kw_const, TT_TrailingAnnotation);
+
+  auto Style = getLLVMStyle();
+  Style.TypeNames.push_back("Foo");
+
+  Tokens = annotate("virtual operator Foo() = 0;", Style);
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen);
+
+  Tokens = annotate("operator Foo() override { return Foo(); }", Style);
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_FunctionLBrace);
+
+  Tokens = annotate("friend Bar::operator Foo();", Style);
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_FunctionDeclarationLParen);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

>From 83b12702b5a7ddb0e0e5febc9b57c6361c28df27 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Sun, 16 Mar 2025 00:09:28 -0700
Subject: [PATCH 2/2] Address review comments

---
 clang/lib/Format/FormatToken.h                |  4 ++
 clang/lib/Format/FormatTokenLexer.cpp         |  6 +--
 clang/lib/Format/TokenAnnotator.cpp           | 46 +++++++++++--------
 clang/unittests/Format/TokenAnnotatorTest.cpp | 13 ++----
 4 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 77935e75d4b4c..3808872d227a9 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -746,6 +746,10 @@ struct FormatToken {
     return isOneOf(tok::star, tok::amp, tok::ampamp);
   }
 
+  bool isPlacementOperator() const {
+    return isOneOf(tok::kw_new, tok::kw_delete);
+  }
+
   bool isUnaryOperator() const {
     switch (Tok.getKind()) {
     case tok::plus:
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 16f0a76f3a954..eed54a11684b5 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -610,9 +610,9 @@ bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
                       tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
                       tok::colon, tok::question, tok::tilde) ||
          Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
-                      tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
-                      tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
-         Tok->isBinaryOperator();
+                      tok::kw_else, tok::kw_void, tok::kw_typeof,
+                      Keywords.kw_instanceof, Keywords.kw_in) ||
+         Tok->isPlacementOperator() || Tok->isBinaryOperator();
 }
 
 bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index d618eab1692b3..abce6b04f5070 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1639,23 +1639,29 @@ class AnnotatingParser {
     case tok::kw_operator:
       if (Style.isProto())
         break;
-      // C++ user-defined conversion function.
-      if (IsCpp && CurrentToken &&
-          (CurrentToken->is(tok::kw_auto) ||
-           CurrentToken->isTypeName(LangOpts))) {
-        FormatToken *LParen;
-        if (CurrentToken->startsSequence(tok::kw_decltype, tok::l_paren,
-                                         tok::kw_auto, tok::r_paren)) {
-          LParen = CurrentToken->Next->Next->Next->Next;
-        } else {
-          for (LParen = CurrentToken->Next;
-               LParen && LParen->isNot(tok::l_paren); LParen = LParen->Next) {
+      // Handle C++ user-defined conversion function.
+      if (IsCpp && CurrentToken) {
+        const auto *Info = CurrentToken->Tok.getIdentifierInfo();
+        // What follows Tok is an identifier or a non-operator keyword.
+        if (Info && !(Info->isCPlusPlusOperatorKeyword() ||
+                      CurrentToken->isPlacementOperator() ||
+                      CurrentToken->is(tok::kw_co_await))) {
+          FormatToken *LParen;
+          if (CurrentToken->startsSequence(tok::kw_decltype, tok::l_paren,
+                                           tok::kw_auto, tok::r_paren)) {
+            // Skip `decltype(auto)`.
+            LParen = CurrentToken->Next->Next->Next->Next;
+          } else {
+            // Skip to l_paren.
+            for (LParen = CurrentToken->Next;
+                 LParen && LParen->isNot(tok::l_paren); LParen = LParen->Next) {
+            }
+          }
+          if (LParen && LParen->is(tok::l_paren)) {
+            Tok->setFinalizedType(TT_FunctionDeclarationName);
+            LParen->setFinalizedType(TT_FunctionDeclarationLParen);
+            break;
           }
-        }
-        if (LParen && LParen->startsSequence(tok::l_paren, tok::r_paren)) {
-          Tok->setFinalizedType(TT_FunctionDeclarationName);
-          LParen->setFinalizedType(TT_FunctionDeclarationLParen);
-          break;
         }
       }
       while (CurrentToken &&
@@ -3018,7 +3024,7 @@ class AnnotatingParser {
       return TT_UnaryOperator;
     if (PrevToken->is(TT_TypeName))
       return TT_PointerOrReference;
-    if (PrevToken->isOneOf(tok::kw_new, tok::kw_delete) && Tok.is(tok::ampamp))
+    if (PrevToken->isPlacementOperator() && Tok.is(tok::ampamp))
       return TT_BinaryOperator;
 
     const FormatToken *NextToken = Tok.getNextNonComment();
@@ -3808,7 +3814,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
         return Next;
       if (Next->is(TT_OverloadedOperator))
         continue;
-      if (Next->isOneOf(tok::kw_new, tok::kw_delete, tok::kw_co_await)) {
+      if (Next->isPlacementOperator() || Next->is(tok::kw_co_await)) {
         // For 'new[]' and 'delete[]'.
         if (Next->Next &&
             Next->Next->startsSequence(tok::l_square, tok::r_square)) {
@@ -4819,7 +4825,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
              spaceRequiredBeforeParens(Right);
     }
     if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
-        Left.isOneOf(tok::kw_new, tok::kw_delete) &&
+        Left.isPlacementOperator() &&
         Right.isNot(TT_OverloadedOperatorLParen) &&
         !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
       const auto *RParen = Right.MatchingParen;
@@ -4862,7 +4868,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
         return Style.SpaceBeforeParensOptions.AfterControlStatements ||
                spaceRequiredBeforeParens(Right);
       }
-      if (Left.isOneOf(tok::kw_new, tok::kw_delete) ||
+      if (Left.isPlacementOperator() ||
           (Left.is(tok::r_square) && Left.MatchingParen &&
            Left.MatchingParen->Previous &&
            Left.MatchingParen->Previous->is(tok::kw_delete))) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 48927813af317..1ff785110fc34 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3857,8 +3857,8 @@ TEST_F(TokenAnnotatorTest, AfterPPDirective) {
 }
 
 TEST_F(TokenAnnotatorTest, UserDefinedConversionFunction) {
-  auto Tokens = annotate("operator int();");
-  ASSERT_EQ(Tokens.size(), 6u) << Tokens;
+  auto Tokens = annotate("operator int(void);");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
 
@@ -3889,21 +3889,18 @@ TEST_F(TokenAnnotatorTest, UserDefinedConversionFunction) {
   EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_FunctionDeclarationLParen);
   EXPECT_TOKEN(Tokens[7], tok::kw_const, TT_TrailingAnnotation);
 
-  auto Style = getLLVMStyle();
-  Style.TypeNames.push_back("Foo");
-
-  Tokens = annotate("virtual operator Foo() = 0;", Style);
+  Tokens = annotate("virtual operator Foo() = 0;");
   ASSERT_EQ(Tokens.size(), 9u) << Tokens;
   EXPECT_TOKEN(Tokens[1], tok::kw_operator, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen);
 
-  Tokens = annotate("operator Foo() override { return Foo(); }", Style);
+  Tokens = annotate("operator Foo() override { return Foo(); }");
   ASSERT_EQ(Tokens.size(), 13u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::kw_operator, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen);
   EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_FunctionLBrace);
 
-  Tokens = annotate("friend Bar::operator Foo();", Style);
+  Tokens = annotate("friend Bar::operator Foo();");
   ASSERT_EQ(Tokens.size(), 9u) << Tokens;
   EXPECT_TOKEN(Tokens[3], tok::kw_operator, TT_FunctionDeclarationName);
   EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_FunctionDeclarationLParen);



More information about the cfe-commits mailing list