[clang] 35f2ac1 - [clang-format] Fix inconsistent annotation of operator&

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 5 13:33:41 PST 2023


Author: David Turner
Date: 2023-02-05T13:33:33-08:00
New Revision: 35f2ac1763adcbd5ee9b49a98de24c0c420c323e

URL: https://github.com/llvm/llvm-project/commit/35f2ac1763adcbd5ee9b49a98de24c0c420c323e
DIFF: https://github.com/llvm/llvm-project/commit/35f2ac1763adcbd5ee9b49a98de24c0c420c323e.diff

LOG: [clang-format] Fix inconsistent annotation of operator&

Token annotator incorrectly annotates operator& as a reference type in
situations like Boost serialization archives:
https://www.boost.org/doc/libs/1_81_0/libs/serialization/doc/tutorial.html

Add annotation rules for standalone and chained operator& instances while
preserving behavior for reference declarations at class scope. Add tests to
validate annotation and formatting behavior.

Differential Revision: https://reviews.llvm.org/D141959

Added: 
    

Modified: 
    clang/lib/Format/TokenAnnotator.cpp
    clang/lib/Format/TokenAnnotator.h
    clang/unittests/Format/FormatTest.cpp
    clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 1b1123fa36456..161720ecb770a 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -111,14 +111,29 @@ static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
 class AnnotatingParser {
 public:
   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
-                   const AdditionalKeywords &Keywords)
+                   const AdditionalKeywords &Keywords,
+                   SmallVector<ScopeType> &Scopes)
       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
-        Keywords(Keywords) {
+        Keywords(Keywords), Scopes(Scopes) {
     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
     resetTokenMetadata();
   }
 
 private:
+  ScopeType getScopeType(const FormatToken &Token) const {
+    switch (Token.getType()) {
+    case TT_FunctionLBrace:
+    case TT_LambdaLBrace:
+      return ST_Function;
+    case TT_ClassLBrace:
+    case TT_StructLBrace:
+    case TT_UnionLBrace:
+      return ST_Class;
+    default:
+      return ST_Other;
+    }
+  }
+
   bool parseAngle() {
     if (!CurrentToken || !CurrentToken->Previous)
       return false;
@@ -849,6 +864,9 @@ class AnnotatingParser {
     unsigned CommaCount = 0;
     while (CurrentToken) {
       if (CurrentToken->is(tok::r_brace)) {
+        assert(!Scopes.empty());
+        assert(Scopes.back() == getScopeType(OpeningBrace));
+        Scopes.pop_back();
         assert(OpeningBrace.Optional == CurrentToken->Optional);
         OpeningBrace.MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = &OpeningBrace;
@@ -1148,6 +1166,7 @@ class AnnotatingParser {
         if (Previous && Previous->getType() != TT_DictLiteral)
           Previous->setType(TT_SelectorName);
       }
+      Scopes.push_back(getScopeType(*Tok));
       if (!parseBrace())
         return false;
       break;
@@ -1178,6 +1197,9 @@ class AnnotatingParser {
     case tok::r_square:
       return false;
     case tok::r_brace:
+      // Don't pop scope when encountering unbalanced r_brace.
+      if (!Scopes.empty())
+        Scopes.pop_back();
       // Lines can start with '}'.
       if (Tok->Previous)
         return false;
@@ -2448,6 +2470,28 @@ class AnnotatingParser {
     if (IsExpression && !Contexts.back().CaretFound)
       return TT_BinaryOperator;
 
+    // Opeartors at class scope are likely pointer or reference members.
+    if (!Scopes.empty() && Scopes.back() == ST_Class)
+      return TT_PointerOrReference;
+
+    // Tokens that indicate member access or chained operator& use.
+    auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
+      return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
+                                      tok::arrowstar, tok::periodstar);
+    };
+
+    // It's more likely that & represents operator& than an uninitialized
+    // reference.
+    if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
+        IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
+        NextToken && NextToken->Tok.isAnyIdentifier()) {
+      if (auto NextNext = NextToken->getNextNonComment();
+          NextNext &&
+          (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
+        return TT_BinaryOperator;
+      }
+    }
+
     return TT_PointerOrReference;
   }
 
@@ -2485,6 +2529,8 @@ class AnnotatingParser {
   bool AutoFound;
   const AdditionalKeywords &Keywords;
 
+  SmallVector<ScopeType> &Scopes;
+
   // Set of "<" tokens that do not open a template parameter list. If parseAngle
   // determines that a specific token can't be a template opener, it will make
   // same decision irrespective of the decisions for tokens leading up to it.
@@ -2765,11 +2811,11 @@ static unsigned maxNestingDepth(const AnnotatedLine &Line) {
   return Result;
 }
 
-void TokenAnnotator::annotate(AnnotatedLine &Line) const {
+void TokenAnnotator::annotate(AnnotatedLine &Line) {
   for (auto &Child : Line.Children)
     annotate(*Child);
 
-  AnnotatingParser Parser(Style, Line, Keywords);
+  AnnotatingParser Parser(Style, Line, Keywords, Scopes);
   Line.Type = Parser.parseLine();
 
   // With very deep nesting, ExpressionParser uses lots of stack and the

diff  --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 354511b6323af..0a6b4f69f38e0 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -34,6 +34,15 @@ enum LineType {
   LT_CommentAbovePPDirective,
 };
 
+enum ScopeType {
+  // Contained in class declaration/definition.
+  ST_Class,
+  // Contained within function definition.
+  ST_Function,
+  // Contained within other scope block (loop, if/else, etc).
+  ST_Other,
+};
+
 class AnnotatedLine {
 public:
   AnnotatedLine(const UnwrappedLine &Line)
@@ -178,7 +187,7 @@ class TokenAnnotator {
   // FIXME: Can/should this be done in the UnwrappedLineParser?
   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
 
-  void annotate(AnnotatedLine &Line) const;
+  void annotate(AnnotatedLine &Line);
   void calculateFormattingInformation(AnnotatedLine &Line) const;
 
 private:
@@ -220,6 +229,8 @@ class TokenAnnotator {
   const FormatStyle &Style;
 
   const AdditionalKeywords &Keywords;
+
+  SmallVector<ScopeType> Scopes;
 };
 
 } // end namespace format

diff  --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index e61fb5f3e0203..df8ce0187010f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -11288,6 +11288,13 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) {
 
   verifyFormat("int operator()(T (&&)[N]) { return 1; }");
   verifyFormat("int operator()(T (&)[N]) { return 0; }");
+
+  verifyFormat("val1 & val2;");
+  verifyFormat("val1 & val2 & val3;");
+  verifyFormat("class c {\n"
+               "  void func(type &a) { a & member; }\n"
+               "  anotherType &member;\n"
+               "}");
 }
 
 TEST_F(FormatTest, UnderstandsAttributes) {

diff  --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 3ed6521d48c1f..b27bb69d6aceb 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -175,6 +175,73 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
   ASSERT_EQ(Tokens.size(), 17u) << Tokens;
   EXPECT_TOKEN(Tokens[9], tok::ampamp, TT_PointerOrReference);
   EXPECT_TOKEN(Tokens[12], tok::ampamp, TT_PointerOrReference);
+
+  Tokens = annotate("Type1 &val1 = val2;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_PointerOrReference);
+
+  Tokens = annotate("Type1 *val1 = &val2;");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::star, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[4], tok::amp, TT_UnaryOperator);
+
+  Tokens = annotate("val1 & val2;");
+  ASSERT_EQ(Tokens.size(), 5u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1 & val2.member;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1 & val2.*member;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1.*member & val2;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1 & val2->*member;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1->member & val2;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1 & val2 & val3;");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[3], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("val1 & val2 // comment\n"
+                    "     & val3;");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[4], tok::amp, TT_BinaryOperator);
+
+  Tokens =
+      annotate("val1 & val2.member & val3.member() & val4 & val5->member;");
+  ASSERT_EQ(Tokens.size(), 19u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[5], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[11], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[13], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("class c {\n"
+                    "  void func(type &a) { a & member; }\n"
+                    "  anotherType &member;\n"
+                    "}");
+  ASSERT_EQ(Tokens.size(), 22u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::amp, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[12], tok::amp, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[17], tok::amp, TT_PointerOrReference);
+
+  Tokens = annotate("struct S {\n"
+                    "  auto Mem = C & D;\n"
+                    "}");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::amp, TT_BinaryOperator);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) {


        


More information about the cfe-commits mailing list