[clang] bcd1e46 - [clang-format] Further improve support for requires expressions

Björn Schäpers via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 15 12:38:27 PST 2022


Author: Björn Schäpers
Date: 2022-02-15T21:37:35+01:00
New Revision: bcd1e4612f4fa2d12a51f0708c619ae3b2deaa2b

URL: https://github.com/llvm/llvm-project/commit/bcd1e4612f4fa2d12a51f0708c619ae3b2deaa2b
DIFF: https://github.com/llvm/llvm-project/commit/bcd1e4612f4fa2d12a51f0708c619ae3b2deaa2b.diff

LOG: [clang-format] Further improve support for requires expressions

Detect requires expressions in more unusable contexts. This is far from
perfect, but currently we have no good metric to decide between a
requires expression and a trailing requires clause.

Differential Revision: https://reviews.llvm.org/D119138

Added: 
    

Modified: 
    clang/lib/Format/UnwrappedLineParser.cpp
    clang/lib/Format/UnwrappedLineParser.h
    clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index e2d5197988be6..25db8ee2e7549 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -40,6 +40,12 @@ class FormatTokenSource {
   // getNextToken().
   virtual FormatToken *peekNextToken() = 0;
 
+  // Returns the token that would be returned after the next N calls to
+  // getNextToken(). N needs to be greater than zero, and small enough that
+  // there are still tokens. Check for tok::eof with N-1 before calling it with
+  // N.
+  virtual FormatToken *peekNextToken(int N) = 0;
+
   // Returns whether we are at the end of the file.
   // This can be 
diff erent from whether getNextToken() returned an eof token
   // when the FormatTokenSource is a view on a part of the token stream.
@@ -137,6 +143,13 @@ class ScopedMacroState : public FormatTokenSource {
     return PreviousTokenSource->peekNextToken();
   }
 
+  FormatToken *peekNextToken(int N) override {
+    assert(N > 0);
+    if (eof())
+      return &FakeEOF;
+    return PreviousTokenSource->peekNextToken(N);
+  }
+
   bool isEOF() override { return PreviousTokenSource->isEOF(); }
 
   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
@@ -257,6 +270,16 @@ class IndexedTokenSource : public FormatTokenSource {
     return Tokens[Next];
   }
 
+  FormatToken *peekNextToken(int N) override {
+    assert(N > 0);
+    int Next = Position + N;
+    LLVM_DEBUG({
+      llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
+      dbgToken(Next);
+    });
+    return Tokens[Next];
+  }
+
   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
 
   unsigned getPosition() override {
@@ -1537,9 +1560,12 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
     case tok::kw_concept:
       parseConcept();
       return;
-    case tok::kw_requires:
-      parseRequiresClause();
-      return;
+    case tok::kw_requires: {
+      bool ParsedClause = parseRequires();
+      if (ParsedClause)
+        return;
+      break;
+    }
     case tok::kw_enum:
       // Ignore if this is part of "template <enum ...".
       if (Previous && Previous->is(tok::less)) {
@@ -2206,9 +2232,12 @@ void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
       else
         nextToken();
       break;
-    case tok::kw_requires:
-      parseRequiresExpression();
+    case tok::kw_requires: {
+      auto RequiresToken = FormatTok;
+      nextToken();
+      parseRequiresExpression(RequiresToken);
       break;
+    }
     case tok::ampamp:
       if (AmpAmpTokenType != TT_Unknown)
         FormatTok->setType(AmpAmpTokenType);
@@ -2783,28 +2812,163 @@ void UnwrappedLineParser::parseConcept() {
   addUnwrappedLine();
 }
 
+/// \brief Parses a requires, decides if it is a clause or an expression.
+/// \pre The current token has to be the requires keyword.
+/// \returns true if it parsed a clause.
+bool clang::format::UnwrappedLineParser::parseRequires() {
+  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
+  auto RequiresToken = FormatTok;
+
+  // We try to guess if it is a requires clause, or a requires expression. For
+  // that we first consume the keyword and check the next token.
+  nextToken();
+
+  switch (FormatTok->Tok.getKind()) {
+  case tok::l_brace:
+    // This can only be an expression, never a clause.
+    parseRequiresExpression(RequiresToken);
+    return false;
+  case tok::l_paren:
+    // Clauses and expression can start with a paren, it's unclear what we have.
+    break;
+  default:
+    // All other tokens can only be a clause.
+    parseRequiresClause(RequiresToken);
+    return true;
+  }
+
+  // Looking forward we would have to decide if there are function declaration
+  // like arguments to the requires expression:
+  // requires (T t) {
+  // Or there is a constraint expression for the requires clause:
+  // requires (C<T> && ...
+
+  // But first let's look behind.
+  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
+
+  if (!PreviousNonComment ||
+      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
+    // If there is no token, or an expression left brace, we are a requires
+    // clause within a requires expression.
+    parseRequiresClause(RequiresToken);
+    return true;
+  }
+
+  switch (PreviousNonComment->Tok.getKind()) {
+  case tok::greater:
+  case tok::r_paren:
+  case tok::kw_noexcept:
+  case tok::kw_const:
+    // This is a requires clause.
+    parseRequiresClause(RequiresToken);
+    return true;
+  case tok::amp:
+  case tok::ampamp: {
+    // This can be either:
+    // if (... && requires (T t) ...)
+    // Or
+    // void member(...) && requires (C<T> ...
+    // We check the one token before that for a const:
+    // void member(...) const && requires (C<T> ...
+    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
+    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
+      parseRequiresClause(RequiresToken);
+      return true;
+    }
+    break;
+  }
+  default:
+    // It's an expression.
+    parseRequiresExpression(RequiresToken);
+    return false;
+  }
+
+  // Now we look forward and try to check if the paren content is a parameter
+  // list. The parameters can be cv-qualified and contain references or
+  // pointers.
+  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
+  // of stuff: typename, const, *, &, &&, ::, identifiers.
+
+  int NextTokenOffset = 1;
+  auto NextToken = Tokens->peekNextToken(NextTokenOffset);
+  auto PeekNext = [&NextTokenOffset, &NextToken, this] {
+    ++NextTokenOffset;
+    NextToken = Tokens->peekNextToken(NextTokenOffset);
+  };
+
+  bool FoundType = false;
+  bool LastWasColonColon = false;
+  int OpenAngles = 0;
+
+  for (; NextTokenOffset < 50; PeekNext()) {
+    switch (NextToken->Tok.getKind()) {
+    case tok::kw_volatile:
+    case tok::kw_const:
+    case tok::comma:
+      parseRequiresExpression(RequiresToken);
+      return false;
+    case tok::r_paren:
+    case tok::pipepipe:
+      parseRequiresClause(RequiresToken);
+      return true;
+    case tok::eof:
+      // Break out of the loop.
+      NextTokenOffset = 50;
+      break;
+    case tok::coloncolon:
+      LastWasColonColon = true;
+      break;
+    case tok::identifier:
+      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
+        parseRequiresExpression(RequiresToken);
+        return false;
+      }
+      FoundType = true;
+      LastWasColonColon = false;
+      break;
+    case tok::less:
+      ++OpenAngles;
+      break;
+    case tok::greater:
+      --OpenAngles;
+      break;
+    default:
+      if (NextToken->isSimpleTypeSpecifier()) {
+        parseRequiresExpression(RequiresToken);
+        return false;
+      }
+      break;
+    }
+  }
+
+  // This seems to be a complicated expression, just assume it's a clause.
+  parseRequiresClause(RequiresToken);
+  return true;
+}
+
 /// \brief Parses a requires clause.
-/// \pre The current token needs to be the requires keyword.
+/// \param RequiresToken The requires keyword token, which starts this clause.
+/// \pre We need to be on the next token after the requires keyword.
 /// \sa parseRequiresExpression
 ///
 /// Returns if it either has finished parsing the clause, or it detects, that
 /// the clause is incorrect.
-void UnwrappedLineParser::parseRequiresClause() {
-  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
-  assert(FormatTok->getType() == TT_Unknown);
+void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
+  assert(FormatTok->getPreviousNonComment() == RequiresToken);
+  assert(RequiresToken->Tok.is(tok::kw_requires) && "'requires' expected");
+  assert(RequiresToken->getType() == TT_Unknown);
 
   // If there is no previous token, we are within a requires expression,
   // otherwise we will always have the template or function declaration in front
   // of it.
   bool InRequiresExpression =
-      !FormatTok->Previous ||
-      FormatTok->Previous->is(TT_RequiresExpressionLBrace);
+      !RequiresToken->Previous ||
+      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
 
-  FormatTok->setType(InRequiresExpression
-                         ? TT_RequiresClauseInARequiresExpression
-                         : TT_RequiresClause);
+  RequiresToken->setType(InRequiresExpression
+                             ? TT_RequiresClauseInARequiresExpression
+                             : TT_RequiresClause);
 
-  nextToken();
   parseConstraintExpression();
 
   if (!InRequiresExpression)
@@ -2812,17 +2976,18 @@ void UnwrappedLineParser::parseRequiresClause() {
 }
 
 /// \brief Parses a requires expression.
-/// \pre The current token needs to be the requires keyword.
+/// \param RequiresToken The requires keyword token, which starts this clause.
+/// \pre We need to be on the next token after the requires keyword.
 /// \sa parseRequiresClause
 ///
 /// Returns if it either has finished parsing the expression, or it detects,
 /// that the expression is incorrect.
-void UnwrappedLineParser::parseRequiresExpression() {
-  assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
-  assert(FormatTok->getType() == TT_Unknown);
+void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
+  assert(FormatTok->getPreviousNonComment() == RequiresToken);
+  assert(RequiresToken->Tok.is(tok::kw_requires) && "'requires' expected");
+  assert(RequiresToken->getType() == TT_Unknown);
 
-  FormatTok->setType(TT_RequiresExpression);
-  nextToken();
+  RequiresToken->setType(TT_RequiresExpression);
 
   if (FormatTok->is(tok::l_paren)) {
     FormatTok->setType(TT_RequiresExpressionLParen);
@@ -2844,9 +3009,12 @@ void UnwrappedLineParser::parseRequiresExpression() {
 void UnwrappedLineParser::parseConstraintExpression() {
   do {
     switch (FormatTok->Tok.getKind()) {
-    case tok::kw_requires:
-      parseRequiresExpression();
+    case tok::kw_requires: {
+      auto RequiresToken = FormatTok;
+      nextToken();
+      parseRequiresExpression(RequiresToken);
       break;
+    }
 
     case tok::l_paren:
       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);

diff  --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index d49bbaefd1469..52f7618d9beab 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -133,8 +133,9 @@ class UnwrappedLineParser {
   bool parseEnum();
   bool parseStructLike();
   void parseConcept();
-  void parseRequiresClause();
-  void parseRequiresExpression();
+  bool parseRequires();
+  void parseRequiresClause(FormatToken *RequiresToken);
+  void parseRequiresExpression(FormatToken *RequiresToken);
   void parseConstraintExpression();
   void parseJavaEnumBody();
   // Parses a record (aka class) as a top level element. If ParseAsExpr is true,

diff  --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 0d4be8d788533..6d19af52dcd3e 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -141,6 +141,9 @@ TEST_F(TokenAnnotatorTest, UnderstandsRequiresClausesAndConcepts) {
                     "  { t.foo() };\n"
                     "} && Bar<T> && Baz<T>;");
   ASSERT_EQ(Tokens.size(), 35u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[9], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_RequiresExpressionLBrace);
   EXPECT_TOKEN(Tokens[23], tok::ampamp, TT_BinaryOperator);
   EXPECT_TOKEN(Tokens[28], tok::ampamp, TT_BinaryOperator);
 
@@ -148,6 +151,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsRequiresClausesAndConcepts) {
                     "requires C1<T> && (C21<T> || C22<T> && C2e<T>) && C3<T>\n"
                     "struct Foo;");
   ASSERT_EQ(Tokens.size(), 36u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::kw_requires, TT_RequiresClause);
   EXPECT_TOKEN(Tokens[6], tok::identifier, TT_Unknown);
   EXPECT_EQ(Tokens[6]->FakeLParens.size(), 1u);
   EXPECT_TOKEN(Tokens[10], tok::ampamp, TT_BinaryOperator);
@@ -163,6 +167,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsRequiresClausesAndConcepts) {
                "requires (C1<T> && (C21<T> || C22<T> && C2e<T>) && C3<T>)\n"
                "struct Foo;");
   ASSERT_EQ(Tokens.size(), 38u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::kw_requires, TT_RequiresClause);
   EXPECT_TOKEN(Tokens[7], tok::identifier, TT_Unknown);
   EXPECT_EQ(Tokens[7]->FakeLParens.size(), 1u);
   EXPECT_TOKEN(Tokens[11], tok::ampamp, TT_BinaryOperator);
@@ -173,6 +178,127 @@ TEST_F(TokenAnnotatorTest, UnderstandsRequiresClausesAndConcepts) {
   EXPECT_EQ(Tokens[32]->FakeRParens, 1u);
   EXPECT_TOKEN(Tokens[33], tok::r_paren, TT_Unknown);
   EXPECT_TRUE(Tokens[33]->ClosesRequiresClause);
+
+  Tokens = annotate("template <typename T>\n"
+                    "void foo(T) noexcept requires Bar<T>;");
+  ASSERT_EQ(Tokens.size(), 18u) << Tokens;
+  EXPECT_TOKEN(Tokens[11], tok::kw_requires, TT_RequiresClause);
+
+  Tokens = annotate("template <typename T>\n"
+                    "struct S {\n"
+                    "  void foo() const requires Bar<T>;\n"
+                    "  void bar() const & requires Baz<T>;\n"
+                    "  void bar() && requires Baz2<T>;\n"
+                    "  void baz() const & noexcept requires Baz<T>;\n"
+                    "  void baz() && noexcept requires Baz2<T>;\n"
+                    "};\n"
+                    "\n"
+                    "void S::bar() const & requires Baz<T> { }");
+  ASSERT_EQ(Tokens.size(), 85u) << Tokens;
+  EXPECT_TOKEN(Tokens[13], tok::kw_requires, TT_RequiresClause);
+  EXPECT_TOKEN(Tokens[25], tok::kw_requires, TT_RequiresClause);
+  EXPECT_TOKEN(Tokens[36], tok::kw_requires, TT_RequiresClause);
+  EXPECT_TOKEN(Tokens[49], tok::kw_requires, TT_RequiresClause);
+  EXPECT_TOKEN(Tokens[61], tok::kw_requires, TT_RequiresClause);
+  EXPECT_TOKEN(Tokens[77], tok::kw_requires, TT_RequiresClause);
+
+  Tokens = annotate("void Class::member() && requires(Constant) {}");
+  ASSERT_EQ(Tokens.size(), 14u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_requires, TT_RequiresClause);
+
+  Tokens = annotate("void Class::member() && requires(Constant<T>) {}");
+  ASSERT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_requires, TT_RequiresClause);
+
+  Tokens =
+      annotate("void Class::member() && requires(Namespace::Constant<T>) {}");
+  ASSERT_EQ(Tokens.size(), 19u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_requires, TT_RequiresClause);
+
+  Tokens = annotate("void Class::member() && requires(typename "
+                    "Namespace::Outer<T>::Inner::Constant) {}");
+  ASSERT_EQ(Tokens.size(), 24u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::kw_requires, TT_RequiresClause);
+}
+
+TEST_F(TokenAnnotatorTest, UnderstandsRequiresExpressions) {
+  auto Tokens = annotate("bool b = requires(int i) { i + 5; };");
+  ASSERT_EQ(Tokens.size(), 16u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("if (requires(int i) { i + 5; }) return;");
+  ASSERT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("if (func() && requires(int i) { i + 5; }) return;");
+  ASSERT_EQ(Tokens.size(), 21u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[11], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("foo(requires(const T t) {});");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("foo(requires(const int t) {});");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("foo(requires(const T t) {});");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("foo(requires(int const* volatile t) {});");
+  ASSERT_EQ(Tokens.size(), 15u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[10], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("foo(requires(T const* volatile t) {});");
+  ASSERT_EQ(Tokens.size(), 15u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[10], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens =
+      annotate("foo(requires(const typename Outer<T>::Inner * const t) {});");
+  ASSERT_EQ(Tokens.size(), 21u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[16], tok::l_brace, TT_RequiresExpressionLBrace);
+
+  Tokens = annotate("template <typename T>\n"
+                    "concept C = requires(T T) {\n"
+                    "  requires Bar<T> && Foo<T>;\n"
+                    "};");
+  ASSERT_EQ(Tokens.size(), 28u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[9], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_RequiresExpressionLBrace);
+  EXPECT_TOKEN(Tokens[14], tok::kw_requires,
+               TT_RequiresClauseInARequiresExpression);
+
+  Tokens = annotate("template <typename T>\n"
+                    "concept C = requires(T T) {\n"
+                    "  { t.func() } -> std::same_as<int>;"
+                    "  requires Bar<T> && Foo<T>;\n"
+                    "};");
+  ASSERT_EQ(Tokens.size(), 43u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::kw_requires, TT_RequiresExpression);
+  EXPECT_TOKEN(Tokens[9], tok::l_paren, TT_RequiresExpressionLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_RequiresExpressionLBrace);
+  EXPECT_TOKEN(Tokens[29], tok::kw_requires,
+               TT_RequiresClauseInARequiresExpression);
 }
 
 TEST_F(TokenAnnotatorTest, RequiresDoesNotChangeParsingOfTheRest) {


        


More information about the cfe-commits mailing list