[clang] 1995d44 - [clang-format] Enable FormatTokenSource to insert tokens.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 15 04:43:40 PST 2023


Author: Manuel Klimek
Date: 2023-02-15T12:39:24Z
New Revision: 1995d4424505cb5a1c3f0e5f851a660ec32d7af1

URL: https://github.com/llvm/llvm-project/commit/1995d4424505cb5a1c3f0e5f851a660ec32d7af1
DIFF: https://github.com/llvm/llvm-project/commit/1995d4424505cb5a1c3f0e5f851a660ec32d7af1.diff

LOG: [clang-format] Enable FormatTokenSource to insert tokens.

In preparation for configured macro replacements in formatting,
add the ability to insert tokens to FormatTokenSource, and implement
token insertion in IndexedTokenSource.

Differential Revision: https://reviews.llvm.org/D143070

Added: 
    

Modified: 
    clang/lib/Format/FormatTokenSource.h
    clang/lib/Format/UnwrappedLineParser.h
    clang/unittests/Format/FormatTokenSourceTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h
index 0bef45a7ff980..8bf7e6932c895 100644
--- a/clang/lib/Format/FormatTokenSource.h
+++ b/clang/lib/Format/FormatTokenSource.h
@@ -1,4 +1,3 @@
-
 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -8,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the \c TokenSource interface, which provides a token
+/// This file defines the \c FormatTokenSource interface, which provides a token
 /// stream as well as the ability to manipulate the token stream.
 ///
 //===----------------------------------------------------------------------===//
@@ -18,12 +17,17 @@
 
 #include "FormatToken.h"
 #include "UnwrappedLineParser.h"
+#include "llvm/ADT/DenseMap.h"
 
 #define DEBUG_TYPE "format-token-source"
 
 namespace clang {
 namespace format {
 
+// Navigate a token stream.
+//
+// Enables traversal of a token stream, resetting the position in a token
+// stream, as well as inserting new tokens.
 class FormatTokenSource {
 public:
   virtual ~FormatTokenSource() {}
@@ -33,6 +37,9 @@ class FormatTokenSource {
 
   // Returns the token preceding the token returned by the last call to
   // getNextToken() in the token stream, or nullptr if no such token exists.
+  //
+  // Must not be called immediately after insertTokens(), i.e. while the stream
+  // is still positioned at the first inserted token.
   virtual FormatToken *getPreviousToken() = 0;
 
   // Returns the token that would be returned by the next call to
@@ -45,14 +52,31 @@ class FormatTokenSource {
   virtual bool isEOF() = 0;
 
   // Gets the current position in the token stream, to be used by setPosition().
+  //
+  // Note that the value of the position is not meaningful, and specifically
+  // should not be used to get relative token positions.
   virtual unsigned getPosition() = 0;
 
   // Resets the token stream to the state it was in when getPosition() returned
   // Position, and return the token at that position in the stream.
   virtual FormatToken *setPosition(unsigned Position) = 0;
+
+  // Insert the given tokens before the current position.
+  // Returns the first token in \c Tokens.
+  // The next returned token will be the second token in \c Tokens.
+  // Requires the last token in \c Tokens to be EOF. That EOF is not returned;
+  // instead, the stream resumes at the token that was current before insertion.
+  //
+  // For example, given the token sequence 'a1 a2':
+  // getNextToken() -> a1
+  // insertTokens('b1 b2') -> b1
+  // getNextToken() -> b2
+  // getNextToken() -> a1
+  // getNextToken() -> a2
+  virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
 };
 
-class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
+class IndexedTokenSource : public FormatTokenSource {
 public:
   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
       : Tokens(Tokens), Position(-1) {}
@@ -65,7 +89,7 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
       });
       return Tokens[Position];
     }
-    ++Position;
+    Position = successor(Position);
     LLVM_DEBUG({
       llvm::dbgs() << "Next ";
       dbgToken(Position);
@@ -74,16 +98,17 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
   }
 
   FormatToken *getPreviousToken() override {
+    assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof));
     return Position > 0 ? Tokens[Position - 1] : nullptr;
   }
 
   FormatToken *peekNextToken(bool SkipComment = false) override {
     if (isEOF())
       return Tokens[Position];
-    int Next = Position + 1;
+    int Next = successor(Position);
     if (SkipComment)
       while (Tokens[Next]->is(tok::comment))
-        ++Next;
+        Next = successor(Next);
     LLVM_DEBUG({
       llvm::dbgs() << "Peeking ";
       dbgToken(Next);
@@ -107,9 +132,40 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
     return Tokens[Position];
   }
 
+  // Appends New to Tokens and moves the stream to its first token, which is
+  // returned. New must be non-empty and end in EOF; stepping onto that EOF
+  // resumes at the position that was current when insertTokens() was called.
+  FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
+    assert(Position != -1);
+    assert(!New.empty() && New.back()->Tok.is(tok::eof));
+    const int First = Tokens.size();
+    Tokens.append(New.begin(), New.end());
+    const int Last = Tokens.size() - 1;
+    LLVM_DEBUG({
+      llvm::dbgs() << "Inserting:\n";
+      for (int I = First; I <= Last; ++I)
+        dbgToken(I, "  ");
+      llvm::dbgs() << "  Jump from: " << Last << " -> " << Position << "\n";
+    });
+    Jumps[Last] = Position;
+    Position = First;
+    LLVM_DEBUG({ llvm::dbgs() << "At inserted token "; dbgToken(Position); });
+    return Tokens[Position];
+  }
+
   void reset() { Position = -1; }
 
 private:
+  // Returns the position that follows Current, honoring a registered jump.
+  int successor(int Current) const {
+    const auto Jump = Jumps.find(Current + 1);
+    if (Jump == Jumps.end())
+      return Current + 1;
+    // A jump target must not itself be the source of another jump.
+    assert(Jumps.find(Jump->second) == Jumps.end());
+    return Jump->second;
+  }
+
   void dbgToken(int Position, llvm::StringRef Indent = "") {
     FormatToken *Tok = Tokens[Position];
     llvm::dbgs() << Indent << "[" << Position
@@ -117,8 +173,12 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
                  << ", Macro: " << !!Tok->MacroCtx << "\n";
   }
 
-  ArrayRef<FormatToken *> Tokens;
+  SmallVector<FormatToken *> Tokens;
   int Position;
+
+  // Maps from position a to position b, so that when we reach a, the token
+  // stream continues at position b instead.
+  llvm::DenseMap<int, int> Jumps;
 };
 
 class ScopedMacroState : public FormatTokenSource {
@@ -175,6 +235,10 @@ class ScopedMacroState : public FormatTokenSource {
     return Token;
   }
 
+  FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
+    llvm_unreachable("Cannot insert tokens while parsing a macro.");
+  }
+
 private:
   bool eof() {
     return Token && Token->HasUnescapedNewline &&

diff  --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index f043e567eb733..77277471f7e3c 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -280,9 +280,6 @@ class UnwrappedLineParser {
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;
 
-  // FIXME: This is a temporary measure until we have reworked the ownership
-  // of the format tokens. The goal is to have the actual tokens created and
-  // owned outside of and handed into the UnwrappedLineParser.
   ArrayRef<FormatToken *> AllTokens;
 
   // Keeps a stack of the states of nested control statements (true if the

diff  --git a/clang/unittests/Format/FormatTokenSourceTest.cpp b/clang/unittests/Format/FormatTokenSourceTest.cpp
index 3274643aa8688..74de93057df6a 100644
--- a/clang/unittests/Format/FormatTokenSourceTest.cpp
+++ b/clang/unittests/Format/FormatTokenSourceTest.cpp
@@ -28,12 +28,17 @@ class IndexedTokenSourceTest : public ::testing::Test {
 #define EXPECT_TOKEN_KIND(FormatTok, Kind)                                     \
   do {                                                                         \
     FormatToken *Tok = FormatTok;                                              \
-    EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok);                           \
+    EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok;                               \
+  } while (false);
+#define EXPECT_TOKEN_ID(FormatTok, Name)                                       \
+  do {                                                                         \
+    FormatToken *Tok = FormatTok;                                              \
+    EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok;                    \
+    EXPECT_EQ(Tok->TokenText, Name) << *Tok;                                   \
   } while (false);
 
 TEST_F(IndexedTokenSourceTest, EmptyInput) {
-  TokenList Tokens = lex("");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex(""));
   EXPECT_FALSE(Source.isEOF());
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
   EXPECT_TRUE(Source.isEOF());
@@ -46,8 +51,7 @@ TEST_F(IndexedTokenSourceTest, EmptyInput) {
 }
 
 TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
-  TokenList Tokens = lex("int a;");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex("int a;"));
   EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int);
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int);
   EXPECT_EQ(Source.getPreviousToken(), nullptr);
@@ -60,11 +64,12 @@ TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
   EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
   EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
 }
 
 TEST_F(IndexedTokenSourceTest, ResetPosition) {
-  TokenList Tokens = lex("int a;");
-  IndexedTokenSource Source(Tokens);
+  IndexedTokenSource Source(lex("int a;"));
   Source.getNextToken();
   unsigned Position = Source.getPosition();
   Source.getNextToken();
@@ -73,6 +78,50 @@ TEST_F(IndexedTokenSourceTest, ResetPosition) {
   EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int);
 }
 
+TEST_F(IndexedTokenSourceTest, InsertTokens) {
+  IndexedTokenSource Source(lex("A1 A2"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A2");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+  EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  // A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+  // B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
+  // C1 B1 A1
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
+  // D1 C1 B1 A1
+  EXPECT_TOKEN_ID(Source.getNextToken(), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) {
+  IndexedTokenSource Source(lex("A1"));
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+  EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
+  EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
 } // namespace
 } // namespace format
 } // namespace clang


        


More information about the cfe-commits mailing list