[clang] 1995d44 - [clang-format] Enable FormatTokenSource to insert tokens.
Manuel Klimek via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 15 04:43:40 PST 2023
Author: Manuel Klimek
Date: 2023-02-15T12:39:24Z
New Revision: 1995d4424505cb5a1c3f0e5f851a660ec32d7af1
URL: https://github.com/llvm/llvm-project/commit/1995d4424505cb5a1c3f0e5f851a660ec32d7af1
DIFF: https://github.com/llvm/llvm-project/commit/1995d4424505cb5a1c3f0e5f851a660ec32d7af1.diff
LOG: [clang-format] Enable FormatTokenSource to insert tokens.
In preparation for configured macro replacements in formatting,
add the ability to insert tokens to FormatTokenSource, and implement
token insertion in IndexedTokenSource.
Differential Revision: https://reviews.llvm.org/D143070
Added:
Modified:
clang/lib/Format/FormatTokenSource.h
clang/lib/Format/UnwrappedLineParser.h
clang/unittests/Format/FormatTokenSourceTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h
index 0bef45a7ff980..8bf7e6932c895 100644
--- a/clang/lib/Format/FormatTokenSource.h
+++ b/clang/lib/Format/FormatTokenSource.h
@@ -1,4 +1,3 @@
-
//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -8,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines the \c TokenSource interface, which provides a token
+/// This file defines the \c FormatTokenSource interface, which provides a token
/// stream as well as the ability to manipulate the token stream.
///
//===----------------------------------------------------------------------===//
@@ -18,12 +17,17 @@
#include "FormatToken.h"
#include "UnwrappedLineParser.h"
+#include "llvm/ADT/DenseMap.h"
#define DEBUG_TYPE "format-token-source"
namespace clang {
namespace format {
+// Navigate a token stream.
+//
+// Enables traversal of a token stream, resetting the position in a token
+// stream, as well as inserting new tokens.
class FormatTokenSource {
public:
virtual ~FormatTokenSource() {}
@@ -33,6 +37,9 @@ class FormatTokenSource {
// Returns the token preceding the token returned by the last call to
// getNextToken() in the token stream, or nullptr if no such token exists.
+ //
+ // Must not be called at the position immediately after insertTokens() is
+ // called.
virtual FormatToken *getPreviousToken() = 0;
// Returns the token that would be returned by the next call to
@@ -45,14 +52,31 @@ class FormatTokenSource {
virtual bool isEOF() = 0;
// Gets the current position in the token stream, to be used by setPosition().
+ //
+ // Note that the value of the position is not meaningful, and specifically
+ // should not be used to get relative token positions.
virtual unsigned getPosition() = 0;
// Resets the token stream to the state it was in when getPosition() returned
// Position, and return the token at that position in the stream.
virtual FormatToken *setPosition(unsigned Position) = 0;
+
+ // Insert the given tokens before the current position.
+ // Returns the first token in \c Tokens.
+ // The next returned token will be the second token in \c Tokens.
+ // Requires the last token in \c Tokens to be EOF; when that EOF is reached,
+ // the stream resumes at the token that was current before insertTokens().
+ //
+ // For example, given the token sequence 'a1 a2':
+ // getNextToken() -> a1
+ // insertTokens('b1 b2') -> b1
+ // getNextToken() -> b2
+ // getNextToken() -> a1
+ // getNextToken() -> a2
+ virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
};
-class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
+class IndexedTokenSource : public FormatTokenSource {
public:
IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
: Tokens(Tokens), Position(-1) {}
@@ -65,7 +89,7 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
});
return Tokens[Position];
}
- ++Position;
+ Position = successor(Position);
LLVM_DEBUG({
llvm::dbgs() << "Next ";
dbgToken(Position);
@@ -74,16 +98,17 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
}
FormatToken *getPreviousToken() override {
+ assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof));
return Position > 0 ? Tokens[Position - 1] : nullptr;
}
FormatToken *peekNextToken(bool SkipComment = false) override {
if (isEOF())
return Tokens[Position];
- int Next = Position + 1;
+ int Next = successor(Position);
if (SkipComment)
while (Tokens[Next]->is(tok::comment))
- ++Next;
+ Next = successor(Next);
LLVM_DEBUG({
llvm::dbgs() << "Peeking ";
dbgToken(Next);
@@ -107,9 +132,40 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
return Tokens[Position];
}
+ // Splices the tokens in New into the stream in front of the current token.
+ //
+ // New is appended to the end of Tokens, and a jump is recorded from the slot
+ // holding New's trailing EOF back to the current position, so that stepping
+ // past the last inserted token resumes at the token that was current when
+ // this function was called.
+ //
+ // Requires that getNextToken() has been called at least once and that New is
+ // non-empty and terminated by an EOF token. Returns the first token of New,
+ // which becomes the current token.
+ FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
+   assert(Position != -1);
+   // Check for emptiness first: inspecting the last element of an empty range
+   // would itself be undefined behavior.
+   assert(!New.empty() && New.back()->Tok.is(tok::eof));
+   int Next = Tokens.size();
+   Tokens.append(New.begin(), New.end());
+   LLVM_DEBUG({
+     llvm::dbgs() << "Inserting:\n";
+     for (int I = Next, E = Tokens.size(); I != E; ++I)
+       dbgToken(I, " ");
+     llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
+                  << Position << "\n";
+   });
+   // The inserted EOF slot redirects to the pre-insertion position.
+   Jumps[Tokens.size() - 1] = Position;
+   Position = Next;
+   LLVM_DEBUG({
+     llvm::dbgs() << "At inserted token ";
+     dbgToken(Position);
+   });
+   return Tokens[Position];
+ }
+
void reset() { Position = -1; }
private:
+ // Returns the position that follows Current in the logical token stream:
+ // normally Current + 1, unless a jump is registered for that slot, in which
+ // case the jump's target is returned instead.
+ int successor(int Current) const {
+   const int Candidate = Current + 1;
+   const auto JumpIt = Jumps.find(Candidate);
+   if (JumpIt == Jumps.end())
+     return Candidate;
+   // A jump target is never expected to be the source of another jump.
+   assert(Jumps.find(JumpIt->second) == Jumps.end());
+   return JumpIt->second;
+ }
+
void dbgToken(int Position, llvm::StringRef Indent = "") {
FormatToken *Tok = Tokens[Position];
llvm::dbgs() << Indent << "[" << Position
@@ -117,8 +173,12 @@ class LLVM_GSL_POINTER IndexedTokenSource : public FormatTokenSource {
<< ", Macro: " << !!Tok->MacroCtx << "\n";
}
- ArrayRef<FormatToken *> Tokens;
+ SmallVector<FormatToken *> Tokens;
int Position;
+
+ // Maps from position a to position b, so that when we reach a, the token
+ // stream continues at position b instead.
+ llvm::DenseMap<int, int> Jumps;
};
class ScopedMacroState : public FormatTokenSource {
@@ -175,6 +235,10 @@ class ScopedMacroState : public FormatTokenSource {
return Token;
}
+ // Token insertion is not supported while parsing a macro. The assert flags
+ // misuse in builds with assertions enabled; the explicit return keeps builds
+ // where assert() compiles away from flowing off the end of a non-void
+ // function, which would be undefined behavior.
+ FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
+   assert(false && "Cannot insert tokens while parsing a macro.");
+   return nullptr;
+ }
+
private:
bool eof() {
return Token && Token->HasUnescapedNewline &&
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index f043e567eb733..77277471f7e3c 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -280,9 +280,6 @@ class UnwrappedLineParser {
FormatTokenSource *Tokens;
UnwrappedLineConsumer &Callback;
- // FIXME: This is a temporary measure until we have reworked the ownership
- // of the format tokens. The goal is to have the actual tokens created and
- // owned outside of and handed into the UnwrappedLineParser.
ArrayRef<FormatToken *> AllTokens;
// Keeps a stack of the states of nested control statements (true if the
diff --git a/clang/unittests/Format/FormatTokenSourceTest.cpp b/clang/unittests/Format/FormatTokenSourceTest.cpp
index 3274643aa8688..74de93057df6a 100644
--- a/clang/unittests/Format/FormatTokenSourceTest.cpp
+++ b/clang/unittests/Format/FormatTokenSourceTest.cpp
@@ -28,12 +28,17 @@ class IndexedTokenSourceTest : public ::testing::Test {
#define EXPECT_TOKEN_KIND(FormatTok, Kind) \
do { \
FormatToken *Tok = FormatTok; \
- EXPECT_EQ((Tok)->Tok.getKind(), Kind) << *(Tok); \
+ EXPECT_EQ(Tok->Tok.getKind(), Kind) << *Tok; \
+ } while (false);
+#define EXPECT_TOKEN_ID(FormatTok, Name) \
+ do { \
+ FormatToken *Tok = FormatTok; \
+ EXPECT_EQ(Tok->Tok.getKind(), tok::identifier) << *Tok; \
+ EXPECT_EQ(Tok->TokenText, Name) << *Tok; \
} while (false);
TEST_F(IndexedTokenSourceTest, EmptyInput) {
- TokenList Tokens = lex("");
- IndexedTokenSource Source(Tokens);
+ IndexedTokenSource Source(lex(""));
EXPECT_FALSE(Source.isEOF());
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TRUE(Source.isEOF());
@@ -46,8 +51,7 @@ TEST_F(IndexedTokenSourceTest, EmptyInput) {
}
TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
- TokenList Tokens = lex("int a;");
- IndexedTokenSource Source(Tokens);
+ IndexedTokenSource Source(lex("int a;"));
EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::kw_int);
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::kw_int);
EXPECT_EQ(Source.getPreviousToken(), nullptr);
@@ -60,11 +64,12 @@ TEST_F(IndexedTokenSourceTest, NavigateTokenStream) {
EXPECT_TOKEN_KIND(Source.peekNextToken(), tok::eof);
EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
+ EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof);
+ EXPECT_TOKEN_KIND(Source.getPreviousToken(), tok::semi);
}
TEST_F(IndexedTokenSourceTest, ResetPosition) {
- TokenList Tokens = lex("int a;");
- IndexedTokenSource Source(Tokens);
+ IndexedTokenSource Source(lex("int a;"));
Source.getNextToken();
unsigned Position = Source.getPosition();
Source.getNextToken();
@@ -73,6 +78,50 @@ TEST_F(IndexedTokenSourceTest, ResetPosition) {
EXPECT_TOKEN_KIND(Source.setPosition(Position), tok::kw_int);
}
+TEST_F(IndexedTokenSourceTest, InsertTokens) { // insert mid-stream
+ IndexedTokenSource Source(lex("A1 A2"));
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); // current token: A1
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1"); // first inserted
+ EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); // back at A1 after inserted run
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A2");
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensAtEOF) { // insert once at EOF
+ IndexedTokenSource Source(lex("A1"));
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+ EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); // current token: EOF
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("B1 B2")), "B1");
+ EXPECT_TOKEN_ID(Source.getNextToken(), "B2");
+ EXPECT_TOKEN_KIND(Source.getNextToken(), tok::eof); // EOF follows inserted run
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursive) { // nested insertions
+ IndexedTokenSource Source(lex("A1"));
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+ // A1
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+ // B1 A1
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1");
+ // C1 B1 A1
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1");
+ // D1 C1 B1 A1
+ EXPECT_TOKEN_ID(Source.getNextToken(), "C1"); // most recent insertion unwinds first
+ EXPECT_TOKEN_ID(Source.getNextToken(), "B1");
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); // finally back at the original token
+}
+
+TEST_F(IndexedTokenSourceTest, InsertTokensRecursiveAtEndOfSequence) {
+ IndexedTokenSource Source(lex("A1"));
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("B1")), "B1");
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1"); // B run exhausted, back at A1
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("C1")), "C1"); // insert again at A1
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+ EXPECT_TOKEN_ID(Source.insertTokens(lex("D1")), "D1"); // and a third time
+ EXPECT_TOKEN_ID(Source.getNextToken(), "A1");
+}
+
} // namespace
} // namespace format
} // namespace clang
More information about the cfe-commits
mailing list