r279727 - clang-format: [JS] nested and tagged template strings.
Martin Probst via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 25 03:13:22 PDT 2016
Author: mprobst
Date: Thu Aug 25 05:13:21 2016
New Revision: 279727
URL: http://llvm.org/viewvc/llvm-project?rev=279727&view=rev
Log:
clang-format: [JS] nested and tagged template strings.
JavaScript template strings can be nested arbitrarily:
foo = `text ${es.map(e => { return `<${e}>`; })} text`;
This change lexes nested template strings using a stack of lexer states to
correctly switch back to template string lexing on closing braces.
Also, reuse the same stack for the token-stashed logic.
Reviewers: djasper
Subscribers: cfe-commits, klimek
Differential Revision: https://reviews.llvm.org/D22431
Modified:
cfe/trunk/lib/Format/FormatTokenLexer.cpp
cfe/trunk/lib/Format/FormatTokenLexer.h
cfe/trunk/lib/Format/TokenAnnotator.cpp
cfe/trunk/unittests/Format/FormatTestJS.cpp
Modified: cfe/trunk/lib/Format/FormatTokenLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.cpp?rev=279727&r1=279726&r2=279727&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp Thu Aug 25 05:13:21 2016
@@ -26,12 +26,11 @@ namespace format {
FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
const FormatStyle &Style,
encoding::Encoding Encoding)
- : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
- LessStashed(false), Column(0), TrailingWhitespace(0),
- SourceMgr(SourceMgr), ID(ID), Style(Style),
- IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
- Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
+ : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
+ Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+ Style(Style), IdentTable(getFormattingLangOpts(Style)),
+ Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
+ FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
@@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer
Tokens.push_back(getNextToken());
if (Style.Language == FormatStyle::LK_JavaScript) {
tryParseJSRegexLiteral();
- tryParseTemplateString();
+ handleTemplateStrings();
}
tryMergePreviousTokens();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLi
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
-void FormatTokenLexer::tryParseTemplateString() {
+void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
- if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+
+ if (BacktickToken->is(tok::l_brace)) {
+ StateStack.push(LexerState::NORMAL);
return;
+ }
+ if (BacktickToken->is(tok::r_brace)) {
+ StateStack.pop();
+ if (StateStack.top() != LexerState::TEMPLATE_STRING)
+ return;
+ // If back in TEMPLATE_STRING, fall through and continue parsing the template string.
+ } else if (BacktickToken->is(tok::unknown) &&
+ BacktickToken->TokenText == "`") {
+ StateStack.push(LexerState::TEMPLATE_STRING);
+ } else {
+ return; // Not actually a template
+ }
// 'Manually' lex ahead in the current file buffer.
const char *Offset = Lex->getBufferLocation();
const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
- for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
- if (*Offset == '\\')
+ for (; Offset != Lex->getBuffer().end(); ++Offset) {
+ if (Offset[0] == '`') {
+ StateStack.pop();
+ break;
+ }
+ if (Offset[0] == '\\') {
++Offset; // Skip the escaped character.
+ } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
+ Offset[1] == '{') {
+ // '${' introduces an expression interpolation in the template string.
+ StateStack.push(LexerState::NORMAL);
+ ++Offset;
+ break;
+ }
}
StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateS
Style.TabWidth, Encoding);
}
- resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+ SourceLocation loc = Offset < Lex->getBuffer().end()
+ ? Lex->getSourceLocation(Offset + 1)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(loc));
}
bool FormatTokenLexer::tryMerge_TMacro() {
@@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashe
}
FormatToken *FormatTokenLexer::getNextToken() {
- if (GreaterStashed) {
- GreaterStashed = false;
- return getStashedToken();
- }
- if (LessStashed) {
- LessStashed = false;
+ if (StateStack.top() == LexerState::TOKEN_STASHED) {
+ StateStack.pop();
return getStashedToken();
}
@@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextTo
} else if (FormatTok->Tok.is(tok::greatergreater)) {
FormatTok->Tok.setKind(tok::greater);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- GreaterStashed = true;
+ StateStack.push(LexerState::TOKEN_STASHED);
} else if (FormatTok->Tok.is(tok::lessless)) {
FormatTok->Tok.setKind(tok::less);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- LessStashed = true;
+ StateStack.push(LexerState::TOKEN_STASHED);
}
// Now FormatTok is the next non-whitespace token.
Modified: cfe/trunk/lib/Format/FormatTokenLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.h?rev=279727&r1=279726&r2=279727&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.h (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.h Thu Aug 25 05:13:21 2016
@@ -23,9 +23,17 @@
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
+#include <stack>
+
namespace clang {
namespace format {
+enum LexerState {
+ NORMAL,
+ TEMPLATE_STRING,
+ TOKEN_STASHED,
+};
+
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
@@ -53,7 +61,16 @@ private:
// its text if successful.
void tryParseJSRegexLiteral();
- void tryParseTemplateString();
+ // Handles JavaScript template strings.
+ //
+ // JavaScript template strings use backticks ('`') as delimiters, and allow
+ // embedding expressions nested in ${expr-here}. Template strings can be
+ // nested recursively, i.e. expressions can contain template strings in turn.
+ //
+ // The code below parses from a backtick (or the '}' closing an embedded
+ // expression) up to a closing backtick or an opening ${. It also keeps a
+ // stack of lexing contexts to handle nested parts by balancing curly braces.
+ void handleTemplateStrings();
bool tryMerge_TMacro();
@@ -65,7 +82,7 @@ private:
FormatToken *FormatTok;
bool IsFirstToken;
- bool GreaterStashed, LessStashed;
+ std::stack<LexerState> StateStack;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=279727&r1=279726&r2=279727&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Thu Aug 25 05:13:21 2016
@@ -858,7 +858,7 @@ private:
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
- TT_RegexLiteral))
+ TT_RegexLiteral, TT_TemplateString))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
@@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(co
return 100;
if (Left.is(TT_JsTypeColon))
return 35;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return 100;
}
if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
@@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return false;
+ if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+ return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
return false;
Modified: cfe/trunk/unittests/Format/FormatTestJS.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJS.cpp?rev=279727&r1=279726&r2=279727&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestJS.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestJS.cpp Thu Aug 25 05:13:21 2016
@@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) {
TEST_F(FormatTestJS, TemplateStrings) {
// Keeps any whitespace/indentation within the template string.
verifyFormat("var x = `hello\n"
- " ${ name }\n"
+ " ${name}\n"
" !`;",
"var x = `hello\n"
" ${ name }\n"
@@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) {
"var y;",
"var x = ` \\` a`;\n"
"var y;");
+ // Escaped dollar.
+ verifyFormat("var x = ` \\${foo}`;\n");
+}
+
+TEST_F(FormatTestJS, NestedTemplateStrings) {
+ verifyFormat(
+ "var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
+ verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
+}
+
+TEST_F(FormatTestJS, TaggedTemplateStrings) {
+ verifyFormat("var x = html`<ul>`;");
}
TEST_F(FormatTestJS, CastSyntax) {
More information about the cfe-commits
mailing list