[llvm] 1fcf481 - [llvm][mustache] Support setting delimiters in templates (#159187)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 29 17:00:05 PDT 2025
Author: Paul Kirth
Date: 2025-09-29T17:00:00-07:00
New Revision: 1fcf481631b5521732d99963bcb69a81d19e9f79
URL: https://github.com/llvm/llvm-project/commit/1fcf481631b5521732d99963bcb69a81d19e9f79
DIFF: https://github.com/llvm/llvm-project/commit/1fcf481631b5521732d99963bcb69a81d19e9f79.diff
LOG: [llvm][mustache] Support setting delimiters in templates (#159187)
The base mustache spec allows setting custom delimiters, which slightly
change parsing of partials. This patch implements that feature by adding
a new token type, and changing the tokenizer's behavior to allow setting
custom delimiters.
Added:
Modified:
llvm/lib/Support/Mustache.cpp
llvm/unittests/Support/MustacheTest.cpp
llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 8da6fdb7beff9..9834bf6f9361f 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -7,9 +7,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Mustache.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
#include <sstream>
+#define DEBUG_TYPE "mustache"
+
using namespace llvm;
using namespace llvm::mustache;
@@ -62,6 +66,7 @@ class Token {
InvertSectionOpen,
UnescapeVariable,
Comment,
+ SetDelimiter,
};
Token(std::string Str)
@@ -102,6 +107,8 @@ class Token {
return Type::Partial;
case '&':
return Type::UnescapeVariable;
+ case '=':
+ return Type::SetDelimiter;
default:
return Type::Variable;
}
@@ -189,27 +196,27 @@ class ASTNode {
};
// A wrapper for arena allocator for ASTNodes
-AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
- llvm::StringMap<Lambda> &Lambdas,
- llvm::StringMap<SectionLambda> &SectionLambdas,
- EscapeMap &Escapes) {
+static AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
+ llvm::StringMap<Lambda> &Lambdas,
+ llvm::StringMap<SectionLambda> &SectionLambdas,
+ EscapeMap &Escapes) {
return std::make_unique<ASTNode>(Partials, Lambdas, SectionLambdas, Escapes);
}
-AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
- llvm::StringMap<AstPtr> &Partials,
- llvm::StringMap<Lambda> &Lambdas,
- llvm::StringMap<SectionLambda> &SectionLambdas,
- EscapeMap &Escapes) {
+static AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
+ llvm::StringMap<AstPtr> &Partials,
+ llvm::StringMap<Lambda> &Lambdas,
+ llvm::StringMap<SectionLambda> &SectionLambdas,
+ EscapeMap &Escapes) {
return std::make_unique<ASTNode>(T, std::move(A), Parent, Partials, Lambdas,
SectionLambdas, Escapes);
}
-AstPtr createTextNode(std::string Body, ASTNode *Parent,
- llvm::StringMap<AstPtr> &Partials,
- llvm::StringMap<Lambda> &Lambdas,
- llvm::StringMap<SectionLambda> &SectionLambdas,
- EscapeMap &Escapes) {
+static AstPtr createTextNode(std::string Body, ASTNode *Parent,
+ llvm::StringMap<AstPtr> &Partials,
+ llvm::StringMap<Lambda> &Lambdas,
+ llvm::StringMap<SectionLambda> &SectionLambdas,
+ EscapeMap &Escapes) {
return std::make_unique<ASTNode>(std::move(Body), Parent, Partials, Lambdas,
SectionLambdas, Escapes);
}
@@ -226,7 +233,7 @@ AstPtr createTextNode(std::string Body, ASTNode *Parent,
// and the current token is the second token.
// For example:
// "{{#Section}}"
-bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx == 0)
return true;
@@ -242,7 +249,7 @@ bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
// Function to check if there's no meaningful text ahead.
// We determine if a token has text ahead if the left of previous
// token does not start with a newline.
-bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
+static bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
if (Idx >= Tokens.size() - 1)
return true;
@@ -255,11 +262,11 @@ bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
return !TokenBody.starts_with("\r\n") && !TokenBody.starts_with("\n");
}
-bool requiresCleanUp(Token::Type T) {
+static bool requiresCleanUp(Token::Type T) {
// We must clean up all the tokens that could contain child nodes.
return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen ||
T == Token::Type::SectionClose || T == Token::Type::Comment ||
- T == Token::Type::Partial;
+ T == Token::Type::Partial || T == Token::Type::SetDelimiter;
}
// Adjust next token body if there is no text ahead.
@@ -268,7 +275,7 @@ bool requiresCleanUp(Token::Type T) {
// "{{! Comment }} \nLine 2"
// would be considered as no text ahead and should be rendered as
// " Line 2"
-void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
+static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
Token &NextToken = Tokens[Idx + 1];
StringRef NextTokenBody = NextToken.TokenBody;
// Cut off the leading newline which could be \n or \r\n.
@@ -294,57 +301,128 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
CurrentToken.setIndentation(Indentation);
}
+struct Tag {
+ enum class Kind {
+ None,
+ Normal, // {{...}}
+ Triple, // {{{...}}}
+ };
+
+ Kind TagKind = Kind::None;
+ StringRef Content; // The content between the delimiters.
+ StringRef FullMatch; // The entire tag, including delimiters.
+ size_t StartPosition = StringRef::npos;
+};
+
+static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open,
+ StringRef Close) {
+ const StringLiteral TripleOpen("{{{");
+ const StringLiteral TripleClose("}}}");
+
+ size_t NormalOpenPos = Template.find(Open, StartPos);
+ size_t TripleOpenPos = Template.find(TripleOpen, StartPos);
+
+ Tag Result;
+
+ // Determine which tag comes first.
+ if (TripleOpenPos != StringRef::npos &&
+ (NormalOpenPos == StringRef::npos || TripleOpenPos <= NormalOpenPos)) {
+ // Found a triple mustache tag.
+ size_t EndPos =
+ Template.find(TripleClose, TripleOpenPos + TripleOpen.size());
+ if (EndPos == StringRef::npos)
+ return Result; // No closing tag found.
+
+ Result.TagKind = Tag::Kind::Triple;
+ Result.StartPosition = TripleOpenPos;
+ size_t ContentStart = TripleOpenPos + TripleOpen.size();
+ Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
+ Result.FullMatch = Template.substr(
+ TripleOpenPos, (EndPos + TripleClose.size()) - TripleOpenPos);
+ } else if (NormalOpenPos != StringRef::npos) {
+ // Found a normal mustache tag.
+ size_t EndPos = Template.find(Close, NormalOpenPos + Open.size());
+ if (EndPos == StringRef::npos)
+ return Result; // No closing tag found.
+
+ Result.TagKind = Tag::Kind::Normal;
+ Result.StartPosition = NormalOpenPos;
+ size_t ContentStart = NormalOpenPos + Open.size();
+ Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
+ Result.FullMatch =
+ Template.substr(NormalOpenPos, (EndPos + Close.size()) - NormalOpenPos);
+ }
+
+ return Result;
+}
+
+static void processTag(const Tag &T, SmallVectorImpl<Token> &Tokens,
+ SmallString<8> &Open, SmallString<8> &Close) {
+ LLVM_DEBUG(dbgs() << " Found tag: \"" << T.FullMatch << "\", Content: \""
+ << T.Content << "\"\n");
+ if (T.TagKind == Tag::Kind::Triple) {
+ Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&');
+ LLVM_DEBUG(dbgs() << " Created UnescapeVariable token.\n");
+ return;
+ }
+ StringRef Interpolated = T.Content;
+ std::string RawBody = T.FullMatch.str();
+ if (!Interpolated.trim().starts_with("=")) {
+ char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
+ Tokens.emplace_back(RawBody, Interpolated.str(), Front);
+ LLVM_DEBUG(dbgs() << " Created tag token of type '" << Front << "'\n");
+ return;
+ }
+ Tokens.emplace_back(RawBody, Interpolated.str(), '=');
+ StringRef DelimSpec = Interpolated.trim();
+ DelimSpec = DelimSpec.drop_front(1);
+ DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
+ DelimSpec = DelimSpec.trim();
+
+ auto [NewOpen, NewClose] = DelimSpec.split(' ');
+ Open = NewOpen;
+ Close = NewClose;
+
+ LLVM_DEBUG(dbgs() << " Found Set Delimiter tag. NewOpen='" << Open
+ << "', NewClose='" << Close << "'\n");
+}
+
// Simple tokenizer that splits the template into tokens.
// The mustache spec allows {{{ }}} to unescape variables,
// but we don't support that here. An unescape variable
// is represented only by {{& variable}}.
-SmallVector<Token> tokenize(StringRef Template) {
+static SmallVector<Token> tokenize(StringRef Template) {
+ LLVM_DEBUG(dbgs() << "Tokenizing template: \"" << Template << "\"\n");
SmallVector<Token> Tokens;
- StringLiteral Open("{{");
- StringLiteral Close("}}");
- StringLiteral TripleOpen("{{{");
- StringLiteral TripleClose("}}}");
+ SmallString<8> Open("{{");
+ SmallString<8> Close("}}");
size_t Start = 0;
- size_t DelimiterStart = Template.find(Open);
- if (DelimiterStart == StringRef::npos) {
- Tokens.emplace_back(Template.str());
- return Tokens;
- }
- while (DelimiterStart != StringRef::npos) {
- if (DelimiterStart != Start)
- Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
-
- if (Template.substr(DelimiterStart).starts_with(TripleOpen)) {
- size_t DelimiterEnd = Template.find(TripleClose, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
- size_t BodyStart = DelimiterStart + TripleOpen.size();
- std::string Body =
- Template.substr(BodyStart, DelimiterEnd - BodyStart).str();
- std::string RawBody =
- Template.substr(DelimiterStart, DelimiterEnd - DelimiterStart + 3)
- .str();
- Tokens.emplace_back(RawBody, "&" + Body, '&');
- Start = DelimiterEnd + TripleClose.size();
- } else {
- size_t DelimiterEnd = Template.find(Close, DelimiterStart);
- if (DelimiterEnd == StringRef::npos)
- break;
-
- // Extract the Interpolated variable without delimiters.
- size_t InterpolatedStart = DelimiterStart + Open.size();
- size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size();
- std::string Interpolated =
- Template.substr(InterpolatedStart, InterpolatedEnd).str();
- std::string RawBody = Open.str() + Interpolated + Close.str();
- Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
- Start = DelimiterEnd + Close.size();
+
+ while (Start < Template.size()) {
+ LLVM_DEBUG(dbgs() << "Loop start. Start=" << Start << ", Open='" << Open
+ << "', Close='" << Close << "'\n");
+ Tag T = findNextTag(Template, Start, Open, Close);
+
+ if (T.TagKind == Tag::Kind::None) {
+ // No more tags, the rest is text.
+ Tokens.emplace_back(Template.substr(Start).str());
+ LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \""
+ << Template.substr(Start) << "\"\n");
+ break;
+ }
+
+ // Add the text before the tag.
+ if (T.StartPosition > Start) {
+ StringRef Text = Template.substr(Start, T.StartPosition - Start);
+ Tokens.emplace_back(Text.str());
+ LLVM_DEBUG(dbgs() << " Created Text token: \"" << Text << "\"\n");
}
- DelimiterStart = Template.find(Open, Start);
- }
- if (Start < Template.size())
- Tokens.emplace_back(Template.substr(Start).str());
+ processTag(T, Tokens, Open, Close);
+
+ // Move past the tag.
+ Start = T.StartPosition + T.FullMatch.size();
+ }
// Fix up white spaces for:
// - open sections
@@ -386,6 +464,7 @@ SmallVector<Token> tokenize(StringRef Template) {
if ((!HasTextBehind && !HasTextAhead) || (!HasTextBehind && Idx == LastIdx))
stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType);
}
+ LLVM_DEBUG(dbgs() << "Tokenizing finished.\n");
return Tokens;
}
@@ -563,13 +642,14 @@ void Parser::parseMustache(ASTNode *Parent, llvm::StringMap<AstPtr> &Partials,
break;
}
case Token::Type::Comment:
+ case Token::Type::SetDelimiter:
break;
case Token::Type::SectionClose:
return;
}
}
}
-void toMustacheString(const json::Value &Data, raw_ostream &OS) {
+static void toMustacheString(const json::Value &Data, raw_ostream &OS) {
switch (Data.kind()) {
case json::Value::Null:
return;
@@ -602,6 +682,8 @@ void toMustacheString(const json::Value &Data, raw_ostream &OS) {
}
void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
+ if (Ty != Root && Ty != Text && AccessorValue.empty())
+ return;
// Set the parent context to the incoming context so that we
// can walk up the context tree correctly in findContext().
ParentContext = &CurrentCtx;
@@ -801,3 +883,5 @@ Template &Template::operator=(Template &&Other) noexcept {
return *this;
}
} // namespace llvm::mustache
+
+#undef DEBUG_TYPE
diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp
index 0ebbc58e023cc..83f6e9afd1e71 100644
--- a/llvm/unittests/Support/MustacheTest.cpp
+++ b/llvm/unittests/Support/MustacheTest.cpp
@@ -1335,7 +1335,7 @@ TEST(MustacheDelimiters, PairBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(Hey!)", Out);
+ EXPECT_EQ("(Hey!)", Out);
}
TEST(MustacheDelimiters, SpecialCharacters) {
@@ -1344,7 +1344,7 @@ TEST(MustacheDelimiters, SpecialCharacters) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("(It worked!)", Out);
+ EXPECT_EQ("(It worked!)", Out);
}
TEST(MustacheDelimiters, Sections) {
@@ -1355,7 +1355,7 @@ TEST(MustacheDelimiters, Sections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1368,7 +1368,7 @@ TEST(MustacheDelimiters, InvertedSections) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ EXPECT_EQ("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
"interpolated.\n]\n",
Out);
}
@@ -1380,7 +1380,7 @@ TEST(MustacheDelimiters, PartialInheritence) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. ]\n[ .yes. ]\n", Out);
+ EXPECT_EQ("[ .yes. ]\n[ .yes. ]\n", Out);
}
TEST(MustacheDelimiters, PostPartialBehavior) {
@@ -1390,7 +1390,7 @@ TEST(MustacheDelimiters, PostPartialBehavior) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
+ EXPECT_EQ("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
}
TEST(MustacheDelimiters, SurroundingWhitespace) {
@@ -1417,7 +1417,7 @@ TEST(MustacheDelimiters, StandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, IndentedStandaloneTag) {
@@ -1426,7 +1426,7 @@ TEST(MustacheDelimiters, IndentedStandaloneTag) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("Begin.\nEnd.\n", Out);
+ EXPECT_EQ("Begin.\nEnd.\n", Out);
}
TEST(MustacheDelimiters, StandaloneLineEndings) {
@@ -1435,7 +1435,7 @@ TEST(MustacheDelimiters, StandaloneLineEndings) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("|\r\n|", Out);
+ EXPECT_EQ("|\r\n|", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
@@ -1444,7 +1444,7 @@ TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=", Out);
+ EXPECT_EQ("=", Out);
}
TEST(MustacheDelimiters, StandaloneWithoutNewline) {
@@ -1453,7 +1453,7 @@ TEST(MustacheDelimiters, StandaloneWithoutNewline) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("=\n", Out);
+ EXPECT_EQ("=\n", Out);
}
TEST(MustacheDelimiters, PairwithPadding) {
diff --git a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
index ea1395b2646f6..bdcef376547fb 100644
--- a/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
+++ b/llvm/utils/llvm-test-mustache-spec/llvm-test-mustache-spec.cpp
@@ -54,20 +54,6 @@ static int NumXFail = 0;
static int NumSuccess = 0;
static const StringMap<StringSet<>> XFailTestNames = {{
- {"delimiters.json",
- {
- "Pair Behavior",
- "Special Characters",
- "Sections",
- "Inverted Sections",
- "Partial Inheritence",
- "Post-Partial Behavior",
- "Standalone Tag",
- "Indented Standalone Tag",
- "Standalone Line Endings",
- "Standalone Without Previous Line",
- "Standalone Without Newline",
- }},
{"~dynamic-names.json",
{
"Basic Behavior - Partial",
@@ -113,7 +99,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Block reindentation",
"Intrinsic indentation",
"Nested block reindentation",
-
}},
{"~lambdas.json",
{
@@ -126,7 +111,6 @@ static const StringMap<StringSet<>> XFailTestNames = {{
"Section - Expansion",
"Section - Alternate Delimiters",
"Section - Multiple Calls",
-
}},
{"partials.json", {"Standalone Indentation"}},
}};
More information about the llvm-commits
mailing list