[llvm] [llvm][mustache] Use single pass when tokenizing (PR #159196)
Erick Velez via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 13:51:25 PDT 2025
================
@@ -368,141 +368,101 @@ struct Tag {
llvm_unreachable("Unknown json::Value::Kind");
}
-static Tag findNextTag(StringRef Template, size_t StartPos, StringRef Open,
- StringRef Close) {
- const StringLiteral TripleOpen("{{{");
- const StringLiteral TripleClose("}}}");
-
- size_t NormalOpenPos = Template.find(Open, StartPos);
- size_t TripleOpenPos = Template.find(TripleOpen, StartPos);
-
- Tag Result;
-
- // Determine which tag comes first.
- if (TripleOpenPos != StringRef::npos &&
- (NormalOpenPos == StringRef::npos || TripleOpenPos <= NormalOpenPos)) {
- // Found a triple mustache tag.
- size_t EndPos =
- Template.find(TripleClose, TripleOpenPos + TripleOpen.size());
- if (EndPos == StringRef::npos)
- return Result; // No closing tag found.
-
- Result.TagKind = Tag::Kind::Triple;
- Result.StartPosition = TripleOpenPos;
- size_t ContentStart = TripleOpenPos + TripleOpen.size();
- Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
- Result.FullMatch = Template.substr(
- TripleOpenPos, (EndPos + TripleClose.size()) - TripleOpenPos);
- } else if (NormalOpenPos != StringRef::npos) {
- // Found a normal mustache tag.
- size_t EndPos = Template.find(Close, NormalOpenPos + Open.size());
- if (EndPos == StringRef::npos)
- return Result; // No closing tag found.
-
- Result.TagKind = Tag::Kind::Normal;
- Result.StartPosition = NormalOpenPos;
- size_t ContentStart = NormalOpenPos + Open.size();
- Result.Content = Template.substr(ContentStart, EndPos - ContentStart);
- Result.FullMatch =
- Template.substr(NormalOpenPos, (EndPos + Close.size()) - NormalOpenPos);
- }
-
- return Result;
-}
-
-static std::optional<std::pair<StringRef, StringRef>>
-processTag(const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) {
- LLVM_DEBUG(dbgs() << "[Tag] " << T.FullMatch << ", Content: " << T.Content
- << ", Kind: " << tagKindToString(T.TagKind) << "\n");
- if (T.TagKind == Tag::Kind::Triple) {
- Tokens.emplace_back(T.FullMatch, Ctx.Saver.save("&" + T.Content), '&', Ctx);
- return std::nullopt;
- }
- StringRef Interpolated = T.Content;
- if (!Interpolated.trim().starts_with("=")) {
- char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
- Tokens.emplace_back(T.FullMatch, Interpolated, Front, Ctx);
- return std::nullopt;
- }
- Tokens.emplace_back(T.FullMatch, Interpolated, '=', Ctx);
- StringRef DelimSpec = Interpolated.trim();
- DelimSpec = DelimSpec.drop_front(1);
- DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
- DelimSpec = DelimSpec.trim();
-
- std::pair<StringRef, StringRef> Ret = DelimSpec.split(' ');
- LLVM_DEBUG(dbgs() << "[Set Delimiter] NewOpen: " << Ret.first
- << ", NewClose: " << Ret.second << "\n");
- return Ret;
-}
-
// Simple tokenizer that splits the template into tokens.
-// The mustache spec allows {{{ }}} to unescape variables,
-// but we don't support that here. An unescape variable
-// is represented only by {{& variable}}.
static SmallVector<Token> tokenize(StringRef Template, MustacheContext &Ctx) {
LLVM_DEBUG(dbgs() << "[Tokenize Template] \"" << Template << "\"\n");
SmallVector<Token> Tokens;
SmallString<8> Open("{{");
SmallString<8> Close("}}");
- size_t Start = 0;
+ size_t Cursor = 0;
+ size_t TextStart = 0;
+
+ const StringLiteral TripleOpen("{{{");
+ const StringLiteral TripleClose("}}}");
- while (Start < Template.size()) {
- LLVM_DEBUG(dbgs() << "[Tokenize Loop] Start:" << Start << ", Open:'" << Open
- << "', Close:'" << Close << "'\n");
- Tag T = findNextTag(Template, Start, Open, Close);
+ while (Cursor < Template.size()) {
+ StringRef TemplateSuffix = Template.substr(Cursor);
+ StringRef TagOpen, TagClose;
+ Tag::Kind Kind;
+
+ // Determine which tag we've encountered.
+ if (TemplateSuffix.starts_with(TripleOpen)) {
+ Kind = Tag::Kind::Triple;
+ TagOpen = TripleOpen;
+ TagClose = TripleClose;
+ } else if (TemplateSuffix.starts_with(Open)) {
+ Kind = Tag::Kind::Normal;
+ TagOpen = Open;
+ TagClose = Close;
+ } else {
+ // Not at a tag, continue scanning.
+ ++Cursor;
+ continue;
+ }
- if (T.TagKind == Tag::Kind::None) {
- // No more tags, the rest is text.
- Tokens.emplace_back(Template.substr(Start));
- break;
+ // Found a tag, first add the preceding text.
+ if (Cursor > TextStart) {
----------------
evelez7 wrote:
nit: I don't think this `if` needs braces.
https://github.com/llvm/llvm-project/pull/159196
More information about the llvm-commits
mailing list