[clang-tools-extra] [llvm] [llvm] add support for mustache templating language (PR #105893)
Paul Kirth via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 6 17:14:20 PDT 2024
================
@@ -0,0 +1,446 @@
+//===-- Mustache.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Mustache.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::json;
+using namespace llvm::mustache;
+
+/// Returns a copy of \p Input in which every character that has an entry in
+/// \p Escape is replaced by the mapped string. Characters without an entry
+/// are copied through unchanged.
+///
+/// \param Input  text to escape.
+/// \param Escape table mapping a character to its replacement text; it is
+///               only read here.
+/// \returns the escaped text.
+SmallString<128> escapeString(StringRef Input,
+                              DenseMap<char, StringRef> &Escape) {
+  SmallString<128> EscapedString;
+  for (char C : Input) {
+    // Look the character up once and reuse the iterator, rather than calling
+    // find() and then hashing again through the inserting operator[].
+    auto It = Escape.find(C);
+    if (It != Escape.end())
+      EscapedString += It->second;
+    else
+      EscapedString += C;
+  }
+  return EscapedString;
+}
+
+/// Breaks \p Str into pieces at each occurrence of \p Delimiter and returns
+/// the pieces with leading and trailing whitespace trimmed.
+///
+/// A string that is exactly "." is returned as a single piece. An empty
+/// string yields no pieces, and because the loop stops as soon as the
+/// remainder is empty, a trailing delimiter does not add an empty piece.
+std::vector<SmallString<128>> split(StringRef Str, char Delimiter) {
+  std::vector<SmallString<128>> Pieces;
+  if (Str == ".") {
+    Pieces.push_back(Str);
+    return Pieces;
+  }
+  // Peel off the text before the next delimiter until nothing is left.
+  for (StringRef Rest = Str; !Rest.empty();) {
+    auto HeadTail = Rest.split(Delimiter);
+    Pieces.push_back(HeadTail.first.trim());
+    Rest = HeadTail.second;
+  }
+  return Pieces;
+}
+
+/// Builds a token from a tag.
+///
+/// \param RawBody    stored as the token's raw body.
+/// \param InnerBody  stored as the token body and used to derive the
+///                   accessor.
+/// \param Identifier character handed to getTokenType() to classify the
+///                   token.
+Token::Token(StringRef RawBody, StringRef InnerBody, char Identifier)
+    : RawBody(RawBody), TokenBody(InnerBody) {
+  TokenType = getTokenType(Identifier);
+
+  // Comment tokens get no accessor.
+  if (TokenType != Type::Comment) {
+    // For every type other than a plain variable the first character of the
+    // inner body is dropped before the remainder is split on '.'.
+    const bool IsVariable = TokenType == Type::Variable;
+    StringRef Path = IsVariable ? InnerBody : InnerBody.substr(1);
+    Accessor = split(Path.trim(), '.');
+  }
+}
+
+/// Builds a plain-text token: \p Str becomes both the raw body and the token
+/// body, the type is Type::Text, and the accessor is initialised from `{}`.
+Token::Token(StringRef Str)
+    : RawBody(Str),
+      TokenType(Type::Text),
+      TokenBody(Str),
+      Accessor({}) {}
+
+/// Maps a tag's identifying character to its token type.
+///
+/// '#' -> SectionOpen, '/' -> SectionClose, '^' -> InvertSectionOpen,
+/// '!' -> Comment, '>' -> Partial, '&' -> UnescapeVariable. Any other
+/// character is classified as Variable.
+Token::Type Token::getTokenType(char Identifier) {
+  if (Identifier == '#')
+    return Type::SectionOpen;
+  if (Identifier == '/')
+    return Type::SectionClose;
+  if (Identifier == '^')
+    return Type::InvertSectionOpen;
+  if (Identifier == '!')
+    return Type::Comment;
+  if (Identifier == '>')
+    return Type::Partial;
+  if (Identifier == '&')
+    return Type::UnescapeVariable;
+  // No recognised sigil: treat the tag as a plain variable.
+  return Type::Variable;
+}
+
+std::vector<Token> tokenize(StringRef Template) {
+ // Simple tokenizer that splits the template into tokens
+ // the mustache spec allows {{{ }}} to unescape variables
+ // but we don't support that here unescape variable
+ // is represented only by {{& variable}}
+ std::vector<Token> Tokens;
+ SmallString<128> Open("{{");
+ SmallString<128> Close("}}");
+ std::size_t Start = 0;
+ std::size_t DelimiterStart = Template.find(Open);
+ if (DelimiterStart == StringRef::npos) {
+ Tokens.push_back(Token(Template));
+ return Tokens;
+ }
+ while (DelimiterStart != StringRef::npos) {
+ if (DelimiterStart != Start) {
+ Token TextToken = Token(Template.substr(Start, DelimiterStart - Start));
+ Tokens.push_back(TextToken);
+ }
+
+ std::size_t DelimiterEnd = Template.find(Close, DelimiterStart);
+ if (DelimiterEnd == StringRef::npos) {
+ break;
+ }
+
+ SmallString<128> Interpolated =
+ Template.substr(DelimiterStart + Open.size(),
+ DelimiterEnd - DelimiterStart - Close.size());
+ SmallString<128> RawBody;
+ RawBody += Open;
+ RawBody += Interpolated;
+ RawBody += Close;
+
+ Tokens.push_back(Token(RawBody, Interpolated, Interpolated[0]));
+ Start = DelimiterEnd + Close.size();
+ DelimiterStart = Template.find(Open, Start);
+ }
+
+ if (Start < Template.size())
+ Tokens.push_back(Token(Template.substr(Start)));
+
+ // fix up white spaces for
+ // open sections/inverted sections/close section/comment
+ for (std::size_t I = 0; I < Tokens.size(); I++) {
----------------
ilovepi wrote:
```suggestion
for (size_t Idx = 0, End = Tokens.size(); Idx < End; ++Idx) {
```
Prefer pre-increment over post-increment. Avoid calling `size()` in the loop header unless you're mutating `Tokens`. Avoid `I` as a variable name unless it's an `Instruction`. Loop indexes are one of the few places we allow lowercase variable names, like `i`.
https://github.com/llvm/llvm-project/pull/105893
More information about the cfe-commits
mailing list