[clang-tools-extra] [llvm] [llvm] add support for mustache templating language (PR #105893)
Paul Kirth via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 12 17:23:54 PDT 2024
================
@@ -0,0 +1,276 @@
+//===-- Mustache.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Mustache.h"
+#include "llvm/Support/Error.h"
+#include <iostream>
+#include <regex>
+#include <sstream>
+
+using namespace llvm;
+using namespace llvm::json;
+using namespace llvm::mustache;
+
+// Returns a copy of Input with the characters that are significant in HTML
+// replaced by their entity references:
+//   '&' -> "&amp;", '<' -> "&lt;", '>' -> "&gt;", '"' -> "&quot;",
+//   '\'' -> "&#39;".
+// All other characters are copied through unchanged.
+std::string escapeHtml(const std::string &Input) {
+  std::string EscapedString;
+  // Lower bound only: every escaped character expands to several characters.
+  EscapedString.reserve(Input.size());
+
+  for (char C : Input) {
+    switch (C) {
+    case '&':
+      EscapedString += "&amp;";
+      break;
+    case '<':
+      EscapedString += "&lt;";
+      break;
+    case '>':
+      EscapedString += "&gt;";
+      break;
+    case '"':
+      EscapedString += "&quot;";
+      break;
+    case '\'':
+      EscapedString += "&#39;";
+      break;
+    default:
+      EscapedString += C;
+    }
+  }
+
+  return EscapedString;
+}
+
+// Breaks Str into the fields separated by Delimiter.
+//
+// The string "." is returned as a single "." field rather than being split.
+// Empty fields in the middle or at the start are kept, but an empty field
+// after a trailing delimiter is not produced; an empty Str gives no fields.
+std::vector<std::string> split(const std::string &Str, char Delimiter) {
+  if (Str == ".")
+    return {Str};
+
+  std::vector<std::string> Fields;
+  std::string::size_type Begin = 0;
+  // Stopping once Begin reaches the end is what drops a trailing empty field.
+  while (Begin < Str.size()) {
+    const std::string::size_type End = Str.find(Delimiter, Begin);
+    if (End == std::string::npos) {
+      Fields.push_back(Str.substr(Begin));
+      break;
+    }
+    Fields.push_back(Str.substr(Begin, End - Begin));
+    Begin = End + 1;
+  }
+  return Fields;
+}
+
+// Builds a tag token from Str, using Identifier to pick the token type.
+//
+// '#', '/', '^', '!', '>' and '&' select section open, section close,
+// inverted section open, comment, partial and unescaped variable
+// respectively; any other identifier yields a plain variable.
+//
+// Comment tokens return before TokenBody and Accessor are assigned. For every
+// other type TokenBody is Str, and Accessor is Str (without its first
+// character when the type is not Variable), trimmed and split on '.'.
+Token::Token(std::string Str, char Identifier) {
+  if (Identifier == '#')
+    TokenType = Type::SectionOpen;
+  else if (Identifier == '/')
+    TokenType = Type::SectionClose;
+  else if (Identifier == '^')
+    TokenType = Type::InvertSectionOpen;
+  else if (Identifier == '!')
+    TokenType = Type::Comment;
+  else if (Identifier == '>')
+    TokenType = Type::Partial;
+  else if (Identifier == '&')
+    TokenType = Type::UnescapeVariable;
+  else
+    TokenType = Type::Variable;
+
+  if (TokenType == Type::Comment)
+    return;
+
+  TokenBody = Str;
+  // Only non-variable tags have their first character stripped before the
+  // name is split into accessor components.
+  const bool HasSigil = TokenType != Type::Variable;
+  std::string Name = HasSigil ? Str.substr(1) : Str;
+  Accessor = split(StringRef(Name).trim().str(), '.');
+}
+
+// Builds a literal-text token: the type is Type::Text, the body is Str, and
+// the accessor is initialized from an empty initializer.
+Token::Token(std::string Str)
+ : TokenType(Type::Text), TokenBody(Str), Accessor({}) {}
+
+// Splits a mustache template into a flat sequence of tokens.
+//
+// The template is scanned for `{{...}}` tags. Each non-empty piece of text
+// between tags becomes a text token. Each tag becomes a token constructed
+// from the tag's inner contents, with the first character of those contents
+// passed as the identifier.
+std::vector<Token> tokenize(std::string Template) {
+  std::regex TagPattern(R"(\{\{(.*?)\}\})");
+  // Sub-match -1 yields the text between matches; 0 yields the match itself.
+  std::sregex_token_iterator Piece(Template.begin(), Template.end(),
+                                   TagPattern, {-1, 0});
+  std::sregex_token_iterator PiecesEnd;
+
+  std::vector<Token> Result;
+  for (; Piece != PiecesEnd; ++Piece) {
+    std::string Text = *Piece;
+    if (Text.empty())
+      continue;
+
+    // A piece that matches the tag pattern in full is a tag; anything else
+    // is literal text.
+    std::smatch TagMatch;
+    if (!std::regex_match(Text, TagMatch, TagPattern)) {
+      Result.emplace_back(Text);
+      continue;
+    }
+
+    std::string Inner = TagMatch[1];
+    Result.emplace_back(Inner, Inner[0]);
+  }
+
+  return Result;
+}
+
+// Holds a mustache template string and the state used while parsing it.
+//
+// parse() tokenizes the stored template and returns a shared ASTNode.
+// parseMustache() is a private helper taking a parent node; its definition is
+// not shown alongside this declaration.
+class Parser {
+public:
+  Parser(std::string TemplateStr) : TemplateStr(TemplateStr) {}
+
+  std::shared_ptr<ASTNode> parse();
+
+private:
+  void parseMustache(std::shared_ptr<ASTNode> Parent);
+
+  std::vector<Token> Tokens;
+  // Presumably the index of the next entry of Tokens to consume. It is
+  // zero-initialized here so it never holds an indeterminate value before
+  // parse() resets it.
+  std::size_t CurrentPtr = 0;
+  std::string TemplateStr;
+};
+
+std::shared_ptr<ASTNode> Parser::parse() {
+ Tokens = tokenize(TemplateStr);
+ CurrentPtr = 0;
+ std::shared_ptr<ASTNode> Root = std::make_shared<ASTNode>();
----------------
ilovepi wrote:
Perhaps it would be better to intern the AST nodes in an arena? I’m not convinced we want to bake ownership into the graph like this. There are also a lot of useful data structures in ADT. Off the top of my head, I’m not sure they completely fit this use case, but there are lots of primitives for building and managing graphs, IIRC.
https://github.com/llvm/llvm-project/pull/105893
More information about the cfe-commits
mailing list