[clang-tools-extra] [llvm] [llvm] add support for mustache templating language (PR #105893)

via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 6 14:44:52 PDT 2024


================
@@ -0,0 +1,276 @@
+//===-- Mustache.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Mustache.h"
+#include "llvm/Support/Error.h"
+#include <iostream>
+#include <regex>
+#include <sstream>
+
+using namespace llvm;
+using namespace llvm::json;
+using namespace llvm::mustache;
+
+/// Returns a copy of \p Input with the characters that are significant in
+/// HTML replaced by character references:
+///   `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`,
+///   `'` -> `&#39;`.
+/// Every other character is copied through unchanged.
+std::string escapeHtml(const std::string &Input) {
+  std::string EscapedString;
+  // Lower bound only: every escaped character expands to several characters.
+  EscapedString.reserve(Input.size());
+
+  // A switch avoids rebuilding a lookup table on each call, needs a single
+  // dispatch per character, and cannot silently contain a duplicated key
+  // (duplicate case labels are rejected by the compiler).
+  for (char C : Input) {
+    switch (C) {
+    case '&':
+      EscapedString += "&amp;";
+      break;
+    case '<':
+      EscapedString += "&lt;";
+      break;
+    case '>':
+      EscapedString += "&gt;";
+      break;
+    case '"':
+      EscapedString += "&quot;";
+      break;
+    case '\'':
+      EscapedString += "&#39;";
+      break;
+    default:
+      EscapedString += C;
+      break;
+    }
+  }
+
+  return EscapedString;
+}
+
+/// Splits \p Str into the fields separated by \p Delimiter.
+///
+/// The string "." is returned as a single "." token. Otherwise fields are
+/// produced left to right; empty fields between or before delimiters are
+/// kept, but a delimiter at the very end of the input does not produce a
+/// trailing empty field, and an empty input produces no fields at all.
+std::vector<std::string> split(const std::string &Str, char Delimiter) {
+  std::vector<std::string> Fields;
+  if (Str == ".") {
+    Fields.push_back(Str);
+    return Fields;
+  }
+
+  std::string::size_type Begin = 0;
+  while (Begin < Str.size()) {
+    const std::string::size_type DelimPos = Str.find(Delimiter, Begin);
+    if (DelimPos == std::string::npos) {
+      // No further delimiter: the remainder is the last field.
+      Fields.push_back(Str.substr(Begin));
+      break;
+    }
+    Fields.push_back(Str.substr(Begin, DelimPos - Begin));
+    Begin = DelimPos + 1;
+  }
+  return Fields;
+}
+
+/// Builds a token from the contents of a tag.
+///
+/// \param Str        the tag contents.
+/// \param Identifier the character used to classify the tag: '#', '/', '^',
+///                   '!', '>' and '&' select section-open, section-close,
+///                   inverted-section-open, comment, partial and unescaped
+///                   variable respectively; anything else is a variable.
+///
+/// Comment tokens get neither a body nor an accessor. For every other type
+/// TokenBody is \p Str verbatim, and Accessor is the result of splitting the
+/// trimmed name on '.'; for non-variable tags the first character of \p Str
+/// is dropped before trimming.
+Token::Token(std::string Str, char Identifier) {
+  auto Classify = [](char Sigil) {
+    switch (Sigil) {
+    case '#':
+      return Type::SectionOpen;
+    case '/':
+      return Type::SectionClose;
+    case '^':
+      return Type::InvertSectionOpen;
+    case '!':
+      return Type::Comment;
+    case '>':
+      return Type::Partial;
+    case '&':
+      return Type::UnescapeVariable;
+    default:
+      return Type::Variable;
+    }
+  };
+
+  TokenType = Classify(Identifier);
+  if (TokenType == Type::Comment)
+    return;
+
+  TokenBody = Str;
+  // Only plain variables keep their first character as part of the name.
+  const bool DropFirstChar = TokenType != Type::Variable;
+  std::string Name = DropFirstChar ? Str.substr(1) : Str;
+  Accessor = split(StringRef(Name).trim().str(), '.');
+}
+
+/// Builds a literal-text token: the type is Type::Text, the body is \p Str,
+/// and the accessor is initialized from an empty initializer.
+Token::Token(std::string Str)
+    : TokenType(Type::Text),
+      TokenBody(Str),
+      Accessor({}) {}
+
+/// Breaks \p Template into a flat sequence of tokens.
+///
+/// The input is scanned for `{{...}}` tags (non-greedy). Each tag becomes a
+/// token built from its inner text and that text's first character; each
+/// non-empty stretch of text between tags becomes a text token. Empty
+/// stretches are skipped.
+std::vector<Token> tokenize(std::string Template) {
+  std::regex TagRe(R"(\{\{(.*?)\}\})");
+  std::vector<Token> Result;
+
+  // Submatch -1 yields the text between matches, submatch 0 the tag itself,
+  // so the iterator alternates between literal text and whole tags.
+  std::sregex_token_iterator Cursor(Template.begin(), Template.end(), TagRe,
+                                    {-1, 0});
+  const std::sregex_token_iterator Last;
+
+  while (Cursor != Last) {
+    std::string Piece = Cursor->str();
+    ++Cursor;
+    if (Piece.empty())
+      continue;
+
+    std::smatch TagMatch;
+    if (!std::regex_match(Piece, TagMatch, TagRe)) {
+      Result.emplace_back(Piece);
+      continue;
+    }
+
+    std::string Inner = TagMatch[1];
+    Result.emplace_back(Inner, Inner[0]);
+  }
+
+  return Result;
+}
+
+/// Turns a template string into a tree of ASTNode objects.
+///
+/// The template text is stored at construction; tokenization happens when
+/// parse() is called.
+class Parser {
+public:
+  /// Stores a copy of \p TemplateStr for a later call to parse().
+  Parser(std::string TemplateStr) : TemplateStr(TemplateStr) {}
+
+  /// Tokenizes the stored template, restarts from the first token, and
+  /// returns a shared pointer to an ASTNode (presumably the root of the
+  /// parsed tree).
+  std::shared_ptr<ASTNode> parse();
+
+private:
+  /// NOTE(review): presumably consumes tokens starting at CurrentPtr and
+  /// attaches the resulting nodes to \p Parent; confirm against the
+  /// definition.
+  void parseMustache(std::shared_ptr<ASTNode> Parent);
+
+  /// Tokens produced from TemplateStr by parse().
+  std::vector<Token> Tokens;
+  /// Index into Tokens. Initialized here so it never holds an indeterminate
+  /// value before parse() resets it.
+  std::size_t CurrentPtr = 0;
+  /// The template source text given to the constructor.
+  std::string TemplateStr;
+};
+
+std::shared_ptr<ASTNode> Parser::parse() {
+  Tokens = tokenize(TemplateStr);
+  CurrentPtr = 0;
+  std::shared_ptr<ASTNode> Root = std::make_shared<ASTNode>();
----------------
PeterChou1 wrote:

The problem is that the child nodes need references to their parent, so I'm not sure whether we could use `unique_ptr`.

https://github.com/llvm/llvm-project/pull/105893


More information about the cfe-commits mailing list