[clang-tools-extra] [llvm] [llvm] add support for mustache templating language (PR #105893)
Nicolas van Kempen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 05:28:47 PST 2025
================
@@ -0,0 +1,785 @@
+//===-- Mustache.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Mustache.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+using namespace llvm::json;
+using namespace llvm::mustache;
+
+namespace {
+
+// Returns true when V counts as "falsey": a JSON null, the boolean false,
+// an empty array, or an empty object. Every other value yields false.
+static bool isFalsey(const Value &V) {
+  if (V.getAsNull())
+    return true;
+  // A boolean is falsey exactly when it holds false.
+  if (auto Flag = V.getAsBoolean())
+    return !*Flag;
+  if (const auto *Arr = V.getAsArray())
+    return Arr->empty();
+  if (const auto *Obj = V.getAsObject())
+    return Obj->empty();
+  return false;
+}
+
+// Splits a dotted mustache name into an accessor, one entry per component,
+// with surrounding whitespace trimmed from each component. For example:
+//   "a.b.c" becomes {"a", "b", "c"}
+// A string that is exactly "." is the one exception: it refers to the
+// current context and is returned as a single-element accessor.
+static Accessor splitMustacheString(StringRef Str) {
+  Accessor Parts;
+  if (Str == ".") {
+    Parts.emplace_back(Str);
+    return Parts;
+  }
+  // Peel off one component per iteration until nothing remains.
+  for (StringRef Rest = Str; !Rest.empty();) {
+    auto [Head, Tail] = Rest.split(".");
+    Parts.emplace_back(Head.trim());
+    Rest = Tail;
+  }
+  return Parts;
+}
+
+} // namespace
+
+namespace llvm {
+namespace mustache {
+
+// A single token produced while tokenizing a mustache template. It records
+// the token's kind, its raw and stripped text, the accessor associated with
+// it, and an indentation amount.
+class Token {
+public:
+  // The kinds of token the tokenizer distinguishes.
+  enum class Type {
+    Text,
+    Variable,
+    Partial,
+    SectionOpen,
+    SectionClose,
+    InvertSectionOpen,
+    UnescapeVariable,
+    Comment,
+  };
+
+  // Both constructors are defined out of line.
+  Token(std::string Str);
+
+  Token(std::string RawBody, std::string TokenBody, char Identifier);
+
+  StringRef getTokenBody() const { return TokenBody; }
+
+  StringRef getRawBody() const { return RawBody; }
+
+  void setTokenBody(std::string NewBody) { TokenBody = std::move(NewBody); }
+
+  // Returns a copy of the stored accessor.
+  Accessor getAccessor() const { return Accessor; }
+
+  Type getType() const { return TokenType; }
+
+  void setIndentation(size_t NewIndentation) { Indentation = NewIndentation; }
+
+  size_t getIndentation() const { return Indentation; }
+
+  // Maps an identifier character to a token type (defined out of line).
+  static Type getTokenType(char Identifier);
+
+private:
+  Type TokenType;
+  // RawBody is the original string that was tokenized.
+  std::string RawBody;
+  // TokenBody is the original string with the identifier removed.
+  std::string TokenBody;
+  // NOTE(review): this member shares its name with the `Accessor` type that
+  // is used earlier in the class (getAccessor's return type). Some compilers
+  // diagnose that as a declaration that changes the meaning of `Accessor`;
+  // consider renaming it together with the out-of-line constructors.
+  Accessor Accessor;
+  // Defaulted so that getIndentation() never reads an indeterminate value,
+  // even if a constructor does not set it explicitly.
+  size_t Indentation = 0;
+};
+
+// A node of the tree built from a mustache template. Every node holds
+// references to the shared allocator, partial table, lambda tables and
+// escape map that are passed to its constructor, plus pointers to its parent
+// and children.
+class ASTNode {
+public:
+  enum Type {
+    Root,
+    Text,
+    Partial,
+    Variable,
+    UnescapeVariable,
+    Section,
+    InvertSection,
+  };
+
+  // Constructor for the Root node. Parent and ParentContext keep their
+  // in-class nullptr defaults.
+  ASTNode(llvm::BumpPtrAllocator &Alloc, llvm::StringMap<ASTNode *> &Partials,
+          llvm::StringMap<Lambda> &Lambdas,
+          llvm::StringMap<SectionLambda> &SectionLambdas,
+          llvm::DenseMap<char, std::string> &Escapes)
+      : Allocator(Alloc), Partials(Partials), Lambdas(Lambdas),
+        SectionLambdas(SectionLambdas), Escapes(Escapes), Ty(Type::Root) {}
+
+  // Constructor for Text nodes; Body is copied into the node.
+  ASTNode(llvm::StringRef Body, ASTNode *Parent, llvm::BumpPtrAllocator &Alloc,
+          llvm::StringMap<ASTNode *> &Partials,
+          llvm::StringMap<Lambda> &Lambdas,
+          llvm::StringMap<SectionLambda> &SectionLambdas,
+          llvm::DenseMap<char, std::string> &Escapes)
+      : Allocator(Alloc), Partials(Partials), Lambdas(Lambdas),
+        SectionLambdas(SectionLambdas), Escapes(Escapes), Ty(Type::Text),
+        Body(Body.str()), Parent(Parent) {}
+
+  // Constructor for Section/InvertSection/Variable/UnescapeVariable Nodes
+  // The initialisers are listed in member declaration order, and the
+  // by-value Accessor argument is moved into place rather than copied again.
+  ASTNode(Type Ty, Accessor Accessor, ASTNode *Parent,
+          llvm::BumpPtrAllocator &Alloc, llvm::StringMap<ASTNode *> &Partials,
+          llvm::StringMap<Lambda> &Lambdas,
+          llvm::StringMap<SectionLambda> &SectionLambdas,
+          llvm::DenseMap<char, std::string> &Escapes)
+      : Allocator(Alloc), Partials(Partials), Lambdas(Lambdas),
+        SectionLambdas(SectionLambdas), Escapes(Escapes), Ty(Ty),
+        Parent(Parent), Accessor(std::move(Accessor)) {}
+
+  void addChild(ASTNode *Child) { Children.emplace_back(Child); }
+
+  void setRawBody(std::string NewBody) { RawBody = std::move(NewBody); }
+
+  void setIndentation(size_t NewIndentation) { Indentation = NewIndentation; }
+
+  // Renders this node for Data into OS (defined out of line).
+  void render(const llvm::json::Value &Data, llvm::raw_ostream &OS);
+
+private:
+  // The helpers below are all defined out of line.
+  void renderLambdas(const llvm::json::Value &Contexts, llvm::raw_ostream &OS,
+                     Lambda &L);
+
+  void renderSectionLambdas(const llvm::json::Value &Contexts,
+                            llvm::raw_ostream &OS, SectionLambda &L);
+
+  void renderPartial(const llvm::json::Value &Contexts, llvm::raw_ostream &OS,
+                     ASTNode *Partial);
+
+  void renderChild(const llvm::json::Value &Context, llvm::raw_ostream &OS);
+
+  const llvm::json::Value *findContext();
+
+  llvm::BumpPtrAllocator &Allocator;
+  StringMap<ASTNode *> &Partials;
+  StringMap<Lambda> &Lambdas;
+  StringMap<SectionLambda> &SectionLambdas;
+  DenseMap<char, std::string> &Escapes;
+  Type Ty;
+  size_t Indentation = 0;
+  std::string RawBody;
+  std::string Body;
+  // Defaulted to nullptr so the Root node, which is constructed without a
+  // parent, never carries an indeterminate pointer.
+  ASTNode *Parent = nullptr;
+  // TODO: switch implementation to SmallVector<T>
+  std::vector<ASTNode *> Children;
+  // NOTE(review): this member shares its name with the `Accessor` type used
+  // earlier in the class; some compilers diagnose that as a declaration that
+  // changes the meaning of `Accessor`. Consider renaming it together with the
+  // out-of-line member definitions.
+  const Accessor Accessor;
+  const llvm::json::Value *ParentContext = nullptr;
+};
+
+// syntax wrapper for arena allocator for ASTNodes
+
+// Constructs a Root ASTNode in place in the caller-provided storage and
+// returns a pointer to it.
+auto CreateRootNode =
+    [](void *Storage, llvm::BumpPtrAllocator &Arena,
+       llvm::StringMap<ASTNode *> &PartialMap,
+       llvm::StringMap<Lambda> &LambdaMap,
+       llvm::StringMap<SectionLambda> &SectionLambdaMap,
+       llvm::DenseMap<char, std::string> &EscapeMap) -> ASTNode * {
+  ASTNode *Root = new (Storage)
+      ASTNode(Arena, PartialMap, LambdaMap, SectionLambdaMap, EscapeMap);
+  return Root;
+};
+
+// Constructs an ASTNode of type T with accessor A in place in the
+// caller-provided storage and returns a pointer to it. A is taken by value
+// and moved into the constructor, so no further copy is made here.
+auto CreateNode = [](void *Node, ASTNode::Type T, Accessor A, ASTNode *Parent,
+                     llvm::BumpPtrAllocator &Alloc,
+                     llvm::StringMap<ASTNode *> &Partials,
+                     llvm::StringMap<Lambda> &Lambdas,
+                     llvm::StringMap<SectionLambda> &SectionLambdas,
+                     llvm::DenseMap<char, std::string> &Escapes) -> ASTNode * {
+  return new (Node) ASTNode(T, std::move(A), Parent, Alloc, Partials, Lambdas,
+                            SectionLambdas, Escapes);
+};
+
+// Constructs a Text ASTNode holding Body in place in the caller-provided
+// storage and returns a pointer to it.
+auto CreateTextNode =
+    [](void *Storage, StringRef Text, ASTNode *Owner,
+       llvm::BumpPtrAllocator &Arena, llvm::StringMap<ASTNode *> &PartialMap,
+       llvm::StringMap<Lambda> &LambdaMap,
+       llvm::StringMap<SectionLambda> &SectionLambdaMap,
+       llvm::DenseMap<char, std::string> &EscapeMap) -> ASTNode * {
+  ASTNode *TextNode = new (Storage) ASTNode(
+      Text, Owner, Arena, PartialMap, LambdaMap, SectionLambdaMap, EscapeMap);
+  return TextNode;
+};
+
+// Checks whether there is meaningful text behind a token.
+// A token has meaningful text behind it when the right-hand side of the
+// previous token contains anything that is not a newline.
+// For example:
+// "Stuff {{#Section}}" (returns true)
+// vs
+// "{{#Section}} \n" (returns false)
+// We make an exception when the previous token is empty
+// and the current token is the second token.
+// For example:
+// "{{#Section}}"
+bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
+ if (Idx == 0)
+ return true;
+
+ int PrevIdx = Idx - 1;
----------------
nicovank wrote:
```suggestion
const size_t PrevIdx = Idx - 1;
```
https://github.com/llvm/llvm-project/pull/105893
More information about the llvm-commits
mailing list