================ @@ -0,0 +1,489 @@ +//===-- Mustache.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Mustache.h" +#include "llvm/Support/Error.h" +#include <sstream> + +using namespace llvm; +using namespace llvm::json; +using namespace llvm::mustache; + +SmallString<128> escapeString(StringRef Input, + DenseMap<char, StringRef> &Escape) { + SmallString<128> EscapedString(""); + for (char C : Input) { + if (Escape.find(C) != Escape.end()) + EscapedString += Escape[C]; + else + EscapedString += C; + } + return EscapedString; +} + +Accessor split(StringRef Str, char Delimiter) { + Accessor Tokens; + if (Str == ".") { + Tokens.emplace_back(Str); + return Tokens; + } + StringRef Ref(Str); + while (!Ref.empty()) { + StringRef Part; + std::tie(Part, Ref) = Ref.split(Delimiter); + Tokens.emplace_back(Part.trim()); + } + return Tokens; +} + +void addIndentation(llvm::SmallString<128> &PartialStr, + size_t IndentationSize) { + std::string Indent(IndentationSize, ' '); + llvm::SmallString<128> Result; + for (size_t I = 0; I < PartialStr.size(); ++I) { + Result.push_back(PartialStr[I]); + if (PartialStr[I] == '\n' && I < PartialStr.size() - 1) + Result.append(Indent); + } + PartialStr = Result; +} + +Token::Token(StringRef RawBody, StringRef InnerBody, char Identifier) + : RawBody(RawBody), TokenBody(InnerBody), Indentation(0) { + TokenType = getTokenType(Identifier); + if (TokenType == Type::Comment) + return; + + StringRef AccessorStr = + TokenType == Type::Variable ? 
InnerBody : InnerBody.substr(1); + + Accessor = split(AccessorStr.trim(), '.'); +} + +Token::Token(StringRef Str) + : TokenType(Type::Text), RawBody(Str), Accessor({}), TokenBody(Str), + Indentation(0) {} + +Token::Type Token::getTokenType(char Identifier) { + switch (Identifier) { + case '#': + return Type::SectionOpen; + case '/': + return Type::SectionClose; + case '^': + return Type::InvertSectionOpen; + case '!': + return Type::Comment; + case '>': + return Type::Partial; + case '&': + return Type::UnescapeVariable; + default: + return Type::Variable; + } +} + +// Function to check if there's no meaningful text behind +bool noTextBehind(size_t Idx, const SmallVector<Token, 0> &Tokens) { + if (Idx == 0 || Tokens[Idx - 1].getType() != Token::Type::Text) + return false; + const Token &PrevToken = Tokens[Idx - 1]; + StringRef TokenBody = PrevToken.getRawBody().rtrim(" \t\v\t"); + return TokenBody.ends_with("\n") || TokenBody.ends_with("\r\n") || + (TokenBody.empty() && Idx == 1); +} +// Function to check if there's no meaningful text ahead +bool noTextAhead(size_t Idx, const SmallVector<Token, 0> &Tokens) { + if (Idx >= Tokens.size() - 1 || + Tokens[Idx + 1].getType() != Token::Type::Text) + return false; + + const Token &NextToken = Tokens[Idx + 1]; + StringRef TokenBody = NextToken.getRawBody().ltrim(" "); + return TokenBody.starts_with("\r\n") || TokenBody.starts_with("\n"); +} + +// Simple tokenizer that splits the template into tokens +// the mustache spec allows {{{ }}} to unescape variables +// but we don't support that here unescape variable +// is represented only by {{& variable}} +SmallVector<Token, 0> tokenize(StringRef Template) { + SmallVector<Token, 0> Tokens; + StringRef Open("{{"); + StringRef Close("}}"); + std::size_t Start = 0; + std::size_t DelimiterStart = Template.find(Open); + if (DelimiterStart == StringRef::npos) { + Tokens.emplace_back(Template); + return Tokens; + } + while (DelimiterStart != StringRef::npos) { + if (DelimiterStart != 
Start) { + Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start)); + } + + size_t DelimiterEnd = Template.find(Close, DelimiterStart); + if (DelimiterEnd == StringRef::npos) { + break; + } + + // Extract the Interpolated variable without {{ and }} + size_t InterpolatedStart = DelimiterStart + Open.size(); + size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size(); + SmallString<128> Interpolated = + Template.substr(InterpolatedStart, InterpolatedEnd); + SmallString<128> RawBody; + RawBody += Open; + RawBody += Interpolated; + RawBody += Close; + + Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]); + Start = DelimiterEnd + Close.size(); + DelimiterStart = Template.find(Open, Start); + } + + if (Start < Template.size()) + Tokens.emplace_back(Template.substr(Start)); + + // fix up white spaces for + // open sections/inverted sections/close section/comment + // This loop attempts to find standalone tokens and tries to trim out + // the whitespace around them + // for example: + // if you have the template string + // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3" + // The output would be + // "Line 1\n Line 2\n Line 3" + size_t LastIdx = Tokens.size() - 1; + for (size_t Idx = 0, End = Tokens.size(); Idx < End; ++Idx) { + Token &CurrentToken = Tokens[Idx]; + Token::Type CurrentType = CurrentToken.getType(); + // Check if token type requires cleanup + bool RequiresCleanUp = (CurrentType == Token::Type::SectionOpen || + CurrentType == Token::Type::InvertSectionOpen || + CurrentType == Token::Type::SectionClose || + CurrentType == Token::Type::Comment || + CurrentType == Token::Type::Partial); + + if (!RequiresCleanUp) + continue; + + bool NoTextBehind = noTextBehind(Idx, Tokens); + bool NoTextAhead = noTextAhead(Idx, Tokens); + + // Adjust next token body if no text ahead + if ((NoTextBehind && NoTextAhead) || (NoTextAhead && Idx == 0)) { + Token &NextToken = Tokens[Idx + 1]; + StringRef NextTokenBody = 
NextToken.getTokenBody(); + if (NextTokenBody.starts_with("\r\n")) { + NextToken.setTokenBody(NextTokenBody.substr(2)); + } else if (NextTokenBody.starts_with("\n")) { + NextToken.setTokenBody(NextTokenBody.substr(1)); + } ---------------- ilovepi wrote:
You could probably simplify this `if` by using `find('\n')` and then making the call only if the index is <= 2.

```suggestion
      size_t Idx = NextTokenBody.find('\n');
      if (Idx <= 2) {
        NextToken.setTokenBody(NextTokenBody.substr(Idx));
```

https://github.com/llvm/llvm-project/pull/105893
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits