================ @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::doc::markdown { + +//===----------------------------------------------------------------------===// +// Inline node print/dump +//===----------------------------------------------------------------------===// + +LLVM_DUMP_METHOD void InlineNode::dump() const { print(llvm::errs()); } + +void TextNode::print(llvm::raw_ostream &OS) const { + OS << "TextNode: " << getText() << "\n"; +} + +void InlineCodeNode::print(llvm::raw_ostream &OS) const { + OS << "InlineCodeNode: " << getCode() << "\n"; +} + +void EmphasisNode::print(llvm::raw_ostream &OS) const { + OS << "EmphasisNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +void StrongNode::print(llvm::raw_ostream &OS) const { + OS << "StrongNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +//===----------------------------------------------------------------------===// +// Block node print/dump +//===----------------------------------------------------------------------===// + +LLVM_DUMP_METHOD void BlockNode::dump() const { print(llvm::errs()); } + +void ParagraphNode::print(llvm::raw_ostream &OS) const { + OS << "ParagraphNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +void HeadingNode::print(llvm::raw_ostream &OS) const { + OS << "HeadingNode: level=" << getLevel() << "\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +void FencedCodeNode::print(llvm::raw_ostream &OS) const { + OS << "FencedCodeNode: lang=" << getLang() << 
"\n" << getCode() << "\n"; +} + +void ListItemNode::print(llvm::raw_ostream &OS) const { + OS << "ListItemNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void ListItemNode::dump() const { print(llvm::errs()); } + +void UnorderedListNode::print(llvm::raw_ostream &OS) const { + OS << "UnorderedListNode\n"; + for (const auto &Item : Items) + Item.print(OS); +} + +void OrderedListNode::print(llvm::raw_ostream &OS) const { + OS << "OrderedListNode: start=" << getStart() << "\n"; + for (const auto &Item : Items) + Item.print(OS); +} + +void BlockQuoteNode::print(llvm::raw_ostream &OS) const { + OS << "BlockQuoteNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +void ThematicBreakNode::print(llvm::raw_ostream &OS) const { + OS << "ThematicBreakNode\n"; +} + +void DocumentNode::print(llvm::raw_ostream &OS) const { + OS << "DocumentNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +static bool isListMarker(llvm::StringRef Line) { + return Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ "); +} + +static bool isThematicBreak(llvm::StringRef Line) { + if (Line.empty()) + return false; + char Marker = Line[0]; + if (Marker != '-' && Marker != '*' && Marker != '_') + return false; + llvm::SmallString<8> Allowed; + Allowed += Marker; + Allowed += ' '; + if (Line.find_first_not_of(llvm::StringRef(Allowed)) != llvm::StringRef::npos) + return false; + return Line.count(Marker) >= 3; +} + +DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { + auto *Doc = Ctx.allocate<DocumentNode>(); + Ctx.setRoot(Doc); + + llvm::SmallVector<llvm::StringRef> Lines; + Text.split(Lines, '\n'); + + size_t I = 0; + while (I < Lines.size()) { + llvm::StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + 
++I; + continue; + } + + // Thematic break must come before list and fenced code checks since + // "---" and "- - -" would otherwise match those patterns first. + if (isThematicBreak(Line)) { + auto *Node = Ctx.allocate<ThematicBreakNode>(); + Doc->Children.push_back(*Node); + ++I; + continue; + } + + // Fenced code block + if (Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + llvm::StringRef Lang = Line.drop_front(3).trim(); + ++I; + llvm::SmallString<256> Code; + while (I < Lines.size()) { + llvm::StringRef Trimmed = Lines[I].trim(); + if (Trimmed.size() >= 3 && Trimmed[0] == Fence && Trimmed[1] == Fence && + Trimmed[2] == Fence) { + ++I; + break; + } + if (!Code.empty()) + Code += '\n'; + Code += Lines[I]; + ++I; + } + auto *Node = Ctx.allocate<FencedCodeNode>(Lang, Ctx.internString(Code)); ---------------- Neil-N4 wrote:
Removed along with the parser https://github.com/llvm/llvm-project/pull/205609 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
