================
@@ -0,0 +1,234 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang::doc::markdown {
+
+//===----------------------------------------------------------------------===//
+// Inline node print/dump
+//===----------------------------------------------------------------------===//
+
+/// Prints this inline node to llvm::errs() by forwarding to print().
+LLVM_DUMP_METHOD void InlineNode::dump() const {
+  llvm::raw_ostream &ErrStream = llvm::errs();
+  print(ErrStream);
+}
+
+/// Writes a single line to \p OS: the "TextNode: " tag followed by the value
+/// of getText().
+void TextNode::print(llvm::raw_ostream &OS) const {
+  OS << "TextNode: ";
+  OS << getText();
+  OS << "\n";
+}
+
+/// Writes a single line to \p OS: the "InlineCodeNode: " tag followed by the
+/// value of getCode().
+void InlineCodeNode::print(llvm::raw_ostream &OS) const {
+  OS << "InlineCodeNode: ";
+  OS << getCode();
+  OS << "\n";
+}
+
+/// Writes the "EmphasisNode" header line to \p OS, then prints every child in
+/// order.
+void EmphasisNode::print(llvm::raw_ostream &OS) const {
+  OS << "EmphasisNode\n";
+  for (const auto &Inline : Children) {
+    Inline.print(OS);
+  }
+}
+
+/// Writes the "StrongNode" header line to \p OS, then prints every child in
+/// order.
+void StrongNode::print(llvm::raw_ostream &OS) const {
+  OS << "StrongNode\n";
+  for (const auto &Inline : Children) {
+    Inline.print(OS);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Block node print/dump
+//===----------------------------------------------------------------------===//
+
+/// Prints this block node to llvm::errs() by forwarding to print().
+LLVM_DUMP_METHOD void BlockNode::dump() const {
+  llvm::raw_ostream &ErrStream = llvm::errs();
+  print(ErrStream);
+}
+
+/// Writes the "ParagraphNode" header line to \p OS, then prints every child
+/// in order.
+void ParagraphNode::print(llvm::raw_ostream &OS) const {
+  OS << "ParagraphNode\n";
+  for (const auto &Inline : Children) {
+    Inline.print(OS);
+  }
+}
+
+/// Writes a "HeadingNode: level=<N>" header line to \p OS, where <N> is the
+/// value of getLevel(), then prints every child in order.
+void HeadingNode::print(llvm::raw_ostream &OS) const {
+  OS << "HeadingNode: level=";
+  OS << getLevel();
+  OS << "\n";
+  for (const auto &Inline : Children) {
+    Inline.print(OS);
+  }
+}
+
+/// Writes a "FencedCodeNode: lang=<lang>" header line to \p OS, followed by
+/// the value of getCode() and a trailing newline.
+void FencedCodeNode::print(llvm::raw_ostream &OS) const {
+  // Header line carrying the language tag.
+  OS << "FencedCodeNode: lang=" << getLang() << "\n";
+  // Code body, terminated by a newline.
+  OS << getCode() << "\n";
+}
+
+/// Writes the "ListItemNode" header line to \p OS, then prints every child in
+/// order.
+void ListItemNode::print(llvm::raw_ostream &OS) const {
+  OS << "ListItemNode\n";
+  for (const auto &Block : Children) {
+    Block.print(OS);
+  }
+}
+
+/// Prints this list item to llvm::errs() by forwarding to print().
+LLVM_DUMP_METHOD void ListItemNode::dump() const {
+  llvm::raw_ostream &ErrStream = llvm::errs();
+  print(ErrStream);
+}
+
+/// Writes the "UnorderedListNode" header line to \p OS, then prints every
+/// entry of Items in order.
+void UnorderedListNode::print(llvm::raw_ostream &OS) const {
+  OS << "UnorderedListNode\n";
+  for (const auto &Entry : Items) {
+    Entry.print(OS);
+  }
+}
+
+/// Writes an "OrderedListNode: start=<N>" header line to \p OS, where <N> is
+/// the value of getStart(), then prints every entry of Items in order.
+void OrderedListNode::print(llvm::raw_ostream &OS) const {
+  OS << "OrderedListNode: start=";
+  OS << getStart();
+  OS << "\n";
+  for (const auto &Entry : Items) {
+    Entry.print(OS);
+  }
+}
+
+/// Writes the "BlockQuoteNode" header line to \p OS, then prints every child
+/// in order.
+void BlockQuoteNode::print(llvm::raw_ostream &OS) const {
+  OS << "BlockQuoteNode\n";
+  for (const auto &Block : Children) {
+    Block.print(OS);
+  }
+}
+
+/// Writes the single line "ThematicBreakNode" to \p OS; the node prints no
+/// further data.
+void ThematicBreakNode::print(llvm::raw_ostream &OS) const {
+  llvm::raw_ostream &Out = OS;
+  Out << "ThematicBreakNode\n";
+}
+
+/// Writes the "DocumentNode" header line to \p OS, then prints every
+/// top-level child in order.
+void DocumentNode::print(llvm::raw_ostream &OS) const {
+  OS << "DocumentNode\n";
+  for (const auto &Block : Children) {
+    Block.print(OS);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// Returns true if \p Line begins with an unordered-list bullet ('-', '*' or
+/// '+') immediately followed by a space.
+static bool isListMarker(llvm::StringRef Line) {
+  // A bullet needs at least two characters: the marker and the space.
+  if (Line.size() < 2 || Line[1] != ' ')
+    return false;
+  const char Bullet = Line[0];
+  return Bullet == '-' || Bullet == '*' || Bullet == '+';
+}
+
+/// Returns true if \p Line is a thematic break: three or more occurrences of
+/// the same marker character ('-', '*' or '_'), optionally separated by
+/// spaces or tabs, and nothing else.
+///
+/// The first character of \p Line selects the marker, so any leading
+/// whitespace must already have been stripped by the caller.
+static bool isThematicBreak(llvm::StringRef Line) {
+  if (Line.empty())
+    return false;
+  const char Marker = Line[0];
+  if (Marker != '-' && Marker != '*' && Marker != '_')
+    return false;
+  // Single pass: count markers and reject any character that is neither the
+  // marker nor interior whitespace. CommonMark permits tabs as well as spaces
+  // between the marker characters.
+  size_t MarkerCount = 0;
+  for (const char C : Line) {
+    if (C == Marker)
+      ++MarkerCount;
+    else if (C != ' ' && C != '\t')
+      return false;
+  }
+  return MarkerCount >= 3;
+}
+
+DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) {
+  auto *Doc = Ctx.allocate<DocumentNode>();
+  Ctx.setRoot(Doc);
+
+  llvm::SmallVector<llvm::StringRef> Lines;
+  Text.split(Lines, '\n');
+
+  size_t I = 0;
+  while (I < Lines.size()) {
+    llvm::StringRef Line = Lines[I].trim();
+
+    if (Line.empty()) {
+      ++I;
+      continue;
+    }
+
+    // Thematic break must come before list and fenced code checks since
+    // "---" and "- - -" would otherwise match those patterns first.
+    if (isThematicBreak(Line)) {
+      auto *Node = Ctx.allocate<ThematicBreakNode>();
+      Doc->Children.push_back(*Node);
+      ++I;
+      continue;
+    }
+
+    // Fenced code block
+    if (Line.starts_with("```") || Line.starts_with("~~~")) {
+      char Fence = Line[0];
+      llvm::StringRef Lang = Line.drop_front(3).trim();
+      ++I;
+      llvm::SmallString<256> Code;
+      while (I < Lines.size()) {
+        llvm::StringRef Trimmed = Lines[I].trim();
+        if (Trimmed.size() >= 3 && Trimmed[0] == Fence && Trimmed[1] == Fence 
&&
+            Trimmed[2] == Fence) {
+          ++I;
+          break;
+        }
+        if (!Code.empty())
+          Code += '\n';
+        Code += Lines[I];
+        ++I;
+      }
+      auto *Node = Ctx.allocate<FencedCodeNode>(Lang, Ctx.internString(Code));
----------------
ilovepi wrote:

I understand why you're allocating the Node, but why is it necessary to intern 
the String? Based on earlier discussions, you only need a read-only reference 
to strings that outlive the AST context. You have that, from what I can tell, 
though it may need to be explained in the documentation. Generally, we don't 
want to be reallocating and copying substrings all over the place in 
`clang-doc` or `clang`.

```suggestion
      auto *Node = Ctx.allocate<FencedCodeNode>(Lang, Code);
```

https://github.com/llvm/llvm-project/pull/205609
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to