================
@@ -0,0 +1,261 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Standalone Markdown parsing library for the LLVM ecosystem.
+///
+/// The parser takes a single paragraph of plain text and returns a list of
+/// nodes describing the Markdown it found. Each kind of construct has its own
+/// node type, and every node shares a common MDNode base, so you can use
+/// llvm::isa<>/cast<>/dyn_cast<> to check what a node is.
+///
+/// Inline nodes (appear inside ParagraphNode, HeadingNode, etc.):
+///   TextNode       -- plain text run
+///   SoftBreakNode  -- soft line break
+///   HardBreakNode  -- hard line break (trailing spaces or backslash)
+///   InlineCodeNode -- inline code span (`code`)
+///   EmphasisNode   -- emphasis (*text* or _text_)
+///   StrongNode     -- strong emphasis (**text** or __text__)
+///
+/// Block nodes:
+///   ParagraphNode     -- sequence of inline nodes
+///   HeadingNode       -- ATX heading (# through ######), level 1-6
+///   FencedCodeNode    -- fenced code block (``` or ~~~)
+///   TableNode         -- pipe table (raw row text; TODO: structured cells)
+///   UnorderedListNode -- bullet list (-, *, +)
+///   OrderedListNode   -- numbered list with explicit start number
+///   ListItemNode      -- single item inside a list
+///   BlockQuoteNode    -- block quote (>)
+///   ThematicBreakNode -- horizontal rule (---, ***, ___)
+///
+/// All nodes are arena-allocated. The caller owns the arena and must keep it
+/// alive for the lifetime of any returned nodes. The parser never crashes on
+/// malformed input; unrecognized text falls back to TextNode.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang::doc::markdown {
+
+/// Discriminator for all Markdown AST nodes. Inline kinds are grouped before
+/// block kinds so that the sentinels NK_LastInline and NK_FirstBlock enable
+/// cheap range-based checks in classof() implementations.
+enum class NodeKind {
+  // Inline nodes -- every inline kind compares <= NK_LastInline
+  NK_Text,       // tested by TextNode::classof
+  NK_SoftBreak,  // tested by SoftBreakNode::classof
+  NK_HardBreak,  // tested by HardBreakNode::classof
+  NK_InlineCode, // tested by InlineCodeNode::classof
+  NK_Emphasis,   // tested by EmphasisNode::classof
+  NK_Strong,     // tested by StrongNode::classof
+  NK_LastInline = NK_Strong, // sentinel; retarget if inline kinds are added
+
+  // Block nodes -- all are >= NK_FirstBlock; add new kinds above that sentinel
+  NK_Paragraph,
+  NK_Heading,
+  NK_FencedCode,
+  NK_Table,
+  NK_UnorderedList,
+  NK_OrderedList,
+  NK_ListItem,
+  NK_BlockQuote,
+  NK_ThematicBreak,
+  NK_FirstBlock = NK_Paragraph, // keep last: a later entry would == NK_Heading
+};
+
+/// Base type for all Markdown AST nodes. Carries only the kind discriminator.
+/// Nodes are arena-allocated and have no virtual destructor; use
+/// llvm::isa<>/cast<>/dyn_cast<> for type-safe downcasting.
+struct MDNode {
+  NodeKind Kind; // Discriminator tested by each derived node's classof().
+  explicit MDNode(NodeKind K) : Kind(K) {} // Called by derived node ctors.
+};
+
+//===----------------------------------------------------------------------===//
+// Inline nodes
+//===----------------------------------------------------------------------===//
+
+/// Plain text run.
+struct TextNode : MDNode {
+  llvm::StringRef Text; // Non-owning view; the bytes must outlive this node.
+  explicit TextNode(llvm::StringRef Text) // Parameter shadows the member.
+      : MDNode(NodeKind::NK_Text), Text(Text) {}
+  static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Text; }
+};
+
+/// Soft line break -- a newline that does not end the paragraph.
+struct SoftBreakNode : MDNode {
+  SoftBreakNode() : MDNode(NodeKind::NK_SoftBreak) {} // No payload to store.
+  static bool classof(const MDNode *N) { // Used by llvm::isa<>/dyn_cast<>.
+    return N->Kind == NodeKind::NK_SoftBreak;
+  }
+};
+
+/// Hard line break -- two trailing spaces or a backslash before a newline.
+struct HardBreakNode : MDNode {
+  HardBreakNode() : MDNode(NodeKind::NK_HardBreak) {} // No payload to store.
+  static bool classof(const MDNode *N) { // Used by llvm::isa<>/dyn_cast<>.
+    return N->Kind == NodeKind::NK_HardBreak;
+  }
+};
+
+/// Inline code span: `code`. Code does not include the surrounding backticks.
+struct InlineCodeNode : MDNode {
+  llvm::StringRef Code; // Non-owning view of the span, backticks excluded.
+  explicit InlineCodeNode(llvm::StringRef Code) // Param shadows the member.
+      : MDNode(NodeKind::NK_InlineCode), Code(Code) {}
+  static bool classof(const MDNode *N) { // Used by llvm::isa<>/dyn_cast<>.
+    return N->Kind == NodeKind::NK_InlineCode;
+  }
+};
+
+/// Emphasized text: *text* or _text_.
+struct EmphasisNode : MDNode {
+  llvm::ArrayRef<MDNode *> Children; // Non-owning; array must outlive the node.
+  explicit EmphasisNode(llvm::ArrayRef<MDNode *> Children)
+      : MDNode(NodeKind::NK_Emphasis), Children(Children) {}
+  static bool classof(const MDNode *N) { // Used by llvm::isa<>/dyn_cast<>.
+    return N->Kind == NodeKind::NK_Emphasis;
+  }
+};
+
+/// Strongly emphasized text: **text** or __text__.
+struct StrongNode : MDNode {
+  llvm::ArrayRef<MDNode *> Children; // Non-owning; array must outlive the node.
+  explicit StrongNode(llvm::ArrayRef<MDNode *> Children)
+      : MDNode(NodeKind::NK_Strong), Children(Children) {}
+  static bool classof(const MDNode *N) { // Used by llvm::isa<>/dyn_cast<>.
+    return N->Kind == NodeKind::NK_Strong;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Block nodes
+//===----------------------------------------------------------------------===//
+
+/// A paragraph -- sequence of inline nodes separated from other blocks by
+/// blank lines.
+struct ParagraphNode : MDNode {
----------------
Neil-N4 wrote:

Will start wrapping inline content in ParagraphNodes this week

https://github.com/llvm/llvm-project/pull/202991
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to