https://github.com/Neil-N4 updated 
https://github.com/llvm/llvm-project/pull/202991

>From b4623400ed04066b222882d46bbddf1819ffca9e Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Wed, 10 Jun 2026 09:51:48 -0400
Subject: [PATCH 1/3] [clang-doc] Add standalone Markdown parsing library

---
 .../clang-doc/support/CMakeLists.txt          |   3 +-
 .../clang-doc/support/Markdown.cpp            | 145 ++++++++++++++++++
 .../clang-doc/support/Markdown.h              |  72 +++++++++
 .../unittests/clang-doc/CMakeLists.txt        |   4 +-
 .../clang-doc/MarkdownParserTest.cpp          |  94 ++++++++++++
 5 files changed, 316 insertions(+), 2 deletions(-)
 create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp
 create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h
 create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp

diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt
index 8ac913ffbe998..acff865190ff9 100644
--- a/clang-tools-extra/clang-doc/support/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt
@@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_clang_library(clangDocSupport STATIC
   File.cpp
+  Markdown.cpp
   Utils.cpp
   )
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
new file mode 100644
index 0000000000000..776150b939d27
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -0,0 +1,145 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLog.h"
+
+#define DEBUG_TYPE "clang-doc-markdown"
+
+using namespace llvm;
+
+namespace clang::doc::markdown {
+
+static MDNode makeText(StringRef S) {
+  return {NodeKind::NK_Text, S, {}}; // Leaf node: S is referenced, not copied.
+}
+
+// Returns true if Line consists solely of '|', '-', ':' and space characters
+// and contains at least one '-'. A '|' is not required.
+static bool isSepRow(StringRef Line) {
+  return Line.contains('-') &&
+         Line.find_first_not_of("|-: ") == StringRef::npos;
+}
+
+// Returns true if Line begins with a bullet list marker (-, *, or +)
+// immediately followed by a space (a tab does not count).
+static bool isListItem(StringRef Line) {
+  return Line.starts_with("- ") || Line.starts_with("* ") ||
+         Line.starts_with("+ ");
+}
+
+static ArrayRef<MDNode> allocateNodes(const SmallVectorImpl<MDNode> &Nodes,
+                                      BumpPtrAllocator &Arena) {
+  if (Nodes.empty())
+    return {}; // Empty input: return an empty view without allocating.
+  MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size());
+  std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated);
+  return ArrayRef<MDNode>(Allocated, Nodes.size()); // Arena-backed copy.
+}
+
+ArrayRef<MDNode> parseMarkdown(StringRef ParagraphText,
+                               BumpPtrAllocator &Arena) {
+  if (ParagraphText.trim().empty())
+    return {};
+
+  SmallVector<StringRef, 16> Lines;
+  ParagraphText.split(Lines, '\n');
+
+  SmallVector<MDNode> Nodes;
+  size_t I = 0, E = Lines.size();
+  // Each iteration consumes one construct: fence, table, list, or text line.
+  while (I < E) {
+    StringRef Line = Lines[I].trim();
+
+    if (Line.empty()) {
+      ++I;
+      continue;
+    }
+
+    // TODO: Follow CommonMark spec §4.5 more closely -- opening fences may be
+    // indented up to 3 spaces, and the closing fence must be at least as long
+    // as the opening fence. Lines are trimmed before matching, so indentation
+    // is currently ignored. Doxygen specifics should be handled on a
+    // case-by-case basis.
+    if (Line.starts_with("```") || Line.starts_with("~~~")) {
+      char Fence = Line[0];
+      StringRef Lang = Line.drop_front(3).trim();
+      SmallVector<MDNode> CodeLines;
+      ++I;
+      while (I < E) {
+        StringRef CodeLine = Lines[I].trim();
+        // A closing fence holds only fence characters (no info string).
+        if (CodeLine.size() >= 3 &&
+            CodeLine.find_first_not_of(Fence) == StringRef::npos)
+          break;
+        CodeLines.push_back(makeText(Lines[I])); // untrimmed
+        ++I;
+      }
+      ++I; // Skip the closing fence (harmless if the fence never closed).
+      MDNode Code;
+      Code.Kind = NodeKind::NK_FencedCode;
+      Code.Content = Lang;
+      Code.Children = allocateNodes(CodeLines, Arena);
+      LDBG() << "emitting NK_FencedCode lang='" << Lang
+             << "' lines=" << CodeLines.size();
+      Nodes.push_back(Code);
+      continue;
+    }
+
+    // Pipe table: current line has | and next line is a separator row.
+    if (Line.contains('|') && I + 1 < E && isSepRow(Lines[I + 1].trim())) {
+      SmallVector<MDNode> Rows;
+      while (I < E && Lines[I].trim().contains('|')) {
+        Rows.push_back(makeText(Lines[I].trim()));
+        ++I;
+      }
+      MDNode Table;
+      Table.Kind = NodeKind::NK_Table;
+      Table.Content = {};
+      Table.Children = allocateNodes(Rows, Arena);
+      LDBG() << "emitting NK_Table rows=" << Rows.size();
+      Nodes.push_back(Table);
+      continue;
+    }
+
+    // Unordered list: a run of consecutive bullet lines.
+    if (isListItem(Line)) {
+      SmallVector<MDNode> Items;
+      while (I < E) {
+        StringRef L = Lines[I].trim();
+        if (!isListItem(L))
+          break;
+        MDNode Item;
+        Item.Kind = NodeKind::NK_ListItem;
+        Item.Content = L.drop_front(2).trim();
+        Item.Children = {};
+        Items.push_back(Item);
+        ++I;
+      }
+      MDNode List;
+      List.Kind = NodeKind::NK_UnorderedList;
+      List.Content = {};
+      List.Children = allocateNodes(Items, Arena);
+      LDBG() << "emitting NK_UnorderedList items=" << Items.size();
+      Nodes.push_back(List);
+      continue;
+    }
+
+    // Plain text fallback: one NK_Text node per unrecognized line.
+    Nodes.push_back(makeText(Line));
+    ++I;
+  }
+
+  LDBG() << "parseMarkdown done nodes=" << Nodes.size();
+  return allocateNodes(Nodes, Arena);
+}
+
+} // namespace clang::doc::markdown
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h
new file mode 100644
index 0000000000000..890f764f937b1
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a standalone Markdown parsing library for the LLVM
+/// ecosystem. The parser takes plain text and returns a tree of typed nodes
+/// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+///
+/// This is a simple Markdown parser for use inside Clang-Doc's comment
+/// pipeline. You give it a paragraph of text and an arena allocator, and it
+/// gives back a list of typed nodes describing the Markdown structure it found.
+///
+/// The main entry point is parseMarkdown(). If the text is empty or only
+/// whitespace, you get back an empty list. Otherwise you get a list of MDNode
+/// structs where each node has a kind, optional content (like the language tag
+/// on a code fence), and optional children; plain lines become NK_Text nodes.
+///
+/// All nodes are allocated in the arena you pass in, but their Content views
+/// the input text, so keep both the arena and the text alive while in use.
+///
+/// The parser handles fenced code blocks, pipe tables, and unordered lists.
+/// Anything it does not recognize comes back as a plain text node. It will
+/// never crash on bad input.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang::doc::markdown {
+
+enum class NodeKind {
+  // Block nodes (emitted today: FencedCode, Table, UnorderedList, ListItem)
+  NK_Paragraph,
+  NK_FencedCode,
+  NK_Table,
+  NK_UnorderedList,
+  NK_OrderedList,
+  NK_ListItem,
+  NK_ThematicBreak,
+  // Inline nodes (emitted today: Text)
+  NK_Text,
+  NK_InlineCode,
+  NK_Emphasis,
+  NK_Strong,
+  NK_SoftBreak,
+};
+
+struct MDNode {
+  NodeKind Kind;
+  llvm::StringRef Content; // FencedCode lang tag or leaf text; views the input
+  llvm::ArrayRef<MDNode> Children; // arena allocated
+};
+
+/// Parses Markdown from a single comment paragraph's text.
+/// Returns an empty ArrayRef only for empty/whitespace-only input; plain lines
+/// become NK_Text nodes. Node contents reference ParagraphText (not copied).
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena);
+
+} // namespace clang::doc::markdown
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file
diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
index 01b34ec9a791e..b74207ac88fa7 100644
--- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
@@ -26,6 +26,7 @@ add_extra_unittest(ClangDocTests
   ClangDocTest.cpp
   GeneratorTest.cpp
   HTMLGeneratorTest.cpp
+  MarkdownParserTest.cpp
   MDGeneratorTest.cpp
   MergeTest.cpp
   SerializeTest.cpp
@@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests
 target_link_libraries(ClangDocTests
   PRIVATE
   clangDoc
+  clangDocSupport
   LLVMTestingSupport
   )
diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
new file mode 100644
index 0000000000000..8df5efc7f1d5f
--- /dev/null
+++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/Markdown.h"
+#include "llvm/Support/Allocator.h"
+#include "gtest/gtest.h"
+
+using namespace clang::doc::markdown;
+
+namespace {
+
+TEST(MarkdownParserTest, EmptyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+TEST(MarkdownParserTest, WhitespaceOnlyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("   \n  \n", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+TEST(MarkdownParserTest, PlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("hello world", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text);
+  EXPECT_EQ(Nodes[0].Content, "hello world");
+}
+
+TEST(MarkdownParserTest, FencedCodeBlock) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(Nodes[0].Content, "cpp");
+  ASSERT_EQ(Nodes[0].Children.size(), 1u);
+}
+
+TEST(MarkdownParserTest, FencedCodeBlockNoLang) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```\nsome code\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(Nodes[0].Content.empty());
+}
+
+TEST(MarkdownParserTest, UnterminatedFenceReturnsEmpty) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena);
+  // Unterminated fence should not crash and should produce a code node
+  // with whatever lines were found.
+  EXPECT_FALSE(Nodes.empty());
+}
+
+TEST(MarkdownParserTest, PipeTable) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table);
+}
+
+TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("a | b\nc | d", Arena);
+  // No separator row so should not be parsed as a table
+  for (const auto &Node : Nodes)
+    EXPECT_NE(Node.Kind, NodeKind::NK_Table);
+}
+
+TEST(MarkdownParserTest, UnorderedList) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList);
+  ASSERT_EQ(Nodes[0].Children.size(), 3u);
+  EXPECT_EQ(Nodes[0].Children[0].Content, "foo");
+  EXPECT_EQ(Nodes[0].Children[1].Content, "bar");
+  EXPECT_EQ(Nodes[0].Children[2].Content, "baz");
+}
+
+TEST(MarkdownParserTest, MixedContent) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena);
+  EXPECT_EQ(Nodes.size(), 3u);
+}
+
+} // namespace
\ No newline at end of file

>From f4cb4a28630e0f91289bfd4416c59114c5654ff7 Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Wed, 10 Jun 2026 11:35:54 -0400
Subject: [PATCH 2/3] [clang-doc] Address review feedback: test fixture, raw
 strings, DEBUG_TYPE, EOF newlines

---
 .../clang-doc/support/Markdown.cpp            |  4 +-
 .../clang-doc/support/Markdown.h              |  2 +-
 .../clang-doc/MarkdownParserTest.cpp          | 97 +++++++++++--------
 3 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp
index 776150b939d27..9e008abf8b08d 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -12,7 +12,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DebugLog.h"
 
-#define DEBUG_TYPE "clang-doc-markdown"
+#define DEBUG_TYPE "clang-doc"
 
 using namespace llvm;
 
@@ -142,4 +142,4 @@ ArrayRef<MDNode> parseMarkdown(StringRef ParagraphText,
   return allocateNodes(Nodes, Arena);
 }
 
-} // namespace clang::doc::markdown
\ No newline at end of file
+} // namespace clang::doc::markdown
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h
index 890f764f937b1..09b79cc8f2437 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.h
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -69,4 +69,4 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
 
 } // namespace clang::doc::markdown
 
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
index 8df5efc7f1d5f..ff9bad88da136 100644
--- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
@@ -14,80 +14,99 @@ using namespace clang::doc::markdown;
 
 namespace {
 
-TEST(MarkdownParserTest, EmptyInput) {
+struct MarkdownParserTest : public ::testing::Test {
   llvm::BumpPtrAllocator Arena;
+};
+
+TEST_F(MarkdownParserTest, EmptyInput) {
   auto Nodes = parseMarkdown("", Arena);
   EXPECT_TRUE(Nodes.empty());
 }
 
-TEST(MarkdownParserTest, WhitespaceOnlyInput) {
-  llvm::BumpPtrAllocator Arena;
+TEST_F(MarkdownParserTest, WhitespaceOnlyInput) {
   auto Nodes = parseMarkdown("   \n  \n", Arena);
   EXPECT_TRUE(Nodes.empty());
 }
 
-TEST(MarkdownParserTest, PlainText) {
-  llvm::BumpPtrAllocator Arena;
+TEST_F(MarkdownParserTest, PlainText) {
   auto Nodes = parseMarkdown("hello world", Arena);
   ASSERT_EQ(Nodes.size(), 1u);
-  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text);
-  EXPECT_EQ(Nodes[0].Content, "hello world");
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_Text);
+  EXPECT_EQ(N.Content, "hello world");
 }
 
-TEST(MarkdownParserTest, FencedCodeBlock) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena);
+TEST_F(MarkdownParserTest, FencedCodeBlock) {
+  auto Nodes = parseMarkdown(R"(```cpp
+int x = 0;
+````)",
+                             Arena);
   ASSERT_EQ(Nodes.size(), 1u);
-  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
-  EXPECT_EQ(Nodes[0].Content, "cpp");
-  ASSERT_EQ(Nodes[0].Children.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(N.Content, "cpp");
+  ASSERT_EQ(N.Children.size(), 1u);
 }
 
-TEST(MarkdownParserTest, FencedCodeBlockNoLang) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("```\nsome code\n```", Arena);
+TEST_F(MarkdownParserTest, FencedCodeBlockNoLang) {
+  auto Nodes = parseMarkdown(R"(```
+some code
+```)",
+                             Arena);
   ASSERT_EQ(Nodes.size(), 1u);
-  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
-  EXPECT_TRUE(Nodes[0].Content.empty());
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(N.Content.empty());
 }
 
-TEST(MarkdownParserTest, UnterminatedFenceReturnsEmpty) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena);
+TEST_F(MarkdownParserTest, UnterminatedFenceStillYieldsNode) {
+  auto Nodes = parseMarkdown(R"(```cpp
+int x = 0;)",
+                             Arena);
   // Unterminated fence should not crash and should produce a code node
   // with whatever lines were found.
   EXPECT_FALSE(Nodes.empty());
 }
 
-TEST(MarkdownParserTest, PipeTable) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena);
+TEST_F(MarkdownParserTest, PipeTable) {
+  auto Nodes = parseMarkdown(R"(| A | B |
+|---|---|
+| 1 | 2 |)",
+                             Arena);
   ASSERT_EQ(Nodes.size(), 1u);
   EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table);
 }
 
-TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("a | b\nc | d", Arena);
-  // No separator row so should not be parsed as a table
+TEST_F(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) {
+  auto Nodes = parseMarkdown(R"(a | b
+c | d)",
+                             Arena);
+  // No separator row so should not be parsed as a table.
   for (const auto &Node : Nodes)
     EXPECT_NE(Node.Kind, NodeKind::NK_Table);
 }
 
-TEST(MarkdownParserTest, UnorderedList) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena);
+TEST_F(MarkdownParserTest, UnorderedList) {
+  auto Nodes = parseMarkdown(R"(- foo
+- bar
+- baz)",
+                             Arena);
   ASSERT_EQ(Nodes.size(), 1u);
-  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList);
-  ASSERT_EQ(Nodes[0].Children.size(), 3u);
-  EXPECT_EQ(Nodes[0].Children[0].Content, "foo");
-  EXPECT_EQ(Nodes[0].Children[1].Content, "bar");
-  EXPECT_EQ(Nodes[0].Children[2].Content, "baz");
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_UnorderedList);
+  ASSERT_EQ(N.Children.size(), 3u);
+  EXPECT_EQ(N.Children[0].Content, "foo");
+  EXPECT_EQ(N.Children[1].Content, "bar");
+  EXPECT_EQ(N.Children[2].Content, "baz");
 }
 
-TEST(MarkdownParserTest, MixedContent) {
-  llvm::BumpPtrAllocator Arena;
-  auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena);
+TEST_F(MarkdownParserTest, MixedContent) {
+  auto Nodes = parseMarkdown(R"(some text
+```
+code
+````
+- item)",
+                             Arena);
   EXPECT_EQ(Nodes.size(), 3u);
 }
 

>From 3ef8f62edab311caff0907ab2b9a0c3aaeb14353 Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Wed, 10 Jun 2026 13:45:44 -0400
Subject: [PATCH 3/3] [clang-doc] Add CommonMark spec tests for fenced code
 blocks

---
 .../clang-doc/MarkdownParserTest.cpp          | 112 +++++++++++++++++-
 1 file changed, 108 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
index ff9bad88da136..4ca979c1f1d24 100644
--- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
@@ -39,7 +39,7 @@ TEST_F(MarkdownParserTest, PlainText) {
 TEST_F(MarkdownParserTest, FencedCodeBlock) {
   auto Nodes = parseMarkdown(R"(```cpp
 int x = 0;
-````)",
+````````)",
                              Arena);
   ASSERT_EQ(Nodes.size(), 1u);
   const auto &N = Nodes[0];
@@ -51,7 +51,7 @@ int x = 0;
 TEST_F(MarkdownParserTest, FencedCodeBlockNoLang) {
   auto Nodes = parseMarkdown(R"(```
 some code
-```)",
+```````)",
                              Arena);
   ASSERT_EQ(Nodes.size(), 1u);
   const auto &N = Nodes[0];
@@ -102,12 +102,116 @@ TEST_F(MarkdownParserTest, UnorderedList) {
 
 TEST_F(MarkdownParserTest, MixedContent) {
   auto Nodes = parseMarkdown(R"(some text
-```
+```````
 code
-````
+````````
 - item)",
                              Arena);
   EXPECT_EQ(Nodes.size(), 3u);
 }
 
+// CommonMark §4.5 example 120: tilde fences work the same as backtick fences.
+TEST_F(MarkdownParserTest, TildeFence) {
+  auto Nodes = parseMarkdown(R"(~~~
+int x = 0;
+~~~)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(N.Content.empty());
+  ASSERT_EQ(N.Children.size(), 1u);
+}
+
+// CommonMark §4.5 example 120: tilde fence with a language tag.
+TEST_F(MarkdownParserTest, TildeFenceWithLang) {
+  auto Nodes = parseMarkdown(R"(~~~cpp
+int x = 0;
+~~~)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(N.Content, "cpp");
+  ASSERT_EQ(N.Children.size(), 1u);
+}
+
+// CommonMark §4.5 example 122: a tilde line does not close a backtick fence.
+TEST_F(MarkdownParserTest, ClosingFenceMustMatchOpeningChar) {
+  auto Nodes = parseMarkdown(R"(```
+aaa
+~~~
+````````)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  // ~~~ is content, not a closing fence.
+  ASSERT_EQ(N.Children.size(), 2u);
+}
+
+// CommonMark §4.5 example 130: a code block can be empty.
+TEST_F(MarkdownParserTest, EmptyFencedCodeBlock) {
+  auto Nodes = parseMarkdown(R"(```
+```````)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(N.Children.empty());
+}
+
+// CommonMark §4.5 example 129: a code block may contain only blank lines.
+TEST_F(MarkdownParserTest, FencedCodeBlockBlankLineContent) {
+  auto Nodes = parseMarkdown("```\n\n  \n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  ASSERT_EQ(N.Children.size(), 2u);
+}
+
+// CommonMark §4.5 example 142: lang tag is captured from the info string.
+TEST_F(MarkdownParserTest, InfoStringLangTag) {
+  auto Nodes = parseMarkdown(R"(```ruby
+def foo(x)
+  return 3
+end
+``````)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(N.Content, "ruby");
+  ASSERT_EQ(N.Children.size(), 3u);
+}
+
+// CommonMark §4.5 example 146: tilde fence info string may contain backticks.
+TEST_F(MarkdownParserTest, TildeFenceInfoStringWithBackticks) {
+  auto Nodes = parseMarkdown(R"(~~~ aa ``` ~~~
+foo
+~~~)",
+                             Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(N.Content, "aa ``` ~~~");
+  ASSERT_EQ(N.Children.size(), 1u);
+}
+
+// CommonMark §4.5 example 124: closing fence must be at least as long as the
+// opening fence.
+// TODO: our parser currently closes on the first line with 3 matching fence
+// chars regardless of opening fence length. Fix as part of the CommonMark
+// TODO in parseMarkdown().
+TEST_F(MarkdownParserTest, ClosingFenceLengthTODO) {
+  auto Nodes = parseMarkdown("````\naaa\n```", Arena);
+  // The ``` line should not close the ```` fence per CommonMark, but our
+  // parser currently treats it as a closing fence. This test documents the
+  // current (non-conformant) behavior.
+  ASSERT_EQ(Nodes.size(), 1u);
+  const auto &N = Nodes[0];
+  EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode);
+  ASSERT_EQ(N.Children.size(), 1u);
+}
+
 } // namespace
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to