This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fory.git


The following commit(s) were added to refs/heads/main by this push:
     new f4cfd62d3 chore(cpp): configure CI to extract and execute C++ code 
from Markdo… (#3381)
f4cfd62d3 is described below

commit f4cfd62d3bd038e9028d5b0d3ed6b22df0b5ddd6
Author: Tyooughtul <[email protected]>
AuthorDate: Tue Mar 24 12:37:43 2026 +0800

    chore(cpp): configure CI to extract and execute C++ code from Markdo… 
(#3381)
    
    - add extract_cpp_doc_examples.py to extract C++ code blocks from
    markdown
    - integrate doc tests into CI with --skip-doc-tests and --doc-tests-only
    options
    - fix documentation bugs discovered by tests:
      - index.md: change class to struct for aggregate initialization
      - field-configuration.md: fix smart pointer access using .get()
    - add cpp/doc_tests/ to .gitignore
    
    issue: #658
    
    <!--
    **Thanks for contributing to Apache Fory™.**
    
    **If this is your first time opening a PR on fory, you can refer to
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fory™** community has requirements on the naming of pr
    titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fory/blob/main/CONTRIBUTING.md).
    
    - Apache Fory™ has a strong focus on performance. If the PR you submit
    will have an impact on performance, please benchmark it first and
    provide the benchmark result here.
    -->
    
    ## Why?
    <!-- Describe the purpose of this PR. -->
    
    Add automated testing for C++ code examples in documentation to ensure
    they compile and run correctly.
    
    ## What does this PR do?
    
    <!-- Describe the details of this PR. -->
    
    - add extract_cpp_doc_examples.py to extract C++ code blocks from
    markdown
    - integrate doc tests into CI with --skip-doc-tests and --doc-tests-only
    options
    - fix documentation bugs discovered by tests:
      - index.md: change class to struct for aggregate initialization
      - field-configuration.md: fix smart pointer access using .get()
    - add cpp/doc_tests/ to .gitignore
    
    ## Related issues
    - #658
    
    <!--
    Is there any related issue? If this PR closes them you can say
    fix/closes:
    
    - #xxxx0
    - #xxxx1
    - Fixes #xxxx2
    -->
    
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
---
 .github/workflows/ci.yml              |   1 -
 .gitignore                            |   1 +
 ci/extract_cpp_doc_code.py            | 325 ++++++++++++++++++++++++++++++++++
 ci/run_ci.py                          |  22 ++-
 ci/tasks/cpp.py                       |  53 +++++-
 docs/guide/cpp/field-configuration.md |  19 +-
 docs/guide/cpp/index.md               |  11 +-
 7 files changed, 411 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c9405795f..96cfd296c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,7 +33,6 @@ on:
   pull_request:
     paths-ignore:
       - "**/*.md"
-      - "docs/**"
       - "LICENSE"
       - ".vscode/**"
       - ".gitignore"
diff --git a/.gitignore b/.gitignore
index 73be8ed90..e6491fffb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,6 +67,7 @@ python/dist/
 # C++
 cpp/build/
 cpp/bazel-*
+cpp/doc_tests/
 
 # Bazel build directories
 bazel-out/
diff --git a/ci/extract_cpp_doc_code.py b/ci/extract_cpp_doc_code.py
new file mode 100644
index 000000000..08bb48d29
--- /dev/null
+++ b/ci/extract_cpp_doc_code.py
@@ -0,0 +1,325 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Extract C++ code examples from markdown documentation and generate test files.
+
+This script scans markdown files in docs/guide/cpp/, extracts ```cpp code blocks,
+and generates compilable C++ test files that can be run to verify the documentation
+examples are correct.
+"""
+
+import argparse
+import logging
+import re
+import sys
+from pathlib import Path
+from typing import List, Tuple
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+def extract_cpp_code_blocks(content: str) -> List[Tuple[str, int]]:
+    # Collect every fenced ```cpp block found in a markdown document.
+    #
+    # Returns one (code, line_num) tuple per block, in document order. `code`
+    # is the block body with surrounding whitespace stripped; `line_num` is the
+    # 1-based line on which the opening fence starts.
+    fence = re.compile(r"```cpp\n(.*?)```", re.DOTALL)
+    return [
+        # Newlines before the fence, plus one, give its 1-based line number.
+        (found.group(1).strip(), content.count("\n", 0, found.start()) + 1)
+        for found in fence.finditer(content)
+    ]
+
+
+def is_complete_example(code: str) -> bool:
+    # Decide whether a snippet is a complete, runnable example.
+    #
+    # A snippet qualifies when it contains "int main()" or "int main (" and no
+    # line before its main looks like an executing statement at the top level
+    # (outside all braces).
+    declaration_markers = (
+        "struct ",
+        "class ",
+        "enum ",
+        "using ",
+        "namespace ",
+        "#include",
+        "FORY_STRUCT",
+        "FORY_ENUM",
+    )
+    call_like = re.compile(r"\w+\s*\([^)]*\)\s*;")
+    declaration_start = re.compile(
+        r"^(struct|class|enum|using|namespace|#include|FORY_)"
+    )
+
+    depth = 0
+    for raw_line in code.split("\n"):
+        text = raw_line.strip()
+        # Blank lines and line comments carry no information.
+        if text == "" or text.startswith("//"):
+            continue
+
+        # Running brace balance; zero means we are at the top level.
+        depth += text.count("{") - text.count("}")
+
+        # Once main has been reached no later line can reject the snippet,
+        # so the scan can stop here.
+        if "int main" in text:
+            break
+
+        if depth != 0:
+            continue
+        # Declarations, includes and Fory macros are fine at the top level.
+        if any(marker in text for marker in declaration_markers):
+            continue
+        # Lines ending in ';' are treated as forward declarations.
+        if text.endswith(";"):
+            continue
+        # Anything else that looks like a call statement is executing code
+        # outside a function, so the snippet is not a complete example.
+        if call_like.search(text) and not declaration_start.search(text):
+            return False
+
+    return "int main()" in code or "int main (" in code
+
+
+def wrap_code_as_test(code: str, doc_file: str, block_index: int) -> str:
+    # Wrap a documentation snippet as a complete, compilable C++ test file.
+    #
+    # Args:
+    #   code: C++ snippet text taken from a markdown code block.
+    #   doc_file: Name of the markdown file, recorded in the header comment.
+    #   block_index: Index of the block, recorded in the header comment when a
+    #     main() has to be synthesized.
+    #
+    # Returns the full source text of the generated test file.
+
+    # (marker, include line) pairs: the include is added when the marker occurs
+    # in the snippet and the snippet does not already spell out that include.
+    include_triggers = [
+        ("std::string", "#include <string>"),
+        ("std::vector", "#include <vector>"),
+        ("std::map", "#include <map>"),
+        ("std::set", "#include <set>"),
+        ("std::unordered_map", "#include <unordered_map>"),
+        ("std::unordered_set", "#include <unordered_set>"),
+        ("std::optional", "#include <optional>"),
+        ("std::shared_ptr", "#include <memory>"),
+        ("std::unique_ptr", "#include <memory>"),
+        ("std::variant", "#include <variant>"),
+        ("std::chrono", "#include <chrono>"),
+        ("std::make_shared", "#include <memory>"),
+        ("std::make_unique", "#include <memory>"),
+        ("assert(", "#include <cassert>"),
+        ("std::cout", "#include <iostream>"),
+        ("RowEncoder", '#include "fory/encoder/row_encoder.h"'),
+        ("Row", '#include "fory/row/row.h"'),
+    ]
+
+    includes = set()
+
+    # Add the Fory header whenever the snippet does not include it itself, even
+    # if it includes other headers. The generated file can place
+    # `using namespace fory::serialization;` ahead of the snippet's own
+    # includes, and that directive only compiles once the namespace has been
+    # declared by a header that precedes it.
+    if "fory/serialization/fory.h" not in code:
+        includes.add('#include "fory/serialization/fory.h"')
+
+    for marker, include_line in include_triggers:
+        if marker in code and include_line not in code:
+            includes.add(include_line)
+
+    include_section = "\n".join(sorted(includes))
+
+    if "int main()" in code or "int main (" in code:
+        # The snippet brings its own main(); only add the using-directive when
+        # the snippet has none.
+        if "using namespace" not in code:
+            code = f"using namespace fory::serialization;\n\n{code}"
+        return f"""// Auto-generated test from {doc_file}
+
+{include_section}
+
+{code}
+"""
+    else:
+        # No main() in the snippet: append a trivial one so the file links and
+        # the test passes as long as the snippet compiles.
+        return f"""// Auto-generated test from {doc_file}, block {block_index}
+
+#include <iostream>
+{include_section}
+
+using namespace fory::serialization;
+
+{code}
+
+int main() {{
+    std::cout << "Documentation example compiled successfully" << std::endl;
+    return 0;
+}}
+"""
+
+
+def generate_test_file_name(doc_file: str, block_index: int) -> str:
+    # Build the generated test's file name from the markdown file name (its
+    # extension is dropped) and the index of the code block inside that file.
+    return "doc_test_{}_{}.cc".format(Path(doc_file).stem, block_index)
+
+
+def process_markdown_file(md_path: Path, output_dir: Path) -> List[Path]:
+    # Turn the complete C++ examples of one markdown file into test sources.
+    #
+    # Each ```cpp block that is_complete_example() accepts is wrapped with
+    # wrap_code_as_test() and written into output_dir. Returns the paths of
+    # the files written, in document order.
+    with open(md_path, "r", encoding="utf-8") as source:
+        markdown_text = source.read()
+
+    blocks = extract_cpp_code_blocks(markdown_text)
+    logging.info(f"  Found {len(blocks)} C++ code blocks")
+
+    written = []
+    for index, (snippet, line_num) in enumerate(blocks):
+        if is_complete_example(snippet):
+            # The index counts every block in the file, including skipped
+            # ones, so generated names are stable per block position.
+            wrapped = wrap_code_as_test(snippet, md_path.name, index)
+            file_name = generate_test_file_name(md_path.name, index)
+            destination = output_dir / file_name
+
+            with open(destination, "w", encoding="utf-8") as sink:
+                sink.write(wrapped)
+
+            written.append(destination)
+            logging.info(f"  Generated {file_name}")
+        else:
+            logging.debug(f"  Skipping incomplete example at line {line_num}")
+
+    return written
+
+
+def generate_bazel_build(test_files: List[Path], output_dir: Path) -> None:
+    # Write output_dir/BUILD with one cc_test per generated source file and,
+    # when there is at least one test, a "doc_example_tests" test_suite that
+    # lists them all. Targets are emitted in sorted order of the file paths.
+    sections = ['package(default_visibility = ["//visibility:public"])\n\n']
+    target_names = []
+
+    for source in sorted(test_files):
+        target = source.stem
+        target_names.append(target)
+
+        # Tests whose name contains "row-format" also get the row and encoder
+        # libraries as dependencies.
+        deps = ['"//cpp/fory/serialization:fory_serialization"']
+        if "row-format" in target:
+            deps += [
+                '"//cpp/fory/row:fory_row_format"',
+                '"//cpp/fory/encoder:fory_encoder"',
+            ]
+        dep_lines = ",\n        ".join(deps)
+
+        sections.append(
+            "\ncc_test(\n"
+            f'    name = "{target}",\n'
+            f'    srcs = ["{source.name}"],\n'
+            "    deps = [\n"
+            f"        {dep_lines},\n"
+            "    ],\n"
+            ")\n"
+        )
+
+    if target_names:
+        suite_lines = "\n".join(f'        ":{entry}",' for entry in target_names)
+        sections.append(
+            "\ntest_suite(\n"
+            '    name = "doc_example_tests",\n'
+            "    tests = [\n"
+            f"{suite_lines}\n"
+            "    ],\n"
+            ")\n"
+        )
+
+    with open(output_dir / "BUILD", "w", encoding="utf-8") as handle:
+        handle.write("".join(sections))
+
+    logging.info(f"Generated BUILD file with {len(target_names)} tests")
+
+
+def main():
+    # Command-line entry point: extract the C++ examples from every *.md file
+    # in the docs directory, write them as test sources into the output
+    # directory and, when requested, generate a Bazel BUILD file for them.
+    # Both directories are resolved relative to the parent of this script's
+    # directory. Exits with status 1 when the docs directory does not exist.
+    cli = argparse.ArgumentParser(
+        description="Extract C++ code examples from markdown documentation"
+    )
+    cli.add_argument(
+        "--docs-dir",
+        default="docs/guide/cpp",
+        help="Directory containing markdown documentation files",
+    )
+    cli.add_argument(
+        "--output-dir",
+        default="cpp/doc_tests",
+        help="Output directory for generated test files",
+    )
+    cli.add_argument(
+        "--generate-build",
+        action="store_true",
+        help="Generate Bazel BUILD file",
+    )
+    cli.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Enable verbose output",
+    )
+    options = cli.parse_args()
+
+    if options.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    repo_root = Path(__file__).parent.parent
+    markdown_dir = repo_root / options.docs_dir
+    target_dir = repo_root / options.output_dir
+
+    if not markdown_dir.exists():
+        logging.error(f"Documentation directory not found: {markdown_dir}")
+        sys.exit(1)
+
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    # Markdown files are visited in sorted order so the output is reproducible.
+    generated = [
+        test_path
+        for md_file in sorted(markdown_dir.glob("*.md"))
+        for test_path in process_markdown_file(md_file, target_dir)
+    ]
+
+    logging.info(f"\nTotal: Generated {len(generated)} test files")
+
+    # The BUILD file is only written when at least one test was generated.
+    if options.generate_build and generated:
+        generate_bazel_build(generated, target_dir)
+
+    print(f"\nGenerated files in {target_dir}:")
+    for test_path in sorted(generated):
+        print(f"  {test_path.name}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/run_ci.py b/ci/run_ci.py
index 66af37f68..e3877549c 100644
--- a/ci/run_ci.py
+++ b/ci/run_ci.py
@@ -125,7 +125,21 @@ def parse_args():
         action="store_true",
         help="Only install dependencies without running tests",
     )
-    cpp_parser.set_defaults(func=lambda install_deps_only: 
cpp.run(install_deps_only))
+    cpp_parser.add_argument(
+        "--skip-doc-tests",
+        action="store_true",
+        help="Skip documentation example tests",
+    )
+    cpp_parser.add_argument(
+        "--doc-tests-only",
+        action="store_true",
+        help="Only run documentation example tests",
+    )
+    cpp_parser.set_defaults(
+        func=lambda install_deps_only, skip_doc_tests, doc_tests_only: cpp.run(
+            install_deps_only, skip_doc_tests, doc_tests_only
+        )
+    )
 
     # Rust subparser
     rust_parser = subparsers.add_parser(
@@ -263,7 +277,11 @@ def parse_args():
                 run_shell_script(f"java{version}")
     elif command == "cpp":
         if USE_PYTHON_CPP or arg_dict.get("install_deps_only", False):
-            func(arg_dict.get("install_deps_only", False))
+            func(
+                arg_dict.get("install_deps_only", False),
+                arg_dict.get("skip_doc_tests", False),
+                arg_dict.get("doc_tests_only", False),
+            )
         else:
             run_shell_script("cpp")
     elif command == "rust":
diff --git a/ci/tasks/cpp.py b/ci/tasks/cpp.py
index e24541c30..4f27f7dc3 100644
--- a/ci/tasks/cpp.py
+++ b/ci/tasks/cpp.py
@@ -16,14 +16,56 @@
 # under the License.
 
 import logging
+import os
+import subprocess
 from . import common
 
 
-def run(install_deps_only=False):
+def generate_doc_example_tests():
+    # Generate C++ test files from documentation examples by running
+    # ci/extract_cpp_doc_code.py from the project root.
+    #
+    # Raises RuntimeError when the script exits with a non-zero status; its
+    # stderr is logged first.
+
+    # Function-local import so this block does not rely on a module-level
+    # `import sys` being present.
+    import sys
+
+    logging.info("Generating documentation example tests")
+
+    script_path = os.path.join(common.PROJECT_ROOT_DIR, "ci", "extract_cpp_doc_code.py")
+    result = subprocess.run(
+        [
+            # Run the helper with the interpreter that is running CI. A bare
+            # "python" may be missing from PATH or be a different interpreter.
+            # Fall back to it only when sys.executable is unavailable.
+            sys.executable or "python",
+            script_path,
+            "--docs-dir",
+            "docs/guide/cpp",
+            "--output-dir",
+            "cpp/doc_tests",
+            "--generate-build",
+        ],
+        cwd=common.PROJECT_ROOT_DIR,
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        logging.error(f"Failed to generate doc example tests: {result.stderr}")
+        raise RuntimeError("Failed to generate doc example tests")
+
+
+def run_doc_example_tests():
+    # Regenerate the test sources from the documentation, then run the
+    # generated doc_example_tests suite with Bazel. The x86_64 Bazel config is
+    # added when common.get_os_machine() reports "x86_64".
+    generate_doc_example_tests()
+
+    logging.info("Running documentation example tests")
+    suite = "//cpp/doc_tests:doc_example_tests"
+    config_flag = "--config=x86_64 " if common.get_os_machine() == "x86_64" else ""
+    common.bazel(f"test {config_flag}{suite}")
+
+
+def run(install_deps_only=False, skip_doc_tests=False, doc_tests_only=False):
     """Run C++ CI tasks.
 
     Args:
         install_deps_only: If True, only install dependencies without running 
tests.
+        skip_doc_tests: If True, skip documentation example tests.
+        doc_tests_only: If True, only run documentation example tests.
     """
     logging.info("Running C++ CI tasks")
     common.install_cpp_deps()
@@ -32,6 +74,11 @@ def run(install_deps_only=False):
         logging.info("Skipping tests as --install-deps-only was specified")
         return
 
+    if doc_tests_only:
+        # logging.info("Running only documentation example tests")
+        run_doc_example_tests()
+        return
+
     # collect all C++ targets
     query_result = common.bazel("query //...")
     targets = query_result.replace("\n", " ").replace("\r", " ")
@@ -42,3 +89,7 @@ def run(install_deps_only=False):
 
     common.bazel(f"{test_command} {targets}")
     logging.info("C++ CI tasks completed successfully")
+
+    # Run documentation example tests
+    if not skip_doc_tests:
+        run_doc_example_tests()
diff --git a/docs/guide/cpp/field-configuration.md 
b/docs/guide/cpp/field-configuration.md
index 899a52f6f..94f5170dd 100644
--- a/docs/guide/cpp/field-configuration.md
+++ b/docs/guide/cpp/field-configuration.md
@@ -225,7 +225,7 @@ int main() {
   doc.description = "A sample document";
   doc.metadata = nullptr;  // Allowed because nullable
   doc.parent = std::make_shared<Document>();
-  doc.parent->title = "Parent Doc";
+  doc.parent.get()->title = "Parent Doc";
   doc.related = nullptr;  // Allowed because nullable
 
   auto bytes = fory.serialize(doc).value();
@@ -429,15 +429,14 @@ int main() {
   auto fory = Fory::builder().xlang(true).build();
   fory.register_struct<MetricsData>(100);
 
-  MetricsData data{
-      .request_count = 42,
-      .bytes_sent = 1024,
-      .user_id = 12345678,
-      .session_id = 9876543210,
-      .created_at = 1704067200000000000ULL, // 2024-01-01 in nanoseconds
-      .error_count = 3,
-      .last_access_time = std::nullopt
-  };
+  MetricsData data;
+  data.request_count = 42;
+  data.bytes_sent = 1024;
+  data.user_id = 12345678;
+  data.session_id = 9876543210;
+  data.created_at = 1704067200000000000ULL; // 2024-01-01 in nanoseconds
+  data.error_count = 3;
+  data.last_access_time = std::nullopt;
 
   auto bytes = fory.serialize(data).value();
   auto decoded = fory.deserialize<MetricsData>(bytes).value();
diff --git a/docs/guide/cpp/index.md b/docs/guide/cpp/index.md
index 6348faf41..344820bb0 100644
--- a/docs/guide/cpp/index.md
+++ b/docs/guide/cpp/index.md
@@ -137,11 +137,13 @@ See the 
[examples/cpp](https://github.com/apache/fory/tree/main/examples/cpp) di
 
 ```cpp
 #include "fory/serialization/fory.h"
+#include <string>
+#include <vector>
 
 using namespace fory::serialization;
 
 // Define a struct
-class Person {
+struct Person {
   std::string name;
   int32_t age;
   std::vector<std::string> hobbies;
@@ -149,13 +151,8 @@ class Person {
   bool operator==(const Person &other) const {
     return name == other.name && age == other.age && hobbies == other.hobbies;
   }
-
-public:
-  // Register the struct with Fory (FORY_STRUCT must be in public scope).
-  FORY_STRUCT(Person, name, age, hobbies);
 };
-
-
+FORY_STRUCT(Person, name, age, hobbies);
 
 int main() {
   // Create a Fory instance


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to