(doris-website) branch master updated: [add]node script for merge markdown (#1046)

morningman Sun, 01 Sep 2024 08:29:39 -0700

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git



The following commit(s) were added to refs/heads/master by this push:
     new e8435c87fdc [add]node script for merge markdown (#1046)
e8435c87fdc is described below

commit e8435c87fdc60f6bcb5ce6171442d80d00905fa1
Author: 0xFlackoLFG <k9745u...@gmail.com>
AuthorDate: Sun Sep 1 23:28:48 2024 +0800

    [add]node script for merge markdown (#1046)
---
 scripts/merge_md.js   | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++
 scripts/test_error.sh |  15 +----
 2 files changed, 162 insertions(+), 13 deletions(-)

diff --git a/scripts/merge_md.js b/scripts/merge_md.js
new file mode 100644
index 00000000000..a611a921111
--- /dev/null
+++ b/scripts/merge_md.js
@@ -0,0 +1,160 @@
+//  Licensed to the Apache Software Foundation (ASF) under one
+//  or more contributor license agreements.  See the NOTICE file
+//  distributed with this work for additional information
+//  regarding copyright ownership.  The ASF licenses this file
+//  to you under the Apache License, Version 2.0 (the
+//  "License"); you may not use this file except in compliance
+//  with the License.  You may obtain a copy of the License at
+
+//    http://www.apache.org/licenses/LICENSE-2.0
+
+//  Unless required by applicable law or agreed to in writing,
+//  software distributed under the License is distributed on an
+//  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//  KIND, either express or implied.  See the License for the
+//  specific language governing permissions and limitations
+//  under the License.
+
+const fs = require('fs');
+const path = require('path');
+
+const sidebarPath = 'versioned_sidebars/version-3.0-sidebars.json'
+const docsBaseDir = 'i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0'
+const outputPath = 'doc.md'
+
+const fileLinkName = {};
+
+
+function readJSON(filePath) {
+    const data = fs.readFileSync(filePath, 'utf-8');
+    return JSON.parse(data);
+}
+
+function readMarkdownFile(filePath) {
+    return fs.readFileSync(filePath, 'utf-8');
+}
+
+function writeMarkdownContent(filePath, content) {
+    fs.writeFileSync(filePath, content, 'utf-8');
+}
+
+function replaceLinkWrap(chapter) {
+    const hyperLinkPattern = /\[([^\]]+)\]\(([^#)]+)(#[^)]+)?\)/g;
+
+    function replaceLink(match, linkName, link, frag) {
+        if (link.startsWith('http')) {
+            return match;
+        } else if (/\.(png|jpeg|svg|gif|jpg)$/.test(link)) {
+            const imgLink = link.replace(/images\//, 'static/images/');
+            return `[${linkName}](${imgLink})`;
+        } else {
+            if (link.includes('.md#') && frag) {
+                return frag.replace(/[\s]+/g, '-').toLowerCase()
+            } else {
+                let fullPath = path.join(docsBaseDir, customResolve(link))
+                if (!link.endsWith('.md')) {
+                    fullPath += '.md';
+                }
+                return `[${linkName}](#${getMainTitleFromFile(fullPath).replace(/[\s]+/g, '-').toLowerCase()})`
+            }
+        }
+    }
+    return chapter.replace(hyperLinkPattern, replaceLink);
+}
+
+function customResolve(relativePath) {
+    const parts = relativePath.split('/');
+    const resolvedParts = [];
+    for (const part of parts) {
+        if (part === '..') {
+            resolvedParts.pop();
+        } else if (part !== '.') {
+            resolvedParts.push(part);
+        }
+    }
+    return resolvedParts.join('/');
+}
+
+function getMainTitleFromFile(filePath) {
+    if (!fs.existsSync(filePath)) {
+        return '';
+    }
+    const mdContent = fs.readFileSync(filePath, 'utf8');
+    const match = mdContent.match(/{[^}]*}/);
+    if (match) {
+        const mainTitle = JSON.parse(match[0].replace(/'/g, '"')).title;
+        return mainTitle;
+    }
+    return '';
+}
+
+function processItems(items, level) {
+    let content = '';
+    items.forEach(item => {
+        if (typeof item === 'string') {
+            const filePath = path.join(docsBaseDir, item + '.md');
+            if (fs.existsSync(filePath)) {
+                let mdContent = readMarkdownFile(filePath);
+                mdContent = replaceLinkWrap(mdContent);
+                content += adjustHeaders(mdContent, level) + '\n\n';
+            }
+        } else if (typeof item === 'object' && item.items) {
+            content += `${'#'.repeat(level + 1)} ${item.label}\n\n`;
+            content += processItems(item.items, level + 1);
+        }
+    });
+    return content;
+}
+
+function adjustHeaders(mdContent, level) {
+    const match = mdContent.match(/{[^}]*}/);
+    const mainTitle = JSON.parse(match[0].replace(/'/g, '"')).title;
+    const lines = mdContent.split('\n');
+
+    let hasMainTitle = false;
+    let firstSeparatorIndex = -1;
+    let secondSeparatorIndex = -1;
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        if (line.startsWith('# ')) {
+            hasMainTitle = true;
+            break;
+        }
+        if (line.trim() === '---') {
+            if (firstSeparatorIndex === -1) {
+                firstSeparatorIndex = i;
+            } else {
+                secondSeparatorIndex = i;
+                break
+            }
+        }
+    }
+
+    const adjustedLines = lines.map(line => {
+        if (line.startsWith('#')) {
+            const numHashes = line.match(/^#+/)[0].length;
+
+            return '#'.repeat(numHashes + level) + line.slice(numHashes);
+        }
+        return line;
+    });
+
+    if (!hasMainTitle && secondSeparatorIndex !== -1) {
+        adjustedLines.splice(secondSeparatorIndex + 2, 0, `${'#'.repeat(level + 1)} ${mainTitle}`);
+    }
+
+    return adjustedLines.join('\n');
+}
+
+function mergeMarkdownFiles() {
+    const sidebarData = readJSON(sidebarPath);
+    let content = '';
+    sidebarData.docs.forEach(category => {
+        content += `# ${category.label}\n\n`;
+        content += processItems(category.items, 1);
+    });
+    writeMarkdownContent(outputPath, content);
+}
+
+mergeMarkdownFiles();
+console.log('successfully');
\ No newline at end of file
diff --git a/scripts/test_error.sh b/scripts/test_error.sh
index 4402b2af9d1..0dfd41eb22d 100755
--- a/scripts/test_error.sh
+++ b/scripts/test_error.sh
@@ -1,48 +1,38 @@
 #!/bin/bash
 
-# pdf纠错脚本 bash scripts/test_error.sh inputFilePath outputFilePath
+# bash scripts/test_error.sh inputFilePath outputFilePath
 
-# 脚本在遇到错误时不会退出
 set +e
 
-# 设置字体
 MAINFONT="WenQuanYi Micro Hei"
 MONOFONT="WenQuanYi Micro Hei Mono"
 
-# 版本标签
 _version_tag="$(date '+%Y%m%d')"
 
-# 检查是否提供了两个参数
 if [ "$#" -ne 2 ]; then
     echo "Usage: $0 <directory_path> <output_directory>"
     exit 1
 fi
 
-# 获取输入的目录路径和输出目录
 INPUT_DIRECTORY=$1
 OUTPUT_DIRECTORY=$2
 
-# 检查输入的目录是否存在
 if [ ! -d "$INPUT_DIRECTORY" ]; then
     echo "Error: Directory $INPUT_DIRECTORY does not exist."
     exit 1
 fi
 
-# 创建输出目录（如果不存在）
 mkdir -p "$OUTPUT_DIRECTORY"
 
-# 遍历目录下的所有 Markdown 文件
 find "$INPUT_DIRECTORY" -type f -name "*.md" | while read -r FILE; do
-    # 获取文件名（不带路径和扩展名）
+
     filename=$(basename -- "$FILE")
     filename="${filename%.*}"
 
-    # 生成输出文件路径
     output_file="${OUTPUT_DIRECTORY}/${filename}.pdf"
 
     echo "Processing $FILE..."
 
-    # 尝试转换文件
     pandoc -N --toc --smart --latex-engine=xelatex \
     --template=templates/template.tex \
     --listings \
@@ -57,7 +47,6 @@ find "$INPUT_DIRECTORY" -type f -name "*.md" | while read -r FILE; do
     -V include-after="\\input{templates/copyright.tex}" \
     "$FILE" -s -o "$output_file"
 
-    # 检查是否有错误
     if [ $? -ne 0 ]; then
         echo "Error processing $FILE"
     else


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris-website) branch master updated: [add]node script for merge markdown (#1046)

Reply via email to