This is an automated email from the ASF dual-hosted git repository.

kassiez pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
     new 9aacaa8c917 [script] Deadlink check script (#1552)
9aacaa8c917 is described below

commit 9aacaa8c9170b7802f2dd9cec6653337be7d3763
Author: zclllyybb <zhaochan...@selectdb.com>
AuthorDate: Fri Dec 20 16:01:04 2024 +0800

    [script] Deadlink check script (#1552)
    
    ## Versions
    
    - [ ] dev
    - [ ] 3.0
    - [ ] 2.1
    - [ ] 2.0
    
    ## Languages
    
    - [ ] Chinese
    - [ ] English
    
    ## Docs Checklist
    
    - [ ] Checked by AI
    - [ ] Test Cases Built
---
 check_all_deadlink.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/check_all_deadlink.py b/check_all_deadlink.py
new file mode 100644
index 00000000000..8ebdde92d91
--- /dev/null
+++ b/check_all_deadlink.py
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import os
+from urllib.parse import urlparse
+
+
def process_md_file(file_path):
    """Report relative Markdown links in ``file_path`` whose target is missing.

    The file is scanned line by line for inline links of the form
    ``[text](target)``. Lines inside fenced code blocks (delimited by lines
    starting with three backticks) are ignored. A link is checked only when
    it is a relative file reference; empty targets, pure ``#anchor`` links,
    URLs with a scheme and absolute paths are skipped.

    For each checked link, surrounding whitespace, an optional quoted link
    title and any ``#anchor`` suffix are removed, and the remainder is
    resolved against the directory of ``file_path``. A target without a
    ``.md``/``.mdx`` suffix gets ``.md`` appended, and a ``.md`` target is
    also accepted when the sibling ``.mdx`` file exists. Every link whose
    target cannot be found is printed to stdout.

    Args:
        file_path: Path of the Markdown file to scan (read as UTF-8).

    Returns:
        int: Number of dead links reported for this file.
    """
    link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
    code_block_pattern = re.compile(r"^```.*$")
    # Optional link title, e.g. the `"Title"` part of [text](page.md "Title").
    title_pattern = re.compile(r"""\s+(?:"[^"]*"|'[^']*')$""")

    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    base_dir = os.path.dirname(file_path)
    dead_links = 0
    in_code_block = False

    for line_number, line in enumerate(content.splitlines(), start=1):
        # A fence line toggles the code-block state and is never scanned itself.
        if code_block_pattern.match(line):
            in_code_block = not in_code_block
            continue

        if in_code_block:
            continue

        for link in link_pattern.findall(line):
            target = title_pattern.sub("", link.strip())

            # Nothing to resolve: "[text]()" or an in-page "#anchor" link.
            if not target or target.startswith("#"):
                continue

            try:
                has_scheme = bool(urlparse(target).scheme)
            except ValueError:
                # urlparse only raises while parsing a "//authority" part,
                # so the target is URL-like rather than a relative file path.
                continue

            # Skip urls and absolute (site-rooted) paths.
            if has_scheme or os.path.isabs(target):
                continue

            # Drop the section anchor *before* joining with the directory so
            # that a "#" in the directory path itself is left untouched.
            target = target.split("#", 1)[0]

            full_path = os.path.normpath(os.path.join(base_dir, target))
            if not full_path.endswith((".md", ".mdx")):
                full_path += ".md"

            candidates = [full_path]
            if full_path.endswith(".md"):
                # "page.md" is also satisfied by an existing "page.mdx".
                candidates.append(full_path[:-3] + ".mdx")

            if not any(os.path.exists(candidate) for candidate in candidates):
                print(
                    f"Error: File not found for link '{link}' in file '{file_path}:{line_number}'"
                )
                dead_links += 1

    return dead_links
+
+
def travel(root_path: str):
    """Run the dead-link check on every Markdown file under ``root_path``.

    The tree is walked with ``os.walk`` and each file whose name ends in
    ``.md`` or ``.mdx`` is passed to ``process_md_file``. Directories and
    files are visited in sorted order so the report is reproducible
    regardless of the order in which the filesystem lists entries.

    Args:
        root_path: Directory to scan recursively. ``os.walk`` yields nothing
            for a path that does not exist, so such a path checks no files.

    Returns:
        int: Sum of the values returned by ``process_md_file``; a call that
        returns ``None`` contributes 0.
    """
    total = 0
    for root, dirs, files in os.walk(root_path):
        # Sorting in place makes os.walk descend in a stable order as well.
        dirs.sort()
        for file in sorted(files):
            if file.endswith((".md", ".mdx")):
                md_file_path = os.path.join(root, file)
                total += process_md_file(md_file_path) or 0
    return total
+
+
if __name__ == "__main__":
    # Documentation roots to scan, given relative to the working directory.
    for docs_root in ("docs", "i18n", "versioned_docs"):
        travel(docs_root)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to