This is an automated email from the ASF dual-hosted git repository. kassiez pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push: new 9aacaa8c917 [script] Deadlink check script (#1552) 9aacaa8c917 is described below commit 9aacaa8c9170b7802f2dd9cec6653337be7d3763 Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Fri Dec 20 16:01:04 2024 +0800 [script] Deadlink check script (#1552) ## Versions - [ ] dev - [ ] 3.0 - [ ] 2.1 - [ ] 2.0 ## Languages - [ ] Chinese - [ ] English ## Docs Checklist - [ ] Checked by AI - [ ] Test Cases Built --- check_all_deadlink.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/check_all_deadlink.py b/check_all_deadlink.py new file mode 100644 index 00000000000..8ebdde92d91 --- /dev/null +++ b/check_all_deadlink.py @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import re
import os
from urllib.parse import urlparse

# Inline markdown link "[text](destination)"; group 1 is the destination.
_LINK_PATTERN = re.compile(r"\[.*?\]\((.*?)\)")

# File suffixes treated as markdown documents.
_MARKDOWN_SUFFIXES = (".md", ".mdx")


def _is_relative_file_link(link):
    """Return True when *link* is a relative filesystem path that can be checked.

    Empty destinations, pure in-page anchors ("#section"), links carrying a
    URL scheme and absolute paths are not checked.
    """
    # An empty destination "[text]()" has no path to resolve; the previous
    # ``link[0]`` test raised IndexError on it and aborted the whole scan.
    if not link or link.startswith("#"):
        return False
    try:
        has_scheme = bool(urlparse(link).scheme)
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a bad IPv6 literal);
        # such a link is not a local path, so leave it alone.
        return False
    return not has_scheme and not os.path.isabs(link)


def _link_target_exists(file_path, link):
    """Return True when the file referenced by *link* exists on disk.

    *link* is resolved relative to the directory containing *file_path*.
    """
    # Strip the "#fragment" BEFORE joining/normalising so that neither a
    # fragment containing "/.." nor a "#" in the containing directory name
    # can corrupt the resolved path.
    target = link.partition("#")[0]
    resolved = os.path.normpath(
        os.path.join(os.path.dirname(file_path), target)
    )

    if resolved.endswith(".mdx"):
        return os.path.exists(resolved)
    if resolved.endswith(".md"):
        # A link written as "page.md" is also satisfied by "page.mdx".
        return os.path.exists(resolved) or os.path.exists(
            resolved[:-3] + ".mdx"
        )
    # No markdown suffix: either an extension-less document link
    # ("page" -> page.md / page.mdx) or a link to another kind of file
    # (an image, a download) that must exist exactly as written.
    return (
        os.path.isfile(resolved)
        or os.path.exists(resolved + ".md")
        or os.path.exists(resolved + ".mdx")
    )


def process_md_file(file_path):
    """Report relative links in one markdown file whose target is missing.

    Lines between fences that start with ``` at column 0 are ignored. Every
    dead link is printed as an "Error: File not found ..." message.

    Args:
        file_path: Path of the ``.md`` / ``.mdx`` file to scan (read as UTF-8).

    Returns:
        A list of ``(line_number, link)`` tuples, one per dead link, in the
        order they appear in the file. Line numbers are 1-based.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    dead_links = []
    in_code_block = False

    for line_number, line in enumerate(lines, start=1):
        # A fence line toggles code-block state and is never scanned itself.
        if line.startswith("```"):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        for link in _LINK_PATTERN.findall(line):
            if not _is_relative_file_link(link):
                continue
            if _link_target_exists(file_path, link):
                continue
            print(
                f"Error: File not found for link '{link}' in file '{file_path}:{line_number}'"
            )
            dead_links.append((line_number, link))

    return dead_links


def travel(root_path: str):
    """Check every markdown file found under *root_path*.

    Args:
        root_path: Directory to walk recursively. A path that does not exist
            yields no files and therefore no errors.

    Returns:
        The total number of dead links reported under *root_path*.
    """
    dead_link_count = 0
    for root, dirs, files in os.walk(root_path):
        # Sort in place so traversal (and hence output) order is stable.
        dirs.sort()
        for file in sorted(files):
            if file.endswith(_MARKDOWN_SUFFIXES):
                md_file_path = os.path.join(root, file)
                dead_link_count += len(process_md_file(md_file_path))
    return dead_link_count


if __name__ == "__main__":
    # check docs directories
    travel("docs")
    travel("i18n")
    travel("versioned_docs")

# ---------------------------------------------------------------------
# To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
# For additional commands, e-mail: commits-h...@doris.apache.org