================ @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +# +# ===-----------------------------------------------------------------------===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===-----------------------------------------------------------------------===# + +""" + +Clang-Tidy Alphabetical Order Checker +===================================== + +Normalize Clang-Tidy documentation with deterministic sorting for linting/tests. + +Behavior: +- Sort entries in docs/clang-tidy/checks/list.rst csv-table. +- Sort key sections in docs/ReleaseNotes.rst. +- Detect duplicated entries in 'Changes in existing checks'. + +Flags: + -o/--output Write normalized content to this path instead of updating docs. +""" + +import argparse +import io +import os +import re +import sys +from typing import Dict, List, Optional, Sequence, Tuple, Union, overload +from operator import itemgetter + +# Matches a :doc:`label <path>` or :doc:`label` reference anywhere in text and +# captures the label. Used to sort bullet items alphabetically in ReleaseNotes +# items by their label. +DOC_LABEL_RN_RE = re.compile(r":doc:`(?P<label>[^`<]+)\s*(?:<[^>]+>)?`") + +# Matches a single csv-table row line in list.rst that begins with a :doc: +# reference, capturing the label. Used to extract the sort key per row. 
+DOC_LINE_RE = re.compile(r"^\s*:doc:`(?P<label>[^`<]+?)\s*<[^>]+>`.*$") + + +EXTRA_DIR = os.path.join(os.path.dirname(__file__), "../..") +DOCS_DIR = os.path.join(EXTRA_DIR, "docs") +CLANG_TIDY_DOCS_DIR = os.path.join(DOCS_DIR, "clang-tidy") +CHECKS_DOCS_DIR = os.path.join(CLANG_TIDY_DOCS_DIR, "checks") +LIST_DOC = os.path.join(CHECKS_DOCS_DIR, "list.rst") +RELEASE_NOTES_DOC = os.path.join(DOCS_DIR, "ReleaseNotes.rst") + + +def read_text(path: str) -> List[str]: + with io.open(path, "r", encoding="utf-8") as f: + return f.read().splitlines(True) + + +def write_text(path: str, content: str) -> None: + with io.open(path, "w", encoding="utf-8", newline="") as f: + f.write(content) + + +def _normalize_list_rst_lines(lines: Sequence[str]) -> List[str]: + """Return normalized content of checks list.rst as a list of lines.""" + out: List[str] = [] + i = 0 + n = len(lines) + + def key_for(line: str): + m = DOC_LINE_RE.match(line) + if not m: + return (1, "") + return (0, m.group("label")) + + while i < n: + line = lines[i] + if line.lstrip().startswith(".. csv-table::"): + out.append(line) + i += 1 + + while i < n and (lines[i].startswith(" ") or lines[i].strip() == ""): + if DOC_LINE_RE.match(lines[i]): + break + out.append(lines[i]) + i += 1 + + entries: List[str] = [] + while i < n and lines[i].startswith(" "): + entries.append(lines[i]) + i += 1 + + entries_sorted = sorted(entries, key=key_for) + out.extend(entries_sorted) + continue + + out.append(line) + i += 1 + + return out + + +@overload +def normalize_list_rst(data: str) -> str: + ... + + +@overload +def normalize_list_rst(data: List[str]) -> List[str]: + ... + + +def normalize_list_rst(data: Union[str, List[str]]) -> Union[str, List[str]]: + """Normalize list.rst; returns same type as input (str or list). + + - If given a string, returns a single normalized string. + - If given a sequence of lines, returns a list of lines. 
+ """ + if isinstance(data, str): + lines = data.splitlines(True) + return "".join(_normalize_list_rst_lines(lines)) + else: + return _normalize_list_rst_lines(data) ---------------- vbvictor wrote:
Do we need to support both `str` and `List[str]`? Can we stick to only one way of processing and use it everywhere? IMO, there is no point in writing complex code. If we really need to pass a raw `str`, then just call `data.splitlines(True)` on the caller side, and we won't have to write 20 lines of additional code here. https://github.com/llvm/llvm-project/pull/166072 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
