================
@@ -0,0 +1,394 @@
+#!/usr/bin/env python3
+#
+# ===-----------------------------------------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===-----------------------------------------------------------------------===#
+
+"""
+
+Clang-Tidy Alphabetical Order Checker
+=====================================
+
+Normalize Clang-Tidy documentation with deterministic sorting for linting/tests.
+
+Behavior:
+- Sort entries in docs/clang-tidy/checks/list.rst csv-table.
+- Sort key sections in docs/ReleaseNotes.rst.
+- Detect duplicated entries in 'Changes in existing checks'.
+
+Flags:
+ -o/--output Write normalized content to this path instead of updating docs.
+"""
+
+import argparse
+import io
+import os
+import re
+import sys
+from typing import Dict, List, Optional, Sequence, Tuple, Union, overload
+from operator import itemgetter
+
+# Matches a :doc:`label <path>` or :doc:`label` reference anywhere in text and
+# captures the label. Used to sort ReleaseNotes bullet items alphabetically by
+# their label.
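+# For example, both ":doc:`misc-foo <misc/foo>`" and ":doc:`misc-foo`" match.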
+DOC_LABEL_RN_RE = re.compile(r":doc:`(?P<label>[^`<]+)\s*(?:<[^>]+>)?`")
+
+# Matches a single csv-table row line in list.rst that begins with a :doc:
+# reference, capturing the label. Used to extract the sort key per row.
+DOC_LINE_RE = re.compile(r"^\s*:doc:`(?P<label>[^`<]+?)\s*<[^>]+>`.*$")
+
+
+EXTRA_DIR = os.path.join(os.path.dirname(__file__), "../..")
+DOCS_DIR = os.path.join(EXTRA_DIR, "docs")
+CLANG_TIDY_DOCS_DIR = os.path.join(DOCS_DIR, "clang-tidy")
+CHECKS_DOCS_DIR = os.path.join(CLANG_TIDY_DOCS_DIR, "checks")
+LIST_DOC = os.path.join(CHECKS_DOCS_DIR, "list.rst")
+RELEASE_NOTES_DOC = os.path.join(DOCS_DIR, "ReleaseNotes.rst")
+
+
+def read_text(path: str) -> List[str]:
+ with io.open(path, "r", encoding="utf-8") as f:
+ return f.read().splitlines(True)
+
+
+def write_text(path: str, content: str) -> None:
+ with io.open(path, "w", encoding="utf-8", newline="") as f:
+ f.write(content)
+
+
+def _normalize_list_rst_lines(lines: Sequence[str]) -> List[str]:
+ """Return normalized content of checks list.rst as a list of lines."""
+ out: List[str] = []
+ i = 0
+ n = len(lines)
+
+ def key_for(line: str):
+ m = DOC_LINE_RE.match(line)
+ if not m:
+ return (1, "")
+ return (0, m.group("label"))
+
+ while i < n:
+ line = lines[i]
+ if line.lstrip().startswith(".. csv-table::"):
+ out.append(line)
+ i += 1
+
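+            # Copy table header/option lines until the first :doc: entry row.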
+            while i < n and (lines[i].startswith(" ") or lines[i].strip() == ""):
+ if DOC_LINE_RE.match(lines[i]):
+ break
+ out.append(lines[i])
+ i += 1
+
+ entries: List[str] = []
+ while i < n and lines[i].startswith(" "):
+ entries.append(lines[i])
+ i += 1
+
+ entries_sorted = sorted(entries, key=key_for)
+ out.extend(entries_sorted)
+ continue
+
+ out.append(line)
+ i += 1
+
+ return out
+
+
+@overload
+def normalize_list_rst(data: str) -> str:
+ ...
+
+
+@overload
+def normalize_list_rst(data: List[str]) -> List[str]:
+ ...
+
+
+def normalize_list_rst(data: Union[str, List[str]]) -> Union[str, List[str]]:
+ """Normalize list.rst; returns same type as input (str or list).
+
+ - If given a string, returns a single normalized string.
+ - If given a sequence of lines, returns a list of lines.
+ """
+ if isinstance(data, str):
+ lines = data.splitlines(True)
+ return "".join(_normalize_list_rst_lines(lines))
+ else:
+ return _normalize_list_rst_lines(data)
+
+
+def find_heading(lines: Sequence[str], title: str) -> Optional[int]:
+ """Find heading start index for a section underlined with ^ characters.
+
+ The function looks for a line equal to `title` followed by a line that
+ consists solely of ^, which matches the ReleaseNotes style for subsection
+ headings used here.
+
+ Returns index of the title line, or None if not found.
+ """
+ for i in range(len(lines) - 1):
+ if lines[i].rstrip("\n") == title:
+ underline = lines[i + 1].rstrip("\n")
+            if underline and set(underline) == {"^"} and len(underline) >= len(title):
+ return i
+ return None
+
+
+def extract_label(text: str) -> str:
+ m = DOC_LABEL_RN_RE.search(text)
+ return m.group("label") if m else text
+
+
+def is_bullet_start(line: str) -> bool:
+ return line.startswith("- ")
+
+
+def parse_bullet_blocks(
+ lines: Sequence[str], start: int, end: int
+) -> Tuple[List[str], List[Tuple[str, List[str]]], List[str]]:
+ i = start
+ n = end
+ first_bullet = i
+ while first_bullet < n and not is_bullet_start(lines[first_bullet]):
+ first_bullet += 1
+ prefix = list(lines[i:first_bullet])
+
+ blocks: List[Tuple[str, List[str]]] = []
+ i = first_bullet
+ while i < n:
+ if not is_bullet_start(lines[i]):
+ break
+ bstart = i
+ i += 1
+ while i < n and not is_bullet_start(lines[i]):
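+            # Stop early if the next line is a "^" underline: the bullet list
+            # has run into the next subsection heading.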
+ if (
+ i + 1 < n
+ and set(lines[i + 1].rstrip("\n")) == {"^"}
+ and lines[i].strip()
+ ):
+ break
+ i += 1
+ block = list(lines[bstart:i])
+ key = extract_label(block[0])
+ blocks.append((key, block))
+
+ suffix = list(lines[i:n])
+ return prefix, blocks, suffix
+
+
+def sort_blocks(blocks: List[Tuple[str, List[str]]]) -> List[List[str]]:
+ """Return blocks sorted deterministically by their extracted label.
+
+ Duplicates are preserved; merging is left to authors to handle manually.
+ """
+ return list(map(itemgetter(1), sorted(blocks, key=itemgetter(0))))
+
+
+def find_duplicate_entries(
+ lines: Sequence[str], title: str
+) -> List[Tuple[str, List[Tuple[int, List[str]]]]]:
+    """Return detailed duplicate info as (key, [(start_idx, block_lines), ...]).
+
+ start_idx is the 0-based index of the first line of the bullet block in
+ the original lines list. Only keys with more than one occurrence are
+ returned, and occurrences are listed in the order they appear.
+ """
+ bounds = _find_section_bounds(lines, title, None)
+ if bounds is None:
+ return []
+ _, sec_start, sec_end = bounds
+
+ i = sec_start
+ n = sec_end
+
+ while i < n and not is_bullet_start(lines[i]):
+ i += 1
+
+ blocks_with_pos: List[Tuple[str, int, List[str]]] = []
+ while i < n:
+ if not is_bullet_start(lines[i]):
+ break
+ bstart = i
+ i += 1
+ while i < n and not is_bullet_start(lines[i]):
+ if (
+ i + 1 < n
+ and set(lines[i + 1].rstrip("\n")) == {"^"}
+ and lines[i].strip()
+ ):
+ break
+ i += 1
+ block = list(lines[bstart:i])
+ key = extract_label(block[0])
+ blocks_with_pos.append((key, bstart, block))
+
+ grouped: Dict[str, List[Tuple[int, List[str]]]] = {}
+ for key, start, block in blocks_with_pos:
+ grouped.setdefault(key, []).append((start, block))
+
+ result: List[Tuple[str, List[Tuple[int, List[str]]]]] = []
+ for key, occs in grouped.items():
+ if len(occs) > 1:
+ result.append((key, occs))
+
+ result.sort(key=itemgetter(0))
+ return result
+
+
+def _find_section_bounds(
+ lines: Sequence[str], title: str, next_title: Optional[str]
+) -> Optional[Tuple[int, int, int]]:
+ """Return (h_start, sec_start, sec_end) for section `title`.
+
+ - h_start: index of the section title line
+ - sec_start: index of the first content line after underline
+ - sec_end: index of the first line of the next section title (or end)
+ """
+ h_start = find_heading(lines, title)
+ if h_start is None:
+ return None
+
+ sec_start = h_start + 2
+
+ # Determine end of section either from next_title or by scanning.
+ if next_title is not None:
+ h_end = find_heading(lines, next_title)
+ if h_end is None:
+ # Scan forward to the next heading-like underline.
+ h_end = sec_start
+ while h_end + 1 < len(lines):
+                if lines[h_end].strip() and set(lines[h_end + 1].rstrip("\n")) == {"^"}:
+ break
+ h_end += 1
+ sec_end = h_end
+ else:
+ # Scan to end or until a heading underline is found.
+ h_end = sec_start
+ while h_end + 1 < len(lines):
+            if lines[h_end].strip() and set(lines[h_end + 1].rstrip("\n")) == {"^"}:
+ break
+ h_end += 1
+ sec_end = h_end
+
+ return h_start, sec_start, sec_end
+
+
+def _normalize_release_notes_section(
+ lines: Sequence[str], title: str, next_title: Optional[str]
+) -> List[str]:
+ """Normalize a single release-notes section and return updated lines."""
+ bounds = _find_section_bounds(lines, title, next_title)
+ if bounds is None:
+ return list(lines)
+ _, sec_start, sec_end = bounds
+
+ prefix, blocks, suffix = parse_bullet_blocks(lines, sec_start, sec_end)
+ sorted_blocks = sort_blocks(blocks)
+
+ new_section: List[str] = []
+ new_section.extend(prefix)
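+    # Re-emit sorted blocks, ensuring a blank line separates adjacent bullets.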
+ for i_b, b in enumerate(sorted_blocks):
+ if i_b > 0 and (
+ not new_section or (new_section and new_section[-1].strip() != "")
+ ):
+ new_section.append("\n")
+ new_section.extend(b)
+ new_section.extend(suffix)
+
+ return list(lines[:sec_start]) + new_section + list(lines[sec_end:])
+
+
+def normalize_release_notes(lines: Sequence[str]) -> str:
+    sections = ["New checks", "New check aliases", "Changes in existing checks"]
+
+ out = list(lines)
+
+ for idx in range(len(sections) - 1, -1, -1):
+ title = sections[idx]
+ next_title = sections[idx + 1] if idx + 1 < len(sections) else None
+ out = _normalize_release_notes_section(out, title, next_title)
+
+ return "".join(out)
+
+
+def _emit_duplicate_report(lines: Sequence[str], title: str) -> Optional[str]:
+ dups_detail = find_duplicate_entries(lines, title)
+ if not dups_detail:
+ return None
+ out: List[str] = []
+ out.append(f"Error: Duplicate entries in '{title}':\n")
+ for key, occs in dups_detail:
+ out.append(f"\n-- Duplicate: {key}\n")
+ for start_idx, block in occs:
+ out.append(f"- At line {start_idx + 1}:\n")
+ out.append("".join(block))
+ if not (block and block[-1].endswith("\n")):
+ out.append("\n")
+ return "".join(out)
+
+
+def process_release_notes(out_path: str, rn_doc: str) -> int:
+ lines = read_text(rn_doc)
+ normalized = normalize_release_notes(lines)
+ write_text(out_path, normalized)
+
+ # Prefer reporting ordering issues first; let diff fail the test.
+ if "".join(lines) != normalized:
+ sys.stderr.write(
+            "Note: 'ReleaseNotes.rst' is not normalized; please fix ordering first.\n"
+ )
+ return 0
+
+    # Ordering is clean; now enforce the duplicate check.
+ report = _emit_duplicate_report(lines, "Changes in existing checks")
+ if report:
+ sys.stderr.write(report)
+ return 3
+ return 0
+
+
+def process_checks_list(out_path: str, list_doc: str) -> int:
+ lines = read_text(list_doc)
+ normalized = normalize_list_rst("".join(lines))
+ write_text(out_path, normalized)
+ return 0
+
+
+def main(argv: Sequence[str]) -> int:
+ ap = argparse.ArgumentParser()
+ ap.add_argument("-o", "--output", dest="out", default=None)
+ args = ap.parse_args(argv)
+
+    list_doc, rn_doc = (os.path.normpath(LIST_DOC), os.path.normpath(RELEASE_NOTES_DOC))
+
+ if args.out:
+ out_path = args.out
+ out_lower = os.path.basename(out_path).lower()
+ if "release" in out_lower:
+ return process_release_notes(out_path, rn_doc)
+ else:
+ return process_checks_list(out_path, list_doc)
+
+ list_lines = read_text(list_doc)
+ rn_lines = read_text(rn_doc)
+ list_norm = normalize_list_rst("".join(list_lines))
+ rn_norm = normalize_release_notes(rn_lines)
+ if "".join(list_lines) != list_norm:
+ write_text(list_doc, list_norm)
+ if "".join(rn_lines) != rn_norm:
+ write_text(rn_doc, rn_norm)
+
+ report = _emit_duplicate_report(rn_lines, "Changes in existing checks")
+ if report:
+ sys.stderr.write(report)
+ return 3
+ return 0
----------------
vbvictor wrote:
```python
report = _emit_duplicate_report(rn_lines, "Changes in existing checks")
if report:
sys.stderr.write(report)
return 3
return 0
```
It is duplicated here and in `process_release_notes`.
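One possible shape (a sketch only; the helper name is hypothetical, not part of the patch):
```python
def _duplicates_exit_code(rn_lines: Sequence[str]) -> int:
    # Hypothetical helper: shared tail for both entry points. Emits the
    # duplicate report to stderr and maps its presence to an exit code.
    report = _emit_duplicate_report(rn_lines, "Changes in existing checks")
    if report:
        sys.stderr.write(report)
        return 3
    return 0
```
Then both `process_release_notes` and `main` could end with `return _duplicates_exit_code(...)` on their respective line lists.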
https://github.com/llvm/llvm-project/pull/166072
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits