https://github.com/c8ef created https://github.com/llvm/llvm-project/pull/121341
This will prevent the following error on systems with a default encoding other than UTF-8:

```
UnicodeDecodeError: 'gbk' codec can't decode byte 0xb6 in position 12958: illegal multibyte sequence
```

From 33db3086f1f822aa1f4bbacb028d5fa31cb3e3be Mon Sep 17 00:00:00 2001
From: c8ef <c...@outlook.com>
Date: Mon, 30 Dec 2024 22:27:26 +0800
Subject: [PATCH] open with utf-8

---
 clang/tools/include-mapping/cppreference_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/tools/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py
index 9101f3dbff0f94..f7da2ba8bb6d84 100644
--- a/clang/tools/include-mapping/cppreference_parser.py
+++ b/clang/tools/include-mapping/cppreference_parser.py
@@ -139,7 +139,7 @@ def _ParseIndexPage(index_page_html):
 
 
 def _ReadSymbolPage(path, name, qual_name):
-    with open(path) as f:
+    with open(path, encoding="utf-8") as f:
         return _ParseSymbolPage(f.read(), name, qual_name)
 
 
@@ -156,7 +156,7 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
     #      contains the defined header.
     #   2. Parse the symbol page to get the defined header.
     index_page_path = os.path.join(root_dir, index_page_name)
-    with open(index_page_path, "r") as f:
+    with open(index_page_path, "r", encoding="utf-8") as f:
         # Read each symbol page in parallel.
         results = []  # (symbol_name, promise of [header...])
         for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits