[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-07-26 Thread Andy Yankovsky via Phabricator via cfe-commits
werat created this revision.
Herald added a subscriber: xazax.hun.
werat requested review of this revision.
Herald added a project: clang-tools-extra.
Herald added a subscriber: cfe-commits.

The encoding used for opening files depends on the OS and might be different
from UTF-8 (e.g. on Windows it can be CP-1252). The documentation files use
UTF-8 and might be incompatible with other encodings. For example, right now
`clang-tools-extra/docs/clang-tidy/checks/abseil-no-internal-dependencies.rst`
has non-ASCII quotes and running `add_new_check.py` fails on Windows, because
it tries to read the file with incompatible encoding.

Use `io.open` for compatibility with both Python 2 and Python 3.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py


Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,9 +10,15 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
+# The documentation files are using UTF-8, however on Windows the default
+# encoding might be different (e.g. CP-1252). Force UTF-8 for all files.
+def open(*args, **kwargs):
+  kwargs.setdefault("encoding", "utf8")
+  return io.open(*args, **kwargs)
 
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,10 +11,16 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
+# The documentation files are using UTF-8, however on Windows the default
+# encoding might be different (e.g. CP-1252). Force UTF-8 for all files.
+def open(*args, **kwargs):
+  kwargs.setdefault("encoding", "utf8")
+  return io.open(*args, **kwargs)
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.


Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,9 +10,15 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
+# The documentation files are using UTF-8, however on Windows the default
+# encoding might be different (e.g. CP-1252). Force UTF-8 for all files.
+def open(*args, **kwargs):
+  kwargs.setdefault("encoding", "utf8")
+  return io.open(*args, **kwargs)
 
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,10 +11,16 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
+# The documentation files are using UTF-8, however on Windows the default
+# encoding might be different (e.g. CP-1252). Force UTF-8 for all files.
+def open(*args, **kwargs):
+  kwargs.setdefault("encoding", "utf8")
+  return io.open(*args, **kwargs)
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-07-29 Thread Andy Yankovsky via Phabricator via cfe-commits
werat added a comment.

> Hi! Thanks for the patch, it makes sense to me! Honestly, I think having the 
> open shadow is maybe not the best way to solve this (please feel free to let 
> me know if there are reasons it would be better), maybe spell it out 
> explicitly (there aren't that many cases of open)?

It seemed a bit tedious to spell it out explicitly everywhere, but you're 
right, not a script of this size it's not a problem. The benefit of a global 
shadow is that nobody will use regular `open` accidentally, but I think the 
risk of that is relatively low.

> Also, hm... Do we really need these? It's safe to remove those, this 
> shouldn't be a problem, I think.

I don't think using non-ASCII quotes was intentional and we can definitely 
remove them. But future-proofing this script will avoid this problem in the 
future.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-07-29 Thread Andy Yankovsky via Phabricator via cfe-commits
werat updated this revision to Diff 362677.
werat added a comment.

Use `io.open` explicitly.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py

Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files.
 
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   txt = re.sub(sFrom, sTo, txt)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -31,7 +36,7 @@
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   if sFrom not in txt:
@@ -39,7 +44,7 @@
 
   txt = txt.replace(sFrom, sTo)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -70,7 +75,7 @@
 
 def deleteMatchingLines(fileName, pattern):
   lines = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@
 
   print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
   print('  ' + '  '.join([l for l in lines if re.search(pattern, l)]))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.writelines(not_matching_lines)
 
   return True
@@ -101,7 +106,7 @@
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@
   return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 cpp_found = False
 file_added = False
 for line in lines:
@@ -130,11 +135,11 @@
 def adapt_module(module_path, module, check_name, check_name_camel):
   modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 header_added = False
 header_found = False
 check_added = False
@@ -169,7 +174,7 @@
 def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
   filename = os.path.normpath(os.path.join(clang_tidy_path,
'../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@
   checkMatcher = re.compile('- The \'(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 note_added = False
 header_found = False
 add_note_here = False
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f

[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-07-29 Thread Andy Yankovsky via Phabricator via cfe-commits
werat updated this revision to Diff 362678.
werat added a comment.

Fix incorrect replace


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py

Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files.
 
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   txt = re.sub(sFrom, sTo, txt)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -31,7 +36,7 @@
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   if sFrom not in txt:
@@ -39,7 +44,7 @@
 
   txt = txt.replace(sFrom, sTo)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -70,7 +75,7 @@
 
 def deleteMatchingLines(fileName, pattern):
   lines = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@
 
   print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
   print('  ' + '  '.join([l for l in lines if re.search(pattern, l)]))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.writelines(not_matching_lines)
 
   return True
@@ -101,7 +106,7 @@
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@
   return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 cpp_found = False
 file_added = False
 for line in lines:
@@ -130,11 +135,11 @@
 def adapt_module(module_path, module, check_name, check_name_camel):
   modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 header_added = False
 header_found = False
 check_added = False
@@ -169,7 +174,7 @@
 def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
   filename = os.path.normpath(os.path.join(clang_tidy_path,
'../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@
   checkMatcher = re.compile('- The \'(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 note_added = False
 header_found = False
 add_note_here = False
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
+# The documentation files are encoded using UTF-8, however on Windows the
+# default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+# always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+# writing files
 
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+ 

[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-07-29 Thread Andy Yankovsky via Phabricator via cfe-commits
werat added a comment.

In D106792#2912955 , @kbobyrev wrote:

> Honestly, I don't really think we should have more Unicode symbols in the 
> docs for generators

I'm not very familiar with the documentation build process, but maybe we could 
enforce only-ASCII there? Then all scripts and tools can just assume it will 
work with plan `open`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-08-02 Thread Andy Yankovsky via Phabricator via cfe-commits
werat updated this revision to Diff 363413.
werat added a comment.

Moved the comment to the first usage of `io.open`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py

Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
-
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
 return
+
+  # The documentation files are encoded using UTF-8, however on Windows the
+  # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+  # always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+  # writing files here and elsewhere.
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   txt = re.sub(sFrom, sTo, txt)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -31,7 +36,7 @@
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   if sFrom not in txt:
@@ -39,7 +44,7 @@
 
   txt = txt.replace(sFrom, sTo)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -70,7 +75,7 @@
 
 def deleteMatchingLines(fileName, pattern):
   lines = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@
 
   print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
   print('  ' + '  '.join([l for l in lines if re.search(pattern, l)]))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.writelines(not_matching_lines)
 
   return True
@@ -101,7 +106,7 @@
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@
   return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 cpp_found = False
 file_added = False
 for line in lines:
@@ -130,11 +135,11 @@
 def adapt_module(module_path, module, check_name, check_name_camel):
   modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 header_added = False
 header_found = False
 check_added = False
@@ -169,7 +174,7 @@
 def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
   filename = os.path.normpath(os.path.join(clang_tidy_path,
'../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@
   checkMatcher = re.compile('- The \'(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 note_added = False
 header_found = False
 add_note_here = False
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
-
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+
+  # The documentation files are encoded using UTF-8, however on Windows the
+  # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+  # always used, use `io.open(fil

[PATCH] D106792: [clang-tidy] Always open files using UTF-8 encoding

2021-08-02 Thread Andy Yankovsky via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG307b1fddd4d8: [clang-tidy] Always open files using UTF-8 
encoding (authored by werat).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106792/new/

https://reviews.llvm.org/D106792

Files:
  clang-tools-extra/clang-tidy/add_new_check.py
  clang-tools-extra/clang-tidy/rename_check.py

Index: clang-tools-extra/clang-tidy/rename_check.py
===
--- clang-tools-extra/clang-tidy/rename_check.py
+++ clang-tools-extra/clang-tidy/rename_check.py
@@ -10,20 +10,25 @@
 
 import argparse
 import glob
+import io
 import os
 import re
 
-
 def replaceInFileRegex(fileName, sFrom, sTo):
   if sFrom == sTo:
 return
+
+  # The documentation files are encoded using UTF-8, however on Windows the
+  # default encoding might be different (e.g. CP-1252). To make sure UTF-8 is
+  # always used, use `io.open(filename, mode, encoding='utf8')` for reading and
+  # writing files here and elsewhere.
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   txt = re.sub(sFrom, sTo, txt)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -31,7 +36,7 @@
   if sFrom == sTo:
 return
   txt = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 txt = f.read()
 
   if sFrom not in txt:
@@ -39,7 +44,7 @@
 
   txt = txt.replace(sFrom, sTo)
   print("Replacing '%s' -> '%s' in '%s'..." % (sFrom, sTo, fileName))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.write(txt)
 
 
@@ -70,7 +75,7 @@
 
 def deleteMatchingLines(fileName, pattern):
   lines = None
-  with open(fileName, "r") as f:
+  with io.open(fileName, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   not_matching_lines = [l for l in lines if not re.search(pattern, l)]
@@ -79,7 +84,7 @@
 
   print("Removing lines matching '%s' in '%s'..." % (pattern, fileName))
   print('  ' + '  '.join([l for l in lines if re.search(pattern, l)]))
-  with open(fileName, "w") as f:
+  with io.open(fileName, 'w', encoding='utf8') as f:
 f.writelines(not_matching_lines)
 
   return True
@@ -101,7 +106,7 @@
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   cpp_file = check_name_camel + '.cpp'
@@ -112,7 +117,7 @@
   return False
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 cpp_found = False
 file_added = False
 for line in lines:
@@ -130,11 +135,11 @@
 def adapt_module(module_path, module, check_name, check_name_camel):
   modulecpp = next(filter(lambda p: p.lower() == module.lower() + 'tidymodule.cpp', os.listdir(module_path)))
   filename = os.path.join(module_path, modulecpp)
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 header_added = False
 header_found = False
 check_added = False
@@ -169,7 +174,7 @@
 def add_release_notes(clang_tidy_path, old_check_name, new_check_name):
   filename = os.path.normpath(os.path.join(clang_tidy_path,
'../docs/ReleaseNotes.rst'))
-  with open(filename, 'r') as f:
+  with io.open(filename, 'r', encoding='utf8') as f:
 lines = f.readlines()
 
   lineMatcher = re.compile('Renamed checks')
@@ -177,7 +182,7 @@
   checkMatcher = re.compile('- The \'(.*)')
 
   print('Updating %s...' % filename)
-  with open(filename, 'wb') as f:
+  with io.open(filename, 'wb', encoding='utf8') as f:
 note_added = False
 header_found = False
 add_note_here = False
Index: clang-tools-extra/clang-tidy/add_new_check.py
===
--- clang-tools-extra/clang-tidy/add_new_check.py
+++ clang-tools-extra/clang-tidy/add_new_check.py
@@ -11,16 +11,21 @@
 from __future__ import print_function
 
 import argparse
+import io
 import os
 import re
 import sys
 
-
 # Adapts the module's CMakelist file. Returns 'True' if it could add a new
 # entry and 'False' if the entry already existed.
 def adapt_cmake(module_path, check_name_camel):
   filename = os.path.join(module_path, 'CMakeLists.txt')
-  with open(filename, 'r') as f:
+
+  # The documentation files are encoded using UTF-8, however on Windows