This patch ports the script to Python 3. One small complication is that
some files in the tree are ISO 8859 encoded rather than UTF-8, for example:
libstdc++-v3/testsuite/22_locale/messages/members/char/2.cc
To handle such files, the script first tries to read each file with the utf8
encoding in Python's open function and falls back to iso8859 if decoding fails.
I'm going to install the script if there are no objections.
Martin
contrib/ChangeLog:
* update-copyright.py: Port to python3 by guessing encoding
(first utf8, then iso8859). Add 2 more ignores: .png and .pyc.
---
contrib/update-copyright.py | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/contrib/update-copyright.py b/contrib/update-copyright.py
index bc65208d9cb..5603b8eac16 100755
--- a/contrib/update-copyright.py
+++ b/contrib/update-copyright.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
#
# Copyright (C) 2013-2020 Free Software Foundation, Inc.
#
@@ -64,7 +64,10 @@ class GenericFilter:
def __init__ (self):
self.skip_files = set()
self.skip_dirs = set()
- self.skip_extensions = set()
+ self.skip_extensions = set([
+ '.png',
+ '.pyc',
+ ])
self.fossilised_files = set()
self.own_files = set()
@@ -307,7 +310,7 @@ class Copyright:
# If it looks like the copyright is incomplete, add the next line.
while not self.is_complete (match):
try:
- next_line = file.next()
+ next_line = file.readline()
except StopIteration:
break
@@ -381,6 +384,15 @@ class Copyright:
return (line != orig_line, line, next_line)
+ def guess_encoding (self, pathname):
+ for encoding in ('utf8', 'iso8859'):
+ try:
+ open(pathname, 'r', encoding=encoding).read()
+ return encoding
+ except UnicodeDecodeError:
+ pass
+ return None
+
def process_file (self, dir, filename, filter):
pathname = os.path.join (dir, filename)
if filename.endswith ('.tmp'):
@@ -395,7 +407,8 @@ class Copyright:
changed = False
line_filter = filter.get_line_filter (dir, filename)
mode = None
- with open (pathname, 'r') as file:
+ encoding = self.guess_encoding(pathname)
+ with open (pathname, 'r', encoding=encoding) as file:
prev = None
mode = os.fstat (file.fileno()).st_mode
for line in file:
@@ -421,7 +434,7 @@ class Copyright:
# If something changed, write the new file out.
if changed and self.errors.ok():
tmp_pathname = pathname + '.tmp'
- with open (tmp_pathname, 'w') as file:
+ with open (tmp_pathname, 'w', encoding=encoding) as file:
for line in lines:
file.write (line)
os.fchmod (file.fileno(), mode)
@@ -432,7 +445,7 @@ class Copyright:
def process_tree (self, tree, filter):
for (dir, subdirs, filenames) in os.walk (tree):
# Don't recurse through directories that should be skipped.
- for i in xrange (len (subdirs) - 1, -1, -1):
+ for i in range (len (subdirs) - 1, -1, -1):
if filter.skip_dir (dir, subdirs[i]):
del subdirs[i]
--
2.29.2