This patch ports the script to Python 3. One small complication is that
some files in the tree are encoded as ISO 8859 rather than UTF-8, for example:

libstdc++-v3/testsuite/22_locale/messages/members/char/2.cc

To handle such files, the script first tries to read each file as utf8 and,
if decoding fails, falls back to iso8859 when calling Python's open function.

I'm going to install the script if there are no objections.
Martin

contrib/ChangeLog:

        * update-copyright.py: Port to python3 by guessing encoding
        (first utf8, then iso8859). Add 2 more ignores: .png and .pyc.
---
 contrib/update-copyright.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/contrib/update-copyright.py b/contrib/update-copyright.py
index bc65208d9cb..5603b8eac16 100755
--- a/contrib/update-copyright.py
+++ b/contrib/update-copyright.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
 #
 # Copyright (C) 2013-2020 Free Software Foundation, Inc.
 #
@@ -64,7 +64,10 @@ class GenericFilter:
     def __init__ (self):
         self.skip_files = set()
         self.skip_dirs = set()
-        self.skip_extensions = set()
+        self.skip_extensions = set([
+                '.png',
+                '.pyc',
+                ])
         self.fossilised_files = set()
         self.own_files = set()
@@ -307,7 +310,7 @@ class Copyright:
             # If it looks like the copyright is incomplete, add the next line.
             while not self.is_complete (match):
                 try:
-                    next_line = file.next()
+                    next_line = file.readline()
                 except StopIteration:
                     break
@@ -381,6 +384,15 @@ class Copyright:
         return (line != orig_line, line, next_line)
+    def guess_encoding (self, pathname):
+        for encoding in ('utf8', 'iso8859'):
+            try:
+                open(pathname, 'r', encoding=encoding).read()
+                return encoding
+            except UnicodeDecodeError:
+                pass
+        return None
+
     def process_file (self, dir, filename, filter):
         pathname = os.path.join (dir, filename)
         if filename.endswith ('.tmp'):
@@ -395,7 +407,8 @@ class Copyright:
         changed = False
         line_filter = filter.get_line_filter (dir, filename)
         mode = None
-        with open (pathname, 'r') as file:
+        encoding = self.guess_encoding(pathname)
+        with open (pathname, 'r', encoding=encoding) as file:
             prev = None
             mode = os.fstat (file.fileno()).st_mode
             for line in file:
@@ -421,7 +434,7 @@ class Copyright:
         # If something changed, write the new file out.
         if changed and self.errors.ok():
             tmp_pathname = pathname + '.tmp'
-            with open (tmp_pathname, 'w') as file:
+            with open (tmp_pathname, 'w', encoding=encoding) as file:
                 for line in lines:
                     file.write (line)
                 os.fchmod (file.fileno(), mode)
@@ -432,7 +445,7 @@ class Copyright:
     def process_tree (self, tree, filter):
         for (dir, subdirs, filenames) in os.walk (tree):
             # Don't recurse through directories that should be skipped.
-            for i in xrange (len (subdirs) - 1, -1, -1):
+            for i in range (len (subdirs) - 1, -1, -1):
                 if filter.skip_dir (dir, subdirs[i]):
                     del subdirs[i]
--
2.29.2

Reply via email to