commit:     4f5f6f571e52af6d2703db760bad4e0ad7439d5a
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Tue Oct 10 01:36:02 2023 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Wed Oct 11 19:05:02 2023 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=4f5f6f57

Support Python UTF-8 Mode via portage.utf8_mode (bug 914722)

When a UTF-8 locale, or UTF-8 mode is detected, set
portage.utf8_mode to True, and do not wrap file access
with _unicode_func_wrapper. This is intended to mitigate
issues with byte string handling in python libraries
like shutil, as reported in bug 914722.

This patch is intended to be a simple and minimal
implementation that can be optimized later through
the elimination of unnecessary encoding/decoding.

The str() wrapping in the unit tests is for lazily
evaluated instances of lazy_value, which is used to
account for mock portage.const.EPREFIX values that
exist during unit tests.

Bug: https://bugs.gentoo.org/914722
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 NEWS                                               |  4 +++
 lib/portage/__init__.py                            |  4 +++
 lib/portage/_sets/__init__.py                      | 10 ++++--
 lib/portage/dbapi/vartree.py                       | 11 ++++--
 lib/portage/gpkg.py                                | 10 +++++-
 lib/portage/package/ebuild/doebuild.py             |  4 +++
 lib/portage/tests/dbapi/test_portdb_cache.py       |  2 +-
 lib/portage/tests/ebuild/test_fetch.py             |  2 +-
 lib/portage/tests/emerge/test_config_protect.py    | 14 +++++---
 .../emerge/test_emerge_blocker_file_collision.py   |  7 +++-
 lib/portage/tests/emerge/test_emerge_slot_abi.py   | 14 ++++++--
 lib/portage/tests/emerge/test_simple.py            | 40 +++++++++++++++-------
 lib/portage/tests/resolver/ResolverPlayground.py   |  4 +--
 lib/portage/tests/sync/test_sync_local.py          |  2 +-
 lib/portage/tests/util/test_getconfig.py           |  2 +-
 lib/portage/xpak.py                                |  5 +++
 16 files changed, 105 insertions(+), 30 deletions(-)

diff --git a/NEWS b/NEWS
index e5236d5e1b..e437f481f6 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,10 @@ Bug fixes:
 
 * make.conf(5): Update MAKEOPTS guidance to reflect modern practice (bug 
#821529).
 
+* For locales that have UTF-8 filesystem encoding, use unicode strings rather
+  than raw bytes for file paths in calls to python modules like os and shutil
+  (bug #914722).
+
 portage-3.0.52 (2023-10-03)
 --------------
 

diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
index 05c81be538..defefcb286 100644
--- a/lib/portage/__init__.py
+++ b/lib/portage/__init__.py
@@ -186,6 +186,7 @@ except ImportError as e:
     sys.stderr.write(f"    {e}\n\n")
     raise
 
+utf8_mode = sys.getfilesystemencoding() == "utf-8"
 
 # We use utf_8 encoding everywhere. Previously, we used
 # sys.getfilesystemencoding() for the 'merge' encoding, but that had
@@ -319,6 +320,9 @@ class _unicode_module_wrapper:
         object.__setattr__(self, "_cache", cache)
 
     def __getattribute__(self, attr):
+        if utf8_mode:
+            return getattr(object.__getattribute__(self, "_mod"), attr)
+
         cache = object.__getattribute__(self, "_cache")
         if cache is not None:
             result = cache.get(attr)

diff --git a/lib/portage/_sets/__init__.py b/lib/portage/_sets/__init__.py
index e3fd77fa26..295a1e3533 100644
--- a/lib/portage/_sets/__init__.py
+++ b/lib/portage/_sets/__init__.py
@@ -359,16 +359,22 @@ def load_default_config(settings, trees):
             os.path.join(settings["PORTAGE_CONFIGROOT"], USER_CONFIG_PATH, 
"sets.conf"),
         ]
 
+        dot = "."
+        tilde = "~"
+        if not portage.utf8_mode:
+            dot = _unicode_encode(dot)
+            tilde = _unicode_encode(tilde)
+
         for sets_config_path in sets_config_paths:
             if os.path.isdir(sets_config_path):
                 for path, dirs, files in os.walk(sets_config_path):
                     dirs.sort()
                     files.sort()
                     for d in dirs:
-                        if d in vcs_dirs or d.startswith(b".") or 
d.endswith(b"~"):
+                        if d in vcs_dirs or d.startswith(dot) or 
d.endswith(tilde):
                             dirs.remove(d)
                     for f in files:
-                        if not f.startswith(b".") and not f.endswith(b"~"):
+                        if not f.startswith(dot) and not f.endswith(tilde):
                             yield os.path.join(path, f)
             elif os.path.isfile(sets_config_path):
                 yield sets_config_path

diff --git a/lib/portage/dbapi/vartree.py b/lib/portage/dbapi/vartree.py
index 3f39e2b787..835cbb8092 100644
--- a/lib/portage/dbapi/vartree.py
+++ b/lib/portage/dbapi/vartree.py
@@ -4493,6 +4493,10 @@ class dblink:
                     eagain_error = True
                     break
 
+                if portage.utf8_mode:
+                    parent = os.fsencode(parent)
+                    dirs = [os.fsencode(value) for value in dirs]
+                    files = [os.fsencode(value) for value in files]
                 try:
                     parent = _unicode_decode(
                         parent, encoding=_encodings["merge"], errors="strict"
@@ -5280,9 +5284,12 @@ class dblink:
         # Use atomic_ofstream for automatic coercion of raw bytes to
         # unicode, in order to prevent TypeError when writing raw bytes
         # to TextIOWrapper with python2.
+        contents_tmp_path = os.path.join(self.dbtmpdir, "CONTENTS")
         outfile = atomic_ofstream(
-            _unicode_encode(
-                os.path.join(self.dbtmpdir, "CONTENTS"),
+            contents_tmp_path
+            if portage.utf8_mode
+            else _unicode_encode(
+                contents_tmp_path,
                 encoding=_encodings["fs"],
                 errors="strict",
             ),

diff --git a/lib/portage/gpkg.py b/lib/portage/gpkg.py
index d16dffbc27..c56076ab91 100644
--- a/lib/portage/gpkg.py
+++ b/lib/portage/gpkg.py
@@ -14,6 +14,7 @@ import tempfile
 from copy import copy
 from datetime import datetime
 
+import portage
 from portage import checksum
 from portage import os
 from portage import shutil
@@ -1861,6 +1862,11 @@ class gpkg:
         image_total_size = 0
 
         for parent, dirs, files in os.walk(root_dir):
+            if portage.utf8_mode:
+                parent = os.fsencode(parent)
+                dirs = [os.fsencode(value) for value in dirs]
+                files = [os.fsencode(value) for value in files]
+
             parent = _unicode_decode(parent, encoding=_encodings["fs"], 
errors="strict")
             for d in dirs:
                 try:
@@ -1911,7 +1917,9 @@ class gpkg:
                 if os.path.islink(f):
                     path_link = os.readlink(f)
                     path_link_length = len(
-                        _unicode_encode(
+                        os.fsencode(path_link)
+                        if portage.utf8_mode
+                        else _unicode_encode(
                             path_link, encoding=_encodings["fs"], 
errors="strict"
                         )
                     )

diff --git a/lib/portage/package/ebuild/doebuild.py 
b/lib/portage/package/ebuild/doebuild.py
index 7e95a07c01..5780c2b0b3 100644
--- a/lib/portage/package/ebuild/doebuild.py
+++ b/lib/portage/package/ebuild/doebuild.py
@@ -2669,6 +2669,10 @@ def _post_src_install_uid_fix(mysettings, out):
         desktopfile_errors = []
 
         for parent, dirs, files in os.walk(destdir):
+            if portage.utf8_mode:
+                parent = os.fsencode(parent)
+                dirs = [os.fsencode(value) for value in dirs]
+                files = [os.fsencode(value) for value in files]
             try:
                 parent = _unicode_decode(
                     parent, encoding=_encodings["merge"], errors="strict"

diff --git a/lib/portage/tests/dbapi/test_portdb_cache.py 
b/lib/portage/tests/dbapi/test_portdb_cache.py
index a55377b6bc..2f14b7bdf0 100644
--- a/lib/portage/tests/dbapi/test_portdb_cache.py
+++ b/lib/portage/tests/dbapi/test_portdb_cache.py
@@ -52,7 +52,7 @@ class PortdbCacheTestCase(TestCase):
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.bindir, "egencache"),
+            os.path.join(str(self.bindir), "egencache"),
             "--update-manifests",
             "--sign-manifests=n",
             "--repo",

diff --git a/lib/portage/tests/ebuild/test_fetch.py 
b/lib/portage/tests/ebuild/test_fetch.py
index 3be6ed9bdd..76dcdaf88c 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -278,7 +278,7 @@ class EbuildFetchTestCase(TestCase):
                 portage._python_interpreter,
                 "-b",
                 "-Wd",
-                os.path.join(self.bindir, "emirrordist"),
+                os.path.join(str(self.bindir), "emirrordist"),
                 "--distfiles",
                 settings["DISTDIR"],
                 "--config-root",

diff --git a/lib/portage/tests/emerge/test_config_protect.py 
b/lib/portage/tests/emerge/test_config_protect.py
index ec359833e6..560a49a769 100644
--- a/lib/portage/tests/emerge/test_config_protect.py
+++ b/lib/portage/tests/emerge/test_config_protect.py
@@ -112,10 +112,15 @@ src_install() {
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.sbindir, "dispatch-conf"),
+            os.path.join(str(self.sbindir), "dispatch-conf"),
         )
-        emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"emerge"))
-        etc_update_cmd = (BASH_BINARY, os.path.join(self.sbindir, 
"etc-update"))
+        emerge_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "emerge"),
+        )
+        etc_update_cmd = (BASH_BINARY, os.path.join(str(self.sbindir), 
"etc-update"))
         etc_update_auto = etc_update_cmd + (
             "--automode",
             "-5",
@@ -247,7 +252,8 @@ src_install() {
                 os.symlink(true_binary, os.path.join(fake_bin, x))
             for x in etc_symlinks:
                 os.symlink(
-                    os.path.join(self.cnf_etc_path, x), os.path.join(eprefix, 
"etc", x)
+                    os.path.join(str(self.cnf_etc_path), x),
+                    os.path.join(eprefix, "etc", x),
                 )
             with open(os.path.join(var_cache_edb, "counter"), "wb") as f:
                 f.write(b"100")

diff --git a/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py 
b/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py
index 6f7a96af99..b29a83fce8 100644
--- a/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py
+++ b/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py
@@ -50,7 +50,12 @@ src_install() {
         user_config_dir = os.path.join(eprefix, USER_CONFIG_PATH)
 
         portage_python = portage._python_interpreter
-        emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"emerge"))
+        emerge_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "emerge"),
+        )
 
         file_collision = os.path.join(eroot, "usr/lib/file-collision")
 

diff --git a/lib/portage/tests/emerge/test_emerge_slot_abi.py 
b/lib/portage/tests/emerge/test_emerge_slot_abi.py
index 197685975e..70a18b35c2 100644
--- a/lib/portage/tests/emerge/test_emerge_slot_abi.py
+++ b/lib/portage/tests/emerge/test_emerge_slot_abi.py
@@ -54,8 +54,18 @@ class SlotAbiEmergeTestCase(TestCase):
         package_mask_path = os.path.join(user_config_dir, "package.mask")
 
         portage_python = portage._python_interpreter
-        ebuild_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"ebuild"))
-        emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"emerge"))
+        ebuild_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "ebuild"),
+        )
+        emerge_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "emerge"),
+        )
 
         test_ebuild = portdb.findname("dev-libs/dbus-glib-0.98")
         self.assertFalse(test_ebuild is None)

diff --git a/lib/portage/tests/emerge/test_simple.py 
b/lib/portage/tests/emerge/test_simple.py
index ab85ad441c..1cc6457ef1 100644
--- a/lib/portage/tests/emerge/test_simple.py
+++ b/lib/portage/tests/emerge/test_simple.py
@@ -265,51 +265,66 @@ call_has_and_best_version() {
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.sbindir, "dispatch-conf"),
+            os.path.join(str(self.sbindir), "dispatch-conf"),
+        )
+        ebuild_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "ebuild"),
         )
-        ebuild_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"ebuild"))
         egencache_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.bindir, "egencache"),
+            os.path.join(str(self.bindir), "egencache"),
             "--repo",
             "test_repo",
             "--repositories-configuration",
             settings.repositories.config_string(),
         )
-        emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, 
"emerge"))
-        emaint_cmd = (portage_python, "-b", "-Wd", os.path.join(self.sbindir, 
"emaint"))
+        emerge_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.bindir), "emerge"),
+        )
+        emaint_cmd = (
+            portage_python,
+            "-b",
+            "-Wd",
+            os.path.join(str(self.sbindir), "emaint"),
+        )
         env_update_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.sbindir, "env-update"),
+            os.path.join(str(self.sbindir), "env-update"),
         )
-        etc_update_cmd = (BASH_BINARY, os.path.join(self.sbindir, 
"etc-update"))
+        etc_update_cmd = (BASH_BINARY, os.path.join(str(self.sbindir), 
"etc-update"))
         fixpackages_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.sbindir, "fixpackages"),
+            os.path.join(str(self.sbindir), "fixpackages"),
         )
         portageq_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.bindir, "portageq"),
+            os.path.join(str(self.bindir), "portageq"),
         )
         quickpkg_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.bindir, "quickpkg"),
+            os.path.join(str(self.bindir), "quickpkg"),
         )
         regenworld_cmd = (
             portage_python,
             "-b",
             "-Wd",
-            os.path.join(self.sbindir, "regenworld"),
+            os.path.join(str(self.sbindir), "regenworld"),
         )
 
         rm_binary = find_binary("rm")
@@ -663,7 +678,8 @@ call_has_and_best_version() {
                 os.symlink(true_binary, os.path.join(fake_bin, x))
             for x in etc_symlinks:
                 os.symlink(
-                    os.path.join(self.cnf_etc_path, x), os.path.join(eprefix, 
"etc", x)
+                    os.path.join(str(self.cnf_etc_path), x),
+                    os.path.join(eprefix, "etc", x),
                 )
             with open(os.path.join(var_cache_edb, "counter"), "wb") as f:
                 f.write(b"100")

diff --git a/lib/portage/tests/resolver/ResolverPlayground.py 
b/lib/portage/tests/resolver/ResolverPlayground.py
index f1cd844708..167e1e4608 100644
--- a/lib/portage/tests/resolver/ResolverPlayground.py
+++ b/lib/portage/tests/resolver/ResolverPlayground.py
@@ -633,7 +633,7 @@ class ResolverPlayground:
             self.eroot, GLOBAL_CONFIG_PATH.lstrip(os.sep), "make.globals"
         )
         ensure_dirs(os.path.dirname(make_globals_path))
-        os.symlink(os.path.join(cnf_path, "make.globals"), make_globals_path)
+        os.symlink(os.path.join(str(cnf_path), "make.globals"), 
make_globals_path)
 
         # Create /usr/share/portage/config/sets/portage.conf
         default_sets_conf_dir = os.path.join(
@@ -645,7 +645,7 @@ class ResolverPlayground:
         except os.error:
             pass
 
-        provided_sets_portage_conf = os.path.join(cnf_path, "sets", 
"portage.conf")
+        provided_sets_portage_conf = os.path.join(str(cnf_path), "sets", 
"portage.conf")
         os.symlink(
             provided_sets_portage_conf,
             os.path.join(default_sets_conf_dir, "portage.conf"),

diff --git a/lib/portage/tests/sync/test_sync_local.py 
b/lib/portage/tests/sync/test_sync_local.py
index a8a71cd4b2..339d37c250 100644
--- a/lib/portage/tests/sync/test_sync_local.py
+++ b/lib/portage/tests/sync/test_sync_local.py
@@ -83,7 +83,7 @@ class SyncLocalTestCase(TestCase):
         cmds = {}
         for cmd in ("emerge", "emaint"):
             for bindir in (self.bindir, self.sbindir):
-                path = os.path.join(bindir, cmd)
+                path = os.path.join(str(bindir), cmd)
                 if os.path.exists(path):
                     cmds[cmd] = (portage._python_interpreter, "-b", "-Wd", 
path)
                     break

diff --git a/lib/portage/tests/util/test_getconfig.py 
b/lib/portage/tests/util/test_getconfig.py
index ae79b21057..9a2af43e4b 100644
--- a/lib/portage/tests/util/test_getconfig.py
+++ b/lib/portage/tests/util/test_getconfig.py
@@ -26,7 +26,7 @@ class GetConfigTestCase(TestCase):
     }
 
     def testGetConfig(self):
-        make_globals_file = os.path.join(self.cnf_path, "make.globals")
+        make_globals_file = os.path.join(str(self.cnf_path), "make.globals")
         d = getconfig(make_globals_file)
         for k, v in self._cases.items():
             self.assertEqual(d[k], v)

diff --git a/lib/portage/xpak.py b/lib/portage/xpak.py
index 9eedf2ecdb..9762ed7909 100644
--- a/lib/portage/xpak.py
+++ b/lib/portage/xpak.py
@@ -104,6 +104,11 @@ def xpak(rootdir, outfile=None):
     and under the name 'outfile' if it is specified. Otherwise it returns the
     xpak segment."""
 
+    if portage.utf8_mode and not isinstance(rootdir, bytes):
+        # Since paths are encoded below, rootdir must also be encoded
+        # when _unicode_func_wrapper is not used.
+        rootdir = os.fsencode(rootdir)
+
     mylist = []
 
     addtolist(mylist, rootdir)

Reply via email to