commit: 4f5f6f571e52af6d2703db760bad4e0ad7439d5a Author: Zac Medico <zmedico <AT> gentoo <DOT> org> AuthorDate: Tue Oct 10 01:36:02 2023 +0000 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> CommitDate: Wed Oct 11 19:05:02 2023 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=4f5f6f57
Support Python UTF-8 Mode via portage.utf8_mode (bug 914722) When a UTF-8 locale, or UTF-8 mode is detected, set portage.utf8_mode to True, and do not wrap file access with _unicode_func_wrapper. This is intended to mitigate issues with byte string handling in python libraries like shutil, as reported in bug 914722. This patch is intended to be a simple and minimal implementation that can be optimized later through the elimination of unnecessary encoding/decoding. The str() wrapping in the unit tests is for lazily evaluated instances of lazy_value, which is used to account for mock portage.const.EPREFIX values that exist during unit tests. Bug: https://bugs.gentoo.org/914722 Signed-off-by: Zac Medico <zmedico <AT> gentoo.org> NEWS | 4 +++ lib/portage/__init__.py | 4 +++ lib/portage/_sets/__init__.py | 10 ++++-- lib/portage/dbapi/vartree.py | 11 ++++-- lib/portage/gpkg.py | 10 +++++- lib/portage/package/ebuild/doebuild.py | 4 +++ lib/portage/tests/dbapi/test_portdb_cache.py | 2 +- lib/portage/tests/ebuild/test_fetch.py | 2 +- lib/portage/tests/emerge/test_config_protect.py | 14 +++++--- .../emerge/test_emerge_blocker_file_collision.py | 7 +++- lib/portage/tests/emerge/test_emerge_slot_abi.py | 14 ++++++-- lib/portage/tests/emerge/test_simple.py | 40 +++++++++++++++------- lib/portage/tests/resolver/ResolverPlayground.py | 4 +-- lib/portage/tests/sync/test_sync_local.py | 2 +- lib/portage/tests/util/test_getconfig.py | 2 +- lib/portage/xpak.py | 5 +++ 16 files changed, 105 insertions(+), 30 deletions(-) diff --git a/NEWS b/NEWS index e5236d5e1b..e437f481f6 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,10 @@ Bug fixes: * make.conf(5): Update MAKEOPTS guidance to reflect modern practice (bug #821529). +* For locales that have UTF-8 filesystem encoding, use unicode strings rather + than raw bytes for file paths in calls to python modules like os and shutil + (bug #914722). 
+ portage-3.0.52 (2023-10-03) -------------- diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py index 05c81be538..defefcb286 100644 --- a/lib/portage/__init__.py +++ b/lib/portage/__init__.py @@ -186,6 +186,7 @@ except ImportError as e: sys.stderr.write(f" {e}\n\n") raise +utf8_mode = sys.getfilesystemencoding() == "utf-8" # We use utf_8 encoding everywhere. Previously, we used # sys.getfilesystemencoding() for the 'merge' encoding, but that had @@ -319,6 +320,9 @@ class _unicode_module_wrapper: object.__setattr__(self, "_cache", cache) def __getattribute__(self, attr): + if utf8_mode: + return getattr(object.__getattribute__(self, "_mod"), attr) + cache = object.__getattribute__(self, "_cache") if cache is not None: result = cache.get(attr) diff --git a/lib/portage/_sets/__init__.py b/lib/portage/_sets/__init__.py index e3fd77fa26..295a1e3533 100644 --- a/lib/portage/_sets/__init__.py +++ b/lib/portage/_sets/__init__.py @@ -359,16 +359,22 @@ def load_default_config(settings, trees): os.path.join(settings["PORTAGE_CONFIGROOT"], USER_CONFIG_PATH, "sets.conf"), ] + dot = "." 
+ tilde = "~" + if not portage.utf8_mode: + dot = _unicode_encode(dot) + tilde = _unicode_encode(tilde) + for sets_config_path in sets_config_paths: if os.path.isdir(sets_config_path): for path, dirs, files in os.walk(sets_config_path): dirs.sort() files.sort() for d in dirs: - if d in vcs_dirs or d.startswith(b".") or d.endswith(b"~"): + if d in vcs_dirs or d.startswith(dot) or d.endswith(tilde): dirs.remove(d) for f in files: - if not f.startswith(b".") and not f.endswith(b"~"): + if not f.startswith(dot) and not f.endswith(tilde): yield os.path.join(path, f) elif os.path.isfile(sets_config_path): yield sets_config_path diff --git a/lib/portage/dbapi/vartree.py b/lib/portage/dbapi/vartree.py index 3f39e2b787..835cbb8092 100644 --- a/lib/portage/dbapi/vartree.py +++ b/lib/portage/dbapi/vartree.py @@ -4493,6 +4493,10 @@ class dblink: eagain_error = True break + if portage.utf8_mode: + parent = os.fsencode(parent) + dirs = [os.fsencode(value) for value in dirs] + files = [os.fsencode(value) for value in files] try: parent = _unicode_decode( parent, encoding=_encodings["merge"], errors="strict" @@ -5280,9 +5284,12 @@ class dblink: # Use atomic_ofstream for automatic coercion of raw bytes to # unicode, in order to prevent TypeError when writing raw bytes # to TextIOWrapper with python2. 
+ contents_tmp_path = os.path.join(self.dbtmpdir, "CONTENTS") outfile = atomic_ofstream( - _unicode_encode( - os.path.join(self.dbtmpdir, "CONTENTS"), + contents_tmp_path + if portage.utf8_mode + else _unicode_encode( + contents_tmp_path, encoding=_encodings["fs"], errors="strict", ), diff --git a/lib/portage/gpkg.py b/lib/portage/gpkg.py index d16dffbc27..c56076ab91 100644 --- a/lib/portage/gpkg.py +++ b/lib/portage/gpkg.py @@ -14,6 +14,7 @@ import tempfile from copy import copy from datetime import datetime +import portage from portage import checksum from portage import os from portage import shutil @@ -1861,6 +1862,11 @@ class gpkg: image_total_size = 0 for parent, dirs, files in os.walk(root_dir): + if portage.utf8_mode: + parent = os.fsencode(parent) + dirs = [os.fsencode(value) for value in dirs] + files = [os.fsencode(value) for value in files] + parent = _unicode_decode(parent, encoding=_encodings["fs"], errors="strict") for d in dirs: try: @@ -1911,7 +1917,9 @@ class gpkg: if os.path.islink(f): path_link = os.readlink(f) path_link_length = len( - _unicode_encode( + os.fsencode(path_link) + if portage.utf8_mode + else _unicode_encode( path_link, encoding=_encodings["fs"], errors="strict" ) ) diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py index 7e95a07c01..5780c2b0b3 100644 --- a/lib/portage/package/ebuild/doebuild.py +++ b/lib/portage/package/ebuild/doebuild.py @@ -2669,6 +2669,10 @@ def _post_src_install_uid_fix(mysettings, out): desktopfile_errors = [] for parent, dirs, files in os.walk(destdir): + if portage.utf8_mode: + parent = os.fsencode(parent) + dirs = [os.fsencode(value) for value in dirs] + files = [os.fsencode(value) for value in files] try: parent = _unicode_decode( parent, encoding=_encodings["merge"], errors="strict" diff --git a/lib/portage/tests/dbapi/test_portdb_cache.py b/lib/portage/tests/dbapi/test_portdb_cache.py index a55377b6bc..2f14b7bdf0 100644 --- 
a/lib/portage/tests/dbapi/test_portdb_cache.py +++ b/lib/portage/tests/dbapi/test_portdb_cache.py @@ -52,7 +52,7 @@ class PortdbCacheTestCase(TestCase): portage_python, "-b", "-Wd", - os.path.join(self.bindir, "egencache"), + os.path.join(str(self.bindir), "egencache"), "--update-manifests", "--sign-manifests=n", "--repo", diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py index 3be6ed9bdd..76dcdaf88c 100644 --- a/lib/portage/tests/ebuild/test_fetch.py +++ b/lib/portage/tests/ebuild/test_fetch.py @@ -278,7 +278,7 @@ class EbuildFetchTestCase(TestCase): portage._python_interpreter, "-b", "-Wd", - os.path.join(self.bindir, "emirrordist"), + os.path.join(str(self.bindir), "emirrordist"), "--distfiles", settings["DISTDIR"], "--config-root", diff --git a/lib/portage/tests/emerge/test_config_protect.py b/lib/portage/tests/emerge/test_config_protect.py index ec359833e6..560a49a769 100644 --- a/lib/portage/tests/emerge/test_config_protect.py +++ b/lib/portage/tests/emerge/test_config_protect.py @@ -112,10 +112,15 @@ src_install() { portage_python, "-b", "-Wd", - os.path.join(self.sbindir, "dispatch-conf"), + os.path.join(str(self.sbindir), "dispatch-conf"), ) - emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "emerge")) - etc_update_cmd = (BASH_BINARY, os.path.join(self.sbindir, "etc-update")) + emerge_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "emerge"), + ) + etc_update_cmd = (BASH_BINARY, os.path.join(str(self.sbindir), "etc-update")) etc_update_auto = etc_update_cmd + ( "--automode", "-5", @@ -247,7 +252,8 @@ src_install() { os.symlink(true_binary, os.path.join(fake_bin, x)) for x in etc_symlinks: os.symlink( - os.path.join(self.cnf_etc_path, x), os.path.join(eprefix, "etc", x) + os.path.join(str(self.cnf_etc_path), x), + os.path.join(eprefix, "etc", x), ) with open(os.path.join(var_cache_edb, "counter"), "wb") as f: f.write(b"100") diff --git 
a/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py b/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py index 6f7a96af99..b29a83fce8 100644 --- a/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py +++ b/lib/portage/tests/emerge/test_emerge_blocker_file_collision.py @@ -50,7 +50,12 @@ src_install() { user_config_dir = os.path.join(eprefix, USER_CONFIG_PATH) portage_python = portage._python_interpreter - emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "emerge")) + emerge_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "emerge"), + ) file_collision = os.path.join(eroot, "usr/lib/file-collision") diff --git a/lib/portage/tests/emerge/test_emerge_slot_abi.py b/lib/portage/tests/emerge/test_emerge_slot_abi.py index 197685975e..70a18b35c2 100644 --- a/lib/portage/tests/emerge/test_emerge_slot_abi.py +++ b/lib/portage/tests/emerge/test_emerge_slot_abi.py @@ -54,8 +54,18 @@ class SlotAbiEmergeTestCase(TestCase): package_mask_path = os.path.join(user_config_dir, "package.mask") portage_python = portage._python_interpreter - ebuild_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "ebuild")) - emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "emerge")) + ebuild_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "ebuild"), + ) + emerge_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "emerge"), + ) test_ebuild = portdb.findname("dev-libs/dbus-glib-0.98") self.assertFalse(test_ebuild is None) diff --git a/lib/portage/tests/emerge/test_simple.py b/lib/portage/tests/emerge/test_simple.py index ab85ad441c..1cc6457ef1 100644 --- a/lib/portage/tests/emerge/test_simple.py +++ b/lib/portage/tests/emerge/test_simple.py @@ -265,51 +265,66 @@ call_has_and_best_version() { portage_python, "-b", "-Wd", - os.path.join(self.sbindir, "dispatch-conf"), + os.path.join(str(self.sbindir), "dispatch-conf"), + ) + ebuild_cmd = ( + 
portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "ebuild"), ) - ebuild_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "ebuild")) egencache_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.bindir, "egencache"), + os.path.join(str(self.bindir), "egencache"), "--repo", "test_repo", "--repositories-configuration", settings.repositories.config_string(), ) - emerge_cmd = (portage_python, "-b", "-Wd", os.path.join(self.bindir, "emerge")) - emaint_cmd = (portage_python, "-b", "-Wd", os.path.join(self.sbindir, "emaint")) + emerge_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.bindir), "emerge"), + ) + emaint_cmd = ( + portage_python, + "-b", + "-Wd", + os.path.join(str(self.sbindir), "emaint"), + ) env_update_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.sbindir, "env-update"), + os.path.join(str(self.sbindir), "env-update"), ) - etc_update_cmd = (BASH_BINARY, os.path.join(self.sbindir, "etc-update")) + etc_update_cmd = (BASH_BINARY, os.path.join(str(self.sbindir), "etc-update")) fixpackages_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.sbindir, "fixpackages"), + os.path.join(str(self.sbindir), "fixpackages"), ) portageq_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.bindir, "portageq"), + os.path.join(str(self.bindir), "portageq"), ) quickpkg_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.bindir, "quickpkg"), + os.path.join(str(self.bindir), "quickpkg"), ) regenworld_cmd = ( portage_python, "-b", "-Wd", - os.path.join(self.sbindir, "regenworld"), + os.path.join(str(self.sbindir), "regenworld"), ) rm_binary = find_binary("rm") @@ -663,7 +678,8 @@ call_has_and_best_version() { os.symlink(true_binary, os.path.join(fake_bin, x)) for x in etc_symlinks: os.symlink( - os.path.join(self.cnf_etc_path, x), os.path.join(eprefix, "etc", x) + os.path.join(str(self.cnf_etc_path), x), + os.path.join(eprefix, "etc", x), ) with open(os.path.join(var_cache_edb, "counter"), "wb") as f: 
f.write(b"100") diff --git a/lib/portage/tests/resolver/ResolverPlayground.py b/lib/portage/tests/resolver/ResolverPlayground.py index f1cd844708..167e1e4608 100644 --- a/lib/portage/tests/resolver/ResolverPlayground.py +++ b/lib/portage/tests/resolver/ResolverPlayground.py @@ -633,7 +633,7 @@ class ResolverPlayground: self.eroot, GLOBAL_CONFIG_PATH.lstrip(os.sep), "make.globals" ) ensure_dirs(os.path.dirname(make_globals_path)) - os.symlink(os.path.join(cnf_path, "make.globals"), make_globals_path) + os.symlink(os.path.join(str(cnf_path), "make.globals"), make_globals_path) # Create /usr/share/portage/config/sets/portage.conf default_sets_conf_dir = os.path.join( @@ -645,7 +645,7 @@ class ResolverPlayground: except os.error: pass - provided_sets_portage_conf = os.path.join(cnf_path, "sets", "portage.conf") + provided_sets_portage_conf = os.path.join(str(cnf_path), "sets", "portage.conf") os.symlink( provided_sets_portage_conf, os.path.join(default_sets_conf_dir, "portage.conf"), diff --git a/lib/portage/tests/sync/test_sync_local.py b/lib/portage/tests/sync/test_sync_local.py index a8a71cd4b2..339d37c250 100644 --- a/lib/portage/tests/sync/test_sync_local.py +++ b/lib/portage/tests/sync/test_sync_local.py @@ -83,7 +83,7 @@ class SyncLocalTestCase(TestCase): cmds = {} for cmd in ("emerge", "emaint"): for bindir in (self.bindir, self.sbindir): - path = os.path.join(bindir, cmd) + path = os.path.join(str(bindir), cmd) if os.path.exists(path): cmds[cmd] = (portage._python_interpreter, "-b", "-Wd", path) break diff --git a/lib/portage/tests/util/test_getconfig.py b/lib/portage/tests/util/test_getconfig.py index ae79b21057..9a2af43e4b 100644 --- a/lib/portage/tests/util/test_getconfig.py +++ b/lib/portage/tests/util/test_getconfig.py @@ -26,7 +26,7 @@ class GetConfigTestCase(TestCase): } def testGetConfig(self): - make_globals_file = os.path.join(self.cnf_path, "make.globals") + make_globals_file = os.path.join(str(self.cnf_path), "make.globals") d = 
getconfig(make_globals_file) for k, v in self._cases.items(): self.assertEqual(d[k], v) diff --git a/lib/portage/xpak.py b/lib/portage/xpak.py index 9eedf2ecdb..9762ed7909 100644 --- a/lib/portage/xpak.py +++ b/lib/portage/xpak.py @@ -104,6 +104,11 @@ def xpak(rootdir, outfile=None): and under the name 'outfile' if it is specified. Otherwise it returns the xpak segment.""" + if portage.utf8_mode and not isinstance(rootdir, bytes): + # Since paths are encoded below, rootdir must also be encoded + # when _unicode_func_wrapper is not used. + rootdir = os.fsencode(rootdir) + mylist = [] addtolist(mylist, rootdir)
