commit:     b9ef191c74982b0e8d837aa7dd256dc3c52f7d2c
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Feb 20 23:11:46 2021 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Mon Feb 22 11:48:41 2021 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=b9ef191c

MirrorLayoutConfig: content digest support (bug 756778)

In order to support mirror layouts that use content
digests, extend MirrorLayoutConfig validate_structure and
get_best_supported_layout methods to support an optional
filename parameter of type DistfileName which includes a digests
attribute. Use the new parameter to account for availability
of specific distfile content digests when validating and selecting
mirror layouts which require those digests.

The DistfileName type represents a distfile name and associated
content digests, used by MirrorLayoutConfig and related layout
implementations.

The path of a distfile within a layout must be dependent on
nothing more than the distfile name and its associated content
digests. For filename-hash layout, path is dependent on distfile
name alone, and the get_filenames implementation yields strings
corresponding to distfile names. For content-hash layout, path is
dependent on content digest alone, and the get_filenames
implementation yields DistfileName instances whose names are equal
to content digest values. The content-hash layout simply lacks
the filename-hash layout's innate ability to translate a distfile
path to a distfile name, and instead carries an innate ability
to translate a distfile path to a content digest.

In order to prepare for a migration from filename-hash to
content-hash layout, all consumers of the layout get_filenames
method need to be updated to work with content digests as a
substitute for distfile names. For example, in order to prepare
emirrordist for content-hash, a key-value store needs to be
added as a means to associate distfile names with content
digest values yielded by the content-hash get_filenames
implementation.

Bug: https://bugs.gentoo.org/756778
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 lib/portage/package/ebuild/fetch.py    | 98 ++++++++++++++++++++++++++++++----
 lib/portage/tests/ebuild/test_fetch.py | 33 +++++++++---
 2 files changed, 114 insertions(+), 17 deletions(-)

diff --git a/lib/portage/package/ebuild/fetch.py 
b/lib/portage/package/ebuild/fetch.py
index e0fecaf23..af9edd91e 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Gentoo Authors
+# Copyright 2010-2021 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 __all__ = ['fetch']
@@ -344,6 +344,57 @@ _size_suffix_map = {
 }
 
 
+class DistfileName(str):
+       """
+       The DistfileName type represents a distfile name and associated
+       content digests, used by MirrorLayoutConfig and related layout
+       implementations.
+
+       The path of a distfile within a layout must be dependent on
+       nothing more than the distfile name and its associated content
+       digests. For filename-hash layout, path is dependent on distfile
+       name alone, and the get_filenames implementation yields strings
+       corresponding to distfile names. For content-hash layout, path is
+       dependent on content digest alone, and the get_filenames
+       implementation yields DistfileName instances whose names are equal
+       to content digest values. The content-hash layout simply lacks
+       the filename-hash layout's innate ability to translate a distfile
+       path to a distfile name, and instead carries an innate ability
+       to translate a distfile path to a content digest.
+
+       In order to prepare for a migration from filename-hash to
+       content-hash layout, all consumers of the layout get_filenames
+       method need to be updated to work with content digests as a
+       substitute for distfile names. For example, in order to prepare
+       emirrordist for content-hash, a key-value store needs to be
+       added as a means to associate distfile names with content
+       digest values yielded by the content-hash get_filenames
+       implementation.
+       """
+       def __new__(cls, s, digests=None):
+               return str.__new__(cls, s)
+
+       def __init__(self, s, digests=None):
+               super().__init__()
+               self.digests = {} if digests is None else digests
+
+       def digests_equal(self, other):
+               """
+               Test if digests compare equal to those of another instance.
+               """
+               if not isinstance(other, DistfileName):
+                       return False
+               matches = []
+               for algo, digest in self.digests.items():
+                       other_digest = other.digests.get(algo)
+                       if other_digest is not None:
+                               if other_digest == digest:
+                                       matches.append(algo)
+                               else:
+                                       return False
+               return bool(matches)
+
+
 class FlatLayout:
        def get_path(self, filename):
                return filename
@@ -439,19 +490,36 @@ class MirrorLayoutConfig:
                self.structure = data
 
        @staticmethod
-       def validate_structure(val):
+       def validate_structure(val, filename=None):
+               """
+               If the filename argument is given, then supported hash
+               algorithms are constrained by digests available in the filename
+               digests attribute.
+
+               @param val: layout.conf entry args
+               @param filename: filename with digests attribute
+               @return: True if args are valid for available digest algorithms,
+                       and False otherwise
+               """
                if val[0] == 'flat':
                        return FlatLayout.verify_args(val)
-               if val[0] == 'filename-hash':
+               elif val[0] == 'filename-hash':
                        return FilenameHashLayout.verify_args(val)
                return False
 
-       def get_best_supported_layout(self):
+       def get_best_supported_layout(self, filename=None):
+               """
+               If the filename argument is given, then acceptable hash
+               algorithms are constrained by digests available in the filename
+               digests attribute.
+
+               @param filename: filename with digests attribute
+               """
                for val in self.structure:
-                       if self.validate_structure(val):
+                       if self.validate_structure(val, filename=filename):
                                if val[0] == 'flat':
                                        return FlatLayout(*val[1:])
-                               if val[0] == 'filename-hash':
+                               elif val[0] == 'filename-hash':
                                        return FilenameHashLayout(*val[1:])
                # fallback
                return FlatLayout()
@@ -515,7 +583,7 @@ def get_mirror_url(mirror_url, filename, mysettings, 
cache_path=None):
 
        # For some protocols, urlquote is required for correct behavior,
        # and it must not be used for other protocols like rsync and sftp.
-       path = mirror_conf.get_best_supported_layout().get_path(filename)
+       path = 
mirror_conf.get_best_supported_layout(filename=filename).get_path(filename)
        if urlparse(mirror_url).scheme in ('ftp', 'http', 'https'):
                path = urlquote(path)
        return mirror_url + "/distfiles/" + path
@@ -722,15 +790,23 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        if hasattr(myuris, 'items'):
                for myfile, uri_set in myuris.items():
                        for myuri in uri_set:
-                               file_uri_tuples.append((myfile, myuri))
+                               file_uri_tuples.append(
+                                       (DistfileName(myfile, 
digests=mydigests.get(myfile)), myuri)
+                               )
                        if not uri_set:
-                               file_uri_tuples.append((myfile, None))
+                               file_uri_tuples.append(
+                                       (DistfileName(myfile, 
digests=mydigests.get(myfile)), None)
+                               )
        else:
                for myuri in myuris:
                        if urlparse(myuri).scheme:
-                               
file_uri_tuples.append((os.path.basename(myuri), myuri))
+                               file_uri_tuples.append(
+                                       (DistfileName(os.path.basename(myuri), 
digests=mydigests.get(os.path.basename(myuri))), myuri)
+                               )
                        else:
-                               
file_uri_tuples.append((os.path.basename(myuri), None))
+                               file_uri_tuples.append(
+                                       (DistfileName(os.path.basename(myuri), 
digests=mydigests.get(os.path.basename(myuri))), None)
+                               )
 
        filedict = OrderedDict()
        primaryuri_dict = {}

diff --git a/lib/portage/tests/ebuild/test_fetch.py 
b/lib/portage/tests/ebuild/test_fetch.py
index c5ea8253b..b88ae3efb 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -7,7 +7,8 @@ import tempfile
 
 import portage
 from portage import shutil, os
-from portage.const import BASH_BINARY, PORTAGE_PYM_PATH
+from portage.checksum import checksum_str
+from portage.const import BASH_BINARY, MANIFEST2_HASH_DEFAULTS, 
PORTAGE_PYM_PATH
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.tests.util.test_socks5 import AsyncHTTPServer
@@ -18,8 +19,14 @@ from portage.util._async.SchedulerInterface import 
SchedulerInterface
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage.package.ebuild.config import config
 from portage.package.ebuild.digestgen import digestgen
-from portage.package.ebuild.fetch import (_download_suffix, fetch, FlatLayout,
-               FilenameHashLayout, MirrorLayoutConfig)
+from portage.package.ebuild.fetch import (
+       DistfileName,
+       _download_suffix,
+       fetch,
+       FilenameHashLayout,
+       FlatLayout,
+       MirrorLayoutConfig,
+)
 from _emerge.EbuildFetcher import EbuildFetcher
 from _emerge.Package import Package
 
@@ -142,9 +149,14 @@ class EbuildFetchTestCase(TestCase):
                                content["/distfiles/layout.conf"] = 
layout_data.encode("utf8")
 
                                for k, v in distfiles.items():
+                                       filename = DistfileName(
+                                               k,
+                                               digests=dict((algo, 
checksum_str(v, hashname=algo)) for algo in MANIFEST2_HASH_DEFAULTS),
+                                       )
+
                                        # mirror path
                                        for layout in layouts:
-                                               content["/distfiles/" + 
layout.get_path(k)] = v
+                                               content["/distfiles/" + 
layout.get_path(filename)] = v
                                        # upstream path
                                        content["/distfiles/{}.txt".format(k)] 
= v
 
@@ -499,6 +511,10 @@ class EbuildFetchTestCase(TestCase):
                                io.StringIO(conf))
 
        def test_filename_hash_layout_get_filenames(self):
+               filename = DistfileName(
+                       'foo-1.tar.gz',
+                       digests=dict((algo, checksum_str(b'', hashname=algo)) 
for algo in MANIFEST2_HASH_DEFAULTS),
+               )
                layouts = (
                        FlatLayout(),
                        FilenameHashLayout('SHA1', '4'),
@@ -506,7 +522,6 @@ class EbuildFetchTestCase(TestCase):
                        FilenameHashLayout('SHA1', '8:16'),
                        FilenameHashLayout('SHA1', '8:16:24'),
                )
-               filename = 'foo-1.tar.gz'
 
                for layout in layouts:
                        distdir = tempfile.mkdtemp()
@@ -520,6 +535,12 @@ class EbuildFetchTestCase(TestCase):
                                with open(path, 'wb') as f:
                                        pass
 
-                               self.assertEqual([filename], 
list(layout.get_filenames(distdir)))
+                               file_list = list(layout.get_filenames(distdir))
+                               self.assertTrue(len(file_list) > 0)
+                               for filename_result in file_list:
+                                       if isinstance(filename_result, 
DistfileName):
+                                               
self.assertTrue(filename_result.digests_equal(filename))
+                                       else:
+                                               
self.assertEqual(filename_result, str(filename))
                        finally:
                                shutil.rmtree(distdir)

Reply via email to