David Caro has uploaded a new change for review.

Change subject: Added script to manage repositories
......................................................................

Added script to manage repositories

Quite big script, here's what it can do right now:
* Remove duplicated rpms and create hard links instead
* Sign packages in batch mode (passing passphrase as parameter)
* Create/update rpm repos using multiple sources, passed as arguments or in a
  configuration file:
    · From jenkins url (multijob or simple job)
    · From koji url
    · From a url with rpm links on it, recursively or not
    · From a directory, all packages, optionally filtering
    · From a directory only the latest packages, optionally filtering
* Remove the old rpm versions and keep only the N newest
* Run createrepo in a multidistro repository (runs on each distribution subrepo)
* Generate the sources tree from the source rpms:
    · Including the patches or not
    · Generating detached signatures or not

I tried to keep the output informative and human readable, though it is mostly
meant to be run from jenkins jobs.

Change-Id: Ib630eb1e3b701eaf224451f02692723447aa1f8b
Signed-off-by: David Caro <dcaro...@redhat.com>
---
A repoman.py
1 file changed, 983 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/jenkins refs/changes/99/33299/1

diff --git a/repoman.py b/repoman.py
new file mode 100755
index 0000000..5aa0fa2
--- /dev/null
+++ b/repoman.py
@@ -0,0 +1,983 @@
+#!/usr/bin/env python
+"""
+This program is a helper to rpm repo management, some common tasks
+"""
import argparse
import atexit
import errno
import hashlib
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
from getpass import getpass

import gnupg
import pexpect
import requests
import rpm
from urllib3 import connectionpool
+
+DISTROS = (
+    re.compile(r'\.fc\d+'),
+    re.compile(r'\.el\d+'),
+)
+
+BASE_REPOS_PATH = '/var/www/html/pub'
+# To be set on main, if set at all
+TEMP_DIR = None
+
+
class NotSamePackage(Exception):
    """Raised when two different packages are compared as equals."""
+
+
def cleanup(temp_dir):
    """Remove the given temporary directory, if it still exists."""
    if not os.path.isdir(temp_dir):
        return
    shutil.rmtree(temp_dir)
    logging.info('Cleaning up temporary dir %s', temp_dir)
+
+
def tryint(mayint):
    """Return *mayint* as an int when it parses as one, untouched
    otherwise."""
    try:
        value = int(mayint)
    except ValueError:
        value = mayint
    return value
+
+
def cmpver(ver1, ver2):
    """Descending comparator for dotted version strings.

    Returns -1 when ver1 is newer, 0 on equality and 1 when ver2 is
    newer, comparing each dot-separated component numerically when it
    looks like a number.
    """
    def _split(ver):
        chunks = ver.split('.') if '.' in ver else (ver,)
        parts = []
        for chunk in chunks:
            try:
                parts.append(int(chunk))
            except ValueError:
                parts.append(chunk)
        return parts

    parts1 = _split(ver1)
    parts2 = _split(ver2)
    if parts1 == parts2:
        return 0
    return -1 if parts1 > parts2 else 1
+
+
def cmpfullver(fullver1, fullver2):
    """Descending comparator for full 'version-release' strings: the
    version decides, the release breaks the tie."""
    ver1, rel1 = fullver1.split('-', 1)
    ver2, rel2 = fullver2.split('-', 1)
    by_version = cmpver(ver1, ver2)
    if by_version:
        return by_version
    return cmpver(rel1, rel2)
+
+
def print_busy(prev_pos=0):
    """Draw one step of a spinning busy indicator on stdout.

    :param prev_pos: position returned by the previous call (0-3)
    :returns: the position to pass on the next call
    """
    glyph = {0: '-', 1: '/', 2: '|'}.get(prev_pos, '\\')
    sys.stdout.write('\r')
    sys.stdout.write(glyph)
    sys.stdout.flush()
    return (prev_pos + 1) % 4
+
+
def get_distro(release):
    """Extract the distro tag (e.g. fc20, el6) from a release string,
    raising when none of the known patterns matches."""
    for pattern in DISTROS:
        found = pattern.search(release)
        if found:
            # drop the leading dot of the matched '.fcNN'/'.elNN'
            return found.group()[1:]
    raise Exception('Unknown distro for %s' % release)
+
+
def to_human_size(fsize):
    """Format a byte count with the biggest fitting unit (M, K or B)."""
    for factor, suffix in ((1024 * 1024, 'M'), (1024, 'K')):
        scaled = fsize / factor
        if scaled >= 1:
            return '%d%s' % (scaled, suffix)
    return '%dB' % fsize
+
+
def download(path, dest_path):
    """Download *path* into *dest_path*, printing progress on stdout.

    When the server reports a content-length a 100-dot '=' progress
    bar is drawn, otherwise a spinning busy indicator.
    """
    headers = requests.head(path)
    chunk_size = 4096
    length = int(headers.headers.get('content-length', 0)) or 0
    logging.info('Downloading %s, length %s ...',
                 path,
                 length and to_human_size(length) or 'unknown')
    num_dots = 100
    # bytes represented by each '=' dot (never 0, to avoid ZeroDivision)
    dot_frec = length/num_dots or 1
    stream = requests.get(path, stream=True)
    prev_percent = 0
    progress = 0
    # bugfix: initialize here so the final bar check below cannot hit
    # an undefined name when the response body yields no chunks
    cur_percent = 0
    if length:
        sys.stdout.write('    %[')
    sys.stdout.flush()
    with open(dest_path, 'w') as rpm_fd:
        for chunk in stream.iter_content(chunk_size=chunk_size):
            if chunk:
                rpm_fd.write(chunk)
                progress += len(chunk)
                cur_percent = int(progress / dot_frec)
                if length and cur_percent > prev_percent:
                    for _ in xrange(cur_percent - prev_percent):
                        sys.stdout.write('=')
                    sys.stdout.flush()
                    prev_percent = cur_percent
                elif not length:
                    # unknown total size, just show we are alive
                    prev_percent = print_busy(prev_percent)
    if length:
        # make sure the bar is closed even on rounding shortfalls
        if cur_percent < num_dots:
            sys.stdout.write('=')
        sys.stdout.write(']')
    sys.stdout.write('\n')
    if not length:
        logging.info('    Done')
+
+
class RPM(object):
    """In-memory metadata of a single rpm package file.

    Reads the rpm header (skipping signature verification) and exposes
    the fields the rest of the script needs: name, version, release,
    distro, arch, signature...  http(s) paths are downloaded into
    TEMP_DIR first.
    """
    def __init__(self, path):
        trans = rpm.TransactionSet()
        # Ignore unsigned rpms
        trans.setVSFlags(rpm._RPMVSF_NOSIGNATURES)
        if path.startswith('http:') or path.startswith('https:'):
            # remote package: fetch it into the temporary dir first
            name = path.rsplit('/', 1)[-1]
            if not name:
                raise Exception('Passed trailing slash in path %s, '
                                'unable to guess package name'
                                % path)
            fpath = TEMP_DIR + '/' + name
            download(path, fpath)
            path = fpath
        self.path = path
        with open(path) as fdno:
            try:
                hdr = trans.hdrFromFdno(fdno)
            except Exception as exc:
                print "Failed to parse header for %s" % path
                raise
            # inode number, used later to detect hard-linked duplicates
            self.inode = os.fstat(fdno.fileno()).st_ino
        self.is_source = hdr[rpm.RPMTAG_SOURCEPACKAGE] and True or False
        self.sourcerpm = hdr[rpm.RPMTAG_SOURCERPM]
        self.name = hdr[rpm.RPMTAG_NAME]
        self.version = hdr[rpm.RPMTAG_VERSION]
        self.release = hdr[rpm.RPMTAG_RELEASE]
        # None/empty when the package is unsigned
        self.signature = hdr[rpm.RPMTAG_SIGPGP]
        self._raw_hdr = hdr
        # will be calculated if needed
        self._md5 = None
        # ovirt-release and ovirt-guest-tools rpms must go to all the
        # distros
        if self.name.startswith('ovirt-release') \
           or self.name.startswith('ovirt-guest-tools'):
            self.distro = 'all'
        else:
            self.distro = get_distro(self.release)
        self.arch = hdr[rpm.RPMTAG_ARCH] or 'none'
        # grouping key: same name+distro+arch+src/bin end up together
        self.full_name = 'rpm(%s %s %s %s)' % (
            self.name, self.distro, self.arch,
            self.is_source and 'src' or 'bin',
        )
        # remove the distro from the release for the version string
        if self.distro:
            self.ver_release = re.sub(
                r'\.%s[^.]*' % self.distro,
                '',
                self.release,
                1
            )
        else:
            self.ver_release = self.release
        self.full_version = '%s-%s' % (self.version, self.ver_release)

    @property
    def md5(self):
        # md5 of the whole file, computed lazily and cached
        if self._md5 is None:
            with open(self.path) as fdno:
                self._md5 = hashlib.md5(fdno.read()).hexdigest()
        return self._md5

    def generate_path(self, distro=None):
        """Return the relative repo path this package should live at:
        rpm/<distro>/<arch>/<name>-<ver>-<rel>.<arch>.rpm
        (SRPMS/src for source packages)."""
        if distro is None:
            distro = self.distro
        if self.is_source:
            arch_path = 'SRPMS'
            arch_name = 'src'
        else:
            arch_path = self.arch
            arch_name = self.arch
        return 'rpm/%s/%s/%s-%s-%s.%s.rpm' % (
            distro,
            arch_path,
            self.name,
            self.version,
            self.release,
            arch_name,
        )

    def sign(self, keyuid, passwd):
        """Sign the package in place with rpmsign, driving the
        passphrase prompt through pexpect, then reload the header."""
        logging.info("SIGNING: %s", self.path)
        child = pexpect.spawn(
            'rpmsign',
            [
                '--resign',
                '-D', '_signature gpg',
                '-D', '_gpg_name %s' % keyuid,
                self.path,
            ],
        )
        child.expect('Enter pass phrase: ')
        child.sendline(passwd)
        child.expect(pexpect.EOF)
        child.close()
        if child.exitstatus != 0:
            raise Exception("Failed to sign package.")
        # re-read the header so self.signature reflects the new state
        self.__init__(self.path)
        if not self.signature:
            raise Exception("Failed to sign rpm %s with key '%s'"
                            % (self.path, keyuid))

    def __str__(self):
        return 'rpm(%s %s %s %s %s %s)' % (
            self.name, self.version,
            self.release, self.arch,
            self.is_source and 'src' or 'bin',
            self.signature and 'signed' or 'unsigned',
        )

    def __repr__(self):
        return self.__str__()
+
+
class RPMinode(list):
    """
    Simple list, abstracts a set of rpm copies (hard links) sharing
    the same inode for a given name-version
    """
    def __init__(self, inode):
        # inode number shared by every package path in this group
        self.inode = inode
        super(RPMinode, self).__init__()

    def delete(self, noop=False):
        """Remove all the package files, or only log what would be
        removed when *noop* is set.

        bugfix: the noop logic was inverted, it removed the files when
        noop was True and only logged when it was False.
        """
        for pkg in self:
            if noop:
                logging.info('NOOP::%s would have been removed', pkg.path)
            else:
                os.remove(pkg.path)

    def get_rpms(self, regmatch=None, fmatch=None):
        """Return the packages, filtered by an optional compiled regexp
        on the path and/or an optional predicate on the package."""
        pkgs = self
        if regmatch:
            pkgs = [pkg for pkg in self if regmatch.search(pkg.path)]
        if fmatch:
            pkgs = [pkg for pkg in pkgs if fmatch(pkg)]
        return pkgs
+
+
class RPMEntry(dict):
    """Groups the rpm inode sets (RPMinode) holding a given package
    version, keyed by inode number."""
    def __init__(self, version):
        # version string this entry collects packages for
        self.version = version
        super(RPMEntry, self).__init__()

    def add_pkg(self, pkg):
        """Register *pkg* under its inode, creating the group if new."""
        group = self.get(pkg.inode)
        if group is None:
            group = RPMinode(pkg.inode)
            self[pkg.inode] = group
        group.append(pkg)
        return True

    def del_inode(self, inode, noop=False):
        """Delete the package files of *inode* and forget about it."""
        if inode in self:
            self[inode].delete(noop)
            self.pop(inode)

    def get_rpms(self, regmatch=None, fmatch=None):
        """Collect the matching packages from every inode group."""
        pkgs = []
        for group in self.itervalues():
            pkgs.extend(group.get_rpms(regmatch=regmatch, fmatch=fmatch))
        return pkgs
+
+
class RPMVersionList(dict):
    """List of available versions for a package name, keyed by full
    version string, holding RPMEntry values."""
    def add_pkg(self, pkg, onlyifnewer):
        """Add *pkg* under its full version.

        :param onlyifnewer: when set, refuse the package if an equal
            or newer version is already known
        :returns: True when added, False when refused
        """
        # cmpfullver sorts newest first: it returns -1 when its first
        # argument is the newer one and 0 on equality, so 'ver equal
        # or newer than pkg' is cmpfullver(ver, pkg) <= 0.
        # bugfix: the check was inverted (>= 0), refusing packages
        # when only OLDER versions were present.
        if onlyifnewer and (
                pkg.full_version in self
                or next((ver for ver in self.keys()
                         if cmpfullver(ver, pkg.full_version) <= 0), None)):
            return False
        elif pkg.full_version not in self:
            self[pkg.full_version] = RPMEntry(pkg.full_version)
        return self[pkg.full_version].add_pkg(pkg)

    def get_latest(self, num=1):
        """
        Returns the list of available inodes for the latest version
        if any

        :param num: how many versions to return, 0 meaning all of them
        """
        if not self:
            return None
        if not num:
            num = len(self)
        # cmpfullver sorts from newest to oldest, so the first *num*
        # entries are the latest versions
        sorted_list = self.keys()
        sorted_list.sort(cmp=cmpfullver)
        latest = {}
        for pos in xrange(num):
            latest[sorted_list[pos]] = self.get(sorted_list[pos])
        return latest

    def del_version(self, version, noop=False):
        """Delete every package file of *version* and forget it."""
        if version in self:
            for inode in self[version].keys():
                self[version].del_inode(inode, noop=noop)
            self.pop(version)

    def get_rpms(self, regmatch=None, fmatch=None, latest=0):
        """Collect the matching packages from the *latest* newest
        versions (all of them when latest is 0)."""
        pkgs = []
        for entry in self.get_latest(latest).itervalues():
            pkgs.extend(entry.get_rpms(regmatch=regmatch,
                                       fmatch=fmatch))
        return pkgs
+
+
class RPMList(dict):
    """List of rpms, separated by name
    The shape of the rpms list is:
    {
        name1:{
            version1:{
                inode1: [rpm_object1, ...],
                inode2: [...],
            },
            version2:{...}
        },
        name2: {...}
    }

    Where the top level is abstracted by RPMList(dict)
        The second level by RPMVersionList(dict)
        The third by RPMEntry(dict)
        The fourth by RPMinode(list)
        And the last level by RPM

    So:

    RPMList{
        name1: RPMVersionList{
            version1: RPMEntry{
                inode1: RPMinode[RPM, RPM, ...]
                inode2: RPMinode[...]
            },
            version2: RPMEntry{...}
        },
        name2: RPMVersionList{...}
    }
    """
    def add_pkg(self, pkg, onlyifnewer=False):
        # packages are grouped by full_name (name+distro+arch+src/bin)
        if pkg.full_name not in self:
            self[pkg.full_name] = RPMVersionList()
        return self[pkg.full_name].add_pkg(pkg, onlyifnewer)

    def del_pkg(self, name):
        """Delete every version (and its files) of package *name*."""
        if name in self:
            for version in self[name].keys():
                self[name].del_version(version)
            self.pop(name)

    def get_rpms(self, regmatch=None, fmatch=None, latest=0):
        """Collect the matching packages from every known name."""
        rpms = []
        for version in self.itervalues():
            rpms.extend(version.get_rpms(
                regmatch=regmatch,
                fmatch=fmatch,
                latest=latest))
        return rpms
+
+
class RPMRepo(object):
    """Represents the repository structure, does not include metadata"""
    def __init__(self, path):
        """Load into memory the rpms already under *path*, if any."""
        self.rpms = RPMList()
        self.path = path
        # packages added since load, pending to be written by save()
        self.to_copy = []
        # distros seen so far, used to expand distro='all' packages
        self.distros = set()
        if path:
            logging.info('Loading repo %s', path)
            for pkg in list_rpms(path):
                self.add_pkg(RPM(pkg), to_copy=False, hidelog=True)
            logging.info('Repo %s loaded', path)

    def add_pkg(self, pkg, onlyifnewer=False, to_copy=True, hidelog=False):
        """Add *pkg* to the in-memory list.

        :param onlyifnewer: skip it if an equal/newer version is there
        :param to_copy: queue the package to be written out on save()
        :param hidelog: keep quiet (used on the initial load)
        """
        if self.rpms.add_pkg(pkg, onlyifnewer):
            if to_copy:
                self.to_copy.append(pkg)
            if not hidelog:
                logging.info('Adding package %s to repo %s', pkg.path,
                             self.path)
        else:
            if not hidelog:
                logging.info("Not adding %s, there's already an equal or "
                             "newer version", pkg)
        if pkg.distro != 'all':
            self.distros.add(pkg.distro)

    def save(self, onlylatest=False):
        """Write the queued packages into the repo tree, expanding
        distro='all' packages into every known distro."""
        logging.info('Saving new added rpms into %s', self.path)
        for pkg in self.to_copy:
            if onlylatest and not self.is_latest_version(pkg):
                logging.info('Skipping %s a newer version is already '
                             'in the repo.', pkg)
                continue
            if pkg.distro == 'all':
                if not self.distros:
                    raise Exception('No distros found in the repo and no '
                                    'packages with any distros added.')
                dst_distros = self.distros
            else:
                dst_distros = [pkg.distro]
            for distro in dst_distros:
                dst_path = self.path + '/' + pkg.generate_path(distro)
                pkg_copy(pkg.path, dst_path)
        logging.info('Saved %s\n', self.path)

    def is_latest_version(self, pkg):
        """True when no newer version of *pkg* is already in the repo."""
        verlist = self.rpms.get(pkg.full_name, {})
        if not verlist or pkg.full_version in verlist.get_latest():
            return True
        return False

    def generate_sources(self, with_patches=False, key=None, passphrase=None):
        """Build the src/ tree from the source rpms of the repo,
        optionally signing every extracted file."""
        logging.info("Generating src directory from srpms")
        for name, versions in self.rpms.iteritems():
            # source packages' full_name ends in ' src)', see RPM
            if not name.endswith('src)'):
                continue
            for version in versions.itervalues():
                # NOTE(review): popitem removes the inode entry from
                # the version list -- confirm mutating it is intended
                pkg = version.popitem()[1][0]
                logging.info("Parsing srpm %s", pkg)
                dst_dir = '%s/src/%s' % (self.path, pkg.name)
                extract_sources(pkg.path, dst_dir, with_patches)
                if key:
                    sign_sources(dst_dir, key, passphrase)
        logging.info('src dir generated')

    def createrepo(self):
        """Run createrepo on each per-distro subrepo."""
        for distro in self.distros:
            logging.info('Creating metadata for %s', distro)
            subprocess.call(['createrepo', self.path + '/rpm/' + distro])

    def delete_old(self, keep=1, noop=False):
        """Remove all but the *keep* newest versions of each package."""
        new_rpms = RPMList(self.rpms)
        for name, versions in self.rpms.iteritems():
            if len(versions) <= keep:
                continue
            to_keep = RPMVersionList()
            # peel off the newest remaining version 'keep' times
            for _ in range(keep):
                latest = versions.get_latest()
                to_keep.update(latest)
                versions.pop(latest.keys()[0])
            new_rpms[name] = to_keep
            for version in versions.keys():
                logging.info('Deleting %s version %s', name, version)
                versions.del_version(version, noop)
        self.rpms = new_rpms

    def get_rpms(self, regmatch=None, fmatch=None, latest=0):
        """Collect the matching packages from the whole repo."""
        return self.rpms.get_rpms(
            regmatch=regmatch,
            fmatch=fmatch,
            latest=latest)

    def sign_rpms(self, key, passwd):
        """Sign every unsigned rpm of the repo with the private key
        exported in the *key* file."""
        gpg = gnupg.GPG(gnupghome=os.path.expanduser('~/.gnupg'))
        with open(key) as key_fd:
            skey = gpg.import_keys(key_fd.read())
        fprint = skey.results[0]['fingerprint']
        keyuid = None
        # NOTE(review): the loop variable shadows the 'key' parameter
        for key in gpg.list_keys(True):
            if key['fingerprint'] == fprint:
                keyuid = key['uids'][0]
        for pkg in self.get_rpms(
                fmatch=lambda pkg: not pkg.signature):
            pkg.sign(keyuid, passwd)
        logging.info("Done signing")
+
+
def copy(what, where):
    """Try to link, try to copy if cross-device.

    :param what: source file path
    :param where: destination path (must not exist yet for the link)
    """
    try:
        os.link(what, where)
    except OSError as oerror:
        # EXDEV: source and destination on different filesystems, a
        # hard link is impossible, fall back to a real copy
        # (was the magic number 18 before)
        if oerror.errno == errno.EXDEV:
            shutil.copy2(what, where)
        else:
            raise
+
+
def extract_sources(rpm_path, dst_dir, with_patches=False):
    """Extract the source tarballs (and optionally the patches) of a
    src.rpm into *dst_dir*.

    The srpm is linked/copied into dst_dir, piped through
    rpm2cpio | cpio there, and the copy is removed afterwards.

    :param rpm_path: path to the source rpm to unpack
    :param dst_dir: destination dir, created when missing
    :param with_patches: also extract the *.patch files
    """
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)
    oldpath = os.getcwd()
    # cpio extracts into the cwd, so use absolute paths and chdir into
    # the destination
    if not dst_dir.startswith('/'):
        dst_dir = oldpath + '/' + dst_dir
    if not rpm_path.startswith('/'):
        rpm_path = oldpath + '/' + rpm_path
    dst_path = dst_dir + '/' + rpm_path.rsplit('/', 1)[-1]
    copy(rpm_path, dst_path)
    os.chdir(dst_dir)
    try:
        rpm2cpio = subprocess.Popen(['rpm2cpio', dst_path],
                                    stdout=subprocess.PIPE)
        # NOTE(review): '*gz' catches .tgz/.tar.gz too, but .bz2/.xz
        # tarballs are not extracted -- confirm that is intended
        cpio_cmd = ['cpio', '-iv', '*gz', '*.zip', '*.7z']
        if with_patches:
            cpio_cmd.append('*.patch')
        with open(os.devnull, 'w') as devnull:
            cpio = subprocess.Popen(
                cpio_cmd,
                stdin=rpm2cpio.stdout,
                stdout=devnull,
                stderr=devnull,
            )
        # let cpio receive SIGPIPE if rpm2cpio dies early
        rpm2cpio.stdout.close()
        cpio.communicate()
    finally:
        # always restore the cwd and drop the temporary srpm copy
        os.chdir(oldpath)
        os.remove(dst_path)
+
+
def sign_sources(src_dir, key, passphrase=None):
    """Generate a detached gpg signature (.sig) for every file under
    *src_dir*.

    :param src_dir: directory tree to sign, relative paths allowed
    :param key: path to the exported private key to sign with
    :param passphrase: passphrase of the key, if any
    """
    oldpath = os.getcwd()
    if not src_dir.startswith('/'):
        src_dir = oldpath + '/' + src_dir
    gpg = gnupg.GPG(gnupghome=os.path.expanduser('~/.gnupg'))
    with open(key) as key_fd:
        skey = gpg.import_keys(key_fd.read())
    fprint = skey.results[0]['fingerprint']
    keyid = None
    # locate the keyid of the imported key in the secret keyring
    # (loop variable renamed so it no longer shadows the parameter)
    for seckey in gpg.list_keys(True):
        if seckey['fingerprint'] == fprint:
            keyid = seckey['keyid']
    for dname, _, files in os.walk(src_dir):
        for fname in files:
            # do not sign the signatures themselves
            if fname.endswith('.sig'):
                continue
            fname = os.path.join(dname, fname)
            with open(fname) as fd:
                signature = gpg.sign_file(
                    fd,
                    keyid=keyid,
                    passphrase=passphrase,
                    detach=True,
                )
            if not signature.data:
                # bugfix: the message was never %-formatted (args were
                # passed as an exception tuple) and referenced the
                # 'file' builtin instead of the file name
                raise Exception("Failed to sign file %s: \n%s"
                                % (fname, signature.stderr))
            with open(fname + '.sig', 'w') as sfd:
                sfd.write(signature.data)
+
+
def pkg_copy(src_path, dst_path):
    """Put *src_path* at *dst_path* (hard link or copy), creating the
    destination directory when needed; no-op if already there."""
    if os.path.exists(dst_path):
        logging.debug('Not saving %s, already exists', dst_path)
        return
    logging.info('Saving package %s', dst_path)
    dst_dir = dst_path.rsplit('/', 1)[0]
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    copy(src_path, dst_path)
+
+
def list_rpms(path):
    '''Find all the rpms under the given dir'''
    return [
        root + '/' + fname
        for root, _, files in os.walk(path)
        for fname in files
        if fname.endswith('.rpm')
    ]
+
+
def rm_dups(path, noop=False):
    '''Remove all the duplicated rpms from a directory tree, favoring
    the signed ones

    Duplicates are copies of the same name-version living on different
    inodes; all but one (the signed one when available) are replaced
    by hard links to it.
    '''
    rpms = RPMList()
    for rpm_path in list_rpms(path):
        rpms.add_pkg(RPM(rpm_path))
    # create a backup just in case
    if noop:
        logging.info('Not creating backup, as nothing will be changed (NOOP)')
    else:
        logging.info('Creating backup at %s, delete if happy with the '
                     'results', path + '.bkp')
        # hard-linked copy (cp -l), so it is cheap space-wise
        subprocess.call(['cp', '-la', path, path + '.bkp'])
    for pkg_name, versions in rpms.iteritems():
        for version, inodes in versions.iteritems():
            # skip if no duplicates
            if len(inodes) <= 1:
                logging.debug('No dupes for %s.%s', pkg_name, version)
                continue
            logging.info('Dupes found for %s.%s!', pkg_name, version)
            # get a base package, with signature if able
            base_inode = None
            for inode, pkg_files in inodes.iteritems():
                if pkg_files and pkg_files[0].signature is not None:
                    base_inode = inode
                    break
            if base_inode is not None:
                base = inodes.pop(base_inode)[0]
            else:
                # we do not have any signed copy for that package
                base = inodes.popitem()[1][0]
            # Now replace all the other with hard links to the base
            logging.info('    Using %s as base', base)
            for inode in inodes.itervalues():
                for pkg in inode:
                    logging.info('    Linking %s -> %s', pkg.path, base.path)
                    # NOTE(review): on checksum mismatch we only warn
                    # but still replace the file below -- confirm
                    if pkg.md5 != base.md5:
                        logging.warn('        MD5 checksums do not match '
                                     '(probably mixed signed and unsigned '
                                     'copies)')
                    else:
                        logging.debug('        MD5 checksums match, ok')
                    if noop:
                        logging.info('        NOOP, not doing anything')
                        continue
                    os.remove(pkg.path)
                    copy(base.path, pkg.path)
    logging.info('DONE')
+
+
def read_conf(conf_file):
    """Yield the meaningful lines of a config file, stripped, skipping
    blank lines and '#' comments."""
    with open(conf_file) as conf_fd:
        for raw_line in conf_fd:
            entry = raw_line.strip()
            if entry and not entry.startswith('#'):
                yield entry
+
+
def expand_koji(koji_lvl1_url):
    """Expand a koji task url into the list of rpm urls it produced.

    Scrapes the task page for links to its buildArch/buildSRPM
    subtasks, then collects the rpm links from each of those pages.
    """
    pkg_list = []
    # NOTE(review): url_base is never used below
    url_base = koji_lvl1_url.rsplit('/', 1)[0]
    lvl1_page = requests.get(koji_lvl1_url).text
    # href targets on lines also mentioning buildArch/buildSRPM
    lvl2_reg = re.compile(r'(?<=href=")[^"]+(?=.*(buildArch|buildSRPM))')
    logging.info('Parsing Koji URL: %s', koji_lvl1_url)
    lvl2_urls = [
        get_link(koji_lvl1_url, match.group())
        for match in (lvl2_reg.search(i) for i in lvl1_page.splitlines())
        if match
    ]
    for url in lvl2_urls:
        logging.info('    Got 2nd level URL: %s', url)
        pkg_list.extend(expand_page(url))
    if not pkg_list:
        logging.warn('    No packages found')
    logging.info('    Done')
    return pkg_list
+
+
def expand_jenkins(jenkins_lvl1_url):
    """Expand a jenkins build url into the list of its rpm artifact
    urls.

    Uses the json api with depth=3 so multiconfig runs and their
    artifacts are included in the answer.
    """
    pkg_list = []
    lvl1_page = requests.get(jenkins_lvl1_url + '/api/json?depth=3').json()
    # NOTE(review): url is normalized here but never used afterwards
    url = lvl1_page['url']
    logging.info('Parsing jenkins URL: %s', jenkins_lvl1_url)
    if url.endswith('/'):
        url = url[:-1]
    # handle multiconfig jobs: iterate the runs, falling back to the
    # build itself for simple jobs
    for run in lvl1_page.get('runs', (lvl1_page,)):
        if run['number'] != lvl1_page['number']:
            # run belongs to another build of the matrix
            continue
        for artifact in run['artifacts']:
            if not artifact['relativePath'].endswith('.rpm'):
                continue
            new_url = '%s/artifact/%s' % (run['url'], artifact['relativePath'])
            pkg_list.append(new_url)
            logging.info('    Got URL: %s', new_url)
    if not pkg_list:
        logging.warn('    No packages found')
    logging.info('    Done')
    return pkg_list
+
+
def expand_page(page_url):
    """Collect all the rpm links (href="....rpm") found on the page,
    resolved to full urls."""
    logging.info('Parsing URL: %s', page_url)
    data = requests.get(page_url).text
    pkg_reg = re.compile(r'(?<=href=")[^"]+\.rpm')
    pkg_list = [
        get_link(page_url, match.group())
        for match in (pkg_reg.search(i) for i in data.splitlines())
        if match
    ]
    for pkg_url in pkg_list:
        logging.info('    Got package URL: %s', pkg_url)
    return pkg_list
+
+
def get_link(page_url, link, internal=False):
    """Resolve *link*, as found on *page_url*, into a full url.

    :param internal: when set, return False for absolute links that
        point outside page_url instead of returning them
    """
    # drop any query string and a single trailing slash from the page
    page_url = page_url.rsplit('?', 1)[0]
    if page_url.endswith('/'):
        page_url = page_url[:-1]
    if link.startswith(page_url):
        # already a full url inside this page's tree
        return link
    if link.startswith('/'):
        # site-absolute link: keep only the scheme://host part
        scheme_and_host = '/'.join(page_url.split('/', 3)[:-1])
        return scheme_and_host + link
    if re.match('https?://', link):
        # absolute link to somewhere else
        return False if internal else link
    # plain relative link
    return page_url + '/' + link
+
+
def expand_recursive(page_url, level=0):
    """Collect the rpm links of *page_url* and, recursively, of every
    internal directory link found on it.

    :param level: recursion depth, only used to tone down the logging
    """
    if level > 0:
        logging.debug('Recursively fetching URL (level %d): %s',
                      level, page_url)
    else:
        logging.info('Recursively fetching URL (level %d): %s',
                     level, page_url)
    pkg_list = []
    data = requests.get(page_url).text
    # href targets ending in '/': site-absolute, under page_url, or
    # relative (no scheme)
    url_reg = re.compile(
        r'(?<=href=")(/|%s|(?![^:]+?://))[^"]+/(?=")' % page_url)
    # keep only internal links, and never the page itself (avoids a
    # trivial recursion loop)
    next_urls = (
        get_link(page_url, match.group(), internal=True)
        for match in url_reg.finditer(data)
        if match and (match.group().startswith(page_url)
                      or not match.group().startswith('http'))
        and match.group() != page_url
    )
    for next_url in next_urls:
        pkg_list.extend(expand_recursive(next_url, level + 1))
    pkg_list.extend(expand_page(page_url))
    return pkg_list
+
+
def expand_pkg_url(pkg_url):
    """Expand a url into the list of rpm urls it provides, dispatching
    on the kind of service the url points to."""
    if 'koji' in pkg_url:
        return expand_koji(pkg_url)
    if 'jenkins' in pkg_url:
        return expand_jenkins(pkg_url)
    if not pkg_url.endswith('.rpm'):
        # a plain page with rpm links on it
        return expand_page(pkg_url)
    # a direct rpm link
    return [pkg_url]
+
+
def expand_dir(pkg_dir, latest=0, name_reg='.*'):
    """Return the paths of the rpms under *pkg_dir* matching
    *name_reg*.

    :param pkg_dir: repo dir; relative paths are resolved under
        BASE_REPOS_PATH, as documented in the command line help
    :param latest: when set, only that many newest versions per package
    :param name_reg: regexp the rpm paths must match
    """
    if not pkg_dir.startswith('/'):
        # bugfix: the base path must be prepended, the old code
        # appended it (pkg_dir += '/' + BASE_REPOS_PATH)
        pkg_dir = BASE_REPOS_PATH + '/' + pkg_dir
    repo = RPMRepo(pkg_dir)
    match = re.compile(name_reg)
    return [pkg.path
            for pkg in repo.get_rpms(
                regmatch=match,
                latest=latest)
            ]
+
+
def expand_source(pkg_src):
    """Return a list of direct urls and absolute paths to the rpms
    from the given rpm source

    Supported sources: conf:, pub:, rec: prefixes, http(s) urls,
    dir:/latest: paths with an optional regexp, and the
    repopath:regexp shorthand.
    """
    pkg_list = set()
    if pkg_src.startswith('conf:'):
        for conf_line in read_conf(pkg_src.split(':', 1)[1]):
            # TODO: avoid conf loops
            pkg_list = pkg_list.union(expand_source(conf_line))
    elif pkg_src.startswith('pub:'):
        for job in get_publisher_jobs(pkg_src):
            pkg_list = pkg_list.union(expand_source(job))
    elif pkg_src.startswith('rec:'):
        pkg_list = pkg_list.union(expand_recursive(pkg_src.split(':', 1)[1]))
    elif pkg_src.startswith('http'):
        pkg_list = pkg_list.union(expand_pkg_url(pkg_src))
    elif pkg_src.startswith('dir:'):
        _, pkg_src = pkg_src.split(':', 1)
        # renamed so the builtin 'filter' is no longer shadowed
        if ':' in pkg_src:
            dirname, name_filter = pkg_src.split(':', 1)
        else:
            dirname, name_filter = pkg_src, '.*'
        pkg_list = pkg_list.union(expand_dir(
            pkg_dir=dirname,
            name_reg=name_filter))
    elif pkg_src.startswith('latest:'):
        _, pkg_src = pkg_src.split(':', 1)
        if ':' in pkg_src:
            dirname, name_filter = pkg_src.split(':', 1)
        else:
            dirname, name_filter = pkg_src, '.*'
        pkgs = set(expand_dir(
            pkg_dir=dirname,
            name_reg=name_filter,
            latest=True))
        for pkg in pkgs:
            logging.info("    Adding rpm %s", pkg)
        pkg_list = pkg_list.union(pkgs)
    elif ':' in pkg_src:
        repo_name, pkg_name = pkg_src.split(':', 1)
        pkg_list = pkg_list.union(expand_dir(
            repo_name,
            name_reg=pkg_name))
    else:
        # bugfix: this used to 'return 1', which made recursive callers
        # crash on pkg_list.union(1); report and return an empty set so
        # the rest of the sources can still be processed
        logging.error('Unknown source %s', pkg_src)
        return pkg_list
    return pkg_list
+
+
def get_publisher_jobs(pub_url):
    """Scrape a publisher job's console log for the
    'Copied ... "job" ... number N' entries naming what it published.

    NOTE(review): the matched lines are fed back into expand_source by
    the caller -- confirm their format matches a supported source.
    """
    console_log = requests.get(pub_url + '/consoleText').text
    entry_reg = re.compile(r'Copied.*(?<=")\w+[^"]*".*number [0-9]+')
    pub_jobs = [
        match.group() for match
        in (entry_reg.search(i) for i in console_log.splitlines())
        if match
    ]
    return pub_jobs
+
+
def parse_args():
    """Build the command line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-n', '--noop', action='store_true')
    subparsers = parser.add_subparsers(dest='action')

    # rm-dups: deduplicate rpms in a tree via hard links
    par_rm_dups = subparsers.add_parser('rm-dups', help='Remove duplicated '
                                        'packages and create hard links.')
    par_rm_dups.add_argument('-d', '--dir', required=True,
                             help='Directory to add the package to.')

    # repo: everything that operates on a repository dir
    par_repo = subparsers.add_parser('repo', help='Repository management.')
    par_repo.add_argument('-d', '--dir', required=True,
                          help='Directory of the repo.')
    repo_subparser = par_repo.add_subparsers(dest='repoaction')
    # repo add: pull packages in from one or more sources
    add_rpm = repo_subparser.add_parser('add', help='Add a package')
    add_rpm.add_argument('-t', '--temp-dir', action='store', default=None,
                         help='Temporary dir to use when downloading packages')
    add_rpm.add_argument(
        'rpm_source', nargs='+',
        help='An rpm source to add, it can be one of: '
        'a path to a rpm/tarball, '
        'a path to another dir prepended with "dir:" or "latest:"'
        ' with an optional regexp filter, like dir:path[:regexp], '
        'an http(s) url to an rpm/tarball, '
        'an http(s) url to a page containing rpms/tarballs, '
        'a link to a jenkins job, '
        'a link to a koji job, '
        'a link to a http(s) page to recurse prepended with "rec:", '
        'a repopath:regexp pair, where repopath is the path to a repo and '
        'regexp a filter for the rpms in it.'
        ' a path to a configuration file prepended with "conf:" '
        'containing a plain text list of rpm sources, one by line (comments '
        'with # and empty lines allowed). '
        'For any repository path specified with dir: latest: or the '
        'repopath:regexp options, if it\'s not an absolute path, '
        + BASE_REPOS_PATH + ' will be prepended.'

    )

    # repo generate-src: build the src tree from the srpms
    generate_src = repo_subparser.add_parser(
        'generate-src',
        help='Populate the src dir with the tarballs from the src.rpm '
        'files in the repo')
    generate_src.add_argument('-p', '--with-patches', action='store_true',
                              help='Include the patch files')
    generate_src.add_argument('-k', '--key', action='store',
                              default=None,
                              help='Key to sign the sources with')
    generate_src.add_argument('--passphrase', action='store',
                              default='ask',
                              help='Passphrase to use when signing')

    # repo sign-rpms: batch-sign the unsigned packages
    rpms_sign = repo_subparser.add_parser(
        'sign-rpms',
        help='sign the unsigned rpms.')
    rpms_sign.add_argument('-k', '--key', action='store',
                           default=None, required=True,
                           help='Key to sign the rpms with')
    rpms_sign.add_argument('--passphrase', action='store',
                           default='ask',
                           help='Passphrase to use when signing')

    # repo createrepo: regenerate the metadata per distro
    repo_subparser.add_parser(
        'createrepo',
        help='Run createrepo on each distro repository.')

    # repo remove-old: prune old package versions
    remove_old = repo_subparser.add_parser(
        'remove-old',
        help='Remove old versions of packages.')
    remove_old.add_argument('-k', '--keep', action='store',
                            default=1, help='Number of versions to '
                            'keep')
    return parser.parse_args()
+
+
def _strip_trailing_slash(path):
    """Return *path* with a single trailing '/' removed, if present."""
    return path[:-1] if path.endswith('/') else path


def _resolve_passphrase(passphrase):
    """Return the passphrase, prompting interactively for the magic
    value 'ask' (the CLI default) so it never has to appear on the
    command line."""
    if passphrase == 'ask':
        return getpass('Key passphrase: ')
    return passphrase


def _add_packages(args, repo):
    """Resolve every rpm source given on the command line and add the
    resulting packages to *repo*.

    Sets the module-level TEMP_DIR global so the download helpers and
    the atexit cleanup handler can find the working directory.
    """
    global TEMP_DIR
    if args.temp_dir is None:
        TEMP_DIR = tempfile.mkdtemp()
        # only auto-remove dirs we created ourselves
        atexit.register(cleanup, TEMP_DIR)
    else:
        TEMP_DIR = args.temp_dir
        if not os.path.exists(TEMP_DIR):
            os.makedirs(TEMP_DIR)
    pkg_list = []
    logging.info('Resolving all the package sources')
    for pkg_src in args.rpm_source:
        pkg_list.extend(expand_source(pkg_src))
    logging.info('All sources resolved')
    logging.info('')
    logging.info('Adding packages to the repo %s', repo.path)
    for pkg_path in pkg_list:
        repo.add_pkg(RPM(pkg_path))
    logging.info('')
    repo.save()


def main():
    """Entry point: parse the CLI arguments, configure logging verbosity
    and dispatch to the requested repo-management action."""
    args = parse_args()

    if args.verbose:
        logging.root.level = logging.DEBUG
        #  we want connectionpool debug logs
        connectionpool.log.setLevel(logging.DEBUG)
    else:
        logging.root.level = logging.INFO
        #  we don't want connectionpool info logs
        connectionpool.log.setLevel(logging.WARN)

    if args.action == 'rm-dups':
        rm_dups(path=_strip_trailing_slash(args.dir), noop=args.noop)
    elif args.action == 'repo':
        repo = RPMRepo(_strip_trailing_slash(args.dir))
        logging.info('')
        if args.repoaction == 'add':
            _add_packages(args, repo)
        elif args.repoaction == 'generate-src':
            # only prompt for a passphrase if a signing key was given
            if args.key:
                passphrase = _resolve_passphrase(args.passphrase)
            else:
                passphrase = args.passphrase
            repo.generate_sources(args.with_patches, args.key, passphrase)
        elif args.repoaction == 'createrepo':
            repo.createrepo()
        elif args.repoaction == 'remove-old':
            repo.delete_old(keep=int(args.keep), noop=args.noop)
        elif args.repoaction == 'sign-rpms':
            # --key is required by the parser, so always resolve
            repo.sign_rpms(key=args.key,
                           passwd=_resolve_passphrase(args.passphrase))
+
# Allow importing this module without triggering the CLI.
if __name__ == '__main__':
    main()


-- 
To view, visit http://gerrit.ovirt.org/33299
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib630eb1e3b701eaf224451f02692723447aa1f8b
Gerrit-PatchSet: 1
Gerrit-Project: jenkins
Gerrit-Branch: master
Gerrit-Owner: David Caro <dcaro...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to