Package: piuparts Version: 0.50 Severity: wishlist Tags: patch thanks This is a placekeeper for merging the faster-rdep-calc(10) and rdep-chain-len(2) branches. The first attempts to speed master by only calculating reverse dependency metrics on demand, by package. The second builds on that by adding a reverse dependency chain length metric, and uses that to weight the packages returned by reserve().
https://github.com/davesteele/piuparts/commits/faster-rdep-calc https://github.com/davesteele/piuparts/commits/rdep-chain-len I've chosen not to restore the dependency on the current working directory in detect_well_known_errors. That feature is localized to one commit. >From 9b5eba21462bc9e1e32df39c14f6559ebf42e271 Mon Sep 17 00:00:00 2001 From: David Steele <dste...@gmail.com> Date: Wed, 27 Feb 2013 19:07:09 -0500 Subject: [PATCH] Rework rdep metric calculations for faster operation. --- debian/changelog | 6 ++ master-bin/detect_well_known_errors | 24 ++--- piuparts-report.py | 34 +++---- piupartslib/packagesdb.py | 166 ++++++++++++++++++----------------- 4 files changed, 115 insertions(+), 115 deletions(-) diff --git a/debian/changelog b/debian/changelog index b4d1c4d..7cfff9e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -70,6 +70,12 @@ piuparts (0.51) UNRELEASED; urgency=low - Validate field names, and only use valid known problem conf files. - Minor HTML syntax fix. - Minor template integration. + * pupartslib/packagesdb.py + - Reworked rdep metrics (block_count, rrdep_count, waiting_count) to + calculate on-demand. Access methods are moved from Package to + PackagesDB. + - Add a metric for the worst-case reverse-dependency chain length. + - Weight the reserved packages returned by this additional factor. [ Holger Levsen ] * piuparts.py: diff --git a/master-bin/detect_well_known_errors b/master-bin/detect_well_known_ index 6810a26..2ca1f4e 100755 --- a/master-bin/detect_well_known_errors +++ b/master-bin/detect_well_known_errors @@ -225,8 +225,9 @@ class FailureManager(): def keyfunc( x, pkgsdb=self.pkgsdb, logdict=self.logdict): try: pkg_name = get_pkg(x.pkgspec) - rdeps = pkgsdb.get_package(pkg_name).rrdep_count() - except KeyError: + rdeps = pkgsdb.rrdep_count( + pkgsdb.get_package(pkg_name)) + except (KeyError, AttributeError): rdeps = 0 is_failed = get_where(logdict[x.pkgspec]) == "fail" @@ -374,8 +375,8 @@ def update_tpl( basedir, section, problem, failures, logdict bin_pkg = get_pkg(pkgspec) try: src_pkg = source_pkg(pkgspec, pkgsdb) - rdep_cnt = pkgsdb.get_package(bin_pkg).rrdep_count() - except KeyError: + rdep_cnt = pkgsdb.rrdep_count(pkgsdb.get_package(bin_pkg)) + except (KeyError, AttributeError): src_pkg = bin_pkg rdep_cnt = 0 @@ -428,8 +429,8 @@ def update_html( section, logdict, problem_list, failures, c def keyfunc( x, pkgsdb=pkgsdb, logdict=logdict): try: pkg_name = get_pkg(x.pkgspec) - rdeps = pkgsdb.get_package(pkg_name).rrdep_count() - except KeyError: + rdeps = pkgsdb.rrdep_count(pkgsdb.get_package(pkg_name)) + except (KeyError, AttributeError): rdeps = 0 is_failed = get_where(logdict[x.pkgspec]) == "fail" @@ -469,13 +470,11 @@ def process_section( section, config, problem_list, add_cnt = make_kprs( logdict, kprdict, problem_list ) if not pkgsdb: - oldcwd = os.getcwd() - os.chdir(config['master-directory']) - distro_config = piupartslib.conf.DistroConfig( DISTRO_CONFIG_FILE, section_config["mirror"]) - pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=section) + sectiondir = os.path.join(config['master-directory'], section) + pkgsdb = piupartslib.packagesdb.PackagesDB(prefix=sectiondir) pkgs_url = distro_config.get_packages_url( section_config.get_distro(), @@ -485,11 +484,6 @@ def process_section( section, config, problem_list, pkgsdb.read_packages_file(pkg_fl) pkg_fl.close() - pkgsdb.compute_package_states() - pkgsdb.calc_rrdep_counts() - - os.chdir(oldcwd) - failures = FailureManager( logdict ) failures.sort_by_bugged_and_rdeps(pkgsdb) diff --git a/piuparts-report.py b/piuparts-report.py index 996ca2a..48aaa6a 100644 --- a/piuparts-report.py +++ b/piuparts-report.py @@ -593,15 +593,10 @@ class Section: self._doc_root = doc_root logging.debug("Loading and parsing Packages file") - oldcwd = os.getcwd() - os.chdir(master_directory) self._packagedb_cache = packagedb_cache self._package_databases = {} - self._load_package_database(section) + self._load_package_database(section, master_directory) self._binary_db = self._package_databases[section] - self._binary_db.compute_package_states() - self._binary_db.calc_rrdep_counts() - os.chdir(oldcwd) sources_url = self._distro_config.get_sources_url( self._config.get_distro(), self._config.get_area()) @@ -613,7 +608,7 @@ class Section: self._log_name_cache = {} - def _load_package_database(self, section): + def _load_package_database(self, section, master_directory): if section in self._package_databases: return elif section in self._packagedb_cache: @@ -626,12 +621,13 @@ class Section: # this is a base database eligible for caching # only cache the most recent base database self._packagedb_cache.clear() - db = piupartslib.packagesdb.PackagesDB(prefix=section) + sectiondir = os.path.join(master_directory, section) + db = piupartslib.packagesdb.PackagesDB(prefix=sectiondir) self._package_databases[section] = db if config["depends-sections"]: deps = config["depends-sections"].split() for dep in deps: - self._load_package_database(dep) + self._load_package_database(dep, master_directory) db.set_dependency_databases([self._package_databases[dep] for dep i else: # only cache the big base databases that don't have additional depe @@ -1217,31 +1213,27 @@ class Section: def write_state_pages(self): for state in self._binary_db.get_active_states(): logging.debug("Writing page for %s" % state) - with_counts = False - aside = "" vlist = "" if state in self._binary_db.get_error_states(): with_counts = True aside = " (reverse deps, blocked pkgs)" - - def cmp_func(a, b): - """Sort by block count first""" - rrdep_cmp = cmp( a.block_count(), b.block_count()) - if rrdep_cmp != 0: - return -rrdep_cmp - else: - return cmp( a["Package"], b["Package"] ) + sort_key = lambda x: (-self._binary_db.block_count(x),x["Packag + else: + with_counts = False + aside = "" + sort_key = lambda x: x["Package"] names = self._binary_db.get_pkg_names_in_state(state) packages = [self._binary_db.get_package(name) for name in names] - packages.sort( cmp_func ) + packages.sort( key = sort_key ) for package in packages: vlist += "<li id=\"%s\">%s" % ( package["Package"], self.link_to_source_summary(package["P if with_counts: - vlist += " (%d, %d)" % (package.rrdep_count(), package.bloc + vlist += " (%d, %d)" % (self._binary_db.rrdep_count(package + self._binary_db.block_count(package vlist += " (%s)" % html_protect(package["Maintainer"]) all_deps = package.all_dependencies() if all_deps: diff --git a/piupartslib/packagesdb.py b/piupartslib/packagesdb.py index 13e0dfb..d3aa1da 100644 --- a/piupartslib/packagesdb.py +++ b/piupartslib/packagesdb.py @@ -62,9 +62,10 @@ class Package(UserDict.UserDict): self[name.strip()] = value.strip() self._parsed_deps = {} self._parsed_alt_deps = {} - self._rrdep_count = None - self._block_count = None - self._waiting_count = None + self.rrdep_cnt = None + self.block_cnt = None + self.waiting_cnt = None + self.rdep_chain_len = None def _parse_dependencies(self, header_name): if header_name in self._parsed_deps: @@ -123,33 +124,6 @@ class Package(UserDict.UserDict): """Are we testable at all? Required aren't.""" return self.get("Priority", "") != "required" - def rrdep_count(self): - """Get the recursive dependency count, if it has been calculated""" - if self._rrdep_count == None: - raise Exception('Reverse dependency count has not been calculated') - return(self._rrdep_count) - - def set_rrdep_count(self, val): - self._rrdep_count = val - - def block_count(self): - """Get the number of packages blocked by this package""" - if self._block_count == None: - raise Exception('Block count has not been calculated') - return(self._block_count) - - def set_block_count(self, val): - self._block_count = val - - def waiting_count(self): - """Get the number of packages waiting for this package""" - if self._waiting_count == None: - raise Exception('Waiting count has not been calculated') - return(self._waiting_count) - - def set_waiting_count(self, val): - self._waiting_count = val - def dump(self, output_file): output_file.write("".join(self.headers)) @@ -313,6 +287,7 @@ class PackagesDB: self._dependency_databases = [] self._recycle_mode = False self._candidates_for_testing = None + self._rdeps = None self.set_subdirs(ok="pass", fail="fail", evil="untestable", reserved="reserved", morefail=["bugged", "affected"], recycle="recycle") @@ -682,8 +657,8 @@ class PackagesDB: if not self._logdb.log_exists(p, [self._reserved]) or \ self._logdb.log_exists(p, [self._recycle])] if len(self._candidates_for_testing) > 1: - self.calc_rrdep_counts() - tuples = [(p.waiting_count(), random.random(), p) + tuples = [(self.waiting_count(p) * self.rdep_chain_len(p), + random.random(), p) for p in self._candidates_for_testing] self._candidates_for_testing = [x[2] for x in sorted(tuples, reverse = True)] @@ -749,59 +724,92 @@ class PackagesDB: else: raise LogfileExists(self._evil, package, version) - def calc_rrdep_counts(self): - """Calculate recursive reverse dependency counts for Packages""" + def _get_rdep_dict(self): + """Return dict of one-level reverse dependencies by package""" + + if self._rdeps is None: + + self._rdeps = {} - self._find_all_packages() # populate _packages + for pkg in self.get_all_package_names(): + pkg_obj = self.get_package(pkg) + + for dep in pkg_obj.dependencies(): + dep_pkg = self.get_package(dep, resolve_virtual=True) + + if dep_pkg is not None: + dep = dep_pkg["Package"] + + if not dep in self._rdeps: + self._rdeps[dep] = set() + self._rdeps[dep].add(pkg) + + return( self._rdeps ) + + def _calc_rrdep_pkg_counts(self, pkg): + + pkg_name = pkg['Package'] self._compute_package_states() # populate _package_state + + # calc full recursive reverse dependency package set + rrdep_set = set() + rdeps = self._get_rdep_dict() + next_level = set([pkg_name]) + chain_len = 0 + + while next_level: + chain_len += 1 + rrdep_set |= next_level + new_pkgs = next_level + next_level = set([y for x in new_pkgs if x in rdeps for y in rdeps[ + next_level -= rrdep_set + + rrdep_set.remove(pkg_name) + + # calculate and set the metrics + pkg.rrdep_cnt = len(rrdep_set) + error_states = self.get_error_states() + if self._package_state[pkg_name] in error_states: + block_list = [x for x in rrdep_set + if self._package_state[x] in error_states] + pkg.block_cnt = len(block_list) + else: + pkg.block_cnt = 0 + waiting_states = self.get_waiting_states() + if self._package_state[pkg_name] in waiting_states: + waiting_list = [x for x in rrdep_set + if self._package_state[x] in waiting_states] + pkg.waiting_cnt = len(waiting_list) + else: + pkg.waiting_cnt = 0 - # create a reverse dependency dictionary. - # entries consist of a one-level list of reverse dependency package nam - # by package name - rdeps = {} - for pkg_name in self._packages.keys(): - # use the Packages dependencies() method for a conservative count - for dep in self._packages[pkg_name].dependencies(): - if dep in rdeps: - rdeps[dep].append( pkg_name ) - else: - rdeps[dep] = [pkg_name] - - def recurse_rdeps( pkg_name, rdeps, rrdep_dict ): - """ Recurse through the reverse dep arrays to determine the recursi - dependency count for a package. rrdep_dict.keys() contains the - accumulation of rdeps encountered""" - - # are there any rdeps for this package? - if pkg_name in rdeps: - for rdep in rdeps[pkg_name]: - # break circular dependency loops - if not rdep in rrdep_dict: - rrdep_dict[rdep] = 1 - rrdep_dict = recurse_rdeps( rdep, rdeps, rrdep_dict ) - - return rrdep_dict - - # calculate all of the rrdeps and block counts - for pkg_name in self._packages.keys(): - rrdep_list = recurse_rdeps( pkg_name, rdeps, {} ).keys() - self._packages[pkg_name].set_rrdep_count( len(rrdep_list) ) - - if self._package_state[pkg_name] in error_states: - block_list = [x for x in rrdep_list - if self._package_state[x] in error_states] - else: - block_list = [] - self._packages[pkg_name].set_block_count( len(block_list) ) + pkg.rdep_chain_len = chain_len - if self._package_state[pkg_name] in waiting_states: - waiting_list = [x for x in rrdep_list - if self._package_state[x] in waiting_states] - else: - waiting_list = [] - self._packages[pkg_name].set_waiting_count(len(waiting_list)) + def block_count(self, pkg): + if pkg.block_cnt is None: + self._calc_rrdep_pkg_counts(pkg) + + return pkg.block_cnt + + def rrdep_count(self, pkg): + if pkg.rrdep_cnt is None: + self._calc_rrdep_pkg_counts(pkg) + + return pkg.rrdep_cnt + + def waiting_count(self, pkg): + if pkg.waiting_cnt is None: + self._calc_rrdep_pkg_counts(pkg) + + return pkg.waiting_cnt + + def rdep_chain_len(self, pkg): + if pkg.rdep_chain_len is None: + self.calc_rrdep_pkg_counts(pkg) + + return pkg.rdep_chain_len # vi:set et ts=4 sw=4 : -- 1.7.10.4 (END) -- To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org