commit: 448b7748adb4494b8a7dbfb5293437c1100c8977
Author: Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Thu Oct 30 03:30:25 2025 +0000
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Thu Oct 30 03:53:25 2025 +0000
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=448b7748
egencache: retry manifest process with unexpected returncode Retry for an intermittent unexpected returncode which occurs in CI runs with forkserver (bug 965132). In CI the unexpected returncode tends to be 255 which indicates that the forkserver exited unexpectedly. Bug: https://bugs.gentoo.org/965132 Signed-off-by: Zac Medico <zmedico <AT> gentoo.org> bin/egencache | 27 +++++++---- .../ebuild/_parallel_manifest/ManifestScheduler.py | 54 +++++++++++++++++++++- 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/bin/egencache b/bin/egencache index d36ae5d32e..6b131360d5 100755 --- a/bin/egencache +++ b/bin/egencache @@ -61,7 +61,7 @@ try: from portage.dep import _repo_separator from portage.output import colorize, EOutput from portage.package.ebuild._parallel_manifest.ManifestScheduler import ( - ManifestScheduler, + manifest_scheduler_retry, ) from portage.util import cmp_sort_key, writemsg_level, no_color from portage.util._async.AsyncFunction import AsyncFunction @@ -1351,21 +1351,28 @@ try: cp_iter = iter(atoms) event_loop = global_event_loop() - scheduler = ManifestScheduler( - portdb, - cp_iter=cp_iter, - gpg_cmd=gpg_cmd, - gpg_vars=gpg_vars, - force_sign_key=force_sign_key, - max_jobs=options.jobs, - max_load=options.load_average, - event_loop=event_loop, + manifest_task = AsyncTaskFuture( + future=manifest_scheduler_retry( + portdb, + cp_iter=cp_iter, + gpg_cmd=gpg_cmd, + gpg_vars=gpg_vars, + force_sign_key=force_sign_key, + max_jobs=options.jobs, + max_load=options.load_average, + event_loop=event_loop, + ) ) + scheduler = TaskScheduler(iter([manifest_task]), event_loop=event_loop) + signum = run_main_scheduler(scheduler) if signum is not None: sys.exit(128 + signum) + # Raise an unexpected exception if one occurred. 
+ manifest_task.future.result() + if options.tolerant: ret.append(os.EX_OK) else: diff --git a/lib/portage/package/ebuild/_parallel_manifest/ManifestScheduler.py b/lib/portage/package/ebuild/_parallel_manifest/ManifestScheduler.py index 36372d228d..d773d04da9 100644 --- a/lib/portage/package/ebuild/_parallel_manifest/ManifestScheduler.py +++ b/lib/portage/package/ebuild/_parallel_manifest/ManifestScheduler.py @@ -1,6 +1,8 @@ # Copyright 2012-2025 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 +import asyncio + import portage from portage import os from portage.dbapi.porttree import _async_manifest_fetchlist @@ -10,6 +12,46 @@ from portage.util._async.AsyncScheduler import AsyncScheduler from .ManifestTask import ManifestTask +async def manifest_scheduler_retry(*args, max_tries=3, **kwargs) -> int: + """ + Since ManifestScheduler is not well suited for internal retry, create + a new ManifestScheduler instance for each retry. Returns the returncode + from the last ManifestScheduler instance, which will only be non-zero + if all retries failed. + """ + tries = max_tries + scheduler = ManifestScheduler(*args, **kwargs) + scheduler.start() + try: + await scheduler.async_wait() + except asyncio.CancelledError: + scheduler.terminate() + await scheduler.async_wait() + raise + cp_failed = scheduler.cp_failed.copy() + tries -= 1 + while scheduler.cp_retry and tries > 0: + kwargs["cp_iter"] = iter(scheduler.cp_retry) + scheduler = ManifestScheduler(*args, **kwargs) + scheduler.start() + try: + await scheduler.async_wait() + except asyncio.CancelledError: + scheduler.terminate() + await scheduler.async_wait() + raise + cp_failed.update(scheduler.cp_failed) + cp_failed.difference_update(scheduler.cp_successful) + tries -= 1 + + # Account for failures from all ManifestScheduler instances, since we + # only retry when the returncode is unexpected. 
+ if cp_failed: + scheduler.returncode |= 1 + + return scheduler.returncode + + class ManifestScheduler(AsyncScheduler): def __init__( self, @@ -31,6 +73,9 @@ class ManifestScheduler(AsyncScheduler): self._gpg_vars = gpg_vars self._force_sign_key = force_sign_key self._task_iter = self._iter_tasks() + self.cp_retry = set() + self.cp_failed = set() + self.cp_successful = set() def _next_task(self): return next(self._task_iter) @@ -92,7 +137,14 @@ class ManifestScheduler(AsyncScheduler): ) def _task_exit(self, task): - if task.returncode != os.EX_OK: + if task.returncode == os.EX_OK: + self.cp_successful.add(task.cp) + else: + self.cp_failed.add(task.cp) + if task.returncode != 1: + # Retry if the returncode was unexpected. + self.cp_retry.add(task.cp) + if not self._terminated_tasks: portage.writemsg( "Error processing %s%s%s with returncode %s, continuing...\n"
