commit: 4a1ef8726c420f9d478025193540ce0a6cceb604 Author: Florian Schmaus <flow <AT> gentoo <DOT> org> AuthorDate: Mon Feb 23 17:00:26 2026 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Sat Feb 28 14:56:34 2026 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=4a1ef872
Scheduler: adjust scale_to_jobs() The strict free-space requirement imposed by scale_to_jobs() seems to cause more irritation than benefit. The underlying problem is a classic resource allocation dilemma. We are dealing with a highly skewed distribution: a tail of large "whale" jobs (up to 18 GiB) and a dense cluster of small jobs (< 1 GiB). To reduce the free-space requirements while still leaving room for large "whale" jobs, this changes scale_to_jobs() from a decaying function to one that takes the 90th percentile job size into account (estimated to be 1 GiB; we can adjust this further). For example, with an 18 GiB requirement and one job already running, the second job will now require 19 GiB (18 + 1 × 1) to be free, instead of 27 GiB (18 + 18/2). Signed-off-by: Florian Schmaus <flow <AT> gentoo.org> Part-of: https://github.com/gentoo/portage/pull/1559 Closes: https://github.com/gentoo/portage/pull/1559 Signed-off-by: Sam James <sam <AT> gentoo.org> lib/_emerge/Scheduler.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/_emerge/Scheduler.py b/lib/_emerge/Scheduler.py index c3f6cb8b50..17ef47559e 100644 --- a/lib/_emerge/Scheduler.py +++ b/lib/_emerge/Scheduler.py @@ -1916,15 +1916,12 @@ class Scheduler(PollScheduler): level=logging.ERROR, ) else: - # Use a decaying function to take potential future PORTAGE_TMPDIR consumption + # Use a function to take potential future PORTAGE_TMPDIR consumption # of currently running jobs and the new job into account. - def scale_to_jobs(num): + def scale_to_jobs(num, p90): # The newly started job is fully taken into account. res = num - # All currently running jobs are taken into account with less weight, - # since it is likely that they are already using space in PORTAGE_TMPDIR. 
- for i in range(2, running_job_count + 2): - res += (1 / i) * num + res += running_job_count * p90 return res if ( @@ -1934,7 +1931,12 @@ class Scheduler(PollScheduler): required_free_bytes = ( self._jobs_tmpdir_require_free_gb * 1024 * 1024 * 1024 ) - required_free_bytes = scale_to_jobs(required_free_bytes) + p90_bytes = ( + 1 * 1024 * 1024 * 1024 + ) # Assume 1 GiB for 90th percentile job size + required_free_bytes = scale_to_jobs( + required_free_bytes, p90_bytes + ) actual_free_bytes = vfs_stat.f_bsize * vfs_stat.f_bavail
