commit: 561187044b2e7ae3156ea81faebd08f072b7c731 Author: Florian Schmaus <flow <AT> gentoo <DOT> org> AuthorDate: Sun Jun 23 13:30:17 2024 +0000 Commit: Sam James <sam <AT> gentoo <DOT> org> CommitDate: Thu Dec 18 00:44:11 2025 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=56118704
Add emerge options to require free space/inodes in tmpdir This adds --jobs-tmpdir-require-free-gb=GB and --jobs-tmpdir-require-free-kilo-inodes=INODES as emerge options. When those are used with --jobs, portage/emerge checks that PORTAGE_TMPDIR has sufficient free resources before a new job is started. Thanks go out to Zac Medico, as this was inspired by https://github.com/gentoo/portage/pull/1351, with the following differences: - options are absolute values, not relative ones - defaults for both options are specified - option values are scaled, using a decaying function, considering the number of running jobs - a warning is emitted once a threshold is reached Note that the scaling of the resource constraints cannot be perfect in the presence of concurrently running emerge jobs and without _can_add_job() being provided with the number of jobs that are potentially added. It is always possible that an emerge job has not yet used much of the filesystem when we check the remaining filesystem resources, and at some later point in time uses much more than the scaling function accounted for. Ultimately, there is a trade-off between portage limiting parallelism needlessly (but still being able to emerge all packages) and portage failing due to missing resources in PORTAGE_TMPDIR. The chosen defaults are rather large and most packages use far fewer filesystem resources than the scaling function accounts for. Therefore, the idea of the implemented approach is to lean towards favoring functionality over parallelism. 
Bug: https://bugs.gentoo.org/934382 Signed-off-by: Florian Schmaus <flow <AT> gentoo.org> Part-of: https://github.com/gentoo/portage/pull/1353 Closes: https://github.com/gentoo/portage/pull/1353 Signed-off-by: Sam James <sam <AT> gentoo.org> lib/_emerge/Scheduler.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++- lib/_emerge/main.py | 38 +++++++++++++++++ man/emerge.1 | 12 ++++++ 3 files changed, 154 insertions(+), 1 deletion(-) diff --git a/lib/_emerge/Scheduler.py b/lib/_emerge/Scheduler.py index 7d9023b0b9..1b5d5a4059 100644 --- a/lib/_emerge/Scheduler.py +++ b/lib/_emerge/Scheduler.py @@ -27,6 +27,7 @@ from portage._sets import SETPREFIX from portage._sets.base import InternalPackageSet from portage.util import ensure_dirs, writemsg, writemsg_level from portage.util.futures import asyncio +from portage.util.path import first_existing from portage.util.SlotObject import SlotObject from portage.util._async.SchedulerInterface import SchedulerInterface from portage.package.ebuild.digestcheck import digestcheck @@ -65,7 +66,7 @@ FAILURE = 1 class Scheduler(PollScheduler): - # max time between loadavg checks (seconds) + # max time between loadavg and tmpdir statvfs checks (seconds) _loadavg_latency = 30 # max time between display status updates (seconds) @@ -233,6 +234,18 @@ class Scheduler(PollScheduler): max_jobs = 1 self._set_max_jobs(max_jobs) self._running_root = trees[trees._running_eroot]["root_config"] + self._jobs_tmpdir_require_free_gb = myopts.get("--jobs-tmpdir-require-free-gb") + if not self._jobs_tmpdir_require_free_gb: + # dev-lang/rust-1.77.1: ~16 GiB + # www-client/chromium-126.0.6478.57: ~18 GiB + self._jobs_tmpdir_require_free_gb = 18 + self._jobs_tmpdir_require_free_kilo_inodes = myopts.get( + "--jobs-tmpdir-require-free-kilo-inodes" + ) + if not self._jobs_tmpdir_require_free_kilo_inodes: + # dev-lang/rust-1.77.1: ~ 450k inodes + # www-client/chromium-126.0.6478.57: ~1011K + self._jobs_tmpdir_require_free_kilo_inodes = 1100 self.edebug = 0 
if settings.get("PORTAGE_DEBUG", "") == "1": self.edebug = 1 @@ -1817,6 +1830,96 @@ class Scheduler(PollScheduler): def _running_job_count(self): return self._jobs + _warned_tmpdir_free_space = False + _warned_tmpdir_free_inodes = False + + def _can_add_job(self): + if not super()._can_add_job(): + return False + + running_job_count = self._running_job_count() + if running_job_count == 0 and not self._merge_wait_queue: + # Ensure there is forward progress if there are no running + # jobs and no jobs in the _merge_wait_queue. + return True + + if ( + self._jobs_tmpdir_require_free_gb is not None + or self._jobs_tmpdir_require_free_kilo_inodes is not None + ) and hasattr(os, "statvfs"): + tmpdirs = set() + for root in self.trees: + settings = self.trees[root]["root_config"].settings + if settings["PORTAGE_TMPDIR"] in tmpdirs: + continue + tmpdirs.add(settings["PORTAGE_TMPDIR"]) + tmpdir = first_existing( + os.path.join(settings["PORTAGE_TMPDIR"], "portage") + ) + try: + vfs_stat = os.statvfs(tmpdir) + except OSError as e: + writemsg_level( + f"!!! statvfs('{tmpdir}'): {e}\n", + noiselevel=-1, + level=logging.ERROR, + ) + else: + # Use a decaying function to take potential future PORTAGE_TMPDIR consumption + # of currently running jobs and the new job into account. + def scale_to_jobs(num): + # The newly started job is fully taken into account. + res = num + # All currently running jobs are taken into account with less weight, + # since it is likely that they are already using space in PORTAGE_TMPDIR. 
+ for i in range(2, running_job_count + 2): + res += (1 / i) * num + return res + + if ( + self._jobs_tmpdir_require_free_gb + and self._jobs_tmpdir_require_free_gb != 0 + ): + required_free_bytes = ( + self._jobs_tmpdir_require_free_gb * 1024 * 1024 * 1024 + ) + required_free_bytes = scale_to_jobs(required_free_bytes) + + actual_free_bytes = vfs_stat.f_bsize * vfs_stat.f_bavail + + if actual_free_bytes < required_free_bytes: + if not self._warned_tmpdir_free_space: + msg = f"--- {tmpdir} has not enough free space, emerge job parallelism reduced. free: {actual_free_bytes} bytes, required {required_free_bytes} bytes" + portage.writemsg_stdout( + colorize("WARN", f"\n{msg}\n"), noiselevel=-1 + ) + self._logger.log(msg) + + self._warned_tmpdir_free_space = True + return False + + if ( + self._jobs_tmpdir_require_free_kilo_inodes + and self._jobs_tmpdir_require_free_kilo_inodes != 0 + ): + required_free_inodes = ( + self._jobs_tmpdir_require_free_kilo_inodes * 1000 + ) + required_free_inodes = scale_to_jobs(required_free_inodes) + + if vfs_stat.f_favail < required_free_inodes: + if not self._warned_tmpdir_free_idnoes: + msg = f"--- {tmpdir} has not enough free inodes, emerge job parallelism reduced. 
free: {vfs_stat.f_favail} inodes, required: {required_free_inodes} inodes" + portage.writemsg_stdout( + colorize("WARN", f"\n{msg}\n"), noiselevel=-1 + ) + self._logger.log(msg) + + self._warned_tmpdir_free_inodes = True + return False + + return True + def _schedule_tasks(self): while True: state_change = 0 diff --git a/lib/_emerge/main.py b/lib/_emerge/main.py index ea568c3a3d..b495e5afd5 100644 --- a/lib/_emerge/main.py +++ b/lib/_emerge/main.py @@ -154,6 +154,8 @@ def insert_optional_args(args): "--getbinpkgonly": y_or_n, "--ignore-world": y_or_n, "--jobs": valid_integers, + "--jobs-tmpdir-require-free-gb": valid_integers, + "--jobs-tmpdir-require-free-kilo-inodes": valid_integers, "--keep-going": y_or_n, "--load-average": valid_floats, "--onlydeps-with-ideps": y_or_n, @@ -514,6 +516,14 @@ def parse_opts(tmpcmdline, silent=False): "help": "Specifies the number of packages to build " + "simultaneously.", "action": "store", }, + "--jobs-tmpdir-require-free-gb": { + "help": "Specifies the required remaining capacity (in GiB) of PORTAGE_TMPDIR before a new emerge job is started. Set to 0 to disable this check", + "action": "store", + }, + "--jobs-tmpdir-require-free-kilo-inodes": { + "help": "Specifies the required remaining inodes (in thousands) of PORTAGE_TMPDIR before a new emerge job is started. 
Set to 0 to disable this check", + "action": "store", + }, "--keep-going": { "help": "continue as much as possible after an error", "choices": true_y_or_n, @@ -1024,6 +1034,34 @@ def parse_opts(tmpcmdline, silent=False): myoptions.jobs = jobs + if myoptions.jobs_tmpdir_require_free_gb: + try: + jobs_tmpdir_require_free_gb = int(myoptions.jobs_tmpdir_require_free_gb) + except ValueError: + jobs_tmpdir_require_free_gb = 0 + if not silent: + parser.error( + f"Invalid --jobs-tmpdir-require-free-gb parameter: '{myoptions.jobs_tmpdir_require_free_gb}'\n" + ) + + myoptions.jobs_tmpdir_require_free_gb = jobs_tmpdir_require_free_gb + + if myoptions.jobs_tmpdir_require_free_kilo_inodes: + try: + jobs_tmpdir_require_free_kilo_inodes = int( + myoptions.jobs_tmpdir_require_free_kilo_indoes + ) + except ValueError: + jobs_tmpdir_require_free_kilo_inodes = 0 + if not silent: + parser.error( + f"Invalid --jobs-tmpdir-require-free-kilo-inodes parameter: '{myoptions.jobs_tmpdir_require_free_kilo_indoes}'\n" + ) + + myoptions.jobs_tmpdir_require_free_kilo_inodes = ( + jobs_tmpdir_require_free_kilo_inodes + ) + if myoptions.load_average == "True": myoptions.load_average = None diff --git a/man/emerge.1 b/man/emerge.1 index cf2a0c2a73..8215c82fca 100644 --- a/man/emerge.1 +++ b/man/emerge.1 @@ -691,6 +691,18 @@ Note that interactive packages currently force a setting of \fI\-\-jobs=1\fR. This issue can be temporarily avoided by specifying \fI\-\-accept\-properties=\-interactive\fR. .TP +.BR \-\-jobs\-tmpdir\-require\-free\-gb[=GB] +Specifies the required remainnig capacity (in GiB) of \fPORTAGE_TMPDIR\fR +before additional emerge jobs are started. A decaying function is used to +reduce the limit for every additional emerge job after the second. +Specifiy \fI0\fR to disable this check. Defaults to \fI18\fR GiB. 
+.TP +.BR \-\-jobs\-tmpdir\-require\-free\-kilo\-inodes[=KILO\-INODES] +Specifies the required remaining inodes (in thousands) of \fBPORTAGE_TMPDIR\fR +before additional emerge jobs are started. A decaying function is used to +reduce the limit for every additional emerge job after the second. +Set to \fI0\fR to disable this check. Defaults to \fI1100\fR. +.TP .BR "\-\-keep\-going [ y | n ]" Continue as much as possible after an error. When an error occurs, dependencies are recalculated for remaining packages and any with
