On 18/08/2025 20:52, Collin Funk wrote:
Bruno Haible via Gnulib discussion list <[email protected]> writes:
Proposed patch is attached.
I guess you want the patch to be reviewed?
Well no pressure.
I'll apply it in a day or so after giving folks some time to comment.
Reviewing a patch, for me, includes validating that the body of a
function implements its specification comment. But all three new
functions lack such a comment; therefore I can't really review anything.
I was going to take a look as well. Agree that comments would be nice
though, since I (and likely others) do not fully understand Linux-isms
like sysfs, procfs, and cgroups.
I see four lines that need whitespace fixes:
return strdup ("/sys/fs/cgroup");
snprintf(cpu_max_file, sizeof (cpu_max_file),
#if HAVE_MNTENT_H
#endif
Some other spacing nits, they all need a space before parentheses:
endmntent(fp);
switch (sched_getscheduler(0))
quota = get_cgroup2_cpu_quota();
V2 attached with comments.
cheers,
Padraig
From f935d969cd882da259171e469302645503837b6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Mon, 18 Aug 2025 15:34:59 +0100
Subject: [PATCH] nproc: honor cgroupv2 CPU quotas
cgroupv1 CPU quotas are not considered,
as those are now legacy (RHEL7 era),
and are more complex/inefficient to parse.
Tested in coreutils on Fedora 42 like:
# Honor limit anywhere in /proc/self/cgroup hierarchy
$ systemd_nested_cgroup=\
/sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/app.slice/
# The number of processors on this system
$ src/nproc
4
# Behave like MAX (1, (int)round(quota/period))
$ echo "100000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc
1
$ echo "90000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc
1
$ echo "140000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc
1
$ echo "150000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc
2
# Ensure NPROC_ALL takes precedence
$ echo "100000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc --all
4
# Ensure OMP env vars have appropriate precedence
$ echo "200000 100000" > $systemd_nested_cgroup/cpu.max
$ OMP_NUM_THREADS=10 src/nproc
10
$ OMP_THREAD_LIMIT=10 src/nproc
2
# Ensure quota only reduces
$ echo "500000 100000" > $systemd_nested_cgroup/cpu.max
$ src/nproc
4
# Reset system to unlimited
$ echo "max 100000" > $systemd_nested_cgroup/cpu.max
# Test quota in root hierarchy
$ podman run --cpus=2 -i --rm fedora:latest /tmp/nproc
2
$ podman run --cpus=1.5 -i --rm fedora:latest /tmp/nproc
2
$ podman run --cpus=1.4 -i --rm fedora:latest /tmp/nproc
1
$ podman run --cpus=100 -i --rm fedora:latest /tmp/nproc
4
# Docker is similar to podman, but explicitly limits max allowable
$ docker run --cpus=1.4 -i --rm fedora:latest /tmp/nproc
1
$ docker run --cpus=100 -i --rm fedora:latest /tmp/nproc
docker: Error response from daemon:
range of CPUs is from 0.01 to 4.00, as there are only 4 CPUs
* lib/nproc.c (cgroup2_mount): Identify the cgroup2 mount point,
first at the common location for efficiency,
resorting to searching mount points otherwise.
(get_cgroup2_cpu_quota): Walk cgroup2 quotas for the current process,
returning the lowest integer number of CPUs configured.
(cpu_quota): On Linux return the cgroupv2 CPU quota if the
current scheduler honors it. Otherwise return ULONG_MAX.
(num_processors): Clamp the return to <= quota.
* modules/nproc: Depend on mntent-h.
---
ChangeLog | 13 ++++
lib/nproc.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++----
modules/nproc | 1 +
3 files changed, 188 insertions(+), 14 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 29897dc8c5..fb3c905b8f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2025-08-18 Pádraig Brady <[email protected]>
+
+ nproc: honor cgroupv2 CPU quotas
+ * lib/nproc.c (cgroup2_mount): Identify the cgroup2 mount point,
+ first at the common location for efficiency,
+ resorting to searching mount points otherwise.
+ (get_cgroup2_cpu_quota): Walk cgroup2 quotas for the current process,
+ returning the lowest integer number of CPUs configured.
+ (cpu_quota): On Linux return the cgroupv2 CPU quota if the
+ current scheduler honors it. Otherwise return ULONG_MAX.
+ (num_processors): Clamp the return to <= quota.
+ * modules/nproc: Depend on mntent-h.
+
2025-08-15 Bruno Haible <[email protected]>
Reduce risk of compilation errors within include files.
diff --git a/lib/nproc.c b/lib/nproc.c
index cecf60bc6e..2906900896 100644
--- a/lib/nproc.c
+++ b/lib/nproc.c
@@ -22,7 +22,12 @@
#include <errno.h>
#include <limits.h>
+#if HAVE_MNTENT_H
+# include <mntent.h>
+#endif
#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
#include <unistd.h>
#if HAVE_PTHREAD_GETAFFINITY_NP && 0
@@ -62,6 +67,8 @@
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+#define NPROC_MINIMUM 1
+
/* Return the number of processors available to the current process, based
on a modern system call that returns the "affinity" between the current
process and each CPU. Return 0 if unknown or if such a system call does
@@ -244,7 +251,7 @@ num_processors_via_affinity_mask (void)
/* Return the total number of processors. Here QUERY must be one of
NPROC_ALL, NPROC_CURRENT. The result is guaranteed to be at least 1. */
static unsigned long int
-num_processors_ignoring_omp (enum nproc_query query)
+num_processors_available (enum nproc_query query)
{
/* On systems with a modern affinity mask system call, we have
sysconf (_SC_NPROCESSORS_CONF)
@@ -377,7 +384,152 @@ num_processors_ignoring_omp (enum nproc_query query)
}
#endif
- return 1;
+ return NPROC_MINIMUM;
+}
+
+#if defined __linux__ || defined __ANDROID__
+/* Identify the cgroup2 mount point,
+   initially at the usual location for efficiency,
+   resorting to searching /proc/mounts otherwise (if <mntent.h> exists).
+   Return NULL if the mount point is not found or memory is exhausted.
+   The returned string is allocated with strdup; the caller frees it.  */
+static char *
+cgroup2_mount (void)
+{
+  /* Check the usual location first.  */
+  if (access ("/sys/fs/cgroup/cgroup.controllers", F_OK) == 0)
+    return strdup ("/sys/fs/cgroup");
+
+  char *ret = NULL;
+
+# if HAVE_MNTENT_H
+  /* Otherwise look for the mount point.
+     Declare FP here so it is not an unused variable without <mntent.h>.  */
+  FILE *fp = setmntent ("/proc/mounts", "r");
+  if (! fp)
+    return NULL;
+
+  struct mntent *mnt;
+  while ((mnt = getmntent (fp)) != NULL)
+    if (strcmp (mnt->mnt_type, "cgroup2") == 0)
+      {
+        ret = strdup (mnt->mnt_dir);
+        break;   /* The first cgroup2 entry wins.  */
+      }
+  endmntent (fp);
+# endif
+
+  return ret;
+}
+
+/* Return the minimum configured cgroupv2 CPU quota for the current process,
+   walking from our cgroup up to the root of the cgroup2 mount.
+   Return ULONG_MAX if no quota is read; otherwise the value is >= 1.  */
+static unsigned long int
+get_cgroup2_cpu_quota (void)
+{
+  unsigned long int cpu_quota = ULONG_MAX;
+  FILE *fp;
+
+  fp = fopen ("/proc/self/cgroup", "r");
+  if (! fp)
+    return cpu_quota;
+
+  /* Get our cgroupv2 (unified) hierarchy.  */
+  char *cgroup = NULL;
+  char *cgroup_str = NULL;
+  size_t cgroup_size = 0;
+  ssize_t len;  /* Not "read", to avoid shadowing read().  */
+  while ((len = getline (&cgroup_str, &cgroup_size, fp)) != -1)
+    {
+      if (strncmp (cgroup_str, "0::/", 4) == 0)
+        {
+          char *end = cgroup_str + len - 1;
+          if (*end == '\n')
+            *end = '\0';
+          cgroup = cgroup_str + 3;  /* Skip "0::", keeping the '/'.  */
+          break;
+        }
+    }
+  fclose (fp);
+
+  char *mount = NULL;
+  if (cgroup && ! (mount = cgroup2_mount ()))
+    cgroup = NULL;
+
+  /* Find the lowest quota in the hierarchy.  */
+  char *quota_str = NULL;
+  size_t quota_size = 0;
+  while (cgroup && *cgroup)
+    {
+      char cpu_max_file[PATH_MAX];
+      int n = snprintf (cpu_max_file, sizeof (cpu_max_file),
+                        "%s%s/cpu.max", mount, cgroup);
+      fp = (0 <= n && (size_t) n < sizeof (cpu_max_file)
+            ? fopen (cpu_max_file, "r") : NULL);  /* Not if truncated.  */
+      if (fp && getline (&quota_str, &quota_size, fp) != -1
+          && strncmp (quota_str, "max", 3) != 0)  /* "max": no limit.  */
+        {
+          long quota, period;
+          if (sscanf (quota_str, "%ld %ld", &quota, &period) == 2 && period)
+            {
+              double ncpus = (double)quota / period;
+              if (cpu_quota == ULONG_MAX || ncpus < cpu_quota)
+                {
+                  cpu_quota = MAX (1, (long)((double)quota / period + 0.5));
+                  /* nproc will return 1 minimum, so no point going lower */
+                  if (cpu_quota == 1)
+                    *cgroup = '\0';  /* Ends the walk.  */
+                }
+            }
+        }
+
+      if (fp)
+        fclose (fp);
+
+      char *last_sep = strrchr (cgroup, '/');
+      if (! last_sep)
+        break;
+      if (last_sep == cgroup && *(cgroup + 1))
+        *(cgroup + 1) = '\0';  /* Iterate on "/" also.  */
+      else
+        *last_sep = '\0';
+    }
+
+  free (quota_str);
+  free (cgroup_str);
+  free (mount);
+  return cpu_quota;
+}
+#endif
+
+
+/* Report the CPU limit imposed by cgroupv2 quotas, as a count >= 1.
+   The quota is consulted only when the current scheduler honors it;
+   in every other case the result is ULONG_MAX.  */
+static unsigned long int
+cpu_quota (void)
+{
+  unsigned long int limit = ULONG_MAX;
+
+#if defined __linux__ || defined __ANDROID__
+# if HAVE_SCHED_GETAFFINITY_LIKE_GLIBC && defined SCHED_DEADLINE
+  /* sched.h is new enough to define all the policies tested here.  */
+  int policy = sched_getscheduler (0);
+
+  /* The quota does not apply under these policies, so keep LIMIT
+     at ULONG_MAX for them.  */
+  int exempt = (policy == SCHED_FIFO
+                || policy == SCHED_RR
+                || policy == SCHED_DEADLINE);
+
+  /* Every other return value consults the quota.  */
+  if (! exempt)
+    limit = get_cgroup2_cpu_quota ();
+# endif
+#endif
+
+  return limit;
 }
/* Parse OMP environment variables without dependence on OMP.
@@ -416,13 +568,13 @@ parse_omp_threads (char const* threads)
unsigned long int
num_processors (enum nproc_query query)
{
- unsigned long int omp_env_limit = ULONG_MAX;
+ unsigned long int nproc_limit = ULONG_MAX;
+ /* Honor the OpenMP environment variables, recognized also by all
+ programs that are based on OpenMP. */
if (query == NPROC_CURRENT_OVERRIDABLE)
{
- unsigned long int omp_env_threads;
- /* Honor the OpenMP environment variables, recognized also by all
- programs that are based on OpenMP. */
+ unsigned long int omp_env_threads, omp_env_limit;
omp_env_threads = parse_omp_threads (getenv ("OMP_NUM_THREADS"));
omp_env_limit = parse_omp_threads (getenv ("OMP_THREAD_LIMIT"));
if (! omp_env_limit)
@@ -431,14 +583,22 @@ num_processors (enum nproc_query query)
if (omp_env_threads)
return MIN (omp_env_threads, omp_env_limit);
+ nproc_limit = omp_env_limit;
query = NPROC_CURRENT;
}
- /* Here query is one of NPROC_ALL, NPROC_CURRENT. */
- if (omp_env_limit == 1)
- /* No need to even call num_processors_ignoring_omp (query). */
- return 1;
- {
- unsigned long nprocs = num_processors_ignoring_omp (query);
- return MIN (nprocs, omp_env_limit);
- }
+
+ /* Honor any CPU quotas. */
+ if (query == NPROC_CURRENT && nproc_limit > NPROC_MINIMUM)
+ {
+ unsigned long int quota = cpu_quota ();
+ nproc_limit = MIN (quota, nproc_limit);
+ }
+
+ if (nproc_limit > NPROC_MINIMUM)
+ {
+ unsigned long nprocs = num_processors_available (query);
+ nproc_limit = MIN (nprocs, nproc_limit);
+ }
+
+ return nproc_limit;
}
diff --git a/modules/nproc b/modules/nproc
index 1081f7b778..de8cb1ef82 100644
--- a/modules/nproc
+++ b/modules/nproc
@@ -10,6 +10,7 @@ Depends-on:
c-ctype
extensions
minmax
+mntent-h
unistd-h
configure.ac:
--
2.50.1