Hi! This is just preparation for the OMP_PLACES work; I figured that before changing the affinity code it would be better to fix this PR first. As gomp_init_num_threads is always called before gomp_init_affinity, there is no point in calling pthread_getaffinity_np twice for the same thread, and I'll need the initial affinity mask for OMP_PLACES anyway, so the patch just remembers it. The CPU_*_S and CPU_ALLOC_SIZE macros were apparently introduced only in glibc 2.7, so the patch attempts to cope with older glibcs as well, by just using gomp_cpuset_size = sizeof (cpu_set_t) in that case, i.e. 128 bytes (1024 bits).
2013-10-01 Jakub Jelinek <ja...@redhat.com> PR libgomp/57298 * config/linux/proc.c (gomp_cpuset_size, gomp_cpusetp): New variables. (gomp_cpuset_popcount): Use CPU_COUNT_S if available, or CPU_COUNT if gomp_cpuset_size is sizeof (cpu_set_t). Use gomp_cpuset_size instead of sizeof (cpu_set_t) to determine number of iterations. (gomp_init_num_threads): Initialize gomp_cpuset_size and gomp_cpusetp here, use gomp_cpusetp instead of &cpuset and pass gomp_cpuset_size instead of sizeof (cpu_set_t) to pthread_getaffinity_np. (get_num_procs): Don't call pthread_getaffinity_np if gomp_cpusetp is NULL. Use gomp_cpusetp instead of &cpuset and pass gomp_cpuset_size instead of sizeof (cpu_set_t) to pthread_getaffinity_np. * config/linux/proc.h (gomp_cpuset_popcount): Add attribute_hidden. (gomp_cpuset_size, gomp_cpusetp): Declare. * config/linux/affinity.c (CPU_ISSET_S, CPU_ZERO_S, CPU_SET_S): Define if CPU_ALLOC_SIZE isn't defined. (gomp_init_affinity): Don't call pthread_getaffinity_np here, instead use gomp_cpusetp computed by gomp_init_num_threads. Use CPU_*_S variants of macros with gomp_cpuset_size as set size, for cpusetnew use alloca for it if CPU_ALLOC_SIZE is defined, otherwise local fixed size variable. (gomp_init_thread_affinity): Use CPU_*_S variants of macros with gomp_cpuset_size as set size, for cpuset use alloca for it if CPU_ALLOC_SIZE is defined, otherwise local fixed size variable. --- libgomp/config/linux/proc.c.jj 2013-03-20 10:02:06.000000000 +0100 +++ libgomp/config/linux/proc.c 2013-10-01 14:09:00.759638855 +0200 @@ -39,19 +39,27 @@ #endif #ifdef HAVE_PTHREAD_AFFINITY_NP +unsigned long gomp_cpuset_size; +cpu_set_t *gomp_cpusetp; + unsigned long gomp_cpuset_popcount (cpu_set_t *cpusetp) { -#ifdef CPU_COUNT - /* glibc 2.6 and above provide a macro for this. */ - return CPU_COUNT (cpusetp); +#ifdef CPU_COUNT_S + /* glibc 2.7 and above provide a macro for this. 
*/ + return CPU_COUNT_S (gomp_cpuset_size, cpusetp); #else +#ifdef CPU_COUNT + if (gomp_cpuset_size == sizeof (cpu_set_t)) + /* glibc 2.6 and above provide a macro for this. */ + return CPU_COUNT (cpusetp); +#endif size_t i; unsigned long ret = 0; extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)]; (void) check; - for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++) + for (i = 0; i < gomp_cpuset_size / sizeof (cpusetp->__bits[0]); i++) { unsigned long int mask = cpusetp->__bits[i]; if (mask == 0) @@ -70,16 +78,28 @@ void gomp_init_num_threads (void) { #ifdef HAVE_PTHREAD_AFFINITY_NP - cpu_set_t cpuset; +#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE) + gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF); + gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size); +#else + gomp_cpuset_size = sizeof (cpuset); +#endif - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0) + gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size); + if (pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + gomp_cpusetp) == 0) { /* Count only the CPUs this process can use. */ - gomp_global_icv.nthreads_var = gomp_cpuset_popcount (&cpuset); + gomp_global_icv.nthreads_var = gomp_cpuset_popcount (gomp_cpusetp); if (gomp_global_icv.nthreads_var == 0) gomp_global_icv.nthreads_var = 1; return; } + else + { + free (gomp_cpusetp); + gomp_cpusetp = NULL; + } #endif #ifdef _SC_NPROCESSORS_ONLN gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN); @@ -90,15 +110,14 @@ static int get_num_procs (void) { #ifdef HAVE_PTHREAD_AFFINITY_NP - cpu_set_t cpuset; - if (gomp_cpu_affinity == NULL) { /* Count only the CPUs this process can use. 
*/ - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), - &cpuset) == 0) + if (gomp_cpusetp + && pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + gomp_cpusetp) == 0) { - int ret = gomp_cpuset_popcount (&cpuset); + int ret = gomp_cpuset_popcount (gomp_cpusetp); return ret != 0 ? ret : 1; } } --- libgomp/config/linux/proc.h.jj 2013-03-20 10:02:06.000000000 +0100 +++ libgomp/config/linux/proc.h 2013-10-01 13:48:00.690527479 +0200 @@ -28,7 +28,9 @@ #include <sched.h> #ifdef HAVE_PTHREAD_AFFINITY_NP -extern unsigned long gomp_cpuset_popcount (cpu_set_t *); +extern unsigned long gomp_cpuset_size attribute_hidden; +extern cpu_set_t *gomp_cpusetp attribute_hidden; +extern unsigned long gomp_cpuset_popcount (cpu_set_t *) attribute_hidden; #endif #endif /* GOMP_PROC_H */ --- libgomp/config/linux/affinity.c.jj 2013-03-20 10:02:06.000000000 +0100 +++ libgomp/config/linux/affinity.c 2013-10-01 15:52:33.173193230 +0200 @@ -33,17 +33,22 @@ #include <unistd.h> #ifdef HAVE_PTHREAD_AFFINITY_NP - static unsigned int affinity_counter; +#ifndef CPU_ALLOC_SIZE +#define CPU_ISSET_S(idx, size, set) CPU_ISSET(idx, set) +#define CPU_ZERO_S(size, set) CPU_ZERO(set) +#define CPU_SET_S(idx, size, set) CPU_SET(idx, set) +#endif + void gomp_init_affinity (void) { - cpu_set_t cpuset, cpusetnew; size_t idx, widx; unsigned long cpus = 0; + cpu_set_t *cpusetnewp; - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset)) + if (gomp_cpusetp == NULL) { gomp_error ("could not get CPU affinity set"); free (gomp_cpu_affinity); @@ -52,10 +57,16 @@ gomp_init_affinity (void) return; } - CPU_ZERO (&cpusetnew); +#ifdef CPU_ALLOC_SIZE + cpusetnewp = (cpu_set_t *) gomp_alloca (gomp_cpuset_size); +#else + cpu_set_t cpusetnew; + cpusetnewp = &cpusetnew; +#endif + if (gomp_cpu_affinity_len == 0) { - unsigned long count = gomp_cpuset_popcount (&cpuset); + unsigned long count = gomp_cpuset_popcount (gomp_cpusetp); if (count >= 65536) count = 65536; gomp_cpu_affinity = malloc 
(count * sizeof (unsigned short)); @@ -65,24 +76,30 @@ gomp_init_affinity (void) return; } for (widx = idx = 0; widx < count && idx < 65536; idx++) - if (CPU_ISSET (idx, &cpuset)) + if (CPU_ISSET_S (idx, gomp_cpuset_size, gomp_cpusetp)) { cpus++; gomp_cpu_affinity[widx++] = idx; } } else - for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++) - if (gomp_cpu_affinity[idx] < CPU_SETSIZE - && CPU_ISSET (gomp_cpu_affinity[idx], &cpuset)) - { - if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpusetnew)) - { - cpus++; - CPU_SET (gomp_cpu_affinity[idx], &cpusetnew); + { + CPU_ZERO_S (gomp_cpuset_size, cpusetnewp); + for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++) + if (gomp_cpu_affinity[idx] < 8 * gomp_cpuset_size + && CPU_ISSET_S (gomp_cpu_affinity[idx], gomp_cpuset_size, + gomp_cpusetp)) + { + if (! CPU_ISSET_S (gomp_cpu_affinity[idx], gomp_cpuset_size, + cpusetnewp)) + { + cpus++; + CPU_SET_S (gomp_cpu_affinity[idx], gomp_cpuset_size, + cpusetnewp); } gomp_cpu_affinity[widx++] = gomp_cpu_affinity[idx]; } + } if (widx == 0) { @@ -96,9 +113,10 @@ gomp_init_affinity (void) gomp_cpu_affinity_len = widx; if (cpus < gomp_available_cpus) gomp_available_cpus = cpus; - CPU_ZERO (&cpuset); - CPU_SET (gomp_cpu_affinity[0], &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset); + CPU_ZERO_S (gomp_cpuset_size, cpusetnewp); + CPU_SET_S (gomp_cpu_affinity[0], gomp_cpuset_size, cpusetnewp); + pthread_setaffinity_np (pthread_self (), gomp_cpuset_size, + cpusetnewp); affinity_counter = 1; } @@ -106,13 +124,20 @@ void gomp_init_thread_affinity (pthread_attr_t *attr) { unsigned int cpu; + cpu_set_t *cpusetp; + +#ifdef CPU_ALLOC_SIZE + cpusetp = (cpu_set_t *) gomp_alloca (gomp_cpuset_size); +#else cpu_set_t cpuset; + cpusetp = &cpuset; +#endif cpu = __atomic_fetch_add (&affinity_counter, 1, MEMMODEL_RELAXED); cpu %= gomp_cpu_affinity_len; - CPU_ZERO (&cpuset); - CPU_SET (gomp_cpu_affinity[cpu], &cpuset); - pthread_attr_setaffinity_np (attr, sizeof 
(cpu_set_t), &cpuset); + CPU_ZERO_S (gomp_cpuset_size, cpusetp); + CPU_SET_S (gomp_cpu_affinity[cpu], gomp_cpuset_size, cpusetp); + pthread_attr_setaffinity_np (attr, gomp_cpuset_size, cpusetp); } #else Jakub