From: Andrew Stubbs <a...@codesourcery.com> Implement the -foffload-memory=pinned option such that libgomp is instructed to enable fully-pinned memory at start-up. The option is intended to provide a performance boost to certain offload programs without modifying the code.
This feature only works on Linux, at present, and simply calls mlockall to enable always-on memory pinning. It requires that the ulimit feature is set high enough to accommodate all the program's memory usage. In this mode the ompx_pinned_memory_alloc feature is disabled as it is not needed and may conflict. gcc/ChangeLog: * omp-low.cc (omp_enable_pinned_mode): New function. (execute_lower_omp): Call omp_enable_pinned_mode. libgomp/ChangeLog: * config/linux/allocator.c (always_pinned_mode): New variable. (GOMP_enable_pinned_mode): New function. (linux_memspace_alloc): Disable pinning when always_pinned_mode set. (linux_memspace_calloc): Likewise. (linux_memspace_free): Likewise. (linux_memspace_realloc): Likewise. * libgomp.map (GOMP_5.1.1): New version space with GOMP_enable_pinned_mode. * testsuite/libgomp.c/alloc-pinned-7.c: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/alloc-pinned-1.c: New test. --- gcc/omp-low.cc | 68 +++++++++++++++++++ .../c-c++-common/gomp/alloc-pinned-1.c | 28 ++++++++ libgomp/config/linux/allocator.c | 26 +++++++ libgomp/libgomp.map | 5 ++ libgomp/testsuite/libgomp.c/alloc-pinned-7.c | 66 ++++++++++++++++++ 5 files changed, 193 insertions(+) create mode 100644 gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-7.c diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index ec08d59f676..ce21b3bd6f8 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -14441,6 +14441,70 @@ lower_omp (gimple_seq *body, omp_context *ctx) input_location = saved_location; } +/* Emit a constructor function to enable -foffload-memory=pinned + at runtime. Libgomp handles the OS mode setting, but we need to trigger + it by calling GOMP_enable_pinned mode before the program proper runs. */ + +static void +omp_enable_pinned_mode () +{ + static bool visited = false; + if (visited) + return; + visited = true; + + /* Create a new function like this: + + static void __attribute__((constructor)) + __set_pinned_mode () + { + GOMP_enable_pinned_mode (); + } + */ + + tree name = get_identifier ("__set_pinned_mode"); + tree voidfntype = build_function_type_list (void_type_node, NULL_TREE); + tree decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name, voidfntype); + + TREE_STATIC (decl) = 1; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; + DECL_EXTERNAL (decl) = 0; + DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); + BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl; + DECL_STATIC_CONSTRUCTOR (decl) = 1; + DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("constructor"), + NULL_TREE, NULL_TREE); + + tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, + void_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_CONTEXT (t) = decl; + DECL_RESULT (decl) = t; + + push_struct_function (decl); + init_tree_ssa (cfun); + + tree callname = get_identifier ("GOMP_enable_pinned_mode"); + tree calldecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, callname, + voidfntype); + gcall *call = gimple_build_call (calldecl, 0); + + gimple_seq seq = NULL; + gimple_seq_add_stmt (&seq, call); + gimple_set_body (decl, gimple_build_bind (NULL_TREE, seq, NULL)); + + cfun->function_end_locus = UNKNOWN_LOCATION; + cfun->curr_properties |= PROP_gimple_any; + pop_cfun (); + cgraph_node::add_new_function (decl, true); +} + /* Main entry point. */ static unsigned int @@ -14497,6 +14561,10 @@ execute_lower_omp (void) for (auto task_stmt : task_cpyfns) finalize_task_copyfn (task_stmt); task_cpyfns.release (); + + if (flag_offload_memory == OFFLOAD_MEMORY_PINNED) + omp_enable_pinned_mode (); + return 0; } diff --git a/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c b/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c new file mode 100644 index 00000000000..e0e08019bff --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/alloc-pinned-1.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-additional-options "-foffload-memory=pinned" } */ +/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */ + +#if __cplusplus +#define EXTERNC extern "C" +#else +#define EXTERNC +#endif + +/* Intercept the libgomp initialization call to check it happens. */ + +int good = 0; + +EXTERNC void +GOMP_enable_pinned_mode () +{ + good = 1; +} + +int +main () +{ + if (!good) + __builtin_exit (1); + + return 0; +} diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index face524259c..4bd5bd6b930 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -39,9 +39,26 @@ #include <string.h> #include "libgomp.h" +static bool always_pinned_mode = false; + +/* This function is called by the compiler when -foffload-memory=pinned + is used. */ + +void +GOMP_enable_pinned_mode () +{ + if (mlockall (MCL_CURRENT | MCL_FUTURE) != 0) + gomp_error ("failed to pin all memory (ulimit too low?)"); + else + always_pinned_mode = true; +} + static void * linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) { + /* Explicit pinning may not be required. */ + pin = pin && !always_pinned_mode; + if (memspace == ompx_unified_shared_mem_space) { return gomp_usm_alloc (size, GOMP_DEVICE_ICV); @@ -69,6 +86,9 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) static void * linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) { + /* Explicit pinning may not be required. */ + pin = pin && !always_pinned_mode; + if (memspace == ompx_unified_shared_mem_space) { void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV); @@ -86,6 +106,9 @@ static void linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, int pin) { + /* Explicit pinning may not be required. */ + pin = pin && !always_pinned_mode; + if (memspace == ompx_unified_shared_mem_space) gomp_usm_free (addr, GOMP_DEVICE_ICV); else if (pin) @@ -98,6 +121,9 @@ static void * linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, size_t oldsize, size_t size, int oldpin, int pin) { + /* Explicit pinning may not be required. */ + pin = pin && !always_pinned_mode; + if (memspace == ompx_unified_shared_mem_space) goto manual_realloc; else if (oldpin && pin) diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 2ac58094169..40402dc9893 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -402,6 +402,11 @@ GOMP_5.1 { GOMP_teams4; } GOMP_5.0.1; +GOMP_5.1.1 { + global: + GOMP_enable_pinned_mode; +} GOMP_5.1; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-7.c b/libgomp/testsuite/libgomp.c/alloc-pinned-7.c new file mode 100644 index 00000000000..6fd19b46a5c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/alloc-pinned-7.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-additional-options "-foffload-memory=pinned" } */ + +/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */ + +/* Test that pinned memory works. */ + +#ifdef __linux__ +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> + +#include <sys/mman.h> + +int +get_pinned_mem () +{ + int pid = getpid (); + char buf[100]; + sprintf (buf, "/proc/%d/status", pid); + + FILE *proc = fopen (buf, "r"); + if (!proc) + abort (); + while (fgets (buf, 100, proc)) + { + int val; + if (sscanf (buf, "VmLck: %d", &val)) + { + printf ("lock %d\n", val); + fclose (proc); + return val; + } + } + abort (); +} +#else +int +get_pinned_mem () +{ + return 0; +} + +#define mlockall(...) 0 +#endif + +#include <omp.h> + +/* Allocate more than a page each time, but stay within the ulimit. */ +#define SIZE 10*1024 + +int +main () +{ + // Sanity check + if (get_pinned_mem () == 0) + { + /* -foffload-memory=pinned has failed, but maybe that's because + isufficient pinned memory was available. */ + if (mlockall (MCL_CURRENT | MCL_FUTURE) == 0) + abort (); + } + + return 0; +} -- 2.25.1