https://gcc.gnu.org/g:2481350075a8a562a43d9a7c13a622489c2fd435
commit 2481350075a8a562a43d9a7c13a622489c2fd435 Author: Andrew Stubbs <a...@codesourcery.com> Date: Fri Jul 8 11:58:46 2022 +0100 openmp: fix max_vf setting for amdgcn offloading Ensure that the "max_vf" figure used for the "safelen" attribute is large enough for the largest configured offload device. This change gives ~10x speed improvement on the Bablestream "dot" benchmark for AMD GCN. gcc/ChangeLog: * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add comment. * omp-general.cc (omp_max_simd_vf): New function. * omp-general.h (omp_max_simd_vf): New prototype. * omp-low.cc (lower_rec_simd_input_clauses): Select largest from omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_amdgcn_offloading_enabled): New. (check_effective_target_nvptx_offloading_enabled): New. * gcc.dg/gomp/target-vf.c: New test. Diff: --- gcc/ChangeLog.omp | 9 +++++++++ gcc/gimple-loop-versioning.cc | 5 ++++- gcc/omp-general.cc | 18 ++++++++++++++++++ gcc/omp-general.h | 1 + gcc/omp-low.cc | 9 ++++++++- gcc/testsuite/ChangeLog.omp | 7 +++++++ gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 10 ++++++++++ 8 files changed, 78 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index f4b52d9e3ec..d7256902331 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,12 @@ +2022-07-12 Andrew Stubbs <a...@codesourcery.com> + + * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add + comment. + * omp-general.cc (omp_max_simd_vf): New function. + * omp-general.h (omp_max_simd_vf): New prototype. + * omp-low.cc (lower_rec_simd_input_clauses): Select largest from + omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf. + 2023-08-23 Andrew Stubbs <a...@codesourcery.com> * omp-builtins.def (BUILT_IN_GOMP_ENABLE_PINNED_MODE): New. diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 17877f06921..c22c24bd958 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn) unvectorizable code, since it is the largest size that can be handled efficiently by scalar code. omp_max_vf calculates the maximum number of bytes in a vector, when such a value is relevant - to loop optimization. */ + to loop optimization. + FIXME: this probably needs to use omp_max_simd_vf when in a target + region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that + it doesn't actually matter.) */ m_maximum_scale = estimated_poly_value (omp_max_vf ()); m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); } diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index 9a125a28afa..faa248ebd17 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -1039,6 +1039,24 @@ omp_max_simt_vf (void) return 0; } +/* Return maximum SIMD width if offloading may target SIMD hardware. */ + +int +omp_max_simd_vf (void) +{ + if (!optimize) + return 0; + if (ENABLE_OFFLOADING) + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) + { + if (startswith (c, "amdgcn")) + return 64; + else if ((c = strchr (c, ':'))) + c++; + } + return 0; +} + /* Store the construct selectors as tree codes from last to first. CTX is a list of trait selectors, nconstructs must be equal to its length, and the array CONSTRUCTS holds the output. */ diff --git a/gcc/omp-general.h b/gcc/omp-general.h index 15e092f1286..e478d9bdeab 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -164,6 +164,7 @@ extern gimple *omp_build_barrier (tree lhs); extern tree find_combined_omp_for (tree *, int *, void *); extern poly_uint64 omp_max_vf (void); extern int omp_max_simt_vf (void); +extern int omp_max_simd_vf (void); extern const char *omp_context_name_list_prop (tree); extern void omp_construct_traits_to_codes (tree, int, enum tree_code *); extern tree omp_check_context_selector (location_t loc, tree ctx); diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 94de26b6013..dc0a6906c67 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -4822,7 +4822,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, { if (known_eq (sctx->max_vf, 0U)) { - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); + /* If we are compiling for multiple devices choose the largest VF. */ + sctx->max_vf = omp_max_vf (); + if (omp_maybe_offloaded_ctx (ctx)) + { + if (sctx->is_simt) + sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simt_vf ()); + sctx->max_vf = ordered_max (sctx->max_vf, (unsigned) omp_max_simd_vf ()); + } if (maybe_gt (sctx->max_vf, 1U)) { tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 7756bc0bb92..5bf09420432 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,10 @@ +2022-07-12 Andrew Stubbs <a...@codesourcery.com> + + * lib/target-supports.exp + (check_effective_target_amdgcn_offloading_enabled): New. + (check_effective_target_nvptx_offloading_enabled): New. + * gcc.dg/gomp/target-vf.c: New test. + 2022-06-27 Tobias Burnus <tob...@codesourcery.com> * gfortran.dg/gomp/num-teams-2.f90: Use dg-error not dg-warning. diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c new file mode 100644 index 00000000000..14cea45e53c --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ + +/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working + properly to set the OpenMP vectorization factor for the offload target, and + not just for the host. */ + +float +foo (float * __restrict x, float * __restrict y) +{ + float sum = 0.0; + +#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum) + for (int i=0; i<1024; i++) + sum += x[i] * y[i]; + + return sum; +} + +/* { dg-final { scan-tree-dump "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */ +/* { dg-final { scan-tree-dump "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3a55b2a4159..ca75911a0d6 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1120,6 +1120,16 @@ proc check_effective_target_offloading_enabled {} { return [check_configured_with "--enable-offload-targets"] } +# Return 1 if compiled with --enable-offload-targets=amdgcn +proc check_effective_target_amdgcn_offloading_enabled {} { + return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}] +} + +# Return 1 if compiled with --enable-offload-targets=amdgcn +proc check_effective_target_nvptx_offloading_enabled {} { + return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}] +} + # Return 1 if compilation with -fopenacc is error-free for trivial # code, 0 otherwise.