On Wed, Nov 6, 2024 at 7:28 AM Andrew Stubbs <[email protected]> wrote:
>
> If requested, return the vectorization factor appropriate for the offload
> device, if any.
>
> This change gives a significant speedup in the BabelStream "dot" benchmark on
> amdgcn.
>
> The omp_adjust_chunk_size usecase is set "false", for now, but I intend to
> change that in a follow-up patch.
>
> Note that NVPTX SIMT offload does not use this code-path.
>
> gcc/ChangeLog:
>
> * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Set
> omp_max_vf to offload == false.
> * omp-expand.cc (omp_adjust_chunk_size): Likewise.
> * omp-general.cc (omp_max_vf): Add "offload" parameter, and detect
> amdgcn offload devices.
> * omp-general.h (omp_max_vf): Likewise.
> * omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to
> omp_max_vf.
> ---
> gcc/gimple-loop-versioning.cc | 2 +-
> gcc/omp-expand.cc | 2 +-
> gcc/omp-general.cc | 17 +++++++++++++++--
> gcc/omp-general.h | 2 +-
> gcc/omp-low.cc | 3 ++-
> 5 files changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 107b0020024..2968c929d04 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn)
> handled efficiently by scalar code. omp_max_vf calculates the
> maximum number of bytes in a vector, when such a value is relevant
> to loop optimization. */
> - m_maximum_scale = estimated_poly_value (omp_max_vf ());
> + m_maximum_scale = estimated_poly_value (omp_max_vf (false));
> m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
> }
>
> diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
> index b0b4ddf5dbc..907fd46a5b2 100644
> --- a/gcc/omp-expand.cc
> +++ b/gcc/omp-expand.cc
> @@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool
> simd_schedule)
> if (!simd_schedule || integer_zerop (chunk_size))
> return chunk_size;
>
> - poly_uint64 vf = omp_max_vf ();
> + poly_uint64 vf = omp_max_vf (false);
> if (known_eq (vf, 1U))
> return chunk_size;
>
> diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
> index f74b9bf5e96..1ae575ee181 100644
> --- a/gcc/omp-general.cc
> +++ b/gcc/omp-general.cc
> @@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees,
> void *data)
> return NULL_TREE;
> }
>
> -/* Return maximum possible vectorization factor for the target. */
> +/* Return maximum possible vectorization factor for the target, or for
> + the OpenMP offload target if one exists. */
>
> poly_uint64
> -omp_max_vf (void)
> +omp_max_vf (bool offload)
> {
> if (!optimize
> || optimize_debug
> @@ -999,6 +1000,18 @@ omp_max_vf (void)
> && OPTION_SET_P (flag_tree_loop_vectorize)))
> return 1;
>
> + if (ENABLE_OFFLOADING && offload)
> + {
> + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
> + {
> + if (startswith (c, "amdgcn"))
> + return ordered_max (64, omp_max_vf (false));
This causes a bootstrap failure for me (and others) on x86_64-linux-gnu:
In file included from ../../src-master/gcc/coretypes.h:497,
from ../../src-master/gcc/omp-general.cc:25:
../../src-master/gcc/poly-int.h: In instantiation of ‘typename
if_nonpoly<Cb, bool>::type maybe_lt(const poly_int<N, C>&, const Cb&)
[with unsigned int N = 1; Ca = long unsigned int; Cb = int; typename
if_nonpoly<Cb, bool>::type = bool]’:
../../src-master/gcc/poly-int.h:1440:7: required from ‘poly_int<N,
typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type>
ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N =
1; Ca = int; Cb = long unsigned int; typename poly_result<typename
if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename
if_nonpoly<Ca>::type = int]’
1342 | #define maybe_gt(A, B) maybe_lt (B, A)
| ~~~~~~~~~^~~~~~
../../src-master/gcc/omp-general.cc:1008:25: required from here
1008 | return ordered_max (64, omp_max_vf (false));
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-master/gcc/poly-int.h:1317:22: error: comparison of integer
expressions of different signedness: ‘const long unsigned int’ and
‘const int’ [-Werror=sign-compare]
1317 | return a.coeffs[0] < b;
| ~~~~~~~~~~~~^~~
../../src-master/gcc/poly-int.h: In instantiation of ‘typename
if_nonpoly<Ca, bool>::type maybe_lt(const Ca&, const poly_int<N, Cb>&)
[with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename
if_nonpoly<Ca, bool>::type = bool]’:
../../src-master/gcc/poly-int.h:1445:2: required from ‘poly_int<N,
typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type>
ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N =
1; Ca = int; Cb = long unsigned int; typename poly_result<typename
if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename
if_nonpoly<Ca>::type = int]’
1342 | #define maybe_gt(A, B) maybe_lt (B, A)
| ~~~~~~~~~^~~~~~
../../src-master/gcc/omp-general.cc:1008:25: required from here
1008 | return ordered_max (64, omp_max_vf (false));
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-master/gcc/poly-int.h:1328:12: error: comparison of integer
expressions of different signedness: ‘const int’ and ‘const long
unsigned int’ [-Werror=sign-compare]
1328 | return a < b.coeffs[0];
| ~~^~~~~~~~~~~
Thanks,
Andrew Pinski
> + else if ((c = strchr (c, ':')))
> + c++;
> + }
> + /* Otherwise, fall through to host VF. */
> + }
> +
> auto_vector_modes modes;
> targetm.vectorize.autovectorize_vector_modes (&modes, true);
> if (!modes.is_empty ())
> diff --git a/gcc/omp-general.h b/gcc/omp-general.h
> index f3778131626..70f78d2055b 100644
> --- a/gcc/omp-general.h
> +++ b/gcc/omp-general.h
> @@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt,
> struct omp_for_data *fd,
> struct omp_for_data_loop *loops);
> extern gimple *omp_build_barrier (tree lhs);
> extern tree find_combined_omp_for (tree *, int *, void *);
> -extern poly_uint64 omp_max_vf (void);
> +extern poly_uint64 omp_max_vf (bool);
> extern int omp_max_simt_vf (void);
> extern const char *omp_context_name_list_prop (tree);
> extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);
> diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
> index 44c4310075b..70a2c108fbc 100644
> --- a/gcc/omp-low.cc
> +++ b/gcc/omp-low.cc
> @@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context
> *ctx,
> {
> if (known_eq (sctx->max_vf, 0U))
> {
> - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
> + sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf ()
> + : omp_max_vf (omp_maybe_offloaded_ctx (ctx)));
> if (maybe_gt (sctx->max_vf, 1U))
> {
> tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
> --
> 2.46.0
>