On Thu, Jul 8, 2021 at 2:04 PM Richard Sandiford via Gcc-patches
<[email protected]> wrote:
>
> -msve-vector-bits=128 causes the AArch64 port to list 128-bit Advanced
> SIMD as the first-choice mode for vectorisation, with SVE being used for
> things that Advanced SIMD can't handle as easily. However, ifcvt would
> not then try to use SVE's predicated FP arithmetic, because
> vectorized_internal_fn_supported_p only checked the preferred SIMD mode,
> leading to tests like TSVC ControlFlow-flt failing to vectorise.
>
> The mask load/store code did try other vector modes, but could also be
> improved to make sure that the SVE-ness of the base mode is preserved
> when computing derived modes.
>
> (Unlike mode_for_vector, related_vector_mode always returns a vector
> mode, so there's no need to check VECTOR_MODE_P as well.)
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK.
Richard.
> Richard
>
>
> gcc/
> * internal-fn.c (vectorized_internal_fn_supported_p): Handle
> vector types first. For scalar types, consider both the preferred
> vector mode and the alternative vector modes.
> * optabs-query.c (can_vec_mask_load_store_p): Use the same
> structure as above, in particular using related_vector_mode
> for modes provided by autovectorize_vector_modes.
>
> gcc/testsuite/
> * gcc.target/aarch64/sve/cond_arith_6.c: New test.
> ---
> gcc/internal-fn.c | 28 +++++++++++++++----
> gcc/optabs-query.c | 23 +++++----------
> .../gcc.target/aarch64/sve/cond_arith_6.c | 14 ++++++++++
> 3 files changed, 43 insertions(+), 22 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index fb8b43d1ce2..cd5e63f9acd 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -4109,16 +4109,32 @@ expand_internal_call (gcall *stmt)
> bool
> vectorized_internal_fn_supported_p (internal_fn ifn, tree type)
> {
> + if (VECTOR_MODE_P (TYPE_MODE (type)))
> + return direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED);
> +
> scalar_mode smode;
> - if (!VECTOR_TYPE_P (type) && is_a <scalar_mode> (TYPE_MODE (type), &smode))
> + if (!is_a <scalar_mode> (TYPE_MODE (type), &smode))
> + return false;
> +
> + machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> + if (VECTOR_MODE_P (vmode))
> {
> - machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
> - if (VECTOR_MODE_P (vmode))
> - type = build_vector_type_for_mode (type, vmode);
> + tree vectype = build_vector_type_for_mode (type, vmode);
> + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
> + return true;
> }
>
> - return (VECTOR_MODE_P (TYPE_MODE (type))
> - && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
> + auto_vector_modes vector_modes;
> + targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
> + for (machine_mode base_mode : vector_modes)
> + if (related_vector_mode (base_mode, smode).exists (&vmode))
> + {
> + tree vectype = build_vector_type_for_mode (type, vmode);
> + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
> + return true;
> + }
> +
> + return false;
> }
>
> void
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 3248ce2c06e..05ee5f517da 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -582,27 +582,18 @@ can_vec_mask_load_store_p (machine_mode mode,
> return false;
>
> vmode = targetm.vectorize.preferred_simd_mode (smode);
> - if (!VECTOR_MODE_P (vmode))
> - return false;
> -
> - if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> + if (VECTOR_MODE_P (vmode)
> + && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> return true;
>
> auto_vector_modes vector_modes;
> targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
> - for (unsigned int i = 0; i < vector_modes.length (); ++i)
> - {
> - poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]);
> - poly_uint64 nunits;
> - if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits))
> - continue;
> - if (mode_for_vector (smode, nunits).exists (&vmode)
> - && VECTOR_MODE_P (vmode)
> - && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> - && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> - return true;
> - }
> + for (machine_mode base_mode : vector_modes)
> + if (related_vector_mode (base_mode, smode).exists (&vmode)
> + && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
> + && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
> + return true;
> return false;
> }
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
> b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
> new file mode 100644
> index 00000000000..4085ab12444
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c
> @@ -0,0 +1,14 @@
> +/* { dg-options "-O3 -msve-vector-bits=128" } */
> +
> +void
> +f (float *x)
> +{
> + for (int i = 0; i < 100; ++i)
> + if (x[i] > 1.0f)
> + x[i] -= 1.0f;
> +}
> +
> +/* { dg-final { scan-assembler {\tld1w\tz} } } */
> +/* { dg-final { scan-assembler {\tfcmgt\tp} } } */
> +/* { dg-final { scan-assembler {\tfsub\tz} } } */
> +/* { dg-final { scan-assembler {\tst1w\tz} } } */