Tamar Christina <[email protected]> writes:
> Hi All,
>
> This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
> It is kept off for generic codegen.
>
> Note that the tests specify +sve even though they are in aarch64-sve.exp.
> The reason is that if the testsuite is run with an option that forces SVE
> off, e.g. -march=armv8-a+nosve, then the intrinsics end up being disabled
> because the -march is preferred over the -mcpu, even though the -mcpu comes
> later.
>
> This prevents the tests from failing in such runs.
IMO we should just skip aarch64-sve.exp if the options explicitly disable
SVE. But that's separate work. I'll try it once this patch is in.
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
> * config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
> * config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/pred_clobber_1.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_2.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_3.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_4.c: New test.
>
> ---
> diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h
> b/gcc/config/aarch64/tuning_models/neoversen2.h
> index
> 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f
> 100644
> --- a/gcc/config/aarch64/tuning_models/neoversen2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversen2.h
> @@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
> (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h
> b/gcc/config/aarch64/tuning_models/neoversev1.h
> index
> 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf
> 100644
> --- a/gcc/config/aarch64/tuning_models/neoversev1.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev1.h
> @@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
> (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> - | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h
> b/gcc/config/aarch64/tuning_models/neoversev2.h
> index
> bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e
> 100644
> --- a/gcc/config/aarch64/tuning_models/neoversev2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev2.h
> @@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
> (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-n2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
Might be better to make this p([1-3]), so that we disallow any registers
that would cause a spill.
OK with that change, thanks.
Richard
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-v2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-v1" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p0.b, all
> +** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}