https://gcc.gnu.org/g:8e80287601c5954bc437212be5f91aaf12074b22
commit r16-1649-g8e80287601c5954bc437212be5f91aaf12074b22 Author: Tamar Christina <tamar.christ...@arm.com> Date: Tue Jun 24 11:10:11 2025 +0100 AArch64: propose -mmax-vectorization as an option to override vector costing With the middle-end providing a way to make vectorization more profitable by scaling vect-scalar-cost-multiplier, this adds a more user-friendly option to make it easier to use. I propose making it an actual -m option that we document and retain, rather than using the parameter name. In the future I would like to extend this option to modify additional costing in the AArch64 backend itself. This can be used together with --param aarch64-autovec-preference to get the vectorizer to, say, always vectorize with SVE. I did consider making this an additional enum value for --param aarch64-autovec-preference, but I think it is useful to be able to set this with pragmas and attributes; I am open to suggestions. Note that as a follow-up I plan on extending -fdump-tree-vect to support -stats, which is then intended to be usable with this flag. gcc/ChangeLog: * config/aarch64/aarch64.opt (max-vectorization): New. * config/aarch64/aarch64.cc (aarch64_override_options_internal): Save and restore option. Implement it through vect-scalar-cost-multiplier. (aarch64_attributes): Default to off. * common/config/aarch64/aarch64-common.cc (aarch64_handle_option): Initialize option. * doc/extend.texi (max-vectorization): Document attribute. * doc/invoke.texi (max-vectorization): Document flag. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/cost_model_17.c: New test. * gcc.target/aarch64/sve/cost_model_18.c: New test. 
Diff: --- gcc/common/config/aarch64/aarch64-common.cc | 4 ++++ gcc/config/aarch64/aarch64.cc | 8 ++++++++ gcc/config/aarch64/aarch64.opt | 4 ++++ gcc/doc/extend.texi | 10 ++++++++++ gcc/doc/invoke.texi | 9 +++++++++ .../gcc.target/aarch64/sve/cost_model_17.c | 21 +++++++++++++++++++++ .../gcc.target/aarch64/sve/cost_model_18.c | 21 +++++++++++++++++++++ 7 files changed, 77 insertions(+) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index b9ed83642ade..1488697c6ce4 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts, opts->x_aarch64_flag_outline_atomics = val; return true; + case OPT_mmax_vectorization: + opts->x_flag_aarch64_max_vectorization = val; + return true; + default: return true; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index af8415c29a97..adbe05ac404a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19036,6 +19036,12 @@ aarch64_override_options_internal (struct gcc_options *opts) if (TARGET_SME && !TARGET_SVE2) sorry ("no support for %qs without %qs", "sme", "sve2"); + /* Set scalar costing to a high value such that we always pick + vectorization. Increase scalar costing by 10000%. 
*/ + if (opts->x_flag_aarch64_max_vectorization) + SET_OPTION_IF_UNSET (opts, &global_options_set, + param_vect_scalar_cost_multiplier, 10000); + aarch64_override_options_after_change_1 (opts); } @@ -19786,6 +19792,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] = OPT_msign_return_address_ }, { "outline-atomics", aarch64_attr_bool, true, NULL, OPT_moutline_atomics}, + { "max-vectorization", aarch64_attr_bool, false, NULL, + OPT_mmax_vectorization}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index f32d56d4ffae..17e1c700dd2b 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -290,6 +290,10 @@ msve-vector-bits= Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE) -msve-vector-bits=<number> Set the number of bits in an SVE vector register. +mmax-vectorization +Target Var(flag_aarch64_max_vectorization) Save +Override the scalar cost model such that vectorization is always profitable. + mverbose-cost-dump Target Undocumented Var(flag_aarch64_verbose_cost) Enables verbose cost model dumping in the debug dump files. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7da99f77ec82..55adf649acf8 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3884,6 +3884,16 @@ Enable or disable calls to out-of-line helpers to implement atomic operations. This corresponds to the behavior of the command-line options @option{-moutline-atomics} and @option{-mno-outline-atomics}. +@cindex @code{max-vectorization} function attribute, AArch64 +@item max-vectorization +@itemx no-max-vectorization +@code{max-vectorization} tells GCC's vectorizer to treat all vector +loops as being more profitable than the original scalar loops when +optimizing the current function. @code{no-max-vectorization} disables +this behavior. 
+This corresponds to the behavior of the command-line options +@option{-mmax-vectorization} and @option{-mno-max-vectorization}. + @cindex @code{indirect_return} function attribute, AArch64 @item indirect_return The @code{indirect_return} attribute can be applied to a function type diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 693bd57691e2..93322778a520 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -22074,6 +22074,15 @@ used directly. The same applies when using @option{-mcpu=} when the selected cpu supports the @samp{lse} feature. This option is on by default. +@item -mmax-vectorization +@itemx -mno-max-vectorization +Enable or disable an override to vectorizer cost model making vectorization +always appear profitable. This option can be combined with +@option{-mautovec-preference} allowing precise control over which ISA will be +used for auto-vectorization. Unlike @option{-fno-vect-cost-model} or +@option{-fvect-cost-model=unlimited} this option does not turn off cost +comparison between different vector modes. 
+ @opindex march @item -march=@var{name} Specify the name of the target architecture and, optionally, one or diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c new file mode 100644 index 000000000000..c405591a101d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=armv8-a+sve -mmax-vectorization -fdump-tree-vect-details" } */ + +void +foo (char *restrict a, int *restrict b, int *restrict c, + int *restrict d, int stride) +{ + if (stride <= 1) + return; + + for (int i = 0; i < 3; i++) + { + int res = c[i]; + int t = b[i * stride]; + if (a[i] != 0) + res = t * d[i]; + c[i] = res; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c new file mode 100644 index 000000000000..8e91f9e9c299 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=armv8-a+sve -fdump-tree-vect-details" } */ + +void __attribute__ (( target ("max-vectorization"))) +foo (char *restrict a, int *restrict b, int *restrict c, + int *restrict d, int stride) +{ + if (stride <= 1) + return; + + for (int i = 0; i < 3; i++) + { + int res = c[i]; + int t = b[i * stride]; + if (a[i] != 0) + res = t * d[i]; + c[i] = res; + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */