Hi All,
With the middle-end providing a way to make vectorization more profitable by
scaling vect-scalar-cost-multiplier this makes a more user friendly option
to make it easier to use.
I propose making it an actual -m option that we document and retain vs using
the parameter name. In the future I would like to extend this option to modify
additional costing in the AArch64 backend itself.
This can be used together with --param aarch64-autovec-preference to get the
vectorizer to say, always vectorize with SVE. I did consider making this an
additional enum to --param aarch64-autovec-preference but I also think this is
a useful thing to be able to set with pragmas and attributes, but am open to
suggestions.
Note that as a follow up I plan on extending -fdump-tree-vect to support -stats
which is then intended to be usable with this flag.
Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64.opt (max-vectorization): New.
* config/aarch64/aarch64.cc (aarch64_override_options_internal): Save
and restore option.
Implement it through vect-scalar-cost-multiplier.
(aarch64_attributes): Default to off.
* common/config/aarch64/aarch64-common.cc (aarch64_handle_option):
Initialize option.
* doc/extend.texi (max-vectorization): Document attribute.
* doc/invoke.texi (max-vectorization): Document flag.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/cost_model_17.c: New test.
* gcc.target/aarch64/sve/cost_model_18.c: New test.
---
diff --git a/gcc/common/config/aarch64/aarch64-common.cc
b/gcc/common/config/aarch64/aarch64-common.cc
index
b9ed83642ade4462f1b030d68cf9744d31d70c23..1488697c6ce43108ae2938e5b8a00ac7ac262da6
100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts,
opts->x_aarch64_flag_outline_atomics = val;
return true;
+ case OPT_mmax_vectorization:
+ opts->x_flag_aarch64_max_vectorization = val;
+ return true;
+
default:
return true;
}
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index
9e3f2885bccb62550c5fcfdf93d72fbc2e63233e..01bc0f9f7bb6e14d5d180949404238baef3f1cac
100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18973,6 +18973,12 @@ aarch64_override_options_internal (struct gcc_options
*opts)
if (TARGET_SME && !TARGET_SVE2)
sorry ("no support for %qs without %qs", "sme", "sve2");
+ /* Set scalar costing to a high value such that we always pick
+ vectorization. Increase scalar costing by 400%. */
+ if (opts->x_flag_aarch64_max_vectorization)
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_vect_scalar_cost_multiplier, 400);
+
aarch64_override_options_after_change_1 (opts);
}
@@ -19723,6 +19729,8 @@ static const struct aarch64_attribute_info
aarch64_attributes[] =
OPT_msign_return_address_ },
{ "outline-atomics", aarch64_attr_bool, true, NULL,
OPT_moutline_atomics},
+ { "max-vectorization", aarch64_attr_bool, false, NULL,
+ OPT_mmax_vectorization},
{ NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index
f32d56d4ffaef7862c1c45a11753be5d480220d0..2725c50da64a2c05489ea6202bdd5eedf1ba7e27
100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,10 @@ msve-vector-bits=
Target RejectNegative Joined Enum(sve_vector_bits)
Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
-msve-vector-bits=<number> Set the number of bits in an SVE vector
register.
+mmax-vectorization
+Target Undocumented Var(flag_aarch64_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
mverbose-cost-dump
Target Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index
40ccf22b29f4316928f905ec2c978fdaf30a55ec..759a04bc7c4c66155154d55045bb75d695b2d6c2
100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3882,6 +3882,13 @@ Enable or disable calls to out-of-line helpers to
implement atomic operations.
This corresponds to the behavior of the command-line options
@option{-moutline-atomics} and @option{-mno-outline-atomics}.
+@cindex @code{max-vectorization} function attribute, AArch64
+@item max-vectorization
+Enable or disable loop costing overrides inside the current function to apply
+a penalty to scalar loops such that vector costing is always profitable.
+This corresponds to the behavior of the command-line options
+@option{-mmax-vectorization} and @option{-mno-max-vectorization}.
+
@cindex @code{indirect_return} function attribute, AArch64
@item indirect_return
The @code{indirect_return} attribute can be applied to a function type
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index
95a25c0f63b77f26db05a7b48bfad8f9c58bcc5f..87adb75d8852304b155da140c1b20a8875710324
100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21984,6 +21984,12 @@ used directly. The same applies when using
@option{-mcpu=} when the
selected cpu supports the @samp{lse} feature.
This option is on by default.
+@item -mmax-vectorization
+@itemx -mno-max-vectorization
+Enable or disable override to vectorizer cost model making vectorization always
+profitable. This option can be combined with other options allowing you to
pick
+which ISA is used during vectorization.
+
@opindex march
@item -march=@var{name}
Specify the name of the target architecture and, optionally, one or
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
new file mode 100644
index
0000000000000000000000000000000000000000..c405591a101d50b4734bc6d65a6d6c01888bea48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mmax-vectorization
-fdump-tree-vect-details" } */
+
+void
+foo (char *restrict a, int *restrict b, int *restrict c,
+ int *restrict d, int stride)
+{
+ if (stride <= 1)
+ return;
+
+ for (int i = 0; i < 3; i++)
+ {
+ int res = c[i];
+ int t = b[i * stride];
+ if (a[i] != 0)
+ res = t * d[i];
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
new file mode 100644
index
0000000000000000000000000000000000000000..8e91f9e9c29971a4bb7033be6be4d2fc4c71d05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -fdump-tree-vect-details" } */
+
+void __attribute__ (( target ("max-vectorization")))
+foo (char *restrict a, int *restrict b, int *restrict c,
+ int *restrict d, int stride)
+{
+ if (stride <= 1)
+ return;
+
+ for (int i = 0; i < 3; i++)
+ {
+ int res = c[i];
+ int t = b[i * stride];
+ if (a[i] != 0)
+ res = t * d[i];
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
--
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index b9ed83642ade4462f1b030d68cf9744d31d70c23..1488697c6ce43108ae2938e5b8a00ac7ac262da6 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts,
opts->x_aarch64_flag_outline_atomics = val;
return true;
+ case OPT_mmax_vectorization:
+ opts->x_flag_aarch64_max_vectorization = val;
+ return true;
+
default:
return true;
}
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9e3f2885bccb62550c5fcfdf93d72fbc2e63233e..01bc0f9f7bb6e14d5d180949404238baef3f1cac 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18973,6 +18973,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
if (TARGET_SME && !TARGET_SVE2)
sorry ("no support for %qs without %qs", "sme", "sve2");
+ /* Set scalar costing to a high value such that we always pick
+ vectorization. Increase scalar costing by 400%. */
+ if (opts->x_flag_aarch64_max_vectorization)
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_vect_scalar_cost_multiplier, 400);
+
aarch64_override_options_after_change_1 (opts);
}
@@ -19723,6 +19729,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] =
OPT_msign_return_address_ },
{ "outline-atomics", aarch64_attr_bool, true, NULL,
OPT_moutline_atomics},
+ { "max-vectorization", aarch64_attr_bool, false, NULL,
+ OPT_mmax_vectorization},
{ NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index f32d56d4ffaef7862c1c45a11753be5d480220d0..2725c50da64a2c05489ea6202bdd5eedf1ba7e27 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,10 @@ msve-vector-bits=
Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
-msve-vector-bits=<number> Set the number of bits in an SVE vector register.
+mmax-vectorization
+Target Undocumented Var(flag_aarch64_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
mverbose-cost-dump
Target Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 40ccf22b29f4316928f905ec2c978fdaf30a55ec..759a04bc7c4c66155154d55045bb75d695b2d6c2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3882,6 +3882,13 @@ Enable or disable calls to out-of-line helpers to implement atomic operations.
This corresponds to the behavior of the command-line options
@option{-moutline-atomics} and @option{-mno-outline-atomics}.
+@cindex @code{max-vectorization} function attribute, AArch64
+@item max-vectorization
+Enable or disable loop costing overrides inside the current function to apply
+a penalty to scalar loops such that vector costing is always profitable.
+This corresponds to the behavior of the command-line options
+@option{-mmax-vectorization} and @option{-mno-max-vectorization}.
+
@cindex @code{indirect_return} function attribute, AArch64
@item indirect_return
The @code{indirect_return} attribute can be applied to a function type
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 95a25c0f63b77f26db05a7b48bfad8f9c58bcc5f..87adb75d8852304b155da140c1b20a8875710324 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21984,6 +21984,12 @@ used directly. The same applies when using @option{-mcpu=} when the
selected cpu supports the @samp{lse} feature.
This option is on by default.
+@item -mmax-vectorization
+@itemx -mno-max-vectorization
+Enable or disable override to vectorizer cost model making vectorization always
+profitable. This option can be combined with other options allowing you to pick
+which ISA is used during vectorization.
+
@opindex march
@item -march=@var{name}
Specify the name of the target architecture and, optionally, one or
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
new file mode 100644
index 0000000000000000000000000000000000000000..c405591a101d50b4734bc6d65a6d6c01888bea48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mmax-vectorization -fdump-tree-vect-details" } */
+
+void
+foo (char *restrict a, int *restrict b, int *restrict c,
+ int *restrict d, int stride)
+{
+ if (stride <= 1)
+ return;
+
+ for (int i = 0; i < 3; i++)
+ {
+ int res = c[i];
+ int t = b[i * stride];
+ if (a[i] != 0)
+ res = t * d[i];
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e91f9e9c29971a4bb7033be6be4d2fc4c71d05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -fdump-tree-vect-details" } */
+
+void __attribute__ (( target ("max-vectorization")))
+foo (char *restrict a, int *restrict b, int *restrict c,
+ int *restrict d, int stride)
+{
+ if (stride <= 1)
+ return;
+
+ for (int i = 0; i < 3; i++)
+ {
+ int res = c[i];
+ int t = b[i * stride];
+ if (a[i] != 0)
+ res = t * d[i];
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */