[gcc r16-1649] AArch64: propose -mmax-vectorization as an option to override vector costing

Tamar Christina via Gcc-cvs Tue, 24 Jun 2025 03:10:49 -0700

https://gcc.gnu.org/g:8e80287601c5954bc437212be5f91aaf12074b22


commit r16-1649-g8e80287601c5954bc437212be5f91aaf12074b22
Author: Tamar Christina <tamar.christ...@arm.com>
Date:   Tue Jun 24 11:10:11 2025 +0100

    AArch64: propose -mmax-vectorization as an option to override vector costing
    
    With the middle-end providing a way to make vectorization more profitable by
    scaling vect-scalar-cost-multiplier this makes a more user friendly option
    to make it easier to use.
    
    I propose making it an actual -m option that we document and retain vs using
    the parameter name.  In the future I would like to extend this option to modify
    additional costing in the AArch64 backend itself.
    
    This can be used together with --param aarch64-autovec-preference to get the
    vectorizer to say, always vectorize with SVE.  I did consider making this an
    additional enum to --param aarch64-autovec-preference but I also think this is
    a useful thing to be able to set with pragmas and attributes, but am open to
    suggestions.
    
    Note that as a follow up I plan on extending -fdump-tree-vect to support -stats
    which is then intended to be usable with this flag.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.opt (max-vectorization): New.
            * config/aarch64/aarch64.cc (aarch64_override_options_internal): Save
            and restore option.
            Implement it through vect-scalar-cost-multiplier.
            (aarch64_attributes): Default to off.
            * common/config/aarch64/aarch64-common.cc (aarch64_handle_option):
            Initialize option.
            * doc/extend.texi (max-vectorization): Document attribute.
            * doc/invoke.texi (max-vectorization): Document flag.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/cost_model_17.c: New test.
            * gcc.target/aarch64/sve/cost_model_18.c: New test.

Diff:
---
 gcc/common/config/aarch64/aarch64-common.cc         |  4 ++++
 gcc/config/aarch64/aarch64.cc                       |  8 ++++++++
 gcc/config/aarch64/aarch64.opt                      |  4 ++++
 gcc/doc/extend.texi                                 | 10 ++++++++++
 gcc/doc/invoke.texi                                 |  9 +++++++++
 .../gcc.target/aarch64/sve/cost_model_17.c          | 21 +++++++++++++++++++++
 .../gcc.target/aarch64/sve/cost_model_18.c          | 21 +++++++++++++++++++++
 7 files changed, 77 insertions(+)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index b9ed83642ade..1488697c6ce4 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -142,6 +142,10 @@ aarch64_handle_option (struct gcc_options *opts,
       opts->x_aarch64_flag_outline_atomics = val;
       return true;
 
+    case OPT_mmax_vectorization:
+      opts->x_flag_aarch64_max_vectorization = val;
+      return true;
+
     default:
       return true;
     }
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index af8415c29a97..adbe05ac404a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19036,6 +19036,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
   if (TARGET_SME && !TARGET_SVE2)
     sorry ("no support for %qs without %qs", "sme", "sve2");
 
+  /* Set scalar costing to a high value such that we always pick
+     vectorization.  Increase scalar costing by 10000%.  */
+  if (opts->x_flag_aarch64_max_vectorization)
+    SET_OPTION_IF_UNSET (opts, &global_options_set,
+                        param_vect_scalar_cost_multiplier, 10000);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
@@ -19786,6 +19792,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] =
      OPT_msign_return_address_ },
   { "outline-atomics", aarch64_attr_bool, true, NULL,
      OPT_moutline_atomics},
+  { "max-vectorization", aarch64_attr_bool, false, NULL,
+     OPT_mmax_vectorization},
   { NULL, aarch64_attr_custom, false, NULL, OPT____ }
 };
 
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index f32d56d4ffae..17e1c700dd2b 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,10 @@ msve-vector-bits=
 Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
 -msve-vector-bits=<number>     Set the number of bits in an SVE vector register.
 
+mmax-vectorization
+Target Var(flag_aarch64_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
 mverbose-cost-dump
 Target Undocumented Var(flag_aarch64_verbose_cost)
 Enables verbose cost model dumping in the debug dump files.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7da99f77ec82..55adf649acf8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3884,6 +3884,16 @@ Enable or disable calls to out-of-line helpers to implement atomic operations.
 This corresponds to the behavior of the command-line options
 @option{-moutline-atomics} and @option{-mno-outline-atomics}.
 
+@cindex @code{max-vectorization} function attribute, AArch64
+@item max-vectorization
+@itemx no-max-vectorization
+@code{max-vectorization} tells GCC's vectorizer to treat all vector
+loops as being more profitable than the original scalar loops when
+optimizing the current function.  @code{no-max-vectorization} disables
+this behavior.
+This corresponds to the behavior of the command-line options
+@option{-mmax-vectorization} and @option{-mno-max-vectorization}.
+
 @cindex @code{indirect_return} function attribute, AArch64
 @item indirect_return
 The @code{indirect_return} attribute can be applied to a function type
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 693bd57691e2..93322778a520 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -22074,6 +22074,15 @@ used directly.  The same applies when using @option{-mcpu=} when the
 selected cpu supports the @samp{lse} feature.
 This option is on by default.
 
+@item -mmax-vectorization
+@itemx -mno-max-vectorization
+Enable or disable an override to vectorizer cost model making vectorization
+always appear profitable.  This option can be combined with
+@option{-mautovec-preference} allowing precise control over which ISA will be
+used for auto-vectorization.  Unlike @option{-fno-vect-cost-model} or
+@option{-fvect-cost-model=unlimited} this option does not turn off cost
+comparison between different vector modes.
+
 @opindex march
 @item -march=@var{name}
 Specify the name of the target architecture and, optionally, one or
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
new file mode 100644
index 000000000000..c405591a101d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_17.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -mmax-vectorization -fdump-tree-vect-details" } */
+
+void
+foo (char *restrict a, int *restrict b, int *restrict c,
+     int *restrict d, int stride)
+{
+    if (stride <= 1)
+        return;
+
+    for (int i = 0; i < 3; i++)
+        {
+            int res = c[i];
+            int t = b[i * stride];
+            if (a[i] != 0)
+                res = t * d[i];
+            c[i] = res;
+        }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
new file mode 100644
index 000000000000..8e91f9e9c299
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_18.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8-a+sve -fdump-tree-vect-details" } */
+
+void __attribute__ (( target ("max-vectorization")))
+foo (char *restrict a, int *restrict b, int *restrict c,
+     int *restrict d, int stride)
+{
+    if (stride <= 1)
+        return;
+
+    for (int i = 0; i < 3; i++)
+        {
+            int res = c[i];
+            int t = b[i * stride];
+            if (a[i] != 0)
+                res = t * d[i];
+            c[i] = res;
+        }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

[gcc r16-1649] AArch64: propose -mmax-vectorization as an option to override vector costing

Reply via email to