> > /* Complete the target-specific cost calculations. */ > > loop_vinfo->vector_costs->finish_cost (loop_vinfo->scalar_costs); > > vec_prologue_cost = loop_vinfo->vector_costs->prologue_cost (); > > @@ -12373,6 +12394,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, > gimple *loop_vectorized_call) > > dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" > > " variable-length vectorization factor\n"); > > } > > + > > + /* When we have unrolled the loop due to a user requested value we should > > + leave it up to the RTL unroll heuristics to determine if it's still > > worth > > + while to unroll more. */ > > + if (LOOP_VINFO_USER_UNROLL (loop_vinfo)) > > What I meant with copying of LOOP_VINFO_USER_UNROLL is that I think > you'll never get to this being true as you set the suggested unroll > factor for the costing attempt of the not extra unrolled loop but > the transform where you want to reset is is when the unrolling > was actually applied?
It was being set on every analysis of the main loop body. Since it wasn't
actually cleared until we had picked a mode and done codegen, the condition
would be true.
However...
>
> That said, it would be clearer if LOOP_VINFO_USER_UNROLL would be
> set in vect_analyze_loop_1 where we have
>
I agree this is much nicer.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* doc/extend.texi: Document pragma unroll interaction with vectorizer.
* tree-vectorizer.h (LOOP_VINFO_USER_UNROLL): New.
(class _loop_vec_info): Add user_unroll.
* tree-vect-loop.cc (vect_analyze_loop_1): Set
suggested_unroll_factor and retry.
(_loop_vec_info::_loop_vec_info): Initialize user_unroll.
(vect_transform_loop): Clear the loop->unroll value if the pragma was
used.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/unroll-vect.c: New test.
-- inline copy of patch --
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index
e87a3c271f8420d8fd175823b5bb655f76c89afe..f8261d13903afc90d3341c09ab3fdbd0ab96ea49
100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10398,6 +10398,11 @@ unrolled @var{n} times regardless of any commandline
arguments.
When the option is @var{preferred} then the user is allowed to override the
unroll amount through commandline options.
+If the loop is vectorized, the specified unroll factor is used to seed the
+vectorizer's unroll factor. Whether the loop is actually unrolled is then
+determined by target costing. The resulting vectorized loop may still be
+unrolled further in later passes, depending on the target costing.
+
@end table
@node Thread-Local
diff --git a/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
new file mode 100644
index
0000000000000000000000000000000000000000..3cb774ba95787ebee488fbe7306299ef28e6bb35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a --param
aarch64-autovec-preference=asimd-only -std=gnu99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+** ...
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** ...
+*/
+void f1 (int *restrict a, int n)
+{
+#pragma GCC unroll 16
+ for (int i = 0; i < n; i++)
+ a[i] *= 2;
+}
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index
fe6f3cf188e40396b299ff9e814cc402bc2d4e2d..f215b6bc7881e7e659272cefbe3d5c8892ef768c
100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1073,6 +1073,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in,
vec_info_shared *shared)
peeling_for_gaps (false),
peeling_for_niter (false),
early_breaks (false),
+ user_unroll (false),
no_data_dependencies (false),
has_mask_store (false),
scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -3428,27 +3429,51 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared
*shared,
res ? "succeeded" : "failed",
GET_MODE_NAME (loop_vinfo->vector_mode));
- if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor >
1)
+ auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+ if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ /* Check to see if the user wants to unroll or if the target wants to.
*/
+ && (suggested_unroll_factor > 1 || user_unroll > 1))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ if (suggested_unroll_factor == 1)
+ {
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
+ int unroll_fact = user_unroll / assumed_vf;
+ suggested_unroll_factor = 1 << ceil_log2 (unroll_fact);
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "setting unroll factor to %d based on user requested "
+ "unroll factor %d and suggested vectorization "
+ "factor: %d\n",
+ suggested_unroll_factor, user_unroll, assumed_vf);
+ }
+ }
+
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"***** Re-trying analysis for unrolling"
" with unroll factor %d and slp %s.\n",
suggested_unroll_factor,
slp_done_for_suggested_uf ? "on" : "off");
- loop_vec_info unroll_vinfo
- = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
- unroll_vinfo->vector_mode = vector_mode;
- unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
- opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
- slp_done_for_suggested_uf);
- if (new_res)
- {
- delete loop_vinfo;
- loop_vinfo = unroll_vinfo;
- }
- else
- delete unroll_vinfo;
+ loop_vec_info unroll_vinfo
+ = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
+ unroll_vinfo->vector_mode = vector_mode;
+ unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
+ opt_result new_res
+ = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
+ slp_done_for_suggested_uf);
+ if (new_res)
+ {
+ delete loop_vinfo;
+ loop_vinfo = unroll_vinfo;
+ LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1;
+ }
+ else
+ delete unroll_vinfo;
+ }
}
/* Remember the autodetected vector mode. */
@@ -12373,6 +12398,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple
*loop_vectorized_call)
dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
" variable-length vectorization factor\n");
}
+
+ /* When we have unrolled the loop due to a user requested value we should
+ leave it up to the RTL unroll heuristics to determine if it's still worth
+ while to unroll more. */
+ if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+ loop->unroll = 0;
+
/* Free SLP instances here because otherwise stmt reference counting
won't work. */
slp_instance instance;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index
a2f33a5ecd60288fe7f28ee639ff8b6a77667796..8fd8c10ec64f7241d6b097491f84400164893911
100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -970,6 +970,10 @@ public:
/* Main loop IV cond. */
gcond* loop_iv_cond;
+ /* True if we have an unroll factor requested by the user through pragma GCC
+ unroll. */
+ bool user_unroll;
+
/* True if there are no loop carried data dependencies in the loop.
If loop->safelen <= 1, then this is always true, either the loop
didn't have any loop carried data dependencies, or the loop is being
@@ -1094,6 +1098,7 @@ public:
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
#define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
#define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
+#define LOOP_VINFO_USER_UNROLL(L) (L)->user_unroll
#define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
rb19435 (1).patch
Description: rb19435 (1).patch
