> > /* Complete the target-specific cost calculations. */ > > loop_vinfo->vector_costs->finish_cost (loop_vinfo->scalar_costs); > > vec_prologue_cost = loop_vinfo->vector_costs->prologue_cost (); > > @@ -12373,6 +12394,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, > gimple *loop_vectorized_call) > > dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" > > " variable-length vectorization factor\n"); > > } > > + > > + /* When we have unrolled the loop due to a user requested value we should > > + leave it up to the RTL unroll heuristics to determine if it's still > > worth > > + while to unroll more. */ > > + if (LOOP_VINFO_USER_UNROLL (loop_vinfo)) > > What I meant with copying of LOOP_VINFO_USER_UNROLL is that I think > you'll never get to this being true as you set the suggested unroll > factor for the costing attempt of the not extra unrolled loop but > the transform where you want to reset it is when the unrolling > was actually applied?
It was being set on every analysis of the main loop body. Since it wasn't actually cleared until we had picked a mode and done codegen, the condition would be true. However... > > That said, it would be clearer if LOOP_VINFO_USER_UNROLL would be > set in vect_analyze_loop_1 where we have > I agree this is much nicer. Bootstrapped and regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, x86_64-pc-linux-gnu -m32, -m64 with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * doc/extend.texi: Document pragma unroll interaction with vectorizer. * tree-vectorizer.h (LOOP_VINFO_USER_UNROLL): New. (class _loop_vec_info): Add user_unroll. * tree-vect-loop.cc (vect_analyze_loop_1): Set suggested_unroll_factor and retry. (_loop_vec_info::_loop_vec_info): Initialize user_unroll. (vect_transform_loop): Clear the loop->unroll value if the pragma was used. gcc/testsuite/ChangeLog: * gcc.target/aarch64/unroll-vect.c: New test. -- inline copy of patch -- diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e87a3c271f8420d8fd175823b5bb655f76c89afe..f8261d13903afc90d3341c09ab3fdbd0ab96ea49 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -10398,6 +10398,11 @@ unrolled @var{n} times regardless of any commandline arguments. When the option is @var{preferred} then the user is allowed to override the unroll amount through commandline options. +If the loop was vectorized the unroll factor specified will be used to seed the +vectorizer unroll factor. Whether the loop is unrolled or not will be +determined by target costing. The resulting vectorized loop may still be +unrolled more in later passes depending on the target costing. 
+ @end table @node Thread-Local diff --git a/gcc/testsuite/gcc.target/aarch64/unroll-vect.c b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c new file mode 100644 index 0000000000000000000000000000000000000000..3cb774ba95787ebee488fbe7306299ef28e6bb35 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv8-a --param aarch64-autovec-preference=asimd-only -std=gnu99" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** f1: +** ... +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** ... +*/ +void f1 (int *restrict a, int n) +{ +#pragma GCC unroll 16 + for (int i = 0; i < n; i++) + a[i] *= 2; +} + diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fe6f3cf188e40396b299ff9e814cc402bc2d4e2d..f215b6bc7881e7e659272cefbe3d5c8892ef768c 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1073,6 +1073,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) peeling_for_gaps (false), peeling_for_niter (false), early_breaks (false), + user_unroll (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -3428,27 +3429,51 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, res ? "succeeded" : "failed", GET_MODE_NAME (loop_vinfo->vector_mode)); - if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1) + auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll; + if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + /* Check to see if the user wants to unroll or if the target wants to. 
*/ + && (suggested_unroll_factor > 1 || user_unroll > 1)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, + if (suggested_unroll_factor == 1) + { + int assumed_vf = vect_vf_for_cost (loop_vinfo); + int unroll_fact = user_unroll / assumed_vf; + suggested_unroll_factor = 1 << ceil_log2 (unroll_fact); + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "setting unroll factor to %d based on user requested " + "unroll factor %d and suggested vectorization " + "factor: %d\n", + suggested_unroll_factor, user_unroll, assumed_vf); + } + } + + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "***** Re-trying analysis for unrolling" " with unroll factor %d and slp %s.\n", suggested_unroll_factor, slp_done_for_suggested_uf ? "on" : "off"); - loop_vec_info unroll_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); - unroll_vinfo->vector_mode = vector_mode; - unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; - opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, - slp_done_for_suggested_uf); - if (new_res) - { - delete loop_vinfo; - loop_vinfo = unroll_vinfo; - } - else - delete unroll_vinfo; + loop_vec_info unroll_vinfo + = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); + unroll_vinfo->vector_mode = vector_mode; + unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; + opt_result new_res + = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, + slp_done_for_suggested_uf); + if (new_res) + { + delete loop_vinfo; + loop_vinfo = unroll_vinfo; + LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1; + } + else + delete unroll_vinfo; + } } /* Remember the autodetected vector mode. 
*/ @@ -12373,6 +12398,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" " variable-length vectorization factor\n"); } + + /* When we have unrolled the loop due to a user requested value we should + leave it up to the RTL unroll heuristics to determine if it's still worth + while to unroll more. */ + if (LOOP_VINFO_USER_UNROLL (loop_vinfo)) + loop->unroll = 0; + /* Free SLP instances here because otherwise stmt reference counting won't work. */ slp_instance instance; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index a2f33a5ecd60288fe7f28ee639ff8b6a77667796..8fd8c10ec64f7241d6b097491f84400164893911 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -970,6 +970,10 @@ public: /* Main loop IV cond. */ gcond* loop_iv_cond; + /* True if we have an unroll factor requested by the user through pragma GCC + unroll. */ + bool user_unroll; + /* True if there are no loop carried data dependencies in the loop. If loop->safelen <= 1, then this is always true, either the loop didn't have any loop carried data dependencies, or the loop is being @@ -1094,6 +1098,7 @@ public: #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds +#define LOOP_VINFO_USER_UNROLL(L) (L)->user_unroll #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
rb19435 (1).patch
Description: rb19435 (1).patch