> >    /* Complete the target-specific cost calculations.  */
> >    loop_vinfo->vector_costs->finish_cost (loop_vinfo->scalar_costs);
> >    vec_prologue_cost = loop_vinfo->vector_costs->prologue_cost ();
> > @@ -12373,6 +12394,13 @@ vect_transform_loop (loop_vec_info loop_vinfo,
> gimple *loop_vectorized_call)
> >     dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
> >                      " variable-length vectorization factor\n");
> >      }
> > +
> > +  /* When we have unrolled the loop due to a user requested value we should
> > +     leave it up to the RTL unroll heuristics to determine if it's still 
> > worth
> > +     while to unroll more.  */
> > +  if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
> 
> What I meant with copying of LOOP_VINFO_USER_UNROLL is that I think
> you'll never get to this being true as you set the suggested unroll
> factor for the costing attempt of the not extra unrolled loop but
> the transform where you want to reset it is when the unrolling
> was actually applied?

It was being set on every analysis of the main loop body.  Since it wasn't
actually cleared until we had picked a mode and done codegen, the condition
would be true.

However...

> 
> That said, it would be clearer if LOOP_VINFO_USER_UNROLL would be
> set in vect_analyze_loop_1 where we have
> 

I agree this is much nicer.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu (-m32 and -m64)
with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * doc/extend.texi: Document pragma unroll interaction with vectorizer.
        * tree-vectorizer.h (LOOP_VINFO_USER_UNROLL): New.
        (class _loop_vec_info): Add user_unroll.
        * tree-vect-loop.cc (vect_analyze_loop_1): Set
        suggested_unroll_factor and retry.
        (_loop_vec_info::_loop_vec_info): Initialize user_unroll.
        (vect_transform_loop): Clear the loop->unroll value if the pragma was
        used.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/unroll-vect.c: New test.

-- inline copy of patch --

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 
e87a3c271f8420d8fd175823b5bb655f76c89afe..f8261d13903afc90d3341c09ab3fdbd0ab96ea49
 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10398,6 +10398,11 @@ unrolled @var{n} times regardless of any commandline 
arguments.
 When the option is @var{preferred} then the user is allowed to override the
 unroll amount through commandline options.
 
+If the loop is vectorized, the specified unroll factor is used to seed the
+vectorizer's unroll factor.  Whether the vectorized loop is actually unrolled
+is determined by target costing.  The resulting vectorized loop may still be
+unrolled further by later passes, depending on the target costing.
+
 @end table
 
 @node Thread-Local
diff --git a/gcc/testsuite/gcc.target/aarch64/unroll-vect.c 
b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
new file mode 100644
index 
0000000000000000000000000000000000000000..3cb774ba95787ebee488fbe7306299ef28e6bb35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a --param 
aarch64-autovec-preference=asimd-only -std=gnu99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**     ...
+**     add     v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     add     v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     add     v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     add     v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     ...
+*/
+void f1 (int *restrict a, int n)
+{
+#pragma GCC unroll 16
+  for (int i = 0; i < n; i++)
+    a[i] *= 2;
+}
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 
fe6f3cf188e40396b299ff9e814cc402bc2d4e2d..f215b6bc7881e7e659272cefbe3d5c8892ef768c
 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1073,6 +1073,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
     peeling_for_gaps (false),
     peeling_for_niter (false),
     early_breaks (false),
+    user_unroll (false),
     no_data_dependencies (false),
     has_mask_store (false),
     scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -3428,27 +3429,51 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
*shared,
                     res ? "succeeded" : "failed",
                     GET_MODE_NAME (loop_vinfo->vector_mode));
 
-  if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 
1)
+  auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+  if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+      /* Check to see if the user wants to unroll or if the target wants to.  
*/
+      && (suggested_unroll_factor > 1 || user_unroll > 1))
     {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
+      if (suggested_unroll_factor == 1)
+       {
+         int assumed_vf = vect_vf_for_cost (loop_vinfo);
+         int unroll_fact = user_unroll / assumed_vf;
+         suggested_unroll_factor = 1 << ceil_log2 (unroll_fact);
+         if (suggested_unroll_factor > 1)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                        "setting unroll factor to %d based on user requested "
+                        "unroll factor %d and suggested vectorization "
+                        "factor: %d\n",
+                        suggested_unroll_factor, user_unroll, assumed_vf);
+           }
+       }
+
+       if (suggested_unroll_factor > 1)
+         {
+           if (dump_enabled_p ())
+             dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis for unrolling"
                         " with unroll factor %d and slp %s.\n",
                         suggested_unroll_factor,
                         slp_done_for_suggested_uf ? "on" : "off");
-      loop_vec_info unroll_vinfo
-       = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
-      unroll_vinfo->vector_mode = vector_mode;
-      unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
-      opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
-                                               slp_done_for_suggested_uf);
-      if (new_res)
-       {
-         delete loop_vinfo;
-         loop_vinfo = unroll_vinfo;
-       }
-      else
-       delete unroll_vinfo;
+           loop_vec_info unroll_vinfo
+               = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
+           unroll_vinfo->vector_mode = vector_mode;
+           unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
+           opt_result new_res
+               = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
+                                      slp_done_for_suggested_uf);
+           if (new_res)
+             {
+               delete loop_vinfo;
+               loop_vinfo = unroll_vinfo;
+               LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1;
+             }
+           else
+             delete unroll_vinfo;
+         }
     }
 
   /* Remember the autodetected vector mode.  */
@@ -12373,6 +12398,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple 
*loop_vectorized_call)
        dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
                         " variable-length vectorization factor\n");
     }
+
+  /* When we have unrolled the loop due to a user requested value we should
+     leave it up to the RTL unroll heuristics to determine if it's still worth
+     while to unroll more.  */
+  if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+    loop->unroll = 0;
+
   /* Free SLP instances here because otherwise stmt reference counting
      won't work.  */
   slp_instance instance;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 
a2f33a5ecd60288fe7f28ee639ff8b6a77667796..8fd8c10ec64f7241d6b097491f84400164893911
 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -970,6 +970,10 @@ public:
   /* Main loop IV cond.  */
   gcond* loop_iv_cond;
 
+  /* True if we have an unroll factor requested by the user through pragma GCC
+     unroll.  */
+  bool user_unroll;
+
   /* True if there are no loop carried data dependencies in the loop.
      If loop->safelen <= 1, then this is always true, either the loop
      didn't have any loop carried data dependencies, or the loop is being
@@ -1094,6 +1098,7 @@ public:
 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
 #define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
 #define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
+#define LOOP_VINFO_USER_UNROLL(L)          (L)->user_unroll
 #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
 #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor

Attachment: rb19435 (1).patch
Description: rb19435 (1).patch

Reply via email to