Hi,
I have noticed that we now enable more loop transformations at -O3 but
not for -fprofile-use.  Like the loop transforms we already have, they
should be enabled because with a profile they should be an almost
consistent win.  I have checked that all the passes check the profile
except for loop interchange, which I guess is OK since the code size
should not increase in general.

Bootstrapped/regtested x86_64-linux, committed.

Honza

        * opts.c (enable_fdo_optimizations): Enable
        version-loops-for-strides, loop-interchange, unroll-and-jam
        and tree-loop-distribution.
        * doc/invoke.texi: Document newly enabled options.
Index: opts.c
===================================================================
--- opts.c      (revision 267601)
+++ opts.c      (working copy)
@@ -1708,10 +1708,18 @@ enable_fdo_optimizations (struct gcc_opt
     opts->x_flag_tree_loop_vectorize = value;
   if (!opts_set->x_flag_tree_slp_vectorize)
     opts->x_flag_tree_slp_vectorize = value;
+  if (!opts_set->x_flag_version_loops_for_strides)
+    opts->x_flag_version_loops_for_strides = value;
   if (!opts_set->x_flag_vect_cost_model)
     opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
   if (!opts_set->x_flag_tree_loop_distribute_patterns)
     opts->x_flag_tree_loop_distribute_patterns = value;
+  if (!opts_set->x_flag_loop_interchange)
+    opts->x_flag_loop_interchange = value;
+  if (!opts_set->x_flag_unroll_jam)
+    opts->x_flag_unroll_jam = value;
+  if (!opts_set->x_flag_tree_loop_distribution)
+    opts->x_flag_tree_loop_distribution = value;
 }
 
 /* -f{,no-}sanitize{,-recover}= suboptions.  */
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi     (revision 267603)
+++ doc/invoke.texi     (working copy)
@@ -9499,6 +9499,8 @@ DO I = 1, N
    D(I) = E(I) * F
 ENDDO
 @end smallexample
+This flag is enabled by default at @option{-O3}.
+It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -ftree-loop-distribute-patterns
 @opindex ftree-loop-distribute-patterns
@@ -9524,6 +9526,8 @@ DO I = 1, N
 ENDDO
 @end smallexample
 and the initialization loop is transformed into a call to memset zero.
+This flag is enabled by default at @option{-O3}.
+It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -floop-interchange
 @opindex floop-interchange
@@ -9544,12 +9548,14 @@ for (int i = 0; i < N; i++)
       c[i][j] = c[i][j] + a[i][k]*b[k][j];
 @end smallexample
 This flag is enabled by default at @option{-O3}.
+It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -floop-unroll-and-jam
 @opindex floop-unroll-and-jam
 Apply unroll and jam transformations on feasible loops.  In a loop
 nest this unrolls the outer loop by some factor and fuses the resulting
 multiple inner loops.  This flag is enabled by default at @option{-O3}.
+It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -ftree-loop-im
 @opindex ftree-loop-im
@@ -10804,6 +10810,8 @@ else
 
 This is particularly useful for assumed-shape arrays in Fortran where
 (for example) it allows better vectorization assuming contiguous accesses.
+This flag is enabled by default at @option{-O3}.
+It is also enabled by @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -ffunction-sections
 @itemx -fdata-sections

Reply via email to