Jakub Jelinek <ja...@redhat.com> wrote: >On Thu, Jan 16, 2014 at 09:40:24AM +0100, Richard Biener wrote: >> That should be enough I think (we can't hoist PHIs anyway). > >Ok, here is what I've finally successfully bootstrapped/regtested >on x86_64-linux and i686-linux, ok for trunk?
Ok. Thanks, Richard. >2014-01-16 Jakub Jelinek <ja...@redhat.com> > > * tree-vectorizer.h (struct _loop_vec_info): Add no_data_dependencies > field. > (LOOP_VINFO_NO_DATA_DEPENDENCIES): Define. > * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Clear it > when not giving up or versioning for alias only because of > loop->safelen. > (vect_analyze_data_ref_dependences): Set to true. > * tree-vect-stmts.c (hoist_defs_of_uses): Return false if def_stmt > is a GIMPLE_PHI. > (vectorizable_load): Use LOOP_VINFO_NO_DATA_DEPENDENCIES instead of > LOOP_REQUIRES_VERSIONING_FOR_ALIAS, add && !nested_in_vect_loop > to the condition. > >--- gcc/tree-vectorizer.h.jj 2014-01-15 08:11:18.903449194 +0100 >+++ gcc/tree-vectorizer.h 2014-01-16 16:01:48.892476417 +0100 >@@ -347,6 +347,25 @@ typedef struct _loop_vec_info { > fix it up. */ > bool operands_swapped; > >+ /* True if there are no loop carried data dependencies in the loop. >+ If loop->safelen <= 1, then this is always true, either the loop >+ didn't have any loop carried data dependencies, or the loop is >being >+ vectorized guarded with some runtime alias checks, or couldn't >+ be vectorized at all, but then this field shouldn't be used. >+ For loop->safelen >= 2, the user has asserted that there are no >+ backward dependencies, but there still could be loop carried >forward >+ dependencies in such loops. This flag will be false if normal >+ vectorizer data dependency analysis would fail or require >versioning >+ for alias, but because of loop->safelen >= 2 it has been >vectorized >+ even without versioning for alias. E.g. in: >+ #pragma omp simd >+ for (int i = 0; i < m; i++) >+ a[i] = a[i + k] * c; >+ (or #pragma simd or #pragma ivdep) we can vectorize this and it >will >+ DTRT even for k > 0 && k < m, but without safelen we would not >+ vectorize this, so this field would be false. */ >+ bool no_data_dependencies; >+ > /* If if-conversion versioned this loop before conversion, this is the > loop version without if-conversion. */ > struct loop *scalar_loop; >@@ -385,6 +404,7 @@ typedef struct _loop_vec_info { > #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps > #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped > #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter >+#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies > #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop > > #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ >--- gcc/tree-vect-data-refs.c.jj 2014-01-15 08:11:22.593430265 +0100 >+++ gcc/tree-vect-data-refs.c 2014-01-16 16:01:48.916476273 +0100 >@@ -244,6 +244,7 @@ vect_analyze_data_ref_dependence (struct > { > if (loop->safelen < *max_vf) > *max_vf = loop->safelen; >+ LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; > return false; > } > >@@ -291,6 +292,7 @@ vect_analyze_data_ref_dependence (struct > { > if (loop->safelen < *max_vf) > *max_vf = loop->safelen; >+ LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; > return false; > } > >@@ -447,6 +449,7 @@ vect_analyze_data_ref_dependences (loop_ > dump_printf_loc (MSG_NOTE, vect_location, > "=== vect_analyze_data_ref_dependences ===\n"); > >+ LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true; > if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo), > &LOOP_VINFO_DDRS (loop_vinfo), > LOOP_VINFO_LOOP_NEST (loop_vinfo), true)) >--- gcc/tree-vect-stmts.c.jj 2014-01-16 09:41:19.534630044 +0100 >+++ gcc/tree-vect-stmts.c 2014-01-16 16:03:50.948856799 +0100 >@@ -5504,6 +5504,8 @@ hoist_defs_of_uses (gimple stmt, struct > dependencies within them. */ > tree op2; > ssa_op_iter i2; >+ if (gimple_code (def_stmt) == GIMPLE_PHI) >+ return false; > FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE) > { > gimple def_stmt2 = SSA_NAME_DEF_STMT (op2); >@@ -6434,10 +6436,12 @@ vectorizable_load (gimple stmt, gimple_s > if (inv_p && !bb_vinfo) > { > gcc_assert (!grouped_load); >- /* If we have versioned for aliasing then we are sure >- this is a loop invariant load and thus we can insert >- it on the preheader edge. */ >- if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) >+ /* If we have versioned for aliasing or the loop doesn't >+ have any data dependencies that would preclude this, >+ then we are sure this is a loop invariant load and >+ thus we can insert it on the preheader edge. */ >+ if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) >+ && !nested_in_vect_loop > && hoist_defs_of_uses (stmt, loop)) > { > if (dump_enabled_p ()) > > > Jakub