On Tue, Jan 14, 2014 at 10:01:06AM +0100, Richard Biener wrote: > Jakub, adding the new flag is ok with me.
So like this? 2014-01-14 Jakub Jelinek <ja...@redhat.com> * tree-vectorizer.h (struct _loop_vec_info): Add no_data_dependencies field. (LOOP_VINFO_NO_DATA_DEPENDENCIES): Define. * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Clear it when not giving up or versioning for alias only because of loop->safelen. (vect_analyze_data_ref_dependences): Set to true. * tree-vect-stmts.c (vectorizable_load): Use LOOP_VINFO_NO_DATA_DEPENDENCIES instead of LOOP_REQUIRES_VERSIONING_FOR_ALIAS. --- gcc/tree-vectorizer.h.jj 2014-01-03 11:40:57.000000000 +0100 +++ gcc/tree-vectorizer.h 2014-01-14 13:10:00.477989924 +0100 @@ -347,6 +347,25 @@ typedef struct _loop_vec_info { fix it up. */ bool operands_swapped; + /* True if there are no loop carried data dependencies in the loop. + If loop->safelen <= 1, then this is always true, either the loop + didn't have any loop carried data dependencies, or the loop is being + vectorized guarded with some runtime alias checks, or couldn't + be vectorized at all, but then this field shouldn't be used. + For loop->safelen >= 2, the user has asserted that there are no + backward dependencies, but there still could be loop carried forward + dependencies in such loops. This flag will be false if normal + vectorizer data dependency analysis would fail or require versioning + for alias, but because of loop->safelen >= 2 it has been vectorized + even without versioning for alias. E.g. in: + #pragma omp simd + for (int i = 0; i < m; i++) + a[i] = a[i + k] * c; + (or #pragma simd or #pragma ivdep) we can vectorize this and it will + DTRT even for k > 0 && k < m, but without safelen we would not + vectorize this, so this field would be false. */ + bool no_data_dependencies; + /* If if-conversion versioned this loop before conversion, this is the loop version without if-conversion. */ struct loop *scalar_loop; @@ -385,6 +404,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter +#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ --- gcc/tree-vect-data-refs.c.jj 2014-01-10 00:38:26.000000000 +0100 +++ gcc/tree-vect-data-refs.c 2014-01-14 13:12:06.056342116 +0100 @@ -244,6 +244,7 @@ vect_analyze_data_ref_dependence (struct { if (loop->safelen < *max_vf) *max_vf = loop->safelen; + LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; return false; } @@ -291,6 +292,7 @@ vect_analyze_data_ref_dependence (struct { if (loop->safelen < *max_vf) *max_vf = loop->safelen; + LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; return false; } @@ -447,6 +449,7 @@ vect_analyze_data_ref_dependences (loop_ dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_data_ref_dependences ===\n"); + LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true; if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo), &LOOP_VINFO_DDRS (loop_vinfo), LOOP_VINFO_LOOP_NEST (loop_vinfo), true)) --- gcc/tree-vect-stmts.c.jj 2014-01-14 10:33:21.000000000 +0100 +++ gcc/tree-vect-stmts.c 2014-01-14 13:14:15.157677243 +0100 @@ -6381,10 +6381,11 @@ vectorizable_load (gimple stmt, gimple_s if (inv_p && !bb_vinfo) { gcc_assert (!grouped_load); - /* If we have versioned for aliasing then we are sure - this is a loop invariant load and thus we can insert - it on the preheader edge. */ - if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) + /* If we have versioned for aliasing or the loop doesn't + have any data dependencies that would preclude this, + then we are sure this is a loop invariant load and + thus we can insert it on the preheader edge. */ + if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)) { if (dump_enabled_p ()) { Jakub