On January 28, 2014 11:32:40 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote: >> > I admit I fully don't understand why exactly, but my >experimentation so far >> > showed that for read/write and write/read ddrs it is ok and >desirable to >> > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for >write/write >> > ddrs it is undesirable. See the PR for further tests, perhaps I >could >> > turn them into further testcases. >> >> Please. > >New testcase in the patch. > >> > - if (dist > 0 && DDR_REVERSED_P (ddr)) >> > + if (dist > 0 && DDR_REVERSED_P (ddr) >> > + && (DR_IS_READ (dra) || DR_IS_READ (drb))) >> >> I think that'snot sufficient. It depends >> on the order of the stmts whether the dependence distance is really >> negative - we are trying to catch write-after-read negative distance >> here I think. We can't rely on the DDRs being formed in stmt order >> (anymore, at least since 4.9 where we start to arbitrary re-order >> the vector of DRs). > >As discussed on IRC, the actual bug was that >vect_analyze_data_ref_accesses >reordered the data references before DDRs were constructed, thus DDR_A >wasn't necessarily before DDR_B lexically in the loop and thus using >DDR_REVERSED_P bit didn't really reflect whether it is negative or >positive >distance. > >Fixed by making sure the data refs aren't reordered (well, they are >reordered on a copy of the vector only for the purposes of >vect_analyze_data_ref_accesses). Bootstrapped/regtested on >x86_64-linux >and i686-linux, ok for trunk?
Ok. Thanks, Richard. >2014-01-28 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/59594 > * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort > a copy of the datarefs vector rather than the vector itself. > > * gcc.dg/vect/no-vfa-vect-depend-2.c: New test. > * gcc.dg/vect/no-vfa-vect-depend-3.c: New test. > * gcc.dg/vect/pr59594.c: New test. > >--- gcc/tree-vect-data-refs.c.jj 2014-01-23 10:52:26.766346677 +0100 >+++ gcc/tree-vect-data-refs.c 2014-01-28 16:11:34.371698307 +0100 >@@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec > return true; > > /* Sort the array of datarefs to make building the interleaving chains >- linear. */ >- qsort (datarefs.address (), datarefs.length (), >+ linear. Don't modify the original vector's order, it is needed >for >+ determining what dependencies are reversed. */ >+ vec<data_reference_p> datarefs_copy = datarefs.copy (); >+ qsort (datarefs_copy.address (), datarefs_copy.length (), > sizeof (data_reference_p), dr_group_sort_cmp); > > /* Build the interleaving chains. */ >- for (i = 0; i < datarefs.length () - 1;) >+ for (i = 0; i < datarefs_copy.length () - 1;) > { >- data_reference_p dra = datarefs[i]; >+ data_reference_p dra = datarefs_copy[i]; > stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); > stmt_vec_info lastinfo = NULL; >- for (i = i + 1; i < datarefs.length (); ++i) >+ for (i = i + 1; i < datarefs_copy.length (); ++i) > { >- data_reference_p drb = datarefs[i]; >+ data_reference_p drb = datarefs_copy[i]; > stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); > > /* ??? Imperfect sorting (non-compatible types, non-modulo >@@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec > } > } > >- FOR_EACH_VEC_ELT (datarefs, i, dr) >+ FOR_EACH_VEC_ELT (datarefs_copy, i, dr) > if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) > && !vect_analyze_data_ref_access (dr)) > { >@@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec > continue; > } > else >- return false; >+ { >+ datarefs_copy.release (); >+ return false; >+ } > } > >+ datarefs_copy.release (); > return true; > } > >--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj 2014-01-28 >14:06:10.818303424 +0100 >+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c 2014-01-28 >14:06:10.818303424 +0100 >@@ -0,0 +1,55 @@ >+/* { dg-require-effective-target vect_int } */ >+ >+#include <stdarg.h> >+#include "tree-vect.h" >+ >+#define N 17 >+ >+int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0}; >+int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0}; >+int res[N] = >{48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12}; >+ >+__attribute__ ((noinline)) >+int main1 () >+{ >+ int i; >+ >+ /* Not vectorizable due to data dependence: dependence distance 1. >*/ >+ for (i = N - 1; i >= 0; i--) >+ { >+ ia[i] = ia[i+1] * 4; >+ } >+ >+ /* check results: */ >+ for (i = 0; i < N; i++) >+ { >+ if (ia[i] != 0) >+ abort (); >+ } >+ >+ /* Vectorizable. Dependence distance -1. */ >+ for (i = N - 1; i >= 0; i--) >+ { >+ ib[i+1] = ib[i] * 4; >+ } >+ >+ /* check results: */ >+ for (i = 0; i < N; i++) >+ { >+ if (ib[i] != res[i]) >+ abort (); >+ } >+ >+ return 0; >+} >+ >+int main (void) >+{ >+ check_vect (); >+ >+ return main1 (); >+} >+ >+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" >{xfail vect_no_align } } } */ >+/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 >"vect" } } */ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj 2014-01-28 >14:12:08.235470812 +0100 >+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c 2014-01-28 >16:18:44.876376404 +0100 >@@ -0,0 +1,187 @@ >+/* { dg-require-effective-target vect_int } */ >+ >+#include <stdarg.h> >+#include "tree-vect.h" >+ >+#define N 64 >+ >+int ia[N + 1]; >+int ib[N + 1]; >+ >+/* Vectorizable. Dependence distance -1. */ >+__attribute__((noinline)) void >+f1 (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ { >+ ia[i + 1] = 1; >+ ib[i] = ia[i]; >+ } >+} >+ >+/* Not vectorizable due to data dependence: dependence distance 1. */ >+__attribute__((noinline)) void >+f2 (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ { >+ ia[i] = 1; >+ ib[i] = ia[i + 1]; >+ } >+} >+ >+/* Not vectorizable due to data dependence: dependence distance 1. */ >+__attribute__((noinline)) void >+f3 (void) >+{ >+ int i; >+ for (i = N - 1; i >= 0; i--) >+ { >+ ia[i + 1] = 1; >+ ib[i] = ia[i]; >+ } >+} >+ >+/* Vectorizable. Dependence distance -1. */ >+__attribute__((noinline)) void >+f4 (void) >+{ >+ int i; >+ for (i = N - 1; i >= 0; i--) >+ { >+ ia[i] = 1; >+ ib[i] = ia[i + 1]; >+ } >+} >+ >+/* Vectorizable. Dependence distance -1. */ >+__attribute__((noinline)) void >+f5 (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ { >+ ia[i + 1] = 1; >+ ia[i] = 2; >+ } >+} >+ >+/* Not vectorizable due to data dependence: dependence distance 1. */ >+__attribute__((noinline)) void >+f6 (void) >+{ >+ int i; >+ for (i = 0; i < N; i++) >+ { >+ ia[i] = 1; >+ ia[i + 1] = 2; >+ } >+} >+ >+/* Not vectorizable due to data dependence: dependence distance 1. */ >+__attribute__((noinline)) void >+f7 (void) >+{ >+ int i; >+ for (i = N - 1; i >= 0; i--) >+ { >+ ia[i + 1] = 1; >+ ia[i] = 2; >+ } >+} >+ >+/* Vectorizable. Dependence distance -1. */ >+__attribute__((noinline)) void >+f8 (void) >+{ >+ int i; >+ for (i = N - 1; i >= 0; i--) >+ { >+ ia[i] = 1; >+ ia[i + 1] = 2; >+ } >+} >+ >+__attribute__ ((noinline)) int >+main1 (void) >+{ >+ int i, j; >+ >+ for (j = 0; j < 8; j++) >+ { >+ for (i = 0; i <= N; i++) >+ { >+ ia[i] = i + 3; >+ ib[i] = i + N + 3; >+ asm (""); >+ } >+ >+ switch (j) >+ { >+ case 0: f1 (); break; >+ case 1: f2 (); break; >+ case 2: f3 (); break; >+ case 3: f4 (); break; >+ case 4: f5 (); break; >+ case 5: f6 (); break; >+ case 6: f7 (); break; >+ case 7: f8 (); break; >+ } >+ >+ for (i = 0; i <= N; i++) >+ { >+ int ea = i + 3; >+ int eb = i + N + 3; >+ switch (j) >+ { >+ case 0: >+ if (i) ea = 1; >+ if (i == 0) eb = 3; >+ else if (i != N) eb = 1; >+ break; >+ case 1: >+ if (i != N) ea = 1; >+ if (i != N) eb = i + 4; >+ break; >+ case 2: >+ if (i) ea = 1; >+ if (i != N) eb = i + 3; >+ break; >+ case 3: >+ if (i != N) ea = 1; >+ if (i < N - 1) eb = 1; >+ else if (i == N - 1) eb = 67; >+ break; >+ case 4: >+ ea = 1 + (i != N); >+ break; >+ case 5: >+ ea = 2 - (i != N); >+ break; >+ case 6: >+ ea = 1 + (i == 0); >+ break; >+ case 7: >+ ea = 2 - (i == 0); >+ break; >+ } >+ if (ia[i] != ea || ib[i] != eb) >+ abort (); >+ } >+ } >+ >+ return 0; >+} >+ >+int main () >+{ >+ check_vect (); >+ >+ return main1 (); >+} >+ >+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" >{xfail vect_no_align } } } */ >+/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 >"vect" } } */ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.dg/vect/pr59594.c.jj 2014-01-28 >14:06:10.819303452 +0100 >+++ gcc/testsuite/gcc.dg/vect/pr59594.c 2014-01-28 14:06:10.818303424 >+0100 >@@ -0,0 +1,31 @@ >+/* PR tree-optimization/59594 */ >+ >+#include "tree-vect.h" >+ >+#define N 1024 >+int b[N + 1]; >+ >+int >+main () >+{ >+ int i; >+ check_vect (); >+ for (i = 0; i < N + 1; i++) >+ { >+ b[i] = i; >+ asm (""); >+ } >+ for (i = N; i >= 0; i--) >+ { >+ b[i + 1] = b[i]; >+ b[i] = 1; >+ } >+ if (b[0] != 1) >+ __builtin_abort (); >+ for (i = 0; i < N; i++) >+ if (b[i + 1] != i) >+ __builtin_abort (); >+ return 0; >+} >+ >+/* { dg-final { cleanup-tree-dump "vect" } } */ > > > Jakub