Cong Hou <co...@google.com> wrote: >GCC bootstrap failed with loop vectorizer turned on by default at O2. >The symptom is that the comparison between stage2&3 compilers fails. >The root cause is a bug in the file "tree-vect-data-refs.c", where a >qsort() function call is used to sort a group of data references using >a comparison function called "dr_group_sort_cmp()". In this function, >the iterative hash values of tree nodes are used for comparisons. For >a declaration tree node, its UID participates in the calculation of >the hash value. However, a specific declaration may have different >UIDs whether the debug information is switched on/off (-gtoggle). In >consequence, the results of comparisons may vary in stage 2&3 during >bootstrapping. > >The following patch fixed the bug. Compiler bootstraps and there is no >regressions in regression test. Compiler also bootstraps fine when >turning on vectorizer by default. Since this patch may produce >difference result (but still correct) than before due to the >modification to the comparison function, four test cases are adjusted >accordingly. OK for trunk?
Where does the order of DRs affect code generation? I think there is the correct place to fix this bug. Richard. > >Cong > > > >Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy) >@@ -10,9 +10,9 @@ > #include "l_fma_1.h" > > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy) >@@ -9,9 +9,9 @@ > #include "l_fma_1.h" > > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy) >@@ -10,9 +10,9 @@ > #include "l_fma_3.h" > > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy) >@@ -9,9 +9,9 @@ > #include "l_fma_3.h" > > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >Index: gcc/tree-vect-data-refs.c >=================================================================== >--- gcc/tree-vect-data-refs.c (revision 200893) >+++ gcc/tree-vect-data-refs.c (working copy) >@@ -2311,6 +2311,80 @@ > return vect_analyze_group_access (dr); > } > >+ >+ >+/* Compare two tree nodes. This function is used to compare two >+ data-references as below. */ >+ >+static int >+compare_tree (tree t1, tree t2) >+{ >+ int i, cmp; >+ enum tree_code code; >+ char tclass; >+ >+ if (t1 == t2) >+ return 0; >+ if (t1 == NULL) >+ return -1; >+ if (t2 == NULL) >+ return 1; >+ >+ >+ if (TREE_CODE (t1) != TREE_CODE (t2)) >+ return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; >+ >+ code = TREE_CODE (t1); >+ switch (code) >+ { >+ /* For const values, we can just use hash values for comparisons. >*/ >+ case INTEGER_CST: >+ case REAL_CST: >+ case FIXED_CST: >+ case STRING_CST: >+ case COMPLEX_CST: >+ case VECTOR_CST: >+ { >+ hashval_t h1 = iterative_hash_expr (t1, 0); >+ hashval_t h2 = iterative_hash_expr (t2, 0); >+ if (h1 != h2) >+ return h1 < h2 ? -1 : 1; >+ break; >+ } >+ >+ case SSA_NAME: >+ cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2)); >+ if (cmp != 0) >+ return cmp; >+ >+ if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) >+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; >+ break; >+ >+ default: >+ tclass = TREE_CODE_CLASS (code); >+ >+ /* For var-decl, we could compare their UIDs. */ >+ if (tclass == tcc_declaration) >+ { >+ if (DECL_UID (t1) != DECL_UID (t2)) >+ return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; >+ break; >+ } >+ >+ /* For expressions with operands, compare their operands >recursively. */ >+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) >+ { >+ cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i)); >+ if (cmp != 0) >+ return cmp; >+ } >+ } >+ >+ return 0; >+} >+ >+ > /* Compare two data-references DRA and DRB to group them into chunks > suitable for grouping. */ > >@@ -2319,7 +2393,6 @@ > { > data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); > data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); >- hashval_t h1, h2; > int cmp; > > /* Stabilize sort. */ >@@ -2329,19 +2402,17 @@ > /* Ordering of DRs according to base. */ >if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)) > { >- h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0); >- h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS >(drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* And according to DR_OFFSET. */ > if (!dr_equal_offsets_p (dra, drb)) > { >- h1 = iterative_hash_expr (DR_OFFSET (dra), 0); >- h2 = iterative_hash_expr (DR_OFFSET (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* Put reads before writes. */ >@@ -2352,19 +2423,18 @@ > if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), > TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) > { >- h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(dra))), 0); >- h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(drb))), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), >+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); >+ if (cmp != 0) >+ return cmp; > } > > /* And after step. */ > if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) > { >- h1 = iterative_hash_expr (DR_STEP (dra), 0); >- h2 = iterative_hash_expr (DR_STEP (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_STEP (dra), DR_STEP (drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* Then sort after DR_INIT. In case of identical DRs sort after >stmt UID. */