Cong Hou <co...@google.com> wrote: >Hi Richard > >Thank you for you reply. The code generation is affected when >generating >conditions for alias checks using the function > >static void vect_create_cond_for_alias_checks (loop_vec_info >loop_vinfo, >tree * cond_expr) > >from the file > >*tree-vect-loop-manip.c* > >where a loop inside iterates a set of DRs and builds conditions for >each >one. However, those DRs are already sorted by the function
Hm, ok. >bool vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, >bb_vec_info >bb_vinfo) > >from the file > >*tree-vect-data-refs.c* > >and as I mentioned the comparison function used in the sort has some >problems and our patch is trying to fix it. Yes, I know - I introduced the lame compare using the hashes... >If you have any other problem please let me know. Not at this time. The patch is ok as-is. Thanks, Richard. >Thank you! >* >* >* >* >Cong > > >On Sat, Jul 13, 2013 at 2:06 AM, Richard Biener ><richard.guent...@gmail.com>wrote: > >> Cong Hou <co...@google.com> wrote: >> >> >GCC bootstrap failed with loop vectorizer turned on by default at >O2. >> >The symptom is that the comparison between stage2&3 compilers fails. >> >The root cause is a bug in the file "tree-vect-data-refs.c", where a >> >qsort() function call is used to sort a group of data references >using >> >a comparison function called "dr_group_sort_cmp()". In this >function, >> >the iterative hash values of tree nodes are used for comparisons. >For >> >a declaration tree node, its UID participates in the calculation of >> >the hash value. However, a specific declaration may have different >> >UIDs whether the debug information is switched on/off (-gtoggle). In >> >consequence, the results of comparisons may vary in stage 2&3 during >> >bootstrapping. >> > >> >The following patch fixed the bug. Compiler bootstraps and there is >no >> >regressions in regression test. Compiler also bootstraps fine when >> >turning on vectorizer by default. Since this patch may produce >> >difference result (but still correct) than before due to the >> >modification to the comparison function, four test cases are >adjusted >> >accordingly. OK for trunk? >> >> Where does the order of DRs affect code generation? I think there is >the >> correct place to fix this bug. >> >> Richard. >> >> > >> >Cong >> > >> > >> > >> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy) >> >@@ -10,9 +10,9 @@ >> > #include "l_fma_1.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy) >> >@@ -9,9 +9,9 @@ >> > #include "l_fma_1.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy) >> >@@ -10,9 +10,9 @@ >> > #include "l_fma_3.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy) >> >@@ -9,9 +9,9 @@ >> > #include "l_fma_3.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >> >Index: gcc/tree-vect-data-refs.c >> >=================================================================== >> >--- gcc/tree-vect-data-refs.c (revision 200893) >> >+++ gcc/tree-vect-data-refs.c (working copy) >> >@@ -2311,6 +2311,80 @@ >> > return vect_analyze_group_access (dr); >> > } >> > >> >+ >> >+ >> >+/* Compare two tree nodes. This function is used to compare two >> >+ data-references as below. */ >> >+ >> >+static int >> >+compare_tree (tree t1, tree t2) >> >+{ >> >+ int i, cmp; >> >+ enum tree_code code; >> >+ char tclass; >> >+ >> >+ if (t1 == t2) >> >+ return 0; >> >+ if (t1 == NULL) >> >+ return -1; >> >+ if (t2 == NULL) >> >+ return 1; >> >+ >> >+ >> >+ if (TREE_CODE (t1) != TREE_CODE (t2)) >> >+ return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; >> >+ >> >+ code = TREE_CODE (t1); >> >+ switch (code) >> >+ { >> >+ /* For const values, we can just use hash values for >comparisons. >> >*/ >> >+ case INTEGER_CST: >> >+ case REAL_CST: >> >+ case FIXED_CST: >> >+ case STRING_CST: >> >+ case COMPLEX_CST: >> >+ case VECTOR_CST: >> >+ { >> >+ hashval_t h1 = iterative_hash_expr (t1, 0); >> >+ hashval_t h2 = iterative_hash_expr (t2, 0); >> >+ if (h1 != h2) >> >+ return h1 < h2 ? -1 : 1; >> >+ break; >> >+ } >> >+ >> >+ case SSA_NAME: >> >+ cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2)); >> >+ if (cmp != 0) >> >+ return cmp; >> >+ >> >+ if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) >> >+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; >> >+ break; >> >+ >> >+ default: >> >+ tclass = TREE_CODE_CLASS (code); >> >+ >> >+ /* For var-decl, we could compare their UIDs. */ >> >+ if (tclass == tcc_declaration) >> >+ { >> >+ if (DECL_UID (t1) != DECL_UID (t2)) >> >+ return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; >> >+ break; >> >+ } >> >+ >> >+ /* For expressions with operands, compare their operands >> >recursively. */ >> >+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) >> >+ { >> >+ cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i)); >> >+ if (cmp != 0) >> >+ return cmp; >> >+ } >> >+ } >> >+ >> >+ return 0; >> >+} >> >+ >> >+ >> > /* Compare two data-references DRA and DRB to group them into >chunks >> > suitable for grouping. */ >> > >> >@@ -2319,7 +2393,6 @@ >> > { >> > data_reference_p dra = *(data_reference_p *)const_cast<void >*>(dra_); >> > data_reference_p drb = *(data_reference_p *)const_cast<void >*>(drb_); >> >- hashval_t h1, h2; >> > int cmp; >> > >> > /* Stabilize sort. */ >> >@@ -2329,19 +2402,17 @@ >> > /* Ordering of DRs according to base. */ >> >if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), >0)) >> > { >> >- h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0); >> >- h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS >> >(drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* And according to DR_OFFSET. */ >> > if (!dr_equal_offsets_p (dra, drb)) >> > { >> >- h1 = iterative_hash_expr (DR_OFFSET (dra), 0); >> >- h2 = iterative_hash_expr (DR_OFFSET (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* Put reads before writes. */ >> >@@ -2352,19 +2423,18 @@ >> > if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), >> > TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) >> > { >> >- h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >> >(dra))), 0); >> >- h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >> >(drb))), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(dra))), >> >+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(drb)))); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* And after step. */ >> > if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) >> > { >> >- h1 = iterative_hash_expr (DR_STEP (dra), 0); >> >- h2 = iterative_hash_expr (DR_STEP (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_STEP (dra), DR_STEP (drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* Then sort after DR_INIT. In case of identical DRs sort after >> >stmt UID. */ >> >> >>