This patch tries to disentangle the loop vs. non-loop code in data dependence analysis. For loop data-dependences we always want (and need) to compute a distance vector if two references from any loop iteration may alias. Thus we have to disable all offset-based analysis. This is not yet what this patch does, this patch merely makes the non-loop code stronger which relies (*sigh*) on those invalid disambiguations (well, invalid only for the loop case ...).
Thus we employ something slightly stronger than refs_may_alias_p, namely what loop invariant motion does (but cheaper, without actually expanding things). Which allows us to do nothing for the non-loop case in dr_analyze_indices, which makes sense. Bootstrapped and tested on x86_64-unknown-linux-gnu, re-bootstrapping after a stylistic change (s/VEC() *loop_nest/bool/ for dr_may_alias_p). Richard. 2011-08-23 Richard Guenther <rguent...@suse.de> * Makefile.in (tree-data-ref.o): Add tree-affine.h dependency. * tree-affine.h (aff_comb_cannot_overlap_p): Declare. * tree-affine.c (aff_comb_cannot_overlap_p): New function, moved from ... * tree-ssa-loop-im.c (cannot_overlap_p): ... here. (mem_refs_may_alias_p): Adjust. * tree-data-ref.h (dr_may_alias_p): Adjust. * tree-data-ref.c: Include tree-affine.h. (dr_analyze_indices): Do nothing for the non-loop case. (dr_may_alias_p): Distinguish loop and non-loop case. Disambiguate more cases in the non-loop case. * graphite-sese-to-poly.c (write_alias_graph_to_ascii_dimacs): Adjust calls to dr_may_alias_p. (write_alias_graph_to_ascii_ecc): Likewise. (write_alias_graph_to_ascii_dot): Likewise. (build_alias_set_optimal_p): Likewise. Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 177983) +++ gcc/Makefile.in (working copy) @@ -2690,7 +2690,7 @@ tree-scalar-evolution.o : tree-scalar-ev $(TREE_PASS_H) $(PARAMS_H) gt-tree-scalar-evolution.h tree-data-ref.o : tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ gimple-pretty-print.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \ - $(TREE_PASS_H) langhooks.h + $(TREE_PASS_H) langhooks.h tree-affine.h sese.o : sese.c sese.h $(CONFIG_H) $(SYSTEM_H) coretypes.h tree-pretty-print.h \ $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) \ Index: gcc/tree-affine.c =================================================================== --- gcc/tree-affine.c (revision 177983) +++ gcc/tree-affine.c (working copy) @@ -887,3 +887,30 @@ get_inner_reference_aff (tree ref, aff_t *size = shwi_to_double_int ((bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT); } +/* Returns true if a region of size SIZE1 at position 0 and a region of + size SIZE2 at position DIFF cannot overlap. */ + +bool +aff_comb_cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2) +{ + double_int d, bound; + + /* Unless the difference is a constant, we fail. */ + if (diff->n != 0) + return false; + + d = diff->offset; + if (double_int_negative_p (d)) + { + /* The second object is before the first one, we succeed if the last + element of the second object is before the start of the first one. */ + bound = double_int_add (d, double_int_add (size2, double_int_minus_one)); + return double_int_negative_p (bound); + } + else + { + /* We succeed if the second object starts after the first one ends. */ + return double_int_scmp (size1, d) <= 0; + } +} + Index: gcc/tree-affine.h =================================================================== --- gcc/tree-affine.h (revision 177983) +++ gcc/tree-affine.h (working copy) @@ -76,6 +76,7 @@ void tree_to_aff_combination_expand (tre struct pointer_map_t **); void get_inner_reference_aff (tree, aff_tree *, double_int *); void free_affine_expand_cache (struct pointer_map_t **); +bool aff_comb_cannot_overlap_p (aff_tree *, double_int, double_int); /* Debugging functions. */ void print_aff (FILE *, aff_tree *); Index: gcc/tree-ssa-loop-im.c =================================================================== --- gcc/tree-ssa-loop-im.c (revision 177983) +++ gcc/tree-ssa-loop-im.c (working copy) @@ -1835,33 +1835,6 @@ analyze_memory_references (void) create_vop_ref_mapping (); } -/* Returns true if a region of size SIZE1 at position 0 and a region of - size SIZE2 at position DIFF cannot overlap. */ - -static bool -cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2) -{ - double_int d, bound; - - /* Unless the difference is a constant, we fail. */ - if (diff->n != 0) - return false; - - d = diff->offset; - if (double_int_negative_p (d)) - { - /* The second object is before the first one, we succeed if the last - element of the second object is before the start of the first one. */ - bound = double_int_add (d, double_int_add (size2, double_int_minus_one)); - return double_int_negative_p (bound); - } - else - { - /* We succeed if the second object starts after the first one ends. */ - return double_int_scmp (size1, d) <= 0; - } -} - /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in tree_to_aff_combination_expand. */ @@ -1890,7 +1863,7 @@ mem_refs_may_alias_p (tree mem1, tree me aff_combination_scale (&off1, double_int_minus_one); aff_combination_add (&off2, &off1); - if (cannot_overlap_p (&off2, size1, size2)) + if (aff_comb_cannot_overlap_p (&off2, size1, size2)) return false; return true; Index: gcc/tree-data-ref.c =================================================================== --- gcc/tree-data-ref.c (revision 177983) +++ gcc/tree-data-ref.c (working copy) @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. #include "tree-scalar-evolution.h" #include "tree-pass.h" #include "langhooks.h" +#include "tree-affine.h" static struct datadep_stats { @@ -841,8 +842,14 @@ dr_analyze_indices (struct data_referenc tree base, off, access_fn = NULL_TREE; basic_block before_loop = NULL; - if (nest) - before_loop = block_before_loop (nest); + if (!nest) + { + DR_BASE_OBJECT (dr) = ref; + DR_ACCESS_FNS (dr) = NULL; + return; + } + + before_loop = block_before_loop (nest); /* Analyze access functions of dimensions we know to be independent. */ while (handled_component_p (aref)) @@ -852,12 +859,9 @@ dr_analyze_indices (struct data_referenc if (TREE_CODE (aref) == ARRAY_REF) { op = TREE_OPERAND (aref, 1); - if (nest) - { - access_fn = analyze_scalar_evolution (loop, op); - access_fn = instantiate_scev (before_loop, loop, access_fn); - VEC_safe_push (tree, heap, access_fns, access_fn); - } + access_fn = analyze_scalar_evolution (loop, op); + access_fn = instantiate_scev (before_loop, loop, access_fn); + VEC_safe_push (tree, heap, access_fns, access_fn); TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0); } /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses @@ -877,8 +881,7 @@ dr_analyze_indices (struct data_referenc aref = TREE_OPERAND (aref, 0); } - if (nest - && TREE_CODE (aref) == MEM_REF) + if (TREE_CODE (aref) == MEM_REF) { op = TREE_OPERAND (aref, 0); access_fn = analyze_scalar_evolution (loop, op); @@ -1286,14 +1289,33 @@ object_address_invariant_in_loop_p (cons } /* Returns false if we can prove that data references A and B do not alias, - true otherwise. */ + true otherwise. If LOOP_NEST is false no cross-iteration aliases are + considered. */ bool -dr_may_alias_p (const struct data_reference *a, const struct data_reference *b) +dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, + bool loop_nest) { tree addr_a = DR_BASE_OBJECT (a); tree addr_b = DR_BASE_OBJECT (b); + /* If we are not processing a loop nest but scalar code we + do not need to care about possible cross-iteration dependences + and thus can process the full original reference. Do so, + similar to how loop invariant motion applies extra offset-based + disambiguation. */ + if (!loop_nest) + { + aff_tree off1, off2; + double_int size1, size2; + get_inner_reference_aff (DR_REF (a), &off1, &size1); + get_inner_reference_aff (DR_REF (b), &off2, &size2); + aff_combination_scale (&off1, double_int_minus_one); + aff_combination_add (&off2, &off1); + if (aff_comb_cannot_overlap_p (&off2, size1, size2)) + return false; + } + if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) return refs_output_dependent_p (addr_a, addr_b); else if (DR_IS_READ (a) && DR_IS_WRITE (b)) @@ -1331,7 +1353,7 @@ initialize_data_dependence_relation (str } /* If the data references do not alias, then they are independent. */ - if (!dr_may_alias_p (a, b)) + if (!dr_may_alias_p (a, b, loop_nest != NULL)) { DDR_ARE_DEPENDENT (res) = chrec_known; return res; Index: gcc/tree-data-ref.h =================================================================== --- gcc/tree-data-ref.h (revision 177983) +++ gcc/tree-data-ref.h (working copy) @@ -431,7 +431,7 @@ extern tree find_data_references_in_bb ( extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); extern bool dr_may_alias_p (const struct data_reference *, - const struct data_reference *); + const struct data_reference *, bool); extern bool dr_equal_offsets_p (struct data_reference *, struct data_reference *); Index: gcc/graphite-sese-to-poly.c =================================================================== --- gcc/graphite-sese-to-poly.c (revision 177983) +++ gcc/graphite-sese-to-poly.c (working copy) @@ -1720,7 +1720,7 @@ write_alias_graph_to_ascii_dimacs (FILE FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) - if (dr_may_alias_p (dr1, dr2)) + if (dr_may_alias_p (dr1, dr2, true)) edge_num++; fprintf (file, "$\n"); @@ -1732,7 +1732,7 @@ write_alias_graph_to_ascii_dimacs (FILE FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) - if (dr_may_alias_p (dr1, dr2)) + if (dr_may_alias_p (dr1, dr2, true)) fprintf (file, "e %d %d\n", i + 1, j + 1); return true; @@ -1762,7 +1762,7 @@ write_alias_graph_to_ascii_dot (FILE *fi FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) - if (dr_may_alias_p (dr1, dr2)) + if (dr_may_alias_p (dr1, dr2, true)) fprintf (file, "n%d n%d\n", i, j); return true; @@ -1788,7 +1788,7 @@ write_alias_graph_to_ascii_ecc (FILE *fi FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) - if (dr_may_alias_p (dr1, dr2)) + if (dr_may_alias_p (dr1, dr2, true)) fprintf (file, "%d %d\n", i, j); return true; @@ -1824,7 +1824,7 @@ build_alias_set_optimal_p (VEC (data_ref FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) for (j = i+1; VEC_iterate (data_reference_p, drs, j, dr2); j++) - if (dr_may_alias_p (dr1, dr2)) + if (dr_may_alias_p (dr1, dr2, true)) { add_edge (g, i, j); add_edge (g, j, i);