On Thu, May 4, 2017 at 11:06 AM, Richard Sandiford
<[email protected]> wrote:
> "Bin.Cheng" <[email protected]> writes:
>> On Wed, May 3, 2017 at 9:00 AM, Richard Sandiford
>> <[email protected]> wrote:
>>> Index: gcc/tree-data-ref.h
>>> ===================================================================
>>> --- gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100
>>> +++ gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100
>>> @@ -191,6 +191,9 @@ struct conflict_function
>>>
>>> struct subscript
>>> {
>>> + /* The access functions of the two references. */
>>> + tree access_fn[2];
>> Is it better to follow existing code, i.e, name this as
>> access_fn_a/access_fn_b. Thus we don't need to use const value 0/1 in
>> various places, which is a little bit confusing.
>
> [Answered below]
>
>>> +
>>> /* A description of the iterations for which the elements are
>>> accessed twice. */
>>> conflict_function *conflicting_iterations_in_a;
>>> @@ -209,6 +212,7 @@ struct subscript
>>>
>>> typedef struct subscript *subscript_p;
>>>
>>> +#define SUB_ACCESS_FN(SUB, I) (SUB)->access_fn[I]
>>> #define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_iterations_in_a
>>> #define SUB_CONFLICTS_IN_B(SUB) (SUB)->conflicting_iterations_in_b
>>> #define SUB_LAST_CONFLICT(SUB) (SUB)->last_conflict
>>> @@ -264,6 +268,33 @@ struct data_dependence_relation
>>> /* Set to true when the dependence relation is on the same data
>>> access. */
>>> bool self_reference_p;
>>> +
>>> + /* True if the dependence described is conservatively correct rather
>>> + than exact, and if it is still possible for the accesses to be
>>> + conditionally independent. For example, the a and b references in:
>>> +
>>> + struct s *a, *b;
>>> + for (int i = 0; i < n; ++i)
>>> + a->f[i] += b->f[i];
>>> +
>>> + conservatively have a distance vector of (0), for the case in which
>>> + a == b, but the accesses are independent if a != b. Similarly,
>>> + the a and b references in:
>>> +
>>> + struct s *a, *b;
>>> + for (int i = 0; i < n; ++i)
>>> + a[0].f[i] += b[i].f[i];
>>> +
>>> + conservatively have a distance vector of (0), but they are indepenent
>>> + when a != b + i. In contrast, the references in:
>>> +
>>> + struct s *a;
>>> + for (int i = 0; i < n; ++i)
>>> + a->f[i] += a->f[i];
>>> +
>>> + have the same distance vector of (0), but the accesses can never be
>>> + independent. */
>>> + bool could_be_independent_p;
>>> };
>>>
>>> typedef struct data_dependence_relation *ddr_p;
>>> @@ -294,6 +325,7 @@ #define DDR_DIR_VECT(DDR, I) \
>>> #define DDR_DIST_VECT(DDR, I) \
>>> DDR_DIST_VECTS (DDR)[I]
>>> #define DDR_REVERSED_P(DDR) (DDR)->reversed_p
>>> +#define DDR_COULD_BE_INDEPENDENT_P(DDR) (DDR)->could_be_independent_p
>>>
>>>
>>> bool dr_analyze_innermost (struct data_reference *, struct loop *);
>>> @@ -372,22 +404,6 @@ same_data_refs (data_reference_p a, data
>>> return false;
>>>
>>> return true;
>>> -}
>>> -
>>> -/* Return true when the DDR contains two data references that have the
>>> - same access functions. */
>>> -
>>> -static inline bool
>>> -same_access_functions (const struct data_dependence_relation *ddr)
>>> -{
>>> - unsigned i;
>>> -
>>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
>>> - if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
>>> - DR_ACCESS_FN (DDR_B (ddr), i)))
>>> - return false;
>>> -
>>> - return true;
>>> }
>>>
>>> /* Returns true when all the dependences are computable. */
>>> Index: gcc/tree-data-ref.c
>>> ===================================================================
>>> --- gcc/tree-data-ref.c 2017-02-23 19:54:15.000000000 +0000
>>> +++ gcc/tree-data-ref.c 2017-05-03 08:48:48.737038502 +0100
>>> @@ -123,8 +123,7 @@ Software Foundation; either version 3, o
>>> } dependence_stats;
>>>
>>> static bool subscript_dependence_tester_1 (struct data_dependence_relation
>>> *,
>>> - struct data_reference *,
>>> - struct data_reference *,
>>> + unsigned int, unsigned int,
>>> struct loop *);
>> As mentioned, how about passing access_fn directly, rather than less
>> meaningful 0/1 values?
>
> The problem is that access_fn is a property of the individual
> subscripts, whereas this is operating on a full data_reference.
>
> One alternative would be to use conditions like:
>
> first_is_a ? SUB_ACCESS_FN_A (sub) : SUB_ACCESS_FN_B (sub)
>
> but IMO that's less readable than the existing:
>
> SUB_ACCESS_FN (sub, index)
>
> Or we could have individual access_fn arrays for A and B, separate
> from the main subscript array, but that would mean allocating three
> arrays instead of one.
Thanks for explanation, I see the problem now. Even the latter
sequence could be different for A and B, there should have the same
number index? If that's the case, is it possible just recording the
starting position (or length) in DR_ACCESS_FN and use that information
to access to access_fn vector. This can save the copy in subscript.
Anyway, this is not am important problem.
>
>>> /* Returns true iff A divides B. */
>>>
>>> @@ -144,6 +143,21 @@ int_divides_p (int a, int b)
>>> return ((b % a) == 0);
>>> }
>>>
>>> +/* Return true if reference REF contains a union access. */
>>> +
>>> +static bool
>>> +ref_contains_union_access_p (tree ref)
>>> +{
>>> + while (handled_component_p (ref))
>>> + {
>>> + ref = TREE_OPERAND (ref, 0);
>>> + if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
>>> + || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>>
>>>
>>> /* Dump into FILE all the data references from DATAREFS. */
>>> @@ -433,13 +447,14 @@ dump_data_dependence_relation (FILE *out
>>> unsigned int i;
>>> struct loop *loopi;
>>>
>>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
>>> + subscript *sub;
>>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
>>> {
>>> fprintf (outf, " access_fn_A: ");
>>> - print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
>>> + print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0), 0);
>>> fprintf (outf, " access_fn_B: ");
>>> - print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
>>> - dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
>>> + print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1), 0);
>>> + dump_subscript (outf, sub);
>>> }
>>>
>>> fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
>>> @@ -1484,11 +1499,10 @@ initialize_data_dependence_relation (str
>>> struct data_dependence_relation *res;
>>> unsigned int i;
>>>
>>> - res = XNEW (struct data_dependence_relation);
>>> + res = XCNEW (struct data_dependence_relation);
>>> DDR_A (res) = a;
>>> DDR_B (res) = b;
>>> DDR_LOOP_NEST (res).create (0);
>>> - DDR_REVERSED_P (res) = false;
>>> DDR_SUBSCRIPTS (res).create (0);
>>> DDR_DIR_VECTS (res).create (0);
>>> DDR_DIST_VECTS (res).create (0);
>>> @@ -1506,82 +1520,217 @@ initialize_data_dependence_relation (str
>>> return res;
>>> }
>>>
>>> - /* The case where the references are exactly the same. */
>>> - if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
>>> + unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
>>> + unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
>>> + if (num_dimensions_a == 0 || num_dimensions_b == 0)
>>> {
>>> - if ((loop_nest.exists ()
>>> - && !object_address_invariant_in_loop_p (loop_nest[0],
>>> - DR_BASE_OBJECT (a)))
>>> - || DR_NUM_DIMENSIONS (a) == 0)
>>> - {
>>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> - return res;
>>> - }
>>> - DDR_AFFINE_P (res) = true;
>>> - DDR_ARE_DEPENDENT (res) = NULL_TREE;
>>> - DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
>>> - DDR_LOOP_NEST (res) = loop_nest;
>>> - DDR_INNER_LOOP (res) = 0;
>>> - DDR_SELF_REFERENCE (res) = true;
>>> - for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
>>> - {
>>> - struct subscript *subscript;
>>> + DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> + return res;
>>> + }
>>> +
>>> + /* For unconstrained bases, the outer (highest-index) subscript
>>> + describes a variation in the base of the original DR_REF rather
>>> + than a component access. We have no type that accurately describes
>>> + the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
>>> + applying the outer subscript) so limit the search to the last real
>>> + component access.
>>> +
>>> + E.g. for:
>>>
>>> - subscript = XNEW (struct subscript);
>>> - SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
>>> - SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
>>> - SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
>>> - SUB_DISTANCE (subscript) = chrec_dont_know;
>>> - DDR_SUBSCRIPTS (res).safe_push (subscript);
>>> + void
>>> + f (int a[][8], int b[][8])
>>> + {
>>> + for (int i = 0; i < 8; ++i)
>>> + a[i * 2][0] = b[i][0];
>>> }
>>> - return res;
>>> +
>>> + the a and b accesses have a single ARRAY_REF component reference [0]
>>> + but have two subscripts. */
>>> + if (DR_UNCONSTRAINED_BASE (a))
>>> + num_dimensions_a -= 1;
>>> + if (DR_UNCONSTRAINED_BASE (b))
>>> + num_dimensions_b -= 1;
>>> +
>>> + /* Now look for two sequences of component references that have the same
>>> + type in both A and B. The first sequence includes an arbitrary
>>> mixture
>>> + of array and structure references while the second always ends on a
>>> + structure reference.
>>> +
>>> + The former (arbitrary) sequence uses access functions:
>>> +
>>> + [START_A, START_A + NUM_DIMENSIONS) of A
>>> + [START_B, START_B + NUM_DIMENSIONS) of B
>>> +
>>> + The latter sequence uses access functions:
>>> +
>>> + [STRUCT_START_A, STRUCT_START_A + STRUCT_NUM_DIMENSIONS) of A
>>> + [STRUCT_START_B, STRUCT_START_B + STRUCT_NUM_DIMENSIONS) of B
>>> +
>>> + STRUCT_REF_A and STRUCT_REF_B are the outer references for the
>> IIUC, A and B always share the same latter sequence, and the common
>> latter sequence ends at a structure reference providing alias
>> information.
>
> The A and B accesses aren't necessarily the same, they just have the
> compatible types. E.g. for:
>
> struct s { int x[8]; int y[8]; } *a, *b;
>
> ... a->x[0] = b->y[1] ...
>
> the sequence would include:
>
> a: [0] .x
> b: [1] .y
I see.
>
>> Is it possible to record the the former arbitrary
>> references instead of simple flag DDR_COULD_BE_INDEPENDENT_P. With
>> this information, alias check can be simplified by stripping away
>> address computation for the shared common sub-sequence. I doubt
>> vect_create_cond_for_alias_checks could detect this kind CSE for now.
>> Ah, I see you changed alias check code generation in order to handle
>> this.
>
> The num_dimensions sequence is only used if it ends at the original
> base and if the bases are equal. In other cases it doesn't really help.
> The struct_num_dimensions sequence is meant to be the one that is
> helpful even when the bases aren't equal.
>
> Like you say, there's a follow-on patch that uses this for runtime
> alias checking.
>
>>> + latter sequence. */
>>> + unsigned int start_a = 0;
>>> + unsigned int start_b = 0;
>>> + unsigned int num_dimensions = 0;
>>> + unsigned int struct_start_a = 0;
>>> + unsigned int struct_start_b = 0;
>>> + unsigned int struct_num_dimensions = 0;
>>> + unsigned int index_a = 0;
>>> + unsigned int index_b = 0;
>>> + tree next_ref_a = DR_REF (a);
>>> + tree next_ref_b = DR_REF (b);
>>> + tree struct_ref_a = NULL_TREE;
>>> + tree struct_ref_b = NULL_TREE;
>>> + while (index_a < num_dimensions_a && index_b < num_dimensions_b)
>>> + {
>>> + gcc_checking_assert (handled_component_p (next_ref_a));
>>> + gcc_checking_assert (handled_component_p (next_ref_b));
>>> + tree outer_ref_a = TREE_OPERAND (next_ref_a, 0);
>>> + tree outer_ref_b = TREE_OPERAND (next_ref_b, 0);
>>> + tree type_a = TREE_TYPE (outer_ref_a);
>>> + tree type_b = TREE_TYPE (outer_ref_b);
>>> + if (types_compatible_p (type_a, type_b))
>>> + {
>>> + /* This pair of accesses belong to a suitable sequence. */
>>> + if (start_a + num_dimensions != index_a
>>> + || start_b + num_dimensions != index_b)
>>> + {
>>> + /* Start a new sequence here. */
>>> + start_a = index_a;
>>> + start_b = index_b;
>>> + num_dimensions = 0;
>>> + }
>>> + num_dimensions += 1;
>>> + if (TREE_CODE (type_a) == RECORD_TYPE)
>>> + {
>>> + struct_start_a = start_a;
>>> + struct_start_b = start_b;
>>> + struct_num_dimensions = num_dimensions;
>>> + struct_ref_a = outer_ref_a;
>>> + struct_ref_b = outer_ref_b;
>>> + }
>>> + next_ref_a = outer_ref_a;
>>> + next_ref_b = outer_ref_b;
>>> + index_a += 1;
>>> + index_b += 1;
>>> + continue;
>>> + }
>>> + /* Try to approach equal type sizes. */
>>> + if (!COMPLETE_TYPE_P (type_a)
>>> + || !COMPLETE_TYPE_P (type_b)
>>> + || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
>>> + || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
>>> + break;
>>> + unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT
>>> (type_a));
>>> + unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT
>>> (type_b));
>>> + if (size_a <= size_b)
>>> + {
>>> + index_a += 1;
>>> + next_ref_a = outer_ref_a;
>>> + }
>>> + if (size_b <= size_a)
>>> + {
>>> + index_b += 1;
>>> + next_ref_b = outer_ref_b;
>>> + }
>>> }
>>>
>>> - /* If the references do not access the same object, we do not know
>>> - whether they alias or not. We do not care about TBAA or alignment
>>> - info so we can use OEP_ADDRESS_OF to avoid false negatives.
>>> - But the accesses have to use compatible types as otherwise the
>>> - built indices would not match. */
>>> - if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b),
>> OEP_ADDRESS_OF)
>>> - || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
>>> - TREE_TYPE (DR_BASE_OBJECT (b))))
>>> + /* See whether the sequence ends at the base and whether the two bases
>>> + are equal. We do not care about TBAA or alignment info so we can use
>>> + OEP_ADDRESS_OF to avoid false negatives. */
>>> + tree base_a = DR_BASE_OBJECT (a);
>>> + tree base_b = DR_BASE_OBJECT (b);
>>> + bool same_base_p = (start_a + num_dimensions == num_dimensions_a
>>> + && start_b + num_dimensions == num_dimensions_b
>>> + && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
>>> + && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
>>> + && types_compatible_p (TREE_TYPE (base_a),
>>> + TREE_TYPE (base_b))
>>> + && (!loop_nest.exists ()
>>> + || (object_address_invariant_in_loop_p
>>> + (loop_nest[0], base_a))));
>> Major change is in function initialize_data_dependence_relation in
>> order to detect partial alias opportunity. The original equality
>> check on DR_BASE_OBJECT is bypassed now. IMHO better to introduce a
>> new parameter to compute_data_reference_for_loop etc., indicating
>> whether we want to handle partial alias opportunity or not. After
>> all, such computation is unnecessary for predcom/prefetch/parloop.
>> It's only a waste of time computing it.
>
> Well, it also means that we can now prove the accesses are independent
> in more cases. E.g. previously we would assume the a and b accesses in:
Predcom cares about dependent refs with constant distance, so
independent (neither possible dependent) information based on partial
alias is not interested.
>
> struct s { int x[16]; } *a, *b;
> for (int i = 0; i < 8; ++i)
> a->x[i] = b->x[i + 8];
>
> could conflict.
>
> If callers don't need to know what the relationship between a and b is,
> I think they should check for that before going through the process of
> initialising and analysing the ddr.
This I don't think so. Users don't have the information to
pre-check/analyze reference pair. Even it can do that by repeating
most work as in data-ref-analyzer, it sounds not a good practice.
That's exactly analyzer's job and the reason why interfaces like
compute_data_dependence_for_loop are introduced. It doesn't make much
sense requiring users to do additional analysis before looking for
help from data-ref-analyzer.
Thanks,
bin
>
>>> +
>>> + /* If the bases are the same, we can include the base variation too.
>>> + E.g. the b accesses in:
>>> +
>>> + for (int i = 0; i < n; ++i)
>>> + b[i + 4][0] = b[i][0];
>>> +
>>> + have a definite dependence distance of 4, while for:
>>> +
>>> + for (int i = 0; i < n; ++i)
>>> + a[i + 4][0] = b[i][0];
>>> +
>>> + the dependence distance depends on the gap between a and b.
>>> +
>>> + If the bases are different then we can only rely on the sequence
>>> + rooted at a structure access, since arrays are allowed to overlap
>>> + arbitrarily and change shape arbitrarily. E.g. we treat this as
>>> + valid code:
>>> +
>>> + int a[256];
>>> + ...
>>> + ((int (*)[4][3])&a[1])[i][0] += ((int (*)[4][3])&a[2])[i][0];
>>> +
>>> + where two lvalues with the same int[4][3] type overlap, and where
>>> + both lvalues are distinct from the object's declared type. */
>>> + if (same_base_p)
>>> {
>>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> - return res;
>>> + if (DR_UNCONSTRAINED_BASE (a))
>>> + num_dimensions += 1;
>>> + }
>>> + else
>>> + {
>>> + start_a = struct_start_a;
>>> + start_b = struct_start_b;
>>> + num_dimensions = struct_num_dimensions;
>>> }
>>>
>>> - /* If the base of the object is not invariant in the loop nest, we cannot
>>> - analyze it. TODO -- in fact, it would suffice to record that there
>>> may
>>> - be arbitrary dependences in the loops where the base object varies.
>>> */
>>> - if ((loop_nest.exists ()
>>> - && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT
>> (a)))
>>> - || DR_NUM_DIMENSIONS (a) == 0)
>>> + /* Punt if we didn't find a suitable sequence. */
>>> + if (num_dimensions == 0)
>>> {
>>> DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> return res;
>>> }
>>>
>>> - /* If the number of dimensions of the access to not agree we can have
>>> - a pointer access to a component of the array element type and an
>>> - array access while the base-objects are still the same. Punt. */
>>> - if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
>>> + if (!same_base_p)
>>> {
>>> - DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> - return res;
>>> + /* Partial overlap is possible for different bases when strict
>>> aliasing
>>> + is not in effect. It's also possible if either base involves a
>>> union
>>> + access; e.g. for:
>>> +
>>> + struct s1 { int a[2]; };
>>> + struct s2 { struct s1 b; int c; };
>>> + struct s3 { int d; struct s1 e; };
>>> + union u { struct s2 f; struct s3 g; } *p, *q;
>>> +
>>> + the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
>>> + "p->g.e" (base "p->g") and might partially overlap the s1 at
>>> + "q->g.e" (base "q->g"). */
>>> + if (!flag_strict_aliasing
>>> + || ref_contains_union_access_p (struct_ref_a)
>>> + || ref_contains_union_access_p (struct_ref_b))
>>> + {
>>> + DDR_ARE_DEPENDENT (res) = chrec_dont_know;
>>> + return res;
>>> + }
>>> +
>>> + DDR_COULD_BE_INDEPENDENT_P (res) = true;
>>> }
>>>
>>> DDR_AFFINE_P (res) = true;
>>> DDR_ARE_DEPENDENT (res) = NULL_TREE;
>>> - DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
>>> + DDR_SUBSCRIPTS (res).create (num_dimensions);
>>> DDR_LOOP_NEST (res) = loop_nest;
>>> DDR_INNER_LOOP (res) = 0;
>>> DDR_SELF_REFERENCE (res) = false;
>>>
>>> - for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
>>> + for (i = 0; i < num_dimensions; ++i)
>>> {
>>> struct subscript *subscript;
>>>
>>> subscript = XNEW (struct subscript);
>>> + SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, start_a + i);
>>> + SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, start_b + i);
>>> SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
>>> SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
>>> SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
>>> @@ -3163,14 +3312,15 @@ add_outer_distances (struct data_depende
>>> }
>>>
>>> /* Return false when fail to represent the data dependence as a
>>> - distance vector. INIT_B is set to true when a component has been
>>> + distance vector. A_INDEX is the index of the first reference
>>> + (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
>>> + second reference. INIT_B is set to true when a component has been
>>> added to the distance vector DIST_V. INDEX_CARRY is then set to
>>> the index in DIST_V that carries the dependence. */
>>>
>>> static bool
>>> build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
>>> - struct data_reference *ddr_a,
>>> - struct data_reference *ddr_b,
>>> + unsigned int a_index, unsigned int b_index,
>>> lambda_vector dist_v, bool *init_b,
>>> int *index_carry)
>>> {
>>> @@ -3188,8 +3338,8 @@ build_classic_dist_vector_1 (struct data
>>> return false;
>>> }
>>>
>>> - access_fn_a = DR_ACCESS_FN (ddr_a, i);
>>> - access_fn_b = DR_ACCESS_FN (ddr_b, i);
>>> + access_fn_a = SUB_ACCESS_FN (subscript, a_index);
>>> + access_fn_b = SUB_ACCESS_FN (subscript, b_index);
>>>
>>> if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
>>> && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
>>> @@ -3249,10 +3399,11 @@ build_classic_dist_vector_1 (struct data
>>> constant_access_functions (const struct data_dependence_relation *ddr)
>>> {
>>> unsigned i;
>>> + subscript *sub;
>>>
>>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
>>> - if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
>>> - || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr),
>>> i)))
>>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
>>> + if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
>>> + || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
>>> return false;
>>>
>>> return true;
>>> @@ -3315,10 +3466,11 @@ add_other_self_distances (struct data_de
>>> lambda_vector dist_v;
>>> unsigned i;
>>> int index_carry = DDR_NB_LOOPS (ddr);
>>> + subscript *sub;
>>>
>>> - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
>>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
>>> {
>>> - tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
>>> + tree access_fun = SUB_ACCESS_FN (sub, 0);
>>>
>>> if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
>>> {
>>> @@ -3330,7 +3482,7 @@ add_other_self_distances (struct data_de
>>> return;
>>> }
>>>
>>> - access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
>>> + access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
>>>
>>> if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
>>> add_multivariate_self_dist (ddr, access_fun);
>>> @@ -3401,6 +3553,23 @@ add_distance_for_zero_overlaps (struct d
>>> }
>>> }
>>>
>>> +/* Return true when the DDR contains two data references that have the
>>> + same access functions. */
>>> +
>>> +static inline bool
>>> +same_access_functions (const struct data_dependence_relation *ddr)
>>> +{
>>> + unsigned i;
>>> + subscript *sub;
>>> +
>>> + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
>>> + if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
>>> + SUB_ACCESS_FN (sub, 1)))
>>> + return false;
>>> +
>>> + return true;
>>> +}
>>> +
>>> /* Compute the classic per loop distance vector. DDR is the data
>>> dependence relation to build a vector from. Return false when fail
>>> to represent the data dependence as a distance vector. */
>>> @@ -3432,8 +3601,7 @@ build_classic_dist_vector (struct data_d
>>> }
>>>
>>> dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
>>> - if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
>>> - dist_v, &init_b, &index_carry))
>>> + if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b,
>> &index_carry))
>>> return false;
>>>
>>> /* Save the distance vector if we initialized one. */
>>> @@ -3466,12 +3634,11 @@ build_classic_dist_vector (struct data_d
>>> if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
>>> {
>>> lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
>>> - if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
>>> - loop_nest))
>>> + if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
>>> return false;
>>> compute_subscript_distance (ddr);
>>> - if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
>>> - save_v, &init_b, &index_carry))
>>> + if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
>>> + &index_carry))
>>> return false;
>>> save_dist_v (ddr, save_v);
>>> DDR_REVERSED_P (ddr) = true;
>>> @@ -3507,12 +3674,10 @@ build_classic_dist_vector (struct data_d
>>> {
>>> lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
>>>
>>> - if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
>>> - DDR_A (ddr), loop_nest))
>>> + if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
>>> return false;
>>> compute_subscript_distance (ddr);
>>> - if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A
>>> (ddr),
>>> - opposite_v, &init_b,
>>> + if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
>>> &index_carry))
>>> return false;
>>>
>>> @@ -3591,13 +3756,13 @@ build_classic_dir_vector (struct data_de
>>> }
>>> }
>>>
>>> -/* Helper function. Returns true when there is a dependence between
>>> - data references DRA and DRB. */
>>> +/* Helper function. Returns true when there is a dependence between the
>>> + data references. A_INDEX is the index of the first reference (0 for
>>> + DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
>>> */
>>>
>>> static bool
>>> subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
>>> - struct data_reference *dra,
>>> - struct data_reference *drb,
>>> + unsigned int a_index, unsigned int b_index,
>>> struct loop *loop_nest)
>>> {
>>> unsigned int i;
>>> @@ -3609,8 +3774,8 @@ subscript_dependence_tester_1 (struct da
>>> {
>>> conflict_function *overlaps_a, *overlaps_b;
>>>
>>> - analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
>>> - DR_ACCESS_FN (drb, i),
>>> + analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
>>> + SUB_ACCESS_FN (subscript, b_index),
>>> &overlaps_a, &overlaps_b,
>>> &last_conflicts, loop_nest);
>>>
>>> @@ -3659,7 +3824,7 @@ subscript_dependence_tester_1 (struct da
>>> subscript_dependence_tester (struct data_dependence_relation *ddr,
>>> struct loop *loop_nest)
>>> {
>>> - if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr),
>> loop_nest))
>>> + if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
>>> dependence_stats.num_dependence_dependent++;
>>>
>>> compute_subscript_distance (ddr);
>>> Index: gcc/tree-ssa-loop-prefetch.c
>>> ===================================================================
>>> --- gcc/tree-ssa-loop-prefetch.c 2017-03-28 16:19:28.000000000 +0100
>>> +++ gcc/tree-ssa-loop-prefetch.c 2017-05-03 08:48:48.737038502 +0100
>>> @@ -1650,6 +1650,7 @@ determine_loop_nest_reuse (struct loop *
>>> refb = (struct mem_ref *) DDR_B (dep)->aux;
>>>
>>> if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
>>> + || DDR_COULD_BE_INDEPENDENT_P (dep)
>>> || DDR_NUM_DIST_VECTS (dep) == 0)
>>> {
>>> /* If the dependence cannot be analyzed, assume that there might
>>> be
>> As said, we could avoid computing such information in the first place.
>> I can see predcom ingores it by explicitly checking DR_BASE_OBJECT,
>> what about tree-parloops.c?
>
> For parloops, it should help that we can now prove lack of dependence
> in more cases.
>
> Thanks,
> Richard