> Hi Roman,
>
> you can get this information from the isl_ast_build that was used when
> generating a certain loop (you can access this isl_ast_build from the
> callbacks isl_ast_build_set_before_each_for and
> isl_ast_build_set_after_each_for). With isl_ast_build_get_schedule, you can
> get an incomplete schedule (less dimensions then the schedule that you gave
> to the isl ast generator). Specifically, it only contains the dimensions of
> the current loop and all surrounding ones. Consequently the last dimension
> in this incomplete schedule is the dimension you want to check for
> parallelism.

Hi Tobias,

thank you! I've attached a patch, which contains the first draft of
checking for the loop parallelism.

If I'm not mistaken, the depth, which can be obtained from
isl_ast_build, is only suitable for the incomplete schedule, which can
be obtained using isl_ast_build_get_schedule. That's why the temporary
implementation works with the incomplete schedule instead of the
result from scop_get_transformed_schedule.

I have a question about vect-pr43423.c. CLooG generates the following
code from this example:

vect-pr43423.c
for (scat_1=0;scat_1<=min(mid_6-1,n_5-1);scat_1++) {
  (scat_1);
  (scat_1);
}
for (scat_1=max(0,mid_6);scat_1<=n_5-1;scat_1++) {
  (scat_1);
  (scat_1);
}

This loops can be parallelized, according to the description of pr43423:

"...
void foo(int n, int mid)
{
  int i;
  for(i=0; i<n; i++)
    {
      if (i < mid)
        a[i] = a[i] + b[i];
      else
        a[i] = a[i] + c[i];
    }
}

chfang@pathscale:~/gcc$ gcc -O3 -ftree-vectorizer-verbose=7 -c foo.c

foo.c:6: note: not vectorized: control flow in loop.
foo.c:3: note: vectorized 0 loops in function.

This loop can be vectorized by icc.

For this case, I would expect to see two loops with iteration range
of [0, mid) and [mid, n). Then both loops can be vectorized.
..."

and the code of vect-pr43423.c:

/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */

ISL generates the following code:

for (int c1 = 0; c1 < n; c1 += 1) {
  if (mid >= c1 + 1) {
    S_6(c1);
  } else
    S_7(c1);
  S_8(c1);
}

I think it can't be parallelized. Maybe this example is no more
suitable. What do you think about this?

-- 
                                    Cheers, Roman Gareev.
Index: gcc/graphite-isl-ast-to-gimple.c
===================================================================
--- gcc/graphite-isl-ast-to-gimple.c    (revision 213262)
+++ gcc/graphite-isl-ast-to-gimple.c    (working copy)
@@ -435,7 +435,14 @@
   redirect_edge_succ_nodup (next_e, after);
   set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
 
-  /* TODO: Add checking for the loop parallelism.  */
+  if (flag_loop_parallelize_all)
+  {
+    isl_id *id = isl_ast_node_get_annotation (node_for);
+    gcc_assert (id);
+    if (isl_id_get_user (id) != NULL)
+      loop->can_be_parallel = true;
+    isl_id_free (id);
+  }
 
   return last_e;
 }
@@ -834,6 +841,97 @@
   return schedule_isl;
 }
 
+/* Applies SCHEDULE to the in and out dimensions of the dependences
+   DEPS and return the resulting relation.  */
+
+static __isl_give isl_map *
+apply_schedule_on_deps (__isl_keep isl_union_map *schedule,
+                       __isl_keep isl_union_map *deps)
+{
+  isl_map *x;
+  isl_union_map *ux, *trans;
+
+  trans = isl_union_map_copy (schedule);
+  trans = extend_schedule (trans);
+  ux = isl_union_map_copy (deps);
+  ux = isl_union_map_apply_domain (ux, isl_union_map_copy (trans));
+  ux = isl_union_map_apply_range (ux, trans);
+  if (isl_union_map_is_empty (ux))
+    {
+      isl_union_map_free (ux);
+      return NULL;
+    }
+  x = isl_map_from_union_map (ux);
+
+  return x;
+}
+
+/* Return true when DEPS is non empty and the intersection of LEX with
+   the DEPS transformed by SCHEDULE is non empty.  LEX is the relation
+   in which all the inputs before DEPTH occur at the same time as the
+   output, and the input at DEPTH occurs before output.  */
+
+static bool
+carries_deps (__isl_keep isl_union_map *schedule,
+             __isl_keep isl_union_map *deps,
+             int depth)
+{
+  bool res;
+  int i;
+  isl_space *space;
+  isl_map *lex, *x;
+  isl_constraint *ineq;
+
+  if (isl_union_map_is_empty (deps))
+    return false;
+
+  x = apply_schedule_on_deps (schedule, deps);
+  if (x == NULL)
+    return false;
+  space = isl_map_get_space (x);
+  space = isl_space_range (space);
+  lex = isl_map_lex_le (space);
+  space = isl_map_get_space (x);
+  ineq = isl_inequality_alloc (isl_local_space_from_space (space));
+
+  for (i = 0; i < depth - 1; i++)
+    lex = isl_map_equate (lex, isl_dim_in, i, isl_dim_out, i);
+
+  /* in + 1 <= out  */
+  ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_out, depth - 1, 1);
+  ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_in, depth - 1, -1);
+  ineq = isl_constraint_set_constant_si (ineq, -1);
+  lex = isl_map_add_constraint (lex, ineq);
+  x = isl_map_intersect (x, lex);
+  res = !isl_map_is_empty (x);
+
+  isl_map_free (x);
+  return res;
+}
+
+/* This method is executed before the construction of a for node.  */
+static __isl_give isl_id *
+ast_build_before_for (__isl_keep isl_ast_build *build, void *user)
+{
+  scop_p scop = (scop_p) user;
+  isl_ast_build *pointer = NULL;
+  isl_union_map *schedule = isl_ast_build_get_schedule (build);
+  isl_space *schedule_space = isl_ast_build_get_schedule_space (build);
+  int dimension = isl_space_dim (schedule_space, isl_dim_out) - 1;
+  int res = (carries_deps (schedule, scop->must_raw, dimension)
+            || carries_deps (schedule, scop->may_raw, dimension)
+            || carries_deps (schedule, scop->must_war, dimension)
+            || carries_deps (schedule, scop->may_war, dimension)
+            || carries_deps (schedule, scop->must_waw, dimension)
+            || carries_deps (schedule, scop->may_waw, dimension));
+  if (!res)
+    pointer = build;
+  isl_union_map_free (schedule);
+  isl_space_free (schedule_space);
+  isl_id *id = isl_id_alloc (isl_ast_build_get_ctx (build), "", pointer);
+  return id;
+}
+
 static __isl_give isl_ast_node *
 scop_to_isl_ast (scop_p scop, ivs_params &ip)
 {
@@ -846,6 +944,32 @@
   add_parameters_to_ivs_params (scop, ip);
   isl_union_map *schedule_isl = generate_isl_schedule (scop);
   isl_ast_build *context_isl = generate_isl_context (scop);
+  if (flag_loop_parallelize_all)
+  {
+    if (!scop->must_raw &&
+       !scop->may_raw &&
+       !scop->must_raw_no_source &&
+       !scop->may_raw_no_source &&
+       !scop->must_war &&
+       !scop->may_war &&
+       !scop->must_war_no_source &&
+       !scop->may_war_no_source &&
+       !scop->must_waw &&
+       !scop->may_waw &&
+       !scop->must_waw_no_source &&
+       !scop->may_waw_no_source)
+      compute_deps (scop, SCOP_BBS (scop),
+                   &scop->must_raw, &scop->may_raw,
+                   &scop->must_raw_no_source, &scop->may_raw_no_source,
+                   &scop->must_war, &scop->may_war,
+                   &scop->must_war_no_source, &scop->may_war_no_source,
+                   &scop->must_waw, &scop->may_waw,
+                   &scop->must_waw_no_source, &scop->may_waw_no_source);
+
+    context_isl =
+      isl_ast_build_set_before_each_for (context_isl, ast_build_before_for,
+                                        scop);
+  }
   isl_ast_node *ast_isl = isl_ast_build_ast_from_schedule (context_isl,
                                                           schedule_isl);
   isl_ast_build_free (context_isl);

Reply via email to