> Hi Roman, > > you can get this information from the isl_ast_build that was used when > generating a certain loop (you can access this isl_ast_build from the > callbacks isl_ast_build_set_before_each_for and > isl_ast_build_set_after_each_for). With isl_ast_build_get_schedule, you can > get an incomplete schedule (less dimensions then the schedule that you gave > to the isl ast generator). Specifically, it only contains the dimensions of > the current loop and all surrounding ones. Consequently the last dimension > in this incomplete schedule is the dimension you want to check for > parallelism.
Hi Tobias, thank you! I've attached a patch, which contains the first draft of checking for the loop parallelism. If I'm not mistaken, the depth, which can be obtained from isl_ast_build, is only suitable for the incomplete schedule, which can be obtained using isl_ast_build_get_schedule. That's why the temporary implementation works with the incomplete schedule instead of the result from scop_get_transformed_schedule. I have a question about vect-pr43423.c. CLooG generates the following code from this example: vect-pr43423.c for (scat_1=0;scat_1<=min(mid_6-1,n_5-1);scat_1++) { (scat_1); (scat_1); } for (scat_1=max(0,mid_6);scat_1<=n_5-1;scat_1++) { (scat_1); (scat_1); } This loops can be parallelized, according to the description of pr43423: "... void foo(int n, int mid) { int i; for(i=0; i<n; i++) { if (i < mid) a[i] = a[i] + b[i]; else a[i] = a[i] + c[i]; } } chfang@pathscale:~/gcc$ gcc -O3 -ftree-vectorizer-verbose=7 -c foo.c foo.c:6: note: not vectorized: control flow in loop. foo.c:3: note: vectorized 0 loops in function. This loop can be vectorized by icc. For this case, I would expect to see two loops with iteration range of [0, mid) and [mid, n). Then both loops can be vectorized. ..." and the code of vect-pr43423.c: /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ISL generates the following code: for (int c1 = 0; c1 < n; c1 += 1) { if (mid >= c1 + 1) { S_6(c1); } else S_7(c1); S_8(c1); } I think it can't be parallelized. Maybe this example is no more suitable. What do you think about this? -- Cheers, Roman Gareev.
Index: gcc/graphite-isl-ast-to-gimple.c =================================================================== --- gcc/graphite-isl-ast-to-gimple.c (revision 213262) +++ gcc/graphite-isl-ast-to-gimple.c (working copy) @@ -435,7 +435,14 @@ redirect_edge_succ_nodup (next_e, after); set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src); - /* TODO: Add checking for the loop parallelism. */ + if (flag_loop_parallelize_all) + { + isl_id *id = isl_ast_node_get_annotation (node_for); + gcc_assert (id); + if (isl_id_get_user (id) != NULL) + loop->can_be_parallel = true; + isl_id_free (id); + } return last_e; } @@ -834,6 +841,97 @@ return schedule_isl; } +/* Applies SCHEDULE to the in and out dimensions of the dependences + DEPS and return the resulting relation. */ + +static __isl_give isl_map * +apply_schedule_on_deps (__isl_keep isl_union_map *schedule, + __isl_keep isl_union_map *deps) +{ + isl_map *x; + isl_union_map *ux, *trans; + + trans = isl_union_map_copy (schedule); + trans = extend_schedule (trans); + ux = isl_union_map_copy (deps); + ux = isl_union_map_apply_domain (ux, isl_union_map_copy (trans)); + ux = isl_union_map_apply_range (ux, trans); + if (isl_union_map_is_empty (ux)) + { + isl_union_map_free (ux); + return NULL; + } + x = isl_map_from_union_map (ux); + + return x; +} + +/* Return true when DEPS is non empty and the intersection of LEX with + the DEPS transformed by SCHEDULE is non empty. LEX is the relation + in which all the inputs before DEPTH occur at the same time as the + output, and the input at DEPTH occurs before output. */ + +static bool +carries_deps (__isl_keep isl_union_map *schedule, + __isl_keep isl_union_map *deps, + int depth) +{ + bool res; + int i; + isl_space *space; + isl_map *lex, *x; + isl_constraint *ineq; + + if (isl_union_map_is_empty (deps)) + return false; + + x = apply_schedule_on_deps (schedule, deps); + if (x == NULL) + return false; + space = isl_map_get_space (x); + space = isl_space_range (space); + lex = isl_map_lex_le (space); + space = isl_map_get_space (x); + ineq = isl_inequality_alloc (isl_local_space_from_space (space)); + + for (i = 0; i < depth - 1; i++) + lex = isl_map_equate (lex, isl_dim_in, i, isl_dim_out, i); + + /* in + 1 <= out */ + ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_out, depth - 1, 1); + ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_in, depth - 1, -1); + ineq = isl_constraint_set_constant_si (ineq, -1); + lex = isl_map_add_constraint (lex, ineq); + x = isl_map_intersect (x, lex); + res = !isl_map_is_empty (x); + + isl_map_free (x); + return res; +} + +/* This method is executed before the construction of a for node. */ +static __isl_give isl_id * +ast_build_before_for (__isl_keep isl_ast_build *build, void *user) +{ + scop_p scop = (scop_p) user; + isl_ast_build *pointer = NULL; + isl_union_map *schedule = isl_ast_build_get_schedule (build); + isl_space *schedule_space = isl_ast_build_get_schedule_space (build); + int dimension = isl_space_dim (schedule_space, isl_dim_out) - 1; + int res = (carries_deps (schedule, scop->must_raw, dimension) + || carries_deps (schedule, scop->may_raw, dimension) + || carries_deps (schedule, scop->must_war, dimension) + || carries_deps (schedule, scop->may_war, dimension) + || carries_deps (schedule, scop->must_waw, dimension) + || carries_deps (schedule, scop->may_waw, dimension)); + if (!res) + pointer = build; + isl_union_map_free (schedule); + isl_space_free (schedule_space); + isl_id *id = isl_id_alloc (isl_ast_build_get_ctx (build), "", pointer); + return id; +} + static __isl_give isl_ast_node * scop_to_isl_ast (scop_p scop, ivs_params &ip) { @@ -846,6 +944,32 @@ add_parameters_to_ivs_params (scop, ip); isl_union_map *schedule_isl = generate_isl_schedule (scop); isl_ast_build *context_isl = generate_isl_context (scop); + if (flag_loop_parallelize_all) + { + if (!scop->must_raw && + !scop->may_raw && + !scop->must_raw_no_source && + !scop->may_raw_no_source && + !scop->must_war && + !scop->may_war && + !scop->must_war_no_source && + !scop->may_war_no_source && + !scop->must_waw && + !scop->may_waw && + !scop->must_waw_no_source && + !scop->may_waw_no_source) + compute_deps (scop, SCOP_BBS (scop), + &scop->must_raw, &scop->may_raw, + &scop->must_raw_no_source, &scop->may_raw_no_source, + &scop->must_war, &scop->may_war, + &scop->must_war_no_source, &scop->may_war_no_source, + &scop->must_waw, &scop->may_waw, + &scop->must_waw_no_source, &scop->may_waw_no_source); + + context_isl = + isl_ast_build_set_before_each_for (context_isl, ast_build_before_for, + scop); + } isl_ast_node *ast_isl = isl_ast_build_ast_from_schedule (context_isl, schedule_isl); isl_ast_build_free (context_isl);