Hi! This implements the fallback mentioned in https://gcc.gnu.org/pipermail/gcc/2020-June/232874.html. Special cases for triangular loops etc. to follow later; also, composite constructs are not supported yet (need to check the passing of temporaries around), and lastprivate might not give the same answers as a serial loop if the last innermost body iteration isn't the last one for some of the outer loops (that will need to be solved separately together with rectangular loops that have no innermost body iterations, but some of the outer loops actually iterate). Also, simd needs work.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2020-06-27 Jakub Jelinek <ja...@redhat.com> * omp-general.h (struct omp_for_data_loop): Add non_rect_referenced member, move outer member. (struct omp_for_data): Add first_nonrect and last_nonrect members. * omp-general.c (omp_extract_for_data): Initialize first_nonrect, last_nonrect and non_rect_referenced members. * omp-expand.c (expand_omp_for_init_counts): Handle non-rectangular loops. (expand_omp_for_init_vars): Add nonrect_bounds parameter. Handle non-rectangular loops. (extract_omp_for_update_vars): Likewise. (expand_omp_for_generic, expand_omp_for_static_nochunk, expand_omp_for_static_chunk, expand_omp_simd, expand_omp_taskloop_for_outer, expand_omp_taskloop_for_inner): Adjust expand_omp_for_init_vars and extract_omp_for_update_vars callers. (expand_omp_for): Don't sorry on non-composite worksharing-loop or distribute. * testsuite/libgomp.c/loop-17.c: New test. * testsuite/libgomp.c/loop-18.c: New test. --- gcc/omp-general.h.jj 2020-06-23 15:58:46.608022000 +0200 +++ gcc/omp-general.h 2020-06-24 15:08:07.283089199 +0200 @@ -47,13 +47,16 @@ enum oacc_loop_flags { or for non-rectangular loops: for (V = M1 * W + N1; V cond M2 * W + N2; V += STEP; where W is V of the OUTER-th loop (e.g. for OUTER 1 it is the - the index of the immediately surrounding loop). */ + the index of the immediately surrounding loop). + NON_RECT_REFERENCED is true for loops referenced by loops + with non-NULL M1 or M2. */ struct omp_for_data_loop { tree v, n1, n2, step, m1, m2; - int outer; enum tree_code cond_code; + int outer; + bool non_rect_referenced; }; /* A structure describing the main elements of a parallel loop. */ @@ -67,6 +70,7 @@ struct omp_for_data tree tiling; /* Tiling values (if non null). */ int collapse; /* Collapsed loops, 1 for a non-collapsed loop. 
*/ int ordered; + int first_nonrect, last_nonrect; bool have_nowait, have_ordered, simd_schedule, have_reductemp; bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp; bool non_rect; --- gcc/omp-general.c.jj 2020-06-23 15:58:46.594022202 +0200 +++ gcc/omp-general.c 2020-06-24 16:58:37.888877351 +0200 @@ -206,6 +206,8 @@ omp_extract_for_data (gomp_for *for_stmt fd->tiling = NULL_TREE; fd->collapse = 1; fd->ordered = 0; + fd->first_nonrect = -1; + fd->last_nonrect = -1; fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; fd->sched_modifiers = 0; fd->chunk_size = NULL_TREE; @@ -372,18 +374,24 @@ omp_extract_for_data (gomp_for *for_stmt loop->m1 = NULL_TREE; loop->m2 = NULL_TREE; loop->outer = 0; + loop->non_rect_referenced = false; if (TREE_CODE (loop->n1) == TREE_VEC) { for (int j = i - 1; j >= 0; j--) if (TREE_VEC_ELT (loop->n1, 0) == gimple_omp_for_index (for_stmt, j)) { loop->outer = i - j; + if (loops != NULL) + loops[j].non_rect_referenced = true; + if (fd->first_nonrect == -1 || fd->first_nonrect > j) + fd->first_nonrect = j; break; } gcc_assert (loop->outer); loop->m1 = TREE_VEC_ELT (loop->n1, 1); loop->n1 = TREE_VEC_ELT (loop->n1, 2); fd->non_rect = true; + fd->last_nonrect = i; } loop->cond_code = gimple_omp_for_cond (for_stmt, i); @@ -401,12 +409,17 @@ omp_extract_for_data (gomp_for *for_stmt if (TREE_VEC_ELT (loop->n2, 0) == gimple_omp_for_index (for_stmt, j)) { loop->outer = i - j; + if (loops != NULL) + loops[j].non_rect_referenced = true; + if (fd->first_nonrect == -1 || fd->first_nonrect > j) + fd->first_nonrect = j; break; } gcc_assert (loop->outer); loop->m2 = TREE_VEC_ELT (loop->n2, 1); loop->n2 = TREE_VEC_ELT (loop->n2, 2); fd->non_rect = true; + fd->last_nonrect = i; } t = gimple_omp_for_incr (for_stmt, i); --- gcc/omp-expand.c.jj 2020-06-23 15:58:46.593022216 +0200 +++ gcc/omp-expand.c 2020-06-26 21:38:16.299301772 +0200 @@ -1734,7 +1734,39 @@ expand_oacc_collapse_vars (const struct count = 0; and set ZERO_ITER_BB to that bb. 
If this isn't the outermost of the combined loop constructs, just initialize COUNTS array - from the _looptemp_ clauses. */ + from the _looptemp_ clauses. For loop nests with non-rectangular + loops, do this only for the rectangular loops. Then pick + the loops which reference outer vars in their bound expressions + and the loops which they refer to and for this sub-nest compute + number of iterations. For triangular loops use Faulhaber's formula + (TBD.), otherwise as a fallback, compute by iterating the loops. + If e.g. the sub-nest is + for (I = N11; I COND1 N12; I += STEP1) + for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2) + for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3) + do: + COUNT = 0; + for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) + for (tmpj = M21 * tmpi + N21; + tmpj COND2 M22 * tmpi + N22; tmpj += STEP2) + { + int tmpk1 = M31 * tmpj + N31; + int tmpk2 = M32 * tmpj + N32; + if (tmpk1 COND3 tmpk2) + { + if (COND3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + COUNT += (adj + tmpk2 - tmpk1) / STEP3; + } + } + and finally multiply the counts of the rectangular loops not + in the sub-nest with COUNT. Also, as counts[fd->last_nonrect] + store number of iterations of the loops from fd->first_nonrect + to fd->last_nonrect inclusive, i.e. the above COUNT multiplied + by the counts of rectangular loops not referenced in any non-rectangular + loops sandwitched in between those. */ /* NOTE: It *could* be better to moosh all of the BBs together, creating one larger BB with all the computation and the unexpected @@ -1813,12 +1845,23 @@ expand_omp_for_init_counts (struct omp_f break; } } + bool rect_count_seen = false; for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) { tree itype = TREE_TYPE (fd->loops[i].v); if (i >= fd->collapse && counts[i]) continue; + if (fd->non_rect) + { + /* Skip loops that use outer iterators in their expressions + during this phase. 
*/ + if (fd->loops[i].m1 || fd->loops[i].m2) + { + counts[i] = build_zero_cst (type); + continue; + } + } if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, fold_convert (itype, fd->loops[i].n1), @@ -1914,13 +1957,197 @@ expand_omp_for_init_counts (struct omp_f } if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) { - if (i == 0) - t = counts[0]; + if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect) + continue; + if (!rect_count_seen) + { + t = counts[i]; + rect_count_seen = true; + } else t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); expand_omp_build_assign (gsi, fd->loop.n2, t); } } + if (fd->non_rect && SSA_VAR_P (fd->loop.n2)) + { + gcc_assert (fd->last_nonrect != -1); + + /* Fallback implementation. Evaluate the loops with m1/m2 + non-NULL as well as their outer loops at runtime using temporaries + instead of the original iteration variables, and in the + body just bump the counter. */ + counts[fd->last_nonrect] = create_tmp_reg (type, ".count"); + expand_omp_build_assign (gsi, counts[fd->last_nonrect], + build_zero_cst (type)); + gimple_stmt_iterator gsi2 = *gsi; + gsi_prev (&gsi2); + e = split_block (entry_bb, gsi_stmt (gsi2)); + e = split_block (e->dest, (gimple *) NULL); + basic_block cur_bb = e->src; + basic_block next_bb = e->dest; + entry_bb = e->dest; + *gsi = gsi_after_labels (entry_bb); + + tree *vs = XALLOCAVEC (tree, fd->last_nonrect); + memset (vs, 0, fd->last_nonrect * sizeof (tree)); + + for (i = 0; i <= fd->last_nonrect; i++) + { + if (fd->loops[i].m1 == NULL_TREE + && fd->loops[i].m2 == NULL_TREE + && !fd->loops[i].non_rect_referenced) + continue; + + tree itype = TREE_TYPE (fd->loops[i].v); + + gsi2 = gsi_after_labels (cur_bb); + tree n1, n2; + t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); + if (fd->loops[i].m1) + { + n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); + n1 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer], 
+ n1); + n1 = fold_build2 (PLUS_EXPR, itype, n1, t); + } + else + n1 = t; + n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + if (i < fd->last_nonrect) + { + vs[i] = create_tmp_reg (itype, ".it"); + expand_omp_build_assign (&gsi2, vs[i], n1); + } + t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); + if (fd->loops[i].m2) + { + n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); + n2 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer], + n2); + n2 = fold_build2 (PLUS_EXPR, itype, n2, t); + } + else + n2 = t; + n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + if (i == fd->last_nonrect) + { + gcond *cond_stmt + = gimple_build_cond (fd->loops[i].cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); + e = split_block (cur_bb, cond_stmt); + e->flags = EDGE_TRUE_VALUE; + ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); + e->probability = profile_probability::likely ().guessed (); + ne->probability = e->probability.invert (); + gsi2 = gsi_after_labels (e->dest); + + t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR + ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + tree step = fold_convert (itype, fd->loops[i].step); + if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (type, t); + t = fold_build2 (PLUS_EXPR, type, counts[fd->last_nonrect], t); + t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t); + e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); + break; + } + e = split_block (cur_bb, last_stmt (cur_bb)); + + basic_block new_cur_bb = create_empty_bb (cur_bb); + add_bb_to_loop (new_cur_bb, cur_bb->loop_father); + + gsi2 = gsi_after_labels (e->dest); + tree step = fold_convert (itype, unshare_expr (fd->loops[i].step)); + t = fold_build2 (PLUS_EXPR, itype, vs[i], step); + t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + expand_omp_build_assign (&gsi2, vs[i], t); + + ne = split_block (e->dest, last_stmt (e->dest)); + gsi2 = gsi_after_labels (ne->dest); + + gcond *cond_stmt + = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); + edge e3, e4; + if (next_bb == entry_bb) + { + e3 = find_edge (ne->dest, next_bb); + e3->flags = EDGE_FALSE_VALUE; + } + else + e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE); + e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE); + e4->probability = profile_probability::likely ().guessed (); + e3->probability = e4->probability.invert (); + basic_block esrc = e->src; + make_edge (e->src, ne->dest, EDGE_FALLTHRU); + cur_bb = new_cur_bb; 
+ basic_block latch_bb = next_bb; + next_bb = e->dest; + remove_edge (e); + set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc); + set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest); + set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest); + } + t = NULL_TREE; + for (i = fd->first_nonrect; i < fd->last_nonrect; i++) + if (!fd->loops[i].non_rect_referenced + && fd->loops[i].m1 == NULL_TREE + && fd->loops[i].m2 == NULL_TREE) + { + if (t == NULL_TREE) + t = counts[i]; + else + t = fold_build2 (MULT_EXPR, type, t, counts[i]); + } + if (t) + { + t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t); + expand_omp_build_assign (gsi, counts[fd->last_nonrect], t); + } + if (!rect_count_seen) + t = counts[fd->last_nonrect]; + else + t = fold_build2 (MULT_EXPR, type, fd->loop.n2, + counts[fd->last_nonrect]); + expand_omp_build_assign (gsi, fd->loop.n2, t); + } + else if (fd->non_rect) + { + tree t = fd->loop.n2; + gcc_assert (TREE_CODE (t) == INTEGER_CST); + int non_rect_referenced = 0, non_rect = 0; + for (i = 0; i < fd->collapse; i++) + { + if ((i < fd->first_nonrect || fd->last_nonrect) + && !integer_zerop (counts[i])) + t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]); + if (fd->loops[i].non_rect_referenced) + non_rect_referenced++; + if (fd->loops[i].m1 || fd->loops[i].m2) + non_rect++; + } + gcc_assert (non_rect == 1 && non_rect_referenced == 1); + counts[fd->last_nonrect] = t; + } } /* Helper function for expand_omp_{for_*,simd}. Generate code like: @@ -1933,11 +2160,43 @@ expand_omp_for_init_counts (struct omp_f if this loop doesn't have an inner loop construct combined with it. If it does have an inner loop construct combined with it and the iteration count isn't known constant, store values from counts array - into its _looptemp_ temporaries instead. */ + into its _looptemp_ temporaries instead. 
+ For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect + inclusive), use the count of all those loops together, and either + find quadratic etc. equation roots (TBD), or as a fallback, do: + COUNT = 0; + for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) + for (tmpj = M21 * tmpi + N21; + tmpj COND2 M22 * tmpi + N22; tmpj += STEP2) + { + int tmpk1 = M31 * tmpj + N31; + int tmpk2 = M32 * tmpj + N32; + if (tmpk1 COND3 tmpk2) + { + if (COND3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + int temp = (adj + tmpk2 - tmpk1) / STEP3; + if (COUNT + temp > T) + { + V1 = tmpi; + V2 = tmpj; + V3 = tmpk1 + (T - COUNT) * STEP3; + goto done; + } + else + COUNT += temp; + } + } + done:; + but for optional innermost or outermost rectangular loops that aren't + referenced by other loop expressions keep doing the division/modulo. */ static void expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, - tree *counts, gimple *inner_stmt, tree startvar) + tree *counts, tree *nonrect_bounds, + gimple *inner_stmt, tree startvar) { int i; if (gimple_omp_for_combined_p (fd->for_stmt)) @@ -1984,25 +2243,237 @@ expand_omp_for_init_vars (struct omp_for itype = vtype; if (POINTER_TYPE_P (vtype)) itype = signed_type_for (vtype); - if (i != 0) + if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect)) t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); else t = tem; - t = fold_convert (itype, t); - t = fold_build2 (MULT_EXPR, itype, t, - fold_convert (itype, fd->loops[i].step)); - if (POINTER_TYPE_P (vtype)) - t = fold_build_pointer_plus (fd->loops[i].n1, t); + if (i == fd->last_nonrect) + { + /* Fallback implementation. Evaluate the loops in between + (inclusive) fd->first_nonrect and fd->last_nonrect at + runtime unsing temporaries instead of the original iteration + variables, in the body just bump the counter and compare + with the desired value. 
*/ + t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree stopval = t; + tree idx = create_tmp_reg (type, ".count"); + expand_omp_build_assign (gsi, idx, + build_zero_cst (type), true); + gimple_stmt_iterator gsi2 = *gsi; + basic_block entry_bb = gsi_bb (gsi2); + edge e = split_block (entry_bb, gsi_stmt (gsi2)); + e = split_block (e->dest, (gimple *) NULL); + basic_block dom_bb = NULL; + basic_block cur_bb = e->src; + basic_block next_bb = e->dest; + entry_bb = e->dest; + *gsi = gsi_after_labels (entry_bb); + + tree *vs = XALLOCAVEC (tree, fd->last_nonrect); + tree n1 = NULL_TREE, n2 = NULL_TREE; + memset (vs, 0, fd->last_nonrect * sizeof (tree)); + + for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++) + { + tree itype = TREE_TYPE (fd->loops[j].v); + bool rect_p = (fd->loops[j].m1 == NULL_TREE + && fd->loops[j].m2 == NULL_TREE + && !fd->loops[j].non_rect_referenced); + gsi2 = gsi_after_labels (cur_bb); + t = fold_convert (itype, unshare_expr (fd->loops[j].n1)); + if (fd->loops[j].m1) + { + n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1)); + n1 = fold_build2 (MULT_EXPR, itype, + vs[j - fd->loops[j].outer], n1); + n1 = fold_build2 (PLUS_EXPR, itype, n1, t); + } + else if (rect_p) + n1 = build_zero_cst (type); + else + n1 = t; + n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + if (j < fd->last_nonrect) + { + vs[j] = create_tmp_reg (rect_p ? 
type : itype, ".it"); + expand_omp_build_assign (&gsi2, vs[j], n1); + } + t = fold_convert (itype, unshare_expr (fd->loops[j].n2)); + if (fd->loops[j].m2) + { + n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2)); + n2 = fold_build2 (MULT_EXPR, itype, + vs[j - fd->loops[j].outer], n2); + n2 = fold_build2 (PLUS_EXPR, itype, n2, t); + } + else if (rect_p) + n2 = counts[j]; + else + n2 = t; + n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + if (j == fd->last_nonrect) + { + gcond *cond_stmt + = gimple_build_cond (fd->loops[j].cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); + e = split_block (cur_bb, cond_stmt); + e->flags = EDGE_TRUE_VALUE; + edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); + e->probability = profile_probability::likely ().guessed (); + ne->probability = e->probability.invert (); + gsi2 = gsi_after_labels (e->dest); + + t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR + ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[j].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + tree step = fold_convert (itype, fd->loops[j].step); + if (TYPE_UNSIGNED (itype) + && fd->loops[j].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (type, t); + t = fold_build2 (PLUS_EXPR, type, idx, t); + t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); + cond_stmt + = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE, + NULL_TREE); + gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); + e = split_block (gsi_bb (gsi2), cond_stmt); + e->flags = EDGE_TRUE_VALUE; + e->probability = profile_probability::likely ().guessed (); + ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE); + ne->probability = e->probability.invert (); + gsi2 = gsi_after_labels (e->dest); + expand_omp_build_assign (&gsi2, idx, t); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb); + break; + } + e = split_block (cur_bb, last_stmt (cur_bb)); + + basic_block new_cur_bb = create_empty_bb (cur_bb); + add_bb_to_loop (new_cur_bb, cur_bb->loop_father); + + gsi2 = gsi_after_labels (e->dest); + if (rect_p) + t = fold_build2 (PLUS_EXPR, type, vs[j], + build_one_cst (type)); + else + { + tree step + = fold_convert (itype, unshare_expr (fd->loops[j].step)); + t = fold_build2 (PLUS_EXPR, itype, vs[j], step); + } + t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + expand_omp_build_assign (&gsi2, vs[j], t); + + edge ne = split_block (e->dest, last_stmt (e->dest)); + gsi2 = gsi_after_labels (ne->dest); + + gcond *cond_stmt; + if (next_bb == entry_bb) + /* No need to 
actually check the outermost condition. */ + cond_stmt + = gimple_build_cond (EQ_EXPR, boolean_true_node, + boolean_true_node, + NULL_TREE, NULL_TREE); + else + cond_stmt + = gimple_build_cond (rect_p ? LT_EXPR + : fd->loops[j].cond_code, + vs[j], n2, NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); + edge e3, e4; + if (next_bb == entry_bb) + { + e3 = find_edge (ne->dest, next_bb); + e3->flags = EDGE_FALSE_VALUE; + dom_bb = ne->dest; + } + else + e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE); + e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE); + e4->probability = profile_probability::likely ().guessed (); + e3->probability = e4->probability.invert (); + basic_block esrc = e->src; + make_edge (e->src, ne->dest, EDGE_FALLTHRU); + cur_bb = new_cur_bb; + basic_block latch_bb = next_bb; + next_bb = e->dest; + remove_edge (e); + set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc); + set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest); + set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest); + } + for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--) + { + tree itype = TREE_TYPE (fd->loops[j].v); + bool rect_p = (fd->loops[j].m1 == NULL_TREE + && fd->loops[j].m2 == NULL_TREE + && !fd->loops[j].non_rect_referenced); + if (j == fd->last_nonrect) + { + t = fold_build2 (MINUS_EXPR, type, stopval, idx); + t = fold_convert (itype, t); + tree t2 + = fold_convert (itype, unshare_expr (fd->loops[j].step)); + t = fold_build2 (MULT_EXPR, itype, t, t2); + t = fold_build2 (PLUS_EXPR, itype, n1, t); + } + else if (rect_p) + { + t = fold_convert (itype, vs[j]); + t = fold_build2 (MULT_EXPR, itype, t, + fold_convert (itype, fd->loops[j].step)); + if (POINTER_TYPE_P (vtype)) + t = fold_build_pointer_plus (fd->loops[j].n1, t); + else + t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t); + } + else + t = vs[j]; + t = force_gimple_operand_gsi (gsi, t, false, + NULL_TREE, true, + GSI_SAME_STMT); + stmt = 
gimple_build_assign (fd->loops[j].v, t); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + } + if (gsi_end_p (*gsi)) + *gsi = gsi_last_bb (gsi_bb (*gsi)); + else + gsi_prev (gsi); + } else - t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); - t = force_gimple_operand_gsi (gsi, t, - DECL_P (fd->loops[i].v) - && TREE_ADDRESSABLE (fd->loops[i].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i].v, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - if (i != 0) + { + t = fold_convert (itype, t); + t = fold_build2 (MULT_EXPR, itype, t, + fold_convert (itype, fd->loops[i].step)); + if (POINTER_TYPE_P (vtype)) + t = fold_build_pointer_plus (fd->loops[i].n1, t); + else + t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); + t = force_gimple_operand_gsi (gsi, t, + DECL_P (fd->loops[i].v) + && TREE_ADDRESSABLE (fd->loops[i].v), + NULL_TREE, false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (fd->loops[i].v, t); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + } + if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect)) { t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, @@ -2010,7 +2481,28 @@ expand_omp_for_init_vars (struct omp_for stmt = gimple_build_assign (tem, t); gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); } + if (i == fd->last_nonrect) + i = fd->first_nonrect; } + if (fd->non_rect) + for (i = 0; i <= fd->last_nonrect; i++) + if (fd->loops[i].m2) + { + tree itype = TREE_TYPE (fd->loops[i].v); + + tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2)); + t = fold_build2 (MULT_EXPR, itype, + fd->loops[i - fd->loops[i].outer].v, t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, + unshare_expr (fd->loops[i].n2))); + nonrect_bounds[i] = create_tmp_reg (itype, ".bound"); + t = force_gimple_operand_gsi (gsi, t, false, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign 
(nonrect_bounds[i], t); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + } } /* Helper function for expand_omp_for_*. Generate code like: @@ -2024,11 +2516,38 @@ expand_omp_for_init_vars (struct omp_for L12: V2 = N21; V1 += STEP1; - goto BODY_BB; */ + goto BODY_BB; + For non-rectangular loops, use temporaries stored in nonrect_bounds + for the upper bounds if M?2 multiplier is present. Given e.g. + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4) + do: + L10: + V4 += STEP4; + if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11; + L11: + V4 = N41 + M41 * V2; // This can be left out if the loop + // refers to the immediate parent loop + V3 += STEP3; + if (V3 cond3 N32) goto BODY_BB; else goto L12; + L12: + V3 = N31; + V2 += STEP2; + if (V2 cond2 N22) goto L120; else goto L13; + L120: + V4 = N41 + M41 * V2; + NONRECT_BOUND4 = N42 + M42 * V2; + if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12; + L13: + V2 = N21; + V1 += STEP1; + goto L120; */ static basic_block -extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, - basic_block body_bb) +extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds, + basic_block cont_bb, basic_block body_bb) { basic_block last_bb, bb, collapse_bb = NULL; int i; @@ -2049,17 +2568,28 @@ extract_omp_for_update_vars (struct omp_ if (i < fd->collapse - 1) { e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); - e->probability = profile_probability::guessed_always ().apply_scale (1, 8); + e->probability + = profile_probability::guessed_always ().apply_scale (1, 8); - t = fd->loops[i + 1].n1; - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (fd->loops[i + 1].v) - && TREE_ADDRESSABLE (fd->loops[i - + 1].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i + 1].v, t); - gsi_insert_after (&gsi, stmt, 
GSI_CONTINUE_LINKING); + struct omp_for_data_loop *l = &fd->loops[i + 1]; + if (l->m1 == NULL_TREE || l->outer != 1) + { + t = l->n1; + if (l->m1) + { + tree t2 + = fold_build2 (MULT_EXPR, TREE_TYPE (t), + fd->loops[i + 1 - l->outer].v, l->m1); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t); + } + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (l->v) + && TREE_ADDRESSABLE (l->v), + NULL_TREE, false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (l->v, t); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } } else collapse_bb = bb; @@ -2077,9 +2607,84 @@ extract_omp_for_update_vars (struct omp_ stmt = gimple_build_assign (fd->loops[i].v, t); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + if (fd->loops[i].non_rect_referenced) + { + basic_block update_bb = NULL, prev_bb = NULL; + for (int j = i + 1; j <= fd->last_nonrect; j++) + if (j - fd->loops[j].outer == i) + { + tree n1, n2; + struct omp_for_data_loop *l = &fd->loops[j]; + basic_block this_bb = create_empty_bb (last_bb); + add_bb_to_loop (this_bb, last_bb->loop_father); + gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb); + if (prev_bb) + { + e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE); + e->probability + = profile_probability::guessed_always ().apply_scale (7, + 8); + set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb); + + } + if (l->m1) + { + t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1, + fd->loops[i].v); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1); + n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (l->v, n1); + gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); + n1 = l->v; + } + else + n1 = force_gimple_operand_gsi (&gsi2, l->n1, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + if (l->m2) + { + t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2, + fd->loops[i].v); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]), + t, unshare_expr (l->n2)); + n2 = 
force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (nonrect_bounds[j], n2); + gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); + n2 = nonrect_bounds[j]; + } + else + n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2), + true, NULL_TREE, false, + GSI_CONTINUE_LINKING); + gcond *cond_stmt + = gimple_build_cond (l->cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING); + if (update_bb == NULL) + update_bb = this_bb; + e = make_edge (this_bb, bb, EDGE_FALSE_VALUE); + e->probability + = profile_probability::guessed_always ().apply_scale (1, 8); + if (prev_bb == NULL) + set_immediate_dominator (CDI_DOMINATORS, this_bb, last_bb); + prev_bb = this_bb; + } + e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE); + e->probability + = profile_probability::guessed_always ().apply_scale (7, 8); + body_bb = update_bb; + } + if (i > 0) { - t = fd->loops[i].n2; + if (fd->loops[i].m2) + t = nonrect_bounds[i]; + else + t = unshare_expr (fd->loops[i].n2); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); tree v = fd->loops[i].v; @@ -2099,6 +2704,7 @@ extract_omp_for_update_vars (struct omp_ } else make_edge (bb, body_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); last_bb = bb; } @@ -3180,7 +3786,7 @@ expand_omp_for_generic (struct omp_regio gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); } if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); if (fd->ordered) { @@ -3327,7 +3933,7 @@ expand_omp_for_generic (struct omp_regio gsi_remove (&gsi, true); if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); + collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb); /* Emit code to get the 
next parallel iteration in L2_BB. */ gsi = gsi_start_bb (l2_bb); @@ -4111,6 +4717,7 @@ expand_omp_for_static_nochunk (struct om } /* Handle linear clause adjustments. */ tree itercnt = NULL_TREE; + tree *nonrect_bounds = NULL; if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) for (tree c = gimple_omp_for_clauses (fd->for_stmt); c; c = OMP_CLAUSE_CHAIN (c)) @@ -4153,7 +4760,15 @@ expand_omp_for_static_nochunk (struct om gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); } if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + { + if (fd->non_rect) + { + nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1); + memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1)); + } + expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt, + startvar); + } if (!broken_loop) { @@ -4205,7 +4820,8 @@ expand_omp_for_static_nochunk (struct om gsi_remove (&gsi, true); if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds, + cont_bb, body_bb); } /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ @@ -4876,7 +5492,7 @@ expand_omp_for_static_chunk (struct omp_ gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); } if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); if (!broken_loop) { @@ -4931,7 +5547,7 @@ expand_omp_for_static_chunk (struct omp_ gsi_remove (&gsi, true); if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb); /* Trip update code goes into TRIP_UPDATE_BB. 
*/ gsi = gsi_start_bb (trip_update_bb); @@ -5331,7 +5947,7 @@ expand_omp_simd (struct omp_region *regi if (gimple_omp_for_combined_into_p (fd->for_stmt)) { gsi_prev (&gsi); - expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1); gsi_next (&gsi); } else @@ -5704,7 +6320,7 @@ expand_omp_taskloop_for_outer (struct om assign_stmt = gimple_build_assign (endvar, t1); gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); /* Remove the GIMPLE_OMP_FOR statement. */ gsi = gsi_for_stmt (for_stmt); @@ -5860,7 +6476,7 @@ expand_omp_taskloop_for_inner (struct om gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); } if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); if (!broken_loop) { @@ -5895,7 +6511,7 @@ expand_omp_taskloop_for_inner (struct om gsi_remove (&gsi, true); if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb); } /* Remove the GIMPLE_OMP_FOR statement. 
*/ @@ -6556,7 +7172,9 @@ expand_omp_for (struct omp_region *regio else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered) { - if (fd.non_rect) + if (fd.non_rect + && (gimple_omp_for_combined_into_p (fd.for_stmt) + || gimple_omp_for_combined_p (fd.for_stmt))) sorry_at (gimple_location (fd.for_stmt), "non-rectangular OpenMP loops not supported yet"); if (fd.chunk_size == NULL) --- libgomp/testsuite/libgomp.c/loop-17.c.jj 2020-06-26 19:18:50.400479189 +0200 +++ libgomp/testsuite/libgomp.c/loop-17.c 2020-06-26 19:48:16.660675237 +0200 @@ -0,0 +1,189 @@ +/* { dg-do run } */ + +extern void abort (void); + +signed char v[5][7][9][21][4][42][3]; +volatile int zero = 0, one = 1, two = 2, three = 3; +volatile int five = 5, seven = 7, nine = 9, eleven = 11; + +int +main () +{ + for (int i = 0; i < 5; i++) + for (int j = 0; j < 7; j++) + for (int k = 0; k < 9; k++) + for (int l = 2 * j; l < 3 * j; l++) + for (int m = 7; m < 11; m++) + for (int n = l; n < 2 * l; n++) + for (int o = 0; o < 3; o++) + v[i][j][k][l][m - 7][n][o] = 1; + + int niters = 0; + #pragma omp parallel + #pragma omp for collapse(7) reduction(+:niters) + for (int i = 0; i < 5; i++) + for (int j = 0; j < 7; j++) + for (int k = 0; k < 9; k++) + for (int l = 2 * j; l < 3 * j; l++) + for (int m = 7; m < 11; m++) + for (int n = l; n < 2 * l; n++) + for (int o = 0; o < 3; o++) + { + niters++; + if (i < 0 || i >= 5 + || j < 0 || j >= 7 + || k < 0 || k >= 9 + || l < 2 * j || l >= 3 * j + || m < 7 || m >= 11 + || n < l || n >= 2 * l + || o < 0 || o >= 3) + abort (); + if (v[i][j][k][l][m - 7][n][o] != 1) + abort (); + v[i][j][k][l][m - 7][n][o]++; + } + + if (niters != 117180) + abort (); + + int niters2 = 0; + #pragma omp parallel + #pragma omp for collapse(7) reduction(+:niters2) + for (int i = zero; i < five; i += one) + for (int j = seven - one; j >= zero; j -= one) + for (int k = nine - one; k >= zero; k += -one) + for (int l = two * j + zero; l < three * j; l += one) + for (int m = eleven - 
one; m >= seven; m -= one) + for (int n = two * l - one; n > one * l - one; n -= one) + for (int o = zero; o < three; o += one) + { + niters2++; + if (i < 0 || i >= 5 + || j < 0 || j >= 7 + || k < 0 || k >= 9 + || l < 2 * j || l >= 3 * j + || m < 7 || m >= 11 + || n < l || n >= 2 * l + || o < 0 || o >= 3) + abort (); + if (v[i][j][k][l][m - 7][n][o] != 2) + abort (); + v[i][j][k][l][m - 7][n][o]++; + } + + if (niters2 != 117180) + abort (); + + for (int i = 0; i < 5; i++) + for (int j = 0; j < 7; j++) + for (int k = 0; k < 9; k++) + for (int l = 2 * j; l < 3 * j; l++) + for (int m = 7; m < 11; m++) + for (int n = l; n < 2 * l; n++) + for (int o = 0; o < 3; o++) + if (v[i][j][k][l][m - 7][n][o] != 3) + abort (); + + int niters3 = 0; + #pragma omp parallel + #pragma omp for collapse(5) reduction(+:niters3) + for (int i = 4; i >= 0; i--) + for (int j = 6; j >= 0; --j) + for (int l = 3 * j - 1; l >= 2 * j; l--) + for (int n = 2 * l + -1; n > l - 1; --n) + for (int o = 2; o >= 0; o--) + { + niters3++; + if (i < 0 || i >= 5 + || j < 0 || j >= 7 + || l < 2 * j || l >= 3 * j + || n < l || n >= 2 * l + || o < 0 || o >= 3) + abort (); + if (v[i][j][0][l][0][n][o] != 3) + abort (); + v[i][j][0][l][0][n][o]++; + } + + if (niters3 != 3255) + abort (); + + int niters4 = 0; + #pragma omp parallel + #pragma omp for collapse(5) reduction(+:niters4) + for (int i = zero; i < five; i += one) + for (int j = zero; j <= seven - one; j += one) + for (int l = j * two; l < three * j + zero; l += one) + for (int n = one * l; n <= l * two - one; n += one) + for (int o = zero; o < three; o += one) + { + niters4++; + if (i < 0 || i >= 5 + || j < 0 || j >= 7 + || l < 2 * j || l >= 3 * j + || n < l || n >= 2 * l + || o < 0 || o >= 3) + abort (); + if (v[i][j][0][l][0][n][o] != 4) + abort (); + v[i][j][0][l][0][n][o]++; + } + + if (niters4 != 3255) + abort (); + + for (int i = 0; i < 5; i++) + for (int j = 0; j < 7; j++) + for (int l = 2 * j; l < 3 * j; l++) + for (int n = l; n < 2 * l; n++) + for 
(int o = 0; o < 3; o++) + if (v[i][j][0][l][0][n][o] != 5) + abort (); + + int niters5 = 0; + #pragma omp parallel + #pragma omp for collapse(3) reduction(+:niters5) + for (int j = 6; j >= 0; --j) + for (int l = 2 * j; l <= 3 * j - 1; l++) + for (int n = 2 * l + -1; n > l - 1; --n) + { + niters5++; + if (j < 0 || j >= 7 + || l < 2 * j || l >= 3 * j + || n < l || n >= 2 * l) + abort (); + if (v[0][j][0][l][0][n][0] != 5) + abort (); + v[0][j][0][l][0][n][0]++; + } + + if (niters5 != 217) + abort (); + + int niters6 = 0; + #pragma omp parallel + #pragma omp for collapse(3) reduction(+:niters6) + for (int j = seven - one; j > - one; j -= one) + for (int l = j * three - one; l >= j * two + zero; l += -one) + for (int n = two * l - one; n > l - one; n -= one) + { + niters6++; + if (j < 0 || j >= 7 + || l < 2 * j || l >= 3 * j + || n < l || n >= 2 * l) + abort (); + if (v[0][j][0][l][0][n][0] != 6) + abort (); + v[0][j][0][l][0][n][0]++; + } + + if (niters6 != 217) + abort (); + + for (int j = 0; j < 7; j++) + for (int l = 2 * j; l < 3 * j; l++) + for (int n = l; n < 2 * l; n++) + if (v[0][j][0][l][0][n][0] != 7) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/loop-18.c.jj 2020-06-27 12:24:00.098807006 +0200 +++ libgomp/testsuite/libgomp.c/loop-18.c 2020-06-27 12:24:23.699473452 +0200 @@ -0,0 +1,245 @@ +/* { dg-do run } */ + +extern void abort (void); + +int x, i, j; +volatile int a, b, c, d, e, f, g, h; +int k[11][101]; + +int +main () +{ + int niters; + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + k[i][j] = 1; + a = 1; b = 11; c = 1; d = 0; e = 1; f = 10; g = 1; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + { + if (i < 1 || i > 10 || j < 1 || j > 10 * i || k[i][j] != 1) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 101 || x != 10340 || niters != 550) + 
abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i < 1 || i > 10 || j < 1 || j > 10 * i || k[i][j] != 2) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 101 || x != 10340 || niters != 550) + abort (); + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + if (k[i][j] == 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + k[i][j] = 1; + a = 0; b = 10; c = 1; d = 0; e = 0; f = 10; g = 0; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + { + if (i < 0 || i >= 10 || j < 0 || j >= 10 * i || k[i][j] != 1) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 10 || j != 90 || x != 9305 || niters != 450) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i < 0 || i >= 10 || j < 0 || j >= 10 * i || k[i][j] != 2) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 10 || j != 90 || x != 9305 || niters != 450) + abort (); + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + if (k[i][j] == 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + k[i][j + 1] = 1; + a = 4; b = 10; c = 1; d = 2; e = -9; f = 1; g = 0; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) 
reduction(+:niters) + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + { + if (i < 4 || i >= 10 || j < -9 + 2 * i || j >= i || k[i][j + 1] != 1) + abort (); + k[i][j + 1]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 10 || j != 9 || */x != 8199 || niters != 15) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i < 4 || i >= 10 || j < -9 + 2 * i || j >= i || k[i][j + 1] != 2) + abort (); + k[i][j + 1]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 10 || j != 9 || */x != 8199 || niters != 15) + abort (); + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + if (k[i][j + 1] == 3) + k[i][j + 1] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + k[i][j] = 1; + a = 1; b = 10; c = 2; d = 0; e = 1; f = 1; g = 1; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + { + if (i < 1 || i >= 10 || j < 1 || j >= i + 1 || k[i][j] != 1) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 10 || x != 9225 || niters != 25) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i < 1 || i >= 10 || j < 1 || j >= i + 1 || k[i][j] != 2) + abort (); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 10 || x != 9225 || niters != 25) + abort (); + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + if (k[i][j] == 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) 
+ for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (j = -11; j >= -41; j -= 15) + k[0][-j] = 1; + a = 4; b = 8; c = 12; d = -8; e = -9; f = -3; g = 6; h = 15; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = 4; i < 8; i += 12) + for (j = -8 * i - 9; j < i * -3 + 6; j += 15) + { + if (i != 4 || j < -41 || j > -11 || k[0][-j] != 1) + abort (); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 16 || j != 4 || x != 5109 || niters != 3) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i != 4 || j < -41 || j > -11 || k[0][-j] != 2) + abort (); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 16 || j != 4 || x != 5109 || niters != 3) + abort (); + for (j = -11; j >= -41; j -= 15) + if (k[0][-j] == 3) + k[0][-j] = 0; + else + abort (); + for (j = -11; j >= -41; j--) + if (k[0][-j] != 0) + abort (); + for (j = -34; j <= -7; j++) + k[0][-j] = 1; + a = -13; b = 7; c = 12; d = 3; e = 5; f = 0; g = -6; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = -13; i < 7; i += 12) + for (j = 3 * i + 5; j < -6; j++) + { + if (i != -13 || j < -34 || j > -7 || k[0][-j] != 1) + abort (); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 11 || j != 2 || */x != -12295 || niters != 28) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + if (i != -13 || j < -34 || j > -7 || k[0][-j] != 2) + abort (); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 11 || j != 2 || */x != -12295 || niters 
!= 28) + abort (); + for (j = -34; j <= -7; j++) + if (k[0][-j] == 3) + k[0][-j] = 0; + else + abort (); + return 0; +} Jakub